structure saas with tools
# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Module for file-like access of blobs, usually invoked via Blob.open()."""

import io
import warnings

from google.api_core.exceptions import RequestRangeNotSatisfiable
from google.cloud.storage._helpers import _NUM_RETRIES_MESSAGE
from google.cloud.storage.retry import DEFAULT_RETRY
from google.cloud.storage.retry import DEFAULT_RETRY_IF_GENERATION_SPECIFIED
from google.cloud.storage.retry import ConditionalRetryPolicy


# Resumable uploads require a chunk size that is an exact multiple of 256 KiB.
CHUNK_SIZE_MULTIPLE = 256 * 1024  # 256 KiB
DEFAULT_CHUNK_SIZE = 40 * 1024 * 1024  # 40 MiB

# Valid keyword arguments for download methods, and blob.reload() if needed.
# Note: Changes here need to be reflected in the blob.open() docstring.
VALID_DOWNLOAD_KWARGS = {
    "if_generation_match",
    "if_generation_not_match",
    "if_metageneration_match",
    "if_metageneration_not_match",
    "timeout",
    "retry",
    "raw_download",
}

# Valid keyword arguments for upload methods.
# Note: Changes here need to be reflected in the blob.open() docstring.
VALID_UPLOAD_KWARGS = {
    "content_type",
    "predefined_acl",
    "num_retries",
    "if_generation_match",
    "if_generation_not_match",
    "if_metageneration_match",
    "if_metageneration_not_match",
    "timeout",
    "checksum",
    "retry",
}
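
# For example, passing an unsupported keyword argument is rejected up front:
# BlobWriter(blob, bogus_kwarg=1) raises ValueError in __init__ below, while
# BlobWriter(blob, if_generation_match=0) is accepted. (Illustrative names;
# ``blob`` is assumed to be an existing Blob object.)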


class BlobReader(io.BufferedIOBase):
    """A file-like object that reads from a blob.

    :type blob: 'google.cloud.storage.blob.Blob'
    :param blob:
        The blob to download.

    :type chunk_size: int
    :param chunk_size:
        (Optional) The minimum number of bytes to read at a time. If fewer
        bytes than the chunk_size are requested, the remainder is buffered.
        The default is the chunk_size of the blob, or 40 MiB.

    :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
    :param retry:
        (Optional) How to retry the RPC. A None value will disable
        retries. A google.api_core.retry.Retry value will enable retries,
        and the object will define retriable response codes and errors and
        configure backoff and timeout options.

        A google.cloud.storage.retry.ConditionalRetryPolicy value wraps a
        Retry object and activates it only if certain conditions are met.
        This class exists to provide safe defaults for RPC calls that are
        not technically safe to retry normally (due to potential data
        duplication or other side-effects) but become safe to retry if a
        condition such as if_metageneration_match is set.

        See the retry.py source code and docstrings in this package
        (google.cloud.storage.retry) for information on retry types and how
        to configure them.

        Media operations (downloads and uploads) do not support non-default
        predicates in a Retry object. The default will always be used. Other
        configuration changes for Retry objects such as delays and deadlines
        are respected.

    :param download_kwargs:
        Keyword arguments to pass to the underlying API calls.
        The following arguments are supported:

        - ``if_generation_match``
        - ``if_generation_not_match``
        - ``if_metageneration_match``
        - ``if_metageneration_not_match``
        - ``timeout``

        Note that download_kwargs are also applied to blob.reload(), if a reload
        is needed during seek().
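
    Example:
        A minimal sketch of direct use (normally obtained via
        ``blob.open("rb")``; ``blob`` is assumed to be an existing Blob
        object with at least 16 bytes of content):

            reader = BlobReader(blob, chunk_size=1024 * 1024)
            header = reader.read(16)  # first 16 bytes; the rest of the
                                      # fetched chunk stays buffered
            reader.seek(0)            # rewind
            data = reader.read()      # read the entire blob
            reader.close()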
    """

    def __init__(self, blob, chunk_size=None, retry=DEFAULT_RETRY, **download_kwargs):
        for kwarg in download_kwargs:
            if kwarg not in VALID_DOWNLOAD_KWARGS:
                raise ValueError(
                    f"BlobReader does not support keyword argument {kwarg}."
                )

        self._blob = blob
        self._pos = 0
        self._buffer = io.BytesIO()
        self._chunk_size = chunk_size or blob.chunk_size or DEFAULT_CHUNK_SIZE
        self._retry = retry
        self._download_kwargs = download_kwargs

    def read(self, size=-1):
        self._checkClosed()  # Raises ValueError if closed.

        result = self._buffer.read(size)
        # If the read request demands more bytes than are buffered, fetch more.
        remaining_size = size - len(result)
        if remaining_size > 0 or size < 0:
            self._pos += self._buffer.tell()
            read_size = len(result)

            self._buffer.seek(0)
            self._buffer.truncate(0)  # Clear the buffer to make way for new data.
            fetch_start = self._pos
            if size > 0:
                # Fetch the larger of self._chunk_size or the remaining_size.
                fetch_end = fetch_start + max(remaining_size, self._chunk_size)
            else:
                fetch_end = None

            # Download the blob. Checksumming must be disabled as we are using
            # chunked downloads, and the server only knows the checksum of the
            # entire file.
            try:
                result += self._blob.download_as_bytes(
                    start=fetch_start,
                    end=fetch_end,
                    checksum=None,
                    retry=self._retry,
                    **self._download_kwargs,
                )
            except RequestRangeNotSatisfiable:
                # We've reached the end of the file. Python file objects should
                # return an empty response in this case, not raise an error.
                pass

            # If more bytes were fetched than are immediately needed, buffer the
            # remainder and then trim the result.
            if size > 0 and len(result) > size:
                self._buffer.write(result[size:])
                self._buffer.seek(0)
                result = result[:size]
            # Increment relative offset by true amount read.
            self._pos += len(result) - read_size
        return result

    def read1(self, size=-1):
        return self.read(size)

    def seek(self, pos, whence=0):
        """Seek within the blob.

        This implementation of seek() uses knowledge of the blob size to
        validate that the reported position does not exceed the blob's last
        byte. If the blob size is not already known, it will call
        blob.reload().
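
        For example, ``seek(-10, 2)`` positions the stream 10 bytes before
        the end of the blob, so a subsequent ``read()`` returns the final
        10 bytes.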
        """
        self._checkClosed()  # Raises ValueError if closed.

        if self._blob.size is None:
            # blob.reload() does not accept the download-only "raw_download"
            # kwarg, so filter it out before reloading.
            reload_kwargs = {
                k: v for k, v in self._download_kwargs.items() if k != "raw_download"
            }
            self._blob.reload(**reload_kwargs)

        initial_offset = self._pos + self._buffer.tell()

        if whence == 0:
            target_pos = pos
        elif whence == 1:
            target_pos = initial_offset + pos
        elif whence == 2:
            target_pos = self._blob.size + pos
        else:
            raise ValueError("invalid whence value")

        if target_pos > self._blob.size:
            target_pos = self._blob.size

        # Seek or invalidate buffer as needed.
        if target_pos < self._pos:
            # Target position < relative offset <= true offset.
            # As the data is not in the buffer, invalidate the buffer.
            self._buffer.seek(0)
            self._buffer.truncate(0)
            new_pos = target_pos
            self._pos = target_pos
        else:
            # relative offset <= target position <= size of file.
            difference = target_pos - initial_offset
            new_pos = self._pos + self._buffer.seek(difference, 1)
        return new_pos

    def close(self):
        self._buffer.close()

    @property
    def closed(self):
        return self._buffer.closed

    def readable(self):
        return True

    def writable(self):
        return False

    def seekable(self):
        return True


class BlobWriter(io.BufferedIOBase):
    """A file-like object that writes to a blob.

    :type blob: 'google.cloud.storage.blob.Blob'
    :param blob:
        The blob to which to write.

    :type chunk_size: int
    :param chunk_size:
        (Optional) The maximum number of bytes to buffer before sending data
        to the server, and the size of each request when data is sent.
        Writes are implemented as a "resumable upload", so chunk_size for
        writes must be exactly a multiple of 256 KiB, as with other resumable
        uploads. The default is the chunk_size of the blob, or 40 MiB.

    :type text_mode: bool
    :param text_mode:
        (Deprecated) A synonym for ignore_flush. For backwards-compatibility,
        if True, sets ignore_flush to True. Use ignore_flush instead. This
        parameter will be removed in a future release.

    :type ignore_flush: bool
    :param ignore_flush:
        Makes flush() do nothing instead of raising an error. flush() without
        closing is not supported by the remote service and therefore calling it
        on this class normally results in io.UnsupportedOperation. However, that
        behavior is incompatible with some consumers and wrappers of file
        objects in Python, such as zipfile.ZipFile or io.TextIOWrapper. Setting
        ignore_flush will cause flush() to successfully do nothing, for
        compatibility with those contexts. The correct way to actually flush
        data to the remote server is to close() (using this object as a context
        manager is recommended).

    :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
    :param retry:
        (Optional) How to retry the RPC. A None value will disable
        retries. A google.api_core.retry.Retry value will enable retries,
        and the object will define retriable response codes and errors and
        configure backoff and timeout options.

        A google.cloud.storage.retry.ConditionalRetryPolicy value wraps a
        Retry object and activates it only if certain conditions are met.
        This class exists to provide safe defaults for RPC calls that are
        not technically safe to retry normally (due to potential data
        duplication or other side-effects) but become safe to retry if a
        condition such as if_metageneration_match is set.

        See the retry.py source code and docstrings in this package
        (google.cloud.storage.retry) for information on retry types and how
        to configure them.

        Media operations (downloads and uploads) do not support non-default
        predicates in a Retry object. The default will always be used. Other
        configuration changes for Retry objects such as delays and deadlines
        are respected.

    :param upload_kwargs:
        Keyword arguments to pass to the underlying API
        calls. The following arguments are supported:

        - ``if_generation_match``
        - ``if_generation_not_match``
        - ``if_metageneration_match``
        - ``if_metageneration_not_match``
        - ``timeout``
        - ``content_type``
        - ``num_retries``
        - ``predefined_acl``
        - ``checksum``
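
    Example:
        A minimal sketch of direct use (normally obtained via
        ``blob.open("wb")``; ``blob`` is assumed to be an existing Blob
        object, and the chunk size satisfies the 256 KiB multiple rule):

            writer = BlobWriter(blob, chunk_size=256 * 1024, content_type="text/plain")
            writer.write(b"hello ")  # buffered; nothing sent yet
            writer.write(b"world")
            writer.close()           # uploads the remainder and finalizes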
    """

    def __init__(
        self,
        blob,
        chunk_size=None,
        text_mode=False,
        ignore_flush=False,
        retry=DEFAULT_RETRY_IF_GENERATION_SPECIFIED,
        **upload_kwargs,
    ):
        for kwarg in upload_kwargs:
            if kwarg not in VALID_UPLOAD_KWARGS:
                raise ValueError(
                    f"BlobWriter does not support keyword argument {kwarg}."
                )
        self._blob = blob
        self._buffer = SlidingBuffer()
        self._upload_and_transport = None
        # Resumable uploads require a chunk size that is a multiple of 256 KiB.
        # self._chunk_size must not be changed after the upload is initiated.
        self._chunk_size = chunk_size or blob.chunk_size or DEFAULT_CHUNK_SIZE
        # text_mode is a deprecated synonym for ignore_flush
        self._ignore_flush = ignore_flush or text_mode
        self._retry = retry
        self._upload_kwargs = upload_kwargs

    @property
    def _chunk_size(self):
        """Get the blob's default chunk size.

        :rtype: int or ``NoneType``
        :returns: The current blob's chunk size, if it is set.
        """
        return self.__chunk_size

    @_chunk_size.setter
    def _chunk_size(self, value):
        """Set the blob's default chunk size.

        :type value: int
        :param value: (Optional) The current blob's chunk size, if it is set.

        :raises: :class:`ValueError` if ``value`` is not ``None`` and is not a
            multiple of 256 KiB.
        """
        if value is not None and value > 0 and value % CHUNK_SIZE_MULTIPLE != 0:
            raise ValueError(
                "Chunk size must be a multiple of %d." % CHUNK_SIZE_MULTIPLE
            )
        self.__chunk_size = value
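
    # For example, chunk_size=256 * 1024 or 512 * 1024 passes the check above,
    # while chunk_size=300000 raises ValueError (not a multiple of 256 KiB).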
    def write(self, b):
        self._checkClosed()  # Raises ValueError if closed.

        pos = self._buffer.write(b)

        # If there is enough content, upload chunks.
        num_chunks = len(self._buffer) // self._chunk_size
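        # e.g. with the default 40 MiB chunk size, 100 MiB of buffered data
        # gives num_chunks == 2; after those chunks are sent, 20 MiB remains
        # buffered until more data arrives or close() is called.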
        if num_chunks:
            self._upload_chunks_from_buffer(num_chunks)

        return pos

    def _initiate_upload(self):
        # num_retries is only supported for backwards-compatibility reasons.
        num_retries = self._upload_kwargs.pop("num_retries", None)
        retry = self._retry
        content_type = self._upload_kwargs.pop("content_type", None)

        if num_retries is not None:
            warnings.warn(_NUM_RETRIES_MESSAGE, DeprecationWarning, stacklevel=2)
            # num_retries and retry are mutually exclusive. If num_retries is
            # set and retry is exactly the default, then nullify retry for
            # backwards compatibility.
            if retry is DEFAULT_RETRY_IF_GENERATION_SPECIFIED:
                retry = None

        # Handle ConditionalRetryPolicy.
        if isinstance(retry, ConditionalRetryPolicy):
            # Conditional retries are designed for non-media calls, which change
            # arguments into query_params dictionaries. Media operations work
            # differently, so here we make a "fake" query_params to feed to the
            # ConditionalRetryPolicy.
            query_params = {
                "ifGenerationMatch": self._upload_kwargs.get("if_generation_match"),
                "ifMetagenerationMatch": self._upload_kwargs.get(
                    "if_metageneration_match"
                ),
            }
            retry = retry.get_retry_policy_if_conditions_met(query_params=query_params)

        self._upload_and_transport = self._blob._initiate_resumable_upload(
            self._blob.bucket.client,
            self._buffer,
            content_type,
            None,  # size is None because the total upload size is not yet known
            num_retries,
            chunk_size=self._chunk_size,
            retry=retry,
            **self._upload_kwargs,
        )

    def _upload_chunks_from_buffer(self, num_chunks):
        """Upload a specified number of chunks."""

        # Initialize the upload if necessary.
        if not self._upload_and_transport:
            self._initiate_upload()

        upload, transport = self._upload_and_transport

        # Attach timeout if specified in the keyword arguments.
        # Otherwise, the default timeout will be used from the media library.
        kwargs = {}
        if "timeout" in self._upload_kwargs:
            kwargs = {"timeout": self._upload_kwargs.get("timeout")}

        # Upload chunks. The SlidingBuffer class will manage seek position.
        for _ in range(num_chunks):
            upload.transmit_next_chunk(transport, **kwargs)

        # Wipe the buffer of chunks uploaded, preserving any remaining data.
        self._buffer.flush()

    def tell(self):
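        # Total bytes written so far: bytes already consumed by the upload
        # (the sliding buffer's cursor) plus bytes still waiting in the buffer.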
        return self._buffer.tell() + len(self._buffer)

    def flush(self):
        # flush() is not fully supported by the remote service, so raise an
        # error here, unless self._ignore_flush is set.
        if not self._ignore_flush:
            raise io.UnsupportedOperation(
                "Cannot flush without finalizing upload. Use close() instead, "
                "or set ignore_flush=True when constructing this class (see "
                "docstring)."
            )

    def close(self):
        if not self._buffer.closed:
            # Upload the final chunk (whatever remains in the buffer) and
            # finalize the resumable upload before closing the buffer.
            self._upload_chunks_from_buffer(1)
        self._buffer.close()

    @property
    def closed(self):
        return self._buffer.closed

    def readable(self):
        return False

    def writable(self):
        return True

    def seekable(self):
        return False


class SlidingBuffer(object):
    """A non-rewindable buffer that frees memory of chunks already consumed.

    This class is necessary because `google-resumable-media-python` expects
    `tell()` to work relative to the start of the file, not relative to a place
    in an intermediate buffer. Using this class, we present an external
    interface with consistent seek and tell behavior without having to actually
    store bytes already sent.

    Behavior of this class differs from an ordinary BytesIO buffer. `write()`
    will always append to the end of the file only and will not otherwise
    change the seek position. `flush()` will delete all data already read (data
    to the left of the seek position). `tell()` will report the seek position
    of the buffer including all deleted data. Additionally, the class
    implements __len__(), which reports the size of the actual underlying
    buffer.

    This class does not attempt to implement the entire Python I/O interface.
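
    Example:
        An illustrative sketch of the sliding behavior:

            buf = SlidingBuffer()
            buf.write(b"abcdef")  # appends; the read cursor stays at 0
            buf.read(4)           # returns b"abcd"; tell() is now 4
            buf.flush()           # discards b"abcd"; len(buf) is now 2
            buf.tell()            # still reports 4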
    """

    def __init__(self):
        self._buffer = io.BytesIO()
        self._cursor = 0

    def write(self, b):
        """Append to the end of the buffer without changing the position."""
        self._checkClosed()  # Raises ValueError if closed.

        bookmark = self._buffer.tell()
        self._buffer.seek(0, io.SEEK_END)
        pos = self._buffer.write(b)
        self._buffer.seek(bookmark)
        return self._cursor + pos

    def read(self, size=-1):
        """Read and move the cursor."""
        self._checkClosed()  # Raises ValueError if closed.

        data = self._buffer.read(size)
        self._cursor += len(data)
        return data

    def flush(self):
        """Delete already-read data (all data to the left of the position)."""
        self._checkClosed()  # Raises ValueError if closed.

        # BytesIO can't be deleted from the left, so save any leftover, unread
        # data, truncate at 0, then re-add the leftover data.
        leftover = self._buffer.read()
        self._buffer.seek(0)
        self._buffer.truncate(0)
        self._buffer.write(leftover)
        self._buffer.seek(0)

    def tell(self):
        """Report how many bytes have been read from the buffer in total."""
        return self._cursor

    def seek(self, pos):
        """Seek to a position (backwards only) within the internal buffer.

        This implementation of seek() verifies that the seek destination is
        contained in _buffer. It will raise ValueError if the destination byte
        has already been purged from the buffer.

        The "whence" argument is not supported in this implementation.
        """
        self._checkClosed()  # Raises ValueError if closed.

        buffer_initial_pos = self._buffer.tell()
        difference = pos - self._cursor
        buffer_seek_result = self._buffer.seek(difference, io.SEEK_CUR)
        if (
            not buffer_seek_result - buffer_initial_pos == difference
            or pos > self._cursor
        ):
            # The seek did not arrive at the expected byte because the internal
            # buffer does not (or no longer) contain the byte. Reset and raise.
            self._buffer.seek(buffer_initial_pos)
            raise ValueError("Cannot seek() to that value.")

        self._cursor = pos
        return self._cursor

    def __len__(self):
        """Determine the size of the buffer by seeking to the end."""
        bookmark = self._buffer.tell()
        length = self._buffer.seek(0, io.SEEK_END)
        self._buffer.seek(bookmark)
        return length

    def close(self):
        return self._buffer.close()

    def _checkClosed(self):
        return self._buffer._checkClosed()

    @property
    def closed(self):
        return self._buffer.closed