structure saas with tools

This commit is contained in:
Davidson Gomes
2025-04-25 15:30:54 -03:00
commit 1aef473937
16434 changed files with 6584257 additions and 0 deletions

View File

@@ -0,0 +1,61 @@
# Copyright 2017 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Utilities for Google Media Downloads and Resumable Uploads.
This package has some general-purpose modules, e.g.
:mod:`~google.resumable_media.common`, but the majority of the
public interface will be contained in subpackages.
===========
Subpackages
===========
Each subpackage is tailored to a specific transport library:
* the :mod:`~google.resumable_media.requests` subpackage uses the ``requests``
transport library.
.. _requests: http://docs.python-requests.org/
==========
Installing
==========
To install with `pip`_:
.. code-block:: console
$ pip install --upgrade google-resumable-media
.. _pip: https://pip.pypa.io/
"""
from google.resumable_media.common import DataCorruption
from google.resumable_media.common import InvalidResponse
from google.resumable_media.common import PERMANENT_REDIRECT
from google.resumable_media.common import RetryStrategy
from google.resumable_media.common import TOO_MANY_REQUESTS
from google.resumable_media.common import UPLOAD_CHUNK_SIZE
__all__ = [
"DataCorruption",
"InvalidResponse",
"PERMANENT_REDIRECT",
"RetryStrategy",
"TOO_MANY_REQUESTS",
"UPLOAD_CHUNK_SIZE",
]
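# Editor's sketch (hypothetical, not part of the package): the re-exported
# names above can be used without importing ``common`` directly, e.g. to
# build a custom retry policy.
def _example_retry_strategy():
    strategy = RetryStrategy(max_sleep=32.0)
    # Resumable upload chunks must be multiples of 256 KB (see the constant).
    assert UPLOAD_CHUNK_SIZE == 256 * 1024
    return strategy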

View File

@@ -0,0 +1,559 @@
# Copyright 2017 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Virtual bases classes for downloading media from Google APIs."""
import http.client
import re
from google.resumable_media import _helpers
from google.resumable_media import common
_CONTENT_RANGE_RE = re.compile(
r"bytes (?P<start_byte>\d+)-(?P<end_byte>\d+)/(?P<total_bytes>\d+)",
flags=re.IGNORECASE,
)
_ACCEPTABLE_STATUS_CODES = (http.client.OK, http.client.PARTIAL_CONTENT)
_GET = "GET"
_ZERO_CONTENT_RANGE_HEADER = "bytes */0"
class DownloadBase(object):
"""Base class for download helpers.
Defines core shared behavior across different download types.
Args:
media_url (str): The URL containing the media to be downloaded.
stream (IO[bytes]): A write-able stream (i.e. file-like object) that
the downloaded resource can be written to.
start (int): The first byte in a range to be downloaded.
end (int): The last byte in a range to be downloaded.
headers (Optional[Mapping[str, str]]): Extra headers that should
be sent with the request, e.g. headers for encrypted data.
Attributes:
media_url (str): The URL containing the media to be downloaded.
start (Optional[int]): The first byte in a range to be downloaded.
end (Optional[int]): The last byte in a range to be downloaded.
"""
def __init__(self, media_url, stream=None, start=None, end=None, headers=None):
self.media_url = media_url
self._stream = stream
self.start = start
self.end = end
if headers is None:
headers = {}
self._headers = headers
self._finished = False
self._retry_strategy = common.RetryStrategy()
@property
def finished(self):
"""bool: Flag indicating if the download has completed."""
return self._finished
@staticmethod
def _get_status_code(response):
"""Access the status code from an HTTP response.
Args:
response (object): The HTTP response object.
Raises:
NotImplementedError: Always, since virtual.
"""
raise NotImplementedError("This implementation is virtual.")
@staticmethod
def _get_headers(response):
"""Access the headers from an HTTP response.
Args:
response (object): The HTTP response object.
Raises:
NotImplementedError: Always, since virtual.
"""
raise NotImplementedError("This implementation is virtual.")
@staticmethod
def _get_body(response):
"""Access the response body from an HTTP response.
Args:
response (object): The HTTP response object.
Raises:
NotImplementedError: Always, since virtual.
"""
raise NotImplementedError("This implementation is virtual.")
class Download(DownloadBase):
"""Helper to manage downloading a resource from a Google API.
"Slices" of the resource can be retrieved by specifying a range
with ``start`` and / or ``end``. However, in typical usage, neither
``start`` nor ``end`` is expected to be provided.
Args:
media_url (str): The URL containing the media to be downloaded.
stream (IO[bytes]): A write-able stream (i.e. file-like object) that
the downloaded resource can be written to.
start (int): The first byte in a range to be downloaded. If not
provided, but ``end`` is provided, will download from the
beginning to ``end`` of the media.
end (int): The last byte in a range to be downloaded. If not
provided, but ``start`` is provided, will download from the
``start`` to the end of the media.
headers (Optional[Mapping[str, str]]): Extra headers that should
be sent with the request, e.g. headers for encrypted data.
checksum (Optional[str]): The type of checksum to compute to verify
the integrity of the object. The response headers must contain
a checksum of the requested type. If the headers lack an
appropriate checksum (for instance in the case of transcoded or
ranged downloads where the remote service does not know the
correct checksum) an INFO-level log will be emitted. Supported
values are "md5", "crc32c" and None.
"""
def __init__(
self, media_url, stream=None, start=None, end=None, headers=None, checksum="md5"
):
super(Download, self).__init__(
media_url, stream=stream, start=start, end=end, headers=headers
)
self.checksum = checksum
self._bytes_downloaded = 0
self._expected_checksum = None
self._checksum_object = None
self._object_generation = None
def _prepare_request(self):
"""Prepare the contents of an HTTP request.
This is everything that must be done before a request that doesn't
require network I/O (or other I/O). This is based on the `sans-I/O`_
philosophy.
Returns:
Tuple[str, str, NoneType, Mapping[str, str]]: The quadruple
* HTTP verb for the request (always GET)
* the URL for the request
* the body of the request (always :data:`None`)
* headers for the request
Raises:
ValueError: If the current :class:`Download` has already
finished.
.. _sans-I/O: https://sans-io.readthedocs.io/
"""
if self.finished:
raise ValueError("A download can only be used once.")
add_bytes_range(self.start, self.end, self._headers)
return _GET, self.media_url, None, self._headers
def _process_response(self, response):
"""Process the response from an HTTP request.
This is everything that must be done after a request that doesn't
require network I/O (or other I/O). This is based on the `sans-I/O`_
philosophy.
Args:
response (object): The HTTP response object.
.. _sans-I/O: https://sans-io.readthedocs.io/
"""
# Tombstone the current Download so it cannot be used again.
self._finished = True
_helpers.require_status_code(
response, _ACCEPTABLE_STATUS_CODES, self._get_status_code
)
def consume(self, transport, timeout=None):
"""Consume the resource to be downloaded.
If a ``stream`` is attached to this download, then the downloaded
resource will be written to the stream.
Args:
transport (object): An object which can make authenticated
requests.
timeout (Optional[Union[float, Tuple[float, float]]]):
The number of seconds to wait for the server response.
Depending on the retry strategy, a request may be repeated
several times using the same timeout each time.
Can also be passed as a tuple (connect_timeout, read_timeout).
See :meth:`requests.Session.request` documentation for details.
Raises:
NotImplementedError: Always, since virtual.
"""
raise NotImplementedError("This implementation is virtual.")
class ChunkedDownload(DownloadBase):
"""Download a resource in chunks from a Google API.
Args:
media_url (str): The URL containing the media to be downloaded.
chunk_size (int): The number of bytes to be retrieved in each
request.
stream (IO[bytes]): A write-able stream (i.e. file-like object) that
will be used to concatenate chunks of the resource as they are
downloaded.
start (int): The first byte in a range to be downloaded. If not
provided, defaults to ``0``.
end (int): The last byte in a range to be downloaded. If not
provided, will download to the end of the media.
headers (Optional[Mapping[str, str]]): Extra headers that should
be sent with each request, e.g. headers for data encryption
key headers.
Attributes:
media_url (str): The URL containing the media to be downloaded.
start (Optional[int]): The first byte in a range to be downloaded.
end (Optional[int]): The last byte in a range to be downloaded.
chunk_size (int): The number of bytes to be retrieved in each request.
Raises:
ValueError: If ``start`` is negative.
"""
def __init__(self, media_url, chunk_size, stream, start=0, end=None, headers=None):
if start < 0:
raise ValueError(
"On a chunked download the starting " "value cannot be negative."
)
super(ChunkedDownload, self).__init__(
media_url, stream=stream, start=start, end=end, headers=headers
)
self.chunk_size = chunk_size
self._bytes_downloaded = 0
self._total_bytes = None
self._invalid = False
@property
def bytes_downloaded(self):
"""int: Number of bytes that have been downloaded."""
return self._bytes_downloaded
@property
def total_bytes(self):
"""Optional[int]: The total number of bytes to be downloaded."""
return self._total_bytes
@property
def invalid(self):
"""bool: Indicates if the download is in an invalid state.
This will occur if a call to :meth:`consume_next_chunk` fails.
"""
return self._invalid
def _get_byte_range(self):
"""Determines the byte range for the next request.
Returns:
Tuple[int, int]: The pair of begin and end byte for the next
chunked request.
"""
curr_start = self.start + self.bytes_downloaded
curr_end = curr_start + self.chunk_size - 1
# Make sure ``curr_end`` does not exceed ``end``.
if self.end is not None:
curr_end = min(curr_end, self.end)
# Make sure ``curr_end`` does not exceed ``total_bytes - 1``.
if self.total_bytes is not None:
curr_end = min(curr_end, self.total_bytes - 1)
return curr_start, curr_end
def _prepare_request(self):
"""Prepare the contents of an HTTP request.
This is everything that must be done before a request that doesn't
require network I/O (or other I/O). This is based on the `sans-I/O`_
philosophy.
.. note::
This method will be used multiple times, so ``headers`` will
be mutated in between requests. However, we don't make a copy
since the same keys are being updated.
Returns:
Tuple[str, str, NoneType, Mapping[str, str]]: The quadruple
* HTTP verb for the request (always GET)
* the URL for the request
* the body of the request (always :data:`None`)
* headers for the request
Raises:
ValueError: If the current download has finished.
ValueError: If the current download is invalid.
.. _sans-I/O: https://sans-io.readthedocs.io/
"""
if self.finished:
raise ValueError("Download has finished.")
if self.invalid:
raise ValueError("Download is invalid and cannot be re-used.")
curr_start, curr_end = self._get_byte_range()
add_bytes_range(curr_start, curr_end, self._headers)
return _GET, self.media_url, None, self._headers
def _make_invalid(self):
"""Simple setter for ``invalid``.
This is intended to be passed along as a callback to helpers that
raise an exception so they can mark this instance as invalid before
raising.
"""
self._invalid = True
def _process_response(self, response):
"""Process the response from an HTTP request.
This is everything that must be done after a request that doesn't
require network I/O. This is based on the `sans-I/O`_ philosophy.
For the time being, this **does require** some form of I/O to write
a chunk to ``stream``. However, this will (almost) certainly not be
network I/O.
Updates the current state after consuming a chunk. First,
increments ``bytes_downloaded`` by the number of bytes in the
``content-length`` header.
If ``total_bytes`` is already set, this assumes (but does not check)
that we already have the correct value and doesn't bother to check
that it agrees with the headers.
We expect the **total** length to be in the ``content-range`` header,
but this header is only present on requests which sent the ``range``
header. This response header should be of the form
``bytes {start}-{end}/{total}`` and ``{end} - {start} + 1``
should be the same as the ``Content-Length``.
Args:
response (object): The HTTP response object (need headers).
Raises:
~google.resumable_media.common.InvalidResponse: If the number
of bytes in the body doesn't match the content length header.
.. _sans-I/O: https://sans-io.readthedocs.io/
"""
# Verify the response before updating the current instance.
if _check_for_zero_content_range(
response, self._get_status_code, self._get_headers
):
self._finished = True
return
_helpers.require_status_code(
response,
_ACCEPTABLE_STATUS_CODES,
self._get_status_code,
callback=self._make_invalid,
)
headers = self._get_headers(response)
response_body = self._get_body(response)
start_byte, end_byte, total_bytes = get_range_info(
response, self._get_headers, callback=self._make_invalid
)
transfer_encoding = headers.get("transfer-encoding")
if transfer_encoding is None:
content_length = _helpers.header_required(
response,
"content-length",
self._get_headers,
callback=self._make_invalid,
)
num_bytes = int(content_length)
if len(response_body) != num_bytes:
self._make_invalid()
raise common.InvalidResponse(
response,
"Response is different size than content-length",
"Expected",
num_bytes,
"Received",
len(response_body),
)
else:
# 'content-length' header not allowed with chunked encoding.
num_bytes = end_byte - start_byte + 1
# First update ``bytes_downloaded``.
self._bytes_downloaded += num_bytes
# If the end byte is past ``end`` or ``total_bytes - 1`` we are done.
if self.end is not None and end_byte >= self.end:
self._finished = True
elif end_byte >= total_bytes - 1:
self._finished = True
# NOTE: We only use ``total_bytes`` if not already known.
if self.total_bytes is None:
self._total_bytes = total_bytes
# Write the response body to the stream.
self._stream.write(response_body)
def consume_next_chunk(self, transport, timeout=None):
"""Consume the next chunk of the resource to be downloaded.
Args:
transport (object): An object which can make authenticated
requests.
timeout (Optional[Union[float, Tuple[float, float]]]):
The number of seconds to wait for the server response.
Depending on the retry strategy, a request may be repeated
several times using the same timeout each time.
Can also be passed as a tuple (connect_timeout, read_timeout).
See :meth:`requests.Session.request` documentation for details.
Raises:
NotImplementedError: Always, since virtual.
"""
raise NotImplementedError("This implementation is virtual.")
def add_bytes_range(start, end, headers):
"""Add a bytes range to a header dictionary.
Some possible inputs and the corresponding bytes ranges::
>>> headers = {}
>>> add_bytes_range(None, None, headers)
>>> headers
{}
>>> add_bytes_range(500, 999, headers)
>>> headers['range']
'bytes=500-999'
>>> add_bytes_range(None, 499, headers)
>>> headers['range']
'bytes=0-499'
>>> add_bytes_range(-500, None, headers)
>>> headers['range']
'bytes=-500'
>>> add_bytes_range(9500, None, headers)
>>> headers['range']
'bytes=9500-'
Args:
start (Optional[int]): The first byte in a range. Can be zero,
positive, negative or :data:`None`.
end (Optional[int]): The last byte in a range. Assumed to be
positive.
headers (Mapping[str, str]): A headers mapping which can have the
bytes range added if at least one of ``start`` or ``end``
is not :data:`None`.
"""
if start is None:
if end is None:
# No range to add.
return
else:
# NOTE: This assumes ``end`` is non-negative.
bytes_range = "0-{:d}".format(end)
else:
if end is None:
if start < 0:
bytes_range = "{:d}".format(start)
else:
bytes_range = "{:d}-".format(start)
else:
# NOTE: This is invalid if ``start < 0``.
bytes_range = "{:d}-{:d}".format(start, end)
headers[_helpers.RANGE_HEADER] = "bytes=" + bytes_range
def get_range_info(response, get_headers, callback=_helpers.do_nothing):
"""Get the start, end and total bytes from a content range header.
Args:
response (object): An HTTP response object.
get_headers (Callable[Any, Mapping[str, str]]): Helper to get headers
from an HTTP response.
callback (Optional[Callable]): A callback that takes no arguments,
to be executed when an exception is being raised.
Returns:
Tuple[int, int, int]: The start byte, end byte and total bytes.
Raises:
~google.resumable_media.common.InvalidResponse: If the
``Content-Range`` header is not of the form
``bytes {start}-{end}/{total}``.
"""
content_range = _helpers.header_required(
response, _helpers.CONTENT_RANGE_HEADER, get_headers, callback=callback
)
match = _CONTENT_RANGE_RE.match(content_range)
if match is None:
callback()
raise common.InvalidResponse(
response,
"Unexpected content-range header",
content_range,
'Expected to be of the form "bytes {start}-{end}/{total}"',
)
return (
int(match.group("start_byte")),
int(match.group("end_byte")),
int(match.group("total_bytes")),
)
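# Editor's sketch (hypothetical, not part of this module): ``get_range_info``
# applied to a stand-in response whose only header is a well-formed
# ``Content-Range``. The class and values below are invented for illustration.
def _example_get_range_info():
    class _FakeResponse:
        headers = {"content-range": "bytes 0-99/1000"}

    start, end, total = get_range_info(_FakeResponse(), lambda r: r.headers)
    assert (start, end, total) == (0, 99, 1000)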
def _check_for_zero_content_range(response, get_status_code, get_headers):
"""Validate if response status code is 416 and content range is zero.
This is the special case for handling zero bytes files.
Args:
response (object): An HTTP response object.
get_status_code (Callable[Any, int]): Helper to get a status code
from a response.
get_headers (Callable[Any, Mapping[str, str]]): Helper to get headers
from an HTTP response.
Returns:
bool: True if the content range total bytes is zero, False otherwise.
"""
if get_status_code(response) == http.client.REQUESTED_RANGE_NOT_SATISFIABLE:
content_range = _helpers.header_required(
response,
_helpers.CONTENT_RANGE_HEADER,
get_headers,
callback=_helpers.do_nothing,
)
if content_range == _ZERO_CONTENT_RANGE_HEADER:
return True
return False
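# Editor's sketch (hypothetical, not part of this module): a 416 response
# carrying ``bytes */0`` marks a zero-byte object, so a chunked download
# treats it as finished rather than as an error.
def _example_zero_content_range():
    class _FakeResponse:
        status_code = http.client.REQUESTED_RANGE_NOT_SATISFIABLE
        headers = {"content-range": "bytes */0"}

    assert _check_for_zero_content_range(
        _FakeResponse(), lambda r: r.status_code, lambda r: r.headers
    )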

View File

@@ -0,0 +1,434 @@
# Copyright 2017 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Shared utilities used by both downloads and uploads."""
from __future__ import absolute_import
import base64
import hashlib
import logging
import random
import warnings
from urllib.parse import parse_qs
from urllib.parse import urlencode
from urllib.parse import urlsplit
from urllib.parse import urlunsplit
from google.resumable_media import common
RANGE_HEADER = "range"
CONTENT_RANGE_HEADER = "content-range"
CONTENT_ENCODING_HEADER = "content-encoding"
_SLOW_CRC32C_WARNING = (
"Currently using crcmod in pure python form. This is a slow "
"implementation. Python 3 has a faster implementation, `google-crc32c`, "
"which will be used if it is installed."
)
_GENERATION_HEADER = "x-goog-generation"
_HASH_HEADER = "x-goog-hash"
_STORED_CONTENT_ENCODING_HEADER = "x-goog-stored-content-encoding"
_MISSING_CHECKSUM = """\
No {checksum_type} checksum was returned from the service while downloading {}
(which happens for composite objects), so client-side content integrity
checking is not being performed."""
_LOGGER = logging.getLogger(__name__)
def do_nothing():
"""Simple default callback."""
def header_required(response, name, get_headers, callback=do_nothing):
"""Checks that a specific header is in a headers dictionary.
Args:
response (object): An HTTP response object, expected to have a
``headers`` attribute that is a ``Mapping[str, str]``.
name (str): The name of a required header.
get_headers (Callable[Any, Mapping[str, str]]): Helper to get headers
from an HTTP response.
callback (Optional[Callable]): A callback that takes no arguments,
to be executed when an exception is being raised.
Returns:
str: The desired header.
Raises:
~google.resumable_media.common.InvalidResponse: If the header
is missing.
"""
headers = get_headers(response)
if name not in headers:
callback()
raise common.InvalidResponse(
response, "Response headers must contain header", name
)
return headers[name]
def require_status_code(response, status_codes, get_status_code, callback=do_nothing):
"""Require a response has a status code among a list.
Args:
response (object): The HTTP response object.
status_codes (tuple): The acceptable status codes.
get_status_code (Callable[Any, int]): Helper to get a status code
from a response.
callback (Optional[Callable]): A callback that takes no arguments,
to be executed when an exception is being raised.
Returns:
int: The status code.
Raises:
~google.resumable_media.common.InvalidResponse: If the status code
is not one of the values in ``status_codes``.
"""
status_code = get_status_code(response)
if status_code not in status_codes:
if status_code not in common.RETRYABLE:
callback()
raise common.InvalidResponse(
response,
"Request failed with status code",
status_code,
"Expected one of",
*status_codes
)
return status_code
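# Editor's sketch (hypothetical, not part of this module): a status code in
# the acceptable tuple passes straight through ``require_status_code``; any
# other code raises ``common.InvalidResponse``.
def _example_require_status_code():
    class _FakeResponse:
        status_code = 200

    assert require_status_code(_FakeResponse(), (200,), lambda r: r.status_code) == 200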
def calculate_retry_wait(base_wait, max_sleep, multiplier=2.0):
"""Calculate the amount of time to wait before a retry attempt.
Wait time grows exponentially with the number of attempts, until
``max_sleep``.
A random amount of jitter (between 0 and 1 seconds) is added to spread out
retry attempts from different clients.
Args:
base_wait (float): The "base" wait time (i.e. without any jitter)
that will be multiplied until it reaches the maximum sleep.
max_sleep (float): Maximum value that a sleep time is allowed to be.
multiplier (float): Multiplier to apply to the base wait.
Returns:
Tuple[float, float]: The new base wait time as well as the wait time
to be applied (with a random amount of jitter between 0 and 1 seconds
added).
"""
new_base_wait = multiplier * base_wait
if new_base_wait > max_sleep:
new_base_wait = max_sleep
jitter_ms = random.randint(0, 1000)
return new_base_wait, new_base_wait + 0.001 * jitter_ms
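# Editor's sketch (hypothetical, not part of this module): three rounds of
# backoff from a one-second base, capped at eight seconds. The base doubles
# to 2.0, 4.0, then 8.0, and each actual wait adds up to one second of jitter.
def _example_backoff_schedule():
    base_wait, waits = 1.0, []
    for _ in range(3):
        base_wait, wait = calculate_retry_wait(base_wait, 8.0)
        waits.append(wait)
    return waits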
def _get_crc32c_object():
"""Get crc32c object
Attempt to use the Google-CRC32c package. If it isn't available, try
to use CRCMod. CRCMod might be using a 'slow' varietal. If so, warn...
"""
try:
import google_crc32c # type: ignore
crc_obj = google_crc32c.Checksum()
except ImportError:
try:
import crcmod # type: ignore
crc_obj = crcmod.predefined.Crc("crc-32c")
_is_fast_crcmod()
except ImportError:
raise ImportError("Failed to import either `google-crc32c` or `crcmod`")
return crc_obj
def _is_fast_crcmod():
# Determine if this is using the slow form of crcmod.
nested_crcmod = __import__(
"crcmod.crcmod",
globals(),
locals(),
["_usingExtension"],
0,
)
fast_crc = getattr(nested_crcmod, "_usingExtension", False)
if not fast_crc:
warnings.warn(_SLOW_CRC32C_WARNING, RuntimeWarning, stacklevel=2)
return fast_crc
def _get_metadata_key(checksum_type):
if checksum_type == "md5":
return "md5Hash"
else:
return checksum_type
def prepare_checksum_digest(digest_bytestring):
"""Convert a checksum object into a digest encoded for an HTTP header.
Args:
digest_bytestring (bytes): A checksum digest bytestring.
Returns:
str: A base64 string representation of the input.
"""
encoded_digest = base64.b64encode(digest_bytestring)
# NOTE: ``b64encode`` returns ``bytes``, but HTTP headers expect ``str``.
return encoded_digest.decode("utf-8")
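# Editor's sketch (hypothetical, not part of this module): an MD5 digest
# encoded the way checksum headers expect it, i.e. base64 text rather than
# raw bytes.
def _example_prepare_checksum_digest():
    digest = hashlib.md5(b"Some content").digest()
    header_value = prepare_checksum_digest(digest)
    assert isinstance(header_value, str)
    return header_value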
def _get_expected_checksum(response, get_headers, media_url, checksum_type):
"""Get the expected checksum and checksum object for the download response.
Args:
response (~requests.Response): The HTTP response object.
get_headers (callable: response->dict): returns response headers.
media_url (str): The URL containing the media to be downloaded.
checksum_type (Optional[str]): The checksum type to read from the headers,
exactly as it will appear in the headers (case-sensitive). Must be
"md5", "crc32c" or None.
Returns:
Tuple (Optional[str], object): The expected checksum of the response,
if it can be detected from the ``X-Goog-Hash`` header, and the
appropriate checksum object for the expected checksum.
"""
if checksum_type not in ["md5", "crc32c", None]:
raise ValueError("checksum must be ``'md5'``, ``'crc32c'`` or ``None``")
elif checksum_type in ["md5", "crc32c"]:
headers = get_headers(response)
expected_checksum = _parse_checksum_header(
headers.get(_HASH_HEADER), response, checksum_label=checksum_type
)
if expected_checksum is None:
msg = _MISSING_CHECKSUM.format(
media_url, checksum_type=checksum_type.upper()
)
_LOGGER.info(msg)
checksum_object = _DoNothingHash()
else:
if checksum_type == "md5":
checksum_object = hashlib.md5()
else:
checksum_object = _get_crc32c_object()
else:
expected_checksum = None
checksum_object = _DoNothingHash()
return (expected_checksum, checksum_object)
def _get_uploaded_checksum_from_headers(response, get_headers, checksum_type):
"""Get the computed checksum and checksum object from the response headers.
Args:
response (~requests.Response): The HTTP response object.
get_headers (callable: response->dict): returns response headers.
checksum_type (Optional[str]): The checksum type to read from the headers,
exactly as it will appear in the headers (case-sensitive). Must be
"md5", "crc32c" or None.
Returns:
Tuple (Optional[str], object): The checksum of the response,
if it can be detected from the ``X-Goog-Hash`` header, and the
appropriate checksum object for the expected checksum.
"""
if checksum_type not in ["md5", "crc32c", None]:
raise ValueError("checksum must be ``'md5'``, ``'crc32c'`` or ``None``")
elif checksum_type in ["md5", "crc32c"]:
headers = get_headers(response)
remote_checksum = _parse_checksum_header(
headers.get(_HASH_HEADER), response, checksum_label=checksum_type
)
else:
remote_checksum = None
return remote_checksum
def _parse_checksum_header(header_value, response, checksum_label):
"""Parses the checksum header from an ``X-Goog-Hash`` value.
.. _header reference: https://cloud.google.com/storage/docs/\
xml-api/reference-headers#xgooghash
Expects ``header_value`` (if not :data:`None`) to be in one of the three
following formats:
* ``crc32c=n03x6A==``
* ``md5=Ojk9c3dhfxgoKVVHYwFbHQ==``
* ``crc32c=n03x6A==,md5=Ojk9c3dhfxgoKVVHYwFbHQ==``
See the `header reference`_ for more information.
Args:
header_value (Optional[str]): The ``X-Goog-Hash`` header from
a download response.
response (~requests.Response): The HTTP response object.
checksum_label (str): The label of the header value to read, as in the
examples above. Typically "md5" or "crc32c"
Returns:
Optional[str]: The expected checksum of the response, if it
can be detected from the ``X-Goog-Hash`` header; otherwise, None.
Raises:
~google.resumable_media.common.InvalidResponse: If there are
multiple checksums of the requested type in ``header_value``.
"""
if header_value is None:
return None
matches = []
for checksum in header_value.split(","):
name, value = checksum.split("=", 1)
# Official docs say "," is the separator, but real-world responses have encountered ", "
if name.lstrip() == checksum_label:
matches.append(value)
if len(matches) == 0:
return None
elif len(matches) == 1:
return matches[0]
else:
raise common.InvalidResponse(
response,
"X-Goog-Hash header had multiple ``{}`` values.".format(checksum_label),
header_value,
matches,
)
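# Editor's sketch (hypothetical, not part of this module): picking the
# crc32c value out of a combined ``X-Goog-Hash`` header. The response
# argument is only used when an error is raised, so ``None`` suffices here.
def _example_parse_checksum_header():
    header = "crc32c=n03x6A==,md5=Ojk9c3dhfxgoKVVHYwFbHQ=="
    assert _parse_checksum_header(header, None, "crc32c") == "n03x6A=="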
def _get_checksum_object(checksum_type):
"""Respond with a checksum object for a supported type, if not None.
Raises ValueError if checksum_type is unsupported.
"""
if checksum_type == "md5":
return hashlib.md5()
elif checksum_type == "crc32c":
return _get_crc32c_object()
elif checksum_type is None:
return None
else:
raise ValueError("checksum must be ``'md5'``, ``'crc32c'`` or ``None``")
def _parse_generation_header(response, get_headers):
"""Parses the generation header from an ``X-Goog-Generation`` value.
Args:
response (~requests.Response): The HTTP response object.
get_headers (callable: response->dict): returns response headers.
Returns:
Optional[int]: The object generation from the response, if it
can be detected from the ``X-Goog-Generation`` header; otherwise, None.
"""
headers = get_headers(response)
object_generation = headers.get(_GENERATION_HEADER, None)
if object_generation is None:
return None
else:
return int(object_generation)
def _get_generation_from_url(media_url):
"""Retrieve the object generation query param specified in the media url.
Args:
media_url (str): The URL containing the media to be downloaded.
Returns:
Optional[int]: The object generation from the media URL, if present; otherwise, None.
"""
_, _, _, query, _ = urlsplit(media_url)
query_params = parse_qs(query)
object_generation = query_params.get("generation", None)
if object_generation is None:
return None
else:
return int(object_generation[0])
def add_query_parameters(media_url, query_params):
"""Add query parameters to a base url.
Args:
media_url (str): The URL containing the media to be downloaded.
query_params (dict): Names and values of the query parameters to add.
Returns:
str: URL with additional query strings appended.
"""
if len(query_params) == 0:
return media_url
scheme, netloc, path, query, frag = urlsplit(media_url)
params = parse_qs(query)
new_params = {**params, **query_params}
query = urlencode(new_params, doseq=True)
return urlunsplit((scheme, netloc, path, query, frag))
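# Editor's sketch (hypothetical, not part of this module): appending a
# ``generation`` parameter to a media URL that already carries a query
# string; existing parameters are preserved.
def _example_add_query_parameters():
    url = "https://example.invalid/media?alt=media"
    expected = "https://example.invalid/media?alt=media&generation=123"
    assert add_query_parameters(url, {"generation": "123"}) == expected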
def _is_decompressive_transcoding(response, get_headers):
"""Returns True if the object was served decompressed. This happens when the
"x-goog-stored-content-encoding" header is "gzip" and "content-encoding" header
is not "gzip". See more at: https://cloud.google.com/storage/docs/transcoding#transcoding_and_gzip
Args:
response (~requests.Response): The HTTP response object.
get_headers (callable: response->dict): returns response headers.
Returns:
bool: Returns True if decompressive transcoding has occurred; otherwise, False.
"""
headers = get_headers(response)
return (
headers.get(_STORED_CONTENT_ENCODING_HEADER) == "gzip"
and headers.get(CONTENT_ENCODING_HEADER) != "gzip"
)
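# Editor's sketch (hypothetical, not part of this module): an object stored
# gzipped but served without ``Content-Encoding: gzip`` was decompressed in
# transit, which is exactly what the check above detects.
def _example_decompressive_transcoding():
    class _FakeResponse:
        headers = {"x-goog-stored-content-encoding": "gzip"}

    assert _is_decompressive_transcoding(_FakeResponse(), lambda r: r.headers)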
class _DoNothingHash(object):
"""Do-nothing hash object.
Intended as a stand-in for ``hashlib.md5`` or a crc32c checksum
implementation in cases where it isn't necessary to compute the hash.
"""
def update(self, unused_chunk):
"""Do-nothing ``update`` method.
Intended to match the interface of ``hashlib.md5`` and other checksums.
Args:
unused_chunk (bytes): A chunk of data.
"""

File diff suppressed because it is too large.

View File

@@ -0,0 +1,179 @@
# Copyright 2017 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Common utilities for Google Media Downloads and Resumable Uploads.
Includes custom exception types, useful constants and shared helpers.
"""
import http.client
_SLEEP_RETRY_ERROR_MSG = (
"At most one of `max_cumulative_retry` and `max_retries` " "can be specified."
)
UPLOAD_CHUNK_SIZE = 262144 # 256 * 1024
"""int: Chunks in a resumable upload must come in multiples of 256 KB."""
PERMANENT_REDIRECT = http.client.PERMANENT_REDIRECT # type: ignore
"""int: Permanent redirect status code.
.. note::
This is a backward-compatibility alias.
It is used by Google services to indicate some (but not all) of
a resumable upload has been completed.
For more information, see `RFC 7238`_.
.. _RFC 7238: https://tools.ietf.org/html/rfc7238
"""
TOO_MANY_REQUESTS = http.client.TOO_MANY_REQUESTS
"""int: Status code indicating rate-limiting.
.. note::
This is a backward-compatibility alias.
For more information, see `RFC 6585`_.
.. _RFC 6585: https://tools.ietf.org/html/rfc6585#section-4
"""
MAX_SLEEP = 64.0
"""float: Maximum amount of time allowed between requests.
Used during the retry process for sleep after a failed request.
Chosen since it is the power of two nearest to one minute.
"""
MAX_CUMULATIVE_RETRY = 600.0
"""float: Maximum total sleep time allowed during retry process.
This is provided (10 minutes) as a default. When the cumulative sleep
exceeds this limit, no more retries will occur.
"""
RETRYABLE = (
http.client.TOO_MANY_REQUESTS, # 429
http.client.REQUEST_TIMEOUT, # 408
http.client.INTERNAL_SERVER_ERROR, # 500
http.client.BAD_GATEWAY, # 502
http.client.SERVICE_UNAVAILABLE, # 503
http.client.GATEWAY_TIMEOUT, # 504
)
"""iterable: HTTP status codes that indicate a retryable error.
Connection errors are also retried, but are not listed as they are
exceptions, not status codes.
"""
class InvalidResponse(Exception):
"""Error class for responses which are not in the correct state.
Args:
response (object): The HTTP response which caused the failure.
args (tuple): The positional arguments typically passed to an
exception class.
"""
def __init__(self, response, *args):
super(InvalidResponse, self).__init__(*args)
self.response = response
"""object: The HTTP response object that caused the failure."""
class DataCorruption(Exception):
"""Error class for corrupt media transfers.
Args:
response (object): The HTTP response which caused the failure.
args (tuple): The positional arguments typically passed to an
exception class.
"""
def __init__(self, response, *args):
super(DataCorruption, self).__init__(*args)
self.response = response
"""object: The HTTP response object that caused the failure."""
class RetryStrategy(object):
"""Configuration class for retrying failed requests.
At most one of ``max_cumulative_retry`` and ``max_retries`` can be
specified (the former caps the total sleep time, the latter the number of
retries). If neither is specified, then ``max_cumulative_retry`` is set as
:data:`MAX_CUMULATIVE_RETRY`.
Args:
max_sleep (Optional[float]): The maximum amount of time to sleep after
a failed request. Default is :attr:`MAX_SLEEP`.
max_cumulative_retry (Optional[float]): The maximum **total** amount of
time to sleep during retry process.
max_retries (Optional[int]): The number of retries to attempt.
initial_delay (Optional[float]): The initial delay. Default 1.0 second.
multiplier (Optional[float]): Multiplier applied at each backoff step. Default is 2.0.
Attributes:
max_sleep (float): Maximum amount of time allowed between requests.
max_cumulative_retry (Optional[float]): Maximum total sleep time
allowed during retry process.
max_retries (Optional[int]): The number of retries to attempt.
initial_delay (Optional[float]): The initial delay. Default 1.0 second.
multiplier (Optional[float]): Multiplier applied at each backoff step. Default is 2.0.
Raises:
ValueError: If both of ``max_cumulative_retry`` and ``max_retries``
are passed.
"""
def __init__(
self,
max_sleep=MAX_SLEEP,
max_cumulative_retry=None,
max_retries=None,
initial_delay=1.0,
multiplier=2.0,
):
if max_cumulative_retry is not None and max_retries is not None:
raise ValueError(_SLEEP_RETRY_ERROR_MSG)
if max_cumulative_retry is None and max_retries is None:
max_cumulative_retry = MAX_CUMULATIVE_RETRY
self.max_sleep = max_sleep
self.max_cumulative_retry = max_cumulative_retry
self.max_retries = max_retries
self.initial_delay = initial_delay
self.multiplier = multiplier
def retry_allowed(self, total_sleep, num_retries):
"""Check if another retry is allowed.
Args:
total_sleep (float): With another retry, the amount of sleep that
will be accumulated by the caller.
num_retries (int): With another retry, the number of retries that
will be attempted by the caller.
Returns:
bool: Indicating if another retry is allowed (depending on either
the cumulative sleep allowed or the maximum number of retries
allowed).
"""
if self.max_cumulative_retry is None:
return num_retries <= self.max_retries
else:
return total_sleep <= self.max_cumulative_retry
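# Editor's sketch (hypothetical, not part of this module): with the default
# cumulative cap of 600 seconds, a retry is allowed while the accumulated
# sleep stays at or below the cap, regardless of the retry count.
def _example_retry_allowed():
    strategy = RetryStrategy()
    assert strategy.retry_allowed(total_sleep=599.0, num_retries=10)
    assert not strategy.retry_allowed(total_sleep=601.0, num_retries=10)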

View File

@@ -0,0 +1,2 @@
# Marker file for PEP 561.
# The google-resumable-media package uses inline types.

View File

@@ -0,0 +1,685 @@
# Copyright 2017 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""``requests`` utilities for Google Media Downloads and Resumable Uploads.
This sub-package assumes callers will use the `requests`_ library
as transport and `google-auth`_ for sending authenticated HTTP traffic
with ``requests``.
.. _requests: http://docs.python-requests.org/
.. _google-auth: https://google-auth.readthedocs.io/
====================
Authorized Transport
====================
To use ``google-auth`` and ``requests`` to create an authorized transport
that has read-only access to Google Cloud Storage (GCS):
.. testsetup:: get-credentials
import google.auth
import google.auth.credentials as creds_mod
import mock
def mock_default(scopes=None):
credentials = mock.Mock(spec=creds_mod.Credentials)
return credentials, 'mock-project'
# Patch the ``default`` function on the module.
original_default = google.auth.default
google.auth.default = mock_default
.. doctest:: get-credentials
>>> import google.auth
>>> import google.auth.transport.requests as tr_requests
>>>
>>> ro_scope = 'https://www.googleapis.com/auth/devstorage.read_only'
>>> credentials, _ = google.auth.default(scopes=(ro_scope,))
>>> transport = tr_requests.AuthorizedSession(credentials)
>>> transport
<google.auth.transport.requests.AuthorizedSession object at 0x...>
.. testcleanup:: get-credentials
# Put back the correct ``default`` function on the module.
google.auth.default = original_default
================
Simple Downloads
================
To download an object from Google Cloud Storage, construct the media URL
for the GCS object and download it with an authorized transport that has
access to the resource:
.. testsetup:: basic-download
import mock
import requests
import http.client
bucket = 'bucket-foo'
blob_name = 'file.txt'
fake_response = requests.Response()
fake_response.status_code = int(http.client.OK)
fake_response.headers['Content-Length'] = '1364156'
fake_content = mock.MagicMock(spec=['__len__'])
fake_content.__len__.return_value = 1364156
fake_response._content = fake_content
get_method = mock.Mock(return_value=fake_response, spec=[])
transport = mock.Mock(request=get_method, spec=['request'])
.. doctest:: basic-download
>>> from google.resumable_media.requests import Download
>>>
>>> url_template = (
... 'https://www.googleapis.com/download/storage/v1/b/'
... '{bucket}/o/{blob_name}?alt=media')
>>> media_url = url_template.format(
... bucket=bucket, blob_name=blob_name)
>>>
>>> download = Download(media_url)
>>> response = download.consume(transport)
>>> download.finished
True
>>> response
<Response [200]>
>>> response.headers['Content-Length']
'1364156'
>>> len(response.content)
1364156
To download only a portion of the bytes in the object,
specify ``start`` and ``end`` byte positions (both optional):
.. testsetup:: basic-download-with-slice
import mock
import requests
import http.client
from google.resumable_media.requests import Download
media_url = 'http://test.invalid'
start = 4096
end = 8191
slice_size = end - start + 1
fake_response = requests.Response()
fake_response.status_code = int(http.client.PARTIAL_CONTENT)
fake_response.headers['Content-Length'] = '{:d}'.format(slice_size)
content_range = 'bytes {:d}-{:d}/1364156'.format(start, end)
fake_response.headers['Content-Range'] = content_range
fake_content = mock.MagicMock(spec=['__len__'])
fake_content.__len__.return_value = slice_size
fake_response._content = fake_content
get_method = mock.Mock(return_value=fake_response, spec=[])
transport = mock.Mock(request=get_method, spec=['request'])
.. doctest:: basic-download-with-slice
>>> download = Download(media_url, start=4096, end=8191)
>>> response = download.consume(transport)
>>> download.finished
True
>>> response
<Response [206]>
>>> response.headers['Content-Length']
'4096'
>>> response.headers['Content-Range']
'bytes 4096-8191/1364156'
>>> len(response.content)
4096
=================
Chunked Downloads
=================
For very large objects or objects of unknown size, it may make more sense
to download the object in chunks rather than all at once. Chunking can help
avoid dropped connections on an unreliable network, or can allow
multiple chunks to be downloaded in parallel to speed up the total
download.
A :class:`.ChunkedDownload` uses the same media URL and authorized
transport that a basic :class:`.Download` would use, but also
requires a chunk size and a write-able byte ``stream``. The chunk size is used
to determine how much of the resource to consume with each request and the
stream allows the resource to be written out (e.g. to disk) without
having to fit in memory all at once.
.. testsetup:: chunked-download
import io
import mock
import requests
import http.client
media_url = 'http://test.invalid'
fifty_mb = 50 * 1024 * 1024
one_gb = 1024 * 1024 * 1024
fake_response = requests.Response()
fake_response.status_code = int(http.client.PARTIAL_CONTENT)
fake_response.headers['Content-Length'] = '{:d}'.format(fifty_mb)
content_range = 'bytes 0-{:d}/{:d}'.format(fifty_mb - 1, one_gb)
fake_response.headers['Content-Range'] = content_range
fake_content_begin = b'The beginning of the chunk...'
fake_content = fake_content_begin + b'1' * (fifty_mb - 29)
fake_response._content = fake_content
get_method = mock.Mock(return_value=fake_response, spec=[])
transport = mock.Mock(request=get_method, spec=['request'])
.. doctest:: chunked-download
>>> from google.resumable_media.requests import ChunkedDownload
>>>
>>> chunk_size = 50 * 1024 * 1024 # 50MB
>>> stream = io.BytesIO()
>>> download = ChunkedDownload(
... media_url, chunk_size, stream)
>>> # Check the state of the download before starting.
>>> download.bytes_downloaded
0
>>> download.total_bytes is None
True
>>> response = download.consume_next_chunk(transport)
>>> # Check the state of the download after consuming one chunk.
>>> download.finished
False
>>> download.bytes_downloaded # chunk_size
52428800
>>> download.total_bytes # 1GB
1073741824
>>> response
<Response [206]>
>>> response.headers['Content-Length']
'52428800'
>>> response.headers['Content-Range']
'bytes 0-52428799/1073741824'
>>> len(response.content) == chunk_size
True
>>> stream.seek(0)
0
>>> stream.read(29)
b'The beginning of the chunk...'
The download will change its ``finished`` status to :data:`True`
once the final chunk is consumed. In some cases, the final chunk may
not be the same size as the other chunks:
.. testsetup:: chunked-download-end
import mock
import requests
import http.client
from google.resumable_media.requests import ChunkedDownload
media_url = 'http://test.invalid'
fifty_mb = 50 * 1024 * 1024
one_gb = 1024 * 1024 * 1024
stream = mock.Mock(spec=['write'])
download = ChunkedDownload(media_url, fifty_mb, stream)
download._bytes_downloaded = 20 * fifty_mb
download._total_bytes = one_gb
fake_response = requests.Response()
fake_response.status_code = int(http.client.PARTIAL_CONTENT)
slice_size = one_gb - 20 * fifty_mb
fake_response.headers['Content-Length'] = '{:d}'.format(slice_size)
content_range = 'bytes {:d}-{:d}/{:d}'.format(
20 * fifty_mb, one_gb - 1, one_gb)
fake_response.headers['Content-Range'] = content_range
fake_content = mock.MagicMock(spec=['__len__'])
fake_content.__len__.return_value = slice_size
fake_response._content = fake_content
get_method = mock.Mock(return_value=fake_response, spec=[])
transport = mock.Mock(request=get_method, spec=['request'])
.. doctest:: chunked-download-end
>>> # The state of the download in progress.
>>> download.finished
False
>>> download.bytes_downloaded # 20 chunks at 50MB
1048576000
>>> download.total_bytes # 1GB
1073741824
>>> response = download.consume_next_chunk(transport)
>>> # The state of the download after consuming the final chunk.
>>> download.finished
True
>>> download.bytes_downloaded == download.total_bytes
True
>>> response
<Response [206]>
>>> response.headers['Content-Length']
'25165824'
>>> response.headers['Content-Range']
'bytes 1048576000-1073741823/1073741824'
>>> len(response.content) < download.chunk_size
True
In addition, a :class:`.ChunkedDownload` can also take optional
``start`` and ``end`` byte positions.
Usually, no checksum is returned with a chunked download. Even if one is returned,
it is not validated. If you need to validate the checksum, you can do so
by buffering the chunks and validating the checksum against the completed download.
==============
Simple Uploads
==============
Among the three supported upload classes, the simplest is
:class:`.SimpleUpload`. A simple upload should be used when the resource
being uploaded is small and when there is no metadata (other than the name)
associated with the resource.
.. testsetup:: simple-upload
import json
import mock
import requests
import http.client
bucket = 'some-bucket'
blob_name = 'file.txt'
fake_response = requests.Response()
fake_response.status_code = int(http.client.OK)
payload = {
'bucket': bucket,
'contentType': 'text/plain',
'md5Hash': 'M0XLEsX9/sMdiI+4pB4CAQ==',
'name': blob_name,
'size': '27',
}
fake_response._content = json.dumps(payload).encode('utf-8')
post_method = mock.Mock(return_value=fake_response, spec=[])
transport = mock.Mock(request=post_method, spec=['request'])
.. doctest:: simple-upload
:options: +NORMALIZE_WHITESPACE
>>> from google.resumable_media.requests import SimpleUpload
>>>
>>> url_template = (
... 'https://www.googleapis.com/upload/storage/v1/b/{bucket}/o?'
... 'uploadType=media&'
... 'name={blob_name}')
>>> upload_url = url_template.format(
... bucket=bucket, blob_name=blob_name)
>>>
>>> upload = SimpleUpload(upload_url)
>>> data = b'Some not too large content.'
>>> content_type = 'text/plain'
>>> response = upload.transmit(transport, data, content_type)
>>> upload.finished
True
>>> response
<Response [200]>
>>> json_response = response.json()
>>> json_response['bucket'] == bucket
True
>>> json_response['name'] == blob_name
True
>>> json_response['contentType'] == content_type
True
>>> json_response['md5Hash']
'M0XLEsX9/sMdiI+4pB4CAQ=='
>>> int(json_response['size']) == len(data)
True
In the rare case that an upload fails, an :exc:`.InvalidResponse`
will be raised:
.. testsetup:: simple-upload-fail
import time
import mock
import requests
import http.client
from google import resumable_media
from google.resumable_media import _helpers
from google.resumable_media.requests import SimpleUpload as constructor
upload_url = 'http://test.invalid'
data = b'Some not too large content.'
content_type = 'text/plain'
fake_response = requests.Response()
fake_response.status_code = int(http.client.SERVICE_UNAVAILABLE)
post_method = mock.Mock(return_value=fake_response, spec=[])
transport = mock.Mock(request=post_method, spec=['request'])
time_sleep = time.sleep
def dont_sleep(seconds):
raise RuntimeError('No sleep', seconds)
def SimpleUpload(*args, **kwargs):
upload = constructor(*args, **kwargs)
# Mock the cumulative sleep to avoid retries (and `time.sleep()`).
upload._retry_strategy = resumable_media.RetryStrategy(
max_cumulative_retry=-1.0)
return upload
time.sleep = dont_sleep
.. doctest:: simple-upload-fail
:options: +NORMALIZE_WHITESPACE
>>> upload = SimpleUpload(upload_url)
>>> error = None
>>> try:
... upload.transmit(transport, data, content_type)
... except resumable_media.InvalidResponse as caught_exc:
... error = caught_exc
...
>>> error
InvalidResponse('Request failed with status code', 503,
'Expected one of', <HTTPStatus.OK: 200>)
>>> error.response
<Response [503]>
>>>
>>> upload.finished
True
.. testcleanup:: simple-upload-fail
# Put back the correct ``sleep`` function on the ``time`` module.
time.sleep = time_sleep
Even in the case of failure, we see that the upload is
:attr:`~.SimpleUpload.finished`, i.e. it cannot be re-used.
=================
Multipart Uploads
=================
After the simple upload, the :class:`.MultipartUpload` can be used to
achieve essentially the same task. However, a multipart upload allows some
metadata about the resource to be sent along as well. (This is the "multi":
we send a first part with the metadata and a second part with the actual
bytes in the resource.)
Usage is similar to the simple upload, but :meth:`~.MultipartUpload.transmit`
accepts an extra required argument: ``metadata``.
.. testsetup:: multipart-upload
import json
import mock
import requests
import http.client
bucket = 'some-bucket'
blob_name = 'file.txt'
data = b'Some not too large content.'
content_type = 'text/plain'
fake_response = requests.Response()
fake_response.status_code = int(http.client.OK)
payload = {
'bucket': bucket,
'name': blob_name,
'metadata': {'color': 'grurple'},
}
fake_response._content = json.dumps(payload).encode('utf-8')
post_method = mock.Mock(return_value=fake_response, spec=[])
transport = mock.Mock(request=post_method, spec=['request'])
.. doctest:: multipart-upload
>>> from google.resumable_media.requests import MultipartUpload
>>>
>>> url_template = (
... 'https://www.googleapis.com/upload/storage/v1/b/{bucket}/o?'
... 'uploadType=multipart')
>>> upload_url = url_template.format(bucket=bucket)
>>>
>>> upload = MultipartUpload(upload_url)
>>> metadata = {
... 'name': blob_name,
... 'metadata': {
... 'color': 'grurple',
... },
... }
>>> response = upload.transmit(transport, data, metadata, content_type)
>>> upload.finished
True
>>> response
<Response [200]>
>>> json_response = response.json()
>>> json_response['bucket'] == bucket
True
>>> json_response['name'] == blob_name
True
>>> json_response['metadata'] == metadata['metadata']
True
As with the simple upload, in the case of failure an :exc:`.InvalidResponse`
is raised, enclosing the :attr:`~.InvalidResponse.response` that caused
the failure and the ``upload`` object cannot be re-used after a failure.
=================
Resumable Uploads
=================
A :class:`.ResumableUpload` deviates from the other two upload classes:
it transmits a resource over the course of multiple requests. This
is intended to be used in cases where:
* the size of the resource is not known (i.e. it is generated on the fly)
* requests must be short-lived
* the client has request **size** limitations
* the resource is too large to fit into memory
In general, a resource should be sent in a **single** request to avoid
latency and reduce QPS. See `GCS best practices`_ for more things to
consider when using a resumable upload.
.. _GCS best practices: https://cloud.google.com/storage/docs/\
best-practices#uploading
After creating a :class:`.ResumableUpload` instance, a
**resumable upload session** must be initiated to let the server know that
a series of chunked upload requests will be coming and to obtain an
``upload_id`` for the session. In contrast to the other two upload classes,
:meth:`~.ResumableUpload.initiate` takes a byte ``stream`` as input rather
than raw bytes as ``data``. This can be a file object, a :class:`~io.BytesIO`
object or any other stream implementing the same interface.
.. testsetup:: resumable-initiate
import io
import mock
import requests
import http.client
bucket = 'some-bucket'
blob_name = 'file.txt'
data = b'Some resumable bytes.'
content_type = 'text/plain'
fake_response = requests.Response()
fake_response.status_code = int(http.client.OK)
fake_response._content = b''
upload_id = 'ABCdef189XY_super_serious'
resumable_url_template = (
'https://www.googleapis.com/upload/storage/v1/b/{bucket}'
'/o?uploadType=resumable&upload_id={upload_id}')
resumable_url = resumable_url_template.format(
bucket=bucket, upload_id=upload_id)
fake_response.headers['location'] = resumable_url
fake_response.headers['x-guploader-uploadid'] = upload_id
post_method = mock.Mock(return_value=fake_response, spec=[])
transport = mock.Mock(request=post_method, spec=['request'])
.. doctest:: resumable-initiate
>>> from google.resumable_media.requests import ResumableUpload
>>>
>>> url_template = (
... 'https://www.googleapis.com/upload/storage/v1/b/{bucket}/o?'
... 'uploadType=resumable')
>>> upload_url = url_template.format(bucket=bucket)
>>>
>>> chunk_size = 1024 * 1024 # 1MB
>>> upload = ResumableUpload(upload_url, chunk_size)
>>> stream = io.BytesIO(data)
>>> # The upload doesn't know how "big" it is until seeing a stream.
>>> upload.total_bytes is None
True
>>> metadata = {'name': blob_name}
>>> response = upload.initiate(transport, stream, metadata, content_type)
>>> response
<Response [200]>
>>> upload.resumable_url == response.headers['Location']
True
>>> upload.total_bytes == len(data)
True
>>> upload_id = response.headers['X-GUploader-UploadID']
>>> upload_id
'ABCdef189XY_super_serious'
>>> upload.resumable_url == upload_url + '&upload_id=' + upload_id
True
Once a :class:`.ResumableUpload` has been initiated, the resource is
transmitted in chunks until completion:
.. testsetup:: resumable-transmit
import io
import json
import mock
import requests
import http.client
from google import resumable_media
import google.resumable_media.requests.upload as upload_mod
data = b'01234567891'
stream = io.BytesIO(data)
# Create an "already initiated" upload.
upload_url = 'http://test.invalid'
chunk_size = 256 * 1024 # 256KB
upload = upload_mod.ResumableUpload(upload_url, chunk_size)
upload._resumable_url = 'http://test.invalid?upload_id=mocked'
upload._stream = stream
upload._content_type = 'text/plain'
upload._total_bytes = len(data)
# After-the-fact update the chunk size so that len(data)
# is split into three.
upload._chunk_size = 4
# Make three fake responses.
fake_response0 = requests.Response()
fake_response0.status_code = http.client.PERMANENT_REDIRECT
fake_response0.headers['range'] = 'bytes=0-3'
fake_response1 = requests.Response()
fake_response1.status_code = http.client.PERMANENT_REDIRECT
fake_response1.headers['range'] = 'bytes=0-7'
fake_response2 = requests.Response()
fake_response2.status_code = int(http.client.OK)
bucket = 'some-bucket'
blob_name = 'file.txt'
payload = {
'bucket': bucket,
'name': blob_name,
'size': '{:d}'.format(len(data)),
}
fake_response2._content = json.dumps(payload).encode('utf-8')
# Use the fake responses to mock a transport.
responses = [fake_response0, fake_response1, fake_response2]
put_method = mock.Mock(side_effect=responses, spec=[])
transport = mock.Mock(request=put_method, spec=['request'])
.. doctest:: resumable-transmit
>>> response0 = upload.transmit_next_chunk(transport)
>>> response0
<Response [308]>
>>> upload.finished
False
>>> upload.bytes_uploaded == upload.chunk_size
True
>>>
>>> response1 = upload.transmit_next_chunk(transport)
>>> response1
<Response [308]>
>>> upload.finished
False
>>> upload.bytes_uploaded == 2 * upload.chunk_size
True
>>>
>>> response2 = upload.transmit_next_chunk(transport)
>>> response2
<Response [200]>
>>> upload.finished
True
>>> upload.bytes_uploaded == upload.total_bytes
True
>>> json_response = response2.json()
>>> json_response['bucket'] == bucket
True
>>> json_response['name'] == blob_name
True
"""
from google.resumable_media.requests.download import ChunkedDownload
from google.resumable_media.requests.download import Download
from google.resumable_media.requests.upload import MultipartUpload
from google.resumable_media.requests.download import RawChunkedDownload
from google.resumable_media.requests.download import RawDownload
from google.resumable_media.requests.upload import ResumableUpload
from google.resumable_media.requests.upload import SimpleUpload
from google.resumable_media.requests.upload import XMLMPUContainer
from google.resumable_media.requests.upload import XMLMPUPart
__all__ = [
"ChunkedDownload",
"Download",
"MultipartUpload",
"RawChunkedDownload",
"RawDownload",
"ResumableUpload",
"SimpleUpload",
"XMLMPUContainer",
"XMLMPUPart",
]

View File

@@ -0,0 +1,180 @@
# Copyright 2017 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Shared utilities used by both downloads and uploads.
These utilities are explicitly catered to ``requests``-like transports.
"""
import http.client
import requests.exceptions
import urllib3.exceptions # type: ignore
import time
from google.resumable_media import common
from google.resumable_media import _helpers
_DEFAULT_RETRY_STRATEGY = common.RetryStrategy()
_SINGLE_GET_CHUNK_SIZE = 8192
# The number of seconds to wait to establish a connection
# (connect() call on socket). Avoid setting this to a multiple of 3, so as
# not to align with TCP retransmission timing (typically 2.5-3s).
_DEFAULT_CONNECT_TIMEOUT = 61
# The number of seconds to wait between bytes sent from the server.
_DEFAULT_READ_TIMEOUT = 60
_CONNECTION_ERROR_CLASSES = (
http.client.BadStatusLine,
http.client.IncompleteRead,
http.client.ResponseNotReady,
requests.exceptions.ConnectionError,
requests.exceptions.ChunkedEncodingError,
requests.exceptions.Timeout,
urllib3.exceptions.PoolError,
urllib3.exceptions.ProtocolError,
urllib3.exceptions.SSLError,
urllib3.exceptions.TimeoutError,
ConnectionError, # Python 3.x only, superclass of ConnectionResetError.
)
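# Illustrative sketch (an editor's addition, not part of the upstream module):
# the two timeout constants above are normally combined into the
# ``(connect, read)`` tuple accepted by ``requests``, exactly as the download
# and upload helpers do in their default ``timeout`` arguments. The session
# and URL below are placeholders.
def _example_default_timeouts(session):
    """Hedged example: issue a GET using the module's default timeouts."""
    timeout = (_DEFAULT_CONNECT_TIMEOUT, _DEFAULT_READ_TIMEOUT)
    # ``requests`` treats a 2-tuple as (connect timeout, read timeout).
    return session.request("GET", "https://example.invalid/media", timeout=timeout)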
class RequestsMixin(object):
"""Mix-in class implementing ``requests``-specific behavior.
These are methods that are more general purpose, with implementations
specific to the types defined in ``requests``.
"""
@staticmethod
def _get_status_code(response):
"""Access the status code from an HTTP response.
Args:
response (~requests.Response): The HTTP response object.
Returns:
int: The status code.
"""
return response.status_code
@staticmethod
def _get_headers(response):
"""Access the headers from an HTTP response.
Args:
response (~requests.Response): The HTTP response object.
Returns:
~requests.structures.CaseInsensitiveDict: The header mapping (keys
are case-insensitive).
"""
return response.headers
@staticmethod
def _get_body(response):
"""Access the response body from an HTTP response.
Args:
response (~requests.Response): The HTTP response object.
Returns:
bytes: The body of the ``response``.
"""
return response.content
class RawRequestsMixin(RequestsMixin):
@staticmethod
def _get_body(response):
"""Access the response body from an HTTP response.
Args:
response (~requests.Response): The HTTP response object.
Returns:
bytes: The body of the ``response``.
"""
if response._content is False:
response._content = b"".join(
response.raw.stream(_SINGLE_GET_CHUNK_SIZE, decode_content=False)
)
response._content_consumed = True
return response._content
def wait_and_retry(func, get_status_code, retry_strategy):
"""Attempts to retry a call to ``func`` until success.
Expects ``func`` to return an HTTP response and uses ``get_status_code``
to check if the response is retry-able.
``func`` is expected to signal a failing status code by raising a
``common.InvalidResponse``, at which point this function will check the
code against the retriable status codes in ``common.RETRYABLE``.
Will retry until :meth:`~.RetryStrategy.retry_allowed` (on the current
``retry_strategy``) returns :data:`False`. Uses
:func:`_helpers.calculate_retry_wait` to double the wait time (with jitter)
after each attempt.
Args:
func (Callable): A callable that takes no arguments and produces
an HTTP response which will be checked as retry-able.
get_status_code (Callable[[Any], int]): Helper to get a status code
from a response.
retry_strategy (~google.resumable_media.common.RetryStrategy): The
strategy to use if the request fails and must be retried.
Returns:
object: The return value of ``func``.
"""
total_sleep = 0.0
num_retries = 0
# base_wait will be multiplied by the multiplier on the first retry.
base_wait = float(retry_strategy.initial_delay) / retry_strategy.multiplier
# We expect a ``requests``-based transport raising requests.exceptions
# errors, but due to loose coupling with the transport layer we can't
# guarantee it, so connection failures are matched against the broad
# _CONNECTION_ERROR_CLASSES tuple above.
while True: # return on success or when retries exhausted.
error = None
try:
response = func()
except _CONNECTION_ERROR_CLASSES as e:
error = e # Fall through to retry, if there are retries left.
except common.InvalidResponse as e:
# An InvalidResponse is only retriable if its status code matches.
# The `_process_response()` method on a Download or Upload object
# will convert a failing status code into an exception.
if get_status_code(e.response) in common.RETRYABLE:
error = e # Fall through to retry, if there are retries left.
else:
raise # If the status code is not retriable, raise w/o retry.
else:
return response
base_wait, wait_time = _helpers.calculate_retry_wait(
base_wait, retry_strategy.max_sleep, retry_strategy.multiplier
)
num_retries += 1
total_sleep += wait_time
# Check if (another) retry is allowed. If retries are exhausted and
# no acceptable response was received, raise the retriable error.
if not retry_strategy.retry_allowed(total_sleep, num_retries):
raise error
time.sleep(wait_time)
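# Illustrative sketch (an editor's addition): how a transport call is wrapped
# for ``wait_and_retry``. The zero-argument callable captures the request;
# ``RequestsMixin._get_status_code`` extracts the code from a
# ``requests.Response``; ``_helpers.require_status_code`` raises
# ``common.InvalidResponse`` for unexpected codes so retriable ones can be
# retried. The session and URL are placeholders.
def _example_wait_and_retry(session):
    """Hedged example: retry a GET with the default retry strategy."""

    def retriable_request():
        response = session.request("GET", "https://example.invalid/resource")
        _helpers.require_status_code(
            response, (http.client.OK,), RequestsMixin._get_status_code
        )
        return response

    return wait_and_retry(
        retriable_request, RequestsMixin._get_status_code, _DEFAULT_RETRY_STRATEGY
    )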

View File

@@ -0,0 +1,722 @@
# Copyright 2017 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Support for downloading media from Google APIs."""
import urllib3.response # type: ignore
import http.client
from google.resumable_media import _download
from google.resumable_media import common
from google.resumable_media import _helpers
from google.resumable_media.requests import _request_helpers
_CHECKSUM_MISMATCH = """\
Checksum mismatch while downloading:
{}
The X-Goog-Hash header indicated an {checksum_type} checksum of:
{}
but the actual {checksum_type} checksum of the downloaded contents was:
{}
"""
_STREAM_SEEK_ERROR = """\
Incomplete download for:
{}
Error writing to stream while handling a gzip-compressed file download.
Please restart the download.
"""
_RESPONSE_HEADERS_INFO = """\
The X-Goog-Stored-Content-Length is {}. The X-Goog-Stored-Content-Encoding is {}.
The download request read {} bytes of data.
If the download was incomplete, please check the network connection and restart the download.
"""
class Download(_request_helpers.RequestsMixin, _download.Download):
"""Helper to manage downloading a resource from a Google API.
"Slices" of the resource can be retrieved by specifying a range
with ``start`` and / or ``end``. However, in typical usage, neither
``start`` nor ``end`` is expected to be provided.
Args:
media_url (str): The URL containing the media to be downloaded.
stream (IO[bytes]): A write-able stream (i.e. file-like object) that
the downloaded resource can be written to.
start (int): The first byte in a range to be downloaded. If not
provided, but ``end`` is provided, will download from the
beginning to ``end`` of the media.
end (int): The last byte in a range to be downloaded. If not
provided, but ``start`` is provided, will download from the
``start`` to the end of the media.
headers (Optional[Mapping[str, str]]): Extra headers that should
be sent with the request, e.g. headers for encrypted data.
checksum (Optional[str]): The type of checksum to compute to verify
the integrity of the object. The response headers must contain
a checksum of the requested type. If the headers lack an
appropriate checksum (for instance in the case of transcoded or
ranged downloads where the remote service does not know the
correct checksum) an INFO-level log will be emitted. Supported
values are "md5", "crc32c" and None. The default is "md5".
Attributes:
media_url (str): The URL containing the media to be downloaded.
start (Optional[int]): The first byte in a range to be downloaded.
end (Optional[int]): The last byte in a range to be downloaded.
"""
def _write_to_stream(self, response):
"""Write response body to a write-able stream.
.. note::
This method assumes that the ``_stream`` attribute is set on the
current download.
Args:
response (~requests.Response): The HTTP response object.
Raises:
~google.resumable_media.common.DataCorruption: If the download's
checksum doesn't agree with server-computed checksum.
"""
# Retrieve the expected checksum only once for the download request,
# then compute and validate the checksum when the full download completes.
# Retried requests are range requests, and there's no way to detect
# data corruption for that byte range alone.
if self._expected_checksum is None and self._checksum_object is None:
# `_get_expected_checksum()` may return None even if a checksum was
# requested, in which case it will emit an info log _MISSING_CHECKSUM.
# If an invalid checksum type is specified, this will raise ValueError.
expected_checksum, checksum_object = _helpers._get_expected_checksum(
response, self._get_headers, self.media_url, checksum_type=self.checksum
)
self._expected_checksum = expected_checksum
self._checksum_object = checksum_object
else:
expected_checksum = self._expected_checksum
checksum_object = self._checksum_object
with response:
# NOTE: In order to handle compressed streams gracefully, we try
# to insert our checksum object into the decompression stream. If
# the stream is indeed compressed, this will delegate the checksum
# object to the decoder and return a _DoNothingHash here.
local_checksum_object = _add_decoder(response.raw, checksum_object)
body_iter = response.iter_content(
chunk_size=_request_helpers._SINGLE_GET_CHUNK_SIZE, decode_unicode=False
)
for chunk in body_iter:
self._stream.write(chunk)
self._bytes_downloaded += len(chunk)
local_checksum_object.update(chunk)
# Don't validate the checksum for partial responses.
if (
expected_checksum is not None
and response.status_code != http.client.PARTIAL_CONTENT
):
actual_checksum = _helpers.prepare_checksum_digest(checksum_object.digest())
if actual_checksum != expected_checksum:
headers = self._get_headers(response)
x_goog_encoding = headers.get("x-goog-stored-content-encoding")
x_goog_length = headers.get("x-goog-stored-content-length")
content_length_msg = _RESPONSE_HEADERS_INFO.format(
x_goog_length, x_goog_encoding, self._bytes_downloaded
)
if (
x_goog_length
and self._bytes_downloaded < int(x_goog_length)
and x_goog_encoding != "gzip"
):
# The library will attempt to trigger a retry by raising a ConnectionError, if
# (a) bytes_downloaded is less than response header x-goog-stored-content-length, and
# (b) the object is not gzip-compressed when stored in Cloud Storage.
raise ConnectionError(content_length_msg)
else:
msg = _CHECKSUM_MISMATCH.format(
self.media_url,
expected_checksum,
actual_checksum,
checksum_type=self.checksum.upper(),
)
msg += content_length_msg
raise common.DataCorruption(response, msg)
def consume(
self,
transport,
timeout=(
_request_helpers._DEFAULT_CONNECT_TIMEOUT,
_request_helpers._DEFAULT_READ_TIMEOUT,
),
):
"""Consume the resource to be downloaded.
If a ``stream`` is attached to this download, then the downloaded
resource will be written to the stream.
Args:
transport (~requests.Session): A ``requests`` object which can
make authenticated requests.
timeout (Optional[Union[float, Tuple[float, float]]]):
The number of seconds to wait for the server response.
Depending on the retry strategy, a request may be repeated
several times using the same timeout each time.
Can also be passed as a tuple (connect_timeout, read_timeout).
See :meth:`requests.Session.request` documentation for details.
Returns:
~requests.Response: The HTTP response returned by ``transport``.
Raises:
~google.resumable_media.common.DataCorruption: If the download's
checksum doesn't agree with server-computed checksum.
ValueError: If the current :class:`Download` has already
finished.
"""
method, _, payload, headers = self._prepare_request()
# NOTE: We assume "payload is None" but pass it along anyway.
request_kwargs = {
"data": payload,
"headers": headers,
"timeout": timeout,
}
if self._stream is not None:
request_kwargs["stream"] = True
# Assign object generation if generation is specified in the media url.
if self._object_generation is None:
self._object_generation = _helpers._get_generation_from_url(self.media_url)
# Wrap the request business logic in a function to be retried.
def retriable_request():
url = self.media_url
# To restart an interrupted download, read from the offset of last byte
# received using a range request, and set object generation query param.
if self._bytes_downloaded > 0:
_download.add_bytes_range(
(self.start or 0) + self._bytes_downloaded, self.end, self._headers
)
request_kwargs["headers"] = self._headers
# Set object generation query param to ensure the same object content is requested.
if (
self._object_generation is not None
and _helpers._get_generation_from_url(self.media_url) is None
):
query_param = {"generation": self._object_generation}
url = _helpers.add_query_parameters(self.media_url, query_param)
result = transport.request(method, url, **request_kwargs)
# If a generation hasn't been specified, and this is the first response we get, let's record the
# generation. In future requests we'll specify the generation query param to avoid data races.
if self._object_generation is None:
self._object_generation = _helpers._parse_generation_header(
result, self._get_headers
)
self._process_response(result)
# With decompressive transcoding, GCS serves back the whole file regardless of the range request,
# thus we reset the stream position to the start of the stream.
# See: https://cloud.google.com/storage/docs/transcoding#range
if self._stream is not None:
if _helpers._is_decompressive_transcoding(result, self._get_headers):
try:
self._stream.seek(0)
except Exception as exc:
msg = _STREAM_SEEK_ERROR.format(url)
raise Exception(msg) from exc
self._bytes_downloaded = 0
self._write_to_stream(result)
return result
return _request_helpers.wait_and_retry(
retriable_request, self._get_status_code, self._retry_strategy
)
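# Illustrative sketch (an editor's addition): minimal use of ``Download``,
# assuming an authenticated ``requests.Session`` (e.g. google-auth's
# AuthorizedSession) and a placeholder media URL.
def _example_download(session, media_url):
    """Hedged example: download an object into an in-memory stream."""
    import io

    stream = io.BytesIO()
    download = Download(media_url, stream=stream, checksum="md5")
    response = download.consume(session)  # body is written into ``stream``
    return response, stream.getvalue()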
class RawDownload(_request_helpers.RawRequestsMixin, _download.Download):
"""Helper to manage downloading a raw resource from a Google API.
"Slices" of the resource can be retrieved by specifying a range
with ``start`` and / or ``end``. However, in typical usage, neither
``start`` nor ``end`` is expected to be provided.
Args:
media_url (str): The URL containing the media to be downloaded.
stream (IO[bytes]): A write-able stream (i.e. file-like object) that
the downloaded resource can be written to.
start (int): The first byte in a range to be downloaded. If not
provided, but ``end`` is provided, will download from the
beginning to ``end`` of the media.
end (int): The last byte in a range to be downloaded. If not
provided, but ``start`` is provided, will download from the
``start`` to the end of the media.
headers (Optional[Mapping[str, str]]): Extra headers that should
be sent with the request, e.g. headers for encrypted data.
checksum (Optional[str]): The type of checksum to compute to verify
the integrity of the object. The response headers must contain
a checksum of the requested type. If the headers lack an
appropriate checksum (for instance in the case of transcoded or
ranged downloads where the remote service does not know the
correct checksum) an INFO-level log will be emitted. Supported
values are "md5", "crc32c" and None. The default is "md5".
Attributes:
media_url (str): The URL containing the media to be downloaded.
start (Optional[int]): The first byte in a range to be downloaded.
end (Optional[int]): The last byte in a range to be downloaded.
"""
def _write_to_stream(self, response):
"""Write response body to a write-able stream.
.. note::
This method assumes that the ``_stream`` attribute is set on the
current download.
Args:
response (~requests.Response): The HTTP response object.
Raises:
~google.resumable_media.common.DataCorruption: If the download's
checksum doesn't agree with server-computed checksum.
"""
# Retrieve the expected checksum only once for the download request,
# then compute and validate the checksum when the full download completes.
# Retried requests are range requests, and there's no way to detect
# data corruption for that byte range alone.
if self._expected_checksum is None and self._checksum_object is None:
# `_get_expected_checksum()` may return None even if a checksum was
# requested, in which case it will emit an info log _MISSING_CHECKSUM.
# If an invalid checksum type is specified, this will raise ValueError.
expected_checksum, checksum_object = _helpers._get_expected_checksum(
response, self._get_headers, self.media_url, checksum_type=self.checksum
)
self._expected_checksum = expected_checksum
self._checksum_object = checksum_object
else:
expected_checksum = self._expected_checksum
checksum_object = self._checksum_object
with response:
body_iter = response.raw.stream(
_request_helpers._SINGLE_GET_CHUNK_SIZE, decode_content=False
)
for chunk in body_iter:
self._stream.write(chunk)
self._bytes_downloaded += len(chunk)
checksum_object.update(chunk)
response._content_consumed = True
# Don't validate the checksum for partial responses.
if (
expected_checksum is not None
and response.status_code != http.client.PARTIAL_CONTENT
):
actual_checksum = _helpers.prepare_checksum_digest(checksum_object.digest())
if actual_checksum != expected_checksum:
headers = self._get_headers(response)
x_goog_encoding = headers.get("x-goog-stored-content-encoding")
x_goog_length = headers.get("x-goog-stored-content-length")
content_length_msg = _RESPONSE_HEADERS_INFO.format(
x_goog_length, x_goog_encoding, self._bytes_downloaded
)
if (
x_goog_length
and self._bytes_downloaded < int(x_goog_length)
and x_goog_encoding != "gzip"
):
# The library will attempt to trigger a retry by raising a ConnectionError, if
# (a) bytes_downloaded is less than response header x-goog-stored-content-length, and
# (b) the object is not gzip-compressed when stored in Cloud Storage.
raise ConnectionError(content_length_msg)
else:
msg = _CHECKSUM_MISMATCH.format(
self.media_url,
expected_checksum,
actual_checksum,
checksum_type=self.checksum.upper(),
)
msg += content_length_msg
raise common.DataCorruption(response, msg)
def consume(
self,
transport,
timeout=(
_request_helpers._DEFAULT_CONNECT_TIMEOUT,
_request_helpers._DEFAULT_READ_TIMEOUT,
),
):
"""Consume the resource to be downloaded.
If a ``stream`` is attached to this download, then the downloaded
resource will be written to the stream.
Args:
transport (~requests.Session): A ``requests`` object which can
make authenticated requests.
timeout (Optional[Union[float, Tuple[float, float]]]):
The number of seconds to wait for the server response.
Depending on the retry strategy, a request may be repeated
several times using the same timeout each time.
Can also be passed as a tuple (connect_timeout, read_timeout).
See :meth:`requests.Session.request` documentation for details.
Returns:
~requests.Response: The HTTP response returned by ``transport``.
Raises:
~google.resumable_media.common.DataCorruption: If the download's
checksum doesn't agree with server-computed checksum.
ValueError: If the current :class:`Download` has already
finished.
"""
method, _, payload, headers = self._prepare_request()
# NOTE: We assume "payload is None" but pass it along anyway.
request_kwargs = {
"data": payload,
"headers": headers,
"timeout": timeout,
"stream": True,
}
# Assign object generation if generation is specified in the media url.
if self._object_generation is None:
self._object_generation = _helpers._get_generation_from_url(self.media_url)
# Wrap the request business logic in a function to be retried.
def retriable_request():
url = self.media_url
# To restart an interrupted download, read from the offset of last byte
# received using a range request, and set object generation query param.
if self._bytes_downloaded > 0:
_download.add_bytes_range(
(self.start or 0) + self._bytes_downloaded, self.end, self._headers
)
request_kwargs["headers"] = self._headers
# Set object generation query param to ensure the same object content is requested.
if (
self._object_generation is not None
and _helpers._get_generation_from_url(self.media_url) is None
):
query_param = {"generation": self._object_generation}
url = _helpers.add_query_parameters(self.media_url, query_param)
result = transport.request(method, url, **request_kwargs)
# If a generation hasn't been specified, and this is the first response we get, let's record the
# generation. In future requests we'll specify the generation query param to avoid data races.
if self._object_generation is None:
self._object_generation = _helpers._parse_generation_header(
result, self._get_headers
)
self._process_response(result)
# With decompressive transcoding, GCS serves back the whole file regardless of the range request,
# thus we reset the stream position to the start of the stream.
# See: https://cloud.google.com/storage/docs/transcoding#range
if self._stream is not None:
if _helpers._is_decompressive_transcoding(result, self._get_headers):
try:
self._stream.seek(0)
except Exception as exc:
msg = _STREAM_SEEK_ERROR.format(url)
raise Exception(msg) from exc
self._bytes_downloaded = 0
self._write_to_stream(result)
return result
return _request_helpers.wait_and_retry(
retriable_request, self._get_status_code, self._retry_strategy
)
class ChunkedDownload(_request_helpers.RequestsMixin, _download.ChunkedDownload):
"""Download a resource in chunks from a Google API.
Args:
media_url (str): The URL containing the media to be downloaded.
chunk_size (int): The number of bytes to be retrieved in each
request.
stream (IO[bytes]): A write-able stream (i.e. file-like object) that
will be used to concatenate chunks of the resource as they are
downloaded.
start (int): The first byte in a range to be downloaded. If not
provided, defaults to ``0``.
end (int): The last byte in a range to be downloaded. If not
provided, will download to the end of the media.
headers (Optional[Mapping[str, str]]): Extra headers that should
be sent with each request, e.g. headers for data encryption
keys.
Attributes:
media_url (str): The URL containing the media to be downloaded.
start (Optional[int]): The first byte in a range to be downloaded.
end (Optional[int]): The last byte in a range to be downloaded.
chunk_size (int): The number of bytes to be retrieved in each request.
Raises:
ValueError: If ``start`` is negative.
"""
def consume_next_chunk(
self,
transport,
timeout=(
_request_helpers._DEFAULT_CONNECT_TIMEOUT,
_request_helpers._DEFAULT_READ_TIMEOUT,
),
):
"""Consume the next chunk of the resource to be downloaded.
Args:
transport (~requests.Session): A ``requests`` object which can
make authenticated requests.
timeout (Optional[Union[float, Tuple[float, float]]]):
The number of seconds to wait for the server response.
Depending on the retry strategy, a request may be repeated
several times using the same timeout each time.
Can also be passed as a tuple (connect_timeout, read_timeout).
See :meth:`requests.Session.request` documentation for details.
Returns:
~requests.Response: The HTTP response returned by ``transport``.
Raises:
ValueError: If the current download has finished.
"""
method, url, payload, headers = self._prepare_request()
# Wrap the request business logic in a function to be retried.
def retriable_request():
# NOTE: We assume "payload is None" but pass it along anyway.
result = transport.request(
method,
url,
data=payload,
headers=headers,
timeout=timeout,
)
self._process_response(result)
return result
return _request_helpers.wait_and_retry(
retriable_request, self._get_status_code, self._retry_strategy
)
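# Illustrative sketch (an editor's addition): chunks are pulled one request at
# a time until ``finished`` flips to True. Session and URL are placeholders.
def _example_chunked_download(session, media_url):
    """Hedged example: fetch a resource in 1 MB chunks."""
    import io

    stream = io.BytesIO()
    download = ChunkedDownload(media_url, 1024 * 1024, stream)
    while not download.finished:
        download.consume_next_chunk(session)
    return stream.getvalue()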
class RawChunkedDownload(_request_helpers.RawRequestsMixin, _download.ChunkedDownload):
"""Download a raw resource in chunks from a Google API.
Args:
media_url (str): The URL containing the media to be downloaded.
chunk_size (int): The number of bytes to be retrieved in each
request.
stream (IO[bytes]): A write-able stream (i.e. file-like object) that
will be used to concatenate chunks of the resource as they are
downloaded.
start (int): The first byte in a range to be downloaded. If not
provided, defaults to ``0``.
end (int): The last byte in a range to be downloaded. If not
provided, will download to the end of the media.
headers (Optional[Mapping[str, str]]): Extra headers that should
be sent with each request, e.g. headers for data encryption
keys.
Attributes:
media_url (str): The URL containing the media to be downloaded.
start (Optional[int]): The first byte in a range to be downloaded.
end (Optional[int]): The last byte in a range to be downloaded.
chunk_size (int): The number of bytes to be retrieved in each request.
Raises:
ValueError: If ``start`` is negative.
"""
def consume_next_chunk(
self,
transport,
timeout=(
_request_helpers._DEFAULT_CONNECT_TIMEOUT,
_request_helpers._DEFAULT_READ_TIMEOUT,
),
):
"""Consume the next chunk of the resource to be downloaded.
Args:
transport (~requests.Session): A ``requests`` object which can
make authenticated requests.
timeout (Optional[Union[float, Tuple[float, float]]]):
The number of seconds to wait for the server response.
Depending on the retry strategy, a request may be repeated
several times using the same timeout each time.
Can also be passed as a tuple (connect_timeout, read_timeout).
See :meth:`requests.Session.request` documentation for details.
Returns:
~requests.Response: The HTTP response returned by ``transport``.
Raises:
ValueError: If the current download has finished.
"""
method, url, payload, headers = self._prepare_request()
# Wrap the request business logic in a function to be retried.
def retriable_request():
# NOTE: We assume "payload is None" but pass it along anyway.
result = transport.request(
method,
url,
data=payload,
headers=headers,
stream=True,
timeout=timeout,
)
self._process_response(result)
return result
return _request_helpers.wait_and_retry(
retriable_request, self._get_status_code, self._retry_strategy
)
def _add_decoder(response_raw, checksum):
"""Patch the ``_decoder`` on a ``urllib3`` response.
This is so that we can intercept the compressed bytes before they are
decoded.
Only patches if the content encoding is ``gzip`` or ``br``.
Args:
response_raw (urllib3.response.HTTPResponse): The raw response for
an HTTP request.
checksum (object):
A checksum which will be updated with compressed bytes.
Returns:
object: Either the original ``checksum`` if ``_decoder`` is not
patched, or a ``_DoNothingHash`` if the decoder is patched, since the
caller will no longer need to hash the decoded bytes.
"""
encoding = response_raw.headers.get("content-encoding", "").lower()
if encoding == "gzip":
response_raw._decoder = _GzipDecoder(checksum)
return _helpers._DoNothingHash()
# Only activate if brotli is installed
elif encoding == "br" and _BrotliDecoder: # type: ignore
response_raw._decoder = _BrotliDecoder(checksum)
return _helpers._DoNothingHash()
else:
return checksum
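# Illustrative sketch (an editor's addition): for a gzip- or brotli-encoded
# body the checksum object is handed to the patched decoder and the caller
# receives a do-nothing hash back, so only the *compressed* bytes are hashed.
# ``google_crc32c`` is assumed to be installed (a common companion of this
# library); the response argument is a placeholder.
def _example_add_decoder(response):
    """Hedged example: attach a CRC32C checksum to a raw urllib3 response."""
    import google_crc32c

    checksum = google_crc32c.Checksum()
    local_hash = _add_decoder(response.raw, checksum)
    # ``local_hash`` is a _DoNothingHash when the decoder was patched,
    # otherwise it is ``checksum`` itself.
    return local_hash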
class _GzipDecoder(urllib3.response.GzipDecoder):
"""Custom subclass of ``urllib3`` decoder for ``gzip``-ed bytes.
Allows a checksum function to see the compressed bytes before they are
decoded. This way the checksum of the compressed value can be computed.
Args:
checksum (object):
A checksum which will be updated with compressed bytes.
"""
def __init__(self, checksum):
super().__init__()
self._checksum = checksum
def decompress(self, data):
"""Decompress the bytes.
Args:
data (bytes): The compressed bytes to be decompressed.
Returns:
bytes: The decompressed bytes from ``data``.
"""
self._checksum.update(data)
return super().decompress(data)
# urllib3.response.BrotliDecoder might not exist depending on whether brotli is
# installed.
if hasattr(urllib3.response, "BrotliDecoder"):
class _BrotliDecoder:
"""Handler for ``brotli`` encoded bytes.
Allows a checksum function to see the compressed bytes before they are
decoded. This way the checksum of the compressed value can be computed.
Because BrotliDecoder's decompress method is dynamically created in
urllib3, a subclass is not practical. Instead, this class creates a
captive urllib3.response.BrotliDecoder instance and acts as a proxy.
Args:
checksum (object):
A checksum which will be updated with compressed bytes.
"""
def __init__(self, checksum):
self._decoder = urllib3.response.BrotliDecoder()
self._checksum = checksum
def decompress(self, data):
"""Decompress the bytes.
Args:
data (bytes): The compressed bytes to be decompressed.
Returns:
bytes: The decompressed bytes from ``data``.
"""
self._checksum.update(data)
return self._decoder.decompress(data)
def flush(self):
return self._decoder.flush()
else: # pragma: NO COVER
_BrotliDecoder = None # type: ignore # pragma: NO COVER

View File

@@ -0,0 +1,762 @@
# Copyright 2017 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Support for resumable uploads.
Also supported here are simple (media) uploads and multipart
uploads that contain both metadata and a small file as payload.
"""
from google.resumable_media import _upload
from google.resumable_media.requests import _request_helpers
class SimpleUpload(_request_helpers.RequestsMixin, _upload.SimpleUpload):
"""Upload a resource to a Google API.
A **simple** media upload sends no metadata and completes the upload
in a single request.
Args:
upload_url (str): The URL where the content will be uploaded.
headers (Optional[Mapping[str, str]]): Extra headers that should
be sent with the request, e.g. headers for encrypted data.
Attributes:
upload_url (str): The URL where the content will be uploaded.
"""
def transmit(
self,
transport,
data,
content_type,
timeout=(
_request_helpers._DEFAULT_CONNECT_TIMEOUT,
_request_helpers._DEFAULT_READ_TIMEOUT,
),
):
"""Transmit the resource to be uploaded.
Args:
transport (~requests.Session): A ``requests`` object which can
make authenticated requests.
data (bytes): The resource content to be uploaded.
content_type (str): The content type of the resource, e.g. a JPEG
image has content type ``image/jpeg``.
timeout (Optional[Union[float, Tuple[float, float]]]):
The number of seconds to wait for the server response.
Depending on the retry strategy, a request may be repeated
several times using the same timeout each time.
Can also be passed as a tuple (connect_timeout, read_timeout).
See :meth:`requests.Session.request` documentation for details.
Returns:
~requests.Response: The HTTP response returned by ``transport``.
"""
method, url, payload, headers = self._prepare_request(data, content_type)
# Wrap the request business logic in a function to be retried.
def retriable_request():
result = transport.request(
method, url, data=payload, headers=headers, timeout=timeout
)
self._process_response(result)
return result
return _request_helpers.wait_and_retry(
retriable_request, self._get_status_code, self._retry_strategy
)
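# Illustrative sketch (an editor's addition): a simple upload is a single
# request carrying raw bytes plus a content type. The session is a
# placeholder; for GCS the URL would use ``uploadType=media``.
def _example_simple_upload(session, upload_url):
    """Hedged example: upload a small payload in one request."""
    upload = SimpleUpload(upload_url)
    return upload.transmit(session, b"some bytes", "text/plain")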
class MultipartUpload(_request_helpers.RequestsMixin, _upload.MultipartUpload):
"""Upload a resource with metadata to a Google API.
A **multipart** upload sends both metadata and the resource in a single
(multipart) request.
Args:
upload_url (str): The URL where the content will be uploaded.
headers (Optional[Mapping[str, str]]): Extra headers that should
be sent with the request, e.g. headers for encrypted data.
checksum (Optional[str]): The type of checksum to compute to verify
the integrity of the object. The request metadata will be amended
to include the computed value. Using this option will override a
manually-set checksum value. Supported values are "md5",
"crc32c" and None. The default is None.
Attributes:
upload_url (str): The URL where the content will be uploaded.
"""
def transmit(
self,
transport,
data,
metadata,
content_type,
timeout=(
_request_helpers._DEFAULT_CONNECT_TIMEOUT,
_request_helpers._DEFAULT_READ_TIMEOUT,
),
):
"""Transmit the resource to be uploaded.
Args:
transport (~requests.Session): A ``requests`` object which can
make authenticated requests.
data (bytes): The resource content to be uploaded.
metadata (Mapping[str, str]): The resource metadata, such as an
ACL list.
content_type (str): The content type of the resource, e.g. a JPEG
image has content type ``image/jpeg``.
timeout (Optional[Union[float, Tuple[float, float]]]):
The number of seconds to wait for the server response.
Depending on the retry strategy, a request may be repeated
several times using the same timeout each time.
Can also be passed as a tuple (connect_timeout, read_timeout).
See :meth:`requests.Session.request` documentation for details.
Returns:
~requests.Response: The HTTP response returned by ``transport``.
"""
method, url, payload, headers = self._prepare_request(
data, metadata, content_type
)
# Wrap the request business logic in a function to be retried.
def retriable_request():
result = transport.request(
method, url, data=payload, headers=headers, timeout=timeout
)
self._process_response(result)
return result
return _request_helpers.wait_and_retry(
retriable_request, self._get_status_code, self._retry_strategy
)
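# Illustrative sketch (an editor's addition): a multipart upload bundles
# metadata and payload into one request; with ``checksum="md5"`` the computed
# digest is added to the metadata before transmission. Placeholders as above.
def _example_multipart_upload(session, upload_url):
    """Hedged example: upload bytes together with object metadata."""
    upload = MultipartUpload(upload_url, checksum="md5")
    metadata = {"name": "example.txt"}
    return upload.transmit(session, b"some bytes", metadata, "text/plain")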
class ResumableUpload(_request_helpers.RequestsMixin, _upload.ResumableUpload):
"""Initiate and fulfill a resumable upload to a Google API.
A **resumable** upload sends an initial request with the resource metadata
and then gets assigned an upload ID / upload URL to send bytes to.
Using the upload URL, the upload is then done in chunks (determined by
the user) until all bytes have been uploaded.
When constructing a resumable upload, only the resumable upload URL and
the chunk size are required:
.. testsetup:: resumable-constructor
bucket = 'bucket-foo'
.. doctest:: resumable-constructor
>>> from google.resumable_media.requests import ResumableUpload
>>>
>>> url_template = (
... 'https://www.googleapis.com/upload/storage/v1/b/{bucket}/o?'
... 'uploadType=resumable')
>>> upload_url = url_template.format(bucket=bucket)
>>>
>>> chunk_size = 3 * 1024 * 1024 # 3MB
>>> upload = ResumableUpload(upload_url, chunk_size)
When initiating an upload (via :meth:`initiate`), the caller is expected
to pass the resource being uploaded as a file-like ``stream``. If the size
of the resource is explicitly known, it can be passed in directly:
.. testsetup:: resumable-explicit-size
import os
import tempfile
import mock
import requests
import http.client
from google.resumable_media.requests import ResumableUpload
upload_url = 'http://test.invalid'
chunk_size = 3 * 1024 * 1024 # 3MB
upload = ResumableUpload(upload_url, chunk_size)
file_desc, filename = tempfile.mkstemp()
os.close(file_desc)
data = b'some bytes!'
with open(filename, 'wb') as file_obj:
file_obj.write(data)
fake_response = requests.Response()
fake_response.status_code = int(http.client.OK)
fake_response._content = b''
resumable_url = 'http://test.invalid?upload_id=7up'
fake_response.headers['location'] = resumable_url
post_method = mock.Mock(return_value=fake_response, spec=[])
transport = mock.Mock(request=post_method, spec=['request'])
.. doctest:: resumable-explicit-size
>>> import os
>>>
>>> upload.total_bytes is None
True
>>>
>>> stream = open(filename, 'rb')
>>> total_bytes = os.path.getsize(filename)
>>> metadata = {'name': filename}
>>> response = upload.initiate(
... transport, stream, metadata, 'text/plain',
... total_bytes=total_bytes)
>>> response
<Response [200]>
>>>
>>> upload.total_bytes == total_bytes
True
.. testcleanup:: resumable-explicit-size
os.remove(filename)
If the stream is in a "final" state (i.e. it won't have any more bytes
written to it), the total number of bytes can be determined implicitly
from the ``stream`` itself:
.. testsetup:: resumable-implicit-size
import io
import mock
import requests
import http.client
from google.resumable_media.requests import ResumableUpload
upload_url = 'http://test.invalid'
chunk_size = 3 * 1024 * 1024 # 3MB
upload = ResumableUpload(upload_url, chunk_size)
fake_response = requests.Response()
fake_response.status_code = int(http.client.OK)
fake_response._content = b''
resumable_url = 'http://test.invalid?upload_id=7up'
fake_response.headers['location'] = resumable_url
post_method = mock.Mock(return_value=fake_response, spec=[])
transport = mock.Mock(request=post_method, spec=['request'])
data = b'some MOAR bytes!'
metadata = {'name': 'some-file.jpg'}
content_type = 'image/jpeg'
.. doctest:: resumable-implicit-size
>>> stream = io.BytesIO(data)
>>> response = upload.initiate(
... transport, stream, metadata, content_type)
>>>
>>> upload.total_bytes == len(data)
True
If the size of the resource is **unknown** when the upload is initiated,
the ``stream_final`` argument can be used. This might occur if the
resource is being dynamically created on the client (e.g. application
logs). To use this argument:
.. testsetup:: resumable-unknown-size
import io
import mock
import requests
import http.client
from google.resumable_media.requests import ResumableUpload
upload_url = 'http://test.invalid'
chunk_size = 3 * 1024 * 1024 # 3MB
upload = ResumableUpload(upload_url, chunk_size)
fake_response = requests.Response()
fake_response.status_code = int(http.client.OK)
fake_response._content = b''
resumable_url = 'http://test.invalid?upload_id=7up'
fake_response.headers['location'] = resumable_url
post_method = mock.Mock(return_value=fake_response, spec=[])
transport = mock.Mock(request=post_method, spec=['request'])
metadata = {'name': 'some-file.jpg'}
content_type = 'application/octet-stream'
stream = io.BytesIO(b'data')
.. doctest:: resumable-unknown-size
>>> response = upload.initiate(
... transport, stream, metadata, content_type,
... stream_final=False)
>>>
>>> upload.total_bytes is None
True
Args:
upload_url (str): The URL where the resumable upload will be initiated.
chunk_size (int): The size of each chunk used to upload the resource.
headers (Optional[Mapping[str, str]]): Extra headers that should
be sent with the :meth:`initiate` request, e.g. headers for
encrypted data. These **will not** be sent with
:meth:`transmit_next_chunk` or :meth:`recover` requests.
checksum (Optional[str]): The type of checksum to compute to verify
the integrity of the object. After the upload is complete, the
server-computed checksum of the resulting object will be checked
and google.resumable_media.common.DataCorruption will be raised on
a mismatch. The corrupted file will not be deleted from the remote
host automatically. Supported values are "md5", "crc32c" and None.
The default is None.
Attributes:
upload_url (str): The URL where the content will be uploaded.
Raises:
ValueError: If ``chunk_size`` is not a multiple of
:data:`.UPLOAD_CHUNK_SIZE`.
"""
def initiate(
self,
transport,
stream,
metadata,
content_type,
total_bytes=None,
stream_final=True,
timeout=(
_request_helpers._DEFAULT_CONNECT_TIMEOUT,
_request_helpers._DEFAULT_READ_TIMEOUT,
),
):
"""Initiate a resumable upload.
By default, this method assumes your ``stream`` is in a "final"
state ready to transmit. However, ``stream_final=False`` can be used
to indicate that the size of the resource is not known. This can happen
if bytes are being dynamically fed into ``stream``, e.g. if the stream
is attached to application logs.
If ``stream_final=False`` is used, :attr:`chunk_size` bytes will be
read from the stream every time :meth:`transmit_next_chunk` is called.
If one of those reads produces strictly fewer bytes than the chunk
size, the upload will be concluded.
Args:
transport (~requests.Session): A ``requests`` object which can
make authenticated requests.
stream (IO[bytes]): The stream (i.e. file-like object) that will
be uploaded. The stream **must** be at the beginning (i.e.
``stream.tell() == 0``).
metadata (Mapping[str, str]): The resource metadata, such as an
ACL list.
content_type (str): The content type of the resource, e.g. a JPEG
image has content type ``image/jpeg``.
total_bytes (Optional[int]): The total number of bytes to be
uploaded. If specified, the upload size **will not** be
determined from the stream (even if ``stream_final=True``).
stream_final (Optional[bool]): Indicates if the ``stream`` is
"final" (i.e. no more bytes will be added to it). In this case
we determine the upload size from the size of the stream. If
``total_bytes`` is passed, this argument will be ignored.
timeout (Optional[Union[float, Tuple[float, float]]]):
The number of seconds to wait for the server response.
Depending on the retry strategy, a request may be repeated
several times using the same timeout each time.
Can also be passed as a tuple (connect_timeout, read_timeout).
See :meth:`requests.Session.request` documentation for details.
Returns:
~requests.Response: The HTTP response returned by ``transport``.
"""
method, url, payload, headers = self._prepare_initiate_request(
stream,
metadata,
content_type,
total_bytes=total_bytes,
stream_final=stream_final,
)
# Wrap the request business logic in a function to be retried.
def retriable_request():
result = transport.request(
method, url, data=payload, headers=headers, timeout=timeout
)
self._process_initiate_response(result)
return result
return _request_helpers.wait_and_retry(
retriable_request, self._get_status_code, self._retry_strategy
)
def transmit_next_chunk(
self,
transport,
timeout=(
_request_helpers._DEFAULT_CONNECT_TIMEOUT,
_request_helpers._DEFAULT_READ_TIMEOUT,
),
):
"""Transmit the next chunk of the resource to be uploaded.
If the current upload was initiated with ``stream_final=False``,
this method will dynamically determine if the upload has completed.
The upload will be considered complete if the stream produces
fewer than :attr:`chunk_size` bytes when a chunk is read from it.
In the case of failure, an exception is thrown that preserves the
failed response:
.. testsetup:: bad-response
import io
import mock
import requests
import http.client
from google import resumable_media
import google.resumable_media.requests.upload as upload_mod
transport = mock.Mock(spec=['request'])
fake_response = requests.Response()
fake_response.status_code = int(http.client.BAD_REQUEST)
transport.request.return_value = fake_response
upload_url = 'http://test.invalid'
upload = upload_mod.ResumableUpload(
upload_url, resumable_media.UPLOAD_CHUNK_SIZE)
# Fake that the upload has been initiate()-d
data = b'data is here'
upload._stream = io.BytesIO(data)
upload._total_bytes = len(data)
upload._resumable_url = 'http://test.invalid?upload_id=nope'
.. doctest:: bad-response
:options: +NORMALIZE_WHITESPACE
>>> error = None
>>> try:
... upload.transmit_next_chunk(transport)
... except resumable_media.InvalidResponse as caught_exc:
... error = caught_exc
...
>>> error
InvalidResponse('Request failed with status code', 400,
'Expected one of', <HTTPStatus.OK: 200>, <HTTPStatus.PERMANENT_REDIRECT: 308>)
>>> error.response
<Response [400]>
Args:
transport (~requests.Session): A ``requests`` object which can
make authenticated requests.
timeout (Optional[Union[float, Tuple[float, float]]]):
The number of seconds to wait for the server response.
Depending on the retry strategy, a request may be repeated
several times using the same timeout each time.
Can also be passed as a tuple (connect_timeout, read_timeout).
See :meth:`requests.Session.request` documentation for details.
Returns:
~requests.Response: The HTTP response returned by ``transport``.
Raises:
~google.resumable_media.common.InvalidResponse: If the status
code is not 200 or http.client.PERMANENT_REDIRECT.
~google.resumable_media.common.DataCorruption: If this is the final
chunk, a checksum validation was requested, and the checksum
does not match or is not available.
"""
method, url, payload, headers = self._prepare_request()
# Wrap the request business logic in a function to be retried.
def retriable_request():
result = transport.request(
method, url, data=payload, headers=headers, timeout=timeout
)
self._process_resumable_response(result, len(payload))
return result
return _request_helpers.wait_and_retry(
retriable_request, self._get_status_code, self._retry_strategy
)
def recover(self, transport):
"""Recover from a failure and check the status of the current upload.
This will verify the progress with the server and make sure the
current upload is in a valid state before :meth:`transmit_next_chunk`
can be used again. See https://cloud.google.com/storage/docs/performing-resumable-uploads#status-check
for more information.
This method can be used when a :class:`ResumableUpload` is in an
:attr:`~ResumableUpload.invalid` state due to a request failure.
Args:
transport (~requests.Session): A ``requests`` object which can
make authenticated requests.
Returns:
~requests.Response: The HTTP response returned by ``transport``.
"""
timeout = (
_request_helpers._DEFAULT_CONNECT_TIMEOUT,
_request_helpers._DEFAULT_READ_TIMEOUT,
)
method, url, payload, headers = self._prepare_recover_request()
# NOTE: We assume "payload is None" but pass it along anyway.
# Wrap the request business logic in a function to be retried.
def retriable_request():
result = transport.request(
method, url, data=payload, headers=headers, timeout=timeout
)
self._process_recover_response(result)
return result
return _request_helpers.wait_and_retry(
retriable_request, self._get_status_code, self._retry_strategy
)
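# Illustrative sketch (an editor's addition): the full resumable flow is one
# ``initiate()`` followed by ``transmit_next_chunk()`` until ``finished``.
# The chunk size must be a multiple of UPLOAD_CHUNK_SIZE (256 KB); the
# session, URL, and object name are placeholders.
def _example_resumable_upload(session, upload_url, stream, total_bytes):
    """Hedged example: stream an object of known size in 1 MB chunks."""
    from google.resumable_media import UPLOAD_CHUNK_SIZE

    upload = ResumableUpload(upload_url, 4 * UPLOAD_CHUNK_SIZE)  # 1 MB chunks
    metadata = {"name": "example.txt"}
    upload.initiate(
        session,
        stream,
        metadata,
        "application/octet-stream",
        total_bytes=total_bytes,
    )
    while not upload.finished:
        upload.transmit_next_chunk(session)
    return upload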
class XMLMPUContainer(_request_helpers.RequestsMixin, _upload.XMLMPUContainer):
"""Initiate and close an upload using the XML MPU API.
An XML MPU sends an initial request and then receives an upload ID.
Using the upload ID, the upload is then done in numbered parts and the
parts can be uploaded concurrently.
In order to avoid concurrency issues with this container object, the
uploading of individual parts is handled separately, by XMLMPUPart objects
spawned from this container class. The XMLMPUPart objects are not
necessarily in the same process as the container, so they do not update the
container automatically.
MPUs are sometimes referred to as "Multipart Uploads", which is ambiguous
given the JSON multipart upload, so the abbreviation "MPU" will be used
throughout.
See: https://cloud.google.com/storage/docs/multipart-uploads
Args:
upload_url (str): The URL of the object (without query parameters). The
initiate, PUT, and finalization requests will all use this URL, with
varying query parameters.
headers (Optional[Mapping[str, str]]): Extra headers that should
be sent with the :meth:`initiate` request, e.g. headers for
encrypted data. These headers will be propagated to individual
XMLMPUPart objects spawned from this container as well.
Attributes:
upload_url (str): The URL where the content will be uploaded.
upload_id (Optional[str]): The ID of the upload from the initialization
response.
"""
def initiate(
self,
transport,
content_type,
timeout=(
_request_helpers._DEFAULT_CONNECT_TIMEOUT,
_request_helpers._DEFAULT_READ_TIMEOUT,
),
):
"""Initiate an MPU and record the upload ID.
Args:
transport (object): An object which can make authenticated
requests.
content_type (str): The content type of the resource, e.g. a JPEG
image has content type ``image/jpeg``.
timeout (Optional[Union[float, Tuple[float, float]]]):
The number of seconds to wait for the server response.
Depending on the retry strategy, a request may be repeated
several times using the same timeout each time.
Can also be passed as a tuple (connect_timeout, read_timeout).
See :meth:`requests.Session.request` documentation for details.
Returns:
~requests.Response: The HTTP response returned by ``transport``.
"""
method, url, payload, headers = self._prepare_initiate_request(
content_type,
)
# Wrap the request business logic in a function to be retried.
def retriable_request():
result = transport.request(
method, url, data=payload, headers=headers, timeout=timeout
)
self._process_initiate_response(result)
return result
return _request_helpers.wait_and_retry(
retriable_request, self._get_status_code, self._retry_strategy
)
def finalize(
self,
transport,
timeout=(
_request_helpers._DEFAULT_CONNECT_TIMEOUT,
_request_helpers._DEFAULT_READ_TIMEOUT,
),
):
"""Finalize an MPU request with all the parts.
Args:
transport (object): An object which can make authenticated
requests.
timeout (Optional[Union[float, Tuple[float, float]]]):
The number of seconds to wait for the server response.
Depending on the retry strategy, a request may be repeated
several times using the same timeout each time.
Can also be passed as a tuple (connect_timeout, read_timeout).
See :meth:`requests.Session.request` documentation for details.
Returns:
~requests.Response: The HTTP response returned by ``transport``.
"""
method, url, payload, headers = self._prepare_finalize_request()
# Wrap the request business logic in a function to be retried.
def retriable_request():
result = transport.request(
method, url, data=payload, headers=headers, timeout=timeout
)
self._process_finalize_response(result)
return result
return _request_helpers.wait_and_retry(
retriable_request, self._get_status_code, self._retry_strategy
)
def cancel(
self,
transport,
timeout=(
_request_helpers._DEFAULT_CONNECT_TIMEOUT,
_request_helpers._DEFAULT_READ_TIMEOUT,
),
):
"""Cancel an MPU request and permanently delete any uploaded parts.
This cannot be undone.
Args:
transport (object): An object which can make authenticated
requests.
timeout (Optional[Union[float, Tuple[float, float]]]):
The number of seconds to wait for the server response.
Depending on the retry strategy, a request may be repeated
several times using the same timeout each time.
Can also be passed as a tuple (connect_timeout, read_timeout).
See :meth:`requests.Session.request` documentation for details.
Returns:
~requests.Response: The HTTP response returned by ``transport``.
"""
method, url, payload, headers = self._prepare_cancel_request()
# Wrap the request business logic in a function to be retried.
def retriable_request():
result = transport.request(
method, url, data=payload, headers=headers, timeout=timeout
)
self._process_cancel_response(result)
return result
return _request_helpers.wait_and_retry(
retriable_request, self._get_status_code, self._retry_strategy
)
class XMLMPUPart(_request_helpers.RequestsMixin, _upload.XMLMPUPart):
def upload(
self,
transport,
timeout=(
_request_helpers._DEFAULT_CONNECT_TIMEOUT,
_request_helpers._DEFAULT_READ_TIMEOUT,
),
):
"""Upload the part.
Args:
transport (object): An object which can make authenticated
requests.
timeout (Optional[Union[float, Tuple[float, float]]]):
The number of seconds to wait for the server response.
Depending on the retry strategy, a request may be repeated
several times using the same timeout each time.
Can also be passed as a tuple (connect_timeout, read_timeout).
See :meth:`requests.Session.request` documentation for details.
Returns:
~requests.Response: The HTTP response returned by ``transport``.
"""
method, url, payload, headers = self._prepare_upload_request()
# Wrap the request business logic in a function to be retried.
def retriable_request():
result = transport.request(
method, url, data=payload, headers=headers, timeout=timeout
)
self._process_upload_response(result)
return result
return _request_helpers.wait_and_retry(
retriable_request, self._get_status_code, self._retry_strategy
)
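# Illustrative sketch (an editor's addition): an end-to-end XML MPU. The
# container issues the initiate and finalize requests; each numbered part is
# uploaded by its own XMLMPUPart (possibly in another process), and the
# resulting etag is registered back on the container before finalizing.
# Session, URL, and filename are placeholders, and the XML API's own
# part-size rules still apply.
def _example_xml_mpu(session, object_url, filename):
    """Hedged example: single-part XML multipart upload of a local file."""
    import os

    container = XMLMPUContainer(object_url, filename)
    container.initiate(session, "application/octet-stream")

    part = XMLMPUPart(
        object_url,
        container.upload_id,
        filename,
        start=0,
        end=os.path.getsize(filename),
        part_number=1,
    )
    part.upload(session)
    container.register_part(part.part_number, part.etag)

    container.finalize(session)
    return container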