structure saas with tools
This commit is contained in:
@@ -0,0 +1,722 @@
|
||||
# Copyright 2017 Google Inc.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""Support for downloading media from Google APIs."""
|
||||
|
||||
import urllib3.response  # type: ignore

# NOTE: import the submodule explicitly — a bare ``import http`` binds only the
# ``http`` package and does not guarantee that the ``http.client`` attribute is
# available; the status-code checks below rely on ``http.client.PARTIAL_CONTENT``
# and previously worked only because other imports loaded ``http.client`` as a
# side effect.
import http.client

from google.resumable_media import _download
from google.resumable_media import common
from google.resumable_media import _helpers
from google.resumable_media.requests import _request_helpers
|
||||
|
||||
|
||||
_CHECKSUM_MISMATCH = """\
|
||||
Checksum mismatch while downloading:
|
||||
|
||||
{}
|
||||
|
||||
The X-Goog-Hash header indicated an {checksum_type} checksum of:
|
||||
|
||||
{}
|
||||
|
||||
but the actual {checksum_type} checksum of the downloaded contents was:
|
||||
|
||||
{}
|
||||
"""
|
||||
|
||||
_STREAM_SEEK_ERROR = """\
|
||||
Incomplete download for:
|
||||
{}
|
||||
Error writing to stream while handling a gzip-compressed file download.
|
||||
Please restart the download.
|
||||
"""
|
||||
|
||||
_RESPONSE_HEADERS_INFO = """\
|
||||
|
||||
The X-Goog-Stored-Content-Length is {}. The X-Goog-Stored-Content-Encoding is {}.
|
||||
|
||||
The download request read {} bytes of data.
|
||||
If the download was incomplete, please check the network connection and restart the download.
|
||||
"""
|
||||
|
||||
|
||||
class Download(_request_helpers.RequestsMixin, _download.Download):
    """Helper to manage downloading a resource from a Google API.

    "Slices" of the resource can be retrieved by specifying a range
    with ``start`` and / or ``end``. However, in typical usage, neither
    ``start`` nor ``end`` is expected to be provided.

    Args:
        media_url (str): The URL containing the media to be downloaded.
        stream (IO[bytes]): A write-able stream (i.e. file-like object) that
            the downloaded resource can be written to.
        start (int): The first byte in a range to be downloaded. If not
            provided, but ``end`` is provided, will download from the
            beginning to ``end`` of the media.
        end (int): The last byte in a range to be downloaded. If not
            provided, but ``start`` is provided, will download from the
            ``start`` to the end of the media.
        headers (Optional[Mapping[str, str]]): Extra headers that should
            be sent with the request, e.g. headers for encrypted data.
        checksum Optional([str]): The type of checksum to compute to verify
            the integrity of the object. The response headers must contain
            a checksum of the requested type. If the headers lack an
            appropriate checksum (for instance in the case of transcoded or
            ranged downloads where the remote service does not know the
            correct checksum) an INFO-level log will be emitted. Supported
            values are "md5", "crc32c" and None. The default is "md5".

    Attributes:
        media_url (str): The URL containing the media to be downloaded.
        start (Optional[int]): The first byte in a range to be downloaded.
        end (Optional[int]): The last byte in a range to be downloaded.
    """

    def _write_to_stream(self, response):
        """Write response body to a write-able stream.

        .. note:

            This method assumes that the ``_stream`` attribute is set on the
            current download.

        Args:
            response (~requests.Response): The HTTP response object.

        Raises:
            ~google.resumable_media.common.DataCorruption: If the download's
                checksum doesn't agree with server-computed checksum.
        """

        # Retrieve the expected checksum only once for the download request,
        # then compute and validate the checksum when the full download completes.
        # Retried requests are range requests, and there's no way to detect
        # data corruption for that byte range alone.
        if self._expected_checksum is None and self._checksum_object is None:
            # `_get_expected_checksum()` may return None even if a checksum was
            # requested, in which case it will emit an info log _MISSING_CHECKSUM.
            # If an invalid checksum type is specified, this will raise ValueError.
            expected_checksum, checksum_object = _helpers._get_expected_checksum(
                response, self._get_headers, self.media_url, checksum_type=self.checksum
            )
            self._expected_checksum = expected_checksum
            self._checksum_object = checksum_object
        else:
            expected_checksum = self._expected_checksum
            checksum_object = self._checksum_object

        with response:
            # NOTE: In order to handle compressed streams gracefully, we try
            # to insert our checksum object into the decompression stream. If
            # the stream is indeed compressed, this will delegate the checksum
            # object to the decoder and return a _DoNothingHash here.
            local_checksum_object = _add_decoder(response.raw, checksum_object)
            body_iter = response.iter_content(
                chunk_size=_request_helpers._SINGLE_GET_CHUNK_SIZE, decode_unicode=False
            )
            # Each chunk is written to the user stream, counted for resumption
            # bookkeeping, and folded into the running checksum.
            for chunk in body_iter:
                self._stream.write(chunk)
                self._bytes_downloaded += len(chunk)
                local_checksum_object.update(chunk)

        # Don't validate the checksum for partial responses.
        if (
            expected_checksum is not None
            and response.status_code != http.client.PARTIAL_CONTENT
        ):
            actual_checksum = _helpers.prepare_checksum_digest(checksum_object.digest())

            if actual_checksum != expected_checksum:
                headers = self._get_headers(response)
                x_goog_encoding = headers.get("x-goog-stored-content-encoding")
                x_goog_length = headers.get("x-goog-stored-content-length")
                content_length_msg = _RESPONSE_HEADERS_INFO.format(
                    x_goog_length, x_goog_encoding, self._bytes_downloaded
                )
                if (
                    x_goog_length
                    and self._bytes_downloaded < int(x_goog_length)
                    and x_goog_encoding != "gzip"
                ):
                    # The library will attempt to trigger a retry by raising a ConnectionError, if
                    # (a) bytes_downloaded is less than response header x-goog-stored-content-length, and
                    # (b) the object is not gzip-compressed when stored in Cloud Storage.
                    raise ConnectionError(content_length_msg)
                else:
                    msg = _CHECKSUM_MISMATCH.format(
                        self.media_url,
                        expected_checksum,
                        actual_checksum,
                        checksum_type=self.checksum.upper(),
                    )
                    msg += content_length_msg
                    raise common.DataCorruption(response, msg)

    def consume(
        self,
        transport,
        timeout=(
            _request_helpers._DEFAULT_CONNECT_TIMEOUT,
            _request_helpers._DEFAULT_READ_TIMEOUT,
        ),
    ):
        """Consume the resource to be downloaded.

        If a ``stream`` is attached to this download, then the downloaded
        resource will be written to the stream.

        Args:
            transport (~requests.Session): A ``requests`` object which can
                make authenticated requests.
            timeout (Optional[Union[float, Tuple[float, float]]]):
                The number of seconds to wait for the server response.
                Depending on the retry strategy, a request may be repeated
                several times using the same timeout each time.

                Can also be passed as a tuple (connect_timeout, read_timeout).
                See :meth:`requests.Session.request` documentation for details.

        Returns:
            ~requests.Response: The HTTP response returned by ``transport``.

        Raises:
            ~google.resumable_media.common.DataCorruption: If the download's
                checksum doesn't agree with server-computed checksum.
            ValueError: If the current :class:`Download` has already
                finished.
        """
        method, _, payload, headers = self._prepare_request()
        # NOTE: We assume "payload is None" but pass it along anyway.
        request_kwargs = {
            "data": payload,
            "headers": headers,
            "timeout": timeout,
        }
        if self._stream is not None:
            # Stream the response so the body can be written to ``self._stream``
            # in chunks instead of being buffered in memory.
            request_kwargs["stream"] = True

        # Assign object generation if generation is specified in the media url.
        if self._object_generation is None:
            self._object_generation = _helpers._get_generation_from_url(self.media_url)

        # Wrap the request business logic in a function to be retried.
        def retriable_request():
            url = self.media_url

            # To restart an interrupted download, read from the offset of last byte
            # received using a range request, and set object generation query param.
            if self._bytes_downloaded > 0:
                _download.add_bytes_range(
                    (self.start or 0) + self._bytes_downloaded, self.end, self._headers
                )
                request_kwargs["headers"] = self._headers

            # Set object generation query param to ensure the same object content is requested.
            if (
                self._object_generation is not None
                and _helpers._get_generation_from_url(self.media_url) is None
            ):
                query_param = {"generation": self._object_generation}
                url = _helpers.add_query_parameters(self.media_url, query_param)

            result = transport.request(method, url, **request_kwargs)

            # If a generation hasn't been specified, and this is the first response we get, let's record the
            # generation. In future requests we'll specify the generation query param to avoid data races.
            if self._object_generation is None:
                self._object_generation = _helpers._parse_generation_header(
                    result, self._get_headers
                )

            self._process_response(result)

            # With decompressive transcoding, GCS serves back the whole file regardless of the range request,
            # thus we reset the stream position to the start of the stream.
            # See: https://cloud.google.com/storage/docs/transcoding#range
            if self._stream is not None:
                if _helpers._is_decompressive_transcoding(result, self._get_headers):
                    try:
                        self._stream.seek(0)
                    except Exception as exc:
                        msg = _STREAM_SEEK_ERROR.format(url)
                        raise Exception(msg) from exc
                    self._bytes_downloaded = 0

                self._write_to_stream(result)

            return result

        return _request_helpers.wait_and_retry(
            retriable_request, self._get_status_code, self._retry_strategy
        )
|
||||
|
||||
|
||||
class RawDownload(_request_helpers.RawRequestsMixin, _download.Download):
    """Helper to manage downloading a raw resource from a Google API.

    "Slices" of the resource can be retrieved by specifying a range
    with ``start`` and / or ``end``. However, in typical usage, neither
    ``start`` nor ``end`` is expected to be provided.

    Args:
        media_url (str): The URL containing the media to be downloaded.
        stream (IO[bytes]): A write-able stream (i.e. file-like object) that
            the downloaded resource can be written to.
        start (int): The first byte in a range to be downloaded. If not
            provided, but ``end`` is provided, will download from the
            beginning to ``end`` of the media.
        end (int): The last byte in a range to be downloaded. If not
            provided, but ``start`` is provided, will download from the
            ``start`` to the end of the media.
        headers (Optional[Mapping[str, str]]): Extra headers that should
            be sent with the request, e.g. headers for encrypted data.
        checksum Optional([str]): The type of checksum to compute to verify
            the integrity of the object. The response headers must contain
            a checksum of the requested type. If the headers lack an
            appropriate checksum (for instance in the case of transcoded or
            ranged downloads where the remote service does not know the
            correct checksum) an INFO-level log will be emitted. Supported
            values are "md5", "crc32c" and None. The default is "md5".

    Attributes:
        media_url (str): The URL containing the media to be downloaded.
        start (Optional[int]): The first byte in a range to be downloaded.
        end (Optional[int]): The last byte in a range to be downloaded.
    """

    def _write_to_stream(self, response):
        """Write response body to a write-able stream.

        .. note:

            This method assumes that the ``_stream`` attribute is set on the
            current download.

        Args:
            response (~requests.Response): The HTTP response object.

        Raises:
            ~google.resumable_media.common.DataCorruption: If the download's
                checksum doesn't agree with server-computed checksum.
        """
        # Retrieve the expected checksum only once for the download request,
        # then compute and validate the checksum when the full download completes.
        # Retried requests are range requests, and there's no way to detect
        # data corruption for that byte range alone.
        if self._expected_checksum is None and self._checksum_object is None:
            # `_get_expected_checksum()` may return None even if a checksum was
            # requested, in which case it will emit an info log _MISSING_CHECKSUM.
            # If an invalid checksum type is specified, this will raise ValueError.
            expected_checksum, checksum_object = _helpers._get_expected_checksum(
                response, self._get_headers, self.media_url, checksum_type=self.checksum
            )
            self._expected_checksum = expected_checksum
            self._checksum_object = checksum_object
        else:
            expected_checksum = self._expected_checksum
            checksum_object = self._checksum_object

        with response:
            # Read raw (undecoded) bytes straight from urllib3, so the checksum
            # is computed over exactly what was sent on the wire.
            body_iter = response.raw.stream(
                _request_helpers._SINGLE_GET_CHUNK_SIZE, decode_content=False
            )
            for chunk in body_iter:
                self._stream.write(chunk)
                self._bytes_downloaded += len(chunk)
                checksum_object.update(chunk)
            # NOTE(review): sets a private ``requests.Response`` attribute;
            # presumably this marks the body as consumed after reading from the
            # raw stream directly — confirm against the installed requests version.
            response._content_consumed = True

        # Don't validate the checksum for partial responses.
        if (
            expected_checksum is not None
            and response.status_code != http.client.PARTIAL_CONTENT
        ):
            actual_checksum = _helpers.prepare_checksum_digest(checksum_object.digest())

            if actual_checksum != expected_checksum:
                headers = self._get_headers(response)
                x_goog_encoding = headers.get("x-goog-stored-content-encoding")
                x_goog_length = headers.get("x-goog-stored-content-length")
                content_length_msg = _RESPONSE_HEADERS_INFO.format(
                    x_goog_length, x_goog_encoding, self._bytes_downloaded
                )
                if (
                    x_goog_length
                    and self._bytes_downloaded < int(x_goog_length)
                    and x_goog_encoding != "gzip"
                ):
                    # The library will attempt to trigger a retry by raising a ConnectionError, if
                    # (a) bytes_downloaded is less than response header x-goog-stored-content-length, and
                    # (b) the object is not gzip-compressed when stored in Cloud Storage.
                    raise ConnectionError(content_length_msg)
                else:
                    msg = _CHECKSUM_MISMATCH.format(
                        self.media_url,
                        expected_checksum,
                        actual_checksum,
                        checksum_type=self.checksum.upper(),
                    )
                    msg += content_length_msg
                    raise common.DataCorruption(response, msg)

    def consume(
        self,
        transport,
        timeout=(
            _request_helpers._DEFAULT_CONNECT_TIMEOUT,
            _request_helpers._DEFAULT_READ_TIMEOUT,
        ),
    ):
        """Consume the resource to be downloaded.

        If a ``stream`` is attached to this download, then the downloaded
        resource will be written to the stream.

        Args:
            transport (~requests.Session): A ``requests`` object which can
                make authenticated requests.
            timeout (Optional[Union[float, Tuple[float, float]]]):
                The number of seconds to wait for the server response.
                Depending on the retry strategy, a request may be repeated
                several times using the same timeout each time.

                Can also be passed as a tuple (connect_timeout, read_timeout).
                See :meth:`requests.Session.request` documentation for details.

        Returns:
            ~requests.Response: The HTTP response returned by ``transport``.

        Raises:
            ~google.resumable_media.common.DataCorruption: If the download's
                checksum doesn't agree with server-computed checksum.
            ValueError: If the current :class:`Download` has already
                finished.
        """
        method, _, payload, headers = self._prepare_request()
        # NOTE: We assume "payload is None" but pass it along anyway.
        # Unlike :class:`Download`, the raw variant always streams the body.
        request_kwargs = {
            "data": payload,
            "headers": headers,
            "timeout": timeout,
            "stream": True,
        }

        # Assign object generation if generation is specified in the media url.
        if self._object_generation is None:
            self._object_generation = _helpers._get_generation_from_url(self.media_url)

        # Wrap the request business logic in a function to be retried.
        def retriable_request():
            url = self.media_url

            # To restart an interrupted download, read from the offset of last byte
            # received using a range request, and set object generation query param.
            if self._bytes_downloaded > 0:
                _download.add_bytes_range(
                    (self.start or 0) + self._bytes_downloaded, self.end, self._headers
                )
                request_kwargs["headers"] = self._headers

            # Set object generation query param to ensure the same object content is requested.
            if (
                self._object_generation is not None
                and _helpers._get_generation_from_url(self.media_url) is None
            ):
                query_param = {"generation": self._object_generation}
                url = _helpers.add_query_parameters(self.media_url, query_param)

            result = transport.request(method, url, **request_kwargs)

            # If a generation hasn't been specified, and this is the first response we get, let's record the
            # generation. In future requests we'll specify the generation query param to avoid data races.
            if self._object_generation is None:
                self._object_generation = _helpers._parse_generation_header(
                    result, self._get_headers
                )

            self._process_response(result)

            # With decompressive transcoding, GCS serves back the whole file regardless of the range request,
            # thus we reset the stream position to the start of the stream.
            # See: https://cloud.google.com/storage/docs/transcoding#range
            if self._stream is not None:
                if _helpers._is_decompressive_transcoding(result, self._get_headers):
                    try:
                        self._stream.seek(0)
                    except Exception as exc:
                        msg = _STREAM_SEEK_ERROR.format(url)
                        raise Exception(msg) from exc
                    self._bytes_downloaded = 0

                self._write_to_stream(result)

            return result

        return _request_helpers.wait_and_retry(
            retriable_request, self._get_status_code, self._retry_strategy
        )
|
||||
|
||||
|
||||
class ChunkedDownload(_request_helpers.RequestsMixin, _download.ChunkedDownload):
    """Download a media resource from a Google API one chunk at a time.

    Args:
        media_url (str): URL of the media to be downloaded.
        chunk_size (int): Number of bytes requested per chunk.
        stream (IO[bytes]): Writable, file-like object into which the
            downloaded chunks are concatenated.
        start (int): First byte of the range to download. Defaults to ``0``
            when omitted.
        end (int): Last byte of the range to download. When omitted, the
            download continues to the end of the media.
        headers (Optional[Mapping[str, str]]): Additional headers sent with
            every chunk request, e.g. data encryption key headers.

    Attributes:
        media_url (str): URL of the media to be downloaded.
        start (Optional[int]): First byte of the range to download.
        end (Optional[int]): Last byte of the range to download.
        chunk_size (int): Number of bytes requested per chunk.

    Raises:
        ValueError: If ``start`` is negative.
    """

    def consume_next_chunk(
        self,
        transport,
        timeout=(
            _request_helpers._DEFAULT_CONNECT_TIMEOUT,
            _request_helpers._DEFAULT_READ_TIMEOUT,
        ),
    ):
        """Request and process the next chunk of the resource.

        Args:
            transport (~requests.Session): A ``requests`` object capable of
                making authenticated requests.
            timeout (Optional[Union[float, Tuple[float, float]]]):
                Seconds to wait for the server response. Under a retry
                strategy the same timeout applies to each repeated attempt.

                May also be given as a ``(connect_timeout, read_timeout)``
                tuple; see :meth:`requests.Session.request` documentation
                for details.

        Returns:
            ~requests.Response: The HTTP response returned by ``transport``.

        Raises:
            ValueError: If the current download has finished.
        """
        method, url, payload, headers = self._prepare_request()

        def _issue_request():
            # NOTE: ``payload`` is expected to be None; it is forwarded anyway.
            response = transport.request(
                method,
                url,
                data=payload,
                headers=headers,
                timeout=timeout,
            )
            self._process_response(response)
            return response

        # Delegate retry scheduling to the shared helper.
        return _request_helpers.wait_and_retry(
            _issue_request, self._get_status_code, self._retry_strategy
        )
|
||||
|
||||
|
||||
class RawChunkedDownload(_request_helpers.RawRequestsMixin, _download.ChunkedDownload):
    """Download a raw media resource from a Google API one chunk at a time.

    Args:
        media_url (str): URL of the media to be downloaded.
        chunk_size (int): Number of bytes requested per chunk.
        stream (IO[bytes]): Writable, file-like object into which the
            downloaded chunks are concatenated.
        start (int): First byte of the range to download. Defaults to ``0``
            when omitted.
        end (int): Last byte of the range to download. When omitted, the
            download continues to the end of the media.
        headers (Optional[Mapping[str, str]]): Additional headers sent with
            every chunk request, e.g. data encryption key headers.

    Attributes:
        media_url (str): URL of the media to be downloaded.
        start (Optional[int]): First byte of the range to download.
        end (Optional[int]): Last byte of the range to download.
        chunk_size (int): Number of bytes requested per chunk.

    Raises:
        ValueError: If ``start`` is negative.
    """

    def consume_next_chunk(
        self,
        transport,
        timeout=(
            _request_helpers._DEFAULT_CONNECT_TIMEOUT,
            _request_helpers._DEFAULT_READ_TIMEOUT,
        ),
    ):
        """Request and process the next chunk of the resource.

        Args:
            transport (~requests.Session): A ``requests`` object capable of
                making authenticated requests.
            timeout (Optional[Union[float, Tuple[float, float]]]):
                Seconds to wait for the server response. Under a retry
                strategy the same timeout applies to each repeated attempt.

                May also be given as a ``(connect_timeout, read_timeout)``
                tuple; see :meth:`requests.Session.request` documentation
                for details.

        Returns:
            ~requests.Response: The HTTP response returned by ``transport``.

        Raises:
            ValueError: If the current download has finished.
        """
        method, url, payload, headers = self._prepare_request()

        def _issue_request():
            # NOTE: ``payload`` is expected to be None; it is forwarded anyway.
            # ``stream=True`` keeps the raw body unread until the caller
            # consumes it.
            response = transport.request(
                method,
                url,
                data=payload,
                headers=headers,
                stream=True,
                timeout=timeout,
            )
            self._process_response(response)
            return response

        # Delegate retry scheduling to the shared helper.
        return _request_helpers.wait_and_retry(
            _issue_request, self._get_status_code, self._retry_strategy
        )
|
||||
|
||||
|
||||
def _add_decoder(response_raw, checksum):
|
||||
"""Patch the ``_decoder`` on a ``urllib3`` response.
|
||||
|
||||
This is so that we can intercept the compressed bytes before they are
|
||||
decoded.
|
||||
|
||||
Only patches if the content encoding is ``gzip`` or ``br``.
|
||||
|
||||
Args:
|
||||
response_raw (urllib3.response.HTTPResponse): The raw response for
|
||||
an HTTP request.
|
||||
checksum (object):
|
||||
A checksum which will be updated with compressed bytes.
|
||||
|
||||
Returns:
|
||||
object: Either the original ``checksum`` if ``_decoder`` is not
|
||||
patched, or a ``_DoNothingHash`` if the decoder is patched, since the
|
||||
caller will no longer need to hash to decoded bytes.
|
||||
"""
|
||||
encoding = response_raw.headers.get("content-encoding", "").lower()
|
||||
if encoding == "gzip":
|
||||
response_raw._decoder = _GzipDecoder(checksum)
|
||||
return _helpers._DoNothingHash()
|
||||
# Only activate if brotli is installed
|
||||
elif encoding == "br" and _BrotliDecoder: # type: ignore
|
||||
response_raw._decoder = _BrotliDecoder(checksum)
|
||||
return _helpers._DoNothingHash()
|
||||
else:
|
||||
return checksum
|
||||
|
||||
|
||||
class _GzipDecoder(urllib3.response.GzipDecoder):
    """Custom subclass of ``urllib3`` decoder for ``gzip``-ed bytes.

    Allows a checksum function to see the compressed bytes before they are
    decoded. This way the checksum of the compressed value can be computed.

    Args:
        checksum (object):
            A checksum which will be updated with compressed bytes.
    """

    def __init__(self, checksum):
        super().__init__()
        # Checksum object (e.g. md5/crc32c) that is fed the *compressed* bytes.
        self._checksum = checksum

    def decompress(self, data):
        """Decompress the bytes.

        Args:
            data (bytes): The compressed bytes to be decompressed.

        Returns:
            bytes: The decompressed bytes from ``data``.
        """
        # Update the checksum with the still-compressed input before
        # delegating the actual decompression to urllib3.
        self._checksum.update(data)
        return super().decompress(data)
|
||||
|
||||
|
||||
# urllib3.response.BrotliDecoder might not exist depending on whether brotli is
# installed.
if hasattr(urllib3.response, "BrotliDecoder"):

    class _BrotliDecoder:
        """Handler for ``brotli`` encoded bytes.

        Allows a checksum function to see the compressed bytes before they are
        decoded. This way the checksum of the compressed value can be computed.

        Because BrotliDecoder's decompress method is dynamically created in
        urllib3, a subclass is not practical. Instead, this class creates a
        captive urllib3.requests.BrotliDecoder instance and acts as a proxy.

        Args:
            checksum (object):
                A checksum which will be updated with compressed bytes.
        """

        def __init__(self, checksum):
            # Captive decoder instance this proxy delegates to.
            self._decoder = urllib3.response.BrotliDecoder()
            # Checksum object that is fed the *compressed* bytes.
            self._checksum = checksum

        def decompress(self, data):
            """Decompress the bytes.

            Args:
                data (bytes): The compressed bytes to be decompressed.

            Returns:
                bytes: The decompressed bytes from ``data``.
            """
            # Feed the checksum before decoding so it reflects the wire bytes.
            self._checksum.update(data)
            return self._decoder.decompress(data)

        def flush(self):
            # Forward flush to the captive decoder unchanged.
            return self._decoder.flush()

else:  # pragma: NO COVER
    # Sentinel: brotli support unavailable; checked in _add_decoder.
    _BrotliDecoder = None  # type: ignore # pragma: NO COVER
|
||||
Reference in New Issue
Block a user