Files
evo-ai/.venv/lib/python3.10/site-packages/google/cloud/bigquery/job/extract.py
2025-04-25 15:30:54 -03:00

272 lines
9.3 KiB
Python

# Copyright 2015 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Classes for extract (export) jobs."""
import typing
from google.cloud.bigquery import _helpers
from google.cloud.bigquery.model import ModelReference
from google.cloud.bigquery.table import Table
from google.cloud.bigquery.table import TableListItem
from google.cloud.bigquery.table import TableReference
from google.cloud.bigquery.job.base import _AsyncJob
from google.cloud.bigquery.job.base import _JobConfig
from google.cloud.bigquery.job.base import _JobReference
class ExtractJobConfig(_JobConfig):
    """Configuration options for extract (export) jobs.

    Every property here is optional; leaving a property as :data:`None`
    defers to the server-side default.  Properties may also be supplied as
    keyword arguments to the constructor, keyed by property name.
    """

    def __init__(self, **kwargs):
        # "extract" selects the sub-property bucket inside the job
        # configuration resource that _JobConfig reads from / writes to.
        super(ExtractJobConfig, self).__init__("extract", **kwargs)

    @property
    def compression(self):
        """google.cloud.bigquery.job.Compression: Compression type applied to
        the exported files.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationExtract.FIELDS.compression
        """
        return self._get_sub_prop("compression")

    @compression.setter
    def compression(self, value):
        self._set_sub_prop("compression", value)

    @property
    def destination_format(self):
        """google.cloud.bigquery.job.DestinationFormat: File format for the
        exported data.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationExtract.FIELDS.destination_format
        """
        return self._get_sub_prop("destinationFormat")

    @destination_format.setter
    def destination_format(self, value):
        self._set_sub_prop("destinationFormat", value)

    @property
    def field_delimiter(self):
        """str: Character placed between fields in the exported data.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationExtract.FIELDS.field_delimiter
        """
        return self._get_sub_prop("fieldDelimiter")

    @field_delimiter.setter
    def field_delimiter(self, value):
        self._set_sub_prop("fieldDelimiter", value)

    @property
    def print_header(self):
        """bool: Whether a header row is written at the top of the exported
        data.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationExtract.FIELDS.print_header
        """
        return self._get_sub_prop("printHeader")

    @print_header.setter
    def print_header(self, value):
        self._set_sub_prop("printHeader", value)

    @property
    def use_avro_logical_types(self):
        """bool: For loads of Avro data, governs whether Avro logical types
        are converted to their corresponding BigQuery types (e.g. TIMESTAMP)
        rather than raw types (e.g. INTEGER).
        """
        return self._get_sub_prop("useAvroLogicalTypes")

    @use_avro_logical_types.setter
    def use_avro_logical_types(self, value):
        # Coerce truthy/falsy inputs so the API payload is a real boolean.
        coerced = bool(value)
        self._set_sub_prop("useAvroLogicalTypes", coerced)
class ExtractJob(_AsyncJob):
    """Asynchronous job: extract data from a table into Cloud Storage.

    Args:
        job_id (str): the job's ID.
        source (Union[ \
            google.cloud.bigquery.table.TableReference, \
            google.cloud.bigquery.model.ModelReference \
        ]):
            Table or Model from which data is to be loaded or extracted.
        destination_uris (List[str]):
            URIs describing where the extracted data will be written in Cloud
            Storage, using the format ``gs://<bucket_name>/<object_name_or_glob>``.
        client (google.cloud.bigquery.client.Client):
            A client which holds credentials and project configuration.
        job_config (Optional[google.cloud.bigquery.job.ExtractJobConfig]):
            Extra configuration options for the extract job.
    """

    _JOB_TYPE = "extract"
    _CONFIG_CLASS = ExtractJobConfig

    def __init__(self, job_id, source, destination_uris, client, job_config=None):
        super(ExtractJob, self).__init__(job_id, client)

        if job_config is not None:
            # Adopt the config's backing dict directly as this job's
            # "configuration" resource section.
            self._properties["configuration"] = job_config._properties

        if source:
            reference = {
                "projectId": source.project,
                "datasetId": source.dataset_id,
            }
            # Tables and models are exported via different resource keys.
            if isinstance(source, (Table, TableListItem, TableReference)):
                reference["tableId"] = source.table_id
                resource_key = "sourceTable"
            else:
                reference["modelId"] = source.model_id
                resource_key = "sourceModel"
            _helpers._set_sub_prop(
                self._properties,
                ["configuration", "extract", resource_key],
                reference,
            )

        if destination_uris:
            _helpers._set_sub_prop(
                self._properties,
                ["configuration", "extract", "destinationUris"],
                destination_uris,
            )

    @property
    def configuration(self) -> ExtractJobConfig:
        """The configuration for this extract job."""
        return typing.cast(ExtractJobConfig, super().configuration)

    @property
    def source(self):
        """Union[ \
            google.cloud.bigquery.table.TableReference, \
            google.cloud.bigquery.model.ModelReference \
        ]: Table or Model from which data is to be loaded or extracted.
        """
        table_repr = _helpers._get_sub_prop(
            self._properties, ["configuration", "extract", "sourceTable"]
        )
        if table_repr:
            return TableReference.from_api_repr(table_repr)
        # No table source present; fall back to a model source.
        model_repr = _helpers._get_sub_prop(
            self._properties, ["configuration", "extract", "sourceModel"]
        )
        return ModelReference.from_api_repr(model_repr)

    @property
    def destination_uris(self):
        """List[str]: URIs describing where the extracted data will be
        written in Cloud Storage, using the format
        ``gs://<bucket_name>/<object_name_or_glob>``.
        """
        return _helpers._get_sub_prop(
            self._properties, ["configuration", "extract", "destinationUris"]
        )

    @property
    def compression(self):
        """See
        :attr:`google.cloud.bigquery.job.ExtractJobConfig.compression`.
        """
        return self.configuration.compression

    @property
    def destination_format(self):
        """See
        :attr:`google.cloud.bigquery.job.ExtractJobConfig.destination_format`.
        """
        return self.configuration.destination_format

    @property
    def field_delimiter(self):
        """See
        :attr:`google.cloud.bigquery.job.ExtractJobConfig.field_delimiter`.
        """
        return self.configuration.field_delimiter

    @property
    def print_header(self):
        """See
        :attr:`google.cloud.bigquery.job.ExtractJobConfig.print_header`.
        """
        return self.configuration.print_header

    @property
    def destination_uri_file_counts(self):
        """Return file counts from job statistics, if present.

        See:
        https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics4.FIELDS.destination_uri_file_counts

        Returns:
            List[int]:
                A list of integer counts, each representing the number of files
                per destination URI or URI pattern specified in the extract
                configuration. These values will be in the same order as the URIs
                specified in the 'destinationUris' field. Returns None if job is
                not yet complete.
        """
        raw_counts = self._job_statistics().get("destinationUriFileCounts")
        if raw_counts is None:
            # Statistics not populated until the job finishes.
            return None
        return [int(item) for item in raw_counts]

    def to_api_repr(self):
        """Generate a resource for :meth:`_begin`."""
        # Only the job reference and configuration are sent; statistics
        # (if any) are deliberately excluded from the request payload.
        resource = {
            "jobReference": self._properties["jobReference"],
            "configuration": self._properties["configuration"],
        }
        return resource

    @classmethod
    def from_api_repr(cls, resource: dict, client) -> "ExtractJob":
        """Factory: construct a job given its API representation

        .. note::
            This method assumes that the project found in the resource matches
            the client's project.

        Args:
            resource (Dict): dataset job representation returned from the API
            client (google.cloud.bigquery.client.Client):
                Client which holds credentials and project
                configuration for the dataset.

        Returns:
            google.cloud.bigquery.job.ExtractJob: Job parsed from ``resource``.
        """
        cls._check_resource_config(resource)
        reference = _JobReference._from_api_repr(resource["jobReference"])
        # source/destination are populated from the resource below, so pass
        # None placeholders to the constructor.
        instance = cls(reference, None, None, client=client)
        instance._set_properties(resource)
        return instance