# Copyright 2015 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. """Classes for extract (export) jobs.""" import typing from google.cloud.bigquery import _helpers from google.cloud.bigquery.model import ModelReference from google.cloud.bigquery.table import Table from google.cloud.bigquery.table import TableListItem from google.cloud.bigquery.table import TableReference from google.cloud.bigquery.job.base import _AsyncJob from google.cloud.bigquery.job.base import _JobConfig from google.cloud.bigquery.job.base import _JobReference class ExtractJobConfig(_JobConfig): """Configuration options for extract jobs. All properties in this class are optional. Values which are :data:`None` -> server defaults. Set properties on the constructed configuration by using the property name as the name of a keyword argument. """ def __init__(self, **kwargs): super(ExtractJobConfig, self).__init__("extract", **kwargs) @property def compression(self): """google.cloud.bigquery.job.Compression: Compression type to use for exported files. See https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationExtract.FIELDS.compression """ return self._get_sub_prop("compression") @compression.setter def compression(self, value): self._set_sub_prop("compression", value) @property def destination_format(self): """google.cloud.bigquery.job.DestinationFormat: Exported file format. See https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationExtract.FIELDS.destination_format """ return self._get_sub_prop("destinationFormat") @destination_format.setter def destination_format(self, value): self._set_sub_prop("destinationFormat", value) @property def field_delimiter(self): """str: Delimiter to use between fields in the exported data. See https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationExtract.FIELDS.field_delimiter """ return self._get_sub_prop("fieldDelimiter") @field_delimiter.setter def field_delimiter(self, value): self._set_sub_prop("fieldDelimiter", value) @property def print_header(self): """bool: Print a header row in the exported data. See https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationExtract.FIELDS.print_header """ return self._get_sub_prop("printHeader") @print_header.setter def print_header(self, value): self._set_sub_prop("printHeader", value) @property def use_avro_logical_types(self): """bool: For loads of Avro data, governs whether Avro logical types are converted to their corresponding BigQuery types (e.g. TIMESTAMP) rather than raw types (e.g. INTEGER). """ return self._get_sub_prop("useAvroLogicalTypes") @use_avro_logical_types.setter def use_avro_logical_types(self, value): self._set_sub_prop("useAvroLogicalTypes", bool(value)) class ExtractJob(_AsyncJob): """Asynchronous job: extract data from a table into Cloud Storage. Args: job_id (str): the job's ID. source (Union[ \ google.cloud.bigquery.table.TableReference, \ google.cloud.bigquery.model.ModelReference \ ]): Table or Model from which data is to be loaded or extracted. destination_uris (List[str]): URIs describing where the extracted data will be written in Cloud Storage, using the format ``gs:///``. client (google.cloud.bigquery.client.Client): A client which holds credentials and project configuration. job_config (Optional[google.cloud.bigquery.job.ExtractJobConfig]): Extra configuration options for the extract job. """ _JOB_TYPE = "extract" _CONFIG_CLASS = ExtractJobConfig def __init__(self, job_id, source, destination_uris, client, job_config=None): super(ExtractJob, self).__init__(job_id, client) if job_config is not None: self._properties["configuration"] = job_config._properties if source: source_ref = {"projectId": source.project, "datasetId": source.dataset_id} if isinstance(source, (Table, TableListItem, TableReference)): source_ref["tableId"] = source.table_id source_key = "sourceTable" else: source_ref["modelId"] = source.model_id source_key = "sourceModel" _helpers._set_sub_prop( self._properties, ["configuration", "extract", source_key], source_ref ) if destination_uris: _helpers._set_sub_prop( self._properties, ["configuration", "extract", "destinationUris"], destination_uris, ) @property def configuration(self) -> ExtractJobConfig: """The configuration for this extract job.""" return typing.cast(ExtractJobConfig, super().configuration) @property def source(self): """Union[ \ google.cloud.bigquery.table.TableReference, \ google.cloud.bigquery.model.ModelReference \ ]: Table or Model from which data is to be loaded or extracted. """ source_config = _helpers._get_sub_prop( self._properties, ["configuration", "extract", "sourceTable"] ) if source_config: return TableReference.from_api_repr(source_config) else: source_config = _helpers._get_sub_prop( self._properties, ["configuration", "extract", "sourceModel"] ) return ModelReference.from_api_repr(source_config) @property def destination_uris(self): """List[str]: URIs describing where the extracted data will be written in Cloud Storage, using the format ``gs:///``. """ return _helpers._get_sub_prop( self._properties, ["configuration", "extract", "destinationUris"] ) @property def compression(self): """See :attr:`google.cloud.bigquery.job.ExtractJobConfig.compression`. """ return self.configuration.compression @property def destination_format(self): """See :attr:`google.cloud.bigquery.job.ExtractJobConfig.destination_format`. """ return self.configuration.destination_format @property def field_delimiter(self): """See :attr:`google.cloud.bigquery.job.ExtractJobConfig.field_delimiter`. """ return self.configuration.field_delimiter @property def print_header(self): """See :attr:`google.cloud.bigquery.job.ExtractJobConfig.print_header`. """ return self.configuration.print_header @property def destination_uri_file_counts(self): """Return file counts from job statistics, if present. See: https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics4.FIELDS.destination_uri_file_counts Returns: List[int]: A list of integer counts, each representing the number of files per destination URI or URI pattern specified in the extract configuration. These values will be in the same order as the URIs specified in the 'destinationUris' field. Returns None if job is not yet complete. """ counts = self._job_statistics().get("destinationUriFileCounts") if counts is not None: return [int(count) for count in counts] return None def to_api_repr(self): """Generate a resource for :meth:`_begin`.""" # Exclude statistics, if set. return { "jobReference": self._properties["jobReference"], "configuration": self._properties["configuration"], } @classmethod def from_api_repr(cls, resource: dict, client) -> "ExtractJob": """Factory: construct a job given its API representation .. note:: This method assumes that the project found in the resource matches the client's project. Args: resource (Dict): dataset job representation returned from the API client (google.cloud.bigquery.client.Client): Client which holds credentials and project configuration for the dataset. Returns: google.cloud.bigquery.job.ExtractJob: Job parsed from ``resource``. """ cls._check_resource_config(resource) job_ref = _JobReference._from_api_repr(resource["jobReference"]) job = cls(job_ref, None, None, client=client) job._set_properties(resource) return job