Files
evo-ai/.venv/lib/python3.10/site-packages/google/cloud/bigquery/external_config.py
2025-04-25 15:30:54 -03:00

1189 lines
40 KiB
Python

# Copyright 2017 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Define classes that describe external data sources.
These are used for both Table.externalDataConfiguration and
Job.configuration.query.tableDefinitions.
"""
from __future__ import absolute_import, annotations
import base64
import copy
from typing import Any, Dict, FrozenSet, Iterable, Optional, Union
from google.cloud.bigquery._helpers import _to_bytes
from google.cloud.bigquery._helpers import _bytes_to_json
from google.cloud.bigquery._helpers import _int_or_none
from google.cloud.bigquery._helpers import _str_or_none
from google.cloud.bigquery import _helpers
from google.cloud.bigquery.format_options import AvroOptions, ParquetOptions
from google.cloud.bigquery import schema
from google.cloud.bigquery.schema import SchemaField
class ExternalSourceFormat(object):
    """The format for external data files.

    Note that the set of allowed values for external data sources is different
    than the set used for loading data (see
    :class:`~google.cloud.bigquery.job.SourceFormat`).
    """

    CSV = "CSV"
    """Specifies CSV format."""

    GOOGLE_SHEETS = "GOOGLE_SHEETS"
    """Specifies Google Sheets format."""

    NEWLINE_DELIMITED_JSON = "NEWLINE_DELIMITED_JSON"
    """Specifies newline delimited JSON format."""

    AVRO = "AVRO"
    """Specifies Avro format."""

    DATASTORE_BACKUP = "DATASTORE_BACKUP"
    """Specifies datastore backup format"""

    ORC = "ORC"
    """Specifies ORC format."""

    PARQUET = "PARQUET"
    """Specifies Parquet format."""

    BIGTABLE = "BIGTABLE"
    """Specifies Bigtable format."""
class BigtableColumn(object):
    """Options for a Bigtable column."""

    def __init__(self):
        self._properties = {}

    @property
    def field_name(self):
        """str: Identifier to use when the qualifier is not a valid BigQuery
        field identifier.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableColumn.FIELDS.field_name
        """
        return self._properties.get("fieldName")

    @field_name.setter
    def field_name(self, value):
        self._properties["fieldName"] = value

    @property
    def type_(self):
        """str: Type used to convert the values in cells of this column.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableColumn.FIELDS.type
        """
        return self._properties.get("type")

    @type_.setter
    def type_(self, value):
        self._properties["type"] = value

    @property
    def encoding(self):
        """str: Encoding of the values when the type is not `STRING`.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableColumn.FIELDS.encoding
        """
        return self._properties.get("encoding")

    @encoding.setter
    def encoding(self, value):
        self._properties["encoding"] = value

    @property
    def only_read_latest(self):
        """bool: When set, only the latest version of a value in this column
        is exposed.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableColumn.FIELDS.only_read_latest
        """
        return self._properties.get("onlyReadLatest")

    @only_read_latest.setter
    def only_read_latest(self, value):
        self._properties["onlyReadLatest"] = value

    @property
    def qualifier_encoded(self):
        """Union[str, bytes]: The qualifier encoded in binary.

        The type is ``str`` (Python 2.x) or ``bytes`` (Python 3.x). The module
        handles base64 encoding and decoding for you.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableColumn.FIELDS.qualifier_encoded
        """
        encoded = self._properties.get("qualifierEncoded")
        if encoded is not None:
            return base64.standard_b64decode(_to_bytes(encoded))
        return None

    @qualifier_encoded.setter
    def qualifier_encoded(self, value):
        self._properties["qualifierEncoded"] = _bytes_to_json(value)

    @property
    def qualifier_string(self):
        """str: A valid UTF-8 string qualifier.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableColumn.FIELDS.qualifier_string
        """
        return self._properties.get("qualifierString")

    @qualifier_string.setter
    def qualifier_string(self, value):
        self._properties["qualifierString"] = value

    def to_api_repr(self) -> dict:
        """Build an API representation of this object.

        Returns:
            Dict[str, Any]:
                A dictionary in the format used by the BigQuery API.
        """
        api_repr = copy.deepcopy(self._properties)
        return api_repr

    @classmethod
    def from_api_repr(cls, resource: dict) -> "BigtableColumn":
        """Factory: construct a :class:`~.external_config.BigtableColumn`
        instance from its API representation.

        Args:
            resource (Dict[str, Any]):
                Definition of a :class:`~.external_config.BigtableColumn`
                instance in the same representation as is returned from the
                API.

        Returns:
            external_config.BigtableColumn: Configuration parsed from ``resource``.
        """
        column = cls()
        column._properties = copy.deepcopy(resource)
        return column
class BigtableColumnFamily(object):
    """Options for a Bigtable column family."""

    def __init__(self):
        self._properties = {}

    @property
    def family_id(self):
        """str: Identifier of the column family.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableColumnFamily.FIELDS.family_id
        """
        return self._properties.get("familyId")

    @family_id.setter
    def family_id(self, value):
        self._properties["familyId"] = value

    @property
    def type_(self):
        """str: Type used to convert the values in cells of this column family.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableColumnFamily.FIELDS.type
        """
        return self._properties.get("type")

    @type_.setter
    def type_(self, value):
        self._properties["type"] = value

    @property
    def encoding(self):
        """str: Encoding of the values when the type is not `STRING`.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableColumnFamily.FIELDS.encoding
        """
        return self._properties.get("encoding")

    @encoding.setter
    def encoding(self, value):
        self._properties["encoding"] = value

    @property
    def only_read_latest(self):
        """bool: When set, only the latest version of a value is exposed for
        all columns in this column family.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableColumnFamily.FIELDS.only_read_latest
        """
        return self._properties.get("onlyReadLatest")

    @only_read_latest.setter
    def only_read_latest(self, value):
        self._properties["onlyReadLatest"] = value

    @property
    def columns(self):
        """List[BigtableColumn]: Lists of columns that should be exposed as
        individual fields.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableColumnFamily.FIELDS.columns
        """
        resources = self._properties.get("columns", [])
        return [BigtableColumn.from_api_repr(resource) for resource in resources]

    @columns.setter
    def columns(self, value):
        self._properties["columns"] = [column.to_api_repr() for column in value]

    def to_api_repr(self) -> dict:
        """Build an API representation of this object.

        Returns:
            Dict[str, Any]:
                A dictionary in the format used by the BigQuery API.
        """
        api_repr = copy.deepcopy(self._properties)
        return api_repr

    @classmethod
    def from_api_repr(cls, resource: dict) -> "BigtableColumnFamily":
        """Factory: construct a :class:`~.external_config.BigtableColumnFamily`
        instance from its API representation.

        Args:
            resource (Dict[str, Any]):
                Definition of a :class:`~.external_config.BigtableColumnFamily`
                instance in the same representation as is returned from the
                API.

        Returns:
            :class:`~.external_config.BigtableColumnFamily`:
                Configuration parsed from ``resource``.
        """
        family = cls()
        family._properties = copy.deepcopy(resource)
        return family
class BigtableOptions(object):
    """Options that describe how to treat Bigtable tables as BigQuery tables."""

    _SOURCE_FORMAT = "BIGTABLE"
    _RESOURCE_NAME = "bigtableOptions"

    def __init__(self):
        self._properties = {}

    @property
    def read_rowkey_as_string(self):
        """bool: If :data:`True`, rowkey column families will be read and
        converted to string. Defaults to :data:`False`.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableOptions.FIELDS.read_rowkey_as_string
        """
        return self._properties.get("readRowkeyAsString")

    @read_rowkey_as_string.setter
    def read_rowkey_as_string(self, value):
        self._properties["readRowkeyAsString"] = value

    @property
    def ignore_unspecified_column_families(self):
        """bool: If :data:`True`, ignore columns that are not specified in the
        :attr:`column_families` list. Defaults to :data:`False`.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableOptions.FIELDS.ignore_unspecified_column_families
        """
        return self._properties.get("ignoreUnspecifiedColumnFamilies")

    @ignore_unspecified_column_families.setter
    def ignore_unspecified_column_families(self, value):
        self._properties["ignoreUnspecifiedColumnFamilies"] = value

    @property
    def column_families(self):
        """List[:class:`~.external_config.BigtableColumnFamily`]: List of
        column families to expose in the table schema along with their types.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableOptions.FIELDS.column_families
        """
        resources = self._properties.get("columnFamilies", [])
        return [BigtableColumnFamily.from_api_repr(resource) for resource in resources]

    @column_families.setter
    def column_families(self, value):
        self._properties["columnFamilies"] = [family.to_api_repr() for family in value]

    def to_api_repr(self) -> dict:
        """Build an API representation of this object.

        Returns:
            Dict[str, Any]:
                A dictionary in the format used by the BigQuery API.
        """
        api_repr = copy.deepcopy(self._properties)
        return api_repr

    @classmethod
    def from_api_repr(cls, resource: dict) -> "BigtableOptions":
        """Factory: construct a :class:`~.external_config.BigtableOptions`
        instance from its API representation.

        Args:
            resource (Dict[str, Any]):
                Definition of a :class:`~.external_config.BigtableOptions`
                instance in the same representation as is returned from the
                API.

        Returns:
            BigtableOptions: Configuration parsed from ``resource``.
        """
        options = cls()
        options._properties = copy.deepcopy(resource)
        return options
class CSVOptions(object):
    """Options that describe how to treat CSV files as BigQuery tables."""

    _SOURCE_FORMAT = "CSV"
    _RESOURCE_NAME = "csvOptions"

    def __init__(self):
        self._properties = {}

    @property
    def field_delimiter(self):
        """str: The separator for fields in a CSV file. Defaults to comma (',').

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#CsvOptions.FIELDS.field_delimiter
        """
        return self._properties.get("fieldDelimiter")

    @field_delimiter.setter
    def field_delimiter(self, value):
        self._properties["fieldDelimiter"] = value

    @property
    def quote_character(self):
        """str: The value used to quote data sections in a CSV file.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#CsvOptions.FIELDS.quote
        """
        return self._properties.get("quote")

    @quote_character.setter
    def quote_character(self, value):
        self._properties["quote"] = value

    @property
    def allow_jagged_rows(self):
        """bool: If :data:`True`, BigQuery treats missing trailing columns as
        null values. Defaults to :data:`False`.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#CsvOptions.FIELDS.allow_jagged_rows
        """
        return self._properties.get("allowJaggedRows")

    @allow_jagged_rows.setter
    def allow_jagged_rows(self, value):
        self._properties["allowJaggedRows"] = value

    @property
    def allow_quoted_newlines(self):
        """bool: If :data:`True`, quoted data sections containing newline
        characters in a CSV file are allowed. Defaults to :data:`False`.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#CsvOptions.FIELDS.allow_quoted_newlines
        """
        return self._properties.get("allowQuotedNewlines")

    @allow_quoted_newlines.setter
    def allow_quoted_newlines(self, value):
        self._properties["allowQuotedNewlines"] = value

    @property
    def encoding(self):
        """str: The character encoding of the data.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#CsvOptions.FIELDS.encoding
        """
        return self._properties.get("encoding")

    @encoding.setter
    def encoding(self, value):
        self._properties["encoding"] = value

    @property
    def preserve_ascii_control_characters(self):
        """bool: Indicates whether embedded ASCII control characters (the
        first 32 characters in the ASCII table, from '\x00' to '\x1F') are
        preserved.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#CsvOptions.FIELDS.preserve_ascii_control_characters
        """
        return self._properties.get("preserveAsciiControlCharacters")

    @preserve_ascii_control_characters.setter
    def preserve_ascii_control_characters(self, value):
        self._properties["preserveAsciiControlCharacters"] = value

    @property
    def skip_leading_rows(self):
        """int: The number of rows at the top of a CSV file to skip.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#CsvOptions.FIELDS.skip_leading_rows
        """
        return _int_or_none(self._properties.get("skipLeadingRows"))

    @skip_leading_rows.setter
    def skip_leading_rows(self, value):
        # The API represents this count as a string.
        self._properties["skipLeadingRows"] = str(value)

    def to_api_repr(self) -> dict:
        """Build an API representation of this object.

        Returns:
            Dict[str, Any]: A dictionary in the format used by the BigQuery API.
        """
        api_repr = copy.deepcopy(self._properties)
        return api_repr

    @classmethod
    def from_api_repr(cls, resource: dict) -> "CSVOptions":
        """Factory: construct a :class:`~.external_config.CSVOptions` instance
        from its API representation.

        Args:
            resource (Dict[str, Any]):
                Definition of a :class:`~.external_config.CSVOptions`
                instance in the same representation as is returned from the
                API.

        Returns:
            CSVOptions: Configuration parsed from ``resource``.
        """
        options = cls()
        options._properties = copy.deepcopy(resource)
        return options
class GoogleSheetsOptions(object):
    """Options that describe how to treat Google Sheets as BigQuery tables."""

    _SOURCE_FORMAT = "GOOGLE_SHEETS"
    _RESOURCE_NAME = "googleSheetsOptions"

    def __init__(self):
        self._properties = {}

    @property
    def range(self):
        """str: The range of a sheet that BigQuery will query from.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#GoogleSheetsOptions.FIELDS.range
        """
        return _str_or_none(self._properties.get("range"))

    @range.setter
    def range(self, value):
        self._properties["range"] = value

    @property
    def skip_leading_rows(self):
        """int: The number of rows at the top of a sheet that BigQuery skips
        when reading the data.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#GoogleSheetsOptions.FIELDS.skip_leading_rows
        """
        return _int_or_none(self._properties.get("skipLeadingRows"))

    @skip_leading_rows.setter
    def skip_leading_rows(self, value):
        # The API represents this count as a string.
        self._properties["skipLeadingRows"] = str(value)

    def to_api_repr(self) -> dict:
        """Build an API representation of this object.

        Returns:
            Dict[str, Any]: A dictionary in the format used by the BigQuery API.
        """
        api_repr = copy.deepcopy(self._properties)
        return api_repr

    @classmethod
    def from_api_repr(cls, resource: dict) -> "GoogleSheetsOptions":
        """Factory: construct a :class:`~.external_config.GoogleSheetsOptions`
        instance from its API representation.

        Args:
            resource (Dict[str, Any]):
                Definition of a :class:`~.external_config.GoogleSheetsOptions`
                instance in the same representation as is returned from the
                API.

        Returns:
            GoogleSheetsOptions: Configuration parsed from ``resource``.
        """
        options = cls()
        options._properties = copy.deepcopy(resource)
        return options
# Source-format-specific option classes; each pairs a ``_SOURCE_FORMAT``
# with a ``_RESOURCE_NAME`` key. Used by ``ExternalConfig.options`` to find
# the matching options type for the configured source format.
_OPTION_CLASSES = (
    AvroOptions,
    BigtableOptions,
    CSVOptions,
    GoogleSheetsOptions,
    ParquetOptions,
)

# Union of every source-format-specific options type.
OptionsType = Union[
    AvroOptions,
    BigtableOptions,
    CSVOptions,
    GoogleSheetsOptions,
    ParquetOptions,
]
class HivePartitioningOptions(object):
    """[Beta] Options that configure hive partitioning.

    .. note::
        **Experimental**. This feature is experimental and might change or
        have limited support.

    See
    https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#HivePartitioningOptions
    """

    def __init__(self) -> None:
        self._properties: Dict[str, Any] = {}

    @property
    def mode(self):
        """Optional[str]: When set, the mode of hive partitioning to use when
        reading data. Two modes are supported: "AUTO" and "STRINGS".

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#HivePartitioningOptions.FIELDS.mode
        """
        return self._properties.get("mode")

    @mode.setter
    def mode(self, value):
        self._properties["mode"] = value

    @property
    def source_uri_prefix(self):
        """Optional[str]: When hive partition detection is requested, a common
        prefix for all source URIs is required.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#HivePartitioningOptions.FIELDS.source_uri_prefix
        """
        return self._properties.get("sourceUriPrefix")

    @source_uri_prefix.setter
    def source_uri_prefix(self, value):
        self._properties["sourceUriPrefix"] = value

    @property
    def require_partition_filter(self):
        """Optional[bool]: If set to true, queries over the partitioned table
        require a partition filter that can be used for partition elimination
        to be specified.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#HivePartitioningOptions.FIELDS.require_partition_filter
        """
        return self._properties.get("requirePartitionFilter")

    @require_partition_filter.setter
    def require_partition_filter(self, value):
        self._properties["requirePartitionFilter"] = value

    def to_api_repr(self) -> dict:
        """Build an API representation of this object.

        Returns:
            Dict[str, Any]: A dictionary in the format used by the BigQuery API.
        """
        api_repr = copy.deepcopy(self._properties)
        return api_repr

    @classmethod
    def from_api_repr(cls, resource: dict) -> "HivePartitioningOptions":
        """Factory: construct a :class:`~.external_config.HivePartitioningOptions`
        instance from its API representation.

        Args:
            resource (Dict[str, Any]):
                Definition of a :class:`~.external_config.HivePartitioningOptions`
                instance in the same representation as is returned from the
                API.

        Returns:
            HivePartitioningOptions: Configuration parsed from ``resource``.
        """
        options = cls()
        options._properties = copy.deepcopy(resource)
        return options
class ExternalConfig(object):
    """Description of an external data source.

    Args:
        source_format (ExternalSourceFormat):
            See :attr:`source_format`.
    """

    def __init__(self, source_format) -> None:
        # ``sourceFormat`` is required by the API, so it is set eagerly.
        self._properties = {"sourceFormat": source_format}

    @property
    def source_format(self):
        """:class:`~.external_config.ExternalSourceFormat`:
        Format of external source.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.source_format
        """
        return self._properties["sourceFormat"]

    @property
    def options(self) -> Optional[OptionsType]:
        """Source-specific options.

        NOTE: the returned options object shares its ``_properties`` dict
        with this config (via ``setdefault`` below), so mutating the returned
        options mutates this ExternalConfig in place.
        """
        for optcls in _OPTION_CLASSES:
            # The code below is too much magic for mypy to handle.
            if self.source_format == optcls._SOURCE_FORMAT:  # type: ignore
                options: OptionsType = optcls()  # type: ignore
                options._properties = self._properties.setdefault(
                    optcls._RESOURCE_NAME, {}  # type: ignore
                )
                return options
        # No matching source format found.
        return None

    @property
    def autodetect(self):
        """bool: If :data:`True`, try to detect schema and format options
        automatically.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.autodetect
        """
        return self._properties.get("autodetect")

    @autodetect.setter
    def autodetect(self, value):
        self._properties["autodetect"] = value

    @property
    def compression(self):
        """str: The compression type of the data source.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.compression
        """
        return self._properties.get("compression")

    @compression.setter
    def compression(self, value):
        self._properties["compression"] = value

    @property
    def decimal_target_types(self) -> Optional[FrozenSet[str]]:
        """Possible SQL data types to which the source decimal values are converted.

        See:
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.decimal_target_types

        .. versionadded:: 2.21.0
        """
        prop = self._properties.get("decimalTargetTypes")
        if prop is not None:
            prop = frozenset(prop)
        return prop

    @decimal_target_types.setter
    def decimal_target_types(self, value: Optional[Iterable[str]]):
        if value is not None:
            self._properties["decimalTargetTypes"] = list(value)
        else:
            # Setting to None removes the key entirely rather than storing null.
            if "decimalTargetTypes" in self._properties:
                del self._properties["decimalTargetTypes"]

    @property
    def hive_partitioning(self):
        """Optional[:class:`~.external_config.HivePartitioningOptions`]: [Beta] When set, \
        it configures hive partitioning support.

        .. note::
            **Experimental**. This feature is experimental and might change or
            have limited support.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.hive_partitioning_options
        """
        prop = self._properties.get("hivePartitioningOptions")
        if prop is None:
            return None
        return HivePartitioningOptions.from_api_repr(prop)

    @hive_partitioning.setter
    def hive_partitioning(self, value):
        prop = value.to_api_repr() if value is not None else None
        self._properties["hivePartitioningOptions"] = prop

    @property
    def reference_file_schema_uri(self):
        """Optional[str]:
        When creating an external table, the user can provide a reference file with the
        table schema. This is enabled for the following formats:
        AVRO, PARQUET, ORC
        """
        return self._properties.get("referenceFileSchemaUri")

    @reference_file_schema_uri.setter
    def reference_file_schema_uri(self, value):
        self._properties["referenceFileSchemaUri"] = value

    @property
    def ignore_unknown_values(self):
        """bool: If :data:`True`, extra values that are not represented in the
        table schema are ignored. Defaults to :data:`False`.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.ignore_unknown_values
        """
        return self._properties.get("ignoreUnknownValues")

    @ignore_unknown_values.setter
    def ignore_unknown_values(self, value):
        self._properties["ignoreUnknownValues"] = value

    @property
    def max_bad_records(self):
        """int: The maximum number of bad records that BigQuery can ignore when
        reading data.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.max_bad_records
        """
        return self._properties.get("maxBadRecords")

    @max_bad_records.setter
    def max_bad_records(self, value):
        self._properties["maxBadRecords"] = value

    @property
    def source_uris(self):
        """List[str]: URIs that point to your data in Google Cloud.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.source_uris
        """
        return self._properties.get("sourceUris", [])

    @source_uris.setter
    def source_uris(self, value):
        self._properties["sourceUris"] = value

    @property
    def schema(self):
        """List[:class:`~google.cloud.bigquery.schema.SchemaField`]: The schema
        for the data.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.schema
        """
        # TODO: The typehinting for this needs work. Setting this pragma to temporarily
        # manage a pytype issue that came up in another PR. See Issue: #2132
        prop = self._properties.get("schema", {})  # type: ignore
        return [SchemaField.from_api_repr(field) for field in prop.get("fields", [])]  # type: ignore

    @schema.setter
    def schema(self, value):
        prop = value
        if value is not None:
            prop = {"fields": [field.to_api_repr() for field in value]}
        self._properties["schema"] = prop

    @property
    def connection_id(self):
        """Optional[str]: [Experimental] ID of a BigQuery Connection API
        resource.

        .. WARNING::
            This feature is experimental. Pre-GA features may have limited
            support, and changes to pre-GA features may not be compatible with
            other pre-GA versions.
        """
        return self._properties.get("connectionId")

    @connection_id.setter
    def connection_id(self, value):
        self._properties["connectionId"] = value

    @property
    def avro_options(self) -> Optional[AvroOptions]:
        """Additional properties to set if ``sourceFormat`` is set to AVRO.

        See:
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.avro_options
        """
        if self.source_format == ExternalSourceFormat.AVRO:
            self._properties.setdefault(AvroOptions._RESOURCE_NAME, {})
        resource = self._properties.get(AvroOptions._RESOURCE_NAME)
        if resource is None:
            return None
        options = AvroOptions()
        # The returned options share state with this config; see ``options``.
        options._properties = resource
        return options

    @avro_options.setter
    def avro_options(self, value):
        if self.source_format != ExternalSourceFormat.AVRO:
            msg = f"Cannot set Avro options, source format is {self.source_format}"
            raise TypeError(msg)
        self._properties[AvroOptions._RESOURCE_NAME] = value._properties

    @property
    def bigtable_options(self) -> Optional[BigtableOptions]:
        """Additional properties to set if ``sourceFormat`` is set to BIGTABLE.

        See:
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.bigtable_options
        """
        if self.source_format == ExternalSourceFormat.BIGTABLE:
            self._properties.setdefault(BigtableOptions._RESOURCE_NAME, {})
        resource = self._properties.get(BigtableOptions._RESOURCE_NAME)
        if resource is None:
            return None
        options = BigtableOptions()
        # The returned options share state with this config; see ``options``.
        options._properties = resource
        return options

    @bigtable_options.setter
    def bigtable_options(self, value):
        if self.source_format != ExternalSourceFormat.BIGTABLE:
            msg = f"Cannot set Bigtable options, source format is {self.source_format}"
            raise TypeError(msg)
        self._properties[BigtableOptions._RESOURCE_NAME] = value._properties

    @property
    def csv_options(self) -> Optional[CSVOptions]:
        """Additional properties to set if ``sourceFormat`` is set to CSV.

        See:
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.csv_options
        """
        if self.source_format == ExternalSourceFormat.CSV:
            self._properties.setdefault(CSVOptions._RESOURCE_NAME, {})
        resource = self._properties.get(CSVOptions._RESOURCE_NAME)
        if resource is None:
            return None
        options = CSVOptions()
        # The returned options share state with this config; see ``options``.
        options._properties = resource
        return options

    @csv_options.setter
    def csv_options(self, value):
        if self.source_format != ExternalSourceFormat.CSV:
            msg = f"Cannot set CSV options, source format is {self.source_format}"
            raise TypeError(msg)
        self._properties[CSVOptions._RESOURCE_NAME] = value._properties

    @property
    def google_sheets_options(self) -> Optional[GoogleSheetsOptions]:
        """Additional properties to set if ``sourceFormat`` is set to
        GOOGLE_SHEETS.

        See:
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.google_sheets_options
        """
        if self.source_format == ExternalSourceFormat.GOOGLE_SHEETS:
            self._properties.setdefault(GoogleSheetsOptions._RESOURCE_NAME, {})
        resource = self._properties.get(GoogleSheetsOptions._RESOURCE_NAME)
        if resource is None:
            return None
        options = GoogleSheetsOptions()
        # The returned options share state with this config; see ``options``.
        options._properties = resource
        return options

    @google_sheets_options.setter
    def google_sheets_options(self, value):
        if self.source_format != ExternalSourceFormat.GOOGLE_SHEETS:
            msg = f"Cannot set Google Sheets options, source format is {self.source_format}"
            raise TypeError(msg)
        self._properties[GoogleSheetsOptions._RESOURCE_NAME] = value._properties

    @property
    def parquet_options(self) -> Optional[ParquetOptions]:
        """Additional properties to set if ``sourceFormat`` is set to PARQUET.

        See:
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.parquet_options
        """
        if self.source_format == ExternalSourceFormat.PARQUET:
            self._properties.setdefault(ParquetOptions._RESOURCE_NAME, {})
        resource = self._properties.get(ParquetOptions._RESOURCE_NAME)
        if resource is None:
            return None
        options = ParquetOptions()
        # The returned options share state with this config; see ``options``.
        options._properties = resource
        return options

    @parquet_options.setter
    def parquet_options(self, value):
        if self.source_format != ExternalSourceFormat.PARQUET:
            msg = f"Cannot set Parquet options, source format is {self.source_format}"
            raise TypeError(msg)
        self._properties[ParquetOptions._RESOURCE_NAME] = value._properties

    def to_api_repr(self) -> dict:
        """Build an API representation of this object.

        Returns:
            Dict[str, Any]:
                A dictionary in the format used by the BigQuery API.
        """
        config = copy.deepcopy(self._properties)
        return config

    @classmethod
    def from_api_repr(cls, resource: dict) -> "ExternalConfig":
        """Factory: construct an :class:`~.external_config.ExternalConfig`
        instance given its API representation.

        Args:
            resource (Dict[str, Any]):
                Definition of an :class:`~.external_config.ExternalConfig`
                instance in the same representation as is returned from the
                API.

        Returns:
            ExternalConfig: Configuration parsed from ``resource``.
        """
        config = cls(resource["sourceFormat"])
        config._properties = copy.deepcopy(resource)
        return config
class ExternalCatalogDatasetOptions:
    """Options defining open source compatible datasets living in the BigQuery catalog.

    Contains metadata of open source database, schema or namespace represented
    by the current dataset.

    Args:
        default_storage_location_uri (Optional[str]): The storage location URI for all
            tables in the dataset. Equivalent to hive metastore's database
            locationUri. Maximum length of 1024 characters. (str)
        parameters (Optional[dict[str, Any]]): A map of key value pairs defining the parameters
            and properties of the open source schema. Maximum size of 2Mib.
    """

    def __init__(
        self,
        default_storage_location_uri: Optional[str] = None,
        parameters: Optional[Dict[str, Any]] = None,
    ):
        self._properties: Dict[str, Any] = {}
        # Assigning through the property setters runs type validation.
        self.default_storage_location_uri = default_storage_location_uri
        self.parameters = parameters

    @property
    def default_storage_location_uri(self) -> Optional[str]:
        """Optional. The storage location URI for all tables in the dataset.
        Equivalent to hive metastore's database locationUri. Maximum length of
        1024 characters."""
        return self._properties.get("defaultStorageLocationUri")

    @default_storage_location_uri.setter
    def default_storage_location_uri(self, value: Optional[str]):
        checked = _helpers._isinstance_or_raise(value, str, none_allowed=True)
        self._properties["defaultStorageLocationUri"] = checked

    @property
    def parameters(self) -> Optional[Dict[str, Any]]:
        """Optional. A map of key value pairs defining the parameters and
        properties of the open source schema. Maximum size of 2Mib."""
        return self._properties.get("parameters")

    @parameters.setter
    def parameters(self, value: Optional[Dict[str, Any]]):
        checked = _helpers._isinstance_or_raise(value, dict, none_allowed=True)
        self._properties["parameters"] = checked

    def to_api_repr(self) -> dict:
        """Build an API representation of this object.

        Returns:
            Dict[str, Any]:
                A dictionary in the format used by the BigQuery API.
        """
        # Note: returns the internal dict itself, not a copy.
        return self._properties

    @classmethod
    def from_api_repr(cls, api_repr: dict) -> ExternalCatalogDatasetOptions:
        """Factory: constructs an instance of the class (cls)
        given its API representation.

        Args:
            api_repr (Dict[str, Any]):
                API representation of the object to be instantiated.

        Returns:
            An instance of the class initialized with data from 'resource'.
        """
        options = cls()
        options._properties = api_repr
        return options
class ExternalCatalogTableOptions:
    """Metadata about open source compatible table. The fields contained in these
    options correspond to hive metastore's table level properties.

    Args:
        connection_id (Optional[str]): The connection specifying the credentials to be
            used to read external storage, such as Azure Blob, Cloud Storage, or
            S3. The connection is needed to read the open source table from
            BigQuery Engine. The connection_id can have the form `..` or
            `projects//locations//connections/`.
        parameters (Union[Dict[str, Any], None]): A map of key value pairs defining the parameters
            and properties of the open source table. Corresponds with hive meta
            store table parameters. Maximum size of 4Mib.
        storage_descriptor (Optional[StorageDescriptor]): A storage descriptor containing information
            about the physical storage of this table.
    """

    def __init__(
        self,
        connection_id: Optional[str] = None,
        parameters: Union[Dict[str, Any], None] = None,
        storage_descriptor: Optional[schema.StorageDescriptor] = None,
    ):
        self._properties: Dict[str, Any] = {}
        # Assigning through the property setters runs type validation.
        self.connection_id = connection_id
        self.parameters = parameters
        self.storage_descriptor = storage_descriptor

    @property
    def connection_id(self) -> Optional[str]:
        """Optional. The connection specifying the credentials to be
        used to read external storage, such as Azure Blob, Cloud Storage, or
        S3. The connection is needed to read the open source table from
        BigQuery Engine. The connection_id can have the form `..` or
        `projects//locations//connections/`.
        """
        return self._properties.get("connectionId")

    @connection_id.setter
    def connection_id(self, value: Optional[str]):
        checked = _helpers._isinstance_or_raise(value, str, none_allowed=True)
        self._properties["connectionId"] = checked

    @property
    def parameters(self) -> Union[Dict[str, Any], None]:
        """Optional. A map of key value pairs defining the parameters and
        properties of the open source table. Corresponds with hive meta
        store table parameters. Maximum size of 4Mib.
        """
        return self._properties.get("parameters")

    @parameters.setter
    def parameters(self, value: Union[Dict[str, Any], None]):
        checked = _helpers._isinstance_or_raise(value, dict, none_allowed=True)
        self._properties["parameters"] = checked

    @property
    def storage_descriptor(self) -> Any:
        """Optional. A storage descriptor containing information about the
        physical storage of this table."""
        resource = _helpers._get_sub_prop(self._properties, ["storageDescriptor"])
        if resource is None:
            return None
        return schema.StorageDescriptor.from_api_repr(resource)

    @storage_descriptor.setter
    def storage_descriptor(self, value: Union[schema.StorageDescriptor, dict, None]):
        checked = _helpers._isinstance_or_raise(
            value, (schema.StorageDescriptor, dict), none_allowed=True
        )
        if isinstance(checked, schema.StorageDescriptor):
            # Store the API-serializable form, never the object itself.
            checked = checked.to_api_repr()
        self._properties["storageDescriptor"] = checked

    def to_api_repr(self) -> dict:
        """Build an API representation of this object.

        Returns:
            Dict[str, Any]:
                A dictionary in the format used by the BigQuery API.
        """
        # Note: returns the internal dict itself, not a copy.
        return self._properties

    @classmethod
    def from_api_repr(cls, api_repr: dict) -> ExternalCatalogTableOptions:
        """Factory: constructs an instance of the class (cls)
        given its API representation.

        Args:
            api_repr (Dict[str, Any]):
                API representation of the object to be instantiated.

        Returns:
            An instance of the class initialized with data from 'api_repr'.
        """
        options = cls()
        options._properties = api_repr
        return options