# Copyright 2015 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Shared helper functions for BigQuery API classes."""

import base64
import datetime
import decimal
import json
import math
import re
import os
import textwrap
import warnings
from typing import Any, Optional, Tuple, Type, Union

from dateutil import relativedelta

from google.cloud._helpers import UTC  # type: ignore
from google.cloud._helpers import _date_from_iso8601_date
from google.cloud._helpers import _datetime_from_microseconds
from google.cloud._helpers import _RFC3339_MICROS
from google.cloud._helpers import _RFC3339_NO_FRACTION
from google.cloud._helpers import _to_bytes
from google.auth import credentials as ga_credentials  # type: ignore
from google.api_core import client_options as client_options_lib

TimeoutType = Union[float, None]

_RFC3339_MICROS_NO_ZULU = "%Y-%m-%dT%H:%M:%S.%f"
_TIMEONLY_WO_MICROS = "%H:%M:%S"
_TIMEONLY_W_MICROS = "%H:%M:%S.%f"
_PROJECT_PREFIX_PATTERN = re.compile(
    r"""
    (?P<project_id>\S+\:[^.]+)\.(?P<dataset_id>[^.]+)(?:$|\.(?P<table_id>[^.]+)$)
    """,
    re.VERBOSE,
)

# BigQuery sends INTERVAL data in "canonical format"
# https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#interval_type
_INTERVAL_PATTERN = re.compile(
    r"(?P<calendar_sign>-?)(?P<years>\d+)-(?P<months>\d+) "
    r"(?P<days>-?\d+) "
    r"(?P<time_sign>-?)(?P<hours>\d+):(?P<minutes>\d+):(?P<seconds>\d+)\.?(?P<fraction>\d*)?$"
)

_RANGE_PATTERN = re.compile(r"\[.*, .*\)")

BIGQUERY_EMULATOR_HOST = "BIGQUERY_EMULATOR_HOST"
"""Environment variable defining host for emulator."""

_DEFAULT_HOST = "https://bigquery.googleapis.com"
"""Default host for JSON API."""

_DEFAULT_HOST_TEMPLATE = "https://bigquery.{UNIVERSE_DOMAIN}"
"""Templatized endpoint format."""

_DEFAULT_UNIVERSE = "googleapis.com"
"""Default universe for the JSON API."""

_UNIVERSE_DOMAIN_ENV = "GOOGLE_CLOUD_UNIVERSE_DOMAIN"
"""Environment variable for setting universe domain."""

_SUPPORTED_RANGE_ELEMENTS = {"TIMESTAMP", "DATETIME", "DATE"}


def _get_client_universe(
    client_options: Optional[Union[client_options_lib.ClientOptions, dict]]
) -> str:
    """Retrieves the specified universe setting.

    Args:
        client_options: specified client options.

    Returns:
        str: resolved universe setting.
    """
    if isinstance(client_options, dict):
        client_options = client_options_lib.from_dict(client_options)
    universe = _DEFAULT_UNIVERSE
    options_universe = getattr(client_options, "universe_domain", None)
    if (
        options_universe
        and isinstance(options_universe, str)
        and len(options_universe) > 0
    ):
        universe = options_universe
    else:
        env_universe = os.getenv(_UNIVERSE_DOMAIN_ENV)
        if isinstance(env_universe, str) and len(env_universe) > 0:
            universe = env_universe
    return universe


def _validate_universe(client_universe: str, credentials: ga_credentials.Credentials):
    """Validates that the client-provided universe and the universe embedded in
    the credentials match.

    Args:
        client_universe (str): The universe domain configured via the client options.
        credentials (ga_credentials.Credentials): The credentials being used in the client.

    Raises:
        ValueError: when client_universe does not match the universe in credentials.
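
    Example (illustrative sketch only; assumes a recent ``google-auth`` in
    which credentials expose a ``universe_domain`` attribute):

        >>> from google.auth import credentials as ga_credentials
        >>> creds = ga_credentials.AnonymousCredentials()
        >>> _validate_universe(getattr(creds, "universe_domain", "googleapis.com"), creds)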
""" if hasattr(credentials, "universe_domain"): cred_universe = getattr(credentials, "universe_domain") if isinstance(cred_universe, str): if client_universe != cred_universe: raise ValueError( "The configured universe domain " f"({client_universe}) does not match the universe domain " f"found in the credentials ({cred_universe}). " "If you haven't configured the universe domain explicitly, " f"`{_DEFAULT_UNIVERSE}` is the default." ) def _get_bigquery_host(): return os.environ.get(BIGQUERY_EMULATOR_HOST, _DEFAULT_HOST) def _not_null(value, field): """Check whether 'value' should be coerced to 'field' type.""" return value is not None or (field is not None and field.mode != "NULLABLE") class CellDataParser: """Converter from BigQuery REST resource to Python value for RowIterator and similar classes. See: "rows" field of https://cloud.google.com/bigquery/docs/reference/rest/v2/tabledata/list and https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/getQueryResults. """ def to_py(self, resource, field): def default_converter(value, field): _warn_unknown_field_type(field) return value converter = getattr( self, f"{field.field_type.lower()}_to_py", default_converter ) if field.mode == "REPEATED": return [converter(item["v"], field) for item in resource] else: return converter(resource, field) def bool_to_py(self, value, field): """Coerce 'value' to a bool, if set or not nullable.""" if _not_null(value, field): # TODO(tswast): Why does _not_null care if the field is NULLABLE or # REQUIRED? Do we actually need such client-side validation? if value is None: raise TypeError(f"got None for required boolean field {field}") return value.lower() in ("t", "true", "1") def boolean_to_py(self, value, field): """Coerce 'value' to a bool, if set or not nullable.""" return self.bool_to_py(value, field) def integer_to_py(self, value, field): """Coerce 'value' to an int, if set or not nullable.""" if _not_null(value, field): return int(value) def int64_to_py(self, value, field): """Coerce 'value' to an int, if set or not nullable.""" return self.integer_to_py(value, field) def interval_to_py( self, value: Optional[str], field ) -> Optional[relativedelta.relativedelta]: """Coerce 'value' to an interval, if set or not nullable.""" if not _not_null(value, field): return None if value is None: raise TypeError(f"got {value} for REQUIRED field: {repr(field)}") parsed = _INTERVAL_PATTERN.match(value) if parsed is None: raise ValueError( textwrap.dedent( f""" Got interval: '{value}' with unexpected format. Expected interval in canonical format of "[sign]Y-M [sign]D [sign]H:M:S[.F]". See: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#interval_type for more information. 
""" ), ) calendar_sign = -1 if parsed.group("calendar_sign") == "-" else 1 years = calendar_sign * int(parsed.group("years")) months = calendar_sign * int(parsed.group("months")) days = int(parsed.group("days")) time_sign = -1 if parsed.group("time_sign") == "-" else 1 hours = time_sign * int(parsed.group("hours")) minutes = time_sign * int(parsed.group("minutes")) seconds = time_sign * int(parsed.group("seconds")) fraction = parsed.group("fraction") microseconds = time_sign * int(fraction.ljust(6, "0")[:6]) if fraction else 0 return relativedelta.relativedelta( years=years, months=months, days=days, hours=hours, minutes=minutes, seconds=seconds, microseconds=microseconds, ) def float_to_py(self, value, field): """Coerce 'value' to a float, if set or not nullable.""" if _not_null(value, field): return float(value) def float64_to_py(self, value, field): """Coerce 'value' to a float, if set or not nullable.""" return self.float_to_py(value, field) def numeric_to_py(self, value, field): """Coerce 'value' to a Decimal, if set or not nullable.""" if _not_null(value, field): return decimal.Decimal(value) def bignumeric_to_py(self, value, field): """Coerce 'value' to a Decimal, if set or not nullable.""" return self.numeric_to_py(value, field) def string_to_py(self, value, _): """NOOP string -> string coercion""" return value def geography_to_py(self, value, _): """NOOP string -> string coercion""" return value def bytes_to_py(self, value, field): """Base64-decode value""" if _not_null(value, field): return base64.standard_b64decode(_to_bytes(value)) def timestamp_to_py(self, value, field): """Coerce 'value' to a datetime, if set or not nullable.""" if _not_null(value, field): # value will be a integer in seconds, to microsecond precision, in UTC. return _datetime_from_microseconds(int(value)) def datetime_to_py(self, value, field): """Coerce 'value' to a datetime, if set or not nullable. Args: value (str): The timestamp. field (google.cloud.bigquery.schema.SchemaField): The field corresponding to the value. Returns: Optional[datetime.datetime]: The parsed datetime object from ``value`` if the ``field`` is not null (otherwise it is :data:`None`). """ if _not_null(value, field): if "." in value: # YYYY-MM-DDTHH:MM:SS.ffffff return datetime.datetime.strptime(value, _RFC3339_MICROS_NO_ZULU) else: # YYYY-MM-DDTHH:MM:SS return datetime.datetime.strptime(value, _RFC3339_NO_FRACTION) else: return None def date_to_py(self, value, field): """Coerce 'value' to a datetime date, if set or not nullable""" if _not_null(value, field): # value will be a string, in YYYY-MM-DD form. return _date_from_iso8601_date(value) def time_to_py(self, value, field): """Coerce 'value' to a datetime date, if set or not nullable""" if _not_null(value, field): if len(value) == 8: # HH:MM:SS fmt = _TIMEONLY_WO_MICROS elif len(value) == 15: # HH:MM:SS.micros fmt = _TIMEONLY_W_MICROS else: raise ValueError( textwrap.dedent( f""" Got {repr(value)} with unknown time format. Expected HH:MM:SS or HH:MM:SS.micros. See https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#time_type for more information. 
""" ), ) return datetime.datetime.strptime(value, fmt).time() def record_to_py(self, value, field): """Coerce 'value' to a mapping, if set or not nullable.""" if _not_null(value, field): record = {} record_iter = zip(field.fields, value["f"]) for subfield, cell in record_iter: record[subfield.name] = self.to_py(cell["v"], subfield) return record def struct_to_py(self, value, field): """Coerce 'value' to a mapping, if set or not nullable.""" return self.record_to_py(value, field) def json_to_py(self, value, field): """Coerce 'value' to a Pythonic JSON representation.""" if _not_null(value, field): return json.loads(value) else: return None def _range_element_to_py(self, value, field_element_type): """Coerce 'value' to a range element value.""" # Avoid circular imports by importing here. from google.cloud.bigquery import schema if value == "UNBOUNDED": return None if field_element_type.element_type in _SUPPORTED_RANGE_ELEMENTS: return self.to_py( value, schema.SchemaField("placeholder", field_element_type.element_type), ) else: raise ValueError( textwrap.dedent( f""" Got unsupported range element type: {field_element_type.element_type}. Exptected one of {repr(_SUPPORTED_RANGE_ELEMENTS)}. See: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#declare_a_range_type for more information. """ ), ) def range_to_py(self, value, field): """Coerce 'value' to a range, if set or not nullable. Args: value (str): The literal representation of the range. field (google.cloud.bigquery.schema.SchemaField): The field corresponding to the value. Returns: Optional[dict]: The parsed range object from ``value`` if the ``field`` is not null (otherwise it is :data:`None`). """ if _not_null(value, field): if _RANGE_PATTERN.match(value): start, end = value[1:-1].split(", ") start = self._range_element_to_py(start, field.range_element_type) end = self._range_element_to_py(end, field.range_element_type) return {"start": start, "end": end} else: raise ValueError( textwrap.dedent( f""" Got unknown format for range value: {value}. Expected format '[lower_bound, upper_bound)'. See: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#range_with_literal for more information. """ ), ) CELL_DATA_PARSER = CellDataParser() class DataFrameCellDataParser(CellDataParser): """Override of CellDataParser to handle differences in expection of values in DataFrame-like outputs. This is used to turn the output of the REST API into a pyarrow Table, emulating the serialized arrow from the BigQuery Storage Read API. """ def json_to_py(self, value, _): """No-op because DataFrame expects string for JSON output.""" return value DATA_FRAME_CELL_DATA_PARSER = DataFrameCellDataParser() class ScalarQueryParamParser(CellDataParser): """Override of CellDataParser to handle the differences in the response from query params. See: "value" field of https://cloud.google.com/bigquery/docs/reference/rest/v2/QueryParameter#QueryParameterValue """ def timestamp_to_py(self, value, field): """Coerce 'value' to a datetime, if set or not nullable. Args: value (str): The timestamp. field (google.cloud.bigquery.schema.SchemaField): The field corresponding to the value. Returns: Optional[datetime.datetime]: The parsed datetime object from ``value`` if the ``field`` is not null (otherwise it is :data:`None`). """ if _not_null(value, field): # Canonical formats for timestamps in BigQuery are flexible. 
See: # g.co/cloud/bigquery/docs/reference/standard-sql/data-types#timestamp-type # The separator between the date and time can be 'T' or ' '. value = value.replace(" ", "T", 1) # The UTC timezone may be formatted as Z or +00:00. value = value.replace("Z", "") value = value.replace("+00:00", "") if "." in value: # YYYY-MM-DDTHH:MM:SS.ffffff return datetime.datetime.strptime( value, _RFC3339_MICROS_NO_ZULU ).replace(tzinfo=UTC) else: # YYYY-MM-DDTHH:MM:SS return datetime.datetime.strptime(value, _RFC3339_NO_FRACTION).replace( tzinfo=UTC ) else: return None SCALAR_QUERY_PARAM_PARSER = ScalarQueryParamParser() def _field_to_index_mapping(schema): """Create a mapping from schema field name to index of field.""" return {f.name: i for i, f in enumerate(schema)} def _row_tuple_from_json(row, schema): """Convert JSON row data to row with appropriate types. Note: ``row['f']`` and ``schema`` are presumed to be of the same length. Args: row (Dict): A JSON response row to be converted. schema (Sequence[Union[ \ :class:`~google.cloud.bigquery.schema.SchemaField`, \ Mapping[str, Any] \ ]]): Specification of the field types in ``row``. Returns: Tuple: A tuple of data converted to native types. """ from google.cloud.bigquery.schema import _to_schema_fields schema = _to_schema_fields(schema) row_data = [] for field, cell in zip(schema, row["f"]): row_data.append(CELL_DATA_PARSER.to_py(cell["v"], field)) return tuple(row_data) def _rows_from_json(values, schema): """Convert JSON row data to rows with appropriate types. Args: values (Sequence[Dict]): The list of responses (JSON rows) to convert. schema (Sequence[Union[ \ :class:`~google.cloud.bigquery.schema.SchemaField`, \ Mapping[str, Any] \ ]]): The table's schema. If any item is a mapping, its content must be compatible with :meth:`~google.cloud.bigquery.schema.SchemaField.from_api_repr`. Returns: List[:class:`~google.cloud.bigquery.Row`] """ from google.cloud.bigquery import Row from google.cloud.bigquery.schema import _to_schema_fields schema = _to_schema_fields(schema) field_to_index = _field_to_index_mapping(schema) return [Row(_row_tuple_from_json(r, schema), field_to_index) for r in values] def _int_to_json(value): """Coerce 'value' to an JSON-compatible representation.""" if isinstance(value, int): value = str(value) return value def _float_to_json(value) -> Union[None, str, float]: """Coerce 'value' to an JSON-compatible representation.""" if value is None: return None if isinstance(value, str): value = float(value) return str(value) if (math.isnan(value) or math.isinf(value)) else float(value) def _decimal_to_json(value): """Coerce 'value' to a JSON-compatible representation.""" if isinstance(value, decimal.Decimal): value = str(value) return value def _bool_to_json(value): """Coerce 'value' to an JSON-compatible representation.""" if isinstance(value, bool): value = "true" if value else "false" return value def _bytes_to_json(value): """Coerce 'value' to an JSON-compatible representation.""" if isinstance(value, bytes): value = base64.standard_b64encode(value).decode("ascii") return value def _json_to_json(value): """Coerce 'value' to a BigQuery REST API representation.""" if value is None: return None return json.dumps(value) def _string_to_json(value): """NOOP string -> string coercion""" return value def _timestamp_to_json_parameter(value): """Coerce 'value' to an JSON-compatible representation. This version returns the string representation used in query parameters. 
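
    Example (illustrative; shows the conversion of an aware datetime to the
    UTC-normalized query-parameter format):

        >>> import datetime
        >>> _timestamp_to_json_parameter(
        ...     datetime.datetime(
        ...         2020, 1, 1, 12, 30,
        ...         tzinfo=datetime.timezone(datetime.timedelta(hours=2)),
        ...     )
        ... )
        '2020-01-01 10:30:00+00:00'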
""" if isinstance(value, datetime.datetime): if value.tzinfo not in (None, UTC): # Convert to UTC and remove the time zone info. value = value.replace(tzinfo=None) - value.utcoffset() value = "%s %s+00:00" % (value.date().isoformat(), value.time().isoformat()) return value def _timestamp_to_json_row(value): """Coerce 'value' to an JSON-compatible representation.""" if isinstance(value, datetime.datetime): # For naive datetime objects UTC timezone is assumed, thus we format # those to string directly without conversion. if value.tzinfo is not None: value = value.astimezone(UTC) value = value.strftime(_RFC3339_MICROS) return value def _datetime_to_json(value): """Coerce 'value' to an JSON-compatible representation.""" if isinstance(value, datetime.datetime): # For naive datetime objects UTC timezone is assumed, thus we format # those to string directly without conversion. if value.tzinfo is not None: value = value.astimezone(UTC) value = value.strftime(_RFC3339_MICROS_NO_ZULU) return value def _date_to_json(value): """Coerce 'value' to an JSON-compatible representation.""" if isinstance(value, datetime.date): value = value.isoformat() return value def _time_to_json(value): """Coerce 'value' to an JSON-compatible representation.""" if isinstance(value, datetime.time): value = value.isoformat() return value def _range_element_to_json(value, element_type=None): """Coerce 'value' to an JSON-compatible representation.""" if value is None: return None elif isinstance(value, str): if value.upper() in ("UNBOUNDED", "NULL"): return None else: # We do not enforce range element value to be valid to reduce # redundancy with backend. return value elif ( element_type and element_type.element_type.upper() in _SUPPORTED_RANGE_ELEMENTS ): converter = _SCALAR_VALUE_TO_JSON_ROW.get(element_type.element_type.upper()) return converter(value) else: raise ValueError( f"Unsupported RANGE element type {element_type}, or " "element type is empty. Must be DATE, DATETIME, or " "TIMESTAMP" ) def _range_field_to_json(range_element_type, value): """Coerce 'value' to an JSON-compatible representation.""" if isinstance(value, str): # string literal if _RANGE_PATTERN.match(value): start, end = value[1:-1].split(", ") else: raise ValueError(f"RANGE literal {value} has incorrect format") elif isinstance(value, dict): # dictionary start = value.get("start") end = value.get("end") else: raise ValueError( f"Unsupported type of RANGE value {value}, must be " "string or dict" ) start = _range_element_to_json(start, range_element_type) end = _range_element_to_json(end, range_element_type) return {"start": start, "end": end} # Converters used for scalar values marshalled to the BigQuery API, such as in # query parameters or the tabledata.insert API. _SCALAR_VALUE_TO_JSON_ROW = { "INTEGER": _int_to_json, "INT64": _int_to_json, "FLOAT": _float_to_json, "FLOAT64": _float_to_json, "NUMERIC": _decimal_to_json, "BIGNUMERIC": _decimal_to_json, "BOOLEAN": _bool_to_json, "BOOL": _bool_to_json, "BYTES": _bytes_to_json, "TIMESTAMP": _timestamp_to_json_row, "DATETIME": _datetime_to_json, "DATE": _date_to_json, "TIME": _time_to_json, "JSON": _json_to_json, "STRING": _string_to_json, # Make sure DECIMAL and BIGDECIMAL are handled, even though # requests for them should be converted to NUMERIC. Better safe # than sorry. "DECIMAL": _decimal_to_json, "BIGDECIMAL": _decimal_to_json, } # Converters used for scalar values marshalled as query parameters. 
_SCALAR_VALUE_TO_JSON_PARAM = _SCALAR_VALUE_TO_JSON_ROW.copy() _SCALAR_VALUE_TO_JSON_PARAM["TIMESTAMP"] = _timestamp_to_json_parameter def _warn_unknown_field_type(field): warnings.warn( "Unknown type '{}' for field '{}'. Behavior reading and writing this type is not officially supported and may change in the future.".format( field.field_type, field.name ), FutureWarning, ) def _scalar_field_to_json(field, row_value): """Maps a field and value to a JSON-safe value. Args: field (google.cloud.bigquery.schema.SchemaField): The SchemaField to use for type conversion and field name. row_value (Any): Value to be converted, based on the field's type. Returns: Any: A JSON-serializable object. """ def default_converter(value): _warn_unknown_field_type(field) return value converter = _SCALAR_VALUE_TO_JSON_ROW.get(field.field_type, default_converter) return converter(row_value) def _repeated_field_to_json(field, row_value): """Convert a repeated/array field to its JSON representation. Args: field (google.cloud.bigquery.schema.SchemaField): The SchemaField to use for type conversion and field name. The field mode must equal ``REPEATED``. row_value (Sequence[Any]): A sequence of values to convert to JSON-serializable values. Returns: List[Any]: A list of JSON-serializable objects. """ values = [] for item in row_value: values.append(_single_field_to_json(field, item)) return values def _record_field_to_json(fields, row_value): """Convert a record/struct field to its JSON representation. Args: fields (Sequence[google.cloud.bigquery.schema.SchemaField]): The :class:`~google.cloud.bigquery.schema.SchemaField`s of the record's subfields to use for type conversion and field names. row_value (Union[Tuple[Any], Mapping[str, Any]): A tuple or dictionary to convert to JSON-serializable values. Returns: Mapping[str, Any]: A JSON-serializable dictionary. """ isdict = isinstance(row_value, dict) # If row is passed as a tuple, make the length sanity check to avoid either # uninformative index errors a few lines below or silently omitting some of # the values from the result (we cannot know exactly which fields are missing # or redundant, since we don't have their names). if not isdict and len(row_value) != len(fields): msg = "The number of row fields ({}) does not match schema length ({}).".format( len(row_value), len(fields) ) raise ValueError(msg) record = {} if isdict: processed_fields = set() for subindex, subfield in enumerate(fields): subname = subfield.name subvalue = row_value.get(subname) if isdict else row_value[subindex] # None values are unconditionally omitted if subvalue is not None: record[subname] = _field_to_json(subfield, subvalue) if isdict: processed_fields.add(subname) # Unknown fields should not be silently dropped, include them. Since there # is no schema information available for them, include them as strings # to make them JSON-serializable. if isdict: not_processed = set(row_value.keys()) - processed_fields for field_name in not_processed: value = row_value[field_name] if value is not None: record[field_name] = str(value) return record def _single_field_to_json(field, row_value): """Convert a single field into JSON-serializable values. Ignores mode so that this can function for ARRAY / REPEATING fields without requiring a deepcopy of the field. See: https://github.com/googleapis/python-bigquery/issues/6 Args: field (google.cloud.bigquery.schema.SchemaField): The SchemaField to use for type conversion and field name. row_value (Any): Scalar or Struct to be inserted. 
The type is inferred from the SchemaField's field_type. Returns: Any: A JSON-serializable object. """ if row_value is None: return None if field.field_type == "RECORD": return _record_field_to_json(field.fields, row_value) if field.field_type == "RANGE": return _range_field_to_json(field.range_element_type, row_value) return _scalar_field_to_json(field, row_value) def _field_to_json(field, row_value): """Convert a field into JSON-serializable values. Args: field (google.cloud.bigquery.schema.SchemaField): The SchemaField to use for type conversion and field name. row_value (Union[Sequence[List], Any]): Row data to be inserted. If the SchemaField's mode is REPEATED, assume this is a list. If not, the type is inferred from the SchemaField's field_type. Returns: Any: A JSON-serializable object. """ if row_value is None: return None if field.mode == "REPEATED": return _repeated_field_to_json(field, row_value) return _single_field_to_json(field, row_value) def _snake_to_camel_case(value): """Convert snake case string to camel case.""" words = value.split("_") return words[0] + "".join(map(str.capitalize, words[1:])) def _get_sub_prop(container, keys, default=None): """Get a nested value from a dictionary. This method works like ``dict.get(key)``, but for nested values. Args: container (Dict): A dictionary which may contain other dictionaries as values. keys (Iterable): A sequence of keys to attempt to get the value for. If ``keys`` is a string, it is treated as sequence containing a single string key. Each item in the sequence represents a deeper nesting. The first key is for the top level. If there is a dictionary there, the second key attempts to get the value within that, and so on. default (Optional[object]): Value to returned if any of the keys are not found. Defaults to ``None``. Examples: Get a top-level value (equivalent to ``container.get('key')``). >>> _get_sub_prop({'key': 'value'}, ['key']) 'value' Get a top-level value, providing a default (equivalent to ``container.get('key', default='default')``). >>> _get_sub_prop({'nothere': 123}, ['key'], default='not found') 'not found' Get a nested value. >>> _get_sub_prop({'key': {'subkey': 'value'}}, ['key', 'subkey']) 'value' Returns: object: The value if present or the default. """ if isinstance(keys, str): keys = [keys] sub_val = container for key in keys: if key not in sub_val: return default sub_val = sub_val[key] return sub_val def _set_sub_prop(container, keys, value): """Set a nested value in a dictionary. Args: container (Dict): A dictionary which may contain other dictionaries as values. keys (Iterable): A sequence of keys to attempt to set the value for. If ``keys`` is a string, it is treated as sequence containing a single string key. Each item in the sequence represents a deeper nesting. The first key is for the top level. If there is a dictionary there, the second key attempts to get the value within that, and so on. value (object): Value to set within the container. Examples: Set a top-level value (equivalent to ``container['key'] = 'value'``). >>> container = {} >>> _set_sub_prop(container, ['key'], 'value') >>> container {'key': 'value'} Set a nested value. >>> container = {} >>> _set_sub_prop(container, ['key', 'subkey'], 'value') >>> container {'key': {'subkey': 'value'}} Replace a nested value. 
>>> container = {'key': {'subkey': 'prev'}} >>> _set_sub_prop(container, ['key', 'subkey'], 'new') >>> container {'key': {'subkey': 'new'}} """ if isinstance(keys, str): keys = [keys] sub_val = container for key in keys[:-1]: if key not in sub_val: sub_val[key] = {} sub_val = sub_val[key] sub_val[keys[-1]] = value def _del_sub_prop(container, keys): """Remove a nested key fro a dictionary. Args: container (Dict): A dictionary which may contain other dictionaries as values. keys (Iterable): A sequence of keys to attempt to clear the value for. Each item in the sequence represents a deeper nesting. The first key is for the top level. If there is a dictionary there, the second key attempts to get the value within that, and so on. Examples: Remove a top-level value (equivalent to ``del container['key']``). >>> container = {'key': 'value'} >>> _del_sub_prop(container, ['key']) >>> container {} Remove a nested value. >>> container = {'key': {'subkey': 'value'}} >>> _del_sub_prop(container, ['key', 'subkey']) >>> container {'key': {}} """ sub_val = container for key in keys[:-1]: if key not in sub_val: sub_val[key] = {} sub_val = sub_val[key] if keys[-1] in sub_val: del sub_val[keys[-1]] def _int_or_none(value): """Helper: deserialize int value from JSON string.""" if isinstance(value, int): return value if value is not None: return int(value) def _str_or_none(value): """Helper: serialize value to JSON string.""" if value is not None: return str(value) def _split_id(full_id): """Helper: split full_id into composite parts. Args: full_id (str): Fully-qualified ID in standard SQL format. Returns: List[str]: ID's parts separated into components. """ with_prefix = _PROJECT_PREFIX_PATTERN.match(full_id) if with_prefix is None: parts = full_id.split(".") else: parts = with_prefix.groups() parts = [part for part in parts if part] return parts def _parse_3_part_id(full_id, default_project=None, property_name="table_id"): output_project_id = default_project output_dataset_id = None output_resource_id = None parts = _split_id(full_id) if len(parts) != 2 and len(parts) != 3: raise ValueError( "{property_name} must be a fully-qualified ID in " 'standard SQL format, e.g., "project.dataset.{property_name}", ' "got {}".format(full_id, property_name=property_name) ) if len(parts) == 2 and not default_project: raise ValueError( "When default_project is not set, {property_name} must be a " "fully-qualified ID in standard SQL format, " 'e.g., "project.dataset_id.{property_name}", got {}'.format( full_id, property_name=property_name ) ) if len(parts) == 2: output_dataset_id, output_resource_id = parts else: output_project_id, output_dataset_id, output_resource_id = parts return output_project_id, output_dataset_id, output_resource_id def _build_resource_from_properties(obj, filter_fields): """Build a resource based on a ``_properties`` dictionary, filtered by ``filter_fields``, which follow the name of the Python object. 
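
    Example (illustrative sketch; ``_Stub`` is a hypothetical stand-in for a
    resource class such as ``Table`` or ``Dataset``):

        >>> class _Stub:
        ...     _PROPERTY_TO_API_FIELD = {"friendly_name": "friendlyName"}
        ...     _properties = {"friendlyName": "my table", "labels": {"env": "dev"}}
        >>> _build_resource_from_properties(_Stub(), ["friendly_name", "labels"])
        {'friendlyName': 'my table', 'labels': {'env': 'dev'}}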
""" partial = {} for filter_field in filter_fields: api_field = _get_sub_prop(obj._PROPERTY_TO_API_FIELD, filter_field) if api_field is None and filter_field not in obj._properties: raise ValueError("No property %s" % filter_field) elif api_field is not None: _set_sub_prop(partial, api_field, _get_sub_prop(obj._properties, api_field)) else: # allows properties that are not defined in the library # and properties that have the same name as API resource key partial[filter_field] = obj._properties[filter_field] return partial def _verify_job_config_type(job_config, expected_type, param_name="job_config"): if not isinstance(job_config, expected_type): msg = ( "Expected an instance of {expected_type} class for the {param_name} parameter, " "but received {param_name} = {job_config}" ) raise TypeError( msg.format( expected_type=expected_type.__name__, param_name=param_name, job_config=job_config, ) ) def _isinstance_or_raise( value: Any, dtype: Union[Type, Tuple[Type, ...]], none_allowed: Optional[bool] = False, ) -> Any: """Determine whether a value type matches a given datatype or None. Args: value (Any): Value to be checked. dtype (type): Expected data type or tuple of data types. none_allowed Optional(bool): whether value is allowed to be None. Default is False. Returns: Any: Returns the input value if the type check is successful. Raises: TypeError: If the input value's type does not match the expected data type(s). """ if none_allowed and value is None: return value if isinstance(value, dtype): return value or_none = "" if none_allowed: or_none = " (or None)" msg = f"Pass {value} as a '{dtype}'{or_none}. Got {type(value)}." raise TypeError(msg)