structure saas with tools
.venv/lib/python3.10/site-packages/google/cloud/bigquery/__init__.py
@@ -0,0 +1,249 @@
# Copyright 2015 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Google BigQuery API wrapper.

The main concepts with this API are:

- :class:`~google.cloud.bigquery.client.Client` manages connections to the
  BigQuery API. Use the client methods to run jobs (such as a
  :class:`~google.cloud.bigquery.job.QueryJob` via
  :meth:`~google.cloud.bigquery.client.Client.query`) and manage resources.

- :class:`~google.cloud.bigquery.dataset.Dataset` represents a
  collection of tables.

- :class:`~google.cloud.bigquery.table.Table` represents a single "relation".
"""

import warnings

from google.cloud.bigquery import version as bigquery_version

__version__ = bigquery_version.__version__

from google.cloud.bigquery.client import Client
from google.cloud.bigquery.dataset import AccessEntry
from google.cloud.bigquery.dataset import Dataset
from google.cloud.bigquery.dataset import DatasetReference
from google.cloud.bigquery import enums
from google.cloud.bigquery.enums import AutoRowIDs
from google.cloud.bigquery.enums import DecimalTargetType
from google.cloud.bigquery.enums import KeyResultStatementKind
from google.cloud.bigquery.enums import SqlTypeNames
from google.cloud.bigquery.enums import StandardSqlTypeNames
from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError
from google.cloud.bigquery.exceptions import LegacyPandasError
from google.cloud.bigquery.exceptions import LegacyPyarrowError
from google.cloud.bigquery.external_config import ExternalConfig
from google.cloud.bigquery.external_config import BigtableOptions
from google.cloud.bigquery.external_config import BigtableColumnFamily
from google.cloud.bigquery.external_config import BigtableColumn
from google.cloud.bigquery.external_config import CSVOptions
from google.cloud.bigquery.external_config import GoogleSheetsOptions
from google.cloud.bigquery.external_config import ExternalSourceFormat
from google.cloud.bigquery.external_config import HivePartitioningOptions
from google.cloud.bigquery.format_options import AvroOptions
from google.cloud.bigquery.format_options import ParquetOptions
from google.cloud.bigquery.job.base import SessionInfo
from google.cloud.bigquery.job import Compression
from google.cloud.bigquery.job import CopyJob
from google.cloud.bigquery.job import CopyJobConfig
from google.cloud.bigquery.job import CreateDisposition
from google.cloud.bigquery.job import DestinationFormat
from google.cloud.bigquery.job import DmlStats
from google.cloud.bigquery.job import Encoding
from google.cloud.bigquery.job import ExtractJob
from google.cloud.bigquery.job import ExtractJobConfig
from google.cloud.bigquery.job import LoadJob
from google.cloud.bigquery.job import LoadJobConfig
from google.cloud.bigquery.job import OperationType
from google.cloud.bigquery.job import QueryJob
from google.cloud.bigquery.job import QueryJobConfig
from google.cloud.bigquery.job import QueryPriority
from google.cloud.bigquery.job import SchemaUpdateOption
from google.cloud.bigquery.job import ScriptOptions
from google.cloud.bigquery.job import SourceFormat
from google.cloud.bigquery.job import UnknownJob
from google.cloud.bigquery.job import TransactionInfo
from google.cloud.bigquery.job import WriteDisposition
from google.cloud.bigquery.model import Model
from google.cloud.bigquery.model import ModelReference
from google.cloud.bigquery.query import ArrayQueryParameter
from google.cloud.bigquery.query import ArrayQueryParameterType
from google.cloud.bigquery.query import ConnectionProperty
from google.cloud.bigquery.query import ScalarQueryParameter
from google.cloud.bigquery.query import ScalarQueryParameterType
from google.cloud.bigquery.query import RangeQueryParameter
from google.cloud.bigquery.query import RangeQueryParameterType
from google.cloud.bigquery.query import SqlParameterScalarTypes
from google.cloud.bigquery.query import StructQueryParameter
from google.cloud.bigquery.query import StructQueryParameterType
from google.cloud.bigquery.query import UDFResource
from google.cloud.bigquery.retry import DEFAULT_RETRY
from google.cloud.bigquery.routine import DeterminismLevel
from google.cloud.bigquery.routine import Routine
from google.cloud.bigquery.routine import RoutineArgument
from google.cloud.bigquery.routine import RoutineReference
from google.cloud.bigquery.routine import RoutineType
from google.cloud.bigquery.routine import RemoteFunctionOptions
from google.cloud.bigquery.schema import PolicyTagList
from google.cloud.bigquery.schema import SchemaField
from google.cloud.bigquery.schema import FieldElementType
from google.cloud.bigquery.standard_sql import StandardSqlDataType
from google.cloud.bigquery.standard_sql import StandardSqlField
from google.cloud.bigquery.standard_sql import StandardSqlStructType
from google.cloud.bigquery.standard_sql import StandardSqlTableType
from google.cloud.bigquery.table import PartitionRange
from google.cloud.bigquery.table import RangePartitioning
from google.cloud.bigquery.table import Row
from google.cloud.bigquery.table import SnapshotDefinition
from google.cloud.bigquery.table import CloneDefinition
from google.cloud.bigquery.table import Table
from google.cloud.bigquery.table import TableReference
from google.cloud.bigquery.table import TimePartitioningType
from google.cloud.bigquery.table import TimePartitioning
from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration
from google.cloud.bigquery import _versions_helpers

try:
    import bigquery_magics  # type: ignore
except ImportError:
    bigquery_magics = None

sys_major, sys_minor, sys_micro = _versions_helpers.extract_runtime_version()

if sys_major == 3 and sys_minor in (7, 8):
    warnings.warn(
        "The python-bigquery library no longer supports Python 3.7 "
        "and Python 3.8. "
        f"Your Python version is {sys_major}.{sys_minor}.{sys_micro}. We "
        "recommend that you update soon to ensure ongoing support. For "
        "more details, see: [Google Cloud Client Libraries Supported Python Versions policy](https://cloud.google.com/python/docs/supported-python-versions)",
        FutureWarning,
    )

__all__ = [
    "__version__",
    "Client",
    # Queries
    "ConnectionProperty",
    "QueryJob",
    "QueryJobConfig",
    "ArrayQueryParameter",
    "ScalarQueryParameter",
    "StructQueryParameter",
    "RangeQueryParameter",
    "ArrayQueryParameterType",
    "ScalarQueryParameterType",
    "SqlParameterScalarTypes",
    "StructQueryParameterType",
    "RangeQueryParameterType",
    # Datasets
    "Dataset",
    "DatasetReference",
    "AccessEntry",
    # Tables
    "Table",
    "TableReference",
    "PartitionRange",
    "RangePartitioning",
    "Row",
    "SnapshotDefinition",
    "CloneDefinition",
    "TimePartitioning",
    "TimePartitioningType",
    # Jobs
    "CopyJob",
    "CopyJobConfig",
    "ExtractJob",
    "ExtractJobConfig",
    "LoadJob",
    "LoadJobConfig",
    "SessionInfo",
    "UnknownJob",
    # Models
    "Model",
    "ModelReference",
    # Routines
    "Routine",
    "RoutineArgument",
    "RoutineReference",
    "RemoteFunctionOptions",
    # Shared helpers
    "SchemaField",
    "FieldElementType",
    "PolicyTagList",
    "UDFResource",
    "ExternalConfig",
    "AvroOptions",
    "BigtableOptions",
    "BigtableColumnFamily",
    "BigtableColumn",
    "DmlStats",
    "CSVOptions",
    "GoogleSheetsOptions",
    "HivePartitioningOptions",
    "ParquetOptions",
    "ScriptOptions",
    "TransactionInfo",
    "DEFAULT_RETRY",
    # Standard SQL types
    "StandardSqlDataType",
    "StandardSqlField",
    "StandardSqlStructType",
    "StandardSqlTableType",
    # Enum Constants
    "enums",
    "AutoRowIDs",
    "Compression",
    "CreateDisposition",
    "DecimalTargetType",
    "DestinationFormat",
    "DeterminismLevel",
    "ExternalSourceFormat",
    "Encoding",
    "KeyResultStatementKind",
    "OperationType",
    "QueryPriority",
    "RoutineType",
    "SchemaUpdateOption",
    "SourceFormat",
    "SqlTypeNames",
    "StandardSqlTypeNames",
    "WriteDisposition",
    # EncryptionConfiguration
    "EncryptionConfiguration",
    # Custom exceptions
    "LegacyBigQueryStorageError",
    "LegacyPyarrowError",
    "LegacyPandasError",
]


def load_ipython_extension(ipython):
    """Called by IPython when this module is loaded as an IPython extension."""
    warnings.warn(
        "%load_ext google.cloud.bigquery is deprecated. Install bigquery-magics package and use `%load_ext bigquery_magics`, instead.",
        category=FutureWarning,
    )

    if bigquery_magics is not None:
        bigquery_magics.load_ipython_extension(ipython)
    else:
        from google.cloud.bigquery.magics.magics import _cell_magic

        ipython.register_magic_function(
            _cell_magic, magic_kind="cell", magic_name="bigquery"
        )
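
# A minimal usage sketch of the concepts named in the module docstring above
# (Client, Dataset, Table). Hedged: this assumes Application Default
# Credentials and a default project are configured in the environment, and
# uses a real public dataset for illustration.
#
# from google.cloud import bigquery
#
# client = bigquery.Client()  # manages connections to the BigQuery API
#
# # Run a query job via the client and iterate the result rows.
# for row in client.query("SELECT 17 AS answer").result():
#     print(row["answer"])
#
# # Datasets are collections of tables; a Table is a single "relation".
# dataset = client.get_dataset("bigquery-public-data.usa_names")
# table = client.get_table("bigquery-public-data.usa_names.usa_1910_2013")
# print(dataset.dataset_id, table.num_rows)
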
Binary files not shown.
1102	.venv/lib/python3.10/site-packages/google/cloud/bigquery/_helpers.py	Normal file
File diff suppressed because it is too large
.venv/lib/python3.10/site-packages/google/cloud/bigquery/_http.py
@@ -0,0 +1,47 @@
# Copyright 2015 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Create / interact with Google BigQuery connections."""

from google.cloud import _http  # type: ignore  # pytype: disable=import-error
from google.cloud.bigquery import __version__


class Connection(_http.JSONConnection):
    """A connection to Google BigQuery via the JSON REST API.

    Args:
        client (google.cloud.bigquery.client.Client): The client that owns the current connection.

        client_info (Optional[google.api_core.client_info.ClientInfo]): Instance used to generate user agent.

        api_endpoint (str): The api_endpoint to use. If None, the library will decide what endpoint to use.
    """

    DEFAULT_API_ENDPOINT = "https://bigquery.googleapis.com"
    DEFAULT_API_MTLS_ENDPOINT = "https://bigquery.mtls.googleapis.com"

    def __init__(self, client, client_info=None, api_endpoint=None):
        super(Connection, self).__init__(client, client_info)
        self.API_BASE_URL = api_endpoint or self.DEFAULT_API_ENDPOINT
        self.API_BASE_MTLS_URL = self.DEFAULT_API_MTLS_ENDPOINT
        self.ALLOW_AUTO_SWITCH_TO_MTLS_URL = api_endpoint is None
        self._client_info.gapic_version = __version__
        self._client_info.client_library_version = __version__

    API_VERSION = "v2"  # type: ignore
    """The version of the API, used in building the API call's URL."""

    API_URL_TEMPLATE = "{api_base_url}/bigquery/{api_version}{path}"  # type: ignore
    """A template for the URL of a particular API call."""
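
# A small sketch of how the two class attributes above compose a request URL.
# Pure string formatting; no credentials or network access required. The
# project ID is a hypothetical placeholder.
API_URL_TEMPLATE = "{api_base_url}/bigquery/{api_version}{path}"

url = API_URL_TEMPLATE.format(
    api_base_url="https://bigquery.googleapis.com",  # DEFAULT_API_ENDPOINT
    api_version="v2",                                # API_VERSION
    path="/projects/my-project/queries",             # e.g. a jobs.query path
)
assert url == "https://bigquery.googleapis.com/bigquery/v2/projects/my-project/queries"
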
.venv/lib/python3.10/site-packages/google/cloud/bigquery/_job_helpers.py
@@ -0,0 +1,600 @@
# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Helpers for interacting with the job REST APIs from the client.

For queries, there are three cases to consider:

1. jobs.insert: This always returns a job resource.
2. jobs.query, jobCreationMode=JOB_CREATION_REQUIRED:
   This sometimes can return the results inline, but always includes a job ID.
3. jobs.query, jobCreationMode=JOB_CREATION_OPTIONAL:
   This sometimes doesn't create a job at all, instead returning the results.
   For better debugging, an auto-generated query ID is included in the
   response.

Client.query() calls either (1) or (2), depending on what the user provides
for the api_method parameter. query() always returns a QueryJob object, which
can retry the query when the query job fails for a retriable reason.

Client.query_and_wait() calls (3). This returns a RowIterator that may wrap
local results from the response or may wrap a query job containing multiple
pages of results. Even though query_and_wait() waits for the job to complete,
we still need a separate job_retry object because there are different
predicates where it is safe to generate a new query ID.
"""

import copy
import functools
import os
import uuid
from typing import Any, Dict, Optional, TYPE_CHECKING, Union

import google.api_core.exceptions as core_exceptions
from google.api_core import retry as retries

from google.cloud.bigquery import job
import google.cloud.bigquery.query
from google.cloud.bigquery import table
import google.cloud.bigquery.retry
from google.cloud.bigquery.retry import POLLING_DEFAULT_VALUE

# Avoid circular imports
if TYPE_CHECKING:  # pragma: NO COVER
    from google.cloud.bigquery.client import Client


# The purpose of _TIMEOUT_BUFFER_MILLIS is to allow the server-side timeout to
# happen before the client-side timeout. This is not strictly necessary, as the
# client retries client-side timeouts, but the hope by making the server-side
# timeout slightly shorter is that it can save the server from some unnecessary
# processing time.
#
# 250 milliseconds is chosen arbitrarily, though it should be about the right
# order of magnitude for network latency and switching delays. It is about the
# amount of time for light to circumnavigate the world twice.
_TIMEOUT_BUFFER_MILLIS = 250


def make_job_id(job_id: Optional[str] = None, prefix: Optional[str] = None) -> str:
    """Construct an ID for a new job.

    Args:
        job_id: the user-provided job ID.
        prefix: the user-provided prefix for a job ID.

    Returns:
        str: A job ID.
    """
    if job_id is not None:
        return job_id
    elif prefix is not None:
        return str(prefix) + str(uuid.uuid4())
    else:
        return str(uuid.uuid4())

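# A hedged sketch of make_job_id's three branches. Note that _job_helpers is a
# private module, so this import path is illustrative rather than a supported
# entry point.
#
# from google.cloud.bigquery import _job_helpers
#
# print(_job_helpers.make_job_id("my-job"))         # -> "my-job" (explicit ID wins)
# print(_job_helpers.make_job_id(prefix="daily-"))  # -> "daily-<uuid4>"
# print(_job_helpers.make_job_id())                 # -> "<uuid4>"
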
def job_config_with_defaults(
    job_config: Optional[job.QueryJobConfig],
    default_job_config: Optional[job.QueryJobConfig],
) -> Optional[job.QueryJobConfig]:
    """Create a copy of `job_config`, replacing unset values with those from
    `default_job_config`.
    """
    if job_config is None:
        return default_job_config

    if default_job_config is None:
        return job_config

    # Both job_config and default_job_config are not None, so make a copy of
    # job_config merged with default_job_config. Anything already explicitly
    # set on job_config should not be replaced.
    return job_config._fill_from_default(default_job_config)


def query_jobs_insert(
    client: "Client",
    query: str,
    job_config: Optional[job.QueryJobConfig],
    job_id: Optional[str],
    job_id_prefix: Optional[str],
    location: Optional[str],
    project: str,
    retry: Optional[retries.Retry],
    timeout: Optional[float],
    job_retry: Optional[retries.Retry],
) -> job.QueryJob:
    """Initiate a query using jobs.insert.

    See: https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/insert
    """
    job_id_given = job_id is not None
    job_id_save = job_id
    job_config_save = job_config

    def do_query():
        # Make a copy now, so that the original doesn't get changed by the
        # process below and to facilitate retry.
        job_config = copy.deepcopy(job_config_save)

        job_id = make_job_id(job_id_save, job_id_prefix)
        job_ref = job._JobReference(job_id, project=project, location=location)
        query_job = job.QueryJob(job_ref, query, client=client, job_config=job_config)

        try:
            query_job._begin(retry=retry, timeout=timeout)
        except core_exceptions.Conflict as create_exc:
            # The thought is if someone is providing their own job IDs and they get
            # their job ID generation wrong, this could end up returning results for
            # the wrong query. We thus only try to recover if the job ID was not given.
            if job_id_given:
                raise create_exc

            try:
                # Sometimes we get a 404 after a Conflict. In this case, we
                # have pretty high confidence that by retrying the 404, we'll
                # (hopefully) eventually recover the job.
                # https://github.com/googleapis/python-bigquery/issues/2134
                #
                # Allow users who want to completely disable retries to
                # continue to do so by setting retry to None.
                get_job_retry = retry
                if retry is not None:
                    # TODO(tswast): Amend the user's retry object with allowing
                    # 404 to retry when there's a public way to do so.
                    # https://github.com/googleapis/python-api-core/issues/796
                    get_job_retry = (
                        google.cloud.bigquery.retry._DEFAULT_GET_JOB_CONFLICT_RETRY
                    )

                query_job = client.get_job(
                    job_id,
                    project=project,
                    location=location,
                    retry=get_job_retry,
                    timeout=google.cloud.bigquery.retry.DEFAULT_GET_JOB_TIMEOUT,
                )
            except core_exceptions.GoogleAPIError:  # (includes RetryError)
                raise
            else:
                return query_job
        else:
            return query_job

    # Allow users who want to completely disable retries to
    # continue to do so by setting job_retry to None.
    if job_retry is not None:
        do_query = google.cloud.bigquery.retry._DEFAULT_QUERY_JOB_INSERT_RETRY(do_query)

    future = do_query()

    # The future might be in a failed state now, but if it's
    # unrecoverable, we'll find out when we ask for its result, at which
    # point, we may retry.
    if not job_id_given:
        future._retry_do_query = do_query  # in case we have to retry later
        future._job_retry = job_retry

    return future


def _validate_job_config(request_body: Dict[str, Any], invalid_key: str):
    """Catch common mistakes, such as passing in a *JobConfig object of the
    wrong type.
    """
    if invalid_key in request_body:
        raise ValueError(f"got unexpected key {repr(invalid_key)} in job_config")


def _to_query_request(
    job_config: Optional[job.QueryJobConfig] = None,
    *,
    query: str,
    location: Optional[str] = None,
    timeout: Optional[float] = None,
) -> Dict[str, Any]:
    """Transform from Job resource to QueryRequest resource.

    Most of the keys in job.configuration.query are in common with
    QueryRequest. If any configuration property is set that is not available in
    jobs.query, it will result in a server-side error.
    """
    request_body = copy.copy(job_config.to_api_repr()) if job_config else {}

    _validate_job_config(request_body, job.CopyJob._JOB_TYPE)
    _validate_job_config(request_body, job.ExtractJob._JOB_TYPE)
    _validate_job_config(request_body, job.LoadJob._JOB_TYPE)

    # Move query.* properties to top-level.
    query_config_resource = request_body.pop("query", {})
    request_body.update(query_config_resource)

    # Default to standard SQL.
    request_body.setdefault("useLegacySql", False)

    # Since jobs.query can return results, ensure we use the lossless timestamp
    # format. See: https://github.com/googleapis/python-bigquery/issues/395
    request_body.setdefault("formatOptions", {})
    request_body["formatOptions"]["useInt64Timestamp"] = True  # type: ignore

    if timeout is not None:
        # Subtract a buffer for context switching, network latency, etc.
        request_body["timeoutMs"] = max(0, int(1000 * timeout) - _TIMEOUT_BUFFER_MILLIS)

    if location is not None:
        request_body["location"] = location

    request_body["query"] = query

    return request_body

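# A hedged sketch of the timeoutMs arithmetic above: a 10-second client-side
# timeout becomes a slightly shorter server-side timeout so the server can
# stop work before the client gives up. Pure Python; no API calls involved.
# The helper name below is hypothetical, mirroring the module constant.
_TIMEOUT_BUFFER_MILLIS_EXAMPLE = 250

def server_timeout_ms(client_timeout_seconds: float) -> int:
    # Same formula as _to_query_request: clamp at zero for tiny timeouts.
    return max(0, int(1000 * client_timeout_seconds) - _TIMEOUT_BUFFER_MILLIS_EXAMPLE)

assert server_timeout_ms(10.0) == 9750  # 10_000 ms minus the 250 ms buffer
assert server_timeout_ms(0.1) == 0      # too small; clamped to zero
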
def _to_query_job(
    client: "Client",
    query: str,
    request_config: Optional[job.QueryJobConfig],
    query_response: Dict[str, Any],
) -> job.QueryJob:
    job_ref_resource = query_response["jobReference"]
    job_ref = job._JobReference._from_api_repr(job_ref_resource)
    query_job = job.QueryJob(job_ref, query, client=client)
    query_job._properties.setdefault("configuration", {})

    # Not all relevant properties are in the jobs.query response. Populate some
    # expected properties based on the job configuration.
    if request_config is not None:
        query_job._properties["configuration"].update(request_config.to_api_repr())

    query_job._properties["configuration"].setdefault("query", {})
    query_job._properties["configuration"]["query"]["query"] = query
    query_job._properties["configuration"]["query"].setdefault("useLegacySql", False)

    query_job._properties.setdefault("statistics", {})
    query_job._properties["statistics"].setdefault("query", {})
    query_job._properties["statistics"]["query"]["cacheHit"] = query_response.get(
        "cacheHit"
    )
    query_job._properties["statistics"]["query"]["schema"] = query_response.get(
        "schema"
    )
    query_job._properties["statistics"]["query"][
        "totalBytesProcessed"
    ] = query_response.get("totalBytesProcessed")

    # Set errors if any were encountered.
    query_job._properties.setdefault("status", {})
    if "errors" in query_response:
        # Set errors but not errorResult. If there was an error that failed
        # the job, jobs.query behaves like jobs.getQueryResults and returns a
        # non-success HTTP status code.
        errors = query_response["errors"]
        query_job._properties["status"]["errors"] = errors

    # Avoid an extra call to `getQueryResults` if the query has finished.
    job_complete = query_response.get("jobComplete")
    if job_complete:
        query_job._query_results = google.cloud.bigquery.query._QueryResults(
            query_response
        )

    # We want job.result() to refresh the job state, so the conversion is
    # always "PENDING", even if the job is finished.
    query_job._properties["status"]["state"] = "PENDING"

    return query_job


def _to_query_path(project: str) -> str:
    return f"/projects/{project}/queries"


def query_jobs_query(
    client: "Client",
    query: str,
    job_config: Optional[job.QueryJobConfig],
    location: Optional[str],
    project: str,
    retry: retries.Retry,
    timeout: Optional[float],
    job_retry: retries.Retry,
) -> job.QueryJob:
    """Initiate a query using jobs.query with jobCreationMode=JOB_CREATION_REQUIRED.

    See: https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query
    """
    path = _to_query_path(project)
    request_body = _to_query_request(
        query=query, job_config=job_config, location=location, timeout=timeout
    )

    def do_query():
        request_body["requestId"] = make_job_id()
        span_attributes = {"path": path}
        api_response = client._call_api(
            retry,
            span_name="BigQuery.query",
            span_attributes=span_attributes,
            method="POST",
            path=path,
            data=request_body,
            timeout=timeout,
        )
        return _to_query_job(client, query, job_config, api_response)

    future = do_query()

    # The future might be in a failed state now, but if it's
    # unrecoverable, we'll find out when we ask for its result, at which
    # point, we may retry.
    future._retry_do_query = do_query  # in case we have to retry later
    future._job_retry = job_retry

    return future


def query_and_wait(
    client: "Client",
    query: str,
    *,
    job_config: Optional[job.QueryJobConfig],
    location: Optional[str],
    project: str,
    api_timeout: Optional[float] = None,
    wait_timeout: Optional[Union[float, object]] = POLLING_DEFAULT_VALUE,
    retry: Optional[retries.Retry],
    job_retry: Optional[retries.Retry],
    page_size: Optional[int] = None,
    max_results: Optional[int] = None,
) -> table.RowIterator:
    """Run the query, wait for it to finish, and return the results.

    While ``jobCreationMode=JOB_CREATION_OPTIONAL`` is in preview in the
    ``jobs.query`` REST API, use the default ``jobCreationMode`` unless
    the environment variable ``QUERY_PREVIEW_ENABLED=true``. After
    ``jobCreationMode`` is GA, this method will always use
    ``jobCreationMode=JOB_CREATION_OPTIONAL``. See:
    https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query

    Args:
        client:
            BigQuery client to make API calls.
        query (str):
            SQL query to be executed. Defaults to the standard SQL
            dialect. Use the ``job_config`` parameter to change dialects.
        job_config (Optional[google.cloud.bigquery.job.QueryJobConfig]):
            Extra configuration options for the job.
            To override any options that were previously set in
            the ``default_query_job_config`` given to the
            ``Client`` constructor, manually set those options to ``None``,
            or whatever value is preferred.
        location (Optional[str]):
            Location where to run the job. Must match the location of the
            table used in the query as well as the destination table.
        project (Optional[str]):
            Project ID of the project where to run the job. Defaults
            to the client's project.
        api_timeout (Optional[float]):
            The number of seconds to wait for the underlying HTTP transport
            before using ``retry``.
        wait_timeout (Optional[Union[float, object]]):
            The number of seconds to wait for the query to finish. If the
            query doesn't finish before this timeout, the client attempts
            to cancel the query. If unset, the underlying Client.get_job() API
            call has a timeout, but we still wait indefinitely for the job to
            finish.
        retry (Optional[google.api_core.retry.Retry]):
            How to retry the RPC. This only applies to making RPC
            calls. It isn't used to retry failed jobs. This has
            a reasonable default that should only be overridden
            with care.
        job_retry (Optional[google.api_core.retry.Retry]):
            How to retry failed jobs. The default retries
            rate-limit-exceeded errors. Passing ``None`` disables
            job retry. Not all jobs can be retried.
        page_size (Optional[int]):
            The maximum number of rows in each page of results from this
            request. Non-positive values are ignored.
        max_results (Optional[int]):
            The maximum total number of rows from this request.

    Returns:
        google.cloud.bigquery.table.RowIterator:
            Iterator of row data
            :class:`~google.cloud.bigquery.table.Row`-s. During each
            page, the iterator will have the ``total_rows`` attribute
            set, which counts the total number of rows **in the result
            set** (this is distinct from the total number of rows in the
            current page: ``iterator.page.num_items``).

            If the query is a special query that produces no results, e.g.
            a DDL query, an ``_EmptyRowIterator`` instance is returned.

    Raises:
        TypeError:
            If ``job_config`` is not an instance of the
            :class:`~google.cloud.bigquery.job.QueryJobConfig` class.
    """
    request_body = _to_query_request(
        query=query, job_config=job_config, location=location, timeout=api_timeout
    )

    # Some API parameters aren't supported by the jobs.query API. In these
    # cases, fall back to a jobs.insert call.
    if not _supported_by_jobs_query(request_body):
        return _wait_or_cancel(
            query_jobs_insert(
                client=client,
                query=query,
                job_id=None,
                job_id_prefix=None,
                job_config=job_config,
                location=location,
                project=project,
                retry=retry,
                timeout=api_timeout,
                job_retry=job_retry,
            ),
            api_timeout=api_timeout,
            wait_timeout=wait_timeout,
            retry=retry,
            page_size=page_size,
            max_results=max_results,
        )

    path = _to_query_path(project)

    if page_size is not None and max_results is not None:
        request_body["maxResults"] = min(page_size, max_results)
    elif page_size is not None or max_results is not None:
        request_body["maxResults"] = page_size or max_results

    if os.getenv("QUERY_PREVIEW_ENABLED", "").casefold() == "true":
        request_body["jobCreationMode"] = "JOB_CREATION_OPTIONAL"

    def do_query():
        request_body["requestId"] = make_job_id()
        span_attributes = {"path": path}

        # For easier testing, handle the retries ourselves.
        if retry is not None:
            response = retry(client._call_api)(
                retry=None,  # We're calling the retry decorator ourselves.
                span_name="BigQuery.query",
                span_attributes=span_attributes,
                method="POST",
                path=path,
                data=request_body,
                timeout=api_timeout,
            )
        else:
            response = client._call_api(
                retry=None,
                span_name="BigQuery.query",
                span_attributes=span_attributes,
                method="POST",
                path=path,
                data=request_body,
                timeout=api_timeout,
            )

        # Even if we run with JOB_CREATION_OPTIONAL, if there are more pages
        # to fetch, there will be a job ID for jobs.getQueryResults.
        query_results = google.cloud.bigquery.query._QueryResults.from_api_repr(
            response
        )
        page_token = query_results.page_token
        more_pages = page_token is not None

        if more_pages or not query_results.complete:
            # TODO(swast): Avoid a call to jobs.get in some cases (few
            # remaining pages) by waiting for the query to finish and calling
            # client._list_rows_from_query_results directly. Need to update
            # RowIterator to fetch destination table via the job ID if needed.
            return _wait_or_cancel(
                _to_query_job(client, query, job_config, response),
                api_timeout=api_timeout,
                wait_timeout=wait_timeout,
                retry=retry,
                page_size=page_size,
                max_results=max_results,
            )

        return table.RowIterator(
            client=client,
            api_request=functools.partial(client._call_api, retry, timeout=api_timeout),
            path=None,
            schema=query_results.schema,
            max_results=max_results,
            page_size=page_size,
            total_rows=query_results.total_rows,
            first_page_response=response,
            location=query_results.location,
            job_id=query_results.job_id,
            query_id=query_results.query_id,
            project=query_results.project,
            num_dml_affected_rows=query_results.num_dml_affected_rows,
            query=query,
            total_bytes_processed=query_results.total_bytes_processed,
        )

    if job_retry is not None:
        return job_retry(do_query)()
    else:
        return do_query()


def _supported_by_jobs_query(request_body: Dict[str, Any]) -> bool:
    """True if jobs.query can be used. False if jobs.insert is needed."""
    request_keys = frozenset(request_body.keys())

    # Per issue: https://github.com/googleapis/python-bigquery/issues/1867
    # use an allowlist here instead of a denylist because the backend API allows
    # unsupported parameters without any warning or failure. Instead, keep this
    # set in sync with those in QueryRequest:
    # https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#QueryRequest
    keys_allowlist = {
        "kind",
        "query",
        "maxResults",
        "defaultDataset",
        "timeoutMs",
        "dryRun",
        "preserveNulls",
        "useQueryCache",
        "useLegacySql",
        "parameterMode",
        "queryParameters",
        "location",
        "formatOptions",
        "connectionProperties",
        "labels",
        "maximumBytesBilled",
        "requestId",
        "createSession",
    }

    unsupported_keys = request_keys - keys_allowlist
    return len(unsupported_keys) == 0

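# A small illustrative check of the allowlist above. Hedged: this calls a
# private helper, so treat it as documentation rather than a stable API.
#
# from google.cloud.bigquery import _job_helpers
#
# fast_path = {"query": "SELECT 1", "useLegacySql": False, "timeoutMs": 9750}
# slow_path = dict(fast_path, destinationTable={"tableId": "t"})  # not allowlisted
#
# assert _job_helpers._supported_by_jobs_query(fast_path) is True   # jobs.query OK
# assert _job_helpers._supported_by_jobs_query(slow_path) is False  # needs jobs.insert
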
def _wait_or_cancel(
    job: job.QueryJob,
    api_timeout: Optional[float],
    wait_timeout: Optional[Union[object, float]],
    retry: Optional[retries.Retry],
    page_size: Optional[int],
    max_results: Optional[int],
) -> table.RowIterator:
    """Wait for a job to complete and return the results.

    If we can't return the results within the ``wait_timeout``, try to cancel
    the job.
    """
    try:
        return job.result(
            page_size=page_size,
            max_results=max_results,
            retry=retry,
            timeout=wait_timeout,
        )
    except Exception:
        # Attempt to cancel the job since we can't return the results.
        try:
            job.cancel(retry=retry, timeout=api_timeout)
        except Exception:
            # Don't eat the original exception if cancel fails.
            pass
        raise
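
# A hedged end-to-end sketch of the public entry points that route into the
# helpers above. Assumes google-cloud-bigquery is installed and Application
# Default Credentials plus a default project are configured.
#
# from google.cloud import bigquery
#
# client = bigquery.Client()
#
# # Cases (1)/(2): Client.query() returns a QueryJob (jobs.insert by default;
# # api_method="QUERY" uses jobs.query with a required job).
# job = client.query("SELECT 1 AS x")
# print([row["x"] for row in job.result()])
#
# # Case (3): Client.query_and_wait() returns a RowIterator directly and may
# # skip job creation entirely when the backend allows it.
# rows = client.query_and_wait("SELECT 1 AS x")
# print([row["x"] for row in rows])
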
File diff suppressed because it is too large
.venv/lib/python3.10/site-packages/google/cloud/bigquery/_pyarrow_helpers.py
@@ -0,0 +1,147 @@
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Shared helper functions for connecting BigQuery and pyarrow.

NOTE: This module is DEPRECATED. Please make updates in the pandas-gbq package,
instead. See: go/pandas-gbq-and-bigframes-redundancy,
https://github.com/googleapis/python-bigquery-pandas/blob/main/pandas_gbq/schema/bigquery_to_pyarrow.py
and
https://github.com/googleapis/python-bigquery-pandas/blob/main/pandas_gbq/schema/pyarrow_to_bigquery.py
"""

from typing import Any

try:
    import pyarrow  # type: ignore
except ImportError:
    pyarrow = None

try:
    import db_dtypes  # type: ignore

    db_dtypes_import_exception = None
except ImportError as exc:
    db_dtypes = None
    db_dtypes_import_exception = exc


def pyarrow_datetime():
    return pyarrow.timestamp("us", tz=None)


def pyarrow_numeric():
    return pyarrow.decimal128(38, 9)


def pyarrow_bignumeric():
    # 77th digit is partial.
    # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#decimal_types
    return pyarrow.decimal256(76, 38)


def pyarrow_time():
    return pyarrow.time64("us")


def pyarrow_timestamp():
    return pyarrow.timestamp("us", tz="UTC")


_BQ_TO_ARROW_SCALARS = {}
_ARROW_SCALAR_IDS_TO_BQ = {}

if pyarrow:
    # This dictionary is duplicated in bigquery_storage/test/unit/test_reader.py.
    # When modifying it, be sure to update it there as well.
    # NOTE (TODO): the pyarrow type matching "BIGNUMERIC" is added in
    # _pandas_helpers.py.
    _BQ_TO_ARROW_SCALARS = {
        "BOOL": pyarrow.bool_,
        "BOOLEAN": pyarrow.bool_,
        "BYTES": pyarrow.binary,
        "DATE": pyarrow.date32,
        "DATETIME": pyarrow_datetime,
        "FLOAT": pyarrow.float64,
        "FLOAT64": pyarrow.float64,
        "GEOGRAPHY": pyarrow.string,
        "INT64": pyarrow.int64,
        "INTEGER": pyarrow.int64,
        # Normally, we'd prefer the JSON type built into pyarrow (added in 19.0.0),
        # but we'd like this to map as closely to the BQ Storage API as
        # possible, which uses the string() dtype, as JSON support in Arrow
        # predates JSON support in BigQuery by several years.
        "JSON": pyarrow.string,
        "NUMERIC": pyarrow_numeric,
        "STRING": pyarrow.string,
        "TIME": pyarrow_time,
        "TIMESTAMP": pyarrow_timestamp,
    }

    # DEPRECATED: update pandas_gbq.schema.pyarrow_to_bigquery, instead.
    _ARROW_SCALAR_IDS_TO_BQ = {
        # https://arrow.apache.org/docs/python/api/datatypes.html#type-classes
        pyarrow.bool_().id: "BOOL",
        pyarrow.int8().id: "INT64",
        pyarrow.int16().id: "INT64",
        pyarrow.int32().id: "INT64",
        pyarrow.int64().id: "INT64",
        pyarrow.uint8().id: "INT64",
        pyarrow.uint16().id: "INT64",
        pyarrow.uint32().id: "INT64",
        pyarrow.uint64().id: "INT64",
        pyarrow.float16().id: "FLOAT64",
        pyarrow.float32().id: "FLOAT64",
        pyarrow.float64().id: "FLOAT64",
        pyarrow.time32("ms").id: "TIME",
        pyarrow.time64("ns").id: "TIME",
        pyarrow.timestamp("ns").id: "TIMESTAMP",
        pyarrow.date32().id: "DATE",
        pyarrow.date64().id: "DATETIME",  # because millisecond resolution
        pyarrow.binary().id: "BYTES",
        pyarrow.string().id: "STRING",  # also an alias for pyarrow.utf8()
        pyarrow.large_string().id: "STRING",
        # The exact scale and precision don't matter, see below.
        pyarrow.decimal128(38, scale=9).id: "NUMERIC",
        # NOTE: all extension types (e.g. json_, uuid, db_dtypes.JSONArrowType)
        # have the same id (31 as of version 19.0.1), so these should not be
        # matched by id.
    }

    _BQ_TO_ARROW_SCALARS["BIGNUMERIC"] = pyarrow_bignumeric
    # The exact decimal's scale and precision are not important, as only
    # the type ID matters, and it's the same for all decimal256 instances.
    _ARROW_SCALAR_IDS_TO_BQ[pyarrow.decimal256(76, scale=38).id] = "BIGNUMERIC"


def bq_to_arrow_scalars(bq_scalar: str):
    """
    DEPRECATED: update pandas_gbq.schema.bigquery_to_pyarrow, instead, which is
    to be added in https://github.com/googleapis/python-bigquery-pandas/pull/893.

    Returns:
        The Arrow scalar type that the input BigQuery scalar type maps to.
        If it cannot find the BigQuery scalar, return None.
    """
    return _BQ_TO_ARROW_SCALARS.get(bq_scalar)


def arrow_scalar_ids_to_bq(arrow_scalar: Any):
    """
    DEPRECATED: update pandas_gbq.schema.pyarrow_to_bigquery, instead.

    Returns:
        The BigQuery scalar type that the input arrow scalar type maps to.
        If it cannot find the arrow scalar, return None.
    """
    return _ARROW_SCALAR_IDS_TO_BQ.get(arrow_scalar)
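
# A hedged sketch of the two lookup directions above, assuming pyarrow is
# installed (these functions live in a private module, so the import path may
# change between releases).
#
# import pyarrow
# from google.cloud.bigquery._pyarrow_helpers import (
#     arrow_scalar_ids_to_bq,
#     bq_to_arrow_scalars,
# )
#
# # BigQuery -> Arrow: the mapping stores type factories, so call the result.
# factory = bq_to_arrow_scalars("NUMERIC")
# print(factory())  # decimal128(38, 9)
#
# # Arrow -> BigQuery: keyed by the pyarrow type ID, not the type instance.
# print(arrow_scalar_ids_to_bq(pyarrow.int32().id))   # "INT64"
# print(arrow_scalar_ids_to_bq(pyarrow.string().id))  # "STRING"
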
.venv/lib/python3.10/site-packages/google/cloud/bigquery/_tqdm_helpers.py
@@ -0,0 +1,137 @@
# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Shared helper functions for the tqdm progress bar."""

import concurrent.futures
import sys
import time
import typing
from typing import Optional
import warnings

try:
    import tqdm  # type: ignore
except ImportError:
    tqdm = None

try:
    import tqdm.notebook as tqdm_notebook  # type: ignore
except ImportError:
    tqdm_notebook = None

if typing.TYPE_CHECKING:  # pragma: NO COVER
    from google.cloud.bigquery import QueryJob
    from google.cloud.bigquery.table import RowIterator

_NO_TQDM_ERROR = (
    "A progress bar was requested, but there was an error loading the tqdm "
    "library. Please install tqdm to use the progress bar functionality."
)

_PROGRESS_BAR_UPDATE_INTERVAL = 0.5


def get_progress_bar(progress_bar_type, description, total, unit):
    """Construct a tqdm progress bar object, if tqdm is installed."""
    if tqdm is None or (tqdm_notebook is None and progress_bar_type == "tqdm_notebook"):
        if progress_bar_type is not None:
            warnings.warn(_NO_TQDM_ERROR, UserWarning, stacklevel=3)
        return None

    try:
        if progress_bar_type == "tqdm":
            return tqdm.tqdm(
                bar_format="{l_bar}{bar}|",
                colour="green",
                desc=description,
                file=sys.stdout,
                total=total,
                unit=unit,
            )
        elif progress_bar_type == "tqdm_notebook":
            return tqdm_notebook.tqdm(
                bar_format="{l_bar}{bar}|",
                desc=description,
                file=sys.stdout,
                total=total,
                unit=unit,
            )
        elif progress_bar_type == "tqdm_gui":
            return tqdm.tqdm_gui(desc=description, total=total, unit=unit)
    except (KeyError, TypeError):  # pragma: NO COVER
        # Protect ourselves from any tqdm errors. In case of
        # unexpected tqdm behavior, just fall back to showing
        # no progress bar.
        warnings.warn(_NO_TQDM_ERROR, UserWarning, stacklevel=3)
    return None


def wait_for_query(
    query_job: "QueryJob",
    progress_bar_type: Optional[str] = None,
    max_results: Optional[int] = None,
) -> "RowIterator":
    """Return the query result and display a progress bar while the query is
    running, if tqdm is installed.

    Args:
        query_job:
            The job representing the execution of the query on the server.
        progress_bar_type:
            The type of progress bar to use to show query progress.
        max_results:
            The maximum number of rows the row iterator should return.

    Returns:
        A row iterator over the query results.
    """
    default_total = 1
    current_stage = None
    start_time = time.perf_counter()

    progress_bar = get_progress_bar(
        progress_bar_type, "Query is running", default_total, "query"
    )
    if progress_bar is None:
        return query_job.result(max_results=max_results)

    i = 0
    while True:
        if query_job.query_plan:
            default_total = len(query_job.query_plan)
            current_stage = query_job.query_plan[i]
            progress_bar.total = len(query_job.query_plan)
            progress_bar.set_description(
                f"Query executing stage {current_stage.name} and status {current_stage.status} : {time.perf_counter() - start_time:.2f}s"
            )
        try:
            query_result = query_job.result(
                timeout=_PROGRESS_BAR_UPDATE_INTERVAL, max_results=max_results
            )
            progress_bar.update(default_total)
            progress_bar.set_description(
                f"Job ID {query_job.job_id} successfully executed",
            )
            break
        except concurrent.futures.TimeoutError:
            query_job.reload()  # Refreshes the state via a GET request.
            if current_stage:
                if current_stage.status == "COMPLETE":
                    if i < default_total - 1:
                        progress_bar.update(i + 1)
                        i += 1
            continue

    progress_bar.close()
    return query_result
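
# A hedged usage sketch: the public entry point for these helpers is the
# progress_bar_type argument on result-consuming methods. Assumes tqdm and
# pandas (with db-dtypes) are installed alongside google-cloud-bigquery.
#
# from google.cloud import bigquery
#
# client = bigquery.Client()
# query_job = client.query("SELECT 1 AS x")
#
# # to_dataframe accepts "tqdm", "tqdm_notebook", or "tqdm_gui"; if tqdm is
# # missing, a warning is issued and no bar is shown (see _NO_TQDM_ERROR).
# df = query_job.to_dataframe(progress_bar_type="tqdm")
# print(df)
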
.venv/lib/python3.10/site-packages/google/cloud/bigquery/_versions_helpers.py
@@ -0,0 +1,264 @@
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Shared helper functions for verifying versions of installed modules."""

import sys
from typing import Any

import packaging.version

from google.cloud.bigquery import exceptions


_MIN_PYARROW_VERSION = packaging.version.Version("3.0.0")
_MIN_BQ_STORAGE_VERSION = packaging.version.Version("2.0.0")
_BQ_STORAGE_OPTIONAL_READ_SESSION_VERSION = packaging.version.Version("2.6.0")
_MIN_PANDAS_VERSION = packaging.version.Version("1.1.0")

_MIN_PANDAS_VERSION_RANGE = packaging.version.Version("1.5.0")
_MIN_PYARROW_VERSION_RANGE = packaging.version.Version("10.0.1")


class PyarrowVersions:
    """Version comparisons for the pyarrow package."""

    def __init__(self):
        self._installed_version = None

    @property
    def installed_version(self) -> packaging.version.Version:
        """Return the parsed version of pyarrow."""
        if self._installed_version is None:
            import pyarrow  # type: ignore

            self._installed_version = packaging.version.parse(
                # Use 0.0.0, since it is earlier than any released version.
                # Legacy versions also have the same property, but
                # creating a LegacyVersion has been deprecated.
                # https://github.com/pypa/packaging/issues/321
                getattr(pyarrow, "__version__", "0.0.0")
            )

        return self._installed_version

    @property
    def use_compliant_nested_type(self) -> bool:
        return self.installed_version.major >= 4

    def try_import(self, raise_if_error: bool = False) -> Any:
        """Verifies that a recent enough version of the pyarrow extra is installed.

        The function assumes that the pyarrow extra is installed, and should thus
        be used in places where this assumption holds.

        Because `pip` can install an outdated version of this extra despite
        the constraints in `setup.py`, the calling code can use this helper
        to verify the version compatibility at runtime.

        Returns:
            The ``pyarrow`` module or ``None``.

        Raises:
            exceptions.LegacyPyarrowError:
                If the pyarrow package is outdated and ``raise_if_error`` is
                ``True``.
        """
        try:
            import pyarrow
        except ImportError as exc:
            if raise_if_error:
                raise exceptions.LegacyPyarrowError(
                    "pyarrow package not found. Install pyarrow version >="
                    f" {_MIN_PYARROW_VERSION}."
                ) from exc
            return None

        if self.installed_version < _MIN_PYARROW_VERSION:
            if raise_if_error:
                msg = (
                    "Dependency pyarrow is outdated, please upgrade"
                    f" it to version >= {_MIN_PYARROW_VERSION}"
                    f" (version found: {self.installed_version})."
                )
                raise exceptions.LegacyPyarrowError(msg)
            return None

        return pyarrow


PYARROW_VERSIONS = PyarrowVersions()


class BQStorageVersions:
    """Version comparisons for the google-cloud-bigquery-storage package."""

    def __init__(self):
        self._installed_version = None

    @property
    def installed_version(self) -> packaging.version.Version:
        """Return the parsed version of google-cloud-bigquery-storage."""
        if self._installed_version is None:
            from google.cloud import bigquery_storage

            self._installed_version = packaging.version.parse(
                # Use 0.0.0, since it is earlier than any released version.
                # Legacy versions also have the same property, but
                # creating a LegacyVersion has been deprecated.
                # https://github.com/pypa/packaging/issues/321
                getattr(bigquery_storage, "__version__", "0.0.0")
            )

        return self._installed_version  # type: ignore

    @property
    def is_read_session_optional(self) -> bool:
        """True if read_session is optional to rows().

        See: https://github.com/googleapis/python-bigquery-storage/pull/228
        """
        return self.installed_version >= _BQ_STORAGE_OPTIONAL_READ_SESSION_VERSION

    def try_import(self, raise_if_error: bool = False) -> Any:
        """Tries to import the bigquery_storage module, and returns results
        accordingly. It also verifies the module version is recent enough.

        If the import succeeds, returns the ``bigquery_storage`` module.

        If the import fails,
        returns ``None`` when ``raise_if_error == False``,
        raises an error when ``raise_if_error == True``.

        Returns:
            The ``bigquery_storage`` module or ``None``.

        Raises:
            exceptions.BigQueryStorageNotFoundError:
                If google-cloud-bigquery-storage is not installed.
            exceptions.LegacyBigQueryStorageError:
                If the google-cloud-bigquery-storage package is outdated.
        """
        try:
            from google.cloud import bigquery_storage  # type: ignore
        except ImportError:
            if raise_if_error:
                msg = (
                    "Package google-cloud-bigquery-storage not found. "
                    "Install google-cloud-bigquery-storage version >= "
                    f"{_MIN_BQ_STORAGE_VERSION}."
                )
                raise exceptions.BigQueryStorageNotFoundError(msg)
            return None

        if self.installed_version < _MIN_BQ_STORAGE_VERSION:
            if raise_if_error:
                msg = (
                    "Dependency google-cloud-bigquery-storage is outdated, "
                    f"please upgrade it to version >= {_MIN_BQ_STORAGE_VERSION} "
                    f"(version found: {self.installed_version})."
                )
                raise exceptions.LegacyBigQueryStorageError(msg)
            return None

        return bigquery_storage


BQ_STORAGE_VERSIONS = BQStorageVersions()


class PandasVersions:
    """Version comparisons for the pandas package."""

    def __init__(self):
        self._installed_version = None

    @property
    def installed_version(self) -> packaging.version.Version:
        """Return the parsed version of pandas."""
        if self._installed_version is None:
            import pandas  # type: ignore

            self._installed_version = packaging.version.parse(
                # Use 0.0.0, since it is earlier than any released version.
                # Legacy versions also have the same property, but
                # creating a LegacyVersion has been deprecated.
                # https://github.com/pypa/packaging/issues/321
                getattr(pandas, "__version__", "0.0.0")
            )

        return self._installed_version

    def try_import(self, raise_if_error: bool = False) -> Any:
        """Verify that a recent enough version of the pandas extra is installed.

        The function assumes that the pandas extra is installed, and should thus
        be used in places where this assumption holds.

        Because `pip` can install an outdated version of this extra despite
        the constraints in `setup.py`, the calling code can use this helper
        to verify the version compatibility at runtime.

        Returns:
            The ``pandas`` module or ``None``.

        Raises:
            exceptions.LegacyPandasError:
                If the pandas package is outdated and ``raise_if_error`` is
                ``True``.
        """
        try:
            import pandas
        except ImportError as exc:
            if raise_if_error:
                raise exceptions.LegacyPandasError(
                    "pandas package not found. Install pandas version >="
                    f" {_MIN_PANDAS_VERSION}"
                ) from exc
            return None

        if self.installed_version < _MIN_PANDAS_VERSION:
            if raise_if_error:
                msg = (
                    "Dependency pandas is outdated, please upgrade"
                    f" it to version >= {_MIN_PANDAS_VERSION}"
                    f" (version found: {self.installed_version})."
                )
                raise exceptions.LegacyPandasError(msg)
            return None

        return pandas


PANDAS_VERSIONS = PandasVersions()

# Since RANGE support in pandas requires specific versions
# of both pyarrow and pandas, we make this a separate
# constant instead of a property of PANDAS_VERSIONS
# or PYARROW_VERSIONS.
SUPPORTS_RANGE_PYARROW = (
    PANDAS_VERSIONS.try_import() is not None
    and PANDAS_VERSIONS.installed_version >= _MIN_PANDAS_VERSION_RANGE
    and PYARROW_VERSIONS.try_import() is not None
    and PYARROW_VERSIONS.installed_version >= _MIN_PYARROW_VERSION_RANGE
)


def extract_runtime_version():
    # Retrieve the version information.
    version_info = sys.version_info

    # Extract the major, minor, and micro components.
    major = version_info.major
    minor = version_info.minor
    micro = version_info.micro

    # Return the version number components.
    return major, minor, micro
4406  .venv/lib/python3.10/site-packages/google/cloud/bigquery/client.py (normal file; file diff suppressed because it is too large)
1076  .venv/lib/python3.10/site-packages/google/cloud/bigquery/dataset.py (normal file; file diff suppressed because it is too large)
@@ -0,0 +1,87 @@
# Copyright 2017 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Google BigQuery implementation of the Database API Specification v2.0.

This module implements the `Python Database API Specification v2.0 (DB-API)`_
for Google BigQuery.

.. _Python Database API Specification v2.0 (DB-API):
   https://www.python.org/dev/peps/pep-0249/
"""

from google.cloud.bigquery.dbapi.connection import connect
from google.cloud.bigquery.dbapi.connection import Connection
from google.cloud.bigquery.dbapi.cursor import Cursor
from google.cloud.bigquery.dbapi.exceptions import Warning
from google.cloud.bigquery.dbapi.exceptions import Error
from google.cloud.bigquery.dbapi.exceptions import InterfaceError
from google.cloud.bigquery.dbapi.exceptions import DatabaseError
from google.cloud.bigquery.dbapi.exceptions import DataError
from google.cloud.bigquery.dbapi.exceptions import OperationalError
from google.cloud.bigquery.dbapi.exceptions import IntegrityError
from google.cloud.bigquery.dbapi.exceptions import InternalError
from google.cloud.bigquery.dbapi.exceptions import ProgrammingError
from google.cloud.bigquery.dbapi.exceptions import NotSupportedError
from google.cloud.bigquery.dbapi.types import Binary
from google.cloud.bigquery.dbapi.types import Date
from google.cloud.bigquery.dbapi.types import DateFromTicks
from google.cloud.bigquery.dbapi.types import Time
from google.cloud.bigquery.dbapi.types import TimeFromTicks
from google.cloud.bigquery.dbapi.types import Timestamp
from google.cloud.bigquery.dbapi.types import TimestampFromTicks
from google.cloud.bigquery.dbapi.types import BINARY
from google.cloud.bigquery.dbapi.types import DATETIME
from google.cloud.bigquery.dbapi.types import NUMBER
from google.cloud.bigquery.dbapi.types import ROWID
from google.cloud.bigquery.dbapi.types import STRING


apilevel = "2.0"

# Threads may share the module and connections, but not cursors.
threadsafety = 2

paramstyle = "pyformat"

__all__ = [
    "apilevel",
    "threadsafety",
    "paramstyle",
    "connect",
    "Connection",
    "Cursor",
    "Warning",
    "Error",
    "InterfaceError",
    "DatabaseError",
    "DataError",
    "OperationalError",
    "IntegrityError",
    "InternalError",
    "ProgrammingError",
    "NotSupportedError",
    "Binary",
    "Date",
    "DateFromTicks",
    "Time",
    "TimeFromTicks",
    "Timestamp",
    "TimestampFromTicks",
    "BINARY",
    "DATETIME",
    "NUMBER",
    "ROWID",
    "STRING",
]
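
A minimal usage sketch of the surface exported above; the query, parameter value, and ambient credentials are illustrative assumptions, not part of the vendored file:

from google.cloud.bigquery import dbapi

conn = dbapi.connect()  # builds a bigquery.Client from ambient credentials
cur = conn.cursor()
# paramstyle is "pyformat", so placeholders look like %(name)s
cur.execute("SELECT %(greeting)s AS greeting", {"greeting": "hello"})
print(cur.fetchall())
conn.close()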
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,522 @@
# Copyright 2017 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


from collections import abc as collections_abc
import datetime
import decimal
import functools
import numbers
import re
import typing

from google.cloud import bigquery
from google.cloud.bigquery import table, query
from google.cloud.bigquery.dbapi import exceptions


_NUMERIC_SERVER_MIN = decimal.Decimal("-9.9999999999999999999999999999999999999E+28")
_NUMERIC_SERVER_MAX = decimal.Decimal("9.9999999999999999999999999999999999999E+28")

type_parameters_re = re.compile(
    r"""
    \(
    \s*[0-9]+\s*
    (,
    \s*[0-9]+\s*
    )*
    \)
    """,
    re.VERBOSE,
)


def _parameter_type(name, value, query_parameter_type=None, value_doc=""):
    if query_parameter_type:
        # Strip type parameters
        query_parameter_type = type_parameters_re.sub("", query_parameter_type)
        try:
            parameter_type = getattr(
                query.SqlParameterScalarTypes, query_parameter_type.upper()
            )._type
        except AttributeError:
            raise exceptions.ProgrammingError(
                f"The given parameter type, {query_parameter_type},"
                f" for {name} is not a valid BigQuery scalar type."
            )
    else:
        parameter_type = bigquery_scalar_type(value)
        if parameter_type is None:
            raise exceptions.ProgrammingError(
                f"Encountered parameter {name} with "
                f"{value_doc} value {value} of unexpected type."
            )
    return parameter_type


def scalar_to_query_parameter(value, name=None, query_parameter_type=None):
    """Convert a scalar value into a query parameter.

    Args:
        value (Any):
            A scalar value to convert into a query parameter.

        name (str):
            (Optional) Name of the query parameter.
        query_parameter_type (Optional[str]): Given type for the parameter.

    Returns:
        google.cloud.bigquery.ScalarQueryParameter:
            A query parameter corresponding with the type and value of the plain
            Python object.

    Raises:
        google.cloud.bigquery.dbapi.exceptions.ProgrammingError:
            if the type cannot be determined.
    """
    return bigquery.ScalarQueryParameter(
        name, _parameter_type(name, value, query_parameter_type), value
    )


def array_to_query_parameter(value, name=None, query_parameter_type=None):
    """Convert an array-like value into a query parameter.

    Args:
        value (Sequence[Any]): The elements of the array (should not be a
            string-like Sequence).
        name (Optional[str]): Name of the query parameter.
        query_parameter_type (Optional[str]): Given type for the parameter.

    Returns:
        A query parameter corresponding with the type and value of the plain
        Python object.

    Raises:
        google.cloud.bigquery.dbapi.exceptions.ProgrammingError:
            if the type of array elements cannot be determined.
    """
    if not array_like(value):
        raise exceptions.ProgrammingError(
            "The value of parameter {} must be a sequence that is "
            "not string-like.".format(name)
        )

    if query_parameter_type or value:
        array_type = _parameter_type(
            name,
            value[0] if value else None,
            query_parameter_type,
            value_doc="array element ",
        )
    else:
        raise exceptions.ProgrammingError(
            "Encountered an empty array-like value of parameter {}, cannot "
            "determine array elements type.".format(name)
        )

    return bigquery.ArrayQueryParameter(name, array_type, value)


def _parse_struct_fields(
    fields,
    base,
    parse_struct_field=re.compile(
        r"""
        (?:(\w+)\s+)       # field name
        ([A-Z0-9<> ,()]+)  # Field type
        $""",
        re.VERBOSE | re.IGNORECASE,
    ).match,
):
    # Split a string of struct fields. They're defined by commas, but
    # we have to avoid splitting on commas internal to fields. For
    # example:
    #   name string, children array<struct<name string, bdate date>>
    #
    # only has 2 top-level fields.
    fields = fields.split(",")
    fields = list(reversed(fields))  # in the off chance that there are very many
    while fields:
        field = fields.pop()
        while fields and field.count("<") != field.count(">"):
            field += "," + fields.pop()

        m = parse_struct_field(field.strip())
        if not m:
            raise exceptions.ProgrammingError(
                f"Invalid struct field, {field}, in {base}"
            )
        yield m.group(1, 2)


SCALAR, ARRAY, STRUCT = ("s", "a", "r")


def _parse_type(
    type_,
    name,
    base,
    complex_query_parameter_parse=re.compile(
        r"""
        \s*
        (ARRAY|STRUCT|RECORD)  # Type
        \s*
        <([A-Z0-9_<> ,()]+)>   # Subtype(s)
        \s*$
        """,
        re.IGNORECASE | re.VERBOSE,
    ).match,
):
    if "<" not in type_:
        # Scalar

        # Strip type parameters
        type_ = type_parameters_re.sub("", type_).strip()
        try:
            type_ = getattr(query.SqlParameterScalarTypes, type_.upper())
        except AttributeError:
            raise exceptions.ProgrammingError(
                f"The given parameter type, {type_},"
                f"{' for ' + name if name else ''}"
                f" is not a valid BigQuery scalar type, in {base}."
            )
        if name:
            type_ = type_.with_name(name)
        return SCALAR, type_

    m = complex_query_parameter_parse(type_)
    if not m:
        raise exceptions.ProgrammingError(f"Invalid parameter type, {type_}")
    tname, sub = m.group(1, 2)
    if tname.upper() == "ARRAY":
        sub_type = complex_query_parameter_type(None, sub, base)
        if isinstance(sub_type, query.ArrayQueryParameterType):
            raise exceptions.ProgrammingError(f"Array can't contain an array in {base}")
        sub_type._complex__src = sub
        return ARRAY, sub_type
    else:
        return STRUCT, _parse_struct_fields(sub, base)


def complex_query_parameter_type(name: typing.Optional[str], type_: str, base: str):
    """Construct a parameter type (`StructQueryParameterType`) for a complex type

    or a non-complex type that's part of a complex type.

    Examples:

        array<struct<x float64, y float64>>

        struct<name string, children array<struct<name string, bdate date>>>

    This is used for computing array types.
    """

    type_type, sub_type = _parse_type(type_, name, base)
    if type_type == SCALAR:
        result_type = sub_type
    elif type_type == ARRAY:
        result_type = query.ArrayQueryParameterType(sub_type, name=name)
    elif type_type == STRUCT:
        fields = [
            complex_query_parameter_type(field_name, field_type, base)
            for field_name, field_type in sub_type
        ]
        result_type = query.StructQueryParameterType(*fields, name=name)
    else:  # pragma: NO COVER
        raise AssertionError("Bad type_type", type_type)  # Can't happen :)

    return result_type


def complex_query_parameter(
    name: typing.Optional[str], value, type_: str, base: typing.Optional[str] = None
):
    """
    Construct a query parameter for a complex type (array or struct record)

    or for a subtype, which may not be complex

    Examples:

        array<struct<x float64, y float64>>

        struct<name string, children array<struct<name string, bdate date>>>

    """
    param: typing.Union[
        query.ScalarQueryParameter,
        query.ArrayQueryParameter,
        query.StructQueryParameter,
    ]

    base = base or type_

    type_type, sub_type = _parse_type(type_, name, base)

    if type_type == SCALAR:
        param = query.ScalarQueryParameter(name, sub_type._type, value)
    elif type_type == ARRAY:
        if not array_like(value):
            raise exceptions.ProgrammingError(
                f"Array type with non-array-like value"
                f" with type {type(value).__name__}"
            )
        param = query.ArrayQueryParameter(
            name,
            sub_type,
            (
                value
                if isinstance(sub_type, query.ScalarQueryParameterType)
                else [
                    complex_query_parameter(None, v, sub_type._complex__src, base)
                    for v in value
                ]
            ),
        )
    elif type_type == STRUCT:
        if not isinstance(value, collections_abc.Mapping):
            raise exceptions.ProgrammingError(f"Non-mapping value for type {type_}")
        value_keys = set(value)
        fields = []
        for field_name, field_type in sub_type:
            if field_name not in value:
                raise exceptions.ProgrammingError(
                    f"No field value for {field_name} in {type_}"
                )
            value_keys.remove(field_name)
            fields.append(
                complex_query_parameter(field_name, value[field_name], field_type, base)
            )
        if value_keys:
            raise exceptions.ProgrammingError(f"Extra data keys for {type_}")

        param = query.StructQueryParameter(name, *fields)
    else:  # pragma: NO COVER
        raise AssertionError("Bad type_type", type_type)  # Can't happen :)

    return param
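
A short sketch of how `complex_query_parameter` resolves a struct type string against a plain dict; the name and value below are hypothetical, and in normal use this path is reached via typed placeholders in `execute()`:

param = complex_query_parameter(
    "point", {"x": 1.0, "y": 2.0}, "struct<x float64, y float64>"
)
# param is a StructQueryParameter whose fields are two FLOAT64 scalars.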


def _dispatch_parameter(type_, value, name=None):
    if type_ is not None and "<" in type_:
        param = complex_query_parameter(name, value, type_)
    elif isinstance(value, collections_abc.Mapping):
        raise NotImplementedError(
            f"STRUCT-like parameter values are not supported"
            f"{' (parameter ' + name + ')' if name else ''},"
            f" unless an explicit type is given in the parameter placeholder"
            f" (e.g. '%({name if name else ''}:struct<...>)s')."
        )
    elif array_like(value):
        param = array_to_query_parameter(value, name, type_)
    else:
        param = scalar_to_query_parameter(value, name, type_)

    return param


def to_query_parameters_list(parameters, parameter_types):
    """Converts a sequence of parameter values into query parameters.

    Args:
        parameters (Sequence[Any]): Sequence of query parameter values.
        parameter_types:
            A list of parameter types, one for each parameter.
            Unknown types are provided as None.

    Returns:
        List[google.cloud.bigquery.query._AbstractQueryParameter]:
            A list of query parameters.
    """
    return [
        _dispatch_parameter(type_, value)
        for value, type_ in zip(parameters, parameter_types)
    ]


def to_query_parameters_dict(parameters, query_parameter_types):
    """Converts a dictionary of parameter values into query parameters.

    Args:
        parameters (Mapping[str, Any]): Dictionary of query parameter values.
        query_parameter_types:
            A dictionary of parameter types. It needn't have a key for each
            parameter.

    Returns:
        List[google.cloud.bigquery.query._AbstractQueryParameter]:
            A list of named query parameters.
    """
    return [
        _dispatch_parameter(query_parameter_types.get(name), value, name)
        for name, value in parameters.items()
    ]


def to_query_parameters(parameters, parameter_types):
    """Converts DB-API parameter values into query parameters.

    Args:
        parameters (Union[Mapping[str, Any], Sequence[Any]]):
            A dictionary or sequence of query parameter values.
        parameter_types (Union[Mapping[str, str], Sequence[str]]):
            A dictionary or list of parameter types.

            If parameters is a mapping, then this must be a dictionary
            of parameter types. It needn't have a key for each
            parameter.

            If parameters is a sequence, then this must be a list of
            parameter types, one for each parameter. Unknown types
            are provided as None.

    Returns:
        List[google.cloud.bigquery.query._AbstractQueryParameter]:
            A list of query parameters.
    """
    if parameters is None:
        return []

    if isinstance(parameters, collections_abc.Mapping):
        return to_query_parameters_dict(parameters, parameter_types)
    else:
        return to_query_parameters_list(parameters, parameter_types)
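
Both accepted shapes, sketched with made-up values (positional parameters pair with a list of types, named parameters with a dict):

to_query_parameters([42, "abc"], [None, None])  # unnamed; types inferred
to_query_parameters({"n": 1}, {"n": "INT64"})   # named; type given explicitly
to_query_parameters(None, None)                 # no parameters -> []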


def bigquery_scalar_type(value):
    """Return a BigQuery name of the scalar type that matches the given value.

    If the scalar type name could not be determined (e.g. for non-scalar
    values), ``None`` is returned.

    Args:
        value (Any)

    Returns:
        Optional[str]: The BigQuery scalar type name.
    """
    if isinstance(value, bool):
        return "BOOL"
    elif isinstance(value, numbers.Integral):
        return "INT64"
    elif isinstance(value, numbers.Real):
        return "FLOAT64"
    elif isinstance(value, decimal.Decimal):
        vtuple = value.as_tuple()
        # NUMERIC values have precision of 38 (number of digits) and scale of 9 (number
        # of fractional digits), and their max absolute value must be strictly smaller
        # than 1.0E+29.
        # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#decimal_types
        if (
            len(vtuple.digits) <= 38  # max precision: 38
            and vtuple.exponent >= -9  # max scale: 9
            and _NUMERIC_SERVER_MIN <= value <= _NUMERIC_SERVER_MAX
        ):
            return "NUMERIC"
        else:
            return "BIGNUMERIC"

    elif isinstance(value, str):
        return "STRING"
    elif isinstance(value, bytes):
        return "BYTES"
    elif isinstance(value, datetime.datetime):
        return "DATETIME" if value.tzinfo is None else "TIMESTAMP"
    elif isinstance(value, datetime.date):
        return "DATE"
    elif isinstance(value, datetime.time):
        return "TIME"

    return None
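
A sketch of the NUMERIC/BIGNUMERIC split above, with illustrative values:

import decimal

bigquery_scalar_type(decimal.Decimal("1.5"))    # "NUMERIC": precision/scale fit
bigquery_scalar_type(decimal.Decimal("1E-10"))  # "BIGNUMERIC": scale exceeds 9
bigquery_scalar_type(["not", "scalar"])         # None: not a scalar value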


def array_like(value):
    """Determine if the given value is array-like.

    Examples of array-like values (as interpreted by this function) are
    sequences such as ``list`` and ``tuple``, but not strings and other
    iterables such as sets.

    Args:
        value (Any)

    Returns:
        bool: ``True`` if the value is considered array-like, ``False`` otherwise.
    """
    return isinstance(value, collections_abc.Sequence) and not isinstance(
        value, (str, bytes, bytearray)
    )


def to_bq_table_rows(rows_iterable):
    """Convert table rows to BigQuery table Row instances.

    Args:
        rows_iterable (Iterable[Mapping]):
            An iterable of row data items to convert to ``Row`` instances.

    Returns:
        Iterable[google.cloud.bigquery.table.Row]
    """

    def to_table_row(row):
        # NOTE: We fetch ARROW values, thus we need to convert them to Python
        # objects with as_py().
        values = tuple(value.as_py() for value in row.values())
        keys_to_index = {key: i for i, key in enumerate(row.keys())}
        return table.Row(values, keys_to_index)

    return (to_table_row(row_data) for row_data in rows_iterable)


def raise_on_closed(
    exc_msg, exc_class=exceptions.ProgrammingError, closed_attr_name="_closed"
):
    """Make public instance methods raise an error if the instance is closed."""

    def _raise_on_closed(method):
        """Make a non-static method raise an error if its containing instance is closed."""

        def with_closed_check(self, *args, **kwargs):
            if getattr(self, closed_attr_name):
                raise exc_class(exc_msg)
            return method(self, *args, **kwargs)

        functools.update_wrapper(with_closed_check, method)
        return with_closed_check

    def decorate_public_methods(klass):
        """Apply ``_raise_on_closed()`` decorator to public instance methods."""
        for name in dir(klass):
            if name.startswith("_") and name != "__iter__":
                continue

            member = getattr(klass, name)
            if not callable(member):
                continue

            # We need to check for class/static methods directly in the instance
            # __dict__, not via the retrieved attribute (`member`), as the
            # latter is already a callable *produced* by one of these descriptors.
            if isinstance(klass.__dict__[name], (staticmethod, classmethod)):
                continue

            member = _raise_on_closed(member)
            setattr(klass, name, member)

        return klass

    return decorate_public_methods
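
A sketch of applying the factory to a hypothetical class (it is used exactly this way on the Connection and Cursor classes in the sibling modules):

@raise_on_closed("Operating on a closed resource.")
class Resource:
    def __init__(self):
        self._closed = False

    def do_work(self):  # public, so it gets the closed-check wrapper
        return "ok"

r = Resource()
r.do_work()      # "ok"
r._closed = True
r.do_work()      # raises ProgrammingError("Operating on a closed resource.")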
@@ -0,0 +1,128 @@
# Copyright 2017 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Connection for the Google BigQuery DB-API."""

import weakref

from google.cloud import bigquery
from google.cloud.bigquery.dbapi import cursor
from google.cloud.bigquery.dbapi import _helpers


@_helpers.raise_on_closed("Operating on a closed connection.")
class Connection(object):
    """DB-API Connection to Google BigQuery.

    Args:
        client (Optional[google.cloud.bigquery.Client]):
            A REST API client used to connect to BigQuery. If not passed, a
            client is created using default options inferred from the environment.
        bqstorage_client(\
            Optional[google.cloud.bigquery_storage_v1.BigQueryReadClient] \
        ):
            A client that uses the faster BigQuery Storage API to fetch rows from
            BigQuery. If not passed, it is created using the same credentials
            as ``client`` (provided that BigQuery Storage dependencies are installed).
        prefer_bqstorage_client (Optional[bool]):
            Prefer the BigQuery Storage client over the REST client. If Storage
            client isn't available, fall back to the REST client. Defaults to
            ``True``.
    """

    def __init__(
        self,
        client=None,
        bqstorage_client=None,
        prefer_bqstorage_client=True,
    ):
        if client is None:
            client = bigquery.Client()
            self._owns_client = True
        else:
            self._owns_client = False

        # A warning is already raised by the BQ Storage client factory if
        # instantiation fails, or if the given BQ Storage client instance is outdated.
        if not prefer_bqstorage_client:
            bqstorage_client = None
            self._owns_bqstorage_client = False
        elif bqstorage_client is None:
            bqstorage_client = client._ensure_bqstorage_client()
            self._owns_bqstorage_client = bqstorage_client is not None
        else:
            self._owns_bqstorage_client = False
            bqstorage_client = client._ensure_bqstorage_client(bqstorage_client)

        self._client = client
        self._bqstorage_client = bqstorage_client

        self._closed = False
        self._cursors_created = weakref.WeakSet()

    def close(self):
        """Close the connection and any cursors created from it.

        Any BigQuery clients explicitly passed to the constructor are *not*
        closed, only those created by the connection instance itself.
        """
        self._closed = True

        if self._owns_client:
            self._client.close()

        if self._owns_bqstorage_client:
            # There is no close() on the BQ Storage client itself.
            self._bqstorage_client._transport.grpc_channel.close()

        for cursor_ in self._cursors_created:
            if not cursor_._closed:
                cursor_.close()

    def commit(self):
        """No-op, but for consistency raise an error if connection is closed."""

    def cursor(self):
        """Return a new cursor object.

        Returns:
            google.cloud.bigquery.dbapi.Cursor: A DB-API cursor that uses this connection.
        """
        new_cursor = cursor.Cursor(self)
        self._cursors_created.add(new_cursor)
        return new_cursor


def connect(client=None, bqstorage_client=None, prefer_bqstorage_client=True):
    """Construct a DB-API connection to Google BigQuery.

    Args:
        client (Optional[google.cloud.bigquery.Client]):
            A REST API client used to connect to BigQuery. If not passed, a
            client is created using default options inferred from the environment.
        bqstorage_client(\
            Optional[google.cloud.bigquery_storage_v1.BigQueryReadClient] \
        ):
            A client that uses the faster BigQuery Storage API to fetch rows from
            BigQuery. If not passed, it is created using the same credentials
            as ``client`` (provided that BigQuery Storage dependencies are installed).
        prefer_bqstorage_client (Optional[bool]):
            Prefer the BigQuery Storage client over the REST client. If Storage
            client isn't available, fall back to the REST client. Defaults to
            ``True``.

    Returns:
        google.cloud.bigquery.dbapi.Connection: A new DB-API connection to BigQuery.
    """
    return Connection(client, bqstorage_client, prefer_bqstorage_client)
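
A sketch of the ownership rules above; `my_client` is a hypothetical pre-built `bigquery.Client`:

owned = connect()                   # the connection creates and owns a client
shared = connect(client=my_client)  # caller-supplied client is merely borrowed

owned.close()   # also closes the internally created client (and any owned BQ Storage channel)
shared.close()  # my_client remains open for further use by the caller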
@@ -0,0 +1,586 @@
# Copyright 2017 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Cursor for the Google BigQuery DB-API."""

from __future__ import annotations

import collections
from collections import abc as collections_abc
import re
from typing import Optional

try:
    from google.cloud.bigquery_storage import ArrowSerializationOptions
except ImportError:
    _ARROW_COMPRESSION_SUPPORT = False
else:
    # Having BQ Storage available implies that pyarrow >=1.0.0 is available, too.
    _ARROW_COMPRESSION_SUPPORT = True

from google.cloud.bigquery import job
from google.cloud.bigquery.dbapi import _helpers
from google.cloud.bigquery.dbapi import exceptions
import google.cloud.exceptions  # type: ignore


# Per PEP 249: A 7-item sequence containing information describing one result
# column. The first two items (name and type_code) are mandatory, the other
# five are optional and are set to None if no meaningful values can be
# provided.
Column = collections.namedtuple(
    "Column",
    [
        "name",
        "type_code",
        "display_size",
        "internal_size",
        "precision",
        "scale",
        "null_ok",
    ],
)


@_helpers.raise_on_closed("Operating on a closed cursor.")
class Cursor(object):
    """DB-API Cursor to Google BigQuery.

    Args:
        connection (google.cloud.bigquery.dbapi.Connection):
            A DB-API connection to Google BigQuery.
    """

    def __init__(self, connection):
        self.connection = connection
        self.description = None
        # Per PEP 249: The attribute is -1 in case no .execute*() has been
        # performed on the cursor or the rowcount of the last operation
        # cannot be determined by the interface.
        self.rowcount = -1
        # Per PEP 249: The arraysize attribute defaults to 1, meaning to fetch
        # a single row at a time. However, we deviate from that, and set the
        # default to None, allowing the backend to automatically determine the
        # most appropriate size.
        self.arraysize = None
        self._query_data = None
        self._query_rows = None
        self._closed = False

    @property
    def query_job(self) -> Optional[job.QueryJob]:
        """google.cloud.bigquery.job.query.QueryJob | None: The query job
        created by the last ``execute*()`` call, if a query job was created.

        .. note::
            If the last ``execute*()`` call was ``executemany()``, this is the
            last job created by ``executemany()``."""
        rows = self._query_rows

        if rows is None:
            return None

        job_id = rows.job_id
        project = rows.project
        location = rows.location
        client = self.connection._client

        if job_id is None:
            return None

        return client.get_job(job_id, location=location, project=project)

    def close(self):
        """Mark the cursor as closed, preventing its further use."""
        self._closed = True

    def _set_description(self, schema):
        """Set description from schema.

        Args:
            schema (Sequence[google.cloud.bigquery.schema.SchemaField]):
                A description of fields in the schema.
        """
        if schema is None:
            self.description = None
            return

        self.description = tuple(
            Column(
                name=field.name,
                type_code=field.field_type,
                display_size=None,
                internal_size=None,
                precision=None,
                scale=None,
                null_ok=field.is_nullable,
            )
            for field in schema
        )

    def _set_rowcount(self, rows):
        """Set the rowcount from a RowIterator.

        Normally, this sets rowcount to the number of rows returned by the
        query, but if it was a DML statement, it sets rowcount to the number
        of modified rows.

        Args:
            rows (google.cloud.bigquery.table.RowIterator):
                Results of a query.
        """
        total_rows = 0
        num_dml_affected_rows = rows.num_dml_affected_rows

        if rows.total_rows is not None and rows.total_rows > 0:
            total_rows = rows.total_rows
        if num_dml_affected_rows is not None and num_dml_affected_rows > 0:
            total_rows = num_dml_affected_rows
        self.rowcount = total_rows

    def execute(self, operation, parameters=None, job_id=None, job_config=None):
        """Prepare and execute a database operation.

        .. note::
            When setting query parameters, values which are "text"
            (``unicode`` in Python2, ``str`` in Python3) will use
            the 'STRING' BigQuery type. Values which are "bytes" (``str`` in
            Python2, ``bytes`` in Python3) will use the 'BYTES' type.

            A `~datetime.datetime` parameter without timezone information uses
            the 'DATETIME' BigQuery type (example: Global Pi Day Celebration
            March 14, 2017 at 1:59pm). A `~datetime.datetime` parameter with
            timezone information uses the 'TIMESTAMP' BigQuery type (example:
            a wedding on April 29, 2011 at 11am, British Summer Time).

            For more information about BigQuery data types, see:
            https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types

            ``STRUCT``/``RECORD`` and ``REPEATED`` query parameters are not
            yet supported. See:
            https://github.com/GoogleCloudPlatform/google-cloud-python/issues/3524

        Args:
            operation (str): A Google BigQuery query string.

            parameters (Union[Mapping[str, Any], Sequence[Any]]):
                (Optional) dictionary or sequence of parameter values.

            job_id (str | None):
                (Optional and discouraged) The job ID to use when creating
                the query job. For best performance and reliability, manually
                setting a job ID is discouraged.

            job_config (google.cloud.bigquery.job.QueryJobConfig):
                (Optional) Extra configuration options for the query job.
        """
        formatted_operation, parameter_types = _format_operation(operation, parameters)
        self._execute(
            formatted_operation, parameters, job_id, job_config, parameter_types
        )

    def _execute(
        self, formatted_operation, parameters, job_id, job_config, parameter_types
    ):
        self._query_data = None
        self._query_results = None
        client = self.connection._client

        # The DB-API uses the pyformat formatting, since the way BigQuery does
        # query parameters was not one of the standard options. Convert both
        # the query and the parameters to the format expected by the client
        # libraries.
        query_parameters = _helpers.to_query_parameters(parameters, parameter_types)

        config = job_config or job.QueryJobConfig()
        config.query_parameters = query_parameters

        # Start the query and wait for the query to finish.
        try:
            if job_id is not None:
                rows = client.query(
                    formatted_operation,
                    job_config=job_config,
                    job_id=job_id,
                ).result(
                    page_size=self.arraysize,
                )
            else:
                rows = client.query_and_wait(
                    formatted_operation,
                    job_config=config,
                    page_size=self.arraysize,
                )
        except google.cloud.exceptions.GoogleCloudError as exc:
            raise exceptions.DatabaseError(exc)

        self._query_rows = rows
        self._set_description(rows.schema)

        if config.dry_run:
            self.rowcount = 0
        else:
            self._set_rowcount(rows)

    def executemany(self, operation, seq_of_parameters):
        """Prepare and execute a database operation multiple times.

        Args:
            operation (str): A Google BigQuery query string.

            seq_of_parameters (Union[Sequence[Mapping[str, Any]], Sequence[Sequence[Any]]]):
                Sequence of many sets of parameter values.
        """
        if seq_of_parameters:
            rowcount = 0
            # There's no reason to format the line more than once, as
            # the operation only barely depends on the parameters. So
            # we just use the first set of parameters. If there are
            # different numbers or types of parameters, we'll error
            # anyway.
            formatted_operation, parameter_types = _format_operation(
                operation, seq_of_parameters[0]
            )
            for parameters in seq_of_parameters:
                self._execute(
                    formatted_operation, parameters, None, None, parameter_types
                )
                rowcount += self.rowcount

            self.rowcount = rowcount
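
A sketch of executemany() in use; the cursor, dataset, and table names are placeholders:

cur = connection.cursor()
cur.executemany(
    "INSERT INTO my_dataset.people (name, age) VALUES (%s, %s)",
    [("Ada", 36), ("Grace", 45)],
)
print(cur.rowcount)  # 2: rowcounts are summed across the per-row jobs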

    def _try_fetch(self, size=None):
        """Try to start fetching data, if not yet started.

        Mutates self to indicate that iteration has started.
        """
        if self._query_data is not None:
            # Already started fetching the data.
            return

        rows = self._query_rows
        if rows is None:
            raise exceptions.InterfaceError(
                "No query results: execute() must be called before fetch."
            )

        bqstorage_client = self.connection._bqstorage_client
        if rows._should_use_bqstorage(
            bqstorage_client,
            create_bqstorage_client=False,
        ):
            rows_iterable = self._bqstorage_fetch(bqstorage_client)
            self._query_data = _helpers.to_bq_table_rows(rows_iterable)
            return

        self._query_data = iter(rows)

    def _bqstorage_fetch(self, bqstorage_client):
        """Start fetching data with the BigQuery Storage API.

        The method assumes that the data about the relevant query job already
        exists internally.

        Args:
            bqstorage_client(\
                google.cloud.bigquery_storage_v1.BigQueryReadClient \
            ):
                A client that knows how to talk to the BigQuery Storage API.

        Returns:
            Iterable[Mapping]:
                A sequence of rows, represented as dictionaries.
        """
        # Hitting this code path with a BQ Storage client instance implies that
        # bigquery_storage can indeed be imported here without errors.
        from google.cloud import bigquery_storage

        table_reference = self._query_rows._table

        requested_session = bigquery_storage.types.ReadSession(
            table=table_reference.to_bqstorage(),
            data_format=bigquery_storage.types.DataFormat.ARROW,
        )

        if _ARROW_COMPRESSION_SUPPORT:
            requested_session.read_options.arrow_serialization_options.buffer_compression = (
                ArrowSerializationOptions.CompressionCodec.LZ4_FRAME
            )

        read_session = bqstorage_client.create_read_session(
            parent="projects/{}".format(table_reference.project),
            read_session=requested_session,
            # a single stream only, as DB API is not well-suited for multithreading
            max_stream_count=1,
        )

        if not read_session.streams:
            return iter([])  # empty table, nothing to read

        stream_name = read_session.streams[0].name
        read_rows_stream = bqstorage_client.read_rows(stream_name)

        rows_iterable = read_rows_stream.rows(read_session)
        return rows_iterable

    def fetchone(self):
        """Fetch a single row from the results of the last ``execute*()`` call.

        .. note::
            If a dry run query was executed, no rows are returned.

        Returns:
            Tuple:
                A tuple representing a row or ``None`` if no more data is
                available.

        Raises:
            google.cloud.bigquery.dbapi.InterfaceError: if called before ``execute()``.
        """
        self._try_fetch()
        try:
            return next(self._query_data)
        except StopIteration:
            return None

    def fetchmany(self, size=None):
        """Fetch multiple results from the last ``execute*()`` call.

        .. note::
            If a dry run query was executed, no rows are returned.

        .. note::
            The size parameter is not used for the request/response size.
            Set the ``arraysize`` attribute before calling ``execute()`` to
            set the batch size.

        Args:
            size (int):
                (Optional) Maximum number of rows to return. Defaults to the
                ``arraysize`` property value. If ``arraysize`` is not set, it
                defaults to ``1``.

        Returns:
            List[Tuple]: A list of rows.

        Raises:
            google.cloud.bigquery.dbapi.InterfaceError: if called before ``execute()``.
        """
        if size is None:
            # Since self.arraysize can be None (a deviation from PEP 249),
            # use an actual PEP 249 default of 1 in such case (*some* number
            # is needed here).
            size = self.arraysize if self.arraysize else 1

        self._try_fetch(size=size)
        rows = []

        for row in self._query_data:
            rows.append(row)
            if len(rows) >= size:
                break

        return rows

    def fetchall(self):
        """Fetch all remaining results from the last ``execute*()`` call.

        .. note::
            If a dry run query was executed, no rows are returned.

        Returns:
            List[Tuple]: A list of all the rows in the results.

        Raises:
            google.cloud.bigquery.dbapi.InterfaceError: if called before ``execute()``.
        """
        self._try_fetch()
        return list(self._query_data)

    def setinputsizes(self, sizes):
        """No-op, but for consistency raise an error if cursor is closed."""

    def setoutputsize(self, size, column=None):
        """No-op, but for consistency raise an error if cursor is closed."""

    def __iter__(self):
        self._try_fetch()
        return iter(self._query_data)


def _format_operation_list(operation, parameters):
    """Formats parameters in operation in the way BigQuery expects.

    The input operation will be a query like ``SELECT %s`` and the output
    will be a query like ``SELECT ?``.

    Args:
        operation (str): A Google BigQuery query string.

        parameters (Sequence[Any]): Sequence of parameter values.

    Returns:
        str: A formatted query string.

    Raises:
        google.cloud.bigquery.dbapi.ProgrammingError:
            if a parameter used in the operation is not found in the
            ``parameters`` argument.
    """
    formatted_params = ["?" for _ in parameters]

    try:
        return operation % tuple(formatted_params)
    except (TypeError, ValueError) as exc:
        raise exceptions.ProgrammingError(exc)


def _format_operation_dict(operation, parameters):
    """Formats parameters in operation in the way BigQuery expects.

    The input operation will be a query like ``SELECT %(namedparam)s`` and
    the output will be a query like ``SELECT @namedparam``.

    Args:
        operation (str): A Google BigQuery query string.

        parameters (Mapping[str, Any]): Dictionary of parameter values.

    Returns:
        str: A formatted query string.

    Raises:
        google.cloud.bigquery.dbapi.ProgrammingError:
            if a parameter used in the operation is not found in the
            ``parameters`` argument.
    """
    formatted_params = {}
    for name in parameters:
        escaped_name = name.replace("`", r"\`")
        formatted_params[name] = "@`{}`".format(escaped_name)

    try:
        return operation % formatted_params
    except (KeyError, ValueError, TypeError) as exc:
        raise exceptions.ProgrammingError(exc)


def _format_operation(operation, parameters):
    """Formats parameters in operation in way BigQuery expects.

    Args:
        operation (str): A Google BigQuery query string.

        parameters (Union[Mapping[str, Any], Sequence[Any]]):
            Optional parameter values.

    Returns:
        str: A formatted query string.

    Raises:
        google.cloud.bigquery.dbapi.ProgrammingError:
            if a parameter used in the operation is not found in the
            ``parameters`` argument.
    """
    if parameters is None or len(parameters) == 0:
        return operation.replace("%%", "%"), None  # Still do percent de-escaping.

    operation, parameter_types = _extract_types(operation)
    if parameter_types is None:
        raise exceptions.ProgrammingError(
            f"Parameters were provided, but {repr(operation)} has no placeholders."
        )

    if isinstance(parameters, collections_abc.Mapping):
        return _format_operation_dict(operation, parameters), parameter_types

    return _format_operation_list(operation, parameters), parameter_types


def _extract_types(
    operation,
    extra_type_sub=re.compile(
        r"""
        (%*)          # Extra %s. We'll deal with these in the replacement code

        %             # Beginning of replacement, %s, %(...)s

        (?:\(         # Begin of optional name and/or type
        ([^:)]*)      # name
        (?::          # ':' introduces type
        (             # start of type group
        [a-zA-Z0-9_<>, ]+ # First part, no parens

        (?:           # start sets of parens + non-paren text
        \([0-9 ,]+\)  # comma-separated groups of digits in parens
                      # (e.g. string(10))
        (?=[, >)])    # Must be followed by ,>) or space
        [a-zA-Z0-9<>, ]*  # Optional non-paren chars
        )*            # Can be zero or more of parens and following text
        )             # end of type group
        )?            # close type clause ":type"
        \))?          # End of optional name and/or type

        s             # End of replacement
        """,
        re.VERBOSE,
    ).sub,
):
    """Remove type information from parameter placeholders.

    For every parameter of the form %(name:type)s, replace with %(name)s and add the
    item name->type to dict that's returned.

    Returns operation without type information and a dictionary of names and types.
    """
    parameter_types = None

    def repl(m):
        nonlocal parameter_types
        prefix, name, type_ = m.groups()
        if len(prefix) % 2:
            # The prefix has an odd number of %s, the last of which
            # escapes the % we're looking for, so we don't want to
            # change anything.
            return m.group(0)

        try:
            if name:
                if not parameter_types:
                    parameter_types = {}
                if type_:
                    if name in parameter_types:
                        if type_ != parameter_types[name]:
                            raise exceptions.ProgrammingError(
                                f"Conflicting types for {name}: "
                                f"{parameter_types[name]} and {type_}."
                            )
                    else:
                        parameter_types[name] = type_
                else:
                    if not isinstance(parameter_types, dict):
                        raise TypeError()

                return f"{prefix}%({name})s"
            else:
                if parameter_types is None:
                    parameter_types = []
                parameter_types.append(type_)
                return f"{prefix}%s"
        except (AttributeError, TypeError):
            raise exceptions.ProgrammingError(
                f"{repr(operation)} mixes named and unnamed parameters."
            )

    return extra_type_sub(repl, operation), parameter_types
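
Sketched inputs and outputs for `_extract_types`, showing how type annotations are stripped from placeholders and collected separately:

_extract_types("SELECT %(foo:INT64)s, %(bar)s")
# -> ("SELECT %(foo)s, %(bar)s", {"foo": "INT64"})

_extract_types("SELECT %s, %(:NUMERIC)s")
# -> ("SELECT %s, %s", [None, "NUMERIC"])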
@@ -0,0 +1,58 @@
# Copyright 2017 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Exceptions used in the Google BigQuery DB-API."""


class Warning(Exception):
    """Exception raised for important DB-API warnings."""


class Error(Exception):
    """Exception representing all non-warning DB-API errors."""


class InterfaceError(Error):
    """DB-API error related to the database interface."""


class DatabaseError(Error):
    """DB-API error related to the database."""


class DataError(DatabaseError):
    """DB-API error due to problems with the processed data."""


class OperationalError(DatabaseError):
    """DB-API error related to the database operation.

    These errors are not necessarily under the control of the programmer.
    """


class IntegrityError(DatabaseError):
    """DB-API error when integrity of the database is affected."""


class InternalError(DatabaseError):
    """DB-API error when the database encounters an internal error."""


class ProgrammingError(DatabaseError):
    """DB-API exception raised for programming errors."""


class NotSupportedError(DatabaseError):
    """DB-API error for operations not supported by the database or API."""
@@ -0,0 +1,96 @@
# Copyright 2017 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Types used in the Google BigQuery DB-API.

See `PEP-249`_ for details.

.. _PEP-249:
   https://www.python.org/dev/peps/pep-0249/#type-objects-and-constructors
"""

import datetime


Date = datetime.date
Time = datetime.time
Timestamp = datetime.datetime
DateFromTicks = datetime.date.fromtimestamp
TimestampFromTicks = datetime.datetime.fromtimestamp


def Binary(data):
    """Construct a DB-API binary value.

    Args:
        data (bytes-like): An object containing binary data and that
            can be converted to bytes with the `bytes` builtin.

    Returns:
        bytes: The binary data as a bytes object.
    """
    if isinstance(data, int):
        # This is not the conversion we're looking for, because it
        # will simply create a bytes object of the given size.
        raise TypeError("cannot convert `int` object to binary")

    try:
        return bytes(data)
    except TypeError:
        if isinstance(data, str):
            return data.encode("utf-8")
        else:
            raise


def TimeFromTicks(ticks, tz=None):
    """Construct a DB-API time value from the given ticks value.

    Args:
        ticks (float):
            a number of seconds since the epoch; see the documentation of the
            standard Python time module for details.

        tz (datetime.tzinfo): (Optional) time zone to use for conversion

    Returns:
        datetime.time: time represented by ticks.
    """
    dt = datetime.datetime.fromtimestamp(ticks, tz=tz)
    return dt.timetz()


class _DBAPITypeObject(object):
    """DB-API type object which compares equal to many different strings.

    See `PEP-249`_ for details.

    .. _PEP-249:
       https://www.python.org/dev/peps/pep-0249/#implementation-hints-for-module-authors
    """

    def __init__(self, *values):
        self.values = values

    def __eq__(self, other):
        return other in self.values


STRING = "STRING"
BINARY = _DBAPITypeObject("BYTES", "RECORD", "STRUCT")
NUMBER = _DBAPITypeObject(
    "INTEGER", "INT64", "FLOAT", "FLOAT64", "NUMERIC", "BIGNUMERIC", "BOOLEAN", "BOOL"
)
DATETIME = _DBAPITypeObject("TIMESTAMP", "DATE", "TIME", "DATETIME")
ROWID = "ROWID"
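
A sketch of how these singletons behave: a cursor's ``description`` type codes compare equal to the broad DB-API categories:

NUMBER == "INT64"     # True
NUMBER == "FLOAT64"   # True
BINARY == "STRUCT"    # True
DATETIME == "STRING"  # False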
@@ -0,0 +1,84 @@
# Copyright 2015 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Define class for the custom encryption configuration."""

import copy


class EncryptionConfiguration(object):
    """Custom encryption configuration (e.g., Cloud KMS keys).

    Args:
        kms_key_name (str): resource ID of Cloud KMS key used for encryption
    """

    def __init__(self, kms_key_name=None) -> None:
        self._properties = {}
        if kms_key_name is not None:
            self._properties["kmsKeyName"] = kms_key_name

    @property
    def kms_key_name(self):
        """str: Resource ID of Cloud KMS key

        Resource ID of Cloud KMS key or :data:`None` if using default
        encryption.
        """
        return self._properties.get("kmsKeyName")

    @kms_key_name.setter
    def kms_key_name(self, value):
        self._properties["kmsKeyName"] = value

    @classmethod
    def from_api_repr(cls, resource):
        """Construct an encryption configuration from its API representation

        Args:
            resource (Dict[str, object]):
                An encryption configuration representation as returned from
                the API.

        Returns:
            google.cloud.bigquery.table.EncryptionConfiguration:
                An encryption configuration parsed from ``resource``.
        """
        config = cls()
        config._properties = copy.deepcopy(resource)
        return config

    def to_api_repr(self):
        """Construct the API resource representation of this encryption
        configuration.

        Returns:
            Dict[str, object]:
                Encryption configuration as represented as an API resource
        """
        return copy.deepcopy(self._properties)

    def __eq__(self, other):
        if not isinstance(other, EncryptionConfiguration):
            return NotImplemented
        return self.kms_key_name == other.kms_key_name

    def __ne__(self, other):
        return not self == other

    def __hash__(self):
        return hash(self.kms_key_name)

    def __repr__(self):
        return "EncryptionConfiguration({})".format(self.kms_key_name)
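
A sketch of attaching the configuration to a table; the project, dataset, and key resource name are placeholders:

from google.cloud import bigquery

table = bigquery.Table("my-project.my_dataset.my_table")
table.encryption_configuration = EncryptionConfiguration(
    kms_key_name="projects/my-project/locations/us/keyRings/my-ring/cryptoKeys/my-key"
)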
@@ -0,0 +1,389 @@
# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import enum


class AutoRowIDs(enum.Enum):
    """How to handle automatic insert IDs when inserting rows as a stream."""

    DISABLED = enum.auto()
    GENERATE_UUID = enum.auto()


class Compression(str, enum.Enum):
    """The compression type to use for exported files. The default value is
    :attr:`NONE`.

    :attr:`DEFLATE` and :attr:`SNAPPY` are
    only supported for Avro.
    """

    GZIP = "GZIP"
    """Specifies GZIP format."""

    DEFLATE = "DEFLATE"
    """Specifies DEFLATE format."""

    SNAPPY = "SNAPPY"
    """Specifies SNAPPY format."""

    ZSTD = "ZSTD"
    """Specifies ZSTD format."""

    NONE = "NONE"
    """Specifies no compression."""
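
A sketch of where Compression plugs in; `client`, the source table, and the destination bucket are assumed to exist:

from google.cloud import bigquery

job_config = bigquery.job.ExtractJobConfig(compression=Compression.GZIP)
client.extract_table(
    "my-project.my_dataset.my_table",
    "gs://my-bucket/export-*.csv.gz",
    job_config=job_config,
)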


class DecimalTargetType:
    """The data types that could be used as a target type when converting decimal values.

    https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#DecimalTargetType

    .. versionadded:: 2.21.0
    """

    NUMERIC = "NUMERIC"
    """Decimal values could be converted to NUMERIC type."""

    BIGNUMERIC = "BIGNUMERIC"
    """Decimal values could be converted to BIGNUMERIC type."""

    STRING = "STRING"
    """Decimal values could be converted to STRING type."""


class CreateDisposition(object):
    """Specifies whether the job is allowed to create new tables. The default
    value is :attr:`CREATE_IF_NEEDED`.

    Creation, truncation and append actions occur as one atomic update
    upon job completion.
    """

    CREATE_IF_NEEDED = "CREATE_IF_NEEDED"
    """If the table does not exist, BigQuery creates the table."""

    CREATE_NEVER = "CREATE_NEVER"
    """The table must already exist. If it does not, a 'notFound' error is
    returned in the job result."""
class DefaultPandasDTypes(enum.Enum):
|
||||
"""Default Pandas DataFrem DTypes to convert BigQuery data. These
|
||||
Sentinel values are used instead of None to maintain backward compatibility,
|
||||
and allow Pandas package is not available. For more information:
|
||||
https://stackoverflow.com/a/60605919/101923
|
||||
"""
|
||||
|
||||
BOOL_DTYPE = object()
|
||||
"""Specifies default bool dtype"""
|
||||
|
||||
INT_DTYPE = object()
|
||||
"""Specifies default integer dtype"""
|
||||
|
||||
DATE_DTYPE = object()
|
||||
"""Specifies default date dtype"""
|
||||
|
||||
TIME_DTYPE = object()
|
||||
"""Specifies default time dtype"""
|
||||
|
||||
RANGE_DATE_DTYPE = object()
|
||||
"""Specifies default range date dtype"""
|
||||
|
||||
RANGE_DATETIME_DTYPE = object()
|
||||
"""Specifies default range datetime dtype"""
|
||||
|
||||
RANGE_TIMESTAMP_DTYPE = object()
|
||||
"""Specifies default range timestamp dtype"""
|
||||
|
||||
|
||||
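# Illustrative sketch (not part of the vendored module): the members are
# unique ``object()`` sentinels, so callers can distinguish "use the default
# dtype" from an explicit ``None`` with an identity check. The helper and the
# "Int64" default below are hypothetical.
#
#     def resolve_int_dtype(dtype):
#         if dtype is DefaultPandasDTypes.INT_DTYPE:
#             return "Int64"  # hypothetical default; the real one lives in the client
#         return dtype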
class DestinationFormat(object):
    """The exported file format. The default value is :attr:`CSV`.

    Tables with nested or repeated fields cannot be exported as CSV.
    """

    CSV = "CSV"
    """Specifies CSV format."""

    NEWLINE_DELIMITED_JSON = "NEWLINE_DELIMITED_JSON"
    """Specifies newline delimited JSON format."""

    AVRO = "AVRO"
    """Specifies Avro format."""

    PARQUET = "PARQUET"
    """Specifies Parquet format."""


class Encoding(object):
    """The character encoding of the data. The default is :attr:`UTF_8`.

    BigQuery decodes the data after the raw, binary data has been
    split using the values of the quote and fieldDelimiter properties.
    """

    UTF_8 = "UTF-8"
    """Specifies UTF-8 encoding."""

    ISO_8859_1 = "ISO-8859-1"
    """Specifies ISO-8859-1 encoding."""


class QueryPriority(object):
    """Specifies a priority for the query. The default value is
    :attr:`INTERACTIVE`.
    """

    INTERACTIVE = "INTERACTIVE"
    """Specifies interactive priority."""

    BATCH = "BATCH"
    """Specifies batch priority."""


class QueryApiMethod(str, enum.Enum):
    """API method used to start the query. The default value is
    :attr:`INSERT`.
    """

    INSERT = "INSERT"
    """Submit a query job by using the `jobs.insert REST API method
    <https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/insert>`_.

    This supports all job configuration options.
    """

    QUERY = "QUERY"
    """Submit a query job by using the `jobs.query REST API method
    <https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query>`_.

    Differences from ``INSERT``:

    * Many parameters and job configuration options, including job ID and
      destination table, cannot be used with this API method. See the
      `jobs.query REST API documentation
      <https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query>`_
      for the complete list of supported configuration options.

    * The API blocks up to a specified timeout, waiting for the query to
      finish.

    * The full job resource (including job statistics) may not be available.
      Call :meth:`~google.cloud.bigquery.job.QueryJob.reload` or
      :meth:`~google.cloud.bigquery.client.Client.get_job` to get full job
      statistics and configuration.

    * :meth:`~google.cloud.bigquery.Client.query` can raise API exceptions if
      the query fails, whereas the same errors don't appear until calling
      :meth:`~google.cloud.bigquery.job.QueryJob.result` when the ``INSERT``
      API method is used.
    """
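# Usage sketch (illustrative; assumes the ``api_method`` parameter of
# ``Client.query``, which is not shown in this diff):
#
#     from google.cloud import bigquery
#
#     client = bigquery.Client()
#     job = client.query(
#         "SELECT 1",
#         api_method=bigquery.enums.QueryApiMethod.QUERY,
#     )
#     rows = job.result()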
class SchemaUpdateOption(object):
    """Specifies an update to the destination table schema as a side effect of
    a load job.
    """

    ALLOW_FIELD_ADDITION = "ALLOW_FIELD_ADDITION"
    """Allow adding a nullable field to the schema."""

    ALLOW_FIELD_RELAXATION = "ALLOW_FIELD_RELAXATION"
    """Allow relaxing a required field in the original schema to nullable."""


class SourceFormat(object):
    """The format of the data files. The default value is :attr:`CSV`.

    Note that the set of allowed values for loading data is different
    than the set used for external data sources (see
    :class:`~google.cloud.bigquery.external_config.ExternalSourceFormat`).
    """

    CSV = "CSV"
    """Specifies CSV format."""

    DATASTORE_BACKUP = "DATASTORE_BACKUP"
    """Specifies Datastore backup format."""

    NEWLINE_DELIMITED_JSON = "NEWLINE_DELIMITED_JSON"
    """Specifies newline delimited JSON format."""

    AVRO = "AVRO"
    """Specifies Avro format."""

    PARQUET = "PARQUET"
    """Specifies Parquet format."""

    ORC = "ORC"
    """Specifies ORC format."""


class KeyResultStatementKind:
    """Determines which statement in the script represents the "key result".

    The "key result" is used to populate the schema and query results of the
    script job.

    https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#keyresultstatementkind
    """

    KEY_RESULT_STATEMENT_KIND_UNSPECIFIED = "KEY_RESULT_STATEMENT_KIND_UNSPECIFIED"
    LAST = "LAST"
    FIRST_SELECT = "FIRST_SELECT"


class StandardSqlTypeNames(str, enum.Enum):
    """Enum of allowed SQL type names in schema.SchemaField.

    Datatype used in GoogleSQL.
    """

    def _generate_next_value_(name, start, count, last_values):
        return name

    TYPE_KIND_UNSPECIFIED = enum.auto()
    INT64 = enum.auto()
    BOOL = enum.auto()
    FLOAT64 = enum.auto()
    STRING = enum.auto()
    BYTES = enum.auto()
    TIMESTAMP = enum.auto()
    DATE = enum.auto()
    TIME = enum.auto()
    DATETIME = enum.auto()
    INTERVAL = enum.auto()
    GEOGRAPHY = enum.auto()
    NUMERIC = enum.auto()
    BIGNUMERIC = enum.auto()
    JSON = enum.auto()
    ARRAY = enum.auto()
    STRUCT = enum.auto()
    RANGE = enum.auto()
    # NOTE: FOREIGN acts as a wrapper for data types
    # not natively understood by BigQuery unless translated
    FOREIGN = enum.auto()


class EntityTypes(str, enum.Enum):
    """Enum of allowed entity type names in AccessEntry"""

    USER_BY_EMAIL = "userByEmail"
    GROUP_BY_EMAIL = "groupByEmail"
    DOMAIN = "domain"
    DATASET = "dataset"
    SPECIAL_GROUP = "specialGroup"
    VIEW = "view"
    IAM_MEMBER = "iamMember"
    ROUTINE = "routine"


# See also: https://cloud.google.com/bigquery/data-types#legacy_sql_data_types
# and https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types
class SqlTypeNames(str, enum.Enum):
    """Enum of allowed SQL type names in schema.SchemaField.

    Datatype used in Legacy SQL.
    """

    STRING = "STRING"
    BYTES = "BYTES"
    INTEGER = "INTEGER"
    INT64 = "INTEGER"
    FLOAT = "FLOAT"
    FLOAT64 = "FLOAT"
    DECIMAL = NUMERIC = "NUMERIC"
    BIGDECIMAL = BIGNUMERIC = "BIGNUMERIC"
    BOOLEAN = "BOOLEAN"
    BOOL = "BOOLEAN"
    GEOGRAPHY = "GEOGRAPHY"  # NOTE: not available in legacy types
    RECORD = "RECORD"
    STRUCT = "RECORD"
    TIMESTAMP = "TIMESTAMP"
    DATE = "DATE"
    TIME = "TIME"
    DATETIME = "DATETIME"
    INTERVAL = "INTERVAL"  # NOTE: not available in legacy types
    RANGE = "RANGE"  # NOTE: not available in legacy types
    # NOTE: FOREIGN acts as a wrapper for data types
    # not natively understood by BigQuery unless translated
    FOREIGN = "FOREIGN"
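# Usage sketch (illustrative; SchemaField is defined elsewhere in this
# commit). Because SqlTypeNames is a ``str`` enum, a member can stand in for
# the field type string:
#
#     from google.cloud.bigquery.schema import SchemaField
#
#     field = SchemaField("full_name", SqlTypeNames.STRING, mode="REQUIRED")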
class WriteDisposition(object):
    """Specifies the action that occurs if the destination table already exists.

    The default value is :attr:`WRITE_APPEND`.

    Each action is atomic and only occurs if BigQuery is able to complete
    the job successfully. Creation, truncation and append actions occur as one
    atomic update upon job completion.
    """

    WRITE_APPEND = "WRITE_APPEND"
    """If the table already exists, BigQuery appends the data to the table."""

    WRITE_TRUNCATE = "WRITE_TRUNCATE"
    """If the table already exists, BigQuery overwrites the table data."""

    WRITE_EMPTY = "WRITE_EMPTY"
    """If the table already exists and contains data, a 'duplicate' error is
    returned in the job result."""
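# Usage sketch (illustrative; LoadJobConfig appears later in this commit, and
# its ``write_disposition`` property is not shown in this diff):
#
#     from google.cloud.bigquery.job import LoadJobConfig
#
#     config = LoadJobConfig(
#         create_disposition=CreateDisposition.CREATE_IF_NEEDED,
#         write_disposition=WriteDisposition.WRITE_TRUNCATE,
#     )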
class DeterminismLevel:
    """Specifies determinism level for JavaScript user-defined functions (UDFs).

    https://cloud.google.com/bigquery/docs/reference/rest/v2/routines#DeterminismLevel
    """

    DETERMINISM_LEVEL_UNSPECIFIED = "DETERMINISM_LEVEL_UNSPECIFIED"
    """The determinism of the UDF is unspecified."""

    DETERMINISTIC = "DETERMINISTIC"
    """The UDF is deterministic, meaning that 2 function calls with the same inputs
    always produce the same result, even across 2 query runs."""

    NOT_DETERMINISTIC = "NOT_DETERMINISTIC"
    """The UDF is not deterministic."""


class RoundingMode(str, enum.Enum):
    """Rounding mode options that can be used when storing NUMERIC or BIGNUMERIC
    values.

    ROUNDING_MODE_UNSPECIFIED: will default to using ROUND_HALF_AWAY_FROM_ZERO.

    ROUND_HALF_AWAY_FROM_ZERO: rounds half values away from zero when applying
    precision and scale upon writing of NUMERIC and BIGNUMERIC values.
    For scale 0:

    * 1.1, 1.2, 1.3, 1.4 => 1
    * 1.5, 1.6, 1.7, 1.8, 1.9 => 2

    ROUND_HALF_EVEN: rounds half values to the nearest even value when applying
    precision and scale upon writing of NUMERIC and BIGNUMERIC values.
    For scale 0:

    * 1.1, 1.2, 1.3, 1.4 => 1
    * 1.5 => 2
    * 1.6, 1.7, 1.8, 1.9 => 2
    * 2.5 => 2
    """

    def _generate_next_value_(name, start, count, last_values):
        return name

    ROUNDING_MODE_UNSPECIFIED = enum.auto()
    ROUND_HALF_AWAY_FROM_ZERO = enum.auto()
    ROUND_HALF_EVEN = enum.auto()
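# Illustrative note (not part of the vendored module): because
# ``_generate_next_value_`` returns the member name, each member's value is
# its own name, matching the REST API's string representation.
#
#     assert RoundingMode.ROUND_HALF_EVEN.value == "ROUND_HALF_EVEN"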
@@ -0,0 +1,35 @@
# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


class BigQueryError(Exception):
    """Base class for all custom exceptions defined by the BigQuery client."""


class LegacyBigQueryStorageError(BigQueryError):
    """Raised when too old a version of the BigQuery Storage extra is detected at runtime."""


class LegacyPyarrowError(BigQueryError):
    """Raised when too old a version of the pyarrow package is detected at runtime."""


class BigQueryStorageNotFoundError(BigQueryError):
    """Raised when the BigQuery Storage extra is not installed when trying to
    import it.
    """


class LegacyPandasError(BigQueryError):
    """Raised when too old a version of the pandas package is detected at runtime."""
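# Usage sketch (illustrative; not part of the vendored module): the common
# BigQueryError base class lets callers handle any of these dependency
# problems with a single except clause. The helper below is hypothetical.
#
#     try:
#         check_optional_dependencies()  # hypothetical helper that may raise
#     except BigQueryError as exc:
#         print(f"optional dependency problem: {exc}")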
File diff suppressed because it is too large
@@ -0,0 +1,147 @@
# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import copy
from typing import Dict, Optional, Union


class AvroOptions:
    """Options if source format is set to AVRO."""

    _SOURCE_FORMAT = "AVRO"
    _RESOURCE_NAME = "avroOptions"

    def __init__(self):
        self._properties = {}

    @property
    def use_avro_logical_types(self) -> Optional[bool]:
        """[Optional] If sourceFormat is set to 'AVRO', indicates whether to
        interpret logical types as the corresponding BigQuery data type (for
        example, TIMESTAMP), instead of using the raw type (for example,
        INTEGER).

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#AvroOptions.FIELDS.use_avro_logical_types
        """
        return self._properties.get("useAvroLogicalTypes")

    @use_avro_logical_types.setter
    def use_avro_logical_types(self, value):
        self._properties["useAvroLogicalTypes"] = value

    @classmethod
    def from_api_repr(cls, resource: Dict[str, bool]) -> "AvroOptions":
        """Factory: construct an instance from a resource dict.

        Args:
            resource (Dict[str, bool]):
                Definition of a :class:`~.format_options.AvroOptions` instance in
                the same representation as is returned from the API.

        Returns:
            :class:`~.format_options.AvroOptions`:
                Configuration parsed from ``resource``.
        """
        config = cls()
        config._properties = copy.deepcopy(resource)
        return config

    def to_api_repr(self) -> dict:
        """Build an API representation of this object.

        Returns:
            Dict[str, bool]:
                A dictionary in the format used by the BigQuery API.
        """
        return copy.deepcopy(self._properties)
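# Usage sketch (illustrative; derived directly from the methods above):
#
#     options = AvroOptions()
#     options.use_avro_logical_types = True
#     assert options.to_api_repr() == {"useAvroLogicalTypes": True}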
class ParquetOptions:
    """Additional options if the PARQUET source format is used."""

    _SOURCE_FORMAT = "PARQUET"
    _RESOURCE_NAME = "parquetOptions"

    def __init__(self):
        self._properties = {}

    @property
    def enum_as_string(self) -> bool:
        """Indicates whether to infer Parquet ENUM logical type as STRING instead of
        BYTES by default.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ParquetOptions.FIELDS.enum_as_string
        """
        return self._properties.get("enumAsString")

    @enum_as_string.setter
    def enum_as_string(self, value: bool) -> None:
        self._properties["enumAsString"] = value

    @property
    def enable_list_inference(self) -> bool:
        """Indicates whether to use schema inference specifically for Parquet LIST
        logical type.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ParquetOptions.FIELDS.enable_list_inference
        """
        return self._properties.get("enableListInference")

    @enable_list_inference.setter
    def enable_list_inference(self, value: bool) -> None:
        self._properties["enableListInference"] = value

    @property
    def map_target_type(self) -> Optional[Union[bool, str]]:
        """Indicates whether to simplify the representation of Parquet maps to
        only show keys and values."""
        return self._properties.get("mapTargetType")

    @map_target_type.setter
    def map_target_type(self, value: str) -> None:
        """Sets the map target type.

        Args:
            value: The map target type (e.g. ``ARRAY_OF_STRUCT``).
        """
        self._properties["mapTargetType"] = value

    @classmethod
    def from_api_repr(cls, resource: Dict[str, bool]) -> "ParquetOptions":
        """Factory: construct an instance from a resource dict.

        Args:
            resource (Dict[str, bool]):
                Definition of a :class:`~.format_options.ParquetOptions` instance in
                the same representation as is returned from the API.

        Returns:
            :class:`~.format_options.ParquetOptions`:
                Configuration parsed from ``resource``.
        """
        config = cls()
        config._properties = copy.deepcopy(resource)
        return config

    def to_api_repr(self) -> dict:
        """Build an API representation of this object.

        Returns:
            Dict[str, bool]:
                A dictionary in the format used by the BigQuery API.
        """
        return copy.deepcopy(self._properties)
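# Usage sketch (illustrative; assumes the ``parquet_options`` property of
# LoadJobConfig, which is not shown in this diff):
#
#     from google.cloud.bigquery.job import LoadJobConfig
#
#     parquet_options = ParquetOptions()
#     parquet_options.enable_list_inference = True
#     config = LoadJobConfig(source_format="PARQUET")
#     config.parquet_options = parquet_options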
@@ -0,0 +1,38 @@
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""BigQuery API IAM policy definitions

For all allowed roles and permissions, see:

https://cloud.google.com/bigquery/docs/access-control
"""

# BigQuery-specific IAM roles available for tables and views

BIGQUERY_DATA_EDITOR_ROLE = "roles/bigquery.dataEditor"
"""When applied to a table or view, this role provides permissions to
read and update data and metadata for the table or view."""

BIGQUERY_DATA_OWNER_ROLE = "roles/bigquery.dataOwner"
"""When applied to a table or view, this role provides permissions to
read and update data and metadata for the table or view, share the
table/view, and delete the table/view."""

BIGQUERY_DATA_VIEWER_ROLE = "roles/bigquery.dataViewer"
"""When applied to a table or view, this role provides permissions to
read data and metadata from the table or view."""

BIGQUERY_METADATA_VIEWER_ROLE = "roles/bigquery.metadataViewer"
"""When applied to a table or view, this role provides permissions to
read metadata from the table or view."""
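# Usage sketch (illustrative; assumes Client.get_iam_policy/set_iam_policy
# for tables, which are not shown in this diff; the table ID and member are
# hypothetical):
#
#     from google.cloud import bigquery
#
#     client = bigquery.Client()
#     policy = client.get_iam_policy("my-project.my_dataset.my_table")
#     policy.bindings.append(
#         {"role": BIGQUERY_DATA_VIEWER_ROLE, "members": {"user:reader@example.com"}}
#     )
#     client.set_iam_policy("my-project.my_dataset.my_table", policy)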
@@ -0,0 +1,87 @@
# Copyright 2015 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Define API Jobs."""

from google.cloud.bigquery.job.base import _AsyncJob
from google.cloud.bigquery.job.base import _error_result_to_exception
from google.cloud.bigquery.job.base import _DONE_STATE
from google.cloud.bigquery.job.base import _JobConfig
from google.cloud.bigquery.job.base import _JobReference
from google.cloud.bigquery.job.base import ReservationUsage
from google.cloud.bigquery.job.base import ScriptStatistics
from google.cloud.bigquery.job.base import ScriptStackFrame
from google.cloud.bigquery.job.base import TransactionInfo
from google.cloud.bigquery.job.base import UnknownJob
from google.cloud.bigquery.job.copy_ import CopyJob
from google.cloud.bigquery.job.copy_ import CopyJobConfig
from google.cloud.bigquery.job.copy_ import OperationType
from google.cloud.bigquery.job.extract import ExtractJob
from google.cloud.bigquery.job.extract import ExtractJobConfig
from google.cloud.bigquery.job.load import LoadJob
from google.cloud.bigquery.job.load import LoadJobConfig
from google.cloud.bigquery.job.query import _contains_order_by
from google.cloud.bigquery.job.query import DmlStats
from google.cloud.bigquery.job.query import QueryJob
from google.cloud.bigquery.job.query import QueryJobConfig
from google.cloud.bigquery.job.query import QueryPlanEntry
from google.cloud.bigquery.job.query import QueryPlanEntryStep
from google.cloud.bigquery.job.query import ScriptOptions
from google.cloud.bigquery.job.query import TimelineEntry
from google.cloud.bigquery.enums import Compression
from google.cloud.bigquery.enums import CreateDisposition
from google.cloud.bigquery.enums import DestinationFormat
from google.cloud.bigquery.enums import Encoding
from google.cloud.bigquery.enums import QueryPriority
from google.cloud.bigquery.enums import SchemaUpdateOption
from google.cloud.bigquery.enums import SourceFormat
from google.cloud.bigquery.enums import WriteDisposition


# Include classes previously in job.py for backwards compatibility.
__all__ = [
    "_AsyncJob",
    "_error_result_to_exception",
    "_DONE_STATE",
    "_JobConfig",
    "_JobReference",
    "ReservationUsage",
    "ScriptStatistics",
    "ScriptStackFrame",
    "UnknownJob",
    "CopyJob",
    "CopyJobConfig",
    "OperationType",
    "ExtractJob",
    "ExtractJobConfig",
    "LoadJob",
    "LoadJobConfig",
    "_contains_order_by",
    "DmlStats",
    "QueryJob",
    "QueryJobConfig",
    "QueryPlanEntry",
    "QueryPlanEntryStep",
    "ScriptOptions",
    "TimelineEntry",
    "Compression",
    "CreateDisposition",
    "DestinationFormat",
    "Encoding",
    "QueryPriority",
    "SchemaUpdateOption",
    "SourceFormat",
    "TransactionInfo",
    "WriteDisposition",
]
Binary files not shown.
.venv/lib/python3.10/site-packages/google/cloud/bigquery/job/base.py (new file, 1116 lines): diff suppressed because it is too large.
@@ -0,0 +1,282 @@
# Copyright 2015 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Classes for copy jobs."""

import typing
from typing import Optional

from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration
from google.cloud.bigquery import _helpers
from google.cloud.bigquery.table import TableReference

from google.cloud.bigquery.job.base import _AsyncJob
from google.cloud.bigquery.job.base import _JobConfig
from google.cloud.bigquery.job.base import _JobReference


class OperationType:
    """Different operation types supported in table copy job.

    https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#operationtype
    """

    OPERATION_TYPE_UNSPECIFIED = "OPERATION_TYPE_UNSPECIFIED"
    """Unspecified operation type."""

    COPY = "COPY"
    """The source and destination table have the same table type."""

    SNAPSHOT = "SNAPSHOT"
    """The source table type is TABLE and the destination table type is SNAPSHOT."""

    CLONE = "CLONE"
    """The source table type is TABLE and the destination table type is CLONE."""

    RESTORE = "RESTORE"
    """The source table type is SNAPSHOT and the destination table type is TABLE."""


class CopyJobConfig(_JobConfig):
    """Configuration options for copy jobs.

    All properties in this class are optional. Values which are :data:`None`
    use the server defaults. Set properties on the constructed configuration
    by using the property name as the name of a keyword argument.
    """

    def __init__(self, **kwargs) -> None:
        super(CopyJobConfig, self).__init__("copy", **kwargs)

    @property
    def create_disposition(self):
        """google.cloud.bigquery.job.CreateDisposition: Specifies behavior
        for creating tables.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationTableCopy.FIELDS.create_disposition
        """
        return self._get_sub_prop("createDisposition")

    @create_disposition.setter
    def create_disposition(self, value):
        self._set_sub_prop("createDisposition", value)

    @property
    def write_disposition(self):
        """google.cloud.bigquery.job.WriteDisposition: Action that occurs if
        the destination table already exists.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationTableCopy.FIELDS.write_disposition
        """
        return self._get_sub_prop("writeDisposition")

    @write_disposition.setter
    def write_disposition(self, value):
        self._set_sub_prop("writeDisposition", value)

    @property
    def destination_encryption_configuration(self):
        """google.cloud.bigquery.encryption_configuration.EncryptionConfiguration: Custom
        encryption configuration for the destination table.

        Custom encryption configuration (e.g., Cloud KMS keys) or :data:`None`
        if using default encryption.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationTableCopy.FIELDS.destination_encryption_configuration
        """
        prop = self._get_sub_prop("destinationEncryptionConfiguration")
        if prop is not None:
            prop = EncryptionConfiguration.from_api_repr(prop)
        return prop

    @destination_encryption_configuration.setter
    def destination_encryption_configuration(self, value):
        api_repr = value
        if value is not None:
            api_repr = value.to_api_repr()
        self._set_sub_prop("destinationEncryptionConfiguration", api_repr)

    @property
    def operation_type(self) -> str:
        """The operation to perform with this copy job.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationTableCopy.FIELDS.operation_type
        """
        return self._get_sub_prop(
            "operationType", OperationType.OPERATION_TYPE_UNSPECIFIED
        )

    @operation_type.setter
    def operation_type(self, value: Optional[str]):
        if value is None:
            value = OperationType.OPERATION_TYPE_UNSPECIFIED
        self._set_sub_prop("operationType", value)

    @property
    def destination_expiration_time(self) -> str:
        """google.cloud.bigquery.job.DestinationExpirationTime: The time when the
        destination table expires. Expired tables will be deleted and their storage reclaimed.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationTableCopy.FIELDS.destination_expiration_time
        """
        return self._get_sub_prop("destinationExpirationTime")

    @destination_expiration_time.setter
    def destination_expiration_time(self, value: str):
        self._set_sub_prop("destinationExpirationTime", value)
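# Usage sketch (illustrative; assumes Client.copy_table, which is not shown
# in this diff; table IDs are hypothetical):
#
#     from google.cloud import bigquery
#
#     client = bigquery.Client()
#     config = CopyJobConfig(
#         write_disposition="WRITE_EMPTY",
#         operation_type=OperationType.SNAPSHOT,
#     )
#     job = client.copy_table(
#         "my-project.my_dataset.src",
#         "my-project.my_dataset.snap",
#         job_config=config,
#     )
#     job.result()  # waits for completion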
class CopyJob(_AsyncJob):
    """Asynchronous job: copy data into a table from other tables.

    Args:
        job_id (str): the job's ID, within the project belonging to ``client``.

        sources (List[google.cloud.bigquery.table.TableReference]): Table(s) from which data is to be copied.

        destination (google.cloud.bigquery.table.TableReference): Table into which data is to be copied.

        client (google.cloud.bigquery.client.Client):
            A client which holds credentials and project configuration
            for the dataset (which requires a project).

        job_config (Optional[google.cloud.bigquery.job.CopyJobConfig]):
            Extra configuration options for the copy job.
    """

    _JOB_TYPE = "copy"
    _CONFIG_CLASS = CopyJobConfig

    def __init__(self, job_id, sources, destination, client, job_config=None):
        super(CopyJob, self).__init__(job_id, client)

        if job_config is not None:
            self._properties["configuration"] = job_config._properties

        if destination:
            _helpers._set_sub_prop(
                self._properties,
                ["configuration", "copy", "destinationTable"],
                destination.to_api_repr(),
            )

        if sources:
            source_resources = [source.to_api_repr() for source in sources]
            _helpers._set_sub_prop(
                self._properties,
                ["configuration", "copy", "sourceTables"],
                source_resources,
            )

    @property
    def configuration(self) -> CopyJobConfig:
        """The configuration for this copy job."""
        return typing.cast(CopyJobConfig, super().configuration)

    @property
    def destination(self):
        """google.cloud.bigquery.table.TableReference: Table into which data
        is to be copied.
        """
        return TableReference.from_api_repr(
            _helpers._get_sub_prop(
                self._properties, ["configuration", "copy", "destinationTable"]
            )
        )

    @property
    def sources(self):
        """List[google.cloud.bigquery.table.TableReference]: Table(s) from
        which data is to be copied.
        """
        source_configs = _helpers._get_sub_prop(
            self._properties, ["configuration", "copy", "sourceTables"]
        )
        if source_configs is None:
            single = _helpers._get_sub_prop(
                self._properties, ["configuration", "copy", "sourceTable"]
            )
            if single is None:
                raise KeyError("Resource missing 'sourceTables' / 'sourceTable'")
            source_configs = [single]

        sources = []
        for source_config in source_configs:
            table_ref = TableReference.from_api_repr(source_config)
            sources.append(table_ref)
        return sources

    @property
    def create_disposition(self):
        """See
        :attr:`google.cloud.bigquery.job.CopyJobConfig.create_disposition`.
        """
        return self.configuration.create_disposition

    @property
    def write_disposition(self):
        """See
        :attr:`google.cloud.bigquery.job.CopyJobConfig.write_disposition`.
        """
        return self.configuration.write_disposition

    @property
    def destination_encryption_configuration(self):
        """google.cloud.bigquery.encryption_configuration.EncryptionConfiguration: Custom
        encryption configuration for the destination table.

        Custom encryption configuration (e.g., Cloud KMS keys) or :data:`None`
        if using default encryption.

        See
        :attr:`google.cloud.bigquery.job.CopyJobConfig.destination_encryption_configuration`.
        """
        return self.configuration.destination_encryption_configuration

    def to_api_repr(self):
        """Generate a resource for :meth:`_begin`."""
        # Exclude statistics, if set.
        return {
            "jobReference": self._properties["jobReference"],
            "configuration": self._properties["configuration"],
        }

    @classmethod
    def from_api_repr(cls, resource, client):
        """Factory: construct a job given its API representation.

        .. note::

            This method assumes that the project found in the resource matches
            the client's project.

        Args:
            resource (Dict): dataset job representation returned from the API
            client (google.cloud.bigquery.client.Client):
                Client which holds credentials and project
                configuration for the dataset.

        Returns:
            google.cloud.bigquery.job.CopyJob: Job parsed from ``resource``.
        """
        cls._check_resource_config(resource)
        job_ref = _JobReference._from_api_repr(resource["jobReference"])
        job = cls(job_ref, None, None, client=client)
        job._set_properties(resource)
        return job
@@ -0,0 +1,271 @@
# Copyright 2015 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Classes for extract (export) jobs."""

import typing

from google.cloud.bigquery import _helpers
from google.cloud.bigquery.model import ModelReference
from google.cloud.bigquery.table import Table
from google.cloud.bigquery.table import TableListItem
from google.cloud.bigquery.table import TableReference
from google.cloud.bigquery.job.base import _AsyncJob
from google.cloud.bigquery.job.base import _JobConfig
from google.cloud.bigquery.job.base import _JobReference


class ExtractJobConfig(_JobConfig):
    """Configuration options for extract jobs.

    All properties in this class are optional. Values which are :data:`None`
    use the server defaults. Set properties on the constructed configuration
    by using the property name as the name of a keyword argument.
    """

    def __init__(self, **kwargs):
        super(ExtractJobConfig, self).__init__("extract", **kwargs)

    @property
    def compression(self):
        """google.cloud.bigquery.job.Compression: Compression type to use for
        exported files.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationExtract.FIELDS.compression
        """
        return self._get_sub_prop("compression")

    @compression.setter
    def compression(self, value):
        self._set_sub_prop("compression", value)

    @property
    def destination_format(self):
        """google.cloud.bigquery.job.DestinationFormat: Exported file format.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationExtract.FIELDS.destination_format
        """
        return self._get_sub_prop("destinationFormat")

    @destination_format.setter
    def destination_format(self, value):
        self._set_sub_prop("destinationFormat", value)

    @property
    def field_delimiter(self):
        """str: Delimiter to use between fields in the exported data.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationExtract.FIELDS.field_delimiter
        """
        return self._get_sub_prop("fieldDelimiter")

    @field_delimiter.setter
    def field_delimiter(self, value):
        self._set_sub_prop("fieldDelimiter", value)

    @property
    def print_header(self):
        """bool: Print a header row in the exported data.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationExtract.FIELDS.print_header
        """
        return self._get_sub_prop("printHeader")

    @print_header.setter
    def print_header(self, value):
        self._set_sub_prop("printHeader", value)

    @property
    def use_avro_logical_types(self):
        """bool: For Avro exports, governs whether Avro logical types are
        converted to their corresponding BigQuery types (e.g. TIMESTAMP) rather
        than raw types (e.g. INTEGER).
        """
        return self._get_sub_prop("useAvroLogicalTypes")

    @use_avro_logical_types.setter
    def use_avro_logical_types(self, value):
        self._set_sub_prop("useAvroLogicalTypes", bool(value))
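# Usage sketch (illustrative; assumes Client.extract_table, which is not
# shown in this diff; identifiers and bucket are hypothetical):
#
#     from google.cloud import bigquery
#
#     client = bigquery.Client()
#     config = ExtractJobConfig(
#         destination_format="NEWLINE_DELIMITED_JSON",
#         compression="GZIP",
#     )
#     job = client.extract_table(
#         "my-project.my_dataset.my_table",
#         "gs://my-bucket/export-*.json.gz",
#         job_config=config,
#     )
#     job.result()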
class ExtractJob(_AsyncJob):
    """Asynchronous job: extract data from a table into Cloud Storage.

    Args:
        job_id (str): the job's ID.

        source (Union[ \
            google.cloud.bigquery.table.TableReference, \
            google.cloud.bigquery.model.ModelReference \
        ]):
            Table or Model from which data is to be extracted.

        destination_uris (List[str]):
            URIs describing where the extracted data will be written in Cloud
            Storage, using the format ``gs://<bucket_name>/<object_name_or_glob>``.

        client (google.cloud.bigquery.client.Client):
            A client which holds credentials and project configuration.

        job_config (Optional[google.cloud.bigquery.job.ExtractJobConfig]):
            Extra configuration options for the extract job.
    """

    _JOB_TYPE = "extract"
    _CONFIG_CLASS = ExtractJobConfig

    def __init__(self, job_id, source, destination_uris, client, job_config=None):
        super(ExtractJob, self).__init__(job_id, client)

        if job_config is not None:
            self._properties["configuration"] = job_config._properties

        if source:
            source_ref = {"projectId": source.project, "datasetId": source.dataset_id}

            if isinstance(source, (Table, TableListItem, TableReference)):
                source_ref["tableId"] = source.table_id
                source_key = "sourceTable"
            else:
                source_ref["modelId"] = source.model_id
                source_key = "sourceModel"

            _helpers._set_sub_prop(
                self._properties, ["configuration", "extract", source_key], source_ref
            )

        if destination_uris:
            _helpers._set_sub_prop(
                self._properties,
                ["configuration", "extract", "destinationUris"],
                destination_uris,
            )

    @property
    def configuration(self) -> ExtractJobConfig:
        """The configuration for this extract job."""
        return typing.cast(ExtractJobConfig, super().configuration)

    @property
    def source(self):
        """Union[ \
            google.cloud.bigquery.table.TableReference, \
            google.cloud.bigquery.model.ModelReference \
        ]: Table or Model from which data is to be extracted.
        """
        source_config = _helpers._get_sub_prop(
            self._properties, ["configuration", "extract", "sourceTable"]
        )
        if source_config:
            return TableReference.from_api_repr(source_config)
        else:
            source_config = _helpers._get_sub_prop(
                self._properties, ["configuration", "extract", "sourceModel"]
            )
            return ModelReference.from_api_repr(source_config)

    @property
    def destination_uris(self):
        """List[str]: URIs describing where the extracted data will be
        written in Cloud Storage, using the format
        ``gs://<bucket_name>/<object_name_or_glob>``.
        """
        return _helpers._get_sub_prop(
            self._properties, ["configuration", "extract", "destinationUris"]
        )

    @property
    def compression(self):
        """See
        :attr:`google.cloud.bigquery.job.ExtractJobConfig.compression`.
        """
        return self.configuration.compression

    @property
    def destination_format(self):
        """See
        :attr:`google.cloud.bigquery.job.ExtractJobConfig.destination_format`.
        """
        return self.configuration.destination_format

    @property
    def field_delimiter(self):
        """See
        :attr:`google.cloud.bigquery.job.ExtractJobConfig.field_delimiter`.
        """
        return self.configuration.field_delimiter

    @property
    def print_header(self):
        """See
        :attr:`google.cloud.bigquery.job.ExtractJobConfig.print_header`.
        """
        return self.configuration.print_header

    @property
    def destination_uri_file_counts(self):
        """Return file counts from job statistics, if present.

        See:
        https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics4.FIELDS.destination_uri_file_counts

        Returns:
            List[int]:
                A list of integer counts, each representing the number of files
                per destination URI or URI pattern specified in the extract
                configuration. These values will be in the same order as the URIs
                specified in the 'destinationUris' field. Returns None if the job
                is not yet complete.
        """
        counts = self._job_statistics().get("destinationUriFileCounts")
        if counts is not None:
            return [int(count) for count in counts]
        return None

    def to_api_repr(self):
        """Generate a resource for :meth:`_begin`."""
        # Exclude statistics, if set.
        return {
            "jobReference": self._properties["jobReference"],
            "configuration": self._properties["configuration"],
        }

    @classmethod
    def from_api_repr(cls, resource: dict, client) -> "ExtractJob":
        """Factory: construct a job given its API representation.

        .. note::

            This method assumes that the project found in the resource matches
            the client's project.

        Args:
            resource (Dict): dataset job representation returned from the API

            client (google.cloud.bigquery.client.Client):
                Client which holds credentials and project
                configuration for the dataset.

        Returns:
            google.cloud.bigquery.job.ExtractJob: Job parsed from ``resource``.
        """
        cls._check_resource_config(resource)
        job_ref = _JobReference._from_api_repr(resource["jobReference"])
        job = cls(job_ref, None, None, client=client)
        job._set_properties(resource)
        return job
@@ -0,0 +1,985 @@
# Copyright 2015 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Classes for load jobs."""

import typing
from typing import FrozenSet, List, Iterable, Optional

from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration
from google.cloud.bigquery.external_config import HivePartitioningOptions
from google.cloud.bigquery.format_options import ParquetOptions
from google.cloud.bigquery import _helpers
from google.cloud.bigquery.schema import SchemaField
from google.cloud.bigquery.schema import _to_schema_fields
from google.cloud.bigquery.table import RangePartitioning
from google.cloud.bigquery.table import TableReference
from google.cloud.bigquery.table import TimePartitioning
from google.cloud.bigquery.job.base import _AsyncJob
from google.cloud.bigquery.job.base import _JobConfig
from google.cloud.bigquery.job.base import _JobReference
from google.cloud.bigquery.query import ConnectionProperty


class ColumnNameCharacterMap:
    """Indicates the character map used for column names.

    https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#columnnamecharactermap
    """

    COLUMN_NAME_CHARACTER_MAP_UNSPECIFIED = "COLUMN_NAME_CHARACTER_MAP_UNSPECIFIED"
    """Unspecified column name character map."""

    STRICT = "STRICT"
    """Support flexible column names and reject invalid column names."""

    V1 = "V1"
    """Support alphanumeric + underscore characters and names must start with
    a letter or underscore. Invalid column names will be normalized."""

    V2 = "V2"
    """Support flexible column names. Invalid column names will be normalized."""


class LoadJobConfig(_JobConfig):
    """Configuration options for load jobs.

    Set properties on the constructed configuration by using the property name
    as the name of a keyword argument. Values which are unset or :data:`None`
    use the BigQuery REST API default values. See the `BigQuery REST API
    reference documentation
    <https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad>`_
    for a list of default values.

    Required options differ based on the
    :attr:`~google.cloud.bigquery.job.LoadJobConfig.source_format` value.
    For example, the BigQuery API's default value for
    :attr:`~google.cloud.bigquery.job.LoadJobConfig.source_format` is ``"CSV"``.
    When loading a CSV file, either
    :attr:`~google.cloud.bigquery.job.LoadJobConfig.schema` must be set or
    :attr:`~google.cloud.bigquery.job.LoadJobConfig.autodetect` must be set to
    :data:`True`.
    """

    def __init__(self, **kwargs) -> None:
        super(LoadJobConfig, self).__init__("load", **kwargs)
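    # Usage sketch (illustrative; assumes Client.load_table_from_uri, which
    # is not shown in this diff; identifiers and bucket are hypothetical):
    #
    #     from google.cloud import bigquery
    #
    #     client = bigquery.Client()
    #     config = LoadJobConfig(source_format="CSV", autodetect=True)
    #     job = client.load_table_from_uri(
    #         "gs://my-bucket/data.csv",
    #         "my-project.my_dataset.my_table",
    #         job_config=config,
    #     )
    #     job.result()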
    @property
    def allow_jagged_rows(self):
        """Optional[bool]: Allow missing trailing optional columns (CSV only).

        See:
        https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.allow_jagged_rows
        """
        return self._get_sub_prop("allowJaggedRows")

    @allow_jagged_rows.setter
    def allow_jagged_rows(self, value):
        self._set_sub_prop("allowJaggedRows", value)

    @property
    def allow_quoted_newlines(self):
        """Optional[bool]: Allow quoted data containing newline characters (CSV only).

        See:
        https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.allow_quoted_newlines
        """
        return self._get_sub_prop("allowQuotedNewlines")

    @allow_quoted_newlines.setter
    def allow_quoted_newlines(self, value):
        self._set_sub_prop("allowQuotedNewlines", value)

    @property
    def autodetect(self):
        """Optional[bool]: Automatically infer the schema from a sample of the data.

        See:
        https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.autodetect
        """
        return self._get_sub_prop("autodetect")

    @autodetect.setter
    def autodetect(self, value):
        self._set_sub_prop("autodetect", value)

    @property
    def clustering_fields(self):
        """Optional[List[str]]: Fields defining clustering for the table
        (Defaults to :data:`None`).

        Clustering fields are immutable after table creation.

        .. note::

            BigQuery supports clustering for both partitioned and
            non-partitioned tables.
        """
        prop = self._get_sub_prop("clustering")
        if prop is not None:
            return list(prop.get("fields", ()))

    @clustering_fields.setter
    def clustering_fields(self, value):
        """Optional[List[str]]: Fields defining clustering for the table
        (Defaults to :data:`None`).
        """
        if value is not None:
            self._set_sub_prop("clustering", {"fields": value})
        else:
            self._del_sub_prop("clustering")

    @property
    def connection_properties(self) -> List[ConnectionProperty]:
        """Connection properties.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.connection_properties

        .. versionadded:: 3.7.0
        """
        resource = self._get_sub_prop("connectionProperties", [])
        return [ConnectionProperty.from_api_repr(prop) for prop in resource]

    @connection_properties.setter
    def connection_properties(self, value: Iterable[ConnectionProperty]):
        self._set_sub_prop(
            "connectionProperties",
            [prop.to_api_repr() for prop in value],
        )

    @property
    def create_disposition(self):
        """Optional[google.cloud.bigquery.job.CreateDisposition]: Specifies behavior
        for creating tables.

        See:
        https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.create_disposition
        """
        return self._get_sub_prop("createDisposition")

    @create_disposition.setter
    def create_disposition(self, value):
        self._set_sub_prop("createDisposition", value)

    @property
    def create_session(self) -> Optional[bool]:
        """[Preview] If :data:`True`, creates a new session, where
        :attr:`~google.cloud.bigquery.job.LoadJob.session_info` will contain a
        random server generated session id.

        If :data:`False`, runs load job with an existing ``session_id`` passed in
        :attr:`~google.cloud.bigquery.job.LoadJobConfig.connection_properties`,
        otherwise runs load job in non-session mode.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.create_session

        .. versionadded:: 3.7.0
        """
        return self._get_sub_prop("createSession")

    @create_session.setter
    def create_session(self, value: Optional[bool]):
        self._set_sub_prop("createSession", value)
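    # Usage sketch (illustrative): create a session with the first load job,
    # then reuse its ID through ``connection_properties`` on a follow-up job.
    # Assumes ``LoadJob.session_info`` and ``Client.load_table_from_uri``,
    # which are not shown in this diff; ``uri`` and ``table_id`` are
    # hypothetical.
    #
    #     first = LoadJobConfig(create_session=True)
    #     job = client.load_table_from_uri(uri, table_id, job_config=first)
    #     job.result()
    #     session_id = job.session_info.session_id
    #
    #     followup = LoadJobConfig(
    #         connection_properties=[ConnectionProperty("session_id", session_id)]
    #     )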
@property
|
||||
def decimal_target_types(self) -> Optional[FrozenSet[str]]:
|
||||
"""Possible SQL data types to which the source decimal values are converted.
|
||||
|
||||
See:
|
||||
https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.decimal_target_types
|
||||
|
||||
.. versionadded:: 2.21.0
|
||||
"""
|
||||
prop = self._get_sub_prop("decimalTargetTypes")
|
||||
if prop is not None:
|
||||
prop = frozenset(prop)
|
||||
return prop
|
||||
|
||||
@decimal_target_types.setter
|
||||
def decimal_target_types(self, value: Optional[Iterable[str]]):
|
||||
if value is not None:
|
||||
self._set_sub_prop("decimalTargetTypes", list(value))
|
||||
else:
|
||||
self._del_sub_prop("decimalTargetTypes")
|
||||
|
||||
@property
|
||||
def destination_encryption_configuration(self):
|
||||
"""Optional[google.cloud.bigquery.encryption_configuration.EncryptionConfiguration]: Custom
|
||||
encryption configuration for the destination table.
|
||||
|
||||
Custom encryption configuration (e.g., Cloud KMS keys) or :data:`None`
|
||||
if using default encryption.
|
||||
|
||||
See:
|
||||
https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.destination_encryption_configuration
|
||||
"""
|
||||
prop = self._get_sub_prop("destinationEncryptionConfiguration")
|
||||
if prop is not None:
|
||||
prop = EncryptionConfiguration.from_api_repr(prop)
|
||||
return prop
|
||||
|
||||
@destination_encryption_configuration.setter
|
||||
def destination_encryption_configuration(self, value):
|
||||
api_repr = value
|
||||
if value is not None:
|
||||
api_repr = value.to_api_repr()
|
||||
self._set_sub_prop("destinationEncryptionConfiguration", api_repr)
|
||||
else:
|
||||
self._del_sub_prop("destinationEncryptionConfiguration")
|
||||
|
||||
@property
|
||||
def destination_table_description(self):
|
||||
"""Optional[str]: Description of the destination table.
|
||||
|
||||
See:
|
||||
https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#DestinationTableProperties.FIELDS.description
|
||||
"""
|
||||
prop = self._get_sub_prop("destinationTableProperties")
|
||||
if prop is not None:
|
||||
return prop["description"]
|
||||
|
||||
@destination_table_description.setter
|
||||
def destination_table_description(self, value):
|
||||
keys = [self._job_type, "destinationTableProperties", "description"]
|
||||
if value is not None:
|
||||
_helpers._set_sub_prop(self._properties, keys, value)
|
||||
else:
|
||||
_helpers._del_sub_prop(self._properties, keys)
|
||||
|
||||
@property
|
||||
def destination_table_friendly_name(self):
|
||||
"""Optional[str]: Name given to destination table.
|
||||
|
||||
See:
|
||||
https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#DestinationTableProperties.FIELDS.friendly_name
|
||||
"""
|
||||
prop = self._get_sub_prop("destinationTableProperties")
|
||||
if prop is not None:
|
||||
return prop["friendlyName"]
|
||||
|
||||
@destination_table_friendly_name.setter
|
||||
def destination_table_friendly_name(self, value):
|
||||
keys = [self._job_type, "destinationTableProperties", "friendlyName"]
|
||||
if value is not None:
|
||||
_helpers._set_sub_prop(self._properties, keys, value)
|
||||
else:
|
||||
_helpers._del_sub_prop(self._properties, keys)
|
||||
|
||||
@property
|
||||
def encoding(self):
|
||||
"""Optional[google.cloud.bigquery.job.Encoding]: The character encoding of the
|
||||
data.
|
||||
|
||||
See:
|
||||
https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.encoding
|
||||
"""
|
||||
return self._get_sub_prop("encoding")
|
||||
|
||||
@encoding.setter
|
||||
def encoding(self, value):
|
||||
self._set_sub_prop("encoding", value)
|
||||
|
||||
@property
|
||||
def field_delimiter(self):
|
||||
"""Optional[str]: The separator for fields in a CSV file.
|
||||
|
||||
See:
|
||||
https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.field_delimiter
|
||||
"""
|
||||
return self._get_sub_prop("fieldDelimiter")
|
||||
|
||||
@field_delimiter.setter
|
||||
def field_delimiter(self, value):
|
||||
self._set_sub_prop("fieldDelimiter", value)
|
||||
|
||||
@property
|
||||
def hive_partitioning(self):
|
||||
"""Optional[:class:`~.external_config.HivePartitioningOptions`]: [Beta] When set, \
|
||||
it configures hive partitioning support.
|
||||
|
||||
.. note::
|
||||
**Experimental**. This feature is experimental and might change or
|
||||
have limited support.
|
||||
|
||||
See:
|
||||
https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.hive_partitioning_options
|
||||
"""
|
||||
prop = self._get_sub_prop("hivePartitioningOptions")
|
||||
if prop is None:
|
||||
return None
|
||||
return HivePartitioningOptions.from_api_repr(prop)
|
||||
|
||||
@hive_partitioning.setter
|
||||
def hive_partitioning(self, value):
|
||||
if value is not None:
|
||||
if isinstance(value, HivePartitioningOptions):
|
||||
value = value.to_api_repr()
|
||||
else:
|
||||
raise TypeError("Expected a HivePartitioningOptions instance or None.")
|
||||
|
||||
self._set_sub_prop("hivePartitioningOptions", value)
|
||||
|
||||
@property
|
||||
def ignore_unknown_values(self):
|
||||
"""Optional[bool]: Ignore extra values not represented in the table schema.
|
||||
|
||||
See:
|
||||
https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.ignore_unknown_values
|
||||
"""
|
||||
return self._get_sub_prop("ignoreUnknownValues")
|
||||
|
||||
@ignore_unknown_values.setter
|
||||
def ignore_unknown_values(self, value):
|
||||
self._set_sub_prop("ignoreUnknownValues", value)
|
||||
|
||||
@property
|
||||
def json_extension(self):
|
||||
"""Optional[str]: The extension to use for writing JSON data to BigQuery. Only supports GeoJSON currently.
|
||||
|
||||
See: https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.json_extension
|
||||
|
||||
"""
|
||||
return self._get_sub_prop("jsonExtension")
|
||||
|
||||
@json_extension.setter
|
||||
def json_extension(self, value):
|
||||
self._set_sub_prop("jsonExtension", value)
|
||||
|
||||
@property
|
||||
def max_bad_records(self):
|
||||
"""Optional[int]: Number of invalid rows to ignore.
|
||||
|
||||
See:
|
||||
https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.max_bad_records
|
||||
"""
|
||||
return _helpers._int_or_none(self._get_sub_prop("maxBadRecords"))
|
||||
|
||||
@max_bad_records.setter
|
||||
def max_bad_records(self, value):
|
||||
self._set_sub_prop("maxBadRecords", value)
|
||||
|
||||
@property
|
||||
def null_marker(self):
|
||||
"""Optional[str]: Represents a null value (CSV only).
|
||||
|
||||
See:
|
||||
https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.null_marker
|
||||
"""
|
||||
return self._get_sub_prop("nullMarker")
|
||||
|
||||
@null_marker.setter
|
||||
def null_marker(self, value):
|
||||
self._set_sub_prop("nullMarker", value)
|
||||
|
||||
@property
|
||||
def preserve_ascii_control_characters(self):
|
||||
"""Optional[bool]: Preserves the embedded ASCII control characters when sourceFormat is set to CSV.
|
||||
|
||||
See:
|
||||
https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.preserve_ascii_control_characters
|
||||
"""
|
||||
return self._get_sub_prop("preserveAsciiControlCharacters")
|
||||
|
||||
@preserve_ascii_control_characters.setter
|
||||
def preserve_ascii_control_characters(self, value):
|
||||
self._set_sub_prop("preserveAsciiControlCharacters", bool(value))
|
||||
|
||||
@property
|
||||
def projection_fields(self) -> Optional[List[str]]:
|
||||
"""Optional[List[str]]: If
|
||||
:attr:`google.cloud.bigquery.job.LoadJobConfig.source_format` is set to
|
||||
"DATASTORE_BACKUP", indicates which entity properties to load into
|
||||
BigQuery from a Cloud Datastore backup.
|
||||
|
||||
Property names are case sensitive and must be top-level properties. If
|
||||
no properties are specified, BigQuery loads all properties. If any
|
||||
named property isn't found in the Cloud Datastore backup, an invalid
|
||||
error is returned in the job result.
|
||||
|
||||
See:
|
||||
https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.projection_fields
|
||||
"""
|
||||
return self._get_sub_prop("projectionFields")
|
||||
|
||||
@projection_fields.setter
|
||||
def projection_fields(self, value: Optional[List[str]]):
|
||||
self._set_sub_prop("projectionFields", value)
|
||||
|
||||
@property
|
||||
def quote_character(self):
|
||||
"""Optional[str]: Character used to quote data sections (CSV only).
|
||||
|
||||
See:
|
||||
https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.quote
|
||||
"""
|
||||
return self._get_sub_prop("quote")
|
||||
|
||||
@quote_character.setter
|
||||
def quote_character(self, value):
|
||||
self._set_sub_prop("quote", value)
|
||||
|
||||
@property
|
||||
def range_partitioning(self):
|
||||
"""Optional[google.cloud.bigquery.table.RangePartitioning]:
|
||||
Configures range-based partitioning for destination table.
|
||||
|
||||
.. note::
|
||||
**Beta**. The integer range partitioning feature is in a
|
||||
pre-release state and might change or have limited support.
|
||||
|
||||
Only specify at most one of
|
||||
:attr:`~google.cloud.bigquery.job.LoadJobConfig.time_partitioning` or
|
||||
:attr:`~google.cloud.bigquery.job.LoadJobConfig.range_partitioning`.
|
||||
|
||||
Raises:
|
||||
ValueError:
|
||||
If the value is not
|
||||
:class:`~google.cloud.bigquery.table.RangePartitioning` or
|
||||
:data:`None`.
|
||||
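Example:
    Partitioning the destination table by an integer column (an
    illustrative sketch; the field name and bounds are placeholders):

    >>> from google.cloud.bigquery import PartitionRange, RangePartitioning
    >>> config = LoadJobConfig()
    >>> config.range_partitioning = RangePartitioning(
    ...     field="zipcode",
    ...     range_=PartitionRange(start=0, end=100000, interval=10),
    ... )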
"""
|
||||
resource = self._get_sub_prop("rangePartitioning")
|
||||
if resource is not None:
|
||||
return RangePartitioning(_properties=resource)
|
||||
|
||||
@range_partitioning.setter
|
||||
def range_partitioning(self, value):
|
||||
resource = value
|
||||
if isinstance(value, RangePartitioning):
|
||||
resource = value._properties
|
||||
elif value is not None:
|
||||
raise ValueError(
|
||||
"Expected value to be RangePartitioning or None, got {}.".format(value)
|
||||
)
|
||||
self._set_sub_prop("rangePartitioning", resource)
|
||||
|
||||
@property
|
||||
def reference_file_schema_uri(self):
|
||||
"""Optional[str]:
|
||||
When creating an external table, the user can provide a reference file with the
|
||||
table schema. This is enabled for the following formats:
|
||||
|
||||
AVRO, PARQUET, ORC
|
||||
"""
|
||||
return self._get_sub_prop("referenceFileSchemaUri")
|
||||
|
||||
@reference_file_schema_uri.setter
|
||||
def reference_file_schema_uri(self, value):
|
||||
return self._set_sub_prop("referenceFileSchemaUri", value)
|
||||
|
||||
@property
|
||||
def schema(self):
|
||||
"""Optional[Sequence[Union[ \
|
||||
:class:`~google.cloud.bigquery.schema.SchemaField`, \
|
||||
Mapping[str, Any] \
|
||||
]]]: Schema of the destination table.
|
||||
|
||||
See:
|
||||
https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.schema
|
||||
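Example:
    Declaring an explicit destination schema (an illustrative sketch):

    >>> from google.cloud.bigquery import SchemaField
    >>> config = LoadJobConfig()
    >>> config.schema = [
    ...     SchemaField("full_name", "STRING", mode="REQUIRED"),
    ...     SchemaField("age", "INTEGER"),
    ... ]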
"""
|
||||
schema = _helpers._get_sub_prop(self._properties, ["load", "schema", "fields"])
|
||||
if schema is None:
|
||||
return
|
||||
return [SchemaField.from_api_repr(field) for field in schema]
|
||||
|
||||
@schema.setter
|
||||
def schema(self, value):
|
||||
if value is None:
|
||||
self._del_sub_prop("schema")
|
||||
return
|
||||
|
||||
value = _to_schema_fields(value)
|
||||
|
||||
_helpers._set_sub_prop(
|
||||
self._properties,
|
||||
["load", "schema", "fields"],
|
||||
[field.to_api_repr() for field in value],
|
||||
)
|
||||
|
||||
@property
|
||||
def schema_update_options(self):
|
||||
"""Optional[List[google.cloud.bigquery.job.SchemaUpdateOption]]: Specifies
|
||||
updates to the destination table schema to allow as a side effect of
|
||||
the load job.
|
||||
"""
|
||||
return self._get_sub_prop("schemaUpdateOptions")
|
||||
|
||||
@schema_update_options.setter
|
||||
def schema_update_options(self, values):
|
||||
self._set_sub_prop("schemaUpdateOptions", values)
|
||||
|
||||
@property
|
||||
def skip_leading_rows(self):
|
||||
"""Optional[int]: Number of rows to skip when reading data (CSV only).
|
||||
|
||||
See:
|
||||
https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.skip_leading_rows
|
||||
"""
|
||||
return _helpers._int_or_none(self._get_sub_prop("skipLeadingRows"))
|
||||
|
||||
@skip_leading_rows.setter
|
||||
def skip_leading_rows(self, value):
|
||||
self._set_sub_prop("skipLeadingRows", str(value))
|
||||
|
||||
@property
|
||||
def source_format(self):
|
||||
"""Optional[google.cloud.bigquery.job.SourceFormat]: File format of the data.
|
||||
|
||||
See:
|
||||
https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.source_format
|
||||
"""
|
||||
return self._get_sub_prop("sourceFormat")
|
||||
|
||||
@source_format.setter
|
||||
def source_format(self, value):
|
||||
self._set_sub_prop("sourceFormat", value)
|
||||
|
||||
@property
|
||||
def time_partitioning(self):
|
||||
"""Optional[google.cloud.bigquery.table.TimePartitioning]: Specifies time-based
|
||||
partitioning for the destination table.
|
||||
|
||||
Only specify at most one of
|
||||
:attr:`~google.cloud.bigquery.job.LoadJobConfig.time_partitioning` or
|
||||
:attr:`~google.cloud.bigquery.job.LoadJobConfig.range_partitioning`.
|
||||
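Example:
    Daily partitioning on a timestamp column (an illustrative sketch;
    the field name is a placeholder):

    >>> from google.cloud.bigquery import TimePartitioning, TimePartitioningType
    >>> config = LoadJobConfig()
    >>> config.time_partitioning = TimePartitioning(
    ...     type_=TimePartitioningType.DAY, field="transaction_date"
    ... )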
"""
|
||||
prop = self._get_sub_prop("timePartitioning")
|
||||
if prop is not None:
|
||||
prop = TimePartitioning.from_api_repr(prop)
|
||||
return prop
|
||||
|
||||
@time_partitioning.setter
|
||||
def time_partitioning(self, value):
|
||||
api_repr = value
|
||||
if value is not None:
|
||||
api_repr = value.to_api_repr()
|
||||
self._set_sub_prop("timePartitioning", api_repr)
|
||||
else:
|
||||
self._del_sub_prop("timePartitioning")
|
||||
|
||||
@property
|
||||
def use_avro_logical_types(self):
|
||||
"""Optional[bool]: For loads of Avro data, governs whether Avro logical types are
|
||||
converted to their corresponding BigQuery types (e.g. TIMESTAMP) rather than
|
||||
raw types (e.g. INTEGER).
|
||||
"""
|
||||
return self._get_sub_prop("useAvroLogicalTypes")
|
||||
|
||||
@use_avro_logical_types.setter
|
||||
def use_avro_logical_types(self, value):
|
||||
self._set_sub_prop("useAvroLogicalTypes", bool(value))
|
||||
|
||||
@property
|
||||
def write_disposition(self):
|
||||
"""Optional[google.cloud.bigquery.job.WriteDisposition]: Action that occurs if
|
||||
the destination table already exists.
|
||||
|
||||
See:
|
||||
https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.write_disposition
|
||||
"""
|
||||
return self._get_sub_prop("writeDisposition")
|
||||
|
||||
@write_disposition.setter
|
||||
def write_disposition(self, value):
|
||||
self._set_sub_prop("writeDisposition", value)
|
||||
|
||||
@property
|
||||
def parquet_options(self):
|
||||
"""Optional[google.cloud.bigquery.format_options.ParquetOptions]: Additional
|
||||
properties to set if ``sourceFormat`` is set to PARQUET.
|
||||
|
||||
See:
|
||||
https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.parquet_options
|
||||
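Example:
    Enabling list inference for Parquet data (an illustrative sketch):

    >>> from google.cloud.bigquery.format_options import ParquetOptions
    >>> parquet_options = ParquetOptions()
    >>> parquet_options.enable_list_inference = True
    >>> config = LoadJobConfig()
    >>> config.parquet_options = parquet_options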
"""
|
||||
prop = self._get_sub_prop("parquetOptions")
|
||||
if prop is not None:
|
||||
prop = ParquetOptions.from_api_repr(prop)
|
||||
return prop
|
||||
|
||||
@parquet_options.setter
|
||||
def parquet_options(self, value):
|
||||
if value is not None:
|
||||
self._set_sub_prop("parquetOptions", value.to_api_repr())
|
||||
else:
|
||||
self._del_sub_prop("parquetOptions")
|
||||
|
||||
@property
|
||||
def column_name_character_map(self) -> str:
|
||||
"""Optional[google.cloud.bigquery.job.ColumnNameCharacterMap]:
|
||||
Character map supported for column names in CSV/Parquet loads. Defaults
|
||||
to STRICT and can be overridden by Project Config Service. Using this
|
||||
option with unsupported load formats will result in an error.
|
||||
|
||||
See
|
||||
https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.column_name_character_map
|
||||
"""
|
||||
return self._get_sub_prop(
|
||||
"columnNameCharacterMap",
|
||||
ColumnNameCharacterMap.COLUMN_NAME_CHARACTER_MAP_UNSPECIFIED,
|
||||
)
|
||||
|
||||
@column_name_character_map.setter
|
||||
def column_name_character_map(self, value: Optional[str]):
|
||||
if value is None:
|
||||
value = ColumnNameCharacterMap.COLUMN_NAME_CHARACTER_MAP_UNSPECIFIED
|
||||
self._set_sub_prop("columnNameCharacterMap", value)
|
||||
|
||||
|
||||
class LoadJob(_AsyncJob):
|
||||
"""Asynchronous job for loading data into a table.
|
||||
|
||||
Can load from Google Cloud Storage URIs or from a file.
|
||||
|
||||
Args:
|
||||
job_id (str): the job's ID
|
||||
|
||||
source_uris (Optional[Sequence[str]]):
|
||||
URIs of one or more data files to be loaded. See
|
||||
https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.source_uris
|
||||
for supported URI formats. Pass None for jobs that load from a file.
|
||||
|
||||
destination (google.cloud.bigquery.table.TableReference): reference to table into which data is to be loaded.
|
||||
|
||||
client (google.cloud.bigquery.client.Client):
|
||||
A client which holds credentials and project configuration
|
||||
for the dataset (which requires a project).
|
||||
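Example:
    Load jobs are usually created via
    :meth:`~google.cloud.bigquery.client.Client.load_table_from_uri`
    rather than instantiated directly (an illustrative sketch; the
    ``client``, URI, and table ID below are placeholders):

    >>> config = LoadJobConfig(source_format="CSV", skip_leading_rows=1)
    >>> load_job = client.load_table_from_uri(
    ...     "gs://example-bucket/data.csv",
    ...     "my-project.my_dataset.my_table",
    ...     job_config=config,
    ... )
    >>> load_job.result()  # blocks until the load completes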
"""
|
||||
|
||||
_JOB_TYPE = "load"
|
||||
_CONFIG_CLASS = LoadJobConfig
|
||||
|
||||
def __init__(self, job_id, source_uris, destination, client, job_config=None):
|
||||
super(LoadJob, self).__init__(job_id, client)
|
||||
|
||||
if job_config is not None:
|
||||
self._properties["configuration"] = job_config._properties
|
||||
|
||||
if source_uris is not None:
|
||||
_helpers._set_sub_prop(
|
||||
self._properties, ["configuration", "load", "sourceUris"], source_uris
|
||||
)
|
||||
|
||||
if destination is not None:
|
||||
_helpers._set_sub_prop(
|
||||
self._properties,
|
||||
["configuration", "load", "destinationTable"],
|
||||
destination.to_api_repr(),
|
||||
)
|
||||
|
||||
@property
|
||||
def configuration(self) -> LoadJobConfig:
|
||||
"""The configuration for this load job."""
|
||||
return typing.cast(LoadJobConfig, super().configuration)
|
||||
|
||||
@property
|
||||
def destination(self):
|
||||
"""google.cloud.bigquery.table.TableReference: table where loaded rows are written
|
||||
|
||||
See:
|
||||
https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.destination_table
|
||||
"""
|
||||
dest_config = _helpers._get_sub_prop(
|
||||
self._properties, ["configuration", "load", "destinationTable"]
|
||||
)
|
||||
return TableReference.from_api_repr(dest_config)
|
||||
|
||||
@property
|
||||
def source_uris(self):
|
||||
"""Optional[Sequence[str]]: URIs of data files to be loaded. See
|
||||
https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.source_uris
|
||||
for supported URI formats. None for jobs that load from a file.
|
||||
"""
|
||||
return _helpers._get_sub_prop(
|
||||
self._properties, ["configuration", "load", "sourceUris"]
|
||||
)
|
||||
|
||||
@property
|
||||
def allow_jagged_rows(self):
|
||||
"""See
|
||||
:attr:`google.cloud.bigquery.job.LoadJobConfig.allow_jagged_rows`.
|
||||
"""
|
||||
return self.configuration.allow_jagged_rows
|
||||
|
||||
@property
|
||||
def allow_quoted_newlines(self):
|
||||
"""See
|
||||
:attr:`google.cloud.bigquery.job.LoadJobConfig.allow_quoted_newlines`.
|
||||
"""
|
||||
return self.configuration.allow_quoted_newlines
|
||||
|
||||
@property
|
||||
def autodetect(self):
|
||||
"""See
|
||||
:attr:`google.cloud.bigquery.job.LoadJobConfig.autodetect`.
|
||||
"""
|
||||
return self.configuration.autodetect
|
||||
|
||||
@property
|
||||
def connection_properties(self) -> List[ConnectionProperty]:
|
||||
"""See
|
||||
:attr:`google.cloud.bigquery.job.LoadJobConfig.connection_properties`.
|
||||
|
||||
.. versionadded:: 3.7.0
|
||||
"""
|
||||
return self.configuration.connection_properties
|
||||
|
||||
@property
|
||||
def create_disposition(self):
|
||||
"""See
|
||||
:attr:`google.cloud.bigquery.job.LoadJobConfig.create_disposition`.
|
||||
"""
|
||||
return self.configuration.create_disposition
|
||||
|
||||
@property
|
||||
def create_session(self) -> Optional[bool]:
|
||||
"""See
|
||||
:attr:`google.cloud.bigquery.job.LoadJobConfig.create_session`.
|
||||
|
||||
.. versionadded:: 3.7.0
|
||||
"""
|
||||
return self.configuration.create_session
|
||||
|
||||
@property
|
||||
def encoding(self):
|
||||
"""See
|
||||
:attr:`google.cloud.bigquery.job.LoadJobConfig.encoding`.
|
||||
"""
|
||||
return self.configuration.encoding
|
||||
|
||||
@property
|
||||
def field_delimiter(self):
|
||||
"""See
|
||||
:attr:`google.cloud.bigquery.job.LoadJobConfig.field_delimiter`.
|
||||
"""
|
||||
return self.configuration.field_delimiter
|
||||
|
||||
@property
|
||||
def ignore_unknown_values(self):
|
||||
"""See
|
||||
:attr:`google.cloud.bigquery.job.LoadJobConfig.ignore_unknown_values`.
|
||||
"""
|
||||
return self.configuration.ignore_unknown_values
|
||||
|
||||
@property
|
||||
def max_bad_records(self):
|
||||
"""See
|
||||
:attr:`google.cloud.bigquery.job.LoadJobConfig.max_bad_records`.
|
||||
"""
|
||||
return self.configuration.max_bad_records
|
||||
|
||||
@property
|
||||
def null_marker(self):
|
||||
"""See
|
||||
:attr:`google.cloud.bigquery.job.LoadJobConfig.null_marker`.
|
||||
"""
|
||||
return self.configuration.null_marker
|
||||
|
||||
@property
|
||||
def quote_character(self):
|
||||
"""See
|
||||
:attr:`google.cloud.bigquery.job.LoadJobConfig.quote_character`.
|
||||
"""
|
||||
return self.configuration.quote_character
|
||||
|
||||
@property
|
||||
def reference_file_schema_uri(self):
|
||||
"""See:
|
||||
:attr:`google.cloud.bigquery.job.LoadJobConfig.reference_file_schema_uri`.
|
||||
"""
|
||||
return self.configuration.reference_file_schema_uri
|
||||
|
||||
@property
|
||||
def skip_leading_rows(self):
|
||||
"""See
|
||||
:attr:`google.cloud.bigquery.job.LoadJobConfig.skip_leading_rows`.
|
||||
"""
|
||||
return self.configuration.skip_leading_rows
|
||||
|
||||
@property
|
||||
def source_format(self):
|
||||
"""See
|
||||
:attr:`google.cloud.bigquery.job.LoadJobConfig.source_format`.
|
||||
"""
|
||||
return self.configuration.source_format
|
||||
|
||||
@property
|
||||
def write_disposition(self):
|
||||
"""See
|
||||
:attr:`google.cloud.bigquery.job.LoadJobConfig.write_disposition`.
|
||||
"""
|
||||
return self.configuration.write_disposition
|
||||
|
||||
@property
|
||||
def schema(self):
|
||||
"""See
|
||||
:attr:`google.cloud.bigquery.job.LoadJobConfig.schema`.
|
||||
"""
|
||||
return self.configuration.schema
|
||||
|
||||
@property
|
||||
def destination_encryption_configuration(self):
|
||||
"""google.cloud.bigquery.encryption_configuration.EncryptionConfiguration: Custom
|
||||
encryption configuration for the destination table.
|
||||
|
||||
Custom encryption configuration (e.g., Cloud KMS keys)
|
||||
or :data:`None` if using default encryption.
|
||||
|
||||
See
|
||||
:attr:`google.cloud.bigquery.job.LoadJobConfig.destination_encryption_configuration`.
|
||||
"""
|
||||
return self.configuration.destination_encryption_configuration
|
||||
|
||||
@property
|
||||
def destination_table_description(self):
|
||||
"""Optional[str] name given to destination table.
|
||||
|
||||
See:
|
||||
https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#DestinationTableProperties.FIELDS.description
|
||||
"""
|
||||
return self.configuration.destination_table_description
|
||||
|
||||
@property
|
||||
def destination_table_friendly_name(self):
|
||||
"""Optional[str] name given to destination table.
|
||||
|
||||
See:
|
||||
https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#DestinationTableProperties.FIELDS.friendly_name
|
||||
"""
|
||||
return self.configuration.destination_table_friendly_name
|
||||
|
||||
@property
|
||||
def range_partitioning(self):
|
||||
"""See
|
||||
:attr:`google.cloud.bigquery.job.LoadJobConfig.range_partitioning`.
|
||||
"""
|
||||
return self.configuration.range_partitioning
|
||||
|
||||
@property
|
||||
def time_partitioning(self):
|
||||
"""See
|
||||
:attr:`google.cloud.bigquery.job.LoadJobConfig.time_partitioning`.
|
||||
"""
|
||||
return self.configuration.time_partitioning
|
||||
|
||||
@property
|
||||
def use_avro_logical_types(self):
|
||||
"""See
|
||||
:attr:`google.cloud.bigquery.job.LoadJobConfig.use_avro_logical_types`.
|
||||
"""
|
||||
return self.configuration.use_avro_logical_types
|
||||
|
||||
@property
|
||||
def clustering_fields(self):
|
||||
"""See
|
||||
:attr:`google.cloud.bigquery.job.LoadJobConfig.clustering_fields`.
|
||||
"""
|
||||
return self.configuration.clustering_fields
|
||||
|
||||
@property
|
||||
def schema_update_options(self):
|
||||
"""See
|
||||
:attr:`google.cloud.bigquery.job.LoadJobConfig.schema_update_options`.
|
||||
"""
|
||||
return self.configuration.schema_update_options
|
||||
|
||||
@property
|
||||
def input_file_bytes(self):
|
||||
"""Count of bytes loaded from source files.
|
||||
|
||||
Returns:
|
||||
Optional[int]: the count (None until set from the server).
|
||||
|
||||
Raises:
|
||||
ValueError: for invalid value types.
|
||||
"""
|
||||
return _helpers._int_or_none(
|
||||
_helpers._get_sub_prop(
|
||||
self._properties, ["statistics", "load", "inputFileBytes"]
|
||||
)
|
||||
)
|
||||
|
||||
@property
|
||||
def input_files(self):
|
||||
"""Count of source files.
|
||||
|
||||
Returns:
|
||||
Optional[int]: the count (None until set from the server).
|
||||
"""
|
||||
return _helpers._int_or_none(
|
||||
_helpers._get_sub_prop(
|
||||
self._properties, ["statistics", "load", "inputFiles"]
|
||||
)
|
||||
)
|
||||
|
||||
@property
|
||||
def output_bytes(self):
|
||||
"""Count of bytes saved to destination table.
|
||||
|
||||
Returns:
|
||||
Optional[int]: the count (None until set from the server).
|
||||
"""
|
||||
return _helpers._int_or_none(
|
||||
_helpers._get_sub_prop(
|
||||
self._properties, ["statistics", "load", "outputBytes"]
|
||||
)
|
||||
)
|
||||
|
||||
@property
|
||||
def output_rows(self):
|
||||
"""Count of rows saved to destination table.
|
||||
|
||||
Returns:
|
||||
Optional[int]: the count (None until set from the server).
|
||||
"""
|
||||
return _helpers._int_or_none(
|
||||
_helpers._get_sub_prop(
|
||||
self._properties, ["statistics", "load", "outputRows"]
|
||||
)
|
||||
)
|
||||
|
||||
def to_api_repr(self):
|
||||
"""Generate a resource for :meth:`_begin`."""
|
||||
# Exclude statistics, if set.
|
||||
return {
|
||||
"jobReference": self._properties["jobReference"],
|
||||
"configuration": self._properties["configuration"],
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def from_api_repr(cls, resource: dict, client) -> "LoadJob":
|
||||
"""Factory: construct a job given its API representation
|
||||
|
||||
.. note::
|
||||
|
||||
This method assumes that the project found in the resource matches
|
||||
the client's project.
|
||||
|
||||
Args:
|
||||
resource (Dict): load job representation returned from the API
|
||||
|
||||
client (google.cloud.bigquery.client.Client):
|
||||
Client which holds credentials and project
|
||||
configuration for the dataset.
|
||||
|
||||
Returns:
|
||||
google.cloud.bigquery.job.LoadJob: Job parsed from ``resource``.
|
||||
"""
|
||||
cls._check_resource_config(resource)
|
||||
job_ref = _JobReference._from_api_repr(resource["jobReference"])
|
||||
job = cls(job_ref, None, None, client)
|
||||
job._set_properties(resource)
|
||||
return job
|
||||
@@ -0,0 +1,20 @@
|
||||
# Copyright 2020 Google LLC
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from google.cloud.bigquery.magics.magics import context
|
||||
|
||||
|
||||
# For backwards compatibility we need to make the context available in the path
|
||||
# google.cloud.bigquery.magics.context
|
||||
__all__ = ("context",)
|
||||
@@ -0,0 +1,34 @@
|
||||
# Copyright 2020 Google LLC
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from google.cloud.bigquery.magics.line_arg_parser.exceptions import ParseError
|
||||
from google.cloud.bigquery.magics.line_arg_parser.exceptions import (
|
||||
DuplicateQueryParamsError,
|
||||
QueryParamsParseError,
|
||||
)
|
||||
from google.cloud.bigquery.magics.line_arg_parser.lexer import Lexer
|
||||
from google.cloud.bigquery.magics.line_arg_parser.lexer import TokenType
|
||||
from google.cloud.bigquery.magics.line_arg_parser.parser import Parser
|
||||
from google.cloud.bigquery.magics.line_arg_parser.visitors import QueryParamsExtractor
|
||||
|
||||
|
||||
__all__ = (
|
||||
"DuplicateQueryParamsError",
|
||||
"Lexer",
|
||||
"Parser",
|
||||
"ParseError",
|
||||
"QueryParamsExtractor",
|
||||
"QueryParamsParseError",
|
||||
"TokenType",
|
||||
)
|
||||
@@ -0,0 +1,25 @@
|
||||
# Copyright 2020 Google LLC
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
|
||||
class ParseError(Exception):
|
||||
pass
|
||||
|
||||
|
||||
class QueryParamsParseError(ParseError):
|
||||
"""Raised when --params option is syntactically incorrect."""
|
||||
|
||||
|
||||
class DuplicateQueryParamsError(ParseError):
|
||||
pass
|
||||
@@ -0,0 +1,200 @@
|
||||
# Copyright 2020 Google LLC
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from collections import namedtuple
|
||||
from collections import OrderedDict
|
||||
import itertools
|
||||
import re
|
||||
|
||||
import enum
|
||||
|
||||
|
||||
Token = namedtuple("Token", ("type_", "lexeme", "pos"))
|
||||
StateTransition = namedtuple("StateTransition", ("new_state", "total_offset"))
|
||||
|
||||
# Pattern matching is done with regexes, and the order in which the token patterns are
|
||||
# defined is important.
|
||||
#
|
||||
# Suppose we had the following token definitions:
|
||||
# * INT - a token matching integers,
|
||||
# * FLOAT - a token matching floating point numbers,
|
||||
# * DOT - a token matching a single literal dot character, i.e. "."
|
||||
#
|
||||
# The FLOAT token would have to be defined first, since we would want the input "1.23"
|
||||
# to be tokenized as a single FLOAT token, and *not* three tokens (INT, DOT, INT).
|
||||
#
|
||||
# Sometimes, however, different tokens match too similar patterns, and it is not
|
||||
# possible to define them in an order that would avoid any ambiguity. One such case is
|
||||
# the OPT_VAL and PY_NUMBER tokens, as both can match an integer literal, say "42".
|
||||
#
|
||||
# In order to avoid the dilemmas, the lexer implements a concept of STATES. States are
|
||||
# used to split token definitions into subgroups, and in each lexer state only a single
|
||||
# subgroup is used for tokenizing the input. Lexer states can therefore be thought of as
|
||||
# token namespaces.
|
||||
#
|
||||
# For example, while parsing the value of the "--params" option, we do not want to
|
||||
# "recognize" it as a single OPT_VAL token, but instead want to parse it as a Python
|
||||
# dictionary and verify its syntactic correctness. On the other hand, while parsing
|
||||
# the value of an option other than "--params", we do not really care about its
|
||||
# structure, and thus do not want to use any of the "Python tokens" for pattern matching.
|
||||
#
|
||||
# Token definition order is important, thus an OrderedDict is used. In addition, PEP 468
|
||||
# guarantees us that the order of kwargs is preserved in Python 3.6+.
|
||||
token_types = OrderedDict(
|
||||
state_parse_pos_args=OrderedDict(
|
||||
GOTO_PARSE_NON_PARAMS_OPTIONS=r"(?P<GOTO_PARSE_NON_PARAMS_OPTIONS>(?=--))", # double dash - starting the options list
|
||||
DEST_VAR=r"(?P<DEST_VAR>[^\d\W]\w*)", # essentially a Python ID
|
||||
),
|
||||
state_parse_non_params_options=OrderedDict(
|
||||
GOTO_PARSE_PARAMS_OPTION=r"(?P<GOTO_PARSE_PARAMS_OPTION>(?=--params(?:\s|=|--|$)))", # the --params option
|
||||
OPTION_SPEC=r"(?P<OPTION_SPEC>--\w+)",
|
||||
OPTION_EQ=r"(?P<OPTION_EQ>=)",
|
||||
OPT_VAL=r"(?P<OPT_VAL>\S+?(?=\s|--|$))",
|
||||
),
|
||||
state_parse_params_option=OrderedDict(
|
||||
PY_STRING=r"(?P<PY_STRING>(?:{})|(?:{}))".format( # single and double quoted strings
|
||||
r"'(?:[^'\\]|\.)*'", r'"(?:[^"\\]|\.)*"'
|
||||
),
|
||||
PARAMS_OPT_SPEC=r"(?P<PARAMS_OPT_SPEC>--params(?=\s|=|--|$))",
|
||||
PARAMS_OPT_EQ=r"(?P<PARAMS_OPT_EQ>=)",
|
||||
GOTO_PARSE_NON_PARAMS_OPTIONS=r"(?P<GOTO_PARSE_NON_PARAMS_OPTIONS>(?=--\w+))", # found another option spec
|
||||
PY_BOOL=r"(?P<PY_BOOL>True|False)",
|
||||
DOLLAR_PY_ID=r"(?P<DOLLAR_PY_ID>\$[^\d\W]\w*)",
|
||||
PY_NUMBER=r"(?P<PY_NUMBER>-?[1-9]\d*(?:\.\d+)?(?:[eE][+-]?\d+)?)",
|
||||
SQUOTE=r"(?P<SQUOTE>')",
|
||||
DQUOTE=r'(?P<DQUOTE>")',
|
||||
COLON=r"(?P<COLON>:)",
|
||||
COMMA=r"(?P<COMMA>,)",
|
||||
LCURL=r"(?P<LCURL>\{)",
|
||||
RCURL=r"(?P<RCURL>})",
|
||||
LSQUARE=r"(?P<LSQUARE>\[)",
|
||||
RSQUARE=r"(?P<RSQUARE>])",
|
||||
LPAREN=r"(?P<LPAREN>\()",
|
||||
RPAREN=r"(?P<RPAREN>\))",
|
||||
),
|
||||
common=OrderedDict(
|
||||
WS=r"(?P<WS>\s+)",
|
||||
EOL=r"(?P<EOL>$)",
|
||||
UNKNOWN=r"(?P<UNKNOWN>\S+)", # anything not a whitespace or matched by something else
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
class AutoStrEnum(str, enum.Enum):
|
||||
"""Base enum class for for name=value str enums."""
|
||||
|
||||
def _generate_next_value_(name, start, count, last_values):
|
||||
return name
|
||||
|
||||
|
||||
TokenType = AutoStrEnum( # type: ignore # pytype: disable=wrong-arg-types
|
||||
"TokenType",
|
||||
[
|
||||
(name, enum.auto())
|
||||
for name in itertools.chain.from_iterable(token_types.values())
|
||||
if not name.startswith("GOTO_")
|
||||
],
|
||||
)
|
||||
|
||||
|
||||
class LexerState(AutoStrEnum):
|
||||
PARSE_POS_ARGS = enum.auto() # parsing positional arguments
|
||||
PARSE_NON_PARAMS_OPTIONS = enum.auto() # parsing options other than "--params"
|
||||
PARSE_PARAMS_OPTION = enum.auto() # parsing the "--params" option
|
||||
STATE_END = enum.auto()
|
||||
|
||||
|
||||
class Lexer(object):
|
||||
"""Lexical analyzer for tokenizing the cell magic input line."""
|
||||
|
||||
_GRAND_PATTERNS = {
|
||||
LexerState.PARSE_POS_ARGS: re.compile(
|
||||
"|".join(
|
||||
itertools.chain(
|
||||
token_types["state_parse_pos_args"].values(),
|
||||
token_types["common"].values(),
|
||||
)
|
||||
)
|
||||
),
|
||||
LexerState.PARSE_NON_PARAMS_OPTIONS: re.compile(
|
||||
"|".join(
|
||||
itertools.chain(
|
||||
token_types["state_parse_non_params_options"].values(),
|
||||
token_types["common"].values(),
|
||||
)
|
||||
)
|
||||
),
|
||||
LexerState.PARSE_PARAMS_OPTION: re.compile(
|
||||
"|".join(
|
||||
itertools.chain(
|
||||
token_types["state_parse_params_option"].values(),
|
||||
token_types["common"].values(),
|
||||
)
|
||||
)
|
||||
),
|
||||
}
|
||||
|
||||
def __init__(self, input_text):
|
||||
self._text = input_text
|
||||
|
||||
def __iter__(self):
|
||||
# Since re.scanner does not seem to support manipulating inner scanner states,
|
||||
# we need to implement lexer state transitions manually using special
|
||||
# non-capturing lookahead token patterns to signal when a state transition
|
||||
# should be made.
|
||||
# Since we don't have "nested" states, we don't really need a stack and
|
||||
# this simple mechanism is sufficient.
|
||||
state = LexerState.PARSE_POS_ARGS
|
||||
offset = 0 # the number of characters processed so far
|
||||
|
||||
while state != LexerState.STATE_END:
|
||||
token_stream = self._find_state_tokens(state, offset)
|
||||
|
||||
for maybe_token in token_stream: # pragma: NO COVER
|
||||
if isinstance(maybe_token, StateTransition):
|
||||
state = maybe_token.new_state
|
||||
offset = maybe_token.total_offset
|
||||
break
|
||||
|
||||
if maybe_token.type_ != TokenType.WS:
|
||||
yield maybe_token
|
||||
|
||||
if maybe_token.type_ == TokenType.EOL:
|
||||
state = LexerState.STATE_END
|
||||
break
|
||||
|
||||
def _find_state_tokens(self, state, current_offset):
|
||||
"""Scan the input for current state's tokens starting at ``current_offset``.
|
||||
|
||||
Args:
|
||||
state (LexerState): The current lexer state.
|
||||
current_offset (int): The offset in the input text, i.e. the number
|
||||
of characters already scanned so far.
|
||||
|
||||
Yields:
|
||||
The next ``Token`` or ``StateTransition`` instance.
|
||||
"""
|
||||
pattern = self._GRAND_PATTERNS[state]
|
||||
scanner = pattern.finditer(self._text, current_offset)
|
||||
|
||||
for match in scanner: # pragma: NO COVER
|
||||
token_type = match.lastgroup
|
||||
|
||||
if token_type.startswith("GOTO_"):
|
||||
yield StateTransition(
|
||||
new_state=getattr(LexerState, token_type[5:]), # w/o "GOTO_" prefix
|
||||
total_offset=match.start(),
|
||||
)
|
||||
|
||||
yield Token(token_type, match.group(), match.start())
|
||||
@@ -0,0 +1,484 @@
|
||||
# Copyright 2020 Google LLC
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from google.cloud.bigquery.magics.line_arg_parser import DuplicateQueryParamsError
|
||||
from google.cloud.bigquery.magics.line_arg_parser import ParseError
|
||||
from google.cloud.bigquery.magics.line_arg_parser import QueryParamsParseError
|
||||
from google.cloud.bigquery.magics.line_arg_parser import TokenType
|
||||
|
||||
|
||||
class ParseNode(object):
|
||||
"""A base class for nodes in the input parsed to an abstract syntax tree."""
|
||||
|
||||
|
||||
class InputLine(ParseNode):
|
||||
def __init__(self, destination_var, option_list):
|
||||
self.destination_var = destination_var
|
||||
self.option_list = option_list
|
||||
|
||||
|
||||
class DestinationVar(ParseNode):
|
||||
def __init__(self, token):
|
||||
# token type is DEST_VAR
|
||||
self.token = token
|
||||
self.name = token.lexeme if token is not None else None
|
||||
|
||||
|
||||
class CmdOptionList(ParseNode):
|
||||
def __init__(self, option_nodes):
|
||||
self.options = [node for node in option_nodes] # shallow copy
|
||||
|
||||
|
||||
class CmdOption(ParseNode):
|
||||
def __init__(self, name, value):
|
||||
self.name = name # string
|
||||
self.value = value # CmdOptionValue node
|
||||
|
||||
|
||||
class ParamsOption(CmdOption):
|
||||
def __init__(self, value):
|
||||
super(ParamsOption, self).__init__("params", value)
|
||||
|
||||
|
||||
class CmdOptionValue(ParseNode):
|
||||
def __init__(self, token):
|
||||
# token type is OPT_VAL
|
||||
self.token = token
|
||||
self.value = token.lexeme
|
||||
|
||||
|
||||
class PyVarExpansion(ParseNode):
|
||||
def __init__(self, token):
|
||||
self.token = token
|
||||
self.raw_value = token.lexeme
|
||||
|
||||
|
||||
class PyDict(ParseNode):
|
||||
def __init__(self, dict_items):
|
||||
self.items = [item for item in dict_items] # shallow copy
|
||||
|
||||
|
||||
class PyDictItem(ParseNode):
|
||||
def __init__(self, key, value):
|
||||
self.key = key
|
||||
self.value = value
|
||||
|
||||
|
||||
class PyDictKey(ParseNode):
|
||||
def __init__(self, token):
|
||||
self.token = token
|
||||
self.key_value = token.lexeme
|
||||
|
||||
|
||||
class PyScalarValue(ParseNode):
|
||||
def __init__(self, token, raw_value):
|
||||
self.token = token
|
||||
self.raw_value = raw_value
|
||||
|
||||
|
||||
class PyTuple(ParseNode):
|
||||
def __init__(self, tuple_items):
|
||||
self.items = [item for item in tuple_items] # shallow copy
|
||||
|
||||
|
||||
class PyList(ParseNode):
|
||||
def __init__(self, list_items):
|
||||
self.items = [item for item in list_items] # shallow copy
|
||||
|
||||
|
||||
class Parser(object):
|
||||
"""Parser for the tokenized cell magic input line.
|
||||
|
||||
The parser recognizes a simplified subset of Python grammar, specifically
|
||||
a dictionary representation in typical use cases when the "--params" option
|
||||
is used with the %%bigquery cell magic.
|
||||
|
||||
The grammar (terminal symbols are CAPITALIZED):
|
||||
|
||||
input_line : destination_var option_list
|
||||
destination_var : DEST_VAR | EMPTY
|
||||
option_list : (OPTION_SPEC [OPTION_EQ] option_value)*
|
||||
(params_option | EMPTY)
|
||||
(OPTION_SPEC [OPTION_EQ] option_value)*
|
||||
|
||||
option_value : OPT_VAL | EMPTY
|
||||
|
||||
# DOLLAR_PY_ID can occur if a variable passed to --params does not exist
|
||||
# and is thus not expanded to a dict.
|
||||
params_option : PARAMS_OPT_SPEC [PARAMS_OPT_EQ] \
|
||||
(DOLLAR_PY_ID | PY_STRING | py_dict)
|
||||
|
||||
py_dict : LCURL dict_items RCURL
|
||||
dict_items : dict_item | (dict_item COMMA dict_items)
|
||||
dict_item : (dict_key COLON py_value) | EMPTY
|
||||
|
||||
# dict keys are actually @parameter names in the cell body (i.e. the query),
|
||||
# thus restricting them to strings.
|
||||
dict_key : PY_STRING
|
||||
|
||||
py_value : PY_BOOL
|
||||
| PY_NUMBER
|
||||
| PY_STRING
|
||||
| py_tuple
|
||||
| py_list
|
||||
| py_dict
|
||||
|
||||
py_tuple : LPAREN collection_items RPAREN
|
||||
py_list : LSQUARE collection_items RSQUARE
|
||||
collection_items : collection_item | (collection_item COMMA collection_items)
|
||||
collection_item : py_value | EMPTY
|
||||
|
||||
Args:
|
||||
lexer (line_arg_parser.lexer.Lexer):
|
||||
An iterable producing a tokenized cell magic argument line.
|
||||
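
Example:
    Parsing a tokenized argument line into an AST (an illustrative
    sketch):

    >>> from google.cloud.bigquery.magics.line_arg_parser import Lexer
    >>> tree = Parser(Lexer("df --params {'min_age': 18}")).input_line()
    >>> tree.destination_var.name
    'df'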
"""
|
||||
|
||||
def __init__(self, lexer):
|
||||
self._lexer = lexer
|
||||
self._tokens_iter = iter(self._lexer)
|
||||
self.get_next_token()
|
||||
|
||||
def get_next_token(self):
|
||||
"""Obtain the next token from the token stream and store it as current."""
|
||||
token = next(self._tokens_iter)
|
||||
self._current_token = token
|
||||
|
||||
def consume(self, expected_type, exc_type=ParseError):
|
||||
"""Move to the next token in token stream if it matches the expected type.
|
||||
|
||||
Args:
|
||||
expected_type (lexer.TokenType): The expected token type to be consumed.
|
||||
exc_type (Optional[ParseError]): The type of the exception to raise. Should be
|
||||
the ``ParseError`` class or one of its subclasses. Defaults to
|
||||
``ParseError``.
|
||||
|
||||
Raises:
|
||||
ParseError: If the current token does not match the expected type.
|
||||
"""
|
||||
if self._current_token.type_ == expected_type:
|
||||
if expected_type != TokenType.EOL:
|
||||
self.get_next_token()
|
||||
else:
|
||||
if self._current_token.type_ == TokenType.EOL:
|
||||
msg = "Unexpected end of input, expected {}.".format(expected_type)
|
||||
else:
|
||||
msg = "Expected token type {}, but found {} at position {}.".format(
|
||||
expected_type, self._current_token.lexeme, self._current_token.pos
|
||||
)
|
||||
self.error(message=msg, exc_type=exc_type)
|
||||
|
||||
def error(self, message="Syntax error.", exc_type=ParseError):
|
||||
"""Raise an error with the given message.
|
||||
|
||||
Args:
|
||||
message (str): The error message to use.
|
||||
exc_type (Optional[ParseError]): The type of the exception to raise. Should be
|
||||
the ``ParseError`` class or one of its subclasses. Defaults to
|
||||
``ParseError``.
|
||||
|
||||
Raises:
|
||||
ParseError: Always raised, as ``exc_type``, with the given message.
|
||||
"""
|
||||
raise exc_type(message)
|
||||
|
||||
def input_line(self):
|
||||
"""The top level method for parsing the cell magic arguments line.
|
||||
|
||||
Implements the following grammar production rule:
|
||||
|
||||
input_line : destination_var option_list
|
||||
"""
|
||||
dest_var = self.destination_var()
|
||||
options = self.option_list()
|
||||
|
||||
token = self._current_token
|
||||
|
||||
if token.type_ != TokenType.EOL:
|
||||
msg = "Unexpected input at position {}: {}".format(token.pos, token.lexeme)
|
||||
self.error(msg)
|
||||
|
||||
return InputLine(dest_var, options)
|
||||
|
||||
def destination_var(self):
|
||||
"""Implementation of the ``destination_var`` grammar production rule.
|
||||
|
||||
Production:
|
||||
|
||||
destination_var : DEST_VAR | EMPTY
|
||||
"""
|
||||
token = self._current_token
|
||||
|
||||
if token.type_ == TokenType.DEST_VAR:
|
||||
self.consume(TokenType.DEST_VAR)
|
||||
result = DestinationVar(token)
|
||||
elif token.type_ == TokenType.UNKNOWN:
|
||||
msg = "Unknown input at position {}: {}".format(token.pos, token.lexeme)
|
||||
self.error(msg)
|
||||
else:
|
||||
result = DestinationVar(None)
|
||||
|
||||
return result
|
||||
|
||||
def option_list(self):
|
||||
"""Implementation of the ``option_list`` grammar production rule.
|
||||
|
||||
Production:
|
||||
|
||||
option_list : (OPTION_SPEC [OPTION_EQ] option_value)*
|
||||
(params_option | EMPTY)
|
||||
(OPTION_SPEC [OPTION_EQ] option_value)*
|
||||
"""
|
||||
all_options = []
|
||||
|
||||
def parse_nonparams_options():
|
||||
while self._current_token.type_ == TokenType.OPTION_SPEC:
|
||||
token = self._current_token
|
||||
self.consume(TokenType.OPTION_SPEC)
|
||||
|
||||
opt_name = token.lexeme[2:] # cut off the "--" prefix
|
||||
|
||||
# skip the optional "=" character
|
||||
if self._current_token.type_ == TokenType.OPTION_EQ:
|
||||
self.consume(TokenType.OPTION_EQ)
|
||||
|
||||
opt_value = self.option_value()
|
||||
option = CmdOption(opt_name, opt_value)
|
||||
all_options.append(option)
|
||||
|
||||
parse_nonparams_options()
|
||||
|
||||
token = self._current_token
|
||||
|
||||
if token.type_ == TokenType.PARAMS_OPT_SPEC:
|
||||
option = self.params_option()
|
||||
all_options.append(option)
|
||||
|
||||
parse_nonparams_options()
|
||||
|
||||
if self._current_token.type_ == TokenType.PARAMS_OPT_SPEC:
|
||||
self.error(
|
||||
message="Duplicate --params option", exc_type=DuplicateQueryParamsError
|
||||
)
|
||||
|
||||
return CmdOptionList(all_options)
|
||||
|
||||
def option_value(self):
|
||||
"""Implementation of the ``option_value`` grammar production rule.
|
||||
|
||||
Production:
|
||||
|
||||
option_value : OPT_VAL | EMPTY
|
||||
"""
|
||||
token = self._current_token
|
||||
|
||||
if token.type_ == TokenType.OPT_VAL:
|
||||
self.consume(TokenType.OPT_VAL)
|
||||
result = CmdOptionValue(token)
|
||||
elif token.type_ == TokenType.UNKNOWN:
|
||||
msg = "Unknown input at position {}: {}".format(token.pos, token.lexeme)
|
||||
self.error(msg)
|
||||
else:
|
||||
result = None
|
||||
|
||||
return result
|
||||
|
||||
def params_option(self):
|
||||
"""Implementation of the ``params_option`` grammar production rule.
|
||||
|
||||
Production:
|
||||
|
||||
params_option : PARAMS_OPT_SPEC [PARAMS_OPT_EQ] \
|
||||
(DOLLAR_PY_ID | PY_STRING | py_dict)
|
||||
"""
|
||||
self.consume(TokenType.PARAMS_OPT_SPEC)
|
||||
|
||||
# skip the optional "=" character
|
||||
if self._current_token.type_ == TokenType.PARAMS_OPT_EQ:
|
||||
self.consume(TokenType.PARAMS_OPT_EQ)
|
||||
|
||||
if self._current_token.type_ == TokenType.DOLLAR_PY_ID:
|
||||
token = self._current_token
|
||||
self.consume(TokenType.DOLLAR_PY_ID)
|
||||
opt_value = PyVarExpansion(token)
|
||||
elif self._current_token.type_ == TokenType.PY_STRING:
|
||||
token = self._current_token
|
||||
self.consume(TokenType.PY_STRING, exc_type=QueryParamsParseError)
|
||||
opt_value = PyScalarValue(token, token.lexeme)
|
||||
else:
|
||||
opt_value = self.py_dict()
|
||||
|
||||
result = ParamsOption(opt_value)
|
||||
|
||||
return result
|
||||
|
||||
def py_dict(self):
|
||||
"""Implementation of the ``py_dict`` grammar production rule.
|
||||
|
||||
Production:
|
||||
|
||||
py_dict : LCURL dict_items RCURL
|
||||
"""
|
||||
self.consume(TokenType.LCURL, exc_type=QueryParamsParseError)
|
||||
dict_items = self.dict_items()
|
||||
self.consume(TokenType.RCURL, exc_type=QueryParamsParseError)
|
||||
|
||||
return PyDict(dict_items)
|
||||
|
||||
def dict_items(self):
|
||||
"""Implementation of the ``dict_items`` grammar production rule.
|
||||
|
||||
Production:
|
||||
|
||||
dict_items : dict_item | (dict_item COMMA dict_items)
|
||||
"""
|
||||
result = []
|
||||
|
||||
item = self.dict_item()
|
||||
if item is not None:
|
||||
result.append(item)
|
||||
|
||||
while self._current_token.type_ == TokenType.COMMA:
|
||||
self.consume(TokenType.COMMA, exc_type=QueryParamsParseError)
|
||||
item = self.dict_item()
|
||||
if item is not None:
|
||||
result.append(item)
|
||||
|
||||
return result
|
||||
|
||||
def dict_item(self):
|
||||
"""Implementation of the ``dict_item`` grammar production rule.
|
||||
|
||||
Production:
|
||||
|
||||
dict_item : (dict_key COLON py_value) | EMPTY
|
||||
"""
|
||||
token = self._current_token
|
||||
|
||||
if token.type_ == TokenType.PY_STRING:
|
||||
key = self.dict_key()
|
||||
self.consume(TokenType.COLON, exc_type=QueryParamsParseError)
|
||||
value = self.py_value()
|
||||
result = PyDictItem(key, value)
|
||||
elif token.type_ == TokenType.UNKNOWN:
|
||||
msg = "Unknown input at position {}: {}".format(token.pos, token.lexeme)
|
||||
self.error(msg, exc_type=QueryParamsParseError)
|
||||
else:
|
||||
result = None
|
||||
|
||||
return result
|
||||
|
||||
def dict_key(self):
|
||||
"""Implementation of the ``dict_key`` grammar production rule.
|
||||
|
||||
Production:
|
||||
|
||||
dict_key : PY_STRING
|
||||
"""
|
||||
token = self._current_token
|
||||
self.consume(TokenType.PY_STRING, exc_type=QueryParamsParseError)
|
||||
return PyDictKey(token)
|
||||
|
||||
def py_value(self):
|
||||
"""Implementation of the ``py_value`` grammar production rule.
|
||||
|
||||
Production:
|
||||
|
||||
py_value : PY_BOOL | PY_NUMBER | PY_STRING | py_tuple | py_list | py_dict
|
||||
"""
|
||||
token = self._current_token
|
||||
|
||||
if token.type_ == TokenType.PY_BOOL:
|
||||
self.consume(TokenType.PY_BOOL, exc_type=QueryParamsParseError)
|
||||
return PyScalarValue(token, token.lexeme)
|
||||
elif token.type_ == TokenType.PY_NUMBER:
|
||||
self.consume(TokenType.PY_NUMBER, exc_type=QueryParamsParseError)
|
||||
return PyScalarValue(token, token.lexeme)
|
||||
elif token.type_ == TokenType.PY_STRING:
|
||||
self.consume(TokenType.PY_STRING, exc_type=QueryParamsParseError)
|
||||
return PyScalarValue(token, token.lexeme)
|
||||
elif token.type_ == TokenType.LPAREN:
|
||||
tuple_node = self.py_tuple()
|
||||
return tuple_node
|
||||
elif token.type_ == TokenType.LSQUARE:
|
||||
list_node = self.py_list()
|
||||
return list_node
|
||||
elif token.type_ == TokenType.LCURL:
|
||||
dict_node = self.py_dict()
|
||||
return dict_node
|
||||
else:
|
||||
msg = "Unexpected token type {} at position {}.".format(
|
||||
token.type_, token.pos
|
||||
)
|
||||
self.error(msg, exc_type=QueryParamsParseError)
|
||||
|
||||
def py_tuple(self):
|
||||
"""Implementation of the ``py_tuple`` grammar production rule.
|
||||
|
||||
Production:
|
||||
|
||||
py_tuple : LPAREN collection_items RPAREN
|
||||
"""
|
||||
self.consume(TokenType.LPAREN, exc_type=QueryParamsParseError)
|
||||
items = self.collection_items()
|
||||
self.consume(TokenType.RPAREN, exc_type=QueryParamsParseError)
|
||||
|
||||
return PyTuple(items)
|
||||
|
||||
def py_list(self):
|
||||
"""Implementation of the ``py_list`` grammar production rule.
|
||||
|
||||
Production:
|
||||
|
||||
py_list : LSQUARE collection_items RSQUARE
|
||||
"""
|
||||
self.consume(TokenType.LSQUARE, exc_type=QueryParamsParseError)
|
||||
items = self.collection_items()
|
||||
self.consume(TokenType.RSQUARE, exc_type=QueryParamsParseError)
|
||||
|
||||
return PyList(items)
|
||||
|
||||
def collection_items(self):
|
||||
"""Implementation of the ``collection_items`` grammar production rule.
|
||||
|
||||
Production:
|
||||
|
||||
collection_items : collection_item | (collection_item COMMA collection_items)
|
||||
"""
|
||||
result = []
|
||||
|
||||
item = self.collection_item()
|
||||
if item is not None:
|
||||
result.append(item)
|
||||
|
||||
while self._current_token.type_ == TokenType.COMMA:
|
||||
self.consume(TokenType.COMMA, exc_type=QueryParamsParseError)
|
||||
item = self.collection_item()
|
||||
if item is not None:
|
||||
result.append(item)
|
||||
|
||||
return result
|
||||
|
||||
def collection_item(self):
|
||||
"""Implementation of the ``collection_item`` grammar production rule.
|
||||
|
||||
Production:
|
||||
|
||||
collection_item : py_value | EMPTY
|
||||
"""
|
||||
if self._current_token.type_ not in {TokenType.RPAREN, TokenType.RSQUARE}:
|
||||
result = self.py_value()
|
||||
else:
|
||||
result = None # end of list/tuple items
|
||||
|
||||
return result
|
||||
@@ -0,0 +1,159 @@
|
||||
# Copyright 2020 Google LLC
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""This module contains classes that traverse AST and convert it to something else.
|
||||
|
||||
If the parser successfully accepts a valid input (the bigquery cell magic arguments),
|
||||
the result is an Abstract Syntax Tree (AST) that represents the input as a tree
|
||||
with nodes containing various useful metadata.
|
||||
|
||||
Node visitors can process such tree and convert it to something else that can
|
||||
be used for further processing, for example:
|
||||
|
||||
* An optimized version of the tree with redundancy removed/simplified (not used here).
|
||||
* The same tree, but with semantic errors checked, because an otherwise syntactically
|
||||
valid input might still contain errors (not used here, semantic errors are detected
|
||||
elsewhere).
|
||||
* A form that can be directly handed to the code that operates on the input. The
|
||||
``QueryParamsExtractor`` class, for instance, splits the input arguments into
|
||||
the "--params <...>" part and everything else.
|
||||
The "everything else" part can be then parsed by the default Jupyter argument parser,
|
||||
while the --params option is processed separately by the Python evaluator.
|
||||
|
||||
More info on the visitor design pattern:
|
||||
https://en.wikipedia.org/wiki/Visitor_pattern
|
||||
|
||||
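Example:
    Splitting the "--params" option value from the remaining arguments
    (an illustrative sketch):

    >>> from google.cloud.bigquery.magics.line_arg_parser import Lexer, Parser
    >>> tree = Parser(Lexer("df --params {'key': 1} --use_legacy_sql")).input_line()
    >>> QueryParamsExtractor().visit(tree)
    ("{'key': 1}", 'df --use_legacy_sql')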
"""
|
||||
|
||||
from __future__ import print_function
|
||||
|
||||
|
||||
class NodeVisitor(object):
|
||||
"""Base visitor class implementing the dispatch machinery."""
|
||||
|
||||
def visit(self, node):
|
||||
method_name = "visit_{}".format(type(node).__name__)
|
||||
visitor_method = getattr(self, method_name, self.method_missing)
|
||||
return visitor_method(node)
|
||||
|
||||
def method_missing(self, node):
|
||||
raise Exception("No visit_{} method".format(type(node).__name__))
|
||||
|
||||
|
||||
class QueryParamsExtractor(NodeVisitor):
|
||||
"""A visitor that extracts the "--params <...>" part from input line arguments."""
|
||||
|
||||
def visit_InputLine(self, node):
|
||||
params_dict_parts = []
|
||||
other_parts = []
|
||||
|
||||
dest_var_parts = self.visit(node.destination_var)
|
||||
params, other_options = self.visit(node.option_list)
|
||||
|
||||
if dest_var_parts:
|
||||
other_parts.extend(dest_var_parts)
|
||||
|
||||
if dest_var_parts and other_options:
|
||||
other_parts.append(" ")
|
||||
other_parts.extend(other_options)
|
||||
|
||||
params_dict_parts.extend(params)
|
||||
|
||||
return "".join(params_dict_parts), "".join(other_parts)
|
||||
|
||||
def visit_DestinationVar(self, node):
|
||||
return [node.name] if node.name is not None else []
|
||||
|
||||
def visit_CmdOptionList(self, node):
|
||||
params_opt_parts = []
|
||||
other_parts = []
|
||||
|
||||
for i, opt in enumerate(node.options):
|
||||
option_parts = self.visit(opt)
|
||||
list_to_extend = params_opt_parts if opt.name == "params" else other_parts
|
||||
|
||||
if list_to_extend:
|
||||
list_to_extend.append(" ")
|
||||
list_to_extend.extend(option_parts)
|
||||
|
||||
return params_opt_parts, other_parts
|
||||
|
||||
def visit_CmdOption(self, node):
|
||||
result = ["--{}".format(node.name)]
|
||||
|
||||
if node.value is not None:
|
||||
result.append(" ")
|
||||
value_parts = self.visit(node.value)
|
||||
result.extend(value_parts)
|
||||
|
||||
return result
|
||||
|
||||
def visit_CmdOptionValue(self, node):
|
||||
return [node.value]
|
||||
|
||||
def visit_ParamsOption(self, node):
|
||||
value_parts = self.visit(node.value)
|
||||
return value_parts
|
||||
|
||||
def visit_PyVarExpansion(self, node):
|
||||
return [node.raw_value]
|
||||
|
||||
def visit_PyDict(self, node):
|
||||
result = ["{"]
|
||||
|
||||
for i, item in enumerate(node.items):
|
||||
if i > 0:
|
||||
result.append(", ")
|
||||
item_parts = self.visit(item)
|
||||
result.extend(item_parts)
|
||||
|
||||
result.append("}")
|
||||
return result
|
||||
|
||||
def visit_PyDictItem(self, node):
|
||||
result = self.visit(node.key) # key parts
|
||||
result.append(": ")
|
||||
value_parts = self.visit(node.value)
|
||||
result.extend(value_parts)
|
||||
return result
|
||||
|
||||
def visit_PyDictKey(self, node):
|
||||
return [node.key_value]
|
||||
|
||||
def visit_PyScalarValue(self, node):
|
||||
return [node.raw_value]
|
||||
|
||||
def visit_PyTuple(self, node):
|
||||
result = ["("]
|
||||
|
||||
for i, item in enumerate(node.items):
|
||||
if i > 0:
|
||||
result.append(", ")
|
||||
item_parts = self.visit(item)
|
||||
result.extend(item_parts)
|
||||
|
||||
result.append(")")
|
||||
return result
|
||||
|
||||
def visit_PyList(self, node):
|
||||
result = ["["]
|
||||
|
||||
for i, item in enumerate(node.items):
|
||||
if i > 0:
|
||||
result.append(", ")
|
||||
item_parts = self.visit(item)
|
||||
result.extend(item_parts)
|
||||
|
||||
result.append("]")
|
||||
return result
|
||||
@@ -0,0 +1,776 @@
# Copyright 2018 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""IPython Magics

Install ``bigquery-magics`` and call ``%load_ext bigquery_magics`` to use the
``%%bigquery`` cell magic.

See the `BigQuery Magics reference documentation
<https://googleapis.dev/python/bigquery-magics/latest/>`_.
"""

from __future__ import print_function

import re
import ast
import copy
import functools
import sys
import time
import warnings
from concurrent import futures

try:
    import IPython  # type: ignore
    from IPython import display  # type: ignore
    from IPython.core import magic_arguments  # type: ignore
except ImportError:
    raise ImportError("This module can only be loaded in IPython.")

from google.api_core import client_info
from google.api_core import client_options
from google.api_core.exceptions import NotFound
import google.auth  # type: ignore
from google.cloud import bigquery
import google.cloud.bigquery.dataset
from google.cloud.bigquery import _versions_helpers
from google.cloud.bigquery import exceptions
from google.cloud.bigquery.dbapi import _helpers
from google.cloud.bigquery.magics import line_arg_parser as lap

try:
    import bigquery_magics  # type: ignore
except ImportError:
    bigquery_magics = None

IPYTHON_USER_AGENT = "ipython-{}".format(IPython.__version__)  # type: ignore


class Context(object):
    """Storage for objects to be used throughout an IPython notebook session.

    A Context object is initialized when the ``magics`` module is imported,
    and can be found at ``google.cloud.bigquery.magics.context``.
    """

    def __init__(self):
        self._credentials = None
        self._project = None
        self._connection = None
        self._default_query_job_config = bigquery.QueryJobConfig()
        self._bigquery_client_options = client_options.ClientOptions()
        self._bqstorage_client_options = client_options.ClientOptions()
        self._progress_bar_type = "tqdm_notebook"

    @property
    def credentials(self):
        """google.auth.credentials.Credentials: Credentials to use for queries
        performed through IPython magics.

        Note:
            These credentials do not need to be explicitly defined if you are
            using Application Default Credentials. If you are not using
            Application Default Credentials, manually construct a
            :class:`google.auth.credentials.Credentials` object and set it as
            the context credentials as demonstrated in the example below. See
            `auth docs`_ for more information on obtaining credentials.

        Example:
            Manually setting the context credentials:

            >>> from google.cloud.bigquery import magics
            >>> from google.oauth2 import service_account
            >>> credentials = (service_account
            ...     .Credentials.from_service_account_file(
            ...         '/path/to/key.json'))
            >>> magics.context.credentials = credentials


        .. _auth docs: http://google-auth.readthedocs.io
            /en/latest/user-guide.html#obtaining-credentials
        """
        if self._credentials is None:
            self._credentials, _ = google.auth.default()
        return self._credentials

    @credentials.setter
    def credentials(self, value):
        self._credentials = value

    @property
    def project(self):
        """str: Default project to use for queries performed through IPython
        magics.

        Note:
            The project does not need to be explicitly defined if you have an
            environment default project set. If you do not have a default
            project set in your environment, manually assign the project as
            demonstrated in the example below.

        Example:
            Manually setting the context project:

            >>> from google.cloud.bigquery import magics
            >>> magics.context.project = 'my-project'
        """
        if self._project is None:
            _, self._project = google.auth.default()
        return self._project

    @project.setter
    def project(self, value):
        self._project = value

    @property
    def bigquery_client_options(self):
        """google.api_core.client_options.ClientOptions: client options to be
        used through IPython magics.

        Note:
            The client options do not need to be explicitly defined if no
            special network connections are required. Normally you would be
            using the https://bigquery.googleapis.com/ endpoint.

        Example:
            Manually setting the endpoint:

            >>> from google.cloud.bigquery import magics
            >>> client_options = {}
            >>> client_options['api_endpoint'] = "https://some.special.url"
            >>> magics.context.bigquery_client_options = client_options
        """
        return self._bigquery_client_options

    @bigquery_client_options.setter
    def bigquery_client_options(self, value):
        self._bigquery_client_options = value

    @property
    def bqstorage_client_options(self):
        """google.api_core.client_options.ClientOptions: client options to be
        used through IPython magics for the storage client.

        Note:
            The client options do not need to be explicitly defined if no
            special network connections are required. Normally you would be
            using the https://bigquerystorage.googleapis.com/ endpoint.

        Example:
            Manually setting the endpoint:

            >>> from google.cloud.bigquery import magics
            >>> client_options = {}
            >>> client_options['api_endpoint'] = "https://some.special.url"
            >>> magics.context.bqstorage_client_options = client_options
        """
        return self._bqstorage_client_options

    @bqstorage_client_options.setter
    def bqstorage_client_options(self, value):
        self._bqstorage_client_options = value

    @property
    def default_query_job_config(self):
        """google.cloud.bigquery.job.QueryJobConfig: Default job
        configuration for queries.

        The context's :class:`~google.cloud.bigquery.job.QueryJobConfig` is
        used for queries. Some properties can be overridden with arguments to
        the magics.

        Example:
            Manually setting the default value for ``maximum_bytes_billed``
            to 100 MB:

            >>> from google.cloud.bigquery import magics
            >>> magics.context.default_query_job_config.maximum_bytes_billed = 100000000
        """
        return self._default_query_job_config

    @default_query_job_config.setter
    def default_query_job_config(self, value):
        self._default_query_job_config = value

    @property
    def progress_bar_type(self):
        """str: Default progress bar type to use to display progress bar while
        executing queries through IPython magics.

        Note:
            Install the ``tqdm`` package to use this feature.

        Example:
            Manually setting the progress_bar_type:

            >>> from google.cloud.bigquery import magics
            >>> magics.context.progress_bar_type = "tqdm_notebook"
        """
        return self._progress_bar_type

    @progress_bar_type.setter
    def progress_bar_type(self, value):
        self._progress_bar_type = value


# If bigquery_magics is available, we load that extension rather than this one.
# Ensure google.cloud.bigquery.magics.context setters are on the correct magics
# implementation in case the user has installed the package but hasn't updated
# their code.
if bigquery_magics is not None:
    context = bigquery_magics.context
else:
    context = Context()
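# Usage sketch (not from the source; names are placeholders): configuring the
# context in a notebook session before running any %%bigquery cells.
#
#   >>> from google.cloud.bigquery import magics
#   >>> magics.context.project = "my-project"
#   >>> magics.context.progress_bar_type = "tqdm_notebook"
#   >>> magics.context.default_query_job_config.maximum_bytes_billed = 10**9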


def _handle_error(error, destination_var=None):
    """Process a query execution error.

    Args:
        error (Exception):
            An exception that occurred during the query execution.
        destination_var (Optional[str]):
            The name of the IPython session variable to store the query job.
    """
    if destination_var:
        query_job = getattr(error, "query_job", None)

        if query_job is not None:
            IPython.get_ipython().push({destination_var: query_job})
        else:
            # this is the case when previewing table rows by providing just
            # table ID to cell magic
            print(
                "Could not save output to variable '{}'.".format(destination_var),
                file=sys.stderr,
            )

    print("\nERROR:\n", str(error), file=sys.stderr)


def _run_query(client, query, job_config=None):
    """Runs a query while printing status updates

    Args:
        client (google.cloud.bigquery.client.Client):
            Client to bundle configuration needed for API requests.
        query (str):
            SQL query to be executed. Defaults to the standard SQL dialect.
            Use the ``job_config`` parameter to change dialects.
        job_config (Optional[google.cloud.bigquery.job.QueryJobConfig]):
            Extra configuration options for the job.

    Returns:
        google.cloud.bigquery.job.QueryJob: the query job created

    Example:
        >>> client = bigquery.Client()
        >>> _run_query(client, "SELECT 17")
        Executing query with job ID: bf633912-af2c-4780-b568-5d868058632b
        Query executing: 1.66s
        Query complete after 2.07s
        'bf633912-af2c-4780-b568-5d868058632b'
    """
    start_time = time.perf_counter()
    query_job = client.query(query, job_config=job_config)

    if job_config and job_config.dry_run:
        return query_job

    print(f"Executing query with job ID: {query_job.job_id}")

    while True:
        print(
            f"\rQuery executing: {time.perf_counter() - start_time:.2f}s",
            end="",
        )
        try:
            query_job.result(timeout=0.5)
            break
        except futures.TimeoutError:
            continue
    print(f"\nJob ID {query_job.job_id} successfully executed")
    return query_job


def _create_dataset_if_necessary(client, dataset_id):
    """Create a dataset in the current project if it doesn't exist.

    Args:
        client (google.cloud.bigquery.client.Client):
            Client to bundle configuration needed for API requests.
        dataset_id (str):
            Dataset id.
    """
    dataset_reference = bigquery.dataset.DatasetReference(client.project, dataset_id)
    try:
        client.get_dataset(dataset_reference)
        return
    except NotFound:
        pass
    dataset = bigquery.Dataset(dataset_reference)
    dataset.location = client.location
    print(f"Creating dataset: {dataset_id}")
    dataset = client.create_dataset(dataset)
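# Usage sketch (the client and dataset ID are placeholders): the helper is
# idempotent, so a second call for an existing dataset is a no-op.
#
#   >>> client = bigquery.Client()
#   >>> _create_dataset_if_necessary(client, "scratch_dataset")
#   Creating dataset: scratch_dataset
#   >>> _create_dataset_if_necessary(client, "scratch_dataset")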


@magic_arguments.magic_arguments()
@magic_arguments.argument(
    "destination_var",
    nargs="?",
    help=("If provided, save the output to this variable instead of displaying it."),
)
@magic_arguments.argument(
    "--destination_table",
    type=str,
    default=None,
    help=(
        "If provided, save the output of the query to a new BigQuery table. "
        "Variable should be in a format <dataset_id>.<table_id>. "
        "If the table does not exist, it will be created. "
        "If the table already exists, its data will be overwritten."
    ),
)
@magic_arguments.argument(
    "--project",
    type=str,
    default=None,
    help=("Project to use for executing this query. Defaults to the context project."),
)
@magic_arguments.argument(
    "--max_results",
    default=None,
    help=(
        "Maximum number of rows in dataframe returned from executing the query. "
        "Defaults to returning all rows."
    ),
)
@magic_arguments.argument(
    "--maximum_bytes_billed",
    default=None,
    help=(
        "maximum_bytes_billed to use for executing this query. Defaults to "
        "the context default_query_job_config.maximum_bytes_billed."
    ),
)
@magic_arguments.argument(
    "--dry_run",
    action="store_true",
    default=False,
    help=(
        "Sets query to be a dry run to estimate costs. "
        "Defaults to executing the query instead of dry run if this argument is not used."
    ),
)
@magic_arguments.argument(
    "--use_legacy_sql",
    action="store_true",
    default=False,
    help=(
        "Sets query to use Legacy SQL instead of Standard SQL. Defaults to "
        "Standard SQL if this argument is not used."
    ),
)
@magic_arguments.argument(
    "--bigquery_api_endpoint",
    type=str,
    default=None,
    help=(
        "The desired API endpoint, e.g., bigquery.googleapis.com. Defaults to this "
        "option's value in the context bigquery_client_options."
    ),
)
@magic_arguments.argument(
    "--bqstorage_api_endpoint",
    type=str,
    default=None,
    help=(
        "The desired API endpoint, e.g., bigquerystorage.googleapis.com. Defaults to "
        "this option's value in the context bqstorage_client_options."
    ),
)
@magic_arguments.argument(
    "--no_query_cache",
    action="store_true",
    default=False,
    help=("Do not use cached query results."),
)
@magic_arguments.argument(
    "--use_bqstorage_api",
    action="store_true",
    default=None,
    help=(
        "[Deprecated] The BigQuery Storage API is already used by default to "
        "download large query results, and this option has no effect. "
        "If you want to switch to the classic REST API instead, use the "
        "--use_rest_api option."
    ),
)
@magic_arguments.argument(
    "--use_rest_api",
    action="store_true",
    default=False,
    help=(
        "Use the classic REST API instead of the BigQuery Storage API to "
        "download query results."
    ),
)
@magic_arguments.argument(
    "--verbose",
    action="store_true",
    default=False,
    help=(
        "If set, print verbose output, including the query job ID and the "
        "amount of time for the query to finish. By default, this "
        "information will be displayed as the query runs, but will be "
        "cleared after the query is finished."
    ),
)
@magic_arguments.argument(
    "--params",
    nargs="+",
    default=None,
    help=(
        "Parameters to format the query string. If present, the --params "
        "flag should be followed by a string representation of a dictionary "
        "in the format {'param_name': 'param_value'} (ex. {\"num\": 17}), "
        "or a reference to a dictionary in the same format. The dictionary "
        "reference can be made by including a '$' before the variable "
        "name (ex. $my_dict_var)."
    ),
)
@magic_arguments.argument(
    "--progress_bar_type",
    type=str,
    default=None,
    help=(
        "Sets progress bar type to display a progress bar while executing the query. "
        "Defaults to use tqdm_notebook. Install the ``tqdm`` package to use this feature."
    ),
)
@magic_arguments.argument(
    "--location",
    type=str,
    default=None,
    help=(
        "Set the location to execute the query. "
        "Defaults to the location set in the query settings in the console."
    ),
)
def _cell_magic(line, query):
    """Underlying function for the bigquery cell magic.

    Note:
        This function contains the underlying logic for the 'bigquery' cell
        magic. This function is not meant to be called directly.

    Args:
        line (str): "%%bigquery" followed by arguments as required
        query (str): SQL query to run

    Returns:
        pandas.DataFrame: the query results.
    """
    # The built-in parser does not recognize Python structures such as dicts, thus
    # we extract the "--params" option and interpret it separately.
    try:
        params_option_value, rest_of_args = _split_args_line(line)
    except lap.exceptions.QueryParamsParseError as exc:
        rebranded_error = SyntaxError(
            "--params is not a correctly formatted JSON string or a JSON "
            "serializable dictionary"
        )
        raise rebranded_error from exc
    except lap.exceptions.DuplicateQueryParamsError as exc:
        rebranded_error = ValueError("Duplicate --params option.")
        raise rebranded_error from exc
    except lap.exceptions.ParseError as exc:
        rebranded_error = ValueError(
            "Unrecognized input, are option values correct? "
            "Error details: {}".format(exc.args[0])
        )
        raise rebranded_error from exc

    args = magic_arguments.parse_argstring(_cell_magic, rest_of_args)

    if args.use_bqstorage_api is not None:
        warnings.warn(
            "Deprecated option --use_bqstorage_api, the BigQuery "
            "Storage API is already used by default.",
            category=DeprecationWarning,
        )
    use_bqstorage_api = not args.use_rest_api
    location = args.location

    params = []
    if params_option_value:
        # A non-existing params variable is not expanded and ends up in the input
        # in its raw form, e.g. "$query_params".
        if params_option_value.startswith("$"):
            msg = 'Parameter expansion failed, undefined variable "{}".'.format(
                params_option_value[1:]
            )
            raise NameError(msg)

        params = _helpers.to_query_parameters(ast.literal_eval(params_option_value), {})

    project = args.project or context.project

    bigquery_client_options = copy.deepcopy(context.bigquery_client_options)
    if args.bigquery_api_endpoint:
        if isinstance(bigquery_client_options, dict):
            bigquery_client_options["api_endpoint"] = args.bigquery_api_endpoint
        else:
            bigquery_client_options.api_endpoint = args.bigquery_api_endpoint

    client = bigquery.Client(
        project=project,
        credentials=context.credentials,
        default_query_job_config=context.default_query_job_config,
        client_info=client_info.ClientInfo(user_agent=IPYTHON_USER_AGENT),
        client_options=bigquery_client_options,
        location=location,
    )
    if context._connection:
        client._connection = context._connection

    bqstorage_client_options = copy.deepcopy(context.bqstorage_client_options)
    if args.bqstorage_api_endpoint:
        if isinstance(bqstorage_client_options, dict):
            bqstorage_client_options["api_endpoint"] = args.bqstorage_api_endpoint
        else:
            bqstorage_client_options.api_endpoint = args.bqstorage_api_endpoint

    bqstorage_client = _make_bqstorage_client(
        client,
        use_bqstorage_api,
        bqstorage_client_options,
    )

    close_transports = functools.partial(_close_transports, client, bqstorage_client)

    try:
        if args.max_results:
            max_results = int(args.max_results)
        else:
            max_results = None

        query = query.strip()

        if not query:
            error = ValueError("Query is missing.")
            _handle_error(error, args.destination_var)
            return

        # Check if the query is given as a reference to a variable.
        if query.startswith("$"):
            query_var_name = query[1:]

            if not query_var_name:
                missing_msg = 'Missing query variable name, empty "$" is not allowed.'
                raise NameError(missing_msg)

            if query_var_name.isidentifier():
                ip = IPython.get_ipython()
                query = ip.user_ns.get(query_var_name, ip)  # ip serves as a sentinel

                if query is ip:
                    raise NameError(
                        f"Unknown query, variable {query_var_name} does not exist."
                    )
                else:
                    if not isinstance(query, (str, bytes)):
                        raise TypeError(
                            f"Query variable {query_var_name} must be a string "
                            "or a bytes-like value."
                        )

        # Any query that does not contain whitespace (aside from leading and
        # trailing whitespace) is assumed to be a table ID.
        if not re.search(r"\s", query):
            try:
                rows = client.list_rows(query, max_results=max_results)
            except Exception as ex:
                _handle_error(ex, args.destination_var)
                return

            result = rows.to_dataframe(
                bqstorage_client=bqstorage_client,
                create_bqstorage_client=False,
            )
            if args.destination_var:
                IPython.get_ipython().push({args.destination_var: result})
                return
            else:
                return result

        job_config = bigquery.job.QueryJobConfig()
        job_config.query_parameters = params
        job_config.use_legacy_sql = args.use_legacy_sql
        job_config.dry_run = args.dry_run

        # Don't override context job config unless --no_query_cache is explicitly set.
        if args.no_query_cache:
            job_config.use_query_cache = False

        if args.destination_table:
            split = args.destination_table.split(".")
            if len(split) != 2:
                raise ValueError(
                    "--destination_table should be in a <dataset_id>.<table_id> format."
                )
            dataset_id, table_id = split
            job_config.allow_large_results = True
            dataset_ref = bigquery.dataset.DatasetReference(client.project, dataset_id)
            destination_table_ref = dataset_ref.table(table_id)
            job_config.destination = destination_table_ref
            job_config.create_disposition = "CREATE_IF_NEEDED"
            job_config.write_disposition = "WRITE_TRUNCATE"
            _create_dataset_if_necessary(client, dataset_id)

        if args.maximum_bytes_billed == "None":
            job_config.maximum_bytes_billed = 0
        elif args.maximum_bytes_billed is not None:
            value = int(args.maximum_bytes_billed)
            job_config.maximum_bytes_billed = value

        try:
            query_job = _run_query(client, query, job_config=job_config)
        except Exception as ex:
            _handle_error(ex, args.destination_var)
            return

        if not args.verbose:
            display.clear_output()

        if args.dry_run and args.destination_var:
            IPython.get_ipython().push({args.destination_var: query_job})
            return
        elif args.dry_run:
            print(
                "Query validated. This query will process {} bytes.".format(
                    query_job.total_bytes_processed
                )
            )
            return query_job

        progress_bar = context.progress_bar_type or args.progress_bar_type

        if max_results:
            result = query_job.result(max_results=max_results).to_dataframe(
                bqstorage_client=None,
                create_bqstorage_client=False,
                progress_bar_type=progress_bar,
            )
        else:
            result = query_job.to_dataframe(
                bqstorage_client=bqstorage_client,
                create_bqstorage_client=False,
                progress_bar_type=progress_bar,
            )

        if args.destination_var:
            IPython.get_ipython().push({args.destination_var: result})
        else:
            return result
    finally:
        close_transports()
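# Usage sketch of a notebook cell that exercises this function (the variable
# name, query, and parameter values are placeholders). The first line is split
# by _split_args_line() and parsed by magic_arguments; the rest is the query.
#
#   %%bigquery df --params {"corpus_name": "hamlet", "limit": 10}
#   SELECT word, SUM(word_count) AS count
#   FROM `bigquery-public-data.samples.shakespeare`
#   WHERE corpus = @corpus_name
#   GROUP BY word
#   ORDER BY count DESC
#   LIMIT @limit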


def _split_args_line(line):
    """Split out the --params option value from the input line arguments.

    Args:
        line (str): The line arguments passed to the cell magic.

    Returns:
        Tuple[str, str]: The --params option value and the remaining
            line arguments.
    """
    lexer = lap.Lexer(line)
    scanner = lap.Parser(lexer)
    tree = scanner.input_line()

    extractor = lap.QueryParamsExtractor()
    params_option_value, rest_of_args = extractor.visit(tree)

    return params_option_value, rest_of_args
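# Illustrative sketch, not a recorded doctest (exact whitespace in the second
# element depends on the parser):
#
#   >>> _split_args_line('df --params {"num": 17} --use_rest_api')
#   ('{"num": 17}', 'df --use_rest_api')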


def _make_bqstorage_client(client, use_bqstorage_api, client_options):
    """Creates a BigQuery Storage client.

    Args:
        client (:class:`~google.cloud.bigquery.client.Client`): BigQuery client.
        use_bqstorage_api (bool): whether the BigQuery Storage API is used or not.
        client_options (:class:`google.api_core.client_options.ClientOptions`):
            Custom options used with a new BigQuery Storage client instance
            if one is created.

    Raises:
        ImportError: if the google-cloud-bigquery-storage package or the
            grpcio package is not installed.

    Returns:
        Optional[google.cloud.bigquery_storage.BigQueryReadClient]:
            A BigQuery Storage client, or :data:`None` if
            ``use_bqstorage_api`` is :data:`False` or the installed
            google-cloud-bigquery-storage package is outdated.
    """
    if not use_bqstorage_api:
        return None

    try:
        _versions_helpers.BQ_STORAGE_VERSIONS.try_import(raise_if_error=True)
    except exceptions.BigQueryStorageNotFoundError as err:
        customized_error = ImportError(
            "The default BigQuery Storage API client cannot be used, install "
            "the missing google-cloud-bigquery-storage and pyarrow packages "
            "to use it. Alternatively, use the classic REST API by specifying "
            "the --use_rest_api magic option."
        )
        raise customized_error from err
    except exceptions.LegacyBigQueryStorageError:
        pass

    try:
        from google.api_core.gapic_v1 import client_info as gapic_client_info
    except ImportError as err:
        customized_error = ImportError(
            "Install the grpcio package to use the BigQuery Storage API."
        )
        raise customized_error from err

    return client._ensure_bqstorage_client(
        client_options=client_options,
        client_info=gapic_client_info.ClientInfo(user_agent=IPYTHON_USER_AGENT),
    )


def _close_transports(client, bqstorage_client):
    """Close the given clients' underlying transport channels.

    Closing the transport is needed to release system resources, namely open
    sockets.

    Args:
        client (:class:`~google.cloud.bigquery.client.Client`):
            A client for the BigQuery API.
        bqstorage_client
            (Optional[:class:`~google.cloud.bigquery_storage.BigQueryReadClient`]):
            A client for the BigQuery Storage API.
    """
    client.close()
    if bqstorage_client is not None:
        bqstorage_client._transport.grpc_channel.close()
@@ -0,0 +1,517 @@
# -*- coding: utf-8 -*-
#
# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Define resources for the BigQuery ML Models API."""

from __future__ import annotations  # type: ignore

import copy
import datetime
import typing
from typing import Any, Dict, Optional, Sequence, Union

import google.cloud._helpers  # type: ignore
from google.cloud.bigquery import _helpers
from google.cloud.bigquery import standard_sql
from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration


class Model:
    """Model represents a machine learning model resource.

    See
    https://cloud.google.com/bigquery/docs/reference/rest/v2/models

    Args:
        model_ref:
            A pointer to a model. If ``model_ref`` is a string, it must
            include a project ID, dataset ID, and model ID, each separated
            by ``.``.
    """

    _PROPERTY_TO_API_FIELD = {
        "expires": "expirationTime",
        "friendly_name": "friendlyName",
        # Even though it's not necessary for field mapping to map when the
        # property name equals the resource name, we add these here so that we
        # have an exhaustive list of all mutable properties.
        "labels": "labels",
        "description": "description",
        "encryption_configuration": "encryptionConfiguration",
    }

    def __init__(self, model_ref: Union["ModelReference", str, None]):
        # Use _properties on read-write properties to match the REST API
        # semantics. The BigQuery API makes a distinction between an unset
        # value, a null value, and a default value (0 or ""), but the protocol
        # buffer classes do not.
        self._properties: Dict[str, Any] = {}

        if isinstance(model_ref, str):
            model_ref = ModelReference.from_string(model_ref)

        if model_ref:
            self._properties["modelReference"] = model_ref.to_api_repr()

    @property
    def reference(self) -> Optional["ModelReference"]:
        """A model reference pointing to this model.

        Read-only.
        """
        resource = self._properties.get("modelReference")
        if resource is None:
            return None
        else:
            return ModelReference.from_api_repr(resource)

    @property
    def project(self) -> Optional[str]:
        """Project bound to the model."""
        ref = self.reference
        return ref.project if ref is not None else None

    @property
    def dataset_id(self) -> Optional[str]:
        """ID of dataset containing the model."""
        ref = self.reference
        return ref.dataset_id if ref is not None else None

    @property
    def model_id(self) -> Optional[str]:
        """The model ID."""
        ref = self.reference
        return ref.model_id if ref is not None else None

    @property
    def path(self) -> Optional[str]:
        """URL path for the model's APIs."""
        ref = self.reference
        return ref.path if ref is not None else None

    @property
    def location(self) -> Optional[str]:
        """The geographic location where the model resides.

        This value is inherited from the dataset.

        Read-only.
        """
        return typing.cast(Optional[str], self._properties.get("location"))

    @property
    def etag(self) -> Optional[str]:
        """ETag for the model resource (:data:`None` until set from the server).

        Read-only.
        """
        return typing.cast(Optional[str], self._properties.get("etag"))

    @property
    def created(self) -> Optional[datetime.datetime]:
        """Datetime at which the model was created (:data:`None` until set from the server).

        Read-only.
        """
        value = typing.cast(Optional[float], self._properties.get("creationTime"))
        if value is None:
            return None
        else:
            # value will be in milliseconds.
            return google.cloud._helpers._datetime_from_microseconds(
                1000.0 * float(value)
            )

    @property
    def modified(self) -> Optional[datetime.datetime]:
        """Datetime at which the model was last modified (:data:`None` until set from the server).

        Read-only.
        """
        value = typing.cast(Optional[float], self._properties.get("lastModifiedTime"))
        if value is None:
            return None
        else:
            # value will be in milliseconds.
            return google.cloud._helpers._datetime_from_microseconds(
                1000.0 * float(value)
            )

    @property
    def model_type(self) -> str:
        """Type of the model resource.

        Read-only.
        """
        return typing.cast(
            str, self._properties.get("modelType", "MODEL_TYPE_UNSPECIFIED")
        )

    @property
    def training_runs(self) -> Sequence[Dict[str, Any]]:
        """Information for all training runs in increasing order of start time.

        Dictionaries are in REST API format. See:
        https://cloud.google.com/bigquery/docs/reference/rest/v2/models#trainingrun

        Read-only.
        """
        return typing.cast(
            Sequence[Dict[str, Any]], self._properties.get("trainingRuns", [])
        )

    @property
    def feature_columns(self) -> Sequence[standard_sql.StandardSqlField]:
        """Input feature columns that were used to train this model.

        Read-only.
        """
        resource: Sequence[Dict[str, Any]] = typing.cast(
            Sequence[Dict[str, Any]], self._properties.get("featureColumns", [])
        )
        return [
            standard_sql.StandardSqlField.from_api_repr(column) for column in resource
        ]

    @property
    def transform_columns(self) -> Sequence[TransformColumn]:
        """The transform columns used to train this model, derived from the
        input feature columns.

        See REST API:
        https://cloud.google.com/bigquery/docs/reference/rest/v2/models#transformcolumn

        Read-only.
        """
        resources: Sequence[Dict[str, Any]] = typing.cast(
            Sequence[Dict[str, Any]], self._properties.get("transformColumns", [])
        )
        return [TransformColumn(resource) for resource in resources]

    @property
    def label_columns(self) -> Sequence[standard_sql.StandardSqlField]:
        """Label columns that were used to train this model.

        The output of the model will have a ``predicted_`` prefix to these columns.

        Read-only.
        """
        resource: Sequence[Dict[str, Any]] = typing.cast(
            Sequence[Dict[str, Any]], self._properties.get("labelColumns", [])
        )
        return [
            standard_sql.StandardSqlField.from_api_repr(column) for column in resource
        ]

    @property
    def best_trial_id(self) -> Optional[int]:
        """The best trial_id across all training runs.

        .. deprecated::
            This property is deprecated!

        Read-only.
        """
        value = typing.cast(Optional[int], self._properties.get("bestTrialId"))
        if value is not None:
            value = int(value)
        return value

    @property
    def expires(self) -> Optional[datetime.datetime]:
        """The datetime when this model expires.

        If not present, the model will persist indefinitely. Expired models will be
        deleted and their storage reclaimed.
        """
        value = typing.cast(Optional[float], self._properties.get("expirationTime"))
        if value is None:
            return None
        else:
            # value will be in milliseconds.
            return google.cloud._helpers._datetime_from_microseconds(
                1000.0 * float(value)
            )

    @expires.setter
    def expires(self, value: Optional[datetime.datetime]):
        if value is None:
            value_to_store: Optional[str] = None
        else:
            value_to_store = str(google.cloud._helpers._millis_from_datetime(value))
        # TODO: Consider using typing.TypedDict when only Python 3.8+ is supported.
        self._properties["expirationTime"] = value_to_store  # type: ignore

    @property
    def description(self) -> Optional[str]:
        """Description of the model (defaults to :data:`None`)."""
        return typing.cast(Optional[str], self._properties.get("description"))

    @description.setter
    def description(self, value: Optional[str]):
        # TODO: Consider using typing.TypedDict when only Python 3.8+ is supported.
        self._properties["description"] = value  # type: ignore

    @property
    def friendly_name(self) -> Optional[str]:
        """Title of the model (defaults to :data:`None`)."""
        return typing.cast(Optional[str], self._properties.get("friendlyName"))

    @friendly_name.setter
    def friendly_name(self, value: Optional[str]):
        # TODO: Consider using typing.TypedDict when only Python 3.8+ is supported.
        self._properties["friendlyName"] = value  # type: ignore

    @property
    def labels(self) -> Dict[str, str]:
        """Labels for the model.

        This method always returns a dict. To change a model's labels, modify the dict,
        then call ``Client.update_model``. To delete a label, set its value to
        :data:`None` before updating.
        """
        return self._properties.setdefault("labels", {})

    @labels.setter
    def labels(self, value: Optional[Dict[str, str]]):
        if value is None:
            value = {}
        self._properties["labels"] = value

    @property
    def encryption_configuration(self) -> Optional[EncryptionConfiguration]:
        """Custom encryption configuration for the model.

        Custom encryption configuration (e.g., Cloud KMS keys) or :data:`None`
        if using default encryption.

        See `protecting data with Cloud KMS keys
        <https://cloud.google.com/bigquery/docs/customer-managed-encryption>`_
        in the BigQuery documentation.
        """
        prop = self._properties.get("encryptionConfiguration")
        if prop:
            prop = EncryptionConfiguration.from_api_repr(prop)
        return typing.cast(Optional[EncryptionConfiguration], prop)

    @encryption_configuration.setter
    def encryption_configuration(self, value: Optional[EncryptionConfiguration]):
        api_repr = value.to_api_repr() if value else value
        self._properties["encryptionConfiguration"] = api_repr

    @classmethod
    def from_api_repr(cls, resource: Dict[str, Any]) -> "Model":
        """Factory: construct a model resource given its API representation.

        Args:
            resource:
                Model resource representation from the API

        Returns:
            Model parsed from ``resource``.
        """
        this = cls(None)
        resource = copy.deepcopy(resource)
        this._properties = resource
        return this
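    # Usage sketch with a minimal, hypothetical resource payload:
    #
    #   >>> resource = {
    #   ...     "modelReference": {
    #   ...         "projectId": "my-project",
    #   ...         "datasetId": "my_dataset",
    #   ...         "modelId": "my_model",
    #   ...     },
    #   ...     "modelType": "LINEAR_REGRESSION",
    #   ... }
    #   >>> model = Model.from_api_repr(resource)
    #   >>> model.model_type
    #   'LINEAR_REGRESSION'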

    def _build_resource(self, filter_fields):
        """Generate a resource for ``update``."""
        return _helpers._build_resource_from_properties(self, filter_fields)

    def __repr__(self):
        return f"Model(reference={self.reference!r})"

    def to_api_repr(self) -> Dict[str, Any]:
        """Construct the API resource representation of this model.

        Returns:
            Model represented as an API resource.
        """
        return copy.deepcopy(self._properties)


class ModelReference:
    """ModelReferences are pointers to models.

    See
    https://cloud.google.com/bigquery/docs/reference/rest/v2/models#modelreference
    """

    def __init__(self):
        self._properties = {}

    @property
    def project(self):
        """str: Project bound to the model."""
        return self._properties.get("projectId")

    @property
    def dataset_id(self):
        """str: ID of dataset containing the model."""
        return self._properties.get("datasetId")

    @property
    def model_id(self):
        """str: The model ID."""
        return self._properties.get("modelId")

    @property
    def path(self) -> str:
        """URL path for the model's APIs."""
        return f"/projects/{self.project}/datasets/{self.dataset_id}/models/{self.model_id}"

    @classmethod
    def from_api_repr(cls, resource: Dict[str, Any]) -> "ModelReference":
        """Factory: construct a model reference given its API representation.

        Args:
            resource:
                Model reference representation returned from the API

        Returns:
            Model reference parsed from ``resource``.
        """
        ref = cls()
        ref._properties = resource
        return ref

    @classmethod
    def from_string(
        cls, model_id: str, default_project: Optional[str] = None
    ) -> "ModelReference":
        """Construct a model reference from a model ID string.

        Args:
            model_id:
                A model ID in standard SQL format. If ``default_project``
                is not specified, this must include a project ID, dataset
                ID, and model ID, each separated by ``.``.
            default_project:
                The project ID to use when ``model_id`` does not include
                a project ID.

        Returns:
            Model reference parsed from ``model_id``.

        Raises:
            ValueError:
                If ``model_id`` is not a fully-qualified model ID in
                standard SQL format.
        """
        proj, dset, model = _helpers._parse_3_part_id(
            model_id, default_project=default_project, property_name="model_id"
        )
        return cls.from_api_repr(
            {"projectId": proj, "datasetId": dset, "modelId": model}
        )
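    # Usage sketch (the IDs are placeholders):
    #
    #   >>> ref = ModelReference.from_string("my-project.my_dataset.my_model")
    #   >>> ref.path
    #   '/projects/my-project/datasets/my_dataset/models/my_model'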

    def to_api_repr(self) -> Dict[str, Any]:
        """Construct the API resource representation of this model reference.

        Returns:
            Model reference represented as an API resource.
        """
        return copy.deepcopy(self._properties)

    def _key(self):
        """Unique key for this model.

        This is used for hashing a ModelReference.
        """
        return self.project, self.dataset_id, self.model_id

    def __eq__(self, other):
        if not isinstance(other, ModelReference):
            return NotImplemented
        return self._properties == other._properties

    def __ne__(self, other):
        return not self == other

    def __hash__(self):
        return hash(self._key())

    def __repr__(self):
        return "ModelReference(project_id='{}', dataset_id='{}', model_id='{}')".format(
            self.project, self.dataset_id, self.model_id
        )


class TransformColumn:
    """TransformColumn represents a transform column feature.

    See
    https://cloud.google.com/bigquery/docs/reference/rest/v2/models#transformcolumn

    Args:
        resource:
            A dictionary representing a transform column feature.
    """

    def __init__(self, resource: Dict[str, Any]):
        self._properties = resource

    @property
    def name(self) -> Optional[str]:
        """Name of the column."""
        return self._properties.get("name")

    @property
    def type_(self) -> Optional[standard_sql.StandardSqlDataType]:
        """Data type of the column after the transform.

        Returns:
            Optional[google.cloud.bigquery.standard_sql.StandardSqlDataType]:
                Data type of the column.
        """
        type_json = self._properties.get("type")
        if type_json is None:
            return None
        return standard_sql.StandardSqlDataType.from_api_repr(type_json)

    @property
    def transform_sql(self) -> Optional[str]:
        """The SQL expression used in the column transform."""
        return self._properties.get("transformSql")

    @classmethod
    def from_api_repr(cls, resource: Dict[str, Any]) -> "TransformColumn":
        """Constructs a transform column feature given its API representation.

        Args:
            resource:
                Transform column feature representation from the API

        Returns:
            Transform column feature parsed from ``resource``.
        """
        this = cls({})
        resource = copy.deepcopy(resource)
        this._properties = resource
        return this


def _model_arg_to_model_ref(value, default_project=None):
    """Helper to convert a string or Model to ModelReference.

    This function keeps ModelReference and other kinds of objects unchanged.
    """
    if isinstance(value, str):
        return ModelReference.from_string(value, default_project=default_project)
    if isinstance(value, Model):
        return value.reference
    return value
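# Usage sketch covering the accepted input kinds (IDs are placeholders):
#
#   >>> _model_arg_to_model_ref("ds.m", default_project="my-project")
#   ModelReference(project_id='my-project', dataset_id='ds', model_id='m')
#   >>> ref = ModelReference.from_string("my-project.ds.m")
#   >>> _model_arg_to_model_ref(ref) is ref
#   True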
@@ -0,0 +1,164 @@
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import logging
from contextlib import contextmanager
from google.api_core.exceptions import GoogleAPICallError  # type: ignore

logger = logging.getLogger(__name__)
try:
    from opentelemetry import trace  # type: ignore
    from opentelemetry.instrumentation.utils import http_status_to_status_code  # type: ignore
    from opentelemetry.trace.status import Status  # type: ignore

    HAS_OPENTELEMETRY = True
    _warned_telemetry = True

except ImportError:
    HAS_OPENTELEMETRY = False
    _warned_telemetry = False

_default_attributes = {
    "db.system": "BigQuery"
}  # static, default values assigned to all spans


@contextmanager
def create_span(name, attributes=None, client=None, job_ref=None):
    """Creates a ContextManager for a Span to be exported to the configured exporter.

    If no configuration exists, yields None.

    Args:
        name (str): Name that will be set for the span being created
        attributes (Optional[dict]):
            Additional attributes that pertain to
            the specific API call (i.e. not a default attribute)
        client (Optional[google.cloud.bigquery.client.Client]):
            Pass in a Client object to extract any attributes that may be
            relevant to it and add them to the created spans.
        job_ref (Optional[google.cloud.bigquery.job._AsyncJob]):
            Pass in a _AsyncJob object to extract any attributes that may be
            relevant to it and add them to the created spans.

    Yields:
        opentelemetry.trace.Span: Yields the newly created Span.

    Raises:
        google.api_core.exceptions.GoogleAPICallError:
            Raised if a span could not be yielded or there was an issue
            with the call to OpenTelemetry.
    """
    global _warned_telemetry
    final_attributes = _get_final_span_attributes(attributes, client, job_ref)
    if not HAS_OPENTELEMETRY:
        if not _warned_telemetry:
            logger.debug(
                "This service is instrumented using OpenTelemetry. "
                "OpenTelemetry or one of its components could not be imported; "
                "please add compatible versions of opentelemetry-api and "
                "opentelemetry-instrumentation packages in order to get BigQuery "
                "Tracing data."
            )
            _warned_telemetry = True

        yield None
        return
    tracer = trace.get_tracer(__name__)

    # yield a new span value
    with tracer.start_as_current_span(name=name, attributes=final_attributes) as span:
        try:
            yield span
        except GoogleAPICallError as error:
            if error.code is not None:
                span.set_status(Status(http_status_to_status_code(error.code)))
            raise
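# Usage sketch (span name and attribute are placeholders): wrap an API call so
# a span is recorded when OpenTelemetry is configured; if it is not installed,
# the yielded span is None and the body still runs.
#
#   with create_span(name="BigQuery.getTable", attributes={"path": "/tables/t"}):
#       ...  # make the API request here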


def _get_final_span_attributes(attributes=None, client=None, job_ref=None):
    """Compiles attributes from the client, job_ref, and user-provided attributes.

    Attributes from all of these sources are merged together. Note the
    attributes are added sequentially based on perceived order of precedence:
    i.e. attributes added last may overwrite attributes added earlier.

    Args:
        attributes (Optional[dict]):
            Additional attributes that pertain to
            the specific API call (i.e. not a default attribute)

        client (Optional[google.cloud.bigquery.client.Client]):
            Pass in a Client object to extract any attributes that may be
            relevant to it and add them to the final_attributes

        job_ref (Optional[google.cloud.bigquery.job._AsyncJob]):
            Pass in a _AsyncJob object to extract any attributes that may be
            relevant to it and add them to the final_attributes.

    Returns:
        dict: The merged attributes, with :data:`None` values dropped.
    """

    collected_attributes = _default_attributes.copy()

    if client:
        collected_attributes.update(_set_client_attributes(client))
    if job_ref:
        collected_attributes.update(_set_job_attributes(job_ref))
    if attributes:
        collected_attributes.update(attributes)

    final_attributes = {k: v for k, v in collected_attributes.items() if v is not None}
    return final_attributes
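# Illustration of the precedence: user-supplied attributes win over job and
# client attributes, which in turn win over the defaults.
#
#   >>> _get_final_span_attributes(attributes={"db.system": "overridden"})
#   {'db.system': 'overridden'}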


def _set_client_attributes(client):
    return {"db.name": client.project, "location": client.location}


def _set_job_attributes(job_ref):
    job_attributes = {
        "db.name": job_ref.project,
        "job_id": job_ref.job_id,
        "state": job_ref.state,
    }

    job_attributes["hasErrors"] = job_ref.error_result is not None

    if job_ref.created is not None:
        job_attributes["timeCreated"] = job_ref.created.isoformat()

    if job_ref.started is not None:
        job_attributes["timeStarted"] = job_ref.started.isoformat()

    if job_ref.ended is not None:
        job_attributes["timeEnded"] = job_ref.ended.isoformat()

    if job_ref.location is not None:
        job_attributes["location"] = job_ref.location

    if job_ref.parent_job_id is not None:
        job_attributes["parent_job_id"] = job_ref.parent_job_id

    if job_ref.num_child_jobs is not None:
        job_attributes["num_child_jobs"] = job_ref.num_child_jobs

    total_bytes_billed = getattr(job_ref, "total_bytes_billed", None)
    if total_bytes_billed is not None:
        job_attributes["total_bytes_billed"] = total_bytes_billed

    total_bytes_processed = getattr(job_ref, "total_bytes_processed", None)
    if total_bytes_processed is not None:
        job_attributes["total_bytes_processed"] = total_bytes_processed

    return job_attributes
@@ -0,0 +1,2 @@
# Marker file for PEP 561.
# The google-cloud-bigquery package uses inline types.
.venv/lib/python3.10/site-packages/google/cloud/bigquery/query.py (new file, 1344 lines): diff suppressed because it is too large.
@@ -0,0 +1,207 @@
# Copyright 2018 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from google.api_core import exceptions
from google.api_core import retry
import google.api_core.future.polling
from google.auth import exceptions as auth_exceptions  # type: ignore
import requests.exceptions


_RETRYABLE_REASONS = frozenset(
    ["rateLimitExceeded", "backendError", "internalError", "badGateway"]
)

_UNSTRUCTURED_RETRYABLE_TYPES = (
    ConnectionError,
    exceptions.TooManyRequests,
    exceptions.InternalServerError,
    exceptions.BadGateway,
    exceptions.ServiceUnavailable,
    requests.exceptions.ChunkedEncodingError,
    requests.exceptions.ConnectionError,
    requests.exceptions.Timeout,
    auth_exceptions.TransportError,
)

_DEFAULT_RETRY_DEADLINE = 10.0 * 60.0  # 10 minutes

# Ambiguous errors (e.g. internalError, backendError, rateLimitExceeded) retry
# until the full `_DEFAULT_RETRY_DEADLINE`. This is because the
# `jobs.getQueryResults` REST API translates a job failure into an HTTP error.
#
# TODO(https://github.com/googleapis/python-bigquery/issues/1903): Investigate
# if we can fail early for ambiguous errors in `QueryJob.result()`'s call to
# the `jobs.getQueryResults` API.
#
# We need `_DEFAULT_JOB_DEADLINE` to be some multiple of
# `_DEFAULT_RETRY_DEADLINE` to allow for a few retries after the retry
# timeout is reached.
#
# Note: This multiple should actually be a multiple of
# (2 * _DEFAULT_RETRY_DEADLINE). After an ambiguous exception, the first
# call from `job_retry()` refreshes the job state without actually restarting
# the query. The second `job_retry()` actually restarts the query. For a more
# detailed explanation, see the comments where we set `restart_query_job = True`
# in `QueryJob.result()`'s inner `is_job_done()` function.
_DEFAULT_JOB_DEADLINE = 2.0 * (2.0 * _DEFAULT_RETRY_DEADLINE)


def _should_retry(exc):
    """Predicate for determining when to retry.

    We retry if and only if the 'reason' of the first error is one of
    `_RETRYABLE_REASONS`, or, for unstructured errors, the exception is an
    instance of one of `_UNSTRUCTURED_RETRYABLE_TYPES`.
    """
    if not hasattr(exc, "errors") or len(exc.errors) == 0:
        # Check for unstructured error returns, e.g. from GFE
        return isinstance(exc, _UNSTRUCTURED_RETRYABLE_TYPES)

    reason = exc.errors[0]["reason"]
    return reason in _RETRYABLE_REASONS


DEFAULT_RETRY = retry.Retry(predicate=_should_retry, deadline=_DEFAULT_RETRY_DEADLINE)
"""The default retry object.

Any method with a ``retry`` parameter will be retried automatically,
with reasonable defaults. To disable retry, pass ``retry=None``.
To modify the default retry behavior, call a ``with_XXX`` method
on ``DEFAULT_RETRY``. For example, to change the deadline to 30 seconds,
pass ``retry=bigquery.DEFAULT_RETRY.with_deadline(30)``.
"""


def _should_retry_get_job_conflict(exc):
    """Predicate for determining when to retry a jobs.get call after a conflict error.

    Sometimes we get a 404 after a Conflict. In this case, we
    have pretty high confidence that by retrying the 404, we'll
    (hopefully) eventually recover the job.
    https://github.com/googleapis/python-bigquery/issues/2134

    Note: we may be able to extend this to user-specified predicates
    after https://github.com/googleapis/python-api-core/issues/796
    to tweak existing Retry object predicates.
    """
    return isinstance(exc, exceptions.NotFound) or _should_retry(exc)


# Pick a deadline smaller than our other deadlines since we want to timeout
# before those expire.
_DEFAULT_GET_JOB_CONFLICT_DEADLINE = _DEFAULT_RETRY_DEADLINE / 3.0
_DEFAULT_GET_JOB_CONFLICT_RETRY = retry.Retry(
    predicate=_should_retry_get_job_conflict,
    deadline=_DEFAULT_GET_JOB_CONFLICT_DEADLINE,
)
"""Private, may be removed in future."""


# Note: Take care when updating DEFAULT_TIMEOUT to anything but None. We
# briefly had a default timeout, but even setting it at more than twice the
# theoretical server-side default timeout of 2 minutes was not enough for
# complex queries. See:
# https://github.com/googleapis/python-bigquery/issues/970#issuecomment-921934647
DEFAULT_TIMEOUT = None
"""The default API timeout.

This is the time to wait per request. To adjust the total wait time, set a
deadline on the retry object.
"""

job_retry_reasons = (
    "rateLimitExceeded",
    "backendError",
    "internalError",
    "jobRateLimitExceeded",
)


def _job_should_retry(exc):
    # Sometimes we have ambiguous errors, such as 'backendError' which could
    # be due to an API problem or a job problem. For these, make sure we retry
    # our is_job_done() function.
    #
    # Note: This won't restart the job unless we know for sure it's because of
    # the job status and set restart_query_job = True in that loop. This means
    # that we might end up calling this predicate twice for the same job
    # but from different paths: (1) from jobs.getQueryResults RetryError and
    # (2) from translating the job error from the body of a jobs.get response.
    #
    # Note: If we start retrying job types other than queries where we don't
    # call the problematic getQueryResults API to check the status, we need
    # to provide a different predicate, as there shouldn't be ambiguous
    # errors in those cases.
    if isinstance(exc, exceptions.RetryError):
        exc = exc.cause

    # Per https://github.com/googleapis/python-bigquery/issues/1929, sometimes
    # retriable errors make their way here. Because of the separate
    # `restart_query_job` logic to make sure we aren't restarting non-failed
    # jobs, it should be safe to continue and not totally fail our attempt at
    # waiting for the query to complete.
    if _should_retry(exc):
        return True

    if not hasattr(exc, "errors") or len(exc.errors) == 0:
        return False

    reason = exc.errors[0]["reason"]
    return reason in job_retry_reasons


DEFAULT_JOB_RETRY = retry.Retry(
    predicate=_job_should_retry, deadline=_DEFAULT_JOB_DEADLINE
)
"""
The default job retry object.
"""
|
||||
|
||||
|
||||
def _query_job_insert_should_retry(exc):
|
||||
# Per https://github.com/googleapis/python-bigquery/issues/2134, sometimes
|
||||
# we get a 404 error. In this case, if we get this far, assume that the job
|
||||
# doesn't actually exist and try again. We can't add 404 to the default
|
||||
# job_retry because that happens for errors like "this table does not
|
||||
# exist", which probably won't resolve with a retry.
|
||||
if isinstance(exc, exceptions.RetryError):
|
||||
exc = exc.cause
|
||||
|
||||
if isinstance(exc, exceptions.NotFound):
|
||||
message = exc.message
|
||||
# Don't try to retry table/dataset not found, just job not found.
|
||||
# The error message includes the request URL, which contains "jobs", so
# match " job" with a leading space to disambiguate.
|
||||
return message is not None and " job" in message.lower()
|
||||
|
||||
return _job_should_retry(exc)
|
||||
|
||||
|
||||
_DEFAULT_QUERY_JOB_INSERT_RETRY = retry.Retry(
|
||||
predicate=_query_job_insert_should_retry,
|
||||
# jobs.insert doesn't wait for the job to complete, so we don't need the
|
||||
# long _DEFAULT_JOB_DEADLINE for this part.
|
||||
deadline=_DEFAULT_RETRY_DEADLINE,
|
||||
)
|
||||
"""Private, may be removed in future."""
|
||||
|
||||
|
||||
DEFAULT_GET_JOB_TIMEOUT = 128
|
||||
"""
|
||||
Default timeout for Client.get_job().
|
||||
"""
|
||||
|
||||
POLLING_DEFAULT_VALUE = google.api_core.future.polling.PollingFuture._DEFAULT_VALUE
|
||||
"""
|
||||
Default value defined in google.api_core.future.polling.PollingFuture.
|
||||
"""
|
||||
@@ -0,0 +1,33 @@
|
||||
# Copyright 2021 Google LLC
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""User-Defined Routines."""
|
||||
|
||||
|
||||
from google.cloud.bigquery.enums import DeterminismLevel
|
||||
from google.cloud.bigquery.routine.routine import Routine
|
||||
from google.cloud.bigquery.routine.routine import RoutineArgument
|
||||
from google.cloud.bigquery.routine.routine import RoutineReference
|
||||
from google.cloud.bigquery.routine.routine import RoutineType
|
||||
from google.cloud.bigquery.routine.routine import RemoteFunctionOptions
|
||||
|
||||
|
||||
__all__ = (
|
||||
"DeterminismLevel",
|
||||
"Routine",
|
||||
"RoutineArgument",
|
||||
"RoutineReference",
|
||||
"RoutineType",
|
||||
"RemoteFunctionOptions",
|
||||
)
|
||||
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,744 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright 2019 Google LLC
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# https://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""Define resources for the BigQuery Routines API."""
|
||||
|
||||
from typing import Any, Dict, Optional, Union
|
||||
|
||||
import google.cloud._helpers # type: ignore
|
||||
from google.cloud.bigquery import _helpers
|
||||
from google.cloud.bigquery.standard_sql import StandardSqlDataType
|
||||
from google.cloud.bigquery.standard_sql import StandardSqlTableType
|
||||
|
||||
|
||||
class RoutineType:
|
||||
"""The fine-grained type of the routine.
|
||||
|
||||
https://cloud.google.com/bigquery/docs/reference/rest/v2/routines#routinetype
|
||||
|
||||
.. versionadded:: 2.22.0
|
||||
"""
|
||||
|
||||
ROUTINE_TYPE_UNSPECIFIED = "ROUTINE_TYPE_UNSPECIFIED"
|
||||
SCALAR_FUNCTION = "SCALAR_FUNCTION"
|
||||
PROCEDURE = "PROCEDURE"
|
||||
TABLE_VALUED_FUNCTION = "TABLE_VALUED_FUNCTION"
|
||||
|
||||
|
||||
class Routine(object):
|
||||
"""Resource representing a user-defined routine.
|
||||
|
||||
See
|
||||
https://cloud.google.com/bigquery/docs/reference/rest/v2/routines
|
||||
|
||||
Args:
|
||||
routine_ref (Union[str, google.cloud.bigquery.routine.RoutineReference]):
|
||||
A pointer to a routine. If ``routine_ref`` is a string, it must
|
||||
include a project ID, dataset ID, and routine ID, each separated
|
||||
by ``.``.
|
||||
``**kwargs`` (Dict):
|
||||
Initial property values.
|
||||
"""
|
||||
|
||||
_PROPERTY_TO_API_FIELD = {
|
||||
"arguments": "arguments",
|
||||
"body": "definitionBody",
|
||||
"created": "creationTime",
|
||||
"etag": "etag",
|
||||
"imported_libraries": "importedLibraries",
|
||||
"language": "language",
|
||||
"modified": "lastModifiedTime",
|
||||
"reference": "routineReference",
|
||||
"return_type": "returnType",
|
||||
"return_table_type": "returnTableType",
|
||||
"type_": "routineType",
|
||||
"description": "description",
|
||||
"determinism_level": "determinismLevel",
|
||||
"remote_function_options": "remoteFunctionOptions",
|
||||
"data_governance_type": "dataGovernanceType",
|
||||
}
|
||||
|
||||
def __init__(self, routine_ref, **kwargs) -> None:
|
||||
if isinstance(routine_ref, str):
|
||||
routine_ref = RoutineReference.from_string(routine_ref)
|
||||
|
||||
self._properties = {"routineReference": routine_ref.to_api_repr()}
|
||||
for property_name in kwargs:
|
||||
setattr(self, property_name, kwargs[property_name])
|
||||
|
||||
@property
|
||||
def reference(self):
|
||||
"""google.cloud.bigquery.routine.RoutineReference: Reference
|
||||
describing the ID of this routine.
|
||||
"""
|
||||
return RoutineReference.from_api_repr(
|
||||
self._properties[self._PROPERTY_TO_API_FIELD["reference"]]
|
||||
)
|
||||
|
||||
@property
|
||||
def path(self):
|
||||
"""str: URL path for the routine's APIs."""
|
||||
return self.reference.path
|
||||
|
||||
@property
|
||||
def project(self):
|
||||
"""str: ID of the project containing the routine."""
|
||||
return self.reference.project
|
||||
|
||||
@property
|
||||
def dataset_id(self):
|
||||
"""str: ID of dataset containing the routine."""
|
||||
return self.reference.dataset_id
|
||||
|
||||
@property
|
||||
def routine_id(self):
|
||||
"""str: The routine ID."""
|
||||
return self.reference.routine_id
|
||||
|
||||
@property
|
||||
def etag(self):
|
||||
"""str: ETag for the resource (:data:`None` until set from the
|
||||
server).
|
||||
|
||||
Read-only.
|
||||
"""
|
||||
return self._properties.get(self._PROPERTY_TO_API_FIELD["etag"])
|
||||
|
||||
@property
|
||||
def type_(self):
|
||||
"""str: The fine-grained type of the routine.
|
||||
|
||||
See:
|
||||
https://cloud.google.com/bigquery/docs/reference/rest/v2/routines#RoutineType
|
||||
"""
|
||||
return self._properties.get(self._PROPERTY_TO_API_FIELD["type_"])
|
||||
|
||||
@type_.setter
|
||||
def type_(self, value):
|
||||
self._properties[self._PROPERTY_TO_API_FIELD["type_"]] = value
|
||||
|
||||
@property
|
||||
def created(self):
|
||||
"""Optional[datetime.datetime]: Datetime at which the routine was
|
||||
created (:data:`None` until set from the server).
|
||||
|
||||
Read-only.
|
||||
"""
|
||||
value = self._properties.get(self._PROPERTY_TO_API_FIELD["created"])
|
||||
if value is not None and value != 0:
|
||||
# value will be in milliseconds.
|
||||
return google.cloud._helpers._datetime_from_microseconds(
|
||||
1000.0 * float(value)
|
||||
)
|
||||
|
||||
@property
|
||||
def modified(self):
|
||||
"""Optional[datetime.datetime]: Datetime at which the routine was
|
||||
last modified (:data:`None` until set from the server).
|
||||
|
||||
Read-only.
|
||||
"""
|
||||
value = self._properties.get(self._PROPERTY_TO_API_FIELD["modified"])
|
||||
if value is not None and value != 0:
|
||||
# value will be in milliseconds.
|
||||
return google.cloud._helpers._datetime_from_microseconds(
|
||||
1000.0 * float(value)
|
||||
)
|
||||
|
||||
@property
|
||||
def language(self):
|
||||
"""Optional[str]: The language of the routine.
|
||||
|
||||
Defaults to ``SQL``.
|
||||
"""
|
||||
return self._properties.get(self._PROPERTY_TO_API_FIELD["language"])
|
||||
|
||||
@language.setter
|
||||
def language(self, value):
|
||||
self._properties[self._PROPERTY_TO_API_FIELD["language"]] = value
|
||||
|
||||
@property
|
||||
def arguments(self):
|
||||
"""List[google.cloud.bigquery.routine.RoutineArgument]: Input/output
|
||||
argument of a function or a stored procedure.
|
||||
|
||||
In-place modification is not supported. To set, replace the entire
|
||||
property value with the modified list of
|
||||
:class:`~google.cloud.bigquery.routine.RoutineArgument` objects.
|
||||
"""
|
||||
resources = self._properties.get(self._PROPERTY_TO_API_FIELD["arguments"], [])
|
||||
return [RoutineArgument.from_api_repr(resource) for resource in resources]
|
||||
|
||||
@arguments.setter
|
||||
def arguments(self, value):
|
||||
if not value:
|
||||
resource = []
|
||||
else:
|
||||
resource = [argument.to_api_repr() for argument in value]
|
||||
self._properties[self._PROPERTY_TO_API_FIELD["arguments"]] = resource
|
||||
|
||||
@property
|
||||
def return_type(self):
|
||||
"""google.cloud.bigquery.StandardSqlDataType: Return type of
|
||||
the routine.
|
||||
|
||||
If absent, the return type is inferred from
|
||||
:attr:`~google.cloud.bigquery.routine.Routine.body` at query time in
|
||||
each query that references this routine. If present, then the
|
||||
evaluated result will be cast to the specified return type at query
|
||||
time.
|
||||
|
||||
See:
|
||||
https://cloud.google.com/bigquery/docs/reference/rest/v2/routines#Routine.FIELDS.return_type
|
||||
"""
|
||||
resource = self._properties.get(self._PROPERTY_TO_API_FIELD["return_type"])
|
||||
if not resource:
|
||||
return resource
|
||||
|
||||
return StandardSqlDataType.from_api_repr(resource)
|
||||
|
||||
@return_type.setter
|
||||
def return_type(self, value: StandardSqlDataType):
|
||||
resource = None if not value else value.to_api_repr()
|
||||
self._properties[self._PROPERTY_TO_API_FIELD["return_type"]] = resource
|
||||
|
||||
@property
|
||||
def return_table_type(self) -> Union[StandardSqlTableType, Any, None]:
|
||||
"""The return type of a Table Valued Function (TVF) routine.
|
||||
|
||||
.. versionadded:: 2.22.0
|
||||
"""
|
||||
resource = self._properties.get(
|
||||
self._PROPERTY_TO_API_FIELD["return_table_type"]
|
||||
)
|
||||
if not resource:
|
||||
return resource
|
||||
|
||||
return StandardSqlTableType.from_api_repr(resource)
|
||||
|
||||
@return_table_type.setter
|
||||
def return_table_type(self, value: Optional[StandardSqlTableType]):
|
||||
if not value:
|
||||
resource = None
|
||||
else:
|
||||
resource = value.to_api_repr()
|
||||
|
||||
self._properties[self._PROPERTY_TO_API_FIELD["return_table_type"]] = resource
|
||||
|
||||
@property
|
||||
def imported_libraries(self):
|
||||
"""List[str]: The path of the imported JavaScript libraries.
|
||||
|
||||
The :attr:`~google.cloud.bigquery.routine.Routine.language` must
|
||||
equal ``JAVASCRIPT``.
|
||||
|
||||
Examples:
|
||||
Set the ``imported_libraries`` to a list of Google Cloud Storage
|
||||
URIs.
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
routine = bigquery.Routine("proj.dataset.routine_id")
|
||||
routine.imported_libraries = [
|
||||
"gs://cloud-samples-data/bigquery/udfs/max-value.js",
|
||||
]
|
||||
"""
|
||||
return self._properties.get(
|
||||
self._PROPERTY_TO_API_FIELD["imported_libraries"], []
|
||||
)
|
||||
|
||||
@imported_libraries.setter
|
||||
def imported_libraries(self, value):
|
||||
if not value:
|
||||
resource = []
|
||||
else:
|
||||
resource = value
|
||||
self._properties[self._PROPERTY_TO_API_FIELD["imported_libraries"]] = resource
|
||||
|
||||
@property
|
||||
def body(self):
|
||||
"""str: The body of the routine."""
|
||||
return self._properties.get(self._PROPERTY_TO_API_FIELD["body"])
|
||||
|
||||
@body.setter
|
||||
def body(self, value):
|
||||
self._properties[self._PROPERTY_TO_API_FIELD["body"]] = value
|
||||
|
||||
@property
|
||||
def description(self):
|
||||
"""Optional[str]: Description of the routine (defaults to
|
||||
:data:`None`).
|
||||
"""
|
||||
return self._properties.get(self._PROPERTY_TO_API_FIELD["description"])
|
||||
|
||||
@description.setter
|
||||
def description(self, value):
|
||||
self._properties[self._PROPERTY_TO_API_FIELD["description"]] = value
|
||||
|
||||
@property
|
||||
def determinism_level(self):
|
||||
"""Optional[str]: (experimental) The determinism level of the JavaScript UDF
|
||||
if defined.
|
||||
"""
|
||||
return self._properties.get(self._PROPERTY_TO_API_FIELD["determinism_level"])
|
||||
|
||||
@determinism_level.setter
|
||||
def determinism_level(self, value):
|
||||
self._properties[self._PROPERTY_TO_API_FIELD["determinism_level"]] = value
|
||||
|
||||
@property
|
||||
def remote_function_options(self):
|
||||
"""Optional[google.cloud.bigquery.routine.RemoteFunctionOptions]:
|
||||
Configures remote function options for a routine.
|
||||
|
||||
Raises:
|
||||
ValueError:
|
||||
If the value is not
|
||||
:class:`~google.cloud.bigquery.routine.RemoteFunctionOptions` or
|
||||
:data:`None`.
|
||||
"""
|
||||
prop = self._properties.get(
|
||||
self._PROPERTY_TO_API_FIELD["remote_function_options"]
|
||||
)
|
||||
if prop is not None:
|
||||
return RemoteFunctionOptions.from_api_repr(prop)
|
||||
|
||||
@remote_function_options.setter
|
||||
def remote_function_options(self, value):
|
||||
api_repr = value
|
||||
if isinstance(value, RemoteFunctionOptions):
|
||||
api_repr = value.to_api_repr()
|
||||
elif value is not None:
|
||||
raise ValueError(
|
||||
"value must be google.cloud.bigquery.routine.RemoteFunctionOptions "
|
||||
"or None"
|
||||
)
|
||||
self._properties[
|
||||
self._PROPERTY_TO_API_FIELD["remote_function_options"]
|
||||
] = api_repr
|
||||
|
||||
@property
|
||||
def data_governance_type(self):
|
||||
"""Optional[str]: If set to ``DATA_MASKING``, the function is validated
|
||||
and made available as a masking function.
|
||||
|
||||
Raises:
|
||||
ValueError:
|
||||
If the value is not a :class:`str` or :data:`None`.
|
||||
"""
|
||||
return self._properties.get(self._PROPERTY_TO_API_FIELD["data_governance_type"])
|
||||
|
||||
@data_governance_type.setter
|
||||
def data_governance_type(self, value):
|
||||
if value is not None and not isinstance(value, str):
|
||||
raise ValueError(
|
||||
"invalid data_governance_type, must be a string or `None`."
|
||||
)
|
||||
self._properties[self._PROPERTY_TO_API_FIELD["data_governance_type"]] = value
|
||||
|
||||
@classmethod
|
||||
def from_api_repr(cls, resource: dict) -> "Routine":
|
||||
"""Factory: construct a routine given its API representation.
|
||||
|
||||
Args:
|
||||
resource (Dict[str, object]):
|
||||
Resource, as returned from the API.
|
||||
|
||||
Returns:
|
||||
google.cloud.bigquery.routine.Routine:
|
||||
Python object, as parsed from ``resource``.
|
||||
"""
|
||||
ref = cls(RoutineReference.from_api_repr(resource["routineReference"]))
|
||||
ref._properties = resource
|
||||
return ref
|
||||
|
||||
def to_api_repr(self) -> dict:
|
||||
"""Construct the API resource representation of this routine.
|
||||
|
||||
Returns:
|
||||
Dict[str, object]: Routine represented as an API resource.
|
||||
"""
|
||||
return self._properties
|
||||
|
||||
def _build_resource(self, filter_fields):
|
||||
"""Generate a resource for ``update``."""
|
||||
return _helpers._build_resource_from_properties(self, filter_fields)
|
||||
|
||||
def __repr__(self):
|
||||
return "Routine('{}.{}.{}')".format(
|
||||
self.project, self.dataset_id, self.routine_id
|
||||
)
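# Illustrative sketch (hypothetical project and dataset IDs): construct a SQL
# scalar function and create it on the server via
# ``google.cloud.bigquery.client.Client.create_routine``. The enum import is
# shown explicitly because this module does not import it at top level.
#
#     from google.cloud.bigquery.enums import StandardSqlTypeNames
#
#     routine = Routine(
#         "my-project.my_dataset.add_one",
#         type_=RoutineType.SCALAR_FUNCTION,
#         language="SQL",
#         body="x + 1",
#         arguments=[
#             RoutineArgument(
#                 name="x",
#                 data_type=StandardSqlDataType(
#                     type_kind=StandardSqlTypeNames.INT64
#                 ),
#             )
#         ],
#     )
#     routine = client.create_routine(routine)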
|
||||
|
||||
|
||||
class RoutineArgument(object):
|
||||
"""Input/output argument of a function or a stored procedure.
|
||||
|
||||
See:
|
||||
https://cloud.google.com/bigquery/docs/reference/rest/v2/routines#argument
|
||||
|
||||
Args:
|
||||
``**kwargs`` (Dict):
|
||||
Initial property values.
|
||||
"""
|
||||
|
||||
_PROPERTY_TO_API_FIELD = {
|
||||
"data_type": "dataType",
|
||||
"kind": "argumentKind",
|
||||
# Even though it's not necessary for field mapping to map when the
|
||||
# property name equals the resource name, we add these here so that we
|
||||
# have an exhaustive list of all properties.
|
||||
"name": "name",
|
||||
"mode": "mode",
|
||||
}
|
||||
|
||||
def __init__(self, **kwargs) -> None:
|
||||
self._properties: Dict[str, Any] = {}
|
||||
for property_name in kwargs:
|
||||
setattr(self, property_name, kwargs[property_name])
|
||||
|
||||
@property
|
||||
def name(self):
|
||||
"""Optional[str]: Name of this argument.
|
||||
|
||||
Can be absent for function return argument.
|
||||
"""
|
||||
return self._properties.get(self._PROPERTY_TO_API_FIELD["name"])
|
||||
|
||||
@name.setter
|
||||
def name(self, value):
|
||||
self._properties[self._PROPERTY_TO_API_FIELD["name"]] = value
|
||||
|
||||
@property
|
||||
def kind(self):
|
||||
"""Optional[str]: The kind of argument, for example ``FIXED_TYPE`` or
|
||||
``ANY_TYPE``.
|
||||
|
||||
See:
|
||||
https://cloud.google.com/bigquery/docs/reference/rest/v2/routines#Argument.FIELDS.argument_kind
|
||||
"""
|
||||
return self._properties.get(self._PROPERTY_TO_API_FIELD["kind"])
|
||||
|
||||
@kind.setter
|
||||
def kind(self, value):
|
||||
self._properties[self._PROPERTY_TO_API_FIELD["kind"]] = value
|
||||
|
||||
@property
|
||||
def mode(self):
|
||||
"""Optional[str]: The input/output mode of the argument."""
|
||||
return self._properties.get(self._PROPERTY_TO_API_FIELD["mode"])
|
||||
|
||||
@mode.setter
|
||||
def mode(self, value):
|
||||
self._properties[self._PROPERTY_TO_API_FIELD["mode"]] = value
|
||||
|
||||
@property
|
||||
def data_type(self):
|
||||
"""Optional[google.cloud.bigquery.StandardSqlDataType]: Type
|
||||
of a variable, e.g., a function argument.
|
||||
|
||||
See:
|
||||
https://cloud.google.com/bigquery/docs/reference/rest/v2/routines#Argument.FIELDS.data_type
|
||||
"""
|
||||
resource = self._properties.get(self._PROPERTY_TO_API_FIELD["data_type"])
|
||||
if not resource:
|
||||
return resource
|
||||
|
||||
return StandardSqlDataType.from_api_repr(resource)
|
||||
|
||||
@data_type.setter
|
||||
def data_type(self, value):
|
||||
if value:
|
||||
resource = value.to_api_repr()
|
||||
else:
|
||||
resource = None
|
||||
self._properties[self._PROPERTY_TO_API_FIELD["data_type"]] = resource
|
||||
|
||||
@classmethod
|
||||
def from_api_repr(cls, resource: dict) -> "RoutineArgument":
|
||||
"""Factory: construct a routine argument given its API representation.
|
||||
|
||||
Args:
|
||||
resource (Dict[str, object]): Resource, as returned from the API.
|
||||
|
||||
Returns:
|
||||
google.cloud.bigquery.routine.RoutineArgument:
|
||||
Python object, as parsed from ``resource``.
|
||||
"""
|
||||
ref = cls()
|
||||
ref._properties = resource
|
||||
return ref
|
||||
|
||||
def to_api_repr(self) -> dict:
|
||||
"""Construct the API resource representation of this routine argument.
|
||||
|
||||
Returns:
|
||||
Dict[str, object]: Routine argument represented as an API resource.
|
||||
"""
|
||||
return self._properties
|
||||
|
||||
def __eq__(self, other):
|
||||
if not isinstance(other, RoutineArgument):
|
||||
return NotImplemented
|
||||
return self._properties == other._properties
|
||||
|
||||
def __ne__(self, other):
|
||||
return not self == other
|
||||
|
||||
def __repr__(self):
|
||||
all_properties = [
|
||||
"{}={}".format(property_name, repr(getattr(self, property_name)))
|
||||
for property_name in sorted(self._PROPERTY_TO_API_FIELD)
|
||||
]
|
||||
return "RoutineArgument({})".format(", ".join(all_properties))
|
||||
|
||||
|
||||
class RoutineReference(object):
|
||||
"""A pointer to a routine.
|
||||
|
||||
See:
|
||||
https://cloud.google.com/bigquery/docs/reference/rest/v2/routines#routinereference
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
self._properties = {}
|
||||
|
||||
@property
|
||||
def project(self):
|
||||
"""str: ID of the project containing the routine."""
|
||||
# TODO: The typehinting for this needs work. Setting this pragma to temporarily
|
||||
# manage a pytype issue that came up in another PR. See Issue: #2132
|
||||
return self._properties["projectId"] # pytype: disable=typed-dict-error
|
||||
|
||||
@property
|
||||
def dataset_id(self):
|
||||
"""str: ID of dataset containing the routine."""
|
||||
# TODO: The typehinting for this needs work. Setting this pragma to temporarily
|
||||
# manage a pytype issue that came up in another PR. See Issue: #2132
|
||||
return self._properties["datasetId"] # pytype: disable=typed-dict-error
|
||||
|
||||
@property
|
||||
def routine_id(self):
|
||||
"""str: The routine ID."""
|
||||
# TODO: The typehinting for this needs work. Setting this pragma to temporarily
|
||||
# manage a pytype issue that came up in another PR. See Issue: #2132
|
||||
return self._properties["routineId"] # pytype: disable=typed-dict-error
|
||||
|
||||
@property
|
||||
def path(self):
|
||||
"""str: URL path for the routine's APIs."""
|
||||
return "/projects/%s/datasets/%s/routines/%s" % (
|
||||
self.project,
|
||||
self.dataset_id,
|
||||
self.routine_id,
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def from_api_repr(cls, resource: dict) -> "RoutineReference":
|
||||
"""Factory: construct a routine reference given its API representation.
|
||||
|
||||
Args:
|
||||
resource (Dict[str, object]):
|
||||
Routine reference representation returned from the API.
|
||||
|
||||
Returns:
|
||||
google.cloud.bigquery.routine.RoutineReference:
|
||||
Routine reference parsed from ``resource``.
|
||||
"""
|
||||
ref = cls()
|
||||
ref._properties = resource
|
||||
return ref
|
||||
|
||||
@classmethod
|
||||
def from_string(
|
||||
cls, routine_id: str, default_project: Optional[str] = None
|
||||
) -> "RoutineReference":
|
||||
"""Factory: construct a routine reference from routine ID string.
|
||||
|
||||
Args:
|
||||
routine_id (str):
|
||||
A routine ID in standard SQL format. If ``default_project``
|
||||
is not specified, this must include a project ID, dataset
|
||||
ID, and routine ID, each separated by ``.``.
|
||||
default_project (Optional[str]):
|
||||
The project ID to use when ``routine_id`` does not
|
||||
include a project ID.
|
||||
|
||||
Returns:
|
||||
google.cloud.bigquery.routine.RoutineReference:
|
||||
Routine reference parsed from ``routine_id``.
|
||||
|
||||
Raises:
|
||||
ValueError:
|
||||
If ``routine_id`` is not a fully-qualified routine ID in
|
||||
standard SQL format.
|
||||
"""
|
||||
proj, dset, routine = _helpers._parse_3_part_id(
|
||||
routine_id, default_project=default_project, property_name="routine_id"
|
||||
)
|
||||
return cls.from_api_repr(
|
||||
{"projectId": proj, "datasetId": dset, "routineId": routine}
|
||||
)
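# Illustrative sketch (hypothetical IDs): both calls below yield the same
# reference.
#
#     ref = RoutineReference.from_string("my-project.my_dataset.my_routine")
#     ref = RoutineReference.from_string(
#         "my_dataset.my_routine", default_project="my-project"
#     )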
|
||||
|
||||
def to_api_repr(self) -> dict:
|
||||
"""Construct the API resource representation of this routine reference.
|
||||
|
||||
Returns:
|
||||
Dict[str, object]: Routine reference represented as an API resource.
|
||||
"""
|
||||
return self._properties
|
||||
|
||||
def __eq__(self, other):
|
||||
"""Two RoutineReferences are equal if they point to the same routine."""
|
||||
if not isinstance(other, RoutineReference):
|
||||
return NotImplemented
|
||||
return str(self) == str(other)
|
||||
|
||||
def __hash__(self):
|
||||
return hash(str(self))
|
||||
|
||||
def __ne__(self, other):
|
||||
return not self == other
|
||||
|
||||
def __repr__(self):
|
||||
return "RoutineReference.from_string('{}')".format(str(self))
|
||||
|
||||
def __str__(self):
|
||||
"""String representation of the reference.
|
||||
|
||||
This is a fully-qualified ID, including the project ID and dataset ID.
|
||||
"""
|
||||
return "{}.{}.{}".format(self.project, self.dataset_id, self.routine_id)
|
||||
|
||||
|
||||
class RemoteFunctionOptions(object):
|
||||
"""Configuration options for controlling remote BigQuery functions."""
|
||||
|
||||
_PROPERTY_TO_API_FIELD = {
|
||||
"endpoint": "endpoint",
|
||||
"connection": "connection",
|
||||
"max_batching_rows": "maxBatchingRows",
|
||||
"user_defined_context": "userDefinedContext",
|
||||
}
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
endpoint=None,
|
||||
connection=None,
|
||||
max_batching_rows=None,
|
||||
user_defined_context=None,
|
||||
_properties=None,
|
||||
) -> None:
|
||||
if _properties is None:
|
||||
_properties = {}
|
||||
self._properties = _properties
|
||||
|
||||
if endpoint is not None:
|
||||
self.endpoint = endpoint
|
||||
if connection is not None:
|
||||
self.connection = connection
|
||||
if max_batching_rows is not None:
|
||||
self.max_batching_rows = max_batching_rows
|
||||
if user_defined_context is not None:
|
||||
self.user_defined_context = user_defined_context
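# Illustrative sketch (hypothetical endpoint and connection): build options
# and attach them to a routine via ``Routine.remote_function_options``.
#
#     options = RemoteFunctionOptions(
#         endpoint="https://us-east1-my-project.cloudfunctions.net/remote_add",
#         connection="projects/my-project/locations/us-east1/connections/my-conn",
#         max_batching_rows=50,
#     )
#     routine.remote_function_options = options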
|
||||
|
||||
@property
|
||||
def connection(self):
|
||||
"""string: Fully qualified name of the user-provided connection object which holds the authentication information to send requests to the remote service.
|
||||
|
||||
Format is "projects/{projectId}/locations/{locationId}/connections/{connectionId}"
|
||||
"""
|
||||
return _helpers._str_or_none(self._properties.get("connection"))
|
||||
|
||||
@connection.setter
|
||||
def connection(self, value):
|
||||
self._properties["connection"] = _helpers._str_or_none(value)
|
||||
|
||||
@property
|
||||
def endpoint(self):
|
||||
"""string: Endpoint of the user-provided remote service
|
||||
|
||||
Example: "https://us-east1-my_gcf_project.cloudfunctions.net/remote_add"
|
||||
"""
|
||||
return _helpers._str_or_none(self._properties.get("endpoint"))
|
||||
|
||||
@endpoint.setter
|
||||
def endpoint(self, value):
|
||||
self._properties["endpoint"] = _helpers._str_or_none(value)
|
||||
|
||||
@property
|
||||
def max_batching_rows(self):
|
||||
"""int64: Max number of rows in each batch sent to the remote service.
|
||||
|
||||
If absent or if 0, BigQuery dynamically decides the number of rows in a batch.
|
||||
"""
|
||||
return _helpers._int_or_none(self._properties.get("maxBatchingRows"))
|
||||
|
||||
@max_batching_rows.setter
|
||||
def max_batching_rows(self, value):
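# The REST API encodes int64 values as strings in JSON, which is why the
# setter stores a string even though the getter parses it back to an int.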
|
||||
self._properties["maxBatchingRows"] = _helpers._str_or_none(value)
|
||||
|
||||
@property
|
||||
def user_defined_context(self):
|
||||
"""Dict[str, str]: User-defined context as a set of key/value pairs,
|
||||
which will be sent as function invocation context together with
|
||||
batched arguments in the requests to the remote service. The total
|
||||
number of bytes of keys and values must be less than 8KB.
|
||||
"""
|
||||
return self._properties.get("userDefinedContext")
|
||||
|
||||
@user_defined_context.setter
|
||||
def user_defined_context(self, value):
|
||||
if not isinstance(value, dict):
|
||||
raise ValueError("value must be dictionary")
|
||||
self._properties["userDefinedContext"] = value
|
||||
|
||||
@classmethod
|
||||
def from_api_repr(cls, resource: dict) -> "RemoteFunctionOptions":
|
||||
"""Factory: construct remote function options given its API representation.
|
||||
|
||||
Args:
|
||||
resource (Dict[str, object]): Resource, as returned from the API.
|
||||
|
||||
Returns:
|
||||
google.cloud.bigquery.routine.RemoteFunctionOptions:
|
||||
Python object, as parsed from ``resource``.
|
||||
"""
|
||||
ref = cls()
|
||||
ref._properties = resource
|
||||
return ref
|
||||
|
||||
def to_api_repr(self) -> dict:
|
||||
"""Construct the API resource representation of this RemoteFunctionOptions.
|
||||
|
||||
Returns:
|
||||
Dict[str, object]: Remote function options represented as an API resource.
|
||||
"""
|
||||
return self._properties
|
||||
|
||||
def __eq__(self, other):
|
||||
if not isinstance(other, RemoteFunctionOptions):
|
||||
return NotImplemented
|
||||
return self._properties == other._properties
|
||||
|
||||
def __ne__(self, other):
|
||||
return not self == other
|
||||
|
||||
def __repr__(self):
|
||||
all_properties = [
|
||||
"{}={}".format(property_name, repr(getattr(self, property_name)))
|
||||
for property_name in sorted(self._PROPERTY_TO_API_FIELD)
|
||||
]
|
||||
return "RemoteFunctionOptions({})".format(", ".join(all_properties))
|
||||
@@ -0,0 +1,896 @@
|
||||
# Copyright 2015 Google LLC
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""Schemas for BigQuery tables / queries."""
|
||||
|
||||
from __future__ import annotations
|
||||
import enum
|
||||
import typing
|
||||
from typing import Any, cast, Dict, Iterable, Optional, Union, Sequence
|
||||
|
||||
from google.cloud.bigquery import _helpers
|
||||
from google.cloud.bigquery import standard_sql
|
||||
from google.cloud.bigquery import enums
|
||||
from google.cloud.bigquery.enums import StandardSqlTypeNames
|
||||
|
||||
|
||||
_STRUCT_TYPES = ("RECORD", "STRUCT")
|
||||
|
||||
# SQL types reference:
|
||||
# LEGACY SQL: https://cloud.google.com/bigquery/data-types#legacy_sql_data_types
|
||||
# GoogleSQL: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types
|
||||
LEGACY_TO_STANDARD_TYPES = {
|
||||
"STRING": StandardSqlTypeNames.STRING,
|
||||
"BYTES": StandardSqlTypeNames.BYTES,
|
||||
"INTEGER": StandardSqlTypeNames.INT64,
|
||||
"INT64": StandardSqlTypeNames.INT64,
|
||||
"FLOAT": StandardSqlTypeNames.FLOAT64,
|
||||
"FLOAT64": StandardSqlTypeNames.FLOAT64,
|
||||
"NUMERIC": StandardSqlTypeNames.NUMERIC,
|
||||
"BIGNUMERIC": StandardSqlTypeNames.BIGNUMERIC,
|
||||
"BOOLEAN": StandardSqlTypeNames.BOOL,
|
||||
"BOOL": StandardSqlTypeNames.BOOL,
|
||||
"GEOGRAPHY": StandardSqlTypeNames.GEOGRAPHY,
|
||||
"RECORD": StandardSqlTypeNames.STRUCT,
|
||||
"STRUCT": StandardSqlTypeNames.STRUCT,
|
||||
"TIMESTAMP": StandardSqlTypeNames.TIMESTAMP,
|
||||
"DATE": StandardSqlTypeNames.DATE,
|
||||
"TIME": StandardSqlTypeNames.TIME,
|
||||
"DATETIME": StandardSqlTypeNames.DATETIME,
|
||||
"FOREIGN": StandardSqlTypeNames.FOREIGN,
|
||||
# no direct conversion from ARRAY, the latter is represented by mode="REPEATED"
|
||||
}
|
||||
"""String names of the legacy SQL types to integer codes of Standard SQL standard_sql."""
|
||||
|
||||
|
||||
class _DefaultSentinel(enum.Enum):
|
||||
"""Object used as 'sentinel' indicating default value should be used.
|
||||
|
||||
Uses enum so that pytype/mypy knows that this is the only possible value.
|
||||
https://stackoverflow.com/a/60605919/101923
|
||||
|
||||
Literal[_DEFAULT_VALUE] is an alternative, but only added in Python 3.8.
|
||||
https://docs.python.org/3/library/typing.html#typing.Literal
|
||||
"""
|
||||
|
||||
DEFAULT_VALUE = object()
|
||||
|
||||
|
||||
_DEFAULT_VALUE = _DefaultSentinel.DEFAULT_VALUE
|
||||
|
||||
|
||||
class FieldElementType(object):
|
||||
"""Represents the type of a field element.
|
||||
|
||||
Args:
|
||||
element_type (str): The type of a field element.
|
||||
"""
|
||||
|
||||
def __init__(self, element_type: str):
|
||||
self._properties = {}
|
||||
self._properties["type"] = element_type.upper()
|
||||
|
||||
@property
|
||||
def element_type(self):
|
||||
return self._properties.get("type")
|
||||
|
||||
@classmethod
|
||||
def from_api_repr(cls, api_repr: Optional[dict]) -> Optional["FieldElementType"]:
|
||||
"""Factory: construct a FieldElementType given its API representation.
|
||||
|
||||
Args:
|
||||
api_repr (Dict[str, str]): field element type as returned from
|
||||
the API.
|
||||
|
||||
Returns:
|
||||
google.cloud.bigquery.FieldElementType:
|
||||
Python object, as parsed from ``api_repr``.
|
||||
"""
|
||||
if not api_repr:
|
||||
return None
|
||||
return cls(api_repr["type"].upper())
|
||||
|
||||
def to_api_repr(self) -> dict:
|
||||
"""Construct the API resource representation of this field element type.
|
||||
|
||||
Returns:
|
||||
Dict[str, str]: Field element type represented as an API resource.
|
||||
"""
|
||||
return self._properties
|
||||
|
||||
|
||||
class SchemaField(object):
|
||||
"""Describe a single field within a table schema.
|
||||
|
||||
Args:
|
||||
name: The name of the field.
|
||||
|
||||
field_type:
|
||||
The type of the field. See
|
||||
https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#TableFieldSchema.FIELDS.type
|
||||
|
||||
mode:
|
||||
Defaults to ``'NULLABLE'``. The mode of the field. See
|
||||
https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#TableFieldSchema.FIELDS.mode
|
||||
|
||||
description: Description for the field.
|
||||
|
||||
fields: Subfields (requires ``field_type`` of 'RECORD').
|
||||
|
||||
policy_tags: The policy tag list for the field.
|
||||
|
||||
precision:
|
||||
Precision (number of digits) of fields with NUMERIC or BIGNUMERIC type.
|
||||
|
||||
scale:
|
||||
Scale (digits after decimal) of fields with NUMERIC or BIGNUMERIC type.
|
||||
|
||||
max_length: Maximum length of fields with STRING or BYTES type.
|
||||
|
||||
default_value_expression: str, Optional
|
||||
Used to specify the default value of a field using a SQL expression. It can only be set for
|
||||
top level fields (columns).
|
||||
|
||||
You can use a struct or array expression to specify default value for the entire struct or
|
||||
array. The valid SQL expressions are:
|
||||
|
||||
- Literals for all data types, including STRUCT and ARRAY.
|
||||
|
||||
- The following functions:
|
||||
|
||||
`CURRENT_TIMESTAMP`
|
||||
`CURRENT_TIME`
|
||||
`CURRENT_DATE`
|
||||
`CURRENT_DATETIME`
|
||||
`GENERATE_UUID`
|
||||
`RAND`
|
||||
`SESSION_USER`
|
||||
`ST_GEOPOINT`
|
||||
|
||||
- Struct or array composed with the above allowed functions, for example:
|
||||
|
||||
"[CURRENT_DATE(), DATE '2020-01-01'"]
|
||||
|
||||
range_element_type: FieldElementType, str, Optional
|
||||
The subtype of the RANGE, if the type of this field is RANGE. If
|
||||
the type is RANGE, this field is required. Possible values for the
|
||||
field element type of a RANGE include `DATE`, `DATETIME` and
|
||||
`TIMESTAMP`.
|
||||
|
||||
rounding_mode: Union[enums.RoundingMode, str, None]
|
||||
Specifies the rounding mode to be used when storing values of
|
||||
NUMERIC and BIGNUMERIC type.
|
||||
|
||||
If unspecified, defaults to ROUND_HALF_AWAY_FROM_ZERO.
|
||||
ROUND_HALF_AWAY_FROM_ZERO rounds half values away from zero
|
||||
when applying precision and scale upon writing of NUMERIC and BIGNUMERIC
|
||||
values.
|
||||
|
||||
For Scale: 0
|
||||
1.1, 1.2, 1.3, 1.4 => 1
|
||||
1.5, 1.6, 1.7, 1.8, 1.9 => 2
|
||||
|
||||
ROUND_HALF_EVEN rounds half values to the nearest even value
|
||||
when applying precision and scale upon writing of NUMERIC and BIGNUMERIC
|
||||
values.
|
||||
|
||||
For Scale: 0
|
||||
1.1, 1.2, 1.3, 1.4 => 1
|
||||
1.5 => 2
|
||||
1.6, 1.7, 1.8, 1.9 => 2
|
||||
2.5 => 2
|
||||
|
||||
foreign_type_definition: Optional[str]
|
||||
Definition of the foreign data type.
|
||||
|
||||
Only valid for top-level schema fields (not nested fields).
|
||||
If the type is FOREIGN, this field is required.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
name: str,
|
||||
field_type: str,
|
||||
mode: str = "NULLABLE",
|
||||
default_value_expression: Optional[str] = None,
|
||||
description: Union[str, _DefaultSentinel] = _DEFAULT_VALUE,
|
||||
fields: Iterable["SchemaField"] = (),
|
||||
policy_tags: Union["PolicyTagList", None, _DefaultSentinel] = _DEFAULT_VALUE,
|
||||
precision: Union[int, _DefaultSentinel] = _DEFAULT_VALUE,
|
||||
scale: Union[int, _DefaultSentinel] = _DEFAULT_VALUE,
|
||||
max_length: Union[int, _DefaultSentinel] = _DEFAULT_VALUE,
|
||||
range_element_type: Union[FieldElementType, str, None] = None,
|
||||
rounding_mode: Union[enums.RoundingMode, str, None] = None,
|
||||
foreign_type_definition: Optional[str] = None,
|
||||
):
|
||||
self._properties: Dict[str, Any] = {
|
||||
"name": name,
|
||||
"type": field_type,
|
||||
}
|
||||
self._properties["name"] = name
|
||||
if mode is not None:
|
||||
self._properties["mode"] = mode.upper()
|
||||
if description is not _DEFAULT_VALUE:
|
||||
self._properties["description"] = description
|
||||
if default_value_expression is not None:
|
||||
self._properties["defaultValueExpression"] = default_value_expression
|
||||
if precision is not _DEFAULT_VALUE:
|
||||
self._properties["precision"] = precision
|
||||
if scale is not _DEFAULT_VALUE:
|
||||
self._properties["scale"] = scale
|
||||
if max_length is not _DEFAULT_VALUE:
|
||||
self._properties["maxLength"] = max_length
|
||||
if policy_tags is not _DEFAULT_VALUE:
|
||||
# TODO: The typehinting for this needs work. Setting this pragma to temporarily
|
||||
# manage a pytype issue that came up in another PR. See Issue: #2132
|
||||
self._properties["policyTags"] = (
|
||||
policy_tags.to_api_repr() # pytype: disable=attribute-error
|
||||
if policy_tags is not None
|
||||
else None
|
||||
)
|
||||
if isinstance(range_element_type, str):
|
||||
self._properties["rangeElementType"] = {"type": range_element_type}
|
||||
if isinstance(range_element_type, FieldElementType):
|
||||
self._properties["rangeElementType"] = range_element_type.to_api_repr()
|
||||
if rounding_mode is not None:
|
||||
self._properties["roundingMode"] = rounding_mode
|
||||
if foreign_type_definition is not None:
|
||||
self._properties["foreignTypeDefinition"] = foreign_type_definition
|
||||
|
||||
if fields: # Don't set the property if it's not set.
|
||||
self._properties["fields"] = [field.to_api_repr() for field in fields]
|
||||
|
||||
@classmethod
|
||||
def from_api_repr(cls, api_repr: dict) -> "SchemaField":
|
||||
"""Return a ``SchemaField`` object deserialized from a dictionary.
|
||||
|
||||
Args:
|
||||
api_repr (Mapping[str, str]): The serialized representation
|
||||
of the SchemaField, such as what is output by
|
||||
:meth:`to_api_repr`.
|
||||
|
||||
Returns:
|
||||
google.cloud.bigquery.schema.SchemaField: The ``SchemaField`` object.
|
||||
"""
|
||||
placeholder = cls("this_will_be_replaced", "PLACEHOLDER")
|
||||
|
||||
# Note: we don't make a copy of api_repr because this can cause
|
||||
# unnecessary slowdowns, especially on deeply nested STRUCT / RECORD
|
||||
# fields. See https://github.com/googleapis/python-bigquery/issues/6
|
||||
placeholder._properties = api_repr
|
||||
|
||||
# Add the field `mode` with default value if it does not exist. Fixes
|
||||
# an incompatibility issue with pandas-gbq:
|
||||
# https://github.com/googleapis/python-bigquery-pandas/issues/854
|
||||
if "mode" not in placeholder._properties:
|
||||
placeholder._properties["mode"] = "NULLABLE"
|
||||
|
||||
return placeholder
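# Illustrative sketch: ``from_api_repr`` adopts the passed mapping as its
# internal properties, so it round-trips with ``to_api_repr`` without copying.
#
#     api_repr = {"name": "age", "type": "INTEGER", "mode": "NULLABLE"}
#     field = SchemaField.from_api_repr(api_repr)
#     assert field.to_api_repr() is api_repr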
|
||||
|
||||
@property
|
||||
def name(self):
|
||||
"""str: The name of the field."""
|
||||
return self._properties.get("name", "")
|
||||
|
||||
@property
|
||||
def field_type(self):
|
||||
"""str: The type of the field.
|
||||
|
||||
See:
|
||||
https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#TableFieldSchema.FIELDS.type
|
||||
"""
|
||||
type_ = self._properties.get("type")
|
||||
if type_ is None: # Shouldn't happen, but some unit tests do this.
|
||||
return None
|
||||
return cast(str, type_).upper()
|
||||
|
||||
@property
|
||||
def mode(self):
|
||||
"""Optional[str]: The mode of the field.
|
||||
|
||||
See:
|
||||
https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#TableFieldSchema.FIELDS.mode
|
||||
"""
|
||||
return cast(str, self._properties.get("mode", "NULLABLE")).upper()
|
||||
|
||||
@property
|
||||
def is_nullable(self):
|
||||
"""bool: whether 'mode' is 'nullable'."""
|
||||
return self.mode == "NULLABLE"
|
||||
|
||||
@property
|
||||
def default_value_expression(self):
|
||||
"""Optional[str] default value of a field, using an SQL expression"""
|
||||
return self._properties.get("defaultValueExpression")
|
||||
|
||||
@property
|
||||
def description(self):
|
||||
"""Optional[str]: description for the field."""
|
||||
return self._properties.get("description")
|
||||
|
||||
@property
|
||||
def precision(self):
|
||||
"""Optional[int]: Precision (number of digits) for the NUMERIC field."""
|
||||
return _helpers._int_or_none(self._properties.get("precision"))
|
||||
|
||||
@property
|
||||
def scale(self):
|
||||
"""Optional[int]: Scale (digits after decimal) for the NUMERIC field."""
|
||||
return _helpers._int_or_none(self._properties.get("scale"))
|
||||
|
||||
@property
|
||||
def max_length(self):
|
||||
"""Optional[int]: Maximum length for the STRING or BYTES field."""
|
||||
return _helpers._int_or_none(self._properties.get("maxLength"))
|
||||
|
||||
@property
|
||||
def range_element_type(self):
|
||||
"""Optional[FieldElementType]: The subtype of the RANGE, if the
|
||||
type of this field is RANGE.
|
||||
|
||||
Must be set when ``type`` is `"RANGE"`. Must be one of `"DATE"`,
|
||||
`"DATETIME"` or `"TIMESTAMP"`.
|
||||
"""
|
||||
if self._properties.get("rangeElementType"):
|
||||
ret = self._properties.get("rangeElementType")
|
||||
return FieldElementType.from_api_repr(ret)
|
||||
|
||||
@property
|
||||
def rounding_mode(self):
|
||||
"""Enum that specifies the rounding mode to be used when storing values of
|
||||
NUMERIC and BIGNUMERIC type.
|
||||
"""
|
||||
return self._properties.get("roundingMode")
|
||||
|
||||
@property
|
||||
def foreign_type_definition(self):
|
||||
"""Definition of the foreign data type.
|
||||
|
||||
Only valid for top-level schema fields (not nested fields).
|
||||
If the type is FOREIGN, this field is required.
|
||||
"""
|
||||
return self._properties.get("foreignTypeDefinition")
|
||||
|
||||
@property
|
||||
def fields(self):
|
||||
"""Optional[tuple]: Subfields contained in this field.
|
||||
|
||||
Must be empty (unset) if ``field_type`` is not 'RECORD'.
|
||||
"""
|
||||
return tuple(_to_schema_fields(self._properties.get("fields", [])))
|
||||
|
||||
@property
|
||||
def policy_tags(self):
|
||||
"""Optional[google.cloud.bigquery.schema.PolicyTagList]: Policy tag list
|
||||
definition for this field.
|
||||
"""
|
||||
resource = self._properties.get("policyTags")
|
||||
return PolicyTagList.from_api_repr(resource) if resource is not None else None
|
||||
|
||||
def to_api_repr(self) -> dict:
|
||||
"""Return a dictionary representing this schema field.
|
||||
|
||||
Returns:
|
||||
Dict: A dictionary representing the SchemaField in a serialized form.
|
||||
"""
|
||||
# Note: we don't make a copy of _properties because this can cause
|
||||
# unnecessary slowdowns, especially on deeply nested STRUCT / RECORD
|
||||
# fields. See https://github.com/googleapis/python-bigquery/issues/6
|
||||
return self._properties
|
||||
|
||||
def _key(self):
|
||||
"""A tuple key that uniquely describes this field.
|
||||
|
||||
Used to compute this instance's hashcode and evaluate equality.
|
||||
|
||||
Returns:
|
||||
Tuple: The contents of this :class:`~google.cloud.bigquery.schema.SchemaField`.
|
||||
"""
|
||||
field_type = self.field_type.upper() if self.field_type is not None else None
|
||||
|
||||
# Type can temporarily be set to None if the code needs a SchemaField instance,
|
||||
# but has not determined the exact type of the field yet.
|
||||
if field_type is not None:
|
||||
if field_type == "STRING" or field_type == "BYTES":
|
||||
if self.max_length is not None:
|
||||
field_type = f"{field_type}({self.max_length})"
|
||||
elif field_type.endswith("NUMERIC"):
|
||||
if self.precision is not None:
|
||||
if self.scale is not None:
|
||||
field_type = f"{field_type}({self.precision}, {self.scale})"
|
||||
else:
|
||||
field_type = f"{field_type}({self.precision})"
|
||||
|
||||
policy_tags = (
|
||||
None if self.policy_tags is None else tuple(sorted(self.policy_tags.names))
|
||||
)
|
||||
|
||||
return (
|
||||
self.name,
|
||||
field_type,
|
||||
# Mode is always a str; if not given, it defaults to "NULLABLE".
|
||||
self.mode.upper(), # pytype: disable=attribute-error
|
||||
self.default_value_expression,
|
||||
self.description,
|
||||
self.fields,
|
||||
policy_tags,
|
||||
)
|
||||
|
||||
def to_standard_sql(self) -> standard_sql.StandardSqlField:
|
||||
"""Return the field as the standard SQL field representation object."""
|
||||
sql_type = standard_sql.StandardSqlDataType()
|
||||
|
||||
if self.mode == "REPEATED":
|
||||
sql_type.type_kind = StandardSqlTypeNames.ARRAY
|
||||
else:
|
||||
sql_type.type_kind = LEGACY_TO_STANDARD_TYPES.get(
|
||||
self.field_type,
|
||||
StandardSqlTypeNames.TYPE_KIND_UNSPECIFIED,
|
||||
)
|
||||
|
||||
if sql_type.type_kind == StandardSqlTypeNames.ARRAY: # noqa: E721
|
||||
array_element_type = LEGACY_TO_STANDARD_TYPES.get(
|
||||
self.field_type,
|
||||
StandardSqlTypeNames.TYPE_KIND_UNSPECIFIED,
|
||||
)
|
||||
sql_type.array_element_type = standard_sql.StandardSqlDataType(
|
||||
type_kind=array_element_type
|
||||
)
|
||||
|
||||
# ARRAY cannot directly contain other arrays, only scalar types and STRUCTs
|
||||
# https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#array-type
|
||||
if array_element_type == StandardSqlTypeNames.STRUCT: # noqa: E721
|
||||
sql_type.array_element_type.struct_type = (
|
||||
standard_sql.StandardSqlStructType(
|
||||
fields=(field.to_standard_sql() for field in self.fields)
|
||||
)
|
||||
)
|
||||
elif sql_type.type_kind == StandardSqlTypeNames.STRUCT: # noqa: E721
|
||||
sql_type.struct_type = standard_sql.StandardSqlStructType(
|
||||
fields=(field.to_standard_sql() for field in self.fields)
|
||||
)
|
||||
|
||||
return standard_sql.StandardSqlField(name=self.name, type=sql_type)
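# Illustrative sketch: a REPEATED legacy field maps to a Standard SQL ARRAY
# whose element type comes from LEGACY_TO_STANDARD_TYPES above.
#
#     tags = SchemaField("tags", "STRING", mode="REPEATED")
#     sql_field = tags.to_standard_sql()
#     assert sql_field.type.type_kind == StandardSqlTypeNames.ARRAY
#     assert (
#         sql_field.type.array_element_type.type_kind
#         == StandardSqlTypeNames.STRING
#     )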
|
||||
|
||||
def __eq__(self, other):
|
||||
if not isinstance(other, SchemaField):
|
||||
return NotImplemented
|
||||
return self._key() == other._key()
|
||||
|
||||
def __ne__(self, other):
|
||||
return not self == other
|
||||
|
||||
def __hash__(self):
|
||||
return hash(self._key())
|
||||
|
||||
def __repr__(self):
|
||||
key = self._key()
|
||||
policy_tags = key[-1]
|
||||
policy_tags_inst = None if policy_tags is None else PolicyTagList(policy_tags)
|
||||
adjusted_key = key[:-1] + (policy_tags_inst,)
|
||||
return f"{self.__class__.__name__}{adjusted_key}"
|
||||
|
||||
|
||||
def _parse_schema_resource(info):
|
||||
"""Parse a resource fragment into a schema field.
|
||||
|
||||
Args:
|
||||
info (Mapping[str, Dict]): should contain a "fields" key to be parsed
|
||||
|
||||
Returns:
|
||||
Sequence[google.cloud.bigquery.schema.SchemaField]:
|
||||
A list of parsed fields; empty if no "fields" key is present.
|
||||
"""
|
||||
if isinstance(info, list):
|
||||
return [SchemaField.from_api_repr(f) for f in info]
|
||||
return [SchemaField.from_api_repr(f) for f in info.get("fields", ())]
|
||||
|
||||
|
||||
def _build_schema_resource(fields):
|
||||
"""Generate a resource fragment for a schema.
|
||||
|
||||
Args:
|
||||
fields (Sequence[google.cloud.bigquery.schema.SchemaField]): schema to be dumped.
|
||||
|
||||
Returns:
|
||||
Sequence[Dict]: Mappings describing the schema of the supplied fields.
|
||||
"""
|
||||
if isinstance(fields, Sequence):
|
||||
# Input is a Sequence (e.g. a list): serialize each field to its API representation.
|
||||
return [field.to_api_repr() for field in fields]
|
||||
|
||||
else:
|
||||
raise TypeError("Schema must be a Sequence (e.g. a list) or None.")
|
||||
|
||||
|
||||
def _to_schema_fields(schema):
|
||||
"""Coerces schema to a list of SchemaField instances while
|
||||
preserving the original structure as much as possible.
|
||||
|
||||
Args:
|
||||
schema (Sequence[Union[ \
    :class:`~google.cloud.bigquery.schema.SchemaField`, \
    Mapping[str, Any] \
]]):
|
||||
Table schema to convert. Can be a list of SchemaField
|
||||
objects or mappings.
|
||||
|
||||
Returns:
|
||||
A list of SchemaField objects.
|
||||
|
||||
Raises:
|
||||
TypeError: If schema is not a Sequence.
|
||||
"""
|
||||
|
||||
if isinstance(schema, Sequence):
|
||||
# Input is a Sequence (e.g. a list): Process and return a list of SchemaFields
|
||||
return [
|
||||
field
|
||||
if isinstance(field, SchemaField)
|
||||
else SchemaField.from_api_repr(field)
|
||||
for field in schema
|
||||
]
|
||||
|
||||
else:
|
||||
raise TypeError("Schema must be a Sequence (e.g. a list) or None.")
|
||||
|
||||
|
||||
class PolicyTagList(object):
|
||||
"""Define Policy Tags for a column.
|
||||
|
||||
Args:
|
||||
names (Optional[Tuple[str]]): list of policy tags to associate with
|
||||
the column. Policy tag identifiers are of the form
|
||||
`projects/*/locations/*/taxonomies/*/policyTags/*`.
|
||||
"""
|
||||
|
||||
def __init__(self, names: Iterable[str] = ()):
|
||||
self._properties = {}
|
||||
self._properties["names"] = tuple(names)
|
||||
|
||||
@property
|
||||
def names(self):
|
||||
"""Tuple[str]: Policy tags associated with this definition."""
|
||||
return self._properties.get("names", ())
|
||||
|
||||
def _key(self):
|
||||
"""A tuple key that uniquely describes this PolicyTagList.
|
||||
|
||||
Used to compute this instance's hashcode and evaluate equality.
|
||||
|
||||
Returns:
|
||||
Tuple: The contents of this :class:`~google.cloud.bigquery.schema.PolicyTagList`.
|
||||
"""
|
||||
return tuple(sorted(self._properties.get("names", ())))
|
||||
|
||||
def __eq__(self, other):
|
||||
if not isinstance(other, PolicyTagList):
|
||||
return NotImplemented
|
||||
return self._key() == other._key()
|
||||
|
||||
def __ne__(self, other):
|
||||
return not self == other
|
||||
|
||||
def __hash__(self):
|
||||
return hash(self._key())
|
||||
|
||||
def __repr__(self):
|
||||
return f"{self.__class__.__name__}(names={self._key()})"
|
||||
|
||||
@classmethod
|
||||
def from_api_repr(cls, api_repr: dict) -> "PolicyTagList":
|
||||
"""Return a :class:`PolicyTagList` object deserialized from a dict.
|
||||
|
||||
This method builds a new ``PolicyTagList`` from the ``names`` entry of
``api_repr``; the resulting instance does not share state with the
passed mapping.
|
||||
|
||||
Args:
|
||||
api_repr (Mapping[str, str]):
|
||||
The serialized representation of the PolicyTagList, such as
|
||||
what is output by :meth:`to_api_repr`.
|
||||
|
||||
Returns:
|
||||
Optional[google.cloud.bigquery.schema.PolicyTagList]:
|
||||
The ``PolicyTagList`` object or None.
|
||||
"""
|
||||
if api_repr is None:
|
||||
return None
|
||||
names = api_repr.get("names", ())
|
||||
return cls(names=names)
|
||||
|
||||
def to_api_repr(self) -> dict:
|
||||
"""Return a dictionary representing this object.
|
||||
|
||||
This method builds a new dictionary from the stored names rather than
returning the internal properties dict, so mutating the result does not
affect this instance.
|
||||
|
||||
Returns:
|
||||
dict:
|
||||
A dictionary representing the PolicyTagList object in
|
||||
serialized form.
|
||||
"""
|
||||
answer = {"names": list(self.names)}
|
||||
return answer
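# Illustrative sketch (hypothetical taxonomy path): attach a policy tag to a
# column definition.
#
#     tags = PolicyTagList(
#         names=[
#             "projects/my-project/locations/us/taxonomies/123/policyTags/456"
#         ]
#     )
#     ssn = SchemaField("ssn", "STRING", policy_tags=tags)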
|
||||
|
||||
|
||||
class ForeignTypeInfo:
|
||||
"""Metadata about the foreign data type definition such as the system in which the
|
||||
type is defined.
|
||||
|
||||
Args:
|
||||
type_system (str): Required. Specifies the system which defines the
|
||||
foreign data type.
|
||||
|
||||
TypeSystem enum currently includes:
|
||||
* "TYPE_SYSTEM_UNSPECIFIED"
|
||||
* "HIVE"
|
||||
"""
|
||||
|
||||
def __init__(self, type_system: Optional[str] = None):
|
||||
self._properties: Dict[str, Any] = {}
|
||||
self.type_system = type_system
|
||||
|
||||
@property
|
||||
def type_system(self) -> Optional[str]:
|
||||
"""Required. Specifies the system which defines the foreign data
|
||||
type."""
|
||||
|
||||
return self._properties.get("typeSystem")
|
||||
|
||||
@type_system.setter
|
||||
def type_system(self, value: Optional[str]):
|
||||
value = _helpers._isinstance_or_raise(value, str, none_allowed=True)
|
||||
self._properties["typeSystem"] = value
|
||||
|
||||
def to_api_repr(self) -> dict:
|
||||
"""Build an API representation of this object.
|
||||
|
||||
Returns:
|
||||
Dict[str, Any]:
|
||||
A dictionary in the format used by the BigQuery API.
|
||||
"""
|
||||
|
||||
return self._properties
|
||||
|
||||
@classmethod
|
||||
def from_api_repr(cls, api_repr: Dict[str, Any]) -> "ForeignTypeInfo":
|
||||
"""Factory: constructs an instance of the class (cls)
|
||||
given its API representation.
|
||||
|
||||
Args:
|
||||
api_repr (Dict[str, Any]):
|
||||
API representation of the object to be instantiated.
|
||||
|
||||
Returns:
|
||||
An instance of the class initialized with data from 'api_repr'.
|
||||
"""
|
||||
|
||||
config = cls()
|
||||
config._properties = api_repr
|
||||
return config
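# Illustrative sketch: declare that a foreign type definition uses the Hive
# type system.
#
#     type_info = ForeignTypeInfo(type_system="HIVE")
#     assert type_info.to_api_repr() == {"typeSystem": "HIVE"}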
|
||||
|
||||
|
||||
class SerDeInfo:
|
||||
"""Serializer and deserializer information.
|
||||
|
||||
Args:
|
||||
serialization_library (str): Required. Specifies a fully-qualified class
|
||||
name of the serialization library that is responsible for the
|
||||
translation of data between table representation and the underlying
|
||||
low-level input and output format structures. The maximum length is
|
||||
256 characters.
|
||||
name (Optional[str]): Name of the SerDe. The maximum length is 256
|
||||
characters.
|
||||
parameters (Optional[dict[str, str]]): Key-value pairs that define the initialization
|
||||
parameters for the serialization library. Maximum size 10 KiB.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
serialization_library: str,
|
||||
name: Optional[str] = None,
|
||||
parameters: Optional[dict[str, str]] = None,
|
||||
):
|
||||
self._properties: Dict[str, Any] = {}
|
||||
self.serialization_library = serialization_library
|
||||
self.name = name
|
||||
self.parameters = parameters
|
||||
|
||||
@property
|
||||
def serialization_library(self) -> str:
|
||||
"""Required. Specifies a fully-qualified class name of the serialization
|
||||
library that is responsible for the translation of data between table
|
||||
representation and the underlying low-level input and output format
|
||||
structures. The maximum length is 256 characters."""
|
||||
|
||||
return typing.cast(str, self._properties.get("serializationLibrary"))
|
||||
|
||||
@serialization_library.setter
|
||||
def serialization_library(self, value: str):
|
||||
value = _helpers._isinstance_or_raise(value, str, none_allowed=False)
|
||||
self._properties["serializationLibrary"] = value
|
||||
|
||||
@property
|
||||
def name(self) -> Optional[str]:
|
||||
"""Optional. Name of the SerDe. The maximum length is 256 characters."""
|
||||
|
||||
return self._properties.get("name")
|
||||
|
||||
@name.setter
|
||||
def name(self, value: Optional[str] = None):
|
||||
value = _helpers._isinstance_or_raise(value, str, none_allowed=True)
|
||||
self._properties["name"] = value
|
||||
|
||||
@property
|
||||
def parameters(self) -> Optional[dict[str, str]]:
|
||||
"""Optional. Key-value pairs that define the initialization parameters
|
||||
for the serialization library. Maximum size 10 Kib."""
|
||||
|
||||
return self._properties.get("parameters")
|
||||
|
||||
@parameters.setter
|
||||
def parameters(self, value: Optional[dict[str, str]] = None):
|
||||
value = _helpers._isinstance_or_raise(value, dict, none_allowed=True)
|
||||
self._properties["parameters"] = value
|
||||
|
||||
def to_api_repr(self) -> dict:
|
||||
"""Build an API representation of this object.
|
||||
|
||||
Returns:
|
||||
Dict[str, Any]:
|
||||
A dictionary in the format used by the BigQuery API.
|
||||
"""
|
||||
return self._properties
|
||||
|
||||
@classmethod
|
||||
def from_api_repr(cls, api_repr: dict) -> SerDeInfo:
|
||||
"""Factory: constructs an instance of the class (cls)
|
||||
given its API representation.
|
||||
|
||||
Args:
|
||||
api_repr (Dict[str, Any]):
|
||||
API representation of the object to be instantiated.
|
||||
|
||||
Returns:
|
||||
An instance of the class initialized with data from 'api_repr'.
|
||||
"""
|
||||
config = cls("PLACEHOLDER")
|
||||
config._properties = api_repr
|
||||
return config
|
||||
|
||||
|
||||
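# Illustrative usage sketch (editor's addition for clarity; not part of the
# library source). Only the serialization library is required; the setters
# validate argument types and write camelCase keys into _properties. The Hive
# SerDe class name below is just an example value:
#
# >>> serde = SerDeInfo(
# ...     "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe", name="lazy"
# ... )
# >>> serde.to_api_repr()["serializationLibrary"]
# 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
# >>> serde.name
# 'lazy'

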
class StorageDescriptor:
    """Contains information about how a table's data is stored and accessed by
    open source query engines.

    Args:
        input_format (Optional[str]): Specifies the fully qualified class name of
            the InputFormat (e.g.
            "org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"). The maximum
            length is 128 characters.
        location_uri (Optional[str]): The physical location of the table (e.g.
            'gs://spark-dataproc-data/pangea-data/case_sensitive/' or
            'gs://spark-dataproc-data/pangea-data/'). The maximum length is
            2056 bytes.
        output_format (Optional[str]): Specifies the fully qualified class name
            of the OutputFormat (e.g.
            "org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat"). The maximum
            length is 128 characters.
        serde_info (Union[SerDeInfo, dict, None]): Serializer and deserializer
            information.
    """

    def __init__(
        self,
        input_format: Optional[str] = None,
        location_uri: Optional[str] = None,
        output_format: Optional[str] = None,
        serde_info: Union[SerDeInfo, dict, None] = None,
    ):
        self._properties: Dict[str, Any] = {}
        self.input_format = input_format
        self.location_uri = location_uri
        self.output_format = output_format
        # Using typing.cast() because mypy cannot wrap its head around the fact
        # that the setter can accept Union[SerDeInfo, dict, None], but the
        # getter will only ever return Optional[SerDeInfo].
        self.serde_info = typing.cast(Optional[SerDeInfo], serde_info)

    @property
    def input_format(self) -> Optional[str]:
        """Optional. Specifies the fully qualified class name of the InputFormat
        (e.g. "org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"). The maximum
        length is 128 characters."""

        return self._properties.get("inputFormat")

    @input_format.setter
    def input_format(self, value: Optional[str]):
        value = _helpers._isinstance_or_raise(value, str, none_allowed=True)
        self._properties["inputFormat"] = value

    @property
    def location_uri(self) -> Optional[str]:
        """Optional. The physical location of the table (e.g.
        'gs://spark-dataproc-data/pangea-data/case_sensitive/' or
        'gs://spark-dataproc-data/pangea-data/'). The maximum length is
        2056 bytes."""

        return self._properties.get("locationUri")

    @location_uri.setter
    def location_uri(self, value: Optional[str]):
        value = _helpers._isinstance_or_raise(value, str, none_allowed=True)
        self._properties["locationUri"] = value

    @property
    def output_format(self) -> Optional[str]:
        """Optional. Specifies the fully qualified class name of the
        OutputFormat (e.g. "org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat").
        The maximum length is 128 characters."""

        return self._properties.get("outputFormat")

    @output_format.setter
    def output_format(self, value: Optional[str]):
        value = _helpers._isinstance_or_raise(value, str, none_allowed=True)
        self._properties["outputFormat"] = value

    @property
    def serde_info(self) -> Optional[SerDeInfo]:
        """Optional. Serializer and deserializer information."""

        prop = _helpers._get_sub_prop(self._properties, ["serDeInfo"])
        if prop is not None:
            return typing.cast(SerDeInfo, SerDeInfo.from_api_repr(prop))
        return None

    @serde_info.setter
    def serde_info(self, value: Union[SerDeInfo, dict, None]):
        value = _helpers._isinstance_or_raise(
            value, (SerDeInfo, dict), none_allowed=True
        )

        if isinstance(value, SerDeInfo):
            self._properties["serDeInfo"] = value.to_api_repr()
        else:
            self._properties["serDeInfo"] = value

    def to_api_repr(self) -> dict:
        """Build an API representation of this object.

        Returns:
            Dict[str, Any]:
                A dictionary in the format used by the BigQuery API.
        """
        return self._properties

    @classmethod
    def from_api_repr(cls, resource: dict) -> "StorageDescriptor":
        """Factory: constructs an instance of the class (cls)
        given its API representation.

        Args:
            resource (Dict[str, Any]):
                API representation of the object to be instantiated.

        Returns:
            An instance of the class initialized with data from 'resource'.
        """
        config = cls()
        config._properties = resource
        return config
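
# Illustrative usage sketch (editor's addition for clarity; not part of the
# library source). A nested SerDeInfo is flattened into the descriptor's API
# representation by the setter, and the getter rehydrates it on access. The
# ORC class names below are just example values:
#
# >>> sd = StorageDescriptor(
# ...     input_format="org.apache.hadoop.hive.ql.io.orc.OrcInputFormat",
# ...     serde_info=SerDeInfo("org.apache.hadoop.hive.ql.io.orc.OrcSerde"),
# ... )
# >>> sd.to_api_repr()["inputFormat"]
# 'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat'
# >>> isinstance(sd.serde_info, SerDeInfo)
# True
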
@@ -0,0 +1,389 @@
# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import copy
import typing
from typing import Any, Dict, Iterable, List, Optional

from google.cloud.bigquery.enums import StandardSqlTypeNames


class StandardSqlDataType:
    """The type of a variable, e.g., a function argument.

    See:
    https://cloud.google.com/bigquery/docs/reference/rest/v2/StandardSqlDataType

    Examples:

    .. code-block:: text

        INT64: {type_kind="INT64"}
        ARRAY<STRING>: {type_kind="ARRAY", array_element_type="STRING"}
        STRUCT<x STRING, y ARRAY<DATE>>: {
            type_kind="STRUCT",
            struct_type={
                fields=[
                    {name="x", type={type_kind="STRING"}},
                    {
                        name="y",
                        type={type_kind="ARRAY", array_element_type="DATE"}
                    }
                ]
            }
        }
        RANGE<DATETIME>: {type_kind="RANGE", range_element_type="DATETIME"}

    Args:
        type_kind:
            The top level type of this field. Can be any standard SQL data
            type, e.g. INT64, DATE, ARRAY.
        array_element_type:
            The type of the array's elements, if type_kind is ARRAY.
        struct_type:
            The fields of this struct, in order, if type_kind is STRUCT.
        range_element_type:
            The type of the range's elements, if type_kind is RANGE.
    """

    def __init__(
        self,
        type_kind: Optional[
            StandardSqlTypeNames
        ] = StandardSqlTypeNames.TYPE_KIND_UNSPECIFIED,
        array_element_type: Optional["StandardSqlDataType"] = None,
        struct_type: Optional["StandardSqlStructType"] = None,
        range_element_type: Optional["StandardSqlDataType"] = None,
    ):
        self._properties: Dict[str, Any] = {}

        self.type_kind = type_kind
        self.array_element_type = array_element_type
        self.struct_type = struct_type
        self.range_element_type = range_element_type

    @property
    def type_kind(self) -> Optional[StandardSqlTypeNames]:
        """The top level type of this field.

        Can be any standard SQL data type, e.g. INT64, DATE, ARRAY.
        """
        kind = self._properties["typeKind"]
        return StandardSqlTypeNames[kind]  # pytype: disable=missing-parameter

    @type_kind.setter
    def type_kind(self, value: Optional[StandardSqlTypeNames]):
        if not value:
            kind = StandardSqlTypeNames.TYPE_KIND_UNSPECIFIED.value
        else:
            kind = value.value
        self._properties["typeKind"] = kind

    @property
    def array_element_type(self) -> Optional["StandardSqlDataType"]:
        """The type of the array's elements, if type_kind is ARRAY."""
        element_type = self._properties.get("arrayElementType")

        if element_type is None:
            return None

        result = StandardSqlDataType()
        result._properties = element_type  # We do not use a copy on purpose.
        return result

    @array_element_type.setter
    def array_element_type(self, value: Optional["StandardSqlDataType"]):
        element_type = None if value is None else value.to_api_repr()

        if element_type is None:
            self._properties.pop("arrayElementType", None)
        else:
            self._properties["arrayElementType"] = element_type

    @property
    def struct_type(self) -> Optional["StandardSqlStructType"]:
        """The fields of this struct, in order, if type_kind is STRUCT."""
        struct_info = self._properties.get("structType")

        if struct_info is None:
            return None

        result = StandardSqlStructType()
        result._properties = struct_info  # We do not use a copy on purpose.
        return result

    @struct_type.setter
    def struct_type(self, value: Optional["StandardSqlStructType"]):
        struct_type = None if value is None else value.to_api_repr()

        if struct_type is None:
            self._properties.pop("structType", None)
        else:
            self._properties["structType"] = struct_type

    @property
    def range_element_type(self) -> Optional["StandardSqlDataType"]:
        """The type of the range's elements, if type_kind = "RANGE". Must be
        one of DATETIME, DATE, or TIMESTAMP."""
        range_element_info = self._properties.get("rangeElementType")

        if range_element_info is None:
            return None

        result = StandardSqlDataType()
        result._properties = range_element_info  # We do not use a copy on purpose.
        return result

    @range_element_type.setter
    def range_element_type(self, value: Optional["StandardSqlDataType"]):
        range_element_type = None if value is None else value.to_api_repr()

        if range_element_type is None:
            self._properties.pop("rangeElementType", None)
        else:
            self._properties["rangeElementType"] = range_element_type

    def to_api_repr(self) -> Dict[str, Any]:
        """Construct the API resource representation of this SQL data type."""
        return copy.deepcopy(self._properties)

    @classmethod
    def from_api_repr(cls, resource: Dict[str, Any]):
        """Construct an SQL data type instance given its API representation."""
        type_kind = resource.get("typeKind")
        if type_kind not in StandardSqlTypeNames.__members__:
            type_kind = StandardSqlTypeNames.TYPE_KIND_UNSPECIFIED
        else:
            # Convert string to an enum member.
            type_kind = StandardSqlTypeNames[  # pytype: disable=missing-parameter
                typing.cast(str, type_kind)
            ]

        array_element_type = None
        if type_kind == StandardSqlTypeNames.ARRAY:
            element_type = resource.get("arrayElementType")
            if element_type:
                array_element_type = cls.from_api_repr(element_type)

        struct_type = None
        if type_kind == StandardSqlTypeNames.STRUCT:
            struct_info = resource.get("structType")
            if struct_info:
                struct_type = StandardSqlStructType.from_api_repr(struct_info)

        range_element_type = None
        if type_kind == StandardSqlTypeNames.RANGE:
            range_element_info = resource.get("rangeElementType")
            if range_element_info:
                range_element_type = cls.from_api_repr(range_element_info)

        return cls(type_kind, array_element_type, struct_type, range_element_type)

    def __eq__(self, other):
        if not isinstance(other, StandardSqlDataType):
            return NotImplemented
        else:
            return (
                self.type_kind == other.type_kind
                and self.array_element_type == other.array_element_type
                and self.struct_type == other.struct_type
                and self.range_element_type == other.range_element_type
            )

    def __str__(self):
        result = f"{self.__class__.__name__}(type_kind={self.type_kind!r}, ...)"
        return result


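# Illustrative usage sketch (editor's addition for clarity; not part of the
# library source). Composite kinds nest their element types in the API
# representation, and from_api_repr() reconstructs an equal instance:
#
# >>> arr = StandardSqlDataType(
# ...     type_kind=StandardSqlTypeNames.ARRAY,
# ...     array_element_type=StandardSqlDataType(StandardSqlTypeNames.STRING),
# ... )
# >>> arr.to_api_repr()
# {'typeKind': 'ARRAY', 'arrayElementType': {'typeKind': 'STRING'}}
# >>> StandardSqlDataType.from_api_repr(arr.to_api_repr()) == arr
# True

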
class StandardSqlField:
    """A field or a column.

    See:
    https://cloud.google.com/bigquery/docs/reference/rest/v2/StandardSqlField

    Args:
        name:
            The name of this field. Can be absent for struct fields.
        type:
            The type of this parameter. Absent if not explicitly specified.

            For example, the CREATE FUNCTION statement can omit the return
            type; in this case the output parameter does not have this "type"
            field.
    """

    def __init__(
        self, name: Optional[str] = None, type: Optional[StandardSqlDataType] = None
    ):
        type_repr = None if type is None else type.to_api_repr()
        self._properties = {"name": name, "type": type_repr}

    @property
    def name(self) -> Optional[str]:
        """The name of this field. Can be absent for struct fields."""
        return typing.cast(Optional[str], self._properties["name"])

    @name.setter
    def name(self, value: Optional[str]):
        self._properties["name"] = value

    @property
    def type(self) -> Optional[StandardSqlDataType]:
        """The type of this parameter. Absent if not explicitly specified.

        For example, the CREATE FUNCTION statement can omit the return type;
        in this case the output parameter does not have this "type" field.
        """
        type_info = self._properties["type"]

        if type_info is None:
            return None

        result = StandardSqlDataType()
        # We do not use a properties copy on purpose.
        result._properties = typing.cast(Dict[str, Any], type_info)

        return result

    @type.setter
    def type(self, value: Optional[StandardSqlDataType]):
        value_repr = None if value is None else value.to_api_repr()
        self._properties["type"] = value_repr

    def to_api_repr(self) -> Dict[str, Any]:
        """Construct the API resource representation of this SQL field."""
        return copy.deepcopy(self._properties)

    @classmethod
    def from_api_repr(cls, resource: Dict[str, Any]):
        """Construct an SQL field instance given its API representation."""
        result = cls(
            name=resource.get("name"),
            type=StandardSqlDataType.from_api_repr(resource.get("type", {})),
        )
        return result

    def __eq__(self, other):
        if not isinstance(other, StandardSqlField):
            return NotImplemented
        else:
            return self.name == other.name and self.type == other.type


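# Illustrative usage sketch (editor's addition for clarity; not part of the
# library source). A field pairs an optional name with an optional SQL type:
#
# >>> field = StandardSqlField(
# ...     "x", StandardSqlDataType(StandardSqlTypeNames.INT64)
# ... )
# >>> field.name
# 'x'
# >>> field.type.type_kind == StandardSqlTypeNames.INT64
# True

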
class StandardSqlStructType:
    """Type of a struct field.

    See:
    https://cloud.google.com/bigquery/docs/reference/rest/v2/StandardSqlDataType#StandardSqlStructType

    Args:
        fields: The fields in this struct.
    """

    def __init__(self, fields: Optional[Iterable[StandardSqlField]] = None):
        if fields is None:
            fields = []
        self._properties = {"fields": [field.to_api_repr() for field in fields]}

    @property
    def fields(self) -> List[StandardSqlField]:
        """The fields in this struct."""
        result = []

        for field_resource in self._properties.get("fields", []):
            field = StandardSqlField()
            field._properties = field_resource  # We do not use a copy on purpose.
            result.append(field)

        return result

    @fields.setter
    def fields(self, value: Iterable[StandardSqlField]):
        self._properties["fields"] = [field.to_api_repr() for field in value]

    def to_api_repr(self) -> Dict[str, Any]:
        """Construct the API resource representation of this SQL struct type."""
        return copy.deepcopy(self._properties)

    @classmethod
    def from_api_repr(cls, resource: Dict[str, Any]) -> "StandardSqlStructType":
        """Construct an SQL struct type instance given its API representation."""
        fields = (
            StandardSqlField.from_api_repr(field_resource)
            for field_resource in resource.get("fields", [])
        )
        return cls(fields=fields)

    def __eq__(self, other):
        if not isinstance(other, StandardSqlStructType):
            return NotImplemented
        else:
            return self.fields == other.fields


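# Illustrative usage sketch (editor's addition for clarity; not part of the
# library source). Fields are stored by their API representation, so equality
# is structural and round-trips hold:
#
# >>> struct = StandardSqlStructType(
# ...     fields=[
# ...         StandardSqlField("x", StandardSqlDataType(StandardSqlTypeNames.STRING))
# ...     ]
# ... )
# >>> [f.name for f in struct.fields]
# ['x']
# >>> StandardSqlStructType.from_api_repr(struct.to_api_repr()) == struct
# True

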
class StandardSqlTableType:
    """A table type.

    See:
    https://cloud.google.com/workflows/docs/reference/googleapis/bigquery/v2/Overview#StandardSqlTableType

    Args:
        columns: The columns in this table type.
    """

    def __init__(self, columns: Iterable[StandardSqlField]):
        self._properties = {"columns": [col.to_api_repr() for col in columns]}

    @property
    def columns(self) -> List[StandardSqlField]:
        """The columns in this table type."""
        result = []

        for column_resource in self._properties.get("columns", []):
            column = StandardSqlField()
            column._properties = column_resource  # We do not use a copy on purpose.
            result.append(column)

        return result

    @columns.setter
    def columns(self, value: Iterable[StandardSqlField]):
        self._properties["columns"] = [col.to_api_repr() for col in value]

    def to_api_repr(self) -> Dict[str, Any]:
        """Construct the API resource representation of this SQL table type."""
        return copy.deepcopy(self._properties)

    @classmethod
    def from_api_repr(cls, resource: Dict[str, Any]) -> "StandardSqlTableType":
        """Construct an SQL table type instance given its API representation."""
        columns = []

        for column_resource in resource.get("columns", []):
            type_ = column_resource.get("type")
            if type_ is None:
                type_ = {}

            column = StandardSqlField(
                name=column_resource.get("name"),
                type=StandardSqlDataType.from_api_repr(type_),
            )
            columns.append(column)

        return cls(columns=columns)

    def __eq__(self, other):
        if not isinstance(other, StandardSqlTableType):
            return NotImplemented
        else:
            return self.columns == other.columns
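
# Illustrative usage sketch (editor's addition for clarity; not part of the
# library source). A table type is just an ordered list of columns:
#
# >>> table_type = StandardSqlTableType(
# ...     columns=[
# ...         StandardSqlField("id", StandardSqlDataType(StandardSqlTypeNames.INT64))
# ...     ]
# ... )
# >>> table_type.to_api_repr()
# {'columns': [{'name': 'id', 'type': {'typeKind': 'INT64'}}]}
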
3594  .venv/lib/python3.10/site-packages/google/cloud/bigquery/table.py (new file)
File diff suppressed because it is too large
@@ -0,0 +1,15 @@
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

__version__ = "3.31.0"