structure saas with tools

This commit is contained in:
Davidson Gomes
2025-04-25 15:30:54 -03:00
commit 1aef473937
16434 changed files with 6584257 additions and 0 deletions

View File

@@ -0,0 +1,87 @@
# Copyright 2017 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Google BigQuery implementation of the Database API Specification v2.0.
This module implements the `Python Database API Specification v2.0 (DB-API)`_
for Google BigQuery.
.. _Python Database API Specification v2.0 (DB-API):
https://www.python.org/dev/peps/pep-0249/
"""
from google.cloud.bigquery.dbapi.connection import connect
from google.cloud.bigquery.dbapi.connection import Connection
from google.cloud.bigquery.dbapi.cursor import Cursor
from google.cloud.bigquery.dbapi.exceptions import Warning
from google.cloud.bigquery.dbapi.exceptions import Error
from google.cloud.bigquery.dbapi.exceptions import InterfaceError
from google.cloud.bigquery.dbapi.exceptions import DatabaseError
from google.cloud.bigquery.dbapi.exceptions import DataError
from google.cloud.bigquery.dbapi.exceptions import OperationalError
from google.cloud.bigquery.dbapi.exceptions import IntegrityError
from google.cloud.bigquery.dbapi.exceptions import InternalError
from google.cloud.bigquery.dbapi.exceptions import ProgrammingError
from google.cloud.bigquery.dbapi.exceptions import NotSupportedError
from google.cloud.bigquery.dbapi.types import Binary
from google.cloud.bigquery.dbapi.types import Date
from google.cloud.bigquery.dbapi.types import DateFromTicks
from google.cloud.bigquery.dbapi.types import Time
from google.cloud.bigquery.dbapi.types import TimeFromTicks
from google.cloud.bigquery.dbapi.types import Timestamp
from google.cloud.bigquery.dbapi.types import TimestampFromTicks
from google.cloud.bigquery.dbapi.types import BINARY
from google.cloud.bigquery.dbapi.types import DATETIME
from google.cloud.bigquery.dbapi.types import NUMBER
from google.cloud.bigquery.dbapi.types import ROWID
from google.cloud.bigquery.dbapi.types import STRING
apilevel = "2.0"
# Threads may share the module and connections, but not cursors.
threadsafety = 2
paramstyle = "pyformat"
__all__ = [
"apilevel",
"threadsafety",
"paramstyle",
"connect",
"Connection",
"Cursor",
"Warning",
"Error",
"InterfaceError",
"DatabaseError",
"DataError",
"OperationalError",
"IntegrityError",
"InternalError",
"ProgrammingError",
"NotSupportedError",
"Binary",
"Date",
"DateFromTicks",
"Time",
"TimeFromTicks",
"Timestamp",
"TimestampFromTicks",
"BINARY",
"DATETIME",
"NUMBER",
"ROWID",
"STRING",
]
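# Editor's note: a minimal, standalone usage sketch of the PEP 249 surface
# exported above (illustrative only, not part of the original file). It assumes
# Application Default Credentials and a billable project are configured.
from google.cloud.bigquery import dbapi

conn = dbapi.connect()                 # builds a bigquery.Client() internally
cur = conn.cursor()
cur.execute("SELECT %(greeting)s AS greeting", {"greeting": "hello"})
print(cur.fetchone())                  # first row of the one-row result
cur.close()
conn.close()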

View File

@@ -0,0 +1,522 @@
# Copyright 2017 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from collections import abc as collections_abc
import datetime
import decimal
import functools
import numbers
import re
import typing
from google.cloud import bigquery
from google.cloud.bigquery import table, query
from google.cloud.bigquery.dbapi import exceptions
_NUMERIC_SERVER_MIN = decimal.Decimal("-9.9999999999999999999999999999999999999E+28")
_NUMERIC_SERVER_MAX = decimal.Decimal("9.9999999999999999999999999999999999999E+28")
type_parameters_re = re.compile(
r"""
\(
\s*[0-9]+\s*
(,
\s*[0-9]+\s*
)*
\)
""",
re.VERBOSE,
)
def _parameter_type(name, value, query_parameter_type=None, value_doc=""):
if query_parameter_type:
# Strip type parameters
query_parameter_type = type_parameters_re.sub("", query_parameter_type)
try:
parameter_type = getattr(
query.SqlParameterScalarTypes, query_parameter_type.upper()
)._type
except AttributeError:
raise exceptions.ProgrammingError(
f"The given parameter type, {query_parameter_type},"
f" for {name} is not a valid BigQuery scalar type."
)
else:
parameter_type = bigquery_scalar_type(value)
if parameter_type is None:
raise exceptions.ProgrammingError(
f"Encountered parameter {name} with "
f"{value_doc} value {value} of unexpected type."
)
return parameter_type
def scalar_to_query_parameter(value, name=None, query_parameter_type=None):
"""Convert a scalar value into a query parameter.
Args:
value (Any):
A scalar value to convert into a query parameter.
name (str):
(Optional) Name of the query parameter.
query_parameter_type (Optional[str]): Given type for the parameter.
Returns:
google.cloud.bigquery.ScalarQueryParameter:
A query parameter corresponding with the type and value of the plain
Python object.
Raises:
google.cloud.bigquery.dbapi.exceptions.ProgrammingError:
if the type cannot be determined.
"""
return bigquery.ScalarQueryParameter(
name, _parameter_type(name, value, query_parameter_type), value
)
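# Editor's note, for illustration: scalar_to_query_parameter(42, "n") infers an
# INT64 parameter, a timezone-naive datetime.datetime maps to DATETIME and an
# aware one to TIMESTAMP (see bigquery_scalar_type below). An explicit
# query_parameter_type such as "NUMERIC(10, 2)" is also accepted; the "(10, 2)"
# type parameters are stripped before the scalar-type lookup.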
def array_to_query_parameter(value, name=None, query_parameter_type=None):
"""Convert an array-like value into a query parameter.
Args:
value (Sequence[Any]): The elements of the array (should not be a
string-like Sequence).
name (Optional[str]): Name of the query parameter.
query_parameter_type (Optional[str]): Given type for the parameter.
Returns:
A query parameter corresponding with the type and value of the plain
Python object.
Raises:
google.cloud.bigquery.dbapi.exceptions.ProgrammingError:
if the type of array elements cannot be determined.
"""
if not array_like(value):
raise exceptions.ProgrammingError(
"The value of parameter {} must be a sequence that is "
"not string-like.".format(name)
)
if query_parameter_type or value:
array_type = _parameter_type(
name,
value[0] if value else None,
query_parameter_type,
value_doc="array element ",
)
else:
raise exceptions.ProgrammingError(
"Encountered an empty array-like value of parameter {}, cannot "
"determine array elements type.".format(name)
)
return bigquery.ArrayQueryParameter(name, array_type, value)
def _parse_struct_fields(
fields,
base,
parse_struct_field=re.compile(
r"""
(?:(\w+)\s+) # field name
([A-Z0-9<> ,()]+) # Field type
$""",
re.VERBOSE | re.IGNORECASE,
).match,
):
# Split a string of struct fields. They're defined by commas, but
# we have to avoid splitting on commas internal to fields. For
# example:
# name string, children array<struct<name string, bdate date>>
#
# only has 2 top-level fields.
fields = fields.split(",")
fields = list(reversed(fields)) # on the off chance that there are very many
while fields:
field = fields.pop()
while fields and field.count("<") != field.count(">"):
field += "," + fields.pop()
m = parse_struct_field(field.strip())
if not m:
raise exceptions.ProgrammingError(
f"Invalid struct field, {field}, in {base}"
)
yield m.group(1, 2)
SCALAR, ARRAY, STRUCT = ("s", "a", "r")
def _parse_type(
type_,
name,
base,
complex_query_parameter_parse=re.compile(
r"""
\s*
(ARRAY|STRUCT|RECORD) # Type
\s*
<([A-Z0-9_<> ,()]+)> # Subtype(s)
\s*$
""",
re.IGNORECASE | re.VERBOSE,
).match,
):
if "<" not in type_:
# Scalar
# Strip type parameters
type_ = type_parameters_re.sub("", type_).strip()
try:
type_ = getattr(query.SqlParameterScalarTypes, type_.upper())
except AttributeError:
raise exceptions.ProgrammingError(
f"The given parameter type, {type_},"
f"{' for ' + name if name else ''}"
f" is not a valid BigQuery scalar type, in {base}."
)
if name:
type_ = type_.with_name(name)
return SCALAR, type_
m = complex_query_parameter_parse(type_)
if not m:
raise exceptions.ProgrammingError(f"Invalid parameter type, {type_}")
tname, sub = m.group(1, 2)
if tname.upper() == "ARRAY":
sub_type = complex_query_parameter_type(None, sub, base)
if isinstance(sub_type, query.ArrayQueryParameterType):
raise exceptions.ProgrammingError(f"Array can't contain an array in {base}")
sub_type._complex__src = sub
return ARRAY, sub_type
else:
return STRUCT, _parse_struct_fields(sub, base)
def complex_query_parameter_type(name: typing.Optional[str], type_: str, base: str):
"""Construct a parameter type (`StructQueryParameterType`) for a complex type
or a non-complex type that's part of a complex type.
Examples:
array<struct<x float64, y float64>>
struct<name string, children array<struct<name string, bdate date>>>
This is used for computing array types.
"""
type_type, sub_type = _parse_type(type_, name, base)
if type_type == SCALAR:
result_type = sub_type
elif type_type == ARRAY:
result_type = query.ArrayQueryParameterType(sub_type, name=name)
elif type_type == STRUCT:
fields = [
complex_query_parameter_type(field_name, field_type, base)
for field_name, field_type in sub_type
]
result_type = query.StructQueryParameterType(*fields, name=name)
else: # pragma: NO COVER
raise AssertionError("Bad type_type", type_type) # Can't happen :)
return result_type
def complex_query_parameter(
name: typing.Optional[str], value, type_: str, base: typing.Optional[str] = None
):
"""
Construct a query parameter for a complex type (array or struct record)
or for a subtype, which may not be complex
Examples:
array<struct<x float64, y float64>>
struct<name string, children array<struct<name string, bdate date>>>
"""
param: typing.Union[
query.ScalarQueryParameter,
query.ArrayQueryParameter,
query.StructQueryParameter,
]
base = base or type_
type_type, sub_type = _parse_type(type_, name, base)
if type_type == SCALAR:
param = query.ScalarQueryParameter(name, sub_type._type, value)
elif type_type == ARRAY:
if not array_like(value):
raise exceptions.ProgrammingError(
f"Array type with non-array-like value"
f" with type {type(value).__name__}"
)
param = query.ArrayQueryParameter(
name,
sub_type,
(
value
if isinstance(sub_type, query.ScalarQueryParameterType)
else [
complex_query_parameter(None, v, sub_type._complex__src, base)
for v in value
]
),
)
elif type_type == STRUCT:
if not isinstance(value, collections_abc.Mapping):
raise exceptions.ProgrammingError(f"Non-mapping value for type {type_}")
value_keys = set(value)
fields = []
for field_name, field_type in sub_type:
if field_name not in value:
raise exceptions.ProgrammingError(
f"No field value for {field_name} in {type_}"
)
value_keys.remove(field_name)
fields.append(
complex_query_parameter(field_name, value[field_name], field_type, base)
)
if value_keys:
raise exceptions.ProgrammingError(f"Extra data keys for {type_}")
param = query.StructQueryParameter(name, *fields)
else: # pragma: NO COVER
raise AssertionError("Bad type_type", type_type) # Can't happen :)
return param
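# Editor's note, as a sketch of the branches above:
# complex_query_parameter("point", {"x": 1.0, "y": 2.0}, "struct<x float64, y float64>")
# yields a StructQueryParameter with two FLOAT64 scalar sub-parameters, while
# complex_query_parameter("xs", [1, 2, 3], "array<int64>") yields an
# ArrayQueryParameter whose element type is INT64.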
def _dispatch_parameter(type_, value, name=None):
if type_ is not None and "<" in type_:
param = complex_query_parameter(name, value, type_)
elif isinstance(value, collections_abc.Mapping):
raise NotImplementedError(
f"STRUCT-like parameter values are not supported"
f"{' (parameter ' + name + ')' if name else ''},"
f" unless an explicit type is give in the parameter placeholder"
f" (e.g. '%({name if name else ''}:struct<...>)s')."
)
elif array_like(value):
param = array_to_query_parameter(value, name, type_)
else:
param = scalar_to_query_parameter(value, name, type_)
return param
def to_query_parameters_list(parameters, parameter_types):
"""Converts a sequence of parameter values into query parameters.
Args:
parameters (Sequence[Any]): Sequence of query parameter values.
parameter_types:
A list of parameter types, one for each parameter.
Unknown types are provided as None.
Returns:
List[google.cloud.bigquery.query._AbstractQueryParameter]:
A list of query parameters.
"""
return [
_dispatch_parameter(type_, value)
for value, type_ in zip(parameters, parameter_types)
]
def to_query_parameters_dict(parameters, query_parameter_types):
"""Converts a dictionary of parameter values into query parameters.
Args:
parameters (Mapping[str, Any]): Dictionary of query parameter values.
parameter_types:
A dictionary of parameter types. It needn't have a key for each
parameter.
Returns:
List[google.cloud.bigquery.query._AbstractQueryParameter]:
A list of named query parameters.
"""
return [
_dispatch_parameter(query_parameter_types.get(name), value, name)
for name, value in parameters.items()
]
def to_query_parameters(parameters, parameter_types):
"""Converts DB-API parameter values into query parameters.
Args:
parameters (Union[Mapping[str, Any], Sequence[Any]]):
A dictionary or sequence of query parameter values.
parameter_types (Union[Mapping[str, str], Sequence[str]]):
A dictionary or list of parameter types.
If parameters is a mapping, then this must be a dictionary
of parameter types. It needn't have a key for each
parameter.
If parameters is a sequence, then this must be a list of
parameter types, one for each parameter. Unknown types
are provided as None.
Returns:
List[google.cloud.bigquery.query._AbstractQueryParameter]:
A list of query parameters.
"""
if parameters is None:
return []
if isinstance(parameters, collections_abc.Mapping):
return to_query_parameters_dict(parameters, parameter_types)
else:
return to_query_parameters_list(parameters, parameter_types)
def bigquery_scalar_type(value):
"""Return a BigQuery name of the scalar type that matches the given value.
If the scalar type name could not be determined (e.g. for non-scalar
values), ``None`` is returned.
Args:
value (Any)
Returns:
Optional[str]: The BigQuery scalar type name.
"""
if isinstance(value, bool):
return "BOOL"
elif isinstance(value, numbers.Integral):
return "INT64"
elif isinstance(value, numbers.Real):
return "FLOAT64"
elif isinstance(value, decimal.Decimal):
vtuple = value.as_tuple()
# NUMERIC values have precision of 38 (number of digits) and scale of 9 (number
# of fractional digits), and their max absolute value must be strictly smaller
# than 1.0E+29.
# https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#decimal_types
if (
len(vtuple.digits) <= 38 # max precision: 38
and vtuple.exponent >= -9 # max scale: 9
and _NUMERIC_SERVER_MIN <= value <= _NUMERIC_SERVER_MAX
):
return "NUMERIC"
else:
return "BIGNUMERIC"
elif isinstance(value, str):
return "STRING"
elif isinstance(value, bytes):
return "BYTES"
elif isinstance(value, datetime.datetime):
return "DATETIME" if value.tzinfo is None else "TIMESTAMP"
elif isinstance(value, datetime.date):
return "DATE"
elif isinstance(value, datetime.time):
return "TIME"
return None
def array_like(value):
"""Determine if the given value is array-like.
Examples of array-like values (as interpreted by this function) are
sequences such as ``list`` and ``tuple``, but not strings and other
iterables such as sets.
Args:
value (Any)
Returns:
bool: ``True`` if the value is considered array-like, ``False`` otherwise.
"""
return isinstance(value, collections_abc.Sequence) and not isinstance(
value, (str, bytes, bytearray)
)
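# Editor's note: array_like([1, 2]) and array_like((1, 2)) are True, while
# array_like("ab"), array_like(b"ab") and array_like({1, 2}) are False; sets are
# iterable but not Sequences, so they are deliberately rejected here.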
def to_bq_table_rows(rows_iterable):
"""Convert table rows to BigQuery table Row instances.
Args:
rows_iterable (Iterable[Mapping]):
An iterable of row data items to convert to ``Row`` instances.
Returns:
Iterable[google.cloud.bigquery.table.Row]
"""
def to_table_row(row):
# NOTE: We fetch ARROW values, thus we need to convert them to Python
# objects with as_py().
values = tuple(value.as_py() for value in row.values())
keys_to_index = {key: i for i, key in enumerate(row.keys())}
return table.Row(values, keys_to_index)
return (to_table_row(row_data) for row_data in rows_iterable)
def raise_on_closed(
exc_msg, exc_class=exceptions.ProgrammingError, closed_attr_name="_closed"
):
"""Make public instance methods raise an error if the instance is closed."""
def _raise_on_closed(method):
"""Make a non-static method raise an error if its containing instance is closed."""
def with_closed_check(self, *args, **kwargs):
if getattr(self, closed_attr_name):
raise exc_class(exc_msg)
return method(self, *args, **kwargs)
functools.update_wrapper(with_closed_check, method)
return with_closed_check
def decorate_public_methods(klass):
"""Apply ``_raise_on_closed()`` decorator to public instance methods."""
for name in dir(klass):
if name.startswith("_") and name != "__iter__":
continue
member = getattr(klass, name)
if not callable(member):
continue
# We need to check for class/static methods directly in the instance
# __dict__, not via the retrieved attribute (`member`), as the
# latter is already a callable *produced* by one of these descriptors.
if isinstance(klass.__dict__[name], (staticmethod, classmethod)):
continue
member = _raise_on_closed(member)
setattr(klass, name, member)
return klass
return decorate_public_methods
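# Editor's note: a small, local sketch of how the helpers above cooperate
# (illustrative only, not part of the original file). No API call is made;
# the values and the explicit "ARRAY<INT64>" type are arbitrary examples.
from google.cloud.bigquery.dbapi import _helpers

params = _helpers.to_query_parameters(
    {"name": "Ada", "scores": [1, 2, 3]},   # DB-API style parameter values
    {"scores": "ARRAY<INT64>"},             # optional explicit parameter types
)
for param in params:
    print(type(param).__name__, param.name)
# prints: ScalarQueryParameter name / ArrayQueryParameter scores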

View File

@@ -0,0 +1,128 @@
# Copyright 2017 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Connection for the Google BigQuery DB-API."""
import weakref
from google.cloud import bigquery
from google.cloud.bigquery.dbapi import cursor
from google.cloud.bigquery.dbapi import _helpers
@_helpers.raise_on_closed("Operating on a closed connection.")
class Connection(object):
"""DB-API Connection to Google BigQuery.
Args:
client (Optional[google.cloud.bigquery.Client]):
A REST API client used to connect to BigQuery. If not passed, a
client is created using default options inferred from the environment.
bqstorage_client(\
Optional[google.cloud.bigquery_storage_v1.BigQueryReadClient] \
):
A client that uses the faster BigQuery Storage API to fetch rows from
BigQuery. If not passed, it is created using the same credentials
as ``client`` (provided that BigQuery Storage dependencies are installed).
prefer_bqstorage_client (Optional[bool]):
Prefer the BigQuery Storage client over the REST client. If Storage
client isn't available, fall back to the REST client. Defaults to
``True``.
"""
def __init__(
self,
client=None,
bqstorage_client=None,
prefer_bqstorage_client=True,
):
if client is None:
client = bigquery.Client()
self._owns_client = True
else:
self._owns_client = False
# A warning is already raised by the BQ Storage client factory if
# instantiation fails, or if the given BQ Storage client instance is outdated.
if not prefer_bqstorage_client:
bqstorage_client = None
self._owns_bqstorage_client = False
elif bqstorage_client is None:
bqstorage_client = client._ensure_bqstorage_client()
self._owns_bqstorage_client = bqstorage_client is not None
else:
self._owns_bqstorage_client = False
bqstorage_client = client._ensure_bqstorage_client(bqstorage_client)
self._client = client
self._bqstorage_client = bqstorage_client
self._closed = False
self._cursors_created = weakref.WeakSet()
def close(self):
"""Close the connection and any cursors created from it.
Any BigQuery clients explicitly passed to the constructor are *not*
closed, only those created by the connection instance itself.
"""
self._closed = True
if self._owns_client:
self._client.close()
if self._owns_bqstorage_client:
# There is no close() on the BQ Storage client itself.
self._bqstorage_client._transport.grpc_channel.close()
for cursor_ in self._cursors_created:
if not cursor_._closed:
cursor_.close()
def commit(self):
"""No-op, but for consistency raise an error if connection is closed."""
def cursor(self):
"""Return a new cursor object.
Returns:
google.cloud.bigquery.dbapi.Cursor: A DB-API cursor that uses this connection.
"""
new_cursor = cursor.Cursor(self)
self._cursors_created.add(new_cursor)
return new_cursor
def connect(client=None, bqstorage_client=None, prefer_bqstorage_client=True):
"""Construct a DB-API connection to Google BigQuery.
Args:
client (Optional[google.cloud.bigquery.Client]):
A REST API client used to connect to BigQuery. If not passed, a
client is created using default options inferred from the environment.
bqstorage_client(\
Optional[google.cloud.bigquery_storage_v1.BigQueryReadClient] \
):
A client that uses the faster BigQuery Storage API to fetch rows from
BigQuery. If not passed, it is created using the same credentials
as ``client`` (provided that BigQuery Storage dependencies are installed).
prefer_bqstorage_client (Optional[bool]):
Prefer the BigQuery Storage client over the REST client. If Storage
client isn't available, fall back to the REST client. Defaults to
``True``.
Returns:
google.cloud.bigquery.dbapi.Connection: A new DB-API connection to BigQuery.
"""
return Connection(client, bqstorage_client, prefer_bqstorage_client)
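# Editor's note: ownership matters when closing. A hedged, standalone sketch
# (illustrative only; it assumes default credentials are available):
from google.cloud import bigquery
from google.cloud.bigquery import dbapi

client = bigquery.Client()              # caller-owned REST client
conn = dbapi.connect(client=client)     # the connection will not close it
try:
    cur = conn.cursor()
    cur.execute("SELECT 1")
    print(cur.fetchall())
finally:
    conn.close()                        # closes cursors, keeps `client` usable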

View File

@@ -0,0 +1,586 @@
# Copyright 2017 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Cursor for the Google BigQuery DB-API."""
from __future__ import annotations
import collections
from collections import abc as collections_abc
import re
from typing import Optional
try:
from google.cloud.bigquery_storage import ArrowSerializationOptions
except ImportError:
_ARROW_COMPRESSION_SUPPORT = False
else:
# Having BQ Storage available implies that pyarrow >=1.0.0 is available, too.
_ARROW_COMPRESSION_SUPPORT = True
from google.cloud.bigquery import job
from google.cloud.bigquery.dbapi import _helpers
from google.cloud.bigquery.dbapi import exceptions
import google.cloud.exceptions # type: ignore
# Per PEP 249: A 7-item sequence containing information describing one result
# column. The first two items (name and type_code) are mandatory, the other
# five are optional and are set to None if no meaningful values can be
# provided.
Column = collections.namedtuple(
"Column",
[
"name",
"type_code",
"display_size",
"internal_size",
"precision",
"scale",
"null_ok",
],
)
@_helpers.raise_on_closed("Operating on a closed cursor.")
class Cursor(object):
"""DB-API Cursor to Google BigQuery.
Args:
connection (google.cloud.bigquery.dbapi.Connection):
A DB-API connection to Google BigQuery.
"""
def __init__(self, connection):
self.connection = connection
self.description = None
# Per PEP 249: The attribute is -1 in case no .execute*() has been
# performed on the cursor or the rowcount of the last operation
# cannot be determined by the interface.
self.rowcount = -1
# Per PEP 249: The arraysize attribute defaults to 1, meaning to fetch
# a single row at a time. However, we deviate from that, and set the
# default to None, allowing the backend to automatically determine the
# most appropriate size.
self.arraysize = None
self._query_data = None
self._query_rows = None
self._closed = False
@property
def query_job(self) -> Optional[job.QueryJob]:
"""google.cloud.bigquery.job.query.QueryJob | None: The query job
created by the last ``execute*()`` call, if a query job was created.
.. note::
If the last ``execute*()`` call was ``executemany()``, this is the
last job created by ``executemany()``."""
rows = self._query_rows
if rows is None:
return None
job_id = rows.job_id
project = rows.project
location = rows.location
client = self.connection._client
if job_id is None:
return None
return client.get_job(job_id, location=location, project=project)
def close(self):
"""Mark the cursor as closed, preventing its further use."""
self._closed = True
def _set_description(self, schema):
"""Set description from schema.
Args:
schema (Sequence[google.cloud.bigquery.schema.SchemaField]):
A description of fields in the schema.
"""
if schema is None:
self.description = None
return
self.description = tuple(
Column(
name=field.name,
type_code=field.field_type,
display_size=None,
internal_size=None,
precision=None,
scale=None,
null_ok=field.is_nullable,
)
for field in schema
)
def _set_rowcount(self, rows):
"""Set the rowcount from a RowIterator.
Normally, this sets rowcount to the number of rows returned by the
query, but if it was a DML statement, it sets rowcount to the number
of modified rows.
Args:
rows (google.cloud.bigquery.table.RowIterator):
Results of a query.
"""
total_rows = 0
num_dml_affected_rows = rows.num_dml_affected_rows
if rows.total_rows is not None and rows.total_rows > 0:
total_rows = rows.total_rows
if num_dml_affected_rows is not None and num_dml_affected_rows > 0:
total_rows = num_dml_affected_rows
self.rowcount = total_rows
def execute(self, operation, parameters=None, job_id=None, job_config=None):
"""Prepare and execute a database operation.
.. note::
When setting query parameters, values which are "text"
(``unicode`` in Python2, ``str`` in Python3) will use
the 'STRING' BigQuery type. Values which are "bytes" (``str`` in
Python2, ``bytes`` in Python3) will use the 'BYTES' type.
A `~datetime.datetime` parameter without timezone information uses
the 'DATETIME' BigQuery type (example: Global Pi Day Celebration
March 14, 2017 at 1:59pm). A `~datetime.datetime` parameter with
timezone information uses the 'TIMESTAMP' BigQuery type (example:
a wedding on April 29, 2011 at 11am, British Summer Time).
For more information about BigQuery data types, see:
https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types
``STRUCT``/``RECORD`` and ``REPEATED`` query parameters are not
yet supported. See:
https://github.com/GoogleCloudPlatform/google-cloud-python/issues/3524
Args:
operation (str): A Google BigQuery query string.
parameters (Union[Mapping[str, Any], Sequence[Any]]):
(Optional) dictionary or sequence of parameter values.
job_id (str | None):
(Optional and discouraged) The job ID to use when creating
the query job. For best performance and reliability, manually
setting a job ID is discouraged.
job_config (google.cloud.bigquery.job.QueryJobConfig):
(Optional) Extra configuration options for the query job.
"""
formatted_operation, parameter_types = _format_operation(operation, parameters)
self._execute(
formatted_operation, parameters, job_id, job_config, parameter_types
)
def _execute(
self, formatted_operation, parameters, job_id, job_config, parameter_types
):
self._query_data = None
self._query_results = None
client = self.connection._client
# The DB-API uses the pyformat formatting, since the way BigQuery does
# query parameters was not one of the standard options. Convert both
# the query and the parameters to the format expected by the client
# libraries.
query_parameters = _helpers.to_query_parameters(parameters, parameter_types)
config = job_config or job.QueryJobConfig()
config.query_parameters = query_parameters
# Start the query and wait for the query to finish.
try:
if job_id is not None:
rows = client.query(
formatted_operation,
job_config=job_config,
job_id=job_id,
).result(
page_size=self.arraysize,
)
else:
rows = client.query_and_wait(
formatted_operation,
job_config=config,
page_size=self.arraysize,
)
except google.cloud.exceptions.GoogleCloudError as exc:
raise exceptions.DatabaseError(exc)
self._query_rows = rows
self._set_description(rows.schema)
if config.dry_run:
self.rowcount = 0
else:
self._set_rowcount(rows)
def executemany(self, operation, seq_of_parameters):
"""Prepare and execute a database operation multiple times.
Args:
operation (str): A Google BigQuery query string.
seq_of_parameters (Union[Sequence[Mapping[str, Any], Sequence[Any]]]):
Sequence of many sets of parameter values.
"""
if seq_of_parameters:
rowcount = 0
# There's no reason to format the line more than once, as
# the operation only barely depends on the parameters. So
# we just use the first set of parameters. If there are
# different numbers or types of parameters, we'll error
# anyway.
formatted_operation, parameter_types = _format_operation(
operation, seq_of_parameters[0]
)
for parameters in seq_of_parameters:
self._execute(
formatted_operation, parameters, None, None, parameter_types
)
rowcount += self.rowcount
self.rowcount = rowcount
def _try_fetch(self, size=None):
"""Try to start fetching data, if not yet started.
Mutates self to indicate that iteration has started.
"""
if self._query_data is not None:
# Already started fetching the data.
return
rows = self._query_rows
if rows is None:
raise exceptions.InterfaceError(
"No query results: execute() must be called before fetch."
)
bqstorage_client = self.connection._bqstorage_client
if rows._should_use_bqstorage(
bqstorage_client,
create_bqstorage_client=False,
):
rows_iterable = self._bqstorage_fetch(bqstorage_client)
self._query_data = _helpers.to_bq_table_rows(rows_iterable)
return
self._query_data = iter(rows)
def _bqstorage_fetch(self, bqstorage_client):
"""Start fetching data with the BigQuery Storage API.
The method assumes that the data about the relevant query job already
exists internally.
Args:
bqstorage_client(\
google.cloud.bigquery_storage_v1.BigQueryReadClient \
):
A client that knows how to talk to the BigQuery Storage API.
Returns:
Iterable[Mapping]:
A sequence of rows, represented as dictionaries.
"""
# Hitting this code path with a BQ Storage client instance implies that
# bigquery_storage can indeed be imported here without errors.
from google.cloud import bigquery_storage
table_reference = self._query_rows._table
requested_session = bigquery_storage.types.ReadSession(
table=table_reference.to_bqstorage(),
data_format=bigquery_storage.types.DataFormat.ARROW,
)
if _ARROW_COMPRESSION_SUPPORT:
requested_session.read_options.arrow_serialization_options.buffer_compression = (
ArrowSerializationOptions.CompressionCodec.LZ4_FRAME
)
read_session = bqstorage_client.create_read_session(
parent="projects/{}".format(table_reference.project),
read_session=requested_session,
# a single stream only, as DB API is not well-suited for multithreading
max_stream_count=1,
)
if not read_session.streams:
return iter([]) # empty table, nothing to read
stream_name = read_session.streams[0].name
read_rows_stream = bqstorage_client.read_rows(stream_name)
rows_iterable = read_rows_stream.rows(read_session)
return rows_iterable
def fetchone(self):
"""Fetch a single row from the results of the last ``execute*()`` call.
.. note::
If a dry run query was executed, no rows are returned.
Returns:
Tuple:
A tuple representing a row or ``None`` if no more data is
available.
Raises:
google.cloud.bigquery.dbapi.InterfaceError: if called before ``execute()``.
"""
self._try_fetch()
try:
return next(self._query_data)
except StopIteration:
return None
def fetchmany(self, size=None):
"""Fetch multiple results from the last ``execute*()`` call.
.. note::
If a dry run query was executed, no rows are returned.
.. note::
The size parameter is not used for the request/response size.
Set the ``arraysize`` attribute before calling ``execute()`` to
set the batch size.
Args:
size (int):
(Optional) Maximum number of rows to return. Defaults to the
``arraysize`` property value. If ``arraysize`` is not set, it
defaults to ``1``.
Returns:
List[Tuple]: A list of rows.
Raises:
google.cloud.bigquery.dbapi.InterfaceError: if called before ``execute()``.
"""
if size is None:
# Since self.arraysize can be None (a deviation from PEP 249),
# use an actual PEP 249 default of 1 in such case (*some* number
# is needed here).
size = self.arraysize if self.arraysize else 1
self._try_fetch(size=size)
rows = []
for row in self._query_data:
rows.append(row)
if len(rows) >= size:
break
return rows
def fetchall(self):
"""Fetch all remaining results from the last ``execute*()`` call.
.. note::
If a dry run query was executed, no rows are returned.
Returns:
List[Tuple]: A list of all the rows in the results.
Raises:
google.cloud.bigquery.dbapi.InterfaceError: if called before ``execute()``.
"""
self._try_fetch()
return list(self._query_data)
def setinputsizes(self, sizes):
"""No-op, but for consistency raise an error if cursor is closed."""
def setoutputsize(self, size, column=None):
"""No-op, but for consistency raise an error if cursor is closed."""
def __iter__(self):
self._try_fetch()
return iter(self._query_data)
def _format_operation_list(operation, parameters):
"""Formats parameters in operation in the way BigQuery expects.
The input operation will be a query like ``SELECT %s`` and the output
will be a query like ``SELECT ?``.
Args:
operation (str): A Google BigQuery query string.
parameters (Sequence[Any]): Sequence of parameter values.
Returns:
str: A formatted query string.
Raises:
google.cloud.bigquery.dbapi.ProgrammingError:
if a parameter used in the operation is not found in the
``parameters`` argument.
"""
formatted_params = ["?" for _ in parameters]
try:
return operation % tuple(formatted_params)
except (TypeError, ValueError) as exc:
raise exceptions.ProgrammingError(exc)
def _format_operation_dict(operation, parameters):
"""Formats parameters in operation in the way BigQuery expects.
The input operation will be a query like ``SELECT %(namedparam)s`` and
the output will be a query like ``SELECT @namedparam``.
Args:
operation (str): A Google BigQuery query string.
parameters (Mapping[str, Any]): Dictionary of parameter values.
Returns:
str: A formatted query string.
Raises:
google.cloud.bigquery.dbapi.ProgrammingError:
if a parameter used in the operation is not found in the
``parameters`` argument.
"""
formatted_params = {}
for name in parameters:
escaped_name = name.replace("`", r"\`")
formatted_params[name] = "@`{}`".format(escaped_name)
try:
return operation % formatted_params
except (KeyError, ValueError, TypeError) as exc:
raise exceptions.ProgrammingError(exc)
def _format_operation(operation, parameters):
"""Formats parameters in operation in way BigQuery expects.
Args:
operation (str): A Google BigQuery query string.
parameters (Union[Mapping[str, Any], Sequence[Any]]):
Optional parameter values.
Returns:
str: A formatted query string.
Raises:
google.cloud.bigquery.dbapi.ProgrammingError:
if a parameter used in the operation is not found in the
``parameters`` argument.
"""
if parameters is None or len(parameters) == 0:
return operation.replace("%%", "%"), None # Still do percent de-escaping.
operation, parameter_types = _extract_types(operation)
if parameter_types is None:
raise exceptions.ProgrammingError(
f"Parameters were provided, but {repr(operation)} has no placeholders."
)
if isinstance(parameters, collections_abc.Mapping):
return _format_operation_dict(operation, parameters), parameter_types
return _format_operation_list(operation, parameters), parameter_types
def _extract_types(
operation,
extra_type_sub=re.compile(
r"""
(%*) # Extra %s. We'll deal with these in the replacement code
% # Beginning of replacement, %s, %(...)s
(?:\( # Begin of optional name and/or type
([^:)]*) # name
(?:: # ':' introduces type
( # start of type group
[a-zA-Z0-9_<>, ]+ # First part, no parens
(?: # start sets of parens + non-paren text
\([0-9 ,]+\) # comma-separated groups of digits in parens
# (e.g. string(10))
(?=[, >)]) # Must be followed by ,>) or space
[a-zA-Z0-9<>, ]* # Optional non-paren chars
)* # Can be zero or more of parens and following text
) # end of type group
)? # close type clause ":type"
\))? # End of optional name and/or type
s # End of replacement
""",
re.VERBOSE,
).sub,
):
"""Remove type information from parameter placeholders.
For every parameter of the form %(name:type)s, replace with %(name)s and add the
item name->type to the dict that's returned.
Returns operation without type information and a dictionary of names and types.
"""
parameter_types = None
def repl(m):
nonlocal parameter_types
prefix, name, type_ = m.groups()
if len(prefix) % 2:
# The prefix has an odd number of %s, the last of which
# escapes the % we're looking for, so we don't want to
# change anything.
return m.group(0)
try:
if name:
if not parameter_types:
parameter_types = {}
if type_:
if name in parameter_types:
if type_ != parameter_types[name]:
raise exceptions.ProgrammingError(
f"Conflicting types for {name}: "
f"{parameter_types[name]} and {type_}."
)
else:
parameter_types[name] = type_
else:
if not isinstance(parameter_types, dict):
raise TypeError()
return f"{prefix}%({name})s"
else:
if parameter_types is None:
parameter_types = []
parameter_types.append(type_)
return f"{prefix}%s"
except (AttributeError, TypeError):
raise exceptions.ProgrammingError(
f"{repr(operation)} mixes named and unamed parameters."
)
return extra_type_sub(repl, operation), parameter_types
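# Editor's note: the placeholder grammar handled by _extract_types() accepts an
# optional ":type" suffix, which is how ARRAY (and STRUCT) parameters get an
# explicit type. A hedged, standalone sketch (the project, dataset, table and
# column names below are made up):
from google.cloud.bigquery import dbapi

cur = dbapi.connect().cursor()
cur.execute(
    "SELECT name FROM `my_project.my_dataset.people`"
    " WHERE age >= %(min_age:INT64)s"
    " AND state IN UNNEST(%(states:ARRAY<STRING>)s)",
    {"min_age": 18, "states": ["CA", "OR"]},
)
print(cur.fetchall())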

View File

@@ -0,0 +1,58 @@
# Copyright 2017 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Exceptions used in the Google BigQuery DB-API."""
class Warning(Exception):
"""Exception raised for important DB-API warnings."""
class Error(Exception):
"""Exception representing all non-warning DB-API errors."""
class InterfaceError(Error):
"""DB-API error related to the database interface."""
class DatabaseError(Error):
"""DB-API error related to the database."""
class DataError(DatabaseError):
"""DB-API error due to problems with the processed data."""
class OperationalError(DatabaseError):
"""DB-API error related to the database operation.
These errors are not necessarily under the control of the programmer.
"""
class IntegrityError(DatabaseError):
"""DB-API error when integrity of the database is affected."""
class InternalError(DatabaseError):
"""DB-API error when the database encounters an internal error."""
class ProgrammingError(DatabaseError):
"""DB-API exception raised for programming errors."""
class NotSupportedError(DatabaseError):
"""DB-API error for operations not supported by the database or API."""

View File

@@ -0,0 +1,96 @@
# Copyright 2017 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Types used in the Google BigQuery DB-API.
See `PEP-249`_ for details.
.. _PEP-249:
https://www.python.org/dev/peps/pep-0249/#type-objects-and-constructors
"""
import datetime
Date = datetime.date
Time = datetime.time
Timestamp = datetime.datetime
DateFromTicks = datetime.date.fromtimestamp
TimestampFromTicks = datetime.datetime.fromtimestamp
def Binary(data):
"""Contruct a DB-API binary value.
Args:
data (bytes-like): An object containing binary data and that
can be converted to bytes with the `bytes` builtin.
Returns:
bytes: The binary data as a bytes object.
"""
if isinstance(data, int):
# This is not the conversion we're looking for, because it
# will simply create a bytes object of the given size.
raise TypeError("cannot convert `int` object to binary")
try:
return bytes(data)
except TypeError:
if isinstance(data, str):
return data.encode("utf-8")
else:
raise
def TimeFromTicks(ticks, tz=None):
"""Construct a DB-API time value from the given ticks value.
Args:
ticks (float):
a number of seconds since the epoch; see the documentation of the
standard Python time module for details.
tz (datetime.tzinfo): (Optional) time zone to use for conversion
Returns:
datetime.time: time represented by ticks.
"""
dt = datetime.datetime.fromtimestamp(ticks, tz=tz)
return dt.timetz()
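# Editor's note: e.g. TimeFromTicks(0, tz=datetime.timezone.utc) returns
# datetime.time(0, 0, tzinfo=datetime.timezone.utc), and Binary("abc") falls
# back to UTF-8 encoding and returns b'abc'.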
class _DBAPITypeObject(object):
"""DB-API type object which compares equal to many different strings.
See `PEP-249`_ for details.
.. _PEP-249:
https://www.python.org/dev/peps/pep-0249/#implementation-hints-for-module-authors
"""
def __init__(self, *values):
self.values = values
def __eq__(self, other):
return other in self.values
STRING = "STRING"
BINARY = _DBAPITypeObject("BYTES", "RECORD", "STRUCT")
NUMBER = _DBAPITypeObject(
"INTEGER", "INT64", "FLOAT", "FLOAT64", "NUMERIC", "BIGNUMERIC", "BOOLEAN", "BOOL"
)
DATETIME = _DBAPITypeObject("TIMESTAMP", "DATE", "TIME", "DATETIME")
ROWID = "ROWID"
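# Editor's note: the singletons above compare equal to every BigQuery type name
# they stand for, which is how DB-API type checks against cursor.description
# are meant to work. A hedged sketch, assuming `cur` is a cursor on which
# execute() has already been called:
for column in cur.description:
    if column.type_code == NUMBER:       # INT64, FLOAT64, NUMERIC, BOOL, ...
        print(column.name, "is numeric or boolean")
    elif column.type_code == DATETIME:   # TIMESTAMP, DATE, TIME, DATETIME
        print(column.name, "is date/time-like")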