structure saas with tools
@@ -0,0 +1,776 @@
# Copyright 2018 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""IPython Magics

Install ``bigquery-magics`` and call ``%load_ext bigquery_magics`` to use the
``%%bigquery`` cell magic.

See the `BigQuery Magics reference documentation
<https://googleapis.dev/python/bigquery-magics/latest/>`_.
"""
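
# A minimal notebook usage sketch (illustrative only; ``my_df`` and
# ``my-project`` are placeholder names). In an IPython/Jupyter session:
#
#     %load_ext bigquery_magics
#
#     %%bigquery my_df --project my-project
#     SELECT name, SUM(number) AS total
#     FROM `bigquery-public-data.usa_names.usa_1910_2013`
#     GROUP BY name
#     ORDER BY total DESC
#     LIMIT 10
#
# The query result is pushed into the session as a pandas DataFrame bound to
# ``my_df``; without a destination variable, the DataFrame is displayed.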

from __future__ import print_function

import ast
import copy
import functools
import re
import sys
import time
import warnings
from concurrent import futures

try:
    import IPython  # type: ignore
    from IPython import display  # type: ignore
    from IPython.core import magic_arguments  # type: ignore
except ImportError:
    raise ImportError("This module can only be loaded in IPython.")

from google.api_core import client_info
from google.api_core import client_options
from google.api_core.exceptions import NotFound
import google.auth  # type: ignore
from google.cloud import bigquery
import google.cloud.bigquery.dataset
from google.cloud.bigquery import _versions_helpers
from google.cloud.bigquery import exceptions
from google.cloud.bigquery.dbapi import _helpers
from google.cloud.bigquery.magics import line_arg_parser as lap

try:
    import bigquery_magics  # type: ignore
except ImportError:
    bigquery_magics = None

IPYTHON_USER_AGENT = "ipython-{}".format(IPython.__version__)  # type: ignore


class Context(object):
    """Storage for objects to be used throughout an IPython notebook session.

    A Context object is initialized when the ``magics`` module is imported,
    and can be found at ``google.cloud.bigquery.magics.context``.
    """

    def __init__(self):
        self._credentials = None
        self._project = None
        self._connection = None
        self._default_query_job_config = bigquery.QueryJobConfig()
        self._bigquery_client_options = client_options.ClientOptions()
        self._bqstorage_client_options = client_options.ClientOptions()
        self._progress_bar_type = "tqdm_notebook"

    @property
    def credentials(self):
        """google.auth.credentials.Credentials: Credentials to use for queries
        performed through IPython magics.

        Note:
            These credentials do not need to be explicitly defined if you are
            using Application Default Credentials. If you are not using
            Application Default Credentials, manually construct a
            :class:`google.auth.credentials.Credentials` object and set it as
            the context credentials as demonstrated in the example below. See
            `auth docs`_ for more information on obtaining credentials.

        Example:
            Manually setting the context credentials:

            >>> from google.cloud.bigquery import magics
            >>> from google.oauth2 import service_account
            >>> credentials = (service_account
            ...     .Credentials.from_service_account_file(
            ...         '/path/to/key.json'))
            >>> magics.context.credentials = credentials


        .. _auth docs: http://google-auth.readthedocs.io
            /en/latest/user-guide.html#obtaining-credentials
        """
        if self._credentials is None:
            self._credentials, _ = google.auth.default()
        return self._credentials

    @credentials.setter
    def credentials(self, value):
        self._credentials = value

    @property
    def project(self):
        """str: Default project to use for queries performed through IPython
        magics.

        Note:
            The project does not need to be explicitly defined if you have an
            environment default project set. If you do not have a default
            project set in your environment, manually assign the project as
            demonstrated in the example below.

        Example:
            Manually setting the context project:

            >>> from google.cloud.bigquery import magics
            >>> magics.context.project = 'my-project'
        """
        if self._project is None:
            _, self._project = google.auth.default()
        return self._project

    @project.setter
    def project(self, value):
        self._project = value

    @property
    def bigquery_client_options(self):
        """google.api_core.client_options.ClientOptions: client options to be
        used through IPython magics.

        Note:
            The client options do not need to be explicitly defined if no
            special network connections are required. Normally you would be
            using the https://bigquery.googleapis.com/ endpoint.

        Example:
            Manually setting the endpoint:

            >>> from google.cloud.bigquery import magics
            >>> client_options = {}
            >>> client_options['api_endpoint'] = "https://some.special.url"
            >>> magics.context.bigquery_client_options = client_options
        """
        return self._bigquery_client_options

    @bigquery_client_options.setter
    def bigquery_client_options(self, value):
        self._bigquery_client_options = value

    @property
    def bqstorage_client_options(self):
        """google.api_core.client_options.ClientOptions: client options to be
        used through IPython magics for the storage client.

        Note:
            The client options do not need to be explicitly defined if no
            special network connections are required. Normally you would be
            using the https://bigquerystorage.googleapis.com/ endpoint.

        Example:
            Manually setting the endpoint:

            >>> from google.cloud.bigquery import magics
            >>> client_options = {}
            >>> client_options['api_endpoint'] = "https://some.special.url"
            >>> magics.context.bqstorage_client_options = client_options
        """
        return self._bqstorage_client_options

    @bqstorage_client_options.setter
    def bqstorage_client_options(self, value):
        self._bqstorage_client_options = value

    @property
    def default_query_job_config(self):
        """google.cloud.bigquery.job.QueryJobConfig: Default job
        configuration for queries.

        The context's :class:`~google.cloud.bigquery.job.QueryJobConfig` is
        used for queries. Some properties can be overridden with arguments to
        the magics.

        Example:
            Manually setting the default value for ``maximum_bytes_billed``
            to 100 MB:

            >>> from google.cloud.bigquery import magics
            >>> magics.context.default_query_job_config.maximum_bytes_billed = 100000000
        """
        return self._default_query_job_config

    @default_query_job_config.setter
    def default_query_job_config(self, value):
        self._default_query_job_config = value

    @property
    def progress_bar_type(self):
        """str: Default progress bar type to use to display progress bar while
        executing queries through IPython magics.

        Note:
            Install the ``tqdm`` package to use this feature.

        Example:
            Manually setting the progress_bar_type:

            >>> from google.cloud.bigquery import magics
            >>> magics.context.progress_bar_type = "tqdm_notebook"
        """
        return self._progress_bar_type

    @progress_bar_type.setter
    def progress_bar_type(self, value):
        self._progress_bar_type = value


# If bigquery_magics is available, we load that extension rather than this one.
# Ensure google.cloud.bigquery.magics.context setters are on the correct magics
# implementation in case the user has installed the package but hasn't updated
# their code.
if bigquery_magics is not None:
    context = bigquery_magics.context
else:
    context = Context()
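
# A hedged configuration sketch (``my-project`` is a placeholder): tune the
# shared context once, before running any ``%%bigquery`` cells.
#
#     from google.cloud.bigquery import magics
#
#     magics.context.project = "my-project"
#     magics.context.default_query_job_config.maximum_bytes_billed = 10**8
#     magics.context.progress_bar_type = None  # disable the progress bar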


def _handle_error(error, destination_var=None):
    """Process a query execution error.

    Args:
        error (Exception):
            An exception that occurred during the query execution.
        destination_var (Optional[str]):
            The name of the IPython session variable to store the query job.
    """
    if destination_var:
        query_job = getattr(error, "query_job", None)

        if query_job is not None:
            IPython.get_ipython().push({destination_var: query_job})
        else:
            # this is the case when previewing table rows by providing just
            # table ID to cell magic
            print(
                "Could not save output to variable '{}'.".format(destination_var),
                file=sys.stderr,
            )

    print("\nERROR:\n", str(error), file=sys.stderr)
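
# Illustrative call pattern (names assumed; only meaningful inside a
# notebook session): on failure, the errored job is pushed into the session
# under the destination variable so it can be inspected afterwards.
#
#     try:
#         query_job = _run_query(client, "SELECT bad syntax")
#     except Exception as exc:
#         _handle_error(exc, destination_var="job")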


def _run_query(client, query, job_config=None):
    """Runs a query while printing status updates

    Args:
        client (google.cloud.bigquery.client.Client):
            Client to bundle configuration needed for API requests.
        query (str):
            SQL query to be executed. Defaults to the standard SQL dialect.
            Use the ``job_config`` parameter to change dialects.
        job_config (Optional[google.cloud.bigquery.job.QueryJobConfig]):
            Extra configuration options for the job.

    Returns:
        google.cloud.bigquery.job.QueryJob: the query job created

    Example:
        >>> client = bigquery.Client()
        >>> _run_query(client, "SELECT 17")
        Executing query with job ID: bf633912-af2c-4780-b568-5d868058632b
        Query executing: 1.66s
        Query complete after 2.07s
        'bf633912-af2c-4780-b568-5d868058632b'
    """
    start_time = time.perf_counter()
    query_job = client.query(query, job_config=job_config)

    if job_config and job_config.dry_run:
        return query_job

    print(f"Executing query with job ID: {query_job.job_id}")

    while True:
        print(
            f"\rQuery executing: {time.perf_counter() - start_time:.2f}s",
            end="",
        )
        try:
            query_job.result(timeout=0.5)
            break
        except futures.TimeoutError:
            continue
    print(f"\nJob ID {query_job.job_id} successfully executed")
    return query_job


def _create_dataset_if_necessary(client, dataset_id):
    """Create a dataset in the current project if it doesn't exist.

    Args:
        client (google.cloud.bigquery.client.Client):
            Client to bundle configuration needed for API requests.
        dataset_id (str):
            Dataset id.
    """
    dataset_reference = bigquery.dataset.DatasetReference(client.project, dataset_id)
    try:
        client.get_dataset(dataset_reference)
        return
    except NotFound:
        pass
    dataset = bigquery.Dataset(dataset_reference)
    dataset.location = client.location
    print(f"Creating dataset: {dataset_id}")
    client.create_dataset(dataset)
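
# Illustrative usage (assumes an authenticated client; names are
# placeholders): ensure the target dataset exists in the client's project
# before a --destination_table write.
#
#     client = bigquery.Client(project="my-project")
#     _create_dataset_if_necessary(client, "my_dataset")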


@magic_arguments.magic_arguments()
@magic_arguments.argument(
    "destination_var",
    nargs="?",
    help=("If provided, save the output to this variable instead of displaying it."),
)
@magic_arguments.argument(
    "--destination_table",
    type=str,
    default=None,
    help=(
        "If provided, save the output of the query to a new BigQuery table. "
        "The value should be in the format <dataset_id>.<table_id>. "
        "If the table does not exist, it will be created. "
        "If the table already exists, its data will be overwritten."
    ),
)
@magic_arguments.argument(
    "--project",
    type=str,
    default=None,
    help=("Project to use for executing this query. Defaults to the context project."),
)
@magic_arguments.argument(
    "--max_results",
    default=None,
    help=(
        "Maximum number of rows in the dataframe returned from executing the "
        "query. Defaults to returning all rows."
    ),
)
@magic_arguments.argument(
    "--maximum_bytes_billed",
    default=None,
    help=(
        "maximum_bytes_billed to use for executing this query. Defaults to "
        "the context default_query_job_config.maximum_bytes_billed."
    ),
)
@magic_arguments.argument(
    "--dry_run",
    action="store_true",
    default=False,
    help=(
        "Sets query to be a dry run to estimate costs. "
        "Defaults to executing the query instead of dry run if this argument is not used."
    ),
)
@magic_arguments.argument(
    "--use_legacy_sql",
    action="store_true",
    default=False,
    help=(
        "Sets query to use Legacy SQL instead of Standard SQL. Defaults to "
        "Standard SQL if this argument is not used."
    ),
)
@magic_arguments.argument(
    "--bigquery_api_endpoint",
    type=str,
    default=None,
    help=(
        "The desired API endpoint, e.g., bigquery.googleapis.com. Defaults to this "
        "option's value in the context bigquery_client_options."
    ),
)
@magic_arguments.argument(
    "--bqstorage_api_endpoint",
    type=str,
    default=None,
    help=(
        "The desired API endpoint, e.g., bigquerystorage.googleapis.com. Defaults to "
        "this option's value in the context bqstorage_client_options."
    ),
)
@magic_arguments.argument(
    "--no_query_cache",
    action="store_true",
    default=False,
    help=("Do not use cached query results."),
)
@magic_arguments.argument(
    "--use_bqstorage_api",
    action="store_true",
    default=None,
    help=(
        "[Deprecated] The BigQuery Storage API is already used by default to "
        "download large query results, and this option has no effect. "
        "If you want to switch to the classic REST API instead, use the "
        "--use_rest_api option."
    ),
)
@magic_arguments.argument(
    "--use_rest_api",
    action="store_true",
    default=False,
    help=(
        "Use the classic REST API instead of the BigQuery Storage API to "
        "download query results."
    ),
)
@magic_arguments.argument(
    "--verbose",
    action="store_true",
    default=False,
    help=(
        "If set, print verbose output, including the query job ID and the "
        "amount of time for the query to finish. By default, this "
        "information will be displayed as the query runs, but will be "
        "cleared after the query is finished."
    ),
)
@magic_arguments.argument(
    "--params",
    nargs="+",
    default=None,
    help=(
        "Parameters to format the query string. If present, the --params "
        "flag should be followed by a string representation of a dictionary "
        "in the format {'param_name': 'param_value'} (ex. {\"num\": 17}), "
        "or a reference to a dictionary in the same format. The dictionary "
        "reference can be made by including a '$' before the variable "
        "name (ex. $my_dict_var)."
    ),
)
@magic_arguments.argument(
    "--progress_bar_type",
    type=str,
    default=None,
    help=(
        "Sets the progress bar type to display a progress bar while executing "
        "the query. Defaults to tqdm_notebook. Install the ``tqdm`` package "
        "to use this feature."
    ),
)
@magic_arguments.argument(
    "--location",
    type=str,
    default=None,
    help=(
        "Sets the location in which to execute the query. Defaults to the "
        "location set in the query settings in the console."
    ),
)
def _cell_magic(line, query):
    """Underlying function for bigquery cell magic

    Note:
        This function contains the underlying logic for the 'bigquery' cell
        magic. This function is not meant to be called directly.

    Args:
        line (str): "%%bigquery" followed by arguments as required
        query (str): SQL query to run

    Returns:
        pandas.DataFrame: the query results.
    """
    # The built-in parser does not recognize Python structures such as dicts, thus
    # we extract the "--params" option and interpret it separately.
    try:
        params_option_value, rest_of_args = _split_args_line(line)
    except lap.exceptions.QueryParamsParseError as exc:
        rebranded_error = SyntaxError(
            "--params is not a correctly formatted JSON string or a JSON "
            "serializable dictionary"
        )
        raise rebranded_error from exc
    except lap.exceptions.DuplicateQueryParamsError as exc:
        rebranded_error = ValueError("Duplicate --params option.")
        raise rebranded_error from exc
    except lap.exceptions.ParseError as exc:
        rebranded_error = ValueError(
            "Unrecognized input, are option values correct? "
            "Error details: {}".format(exc.args[0])
        )
        raise rebranded_error from exc

    args = magic_arguments.parse_argstring(_cell_magic, rest_of_args)

    if args.use_bqstorage_api is not None:
        warnings.warn(
            "Deprecated option --use_bqstorage_api, the BigQuery "
            "Storage API is already used by default.",
            category=DeprecationWarning,
        )
    use_bqstorage_api = not args.use_rest_api
    location = args.location

    params = []
    if params_option_value:
        # A non-existing params variable is not expanded and ends up in the input
        # in its raw form, e.g. "$query_params".
        if params_option_value.startswith("$"):
            msg = 'Parameter expansion failed, undefined variable "{}".'.format(
                params_option_value[1:]
            )
            raise NameError(msg)

        params = _helpers.to_query_parameters(ast.literal_eval(params_option_value), {})

    project = args.project or context.project

    bigquery_client_options = copy.deepcopy(context.bigquery_client_options)
    if args.bigquery_api_endpoint:
        if isinstance(bigquery_client_options, dict):
            bigquery_client_options["api_endpoint"] = args.bigquery_api_endpoint
        else:
            bigquery_client_options.api_endpoint = args.bigquery_api_endpoint

    client = bigquery.Client(
        project=project,
        credentials=context.credentials,
        default_query_job_config=context.default_query_job_config,
        client_info=client_info.ClientInfo(user_agent=IPYTHON_USER_AGENT),
        client_options=bigquery_client_options,
        location=location,
    )
    if context._connection:
        client._connection = context._connection

    bqstorage_client_options = copy.deepcopy(context.bqstorage_client_options)
    if args.bqstorage_api_endpoint:
        if isinstance(bqstorage_client_options, dict):
            bqstorage_client_options["api_endpoint"] = args.bqstorage_api_endpoint
        else:
            bqstorage_client_options.api_endpoint = args.bqstorage_api_endpoint

    bqstorage_client = _make_bqstorage_client(
        client,
        use_bqstorage_api,
        bqstorage_client_options,
    )

    close_transports = functools.partial(_close_transports, client, bqstorage_client)

    try:
        if args.max_results:
            max_results = int(args.max_results)
        else:
            max_results = None

        query = query.strip()

        if not query:
            error = ValueError("Query is missing.")
            _handle_error(error, args.destination_var)
            return

        # Check if query is given as a reference to a variable.
        if query.startswith("$"):
            query_var_name = query[1:]

            if not query_var_name:
                missing_msg = 'Missing query variable name, empty "$" is not allowed.'
                raise NameError(missing_msg)

            if query_var_name.isidentifier():
                ip = IPython.get_ipython()
                query = ip.user_ns.get(query_var_name, ip)  # ip serves as a sentinel

                if query is ip:
                    raise NameError(
                        f"Unknown query, variable {query_var_name} does not exist."
                    )
                else:
                    if not isinstance(query, (str, bytes)):
                        raise TypeError(
                            f"Query variable {query_var_name} must be a string "
                            "or a bytes-like value."
                        )

        # Any query that does not contain whitespace (aside from leading and trailing whitespace)
        # is assumed to be a table id
        if not re.search(r"\s", query):
            try:
                rows = client.list_rows(query, max_results=max_results)
            except Exception as ex:
                _handle_error(ex, args.destination_var)
                return

            result = rows.to_dataframe(
                bqstorage_client=bqstorage_client,
                create_bqstorage_client=False,
            )
            if args.destination_var:
                IPython.get_ipython().push({args.destination_var: result})
                return
            else:
                return result

        job_config = bigquery.job.QueryJobConfig()
        job_config.query_parameters = params
        job_config.use_legacy_sql = args.use_legacy_sql
        job_config.dry_run = args.dry_run

        # Don't override context job config unless --no_query_cache is explicitly set.
        if args.no_query_cache:
            job_config.use_query_cache = False

        if args.destination_table:
            split = args.destination_table.split(".")
            if len(split) != 2:
                raise ValueError(
                    "--destination_table should be in a <dataset_id>.<table_id> format."
                )
            dataset_id, table_id = split
            job_config.allow_large_results = True
            dataset_ref = bigquery.dataset.DatasetReference(client.project, dataset_id)
            destination_table_ref = dataset_ref.table(table_id)
            job_config.destination = destination_table_ref
            job_config.create_disposition = "CREATE_IF_NEEDED"
            job_config.write_disposition = "WRITE_TRUNCATE"
            _create_dataset_if_necessary(client, dataset_id)

        if args.maximum_bytes_billed == "None":
            job_config.maximum_bytes_billed = 0
        elif args.maximum_bytes_billed is not None:
            value = int(args.maximum_bytes_billed)
            job_config.maximum_bytes_billed = value

        try:
            query_job = _run_query(client, query, job_config=job_config)
        except Exception as ex:
            _handle_error(ex, args.destination_var)
            return

        if not args.verbose:
            display.clear_output()

        if args.dry_run and args.destination_var:
            IPython.get_ipython().push({args.destination_var: query_job})
            return
        elif args.dry_run:
            print(
                "Query validated. This query will process {} bytes.".format(
                    query_job.total_bytes_processed
                )
            )
            return query_job

        # An explicit --progress_bar_type option takes precedence over the
        # context default.
        progress_bar = args.progress_bar_type or context.progress_bar_type

        if max_results:
            result = query_job.result(max_results=max_results).to_dataframe(
                bqstorage_client=None,
                create_bqstorage_client=False,
                progress_bar_type=progress_bar,
            )
        else:
            result = query_job.to_dataframe(
                bqstorage_client=bqstorage_client,
                create_bqstorage_client=False,
                progress_bar_type=progress_bar,
            )

        if args.destination_var:
            IPython.get_ipython().push({args.destination_var: result})
        else:
            return result
    finally:
        close_transports()
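
# Hedged notebook examples of the option surface (placeholder names; run in
# Jupyter after loading the extension). Query parameters set via --params are
# referenced in SQL as @param_name:
#
#     %%bigquery df --params {"min_total": 1000} --maximum_bytes_billed 100000000
#     SELECT name, SUM(number) AS total
#     FROM `bigquery-public-data.usa_names.usa_1910_2013`
#     GROUP BY name
#     HAVING total >= @min_total
#
#     %%bigquery --destination_table my_dataset.my_table
#     SELECT 17 AS answer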


def _split_args_line(line):
    """Split out the --params option value from the input line arguments.

    Args:
        line (str): The line arguments passed to the cell magic.

    Returns:
        Tuple[str, str]
    """
    lexer = lap.Lexer(line)
    scanner = lap.Parser(lexer)
    tree = scanner.input_line()

    extractor = lap.QueryParamsExtractor()
    params_option_value, rest_of_args = extractor.visit(tree)

    return params_option_value, rest_of_args
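
# Illustrative split (hypothetical input; the exact whitespace in the
# returned pieces may differ): the --params value is peeled off so the
# remaining options can go through IPython's standard argument parser.
#
#     params_value, rest = _split_args_line(
#         'df --params {"num": 17} --project my-project'
#     )
#     # params_value -> '{"num": 17}'
#     # rest         -> 'df --project my-project' (roughly)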


def _make_bqstorage_client(client, use_bqstorage_api, client_options):
    """Creates a BigQuery Storage client.

    Args:
        client (:class:`~google.cloud.bigquery.client.Client`): BigQuery client.
        use_bqstorage_api (bool): whether the BigQuery Storage API is used or not.
        client_options (:class:`google.api_core.client_options.ClientOptions`):
            Custom options used with a new BigQuery Storage client instance
            if one is created.

    Raises:
        ImportError: if the google-cloud-bigquery-storage or grpcio package
            is not installed.

    Returns:
        Optional[google.cloud.bigquery_storage.BigQueryReadClient]:
            A BigQuery Storage client, or None if ``use_bqstorage_api`` is
            False or the installed google-cloud-bigquery-storage package is
            outdated.
    """
    if not use_bqstorage_api:
        return None

    try:
        _versions_helpers.BQ_STORAGE_VERSIONS.try_import(raise_if_error=True)
    except exceptions.BigQueryStorageNotFoundError as err:
        customized_error = ImportError(
            "The default BigQuery Storage API client cannot be used, install "
            "the missing google-cloud-bigquery-storage and pyarrow packages "
            "to use it. Alternatively, use the classic REST API by specifying "
            "the --use_rest_api magic option."
        )
        raise customized_error from err
    except exceptions.LegacyBigQueryStorageError:
        pass

    try:
        from google.api_core.gapic_v1 import client_info as gapic_client_info
    except ImportError as err:
        customized_error = ImportError(
            "Install the grpcio package to use the BigQuery Storage API."
        )
        raise customized_error from err

    return client._ensure_bqstorage_client(
        client_options=client_options,
        client_info=gapic_client_info.ClientInfo(user_agent=IPYTHON_USER_AGENT),
    )
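
# Behavior sketch: with the --use_rest_api magic option, the caller passes
# use_bqstorage_api=False, this helper returns None, and query results are
# downloaded over the REST API instead.
#
#     bqstorage_client = _make_bqstorage_client(client, False, None)
#     assert bqstorage_client is None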


def _close_transports(client, bqstorage_client):
    """Close the given clients' underlying transport channels.

    Closing the transport is needed to release system resources, namely open
    sockets.

    Args:
        client (:class:`~google.cloud.bigquery.client.Client`):
            A client for the BigQuery API.
        bqstorage_client
            (Optional[:class:`~google.cloud.bigquery_storage.BigQueryReadClient`]):
            A client for the BigQuery Storage API.
    """
    client.close()
    if bqstorage_client is not None:
        bqstorage_client._transport.grpc_channel.close()
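
# Cleanup pattern used by the cell magic (sketch): bind both clients once,
# then guarantee the sockets are released in a ``finally`` block.
#
#     close_transports = functools.partial(_close_transports, client, bqstorage_client)
#     try:
#         ...  # run the query and build the DataFrame
#     finally:
#         close_transports()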