# Copyright 2018 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""IPython Magics
|
|
|
|
Install ``bigquery-magics`` and call ``%load_ext bigquery_magics`` to use the
|
|
``%%bigquery`` cell magic.
|
|
|
|
See the `BigQuery Magics reference documentation
|
|
<https://googleapis.dev/python/bigquery-magics/latest/>`_.
|
|
"""

from __future__ import print_function

import ast
import copy
import functools
import re
import sys
import time
import warnings
from concurrent import futures

try:
    import IPython  # type: ignore
    from IPython import display  # type: ignore
    from IPython.core import magic_arguments  # type: ignore
except ImportError:
    raise ImportError("This module can only be loaded in IPython.")

from google.api_core import client_info
from google.api_core import client_options
from google.api_core.exceptions import NotFound
import google.auth  # type: ignore
from google.cloud import bigquery
import google.cloud.bigquery.dataset
from google.cloud.bigquery import _versions_helpers
from google.cloud.bigquery import exceptions
from google.cloud.bigquery.dbapi import _helpers
from google.cloud.bigquery.magics import line_arg_parser as lap

try:
    import bigquery_magics  # type: ignore
except ImportError:
    bigquery_magics = None

IPYTHON_USER_AGENT = "ipython-{}".format(IPython.__version__)  # type: ignore


class Context(object):
    """Storage for objects to be used throughout an IPython notebook session.

    A Context object is initialized when the ``magics`` module is imported,
    and can be found at ``google.cloud.bigquery.magics.context``.
    """

    def __init__(self):
        self._credentials = None
        self._project = None
        self._connection = None
        self._default_query_job_config = bigquery.QueryJobConfig()
        self._bigquery_client_options = client_options.ClientOptions()
        self._bqstorage_client_options = client_options.ClientOptions()
        self._progress_bar_type = "tqdm_notebook"

    @property
    def credentials(self):
        """google.auth.credentials.Credentials: Credentials to use for queries
        performed through IPython magics.

        Note:
            These credentials do not need to be explicitly defined if you are
            using Application Default Credentials. If you are not using
            Application Default Credentials, manually construct a
            :class:`google.auth.credentials.Credentials` object and set it as
            the context credentials as demonstrated in the example below. See
            `auth docs`_ for more information on obtaining credentials.

        Example:
            Manually setting the context credentials:

            >>> from google.cloud.bigquery import magics
            >>> from google.oauth2 import service_account
            >>> credentials = (service_account
            ...     .Credentials.from_service_account_file(
            ...         '/path/to/key.json'))
            >>> magics.context.credentials = credentials


        .. _auth docs: http://google-auth.readthedocs.io
            /en/latest/user-guide.html#obtaining-credentials
        """
        if self._credentials is None:
            self._credentials, _ = google.auth.default()
        return self._credentials

    @credentials.setter
    def credentials(self, value):
        self._credentials = value

    @property
    def project(self):
        """str: Default project to use for queries performed through IPython
        magics.

        Note:
            The project does not need to be explicitly defined if you have an
            environment default project set. If you do not have a default
            project set in your environment, manually assign the project as
            demonstrated in the example below.

        Example:
            Manually setting the context project:

            >>> from google.cloud.bigquery import magics
            >>> magics.context.project = 'my-project'
        """
        if self._project is None:
            _, self._project = google.auth.default()
        return self._project

    @project.setter
    def project(self, value):
        self._project = value

    @property
    def bigquery_client_options(self):
        """google.api_core.client_options.ClientOptions: client options to be
        used through IPython magics.

        Note:
            The client options do not need to be explicitly defined if no
            special network connections are required. Normally you would be
            using the https://bigquery.googleapis.com/ endpoint.

        Example:
            Manually setting the endpoint:

            >>> from google.cloud.bigquery import magics
            >>> client_options = {}
            >>> client_options['api_endpoint'] = "https://some.special.url"
            >>> magics.context.bigquery_client_options = client_options
        """
        return self._bigquery_client_options

    @bigquery_client_options.setter
    def bigquery_client_options(self, value):
        self._bigquery_client_options = value

    @property
    def bqstorage_client_options(self):
        """google.api_core.client_options.ClientOptions: client options to be
        used through IPython magics for the storage client.

        Note:
            The client options do not need to be explicitly defined if no
            special network connections are required. Normally you would be
            using the https://bigquerystorage.googleapis.com/ endpoint.

        Example:
            Manually setting the endpoint:

            >>> from google.cloud.bigquery import magics
            >>> client_options = {}
            >>> client_options['api_endpoint'] = "https://some.special.url"
            >>> magics.context.bqstorage_client_options = client_options
        """
        return self._bqstorage_client_options

    @bqstorage_client_options.setter
    def bqstorage_client_options(self, value):
        self._bqstorage_client_options = value

    @property
    def default_query_job_config(self):
        """google.cloud.bigquery.job.QueryJobConfig: Default job
        configuration for queries.

        The context's :class:`~google.cloud.bigquery.job.QueryJobConfig` is
        used for queries. Some properties can be overridden with arguments to
        the magics.

        Example:
            Manually setting the default value for ``maximum_bytes_billed``
            to 100 MB:

            >>> from google.cloud.bigquery import magics
            >>> magics.context.default_query_job_config.maximum_bytes_billed = 100000000
        """
        return self._default_query_job_config

    @default_query_job_config.setter
    def default_query_job_config(self, value):
        self._default_query_job_config = value

    @property
    def progress_bar_type(self):
        """str: Default progress bar type to use to display progress bar while
        executing queries through IPython magics.

        Note:
            Install the ``tqdm`` package to use this feature.

        Example:
            Manually setting the progress_bar_type:

            >>> from google.cloud.bigquery import magics
            >>> magics.context.progress_bar_type = "tqdm_notebook"
        """
        return self._progress_bar_type

    @progress_bar_type.setter
    def progress_bar_type(self, value):
        self._progress_bar_type = value


# If bigquery_magics is available, we load that extension rather than this one.
# Ensure google.cloud.bigquery.magics.context setters are on the correct magics
# implementation in case the user has installed the package but hasn't updated
# their code.
if bigquery_magics is not None:
    context = bigquery_magics.context
else:
    context = Context()
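
# For illustration, the selected ``context`` can be configured up front either
# way (the project ID and byte limit below are illustrative):
#
#     from google.cloud.bigquery import magics
#
#     magics.context.project = "my-project"
#     magics.context.default_query_job_config.maximum_bytes_billed = 100000000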


def _handle_error(error, destination_var=None):
    """Process a query execution error.

    Args:
        error (Exception):
            An exception that occurred during the query execution.
        destination_var (Optional[str]):
            The name of the IPython session variable to store the query job.
    """
    if destination_var:
        query_job = getattr(error, "query_job", None)

        if query_job is not None:
            IPython.get_ipython().push({destination_var: query_job})
        else:
            # This is the case when previewing table rows by providing just
            # a table ID to the cell magic.
            print(
                "Could not save output to variable '{}'.".format(destination_var),
                file=sys.stderr,
            )

    print("\nERROR:\n", str(error), file=sys.stderr)


def _run_query(client, query, job_config=None):
    """Runs a query while printing status updates

    Args:
        client (google.cloud.bigquery.client.Client):
            Client to bundle configuration needed for API requests.
        query (str):
            SQL query to be executed. Defaults to the standard SQL dialect.
            Use the ``job_config`` parameter to change dialects.
        job_config (Optional[google.cloud.bigquery.job.QueryJobConfig]):
            Extra configuration options for the job.

    Returns:
        google.cloud.bigquery.job.QueryJob: the query job created

    Example:
        >>> client = bigquery.Client()
        >>> _run_query(client, "SELECT 17")
        Executing query with job ID: bf633912-af2c-4780-b568-5d868058632b
        Query executing: 1.66s
        Job ID bf633912-af2c-4780-b568-5d868058632b successfully executed
        'bf633912-af2c-4780-b568-5d868058632b'
    """
    start_time = time.perf_counter()
    query_job = client.query(query, job_config=job_config)

    if job_config and job_config.dry_run:
        return query_job

    print(f"Executing query with job ID: {query_job.job_id}")

    while True:
        print(
            f"\rQuery executing: {time.perf_counter() - start_time:.2f}s",
            end="",
        )
        try:
            query_job.result(timeout=0.5)
            break
        except futures.TimeoutError:
            continue
    print(f"\nJob ID {query_job.job_id} successfully executed")
    return query_job


def _create_dataset_if_necessary(client, dataset_id):
    """Create a dataset in the current project if it doesn't exist.

    Args:
        client (google.cloud.bigquery.client.Client):
            Client to bundle configuration needed for API requests.
        dataset_id (str):
            Dataset id.
    """
    dataset_reference = bigquery.dataset.DatasetReference(client.project, dataset_id)
    try:
        # If this call succeeds, the dataset already exists.
        client.get_dataset(dataset_reference)
        return
    except NotFound:
        pass
    dataset = bigquery.Dataset(dataset_reference)
    dataset.location = client.location
    print(f"Creating dataset: {dataset_id}")
    client.create_dataset(dataset)
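
# A quick sketch of the helper above (the dataset ID is illustrative; assumes
# Application Default Credentials):
#
#     client = bigquery.Client()
#     _create_dataset_if_necessary(client, "my_scratch_dataset")  # no-op if present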


@magic_arguments.magic_arguments()
@magic_arguments.argument(
    "destination_var",
    nargs="?",
    help=("If provided, save the output to this variable instead of displaying it."),
)
@magic_arguments.argument(
    "--destination_table",
    type=str,
    default=None,
    help=(
        "If provided, save the output of the query to a new BigQuery table. "
        "The value should be in the format <dataset_id>.<table_id>. "
        "If the table does not exist, it will be created. "
        "If the table already exists, its data will be overwritten."
    ),
)
@magic_arguments.argument(
    "--project",
    type=str,
    default=None,
    help=("Project to use for executing this query. Defaults to the context project."),
)
@magic_arguments.argument(
    "--max_results",
    default=None,
    help=(
        "Maximum number of rows in dataframe returned from executing the query. "
        "Defaults to returning all rows."
    ),
)
@magic_arguments.argument(
    "--maximum_bytes_billed",
    default=None,
    help=(
        "maximum_bytes_billed to use for executing this query. Defaults to "
        "the context default_query_job_config.maximum_bytes_billed."
    ),
)
@magic_arguments.argument(
    "--dry_run",
    action="store_true",
    default=False,
    help=(
        "Sets query to be a dry run to estimate costs. "
        "Defaults to executing the query rather than performing a dry run "
        "if this argument is not used."
    ),
)
@magic_arguments.argument(
    "--use_legacy_sql",
    action="store_true",
    default=False,
    help=(
        "Sets query to use Legacy SQL instead of Standard SQL. Defaults to "
        "Standard SQL if this argument is not used."
    ),
)
@magic_arguments.argument(
    "--bigquery_api_endpoint",
    type=str,
    default=None,
    help=(
        "The desired API endpoint, e.g., bigquery.googleapis.com. Defaults to this "
        "option's value in the context bigquery_client_options."
    ),
)
@magic_arguments.argument(
    "--bqstorage_api_endpoint",
    type=str,
    default=None,
    help=(
        "The desired API endpoint, e.g., bigquerystorage.googleapis.com. Defaults to "
        "this option's value in the context bqstorage_client_options."
    ),
)
@magic_arguments.argument(
    "--no_query_cache",
    action="store_true",
    default=False,
    help=("Do not use cached query results."),
)
@magic_arguments.argument(
    "--use_bqstorage_api",
    action="store_true",
    default=None,
    help=(
        "[Deprecated] The BigQuery Storage API is already used by default to "
        "download large query results, and this option has no effect. "
        "If you want to switch to the classic REST API instead, use the "
        "--use_rest_api option."
    ),
)
@magic_arguments.argument(
    "--use_rest_api",
    action="store_true",
    default=False,
    help=(
        "Use the classic REST API instead of the BigQuery Storage API to "
        "download query results."
    ),
)
@magic_arguments.argument(
    "--verbose",
    action="store_true",
    default=False,
    help=(
        "If set, print verbose output, including the query job ID and the "
        "amount of time for the query to finish. By default, this "
        "information will be displayed as the query runs, but will be "
        "cleared after the query is finished."
    ),
)
@magic_arguments.argument(
    "--params",
    nargs="+",
    default=None,
    help=(
        "Parameters to format the query string. If present, the --params "
        "flag should be followed by a string representation of a dictionary "
        "in the format {'param_name': 'param_value'} (ex. {\"num\": 17}), "
        "or a reference to a dictionary in the same format. The dictionary "
        "reference can be made by including a '$' before the variable "
        "name (ex. $my_dict_var)."
    ),
)
@magic_arguments.argument(
    "--progress_bar_type",
    type=str,
    default=None,
    help=(
        "Sets progress bar type to display a progress bar while executing the query. "
        "Defaults to tqdm_notebook. Install the ``tqdm`` package to use this feature."
    ),
)
@magic_arguments.argument(
    "--location",
    type=str,
    default=None,
    help=(
        "Set the location to execute the query. "
        "Defaults to the location set in the query settings in the console."
    ),
)
def _cell_magic(line, query):
    """Underlying function for bigquery cell magic

    Note:
        This function contains the underlying logic for the 'bigquery' cell
        magic. This function is not meant to be called directly.

    Args:
        line (str): "%%bigquery" followed by arguments as required
        query (str): SQL query to run

    Returns:
        pandas.DataFrame: the query results.
    """
    # The built-in parser does not recognize Python structures such as dicts,
    # so we extract the "--params" option and interpret it separately.
    try:
        params_option_value, rest_of_args = _split_args_line(line)
    except lap.exceptions.QueryParamsParseError as exc:
        rebranded_error = SyntaxError(
            "--params is not a correctly formatted JSON string or a JSON "
            "serializable dictionary"
        )
        raise rebranded_error from exc
    except lap.exceptions.DuplicateQueryParamsError as exc:
        rebranded_error = ValueError("Duplicate --params option.")
        raise rebranded_error from exc
    except lap.exceptions.ParseError as exc:
        rebranded_error = ValueError(
            "Unrecognized input, are option values correct? "
            "Error details: {}".format(exc.args[0])
        )
        raise rebranded_error from exc

    args = magic_arguments.parse_argstring(_cell_magic, rest_of_args)

    if args.use_bqstorage_api is not None:
        warnings.warn(
            "Deprecated option --use_bqstorage_api, the BigQuery "
            "Storage API is already used by default.",
            category=DeprecationWarning,
        )
    use_bqstorage_api = not args.use_rest_api
    location = args.location

    params = []
    if params_option_value:
        # A non-existing params variable is not expanded and ends up in the input
        # in its raw form, e.g. "$query_params".
        if params_option_value.startswith("$"):
            msg = 'Parameter expansion failed, undefined variable "{}".'.format(
                params_option_value[1:]
            )
            raise NameError(msg)

        params = _helpers.to_query_parameters(ast.literal_eval(params_option_value), {})

    project = args.project or context.project

    bigquery_client_options = copy.deepcopy(context.bigquery_client_options)
    if args.bigquery_api_endpoint:
        if isinstance(bigquery_client_options, dict):
            bigquery_client_options["api_endpoint"] = args.bigquery_api_endpoint
        else:
            bigquery_client_options.api_endpoint = args.bigquery_api_endpoint

    client = bigquery.Client(
        project=project,
        credentials=context.credentials,
        default_query_job_config=context.default_query_job_config,
        client_info=client_info.ClientInfo(user_agent=IPYTHON_USER_AGENT),
        client_options=bigquery_client_options,
        location=location,
    )
    if context._connection:
        client._connection = context._connection

    bqstorage_client_options = copy.deepcopy(context.bqstorage_client_options)
    if args.bqstorage_api_endpoint:
        if isinstance(bqstorage_client_options, dict):
            bqstorage_client_options["api_endpoint"] = args.bqstorage_api_endpoint
        else:
            bqstorage_client_options.api_endpoint = args.bqstorage_api_endpoint

    bqstorage_client = _make_bqstorage_client(
        client,
        use_bqstorage_api,
        bqstorage_client_options,
    )

    close_transports = functools.partial(_close_transports, client, bqstorage_client)

    try:
        if args.max_results:
            max_results = int(args.max_results)
        else:
            max_results = None

        query = query.strip()

        if not query:
            error = ValueError("Query is missing.")
            _handle_error(error, args.destination_var)
            return

        # Check if query is given as a reference to a variable.
        if query.startswith("$"):
            query_var_name = query[1:]

            if not query_var_name:
                missing_msg = 'Missing query variable name, empty "$" is not allowed.'
                raise NameError(missing_msg)

            if query_var_name.isidentifier():
                ip = IPython.get_ipython()
                query = ip.user_ns.get(query_var_name, ip)  # ip serves as a sentinel

                if query is ip:
                    raise NameError(
                        f"Unknown query, variable {query_var_name} does not exist."
                    )
                else:
                    if not isinstance(query, (str, bytes)):
                        raise TypeError(
                            f"Query variable {query_var_name} must be a string "
                            "or a bytes-like value."
                        )

        # Any query that does not contain whitespace (aside from leading and
        # trailing whitespace) is assumed to be a table id.
        if not re.search(r"\s", query):
            try:
                rows = client.list_rows(query, max_results=max_results)
            except Exception as ex:
                _handle_error(ex, args.destination_var)
                return

            result = rows.to_dataframe(
                bqstorage_client=bqstorage_client,
                create_bqstorage_client=False,
            )
            if args.destination_var:
                IPython.get_ipython().push({args.destination_var: result})
                return
            else:
                return result

        job_config = bigquery.job.QueryJobConfig()
        job_config.query_parameters = params
        job_config.use_legacy_sql = args.use_legacy_sql
        job_config.dry_run = args.dry_run

        # Don't override context job config unless --no_query_cache is explicitly set.
        if args.no_query_cache:
            job_config.use_query_cache = False

        if args.destination_table:
            split = args.destination_table.split(".")
            if len(split) != 2:
                raise ValueError(
                    "--destination_table should be in a <dataset_id>.<table_id> format."
                )
            dataset_id, table_id = split
            job_config.allow_large_results = True
            dataset_ref = bigquery.dataset.DatasetReference(client.project, dataset_id)
            destination_table_ref = dataset_ref.table(table_id)
            job_config.destination = destination_table_ref
            job_config.create_disposition = "CREATE_IF_NEEDED"
            job_config.write_disposition = "WRITE_TRUNCATE"
            _create_dataset_if_necessary(client, dataset_id)

        if args.maximum_bytes_billed == "None":
            job_config.maximum_bytes_billed = 0
        elif args.maximum_bytes_billed is not None:
            value = int(args.maximum_bytes_billed)
            job_config.maximum_bytes_billed = value

        try:
            query_job = _run_query(client, query, job_config=job_config)
        except Exception as ex:
            _handle_error(ex, args.destination_var)
            return

        if not args.verbose:
            display.clear_output()

        if args.dry_run and args.destination_var:
            IPython.get_ipython().push({args.destination_var: query_job})
            return
        elif args.dry_run:
            print(
                "Query validated. This query will process {} bytes.".format(
                    query_job.total_bytes_processed
                )
            )
            return query_job

        # The --progress_bar_type option takes precedence over the context
        # default; otherwise the context default (tqdm_notebook) would always
        # win and the option would have no effect.
        progress_bar = args.progress_bar_type or context.progress_bar_type

        if max_results:
            result = query_job.result(max_results=max_results).to_dataframe(
                bqstorage_client=None,
                create_bqstorage_client=False,
                progress_bar_type=progress_bar,
            )
        else:
            result = query_job.to_dataframe(
                bqstorage_client=bqstorage_client,
                create_bqstorage_client=False,
                progress_bar_type=progress_bar,
            )

        if args.destination_var:
            IPython.get_ipython().push({args.destination_var: result})
        else:
            return result
    finally:
        close_transports()


def _split_args_line(line):
    """Split out the --params option value from the input line arguments.

    Args:
        line (str): The line arguments passed to the cell magic.

    Returns:
        Tuple[str, str]: the value of the --params option, and the rest of
        the argument line with that option removed.
    """
    lexer = lap.Lexer(line)
    parser = lap.Parser(lexer)
    tree = parser.input_line()

    extractor = lap.QueryParamsExtractor()
    params_option_value, rest_of_args = extractor.visit(tree)

    return params_option_value, rest_of_args
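
# For illustration, a magic line such as
#
#     --params {"num": 17} my_df --use_rest_api
#
# is expected to split into the params value and the remaining arguments,
# roughly ('{"num": 17}', 'my_df --use_rest_api'); exact whitespace handling
# depends on the line_arg_parser implementation.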


def _make_bqstorage_client(client, use_bqstorage_api, client_options):
    """Creates a BigQuery Storage client.

    Args:
        client (:class:`~google.cloud.bigquery.client.Client`): BigQuery client.
        use_bqstorage_api (bool): whether BigQuery Storage API is used or not.
        client_options (:class:`google.api_core.client_options.ClientOptions`):
            Custom options used with a new BigQuery Storage client instance
            if one is created.

    Raises:
        ImportError: if google-cloud-bigquery-storage is not installed, or
            the grpcio package is not installed.

    Returns:
        Optional[google.cloud.bigquery_storage.BigQueryReadClient]:
            A BigQuery Storage client, or ``None`` if
            ``use_bqstorage_api`` is ``False`` or the installed
            google-cloud-bigquery-storage package is outdated.
    """
    if not use_bqstorage_api:
        return None

    try:
        _versions_helpers.BQ_STORAGE_VERSIONS.try_import(raise_if_error=True)
    except exceptions.BigQueryStorageNotFoundError as err:
        customized_error = ImportError(
            "The default BigQuery Storage API client cannot be used, install "
            "the missing google-cloud-bigquery-storage and pyarrow packages "
            "to use it. Alternatively, use the classic REST API by specifying "
            "the --use_rest_api magic option."
        )
        raise customized_error from err
    except exceptions.LegacyBigQueryStorageError:
        pass

    try:
        from google.api_core.gapic_v1 import client_info as gapic_client_info
    except ImportError as err:
        customized_error = ImportError(
            "Install the grpcio package to use the BigQuery Storage API."
        )
        raise customized_error from err

    return client._ensure_bqstorage_client(
        client_options=client_options,
        client_info=gapic_client_info.ClientInfo(user_agent=IPYTHON_USER_AGENT),
    )
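
# Sketch of the two paths above, mirroring the call in _cell_magic: passing
# use_bqstorage_api=False (the --use_rest_api case) yields ``None``; otherwise
# a BigQuery Storage client tagged with the IPython user agent is returned:
#
#     bqstorage_client = _make_bqstorage_client(
#         client, not args.use_rest_api, context.bqstorage_client_options
#     )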


def _close_transports(client, bqstorage_client):
    """Close the given clients' underlying transport channels.

    Closing the transport is needed to release system resources, namely open
    sockets.

    Args:
        client (:class:`~google.cloud.bigquery.client.Client`):
            A client for the BigQuery API.
        bqstorage_client
            (Optional[:class:`~google.cloud.bigquery_storage.BigQueryReadClient`]):
            A client for the BigQuery Storage API.
    """
    client.close()
    if bqstorage_client is not None:
        bqstorage_client._transport.grpc_channel.close()
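
# Typical pairing, as done in _cell_magic above: bind the clients with
# functools.partial and invoke the cleanup in a ``finally`` block so sockets
# are released even if the query fails:
#
#     close_transports = functools.partial(_close_transports, client, bqstorage_client)
#     try:
#         ...  # run the query and fetch results
#     finally:
#         close_transports()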