structure saas with tools
This commit is contained in:
@@ -0,0 +1,20 @@
|
||||
# Copyright 2020 Google LLC
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from google.cloud.bigquery.magics.magics import context
|
||||
|
||||
|
||||
# For backwards compatibility we need to make the context available in the path
|
||||
# google.cloud.bigquery.magics.context
|
||||
__all__ = ("context",)
|
||||
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,34 @@
|
||||
# Copyright 2020 Google LLC
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from google.cloud.bigquery.magics.line_arg_parser.exceptions import ParseError
|
||||
from google.cloud.bigquery.magics.line_arg_parser.exceptions import (
|
||||
DuplicateQueryParamsError,
|
||||
QueryParamsParseError,
|
||||
)
|
||||
from google.cloud.bigquery.magics.line_arg_parser.lexer import Lexer
|
||||
from google.cloud.bigquery.magics.line_arg_parser.lexer import TokenType
|
||||
from google.cloud.bigquery.magics.line_arg_parser.parser import Parser
|
||||
from google.cloud.bigquery.magics.line_arg_parser.visitors import QueryParamsExtractor
|
||||
|
||||
|
||||
__all__ = (
|
||||
"DuplicateQueryParamsError",
|
||||
"Lexer",
|
||||
"Parser",
|
||||
"ParseError",
|
||||
"QueryParamsExtractor",
|
||||
"QueryParamsParseError",
|
||||
"TokenType",
|
||||
)
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,25 @@
|
||||
# Copyright 2020 Google LLC
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
|
||||
class ParseError(Exception):
    """Base exception for all errors raised while parsing the magic arguments."""

    pass
|
||||
|
||||
|
||||
class QueryParamsParseError(ParseError):
    """Raised when the value of the --params option is syntactically incorrect."""
|
||||
|
||||
|
||||
class DuplicateQueryParamsError(ParseError):
    """Raised when the --params option is given more than once."""

    pass
|
||||
@@ -0,0 +1,200 @@
|
||||
# Copyright 2020 Google LLC
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from collections import namedtuple
|
||||
from collections import OrderedDict
|
||||
import itertools
|
||||
import re
|
||||
|
||||
import enum
|
||||
|
||||
|
||||
Token = namedtuple("Token", ("type_", "lexeme", "pos"))
|
||||
StateTransition = namedtuple("StateTransition", ("new_state", "total_offset"))
|
||||
|
||||
# Pattern matching is done with regexes, and the order in which the token patterns are
|
||||
# defined is important.
|
||||
#
|
||||
# Suppose we had the following token definitions:
|
||||
# * INT - a token matching integers,
|
||||
# * FLOAT - a token matching floating point numbers,
|
||||
# * DOT - a token matching a single literal dot character, i.e. "."
|
||||
#
|
||||
# The FLOAT token would have to be defined first, since we would want the input "1.23"
|
||||
# to be tokenized as a single FLOAT token, and *not* three tokens (INT, DOT, INT).
|
||||
#
|
||||
# Sometimes, however, different tokens match too similar patterns, and it is not
|
||||
# possible to define them in order that would avoid any ambiguity. One such case are
|
||||
# the OPT_VAL and PY_NUMBER tokens, as both can match an integer literal, say "42".
|
||||
#
|
||||
# In order to avoid the dilemmas, the lexer implements a concept of STATES. States are
|
||||
# used to split token definitions into subgroups, and in each lexer state only a single
|
||||
# subgroup is used for tokenizing the input. Lexer states can therefore be though of as
|
||||
# token namespaces.
|
||||
#
|
||||
# For example, while parsing the value of the "--params" option, we do not want to
|
||||
# "recognize" it as a single OPT_VAL token, but instead want to parse it as a Python
|
||||
# dictionary and verify its syntactial correctness. On the other hand, while parsing
|
||||
# the value of an option other than "--params", we do not really care about its
|
||||
# structure, and thus do not want to use any of the "Python tokens" for pattern matching.
|
||||
#
|
||||
# Token definition order is important, thus an OrderedDict is used. In addition, PEP 468
|
||||
# guarantees us that the order of kwargs is preserved in Python 3.6+.
|
||||
token_types = OrderedDict(
    state_parse_pos_args=OrderedDict(
        GOTO_PARSE_NON_PARAMS_OPTIONS=r"(?P<GOTO_PARSE_NON_PARAMS_OPTIONS>(?=--))",  # double dash - starting the options list
        DEST_VAR=r"(?P<DEST_VAR>[^\d\W]\w*)",  # essentially a Python ID
    ),
    state_parse_non_params_options=OrderedDict(
        GOTO_PARSE_PARAMS_OPTION=r"(?P<GOTO_PARSE_PARAMS_OPTION>(?=--params(?:\s|=|--|$)))",  # the --params option
        OPTION_SPEC=r"(?P<OPTION_SPEC>--\w+)",
        OPTION_EQ=r"(?P<OPTION_EQ>=)",
        OPT_VAL=r"(?P<OPT_VAL>\S+?(?=\s|--|$))",
    ),
    state_parse_params_option=OrderedDict(
        # Single and double quoted strings. The "\\." alternative makes a
        # backslash-escaped character (e.g. \' inside a single-quoted string)
        # part of the match, which the previous "\." spelling failed to do.
        PY_STRING=r"(?P<PY_STRING>(?:{})|(?:{}))".format(
            r"'(?:[^'\\]|\\.)*'", r'"(?:[^"\\]|\\.)*"'
        ),
        PARAMS_OPT_SPEC=r"(?P<PARAMS_OPT_SPEC>--params(?=\s|=|--|$))",
        PARAMS_OPT_EQ=r"(?P<PARAMS_OPT_EQ>=)",
        GOTO_PARSE_NON_PARAMS_OPTIONS=r"(?P<GOTO_PARSE_NON_PARAMS_OPTIONS>(?=--\w+))",  # found another option spec
        PY_BOOL=r"(?P<PY_BOOL>True|False)",
        DOLLAR_PY_ID=r"(?P<DOLLAR_PY_ID>\$[^\d\W]\w*)",
        # Integer or float literal with an optional exponent.
        # "(?:0|[1-9]\d*)" also accepts a plain zero (previously rejected), and
        # "(?:[eE]...)" fixes the former "(:?[e|E]...)" spelling, which created
        # a stray capturing group with an optional literal colon and matched a
        # literal "|" inside the character class.
        PY_NUMBER=r"(?P<PY_NUMBER>-?(?:0|[1-9]\d*)(?:\.\d+)?(?:[eE][+-]?\d+)?)",
        SQUOTE=r"(?P<SQUOTE>')",
        DQUOTE=r'(?P<DQUOTE>")',
        COLON=r"(?P<COLON>:)",
        COMMA=r"(?P<COMMA>,)",
        LCURL=r"(?P<LCURL>\{)",
        RCURL=r"(?P<RCURL>})",
        LSQUARE=r"(?P<LSQUARE>\[)",
        RSQUARE=r"(?P<RSQUARE>])",
        LPAREN=r"(?P<LPAREN>\()",
        RPAREN=r"(?P<RPAREN>\))",
    ),
    common=OrderedDict(
        WS=r"(?P<WS>\s+)",
        EOL=r"(?P<EOL>$)",
        UNKNOWN=r"(?P<UNKNOWN>\S+)",  # anything not a whitespace or matched by something else
    ),
)
|
||||
|
||||
|
||||
class AutoStrEnum(str, enum.Enum):
    """Base enum class for name=value str enums.

    Members created with ``enum.auto()`` receive their own name as their
    value, e.g. ``FOO = enum.auto()`` yields ``FOO.value == "FOO"``.
    """

    def _generate_next_value_(name, start, count, last_values):
        # Hook used by enum.auto(); returning the member name makes value == name.
        return name
|
||||
|
||||
|
||||
# The enum of all lexically meaningful token names, built dynamically from the
# token definitions above. The GOTO_* names are internal state-transition
# markers for the lexer, not real tokens, and are therefore excluded.
TokenType = AutoStrEnum(  # type: ignore # pytype: disable=wrong-arg-types
    "TokenType",
    [
        (name, enum.auto())
        for name in itertools.chain.from_iterable(token_types.values())
        if not name.startswith("GOTO_")
    ],
)
|
||||
|
||||
|
||||
class LexerState(AutoStrEnum):
    """The distinct modes of the lexer, each using its own token subgroup."""

    PARSE_POS_ARGS = enum.auto()  # parsing positional arguments
    PARSE_NON_PARAMS_OPTIONS = enum.auto()  # parsing options other than "--params"
    PARSE_PARAMS_OPTION = enum.auto()  # parsing the "--params" option
    STATE_END = enum.auto()  # terminal state - no more input to scan
|
||||
|
||||
|
||||
class Lexer(object):
    """Lexical analyzer for tokenizing the cell magic input line.

    Iterating over an instance yields ``Token`` namedtuples. Whitespace
    tokens are filtered out, and iteration stops after the EOL token.

    Args:
        input_text (str): The cell magic argument line to tokenize.
    """

    # One "grand" regex per lexer state, combining that state's token patterns
    # with the patterns common to all states. Alternation order follows the
    # (order-preserving) token definitions in ``token_types``.
    _GRAND_PATTERNS = {
        LexerState.PARSE_POS_ARGS: re.compile(
            "|".join(
                itertools.chain(
                    token_types["state_parse_pos_args"].values(),
                    token_types["common"].values(),
                )
            )
        ),
        LexerState.PARSE_NON_PARAMS_OPTIONS: re.compile(
            "|".join(
                itertools.chain(
                    token_types["state_parse_non_params_options"].values(),
                    token_types["common"].values(),
                )
            )
        ),
        LexerState.PARSE_PARAMS_OPTION: re.compile(
            "|".join(
                itertools.chain(
                    token_types["state_parse_params_option"].values(),
                    token_types["common"].values(),
                )
            )
        ),
    }

    def __init__(self, input_text):
        self._text = input_text

    def __iter__(self):
        # Since re.scanner does not seem to support manipulating inner scanner states,
        # we need to implement lexer state transitions manually using special
        # non-capturing lookahead token patterns to signal when a state transition
        # should be made.
        # Since we don't have "nested" states, we don't really need a stack and
        # this simple mechanism is sufficient.
        state = LexerState.PARSE_POS_ARGS
        offset = 0  # the number of characters processed so far

        while state != LexerState.STATE_END:
            token_stream = self._find_state_tokens(state, offset)

            for maybe_token in token_stream:  # pragma: NO COVER
                if isinstance(maybe_token, StateTransition):
                    # Switch state and restart scanning from the recorded offset;
                    # the abandoned generator's trailing Token is never consumed.
                    state = maybe_token.new_state
                    offset = maybe_token.total_offset
                    break

                if maybe_token.type_ != TokenType.WS:
                    yield maybe_token

                if maybe_token.type_ == TokenType.EOL:
                    state = LexerState.STATE_END
                    break

    def _find_state_tokens(self, state, current_offset):
        """Scan the input for current state's tokens starting at ``current_offset``.

        Args:
            state (LexerState): The current lexer state.
            current_offset (int): The offset in the input text, i.e. the number
                of characters already scanned so far.

        Yields:
            The next ``Token`` or ``StateTransition`` instance.
        """
        pattern = self._GRAND_PATTERNS[state]
        scanner = pattern.finditer(self._text, current_offset)

        for match in scanner:  # pragma: NO COVER
            token_type = match.lastgroup

            if token_type.startswith("GOTO_"):
                # Announce the state switch first; the caller breaks out of the
                # loop on a StateTransition, so the Token yielded below is only
                # consumed for non-transition matches.
                yield StateTransition(
                    new_state=getattr(LexerState, token_type[5:]),  # w/o "GOTO_" prefix
                    total_offset=match.start(),
                )

            yield Token(token_type, match.group(), match.start())
|
||||
@@ -0,0 +1,484 @@
|
||||
# Copyright 2020 Google LLC
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from google.cloud.bigquery.magics.line_arg_parser import DuplicateQueryParamsError
|
||||
from google.cloud.bigquery.magics.line_arg_parser import ParseError
|
||||
from google.cloud.bigquery.magics.line_arg_parser import QueryParamsParseError
|
||||
from google.cloud.bigquery.magics.line_arg_parser import TokenType
|
||||
|
||||
|
||||
class ParseNode(object):
    """A base class for nodes in the input parsed to an abstract syntax tree."""
|
||||
|
||||
|
||||
class InputLine(ParseNode):
    """Root AST node representing the entire cell magic argument line."""

    def __init__(self, destination_var, option_list):
        self.destination_var = destination_var  # DestinationVar node
        self.option_list = option_list  # CmdOptionList node
|
||||
|
||||
|
||||
class DestinationVar(ParseNode):
    """AST node for the variable the query result should be stored into."""

    def __init__(self, token):
        # token type is DEST_VAR (or None when no destination variable was given)
        self.token = token
        self.name = token.lexeme if token is not None else None
|
||||
|
||||
|
||||
class CmdOptionList(ParseNode):
    """AST node holding all the options parsed from the argument line.

    Args:
        option_nodes (Iterable[CmdOption]): The parsed option nodes.
    """

    def __init__(self, option_nodes):
        # list() makes the shallow copy directly instead of a copy comprehension.
        self.options = list(option_nodes)
|
||||
|
||||
|
||||
class CmdOption(ParseNode):
    """AST node for a single command line option."""

    def __init__(self, name, value):
        self.name = name  # string (option name without the "--" prefix)
        self.value = value  # CmdOptionValue node
|
||||
|
||||
|
||||
class ParamsOption(CmdOption):
    """AST node for the special "--params" option."""

    def __init__(self, value):
        super(ParamsOption, self).__init__("params", value)
|
||||
|
||||
|
||||
class CmdOptionValue(ParseNode):
    """AST node for a generic (non "--params") command option's value."""

    def __init__(self, token):
        # token type is OPT_VAL
        self.token = token
        self.value = token.lexeme  # the option value as plain text
|
||||
|
||||
|
||||
class PyVarExpansion(ParseNode):
    """AST node for an unexpanded variable reference, e.g. ``$my_params``."""

    def __init__(self, token):
        # token type is DOLLAR_PY_ID
        self.token = token
        self.raw_value = token.lexeme
|
||||
|
||||
|
||||
class PyDict(ParseNode):
    """AST node for a dictionary literal.

    Args:
        dict_items (Iterable[PyDictItem]): The parsed key-value item nodes.
    """

    def __init__(self, dict_items):
        # list() makes the shallow copy directly instead of a copy comprehension.
        self.items = list(dict_items)
|
||||
|
||||
|
||||
class PyDictItem(ParseNode):
    """AST node for a single key-value pair of a dictionary."""

    def __init__(self, key, value):
        self.key = key  # PyDictKey node
        self.value = value  # a py_value node (scalar, tuple, list, or dict)
|
||||
|
||||
|
||||
class PyDictKey(ParseNode):
    """AST node for a dictionary key (restricted to string literals by the grammar)."""

    def __init__(self, token):
        # token type is PY_STRING
        self.token = token
        self.key_value = token.lexeme
|
||||
|
||||
|
||||
class PyScalarValue(ParseNode):
    """AST node for a scalar literal (bool, number, or string)."""

    def __init__(self, token, raw_value):
        self.token = token
        self.raw_value = raw_value  # the scalar's source text, unevaluated
|
||||
|
||||
|
||||
class PyTuple(ParseNode):
    """AST node for a tuple literal.

    Args:
        tuple_items (Iterable[ParseNode]): The parsed item nodes.
    """

    def __init__(self, tuple_items):
        # list() makes the shallow copy directly instead of a copy comprehension.
        self.items = list(tuple_items)
|
||||
|
||||
|
||||
class PyList(ParseNode):
    """AST node for a list literal.

    Args:
        list_items (Iterable[ParseNode]): The parsed item nodes.
    """

    def __init__(self, list_items):
        # list() makes the shallow copy directly instead of a copy comprehension.
        self.items = list(list_items)
|
||||
|
||||
|
||||
class Parser(object):
    """Parser for the tokenized cell magic input line.

    The parser recognizes a simplified subset of Python grammar, specifically
    a dictionary representation in typical use cases when the "--params" option
    is used with the %%bigquery cell magic.

    The grammar (terminal symbols are CAPITALIZED):

        input_line : destination_var option_list
        destination_var : DEST_VAR | EMPTY
        option_list : (OPTION_SPEC [OPTION_EQ] option_value)*
                      (params_option | EMPTY)
                      (OPTION_SPEC [OPTION_EQ] option_value)*

        option_value : OPT_VAL | EMPTY

        # DOLLAR_PY_ID can occur if a variable passed to --params does not exist
        # and is thus not expanded to a dict.
        params_option : PARAMS_OPT_SPEC [PARAMS_OPT_EQ] \
                        (DOLLAR_PY_ID | PY_STRING | py_dict)

        py_dict : LCURL dict_items RCURL
        dict_items : dict_item | (dict_item COMMA dict_items)
        dict_item : (dict_key COLON py_value) | EMPTY

        # dict items are actually @parameter names in the cell body (i.e. the query),
        # thus restricting them to strings.
        dict_key : PY_STRING

        py_value : PY_BOOL
                 | PY_NUMBER
                 | PY_STRING
                 | py_tuple
                 | py_list
                 | py_dict

        py_tuple : LPAREN collection_items RPAREN
        py_list : LSQUARE collection_items RSQUARE
        collection_items : collection_item | (collection_item COMMA collection_items)
        collection_item : py_value | EMPTY

    Args:
        lexer (line_arg_parser.lexer.Lexer):
            An iterable producing a tokenized cell magic argument line.
    """

    def __init__(self, lexer):
        self._lexer = lexer
        self._tokens_iter = iter(self._lexer)
        self.get_next_token()  # prime the parser with the first token

    def get_next_token(self):
        """Obtain the next token from the token stream and store it as current."""
        token = next(self._tokens_iter)
        self._current_token = token

    def consume(self, expected_type, exc_type=ParseError):
        """Move to the next token in token stream if it matches the expected type.

        Args:
            expected_type (lexer.TokenType): The expected token type to be consumed.
            exc_type (Optional[ParseError]): The type of the exception to raise. Should be
                the ``ParseError`` class or one of its subclasses. Defaults to
                ``ParseError``.

        Raises:
            ParseError: If the current token does not match the expected type.
        """
        if self._current_token.type_ == expected_type:
            # Do not advance past EOL - the token stream is exhausted there.
            if expected_type != TokenType.EOL:
                self.get_next_token()
        else:
            if self._current_token.type_ == TokenType.EOL:
                msg = "Unexpected end of input, expected {}.".format(expected_type)
            else:
                msg = "Expected token type {}, but found {} at position {}.".format(
                    expected_type, self._current_token.lexeme, self._current_token.pos
                )
            self.error(message=msg, exc_type=exc_type)

    def error(self, message="Syntax error.", exc_type=ParseError):
        """Raise an error with the given message.

        Args:
            message (str): The message of the raised error. Defaults to
                ``"Syntax error."``.
            exc_type (Optional[ParseError]): The type of the exception to raise. Should be
                the ``ParseError`` class or one of its subclasses. Defaults to
                ``ParseError``.

        Raises:
            ParseError: Always raised with the given message.
        """
        raise exc_type(message)

    def input_line(self):
        """The top level method for parsing the cell magic arguments line.

        Implements the following grammar production rule:

        input_line : destination_var option_list
        """
        dest_var = self.destination_var()
        options = self.option_list()

        token = self._current_token

        if token.type_ != TokenType.EOL:
            msg = "Unexpected input at position {}: {}".format(token.pos, token.lexeme)
            self.error(msg)

        return InputLine(dest_var, options)

    def destination_var(self):
        """Implementation of the ``destination_var`` grammar production rule.

        Production:

            destination_var : DEST_VAR | EMPTY
        """
        token = self._current_token

        if token.type_ == TokenType.DEST_VAR:
            self.consume(TokenType.DEST_VAR)
            result = DestinationVar(token)
        elif token.type_ == TokenType.UNKNOWN:
            msg = "Unknown input at position {}: {}".format(token.pos, token.lexeme)
            self.error(msg)
        else:
            # EMPTY - no destination variable was given.
            result = DestinationVar(None)

        return result

    def option_list(self):
        """Implementation of the ``option_list`` grammar production rule.

        Production:

            option_list : (OPTION_SPEC [OPTION_EQ] option_value)*
                          (params_option | EMPTY)
                          (OPTION_SPEC [OPTION_EQ] option_value)*
        """
        all_options = []

        # Consume consecutive non-"--params" options into all_options.
        def parse_nonparams_options():
            while self._current_token.type_ == TokenType.OPTION_SPEC:
                token = self._current_token
                self.consume(TokenType.OPTION_SPEC)

                opt_name = token.lexeme[2:]  # cut off the "--" prefix

                # skip the optional "=" character
                if self._current_token.type_ == TokenType.OPTION_EQ:
                    self.consume(TokenType.OPTION_EQ)

                opt_value = self.option_value()
                option = CmdOption(opt_name, opt_value)
                all_options.append(option)

        parse_nonparams_options()

        token = self._current_token

        if token.type_ == TokenType.PARAMS_OPT_SPEC:
            option = self.params_option()
            all_options.append(option)

        parse_nonparams_options()

        # Only a single "--params" option may appear on the line.
        if self._current_token.type_ == TokenType.PARAMS_OPT_SPEC:
            self.error(
                message="Duplicate --params option", exc_type=DuplicateQueryParamsError
            )

        return CmdOptionList(all_options)

    def option_value(self):
        """Implementation of the ``option_value`` grammar production rule.

        Production:

            option_value : OPT_VAL | EMPTY
        """
        token = self._current_token

        if token.type_ == TokenType.OPT_VAL:
            self.consume(TokenType.OPT_VAL)
            result = CmdOptionValue(token)
        elif token.type_ == TokenType.UNKNOWN:
            msg = "Unknown input at position {}: {}".format(token.pos, token.lexeme)
            self.error(msg)
        else:
            # EMPTY - a flag-style option without a value.
            result = None

        return result

    def params_option(self):
        """Implementation of the ``params_option`` grammar production rule.

        Production:

            params_option : PARAMS_OPT_SPEC [PARAMS_OPT_EQ] \
                            (DOLLAR_PY_ID | PY_STRING | py_dict)
        """
        self.consume(TokenType.PARAMS_OPT_SPEC)

        # skip the optional "=" character
        if self._current_token.type_ == TokenType.PARAMS_OPT_EQ:
            self.consume(TokenType.PARAMS_OPT_EQ)

        if self._current_token.type_ == TokenType.DOLLAR_PY_ID:
            token = self._current_token
            self.consume(TokenType.DOLLAR_PY_ID)
            opt_value = PyVarExpansion(token)
        elif self._current_token.type_ == TokenType.PY_STRING:
            token = self._current_token
            self.consume(TokenType.PY_STRING, exc_type=QueryParamsParseError)
            opt_value = PyScalarValue(token, token.lexeme)
        else:
            opt_value = self.py_dict()

        result = ParamsOption(opt_value)

        return result

    def py_dict(self):
        """Implementation of the ``py_dict`` grammar production rule.

        Production:

            py_dict : LCURL dict_items RCURL
        """
        self.consume(TokenType.LCURL, exc_type=QueryParamsParseError)
        dict_items = self.dict_items()
        self.consume(TokenType.RCURL, exc_type=QueryParamsParseError)

        return PyDict(dict_items)

    def dict_items(self):
        """Implementation of the ``dict_items`` grammar production rule.

        Production:

            dict_items : dict_item | (dict_item COMMA dict_items)
        """
        result = []

        item = self.dict_item()
        if item is not None:
            result.append(item)

        while self._current_token.type_ == TokenType.COMMA:
            self.consume(TokenType.COMMA, exc_type=QueryParamsParseError)
            item = self.dict_item()
            if item is not None:
                result.append(item)

        return result

    def dict_item(self):
        """Implementation of the ``dict_item`` grammar production rule.

        Production:

            dict_item : (dict_key COLON py_value) | EMPTY
        """
        token = self._current_token

        if token.type_ == TokenType.PY_STRING:
            key = self.dict_key()
            self.consume(TokenType.COLON, exc_type=QueryParamsParseError)
            value = self.py_value()
            result = PyDictItem(key, value)
        elif token.type_ == TokenType.UNKNOWN:
            msg = "Unknown input at position {}: {}".format(token.pos, token.lexeme)
            self.error(msg, exc_type=QueryParamsParseError)
        else:
            # EMPTY - e.g. an empty dict or a trailing comma.
            result = None

        return result

    def dict_key(self):
        """Implementation of the ``dict_key`` grammar production rule.

        Production:

            dict_key : PY_STRING
        """
        token = self._current_token
        self.consume(TokenType.PY_STRING, exc_type=QueryParamsParseError)
        return PyDictKey(token)

    def py_value(self):
        """Implementation of the ``py_value`` grammar production rule.

        Production:

            py_value : PY_BOOL | PY_NUMBER | PY_STRING | py_tuple | py_list | py_dict
        """
        token = self._current_token

        if token.type_ == TokenType.PY_BOOL:
            self.consume(TokenType.PY_BOOL, exc_type=QueryParamsParseError)
            return PyScalarValue(token, token.lexeme)
        elif token.type_ == TokenType.PY_NUMBER:
            self.consume(TokenType.PY_NUMBER, exc_type=QueryParamsParseError)
            return PyScalarValue(token, token.lexeme)
        elif token.type_ == TokenType.PY_STRING:
            self.consume(TokenType.PY_STRING, exc_type=QueryParamsParseError)
            return PyScalarValue(token, token.lexeme)
        elif token.type_ == TokenType.LPAREN:
            tuple_node = self.py_tuple()
            return tuple_node
        elif token.type_ == TokenType.LSQUARE:
            list_node = self.py_list()
            return list_node
        elif token.type_ == TokenType.LCURL:
            dict_node = self.py_dict()
            return dict_node
        else:
            msg = "Unexpected token type {} at position {}.".format(
                token.type_, token.pos
            )
            self.error(msg, exc_type=QueryParamsParseError)

    def py_tuple(self):
        """Implementation of the ``py_tuple`` grammar production rule.

        Production:

            py_tuple : LPAREN collection_items RPAREN
        """
        self.consume(TokenType.LPAREN, exc_type=QueryParamsParseError)
        items = self.collection_items()
        self.consume(TokenType.RPAREN, exc_type=QueryParamsParseError)

        return PyTuple(items)

    def py_list(self):
        """Implementation of the ``py_list`` grammar production rule.

        Production:

            py_list : LSQUARE collection_items RSQUARE
        """
        self.consume(TokenType.LSQUARE, exc_type=QueryParamsParseError)
        items = self.collection_items()
        self.consume(TokenType.RSQUARE, exc_type=QueryParamsParseError)

        return PyList(items)

    def collection_items(self):
        """Implementation of the ``collection_items`` grammar production rule.

        Production:

            collection_items : collection_item | (collection_item COMMA collection_items)
        """
        result = []

        item = self.collection_item()
        if item is not None:
            result.append(item)

        while self._current_token.type_ == TokenType.COMMA:
            self.consume(TokenType.COMMA, exc_type=QueryParamsParseError)
            item = self.collection_item()
            if item is not None:
                result.append(item)

        return result

    def collection_item(self):
        """Implementation of the ``collection_item`` grammar production rule.

        Production:

            collection_item : py_value | EMPTY
        """
        if self._current_token.type_ not in {TokenType.RPAREN, TokenType.RSQUARE}:
            result = self.py_value()
        else:
            result = None  # end of list/tuple items

        return result
|
||||
@@ -0,0 +1,159 @@
|
||||
# Copyright 2020 Google LLC
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""This module contains classes that traverse AST and convert it to something else.
|
||||
|
||||
If the parser successfully accepts a valid input (the bigquery cell magic arguments),
|
||||
the result is an Abstract Syntax Tree (AST) that represents the input as a tree
|
||||
with notes containing various useful metadata.
|
||||
|
||||
Node visitors can process such tree and convert it to something else that can
|
||||
be used for further processing, for example:
|
||||
|
||||
* An optimized version of the tree with redundancy removed/simplified (not used here).
|
||||
* The same tree, but with semantic errors checked, because an otherwise syntactically
|
||||
valid input might still contain errors (not used here, semantic errors are detected
|
||||
elsewhere).
|
||||
* A form that can be directly handed to the code that operates on the input. The
|
||||
``QueryParamsExtractor`` class, for instance, splits the input arguments into
|
||||
the "--params <...>" part and everything else.
|
||||
The "everything else" part can be then parsed by the default Jupyter argument parser,
|
||||
while the --params option is processed separately by the Python evaluator.
|
||||
|
||||
More info on the visitor design pattern:
|
||||
https://en.wikipedia.org/wiki/Visitor_pattern
|
||||
|
||||
"""
|
||||
|
||||
from __future__ import print_function
|
||||
|
||||
|
||||
class NodeVisitor(object):
    """Base visitor class implementing the dispatch machinery."""

    def visit(self, node):
        """Dispatch *node* to the matching ``visit_<ClassName>`` method.

        Falls back to :meth:`method_missing` when the concrete visitor
        does not define a handler for the node's type.
        """
        handler_name = "visit_{}".format(type(node).__name__)
        handler = getattr(self, handler_name, self.method_missing)
        return handler(node)

    def method_missing(self, node):
        """Default handler for node types without a dedicated visit method."""
        raise Exception("No visit_{} method".format(type(node).__name__))
|
||||
|
||||
|
||||
class QueryParamsExtractor(NodeVisitor):
    """A visitor that extracts the "--params <...>" part from input line arguments.

    Visiting an ``InputLine`` node returns a two-tuple of strings: the
    re-rendered "--params" dictionary value, and everything else on the line.
    """

    def visit_InputLine(self, node):
        params_dict_parts = []
        other_parts = []

        dest_var_parts = self.visit(node.destination_var)
        params, other_options = self.visit(node.option_list)

        if dest_var_parts:
            other_parts.extend(dest_var_parts)

        # Separate the destination variable from the options that follow it.
        if dest_var_parts and other_options:
            other_parts.append(" ")
        other_parts.extend(other_options)

        params_dict_parts.extend(params)

        return "".join(params_dict_parts), "".join(other_parts)

    def visit_DestinationVar(self, node):
        return [node.name] if node.name is not None else []

    def visit_CmdOptionList(self, node):
        params_opt_parts = []
        other_parts = []

        # The enumerate() index previously used here was never read; iterate
        # over the options directly.
        for opt in node.options:
            option_parts = self.visit(opt)
            list_to_extend = params_opt_parts if opt.name == "params" else other_parts

            if list_to_extend:
                list_to_extend.append(" ")  # separate from the preceding option
            list_to_extend.extend(option_parts)

        return params_opt_parts, other_parts

    def visit_CmdOption(self, node):
        result = ["--{}".format(node.name)]

        if node.value is not None:
            result.append(" ")
            value_parts = self.visit(node.value)
            result.extend(value_parts)

        return result

    def visit_CmdOptionValue(self, node):
        return [node.value]

    def visit_ParamsOption(self, node):
        value_parts = self.visit(node.value)
        return value_parts

    def visit_PyVarExpansion(self, node):
        return [node.raw_value]

    def visit_PyDict(self, node):
        result = ["{"]

        for i, item in enumerate(node.items):
            if i > 0:
                result.append(", ")
            item_parts = self.visit(item)
            result.extend(item_parts)

        result.append("}")
        return result

    def visit_PyDictItem(self, node):
        result = self.visit(node.key)  # key parts
        result.append(": ")
        value_parts = self.visit(node.value)
        result.extend(value_parts)
        return result

    def visit_PyDictKey(self, node):
        return [node.key_value]

    def visit_PyScalarValue(self, node):
        return [node.raw_value]

    def visit_PyTuple(self, node):
        result = ["("]

        for i, item in enumerate(node.items):
            if i > 0:
                result.append(", ")
            item_parts = self.visit(item)
            result.extend(item_parts)

        result.append(")")
        return result

    def visit_PyList(self, node):
        result = ["["]

        for i, item in enumerate(node.items):
            if i > 0:
                result.append(", ")
            item_parts = self.visit(item)
            result.extend(item_parts)

        result.append("]")
        return result
|
||||
@@ -0,0 +1,776 @@
|
||||
# Copyright 2018 Google LLC
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""IPython Magics
|
||||
|
||||
Install ``bigquery-magics`` and call ``%load_ext bigquery_magics`` to use the
|
||||
``%%bigquery`` cell magic.
|
||||
|
||||
See the `BigQuery Magics reference documentation
|
||||
<https://googleapis.dev/python/bigquery-magics/latest/>`_.
|
||||
"""
|
||||
|
||||
from __future__ import print_function
|
||||
|
||||
import re
|
||||
import ast
|
||||
import copy
|
||||
import functools
|
||||
import sys
|
||||
import time
|
||||
import warnings
|
||||
from concurrent import futures
|
||||
|
||||
try:
|
||||
import IPython # type: ignore
|
||||
from IPython import display # type: ignore
|
||||
from IPython.core import magic_arguments # type: ignore
|
||||
except ImportError:
|
||||
raise ImportError("This module can only be loaded in IPython.")
|
||||
|
||||
from google.api_core import client_info
|
||||
from google.api_core import client_options
|
||||
from google.api_core.exceptions import NotFound
|
||||
import google.auth # type: ignore
|
||||
from google.cloud import bigquery
|
||||
import google.cloud.bigquery.dataset
|
||||
from google.cloud.bigquery import _versions_helpers
|
||||
from google.cloud.bigquery import exceptions
|
||||
from google.cloud.bigquery.dbapi import _helpers
|
||||
from google.cloud.bigquery.magics import line_arg_parser as lap
|
||||
|
||||
try:
|
||||
import bigquery_magics # type: ignore
|
||||
except ImportError:
|
||||
bigquery_magics = None
|
||||
|
||||
IPYTHON_USER_AGENT = "ipython-{}".format(IPython.__version__) # type: ignore
|
||||
|
||||
|
||||
class Context(object):
    """Shared state for objects used throughout an IPython notebook session.

    A single Context instance is created when the ``magics`` module is
    imported, and can be found at ``google.cloud.bigquery.magics.context``.
    """

    def __init__(self):
        # Credentials and project are resolved lazily on first property
        # access; the remaining attributes hold session-wide defaults.
        self._credentials = None
        self._project = None
        self._connection = None
        self._default_query_job_config = bigquery.QueryJobConfig()
        self._bigquery_client_options = client_options.ClientOptions()
        self._bqstorage_client_options = client_options.ClientOptions()
        self._progress_bar_type = "tqdm_notebook"

    @property
    def credentials(self):
        """google.auth.credentials.Credentials: Credentials to use for queries
        performed through IPython magics.

        Note:
            If you are using Application Default Credentials there is no need
            to set this explicitly — it is resolved automatically on first
            access. Otherwise, construct a
            :class:`google.auth.credentials.Credentials` object manually and
            assign it, as in the example below. See the `auth docs`_ for more
            information on obtaining credentials.

        Example:
            Manually setting the context credentials:

            >>> from google.cloud.bigquery import magics
            >>> from google.oauth2 import service_account
            >>> credentials = (service_account
            ...     .Credentials.from_service_account_file(
            ...         '/path/to/key.json'))
            >>> magics.context.credentials = credentials


        .. _auth docs: http://google-auth.readthedocs.io
            /en/latest/user-guide.html#obtaining-credentials
        """
        if self._credentials is None:
            default_credentials, _ = google.auth.default()
            self._credentials = default_credentials
        return self._credentials

    @credentials.setter
    def credentials(self, value):
        self._credentials = value

    @property
    def project(self):
        """str: Default project to use for queries performed through IPython
        magics.

        Note:
            If your environment already has a default project configured,
            this does not need to be set explicitly — it is resolved
            automatically on first access. Otherwise assign it manually as
            shown below.

        Example:
            Manually setting the context project:

            >>> from google.cloud.bigquery import magics
            >>> magics.context.project = 'my-project'
        """
        if self._project is None:
            _, default_project = google.auth.default()
            self._project = default_project
        return self._project

    @project.setter
    def project(self, value):
        self._project = value

    @property
    def bigquery_client_options(self):
        """google.api_core.client_options.ClientOptions: client options used
        by the BigQuery client created for IPython magics.

        Note::
            Only needs to be set when special network connections are
            required; normally the https://bigquery.googleapis.com/ end
            point is used.

        Example:
            Manually setting the endpoint:

            >>> from google.cloud.bigquery import magics
            >>> client_options = {}
            >>> client_options['api_endpoint'] = "https://some.special.url"
            >>> magics.context.bigquery_client_options = client_options
        """
        return self._bigquery_client_options

    @bigquery_client_options.setter
    def bigquery_client_options(self, value):
        self._bigquery_client_options = value

    @property
    def bqstorage_client_options(self):
        """google.api_core.client_options.ClientOptions: client options used
        by the BigQuery Storage client created for IPython magics.

        Note::
            Only needs to be set when special network connections are
            required; normally the https://bigquerystorage.googleapis.com/
            end point is used.

        Example:
            Manually setting the endpoint:

            >>> from google.cloud.bigquery import magics
            >>> client_options = {}
            >>> client_options['api_endpoint'] = "https://some.special.url"
            >>> magics.context.bqstorage_client_options = client_options
        """
        return self._bqstorage_client_options

    @bqstorage_client_options.setter
    def bqstorage_client_options(self, value):
        self._bqstorage_client_options = value

    @property
    def default_query_job_config(self):
        """google.cloud.bigquery.job.QueryJobConfig: Default job
        configuration for queries.

        This :class:`~google.cloud.bigquery.job.QueryJobConfig` is used for
        all magic-issued queries; some of its properties can be overridden
        with arguments to the magics.

        Example:
            Manually setting the default value for ``maximum_bytes_billed``
            to 100 MB:

            >>> from google.cloud.bigquery import magics
            >>> magics.context.default_query_job_config.maximum_bytes_billed = 100000000
        """
        return self._default_query_job_config

    @default_query_job_config.setter
    def default_query_job_config(self, value):
        self._default_query_job_config = value

    @property
    def progress_bar_type(self):
        """str: Default progress bar type displayed while queries issued
        through IPython magics execute.

        Note::
            Install the ``tqdm`` package to use this feature.

        Example:
            Manually setting the progress_bar_type:

            >>> from google.cloud.bigquery import magics
            >>> magics.context.progress_bar_type = "tqdm_notebook"
        """
        return self._progress_bar_type

    @progress_bar_type.setter
    def progress_bar_type(self, value):
        self._progress_bar_type = value
|
||||
|
||||
|
||||
# If bigquery_magics is available, we load that extension rather than this one.
# Ensure google.cloud.bigquery.magics.context setters are on the correct magics
# implementation in case the user has installed the package but hasn't updated
# their code.
if bigquery_magics is not None:
    # Alias the newer package's context so settings made through this module
    # are visible to the extension that is actually loaded.
    context = bigquery_magics.context
else:
    # Fall back to this module's own session-wide context.
    context = Context()
|
||||
|
||||
|
||||
def _handle_error(error, destination_var=None):
|
||||
"""Process a query execution error.
|
||||
|
||||
Args:
|
||||
error (Exception):
|
||||
An exception that occurred during the query execution.
|
||||
destination_var (Optional[str]):
|
||||
The name of the IPython session variable to store the query job.
|
||||
"""
|
||||
if destination_var:
|
||||
query_job = getattr(error, "query_job", None)
|
||||
|
||||
if query_job is not None:
|
||||
IPython.get_ipython().push({destination_var: query_job})
|
||||
else:
|
||||
# this is the case when previewing table rows by providing just
|
||||
# table ID to cell magic
|
||||
print(
|
||||
"Could not save output to variable '{}'.".format(destination_var),
|
||||
file=sys.stderr,
|
||||
)
|
||||
|
||||
print("\nERROR:\n", str(error), file=sys.stderr)
|
||||
|
||||
|
||||
def _run_query(client, query, job_config=None):
|
||||
"""Runs a query while printing status updates
|
||||
|
||||
Args:
|
||||
client (google.cloud.bigquery.client.Client):
|
||||
Client to bundle configuration needed for API requests.
|
||||
query (str):
|
||||
SQL query to be executed. Defaults to the standard SQL dialect.
|
||||
Use the ``job_config`` parameter to change dialects.
|
||||
job_config (Optional[google.cloud.bigquery.job.QueryJobConfig]):
|
||||
Extra configuration options for the job.
|
||||
|
||||
Returns:
|
||||
google.cloud.bigquery.job.QueryJob: the query job created
|
||||
|
||||
Example:
|
||||
>>> client = bigquery.Client()
|
||||
>>> _run_query(client, "SELECT 17")
|
||||
Executing query with job ID: bf633912-af2c-4780-b568-5d868058632b
|
||||
Query executing: 1.66s
|
||||
Query complete after 2.07s
|
||||
'bf633912-af2c-4780-b568-5d868058632b'
|
||||
"""
|
||||
start_time = time.perf_counter()
|
||||
query_job = client.query(query, job_config=job_config)
|
||||
|
||||
if job_config and job_config.dry_run:
|
||||
return query_job
|
||||
|
||||
print(f"Executing query with job ID: {query_job.job_id}")
|
||||
|
||||
while True:
|
||||
print(
|
||||
f"\rQuery executing: {time.perf_counter() - start_time:.2f}s".format(),
|
||||
end="",
|
||||
)
|
||||
try:
|
||||
query_job.result(timeout=0.5)
|
||||
break
|
||||
except futures.TimeoutError:
|
||||
continue
|
||||
print(f"\nJob ID {query_job.job_id} successfully executed")
|
||||
return query_job
|
||||
|
||||
|
||||
def _create_dataset_if_necessary(client, dataset_id):
    """Create a dataset in the current project if it doesn't exist.

    Args:
        client (google.cloud.bigquery.client.Client):
            Client to bundle configuration needed for API requests.
        dataset_id (str):
            Dataset id.
    """
    dataset_reference = bigquery.dataset.DatasetReference(client.project, dataset_id)
    try:
        # The dataset already exists — nothing to do. (The previous version
        # bound the lookup result to an unused local; the return value is
        # not needed here.)
        client.get_dataset(dataset_reference)
        return
    except NotFound:
        pass
    dataset = bigquery.Dataset(dataset_reference)
    # Create the new dataset in the same location the client operates in.
    dataset.location = client.location
    print(f"Creating dataset: {dataset_id}")
    client.create_dataset(dataset)
|
||||
|
||||
|
||||
@magic_arguments.magic_arguments()
@magic_arguments.argument(
    "destination_var",
    nargs="?",
    help=("If provided, save the output to this variable instead of displaying it."),
)
@magic_arguments.argument(
    "--destination_table",
    type=str,
    default=None,
    help=(
        "If provided, save the output of the query to a new BigQuery table. "
        "Variable should be in a format <dataset_id>.<table_id>. "
        "If table does not exists, it will be created. "
        "If table already exists, its data will be overwritten."
    ),
)
@magic_arguments.argument(
    "--project",
    type=str,
    default=None,
    help=("Project to use for executing this query. Defaults to the context project."),
)
@magic_arguments.argument(
    "--max_results",
    default=None,
    help=(
        "Maximum number of rows in dataframe returned from executing the query."
        "Defaults to returning all rows."
    ),
)
@magic_arguments.argument(
    "--maximum_bytes_billed",
    default=None,
    help=(
        "maximum_bytes_billed to use for executing this query. Defaults to "
        "the context default_query_job_config.maximum_bytes_billed."
    ),
)
@magic_arguments.argument(
    "--dry_run",
    action="store_true",
    default=False,
    help=(
        "Sets query to be a dry run to estimate costs. "
        "Defaults to executing the query instead of dry run if this argument is not used."
    ),
)
@magic_arguments.argument(
    "--use_legacy_sql",
    action="store_true",
    default=False,
    help=(
        "Sets query to use Legacy SQL instead of Standard SQL. Defaults to "
        "Standard SQL if this argument is not used."
    ),
)
@magic_arguments.argument(
    "--bigquery_api_endpoint",
    type=str,
    default=None,
    help=(
        "The desired API endpoint, e.g., bigquery.googlepis.com. Defaults to this "
        "option's value in the context bigquery_client_options."
    ),
)
@magic_arguments.argument(
    "--bqstorage_api_endpoint",
    type=str,
    default=None,
    help=(
        "The desired API endpoint, e.g., bigquerystorage.googlepis.com. Defaults to "
        "this option's value in the context bqstorage_client_options."
    ),
)
@magic_arguments.argument(
    "--no_query_cache",
    action="store_true",
    default=False,
    help=("Do not use cached query results."),
)
@magic_arguments.argument(
    "--use_bqstorage_api",
    action="store_true",
    default=None,
    help=(
        "[Deprecated] The BigQuery Storage API is already used by default to "
        "download large query results, and this option has no effect. "
        "If you want to switch to the classic REST API instead, use the "
        "--use_rest_api option."
    ),
)
@magic_arguments.argument(
    "--use_rest_api",
    action="store_true",
    default=False,
    help=(
        "Use the classic REST API instead of the BigQuery Storage API to "
        "download query results."
    ),
)
@magic_arguments.argument(
    "--verbose",
    action="store_true",
    default=False,
    help=(
        "If set, print verbose output, including the query job ID and the "
        "amount of time for the query to finish. By default, this "
        "information will be displayed as the query runs, but will be "
        "cleared after the query is finished."
    ),
)
@magic_arguments.argument(
    "--params",
    nargs="+",
    default=None,
    help=(
        "Parameters to format the query string. If present, the --params "
        "flag should be followed by a string representation of a dictionary "
        "in the format {'param_name': 'param_value'} (ex. {\"num\": 17}), "
        "or a reference to a dictionary in the same format. The dictionary "
        "reference can be made by including a '$' before the variable "
        "name (ex. $my_dict_var)."
    ),
)
@magic_arguments.argument(
    "--progress_bar_type",
    type=str,
    default=None,
    help=(
        "Sets progress bar type to display a progress bar while executing the query."
        "Defaults to use tqdm_notebook. Install the ``tqdm`` package to use this feature."
    ),
)
@magic_arguments.argument(
    "--location",
    type=str,
    default=None,
    help=(
        "Set the location to execute query."
        "Defaults to location set in query setting in console."
    ),
)
def _cell_magic(line, query):
    """Underlying function for bigquery cell magic

    Note:
        This function contains the underlying logic for the 'bigquery' cell
        magic. This function is not meant to be called directly.

    Args:
        line (str): "%%bigquery" followed by arguments as required
        query (str): SQL query to run

    Returns:
        pandas.DataFrame: the query results. When ``destination_var`` is
        given, the result is pushed into the IPython user namespace instead
        and ``None`` is returned.
    """
    # The built-in parser does not recognize Python structures such as dicts, thus
    # we extract the "--params" option and interpret it separately, re-raising
    # parser errors as user-friendly built-in exception types.
    try:
        params_option_value, rest_of_args = _split_args_line(line)
    except lap.exceptions.QueryParamsParseError as exc:
        rebranded_error = SyntaxError(
            "--params is not a correctly formatted JSON string or a JSON "
            "serializable dictionary"
        )
        raise rebranded_error from exc
    except lap.exceptions.DuplicateQueryParamsError as exc:
        rebranded_error = ValueError("Duplicate --params option.")
        raise rebranded_error from exc
    except lap.exceptions.ParseError as exc:
        rebranded_error = ValueError(
            "Unrecognized input, are option values correct? "
            "Error details: {}".format(exc.args[0])
        )
        raise rebranded_error from exc

    # Everything except --params can be handled by IPython's built-in parser.
    args = magic_arguments.parse_argstring(_cell_magic, rest_of_args)

    if args.use_bqstorage_api is not None:
        warnings.warn(
            "Deprecated option --use_bqstorage_api, the BigQuery "
            "Storage API is already used by default.",
            category=DeprecationWarning,
        )
    # The deprecated flag is ignored; only --use_rest_api disables the
    # Storage API download path.
    use_bqstorage_api = not args.use_rest_api
    location = args.location

    params = []
    if params_option_value:
        # A non-existing params variable is not expanded and ends up in the input
        # in its raw form, e.g. "$query_params".
        if params_option_value.startswith("$"):
            msg = 'Parameter expansion failed, undefined variable "{}".'.format(
                params_option_value[1:]
            )
            raise NameError(msg)

        params = _helpers.to_query_parameters(ast.literal_eval(params_option_value), {})

    project = args.project or context.project

    # Deep-copy so per-invocation endpoint overrides don't leak into the
    # shared session context.
    bigquery_client_options = copy.deepcopy(context.bigquery_client_options)
    if args.bigquery_api_endpoint:
        if isinstance(bigquery_client_options, dict):
            bigquery_client_options["api_endpoint"] = args.bigquery_api_endpoint
        else:
            bigquery_client_options.api_endpoint = args.bigquery_api_endpoint

    client = bigquery.Client(
        project=project,
        credentials=context.credentials,
        default_query_job_config=context.default_query_job_config,
        client_info=client_info.ClientInfo(user_agent=IPYTHON_USER_AGENT),
        client_options=bigquery_client_options,
        location=location,
    )
    if context._connection:
        client._connection = context._connection

    bqstorage_client_options = copy.deepcopy(context.bqstorage_client_options)
    if args.bqstorage_api_endpoint:
        if isinstance(bqstorage_client_options, dict):
            bqstorage_client_options["api_endpoint"] = args.bqstorage_api_endpoint
        else:
            bqstorage_client_options.api_endpoint = args.bqstorage_api_endpoint

    bqstorage_client = _make_bqstorage_client(
        client,
        use_bqstorage_api,
        bqstorage_client_options,
    )

    # Ensure transports are closed on every exit path (see finally below).
    close_transports = functools.partial(_close_transports, client, bqstorage_client)

    try:
        if args.max_results:
            max_results = int(args.max_results)
        else:
            max_results = None

        query = query.strip()

        if not query:
            error = ValueError("Query is missing.")
            _handle_error(error, args.destination_var)
            return

        # Check if query is given as a reference to a variable.
        if query.startswith("$"):
            query_var_name = query[1:]

            if not query_var_name:
                missing_msg = 'Missing query variable name, empty "$" is not allowed.'
                raise NameError(missing_msg)

            if query_var_name.isidentifier():
                ip = IPython.get_ipython()
                query = ip.user_ns.get(query_var_name, ip)  # ip serves as a sentinel

                if query is ip:
                    raise NameError(
                        f"Unknown query, variable {query_var_name} does not exist."
                    )
                else:
                    if not isinstance(query, (str, bytes)):
                        raise TypeError(
                            f"Query variable {query_var_name} must be a string "
                            "or a bytes-like value."
                        )

        # Any query that does not contain whitespace (aside from leading and trailing whitespace)
        # is assumed to be a table id
        if not re.search(r"\s", query):
            try:
                rows = client.list_rows(query, max_results=max_results)
            except Exception as ex:
                _handle_error(ex, args.destination_var)
                return

            result = rows.to_dataframe(
                bqstorage_client=bqstorage_client,
                create_bqstorage_client=False,
            )
            if args.destination_var:
                IPython.get_ipython().push({args.destination_var: result})
                return
            else:
                return result

        job_config = bigquery.job.QueryJobConfig()
        job_config.query_parameters = params
        job_config.use_legacy_sql = args.use_legacy_sql
        job_config.dry_run = args.dry_run

        # Don't override context job config unless --no_query_cache is explicitly set.
        if args.no_query_cache:
            job_config.use_query_cache = False

        if args.destination_table:
            split = args.destination_table.split(".")
            if len(split) != 2:
                raise ValueError(
                    "--destination_table should be in a <dataset_id>.<table_id> format."
                )
            dataset_id, table_id = split
            job_config.allow_large_results = True
            dataset_ref = bigquery.dataset.DatasetReference(client.project, dataset_id)
            destination_table_ref = dataset_ref.table(table_id)
            job_config.destination = destination_table_ref
            job_config.create_disposition = "CREATE_IF_NEEDED"
            job_config.write_disposition = "WRITE_TRUNCATE"
            _create_dataset_if_necessary(client, dataset_id)

        # The literal string "None" maps to 0 — presumably meaning "no
        # limit" on bytes billed; TODO confirm against QueryJobConfig docs.
        if args.maximum_bytes_billed == "None":
            job_config.maximum_bytes_billed = 0
        elif args.maximum_bytes_billed is not None:
            value = int(args.maximum_bytes_billed)
            job_config.maximum_bytes_billed = value

        try:
            query_job = _run_query(client, query, job_config=job_config)
        except Exception as ex:
            _handle_error(ex, args.destination_var)
            return

        if not args.verbose:
            display.clear_output()

        if args.dry_run and args.destination_var:
            IPython.get_ipython().push({args.destination_var: query_job})
            return
        elif args.dry_run:
            print(
                "Query validated. This query will process {} bytes.".format(
                    query_job.total_bytes_processed
                )
            )
            return query_job

        # NOTE(review): the context value takes precedence here, and the
        # context default ("tqdm_notebook") is truthy, so --progress_bar_type
        # can seemingly never take effect — confirm whether args should win.
        progress_bar = context.progress_bar_type or args.progress_bar_type

        if max_results:
            # With max_results, download goes through the REST client only
            # (bqstorage_client=None) — presumably because the Storage API
            # path does not honor max_results; TODO confirm.
            result = query_job.result(max_results=max_results).to_dataframe(
                bqstorage_client=None,
                create_bqstorage_client=False,
                progress_bar_type=progress_bar,
            )
        else:
            result = query_job.to_dataframe(
                bqstorage_client=bqstorage_client,
                create_bqstorage_client=False,
                progress_bar_type=progress_bar,
            )

        if args.destination_var:
            IPython.get_ipython().push({args.destination_var: result})
        else:
            return result
    finally:
        close_transports()
|
||||
|
||||
|
||||
def _split_args_line(line):
    """Split out the --params option value from the input line arguments.

    Args:
        line (str): The line arguments passed to the cell magic.

    Returns:
        Tuple[str, str]
    """
    # Tokenize and parse the raw magic line, then walk the resulting parse
    # tree to separate the "--params" value from every other argument.
    parser = lap.Parser(lap.Lexer(line))
    tree = parser.input_line()

    return lap.QueryParamsExtractor().visit(tree)
|
||||
|
||||
|
||||
def _make_bqstorage_client(client, use_bqstorage_api, client_options):
|
||||
"""Creates a BigQuery Storage client.
|
||||
|
||||
Args:
|
||||
client (:class:`~google.cloud.bigquery.client.Client`): BigQuery client.
|
||||
use_bqstorage_api (bool): whether BigQuery Storage API is used or not.
|
||||
client_options (:class:`google.api_core.client_options.ClientOptions`):
|
||||
Custom options used with a new BigQuery Storage client instance
|
||||
if one is created.
|
||||
|
||||
Raises:
|
||||
ImportError: if google-cloud-bigquery-storage is not installed, or
|
||||
grpcio package is not installed.
|
||||
|
||||
|
||||
Returns:
|
||||
None: if ``use_bqstorage_api == False``, or google-cloud-bigquery-storage
|
||||
is outdated.
|
||||
BigQuery Storage Client:
|
||||
"""
|
||||
if not use_bqstorage_api:
|
||||
return None
|
||||
|
||||
try:
|
||||
_versions_helpers.BQ_STORAGE_VERSIONS.try_import(raise_if_error=True)
|
||||
except exceptions.BigQueryStorageNotFoundError as err:
|
||||
customized_error = ImportError(
|
||||
"The default BigQuery Storage API client cannot be used, install "
|
||||
"the missing google-cloud-bigquery-storage and pyarrow packages "
|
||||
"to use it. Alternatively, use the classic REST API by specifying "
|
||||
"the --use_rest_api magic option."
|
||||
)
|
||||
raise customized_error from err
|
||||
except exceptions.LegacyBigQueryStorageError:
|
||||
pass
|
||||
|
||||
try:
|
||||
from google.api_core.gapic_v1 import client_info as gapic_client_info
|
||||
except ImportError as err:
|
||||
customized_error = ImportError(
|
||||
"Install the grpcio package to use the BigQuery Storage API."
|
||||
)
|
||||
raise customized_error from err
|
||||
|
||||
return client._ensure_bqstorage_client(
|
||||
client_options=client_options,
|
||||
client_info=gapic_client_info.ClientInfo(user_agent=IPYTHON_USER_AGENT),
|
||||
)
|
||||
|
||||
|
||||
def _close_transports(client, bqstorage_client):
|
||||
"""Close the given clients' underlying transport channels.
|
||||
|
||||
Closing the transport is needed to release system resources, namely open
|
||||
sockets.
|
||||
|
||||
Args:
|
||||
client (:class:`~google.cloud.bigquery.client.Client`):
|
||||
bqstorage_client
|
||||
(Optional[:class:`~google.cloud.bigquery_storage.BigQueryReadClient`]):
|
||||
A client for the BigQuery Storage API.
|
||||
|
||||
"""
|
||||
client.close()
|
||||
if bqstorage_client is not None:
|
||||
bqstorage_client._transport.grpc_channel.close()
|
||||
Reference in New Issue
Block a user