structure saas with tools
@@ -0,0 +1,91 @@ vertexai/resources/preview/__init__.py (path inferred from the module docstring and exports)
# -*- coding: utf-8 -*-

# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""The vertexai resources preview module."""

from google.cloud.aiplatform.preview.jobs import (
    CustomJob,
    HyperparameterTuningJob,
)
from google.cloud.aiplatform.preview.models import (
    Prediction,
    DeploymentResourcePool,
    Endpoint,
    Model,
)
from google.cloud.aiplatform.preview.featurestore.entity_type import (
    EntityType,
)
from google.cloud.aiplatform.preview.persistent_resource import (
    PersistentResource,
)
from google.cloud.aiplatform.preview.pipelinejobschedule.pipeline_job_schedules import (
    PipelineJobSchedule,
)

from vertexai.resources.preview.feature_store import (
    Feature,
    FeatureGroup,
    FeatureGroupBigQuerySource,
    FeatureMonitor,
    FeatureOnlineStore,
    FeatureOnlineStoreType,
    FeatureView,
    FeatureViewBigQuerySource,
    FeatureViewReadResponse,
    FeatureViewRegistrySource,
    FeatureViewVertexRagSource,
    IndexConfig,
    TreeAhConfig,
    BruteForceConfig,
    DistanceMeasureType,
    AlgorithmConfig,
)

from vertexai.resources.preview.ml_monitoring import (
    ModelMonitor,
    ModelMonitoringJob,
)

__all__ = (
    "CustomJob",
    "HyperparameterTuningJob",
    "Prediction",
    "DeploymentResourcePool",
    "Endpoint",
    "Model",
    "PersistentResource",
    "EntityType",
    "PipelineJobSchedule",
    "Feature",
    "FeatureGroup",
    "FeatureGroupBigQuerySource",
    "FeatureMonitor",
    "FeatureOnlineStoreType",
    "FeatureOnlineStore",
    "FeatureView",
    "FeatureViewBigQuerySource",
    "FeatureViewReadResponse",
    "FeatureViewVertexRagSource",
    "FeatureViewRegistrySource",
    "IndexConfig",
    "TreeAhConfig",
    "BruteForceConfig",
    "DistanceMeasureType",
    "AlgorithmConfig",
    "ModelMonitor",
    "ModelMonitoringJob",
)
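To orient reviewers, a minimal usage sketch of the surface this `__init__` re-exports. All project and resource IDs below are placeholders, not part of this commit:

    # Sketch only: assumes a configured GCP project; IDs are placeholders.
    from google.cloud import aiplatform
    import vertexai.resources.preview as preview

    aiplatform.init(project="my-project", location="us-central1")

    fg = preview.FeatureGroup("my_fg")  # look up an existing feature group
    features = fg.list_features()       # Feature resources under the group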
@@ -0,0 +1,71 @@ vertexai/resources/preview/feature_store/__init__.py (path inferred from the imports)
# -*- coding: utf-8 -*-

# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""The vertexai resources preview module."""

from vertexai.resources.preview.feature_store.feature import (
    Feature,
)

from vertexai.resources.preview.feature_store.feature_group import (
    FeatureGroup,
)

from vertexai.resources.preview.feature_store.feature_monitor import (
    FeatureMonitor,
)

from vertexai.resources.preview.feature_store.feature_online_store import (
    FeatureOnlineStore,
    FeatureOnlineStoreType,
)

from vertexai.resources.preview.feature_store.feature_view import (
    FeatureView,
)

from vertexai.resources.preview.feature_store.utils import (
    FeatureGroupBigQuerySource,
    FeatureViewBigQuerySource,
    FeatureViewReadResponse,
    FeatureViewVertexRagSource,
    FeatureViewRegistrySource,
    IndexConfig,
    TreeAhConfig,
    BruteForceConfig,
    DistanceMeasureType,
    AlgorithmConfig,
)

__all__ = (
    "Feature",
    "FeatureGroup",
    "FeatureGroupBigQuerySource",
    "FeatureMonitor",
    "FeatureOnlineStoreType",
    "FeatureOnlineStore",
    "FeatureView",
    "FeatureViewBigQuerySource",
    "FeatureViewReadResponse",
    "FeatureViewVertexRagSource",
    "FeatureViewRegistrySource",
    "IndexConfig",
    "TreeAhConfig",
    "BruteForceConfig",
    "DistanceMeasureType",
    "AlgorithmConfig",
)
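The index types exported above compose roughly as follows. This is a hedged sketch: the exact `IndexConfig` fields live in `feature_store/utils.py`, which is not shown in this diff, and the column name and dimension are invented:

    # Hedged sketch: IndexConfig fields assumed; column name/width invented.
    from vertexai.resources.preview.feature_store import (
        IndexConfig,
        TreeAhConfig,
        DistanceMeasureType,
    )

    index_config = IndexConfig(
        embedding_column="embedding",     # assumed embedding column
        dimensions=768,                   # assumed embedding width
        algorithm_config=TreeAhConfig(),  # approximate (tree-AH) search; BruteForceConfig is the exact alternative
        distance_measure_type=DistanceMeasureType.DOT_PRODUCT_DISTANCE,
    )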
@@ -0,0 +1,190 @@ feature_store point-in-time lookup (PITL) query helpers (file path not shown in this diff)
# -*- coding: utf-8 -*-

# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import copy
import textwrap
from dataclasses import dataclass
from typing import Optional, List


@dataclass
class DataSource:
    """An object representing a data source - both the entity DataFrame and any feature data.

    Contains helpers for use with SQL templating.
    """

    def __init__(
        self,
        qualifying_name: str,
        sql: str,
        data_columns: List[str],
        timestamp_column: str,
        entity_id_columns: Optional[List[str]] = None,
    ):
        """Initializes a DataSource object.

        Args:
            qualifying_name:
                A unique name used to qualify the data in the PITL query.
            sql:
                SQL query representing the data source.
            data_columns:
                Columns other than the entity ID column(s) and timestamp column.
            timestamp_column:
                The column that holds feature timestamp data.
            entity_id_columns:
                The column(s) that hold entity IDs. Should not be populated for
                the entity DataFrame.
        """
        self.qualifying_name = qualifying_name
        self._sql = sql
        self.data_columns = data_columns
        self.timestamp_column = timestamp_column
        self.entity_id_columns = entity_id_columns

    def copy_with_pitl_suffix(self) -> "DataSource":
        data_source = copy.copy(self)
        data_source.qualifying_name += "_pitl"
        return data_source

    @property
    def sql(self):
        return self._sql

    @property
    def comma_separated_qualified_data_columns(self):
        return ", ".join(
            [self.qualifying_name + "." + col for col in self.data_columns]
        )

    @property
    def comma_separated_name_qualified_all_non_timestamp_columns(self):
        """Same as `comma_separated_qualified_data_columns` but including the entity ID columns."""
        all_columns = self.data_columns.copy()
        if self.entity_id_columns:
            all_columns += self.entity_id_columns
        return ", ".join([self.qualifying_name + "." + col for col in all_columns])

    @property
    def qualified_timestamp_column(self) -> str:
        """Returns the name-qualified timestamp column, e.g. `name.feature_timestamp`."""
        return f"{self.qualifying_name}.{self.timestamp_column}"


def _generate_eid_check(entity_data: DataSource, feature: DataSource):
    """Generates an equality check for entity columns of `feature` against matching columns in `entity_data`."""
    e_cols = set(entity_data.data_columns)
    f_cols = feature.entity_id_columns
    assert f_cols

    equal_statements = []
    for col in f_cols:
        if col not in e_cols:
            raise ValueError(
                f"Feature entity ID column '{col}' should be a column in the entity DataFrame."
            )
        equal_statements.append(
            f"{entity_data.qualifying_name}.{col} = {feature.qualifying_name}.{col}"
        )

    return " AND\n".join(equal_statements)


# Args:
#   textwrap: Module
#   generate_eid_check: function (above)
#   entity_data: DataSource
#   feature_data: List[DataSource]
_PITL_QUERY_TEMPLATE_RAW = """WITH
{{ entity_data.qualifying_name }}_without_row_num AS (
{{ textwrap.indent(entity_data.sql, ' ' * 4) }}
),
{{ entity_data.qualifying_name }} AS (
    SELECT *, ROW_NUMBER() OVER() AS row_num,
    FROM {{ entity_data.qualifying_name }}_without_row_num
),

# Features
{% for feature_data_elem in feature_data %}
{{ feature_data_elem.qualifying_name }} AS (
{{ textwrap.indent(feature_data_elem.sql, ' ' * 4) }}
),
{% endfor %}

# Features with PITL
{% for feature_data_elem in feature_data %}
{{ feature_data_elem.qualifying_name }}_pitl AS (
    SELECT
        {{ entity_data.qualifying_name }}.row_num,
        {{ feature_data_elem.comma_separated_qualified_data_columns }},
    FROM {{ entity_data.qualifying_name }}
    LEFT JOIN {{ feature_data_elem.qualifying_name }}
    ON (
{{ textwrap.indent(generate_eid_check(entity_data, feature_data_elem) + ' AND', ' ' * 6) }}
        CAST({{ feature_data_elem.qualified_timestamp_column }} AS TIMESTAMP) <= CAST({{ entity_data.qualified_timestamp_column }} AS TIMESTAMP)
    )
    QUALIFY ROW_NUMBER() OVER (PARTITION BY {{ entity_data.qualifying_name }}.row_num ORDER BY {{ feature_data_elem.qualified_timestamp_column }} DESC) = 1
){{ ',' if not loop.last else '' }}
{% endfor %}


SELECT
    {{ entity_data.comma_separated_name_qualified_all_non_timestamp_columns }},
{% for feature_data_elem in feature_data %}
{% set feature_pitl = feature_data_elem.copy_with_pitl_suffix() %}
    {{ feature_pitl.comma_separated_qualified_data_columns }},
{% endfor %}
    {{ entity_data.qualified_timestamp_column }}

FROM {{ entity_data.qualifying_name }}
{% for feature_data_elem in feature_data %}
JOIN {{ feature_data_elem.qualifying_name }}_pitl USING (row_num)
{% endfor %}
"""


def pitl_query_template():
    try:
        import jinja2
    except ImportError as exc:
        raise ImportError(
            "`Jinja2` is not installed but required for this functionality."
        ) from exc

    return jinja2.Environment(
        loader=jinja2.BaseLoader, lstrip_blocks=True, trim_blocks=True
    ).from_string(_PITL_QUERY_TEMPLATE_RAW)


def render_pitl_query(entity_data: DataSource, feature_data: List[DataSource]):
    """Renders the point-in-time lookup (PITL) query from its Jinja template.

    The template is rendered with the following arguments:
        textwrap: The Python textwrap module.
        generate_eid_check: Helper that emits the entity ID equality checks.
        entity_data (DataSource): The entity data(frame) as a SQL source.
        feature_data (List[DataSource]): The feature data as SQL sources.
    """
    return pitl_query_template().render(
        textwrap=textwrap,
        generate_eid_check=_generate_eid_check,
        entity_data=entity_data,
        feature_data=feature_data,
    )
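A minimal sketch of how these helpers compose. Table and column names are invented for illustration, and Jinja2 must be installed:

    # Illustrative only: table and column names are invented.
    entity_df = DataSource(
        qualifying_name="entity_df",
        sql="SELECT user_id, event_time FROM `proj.ds.labels`",
        data_columns=["user_id"],
        timestamp_column="event_time",
    )
    clicks = DataSource(
        qualifying_name="clicks",
        sql="SELECT user_id, clicks_7d, feature_timestamp FROM `proj.ds.clicks`",
        data_columns=["clicks_7d"],
        timestamp_column="feature_timestamp",
        entity_id_columns=["user_id"],
    )
    # Yields BigQuery SQL that, for each entity row, keeps the latest feature
    # row whose timestamp is <= that entity row's timestamp.
    sql = render_pitl_query(entity_data=entity_df, feature_data=[clicks])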
@@ -0,0 +1,151 @@ vertexai/resources/preview/feature_store/feature.py (path inferred from the imports in the package __init__)
# -*- coding: utf-8 -*-

# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import re
from typing import List, Optional
from google.auth import credentials as auth_credentials
from google.cloud.aiplatform import base
from google.cloud.aiplatform import utils
from google.cloud.aiplatform.compat.types import (
    feature as gca_feature,
    feature_monitor_v1beta1 as gca_feature_monitor,
    feature_v1beta1 as gca_feature_v1beta1,
    featurestore_service_v1beta1 as gca_featurestore_service_v1beta1,
)


class Feature(base.VertexAiResourceNounWithFutureManager):
    """Class for managing Feature resources."""

    client_class = utils.FeatureRegistryClientWithOverride

    _resource_noun = "features"
    _getter_method = "get_feature"
    _list_method = "list_features"
    _delete_method = "delete_feature"
    _parse_resource_name_method = "parse_feature_path"
    _format_resource_name_method = "feature_path"
    _gca_resource: gca_feature.Feature

    def __init__(
        self,
        name: str,
        feature_group_id: Optional[str] = None,
        project: Optional[str] = None,
        location: Optional[str] = None,
        latest_stats_count: Optional[int] = None,
        credentials: Optional[auth_credentials.Credentials] = None,
    ):
        """Retrieves an existing managed feature.

        Args:
            name:
                The resource name
                (`projects/.../locations/.../featureGroups/.../features/...`) or
                ID.
            feature_group_id:
                The feature group ID. Must be passed in if name is an ID and not
                a resource path.
            project:
                Project to retrieve feature from. If not set, the project set in
                aiplatform.init will be used.
            location:
                Location to retrieve feature from. If not set, the location set
                in aiplatform.init will be used.
            latest_stats_count:
                The number of latest stats to retrieve with the feature. If
                set, the feature is fetched through the v1beta1 API so that its
                latest stats and anomalies are populated.
            credentials:
                Custom credentials to use to retrieve this feature. Overrides
                credentials set in aiplatform.init.
        """

        super().__init__(
            project=project,
            location=location,
            credentials=credentials,
            resource_name=name,
        )

        if re.fullmatch(
            r"projects/.+/locations/.+/featureGroups/.+/features/.+",
            name,
        ):
            if feature_group_id:
                raise ValueError(
                    f"Since feature '{name}' is provided as a path, feature_group_id should not be specified."
                )
            feature = name
        else:
            from .feature_group import FeatureGroup

            # Construct the feature path from the feature group ID if only
            # the feature ID is provided.
            if not feature_group_id:
                raise ValueError(
                    f"Since feature '{name}' is not provided as a path, please specify feature_group_id."
                )

            feature_group_path = utils.full_resource_name(
                resource_name=feature_group_id,
                resource_noun=FeatureGroup._resource_noun,
                parse_resource_name_method=FeatureGroup._parse_resource_name,
                format_resource_name_method=FeatureGroup._format_resource_name,
            )

            feature = f"{feature_group_path}/features/{name}"

        if latest_stats_count is not None:
            api_client = self.__class__._instantiate_client(
                location=location, credentials=credentials
            )

            feature_obj: gca_feature_v1beta1.Feature = api_client.select_version(
                "v1beta1"
            ).get_feature(
                request=gca_featurestore_service_v1beta1.GetFeatureRequest(
                    name=feature,
                    feature_stats_and_anomaly_spec=gca_feature_monitor.FeatureStatsAndAnomalySpec(
                        latest_stats_count=latest_stats_count
                    ),
                )
            )
            self._gca_resource = feature_obj
        else:
            self._gca_resource = self._get_gca_resource(resource_name=feature)

    @property
    def version_column_name(self) -> str:
        """The name of the BigQuery Table/View column hosting data for this version."""
        return self._gca_resource.version_column_name

    @property
    def description(self) -> str:
        """The description of the feature."""
        return self._gca_resource.description

    @property
    def point_of_contact(self) -> str:
        """The point of contact for the feature."""
        return self._gca_resource.point_of_contact

    @property
    def feature_stats_and_anomalies(
        self,
    ) -> List[gca_feature_monitor.FeatureStatsAndAnomaly]:
        """The latest feature stats and anomalies.

        Only populated when the feature was retrieved with latest_stats_count set.
        """
        return self._gca_resource.feature_stats_and_anomaly
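The constructor above supports two retrieval paths; a short sketch (all IDs are placeholders, not part of this commit):

    # Placeholders: project, feature group, and feature IDs are invented.
    # 1) Fully qualified resource path - feature_group_id must be omitted.
    f1 = Feature(
        "projects/my-project/locations/us-central1/featureGroups/my_fg/features/my_feature"
    )

    # 2) Bare ID - feature_group_id is required to build the path.
    f2 = Feature("my_feature", feature_group_id="my_fg")

    # With latest_stats_count, retrieval goes through the v1beta1 API and
    # feature_stats_and_anomalies is populated (requires a Feature Monitor
    # with generated stats).
    f3 = Feature("my_feature", feature_group_id="my_fg", latest_stats_count=3)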
@@ -0,0 +1,592 @@ vertexai/resources/preview/feature_store/feature_group.py (path inferred from the imports in the package __init__)
# -*- coding: utf-8 -*-

# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from typing import Dict, List, Optional, Sequence, Tuple
from google.auth import credentials as auth_credentials
from google.cloud.aiplatform import base, initializer
from google.cloud.aiplatform import utils
from google.cloud.aiplatform.compat.types import (
    feature as gca_feature,
    feature_group as gca_feature_group,
    io as gca_io,
    feature_monitor_v1beta1 as gca_feature_monitor,
)
from vertexai.resources.preview.feature_store.utils import (
    FeatureGroupBigQuerySource,
)
from vertexai.resources.preview.feature_store import (
    Feature,
)
from vertexai.resources.preview.feature_store.feature_monitor import (
    FeatureMonitor,
)


_LOGGER = base.Logger(__name__)


class FeatureGroup(base.VertexAiResourceNounWithFutureManager):
    """Class for managing Feature Group resources."""

    client_class = utils.FeatureRegistryClientWithOverride

    _resource_noun = "feature_groups"
    _getter_method = "get_feature_group"
    _list_method = "list_feature_groups"
    _delete_method = "delete_feature_group"
    _parse_resource_name_method = "parse_feature_group_path"
    _format_resource_name_method = "feature_group_path"
    _gca_resource: gca_feature_group.FeatureGroup

    def __init__(
        self,
        name: str,
        project: Optional[str] = None,
        location: Optional[str] = None,
        credentials: Optional[auth_credentials.Credentials] = None,
    ):
        """Retrieves an existing managed feature group.

        Args:
            name:
                The resource name
                (`projects/.../locations/.../featureGroups/...`) or ID.
            project:
                Project to retrieve feature group from. If unset, the
                project set in aiplatform.init will be used.
            location:
                Location to retrieve feature group from. If not set,
                location set in aiplatform.init will be used.
            credentials:
                Custom credentials to use to retrieve this feature group.
                Overrides credentials set in aiplatform.init.
        """

        super().__init__(
            project=project,
            location=location,
            credentials=credentials,
            resource_name=name,
        )

        self._gca_resource = self._get_gca_resource(resource_name=name)

    @classmethod
    def create(
        cls,
        name: str,
        source: Optional[FeatureGroupBigQuerySource] = None,
        labels: Optional[Dict[str, str]] = None,
        description: Optional[str] = None,
        project: Optional[str] = None,
        location: Optional[str] = None,
        credentials: Optional[auth_credentials.Credentials] = None,
        request_metadata: Optional[Sequence[Tuple[str, str]]] = None,
        create_request_timeout: Optional[float] = None,
        sync: bool = True,
    ) -> "FeatureGroup":
        """Creates a new feature group.

        Args:
            name: The name of the feature group.
            source: The BigQuery source of the feature group.
            labels:
                The labels with user-defined metadata to organize your
                FeatureGroup.

                Label keys and values can be no longer than 64 characters
                (Unicode codepoints), can only contain lowercase letters,
                numeric characters, underscores and dashes. International
                characters are allowed.

                See https://goo.gl/xmQnxf for more information on and
                examples of labels. No more than 64 user labels can be
                associated with one FeatureGroup (System labels are
                excluded). System reserved label keys are prefixed with
                "aiplatform.googleapis.com/" and are immutable.
            description: Description of the FeatureGroup.
            project:
                Project to create feature group in. If unset, the project set in
                aiplatform.init will be used.
            location:
                Location to create feature group in. If not set, location set in
                aiplatform.init will be used.
            credentials:
                Custom credentials to use to create this feature group.
                Overrides credentials set in aiplatform.init.
            request_metadata:
                Strings which should be sent along with the request as metadata.
            create_request_timeout:
                The timeout for the create request in seconds.
            sync:
                Whether to execute this creation synchronously. If False, this
                method will be executed in a concurrent Future and any downstream
                object will be immediately returned and synced when the Future
                has completed.

        Returns:
            FeatureGroup - the FeatureGroup resource object.
        """

        if not source:
            raise ValueError("Please specify a valid source.")

        # Only BigQuery source is supported right now.
        if not isinstance(source, FeatureGroupBigQuerySource):
            raise ValueError("Only FeatureGroupBigQuerySource is a supported source.")

        # BigQuery source validation.
        if not source.uri:
            raise ValueError("Please specify URI in BigQuery source.")

        if not source.entity_id_columns:
            _LOGGER.info(
                "No entity ID columns specified in BigQuery source. Defaulting to ['entity_id']."
            )
            entity_id_columns = ["entity_id"]
        else:
            entity_id_columns = source.entity_id_columns

        gapic_feature_group = gca_feature_group.FeatureGroup(
            big_query=gca_feature_group.FeatureGroup.BigQuery(
                big_query_source=gca_io.BigQuerySource(input_uri=source.uri),
                entity_id_columns=entity_id_columns,
            ),
            name=name,
            description=description,
        )

        if labels:
            utils.validate_labels(labels)
            gapic_feature_group.labels = labels

        if request_metadata is None:
            request_metadata = ()

        api_client = cls._instantiate_client(location=location, credentials=credentials)

        create_feature_group_lro = api_client.create_feature_group(
            parent=initializer.global_config.common_location_path(
                project=project, location=location
            ),
            feature_group=gapic_feature_group,
            feature_group_id=name,
            metadata=request_metadata,
            timeout=create_request_timeout,
        )

        _LOGGER.log_create_with_lro(cls, create_feature_group_lro)

        created_feature_group = create_feature_group_lro.result()

        _LOGGER.log_create_complete(cls, created_feature_group, "feature_group")

        feature_group_obj = cls(
            name=created_feature_group.name,
            project=project,
            location=location,
            credentials=credentials,
        )

        return feature_group_obj

    @base.optional_sync()
    def delete(self, force: bool = False, sync: bool = True) -> None:
        """Deletes this feature group.

        WARNING: This deletion is permanent.

        Args:
            force:
                If set to True, any Features under this FeatureGroup will also
                be deleted. (Otherwise, the request will only work if the
                FeatureGroup has no Features.)
            sync:
                Whether to execute this deletion synchronously. If False, this
                method will be executed in a concurrent Future and any downstream
                object will be immediately returned and synced when the Future
                has completed.
        """

        lro = getattr(self.api_client, self._delete_method)(
            name=self.resource_name,
            force=force,
        )
        _LOGGER.log_delete_with_lro(self, lro)
        lro.result()
        _LOGGER.log_delete_complete(self)

    def get_feature(
        self,
        feature_id: str,
        latest_stats_count: Optional[int] = None,
        credentials: Optional[auth_credentials.Credentials] = None,
    ) -> Feature:
        """Retrieves an existing managed feature.

        Args:
            feature_id: The ID of the feature.
            latest_stats_count:
                The number of latest stats to retrieve. Stats are only returned
                if a Feature Monitor has been created and historical stats were
                generated.
            credentials:
                Custom credentials to use to retrieve the feature under this
                feature group. The order in which credentials are used is as
                follows: (1) this parameter (2) credentials passed to the
                FeatureGroup constructor (3) credentials set in aiplatform.init.

        Returns:
            Feature - the Feature resource object under this feature group.
        """
        credentials = (
            credentials or self.credentials or initializer.global_config.credentials
        )
        if latest_stats_count is not None:
            return Feature(
                name=f"{self.resource_name}/features/{feature_id}",
                latest_stats_count=latest_stats_count,
                credentials=credentials,
            )
        return Feature(
            f"{self.resource_name}/features/{feature_id}", credentials=credentials
        )

    def create_feature(
        self,
        name: str,
        version_column_name: Optional[str] = None,
        description: Optional[str] = None,
        labels: Optional[Dict[str, str]] = None,
        point_of_contact: Optional[str] = None,
        project: Optional[str] = None,
        location: Optional[str] = None,
        credentials: Optional[auth_credentials.Credentials] = None,
        request_metadata: Optional[Sequence[Tuple[str, str]]] = None,
        create_request_timeout: Optional[float] = None,
        sync: bool = True,
    ) -> Feature:
        """Creates a new feature.

        Args:
            name: The name of the feature.
            version_column_name:
                The name of the BigQuery Table/View column hosting data for this
                version. If no value is provided, will use feature_id.
            description: Description of the feature.
            labels:
                The labels with user-defined metadata to organize your Features.

                Label keys and values can be no longer than 64 characters
                (Unicode codepoints), can only contain lowercase letters,
                numeric characters, underscores and dashes. International
                characters are allowed.

                See https://goo.gl/xmQnxf for more information on and examples
                of labels. No more than 64 user labels can be associated with
                one Feature (System labels are excluded). System reserved label
                keys are prefixed with "aiplatform.googleapis.com/" and are
                immutable.
            point_of_contact:
                Entity responsible for maintaining this feature. Can be a comma
                separated list of email addresses or URIs.
            project:
                Project to create feature in. If unset, the project set in
                aiplatform.init will be used.
            location:
                Location to create feature in. If not set, location set in
                aiplatform.init will be used.
            credentials:
                Custom credentials to use to create this feature. Overrides
                credentials set in aiplatform.init.
            request_metadata:
                Strings which should be sent along with the request as metadata.
            create_request_timeout:
                The timeout for the create request in seconds.
            sync:
                Whether to execute this creation synchronously. If False, this
                method will be executed in a concurrent Future and any downstream
                object will be immediately returned and synced when the Future
                has completed.

        Returns:
            Feature - the Feature resource object.
        """

        gapic_feature = gca_feature.Feature()

        if version_column_name:
            gapic_feature.version_column_name = version_column_name

        if description:
            gapic_feature.description = description

        if labels:
            utils.validate_labels(labels)
            gapic_feature.labels = labels

        if point_of_contact:
            gapic_feature.point_of_contact = point_of_contact

        if request_metadata is None:
            request_metadata = ()

        api_client = self.__class__._instantiate_client(
            location=location, credentials=credentials
        )

        create_feature_lro = api_client.create_feature(
            parent=self.resource_name,
            feature=gapic_feature,
            feature_id=name,
            metadata=request_metadata,
            timeout=create_request_timeout,
        )

        _LOGGER.log_create_with_lro(Feature, create_feature_lro)

        created_feature = create_feature_lro.result()

        _LOGGER.log_create_complete(Feature, created_feature, "feature")

        feature_obj = Feature(
            name=created_feature.name,
            project=project,
            location=location,
            credentials=credentials,
        )

        return feature_obj

    def list_features(
        self,
        project: Optional[str] = None,
        location: Optional[str] = None,
        credentials: Optional[auth_credentials.Credentials] = None,
    ) -> List[Feature]:
        """Lists features under this feature group.

        Args:
            project:
                Project to list features in. If unset, the project set in
                aiplatform.init will be used.
            location:
                Location to list features in. If not set, location set in
                aiplatform.init will be used.
            credentials:
                Custom credentials to use to list features. Overrides
                credentials set in aiplatform.init.

        Returns:
            List of features under this feature group.
        """

        return Feature.list(
            parent=self.resource_name,
            project=project,
            location=location,
            credentials=credentials,
        )

    def get_feature_monitor(
        self,
        feature_monitor_id: str,
        credentials: Optional[auth_credentials.Credentials] = None,
    ) -> FeatureMonitor:
        """Retrieves an existing feature monitor.

        Args:
            feature_monitor_id: The ID of the feature monitor.
            credentials:
                Custom credentials to use to retrieve the feature monitor under
                this feature group. The order in which credentials are used is
                as follows: (1) this parameter (2) credentials passed to the
                FeatureGroup constructor (3) credentials set in aiplatform.init.

        Returns:
            FeatureMonitor - the Feature Monitor resource object under this
            feature group.
        """
        credentials = (
            credentials or self.credentials or initializer.global_config.credentials
        )
        return FeatureMonitor(
            f"{self.resource_name}/featureMonitors/{feature_monitor_id}",
            credentials=credentials,
        )

    def create_feature_monitor(
        self,
        name: str,
        description: Optional[str] = None,
        labels: Optional[Dict[str, str]] = None,
        schedule_config: Optional[str] = None,
        feature_selection_configs: Optional[List[Tuple[str, float]]] = None,
        project: Optional[str] = None,
        location: Optional[str] = None,
        credentials: Optional[auth_credentials.Credentials] = None,
        request_metadata: Optional[Sequence[Tuple[str, str]]] = None,
        create_request_timeout: Optional[float] = None,
    ) -> FeatureMonitor:
        """Creates a new feature monitor.

        Args:
            name: The name of the feature monitor.
            description: Description of the feature monitor.
            labels:
                The labels with user-defined metadata to organize your
                FeatureMonitors.

                Label keys and values can be no longer than 64 characters
                (Unicode codepoints), can only contain lowercase letters,
                numeric characters, underscores and dashes. International
                characters are allowed.

                See https://goo.gl/xmQnxf for more information on and examples
                of labels. No more than 64 user labels can be associated with
                one FeatureMonitor (System labels are excluded). System reserved
                label keys are prefixed with "aiplatform.googleapis.com/" and
                are immutable.
            schedule_config:
                Configures when data is to be monitored for this
                FeatureMonitor. At the end of the scheduled time,
                the stats and drift are generated for the selected features.
                Example format: "TZ=America/New_York 0 9 * * *" (monitors
                daily at 9 AM EST).
            feature_selection_configs:
                List of tuples of feature ID and monitoring threshold.
                Required; a ValueError is raised when unset. If a threshold is
                not provided for a feature, the default threshold of 0.3 is
                used.
            project:
                Project to create the feature monitor in. If unset, the project
                set in aiplatform.init will be used.
            location:
                Location to create the feature monitor in. If not set, location
                set in aiplatform.init will be used.
            credentials:
                Custom credentials to use to create this feature monitor.
                Overrides credentials set in aiplatform.init.
            request_metadata:
                Strings which should be sent along with the request as metadata.
            create_request_timeout:
                The timeout for the create request in seconds.

        Returns:
            FeatureMonitor - the FeatureMonitor resource object.
        """

        gapic_feature_monitor = gca_feature_monitor.FeatureMonitor()

        if description:
            gapic_feature_monitor.description = description

        if labels:
            utils.validate_labels(labels)
            gapic_feature_monitor.labels = labels

        if request_metadata is None:
            request_metadata = ()

        if schedule_config:
            gapic_feature_monitor.schedule_config = gca_feature_monitor.ScheduleConfig(
                cron=schedule_config
            )

        if feature_selection_configs is None:
            raise ValueError(
                "Please specify feature_selection_configs: the features to be"
                " monitored and their thresholds."
            )

        gapic_feature_monitor.feature_selection_config.feature_configs = [
            gca_feature_monitor.FeatureSelectionConfig.FeatureConfig(
                feature_id=feature_id,
                drift_threshold=threshold if threshold else 0.3,
            )
            for feature_id, threshold in feature_selection_configs
        ]

        api_client = self.__class__._instantiate_client(
            location=location, credentials=credentials
        )

        create_feature_monitor_lro = api_client.select_version(
            "v1beta1"
        ).create_feature_monitor(
            parent=self.resource_name,
            feature_monitor=gapic_feature_monitor,
            feature_monitor_id=name,
            metadata=request_metadata,
            timeout=create_request_timeout,
        )

        _LOGGER.log_create_with_lro(FeatureMonitor, create_feature_monitor_lro)

        created_feature_monitor = create_feature_monitor_lro.result()

        _LOGGER.log_create_complete(
            FeatureMonitor, created_feature_monitor, "feature_monitor"
        )

        feature_monitor_obj = FeatureMonitor(
            name=created_feature_monitor.name,
            project=project,
            location=location,
            credentials=credentials,
        )

        return feature_monitor_obj

    def list_feature_monitors(
        self,
        project: Optional[str] = None,
        location: Optional[str] = None,
        credentials: Optional[auth_credentials.Credentials] = None,
    ) -> List[FeatureMonitor]:
        """Lists feature monitors under this feature group.

        Args:
            project:
                Project to list feature monitors in. If unset, the project set
                in aiplatform.init will be used.
            location:
                Location to list feature monitors in. If not set, location set
                in aiplatform.init will be used.
            credentials:
                Custom credentials to use to list feature monitors. Overrides
                credentials set in aiplatform.init.

        Returns:
            List of feature monitors under this feature group.
        """

        return FeatureMonitor.list(
            parent=self.resource_name,
            project=project,
            location=location,
            credentials=credentials,
        )

    @property
    def source(self) -> FeatureGroupBigQuerySource:
        return FeatureGroupBigQuerySource(
            uri=self._gca_resource.big_query.big_query_source.input_uri,
            entity_id_columns=self._gca_resource.big_query.entity_id_columns,
        )
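A hedged end-to-end sketch of the FeatureGroup surface added above; the BigQuery URI and all IDs are placeholders, not part of this commit:

    # Placeholders throughout; not part of this commit.
    fg = FeatureGroup.create(
        name="my_fg",
        source=FeatureGroupBigQuerySource(
            uri="bq://my-project.my_dataset.my_table",
            entity_id_columns=["user_id"],
        ),
    )
    feature = fg.create_feature("clicks_7d")
    monitor = fg.create_feature_monitor(
        name="my_monitor",
        schedule_config="TZ=America/New_York 0 9 * * *",
        feature_selection_configs=[("clicks_7d", 0.2)],
    )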
@@ -0,0 +1,335 @@ vertexai/resources/preview/feature_store/feature_monitor.py (path inferred from the imports in the package __init__)
# -*- coding: utf-8 -*-

# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import re
from typing import List, Dict, Optional, Tuple, Sequence
from google.auth import credentials as auth_credentials
from google.cloud.aiplatform import base, initializer
from google.cloud.aiplatform import utils
from google.cloud.aiplatform.compat.types import (
    feature_monitor_v1beta1 as gca_feature_monitor,
    feature_monitor_job_v1beta1 as gca_feature_monitor_job,
)

_LOGGER = base.Logger(__name__)


class FeatureMonitor(base.VertexAiResourceNounWithFutureManager):
    """Class for managing Feature Monitor resources."""

    client_class = utils.FeatureRegistryClientV1Beta1WithOverride

    _resource_noun = "feature_monitors"
    _getter_method = "get_feature_monitor"
    _list_method = "list_feature_monitors"
    _delete_method = "delete_feature_monitor"
    _parse_resource_name_method = "parse_feature_monitor_path"
    _format_resource_name_method = "feature_monitor_path"
    _gca_resource: gca_feature_monitor.FeatureMonitor

    def __init__(
        self,
        name: str,
        feature_group_id: Optional[str] = None,
        project: Optional[str] = None,
        location: Optional[str] = None,
        credentials: Optional[auth_credentials.Credentials] = None,
    ):
        """Retrieves an existing managed feature monitor.

        Args:
            name:
                The resource name
                (`projects/.../locations/.../featureGroups/.../featureMonitors/...`)
                or ID.
            feature_group_id:
                The feature group ID. Must be passed in if name is an ID and not
                a resource path.
            project:
                Project to retrieve the feature monitor from. If not set, the
                project set in aiplatform.init will be used.
            location:
                Location to retrieve the feature monitor from. If not set, the
                location set in aiplatform.init will be used.
            credentials:
                Custom credentials to use to retrieve this feature monitor.
                Overrides credentials set in aiplatform.init.
        """

        super().__init__(
            project=project,
            location=location,
            credentials=credentials,
            resource_name=name,
        )

        if re.fullmatch(
            r"projects/.+/locations/.+/featureGroups/.+/featureMonitors/.+",
            name,
        ):
            if feature_group_id:
                raise ValueError(
                    f"Since feature monitor '{name}' is provided as a path, feature_group_id should not be specified."
                )
            feature_monitor = name
        else:
            from .feature_group import FeatureGroup

            # Construct the feature monitor path from the feature group ID
            # if only the feature monitor ID is provided.
            if not feature_group_id:
                raise ValueError(
                    f"Since feature monitor '{name}' is not provided as a path, please specify feature_group_id."
                )

            feature_group_path = utils.full_resource_name(
                resource_name=feature_group_id,
                resource_noun=FeatureGroup._resource_noun,
                parse_resource_name_method=FeatureGroup._parse_resource_name,
                format_resource_name_method=FeatureGroup._format_resource_name,
            )

            feature_monitor = f"{feature_group_path}/featureMonitors/{name}"

        self._gca_resource = self._get_gca_resource(resource_name=feature_monitor)

    @property
    def description(self) -> str:
        """The description of the feature monitor."""
        return self._gca_resource.description

    @property
    def schedule_config(self) -> str:
        """The schedule config of the feature monitor."""
        return self._gca_resource.schedule_config.cron

    @property
    def feature_selection_configs(self) -> List[Tuple[str, float]]:
        """The features and their drift-threshold configs for the feature monitor."""
        configs: List[Tuple[str, float]] = []
        for feature_config in self._gca_resource.feature_selection_config.feature_configs:
            configs.append(
                (
                    feature_config.feature_id,
                    feature_config.drift_threshold
                    if feature_config.drift_threshold
                    else 0.3,
                )
            )
        return configs

    class FeatureMonitorJob(base.VertexAiResourceNounWithFutureManager):
        """Class for managing Feature Monitor Job resources."""

        client_class = utils.FeatureRegistryClientV1Beta1WithOverride

        _resource_noun = "featureMonitorJobs"
        _getter_method = "get_feature_monitor_job"
        _list_method = "list_feature_monitor_jobs"
        _delete_method = "delete_feature_monitor_job"
        _parse_resource_name_method = "parse_feature_monitor_job_path"
        _format_resource_name_method = "feature_monitor_job_path"
        _gca_resource: gca_feature_monitor_job.FeatureMonitorJob

        def __init__(
            self,
            name: str,
            project: Optional[str] = None,
            location: Optional[str] = None,
            credentials: Optional[auth_credentials.Credentials] = None,
        ):
            """Retrieves an existing managed feature monitor job.

            Args:
                name: The resource name
                    (`projects/.../locations/.../featureGroups/.../featureMonitors/.../featureMonitorJobs/...`)
                project: Project to retrieve the feature monitor job from. If
                    unset, the project set in aiplatform.init will be used.
                location: Location to retrieve the feature monitor job from. If
                    not set, location set in aiplatform.init will be used.
                credentials: Custom credentials to use to retrieve this feature
                    monitor job. Overrides credentials set in aiplatform.init.
            """
            super().__init__(
                project=project,
                location=location,
                credentials=credentials,
                resource_name=name,
            )

            if not re.fullmatch(
                r"projects/.+/locations/.+/featureGroups/.+/featureMonitors/.+/featureMonitorJobs/.+",
                name,
            ):
                raise ValueError(
                    "name must specify the fully qualified"
                    " feature monitor job resource path."
                )

            self._gca_resource = self._get_gca_resource(resource_name=name)

        @property
        def description(self) -> str:
            """The description of the feature monitor job."""
            return self._gca_resource.description

        @property
        def feature_stats_and_anomalies(
            self,
        ) -> List[gca_feature_monitor.FeatureStatsAndAnomaly]:
            """The feature stats and anomalies of the feature monitor job."""
            if self._gca_resource.job_summary:
                return self._gca_resource.job_summary.feature_stats_and_anomalies
            return []

    def create_feature_monitor_job(
        self,
        description: Optional[str] = None,
        labels: Optional[Dict[str, str]] = None,
        project: Optional[str] = None,
        location: Optional[str] = None,
        credentials: Optional[auth_credentials.Credentials] = None,
        request_metadata: Optional[Sequence[Tuple[str, str]]] = None,
        create_request_timeout: Optional[float] = None,
    ) -> FeatureMonitorJob:
        """Creates a new feature monitor job.

        Args:
            description: Description of the feature monitor job.
            labels:
                The labels with user-defined metadata to organize your
                FeatureMonitorJobs.

                Label keys and values can be no longer than 64 characters
                (Unicode codepoints), can only contain lowercase letters,
                numeric characters, underscores and dashes. International
                characters are allowed.

                See https://goo.gl/xmQnxf for more information on and examples
                of labels. No more than 64 user labels can be associated with
                one FeatureMonitorJob (System labels are excluded). System
                reserved label keys are prefixed with
                "aiplatform.googleapis.com/" and are immutable.
            project:
                Project to create the feature monitor job in. If unset, the
                project set in aiplatform.init will be used.
            location:
                Location to create the feature monitor job in. If not set,
                location set in aiplatform.init will be used.
            credentials:
                Custom credentials to use to create this feature monitor job.
                Overrides credentials set in aiplatform.init.
            request_metadata:
                Strings which should be sent along with the request as metadata.
            create_request_timeout:
                The timeout for the create request in seconds.

        Returns:
            FeatureMonitorJob - the FeatureMonitorJob resource object.
        """

        gapic_feature_monitor_job = gca_feature_monitor_job.FeatureMonitorJob()

        if description:
            gapic_feature_monitor_job.description = description

        if labels:
            utils.validate_labels(labels)
            gapic_feature_monitor_job.labels = labels

        if request_metadata is None:
            request_metadata = ()

        api_client = self.__class__._instantiate_client(
            location=location, credentials=credentials
        )

        created_feature_monitor_job = api_client.select_version(
            "v1beta1"
        ).create_feature_monitor_job(
            parent=self.resource_name,
            feature_monitor_job=gapic_feature_monitor_job,
            metadata=request_metadata,
            timeout=create_request_timeout,
        )

        feature_monitor_job_obj = self.FeatureMonitorJob(
            name=created_feature_monitor_job.name,
            project=project,
            location=location,
            credentials=credentials,
        )

        return feature_monitor_job_obj

    def get_feature_monitor_job(
        self,
        feature_monitor_job_id: str,
        credentials: Optional[auth_credentials.Credentials] = None,
    ) -> FeatureMonitorJob:
        """Retrieves an existing feature monitor job.

        Args:
            feature_monitor_job_id: The ID of the feature monitor job.
            credentials:
                Custom credentials to use to retrieve the feature monitor job
                under this feature monitor. The order in which credentials are
                used is as follows: (1) this parameter (2) credentials passed
                to the FeatureMonitor constructor (3) credentials set in
                aiplatform.init.

        Returns:
            FeatureMonitorJob - the Feature Monitor Job resource object under
            this feature monitor.
        """
        credentials = (
            credentials or self.credentials or initializer.global_config.credentials
        )
        return FeatureMonitor.FeatureMonitorJob(
            f"{self.resource_name}/featureMonitorJobs/{feature_monitor_job_id}",
            credentials=credentials,
        )

    def list_feature_monitor_jobs(
        self,
        project: Optional[str] = None,
        location: Optional[str] = None,
        credentials: Optional[auth_credentials.Credentials] = None,
    ) -> List[FeatureMonitorJob]:
        """Lists feature monitor jobs under this feature monitor.

        Args:
            project:
                Project to list feature monitor jobs in. If unset, the project
                set in aiplatform.init will be used.
            location:
                Location to list feature monitor jobs in. If not set, location
                set in aiplatform.init will be used.
            credentials:
                Custom credentials to use to list feature monitor jobs.
                Overrides credentials set in aiplatform.init.

        Returns:
            List of feature monitor jobs under this feature monitor.
        """

        return FeatureMonitor.FeatureMonitorJob.list(
            parent=self.resource_name,
            project=project,
            location=location,
            credentials=credentials,
        )
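A short sketch of the monitor-job flow defined above (all IDs are placeholders):

    # Placeholders: monitor and group IDs are invented.
    fm = FeatureMonitor("my_monitor", feature_group_id="my_fg")
    job = fm.create_feature_monitor_job(description="ad hoc run")
    for stats in job.feature_stats_and_anomalies:
        print(stats)
    jobs = fm.list_feature_monitor_jobs()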
@@ -0,0 +1,645 @@ vertexai/resources/preview/feature_store/feature_online_store.py (path inferred from the imports in the package __init__)
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2024 Google LLC
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
import enum
|
||||
from typing import (
|
||||
Dict,
|
||||
List,
|
||||
Optional,
|
||||
Sequence,
|
||||
Tuple,
|
||||
Union,
|
||||
)
|
||||
|
||||
from google.auth import credentials as auth_credentials
|
||||
from google.cloud.aiplatform import (
|
||||
base,
|
||||
initializer,
|
||||
utils,
|
||||
)
|
||||
from google.cloud.aiplatform.compat.types import (
|
||||
feature_online_store as gca_feature_online_store,
|
||||
service_networking as gca_service_networking,
|
||||
feature_view as gca_feature_view,
|
||||
)
|
||||
from vertexai.resources.preview.feature_store.feature_view import (
|
||||
FeatureView,
|
||||
)
|
||||
from vertexai.resources.preview.feature_store.utils import (
|
||||
IndexConfig,
|
||||
FeatureViewBigQuerySource,
|
||||
FeatureViewVertexRagSource,
|
||||
FeatureViewRegistrySource,
|
||||
)
|
||||
|
||||
|
||||
_LOGGER = base.Logger(__name__)
|
||||
|
||||
|
||||
@enum.unique
|
||||
class FeatureOnlineStoreType(enum.Enum):
|
||||
UNKNOWN = 0
|
||||
BIGTABLE = 1
|
||||
OPTIMIZED = 2
|
||||
|
||||
|
||||
class FeatureOnlineStore(base.VertexAiResourceNounWithFutureManager):
|
||||
"""Class for managing Feature Online Store resources."""
|
||||
|
||||
client_class = utils.FeatureOnlineStoreAdminClientWithOverride
|
||||
|
||||
_resource_noun = "feature_online_stores"
|
||||
_getter_method = "get_feature_online_store"
|
||||
_list_method = "list_feature_online_stores"
|
||||
_delete_method = "delete_feature_online_store"
|
||||
_parse_resource_name_method = "parse_feature_online_store_path"
|
||||
_format_resource_name_method = "feature_online_store_path"
|
||||
_gca_resource: gca_feature_online_store.FeatureOnlineStore
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
name: str,
|
||||
project: Optional[str] = None,
|
||||
location: Optional[str] = None,
|
||||
credentials: Optional[auth_credentials.Credentials] = None,
|
||||
):
|
||||
"""Retrieves an existing managed feature online store.
|
||||
|
||||
Args:
|
||||
name:
|
||||
The resource name
|
||||
(`projects/.../locations/.../featureOnlineStores/...`) or ID.
|
||||
project:
|
||||
Project to retrieve feature online store from. If unset, the
|
||||
project set in aiplatform.init will be used.
|
||||
location:
|
||||
Location to retrieve feature online store from. If not set,
|
||||
location set in aiplatform.init will be used.
|
||||
credentials:
|
||||
Custom credentials to use to retrieve this feature online store.
|
||||
Overrides credentials set in aiplatform.init.
|
||||
"""
|
||||
|
||||
super().__init__(
|
||||
project=project,
|
||||
location=location,
|
||||
credentials=credentials,
|
||||
resource_name=name,
|
||||
)
|
||||
self._gca_resource = self._get_gca_resource(resource_name=name)
|
||||
|
||||
@classmethod
|
||||
@base.optional_sync()
|
||||
def create_bigtable_store(
|
||||
cls,
|
||||
name: str,
|
||||
min_node_count: Optional[int] = 1,
|
||||
max_node_count: Optional[int] = 1,
|
||||
cpu_utilization_target: Optional[int] = 50,
|
||||
labels: Optional[Dict[str, str]] = None,
|
||||
project: Optional[str] = None,
|
||||
location: Optional[str] = None,
|
||||
credentials: Optional[auth_credentials.Credentials] = None,
|
||||
request_metadata: Optional[Sequence[Tuple[str, str]]] = None,
|
||||
create_request_timeout: Optional[float] = None,
|
||||
sync: bool = True,
|
||||
) -> "FeatureOnlineStore":
|
||||
"""Creates a Bigtable online store.
|
||||
|
||||
Example Usage:
|
||||
|
||||
my_fos = vertexai.preview.FeatureOnlineStore.create_bigtable_store('my_fos')
|
||||
|
||||
Args:
|
||||
name: The name of the feature online store.
|
||||
min_node_count:
|
||||
The minimum number of Bigtable nodes to scale down to. Must be
|
||||
greater than or equal to 1.
|
||||
max_node_count:
|
||||
The maximum number of Bigtable nodes to scale up to. Must
|
||||
satisfy min_node_count <= max_node_count <= (10 *
|
||||
min_node_count).
|
||||
cpu_utilization_target:
|
||||
A percentage of the cluster's CPU capacity. Can be from 10% to
|
||||
80%. When a cluster's CPU utilization exceeds the target that
|
||||
you have set, Bigtable immediately adds nodes to the cluster.
|
||||
When CPU utilization is substantially lower than the target,
|
||||
Bigtable removes nodes. If not set will default to 50%.
|
||||
labels:
|
||||
The labels with user-defined metadata to organize your feature
|
||||
online store. Label keys and values can be no longer than 64
|
||||
characters (Unicode codepoints), can only contain lowercase
|
||||
letters, numeric characters, underscores and dashes.
|
||||
International characters are allowed. See https://goo.gl/xmQnxf
|
||||
for more information on and examples of labels. No more than 64
|
||||
user labels can be associated with one feature online store
|
||||
(System labels are excluded)." System reserved label keys are
|
||||
prefixed with "aiplatform.googleapis.com/" and are immutable.
|
||||
project:
|
||||
Project to create feature online store in. If unset, the project
|
||||
set in aiplatform.init will be used.
|
||||
location:
|
||||
Location to create feature online store in. If not set, location
|
||||
set in aiplatform.init will be used.
|
||||
credentials:
|
||||
Custom credentials to use to create this feature online store.
|
||||
Overrides credentials set in aiplatform.init.
|
||||
request_metadata:
|
||||
Strings which should be sent along with the request as metadata.
|
||||
create_request_timeout:
|
||||
The timeout for the create request in seconds.
|
||||
sync:
|
||||
Whether to execute this creation synchronously. If False, this
|
||||
method will be executed in concurrent Future and any downstream
|
||||
object will be immediately returned and synced when the Future
|
||||
has completed.
|
||||
|
||||
Returns:
|
||||
FeatureOnlineStore - the FeatureOnlineStore resource object.
|
||||
"""
|
||||
|
||||
if min_node_count < 1:
|
||||
raise ValueError("min_node_count must be greater than or equal to 1")
|
||||
|
||||
if max_node_count < min_node_count:
|
||||
raise ValueError(
|
||||
"max_node_count must be greater than or equal to min_node_count"
|
||||
)
|
||||
elif 10 * min_node_count < max_node_count:
|
||||
raise ValueError(
|
||||
"max_node_count must be less than or equal to 10 * min_node_count"
|
||||
)
|
||||
|
||||
if cpu_utilization_target < 10 or cpu_utilization_target > 80:
|
||||
raise ValueError("cpu_utilization_target must be between 10 and 80")
|
||||
|
||||
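# Build the online store proto with Bigtable autoscaling configured from
# the parameters validated above.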
gapic_feature_online_store = gca_feature_online_store.FeatureOnlineStore(
|
||||
bigtable=gca_feature_online_store.FeatureOnlineStore.Bigtable(
|
||||
auto_scaling=gca_feature_online_store.FeatureOnlineStore.Bigtable.AutoScaling(
|
||||
min_node_count=min_node_count,
|
||||
max_node_count=max_node_count,
|
||||
cpu_utilization_target=cpu_utilization_target,
|
||||
),
|
||||
),
|
||||
)
|
||||
|
||||
if labels:
|
||||
utils.validate_labels(labels)
|
||||
gapic_feature_online_store.labels = labels
|
||||
|
||||
if request_metadata is None:
|
||||
request_metadata = ()
|
||||
|
||||
api_client = cls._instantiate_client(location=location, credentials=credentials)
|
||||
|
||||
create_online_store_lro = api_client.create_feature_online_store(
|
||||
parent=initializer.global_config.common_location_path(
|
||||
project=project, location=location
|
||||
),
|
||||
feature_online_store=gapic_feature_online_store,
|
||||
feature_online_store_id=name,
|
||||
metadata=request_metadata,
|
||||
timeout=create_request_timeout,
|
||||
)
|
||||
|
||||
_LOGGER.log_create_with_lro(cls, create_online_store_lro)
|
||||
|
||||
created_online_store = create_online_store_lro.result()
|
||||
|
||||
_LOGGER.log_create_complete(cls, created_online_store, "feature_online_store")
|
||||
|
||||
online_store_obj = cls(
|
||||
name=created_online_store.name,
|
||||
project=project,
|
||||
location=location,
|
||||
credentials=credentials,
|
||||
)
|
||||
|
||||
return online_store_obj
|
||||
|
||||
@classmethod
|
||||
@base.optional_sync()
|
||||
def create_optimized_store(
|
||||
cls,
|
||||
name: str,
|
||||
enable_private_service_connect: bool = False,
|
||||
project_allowlist: Optional[Sequence[str]] = None,
|
||||
labels: Optional[Dict[str, str]] = None,
|
||||
project: Optional[str] = None,
|
||||
location: Optional[str] = None,
|
||||
credentials: Optional[auth_credentials.Credentials] = None,
|
||||
request_metadata: Optional[Sequence[Tuple[str, str]]] = None,
|
||||
create_request_timeout: Optional[float] = None,
|
||||
sync: bool = True,
|
||||
) -> "FeatureOnlineStore":
|
||||
"""Creates an Optimized online store.
|
||||
|
||||
Example Usage:
|
||||
|
||||
```
|
||||
# Create optimized store with public endpoint.
|
||||
my_fos = vertexai.preview.FeatureOnlineStore.create_optimized_store('my_fos')
|
||||
```
|
||||
|
||||
```
|
||||
# Create optimized online store with private service connect.
|
||||
my_fos = vertexai.preview.FeatureOnlineStore.create_optimized_store(
|
||||
'my_fos',
|
||||
enable_private_service_connect=True,
|
||||
project_allowlist=['my-project'],
|
||||
)
|
||||
```
|
||||
|
||||
Args:
|
||||
name: The name of the feature online store.
|
||||
enable_private_service_connect:
|
||||
Optional. If true, expose the optimized online store
|
||||
via private service connect. Otherwise the optimized online
|
||||
store will be accessible through a public endpoint.
|
||||
project_allowlist:
|
||||
A list of Projects from which the forwarding
|
||||
rule will target the service attachment. Only needed when
|
||||
`enable_private_service_connect` is set to true.
|
||||
labels:
|
||||
The labels with user-defined metadata to organize your feature
|
||||
online store. Label keys and values can be no longer than 64
|
||||
characters (Unicode codepoints), can only contain lowercase
|
||||
letters, numeric characters, underscores and dashes.
|
||||
International characters are allowed. See https://goo.gl/xmQnxf
|
||||
for more information on and examples of labels. No more than 64
|
||||
user labels can be associated with one feature online store
|
||||
(System labels are excluded)." System reserved label keys are
|
||||
prefixed with "aiplatform.googleapis.com/" and are immutable.
|
||||
project:
|
||||
Project to create feature online store in. If unset, the project
|
||||
set in aiplatform.init will be used.
|
||||
location:
|
||||
Location to create feature online store in. If not set, location
|
||||
set in aiplatform.init will be used.
|
||||
credentials:
|
||||
Custom credentials to use to create this feature online store.
|
||||
Overrides credentials set in aiplatform.init.
|
||||
request_metadata:
|
||||
Strings which should be sent along with the request as metadata.
|
||||
create_request_timeout:
|
||||
The timeout for the create request in seconds.
|
||||
sync:
|
||||
Whether to execute this creation synchronously. If False, this
|
||||
method will be executed in concurrent Future and any downstream
|
||||
object will be immediately returned and synced when the Future
|
||||
has completed.
|
||||
|
||||
Returns:
|
||||
FeatureOnlineStore - the FeatureOnlineStore resource object.
|
||||
"""
|
||||
if enable_private_service_connect:
|
||||
if not project_allowlist:
|
||||
raise ValueError(
|
||||
"`project_allowlist` cannot be empty when `enable_private_service_connect` is set to true."
|
||||
)
|
||||
|
||||
dedicated_serving_endpoint = gca_feature_online_store.FeatureOnlineStore.DedicatedServingEndpoint(
|
||||
private_service_connect_config=gca_service_networking.PrivateServiceConnectConfig(
|
||||
enable_private_service_connect=True,
|
||||
project_allowlist=project_allowlist,
|
||||
),
|
||||
)
|
||||
else:
|
||||
dedicated_serving_endpoint = (
|
||||
gca_feature_online_store.FeatureOnlineStore.DedicatedServingEndpoint()
|
||||
)
|
||||
|
||||
gapic_feature_online_store = gca_feature_online_store.FeatureOnlineStore(
|
||||
optimized=gca_feature_online_store.FeatureOnlineStore.Optimized(),
|
||||
dedicated_serving_endpoint=dedicated_serving_endpoint,
|
||||
)
|
||||
|
||||
if labels:
|
||||
utils.validate_labels(labels)
|
||||
gapic_feature_online_store.labels = labels
|
||||
|
||||
if request_metadata is None:
|
||||
request_metadata = ()
|
||||
|
||||
api_client = cls._instantiate_client(location=location, credentials=credentials)
|
||||
|
||||
create_online_store_lro = api_client.create_feature_online_store(
|
||||
parent=initializer.global_config.common_location_path(
|
||||
project=project, location=location
|
||||
),
|
||||
feature_online_store=gapic_feature_online_store,
|
||||
feature_online_store_id=name,
|
||||
metadata=request_metadata,
|
||||
timeout=create_request_timeout,
|
||||
)
|
||||
|
||||
_LOGGER.log_create_with_lro(cls, create_online_store_lro)
|
||||
|
||||
created_online_store = create_online_store_lro.result()
|
||||
|
||||
_LOGGER.log_create_complete(cls, created_online_store, "feature_online_store")
|
||||
|
||||
online_store_obj = cls(
|
||||
name=created_online_store.name,
|
||||
project=project,
|
||||
location=location,
|
||||
credentials=credentials,
|
||||
)
|
||||
|
||||
return online_store_obj
|
||||
|
||||
@base.optional_sync()
|
||||
def delete(self, force: bool = False, sync: bool = True) -> None:
|
||||
"""Deletes this online store.
|
||||
|
||||
WARNING: This deletion is permanent.
|
||||
|
||||
Args:
|
||||
force:
|
||||
If set to True, all feature views under this online store will
|
||||
be deleted prior to online store deletion. Otherwise, deletion
|
||||
will only succeed if the online store has no FeatureViews.
|
||||
sync:
|
||||
Whether to execute this deletion synchronously. If False, this
|
||||
method will be executed in concurrent Future and any downstream
|
||||
object will be immediately returned and synced when the Future
|
||||
has completed.
|
||||
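
Example Usage (an illustrative sketch):

```
fos = vertexai.preview.FeatureOnlineStore('my_fos')
fos.delete(force=True)  # Also deletes FeatureViews under this store.
```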
"""
|
||||
|
||||
lro = getattr(self.api_client, self._delete_method)(
|
||||
name=self.resource_name,
|
||||
force=force,
|
||||
)
|
||||
_LOGGER.log_delete_with_lro(self, lro)
|
||||
lro.result()
|
||||
_LOGGER.log_delete_complete(self)
|
||||
|
||||
@property
|
||||
def feature_online_store_type(self) -> FeatureOnlineStoreType:
|
||||
if self._gca_resource.bigtable:
|
||||
return FeatureOnlineStoreType.BIGTABLE
|
||||
# Optimized is an empty proto, so self._gca_resource.optimized is always false.
|
||||
elif hasattr(self.gca_resource, "optimized"):
|
||||
return FeatureOnlineStoreType.OPTIMIZED
|
||||
else:
|
||||
raise ValueError(
|
||||
f"Online store does not have type or is unsupported by SDK: {self._gca_resource}."
|
||||
)
|
||||
|
||||
@property
|
||||
def labels(self) -> Dict[str, str]:
|
||||
return self._gca_resource.labels
|
||||
|
||||
@base.optional_sync()
|
||||
def create_feature_view(
|
||||
self,
|
||||
name: str,
|
||||
source: Union[
|
||||
FeatureViewBigQuerySource,
|
||||
FeatureViewVertexRagSource,
|
||||
FeatureViewRegistrySource,
|
||||
],
|
||||
labels: Optional[Dict[str, str]] = None,
|
||||
sync_config: Optional[str] = None,
|
||||
index_config: Optional[IndexConfig] = None,
|
||||
project: Optional[str] = None,
|
||||
location: Optional[str] = None,
|
||||
credentials: Optional[auth_credentials.Credentials] = None,
|
||||
request_metadata: Optional[Sequence[Tuple[str, str]]] = None,
|
||||
create_request_timeout: Optional[float] = None,
|
||||
sync: bool = True,
|
||||
) -> FeatureView:
|
||||
"""Creates a FeatureView from a BigQuery source.
|
||||
|
||||
Example Usage:
|
||||
```
|
||||
existing_fos = FeatureOnlineStore('my_fos')
|
||||
new_fv = existing_fos.create_feature_view(
|
||||
'my_fv',
|
||||
FeatureViewBigQuerySource(
|
||||
uri='bq://my-proj/dataset/table',
|
||||
entity_id_columns=['entity_id'],
|
||||
)
|
||||
)
|
||||
# Example for how to create an embedding FeatureView.
|
||||
embedding_fv = existing_fos.create_feature_view(
|
||||
'my_embedding_fv',
|
||||
FeatureViewBigQuerySource(
|
||||
uri='bq://my-proj/dataset/table',
|
||||
entity_id_columns=['entity_id'],
|
||||
),
|
||||
index_config=IndexConfig(
|
||||
embedding_column="embedding",
|
||||
filter_column=["currency_code", "gender",
|
||||
crowding_column="crowding",
|
||||
dimensions=1536,
|
||||
distance_measure_type=DistanceMeasureType.SQUARED_L2_DISTANCE,
|
||||
algorithm_config=TreeAhConfig(),
|
||||
)
|
||||
)
|
||||
```
|
||||
Args:
|
||||
name: The name of the feature view.
|
||||
source:
|
||||
The source to load data from when a feature view sync runs.
|
||||
Currently supports BigQuery, Vertex RAG, and Feature Registry sources.
|
||||
labels:
|
||||
The labels with user-defined metadata to organize your
|
||||
FeatureViews.
|
||||
|
||||
Label keys and values can be no longer than 64 characters
|
||||
(Unicode codepoints), can only contain lowercase letters,
|
||||
numeric characters, underscores and dashes. International
|
||||
characters are allowed.
|
||||
|
||||
See https://goo.gl/xmQnxf for more information on and examples
|
||||
of labels. No more than 64 user labels can be associated with
|
||||
one FeatureOnlineStore (system labels are excluded). System
|
||||
reserved label keys are prefixed with
|
||||
"aiplatform.googleapis.com/" and are immutable.
|
||||
sync_config:
|
||||
Configures when data is to be synced/updated for this
|
||||
FeatureView. At the end of the sync the latest feature values
|
||||
for each entity ID of this FeatureView are made ready for online
|
||||
serving. Example format: "TZ=America/New_York 0 9 * * *" (sync
|
||||
daily at 9 AM EST).
|
||||
index_config:
|
||||
Configuration for index preparation for vector search. It
|
||||
contains the required configurations to create an index from
|
||||
source data, so that approximate nearest neighbor (a.k.a ANN)
|
||||
algorithms search can be performed during online serving.
|
||||
project:
|
||||
Project to create feature view in. If unset, the project set in
|
||||
aiplatform.init will be used.
|
||||
location:
|
||||
Location to create feature view in. If not set, location set in
|
||||
aiplatform.init will be used.
|
||||
credentials:
|
||||
Custom credentials to use to create this feature view.
|
||||
Overrides credentials set in aiplatform.init.
|
||||
request_metadata:
|
||||
Strings which should be sent along with the request as metadata.
|
||||
create_request_timeout:
|
||||
The timeout for the create request in seconds.
|
||||
sync:
|
||||
Whether to execute this creation synchronously. If False, this
|
||||
method will be executed in concurrent Future and any downstream
|
||||
object will be immediately returned and synced when the Future
|
||||
has completed.
|
||||
|
||||
Returns:
|
||||
FeatureView - the FeatureView resource object.
|
||||
"""
|
||||
if not source:
|
||||
raise ValueError("Please specify a valid source.")
|
||||
|
||||
big_query_source = None
|
||||
vertex_rag_source = None
|
||||
feature_registry_source = None
|
||||
|
||||
if isinstance(source, FeatureViewBigQuerySource):
|
||||
if not source.uri:
|
||||
raise ValueError("Please specify URI in BigQuery source.")
|
||||
|
||||
if not source.entity_id_columns:
|
||||
raise ValueError("Please specify entity ID columns in BigQuery source.")
|
||||
|
||||
big_query_source = gca_feature_view.FeatureView.BigQuerySource(
|
||||
uri=source.uri,
|
||||
entity_id_columns=source.entity_id_columns,
|
||||
)
|
||||
elif isinstance(source, FeatureViewVertexRagSource):
|
||||
if not source.uri:
|
||||
raise ValueError("Please specify URI in Vertex RAG source.")
|
||||
|
||||
vertex_rag_source = gca_feature_view.FeatureView.VertexRagSource(
|
||||
uri=source.uri,
|
||||
rag_corpus_id=source.rag_corpus_id or None,
|
||||
)
|
||||
elif isinstance(source, FeatureViewRegistrySource):
|
||||
if not source.features:
|
||||
raise ValueError(
|
||||
"Please specify features in Registry Source in format `<feature_group_id>.<feature_id>`."
|
||||
)
|
||||
feature_group_mappings = {}
|
||||
for feature in source.features:
|
||||
feature_group_id, feature_id = feature.split(".")
|
||||
if not feature_id or not feature_group_id:
|
||||
raise ValueError(
|
||||
"Please specify features in Registry Source in format `<feature_group_id>.<feature_id>`."
|
||||
)
|
||||
if feature_group_id in feature_group_mappings:
|
||||
feature_group_mappings[feature_group_id].append(feature_id)
|
||||
else:
|
||||
feature_group_mappings[feature_group_id] = [feature_id]
|
||||
feature_groups = []
|
||||
for feature_group_id in feature_group_mappings:
|
||||
feature_ids = feature_group_mappings[feature_group_id]
|
||||
feature_groups.append(
|
||||
gca_feature_view.FeatureView.FeatureRegistrySource.FeatureGroup(
|
||||
feature_group_id=feature_group_id,
|
||||
feature_ids=feature_ids,
|
||||
)
|
||||
)
|
||||
feature_registry_source = (
|
||||
gca_feature_view.FeatureView.FeatureRegistrySource(
|
||||
feature_groups=feature_groups,
|
||||
project_number=source.project_number or None,
|
||||
)
|
||||
)
|
||||
else:
|
||||
raise ValueError(
|
||||
"Only FeatureViewBigQuerySource, FeatureViewVertexRagSource and FeatureViewRegistrySource are supported sources."
|
||||
)
|
||||
|
||||
gapic_feature_view = gca_feature_view.FeatureView(
|
||||
big_query_source=big_query_source,
|
||||
vertex_rag_source=vertex_rag_source,
|
||||
feature_registry_source=feature_registry_source,
|
||||
sync_config=gca_feature_view.FeatureView.SyncConfig(cron=sync_config)
|
||||
if sync_config
|
||||
else None,
|
||||
)
|
||||
|
||||
if labels:
|
||||
utils.validate_labels(labels)
|
||||
gapic_feature_view.labels = labels
|
||||
|
||||
if request_metadata is None:
|
||||
request_metadata = ()
|
||||
|
||||
if index_config:
|
||||
gapic_feature_view.index_config = gca_feature_view.FeatureView.IndexConfig(
|
||||
index_config.as_dict()
|
||||
)
|
||||
|
||||
api_client = self.__class__._instantiate_client(
|
||||
location=location, credentials=credentials
|
||||
)
|
||||
|
||||
create_feature_view_lro = api_client.create_feature_view(
|
||||
parent=self.resource_name,
|
||||
feature_view=gapic_feature_view,
|
||||
feature_view_id=name,
|
||||
metadata=request_metadata,
|
||||
timeout=create_request_timeout,
|
||||
)
|
||||
|
||||
_LOGGER.log_create_with_lro(FeatureView, create_feature_view_lro)
|
||||
|
||||
created_feature_view = create_feature_view_lro.result()
|
||||
|
||||
_LOGGER.log_create_complete(FeatureView, created_feature_view, "feature_view")
|
||||
|
||||
feature_view_obj = FeatureView(
|
||||
name=created_feature_view.name,
|
||||
project=project,
|
||||
location=location,
|
||||
credentials=credentials,
|
||||
)
|
||||
|
||||
return feature_view_obj
|
||||
|
||||
def list_feature_views(
|
||||
self,
|
||||
project: Optional[str] = None,
|
||||
location: Optional[str] = None,
|
||||
credentials: Optional[auth_credentials.Credentials] = None,
|
||||
) -> List[FeatureView]:
|
||||
"""Lists feature views under this feature online store.
|
||||
|
||||
Args:
|
||||
project:
|
||||
Project to list feature views in. If unset, the project set in
|
||||
aiplatform.init will be used.
|
||||
location:
|
||||
Location to list feature views in. If not set, location set in
|
||||
aiplatform.init will be used.
|
||||
credentials:
|
||||
Custom credentials to use to list feature views. Overrides
|
||||
credentials set in aiplatform.init.
|
||||
|
||||
Returns:
|
||||
List of feature views under this feature online store.
|
||||
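
Example Usage (an illustrative sketch):

```
fos = vertexai.preview.FeatureOnlineStore('my_fos')
for fv in fos.list_feature_views():
    print(fv.resource_name)
```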
"""
|
||||
|
||||
return FeatureView.list(
|
||||
feature_online_store_id=self.name,
|
||||
project=project,
|
||||
location=location,
|
||||
credentials=credentials,
|
||||
)
|
||||
@@ -0,0 +1,539 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2024 Google LLC
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
import re
|
||||
from typing import List, Dict, Optional
|
||||
from google.cloud.aiplatform import initializer
|
||||
from google.auth import credentials as auth_credentials
|
||||
from google.cloud.aiplatform import base
|
||||
from google.cloud.aiplatform import utils
|
||||
from google.cloud.aiplatform.compat.types import (
|
||||
feature_view_sync as gca_feature_view_sync,
|
||||
feature_view as gca_feature_view,
|
||||
feature_online_store_service as fos_service,
|
||||
)
|
||||
import vertexai.resources.preview.feature_store.utils as fs_utils
|
||||
|
||||
_LOGGER = base.Logger(__name__)
|
||||
|
||||
|
||||
class FeatureView(base.VertexAiResourceNounWithFutureManager):
|
||||
"""Class for managing Feature View resources."""
|
||||
|
||||
client_class = utils.FeatureOnlineStoreAdminClientWithOverride
|
||||
|
||||
_resource_noun = "featureViews"
|
||||
_getter_method = "get_feature_view"
|
||||
_list_method = "list_feature_views"
|
||||
_delete_method = "delete_feature_view"
|
||||
_parse_resource_name_method = "parse_feature_view_path"
|
||||
_format_resource_name_method = "feature_view_path"
|
||||
_gca_resource: gca_feature_view.FeatureView
|
||||
_online_store_client: utils.FeatureOnlineStoreClientWithOverride
|
||||
|
||||
_online_store_clients_with_connection_options: Optional[Dict[
|
||||
fs_utils.ConnectionOptions, utils.FeatureOnlineStoreClientWithOverride
|
||||
]] = None
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
name: str,
|
||||
feature_online_store_id: Optional[str] = None,
|
||||
project: Optional[str] = None,
|
||||
location: Optional[str] = None,
|
||||
credentials: Optional[auth_credentials.Credentials] = None,
|
||||
):
|
||||
"""Retrieves an existing managed feature view.
|
||||
|
||||
Args:
|
||||
name:
|
||||
The resource name
|
||||
(`projects/.../locations/.../featureOnlineStores/.../featureViews/...`)
|
||||
or ID.
|
||||
feature_online_store_id:
|
||||
The feature online store ID. Must be passed in if name is an ID
|
||||
and not a resource path.
|
||||
project:
|
||||
Project to retrieve the feature view from. If unset, the project
|
||||
set in aiplatform.init will be used.
|
||||
location:
|
||||
Location to retrieve the feature view from. If not set, location
|
||||
set in aiplatform.init will be used.
|
||||
credentials:
|
||||
Custom credentials to use to retrieve this feature view.
|
||||
Overrides credentials set in aiplatform.init.
|
||||
"""
|
||||
|
||||
super().__init__(
|
||||
project=project,
|
||||
location=location,
|
||||
credentials=credentials,
|
||||
resource_name=name,
|
||||
)
|
||||
|
||||
if re.fullmatch(
|
||||
r"projects/.+/locations/.+/featureOnlineStores/.+/featureViews/.+",
|
||||
name,
|
||||
):
|
||||
feature_view = name
|
||||
else:
|
||||
from .feature_online_store import FeatureOnlineStore
|
||||
|
||||
# Construct the feature view path using feature online store ID if
|
||||
# only the feature view ID is provided.
|
||||
if not feature_online_store_id:
|
||||
raise ValueError(
|
||||
"Since feature view is not provided as a path, please specify"
|
||||
+ " feature_online_store_id."
|
||||
)
|
||||
|
||||
feature_online_store_path = utils.full_resource_name(
|
||||
resource_name=feature_online_store_id,
|
||||
resource_noun=FeatureOnlineStore._resource_noun,
|
||||
parse_resource_name_method=FeatureOnlineStore._parse_resource_name,
|
||||
format_resource_name_method=FeatureOnlineStore._format_resource_name,
|
||||
)
|
||||
|
||||
feature_view = f"{feature_online_store_path}/featureViews/{name}"
|
||||
|
||||
self._gca_resource = self._get_gca_resource(resource_name=feature_view)
|
||||
|
||||
def _get_online_store_client(
|
||||
self, connection_options: Optional[fs_utils.ConnectionOptions] = None
|
||||
) -> utils.FeatureOnlineStoreClientWithOverride:
|
||||
"""Return the online store client.
|
||||
|
||||
Also sets the `_online_store_client` attr if not set yet. Note that if
|
||||
`connection_options` is passed in, the `_online_store_client` attr will
|
||||
not be set - only the client will be returned. If the same
|
||||
`connection_options` is passed in, this code will return the same
|
||||
(cached) client as previously built.
|
||||
"""
|
||||
if getattr(self, "_online_store_client", None):
|
||||
return self._online_store_client
|
||||
|
||||
fos_name = fs_utils.get_feature_online_store_name(self.resource_name)
|
||||
from .feature_online_store import FeatureOnlineStore
|
||||
|
||||
fos = FeatureOnlineStore(name=fos_name)
|
||||
|
||||
if connection_options:
|
||||
# Check if we have a previously client created for these
|
||||
# connection_options.
|
||||
if self._online_store_clients_with_connection_options is None:
|
||||
self._online_store_clients_with_connection_options = {}
|
||||
if connection_options in self._online_store_clients_with_connection_options:
|
||||
return self._online_store_clients_with_connection_options[
|
||||
connection_options
|
||||
]
|
||||
host = connection_options.host
|
||||
|
||||
if isinstance(
|
||||
connection_options.transport,
|
||||
fs_utils.ConnectionOptions.InsecureGrpcChannel,
|
||||
):
|
||||
import grpc
|
||||
from google.cloud.aiplatform_v1.services import (
|
||||
feature_online_store_service as feature_online_store_service_v1,
|
||||
)
|
||||
from google.cloud.aiplatform_v1beta1.services import (
|
||||
feature_online_store_service as feature_online_store_service_v1beta1,
|
||||
)
|
||||
|
||||
gapic_client_class = (
|
||||
utils.FeatureOnlineStoreClientWithOverride.get_gapic_client_class()
|
||||
)
|
||||
gapic_client_class_to_transport_class = {
|
||||
feature_online_store_service_v1.client.FeatureOnlineStoreServiceClient: (
|
||||
feature_online_store_service_v1.transports.grpc.FeatureOnlineStoreServiceGrpcTransport
|
||||
),
|
||||
feature_online_store_service_v1beta1.client.FeatureOnlineStoreServiceClient: (
|
||||
feature_online_store_service_v1beta1.transports.grpc.FeatureOnlineStoreServiceGrpcTransport
|
||||
),
|
||||
}
|
||||
if gapic_client_class not in gapic_client_class_to_transport_class:
|
||||
raise ValueError(
|
||||
f"Unexpected gapic class '{gapic_client_class}' used by internal client."
|
||||
)
|
||||
|
||||
transport_class = gapic_client_class_to_transport_class[
|
||||
gapic_client_class
|
||||
]
|
||||
|
||||
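# Build the client over an insecure gRPC channel to the user-supplied
# host; the serving port is fixed to 10002 here.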
client = gapic_client_class(
|
||||
transport=transport_class(
|
||||
channel=grpc.insecure_channel(host + ":10002")
|
||||
),
|
||||
)
|
||||
|
||||
self._online_store_clients_with_connection_options[
|
||||
connection_options
|
||||
] = client
|
||||
return client
|
||||
else:
|
||||
raise ValueError(
|
||||
f"Unsupported connection transport type, got transport: {connection_options.transport}"
|
||||
)
|
||||
|
||||
if fos._gca_resource.bigtable.auto_scaling:
|
||||
# This is Bigtable online store.
|
||||
_LOGGER.info(f"Connecting to Bigtable online store name {fos_name}")
|
||||
self._online_store_client = initializer.global_config.create_client(
|
||||
client_class=utils.FeatureOnlineStoreClientWithOverride,
|
||||
credentials=self.credentials,
|
||||
location_override=self.location,
|
||||
)
|
||||
return self._online_store_client
|
||||
|
||||
if (
|
||||
fos._gca_resource.dedicated_serving_endpoint.private_service_connect_config.enable_private_service_connect
|
||||
):
|
||||
raise ValueError(
|
||||
"Use `connection_options` to specify an IP address. Required for optimized online store with private service connect."
|
||||
)
|
||||
|
||||
# From here, optimized serving with public endpoint.
|
||||
if not fos._gca_resource.dedicated_serving_endpoint.public_endpoint_domain_name:
|
||||
raise fs_utils.PublicEndpointNotFoundError(
|
||||
"Public endpoint is not created yet for the optimized online store:"
|
||||
f"{fos_name}. Please run sync and wait for it to complete."
|
||||
)
|
||||
|
||||
_LOGGER.info(
|
||||
f"Public endpoint for the optimized online store {fos_name} is"
|
||||
f" {fos._gca_resource.dedicated_serving_endpoint.public_endpoint_domain_name}"
|
||||
)
|
||||
self._online_store_client = initializer.global_config.create_client(
|
||||
client_class=utils.FeatureOnlineStoreClientWithOverride,
|
||||
credentials=self.credentials,
|
||||
location_override=self.location,
|
||||
prediction_client=True,
|
||||
api_path_override=fos._gca_resource.dedicated_serving_endpoint.public_endpoint_domain_name,
|
||||
)
|
||||
return self._online_store_client
|
||||
|
||||
@classmethod
|
||||
def list(
|
||||
cls,
|
||||
feature_online_store_id: str,
|
||||
filter: Optional[str] = None,
|
||||
project: Optional[str] = None,
|
||||
location: Optional[str] = None,
|
||||
credentials: Optional[auth_credentials.Credentials] = None,
|
||||
) -> List["FeatureView"]:
|
||||
"""List all feature view under feature_online_store_id.
|
||||
|
||||
Example Usage:
|
||||
```
|
||||
feature_views = vertexai.preview.FeatureView.list(
|
||||
feature_online_store_id="my_fos",
|
||||
filter='labels.label_key=label_value')
|
||||
```
|
||||
Args:
|
||||
feature_online_store_id:
|
||||
Parent feature online store ID.
|
||||
filter:
|
||||
Filter to apply on the returned feature views.
|
||||
project:
|
||||
Project to use to get a list of feature views. If unset, the
|
||||
project set in aiplatform.init will be used.
|
||||
location:
|
||||
Location to use to get a list of feature views. If not set,
|
||||
location set in aiplatform.init will be used.
|
||||
credentials:
|
||||
Custom credentials to use to get a list of feature views.
|
||||
Overrides credentials set in aiplatform.init.
|
||||
|
||||
Returns:
|
||||
List[FeatureView] - list of FeatureView resource objects.
|
||||
"""
|
||||
from .feature_online_store import FeatureOnlineStore
|
||||
|
||||
fos = FeatureOnlineStore(
|
||||
name=feature_online_store_id,
|
||||
project=project,
|
||||
location=location,
|
||||
credentials=credentials,
|
||||
)
|
||||
return cls._list(
|
||||
filter=filter, credentials=credentials, parent=fos.resource_name
|
||||
)
|
||||
|
||||
@base.optional_sync()
|
||||
def delete(self, sync: bool = True) -> None:
|
||||
"""Deletes this feature view.
|
||||
|
||||
WARNING: This deletion is permanent.
|
||||
|
||||
Args:
|
||||
sync:
|
||||
Whether to execute this deletion synchronously. If False, this
|
||||
method will be executed in concurrent Future and any downstream
|
||||
object will be immediately returned and synced when the Future
|
||||
has completed.
|
||||
"""
|
||||
lro = getattr(self.api_client, self._delete_method)(name=self.resource_name)
|
||||
_LOGGER.log_delete_with_lro(self, lro)
|
||||
lro.result()
|
||||
_LOGGER.log_delete_complete(self)
|
||||
|
||||
def sync(self) -> "FeatureViewSync":
|
||||
"""Starts an on-demand Sync for the FeatureView.
|
||||
|
||||
|
||||
Returns:
|
||||
"FeatureViewSync" - FeatureViewSync instance
|
||||
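
Example Usage (an illustrative sketch):

```
fv = vertexai.preview.FeatureView(
    'my_fv', feature_online_store_id='my_fos')
fv_sync = fv.sync()
```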
"""
|
||||
sync_method = getattr(self.api_client, self.FeatureViewSync.sync_method())
|
||||
|
||||
sync_request = {
|
||||
"feature_view": self.resource_name,
|
||||
}
|
||||
sync_response = sync_method(request=sync_request)
|
||||
|
||||
return self.FeatureViewSync(name=sync_response.feature_view_sync)
|
||||
|
||||
def get_sync(self, name) -> "FeatureViewSync":
|
||||
"""Gets the FeatureViewSync resource for the given name.
|
||||
|
||||
Args:
|
||||
name: The resource ID
|
||||
|
||||
Returns:
|
||||
"FeatureViewSync" - FeatureViewSync instance
|
||||
"""
|
||||
feature_view_path = self.resource_name
|
||||
feature_view_sync = f"{feature_view_path}/featureViewSyncs/{name}"
|
||||
return self.FeatureViewSync(name=feature_view_sync)
|
||||
|
||||
def list_syncs(
|
||||
self,
|
||||
filter: Optional[str] = None,
|
||||
) -> List["FeatureViewSync"]:
|
||||
"""List all feature view under this FeatureView.
|
||||
|
||||
Args:
|
||||
filter: Filter to apply on the returned feature view syncs.
|
||||
|
||||
Returns:
|
||||
List[FeatureViewSync] - list of FeatureViewSync resource objects.
|
||||
"""
|
||||
|
||||
return self.FeatureViewSync._list(
|
||||
filter=filter, credentials=self.credentials, parent=self.resource_name
|
||||
)
|
||||
|
||||
def read(
|
||||
self,
|
||||
key: List[str],
|
||||
connection_options: Optional[fs_utils.ConnectionOptions] = None,
|
||||
request_timeout: Optional[float] = None,
|
||||
) -> fs_utils.FeatureViewReadResponse:
|
||||
"""Read the feature values from FeatureView.
|
||||
|
||||
Example Usage:
|
||||
Read feature view. Use this for Bigtable online stores and for
|
||||
Optimized online stores that use public endpoint.
|
||||
```
|
||||
data = (
    vertexai.preview.FeatureView(
        name='feature_view_name', feature_online_store_id='fos_name')
    .read(key=['12345', '6789'])
    .to_dict()
)
|
||||
```
|
||||
|
||||
Read feature view using IP with an insecure gRPC channel. Use this
|
||||
for optimized online stores using private service connect.
|
||||
```
|
||||
data = (
    vertexai.preview.FeatureView(
        name='feature_view_name', feature_online_store_id='fos_name')
    .read(
        key=['12345', '6789'],
        connection_options=fs_utils.ConnectionOptions(
            host="<ip>",
            transport=fs_utils.ConnectionOptions.InsecureGrpcChannel()))
    .to_dict()
)
|
||||
```
|
||||
Args:
|
||||
key: The request key to read feature values for.
|
||||
connection_options:
|
||||
If specified, use these options to connect to a host for sending
|
||||
requests instead of the default
|
||||
`<region>-aiplatform.googleapis.com` or the feature online
|
||||
store's public endpoint.
|
||||
|
||||
Returns:
|
||||
"FeatureViewReadResponse" - FeatureViewReadResponse object. It is
|
||||
intermediate class that can be further converted by to_dict() or
|
||||
to_proto().
|
||||
"""
|
||||
self.wait()
|
||||
|
||||
online_store_client = self._get_online_store_client(
|
||||
connection_options=connection_options
|
||||
)
|
||||
|
||||
response = online_store_client.fetch_feature_values(
|
||||
feature_view=self.resource_name,
|
||||
data_key=fos_service.FeatureViewDataKey(
|
||||
composite_key=fos_service.FeatureViewDataKey.CompositeKey(parts=key)
|
||||
),
|
||||
timeout=request_timeout,
|
||||
)
|
||||
return fs_utils.FeatureViewReadResponse(response)
|
||||
|
||||
def search(
|
||||
self,
|
||||
entity_id: Optional[str] = None,
|
||||
embedding_value: Optional[List[float]] = None,
|
||||
neighbor_count: Optional[int] = None,
|
||||
string_filters: Optional[
|
||||
List[fos_service.NearestNeighborQuery.StringFilter]
|
||||
] = None,
|
||||
per_crowding_attribute_neighbor_count: Optional[int] = None,
|
||||
return_full_entity: bool = False,
|
||||
approximate_neighbor_candidates: Optional[int] = None,
|
||||
leaf_nodes_search_fraction: Optional[float] = None,
|
||||
request_timeout: Optional[float] = None,
|
||||
) -> fs_utils.SearchNearestEntitiesResponse:
|
||||
"""Search the nearest entities from FeatureView.
|
||||
|
||||
Example Usage:
|
||||
```
|
||||
data = (
    vertexai.preview.FeatureView(
        name='feature_view_name', feature_online_store_id='fos_name')
    .search(entity_id='sample_entity')
    .to_dict()
)
|
||||
```
|
||||
Args:
|
||||
entity_id: The entity id whose similar entities should be searched
|
||||
for.
|
||||
embedding_value: The embedding vector that be used for similar
|
||||
search.
|
||||
neighbor_count: The number of similar entities to be retrieved
|
||||
from feature view for each query.
|
||||
string_filters: The list of string filters.
|
||||
per_crowding_attribute_neighbor_count: Crowding is a constraint on a
|
||||
neighbor list produced by nearest neighbor search requiring that
|
||||
no more than per_crowding_attribute_neighbor_count of the k
|
||||
neighbors returned have the same value of crowding_attribute.
|
||||
It's used for improving result diversity.
|
||||
return_full_entity: If true, return full entities including the
|
||||
features other than embeddings.
|
||||
approximate_neighbor_candidates: The number of neighbors to find via
|
||||
approximate search before exact reordering is performed; if set,
|
||||
this value must be > neighbor_count.
|
||||
leaf_nodes_search_fraction: The fraction of the number of leaves to
|
||||
search, set at query time to let users tune search performance.
Increasing this value increases both search accuracy and latency.
The value should be between 0.0 and 1.0.
|
||||
|
||||
Returns:
|
||||
"SearchNearestEntitiesResponse" - SearchNearestEntitiesResponse
|
||||
object. It is an intermediate class that can be further converted by
|
||||
to_dict() or to_proto()
|
||||
"""
|
||||
self.wait()
|
||||
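# Exactly one of entity_id or embedding_value drives the query;
# entity_id takes precedence when both are provided.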
if entity_id:
|
||||
embedding = None
|
||||
elif embedding_value:
|
||||
embedding = fos_service.NearestNeighborQuery.Embedding(
|
||||
value=embedding_value
|
||||
)
|
||||
else:
|
||||
raise ValueError(
|
||||
"Either entity_id or embedding_value needs to be provided for search."
|
||||
)
|
||||
response = self._get_online_store_client().search_nearest_entities(
|
||||
request=fos_service.SearchNearestEntitiesRequest(
|
||||
feature_view=self.resource_name,
|
||||
query=fos_service.NearestNeighborQuery(
|
||||
entity_id=entity_id,
|
||||
embedding=embedding,
|
||||
neighbor_count=neighbor_count,
|
||||
string_filters=string_filters,
|
||||
per_crowding_attribute_neighbor_count=per_crowding_attribute_neighbor_count, # pylint: disable=line-too-long
|
||||
parameters=fos_service.NearestNeighborQuery.Parameters(
|
||||
approximate_neighbor_candidates=approximate_neighbor_candidates,
|
||||
leaf_nodes_search_fraction=leaf_nodes_search_fraction,
|
||||
),
|
||||
),
|
||||
return_full_entity=return_full_entity,
|
||||
),
|
||||
timeout=request_timeout,
|
||||
)
|
||||
return fs_utils.SearchNearestEntitiesResponse(response)
|
||||
|
||||
class FeatureViewSync(base.VertexAiResourceNounWithFutureManager):
|
||||
"""Class for managing Feature View Sync resources."""
|
||||
|
||||
client_class = utils.FeatureOnlineStoreAdminClientWithOverride
|
||||
|
||||
_resource_noun = "featureViewSyncs"
|
||||
_getter_method = "get_feature_view_sync"
|
||||
_list_method = "list_feature_view_syncs"
|
||||
_delete_method = "delete_feature_view"
|
||||
_sync_method = "sync_feature_view"
|
||||
_parse_resource_name_method = "parse_feature_view_sync_path"
|
||||
_format_resource_name_method = "feature_view_sync_path"
|
||||
_gca_resource: gca_feature_view_sync.FeatureViewSync
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
name: str,
|
||||
project: Optional[str] = None,
|
||||
location: Optional[str] = None,
|
||||
credentials: Optional[auth_credentials.Credentials] = None,
|
||||
):
|
||||
"""Retrieves an existing managed feature view sync.
|
||||
|
||||
Args:
|
||||
name: The resource name
|
||||
(`projects/.../locations/.../featureOnlineStores/.../featureViews/.../featureViewSyncs/...`)
|
||||
project: Project to retrieve the feature view from. If unset, the
|
||||
project set in aiplatform.init will be used.
|
||||
location: Location to retrieve the feature view from. If not set,
|
||||
location set in aiplatform.init will be used.
|
||||
credentials: Custom credentials to use to retrieve this feature view.
|
||||
Overrides credentials set in aiplatform.init.
|
||||
"""
|
||||
super().__init__(
|
||||
project=project,
|
||||
location=location,
|
||||
credentials=credentials,
|
||||
resource_name=name,
|
||||
)
|
||||
|
||||
if not re.fullmatch(
|
||||
r"projects/.+/locations/.+/featureOnlineStores/.+/featureViews/.+/featureViewSyncs/.+",
|
||||
name,
|
||||
):
|
||||
raise ValueError(
|
||||
"name need to specify the fully qualified"
|
||||
+ " feature_view_sync resource path."
|
||||
)
|
||||
|
||||
self._gca_resource = getattr(self.api_client, self._getter_method)(
|
||||
name=name, retry=base._DEFAULT_RETRY
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def sync_method(cls) -> str:
|
||||
"""Returns the sync method."""
|
||||
return cls._sync_method
|
||||
@@ -0,0 +1,291 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2024 Google LLC
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
import datetime
|
||||
import re
|
||||
|
||||
from typing import Optional, List, Tuple, Union, TYPE_CHECKING
|
||||
from google.auth import credentials as auth_credentials
|
||||
from vertexai.resources.preview.feature_store import (
|
||||
FeatureGroup,
|
||||
Feature,
|
||||
)
|
||||
from google.cloud.aiplatform import initializer, __version__
|
||||
|
||||
from . import _offline_store_impl as impl
|
||||
|
||||
|
||||
if TYPE_CHECKING:
|
||||
try:
|
||||
import bigframes
|
||||
except ImportError:
|
||||
bigframes = None
|
||||
|
||||
try:
|
||||
import pandas as pd
|
||||
except ImportError:
|
||||
pd = None
|
||||
|
||||
|
||||
def _try_import_bigframes():
|
||||
"""Try to import `bigframes` and return it if successful - otherwise raise an import error."""
|
||||
try:
|
||||
import bigframes
|
||||
import bigframes.pandas
|
||||
|
||||
return bigframes
|
||||
except ImportError as exc:
|
||||
raise ImportError(
|
||||
"`bigframes` is not installed but required for this functionality."
|
||||
) from exc
|
||||
|
||||
|
||||
def _get_feature_group_from_feature(
|
||||
feature: Feature, credentials: auth_credentials.Credentials
|
||||
):
|
||||
"""Given a feature, return the feature group resource."""
|
||||
result = re.fullmatch(
|
||||
r"projects/(?P<project>.+)/locations/(?P<location>.+)/featureGroups/(?P<feature_group>.+)/features/.+",
|
||||
feature.resource_name,
|
||||
)
|
||||
|
||||
if not result:
|
||||
raise ValueError("Couldn't find feature group in feature.")
|
||||
|
||||
project = feature.project
|
||||
location = feature.location
|
||||
feature_group = result.group("feature_group")
|
||||
|
||||
return FeatureGroup(
|
||||
feature_group, project=project, location=location, credentials=credentials
|
||||
)
|
||||
|
||||
|
||||
def _extract_feature_from_str_repr(
|
||||
str_feature: str, credentials: auth_credentials.Credentials
|
||||
) -> Tuple[FeatureGroup, Feature]:
|
||||
"""Given a feature in string representation, return the feature and feature group."""
|
||||
# TODO: compile expr + place it in a constant
|
||||
result = re.fullmatch(
|
||||
r"((?P<project>.*)\.)?(?P<feature_group>.*)\.(?P<feature>.*)",
|
||||
str_feature,
|
||||
)
|
||||
if not result:
|
||||
raise ValueError(
|
||||
f"Feature '{str_feature}' is a string but not in expected format 'feature_group.feature' or 'project.feature_group.feature'."
|
||||
)
|
||||
|
||||
feature_group = FeatureGroup(
|
||||
result.group("feature_group"),
|
||||
project=result.group("project"), # None if no match.
|
||||
credentials=credentials,
|
||||
)
|
||||
feature = feature_group.get_feature(result.group("feature"))
|
||||
|
||||
return (feature_group, feature)
|
||||
|
||||
|
||||
def _feature_to_data_source(
|
||||
feature_group: FeatureGroup, feature: Feature
|
||||
) -> impl.DataSource:
|
||||
qualifying_name = f"{feature_group.name}__{feature.name}"
|
||||
gbq_column = feature.version_column_name
|
||||
assert gbq_column
|
||||
|
||||
column_name = feature.name
|
||||
assert column_name
|
||||
|
||||
timestamp_column = "feature_timestamp"
|
||||
|
||||
# TODO: Expose entity_id_columns as a property in FeatureGroup
|
||||
entity_id_columns = feature_group._gca_resource.big_query.entity_id_columns
|
||||
assert entity_id_columns
|
||||
|
||||
bq_uri = feature_group._gca_resource.big_query.big_query_source.input_uri
|
||||
assert bq_uri
|
||||
|
||||
fully_qualified_table = bq_uri.lstrip("bq://")
|
||||
assert fully_qualified_table
|
||||
|
||||
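# Render a per-feature subquery: select the entity ID columns, alias the
# versioned data column back to the feature name, and keep the feature
# timestamp for the point-in-time join.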
query = (
|
||||
f"SELECT\n"
|
||||
f' {", ".join(entity_id_columns)},\n'
|
||||
f" {gbq_column} AS {column_name},\n"
|
||||
f" {timestamp_column}\n"
|
||||
f"FROM {fully_qualified_table}"
|
||||
)
|
||||
|
||||
return impl.DataSource(
|
||||
qualifying_name=qualifying_name,
|
||||
sql=query,
|
||||
data_columns=[column_name],
|
||||
# TODO: this will be parameterized in the future
|
||||
timestamp_column=timestamp_column,
|
||||
entity_id_columns=entity_id_columns,
|
||||
)
|
||||
|
||||
|
||||
class _DataFrameToBigQueryDataFramesConverter:
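"""Converts a pandas DataFrame into a BigQuery DataFrames (bigframes) DataFrame."""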
|
||||
@classmethod
|
||||
def to_bigquery_dataframe(
|
||||
cls, df: "pd.DataFrame", session: "Optional[bigframes.session.Session]" = None
|
||||
) -> "bigframes.pandas.DataFrame":
|
||||
bigframes = _try_import_bigframes()
|
||||
return bigframes.pandas.DataFrame(data=df, session=session)
|
||||
|
||||
|
||||
def fetch_historical_feature_values(
|
||||
entity_df: "bigframes.pandas.DataFrame",
|
||||
# TODO: Add support for FeatureView | FeatureGroup | bigframes.pandas.DataFrame
|
||||
features: List[Union[str, Feature]],
|
||||
# TODO: Add support for feature_age_threshold
|
||||
feature_age_threshold: Optional[datetime.timedelta] = None,
|
||||
dry_run: bool = False,
|
||||
project: Optional[str] = None,
|
||||
location: Optional[str] = None,
|
||||
credentials: Optional[auth_credentials.Credentials] = None,
|
||||
) -> "Union[bigframes.pandas.DataFrame, None]":
|
||||
"""Fetch historical data at the timestamp specified for each entity.
|
||||
|
||||
This runs a Point-In-Time Lookup (PITL) query in BigQuery across all
|
||||
features and returns the historical feature values. Feature data will be
|
||||
joined by matching their entity_id_column(s) with corresponding columns in
|
||||
the entity data frame.
|
||||
|
||||
Args:
|
||||
entity_df:
|
||||
An entity DataFrame where one/multiple columns have entity ID.
|
||||
One column should have a timestamp (used for feature lookup). Other
|
||||
columns may have feature data. Entity IDs may be repeated with
|
||||
different timestamp values (in the timestamp column) to look up data for
|
||||
entities at different points in time.
|
||||
features:
|
||||
Feature data will be joined with the entity data frame.
|
||||
* If `str` is given, use `feature_group_id.feature_id` as the format;
|
||||
`project_id.feature_group_id.feature_id` may be used if features are
|
||||
in another project.
|
||||
* If `FeatureView` is given, the *sources* of the FeatureView will be
|
||||
used - but data will be read from the backing BigQuery table.
|
||||
feature_age_threshold:
|
||||
How far back from the timestamp to look for feature values. If no
|
||||
feature values are found, an empty/null value will be populated.
|
||||
dry_run:
|
||||
Build the Point-In-Time Lookup (PITL) query but don't run it. The PITL
|
||||
query will be printed to stdout.
|
||||
project:
|
||||
The project to use for feature lookup and running the Point-In-Time
|
||||
Lookup (PITL) query in BigQuery. If unset, the project set in
|
||||
aiplatform.init will be used.
|
||||
location:
|
||||
The location to use for feature lookup and running the Point-In-Time
|
||||
Lookup (PITL) query in BigQuery. If unset, the location set in
|
||||
aiplatform.init will be used.
|
||||
credentials:
|
||||
Custom credentials to use for feature lookup and running the
|
||||
Point-In-Time Lookup (PITL) query in BigQuery. Overrides credentials
|
||||
set in aiplatform.init.
|
||||
|
||||
Returns:
|
||||
A `bigframes.pandas.DataFrame` with the historical feature values. `None`
|
||||
if in `dry_run` mode.
|
||||
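
Example Usage (an illustrative sketch; the import path and all data and
feature names here are assumed, not taken from this module):

```
import pandas as pd
from vertexai.resources.preview.feature_store import offline_store  # assumed path

entity_df = pd.DataFrame(
    {
        "customer_id": ["c1", "c2"],
        "timestamp": pd.to_datetime(["2024-01-01", "2024-01-02"]),
    }
)
df = offline_store.fetch_historical_feature_values(
    entity_df=entity_df,
    features=["my_feature_group.my_feature"],
)
```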
"""
|
||||
|
||||
bigframes = _try_import_bigframes()
|
||||
project = project or initializer.global_config.project
|
||||
location = location or initializer.global_config.location
|
||||
credentials = credentials or initializer.global_config.credentials
|
||||
application_name = (
|
||||
f"vertexai-offline-store/{__version__}+fetch-historical-feature-values"
|
||||
)
|
||||
session_options = bigframes.BigQueryOptions(
|
||||
credentials=credentials,
|
||||
project=project,
|
||||
location=location,
|
||||
application_name=application_name,
|
||||
)
|
||||
session = bigframes.connect(session_options)
|
||||
|
||||
if feature_age_threshold is not None:
|
||||
raise NotImplementedError("feature_age_threshold is not yet supported.")
|
||||
|
||||
if not features:
|
||||
raise ValueError("Please specify a non-empty list of features.")
|
||||
|
||||
# Convert to bigframe if needed.
|
||||
if not isinstance(entity_df, bigframes.pandas.DataFrame):
|
||||
entity_df = _DataFrameToBigQueryDataFramesConverter.to_bigquery_dataframe(
|
||||
df=entity_df,
|
||||
session=session,
|
||||
)
|
||||
|
||||
# Ensure one timestamp column is present in the entity DataFrame.
|
||||
ts_cols = entity_df.select_dtypes(include=["datetime"]).columns
|
||||
if len(ts_cols) > 1:
|
||||
# TODO: Support multiple timestamp columns by specifying feature_timestamp column in an override.
|
||||
raise ValueError(
|
||||
'Multiple timestamp columns ("datetime" dtype) found in entity DataFrame. '
|
||||
"Only one timestamp column is allowed. "
|
||||
f"Timestamp columns: {', '.join([col for col in ts_cols])}"
|
||||
)
|
||||
elif len(ts_cols) == 0:
|
||||
raise ValueError(
|
||||
'No timestamp column ("datetime" dtype) found in entity DataFrame.'
|
||||
)
|
||||
entity_df_ts_col = ts_cols[0]
|
||||
entity_df_non_ts_cols = [c for c in entity_df.columns if c != entity_df_ts_col]
|
||||
entity_data_source = impl.DataSource(
|
||||
qualifying_name="entity_df",
|
||||
sql=entity_df.sql,
|
||||
data_columns=entity_df_non_ts_cols,
|
||||
timestamp_column=entity_df_ts_col,
|
||||
)
|
||||
|
||||
feature_data: List[impl.DataSource] = []
|
||||
for feature in features:
|
||||
if isinstance(feature, Feature):
|
||||
feature_group = _get_feature_group_from_feature(feature, credentials)
|
||||
feature_data.append(_feature_to_data_source(feature_group, feature))
|
||||
elif isinstance(feature, str):
|
||||
feature_group, feature = _extract_feature_from_str_repr(
|
||||
feature, credentials
|
||||
)
|
||||
feature_data.append(_feature_to_data_source(feature_group, feature))
|
||||
else:
|
||||
raise ValueError(
|
||||
f"Unsupported feature type {type(feature)} found in feature list. Feature: {feature}"
|
||||
)
|
||||
|
||||
# TODO: Verify `feature_data`.
|
||||
# * Ensure that qualifying_names are not interfering.
|
||||
# * Ensure that feature names are not interfering.
|
||||
# * Ensure that entity id columns of all features are present in the entity DF.
|
||||
|
||||
query = impl.render_pitl_query(
|
||||
entity_data=entity_data_source,
|
||||
feature_data=feature_data,
|
||||
)
|
||||
|
||||
if dry_run:
|
||||
print("--- Dry run mode: PITL QUERY BEGIN ---")
|
||||
print(query)
|
||||
print("--- Dry run mode: PITL QUERY END ---")
|
||||
return None
|
||||
|
||||
return session.read_gbq_query(
|
||||
query,
|
||||
index_col=bigframes.enums.DefaultIndexKind.NULL,
|
||||
)
|
||||
@@ -0,0 +1,233 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2024 Google LLC
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
import abc
|
||||
from dataclasses import dataclass
|
||||
from dataclasses import field
|
||||
import enum
|
||||
from typing import Any, Dict, List, Optional, Union
|
||||
from google.cloud.aiplatform.compat.types import (
|
||||
feature_online_store_service as fos_service,
|
||||
)
|
||||
import proto
|
||||
from typing_extensions import override
|
||||
|
||||
|
||||
def get_feature_online_store_name(online_store_name: str) -> str:
|
||||
"""Extract Feature Online Store's name from FeatureView's full resource name.
|
||||
|
||||
Args:
|
||||
online_store_name: The FeatureView's full resource name, in the form projects/project_number/
|
||||
locations/us-central1/featureOnlineStores/fos_name/featureViews/fv_name
|
||||
|
||||
Returns:
|
||||
str: feature online store name.
|
||||
"""
|
||||
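# After splitting on "/", the segments are:
#   ["projects", <project>, "locations", <location>,
#    "featureOnlineStores", <store>, "featureViews", <view>]
# so index 5 holds the online store ID.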
arr = online_store_name.split("/")
|
||||
return arr[5]
|
||||
|
||||
|
||||
class PublicEndpointNotFoundError(RuntimeError):
|
||||
"""Public endpoint has not been created yet."""
|
||||
|
||||
|
||||
@dataclass
|
||||
class FeatureViewBigQuerySource:
|
||||
uri: str
|
||||
entity_id_columns: List[str]
|
||||
|
||||
|
||||
@dataclass
|
||||
class FeatureViewVertexRagSource:
|
||||
uri: str
|
||||
rag_corpus_id: Optional[str] = None
|
||||
|
||||
|
||||
@dataclass
|
||||
class FeatureViewRegistrySource:
|
||||
"""Configuration options for Feature View being registered with Feature Registry features.
|
||||
|
||||
Attributes:
|
||||
features: Use `<feature_group_id>.<feature_id>` as
|
||||
the format for each feature.
|
||||
project_number: Optional. The project number of the project that owns the
|
||||
Feature Registry if in a different project.
|
||||
"""
|
||||
|
||||
features: List[str]
|
||||
project_number: Optional[int] = None
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class ConnectionOptions:
|
||||
"""Represents connection options used for sending RPCs to the online store."""
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class InsecureGrpcChannel:
|
||||
"""Use an insecure gRPC channel to connect to the host."""
|
||||
|
||||
pass
|
||||
|
||||
host: str # IP address or DNS.
|
||||
transport: Union[
|
||||
InsecureGrpcChannel
|
||||
] # Currently only insecure gRPC channel is supported.
|
||||
|
||||
def __eq__(self, other):
|
||||
if self.host != other.host:
|
||||
return False
|
||||
|
||||
if isinstance(self.transport, ConnectionOptions.InsecureGrpcChannel):
|
||||
# Insecure grpc channel has no other parameters to check.
|
||||
if isinstance(other.transport, ConnectionOptions.InsecureGrpcChannel):
|
||||
return True
|
||||
|
||||
# Otherwise, can't compare against a different transport type.
|
||||
raise ValueError(
|
||||
f"Transport '{self.transport}' cannot be compared to transport '{other.transport}'."
|
||||
)
|
||||
|
||||
# Currently only InsecureGrpcChannel is supported.
|
||||
raise ValueError(f"Unsupported transport supplied: {self.transport}")
|
||||
|
||||
|
||||
@dataclass
|
||||
class FeatureViewReadResponse:
|
||||
_response: fos_service.FetchFeatureValuesResponse
|
||||
|
||||
def __init__(self, response: fos_service.FetchFeatureValuesResponse):
|
||||
self._response = response
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
return proto.Message.to_dict(self._response.key_values)
|
||||
|
||||
def to_proto(self) -> fos_service.FetchFeatureValuesResponse:
|
||||
return self._response
|
||||
|
||||
|
||||
@dataclass
|
||||
class SearchNearestEntitiesResponse:
|
||||
_response: fos_service.SearchNearestEntitiesResponse
|
||||
|
||||
def __init__(self, response: fos_service.SearchNearestEntitiesResponse):
|
||||
self._response = response
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
return proto.Message.to_dict(self._response.nearest_neighbors)
|
||||
|
||||
def to_proto(self) -> fos_service.SearchNearestEntitiesResponse:
|
||||
return self._response
|
||||
|
||||
|
||||
class DistanceMeasureType(enum.Enum):
|
||||
"""The distance measure used in nearest neighbor search."""
|
||||
|
||||
DISTANCE_MEASURE_TYPE_UNSPECIFIED = 0
|
||||
# Euclidean (L_2) Distance.
|
||||
SQUARED_L2_DISTANCE = 1
|
||||
# Cosine Distance. Defined as 1 - cosine similarity.
|
||||
COSINE_DISTANCE = 2
|
||||
# Dot Product Distance. Defined as a negative of the dot product.
|
||||
DOT_PRODUCT_DISTANCE = 3
|
||||
|
||||
|
||||
class AlgorithmConfig(abc.ABC):
|
||||
"""Base class for configuration options for matching algorithm."""
|
||||
|
||||
@abc.abstractmethod
def as_dict(self) -> Dict[str, Any]:
|
||||
"""Returns the configuration as a dictionary.
|
||||
|
||||
Returns:
|
||||
Dict[str, Any]
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
@dataclass
|
||||
class TreeAhConfig(AlgorithmConfig):
|
||||
"""Configuration options for using the tree-AH algorithm (Shallow tree + Asymmetric Hashing).
|
||||
|
||||
Please refer to this paper for more details: https://arxiv.org/abs/1908.10396
|
||||
|
||||
Args:
|
||||
leaf_node_embedding_count (int): Optional. Number of embeddings on each
|
||||
leaf node. The default value is 1000 if not set.
|
||||
"""
|
||||
|
||||
leaf_node_embedding_count: Optional[int] = None
|
||||
|
||||
@override
|
||||
def as_dict(self) -> Dict:
|
||||
return {"leaf_node_embedding_count": self.leaf_node_embedding_count}
|
||||
|
||||
|
||||
@dataclass
|
||||
class BruteForceConfig(AlgorithmConfig):
|
||||
"""Configuration options for using brute force search.
|
||||
|
||||
It simply implements the standard linear search in the database for each
|
||||
query.
|
||||
"""
|
||||
|
||||
@override
|
||||
def as_dict(self) -> Dict[str, Any]:
|
||||
return {"bruteForceConfig": {}}
|
||||
|
||||
|
||||


@dataclass
class IndexConfig:
    """Configuration options for the Vertex FeatureView for embedding."""

    embedding_column: str
    dimensions: int
    algorithm_config: AlgorithmConfig = field(default_factory=TreeAhConfig)
    filter_columns: Optional[List[str]] = None
    crowding_column: Optional[str] = None
    distance_measure_type: Optional[DistanceMeasureType] = None

    def as_dict(self) -> Dict[str, Any]:
        """Returns the configuration as a dictionary.

        Returns:
            Dict[str, Any]
        """
        config = {
            "embedding_column": self.embedding_column,
            "embedding_dimension": self.dimensions,
        }
        if self.distance_measure_type is not None:
            config["distance_measure_type"] = self.distance_measure_type.value
        if self.filter_columns is not None:
            config["filter_columns"] = self.filter_columns
        if self.crowding_column is not None:
            config["crowding_column"] = self.crowding_column

        if isinstance(self.algorithm_config, TreeAhConfig):
            config["tree_ah_config"] = self.algorithm_config.as_dict()
        else:
            config["brute_force_config"] = self.algorithm_config.as_dict()
        return config
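

# A minimal sketch of composing an IndexConfig; the column name and
# dimension below are hypothetical placeholders.
def _example_index_config() -> Dict[str, Any]:
    config = IndexConfig(
        embedding_column="embedding",
        dimensions=768,
        algorithm_config=TreeAhConfig(leaf_node_embedding_count=500),
        distance_measure_type=DistanceMeasureType.COSINE_DISTANCE,
    )
    # as_dict() yields the request payload, e.g.
    # {"embedding_column": "embedding", "embedding_dimension": 768,
    #  "distance_measure_type": 2,
    #  "tree_ah_config": {"leaf_node_embedding_count": 500}}
    return config.as_dict()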


@dataclass
class FeatureGroupBigQuerySource:
    """BigQuery source for the Feature Group."""

    # The URI for the BigQuery table/view.
    uri: str
    # The entity ID columns. If not specified, defaults to ['entity_id'].
    entity_id_columns: Optional[List[str]] = None
@@ -0,0 +1,26 @@
# -*- coding: utf-8 -*-

# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from vertexai.resources.preview.ml_monitoring.model_monitors import (
    ModelMonitor,
    ModelMonitoringJob,
)

__all__ = (
    "ModelMonitor",
    "ModelMonitoringJob",
)
Binary file not shown.
Binary file not shown.
File diff suppressed because it is too large
@@ -0,0 +1,46 @@
# -*- coding: utf-8 -*-

# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from vertexai.resources.preview.ml_monitoring.spec.notification import (
    NotificationSpec,
)
from vertexai.resources.preview.ml_monitoring.spec.objective import (
    FeatureAttributionSpec,
    DataDriftSpec,
    MonitoringInput,
    ObjectiveSpec,
    TabularObjective,
)
from vertexai.resources.preview.ml_monitoring.spec.output import (
    OutputSpec,
)
from vertexai.resources.preview.ml_monitoring.spec.schema import (
    FieldSchema,
    ModelMonitoringSchema,
)

__all__ = (
    "NotificationSpec",
    "OutputSpec",
    "ObjectiveSpec",
    "FeatureAttributionSpec",
    "DataDriftSpec",
    "MonitoringInput",
    "TabularObjective",
    "FieldSchema",
    "ModelMonitoringSchema",
)
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,74 @@
# -*- coding: utf-8 -*-

# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from typing import Optional, List

from google.cloud.aiplatform.compat.types import (
    model_monitoring_spec_v1beta1 as model_monitoring_spec,
)


class NotificationSpec:
    """Initializer for NotificationSpec.

    Args:
        user_emails (List[str]):
            Optional. The email addresses to send the alert to.
        notification_channels (List[str]):
            Optional. The notification channels to send the alert to.
            Format: ``projects/{project}/notificationChannels/{channel}``
        enable_cloud_logging (bool):
            Optional. Whether to dump the anomalies to Cloud Logging. The
            anomalies are put into the JSON payload, which can be further
            routed to Pub/Sub or any other service supported by Cloud
            Logging.
    """

    def __init__(
        self,
        user_emails: Optional[List[str]] = None,
        notification_channels: Optional[List[str]] = None,
        enable_cloud_logging: Optional[bool] = False,
    ):
        self.user_emails = user_emails
        self.notification_channels = notification_channels
        self.enable_cloud_logging = enable_cloud_logging

    def _as_proto(self) -> model_monitoring_spec.ModelMonitoringNotificationSpec:
        """Converts NotificationSpec to a proto message.

        Returns:
            The GAPIC representation of the notification alert config.
        """
        user_email_config = None
        if self.user_emails is not None:
            user_email_config = (
                model_monitoring_spec.ModelMonitoringNotificationSpec.EmailConfig(
                    user_emails=self.user_emails
                )
            )
        user_notification_channel_config = []
        if self.notification_channels:
            for notification_channel in self.notification_channels:
                user_notification_channel_config.append(
                    model_monitoring_spec.ModelMonitoringNotificationSpec.NotificationChannelConfig(
                        notification_channel=notification_channel
                    )
                )
        return model_monitoring_spec.ModelMonitoringNotificationSpec(
            email_config=user_email_config,
            notification_channel_configs=user_notification_channel_config,
            enable_cloud_logging=self.enable_cloud_logging,
        )
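

# A minimal sketch of an alert configuration; the email address and channel
# resource name below are hypothetical placeholders.
def _example_notification_spec() -> NotificationSpec:
    return NotificationSpec(
        user_emails=["ml-oncall@example.com"],
        notification_channels=[
            "projects/my-project/notificationChannels/12345",
        ],
        enable_cloud_logging=True,
    )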
@@ -0,0 +1,522 @@
# -*- coding: utf-8 -*-

# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from typing import Dict, List, Optional

from google.cloud.aiplatform.compat.types import (
    explanation_v1beta1 as explanation,
    machine_resources_v1beta1 as machine_resources,
    model_monitoring_alert_v1beta1 as model_monitoring_alert,
    model_monitoring_spec_v1beta1 as model_monitoring_spec,
)

from google.protobuf import timestamp_pb2
from google.type import interval_pb2


TF_RECORD = "tf-record"
CSV = "csv"
JSONL = "jsonl"
JENSEN_SHANNON_DIVERGENCE = "jensen_shannon_divergence"
L_INFINITY = "l_infinity"
SUPPORTED_NUMERIC_METRICS = [JENSEN_SHANNON_DIVERGENCE]
SUPPORTED_CATEGORICAL_METRICS = [JENSEN_SHANNON_DIVERGENCE, L_INFINITY]


class DataDriftSpec:
    """Data drift monitoring spec.

    Data drift measures the distribution distance between the current dataset
    and a baseline dataset. A typical use case is to detect data drift between
    the recent production serving dataset and the training dataset, or to
    compare the recent production dataset with a dataset from a previous
    period.

    Example:
        feature_drift_spec=DataDriftSpec(
            features=["feature1"],
            categorical_metric_type="l_infinity",
            numeric_metric_type="jensen_shannon_divergence",
            default_categorical_alert_threshold=0.01,
            default_numeric_alert_threshold=0.02,
            feature_alert_thresholds={"feature1": 0.02, "feature2": 0.01},
        )

    Attributes:
        features (List[str]):
            Optional. Feature names / prediction output names to monitor.
            These should be a subset of the input feature names or prediction
            output names specified in the monitoring schema. If not specified,
            all features / prediction outputs outlined in the monitoring
            schema are used.
        categorical_metric_type (str):
            Optional. Supported metric types: l_infinity,
            jensen_shannon_divergence.
        numeric_metric_type (str):
            Optional. Supported metric type: jensen_shannon_divergence.
        default_categorical_alert_threshold (float):
            Optional. Default alert threshold for all categorical features.
        default_numeric_alert_threshold (float):
            Optional. Default alert threshold for all numeric features.
        feature_alert_thresholds (Dict[str, float]):
            Optional. Per-feature alert thresholds; these override the
            default alert threshold.
    """

    def __init__(
        self,
        features: Optional[List[str]] = None,
        categorical_metric_type: Optional[str] = L_INFINITY,
        numeric_metric_type: Optional[str] = JENSEN_SHANNON_DIVERGENCE,
        default_categorical_alert_threshold: Optional[float] = None,
        default_numeric_alert_threshold: Optional[float] = None,
        feature_alert_thresholds: Optional[Dict[str, float]] = None,
    ):
        self.features = features
        self.categorical_metric_type = categorical_metric_type
        self.numeric_metric_type = numeric_metric_type
        self.default_categorical_alert_threshold = default_categorical_alert_threshold
        self.default_numeric_alert_threshold = default_numeric_alert_threshold
        self.feature_alert_thresholds = feature_alert_thresholds

    def _as_proto(
        self,
    ) -> model_monitoring_spec.ModelMonitoringObjectiveSpec.DataDriftSpec:
        """Converts DataDriftSpec to a proto message.

        Returns:
            The GAPIC representation of the data drift spec.
        """
        user_default_categorical_alert_threshold = None
        user_default_numeric_alert_threshold = None
        user_alert_thresholds = None
        user_features = None
        if self.numeric_metric_type not in SUPPORTED_NUMERIC_METRICS:
            raise ValueError(
                f"Unsupported numeric metric type: {self.numeric_metric_type}"
            )
        user_numeric_metric_type = self.numeric_metric_type
        if self.categorical_metric_type not in SUPPORTED_CATEGORICAL_METRICS:
            raise ValueError(
                f"Unsupported categorical metric type: {self.categorical_metric_type}"
            )
        user_categorical_metric_type = self.categorical_metric_type
        if self.default_categorical_alert_threshold:
            user_default_categorical_alert_threshold = (
                model_monitoring_alert.ModelMonitoringAlertCondition(
                    threshold=self.default_categorical_alert_threshold
                )
            )
        if self.default_numeric_alert_threshold:
            user_default_numeric_alert_threshold = (
                model_monitoring_alert.ModelMonitoringAlertCondition(
                    threshold=self.default_numeric_alert_threshold
                )
            )
        if self.feature_alert_thresholds:
            user_alert_thresholds = {}
            for feature in self.feature_alert_thresholds:
                user_alert_thresholds.update(
                    {
                        feature: model_monitoring_alert.ModelMonitoringAlertCondition(
                            threshold=self.feature_alert_thresholds[feature]
                        )
                    }
                )
        if self.features:
            user_features = self.features
        return model_monitoring_spec.ModelMonitoringObjectiveSpec.DataDriftSpec(
            default_categorical_alert_condition=user_default_categorical_alert_threshold,
            default_numeric_alert_condition=user_default_numeric_alert_threshold,
            categorical_metric_type=user_categorical_metric_type,
            numeric_metric_type=user_numeric_metric_type,
            feature_alert_conditions=user_alert_thresholds,
            features=user_features,
        )


class FeatureAttributionSpec:
    """Feature attribution spec.

    Example:
        feature_attribution_spec=FeatureAttributionSpec(
            features=["feature1"],
            default_alert_threshold=0.01,
            feature_alert_thresholds={"feature1": 0.02, "feature2": 0.01},
            batch_dedicated_resources=BatchDedicatedResources(
                starting_replica_count=1,
                max_replica_count=2,
                machine_spec=my_machine_spec,
            ),
        )

    Attributes:
        features (List[str]):
            Optional. Input feature names to monitor. These should be a
            subset of the input feature names specified in the monitoring
            schema. If not specified, all features outlined in the monitoring
            schema are used.
        default_alert_threshold (float):
            Optional. Default alert threshold for all features.
        feature_alert_thresholds (Dict[str, float]):
            Optional. Per-feature alert thresholds; these override the
            default alert threshold.
        batch_dedicated_resources (machine_resources.BatchDedicatedResources):
            Optional. The config of resources used by Model Monitoring during
            the batch explanation for non-AutoML models. If not set, the
            `n1-standard-2` machine type is used by default.
    """

    def __init__(
        self,
        features: Optional[List[str]] = None,
        default_alert_threshold: Optional[float] = None,
        feature_alert_thresholds: Optional[Dict[str, float]] = None,
        batch_dedicated_resources: Optional[
            machine_resources.BatchDedicatedResources
        ] = None,
    ):
        self.features = features
        self.default_alert_threshold = default_alert_threshold
        self.feature_alert_thresholds = feature_alert_thresholds
        self.batch_dedicated_resources = batch_dedicated_resources

    def _as_proto(
        self,
    ) -> model_monitoring_spec.ModelMonitoringObjectiveSpec.FeatureAttributionSpec:
        """Converts FeatureAttributionSpec to a proto message.

        Returns:
            The GAPIC representation of the feature attribution spec.
        """
        user_default_alert_threshold = None
        user_alert_thresholds = None
        user_features = None
        if self.default_alert_threshold:
            user_default_alert_threshold = (
                model_monitoring_alert.ModelMonitoringAlertCondition(
                    threshold=self.default_alert_threshold
                )
            )
        if self.feature_alert_thresholds:
            user_alert_thresholds = {}
            for feature in self.feature_alert_thresholds:
                user_alert_thresholds.update(
                    {
                        feature: model_monitoring_alert.ModelMonitoringAlertCondition(
                            threshold=self.feature_alert_thresholds[feature]
                        )
                    }
                )
        if self.features:
            user_features = self.features
        return (
            model_monitoring_spec.ModelMonitoringObjectiveSpec.FeatureAttributionSpec(
                default_alert_condition=user_default_alert_threshold,
                feature_alert_conditions=user_alert_thresholds,
                features=user_features,
                batch_explanation_dedicated_resources=self.batch_dedicated_resources,
            )
        )


class MonitoringInput:
    """Model monitoring data input spec.

    Attributes:
        vertex_dataset (str):
            Optional. Resource name of the Vertex AI managed dataset.
            Format: ``projects/{project}/locations/{location}/datasets/{dataset}``
            At least one source of dataset should be provided, and if one of
            the fields is set, there is no need to set the other sources
            (vertex_dataset, gcs_uri, table_uri, query, batch_prediction_job,
            endpoints).
        gcs_uri (str):
            Optional. Google Cloud Storage URI of the input file(s). May
            contain wildcards.
        data_format (str):
            Optional. Data format of the Google Cloud Storage file(s). Should
            be provided if gcs_uri is set. Supported formats:
            "csv", "jsonl", "tf-record".
        table_uri (str):
            Optional. BigQuery URI of a table, up to 2000 characters long.
            All the columns in the table will be selected. Accepted forms:

            - BigQuery path. For example:
              ``bq://projectId.bqDatasetId.bqTableId``.
        query (str):
            Optional. Standard SQL for BigQuery to be used instead of the
            ``table_uri``.
        timestamp_field (str):
            Optional. The timestamp field in the dataset. The
            ``timestamp_field`` must be specified if you'd like to use
            ``start_time``, ``end_time``, ``offset`` or ``window``.
            If you use ``query`` to specify the dataset, make sure the
            ``timestamp_field`` is in the selected fields.
        batch_prediction_job (str):
            Optional. Vertex AI Batch Prediction Job resource name.
            Format: ``projects/{project}/locations/{location}/batchPredictionJobs/{batch_prediction_job}``
        endpoints (List[str]):
            Optional. List of Vertex AI Endpoint resource names.
            Format: ``projects/{project}/locations/{location}/endpoints/{endpoint}``
        start_time (timestamp_pb2.Timestamp):
            Optional. Inclusive start of the time interval for which results
            should be returned. Should be set together with ``end_time``.
        end_time (timestamp_pb2.Timestamp):
            Optional. Exclusive end of the time interval for which results
            should be returned. Should be set together with ``start_time``.
        offset (str):
            Optional. Offset is the time difference from the cut-off time.
            For scheduled jobs, the cut-off time is the scheduled time.
            For non-scheduled jobs, it's the time when the job was created.
            Currently the following format is supported:
            'w|W': Week, 'd|D': Day, 'h|H': Hour.
            E.g., '1h' stands for 1 hour, '2d' stands for 2 days.
        window (str):
            Optional. Window refers to the scope of data selected for
            analysis. It allows you to specify the quantity of data you wish
            to examine: the data time window prior to the cut-off time, or
            the cut-off time minus the offset.
            Currently the following format is supported:
            'w|W': Week, 'd|D': Day, 'h|H': Hour.
            E.g., '1h' stands for 1 hour, '2d' stands for 2 days.
    """

    def __init__(
        self,
        vertex_dataset: Optional[str] = None,
        gcs_uri: Optional[str] = None,
        data_format: Optional[str] = None,
        table_uri: Optional[str] = None,
        query: Optional[str] = None,
        timestamp_field: Optional[str] = None,
        batch_prediction_job: Optional[str] = None,
        endpoints: Optional[List[str]] = None,
        start_time: Optional[timestamp_pb2.Timestamp] = None,
        end_time: Optional[timestamp_pb2.Timestamp] = None,
        offset: Optional[str] = None,
        window: Optional[str] = None,
    ):
        self.vertex_dataset = vertex_dataset
        self.gcs_uri = gcs_uri
        self.data_format = data_format
        self.table_uri = table_uri
        self.query = query
        self.timestamp_field = timestamp_field
        self.batch_prediction_job = batch_prediction_job
        self.endpoints = endpoints
        self.start_time = start_time
        self.end_time = end_time
        self.offset = offset
        self.window = window

    def _as_proto(self) -> model_monitoring_spec.ModelMonitoringInput:
        """Converts MonitoringInput to a proto message.

        Returns:
            The GAPIC representation of the model monitoring input.
        """
        user_time_interval = None
        user_time_spec = None
        if self.offset or self.window:
            user_time_spec = model_monitoring_spec.ModelMonitoringInput.TimeOffset(
                offset=self.offset if self.offset else None,
                window=self.window if self.window else None,
            )
        elif self.start_time or self.end_time:
            user_time_interval = interval_pb2.Interval(
                start_time=self.start_time if self.start_time else None,
                end_time=self.end_time if self.end_time else None,
            )
        if self.vertex_dataset or self.gcs_uri or self.table_uri or self.query:
            user_vertex_dataset = None
            user_gcs_source = None
            user_bigquery_source = None
            if self.vertex_dataset:
                user_vertex_dataset = self.vertex_dataset
            elif self.gcs_uri:
                if not self.data_format:
                    raise ValueError("`data_format` must be provided with a GCS URI.")
                if self.data_format == CSV:
                    user_data_format = (
                        model_monitoring_spec.ModelMonitoringInput.ModelMonitoringDataset.ModelMonitoringGcsSource.DataFormat.CSV
                    )
                elif self.data_format == JSONL:
                    user_data_format = (
                        model_monitoring_spec.ModelMonitoringInput.ModelMonitoringDataset.ModelMonitoringGcsSource.DataFormat.JSONL
                    )
                elif self.data_format == TF_RECORD:
                    user_data_format = (
                        model_monitoring_spec.ModelMonitoringInput.ModelMonitoringDataset.ModelMonitoringGcsSource.DataFormat.TF_RECORD
                    )
                else:
                    raise ValueError(
                        "Unsupported value in data format. `data_format` "
                        "must be one of %s, %s, or %s" % (TF_RECORD, CSV, JSONL)
                    )
                user_gcs_source = model_monitoring_spec.ModelMonitoringInput.ModelMonitoringDataset.ModelMonitoringGcsSource(
                    gcs_uri=self.gcs_uri,
                    format_=user_data_format,
                )
            elif self.table_uri or self.query:
                user_bigquery_source = model_monitoring_spec.ModelMonitoringInput.ModelMonitoringDataset.ModelMonitoringBigQuerySource(
                    table_uri=self.table_uri,
                    query=self.query,
                )
            else:
                raise ValueError("At least one source of dataset must be provided.")
            user_model_monitoring_dataset = (
                model_monitoring_spec.ModelMonitoringInput.ModelMonitoringDataset(
                    vertex_dataset=user_vertex_dataset,
                    gcs_source=user_gcs_source,
                    bigquery_source=user_bigquery_source,
                    timestamp_field=self.timestamp_field,
                )
            )
            return model_monitoring_spec.ModelMonitoringInput(
                columnized_dataset=user_model_monitoring_dataset,
                time_offset=user_time_spec,
                time_interval=user_time_interval,
            )
        elif self.batch_prediction_job:
            user_batch_prediction_output = (
                model_monitoring_spec.ModelMonitoringInput.BatchPredictionOutput(
                    batch_prediction_job=self.batch_prediction_job,
                )
            )
            return model_monitoring_spec.ModelMonitoringInput(
                batch_prediction_output=user_batch_prediction_output,
                time_offset=user_time_spec,
                time_interval=user_time_interval,
            )
        elif self.endpoints:
            user_vertex_endpoint_logs = (
                model_monitoring_spec.ModelMonitoringInput.VertexEndpointLogs(
                    endpoints=self.endpoints,
                )
            )
            return model_monitoring_spec.ModelMonitoringInput(
                vertex_endpoint_logs=user_vertex_endpoint_logs,
                time_offset=user_time_spec,
                time_interval=user_time_interval,
            )
        else:
            raise ValueError("At least one source of data input must be provided.")
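

# A minimal sketch of a monitoring input that reads CSV files from Cloud
# Storage over a trailing 24-hour window; the bucket path and timestamp
# field are hypothetical placeholders.
def _example_monitoring_input() -> MonitoringInput:
    return MonitoringInput(
        gcs_uri="gs://my-bucket/serving-logs/*.csv",
        data_format=CSV,
        timestamp_field="event_timestamp",
        window="1d",
    )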


class TabularObjective:
    """Initializer for TabularObjective.

    Attributes:
        feature_drift_spec (DataDriftSpec):
            Optional. Input feature distribution drift monitoring spec.
        prediction_output_drift_spec (DataDriftSpec):
            Optional. Prediction output distribution drift monitoring spec.
        feature_attribution_spec (FeatureAttributionSpec):
            Optional. Feature attribution monitoring spec.
    """

    def __init__(
        self,
        feature_drift_spec: Optional[DataDriftSpec] = None,
        prediction_output_drift_spec: Optional[DataDriftSpec] = None,
        feature_attribution_spec: Optional[FeatureAttributionSpec] = None,
    ):
        self.feature_drift_spec = feature_drift_spec
        self.prediction_output_drift_spec = prediction_output_drift_spec
        self.feature_attribution_spec = feature_attribution_spec

    def _as_proto(
        self,
    ) -> model_monitoring_spec.ModelMonitoringObjectiveSpec.TabularObjective:
        """Converts TabularObjective to a proto message.

        Returns:
            The GAPIC representation of the model monitoring tabular
            objective.
        """
        user_feature_drift_spec = None
        user_prediction_output_drift_spec = None
        user_feature_attribution_spec = None
        if self.feature_drift_spec:
            user_feature_drift_spec = self.feature_drift_spec._as_proto()
        if self.prediction_output_drift_spec:
            user_prediction_output_drift_spec = (
                self.prediction_output_drift_spec._as_proto()
            )
        if self.feature_attribution_spec:
            user_feature_attribution_spec = self.feature_attribution_spec._as_proto()
        return model_monitoring_spec.ModelMonitoringObjectiveSpec.TabularObjective(
            feature_drift_spec=user_feature_drift_spec,
            prediction_output_drift_spec=user_prediction_output_drift_spec,
            feature_attribution_spec=user_feature_attribution_spec,
        )


class ObjectiveSpec:
    """Initializer for ObjectiveSpec.

    Args:
        baseline_dataset (MonitoringInput):
            Required. Baseline dataset used by all the monitoring objectives.
            It could be the training dataset or a production serving dataset
            from a previous period.
        target_dataset (MonitoringInput):
            Required. Target dataset for monitoring analysis; it is used by
            all the monitoring objectives.
        tabular_objective (TabularObjective):
            Optional. The tabular monitoring objective.
        explanation_spec (explanation.ExplanationSpec):
            Optional. The explanation spec. This spec is required when the
            objective spec includes feature attribution objectives.
    """

    def __init__(
        self,
        baseline_dataset: MonitoringInput,
        target_dataset: MonitoringInput,
        tabular_objective: Optional[TabularObjective] = None,
        explanation_spec: Optional[explanation.ExplanationSpec] = None,
    ):
        self.baseline = baseline_dataset
        self.target = target_dataset
        self.tabular_objective = tabular_objective
        self.explanation_spec = explanation_spec

    def _as_proto(self) -> model_monitoring_spec.ModelMonitoringObjectiveSpec:
        """Converts ObjectiveSpec to a proto message.

        Returns:
            The GAPIC representation of the model monitoring objective
            config.
        """
        user_tabular_objective = None
        if not self.baseline or not self.target:
            raise ValueError(
                "Both a baseline dataset and a target dataset must be provided."
            )
        if self.tabular_objective:
            user_tabular_objective = self.tabular_objective._as_proto()
        return model_monitoring_spec.ModelMonitoringObjectiveSpec(
            tabular_objective=user_tabular_objective,
            explanation_spec=self.explanation_spec if self.explanation_spec else None,
            target_dataset=self.target._as_proto(),
            baseline_dataset=self.baseline._as_proto(),
        )
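

# A minimal sketch wiring the pieces above together: a drift objective over
# a baseline (training) table and a target (serving) table. The BigQuery
# URIs and threshold values are hypothetical placeholders.
def _example_objective_spec() -> ObjectiveSpec:
    return ObjectiveSpec(
        baseline_dataset=MonitoringInput(
            table_uri="bq://my-project.my_dataset.training_data"
        ),
        target_dataset=MonitoringInput(
            table_uri="bq://my-project.my_dataset.serving_data",
            timestamp_field="event_timestamp",
            window="1d",
        ),
        tabular_objective=TabularObjective(
            feature_drift_spec=DataDriftSpec(
                default_categorical_alert_threshold=0.01,
                default_numeric_alert_threshold=0.02,
            ),
        ),
    )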
@@ -0,0 +1,48 @@
# -*- coding: utf-8 -*-

# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from google.cloud.aiplatform.compat.types import (
    io_v1beta1 as io,
    model_monitoring_spec_v1beta1 as model_monitoring_spec,
)


class OutputSpec:
    """Initializer for OutputSpec.

    Args:
        gcs_base_dir (str):
            Optional. Google Cloud Storage base folder path for metrics,
            error logs, etc.
    """

    def __init__(
        self,
        gcs_base_dir: str,
    ):
        self.gcs_base_dir = gcs_base_dir

    def _as_proto(self) -> model_monitoring_spec.ModelMonitoringOutputSpec:
        """Converts OutputSpec to a proto message.

        Returns:
            The GAPIC representation of the output spec.
        """
        user_gcs_base_dir = io.GcsDestination(output_uri_prefix=self.gcs_base_dir)
        return model_monitoring_spec.ModelMonitoringOutputSpec(
            gcs_base_directory=user_gcs_base_dir,
        )
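

# A minimal sketch; the bucket path below is a hypothetical placeholder.
def _example_output_spec() -> OutputSpec:
    return OutputSpec(gcs_base_dir="gs://my-bucket/model-monitoring/")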
@@ -0,0 +1,441 @@
# -*- coding: utf-8 -*-

# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import json
import logging
import os
from typing import Dict, List, MutableSequence, Optional
from google.cloud import bigquery
from google.cloud.aiplatform.compat.types import (
    model_monitor_v1beta1 as model_monitor,
)

try:
    import pandas as pd
except ImportError:
    pd = None
try:
    import tensorflow as tf
except ImportError:
    tf = None


class FieldSchema:
    """Field Schema.

    The class identifies the data type of a single feature; these combine to
    form the schema for the different fields in ModelMonitoringSchema.

    Attributes:
        name (str):
            Required. Field name.
        data_type (str):
            Required. Supported data types are: ``float``, ``integer``,
            ``boolean``, ``string``, ``categorical``.
        repeated (bool):
            Optional. Describes if the schema field is an array of the given
            data type.
    """

    def __init__(
        self,
        name: str,
        data_type: str,
        repeated: Optional[bool] = False,
    ):
        self.name = name
        self.data_type = data_type
        self.repeated = repeated

    def _as_proto(self) -> model_monitor.ModelMonitoringSchema.FieldSchema:
        """Converts FieldSchema to a proto message.

        Returns:
            The GAPIC representation of the model monitoring field schema.
        """
        return model_monitor.ModelMonitoringSchema.FieldSchema(
            name=self.name,
            data_type=self.data_type,
            repeated=self.repeated,
        )


class ModelMonitoringSchema:
    """Initializer for ModelMonitoringSchema.

    Args:
        feature_fields (MutableSequence[FieldSchema]):
            Required. Feature names of the model. Vertex AI will try to match
            the features from your dataset as follows:

            * For 'csv' files, the header names are required, and we will
              extract the corresponding feature values when the header names
              align with the feature names.
            * For 'jsonl' files, we will extract the corresponding feature
              values if the key names match the feature names. Note: nested
              features are not supported, so please ensure your features are
              flattened. Ensure the feature values are scalar or an array of
              scalars.
            * For 'bigquery' datasets, we will extract the corresponding
              feature values if the column names match the feature names.
              Note: the column type can be a scalar or an array of scalars.
              STRUCT or JSON types are not supported. You may use SQL queries
              to select or aggregate the relevant features from your original
              table. However, ensure that the 'schema' of the query results
              meets our requirements.
            * For the Vertex AI Endpoint Request Response Logging table or
              Vertex AI Batch Prediction Job results: if the prediction
              instance format is an array, ensure that the sequence in
              ``feature_fields`` matches the order of features in the
              prediction instance. We will match the feature with the array
              in the order specified in ``feature_fields``.
        prediction_fields (MutableSequence[FieldSchema]):
            Optional. Prediction output names of the model. The requirements
            are the same as for ``feature_fields``.
            For AutoML Tables, the prediction output name presented in the
            schema will be ``predicted_{target_column}``, where
            ``target_column`` is the one you specified when you trained the
            model.
            For prediction output drift analysis:

            * AutoML Classification: the distribution of the argmax label
              will be analyzed.
            * AutoML Regression: the distribution of the value will be
              analyzed.
        ground_truth_fields (MutableSequence[FieldSchema]):
            Optional. Target / ground truth names of the model.
    """

    def __init__(
        self,
        feature_fields: MutableSequence[FieldSchema],
        ground_truth_fields: Optional[MutableSequence[FieldSchema]] = None,
        prediction_fields: Optional[MutableSequence[FieldSchema]] = None,
    ):
        self.feature_fields = feature_fields
        self.prediction_fields = prediction_fields
        self.ground_truth_fields = ground_truth_fields

    def _as_proto(self) -> model_monitor.ModelMonitoringSchema:
        """Converts ModelMonitoringSchema to a proto message.

        Returns:
            The GAPIC representation of the model monitoring schema.
        """
        user_feature_fields = list()
        user_prediction_fields = list()
        user_ground_truth_fields = list()
        for field in self.feature_fields:
            user_feature_fields.append(field._as_proto())
        if self.prediction_fields:
            for field in self.prediction_fields:
                user_prediction_fields.append(field._as_proto())
        if self.ground_truth_fields:
            for field in self.ground_truth_fields:
                user_ground_truth_fields.append(field._as_proto())
        return model_monitor.ModelMonitoringSchema(
            feature_fields=user_feature_fields,
            prediction_fields=user_prediction_fields
            if self.prediction_fields
            else None,
            ground_truth_fields=user_ground_truth_fields
            if self.ground_truth_fields
            else None,
        )

    def to_json(self, output_dir: Optional[str] = None) -> str:
        """Transforms ModelMonitoringSchema to JSON format.

        Args:
            output_dir (str):
                Optional. The output directory that the transformed JSON
                file is written to.
        """
        result = model_monitor.ModelMonitoringSchema.to_json(self._as_proto())
        if output_dir:
            result_path = os.path.join(output_dir, "model_monitoring_schema.json")
            with tf.io.gfile.GFile(result_path, "w") as f:
                json.dump(result, f)
            logging.info("Transformed schema to json file: %s", result_path)
        return result
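

# A minimal sketch of a hand-built schema; the field names mirror a
# hypothetical tabular model with two inputs and one prediction output.
def _example_schema() -> ModelMonitoringSchema:
    return ModelMonitoringSchema(
        feature_fields=[
            FieldSchema(name="age", data_type="integer"),
            FieldSchema(name="country", data_type="categorical"),
        ],
        prediction_fields=[
            FieldSchema(name="predicted_income", data_type="float"),
        ],
    )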


def _check_duplicate(
    field: str,
    feature_fields: Optional[List[str]] = None,
    ground_truth_fields: Optional[List[str]] = None,
    prediction_fields: Optional[List[str]] = None,
) -> bool:
    """Checks if a field appears in two or more of the field lists."""
    feature = bool(feature_fields) and field in feature_fields
    ground_truth = bool(ground_truth_fields) and field in ground_truth_fields
    prediction = bool(prediction_fields) and field in prediction_fields
    return sum([feature, ground_truth, prediction]) >= 2


def _transform_schema_pandas(
    dataset: Dict[str, str],
    feature_fields: Optional[List[str]] = None,
    ground_truth_fields: Optional[List[str]] = None,
    prediction_fields: Optional[List[str]] = None,
) -> ModelMonitoringSchema:
    """Transforms a pandas schema to a model monitoring schema."""
    ground_truth_fields_list = list()
    prediction_fields_list = list()
    feature_fields_list = list()
    pandas_integer_types = ["integer", "Int32", "Int64", "UInt32", "UInt64"]
    pandas_string_types = [
        "string",
        "bytes",
        "date",
        "time",
        "datetime64",
        "datetime",
        "mixed-integer",
        "interval",
        "Interval",
    ]
    pandas_float_types = [
        "floating",
        "decimal",
        "mixed-integer-float",
        "Float32",
        "Float64",
    ]
    for field in dataset:
        infer_type = dataset[field]
        if infer_type in pandas_string_types:
            data_type = "string"
        elif infer_type in pandas_integer_types:
            data_type = "integer"
        elif infer_type in pandas_float_types:
            data_type = "float"
        elif infer_type == "boolean":
            data_type = "boolean"
        elif infer_type == "categorical" or infer_type == "category":
            data_type = "categorical"
        else:
            raise ValueError(f"Unsupported data type: {infer_type}")
        if _check_duplicate(
            field, feature_fields, ground_truth_fields, prediction_fields
        ):
            raise ValueError(
                f"The field {field} is specified in two or more field lists"
            )
        if ground_truth_fields and field in ground_truth_fields:
            ground_truth_fields_list.append(
                FieldSchema(
                    name=field,
                    data_type=data_type,
                )
            )
        elif prediction_fields and field in prediction_fields:
            prediction_fields_list.append(
                FieldSchema(
                    name=field,
                    data_type=data_type,
                )
            )
        elif (feature_fields and field in feature_fields) or not feature_fields:
            feature_fields_list.append(
                FieldSchema(
                    name=field,
                    data_type=data_type,
                )
            )
    return ModelMonitoringSchema(
        ground_truth_fields=ground_truth_fields_list if ground_truth_fields else None,
        prediction_fields=prediction_fields_list if prediction_fields else None,
        feature_fields=feature_fields_list,
    )


def transform_schema_from_bigquery(
    feature_fields: Optional[List[str]] = None,
    ground_truth_fields: Optional[List[str]] = None,
    prediction_fields: Optional[List[str]] = None,
    table: Optional[str] = None,
    query: Optional[str] = None,
) -> ModelMonitoringSchema:
    """Transforms an existing dataset's schema into a ModelMonitoringSchema
    that the model monitor accepts.

    Args:
        feature_fields (List[str]):
            Optional. The input feature fields for the given dataset.
            By default, every field found is treated as an input feature.
        ground_truth_fields (List[str]):
            Optional. The ground truth fields for the given dataset.
            Fields not listed in any list are treated as input features by
            default.
        prediction_fields (List[str]):
            Optional. The prediction output fields for the given dataset.
            Fields not listed in any list are treated as input features by
            default.
        table (str):
            Optional. The BigQuery table URI.
        query (str):
            Optional. The BigQuery query.
    """
    ground_truth_fields_list = list()
    prediction_fields_list = list()
    feature_fields_list = list()
    bq_string_types = [
        "STRING",
        "BYTES",
        "DATE",
        "TIME",
        "GEOGRAPHY",
        "DATETIME",
        "JSON",
        "INTERVAL",
        "RANGE",
    ]
    bq_integer_types = ["INTEGER", "INT64", "TIMESTAMP"]
    bq_float_types = ["FLOAT", "DOUBLE", "FLOAT64", "NUMERIC", "BIGNUMERIC"]
    if table:
        if table.startswith("bq://"):
            table = table[len("bq://") :]
        try:
            client = bigquery.Client()
            table = client.get_table(table)
            bq_schema = table.schema
        except Exception as e:
            raise ValueError(
                "Failed to get the table from the BigQuery address provided."
            ) from e
    elif query:
        try:
            client = bigquery.Client()
            bq_schema = client.query(
                query=query, job_config=bigquery.job.QueryJobConfig(dry_run=True)
            ).schema
        except Exception as e:
            raise ValueError(
                "Failed to get the schema for the BigQuery query provided."
            ) from e
    else:
        raise ValueError("Either table or query must be provided.")
    for field in bq_schema:
        if field.field_type in bq_string_types:
            data_type = "string"
        elif field.field_type in bq_integer_types:
            data_type = "integer"
        elif field.field_type in bq_float_types:
            data_type = "float"
        elif field.field_type == "BOOLEAN" or field.field_type == "BOOL":
            data_type = "boolean"
        else:
            raise ValueError(f"Unsupported data type: {field.field_type}")
        if _check_duplicate(
            field.name, feature_fields, ground_truth_fields, prediction_fields
        ):
            raise ValueError(
                f"The field {field.name} is specified in two or more field lists"
            )
        if ground_truth_fields and field.name in ground_truth_fields:
            ground_truth_fields_list.append(
                FieldSchema(
                    name=field.name,
                    data_type=data_type,
                    repeated=field.mode == "REPEATED",
                )
            )
        elif prediction_fields and field.name in prediction_fields:
            prediction_fields_list.append(
                FieldSchema(
                    name=field.name,
                    data_type=data_type,
                    repeated=field.mode == "REPEATED",
                )
            )
        elif (feature_fields and field.name in feature_fields) or not feature_fields:
            feature_fields_list.append(
                FieldSchema(
                    name=field.name,
                    data_type=data_type,
                    repeated=field.mode == "REPEATED",
                )
            )
    return ModelMonitoringSchema(
        ground_truth_fields=ground_truth_fields_list if ground_truth_fields else None,
        prediction_fields=prediction_fields_list if prediction_fields else None,
        feature_fields=feature_fields_list,
    )
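

# A minimal sketch of deriving a schema straight from a BigQuery table; the
# table URI and field names are hypothetical placeholders.
def _example_bigquery_schema() -> ModelMonitoringSchema:
    return transform_schema_from_bigquery(
        table="bq://my-project.my_dataset.serving_data",
        ground_truth_fields=["income"],
        prediction_fields=["predicted_income"],
    )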


def transform_schema_from_csv(
    file_path: str,
    feature_fields: Optional[List[str]] = None,
    ground_truth_fields: Optional[List[str]] = None,
    prediction_fields: Optional[List[str]] = None,
) -> ModelMonitoringSchema:
    """Transforms an existing dataset's schema into a ModelMonitoringSchema
    that the model monitor accepts.

    Args:
        file_path (str):
            Required. The dataset file path.
        feature_fields (List[str]):
            Optional. The input feature fields for the given dataset.
            By default, every field found is treated as an input feature.
        ground_truth_fields (List[str]):
            Optional. The ground truth fields for the given dataset.
            Fields not listed in any list are treated as input features by
            default.
        prediction_fields (List[str]):
            Optional. The prediction output fields for the given dataset.
            Fields not listed in any list are treated as input features by
            default.
    """
    with tf.io.gfile.GFile(file_path, "r") as f:
        input_dataset = pd.read_csv(f)
        dict_dataset = dict()
        for field in input_dataset.columns:
            dict_dataset[field] = input_dataset.convert_dtypes().dtypes[field]
        monitoring_schema = _transform_schema_pandas(
            dict_dataset, feature_fields, ground_truth_fields, prediction_fields
        )
    return monitoring_schema


def transform_schema_from_json(
    file_path: str,
    feature_fields: Optional[List[str]] = None,
    ground_truth_fields: Optional[List[str]] = None,
    prediction_fields: Optional[List[str]] = None,
) -> ModelMonitoringSchema:
    """Transforms an existing dataset's schema into a ModelMonitoringSchema
    that the model monitor accepts.

    Args:
        file_path (str):
            Required. The dataset file path.
        feature_fields (List[str]):
            Optional. The input feature fields for the given dataset.
            By default, every field found is treated as an input feature.
        ground_truth_fields (List[str]):
            Optional. The ground truth fields for the given dataset.
            Fields not listed in any list are treated as input features by
            default.
        prediction_fields (List[str]):
            Optional. The prediction output fields for the given dataset.
            Fields not listed in any list are treated as input features by
            default.
    """
    with tf.io.gfile.GFile(file_path, "r") as f:
        input_dataset = pd.read_json(f, lines=True)
        dict_dataset = dict()
        for field in input_dataset.columns:
            dict_dataset[field] = input_dataset.convert_dtypes().dtypes[field]
        monitoring_schema = _transform_schema_pandas(
            dict_dataset, feature_fields, ground_truth_fields, prediction_fields
        )
    return monitoring_schema
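

# A minimal sketch of the file-based variants; the GCS path is a
# hypothetical placeholder. Note that these helpers rely on the optional
# pandas and tensorflow imports at the top of this module.
def _example_csv_schema() -> ModelMonitoringSchema:
    return transform_schema_from_csv(
        file_path="gs://my-bucket/training_data.csv",
        ground_truth_fields=["income"],
    )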