evo-ai/.venv/lib/python3.10/site-packages/google/cloud/aiplatform/models.py

# -*- coding: utf-8 -*-
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import itertools
import json
import pathlib
import re
import shutil
import tempfile
import requests
from typing import (
Any,
Dict,
Iterator,
List,
NamedTuple,
Optional,
Sequence,
Tuple,
TYPE_CHECKING,
Union,
)
from google.api_core import operation
from google.api_core import exceptions as api_exceptions
from google.auth import credentials as auth_credentials
from google.auth.transport import requests as google_auth_requests
from google.protobuf import duration_pb2
import proto
from google.cloud import aiplatform
from google.cloud.aiplatform import base
from google.cloud.aiplatform import constants
from google.cloud.aiplatform import explain
from google.cloud.aiplatform import initializer
from google.cloud.aiplatform import jobs
from google.cloud.aiplatform import models
from google.cloud.aiplatform import utils
from google.cloud.aiplatform.utils import gcs_utils
from google.cloud.aiplatform.utils import _explanation_utils
from google.cloud.aiplatform.utils import _ipython_utils
from google.cloud.aiplatform import model_evaluation
from google.cloud.aiplatform.compat.services import endpoint_service_client
from google.cloud.aiplatform.compat.services import (
deployment_resource_pool_service_client,
)
from google.cloud.aiplatform.compat.types import (
deployment_resource_pool as gca_deployment_resource_pool_compat,
deployed_model_ref as gca_deployed_model_ref_compat,
encryption_spec as gca_encryption_spec,
endpoint as gca_endpoint_compat,
explanation as gca_explanation_compat,
io as gca_io_compat,
machine_resources as gca_machine_resources_compat,
model as gca_model_compat,
model_service as gca_model_service_compat,
env_var as gca_env_var_compat,
service_networking as gca_service_networking,
)
from google.cloud.aiplatform.constants import (
prediction as prediction_constants,
)
from google.cloud.aiplatform_v1.types import model as model_v1
from google.protobuf import field_mask_pb2, timestamp_pb2
from google.protobuf import json_format
if TYPE_CHECKING:
from google.cloud.aiplatform.prediction import LocalModel
_DEFAULT_MACHINE_TYPE = "n1-standard-2"
_DEPLOYING_MODEL_TRAFFIC_SPLIT_KEY = "0"
_SUCCESSFUL_HTTP_RESPONSE = 300
_RAW_PREDICT_DEPLOYED_MODEL_ID_KEY = "X-Vertex-AI-Deployed-Model-Id"
_RAW_PREDICT_MODEL_RESOURCE_KEY = "X-Vertex-AI-Model"
_RAW_PREDICT_MODEL_VERSION_ID_KEY = "X-Vertex-AI-Model-Version-Id"
_LOGGER = base.Logger(__name__)
_SUPPORTED_MODEL_FILE_NAMES = [
"model.pkl",
"model.joblib",
"model.bst",
"model.mar",
"saved_model.pb",
"saved_model.pbtxt",
]
_SUPPORTED_EVAL_PREDICTION_TYPES = [
"classification",
"regression",
]
class VersionInfo(NamedTuple):
"""VersionInfo class envelopes returned Model version information.
Attributes:
version_id:
The version ID of the model.
version_create_time:
Timestamp when this Model version was uploaded into Vertex AI.
version_update_time:
Timestamp when this Model version was most recently updated.
model_display_name:
The user-defined name of the model this version belongs to.
model_resource_name:
The fully-qualified model resource name.
e.g. projects/{project}/locations/{location}/models/{model_display_name}
version_aliases:
User provided version aliases so that a model version can be referenced via
alias (i.e. projects/{project}/locations/{location}/models/{model_display_name}@{version_alias}).
Default is None.
version_description:
The description of this version.
Default is None.
"""
version_id: str
version_create_time: timestamp_pb2.Timestamp
version_update_time: timestamp_pb2.Timestamp
model_display_name: str
model_resource_name: str
version_aliases: Optional[Sequence[str]] = None
version_description: Optional[str] = None
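# Illustrative usage sketch (not part of the library source): reading VersionInfo
# entries through the model registry. The project, location, and model ID below
# are placeholders, and the model-registry accessors are assumed from the public
# SDK surface.
#
#   from google.cloud import aiplatform
#   aiplatform.init(project="my-project", location="us-central1")
#   model = aiplatform.Model("1234567890")
#   for version in model.versioning_registry.list_versions():
#       print(version.version_id, version.version_create_time)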
class Prediction(NamedTuple):
"""Prediction class envelopes returned Model predictions and the Model id.
Attributes:
predictions:
The predictions that are the output of the predictions
call. The schema of any single prediction may be specified via
Endpoint's DeployedModels' [Model's][google.cloud.aiplatform.v1beta1.DeployedModel.model]
[PredictSchemata's][google.cloud.aiplatform.v1beta1.Model.predict_schemata]
deployed_model_id:
ID of the Endpoint's DeployedModel that served this prediction.
metadata:
The metadata that is the output of the predictions call.
model_version_id:
ID of the DeployedModel's version that served this prediction.
model_resource_name:
The fully-qualified resource name of the model that served this prediction.
explanations:
The explanations of the Model's predictions. It has the same number
of elements as instances to be explained. Default is None.
"""
predictions: List[Any]
deployed_model_id: str
metadata: Optional[Any] = None
model_version_id: Optional[str] = None
model_resource_name: Optional[str] = None
explanations: Optional[Sequence[gca_explanation_compat.Explanation]] = None
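# Illustrative usage sketch (not part of the library source): consuming a
# Prediction returned by Endpoint.predict(). The endpoint resource name and
# instance payload are placeholders.
#
#   endpoint = aiplatform.Endpoint("projects/123/locations/us-central1/endpoints/456")
#   prediction = endpoint.predict(instances=[{"feature_1": 1.0, "feature_2": 2.0}])
#   print(prediction.predictions)
#   print(prediction.deployed_model_id, prediction.model_version_id)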
class DeploymentResourcePool(base.VertexAiResourceNounWithFutureManager):
client_class = utils.DeploymentResourcePoolClientWithOverride
_resource_noun = "deploymentResourcePools"
_getter_method = "get_deployment_resource_pool"
_list_method = "list_deployment_resource_pools"
_delete_method = "delete_deployment_resource_pool"
_parse_resource_name_method = "parse_deployment_resource_pool_path"
_format_resource_name_method = "deployment_resource_pool_path"
def __init__(
self,
deployment_resource_pool_name: str,
project: Optional[str] = None,
location: Optional[str] = None,
credentials: Optional[auth_credentials.Credentials] = None,
):
"""Retrieves a DeploymentResourcePool.
Args:
deployment_resource_pool_name (str):
Required. The fully-qualified resource name or ID of the
deployment resource pool. Example:
"projects/123/locations/us-central1/deploymentResourcePools/456"
or "456" when project and location are initialized or passed.
project (str):
Optional. Project containing the deployment resource pool to
retrieve. If not set, the project given to `aiplatform.init`
will be used.
location (str):
Optional. Location containing the deployment resource pool to
retrieve. If not set, the location given to `aiplatform.init`
will be used.
credentials: Optional[auth_credentials.Credentials]=None,
Custom credentials to use to retrieve the deployment resource
pool. If not set, the credentials given to `aiplatform.init`
will be used.
"""
super().__init__(
project=project,
location=location,
credentials=credentials,
resource_name=deployment_resource_pool_name,
)
deployment_resource_pool_name = utils.full_resource_name(
resource_name=deployment_resource_pool_name,
resource_noun=self._resource_noun,
parse_resource_name_method=self._parse_resource_name,
format_resource_name_method=self._format_resource_name,
project=project,
location=location,
)
self._gca_resource = self._get_gca_resource(
resource_name=deployment_resource_pool_name
)
@classmethod
def create(
cls,
deployment_resource_pool_id: str,
project: Optional[str] = None,
location: Optional[str] = None,
metadata: Sequence[Tuple[str, str]] = (),
credentials: Optional[auth_credentials.Credentials] = None,
machine_type: Optional[str] = None,
min_replica_count: int = 1,
max_replica_count: int = 1,
accelerator_type: Optional[str] = None,
accelerator_count: Optional[int] = None,
autoscaling_target_cpu_utilization: Optional[int] = None,
autoscaling_target_accelerator_duty_cycle: Optional[int] = None,
sync=True,
create_request_timeout: Optional[float] = None,
reservation_affinity_type: Optional[str] = None,
reservation_affinity_key: Optional[str] = None,
reservation_affinity_values: Optional[List[str]] = None,
spot: bool = False,
required_replica_count: Optional[int] = 0,
) -> "DeploymentResourcePool":
"""Creates a new DeploymentResourcePool.
Args:
deployment_resource_pool_id (str):
Required. User-specified name for the new deployment resource
pool.
project (str):
Optional. Project containing the deployment resource pool to
retrieve. If not set, the project given to `aiplatform.init`
will be used.
location (str):
Optional. Location containing the deployment resource pool to
retrieve. If not set, the location given to `aiplatform.init`
will be used.
metadata (Sequence[Tuple[str, str]]):
Optional. Strings which should be sent along with the request as
metadata.
credentials: Optional[auth_credentials.Credentials]=None,
Optional. Custom credentials to use to retrieve the deployment
resource pool. If not set, the credentials given to
`aiplatform.init` will be used.
machine_type (str):
Optional. Machine type to use for the deployment resource pool.
If not set, the default machine type of `n1-standard-2` is
used.
min_replica_count (int):
Optional. The minimum replica count of the new deployment
resource pool. Each replica serves a copy of each model deployed
on the deployment resource pool. If this value is less than
`max_replica_count`, then autoscaling is enabled, and the actual
number of replicas will be adjusted to bring resource usage in
line with the autoscaling targets.
max_replica_count (int):
Optional. The maximum replica count of the new deployment
resource pool.
accelerator_type (str):
Optional. Hardware accelerator type. Must also set
accelerator_count if used. One of NVIDIA_TESLA_K80, NVIDIA_TESLA_P100,
NVIDIA_TESLA_V100, NVIDIA_TESLA_P4, NVIDIA_TESLA_T4, or
NVIDIA_TESLA_A100.
accelerator_count (int):
Optional. The number of accelerators attached to each replica.
autoscaling_target_cpu_utilization (int):
Optional. Target CPU utilization value for autoscaling. A
default value of 60 will be used if not specified.
autoscaling_target_accelerator_duty_cycle (int):
Optional. Target accelerator duty cycle percentage to use for
autoscaling. Must also set accelerator_type and
accelerator_count if specified. A default value of 60 will be used if
accelerators are requested and this is not specified.
sync (bool):
Optional. Whether to execute this method synchronously. If
False, this method will be executed in a concurrent Future and
any downstream object will be immediately returned and synced
when the Future has completed.
create_request_timeout (float):
Optional. The create request timeout in seconds.
reservation_affinity_type (str):
Optional. The type of reservation affinity.
One of NO_RESERVATION, ANY_RESERVATION, SPECIFIC_RESERVATION,
SPECIFIC_THEN_ANY_RESERVATION, SPECIFIC_THEN_NO_RESERVATION
reservation_affinity_key (str):
Optional. Corresponds to the label key of a reservation resource.
To target a SPECIFIC_RESERVATION by name, use `compute.googleapis.com/reservation-name` as the key
and specify the name of your reservation as its value.
reservation_affinity_values (List[str]):
Optional. Corresponds to the label values of a reservation resource.
This must be the full resource name of the reservation.
Format: 'projects/{project_id_or_number}/zones/{zone}/reservations/{reservation_name}'
spot (bool):
Optional. Whether to schedule the deployment workload on spot VMs.
required_replica_count (int):
Optional. Number of required available replicas for the
deployment to succeed. This field is only needed when partial
model deployment/mutation is desired, with a value greater than
or equal to 1 and less than or equal to min_replica_count. If
set, the model deploy/mutate operation will succeed once
available_replica_count reaches required_replica_count, and the
rest of the replicas will be retried.
Returns:
DeploymentResourcePool
"""
api_client = cls._instantiate_client(location=location, credentials=credentials)
project = project or initializer.global_config.project
location = location or initializer.global_config.location
return cls._create(
api_client=api_client,
deployment_resource_pool_id=deployment_resource_pool_id,
project=project,
location=location,
metadata=metadata,
credentials=credentials,
machine_type=machine_type,
min_replica_count=min_replica_count,
max_replica_count=max_replica_count,
accelerator_type=accelerator_type,
accelerator_count=accelerator_count,
reservation_affinity_type=reservation_affinity_type,
reservation_affinity_key=reservation_affinity_key,
reservation_affinity_values=reservation_affinity_values,
autoscaling_target_cpu_utilization=autoscaling_target_cpu_utilization,
autoscaling_target_accelerator_duty_cycle=autoscaling_target_accelerator_duty_cycle,
spot=spot,
sync=sync,
create_request_timeout=create_request_timeout,
required_replica_count=required_replica_count,
)
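# Illustrative usage sketch (not part of the library source): creating a resource
# pool that several models can later share. The pool ID and machine settings are
# placeholders.
#
#   pool = aiplatform.models.DeploymentResourcePool.create(
#       deployment_resource_pool_id="my-shared-pool",
#       machine_type="n1-standard-4",
#       min_replica_count=1,
#       max_replica_count=2,
#   )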
@classmethod
@base.optional_sync()
def _create(
cls,
api_client: deployment_resource_pool_service_client.DeploymentResourcePoolServiceClient,
deployment_resource_pool_id: str,
project: Optional[str] = None,
location: Optional[str] = None,
metadata: Sequence[Tuple[str, str]] = (),
credentials: Optional[auth_credentials.Credentials] = None,
machine_type: Optional[str] = None,
min_replica_count: int = 1,
max_replica_count: int = 1,
accelerator_type: Optional[str] = None,
accelerator_count: Optional[int] = None,
reservation_affinity_type: Optional[str] = None,
reservation_affinity_key: Optional[str] = None,
reservation_affinity_values: Optional[List[str]] = None,
autoscaling_target_cpu_utilization: Optional[int] = None,
autoscaling_target_accelerator_duty_cycle: Optional[int] = None,
spot: bool = False,
sync=True,
create_request_timeout: Optional[float] = None,
required_replica_count: Optional[int] = 0,
) -> "DeploymentResourcePool":
"""Creates a new DeploymentResourcePool.
Args:
api_client (DeploymentResourcePoolServiceClient):
Required. DeploymentResourcePoolServiceClient used to make the
underlying CreateDeploymentResourcePool API call.
deployment_resource_pool_id (str):
Required. User-specified name for the new deployment resource
pool.
project (str):
Optional. Project containing the deployment resource pool to
retrieve. If not set, the project given to `aiplatform.init`
will be used.
location (str):
Optional. Location containing the deployment resource pool to
retrieve. If not set, the location given to `aiplatform.init`
will be used.
metadata (Sequence[Tuple[str, str]]):
Optional. Strings which should be sent along with the request as
metadata.
credentials: Optional[auth_credentials.Credentials]=None,
Optional. Custom credentials to use to retrieve the deployment
resource pool. If not set, the credentials given to
`aiplatform.init` will be used.
machine_type (str):
Optional. Machine type to use for the deployment resource pool.
If not set, the default machine type of `n1-standard-2` is
used.
min_replica_count (int):
Optional. The minimum replica count of the new deployment
resource pool. Each replica serves a copy of each model deployed
on the deployment resource pool. If this value is less than
`max_replica_count`, then autoscaling is enabled, and the actual
number of replicas will be adjusted to bring resource usage in
line with the autoscaling targets.
max_replica_count (int):
Optional. The maximum replica count of the new deployment
resource pool.
accelerator_type (str):
Optional. Hardware accelerator type. Must also set
accelerator_count if used. One of NVIDIA_TESLA_K80, NVIDIA_TESLA_P100,
NVIDIA_TESLA_V100, NVIDIA_TESLA_P4, NVIDIA_TESLA_T4, or
NVIDIA_TESLA_A100.
accelerator_count (int):
Optional. The number of accelerators attached to each replica.
reservation_affinity_type (str):
Optional. The type of reservation affinity.
One of NO_RESERVATION, ANY_RESERVATION, SPECIFIC_RESERVATION,
SPECIFIC_THEN_ANY_RESERVATION, SPECIFIC_THEN_NO_RESERVATION
reservation_affinity_key (str):
Optional. Corresponds to the label key of a reservation resource.
To target a SPECIFIC_RESERVATION by name, use `compute.googleapis.com/reservation-name` as the key
and specify the name of your reservation as its value.
reservation_affinity_values (List[str]):
Optional. Corresponds to the label values of a reservation resource.
This must be the full resource name of the reservation.
Format: 'projects/{project_id_or_number}/zones/{zone}/reservations/{reservation_name}'
autoscaling_target_cpu_utilization (int):
Optional. Target CPU utilization value for autoscaling. A
default value of 60 will be used if not specified.
autoscaling_target_accelerator_duty_cycle (int):
Optional. Target accelerator duty cycle percentage to use for
autoscaling. Must also set accelerator_type and
accelerator_count if specified. A default value of 60 will be used if
accelerators are requested and this is not specified.
spot (bool):
Optional. Whether to schedule the deployment workload on spot VMs.
sync (bool):
Optional. Whether to execute this method synchronously. If
False, this method will be executed in a concurrent Future and
any downstream object will be immediately returned and synced
when the Future has completed.
create_request_timeout (float):
Optional. The create request timeout in seconds.
required_replica_count (int):
Optional. Number of required available replicas for the
deployment to succeed. This field is only needed when partial
model deployment/mutation is desired, with a value greater than
or equal to 1 and less than or equal to min_replica_count. If
set, the model deploy/mutate operation will succeed once
available_replica_count reaches required_replica_count, and the
rest of the replicas will be retried.
Returns:
DeploymentResourcePool
"""
parent = initializer.global_config.common_location_path(
project=project, location=location
)
dedicated_resources = gca_machine_resources_compat.DedicatedResources(
min_replica_count=min_replica_count,
max_replica_count=max_replica_count,
spot=spot,
required_replica_count=required_replica_count,
)
machine_spec = gca_machine_resources_compat.MachineSpec(
machine_type=machine_type
)
if autoscaling_target_cpu_utilization:
autoscaling_metric_spec = (
gca_machine_resources_compat.AutoscalingMetricSpec(
metric_name=(
"aiplatform.googleapis.com/prediction/online/cpu/utilization"
),
target=autoscaling_target_cpu_utilization,
)
)
dedicated_resources.autoscaling_metric_specs.extend(
[autoscaling_metric_spec]
)
if accelerator_type and accelerator_count:
utils.validate_accelerator_type(accelerator_type)
machine_spec.accelerator_type = accelerator_type
machine_spec.accelerator_count = accelerator_count
if autoscaling_target_accelerator_duty_cycle:
autoscaling_metric_spec = gca_machine_resources_compat.AutoscalingMetricSpec(
metric_name="aiplatform.googleapis.com/prediction/online/accelerator/duty_cycle",
target=autoscaling_target_accelerator_duty_cycle,
)
dedicated_resources.autoscaling_metric_specs.extend(
[autoscaling_metric_spec]
)
if reservation_affinity_type:
machine_spec.reservation_affinity = utils.get_reservation_affinity(
reservation_affinity_type,
reservation_affinity_key,
reservation_affinity_values,
)
dedicated_resources.machine_spec = machine_spec
gapic_drp = gca_deployment_resource_pool_compat.DeploymentResourcePool(
dedicated_resources=dedicated_resources
)
operation_future = api_client.create_deployment_resource_pool(
parent=parent,
deployment_resource_pool=gapic_drp,
deployment_resource_pool_id=deployment_resource_pool_id,
metadata=metadata,
timeout=create_request_timeout,
)
_LOGGER.log_create_with_lro(cls, operation_future)
created_drp = operation_future.result()
_LOGGER.log_create_complete(cls, created_drp, "deployment resource pool")
return cls._construct_sdk_resource_from_gapic(
gapic_resource=created_drp,
project=project,
location=location,
credentials=credentials,
)
def query_deployed_models(
self,
project: Optional[str] = None,
location: Optional[str] = None,
credentials: Optional[auth_credentials.Credentials] = None,
) -> List[gca_deployed_model_ref_compat.DeployedModelRef]:
"""Lists the deployed models using this resource pool.
Args:
project (str):
Optional. Project to retrieve list from. If not set, project
set in aiplatform.init will be used.
location (str):
Optional. Location to retrieve list from. If not set, location
set in aiplatform.init will be used.
credentials (auth_credentials.Credentials):
Optional. Custom credentials to use to retrieve list. Overrides
credentials set in aiplatform.init.
Returns:
List of DeployedModelRef objects containing the endpoint ID and
deployed model ID of the deployed models using this resource pool.
"""
location = location or initializer.global_config.location
api_client = DeploymentResourcePool._instantiate_client(
location=location, credentials=credentials
)
response = api_client.query_deployed_models(
deployment_resource_pool=self.resource_name
)
return list(
itertools.chain.from_iterable(
page.deployed_model_refs for page in response.pages
)
)
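# Illustrative usage sketch (not part of the library source): inspecting which
# deployed models currently share the pool created above.
#
#   for ref in pool.query_deployed_models():
#       print(ref.endpoint, ref.deployed_model_id)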
@classmethod
def list(
cls,
filter: Optional[str] = None,
order_by: Optional[str] = None,
project: Optional[str] = None,
location: Optional[str] = None,
credentials: Optional[auth_credentials.Credentials] = None,
) -> List["models.DeploymentResourcePool"]:
"""Lists the deployment resource pools.
Args:
filter (str):
Optional. An expression for filtering the results of the request.
For field names both snake_case and camelCase are supported.
order_by (str):
Optional. A comma-separated list of fields to order by, sorted in
ascending order. Use "desc" after a field name for descending.
Supported fields: `display_name`, `create_time`, `update_time`
project (str):
Optional. Project to retrieve list from. If not set, project
set in aiplatform.init will be used.
location (str):
Optional. Location to retrieve list from. If not set, location
set in aiplatform.init will be used.
credentials (auth_credentials.Credentials):
Optional. Custom credentials to use to retrieve list. Overrides
credentials set in aiplatform.init.
Returns:
List of deployment resource pools.
"""
return cls._list(
filter=filter,
order_by=order_by,
project=project,
location=location,
credentials=credentials,
)
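# Illustrative usage sketch (not part of the library source): listing pools in
# the initialized project and location, newest first.
#
#   pools = aiplatform.models.DeploymentResourcePool.list(order_by="create_time desc")
#   for pool in pools:
#       print(pool.resource_name)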
class Endpoint(base.VertexAiResourceNounWithFutureManager, base.PreviewMixin):
client_class = utils.EndpointClientWithOverride
_resource_noun = "endpoints"
_getter_method = "get_endpoint"
_list_method = "list_endpoints"
_delete_method = "delete_endpoint"
_parse_resource_name_method = "parse_endpoint_path"
_format_resource_name_method = "endpoint_path"
_preview_class = "google.cloud.aiplatform.aiplatform.preview.models.Endpoint"
@property
def preview(self):
"""Return an Endpoint instance with preview features enabled."""
from google.cloud.aiplatform.preview import models as preview_models
if not hasattr(self, "_preview_instance"):
self._preview_instance = preview_models.Endpoint(
self.resource_name, credentials=self.credentials
)
return self._preview_instance
def __init__(
self,
endpoint_name: str,
project: Optional[str] = None,
location: Optional[str] = None,
credentials: Optional[auth_credentials.Credentials] = None,
):
"""Retrieves an endpoint resource.
Args:
endpoint_name (str):
Required. A fully-qualified endpoint resource name or endpoint ID.
Example: "projects/123/locations/us-central1/endpoints/456" or
"456" when project and location are initialized or passed.
project (str):
Optional. Project to retrieve endpoint from. If not set, project
set in aiplatform.init will be used.
location (str):
Optional. Location to retrieve endpoint from. If not set, location
set in aiplatform.init will be used.
credentials (auth_credentials.Credentials):
Optional. Custom credentials to use to upload this model. Overrides
credentials set in aiplatform.init.
"""
super().__init__(
project=project,
location=location,
credentials=credentials,
resource_name=endpoint_name,
)
endpoint_name = utils.full_resource_name(
resource_name=endpoint_name,
resource_noun="endpoints",
parse_resource_name_method=self._parse_resource_name,
format_resource_name_method=self._format_resource_name,
project=project,
location=location,
)
# Lazy load the Endpoint gca_resource until needed
self._gca_resource = gca_endpoint_compat.Endpoint(name=endpoint_name)
self.authorized_session = None
self.raw_predict_request_url = None
self.stream_raw_predict_request_url = None
@property
def _prediction_client(self) -> utils.PredictionClientWithOverride:
# The attribute might not exist due to issues in
# `VertexAiResourceNounWithFutureManager._sync_object_with_future_result`
# We should switch to @functools.cached_property once it's available.
if not getattr(self, "_prediction_client_value", None):
self._prediction_client_value = initializer.global_config.create_client(
client_class=utils.PredictionClientWithOverride,
credentials=self.credentials,
location_override=self.location,
prediction_client=True,
)
return self._prediction_client_value
@property
def _prediction_async_client(self) -> utils.PredictionAsyncClientWithOverride:
# The attribute might not exist due to issues in
# `VertexAiResourceNounWithFutureManager._sync_object_with_future_result`
# We should switch to @functools.cached_property once it's available.
if not getattr(self, "_prediction_async_client_value", None):
self._prediction_async_client_value = (
initializer.global_config.create_client(
client_class=utils.PredictionAsyncClientWithOverride,
credentials=self.credentials,
location_override=self.location,
prediction_client=True,
)
)
return self._prediction_async_client_value
def _skipped_getter_call(self) -> bool:
"""Check if GAPIC resource was populated by call to get/list API methods
Returns False if `_gca_resource` is None or fully populated. Returns True
if `_gca_resource` is partially populated
"""
return self._gca_resource and not self._gca_resource.create_time
def _sync_gca_resource_if_skipped(self) -> None:
"""Sync GAPIC service representation of Endpoint class resource only if
get_endpoint() was never called."""
if self._skipped_getter_call():
self._gca_resource = self._get_gca_resource(
resource_name=self._gca_resource.name
)
def _assert_gca_resource_is_available(self) -> None:
"""Ensures Endpoint getter was called at least once before
asserting on gca_resource's availability."""
super()._assert_gca_resource_is_available()
self._sync_gca_resource_if_skipped()
@property
def traffic_split(self) -> Dict[str, int]:
"""A map from a DeployedModel's ID to the percentage of this Endpoint's
traffic that should be forwarded to that DeployedModel.
If a DeployedModel's ID is not listed in this map, then it receives no traffic.
The traffic percentage values must add up to 100, or map must be empty if
the Endpoint is to not accept any traffic at the moment.
"""
self._sync_gca_resource()
return dict(self._gca_resource.traffic_split)
@property
def network(self) -> Optional[str]:
"""The full name of the Google Compute Engine
[network](https://cloud.google.com/vpc/docs/vpc#networks) to which this
Endpoint should be peered.
Takes the format `projects/{project}/global/networks/{network}`. Where
{project} is a project number, as in `12345`, and {network} is a network name.
Private services access must already be configured for the network. If left
unspecified, the Endpoint is not peered with any network.
"""
self._assert_gca_resource_is_available()
return getattr(self._gca_resource, "network", None)
@property
def private_service_connect_config(
self,
) -> Optional[gca_service_networking.PrivateServiceConnectConfig]:
"""The Private Service Connect configuration for this Endpoint."""
self._assert_gca_resource_is_available()
return self._gca_resource.private_service_connect_config
@classmethod
def create(
cls,
display_name: Optional[str] = None,
description: Optional[str] = None,
labels: Optional[Dict[str, str]] = None,
metadata: Optional[Sequence[Tuple[str, str]]] = (),
project: Optional[str] = None,
location: Optional[str] = None,
credentials: Optional[auth_credentials.Credentials] = None,
encryption_spec_key_name: Optional[str] = None,
sync=True,
create_request_timeout: Optional[float] = None,
endpoint_id: Optional[str] = None,
enable_request_response_logging=False,
request_response_logging_sampling_rate: Optional[float] = None,
request_response_logging_bq_destination_table: Optional[str] = None,
dedicated_endpoint_enabled=False,
inference_timeout: Optional[int] = None,
) -> "Endpoint":
"""Creates a new endpoint.
Args:
display_name (str):
Optional. The user-defined name of the Endpoint.
The name can be up to 128 characters long and can consist
of any UTF-8 characters.
description (str):
Optional. The description of the Endpoint.
labels (Dict[str, str]):
Optional. The labels with user-defined metadata to
organize your Endpoints.
Label keys and values can be no longer than 64
characters (Unicode codepoints), can only
contain lowercase letters, numeric characters,
underscores and dashes. International characters
are allowed.
See https://goo.gl/xmQnxf for more information
and examples of labels.
metadata (Sequence[Tuple[str, str]]):
Optional. Strings which should be sent along with the request as
metadata.
project (str):
Optional. Project to retrieve endpoint from. If not set, project
set in aiplatform.init will be used.
location (str):
Optional. Location to retrieve endpoint from. If not set, location
set in aiplatform.init will be used.
credentials (auth_credentials.Credentials):
Optional. Custom credentials to use to upload this model. Overrides
credentials set in aiplatform.init.
encryption_spec_key_name (str):
Optional. The Cloud KMS resource identifier of the customer
managed encryption key used to protect the model. Has the
form:
``projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key``.
The key needs to be in the same region as where the compute
resource is created.
If set, this Endpoint and all sub-resources of this Endpoint will be secured by this key.
Overrides encryption_spec_key_name set in aiplatform.init.
sync (bool):
Whether to execute this method synchronously. If False, this method
will be executed in concurrent Future and any downstream object will
be immediately returned and synced when the Future has completed.
create_request_timeout (float):
Optional. The timeout for the create request in seconds.
endpoint_id (str):
Optional. The ID to use for endpoint, which will become
the final component of the endpoint resource name. If
not provided, Vertex AI will generate a value for this
ID.
This value should be 1-10 characters, and valid
characters are /[0-9]/. When using HTTP/JSON, this field
is populated based on a query string argument, such as
``?endpoint_id=12345``. This is the fallback for fields
that are not included in either the URI or the body.
enable_request_response_logging (bool):
Optional. Whether to enable request & response logging for this endpoint.
request_response_logging_sampling_rate (float):
Optional. The request response logging sampling rate. If not set, default is 0.0.
request_response_logging_bq_destination_table (str):
Optional. The request response logging bigquery destination. If not set, will create a table with name:
``bq://{project_id}.logging_{endpoint_display_name}_{endpoint_id}.request_response_logging``.
dedicated_endpoint_enabled (bool):
Optional. If enabled, a dedicated DNS will be created and your
traffic will be fully isolated from other customers' traffic and
latency will be reduced.
inference_timeout (int):
Optional. It defines the prediction timeout, in seconds, for online predictions using cloud-based endpoints. This applies to either PSC endpoints, when private_service_connect_config is set, or dedicated endpoints, when dedicated_endpoint_enabled is true.
Returns:
endpoint (aiplatform.Endpoint):
Created endpoint.
"""
api_client = cls._instantiate_client(location=location, credentials=credentials)
if not display_name:
display_name = cls._generate_display_name()
utils.validate_display_name(display_name)
if labels:
utils.validate_labels(labels)
project = project or initializer.global_config.project
location = location or initializer.global_config.location
predict_request_response_logging_config = None
if enable_request_response_logging:
predict_request_response_logging_config = (
gca_endpoint_compat.PredictRequestResponseLoggingConfig(
enabled=True,
sampling_rate=request_response_logging_sampling_rate,
bigquery_destination=gca_io_compat.BigQueryDestination(
output_uri=request_response_logging_bq_destination_table
),
)
)
client_connection_config = None
if (
inference_timeout is not None
and inference_timeout > 0
and dedicated_endpoint_enabled
):
client_connection_config = gca_endpoint_compat.ClientConnectionConfig(
inference_timeout=duration_pb2.Duration(seconds=inference_timeout)
)
return cls._create(
api_client=api_client,
display_name=display_name,
project=project,
location=location,
description=description,
labels=labels,
metadata=metadata,
credentials=credentials,
encryption_spec=initializer.global_config.get_encryption_spec(
encryption_spec_key_name=encryption_spec_key_name
),
sync=sync,
create_request_timeout=create_request_timeout,
endpoint_id=endpoint_id,
predict_request_response_logging_config=predict_request_response_logging_config,
dedicated_endpoint_enabled=dedicated_endpoint_enabled,
client_connection_config=client_connection_config,
)
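# Illustrative usage sketch (not part of the library source): creating a
# dedicated endpoint with a 10-minute inference timeout. The display name and
# timeout are placeholders.
#
#   endpoint = aiplatform.Endpoint.create(
#       display_name="my-endpoint",
#       dedicated_endpoint_enabled=True,
#       inference_timeout=600,
#   )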
@classmethod
@base.optional_sync()
def _create(
cls,
api_client: endpoint_service_client.EndpointServiceClient,
display_name: str,
project: str,
location: str,
description: Optional[str] = None,
labels: Optional[Dict[str, str]] = None,
metadata: Optional[Sequence[Tuple[str, str]]] = (),
credentials: Optional[auth_credentials.Credentials] = None,
encryption_spec: Optional[gca_encryption_spec.EncryptionSpec] = None,
network: Optional[str] = None,
sync=True,
create_request_timeout: Optional[float] = None,
endpoint_id: Optional[str] = None,
predict_request_response_logging_config: Optional[
gca_endpoint_compat.PredictRequestResponseLoggingConfig
] = None,
private_service_connect_config: Optional[
gca_service_networking.PrivateServiceConnectConfig
] = None,
dedicated_endpoint_enabled=False,
client_connection_config: Optional[
gca_endpoint_compat.ClientConnectionConfig
] = None,
) -> "Endpoint":
"""Creates a new endpoint by calling the API client.
Args:
api_client (EndpointServiceClient):
Required. An instance of EndpointServiceClient with the correct
api_endpoint already set based on user's preferences.
display_name (str):
Required. The user-defined name of the Endpoint.
The name can be up to 128 characters long and can consist
of any UTF-8 characters.
project (str):
Required. Project to retrieve endpoint from.
location (str):
Required. Location to retrieve endpoint from.
description (str):
Optional. The description of the Endpoint.
labels (Dict[str, str]):
Optional. The labels with user-defined metadata to
organize your Endpoints.
Label keys and values can be no longer than 64
characters (Unicode codepoints), can only
contain lowercase letters, numeric characters,
underscores and dashes. International characters
are allowed.
See https://goo.gl/xmQnxf for more information
and examples of labels.
metadata (Sequence[Tuple[str, str]]):
Optional. Strings which should be sent along with the request as
metadata.
credentials (auth_credentials.Credentials):
Optional. Custom credentials to use to upload this model. Overrides
credentials set in aiplatform.init.
encryption_spec (gca_encryption_spec.EncryptionSpec):
Optional. The Cloud KMS customer managed encryption key used to protect the dataset.
The key needs to be in the same region as where the compute
resource is created.
If set, this Dataset and all sub-resources of this Dataset will be secured by this key.
network (str):
Optional. The full name of the Compute Engine network to which
this Endpoint will be peered. E.g. "projects/12345/global/networks/myVPC".
Private services access must already be configured for the network.
Cannot be specified when private_service_connect is enabled.
Read more about PrivateEndpoints
[in the documentation](https://cloud.google.com/vertex-ai/docs/predictions/using-private-endpoints)
sync (bool):
Whether to create this endpoint synchronously.
create_request_timeout (float):
Optional. The timeout for the create request in seconds.
endpoint_id (str):
Optional. The ID to use for endpoint, which will become
the final component of the endpoint resource name. If
not provided, Vertex AI will generate a value for this
ID.
This value should be 1-10 characters, and valid
characters are /[0-9]/. When using HTTP/JSON, this field
is populated based on a query string argument, such as
``?endpoint_id=12345``. This is the fallback for fields
that are not included in either the URI or the body.
predict_request_response_logging_config (aiplatform.endpoint.PredictRequestResponseLoggingConfig):
Optional. The request response logging configuration for online prediction.
private_service_connect_config (aiplatform.service_network.PrivateServiceConnectConfig):
If enabled, the endpoint can be accessible via [Private Service Connect](https://cloud.google.com/vpc/docs/private-service-connect).
Cannot be enabled when network is specified.
dedicated_endpoint_enabled (bool):
Optional. If enabled, a dedicated DNS will be created and your
traffic will be fully isolated from other customers' traffic and
latency will be reduced.
client_connection_config (aiplatform.endpoint.ClientConnectionConfig):
Optional. The inference timeout which is applied on cloud-based (PSC, or dedicated) endpoints for online prediction.
Returns:
endpoint (aiplatform.Endpoint):
Created endpoint.
"""
parent = initializer.global_config.common_location_path(
project=project, location=location
)
gapic_endpoint = gca_endpoint_compat.Endpoint(
display_name=display_name,
description=description,
labels=labels,
encryption_spec=encryption_spec,
network=network,
predict_request_response_logging_config=predict_request_response_logging_config,
private_service_connect_config=private_service_connect_config,
dedicated_endpoint_enabled=dedicated_endpoint_enabled,
client_connection_config=client_connection_config,
)
operation_future = api_client.create_endpoint(
parent=parent,
endpoint=gapic_endpoint,
endpoint_id=endpoint_id,
metadata=metadata,
timeout=create_request_timeout,
)
_LOGGER.log_create_with_lro(cls, operation_future)
created_endpoint = operation_future.result()
_LOGGER.log_create_complete(cls, created_endpoint, "endpoint")
return cls._construct_sdk_resource_from_gapic(
gapic_resource=created_endpoint,
project=project,
location=location,
credentials=credentials,
)
@classmethod
def _construct_sdk_resource_from_gapic(
cls,
gapic_resource: proto.Message,
project: Optional[str] = None,
location: Optional[str] = None,
credentials: Optional[auth_credentials.Credentials] = None,
) -> "Endpoint":
"""Given a GAPIC Endpoint object, return the SDK representation.
Args:
gapic_resource (proto.Message):
A GAPIC representation of a Endpoint resource, usually
retrieved by a get_* or in a list_* API call.
project (str):
Optional. Project to construct Endpoint object from. If not set,
project set in aiplatform.init will be used.
location (str):
Optional. Location to construct Endpoint object from. If not set,
location set in aiplatform.init will be used.
credentials (auth_credentials.Credentials):
Optional. Custom credentials to use to construct Endpoint.
Overrides credentials set in aiplatform.init.
Returns:
Endpoint (aiplatform.Endpoint):
An initialized Endpoint resource.
"""
endpoint = super()._construct_sdk_resource_from_gapic(
gapic_resource=gapic_resource,
project=project,
location=location,
credentials=credentials,
)
endpoint.authorized_session = None
endpoint.raw_predict_request_url = None
endpoint.stream_raw_predict_request_url = None
return endpoint
@staticmethod
def _allocate_traffic(
traffic_split: Dict[str, int],
traffic_percentage: int,
) -> Dict[str, int]:
"""Allocates desired traffic to new deployed model and scales traffic
of older deployed models.
Args:
traffic_split (Dict[str, int]):
Required. Current traffic split of deployed models in endpoint.
traffic_percentage (int):
Required. Desired traffic to new deployed model.
Returns:
new_traffic_split (Dict[str, int]):
Traffic split to use.
"""
new_traffic_split = {}
old_models_traffic = 100 - traffic_percentage
if old_models_traffic:
unallocated_traffic = old_models_traffic
for deployed_model in traffic_split:
current_traffic = traffic_split[deployed_model]
new_traffic = int(current_traffic / 100 * old_models_traffic)
new_traffic_split[deployed_model] = new_traffic
unallocated_traffic -= new_traffic
# will likely under-allocate. make total 100.
for deployed_model in new_traffic_split:
if unallocated_traffic == 0:
break
new_traffic_split[deployed_model] += 1
unallocated_traffic -= 1
new_traffic_split[_DEPLOYING_MODEL_TRAFFIC_SPLIT_KEY] = traffic_percentage
return new_traffic_split
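# Worked example of the allocation above: for an existing split
# {"m1": 60, "m2": 40} and traffic_percentage=30, the remaining 70% is scaled
# proportionally (m1 -> int(60 / 100 * 70) = 42, m2 -> int(40 / 100 * 70) = 28)
# and the deploying model (key "0") receives 30, giving a total of 100.
#
#   Endpoint._allocate_traffic({"m1": 60, "m2": 40}, traffic_percentage=30)
#   # -> {"m1": 42, "m2": 28, "0": 30}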
@staticmethod
def _unallocate_traffic(
traffic_split: Dict[str, int],
deployed_model_id: str,
) -> Dict[str, int]:
"""Sets deployed model id's traffic to 0 and scales the traffic of
other deployed models.
Args:
traffic_split (Dict[str, int]):
Required. Current traffic split of deployed models in endpoint.
deployed_model_id (str):
Required. ID of the deployed model whose traffic will be set to zero.
Returns:
new_traffic_split (Dict[str, int]):
Traffic split to use.
"""
new_traffic_split = traffic_split.copy()
del new_traffic_split[deployed_model_id]
deployed_model_id_traffic = traffic_split[deployed_model_id]
traffic_percent_left = 100 - deployed_model_id_traffic
if traffic_percent_left:
unallocated_traffic = 100
for deployed_model in new_traffic_split:
current_traffic = traffic_split[deployed_model]
new_traffic = int(current_traffic / traffic_percent_left * 100)
new_traffic_split[deployed_model] = new_traffic
unallocated_traffic -= new_traffic
# will likely under-allocate. make total 100.
for deployed_model in new_traffic_split:
if unallocated_traffic == 0:
break
new_traffic_split[deployed_model] += 1
unallocated_traffic -= 1
new_traffic_split[deployed_model_id] = 0
return new_traffic_split
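# Worked example of the unallocation above: removing "m3" from
# {"m1": 40, "m2": 40, "m3": 20} rescales the remaining 80% back to 100
# (m1 -> int(40 / 80 * 100) = 50, m2 -> 50) and pins "m3" to zero.
#
#   Endpoint._unallocate_traffic({"m1": 40, "m2": 40, "m3": 20}, deployed_model_id="m3")
#   # -> {"m1": 50, "m2": 50, "m3": 0}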
@staticmethod
def _validate_deploy_args(
min_replica_count: Optional[int],
max_replica_count: Optional[int],
accelerator_type: Optional[str],
deployed_model_display_name: Optional[str],
traffic_split: Optional[Dict[str, int]],
traffic_percentage: Optional[int],
deployment_resource_pool: Optional[DeploymentResourcePool],
required_replica_count: Optional[int],
):
"""Helper method to validate deploy arguments.
Args:
min_replica_count (int):
Required. The minimum number of machine replicas this deployed
model will be always deployed on. If traffic against it increases,
it may dynamically be deployed onto more replicas, and as traffic
decreases, some of these extra replicas may be freed.
max_replica_count (int):
Required. The maximum number of replicas this deployed model may
be deployed on when the traffic against it increases. If requested
value is too large, the deployment will error, but if deployment
succeeds then the ability to scale the model to that many replicas
is guaranteed (barring service outages). If traffic against the
deployed model increases beyond what its replicas at maximum may
handle, a portion of the traffic will be dropped. If this value
is not provided, the larger value of min_replica_count or 1 will
be used. If value provided is smaller than min_replica_count, it
will automatically be increased to be min_replica_count.
accelerator_type (str):
Required. Hardware accelerator type. One of ACCELERATOR_TYPE_UNSPECIFIED,
NVIDIA_TESLA_K80, NVIDIA_TESLA_P100, NVIDIA_TESLA_V100, NVIDIA_TESLA_P4,
NVIDIA_TESLA_T4
deployed_model_display_name (str):
Required. The display name of the DeployedModel. If not provided
upon creation, the Model's display_name is used.
traffic_split (Dict[str, int]):
Optional. A map from a DeployedModel's ID to the percentage of
this Endpoint's traffic that should be forwarded to that DeployedModel.
If a DeployedModel's ID is not listed in this map, then it receives
no traffic. The traffic percentage values must add up to 100, or
map must be empty if the Endpoint is to not accept any traffic at
the moment. Key for model being deployed is "0". Should not be
provided if traffic_percentage is provided.
traffic_percentage (int):
Optional. Desired traffic to newly deployed model. Defaults to
0 if there are pre-existing deployed models. Defaults to 100 if
there are no pre-existing deployed models. Negative values should
not be provided. Traffic of previously deployed models at the endpoint
will be scaled down to accommodate new deployed model's traffic.
Should not be provided if traffic_split is provided.
deployment_resource_pool (DeploymentResourcePool): Optional.
Resource pool where the model will be deployed. All models that
are deployed to the same DeploymentResourcePool will be hosted in
a shared model server. If provided, will override replica count
arguments.
required_replica_count (int):
Optional. Number of required available replicas for the
deployment to succeed. This field is only needed when partial
model deployment/mutation is desired, with a value greater than
or equal to 1 and less than or equal to min_replica_count. If
set, the model deploy/mutate operation will succeed once
available_replica_count reaches required_replica_count, and the
rest of the replicas will be retried.
Raises:
ValueError: If min or max replica count is negative, if traffic percentage is
greater than 100 or less than 0, or if traffic_split does not sum to 100.
"""
if deployment_resource_pool:
# Validate that replica count and deployment resource pool are not
# both specified.
if (
min_replica_count
and min_replica_count != 1
or max_replica_count
and max_replica_count != 1
or required_replica_count
and required_replica_count != 0
):
raise ValueError(
"Ignoring explicitly specified replica counts, "
"since deployment_resource_pool was also given."
)
if accelerator_type:
raise ValueError(
"Conflicting deployment parameters were given. "
"deployment_resource_pool may not be specified at the same "
"time as accelerator_type."
)
else:
# Validate that a non-negative replica count is given, and validate
# the accelerator type.
if min_replica_count < 0:
raise ValueError("Min replica cannot be negative.")
if max_replica_count < 0:
raise ValueError("Max replica cannot be negative.")
if required_replica_count and required_replica_count < 0:
raise ValueError("Required replica cannot be negative.")
if accelerator_type:
utils.validate_accelerator_type(accelerator_type)
if deployed_model_display_name is not None:
utils.validate_display_name(deployed_model_display_name)
if traffic_split is None:
if traffic_percentage > 100:
raise ValueError("Traffic percentage cannot be greater than 100.")
if traffic_percentage < 0:
raise ValueError("Traffic percentage cannot be negative.")
elif traffic_split:
if sum(traffic_split.values()) != 100:
raise ValueError(
"Sum of all traffic within traffic split needs to be 100."
)
def deploy(
self,
model: "Model",
deployed_model_display_name: Optional[str] = None,
traffic_percentage: int = 0,
traffic_split: Optional[Dict[str, int]] = None,
machine_type: Optional[str] = None,
min_replica_count: int = 1,
max_replica_count: int = 1,
accelerator_type: Optional[str] = None,
accelerator_count: Optional[int] = None,
tpu_topology: Optional[str] = None,
service_account: Optional[str] = None,
explanation_metadata: Optional[aiplatform.explain.ExplanationMetadata] = None,
explanation_parameters: Optional[
aiplatform.explain.ExplanationParameters
] = None,
metadata: Optional[Sequence[Tuple[str, str]]] = (),
sync=True,
deploy_request_timeout: Optional[float] = None,
autoscaling_target_cpu_utilization: Optional[int] = None,
autoscaling_target_accelerator_duty_cycle: Optional[int] = None,
enable_access_logging=False,
disable_container_logging: bool = False,
deployment_resource_pool: Optional[DeploymentResourcePool] = None,
reservation_affinity_type: Optional[str] = None,
reservation_affinity_key: Optional[str] = None,
reservation_affinity_values: Optional[List[str]] = None,
spot: bool = False,
fast_tryout_enabled: bool = False,
system_labels: Optional[Dict[str, str]] = None,
required_replica_count: Optional[int] = 0,
) -> None:
"""Deploys a Model to the Endpoint.
Args:
model (aiplatform.Model):
Required. Model to be deployed.
deployed_model_display_name (str):
Optional. The display name of the DeployedModel. If not provided
upon creation, the Model's display_name is used.
traffic_percentage (int):
Optional. Desired traffic to newly deployed model. Defaults to
0 if there are pre-existing deployed models. Defaults to 100 if
there are no pre-existing deployed models. Negative values should
not be provided. Traffic of previously deployed models at the endpoint
will be scaled down to accommodate new deployed model's traffic.
Should not be provided if traffic_split is provided.
traffic_split (Dict[str, int]):
Optional. A map from a DeployedModel's ID to the percentage of
this Endpoint's traffic that should be forwarded to that DeployedModel.
If a DeployedModel's ID is not listed in this map, then it receives
no traffic. The traffic percentage values must add up to 100, or
map must be empty if the Endpoint is to not accept any traffic at
the moment. Key for model being deployed is "0". Should not be
provided if traffic_percentage is provided.
machine_type (str):
Optional. The type of machine. Not specifying machine type will
result in model to be deployed with automatic resources.
min_replica_count (int):
Optional. The minimum number of machine replicas this deployed
model will be always deployed on. If traffic against it increases,
it may dynamically be deployed onto more replicas, and as traffic
decreases, some of these extra replicas may be freed.
max_replica_count (int):
Optional. The maximum number of replicas this deployed model may
be deployed on when the traffic against it increases. If requested
value is too large, the deployment will error, but if deployment
succeeds then the ability to scale the model to that many replicas
is guaranteed (barring service outages). If traffic against the
deployed model increases beyond what its replicas at maximum may
handle, a portion of the traffic will be dropped. If this value
is not provided, the larger value of min_replica_count or 1 will
be used. If value provided is smaller than min_replica_count, it
will automatically be increased to be min_replica_count.
accelerator_type (str):
Optional. Hardware accelerator type. Must also set accelerator_count if used.
One of ACCELERATOR_TYPE_UNSPECIFIED, NVIDIA_TESLA_K80, NVIDIA_TESLA_P100,
NVIDIA_TESLA_V100, NVIDIA_TESLA_P4, NVIDIA_TESLA_T4
accelerator_count (int):
Optional. The number of accelerators to attach to a worker replica.
tpu_topology (str):
Optional. The TPU topology to use for the DeployedModel.
Required for CloudTPU multihost deployments.
service_account (str):
The service account that the DeployedModel's container runs as. Specify the
email address of the service account. If this service account is not
specified, the container runs as a service account that doesn't have access
to the resource project.
Users deploying the Model must have the `iam.serviceAccounts.actAs`
permission on this service account.
explanation_metadata (aiplatform.explain.ExplanationMetadata):
Optional. Metadata describing the Model's input and output for explanation.
`explanation_metadata` is optional while `explanation_parameters` must be
specified when used.
For more details, see `Ref docs <http://tinyurl.com/1igh60kt>`
explanation_parameters (aiplatform.explain.ExplanationParameters):
Optional. Parameters to configure explaining for Model's predictions.
For more details, see `Ref docs <http://tinyurl.com/1an4zake>`
metadata (Sequence[Tuple[str, str]]):
Optional. Strings which should be sent along with the request as
metadata.
sync (bool):
Whether to execute this method synchronously. If False, this method
will be executed in concurrent Future and any downstream object will
be immediately returned and synced when the Future has completed.
deploy_request_timeout (float):
Optional. The timeout for the deploy request in seconds.
autoscaling_target_cpu_utilization (int):
Target CPU Utilization to use for Autoscaling Replicas.
A default value of 60 will be used if not specified.
autoscaling_target_accelerator_duty_cycle (int):
Target Accelerator Duty Cycle.
Must also set accelerator_type and accelerator_count if specified.
A default value of 60 will be used if not specified.
enable_access_logging (bool):
Whether to enable endpoint access logging. Defaults to False.
disable_container_logging (bool):
If True, container logs from the deployed model will not be
written to Cloud Logging. Defaults to False.
deployment_resource_pool (DeploymentResourcePool):
Resource pool where the model will be deployed. All models that
are deployed to the same DeploymentResourcePool will be hosted in
a shared model server. If provided, will override replica count
arguments.
reservation_affinity_type (str):
Optional. The type of reservation affinity.
One of NO_RESERVATION, ANY_RESERVATION, SPECIFIC_RESERVATION,
SPECIFIC_THEN_ANY_RESERVATION, SPECIFIC_THEN_NO_RESERVATION
reservation_affinity_key (str):
Optional. Corresponds to the label key of a reservation resource.
To target a SPECIFIC_RESERVATION by name, use `compute.googleapis.com/reservation-name` as the key
and specify the name of your reservation as its value.
reservation_affinity_values (List[str]):
Optional. Corresponds to the label values of a reservation resource.
This must be the full resource name of the reservation.
Format: 'projects/{project_id_or_number}/zones/{zone}/reservations/{reservation_name}'
spot (bool):
Optional. Whether to schedule the deployment workload on spot VMs.
fast_tryout_enabled (bool):
Optional. Defaults to False.
If True, model will be deployed using faster deployment path.
Useful for quick experiments. Not for production workloads. Only
available for most popular models with certain machine types.
system_labels (Dict[str, str]):
Optional. System labels to apply to Model Garden deployments.
System labels are managed by Google for internal use only.
required_replica_count (int):
Optional. Number of required available replicas for the
deployment to succeed. This field is only needed when partial
model deployment/mutation is desired, with a value greater than
or equal to 1 and less than or equal to min_replica_count. If
set, the model deploy/mutate operation will succeed once
available_replica_count reaches required_replica_count, and the
rest of the replicas will be retried.
"""
self._sync_gca_resource_if_skipped()
self._validate_deploy_args(
min_replica_count=min_replica_count,
max_replica_count=max_replica_count,
accelerator_type=accelerator_type,
deployed_model_display_name=deployed_model_display_name,
traffic_split=traffic_split,
traffic_percentage=traffic_percentage,
deployment_resource_pool=deployment_resource_pool,
required_replica_count=required_replica_count,
)
explanation_spec = _explanation_utils.create_and_validate_explanation_spec(
explanation_metadata=explanation_metadata,
explanation_parameters=explanation_parameters,
)
self._deploy(
model=model,
deployed_model_display_name=deployed_model_display_name,
traffic_percentage=traffic_percentage,
traffic_split=traffic_split,
machine_type=machine_type,
min_replica_count=min_replica_count,
max_replica_count=max_replica_count,
accelerator_type=accelerator_type,
accelerator_count=accelerator_count,
tpu_topology=tpu_topology,
reservation_affinity_type=reservation_affinity_type,
reservation_affinity_key=reservation_affinity_key,
reservation_affinity_values=reservation_affinity_values,
service_account=service_account,
explanation_spec=explanation_spec,
metadata=metadata,
sync=sync,
deploy_request_timeout=deploy_request_timeout,
autoscaling_target_cpu_utilization=autoscaling_target_cpu_utilization,
autoscaling_target_accelerator_duty_cycle=autoscaling_target_accelerator_duty_cycle,
spot=spot,
enable_access_logging=enable_access_logging,
disable_container_logging=disable_container_logging,
deployment_resource_pool=deployment_resource_pool,
fast_tryout_enabled=fast_tryout_enabled,
system_labels=system_labels,
required_replica_count=required_replica_count,
)
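# Illustrative usage sketch (not part of the library source): deploying a model
# to this endpoint with autoscaling between one and two replicas. The model
# resource name and machine settings are placeholders.
#
#   model = aiplatform.Model("projects/123/locations/us-central1/models/789")
#   endpoint.deploy(
#       model=model,
#       machine_type="n1-standard-4",
#       min_replica_count=1,
#       max_replica_count=2,
#       traffic_percentage=100,
#   )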
@base.optional_sync()
def _deploy(
self,
model: "Model",
deployed_model_display_name: Optional[str] = None,
traffic_percentage: Optional[int] = 0,
traffic_split: Optional[Dict[str, int]] = None,
machine_type: Optional[str] = None,
min_replica_count: int = 1,
max_replica_count: int = 1,
accelerator_type: Optional[str] = None,
accelerator_count: Optional[int] = None,
tpu_topology: Optional[str] = None,
reservation_affinity_type: Optional[str] = None,
reservation_affinity_key: Optional[str] = None,
reservation_affinity_values: Optional[List[str]] = None,
service_account: Optional[str] = None,
explanation_spec: Optional[aiplatform.explain.ExplanationSpec] = None,
metadata: Optional[Sequence[Tuple[str, str]]] = (),
sync=True,
deploy_request_timeout: Optional[float] = None,
autoscaling_target_cpu_utilization: Optional[int] = None,
autoscaling_target_accelerator_duty_cycle: Optional[int] = None,
spot: bool = False,
enable_access_logging=False,
disable_container_logging: bool = False,
deployment_resource_pool: Optional[DeploymentResourcePool] = None,
fast_tryout_enabled: bool = False,
system_labels: Optional[Dict[str, str]] = None,
required_replica_count: Optional[int] = 0,
) -> None:
"""Deploys a Model to the Endpoint.
Args:
model (aiplatform.Model):
Required. Model to be deployed.
deployed_model_display_name (str):
Optional. The display name of the DeployedModel. If not provided
upon creation, the Model's display_name is used.
traffic_percentage (int):
Optional. Desired traffic to newly deployed model. Defaults to
0 if there are pre-existing deployed models. Defaults to 100 if
there are no pre-existing deployed models. Negative values should
not be provided. Traffic of previously deployed models at the endpoint
will be scaled down to accommodate new deployed model's traffic.
Should not be provided if traffic_split is provided.
traffic_split (Dict[str, int]):
Optional. A map from a DeployedModel's ID to the percentage of
this Endpoint's traffic that should be forwarded to that DeployedModel.
If a DeployedModel's ID is not listed in this map, then it receives
no traffic. The traffic percentage values must add up to 100, or
map must be empty if the Endpoint is to not accept any traffic at
the moment. Key for model being deployed is "0". Should not be
provided if traffic_percentage is provided.
machine_type (str):
Optional. The type of machine. Not specifying machine type will
                result in the model being deployed with automatic resources.
min_replica_count (int):
Optional. The minimum number of machine replicas this deployed
                model will always be deployed on. If traffic against it increases,
it may dynamically be deployed onto more replicas, and as traffic
decreases, some of these extra replicas may be freed.
max_replica_count (int):
Optional. The maximum number of replicas this deployed model may
be deployed on when the traffic against it increases. If requested
value is too large, the deployment will error, but if deployment
succeeds then the ability to scale the model to that many replicas
is guaranteed (barring service outages). If traffic against the
deployed model increases beyond what its replicas at maximum may
handle, a portion of the traffic will be dropped. If this value
is not provided, the larger value of min_replica_count or 1 will
be used. If value provided is smaller than min_replica_count, it
will automatically be increased to be min_replica_count.
accelerator_type (str):
Optional. Hardware accelerator type. Must also set accelerator_count if used.
One of ACCELERATOR_TYPE_UNSPECIFIED, NVIDIA_TESLA_K80, NVIDIA_TESLA_P100,
NVIDIA_TESLA_V100, NVIDIA_TESLA_P4, NVIDIA_TESLA_T4
accelerator_count (int):
Optional. The number of accelerators to attach to a worker replica.
tpu_topology (str):
Optional. The TPU topology to use for the DeployedModel.
Required for CloudTPU multihost deployments.
reservation_affinity_type (str):
Optional. The type of reservation affinity.
One of NO_RESERVATION, ANY_RESERVATION, SPECIFIC_RESERVATION,
SPECIFIC_THEN_ANY_RESERVATION, SPECIFIC_THEN_NO_RESERVATION
reservation_affinity_key (str):
Optional. Corresponds to the label key of a reservation resource.
To target a SPECIFIC_RESERVATION by name, use `compute.googleapis.com/reservation-name` as the key
and specify the name of your reservation as its value.
reservation_affinity_values (List[str]):
Optional. Corresponds to the label values of a reservation resource.
This must be the full resource name of the reservation.
Format: 'projects/{project_id_or_number}/zones/{zone}/reservations/{reservation_name}'
service_account (str):
The service account that the DeployedModel's container runs as. Specify the
email address of the service account. If this service account is not
specified, the container runs as a service account that doesn't have access
to the resource project.
Users deploying the Model must have the `iam.serviceAccounts.actAs`
permission on this service account.
explanation_spec (aiplatform.explain.ExplanationSpec):
Optional. Specification of Model explanation.
metadata (Sequence[Tuple[str, str]]):
Optional. Strings which should be sent along with the request as
metadata.
sync (bool):
Whether to execute this method synchronously. If False, this method
will be executed in concurrent Future and any downstream object will
be immediately returned and synced when the Future has completed.
deploy_request_timeout (float):
Optional. The timeout for the deploy request in seconds.
autoscaling_target_cpu_utilization (int):
Target CPU Utilization to use for Autoscaling Replicas.
A default value of 60 will be used if not specified.
autoscaling_target_accelerator_duty_cycle (int):
Target Accelerator Duty Cycle.
Must also set accelerator_type and accelerator_count if specified.
A default value of 60 will be used if not specified.
spot (bool):
Optional. Whether to schedule the deployment workload on spot VMs.
enable_access_logging (bool):
Whether to enable endpoint access logging. Defaults to False.
disable_container_logging (bool):
If True, container logs from the deployed model will not be
written to Cloud Logging. Defaults to False.
deployment_resource_pool (DeploymentResourcePool):
Resource pool where the model will be deployed. All models that
are deployed to the same DeploymentResourcePool will be hosted in
a shared model server. If provided, will override replica count
arguments.
fast_tryout_enabled (bool):
Optional. Defaults to False.
If True, model will be deployed using faster deployment path.
Useful for quick experiments. Not for production workloads. Only
available for most popular models with certain machine types.
system_labels (Dict[str, str]):
Optional. System labels to apply to Model Garden deployments.
System labels are managed by Google for internal use only.
required_replica_count (int):
Optional. Number of required available replicas for the
deployment to succeed. This field is only needed when partial
model deployment/mutation is desired, with a value greater than
                or equal to 1 and less than or equal to min_replica_count. If
set, the model deploy/mutate operation will succeed once
available_replica_count reaches required_replica_count, and the
rest of the replicas will be retried.
"""
_LOGGER.log_action_start_against_resource(
f"Deploying Model {model.resource_name} to", "", self
)
self._deploy_call(
api_client=self.api_client,
endpoint_resource_name=self.resource_name,
model=model,
endpoint_resource_traffic_split=self._gca_resource.traffic_split,
network=self.network,
deployed_model_display_name=deployed_model_display_name,
traffic_percentage=traffic_percentage,
traffic_split=traffic_split,
machine_type=machine_type,
min_replica_count=min_replica_count,
max_replica_count=max_replica_count,
accelerator_type=accelerator_type,
accelerator_count=accelerator_count,
tpu_topology=tpu_topology,
reservation_affinity_type=reservation_affinity_type,
reservation_affinity_key=reservation_affinity_key,
reservation_affinity_values=reservation_affinity_values,
service_account=service_account,
explanation_spec=explanation_spec,
metadata=metadata,
deploy_request_timeout=deploy_request_timeout,
autoscaling_target_cpu_utilization=autoscaling_target_cpu_utilization,
autoscaling_target_accelerator_duty_cycle=autoscaling_target_accelerator_duty_cycle,
spot=spot,
enable_access_logging=enable_access_logging,
disable_container_logging=disable_container_logging,
deployment_resource_pool=deployment_resource_pool,
fast_tryout_enabled=fast_tryout_enabled,
system_labels=system_labels,
required_replica_count=required_replica_count,
)
_LOGGER.log_action_completed_against_resource("model", "deployed", self)
self._sync_gca_resource()
@classmethod
def _deploy_call(
cls,
api_client: endpoint_service_client.EndpointServiceClient,
endpoint_resource_name: str,
model: "Model",
endpoint_resource_traffic_split: Optional[proto.MapField] = None,
network: Optional[str] = None,
deployed_model_display_name: Optional[str] = None,
traffic_percentage: Optional[int] = 0,
traffic_split: Optional[Dict[str, int]] = None,
machine_type: Optional[str] = None,
min_replica_count: int = 1,
max_replica_count: int = 1,
accelerator_type: Optional[str] = None,
accelerator_count: Optional[int] = None,
tpu_topology: Optional[str] = None,
reservation_affinity_type: Optional[str] = None,
reservation_affinity_key: Optional[str] = None,
reservation_affinity_values: Optional[List[str]] = None,
service_account: Optional[str] = None,
explanation_spec: Optional[aiplatform.explain.ExplanationSpec] = None,
metadata: Optional[Sequence[Tuple[str, str]]] = (),
deploy_request_timeout: Optional[float] = None,
autoscaling_target_cpu_utilization: Optional[int] = None,
autoscaling_target_accelerator_duty_cycle: Optional[int] = None,
autoscaling_target_request_count_per_minute: Optional[int] = None,
spot: bool = False,
enable_access_logging=False,
disable_container_logging: bool = False,
deployment_resource_pool: Optional[DeploymentResourcePool] = None,
fast_tryout_enabled: bool = False,
system_labels: Optional[Dict[str, str]] = None,
required_replica_count: Optional[int] = 0,
) -> None:
"""Helper method to deploy model to endpoint.
Args:
api_client (endpoint_service_client.EndpointServiceClient):
Required. endpoint_service_client.EndpointServiceClient to make call.
endpoint_resource_name (str):
Required. Endpoint resource name to deploy model to.
model (aiplatform.Model):
Required. Model to be deployed.
endpoint_resource_traffic_split (proto.MapField):
Optional. Endpoint current resource traffic split.
network (str):
Optional. The full name of the Compute Engine network to which
this Endpoint will be peered. E.g. "projects/123/global/networks/my_vpc".
Private services access must already be configured for the network.
deployed_model_display_name (str):
Optional. The display name of the DeployedModel. If not provided
upon creation, the Model's display_name is used.
traffic_percentage (int):
Optional. Desired traffic to newly deployed model. Defaults to
0 if there are pre-existing deployed models. Defaults to 100 if
there are no pre-existing deployed models. Negative values should
not be provided. Traffic of previously deployed models at the endpoint
will be scaled down to accommodate new deployed model's traffic.
Should not be provided if traffic_split is provided.
traffic_split (Dict[str, int]):
Optional. A map from a DeployedModel's ID to the percentage of
this Endpoint's traffic that should be forwarded to that DeployedModel.
If a DeployedModel's ID is not listed in this map, then it receives
no traffic. The traffic percentage values must add up to 100, or
map must be empty if the Endpoint is to not accept any traffic at
the moment. Key for model being deployed is "0". Should not be
provided if traffic_percentage is provided.
machine_type (str):
Optional. The type of machine. Not specifying machine type will
                result in the model being deployed with automatic resources.
min_replica_count (int):
Optional. The minimum number of machine replicas this deployed
                model will always be deployed on. If traffic against it increases,
it may dynamically be deployed onto more replicas, and as traffic
decreases, some of these extra replicas may be freed.
max_replica_count (int):
Optional. The maximum number of replicas this deployed model may
be deployed on when the traffic against it increases. If requested
value is too large, the deployment will error, but if deployment
succeeds then the ability to scale the model to that many replicas
is guaranteed (barring service outages). If traffic against the
deployed model increases beyond what its replicas at maximum may
handle, a portion of the traffic will be dropped. If this value
is not provided, the larger value of min_replica_count or 1 will
be used. If value provided is smaller than min_replica_count, it
will automatically be increased to be min_replica_count.
accelerator_type (str):
Optional. Hardware accelerator type. Must also set accelerator_count if used.
One of ACCELERATOR_TYPE_UNSPECIFIED, NVIDIA_TESLA_K80, NVIDIA_TESLA_P100,
NVIDIA_TESLA_V100, NVIDIA_TESLA_P4, NVIDIA_TESLA_T4
accelerator_count (int):
Optional. The number of accelerators to attach to a worker replica.
tpu_topology (str):
Optional. The TPU topology to use for the DeployedModel.
Required for CloudTPU multihost deployments.
reservation_affinity_type (str):
Optional. The type of reservation affinity.
One of NO_RESERVATION, ANY_RESERVATION, SPECIFIC_RESERVATION,
SPECIFIC_THEN_ANY_RESERVATION, SPECIFIC_THEN_NO_RESERVATION
reservation_affinity_key (str):
Optional. Corresponds to the label key of a reservation resource.
To target a SPECIFIC_RESERVATION by name, use `compute.googleapis.com/reservation-name` as the key
and specify the name of your reservation as its value.
reservation_affinity_values (List[str]):
Optional. Corresponds to the label values of a reservation resource.
This must be the full resource name of the reservation.
Format: 'projects/{project_id_or_number}/zones/{zone}/reservations/{reservation_name}'
service_account (str):
The service account that the DeployedModel's container runs as. Specify the
email address of the service account. If this service account is not
specified, the container runs as a service account that doesn't have access
to the resource project.
Users deploying the Model must have the `iam.serviceAccounts.actAs`
permission on this service account.
If not specified, uses the service account set in aiplatform.init.
explanation_spec (aiplatform.explain.ExplanationSpec):
Optional. Specification of Model explanation.
metadata (Sequence[Tuple[str, str]]):
Optional. Strings which should be sent along with the request as
metadata.
deploy_request_timeout (float):
Optional. The timeout for the deploy request in seconds.
autoscaling_target_cpu_utilization (int):
Optional. Target CPU Utilization to use for Autoscaling Replicas.
A default value of 60 will be used if not specified.
autoscaling_target_accelerator_duty_cycle (int):
Optional. Target Accelerator Duty Cycle.
Must also set accelerator_type and accelerator_count if specified.
A default value of 60 will be used if not specified.
autoscaling_target_request_count_per_minute (int):
Optional. Target request count per minute per instance.
spot (bool):
Optional. Whether to schedule the deployment workload on spot VMs.
enable_access_logging (bool):
Whether to enable endpoint access logging. Defaults to False.
disable_container_logging (bool):
If True, container logs from the deployed model will not be
written to Cloud Logging. Defaults to False.
deployment_resource_pool (DeploymentResourcePool):
Resource pool where the model will be deployed. All models that
are deployed to the same DeploymentResourcePool will be hosted in
a shared model server. If provided, will override replica count
arguments.
fast_tryout_enabled (bool):
Optional. Defaults to False.
If True, model will be deployed using faster deployment path.
Useful for quick experiments. Not for production workloads. Only
available for most popular models with certain machine types.
system_labels (Dict[str, str]):
Optional. System labels to apply to Model Garden deployments.
System labels are managed by Google for internal use only.
required_replica_count (int):
Optional. Number of required available replicas for the
deployment to succeed. This field is only needed when partial
model deployment/mutation is desired, with a value greater than
                or equal to 1 and less than or equal to min_replica_count. If
set, the model deploy/mutate operation will succeed once
available_replica_count reaches required_replica_count, and the
rest of the replicas will be retried.
Raises:
ValueError: If only `accelerator_type` or `accelerator_count` is specified.
ValueError: If model does not support deployment.
            ValueError: If there is no current traffic split and traffic percentage
is not 0 or 100.
ValueError: If `deployment_resource_pool` and a custom machine spec
are both present.
ValueError: If both `explanation_spec` and `deployment_resource_pool`
are present.
"""
service_account = service_account or initializer.global_config.service_account
if deployment_resource_pool:
deployed_model = gca_endpoint_compat.DeployedModel(
model=model.versioned_resource_name,
display_name=deployed_model_display_name,
service_account=service_account,
disable_container_logging=disable_container_logging,
)
if system_labels:
deployed_model.system_labels = system_labels
supports_shared_resources = (
gca_model_compat.Model.DeploymentResourcesType.SHARED_RESOURCES
in model.supported_deployment_resources_types
)
if not supports_shared_resources:
                raise ValueError(
                    "`deployment_resource_pool` may only be specified for models "
                    "which support shared resources."
                )
provided_custom_machine_spec = (
machine_type
or accelerator_type
or accelerator_count
or autoscaling_target_accelerator_duty_cycle
or autoscaling_target_cpu_utilization
or autoscaling_target_request_count_per_minute
)
if provided_custom_machine_spec:
raise ValueError(
"Conflicting parameters in deployment request. "
"The machine_type, accelerator_type and accelerator_count, "
"autoscaling_target_accelerator_duty_cycle, "
"autoscaling_target_cpu_utilization, "
"autoscaling_target_request_count_per_minute parameters "
"may not be set when `deployment_resource_pool` is "
"specified."
)
deployed_model.shared_resources = deployment_resource_pool.resource_name
if explanation_spec:
raise ValueError(
"Model explanation is not supported for deployments using "
"shared resources."
)
else:
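            # If max_replica_count was left smaller than min_replica_count, raise it
            # to min_replica_count, matching the documented behavior above.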
max_replica_count = max(min_replica_count, max_replica_count)
if bool(accelerator_type) != bool(accelerator_count):
raise ValueError(
"Both `accelerator_type` and `accelerator_count` should be specified or None."
)
if autoscaling_target_accelerator_duty_cycle is not None and (
not accelerator_type or not accelerator_count
):
                raise ValueError(
                    "Both `accelerator_type` and `accelerator_count` should be set "
                    "when specifying `autoscaling_target_accelerator_duty_cycle`."
                )
deployed_model = gca_endpoint_compat.DeployedModel(
model=model.versioned_resource_name,
display_name=deployed_model_display_name,
service_account=service_account,
enable_access_logging=enable_access_logging,
disable_container_logging=disable_container_logging,
)
if system_labels:
deployed_model.system_labels = system_labels
supports_automatic_resources = (
gca_model_compat.Model.DeploymentResourcesType.AUTOMATIC_RESOURCES
in model.supported_deployment_resources_types
)
supports_dedicated_resources = (
gca_model_compat.Model.DeploymentResourcesType.DEDICATED_RESOURCES
in model.supported_deployment_resources_types
)
provided_custom_machine_spec = (
machine_type
or accelerator_type
or accelerator_count
or autoscaling_target_accelerator_duty_cycle
or autoscaling_target_cpu_utilization
or autoscaling_target_request_count_per_minute
)
# If the model supports both automatic and dedicated deployment resources,
# decide based on the presence of machine spec customizations
use_dedicated_resources = supports_dedicated_resources and (
not supports_automatic_resources or provided_custom_machine_spec
)
if provided_custom_machine_spec and not use_dedicated_resources:
_LOGGER.info(
"Model does not support dedicated deployment resources. "
"The machine_type, accelerator_type and accelerator_count, "
"autoscaling_target_accelerator_duty_cycle, "
"autoscaling_target_cpu_utilization, "
"autoscaling_target_request_count_per_minute parameters "
"are ignored."
)
if use_dedicated_resources and not machine_type:
machine_type = _DEFAULT_MACHINE_TYPE
_LOGGER.info(f"Using default machine_type: {machine_type}")
if use_dedicated_resources:
dedicated_resources = gca_machine_resources_compat.DedicatedResources(
min_replica_count=min_replica_count,
max_replica_count=max_replica_count,
spot=spot,
required_replica_count=required_replica_count,
)
machine_spec = gca_machine_resources_compat.MachineSpec(
machine_type=machine_type
)
if autoscaling_target_cpu_utilization:
autoscaling_metric_spec = gca_machine_resources_compat.AutoscalingMetricSpec(
metric_name="aiplatform.googleapis.com/prediction/online/cpu/utilization",
target=autoscaling_target_cpu_utilization,
)
dedicated_resources.autoscaling_metric_specs.extend(
[autoscaling_metric_spec]
)
if accelerator_type and accelerator_count:
utils.validate_accelerator_type(accelerator_type)
machine_spec.accelerator_type = accelerator_type
machine_spec.accelerator_count = accelerator_count
if autoscaling_target_accelerator_duty_cycle:
autoscaling_metric_spec = gca_machine_resources_compat.AutoscalingMetricSpec(
metric_name="aiplatform.googleapis.com/prediction/online/accelerator/duty_cycle",
target=autoscaling_target_accelerator_duty_cycle,
)
dedicated_resources.autoscaling_metric_specs.extend(
[autoscaling_metric_spec]
)
if autoscaling_target_request_count_per_minute:
autoscaling_metric_spec = (
gca_machine_resources_compat.AutoscalingMetricSpec(
metric_name=(
"aiplatform.googleapis.com/prediction/online/"
"request_count"
),
target=autoscaling_target_request_count_per_minute,
)
)
dedicated_resources.autoscaling_metric_specs.extend(
[autoscaling_metric_spec]
)
if reservation_affinity_type:
machine_spec.reservation_affinity = utils.get_reservation_affinity(
reservation_affinity_type,
reservation_affinity_key,
reservation_affinity_values,
)
if tpu_topology is not None:
machine_spec.tpu_topology = tpu_topology
dedicated_resources.machine_spec = machine_spec
deployed_model.dedicated_resources = dedicated_resources
if fast_tryout_enabled:
deployed_model.faster_deployment_config = (
gca_endpoint_compat.FasterDeploymentConfig(
fast_tryout_enabled=fast_tryout_enabled
)
)
elif supports_automatic_resources:
deployed_model.automatic_resources = (
gca_machine_resources_compat.AutomaticResources(
min_replica_count=min_replica_count,
max_replica_count=max_replica_count,
)
)
else:
_LOGGER.warning(
"Model does not support deployment. "
"See https://cloud.google.com/vertex-ai/docs/reference/rpc/google.cloud.aiplatform.v1#google.cloud.aiplatform.v1.Model.FIELDS.repeated.google.cloud.aiplatform.v1.Model.DeploymentResourcesType.google.cloud.aiplatform.v1.Model.supported_deployment_resources_types"
)
deployed_model.explanation_spec = explanation_spec
# Checking if traffic percentage is valid
# TODO(b/221059294) PrivateEndpoint should support traffic split
if traffic_split is None and not network:
# new model traffic needs to be 100 if no pre-existing models
if not endpoint_resource_traffic_split:
# default scenario
if traffic_percentage == 0:
traffic_percentage = 100
# verify user specified 100
elif traffic_percentage < 100:
raise ValueError(
"""There are currently no deployed models so the traffic
percentage for this deployed model needs to be 100."""
)
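            # Give the newly deployed model ("0") traffic_percentage of the traffic
            # and scale down previously deployed models' traffic to accommodate it.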
traffic_split = cls._allocate_traffic(
traffic_split=dict(endpoint_resource_traffic_split),
traffic_percentage=traffic_percentage,
)
operation_future = api_client.deploy_model(
endpoint=endpoint_resource_name,
deployed_model=deployed_model,
traffic_split=traffic_split,
metadata=metadata,
timeout=deploy_request_timeout,
)
_LOGGER.log_action_started_against_resource_with_lro(
"Deploy", "model", cls, operation_future
)
operation_future.result(timeout=None)
def undeploy(
self,
deployed_model_id: str,
traffic_split: Optional[Dict[str, int]] = None,
metadata: Optional[Sequence[Tuple[str, str]]] = (),
sync=True,
) -> None:
"""Undeploys a deployed model.
        The model to be undeployed should have no traffic, or the user must provide
a new traffic_split with the remaining deployed models. Refer
to `Endpoint.traffic_split` for the current traffic split mapping.
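        Example usage (a minimal sketch; the deployed model ID below is a
        placeholder):
            ```
            my_endpoint.undeploy(deployed_model_id="1234567890")
            ```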
Args:
deployed_model_id (str):
Required. The ID of the DeployedModel to be undeployed from the
Endpoint.
traffic_split (Dict[str, int]):
Optional. A map of DeployedModel IDs to the percentage of
this Endpoint's traffic that should be forwarded to that DeployedModel.
Required if undeploying a model with non-zero traffic from an Endpoint
with multiple deployed models. The traffic percentage values must add
up to 100, or map must be empty if the Endpoint is to not accept any traffic
at the moment. If a DeployedModel's ID is not listed in this map, then it
receives no traffic.
metadata (Sequence[Tuple[str, str]]):
Optional. Strings which should be sent along with the request as
metadata.
"""
self._sync_gca_resource_if_skipped()
if traffic_split is not None:
if deployed_model_id in traffic_split and traffic_split[deployed_model_id]:
raise ValueError("Model being undeployed should have 0 traffic.")
if sum(traffic_split.values()) != 100:
raise ValueError(
"Sum of all traffic within traffic split needs to be 100."
)
# Two or more models deployed to Endpoint and remaining traffic will be zero
elif (
len(self.traffic_split) > 1
and deployed_model_id in self._gca_resource.traffic_split
and self._gca_resource.traffic_split[deployed_model_id] == 100
):
raise ValueError(
f"Undeploying deployed model '{deployed_model_id}' would leave the remaining "
"traffic split at 0%. Traffic split must add up to 100% when models are "
"deployed. Please undeploy the other models first or provide an updated "
"traffic_split."
)
self._undeploy(
deployed_model_id=deployed_model_id,
traffic_split=traffic_split,
metadata=metadata,
sync=sync,
)
@base.optional_sync()
def _undeploy(
self,
deployed_model_id: str,
traffic_split: Optional[Dict[str, int]] = None,
metadata: Optional[Sequence[Tuple[str, str]]] = (),
sync=True,
) -> None:
"""Undeploys a deployed model.
Proportionally adjusts the traffic_split among the remaining deployed
models of the endpoint.
Args:
deployed_model_id (str):
Required. The ID of the DeployedModel to be undeployed from the
Endpoint.
traffic_split (Dict[str, int]):
Optional. A map from a DeployedModel's ID to the percentage of
this Endpoint's traffic that should be forwarded to that DeployedModel.
If a DeployedModel's ID is not listed in this map, then it receives
no traffic. The traffic percentage values must add up to 100, or
map must be empty if the Endpoint is to not accept any traffic at
                the moment.
metadata (Sequence[Tuple[str, str]]):
Optional. Strings which should be sent along with the request as
metadata.
"""
self._sync_gca_resource_if_skipped()
current_traffic_split = traffic_split or dict(self._gca_resource.traffic_split)
if deployed_model_id in current_traffic_split:
current_traffic_split = self._unallocate_traffic(
traffic_split=current_traffic_split,
deployed_model_id=deployed_model_id,
)
current_traffic_split.pop(deployed_model_id)
_LOGGER.log_action_start_against_resource("Undeploying", "model", self)
operation_future = self.api_client.undeploy_model(
endpoint=self.resource_name,
deployed_model_id=deployed_model_id,
traffic_split=current_traffic_split,
metadata=metadata,
)
_LOGGER.log_action_started_against_resource_with_lro(
"Undeploy", "model", self.__class__, operation_future
)
# block before returning
operation_future.result()
_LOGGER.log_action_completed_against_resource("model", "undeployed", self)
# update local resource
self._sync_gca_resource()
def update(
self,
display_name: Optional[str] = None,
description: Optional[str] = None,
labels: Optional[Dict[str, str]] = None,
traffic_split: Optional[Dict[str, int]] = None,
request_metadata: Optional[Sequence[Tuple[str, str]]] = (),
update_request_timeout: Optional[float] = None,
) -> "Endpoint":
"""Updates an endpoint.
Example usage:
my_endpoint = my_endpoint.update(
display_name='my-updated-endpoint',
description='my updated description',
labels={'key': 'value'},
traffic_split={
'123456': 20,
'234567': 80,
},
)
Args:
display_name (str):
Optional. The display name of the Endpoint.
                The name can be up to 128 characters long and can consist of any UTF-8
characters.
description (str):
Optional. The description of the Endpoint.
labels (Dict[str, str]):
Optional. The labels with user-defined metadata to organize your Endpoints.
Label keys and values can be no longer than 64 characters
(Unicode codepoints), can only contain lowercase letters, numeric
characters, underscores and dashes. International characters are allowed.
See https://goo.gl/xmQnxf for more information and examples of labels.
traffic_split (Dict[str, int]):
Optional. A map from a DeployedModel's ID to the percentage of this Endpoint's
traffic that should be forwarded to that DeployedModel.
If a DeployedModel's ID is not listed in this map, then it receives no traffic.
The traffic percentage values must add up to 100, or map must be empty if
                the Endpoint is to not accept any traffic at the moment.
request_metadata (Sequence[Tuple[str, str]]):
Optional. Strings which should be sent along with the request as metadata.
update_request_timeout (float):
Optional. The timeout for the update request in seconds.
Returns:
            Endpoint (aiplatform.Endpoint):
Updated endpoint resource.
Raises:
ValueError: If `labels` is not the correct format.
"""
self.wait()
current_endpoint_proto = self.gca_resource
copied_endpoint_proto = current_endpoint_proto.__class__(current_endpoint_proto)
update_mask: List[str] = []
if display_name:
utils.validate_display_name(display_name)
copied_endpoint_proto.display_name = display_name
update_mask.append("display_name")
if description:
copied_endpoint_proto.description = description
update_mask.append("description")
if labels:
utils.validate_labels(labels)
copied_endpoint_proto.labels = labels
update_mask.append("labels")
if traffic_split:
update_mask.append("traffic_split")
copied_endpoint_proto.traffic_split = traffic_split
update_mask = field_mask_pb2.FieldMask(paths=update_mask)
_LOGGER.log_action_start_against_resource(
"Updating",
"endpoint",
self,
)
self._gca_resource = self.api_client.update_endpoint(
endpoint=copied_endpoint_proto,
update_mask=update_mask,
metadata=request_metadata,
timeout=update_request_timeout,
)
_LOGGER.log_action_completed_against_resource("endpoint", "updated", self)
return self
def predict(
self,
instances: List,
parameters: Optional[Dict] = None,
timeout: Optional[float] = None,
use_raw_predict: Optional[bool] = False,
*,
use_dedicated_endpoint: Optional[bool] = False,
) -> Prediction:
"""Make a prediction against this Endpoint.
For dedicated endpoint, set use_dedicated_endpoint = True:
```
response = my_endpoint.predict(instances=[...],
use_dedicated_endpoint=True)
my_predictions = response.predictions
```
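        For a standard (non-dedicated) endpoint, a minimal sketch (the instances
        below are placeholders):
            ```
            response = my_endpoint.predict(instances=[...])
            my_predictions = response.predictions
            ```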
Args:
instances (List):
Required. The instances that are the input to the
prediction call. A DeployedModel may have an upper limit
on the number of instances it supports per request, and
when it is exceeded the prediction call errors in case
of AutoML Models, or, in case of customer created
Models, the behaviour is as documented by that Model.
The schema of any single instance may be specified via
Endpoint's DeployedModels'
[Model's][google.cloud.aiplatform.v1beta1.DeployedModel.model]
[PredictSchemata's][google.cloud.aiplatform.v1beta1.Model.predict_schemata]
``instance_schema_uri``.
parameters (Dict):
The parameters that govern the prediction. The schema of
the parameters may be specified via Endpoint's
DeployedModels' [Model's
][google.cloud.aiplatform.v1beta1.DeployedModel.model]
[PredictSchemata's][google.cloud.aiplatform.v1beta1.Model.predict_schemata]
``parameters_schema_uri``.
timeout (float): Optional. The timeout for this request in seconds.
use_raw_predict (bool):
Optional. Default value is False. If set to True, the underlying prediction call will be made
against Endpoint.raw_predict().
use_dedicated_endpoint (bool):
Optional. Default value is False. If set to True, the underlying prediction call will be made
using the dedicated endpoint dns.
Returns:
prediction (aiplatform.Prediction):
Prediction with returned predictions and Model ID.
Raises:
ImportError: If there is an issue importing the `TCPKeepAliveAdapter` package.
"""
self.wait()
if use_raw_predict:
raw_predict_response = self.raw_predict(
body=json.dumps({"instances": instances, "parameters": parameters}),
headers={"Content-Type": "application/json"},
use_dedicated_endpoint=use_dedicated_endpoint,
timeout=timeout,
)
json_response = raw_predict_response.json()
return Prediction(
predictions=json_response["predictions"],
metadata=json_response.get("metadata"),
deployed_model_id=raw_predict_response.headers[
_RAW_PREDICT_DEPLOYED_MODEL_ID_KEY
],
model_resource_name=raw_predict_response.headers[
_RAW_PREDICT_MODEL_RESOURCE_KEY
],
model_version_id=raw_predict_response.headers.get(
_RAW_PREDICT_MODEL_VERSION_ID_KEY, None
),
)
if use_dedicated_endpoint:
self._sync_gca_resource_if_skipped()
if (
not self._gca_resource.dedicated_endpoint_enabled
or self._gca_resource.dedicated_endpoint_dns is None
):
                raise ValueError(
                    "Dedicated endpoint is not enabled or DNS is empty. "
                    "Please make sure the endpoint has dedicated endpoint "
                    "enabled and the model is ready before making a prediction."
                )
try:
from requests_toolbelt.adapters.socket_options import (
TCPKeepAliveAdapter,
)
except ImportError:
raise ImportError(
"Cannot import the requests-toolbelt library. Please install requests-toolbelt."
)
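            # Lazily create an authorized session (with the default authorized
            # scopes) used to call the dedicated endpoint DNS over HTTPS.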
if not self.authorized_session:
self.credentials._scopes = constants.base.DEFAULT_AUTHED_SCOPES
self.authorized_session = google_auth_requests.AuthorizedSession(
self.credentials
)
headers = {
"Content-Type": "application/json",
}
url = f"https://{self._gca_resource.dedicated_endpoint_dns}/v1/{self.resource_name}:predict"
            # count * interval needs to be larger than 1 hr (3600s)
keep_alive = TCPKeepAliveAdapter(idle=120, count=100, interval=100)
self.authorized_session.mount("https://", keep_alive)
response = self.authorized_session.post(
url=url,
data=json.dumps(
{
"instances": instances,
"parameters": parameters,
}
),
headers=headers,
timeout=timeout,
)
prediction_response = json.loads(response.text)
return Prediction(
predictions=prediction_response.get("predictions"),
metadata=prediction_response.get("metadata"),
deployed_model_id=prediction_response.get("deployedModelId"),
model_resource_name=prediction_response.get("model"),
model_version_id=prediction_response.get("modelVersionId"),
)
else:
prediction_response = self._prediction_client.predict(
endpoint=self._gca_resource.name,
instances=instances,
parameters=parameters,
timeout=timeout,
)
if prediction_response._pb.metadata:
metadata = json_format.MessageToDict(prediction_response._pb.metadata)
else:
metadata = None
return Prediction(
predictions=[
json_format.MessageToDict(item)
for item in prediction_response.predictions.pb
],
metadata=metadata,
deployed_model_id=prediction_response.deployed_model_id,
model_version_id=prediction_response.model_version_id,
model_resource_name=prediction_response.model,
)
async def predict_async(
self,
instances: List,
*,
parameters: Optional[Dict] = None,
timeout: Optional[float] = None,
) -> Prediction:
"""Make an asynchronous prediction against this Endpoint.
Example usage:
```
response = await my_endpoint.predict_async(instances=[...])
my_predictions = response.predictions
```
Args:
instances (List):
Required. The instances that are the input to the
prediction call. A DeployedModel may have an upper limit
on the number of instances it supports per request, and
when it is exceeded the prediction call errors in case
of AutoML Models, or, in case of customer created
Models, the behaviour is as documented by that Model.
The schema of any single instance may be specified via
Endpoint's DeployedModels'
[Model's][google.cloud.aiplatform.v1beta1.DeployedModel.model]
[PredictSchemata's][google.cloud.aiplatform.v1beta1.Model.predict_schemata]
``instance_schema_uri``.
parameters (Dict):
Optional. The parameters that govern the prediction. The schema of
the parameters may be specified via Endpoint's
DeployedModels' [Model's
][google.cloud.aiplatform.v1beta1.DeployedModel.model]
[PredictSchemata's][google.cloud.aiplatform.v1beta1.Model.predict_schemata]
``parameters_schema_uri``.
timeout (float): Optional. The timeout for this request in seconds.
Returns:
prediction (aiplatform.Prediction):
Prediction with returned predictions and Model ID.
"""
self.wait()
prediction_response = await self._prediction_async_client.predict(
endpoint=self._gca_resource.name,
instances=instances,
parameters=parameters,
timeout=timeout,
)
if prediction_response._pb.metadata:
metadata = json_format.MessageToDict(prediction_response._pb.metadata)
else:
metadata = None
return Prediction(
predictions=[
json_format.MessageToDict(item)
for item in prediction_response.predictions.pb
],
metadata=metadata,
deployed_model_id=prediction_response.deployed_model_id,
model_version_id=prediction_response.model_version_id,
model_resource_name=prediction_response.model,
)
def raw_predict(
self,
body: bytes,
headers: Dict[str, str],
*,
use_dedicated_endpoint: Optional[bool] = False,
timeout: Optional[float] = None,
) -> requests.models.Response:
"""Makes a prediction request using arbitrary headers.
Example usage:
my_endpoint = aiplatform.Endpoint(ENDPOINT_ID)
response = my_endpoint.raw_predict(
                body = b'{"instances":[{"feat_1":val_1, "feat_2":val_2}]}',
headers = {'Content-Type':'application/json'}
)
# For dedicated endpoint:
response = my_endpoint.raw_predict(
body = b'{"instances":[{"feat_1":val_1, "feat_2":val_2}]}',
headers = {'Content-Type':'application/json'},
                use_dedicated_endpoint=True,
)
status_code = response.status_code
            results = json.loads(response.text)
Args:
body (bytes):
                The body of the prediction request in bytes. This must not exceed 1.5 MB per request.
headers (Dict[str, str]):
The header of the request as a dictionary. There are no restrictions on the header.
use_dedicated_endpoint (bool):
Optional. Default value is False. If set to True, the underlying prediction call will be made
using the dedicated endpoint dns.
timeout (float): Optional. The timeout for this request in seconds.
Returns:
A requests.models.Response object containing the status code and prediction results.
Raises:
ImportError: If there is an issue importing the `TCPKeepAliveAdapter` package.
"""
if not self.authorized_session:
self.credentials._scopes = constants.base.DEFAULT_AUTHED_SCOPES
self.authorized_session = google_auth_requests.AuthorizedSession(
self.credentials
)
if self.raw_predict_request_url is None:
self.raw_predict_request_url = f"https://{self.location}-{constants.base.API_BASE_PATH}/v1/projects/{self.project}/locations/{self.location}/endpoints/{self.name}:rawPredict"
url = self.raw_predict_request_url
if use_dedicated_endpoint:
try:
from requests_toolbelt.adapters.socket_options import (
TCPKeepAliveAdapter,
)
except ImportError:
raise ImportError(
"Cannot import the requests-toolbelt library. Please install requests-toolbelt."
)
self._sync_gca_resource_if_skipped()
if (
not self._gca_resource.dedicated_endpoint_enabled
or self._gca_resource.dedicated_endpoint_dns is None
):
                raise ValueError(
                    "Dedicated endpoint is not enabled or DNS is empty. "
                    "Please make sure the endpoint has dedicated endpoint "
                    "enabled and the model is ready before making a prediction."
                )
url = f"https://{self._gca_resource.dedicated_endpoint_dns}/v1/{self.resource_name}:rawPredict"
            # count * interval needs to be larger than 1 hr (3600s)
keep_alive = TCPKeepAliveAdapter(idle=120, count=100, interval=100)
self.authorized_session.mount("https://", keep_alive)
return self.authorized_session.post(
url=url, data=body, headers=headers, timeout=timeout
)
def stream_raw_predict(
self,
body: bytes,
headers: Dict[str, str],
*,
use_dedicated_endpoint: Optional[bool] = False,
timeout: Optional[float] = None,
) -> Iterator[requests.models.Response]:
"""Makes a streaming prediction request using arbitrary headers.
        For custom models, this method is only supported for dedicated endpoints.
Example usage:
```
my_endpoint = aiplatform.Endpoint(ENDPOINT_ID)
for stream_response in my_endpoint.stream_raw_predict(
                body = b'{"instances":[{"feat_1":val_1, "feat_2":val_2}]}',
headers = {'Content-Type':'application/json'}
):
                status_code = stream_response.status_code
                stream_result = json.dumps(stream_response.text)
```
For dedicated endpoint:
```
my_endpoint = aiplatform.Endpoint(ENDPOINT_ID)
for stream_response in my_endpoint.stream_raw_predict(
body = b'{"instances":[{"feat_1":val_1, "feat_2":val_2}]}',
headers = {'Content-Type':'application/json'},
use_dedicated_endpoint=True,
):
                status_code = stream_response.status_code
                stream_result = json.dumps(stream_response.text)
```
Args:
body (bytes):
The body of the prediction request in bytes. This must not
                exceed 10 MB per request.
headers (Dict[str, str]):
The header of the request as a dictionary. There are no
restrictions on the header.
use_dedicated_endpoint (bool):
Optional. Default value is False. If set to True, the underlying prediction call will be made
using the dedicated endpoint dns.
timeout (float): Optional. The timeout for this request in seconds.
Yields:
predictions (Iterator[requests.models.Response]):
The streaming prediction results.
"""
if not self.authorized_session:
self.credentials._scopes = constants.base.DEFAULT_AUTHED_SCOPES
self.authorized_session = google_auth_requests.AuthorizedSession(
self.credentials
)
if self.stream_raw_predict_request_url is None:
self.stream_raw_predict_request_url = f"https://{self.location}-{constants.base.API_BASE_PATH}/v1/projects/{self.project}/locations/{self.location}/endpoints/{self.name}:streamRawPredict"
url = self.stream_raw_predict_request_url
if use_dedicated_endpoint:
self._sync_gca_resource_if_skipped()
if (
not self._gca_resource.dedicated_endpoint_enabled
or self._gca_resource.dedicated_endpoint_dns is None
):
                raise ValueError(
                    "Dedicated endpoint is not enabled or DNS is empty. "
                    "Please make sure the endpoint has dedicated endpoint "
                    "enabled and the model is ready before making a prediction."
                )
url = f"https://{self._gca_resource.dedicated_endpoint_dns}/v1/{self.resource_name}:streamRawPredict"
with self.authorized_session.post(
url=url,
data=body,
headers=headers,
timeout=timeout,
stream=True,
) as resp:
for line in resp.iter_lines():
yield line
def direct_predict(
self,
inputs: List,
parameters: Optional[Dict] = None,
timeout: Optional[float] = None,
) -> Prediction:
"""Makes a direct (gRPC) prediction against this Endpoint for a pre-built image.
Args:
inputs (List):
Required. The inputs that are the input to the prediction call.
A DeployedModel may have an upper limit on the number of
instances it supports per request, and when it is exceeded the
prediction call errors in case of AutoML Models, or, in case of
customer created Models, the behaviour is as documented by that
Model. The schema of any single instance may be specified via
Endpoint's DeployedModels'
[Model's][google.cloud.aiplatform.v1beta1.DeployedModel.model]
[PredictSchemata's][google.cloud.aiplatform.v1beta1.Model.predict_schemata]
``instance_schema_uri``.
parameters (Dict):
Optional. The parameters that govern the prediction. The schema
of the parameters may be specified via Endpoint's
DeployedModels'
[Model's][google.cloud.aiplatform.v1beta1.DeployedModel.model]
[PredictSchemata's][google.cloud.aiplatform.v1beta1.Model.predict_schemata]
``parameters_schema_uri``.
timeout (Optional[float]):
Optional. The timeout for this request in seconds.
Returns:
prediction (aiplatform.Prediction):
The resulting prediction.
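        Example usage (a minimal sketch; the inputs below are placeholders):
            ```
            response = my_endpoint.direct_predict(inputs=[...])
            my_predictions = response.predictions
            ```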
"""
self.wait()
prediction_response = self._prediction_client.direct_predict(
request={
"endpoint": self._gca_resource.name,
"inputs": inputs,
"parameters": parameters,
},
timeout=timeout,
)
return Prediction(
predictions=[
json_format.MessageToDict(item)
for item in prediction_response.outputs.pb
],
metadata=None,
deployed_model_id=None,
model_version_id=None,
model_resource_name=None,
)
async def direct_predict_async(
self,
inputs: List,
*,
parameters: Optional[Dict] = None,
timeout: Optional[float] = None,
) -> Prediction:
"""Makes an asynchronous direct (gRPC) prediction against this Endpoint for a pre-built image.
Example usage:
```
response = await my_endpoint.direct_predict_async(inputs=[...])
my_predictions = response.predictions
```
Args:
inputs (List):
Required. The inputs that are the input to the prediction call.
A DeployedModel may have an upper limit on the number of
instances it supports per request, and when it is exceeded the
prediction call errors in case of AutoML Models, or, in case of
customer created Models, the behaviour is as documented by that
Model. The schema of any single instance may be specified via
Endpoint's DeployedModels'
[Model's][google.cloud.aiplatform.v1beta1.DeployedModel.model]
[PredictSchemata's][google.cloud.aiplatform.v1beta1.Model.predict_schemata]
``instance_schema_uri``.
parameters (Dict):
Optional. The parameters that govern the prediction. The schema
of the parameters may be specified via Endpoint's
DeployedModels'
[Model's][google.cloud.aiplatform.v1beta1.DeployedModel.model]
[PredictSchemata's][google.cloud.aiplatform.v1beta1.Model.predict_schemata]
``parameters_schema_uri``.
timeout (Optional[float]):
Optional. The timeout for this request in seconds.
Returns:
prediction (aiplatform.Prediction):
The resulting prediction.
"""
self.wait()
prediction_response = await self._prediction_async_client.direct_predict(
request={
"endpoint": self._gca_resource.name,
"inputs": inputs,
"parameters": parameters,
},
timeout=timeout,
)
return Prediction(
predictions=[
json_format.MessageToDict(item)
for item in prediction_response.outputs.pb
],
metadata=None,
deployed_model_id=None,
model_version_id=None,
model_resource_name=None,
)
def stream_direct_predict(
self,
inputs_iterator: Iterator[List],
parameters: Optional[Dict] = None,
timeout: Optional[float] = None,
) -> Iterator[Prediction]:
"""Makes a streaming direct (gRPC) prediction against this Endpoint for a pre-built image.
Args:
inputs_iterator (Iterator[List]):
Required. An iterator of the inputs that are the input to the
prediction call. A DeployedModel may have an upper limit on the
number of instances it supports per request, and when it is
exceeded the prediction call errors in case of AutoML Models, or,
in case of customer created Models, the behaviour is as
documented by that Model. The schema of any single instance may
be specified via Endpoint's DeployedModels'
[Model's][google.cloud.aiplatform.v1beta1.DeployedModel.model]
[PredictSchemata's][google.cloud.aiplatform.v1beta1.Model.predict_schemata]
``instance_schema_uri``.
parameters (Dict):
Optional. The parameters that govern the prediction. The schema
of the parameters may be specified via Endpoint's
DeployedModels'
[Model's][google.cloud.aiplatform.v1beta1.DeployedModel.model]
[PredictSchemata's][google.cloud.aiplatform.v1beta1.Model.predict_schemata]
``parameters_schema_uri``.
timeout (Optional[float]):
Optional. The timeout for this request in seconds.
Yields:
predictions (Iterator[aiplatform.Prediction]):
The resulting streamed predictions.
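        Example usage (a minimal sketch; the inputs iterator below is a
        placeholder):
            ```
            for prediction in my_endpoint.stream_direct_predict(
                inputs_iterator=iter([[...], [...]]),
            ):
                my_predictions = prediction.predictions
            ```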
"""
self.wait()
for resp in self._prediction_client.stream_direct_predict(
requests=(
{
"endpoint": self._gca_resource.name,
"inputs": inputs,
"parameters": parameters,
}
for inputs in inputs_iterator
),
timeout=timeout,
):
yield Prediction(
predictions=[
json_format.MessageToDict(item) for item in resp.outputs.pb
],
metadata=None,
deployed_model_id=None,
model_version_id=None,
model_resource_name=None,
)
def direct_raw_predict(
self,
method_name: str,
request: bytes,
timeout: Optional[float] = None,
) -> Prediction:
"""Makes a direct (gRPC) prediction request using arbitrary headers for a custom container.
Example usage:
```
my_endpoint = aiplatform.Endpoint(ENDPOINT_ID)
response = my_endpoint.direct_raw_predict(request=b'...')
```
Args:
method_name (str):
Fully qualified name of the API method being invoked to perform
prediction.
request (bytes):
The body of the prediction request in bytes.
timeout (Optional[float]):
Optional. The timeout for this request in seconds.
Returns:
prediction (aiplatform.Prediction):
The resulting prediction.
"""
self.wait()
prediction_response = self._prediction_client.direct_raw_predict(
request={
"endpoint": self._gca_resource.name,
"method_name": method_name,
"input": request,
},
timeout=timeout,
)
return Prediction(
predictions=prediction_response.output,
metadata=None,
deployed_model_id=None,
model_version_id=None,
model_resource_name=None,
)
async def direct_raw_predict_async(
self,
method_name: str,
request: bytes,
timeout: Optional[float] = None,
) -> Prediction:
"""Makes a direct (gRPC) prediction request for a custom container.
Example usage:
```
my_endpoint = aiplatform.Endpoint(ENDPOINT_ID)
            response = await my_endpoint.direct_raw_predict_async(request=b'...')
```
Args:
method_name (str):
Fully qualified name of the API method being invoked to perform
prediction.
request (bytes):
The body of the prediction request in bytes.
timeout (Optional[float]):
Optional. The timeout for this request in seconds.
Returns:
prediction (aiplatform.Prediction):
The resulting prediction.
"""
self.wait()
prediction_response = await self._prediction_async_client.direct_raw_predict(
request={
"endpoint": self._gca_resource.name,
"method_name": method_name,
"input": request,
},
timeout=timeout,
)
return Prediction(
predictions=prediction_response.output,
metadata=None,
deployed_model_id=None,
model_version_id=None,
model_resource_name=None,
)
def stream_direct_raw_predict(
self,
method_name: str,
requests: Iterator[bytes],
timeout: Optional[float] = None,
) -> Iterator[Prediction]:
"""Makes a direct (gRPC) streaming prediction request for a custom container.
Example usage:
```
my_endpoint = aiplatform.Endpoint(ENDPOINT_ID)
for stream_response in my_endpoint.stream_direct_raw_predict(
                requests=b'...'
):
yield stream_response
```
Args:
method_name (str):
Fully qualified name of the API method being invoked to perform
prediction.
requests (Iterator[bytes]):
The body of the prediction requests in bytes.
timeout (Optional[float]):
Optional. The timeout for this request in seconds.
Yields:
predictions (Iterator[aiplatform.Prediction]):
The resulting streamed predictions.
"""
self.wait()
for resp in self._prediction_client.stream_direct_raw_predict(
requests=(
{
"endpoint": self._gca_resource.name,
"method_name": method_name,
"input": request,
}
for request in requests
),
timeout=timeout,
):
yield Prediction(
predictions=resp.output,
metadata=None,
deployed_model_id=None,
model_version_id=None,
model_resource_name=None,
)
def explain(
self,
instances: List[Dict],
parameters: Optional[Dict] = None,
deployed_model_id: Optional[str] = None,
timeout: Optional[float] = None,
) -> Prediction:
"""Make a prediction with explanations against this Endpoint.
Example usage:
response = my_endpoint.explain(instances=[...])
my_explanations = response.explanations
Args:
instances (List):
Required. The instances that are the input to the
prediction call. A DeployedModel may have an upper limit
on the number of instances it supports per request, and
when it is exceeded the prediction call errors in case
of AutoML Models, or, in case of customer created
Models, the behaviour is as documented by that Model.
The schema of any single instance may be specified via
Endpoint's DeployedModels'
[Model's][google.cloud.aiplatform.v1beta1.DeployedModel.model]
[PredictSchemata's][google.cloud.aiplatform.v1beta1.Model.predict_schemata]
``instance_schema_uri``.
parameters (Dict):
The parameters that govern the prediction. The schema of
the parameters may be specified via Endpoint's
DeployedModels' [Model's
][google.cloud.aiplatform.v1beta1.DeployedModel.model]
[PredictSchemata's][google.cloud.aiplatform.v1beta1.Model.predict_schemata]
``parameters_schema_uri``.
deployed_model_id (str):
Optional. If specified, this ExplainRequest will be served by the
chosen DeployedModel, overriding this Endpoint's traffic split.
timeout (float): Optional. The timeout for this request in seconds.
Returns:
prediction (aiplatform.Prediction):
Prediction with returned predictions, explanations, and Model ID.
"""
self.wait()
explain_response = self._prediction_client.explain(
endpoint=self.resource_name,
instances=instances,
parameters=parameters,
deployed_model_id=deployed_model_id,
timeout=timeout,
)
return Prediction(
predictions=[
json_format.MessageToDict(item)
for item in explain_response.predictions.pb
],
deployed_model_id=explain_response.deployed_model_id,
explanations=explain_response.explanations,
)
async def explain_async(
self,
instances: List[Dict],
*,
parameters: Optional[Dict] = None,
deployed_model_id: Optional[str] = None,
timeout: Optional[float] = None,
) -> Prediction:
"""Make a prediction with explanations against this Endpoint.
Example usage:
```
response = await my_endpoint.explain_async(instances=[...])
my_explanations = response.explanations
```
Args:
instances (List):
Required. The instances that are the input to the
prediction call. A DeployedModel may have an upper limit
on the number of instances it supports per request, and
when it is exceeded the prediction call errors in case
of AutoML Models, or, in case of customer created
Models, the behaviour is as documented by that Model.
The schema of any single instance may be specified via
Endpoint's DeployedModels'
[Model's][google.cloud.aiplatform.v1beta1.DeployedModel.model]
[PredictSchemata's][google.cloud.aiplatform.v1beta1.Model.predict_schemata]
``instance_schema_uri``.
parameters (Dict):
The parameters that govern the prediction. The schema of
the parameters may be specified via Endpoint's
DeployedModels' [Model's
][google.cloud.aiplatform.v1beta1.DeployedModel.model]
[PredictSchemata's][google.cloud.aiplatform.v1beta1.Model.predict_schemata]
``parameters_schema_uri``.
deployed_model_id (str):
Optional. If specified, this ExplainRequest will be served by the
chosen DeployedModel, overriding this Endpoint's traffic split.
timeout (float): Optional. The timeout for this request in seconds.
Returns:
prediction (aiplatform.Prediction):
Prediction with returned predictions, explanations, and Model ID.
"""
self.wait()
explain_response = await self._prediction_async_client.explain(
endpoint=self.resource_name,
instances=instances,
parameters=parameters,
deployed_model_id=deployed_model_id,
timeout=timeout,
)
return Prediction(
predictions=[
json_format.MessageToDict(item)
for item in explain_response.predictions.pb
],
deployed_model_id=explain_response.deployed_model_id,
explanations=explain_response.explanations,
)
@classmethod
def list(
cls,
filter: Optional[str] = None,
order_by: Optional[str] = None,
project: Optional[str] = None,
location: Optional[str] = None,
credentials: Optional[auth_credentials.Credentials] = None,
) -> List["models.Endpoint"]:
"""List all Endpoint resource instances.
Example Usage:
aiplatform.Endpoint.list(
                filter='labels.my_label="my_label_value" OR display_name!="old_endpoint"',
)
Args:
filter (str):
Optional. An expression for filtering the results of the request.
For field names both snake_case and camelCase are supported.
order_by (str):
Optional. A comma-separated list of fields to order by, sorted in
ascending order. Use "desc" after a field name for descending.
Supported fields: `display_name`, `create_time`, `update_time`
project (str):
Optional. Project to retrieve list from. If not set, project
set in aiplatform.init will be used.
location (str):
Optional. Location to retrieve list from. If not set, location
set in aiplatform.init will be used.
credentials (auth_credentials.Credentials):
Optional. Custom credentials to use to retrieve list. Overrides
credentials set in aiplatform.init.
Returns:
List[models.Endpoint]:
A list of Endpoint resource objects
"""
return cls._list_with_local_order(
cls_filter=lambda ep: not bool(ep.network)
and not bool(ep.private_service_connect_config),
# `network` is empty and private_service_connect is not enabled for public Endpoints
filter=filter,
order_by=order_by,
project=project,
location=location,
credentials=credentials,
)
def list_models(self) -> List[gca_endpoint_compat.DeployedModel]:
"""Returns a list of the models deployed to this Endpoint.
Returns:
deployed_models (List[aiplatform.gapic.DeployedModel]):
A list of the models deployed in this Endpoint.
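        Example usage (a minimal sketch):
            ```
            for deployed_model in my_endpoint.list_models():
                print(deployed_model.id, deployed_model.display_name)
            ```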
"""
self._sync_gca_resource()
return list(self._gca_resource.deployed_models)
def undeploy_all(self, sync: bool = True) -> "Endpoint":
"""Undeploys every model deployed to this Endpoint.
Args:
sync (bool):
Whether to execute this method synchronously. If False, this method
will be executed in concurrent Future and any downstream object will
be immediately returned and synced when the Future has completed.
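        Example usage (a minimal sketch):
            ```
            my_endpoint.undeploy_all()
            ```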
"""
self._sync_gca_resource()
models_in_traffic_split = sorted( # Undeploy zero traffic models first
self._gca_resource.traffic_split.keys(),
key=lambda id: self._gca_resource.traffic_split[id],
)
# Some deployed models may not be in the traffic_split dict.
# These models have 0% traffic and should be undeployed first.
models_not_in_traffic_split = [
deployed_model.id
for deployed_model in self._gca_resource.deployed_models
if deployed_model.id not in models_in_traffic_split
]
models_to_undeploy = models_not_in_traffic_split + models_in_traffic_split
for deployed_model in models_to_undeploy:
self._undeploy(deployed_model_id=deployed_model, sync=sync)
return self
def delete(self, force: bool = False, sync: bool = True) -> None:
"""Deletes this Vertex AI Endpoint resource. If force is set to True,
all models on this Endpoint will be undeployed prior to deletion.
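Example usage (illustrative):
my_endpoint = aiplatform.Endpoint("1234567891234567890")
my_endpoint.delete(force=True)  # undeploys all models first, then deletes the Endpoint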
Args:
force (bool):
Required. If force is set to True, all deployed models on this
Endpoint will be undeployed first. Default is False.
sync (bool):
Whether to execute this method synchronously. If False, this method
will be executed in concurrent Future and any downstream object will
be immediately returned and synced when the Future has completed.
Raises:
FailedPrecondition: If models are deployed on this Endpoint and force = False.
"""
if force:
self.undeploy_all(sync=sync)
super().delete(sync=sync)
class PrivateEndpoint(Endpoint):
"""
Represents a Vertex AI PrivateEndpoint resource.
Read more [about private endpoints in the documentation](https://cloud.google.com/vertex-ai/docs/predictions/using-private-endpoints).
"""
def __init__(
self,
endpoint_name: str,
project: Optional[str] = None,
location: Optional[str] = None,
credentials: Optional[auth_credentials.Credentials] = None,
):
"""Retrieves a PrivateEndpoint resource.
Example usage:
my_private_endpoint = aiplatform.PrivateEndpoint(
endpoint_name="projects/123/locations/us-central1/endpoints/1234567891234567890"
)
or (when project and location are initialized)
my_private_endpoint = aiplatform.PrivateEndpoint(
endpoint_name="1234567891234567890"
)
Args:
endpoint_name (str):
Required. A fully-qualified endpoint resource name or endpoint ID.
Example: "projects/123/locations/us-central1/endpoints/my_endpoint_id" or
"my_endpoint_id" when project and location are initialized or passed.
project (str):
Optional. Project to retrieve endpoint from. If not set, project
set in aiplatform.init will be used.
location (str):
Optional. Location to retrieve endpoint from. If not set, location
set in aiplatform.init will be used.
credentials (auth_credentials.Credentials):
Optional. Custom credentials to use to upload this model. Overrides
credentials set in aiplatform.init.
Raises:
ValueError: If the Endpoint being retrieved is not a PrivateEndpoint.
ImportError: If there is an issue importing the `urllib3` package.
"""
try:
import urllib3
except ImportError:
raise ImportError(
"Cannot import the urllib3 HTTP client. Please install google-cloud-aiplatform[private_endpoints]."
)
super().__init__(
endpoint_name=endpoint_name,
project=project,
location=location,
credentials=credentials,
)
if not self.network and not self.private_service_connect_config:
raise ValueError(
"Please ensure the Endpoint being retrieved is a PrivateEndpoint."
)
self._http_client = urllib3.PoolManager(cert_reqs="CERT_NONE")
@property
def predict_http_uri(self) -> Optional[str]:
"""HTTP path to send prediction requests to, used when calling `PrivateEndpoint.predict()`"""
if not self._gca_resource.deployed_models:
return None
return self._gca_resource.deployed_models[0].private_endpoints.predict_http_uri
@property
def explain_http_uri(self) -> Optional[str]:
"""HTTP path to send explain requests to, used when calling `PrivateEndpoint.explain()`"""
if not self._gca_resource.deployed_models:
return None
return self._gca_resource.deployed_models[0].private_endpoints.explain_http_uri
@property
def health_http_uri(self) -> Optional[str]:
"""HTTP path to send health check requests to, used when calling `PrivateEndpoint.health_check()`"""
if not self._gca_resource.deployed_models:
return None
return self._gca_resource.deployed_models[0].private_endpoints.health_http_uri
class PrivateServiceConnectConfig:
"""Represents a Vertex AI PrivateServiceConnectConfig resource."""
_gapic_private_service_connect_config: gca_service_networking.PrivateServiceConnectConfig
def __init__(
self,
project_allowlist: Optional[Sequence[str]] = None,
):
"""PrivateServiceConnectConfig for a PrivateEndpoint.
Args:
project_allowlist (Sequence[str]):
Optional. List of projects from which traffic can be accepted
by the endpoint via [ServiceAttachment](https://cloud.google.com/vpc/docs/private-service-connect#service-attachments).
If not set, the endpoint's project will be used.
"""
self._gapic_private_service_connect_config = (
gca_service_networking.PrivateServiceConnectConfig(
enable_private_service_connect=True,
project_allowlist=project_allowlist,
)
)
@classmethod
def create(
cls,
display_name: str,
project: Optional[str] = None,
location: Optional[str] = None,
network: Optional[str] = None,
description: Optional[str] = None,
labels: Optional[Dict[str, str]] = None,
credentials: Optional[auth_credentials.Credentials] = None,
encryption_spec_key_name: Optional[str] = None,
sync=True,
private_service_connect_config: Optional[PrivateServiceConnectConfig] = None,
enable_request_response_logging=False,
request_response_logging_sampling_rate: Optional[float] = None,
request_response_logging_bq_destination_table: Optional[str] = None,
inference_timeout: Optional[int] = None,
) -> "PrivateEndpoint":
"""Creates a new PrivateEndpoint.
Example usage:
For PSA based private endpoint:
my_private_endpoint = aiplatform.PrivateEndpoint.create(
display_name="my_endpoint_name",
project="my_project_id",
location="us-central1",
network="projects/123456789123/global/networks/my_vpc"
)
or (when project and location are initialized)
my_private_endpoint = aiplatform.PrivateEndpoint.create(
display_name="my_endpoint_name",
network="projects/123456789123/global/networks/my_vpc"
)
For PSC based private endpoint:
my_private_endpoint = aiplatform.PrivateEndpoint.create(
display_name="my_endpoint_name",
project="my_project_id",
location="us-central1",
private_service_connect_config=aiplatform.PrivateEndpoint.PrivateServiceConnectConfig(
project_allowlist=["test-project"]),
)
or (when project and location are initialized)
my_private_endpoint = aiplatform.PrivateEndpoint.create(
display_name="my_endpoint_name",
private_service_connect_config=aiplatform.PrivateEndpoint.PrivateServiceConnectConfig(
project_allowlist=["test-project"]),
)
Args:
display_name (str): Required. The user-defined name of the Endpoint. The
name can be up to 128 characters long and can consist of any UTF-8
characters.
project (str): Optional. Project to retrieve endpoint from. If not set,
project set in aiplatform.init will be used.
location (str): Optional. Location to retrieve endpoint from. If not
set, location set in aiplatform.init will be used.
network (str): Optional. The full name of the Compute Engine network to
which this Endpoint will be peered. E.g.
"projects/123456789123/global/networks/my_vpc". Private services
access must already be configured for the network. If left
unspecified, the network set with aiplatform.init will be used. Cannot
be set together with private_service_connect_config.
description (str): Optional. The description of the Endpoint.
labels (Dict[str, str]): Optional. The labels with user-defined metadata
to organize your Endpoints. Label keys and values can be no longer
than 64 characters (Unicode codepoints), can only contain lowercase
letters, numeric characters, underscores and dashes. International
characters are allowed. See https://goo.gl/xmQnxf for more information
and examples of labels.
credentials (auth_credentials.Credentials): Optional. Custom credentials
to use to upload this model. Overrides credentials set in
aiplatform.init.
encryption_spec_key_name (str): Optional. The Cloud KMS resource
identifier of the customer managed encryption key used to protect the
model. Has the
form:
``projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key``.
The key needs to be in the same region as where the compute
resource is created. If set, this Model and all sub-resources of
this Model will be secured by this key. Overrides
encryption_spec_key_name set in aiplatform.init.
sync (bool): Whether to execute this method synchronously. If False,
this method will be executed in concurrent Future and any downstream
object will be immediately returned and synced when the Future has
completed.
private_service_connect_config (aiplatform.PrivateEndpoint.PrivateServiceConnectConfig):
[Private Service Connect](https://cloud.google.com/vpc/docs/private-service-connect)
configuration for the endpoint. Cannot be set when network is
specified.
enable_request_response_logging (bool): Optional. Whether to enable
request & response logging for this endpoint.
request_response_logging_sampling_rate (float): Optional. The request
response logging sampling rate. If not set, default is 0.0.
request_response_logging_bq_destination_table (str): Optional. The
request response logging bigquery destination. If not set, will create
a table with name:
``bq://{project_id}.logging_{endpoint_display_name}_{endpoint_id}.request_response_logging``.
inference_timeout (int): Optional. It defines the prediction timeout, in
seconds, for online predictions using cloud-based endpoints. This
applies to either PSC endpoints, when private_service_connect_config
is set, or dedicated endpoints, when dedicated_endpoint_enabled is
true.
Returns:
endpoint (aiplatform.PrivateEndpoint):
Created endpoint.
Raises:
ValueError: If neither `network` nor `private_service_connect_config`
is provided, or if both are provided.
"""
api_client = cls._instantiate_client(location=location, credentials=credentials)
utils.validate_display_name(display_name)
if labels:
utils.validate_labels(labels)
project = project or initializer.global_config.project
location = location or initializer.global_config.location
network = network or initializer.global_config.network
if not network and not private_service_connect_config:
raise ValueError(
"Please provide required argument `network` or"
"`private_service_connect_config`. You can also set network"
"using aiplatform.init(network=...)"
)
if network and private_service_connect_config:
raise ValueError(
"Argument `network` and `private_service_connect_config` enabled"
" mutually exclusive. You can only set one of them."
)
config = None
if private_service_connect_config:
config = (
private_service_connect_config._gapic_private_service_connect_config
)
predict_request_response_logging_config = None
if enable_request_response_logging:
predict_request_response_logging_config = (
gca_endpoint_compat.PredictRequestResponseLoggingConfig(
enabled=True,
sampling_rate=request_response_logging_sampling_rate,
bigquery_destination=gca_io_compat.BigQueryDestination(
output_uri=request_response_logging_bq_destination_table
),
)
)
client_connection_config = None
if private_service_connect_config and inference_timeout:
client_connection_config = gca_endpoint_compat.ClientConnectionConfig(
inference_timeout=duration_pb2.Duration(seconds=inference_timeout)
)
return cls._create(
api_client=api_client,
display_name=display_name,
project=project,
location=location,
description=description,
labels=labels,
credentials=credentials,
encryption_spec=initializer.global_config.get_encryption_spec(
encryption_spec_key_name=encryption_spec_key_name
),
network=network,
sync=sync,
private_service_connect_config=config,
predict_request_response_logging_config=predict_request_response_logging_config,
client_connection_config=client_connection_config,
)
@classmethod
def _construct_sdk_resource_from_gapic(
cls,
gapic_resource: proto.Message,
project: Optional[str] = None,
location: Optional[str] = None,
credentials: Optional[auth_credentials.Credentials] = None,
) -> "PrivateEndpoint":
"""Given a GAPIC PrivateEndpoint object, return the SDK representation.
Args:
gapic_resource (proto.Message):
A GAPIC representation of a PrivateEndpoint resource, usually
retrieved by a get_* or in a list_* API call.
project (str):
Optional. Project to construct Endpoint object from. If not set,
project set in aiplatform.init will be used.
location (str):
Optional. Location to construct Endpoint object from. If not set,
location set in aiplatform.init will be used.
credentials (auth_credentials.Credentials):
Optional. Custom credentials to use to construct Endpoint.
Overrides credentials set in aiplatform.init.
Returns:
endpoint (aiplatform.PrivateEndpoint):
An initialized PrivateEndpoint resource.
Raises:
ImportError: If there is an issue importing the `urllib3` package.
"""
try:
import urllib3
except ImportError:
raise ImportError(
"Cannot import the urllib3 HTTP client. Please install google-cloud-aiplatform[private_endpoints]."
)
endpoint = super()._construct_sdk_resource_from_gapic(
gapic_resource=gapic_resource,
project=project,
location=location,
credentials=credentials,
)
endpoint._http_client = urllib3.PoolManager(cert_reqs="CERT_NONE")
return endpoint
def _http_request(
self,
method: str,
url: str,
body: Optional[Dict[Any, Any]] = None,
headers: Optional[Dict[str, str]] = None,
) -> "urllib3.response.HTTPResponse": # type: ignore # noqa: F821
"""Helper function used to perform HTTP requests for PrivateEndpoint.
Args:
method (str):
Required. The HTTP request method to use. Example: "POST" or "GET"
url (str):
Required. The url used to send requests and get responses from.
body (Dict[Any, Any]):
Optional. Data sent to the url in the HTTP request. For a PrivateEndpoint,
an instance is sent and a prediction response is expected.
headers (Dict[str, str]):
Optional. Header in the HTTP request.
Returns:
urllib3.response.HTTPResponse:
An HTTP response container.
Raises:
ImportError: If there is an issue importing the `urllib3` package.
RuntimeError: If an HTTP request could not be made.
RuntimeError: If a connection could not be established with the PrivateEndpoint and
an HTTP request could not be made.
"""
try:
import urllib3
except ImportError:
raise ImportError(
"Cannot import the urllib3 HTTP client. Please install google-cloud-aiplatform[private_endpoints]."
)
try:
response = self._http_client.request(
method=method, url=url, body=body, headers=headers
)
if response.status < _SUCCESSFUL_HTTP_RESPONSE:
return response
else:
raise RuntimeError(
f"{response.status} - Failed to make request, see response: "
+ response.data.decode("utf-8")
)
except urllib3.exceptions.MaxRetryError as exc:
raise RuntimeError(
f"Failed to make a {method} request to this URI, make sure: "
" this call is being made inside the network this PrivateEndpoint is peered to "
f"({self._gca_resource.network}), calling health_check() returns True, "
f"and that {url} is a valid URL."
) from exc
def _validate_endpoint_override(self, endpoint_override: str) -> bool:
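"""Returns True if the endpoint override only contains characters valid
in an IP address or DNS name (letters, digits, dashes, and dots)."""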
regex = re.compile("^[a-zA-Z0-9-.]+$")
return regex.match(endpoint_override) is not None
def predict(
self,
instances: List,
parameters: Optional[Dict] = None,
endpoint_override: Optional[str] = None,
) -> Prediction:
"""Make a prediction against this PrivateEndpoint using a HTTP request.
For PSA based private endpoint, this method must be called within the
network the PrivateEndpoint is peered to. Otherwise, the predict() call
will fail with error code 404. To check, use `PrivateEndpoint.network`.
For PSC based private endpoint, the project that the caller credentials
are from must be allowlisted.
Example usage:
PSA based private endpoint:
response = my_private_endpoint.predict(instances=[...], parameters={...})
my_predictions = response.predictions
PSC based private endpoint:
After creating PSC Endpoint pointing to the endpoint's
ServiceAttachment, use the PSC Endpoint IP Address or DNS as
endpoint_override.
psc_endpoint_address = "10.0.1.23"
or
psc_endpoint_address = "test.my.prediction"
response = my_private_endpoint.predict(instances=[...],
endpoint_override=psc_endpoint_address)
my_predictions = response.predictions
Args:
instances (List):
Required. The instances that are the input to the
prediction call. Instance types must be JSON serializable.
A DeployedModel may have an upper limit
on the number of instances it supports per request, and
when it is exceeded the prediction call errors in case
of AutoML Models, or, in case of customer created
Models, the behaviour is as documented by that Model.
The schema of any single instance may be specified via
Endpoint's DeployedModels'
[Model's][google.cloud.aiplatform.v1beta1.DeployedModel.model]
[PredictSchemata's][google.cloud.aiplatform.v1beta1.Model.predict_schemata]
``instance_schema_uri``.
parameters (Dict):
The parameters that govern the prediction. The schema of
the parameters may be specified via Endpoint's
DeployedModels' [Model's
][google.cloud.aiplatform.v1beta1.DeployedModel.model]
[PredictSchemata's][google.cloud.aiplatform.v1beta1.Model.predict_schemata]
``parameters_schema_uri``.
endpoint_override (Optional[str]):
The Private Service Connect endpoint's IP address or DNS that
points to the endpoint's service attachment.
Returns:
prediction (aiplatform.Prediction):
Prediction object with returned predictions and Model ID.
Raises:
RuntimeError: If a model has not been deployed and a request cannot be
made for a PSA based endpoint.
ValueError: If an endpoint override is not provided for a PSC based
endpoint.
ValueError: If an endpoint override is invalid for a PSC based endpoint.
"""
self.wait()
self._sync_gca_resource_if_skipped()
if self.network:
if not self._gca_resource.deployed_models:
raise RuntimeError(
"Cannot make a predict request because a model has not been"
"deployed on this Private Endpoint. Please ensure a model"
"has been deployed."
)
response = self._http_request(
method="POST",
url=self.predict_http_uri,
body=json.dumps({"instances": instances, "parameters": parameters}),
headers={"Content-Type": "application/json"},
)
prediction_response = json.loads(response.data)
return Prediction(
predictions=prediction_response.get("predictions"),
metadata=prediction_response.get("metadata"),
deployed_model_id=self._gca_resource.deployed_models[0].id,
)
if self.private_service_connect_config:
if not endpoint_override:
raise ValueError(
"Cannot make a predict request because endpoint override is"
"not provided. Please ensure an endpoint override is"
"provided."
)
if not self._validate_endpoint_override(endpoint_override):
raise ValueError(
"Invalid endpoint override provided. Please only use IP"
"address or DNS."
)
if not self.credentials.valid:
self.credentials.refresh(google_auth_requests.Request())
token = self.credentials.token
headers = {
"Authorization": f"Bearer {token}",
"Content-Type": "application/json",
}
url = f"https://{endpoint_override}/v1/projects/{self.project}/locations/{self.location}/endpoints/{self.name}:predict"
response = self._http_request(
method="POST",
url=url,
body=json.dumps({"instances": instances, "parameters": parameters}),
headers=headers,
)
prediction_response = json.loads(response.data)
return Prediction(
predictions=prediction_response.get("predictions"),
metadata=prediction_response.get("metadata"),
deployed_model_id=prediction_response.get("deployedModelId"),
model_resource_name=prediction_response.get("model"),
model_version_id=prediction_response.get("modelVersionId"),
)
def raw_predict(
self,
body: bytes,
headers: Dict[str, str],
endpoint_override: Optional[str] = None,
) -> requests.models.Response:
"""Make a prediction request using arbitrary headers.
For PSA based private endpoint, this method must be called within the network the PrivateEndpoint is peered to.
Otherwise, the raw_predict() call will fail with error code 404. To check, use `PrivateEndpoint.network`.
Example usage:
my_endpoint = aiplatform.PrivateEndpoint(ENDPOINT_ID)
# PSA based private endpoint
response = my_endpoint.raw_predict(
body = b'{"instances":[{"feat_1":val_1, "feat_2":val_2}]}',
headers = {'Content-Type':'application/json'}
)
# PSC based private endpoint
response = my_endpoint.raw_predict(
body = b'{"instances":[{"feat_1":val_1, "feat_2":val_2}]}',
headers = {'Content-Type':'application/json'},
endpoint_override = "10.1.0.23"
)
status_code = response.status_code
results = json.dumps(response.text)
Args:
body (bytes):
The body of the prediction request in bytes. This must not
exceed 1.5 mb per request.
headers (Dict[str, str]):
The header of the request as a dictionary. There are no
restrictions on the header.
endpoint_override (Optional[str]):
The Private Service Connect endpoint's IP address or DNS that
points to the endpoint's service attachment.
Returns:
A requests.models.Response object containing the status code and
prediction results.
Raises:
ValueError: If an endpoint override is not provided for a PSC based
endpoint.
ValueError: If an endpoint override is invalid for a PSC based endpoint.
"""
self.wait()
if self.network:
return self._http_request(
method="POST",
url=self.predict_http_uri,
body=body,
headers=headers,
)
if self.private_service_connect_config:
if not endpoint_override:
raise ValueError(
"Cannot make a predict request because endpoint override is"
"not provided. Please ensure an endpoint override is"
"provided."
)
if not self._validate_endpoint_override(endpoint_override):
raise ValueError(
"Invalid endpoint override provided. Please only use IP"
"address or DNS."
)
if not self.credentials.valid:
self.credentials.refresh(google_auth_requests.Request())
token = self.credentials.token
headers_with_token = dict(headers)
headers_with_token["Authorization"] = f"Bearer {token}"
url = f"https://{endpoint_override}/v1/projects/{self.project}/locations/{self.location}/endpoints/{self.name}:rawPredict"
return self._http_request(
method="POST",
url=url,
body=body,
headers=headers_with_token,
)
def stream_raw_predict(
self,
body: bytes,
headers: Dict[str, str],
endpoint_override: Optional[str] = None,
) -> Iterator[bytes]:
"""Make a streaming prediction request using arbitrary headers.
Example usage:
my_endpoint = aiplatform.PrivateEndpoint(ENDPOINT_ID)
# Prepare the request body
request_body = json.dumps({...}).encode('utf-8')
# Define the headers
headers = {
'Content-Type': 'application/json',
}
# Use stream_raw_predict to send the request and process the response
for stream_response in psc_endpoint.stream_raw_predict(
body=request_body,
headers=headers,
endpoint_override="10.128.0.26" # Replace with your actual endpoint
):
stream_response_text = stream_response.decode('utf-8')
Args:
body (bytes):
The body of the prediction request in bytes. This must not
exceed 10 mb per request.
headers (Dict[str, str]):
The header of the request as a dictionary. There are no
restrictions on the header.
endpoint_override (Optional[str]):
The Private Service Connect endpoint's IP address or DNS that
points to the endpoint's service attachment.
Yields:
predictions (Iterator[bytes]):
The streaming prediction results as lines of bytes.
Raises:
ValueError: If an endpoint override is not provided for a PSC based
endpoint.
ValueError: If an endpoint override is invalid for a PSC based endpoint.
"""
self.wait()
if self.network or not self.private_service_connect_config:
raise ValueError(
"PSA based private endpoint does not support streaming prediction."
)
if self.private_service_connect_config:
if not endpoint_override:
raise ValueError(
"Cannot make a predict request because endpoint override is"
"not provided. Please ensure an endpoint override is"
"provided."
)
if not self._validate_endpoint_override(endpoint_override):
raise ValueError(
"Invalid endpoint override provided. Please only use IP"
"address or DNS."
)
if not self.credentials.valid:
self.credentials.refresh(google_auth_requests.Request())
token = self.credentials.token
headers_with_token = dict(headers)
headers_with_token["Authorization"] = f"Bearer {token}"
if not self.authorized_session:
self.credentials._scopes = constants.base.DEFAULT_AUTHED_SCOPES
self.authorized_session = google_auth_requests.AuthorizedSession(
self.credentials
)
url = f"https://{endpoint_override}/v1/projects/{self.project}/locations/{self.location}/endpoints/{self.name}:streamRawPredict"
with self.authorized_session.post(
url=url,
data=body,
headers=headers_with_token,
stream=True,
verify=False,
) as resp:
for line in resp.iter_lines():
yield line
def explain(self):
raise NotImplementedError(
f"{self.__class__.__name__} class does not support 'explain' as of now."
)
def health_check(self) -> bool:
"""
Makes a request to this PrivateEndpoint's health check URI. Must be called within
the network that this PrivateEndpoint is peered to.
This is only supported by PSA based private endpoint.
Example Usage:
if my_private_endpoint.health_check():
print("PrivateEndpoint is healthy!")
Returns:
bool:
True if the health check succeeds and calls can be made to this PrivateEndpoint.
Raises:
RuntimeError: If a model has not been deployed and a request cannot be made.
RuntimeError: If the endpoint is a PSC based private endpoint.
"""
self.wait()
self._sync_gca_resource_if_skipped()
if self.private_service_connect_config:
raise RuntimeError(
"Health check request is not supported on PSC based Private Endpoint."
)
if not self._gca_resource.deployed_models:
raise RuntimeError(
"Cannot make a health check request because a model has not been deployed on this Private"
"Endpoint. Please ensure a model has been deployed."
)
response = self._http_request(
method="GET",
url=self.health_http_uri,
)
return response.status < _SUCCESSFUL_HTTP_RESPONSE
@classmethod
def list(
cls,
filter: Optional[str] = None,
order_by: Optional[str] = None,
project: Optional[str] = None,
location: Optional[str] = None,
credentials: Optional[auth_credentials.Credentials] = None,
) -> List["models.PrivateEndpoint"]:
"""List all PrivateEndpoint resource instances.
Example Usage:
my_private_endpoints = aiplatform.PrivateEndpoint.list()
or
my_private_endpoints = aiplatform.PrivateEndpoint.list(
filter='labels.my_label="my_label_value" OR display_name=!"old_endpoint"',
)
Args:
filter (str):
Optional. An expression for filtering the results of the request.
For field names both snake_case and camelCase are supported.
order_by (str):
Optional. A comma-separated list of fields to order by, sorted in
ascending order. Use "desc" after a field name for descending.
Supported fields: `display_name`, `create_time`, `update_time`
project (str):
Optional. Project to retrieve list from. If not set, project
set in aiplatform.init will be used.
location (str):
Optional. Location to retrieve list from. If not set, location
set in aiplatform.init will be used.
credentials (auth_credentials.Credentials):
Optional. Custom credentials to use to retrieve list. Overrides
credentials set in aiplatform.init.
Returns:
List[models.PrivateEndpoint]:
A list of PrivateEndpoint resource objects.
"""
return cls._list_with_local_order(
cls_filter=lambda ep: bool(ep.network)
or bool(ep.private_service_connect_config),
# Only PrivateEndpoints have a network or private_service_connect_config
filter=filter,
order_by=order_by,
project=project,
location=location,
credentials=credentials,
)
def deploy(
self,
model: "Model",
deployed_model_display_name: Optional[str] = None,
machine_type: Optional[str] = None,
min_replica_count: int = 1,
max_replica_count: int = 1,
accelerator_type: Optional[str] = None,
accelerator_count: Optional[int] = None,
tpu_topology: Optional[str] = None,
service_account: Optional[str] = None,
explanation_metadata: Optional[aiplatform.explain.ExplanationMetadata] = None,
explanation_parameters: Optional[
aiplatform.explain.ExplanationParameters
] = None,
metadata: Optional[Sequence[Tuple[str, str]]] = (),
sync=True,
disable_container_logging: bool = False,
traffic_percentage: Optional[int] = 0,
traffic_split: Optional[Dict[str, int]] = None,
reservation_affinity_type: Optional[str] = None,
reservation_affinity_key: Optional[str] = None,
reservation_affinity_values: Optional[List[str]] = None,
spot: bool = False,
system_labels: Optional[Dict[str, str]] = None,
required_replica_count: Optional[int] = 0,
) -> None:
"""Deploys a Model to the PrivateEndpoint.
Example Usage:
PSA based private endpoint
my_private_endpoint.deploy(
model=my_model
)
PSC based private endpoint
psc_endpoint.deploy(
model=first_model,
)
psc_endpoint.deploy(
model=second_model,
traffic_percentage=50,
)
psc_endpoint.deploy(
model=third_model,
traffic_split={
'first_model_id': 40,
'second_model_id': 30,
'third_model_id': 30
},
)
Args:
model (aiplatform.Model):
Required. Model to be deployed.
deployed_model_display_name (str):
Optional. The display name of the DeployedModel. If not provided
upon creation, the Model's display_name is used.
machine_type (str):
Optional. The type of machine. Not specifying machine type will
result in model to be deployed with automatic resources.
min_replica_count (int):
Optional. The minimum number of machine replicas this deployed
model will always be deployed on. If traffic against it increases,
it may dynamically be deployed onto more replicas, and as traffic
decreases, some of these extra replicas may be freed.
max_replica_count (int):
Optional. The maximum number of replicas this deployed model may
be deployed on when the traffic against it increases. If requested
value is too large, the deployment will error, but if deployment
succeeds then the ability to scale the model to that many replicas
is guaranteed (barring service outages). If traffic against the
deployed model increases beyond what its replicas at maximum may
handle, a portion of the traffic will be dropped. If this value
is not provided, the larger value of min_replica_count or 1 will
be used. If value provided is smaller than min_replica_count, it
will automatically be increased to be min_replica_count.
accelerator_type (str):
Optional. Hardware accelerator type. Must also set accelerator_count if used.
One of ACCELERATOR_TYPE_UNSPECIFIED, NVIDIA_TESLA_K80, NVIDIA_TESLA_P100,
NVIDIA_TESLA_V100, NVIDIA_TESLA_P4, NVIDIA_TESLA_T4
accelerator_count (int):
Optional. The number of accelerators to attach to a worker replica.
tpu_topology (str):
Optional. The TPU topology to use for the DeployedModel.
Required for CloudTPU multihost deployments.
service_account (str):
The service account that the DeployedModel's container runs as. Specify the
email address of the service account. If this service account is not
specified, the container runs as a service account that doesn't have access
to the resource project.
Users deploying the Model must have the `iam.serviceAccounts.actAs`
permission on this service account.
explanation_metadata (aiplatform.explain.ExplanationMetadata):
Optional. Metadata describing the Model's input and output for explanation.
`explanation_metadata` is optional while `explanation_parameters` must be
specified when used.
For more details, see `Ref docs <http://tinyurl.com/1igh60kt>`
explanation_parameters (aiplatform.explain.ExplanationParameters):
Optional. Parameters to configure explaining for Model's predictions.
For more details, see `Ref docs <http://tinyurl.com/1an4zake>`
metadata (Sequence[Tuple[str, str]]):
Optional. Strings which should be sent along with the request as
metadata.
sync (bool):
Whether to execute this method synchronously. If False, this method
will be executed in concurrent Future and any downstream object will
be immediately returned and synced when the Future has completed.
traffic_percentage (int):
Optional. Desired traffic to newly deployed model.
Defaults to 0 if there are pre-existing deployed models.
Defaults to 100 if there are no pre-existing deployed models.
Defaults to 100 for PSA based private endpoint.
Negative values should not be provided. Traffic of previously
deployed models at the endpoint will be scaled down to
accommodate new deployed model's traffic.
Should not be provided if traffic_split is provided.
traffic_split (Dict[str, int]):
Optional. Only supported by PSC based private endpoint.
A map from a DeployedModel's ID to the percentage of
this Endpoint's traffic that should be forwarded to that DeployedModel.
If a DeployedModel's ID is not listed in this map, then it receives
no traffic. The traffic percentage values must add up to 100, or
map must be empty if the Endpoint is to not accept any traffic at
the moment. Key for model being deployed is "0". Should not be
provided if traffic_percentage is provided.
reservation_affinity_type (str):
Optional. The type of reservation affinity.
One of NO_RESERVATION, ANY_RESERVATION, SPECIFIC_RESERVATION,
SPECIFIC_THEN_ANY_RESERVATION, SPECIFIC_THEN_NO_RESERVATION
reservation_affinity_key (str):
Optional. Corresponds to the label key of a reservation resource.
To target a SPECIFIC_RESERVATION by name, use `compute.googleapis.com/reservation-name` as the key
and specify the name of your reservation as its value.
reservation_affinity_values (List[str]):
Optional. Corresponds to the label values of a reservation resource.
This must be the full resource name of the reservation.
Format: 'projects/{project_id_or_number}/zones/{zone}/reservations/{reservation_name}'
spot (bool):
Optional. Whether to schedule the deployment workload on spot VMs.
system_labels (Dict[str, str]):
Optional. System labels to apply to Model Garden deployments.
System labels are managed by Google for internal use only.
required_replica_count (int):
Optional. Number of required available replicas for the
deployment to succeed. This field is only needed when partial
model deployment/mutation is desired, with a value greater than
or equal to 1 and less than or equal to min_replica_count. If
set, the model deploy/mutate operation will succeed once
available_replica_count reaches required_replica_count, and the
rest of the replicas will be retried.
"""
if self.network:
if traffic_split is not None:
raise ValueError(
"Traffic split is not supported for PSA based PrivateEndpoint."
)
traffic_percentage = 100
self._validate_deploy_args(
min_replica_count=min_replica_count,
max_replica_count=max_replica_count,
accelerator_type=accelerator_type,
deployed_model_display_name=deployed_model_display_name,
traffic_split=traffic_split,
traffic_percentage=traffic_percentage,
deployment_resource_pool=None,
required_replica_count=required_replica_count,
)
explanation_spec = _explanation_utils.create_and_validate_explanation_spec(
explanation_metadata=explanation_metadata,
explanation_parameters=explanation_parameters,
)
self._deploy(
model=model,
deployed_model_display_name=deployed_model_display_name,
traffic_percentage=traffic_percentage,
traffic_split=traffic_split,
machine_type=machine_type,
min_replica_count=min_replica_count,
max_replica_count=max_replica_count,
accelerator_type=accelerator_type,
accelerator_count=accelerator_count,
tpu_topology=tpu_topology,
reservation_affinity_type=reservation_affinity_type,
reservation_affinity_key=reservation_affinity_key,
reservation_affinity_values=reservation_affinity_values,
service_account=service_account,
explanation_spec=explanation_spec,
metadata=metadata,
sync=sync,
spot=spot,
disable_container_logging=disable_container_logging,
system_labels=system_labels,
required_replica_count=required_replica_count,
)
def update(
self,
display_name: Optional[str] = None,
description: Optional[str] = None,
labels: Optional[Dict[str, str]] = None,
traffic_split: Optional[Dict[str, int]] = None,
request_metadata: Optional[Sequence[Tuple[str, str]]] = (),
update_request_timeout: Optional[float] = None,
) -> "PrivateEndpoint":
"""Updates a PrivateEndpoint.
Example usage:
PSC based private endpoint
my_endpoint = my_endpoint.update(
display_name='my-updated-endpoint',
description='my updated description',
labels={'key': 'value'},
traffic_split={
'123456': 20,
'234567': 80,
},
)
Args:
display_name (str):
Optional. The display name of the Endpoint.
The name can be up to 128 characters long and can consist of any UTF-8
characters.
description (str):
Optional. The description of the Endpoint.
labels (Dict[str, str]):
Optional. The labels with user-defined metadata to organize your Endpoints.
Label keys and values can be no longer than 64 characters
(Unicode codepoints), can only contain lowercase letters, numeric
characters, underscores and dashes. International characters are allowed.
See https://goo.gl/xmQnxf for more information and examples of labels.
traffic_split (Dict[str, int]):
Optional. Only supported by PSC based private endpoint
A map from a DeployedModel's ID to the percentage of this Endpoint's
traffic that should be forwarded to that DeployedModel.
If a DeployedModel's ID is not listed in this map, then it receives no traffic.
The traffic percentage values must add up to 100, or map must be empty if
the Endpoint is to not accept any traffic at the moment.
request_metadata (Sequence[Tuple[str, str]]):
Optional. Strings which should be sent along with the request as metadata.
update_request_timeout (float):
Optional. The timeout for the update request in seconds.
Returns:
PrivateEndpoint (aiplatform.PrivateEndpoint):
Updated private endpoint resource.
Raises:
ValueError: If `traffic_split` is set for PSA based private endpoint.
"""
if self.network:
if traffic_split is not None:
raise ValueError(
"Traffic split is not supported for PSA based Private Endpoint."
)
super().update(
display_name=display_name,
description=description,
labels=labels,
traffic_split=traffic_split,
request_metadata=request_metadata,
update_request_timeout=update_request_timeout,
)
return self
def undeploy(
self,
deployed_model_id: str,
sync=True,
traffic_split: Optional[Dict[str, int]] = None,
) -> None:
"""Undeploys a deployed model from the PrivateEndpoint.
Example Usage:
PSA based private endpoint:
my_private_endpoint.undeploy(
deployed_model_id="1234567891232567891"
)
or
my_deployed_model_id = my_private_endpoint.list_models()[0].id
my_private_endpoint.undeploy(
deployed_model_id=my_deployed_model_id
)
Args:
deployed_model_id (str):
Required. The ID of the DeployedModel to be undeployed from the
PrivateEndpoint. Use PrivateEndpoint.list_models() to get the
deployed model ID.
sync (bool):
Whether to execute this method synchronously. If False, this method
will be executed in concurrent Future and any downstream object will
be immediately returned and synced when the Future has completed.
traffic_split (Dict[str, int]):
Optional. Only supported by PSC based private endpoint.
A map of DeployedModel IDs to the percentage of this Endpoint's
traffic that should be forwarded to that DeployedModel.
Required if undeploying a model with non-zero traffic from an Endpoint
with multiple deployed models. The traffic percentage values must
add up to 100, or map must be empty if the Endpoint is to not
accept any traffic at the moment. If a DeployedModel's ID is not
listed in this map, then it receives no traffic.
"""
self._sync_gca_resource_if_skipped()
if self.network:
if traffic_split is not None:
raise ValueError(
"Traffic split is not supported for PSA based PrivateEndpoint."
)
# PSA based private endpoint
self._undeploy(
deployed_model_id=deployed_model_id,
traffic_split=None,
sync=sync,
)
# PSC based private endpoint
if self.private_service_connect_config:
super().undeploy(
deployed_model_id=deployed_model_id,
traffic_split=traffic_split,
sync=sync,
)
def undeploy_all(self, sync: bool = True) -> "PrivateEndpoint":
"""Undeploys every model deployed to this PrivateEndpoint.
Args:
sync (bool):
Whether to execute this method synchronously. If False, this method
will be executed in concurrent Future and any downstream object will
be immediately returned and synced when the Future has completed.
"""
if self.network:
self._sync_gca_resource()
# PSA based private endpoint
self._undeploy(
deployed_model_id=self._gca_resource.deployed_models[0].id,
traffic_split=None,
sync=sync,
)
if self.private_service_connect_config:
# PSC based private endpoint
super().undeploy_all(sync=sync)
return self
def delete(self, force: bool = False, sync: bool = True) -> None:
"""Deletes this Vertex AI PrivateEndpoint resource. If force is set to True,
all models on this PrivateEndpoint will be undeployed prior to deletion.
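Example usage (illustrative):
my_private_endpoint.delete(force=True)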
Args:
force (bool):
Required. If force is set to True, all deployed models on this
Endpoint will be undeployed first. Default is False.
sync (bool):
Whether to execute this method synchronously. If False, this method
will be executed in concurrent Future and any downstream object will
be immediately returned and synced when the Future has completed.
Raises:
FailedPrecondition: If models are deployed on this Endpoint and force = False.
"""
if force and self._gca_resource.deployed_models:
self.undeploy_all(sync=sync)
super().delete(force=False, sync=sync)
class Model(base.VertexAiResourceNounWithFutureManager, base.PreviewMixin):
client_class = utils.ModelClientWithOverride
_resource_noun = "models"
_getter_method = "get_model"
_list_method = "list_models"
_delete_method = "delete_model"
_parse_resource_name_method = "parse_model_path"
_format_resource_name_method = "model_path"
_preview_class = "google.cloud.aiplatform.aiplatform.preview.models.Model"
@property
def preview(self):
"""Return a Model instance with preview features enabled."""
from google.cloud.aiplatform.preview import models as preview_models
if not hasattr(self, "_preview_instance"):
self._preview_instance = preview_models.Model(
self.resource_name, credentials=self.credentials
)
return self._preview_instance
@property
def uri(self) -> Optional[str]:
"""Path to the directory containing the Model artifact and any of its
supporting files. Not present for AutoML Models."""
self._assert_gca_resource_is_available()
return self._gca_resource.artifact_uri or None
@property
def description(self) -> str:
"""Description of the model."""
self._assert_gca_resource_is_available()
return self._gca_resource.description
@property
def supported_export_formats(
self,
) -> Dict[str, List[gca_model_compat.Model.ExportFormat.ExportableContent]]:
"""The formats and content types in which this Model may be exported.
If empty, this Model is not available for export.
For example, if this model can be exported as a Tensorflow SavedModel and
have the artifacts written to Cloud Storage, the expected value would be:
{'tf-saved-model': [<ExportableContent.ARTIFACT: 1>]}
"""
self._assert_gca_resource_is_available()
return {
export_format.id: [
gca_model_compat.Model.ExportFormat.ExportableContent(content)
for content in export_format.exportable_contents
]
for export_format in self._gca_resource.supported_export_formats
}
@property
def supported_deployment_resources_types(
self,
) -> List[model_v1.Model.DeploymentResourcesType]:
"""List of deployment resource types accepted for this Model.
When this Model is deployed, its prediction resources are described by
the `prediction_resources` field of the objects returned by
`Endpoint.list_models()`. Because not all Models support all resource
configuration types, the configuration types this Model supports are
listed here.
If no configuration types are listed, the Model cannot be
deployed to an `Endpoint` and does not support online predictions
(`Endpoint.predict()` or `Endpoint.explain()`). Such a Model can serve
predictions by using a `BatchPredictionJob`, if it has at least one entry
each in `Model.supported_input_storage_formats` and
`Model.supported_output_storage_formats`."""
self._assert_gca_resource_is_available()
return list(self._gca_resource.supported_deployment_resources_types)
@property
def supported_input_storage_formats(self) -> List[str]:
"""The formats this Model supports in the `input_config` field of a
`BatchPredictionJob`. If `Model.predict_schemata.instance_schema_uri`
exists, the instances should be given as per that schema.
[Read the docs for more on batch prediction formats](https://cloud.google.com/vertex-ai/docs/predictions/batch-predictions#batch_request_input)
If this Model doesn't support any of these formats it means it cannot be
used with a `BatchPredictionJob`. However, if it has
`supported_deployment_resources_types`, it could serve online predictions
by using `Endpoint.predict()` or `Endpoint.explain()`.
"""
self._assert_gca_resource_is_available()
return list(self._gca_resource.supported_input_storage_formats)
@property
def supported_output_storage_formats(self) -> List[str]:
"""The formats this Model supports in the `output_config` field of a
`BatchPredictionJob`.
If both `Model.predict_schemata.instance_schema_uri` and
`Model.predict_schemata.prediction_schema_uri` exist, the predictions
are returned together with their instances. In other words, the
prediction has the original instance data first, followed by the actual
prediction content (as per the schema).
[Read the docs for more on batch prediction formats](https://cloud.google.com/vertex-ai/docs/predictions/batch-predictions)
If this Model doesn't support any of these formats it means it cannot be
used with a `BatchPredictionJob`. However, if it has
`supported_deployment_resources_types`, it could serve online predictions
by using `Endpoint.predict()` or `Endpoint.explain()`.
"""
self._assert_gca_resource_is_available()
return list(self._gca_resource.supported_output_storage_formats)
@property
def predict_schemata(self) -> Optional[model_v1.PredictSchemata]:
"""The schemata that describe formats of the Model's predictions and
explanations, if available."""
self._assert_gca_resource_is_available()
return getattr(self._gca_resource, "predict_schemata")
@property
def training_job(self) -> Optional["aiplatform.training_jobs._TrainingJob"]:
"""The TrainingJob that uploaded this Model, if any.
Raises:
api_core.exceptions.NotFound: If the Model's training job resource
cannot be found on the Vertex service.
"""
self._assert_gca_resource_is_available()
job_name = getattr(self._gca_resource, "training_pipeline")
if not job_name:
return None
try:
return aiplatform.training_jobs._TrainingJob._get_and_return_subclass(
resource_name=job_name,
project=self.project,
location=self.location,
credentials=self.credentials,
)
except api_exceptions.NotFound as exc:
raise api_exceptions.NotFound(
f"The training job used to create this model could not be found: {job_name}"
) from exc
@property
def container_spec(self) -> Optional[model_v1.ModelContainerSpec]:
"""The specification of the container that is to be used when deploying
this Model. Not present for AutoML Models."""
self._assert_gca_resource_is_available()
return getattr(self._gca_resource, "container_spec")
@property
def version_id(self) -> str:
"""The version ID of the model.
A new version is committed when a new model version is uploaded or
trained under an existing model id. It is an auto-incrementing decimal
number in string representation."""
self._assert_gca_resource_is_available()
return getattr(self._gca_resource, "version_id")
@property
def version_aliases(self) -> Sequence[str]:
"""User provided version aliases so that a model version can be referenced via
alias (i.e. projects/{project}/locations/{location}/models/{model_id}@{version_alias}
instead of auto-generated version id (i.e.
projects/{project}/locations/{location}/models/{model_id}@{version_id}).
The format is [a-z][a-zA-Z0-9-]{0,126}[a-z0-9] to distinguish from
version_id. A default version alias will be created for the first version
of the model, and there must be exactly one default version alias for a model.
"""
self._assert_gca_resource_is_available()
return getattr(self._gca_resource, "version_aliases")
@property
def version_create_time(self) -> timestamp_pb2.Timestamp:
"""Timestamp when this version was created."""
self._assert_gca_resource_is_available()
return getattr(self._gca_resource, "version_create_time")
@property
def version_update_time(self) -> timestamp_pb2.Timestamp:
"""Timestamp when this version was updated."""
self._assert_gca_resource_is_available()
return getattr(self._gca_resource, "version_update_time")
@property
def version_description(self) -> str:
"""The description of this version."""
self._assert_gca_resource_is_available()
return getattr(self._gca_resource, "version_description")
@property
def resource_name(self) -> str:
"""Full qualified resource name, without any version ID."""
self._assert_gca_resource_is_available()
return ModelRegistry._parse_versioned_name(self._gca_resource.name)[0]
@property
def name(self) -> str:
"""Name of this resource."""
self._assert_gca_resource_is_available()
return ModelRegistry._parse_versioned_name(super().name)[0]
@property
def versioned_resource_name(self) -> str:
"""The fully-qualified resource name, including the version ID. For example,
projects/{project}/locations/{location}/models/{model_id}@{version_id}
"""
self._assert_gca_resource_is_available()
return ModelRegistry._get_versioned_name(
self.resource_name,
self.version_id,
)
@property
def versioning_registry(self) -> "ModelRegistry":
"""The registry of model versions associated with this
Model instance."""
return self._registry
def __init__(
self,
model_name: str,
project: Optional[str] = None,
location: Optional[str] = None,
credentials: Optional[auth_credentials.Credentials] = None,
version: Optional[str] = None,
):
"""Retrieves the model resource and instantiates its representation.
Args:
model_name (str):
Required. A fully-qualified model resource name or model ID.
Example: "projects/123/locations/us-central1/models/456" or
"456" when project and location are initialized or passed.
May optionally contain a version ID or version alias in
{model_name}@{version} form. See version arg.
project (str):
Optional project to retrieve model from. If not set, project
set in aiplatform.init will be used.
location (str):
Optional location to retrieve model from. If not set, location
set in aiplatform.init will be used.
credentials: Optional[auth_credentials.Credentials]=None,
Custom credentials to use to upload this model. If not set,
credentials set in aiplatform.init will be used.
version (str):
Optional. Version ID or version alias.
When set, the specified model version will be targeted
unless overridden in method calls.
When not set, the model with the "default" alias will
be targeted unless overridden in method calls.
No behavior change if only one version of a model exists.
Raises:
ValueError: If `version` is passed alongside a model_name referencing a different version.
"""
# If the version was passed in model_name, parse it
model_name, parsed_version = ModelRegistry._parse_versioned_name(model_name)
if parsed_version:
if version and version != parsed_version:
raise ValueError(
f"A version of {version} was passed that conflicts with the version of {parsed_version} in the model_name."
)
version = parsed_version
super().__init__(
project=project,
location=location,
credentials=credentials,
resource_name=model_name,
)
# Model versions can include @{version} in the resource name.
self._resource_id_validator = super()._revisioned_resource_id_validator
# Create a versioned model_name, if it exists, for getting the GCA model
versioned_model_name = ModelRegistry._get_versioned_name(model_name, version)
self._gca_resource = self._get_gca_resource(resource_name=versioned_model_name)
# Create ModelRegistry with the unversioned resource name
self._registry = ModelRegistry(
self.resource_name,
location=location,
project=project,
credentials=credentials,
)
def update(
self,
display_name: Optional[str] = None,
description: Optional[str] = None,
labels: Optional[Dict[str, str]] = None,
) -> "Model":
"""Updates a model.
Example usage:
my_model = my_model.update(
display_name="my-model",
description="my description",
labels={'key': 'value'},
)
Args:
display_name (str):
The display name of the Model. The name can be up to 128
characters long and can consist of any UTF-8 characters.
description (str):
The description of the model.
labels (Dict[str, str]):
Optional. The labels with user-defined metadata to
organize your Models.
Label keys and values can be no longer than 64
characters (Unicode codepoints), can only
contain lowercase letters, numeric characters,
underscores and dashes. International characters
are allowed.
See https://goo.gl/xmQnxf for more information
and examples of labels.
Returns:
model (aiplatform.Model):
Updated model resource.
Raises:
ValueError: If `labels` is not the correct format.
"""
self.wait()
current_model_proto = self.gca_resource
copied_model_proto = current_model_proto.__class__(current_model_proto)
update_mask: List[str] = []
# Updates to base model properties cannot occur if a versioned model is passed.
# Use the unversioned model resource name.
copied_model_proto.name = self.resource_name
if display_name:
utils.validate_display_name(display_name)
copied_model_proto.display_name = display_name
update_mask.append("display_name")
if description:
copied_model_proto.description = description
update_mask.append("description")
if labels:
utils.validate_labels(labels)
copied_model_proto.labels = labels
update_mask.append("labels")
update_mask = field_mask_pb2.FieldMask(paths=update_mask)
self.api_client.update_model(model=copied_model_proto, update_mask=update_mask)
self._sync_gca_resource()
return self
# TODO(b/170979926) Add support for metadata and metadata schema
@classmethod
@base.optional_sync()
def upload(
cls,
serving_container_image_uri: Optional[str] = None,
*,
artifact_uri: Optional[str] = None,
model_id: Optional[str] = None,
parent_model: Optional[str] = None,
is_default_version: bool = True,
version_aliases: Optional[Sequence[str]] = None,
version_description: Optional[str] = None,
serving_container_predict_route: Optional[str] = None,
serving_container_health_route: Optional[str] = None,
description: Optional[str] = None,
serving_container_command: Optional[Sequence[str]] = None,
serving_container_args: Optional[Sequence[str]] = None,
serving_container_environment_variables: Optional[Dict[str, str]] = None,
serving_container_ports: Optional[Sequence[int]] = None,
serving_container_grpc_ports: Optional[Sequence[int]] = None,
local_model: Optional["LocalModel"] = None,
instance_schema_uri: Optional[str] = None,
parameters_schema_uri: Optional[str] = None,
prediction_schema_uri: Optional[str] = None,
explanation_metadata: Optional[explain.ExplanationMetadata] = None,
explanation_parameters: Optional[explain.ExplanationParameters] = None,
display_name: Optional[str] = None,
project: Optional[str] = None,
location: Optional[str] = None,
credentials: Optional[auth_credentials.Credentials] = None,
labels: Optional[Dict[str, str]] = None,
encryption_spec_key_name: Optional[str] = None,
staging_bucket: Optional[str] = None,
sync=True,
upload_request_timeout: Optional[float] = None,
serving_container_deployment_timeout: Optional[int] = None,
serving_container_shared_memory_size_mb: Optional[int] = None,
serving_container_startup_probe_exec: Optional[Sequence[str]] = None,
serving_container_startup_probe_period_seconds: Optional[int] = None,
serving_container_startup_probe_timeout_seconds: Optional[int] = None,
serving_container_health_probe_exec: Optional[Sequence[str]] = None,
serving_container_health_probe_period_seconds: Optional[int] = None,
serving_container_health_probe_timeout_seconds: Optional[int] = None,
model_garden_source_model_name: Optional[str] = None,
) -> "Model":
"""Uploads a model and returns a Model representing the uploaded Model
resource.
Example usage:
my_model = Model.upload(
display_name="my-model",
artifact_uri="gs://my-model/saved-model",
serving_container_image_uri="tensorflow/serving"
)
Args:
serving_container_image_uri (str):
Optional. The URI of the Model serving container. This parameter is required
if the parameter `local_model` is not specified.
artifact_uri (str):
Optional. The path to the directory containing the Model artifact and
any of its supporting files. Leave blank for custom container prediction.
Not present for AutoML Models.
model_id (str):
Optional. The ID to use for the uploaded Model, which will
become the final component of the model resource name.
This value may be up to 63 characters, and valid characters
are `[a-z0-9_-]`. The first character cannot be a number or hyphen.
parent_model (str):
Optional. The resource name or model ID of an existing model that the
newly-uploaded model will be a version of.
Only set this field when uploading a new version of an existing model.
is_default_version (bool):
Optional. When set to True, the newly uploaded model version will
automatically have alias "default" included. Subsequent uses of
this model without a version specified will use this "default" version.
When set to False, the "default" alias will not be moved.
Actions targeting the newly-uploaded model version will need
to specifically reference this version by ID or alias.
New model uploads, i.e. version 1, will always be "default" aliased.
version_aliases (Sequence[str]):
Optional. User provided version aliases so that a model version
can be referenced via alias instead of auto-generated version ID.
A default version alias will be created for the first version of the model.
The format is [a-z][a-zA-Z0-9-]{0,126}[a-z0-9]
version_description (str):
Optional. The description of the model version being uploaded.
serving_container_predict_route (str):
Optional. An HTTP path to send prediction requests to the container, and
which must be supported by it. If not specified a default HTTP path will
be used by Vertex AI.
serving_container_health_route (str):
Optional. An HTTP path to send health check requests to the container, and which
must be supported by it. If not specified a standard HTTP path will be
used by Vertex AI.
description (str):
The description of the model.
            serving_container_command (Sequence[str]):
                Optional. The command with which the container is run. Not executed within a
                shell. The Docker image's ENTRYPOINT is used if this is not provided.
                Variable references $(VAR_NAME) are expanded using the container's
                environment. If a variable cannot be resolved, the reference in the
                input string will be unchanged. The $(VAR_NAME) syntax can be escaped
                with a double $$, i.e. $$(VAR_NAME). Escaped references will never be
                expanded, regardless of whether the variable exists or not.
            serving_container_args (Sequence[str]):
                Optional. The arguments to the command. The Docker image's CMD is used if this is
                not provided. Variable references $(VAR_NAME) are expanded using the
                container's environment. If a variable cannot be resolved, the reference
                in the input string will be unchanged. The $(VAR_NAME) syntax can be
                escaped with a double $$, i.e. $$(VAR_NAME). Escaped references will
                never be expanded, regardless of whether the variable exists or not.
            serving_container_environment_variables (Dict[str, str]):
                Optional. The environment variables that are to be present in the container.
Should be a dictionary where keys are environment variable names
and values are environment variable values for those names.
            serving_container_ports (Sequence[int]):
                Optional. Declaration of ports that are exposed by the container. This field is
                primarily informational; it gives Vertex AI information about the
                network connections the container uses. Whether or not a port is listed
                here has no impact on whether the port is actually exposed; any port
                listening on the default "0.0.0.0" address inside a container will be
                accessible from the network.
            serving_container_grpc_ports (Sequence[int]):
                Optional. Declaration of ports that are exposed by the container. Vertex AI sends gRPC
prediction requests that it receives to the first port on this list. Vertex
AI also sends liveness and health checks to this port.
If you do not specify this field, gRPC requests to the container will be
disabled.
Vertex AI does not use ports other than the first one listed. This field
corresponds to the `ports` field of the Kubernetes Containers v1 core API.
local_model (Optional[LocalModel]):
Optional. A LocalModel instance that includes a `serving_container_spec`.
If provided, the `serving_container_spec` of the LocalModel instance
will overwrite the values of all other serving container parameters.
instance_schema_uri (str):
Optional. Points to a YAML file stored on Google Cloud
Storage describing the format of a single instance, which
are used in
``PredictRequest.instances``,
``ExplainRequest.instances``
and
``BatchPredictionJob.input_config``.
The schema is defined as an OpenAPI 3.0.2 `Schema
Object <https://tinyurl.com/y538mdwt#schema-object>`__.
AutoML Models always have this field populated by AI
Platform. Note: The URI given on output will be immutable
and probably different, including the URI scheme, than the
one given on input. The output URI will point to a location
                where the user only has read access.
parameters_schema_uri (str):
Optional. Points to a YAML file stored on Google Cloud
Storage describing the parameters of prediction and
explanation via
``PredictRequest.parameters``,
``ExplainRequest.parameters``
and
``BatchPredictionJob.model_parameters``.
The schema is defined as an OpenAPI 3.0.2 `Schema
Object <https://tinyurl.com/y538mdwt#schema-object>`__.
AutoML Models always have this field populated by AI
Platform, if no parameters are supported it is set to an
empty string. Note: The URI given on output will be
immutable and probably different, including the URI scheme,
than the one given on input. The output URI will point to a
                location where the user only has read access.
prediction_schema_uri (str):
Optional. Points to a YAML file stored on Google Cloud
Storage describing the format of a single prediction
produced by this Model, which are returned via
``PredictResponse.predictions``,
``ExplainResponse.explanations``,
and
``BatchPredictionJob.output_config``.
The schema is defined as an OpenAPI 3.0.2 `Schema
Object <https://tinyurl.com/y538mdwt#schema-object>`__.
AutoML Models always have this field populated by AI
Platform. Note: The URI given on output will be immutable
and probably different, including the URI scheme, than the
one given on input. The output URI will point to a location
                where the user only has read access.
explanation_metadata (aiplatform.explain.ExplanationMetadata):
Optional. Metadata describing the Model's input and output for explanation.
`explanation_metadata` is optional while `explanation_parameters` must be
specified when used.
For more details, see `Ref docs <http://tinyurl.com/1igh60kt>`
explanation_parameters (aiplatform.explain.ExplanationParameters):
Optional. Parameters to configure explaining for Model's predictions.
For more details, see `Ref docs <http://tinyurl.com/1an4zake>`
display_name (str):
Optional. The display name of the Model. The name can be up to 128
                characters long and can consist of any UTF-8 characters.
            project (str):
                Optional. Project to upload this model to. Overrides project set in
                aiplatform.init.
            location (str):
                Optional. Location to upload this model to. Overrides location set in
                aiplatform.init.
            credentials (auth_credentials.Credentials):
                Optional. Custom credentials to use to upload this model. Overrides credentials
                set in aiplatform.init.
labels (Dict[str, str]):
Optional. The labels with user-defined metadata to
organize your Models.
Label keys and values can be no longer than 64
characters (Unicode codepoints), can only
contain lowercase letters, numeric characters,
underscores and dashes. International characters
are allowed.
See https://goo.gl/xmQnxf for more information
and examples of labels.
encryption_spec_key_name (Optional[str]):
Optional. The Cloud KMS resource identifier of the customer
managed encryption key used to protect the model. Has the
form:
``projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key``.
The key needs to be in the same region as where the compute
resource is created.
If set, this Model and all sub-resources of this Model will be secured by this key.
Overrides encryption_spec_key_name set in aiplatform.init.
staging_bucket (str):
Optional. Bucket to stage local model artifacts. Overrides
staging_bucket set in aiplatform.init.
upload_request_timeout (float):
Optional. The timeout for the upload request in seconds.
serving_container_deployment_timeout (int):
Optional. Deployment timeout in seconds.
serving_container_shared_memory_size_mb (int):
Optional. The amount of the VM memory to reserve as the shared
memory for the model in megabytes.
serving_container_startup_probe_exec (Sequence[str]):
Optional. Exec specifies the action to take. Used by startup
probe. An example of this argument would be
["cat", "/tmp/healthy"]
serving_container_startup_probe_period_seconds (int):
Optional. How often (in seconds) to perform the startup probe.
                Defaults to 10 seconds. Minimum value is 1.
serving_container_startup_probe_timeout_seconds (int):
Optional. Number of seconds after which the startup probe times
out. Defaults to 1 second. Minimum value is 1.
serving_container_health_probe_exec (Sequence[str]):
Optional. Exec specifies the action to take. Used by health
probe. An example of this argument would be
["cat", "/tmp/healthy"]
serving_container_health_probe_period_seconds (int):
Optional. How often (in seconds) to perform the health probe.
                Defaults to 10 seconds. Minimum value is 1.
serving_container_health_probe_timeout_seconds (int):
Optional. Number of seconds after which the health probe times
out. Defaults to 1 second. Minimum value is 1.
model_garden_source_model_name:
Optional. The model garden source model resource name if the
model is from Vertex Model Garden.
Returns:
model (aiplatform.Model):
Instantiated representation of the uploaded model resource.
Raises:
ValueError: If explanation_metadata is specified while explanation_parameters
is not.
Also if model directory does not contain a supported model file.
If `local_model` is specified but `serving_container_spec.image_uri`
in the `local_model` is None.
If `local_model` is not specified and `serving_container_image_uri`
is None.
"""
if not display_name:
display_name = cls._generate_display_name()
utils.validate_display_name(display_name)
if labels:
utils.validate_labels(labels)
appended_user_agent = None
if local_model:
container_spec = local_model.get_serving_container_spec()
appended_user_agent = [prediction_constants.CUSTOM_PREDICTION_ROUTINES]
elif not serving_container_image_uri and not artifact_uri:
            # It's a referenced/placeholder model.
container_spec = None
else:
if not serving_container_image_uri:
raise ValueError(
"The parameter `serving_container_image_uri` is required "
"if no `local_model` is provided."
)
env = None
ports = None
grpc_ports = None
deployment_timeout = (
duration_pb2.Duration(seconds=serving_container_deployment_timeout)
if serving_container_deployment_timeout
else None
)
startup_probe = None
health_probe = None
if serving_container_environment_variables:
env = [
gca_env_var_compat.EnvVar(name=str(key), value=str(value))
for key, value in serving_container_environment_variables.items()
]
if serving_container_ports:
ports = [
gca_model_compat.Port(container_port=port)
for port in serving_container_ports
]
if serving_container_grpc_ports:
grpc_ports = [
gca_model_compat.Port(container_port=port)
for port in serving_container_grpc_ports
]
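            # Assemble optional startup and health probes for the serving
            # container spec; each probe is only built when at least one of its
            # exec/period/timeout arguments is provided.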
if (
serving_container_startup_probe_exec
or serving_container_startup_probe_period_seconds
or serving_container_startup_probe_timeout_seconds
):
startup_probe_exec = None
if serving_container_startup_probe_exec:
startup_probe_exec = gca_model_compat.Probe.ExecAction(
command=serving_container_startup_probe_exec
)
startup_probe = gca_model_compat.Probe(
exec=startup_probe_exec,
period_seconds=serving_container_startup_probe_period_seconds,
timeout_seconds=serving_container_startup_probe_timeout_seconds,
)
if (
serving_container_health_probe_exec
or serving_container_health_probe_period_seconds
or serving_container_health_probe_timeout_seconds
):
health_probe_exec = None
if serving_container_health_probe_exec:
health_probe_exec = gca_model_compat.Probe.ExecAction(
command=serving_container_health_probe_exec
)
health_probe = gca_model_compat.Probe(
exec=health_probe_exec,
period_seconds=serving_container_health_probe_period_seconds,
timeout_seconds=serving_container_health_probe_timeout_seconds,
)
container_spec = gca_model_compat.ModelContainerSpec(
image_uri=serving_container_image_uri,
command=serving_container_command,
args=serving_container_args,
env=env,
ports=ports,
grpc_ports=grpc_ports,
predict_route=serving_container_predict_route,
health_route=serving_container_health_route,
deployment_timeout=deployment_timeout,
shared_memory_size_mb=serving_container_shared_memory_size_mb,
startup_probe=startup_probe,
health_probe=health_probe,
)
model_predict_schemata = None
if any([instance_schema_uri, parameters_schema_uri, prediction_schema_uri]):
model_predict_schemata = gca_model_compat.PredictSchemata(
instance_schema_uri=instance_schema_uri,
parameters_schema_uri=parameters_schema_uri,
prediction_schema_uri=prediction_schema_uri,
)
# TODO(b/182388545) initializer.global_config.get_encryption_spec from a sync function
encryption_spec = initializer.global_config.get_encryption_spec(
encryption_spec_key_name=encryption_spec_key_name,
)
parent_model = ModelRegistry._get_true_version_parent(
location=location, project=project, parent_model=parent_model
)
version_aliases = ModelRegistry._get_true_alias_list(
version_aliases=version_aliases, is_default_version=is_default_version
)
base_model_source = None
if model_garden_source_model_name:
base_model_source = gca_model_compat.Model.BaseModelSource(
model_garden_source=gca_model_compat.ModelGardenSource(
public_model_name=model_garden_source_model_name
)
)
managed_model = gca_model_compat.Model(
display_name=display_name,
description=description,
version_aliases=version_aliases,
version_description=version_description,
container_spec=container_spec,
predict_schemata=model_predict_schemata,
labels=labels,
encryption_spec=encryption_spec,
base_model_source=base_model_source,
)
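        # If artifact_uri is a local path, validate it (and, when a prebuilt
        # prediction image is used, require a supported model file) before
        # staging the artifacts in GCS so the upload request references a
        # gs:// URI.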
if artifact_uri and not artifact_uri.startswith("gs://"):
model_dir = pathlib.Path(artifact_uri)
# Validating the model directory
if not model_dir.exists():
raise ValueError(f"artifact_uri path does not exist: '{artifact_uri}'")
PREBUILT_IMAGE_RE = "(us|europe|asia)-docker.pkg.dev/vertex-ai/prediction/"
if serving_container_image_uri and re.match(
PREBUILT_IMAGE_RE, serving_container_image_uri
):
if not model_dir.is_dir():
raise ValueError(
f"artifact_uri path must be a directory: '{artifact_uri}' when using prebuilt image '{serving_container_image_uri}'"
)
if not any(
(model_dir / file_name).exists()
for file_name in _SUPPORTED_MODEL_FILE_NAMES
):
raise ValueError(
"artifact_uri directory does not contain any supported model files. "
f"When using a prebuilt serving image, the upload method only supports the following model files: '{_SUPPORTED_MODEL_FILE_NAMES}'"
)
# Uploading the model
staged_data_uri = gcs_utils.stage_local_data_in_gcs(
data_path=str(model_dir),
staging_gcs_dir=staging_bucket,
project=project,
location=location,
credentials=credentials,
)
artifact_uri = staged_data_uri
if artifact_uri:
managed_model.artifact_uri = artifact_uri
managed_model.explanation_spec = (
_explanation_utils.create_and_validate_explanation_spec(
explanation_metadata=explanation_metadata,
explanation_parameters=explanation_parameters,
)
)
request = gca_model_service_compat.UploadModelRequest(
parent=initializer.global_config.common_location_path(project, location),
model=managed_model,
parent_model=parent_model,
model_id=model_id,
)
api_client = cls._instantiate_client(
location, credentials, appended_user_agent=appended_user_agent
)
lro = api_client.upload_model(
request=request,
timeout=upload_request_timeout,
)
_LOGGER.log_create_with_lro(cls, lro)
model_upload_response = lro.result()
this_model = cls(
model_upload_response.model, version=model_upload_response.model_version_id
)
_LOGGER.log_create_complete(cls, this_model._gca_resource, "model")
return this_model
def deploy(
self,
endpoint: Optional[Union["Endpoint", "PrivateEndpoint"]] = None,
deployed_model_display_name: Optional[str] = None,
traffic_percentage: Optional[int] = 0,
traffic_split: Optional[Dict[str, int]] = None,
machine_type: Optional[str] = None,
min_replica_count: int = 1,
max_replica_count: int = 1,
accelerator_type: Optional[str] = None,
accelerator_count: Optional[int] = None,
tpu_topology: Optional[str] = None,
service_account: Optional[str] = None,
explanation_metadata: Optional[aiplatform.explain.ExplanationMetadata] = None,
explanation_parameters: Optional[
aiplatform.explain.ExplanationParameters
] = None,
metadata: Optional[Sequence[Tuple[str, str]]] = (),
encryption_spec_key_name: Optional[str] = None,
network: Optional[str] = None,
sync=True,
deploy_request_timeout: Optional[float] = None,
autoscaling_target_cpu_utilization: Optional[int] = None,
autoscaling_target_accelerator_duty_cycle: Optional[int] = None,
enable_access_logging=False,
disable_container_logging: bool = False,
private_service_connect_config: Optional[
PrivateEndpoint.PrivateServiceConnectConfig
] = None,
deployment_resource_pool: Optional[DeploymentResourcePool] = None,
reservation_affinity_type: Optional[str] = None,
reservation_affinity_key: Optional[str] = None,
reservation_affinity_values: Optional[List[str]] = None,
spot: bool = False,
fast_tryout_enabled: bool = False,
system_labels: Optional[Dict[str, str]] = None,
required_replica_count: Optional[int] = 0,
) -> Union[Endpoint, PrivateEndpoint]:
"""Deploys model to endpoint. Endpoint will be created if unspecified.
Args:
endpoint (Union[Endpoint, PrivateEndpoint]):
Optional. Public or private Endpoint to deploy model to. If not specified,
                the endpoint display name will be the model display name + '_endpoint'.
deployed_model_display_name (str):
Optional. The display name of the DeployedModel. If not provided
upon creation, the Model's display_name is used.
traffic_percentage (int):
Optional. Desired traffic to newly deployed model. Defaults to
0 if there are pre-existing deployed models. Defaults to 100 if
there are no pre-existing deployed models. Negative values should
not be provided. Traffic of previously deployed models at the endpoint
will be scaled down to accommodate new deployed model's traffic.
Should not be provided if traffic_split is provided.
traffic_split (Dict[str, int]):
Optional. A map from a DeployedModel's ID to the percentage of
this Endpoint's traffic that should be forwarded to that DeployedModel.
If a DeployedModel's ID is not listed in this map, then it receives
no traffic. The traffic percentage values must add up to 100, or
map must be empty if the Endpoint is to not accept any traffic at
the moment. Key for model being deployed is "0". Should not be
provided if traffic_percentage is provided.
machine_type (str):
Optional. The type of machine. Not specifying machine type will
                result in the model being deployed with automatic resources.
min_replica_count (int):
Optional. The minimum number of machine replicas this deployed
                model will always be deployed on. If traffic against it increases,
it may dynamically be deployed onto more replicas, and as traffic
decreases, some of these extra replicas may be freed.
max_replica_count (int):
Optional. The maximum number of replicas this deployed model may
be deployed on when the traffic against it increases. If requested
value is too large, the deployment will error, but if deployment
succeeds then the ability to scale the model to that many replicas
is guaranteed (barring service outages). If traffic against the
deployed model increases beyond what its replicas at maximum may
handle, a portion of the traffic will be dropped. If this value
is not provided, the smaller value of min_replica_count or 1 will
be used.
accelerator_type (str):
Optional. Hardware accelerator type. Must also set accelerator_count if used.
One of ACCELERATOR_TYPE_UNSPECIFIED, NVIDIA_TESLA_K80, NVIDIA_TESLA_P100,
NVIDIA_TESLA_V100, NVIDIA_TESLA_P4, NVIDIA_TESLA_T4
accelerator_count (int):
Optional. The number of accelerators to attach to a worker replica.
tpu_topology (str):
Optional. The TPU topology to use for the DeployedModel.
                Required for Cloud TPU multihost deployments.
service_account (str):
The service account that the DeployedModel's container runs as. Specify the
email address of the service account. If this service account is not
specified, the container runs as a service account that doesn't have access
to the resource project.
Users deploying the Model must have the `iam.serviceAccounts.actAs`
permission on this service account.
explanation_metadata (aiplatform.explain.ExplanationMetadata):
Optional. Metadata describing the Model's input and output for explanation.
`explanation_metadata` is optional while `explanation_parameters` must be
specified when used.
For more details, see `Ref docs <http://tinyurl.com/1igh60kt>`
explanation_parameters (aiplatform.explain.ExplanationParameters):
Optional. Parameters to configure explaining for Model's predictions.
For more details, see `Ref docs <http://tinyurl.com/1an4zake>`
metadata (Sequence[Tuple[str, str]]):
Optional. Strings which should be sent along with the request as
metadata.
encryption_spec_key_name (Optional[str]):
Optional. The Cloud KMS resource identifier of the customer
managed encryption key used to protect the model. Has the
form:
``projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key``.
The key needs to be in the same region as where the compute
resource is created.
If set, this Endpoint and all sub-resources of this Endpoint will be secured by this key.
Overrides encryption_spec_key_name set in aiplatform.init.
network (str):
Optional. The full name of the Compute Engine network to which
the Endpoint, if created, will be peered to. E.g. "projects/12345/global/networks/myVPC"
Private services access must already be configured for the network.
If set or aiplatform.init(network=...) has been set, a PrivateEndpoint will be created.
If left unspecified, an Endpoint will be created. Read more about PrivateEndpoints
[in the documentation](https://cloud.google.com/vertex-ai/docs/predictions/using-private-endpoints).
Cannot be set together with private_service_connect_config.
sync (bool):
Whether to execute this method synchronously. If False, this method
will be executed in concurrent Future and any downstream object will
be immediately returned and synced when the Future has completed.
deploy_request_timeout (float):
Optional. The timeout for the deploy request in seconds.
autoscaling_target_cpu_utilization (int):
Optional. Target CPU Utilization to use for Autoscaling Replicas.
A default value of 60 will be used if not specified.
autoscaling_target_accelerator_duty_cycle (int):
Optional. Target Accelerator Duty Cycle.
Must also set accelerator_type and accelerator_count if specified.
A default value of 60 will be used if not specified.
enable_access_logging (bool):
Whether to enable endpoint access logging. Defaults to False.
disable_container_logging (bool):
If True, container logs from the deployed model will not be
written to Cloud Logging. Defaults to False.
private_service_connect_config (PrivateEndpoint.PrivateServiceConnectConfig):
                If provided, the endpoint will be accessible via [Private Service Connect](https://cloud.google.com/vpc/docs/private-service-connect).
Cannot be set together with network.
deployment_resource_pool (DeploymentResourcePool):
Resource pool where the model will be deployed. All models that
are deployed to the same DeploymentResourcePool will be hosted in
a shared model server. If provided, will override replica count
arguments.
reservation_affinity_type (str):
Optional. The type of reservation affinity.
One of NO_RESERVATION, ANY_RESERVATION, SPECIFIC_RESERVATION,
SPECIFIC_THEN_ANY_RESERVATION, SPECIFIC_THEN_NO_RESERVATION
reservation_affinity_key (str):
Optional. Corresponds to the label key of a reservation resource.
To target a SPECIFIC_RESERVATION by name, use `compute.googleapis.com/reservation-name` as the key
and specify the name of your reservation as its value.
reservation_affinity_values (List[str]):
Optional. Corresponds to the label values of a reservation resource.
This must be the full resource name of the reservation.
Format: 'projects/{project_id_or_number}/zones/{zone}/reservations/{reservation_name}'
spot (bool):
Optional. Whether to schedule the deployment workload on spot VMs.
fast_tryout_enabled (bool):
Optional. Defaults to False.
If True, model will be deployed using faster deployment path.
Useful for quick experiments. Not for production workloads. Only
available for most popular models with certain machine types.
system_labels (Dict[str, str]):
Optional. System labels to apply to Model Garden deployments.
System labels are managed by Google for internal use only.
required_replica_count (int):
Optional. Number of required available replicas for the
deployment to succeed. This field is only needed when partial
model deployment/mutation is desired, with a value greater than
                or equal to 1 and less than or equal to min_replica_count. If
set, the model deploy/mutate operation will succeed once
available_replica_count reaches required_replica_count, and the
rest of the replicas will be retried.
Returns:
endpoint (Union[Endpoint, PrivateEndpoint]):
Endpoint with the deployed model.
Raises:
ValueError: If `traffic_split` is set for PrivateEndpoint.
"""
network = network or initializer.global_config.network
Endpoint._validate_deploy_args(
min_replica_count=min_replica_count,
max_replica_count=max_replica_count,
accelerator_type=accelerator_type,
deployed_model_display_name=deployed_model_display_name,
traffic_split=traffic_split,
traffic_percentage=traffic_percentage,
deployment_resource_pool=deployment_resource_pool,
required_replica_count=required_replica_count,
)
if isinstance(endpoint, PrivateEndpoint):
if deployment_resource_pool:
raise ValueError(
"Model co-hosting is not supported for PrivateEndpoint. "
"Try calling deploy() without providing `deployment_resource_pool`."
)
if traffic_split and endpoint.network:
raise ValueError(
"Traffic splitting is not yet supported for PSA based PrivateEndpoint. "
"Try calling deploy() without providing `traffic_split`. "
"A maximum of one model can be deployed to each private Endpoint."
)
explanation_spec = _explanation_utils.create_and_validate_explanation_spec(
explanation_metadata=explanation_metadata,
explanation_parameters=explanation_parameters,
)
return self._deploy(
endpoint=endpoint,
deployed_model_display_name=deployed_model_display_name,
traffic_percentage=traffic_percentage,
traffic_split=traffic_split,
machine_type=machine_type,
min_replica_count=min_replica_count,
max_replica_count=max_replica_count,
accelerator_type=accelerator_type,
accelerator_count=accelerator_count,
tpu_topology=tpu_topology,
reservation_affinity_type=reservation_affinity_type,
reservation_affinity_key=reservation_affinity_key,
reservation_affinity_values=reservation_affinity_values,
service_account=service_account,
explanation_spec=explanation_spec,
metadata=metadata,
encryption_spec_key_name=encryption_spec_key_name
or initializer.global_config.encryption_spec_key_name,
network=network,
sync=sync,
deploy_request_timeout=deploy_request_timeout,
autoscaling_target_cpu_utilization=autoscaling_target_cpu_utilization,
autoscaling_target_accelerator_duty_cycle=autoscaling_target_accelerator_duty_cycle,
spot=spot,
enable_access_logging=enable_access_logging,
disable_container_logging=disable_container_logging,
private_service_connect_config=private_service_connect_config,
deployment_resource_pool=deployment_resource_pool,
fast_tryout_enabled=fast_tryout_enabled,
system_labels=system_labels,
required_replica_count=required_replica_count,
)
def _should_enable_dedicated_endpoint(self, fast_tryout_enabled: bool) -> bool:
"""Check if dedicated endpoint should be enabled for this endpoint.
Returns True if endpoint should be a dedicated endpoint.
"""
return fast_tryout_enabled
@base.optional_sync(return_input_arg="endpoint", bind_future_to_self=False)
def _deploy(
self,
endpoint: Optional[Union["Endpoint", "PrivateEndpoint"]] = None,
deployed_model_display_name: Optional[str] = None,
traffic_percentage: Optional[int] = 0,
traffic_split: Optional[Dict[str, int]] = None,
machine_type: Optional[str] = None,
min_replica_count: int = 1,
max_replica_count: int = 1,
accelerator_type: Optional[str] = None,
accelerator_count: Optional[int] = None,
tpu_topology: Optional[str] = None,
reservation_affinity_type: Optional[str] = None,
reservation_affinity_key: Optional[str] = None,
reservation_affinity_values: Optional[List[str]] = None,
service_account: Optional[str] = None,
explanation_spec: Optional[aiplatform.explain.ExplanationSpec] = None,
metadata: Optional[Sequence[Tuple[str, str]]] = (),
encryption_spec_key_name: Optional[str] = None,
network: Optional[str] = None,
sync: bool = True,
deploy_request_timeout: Optional[float] = None,
autoscaling_target_cpu_utilization: Optional[int] = None,
autoscaling_target_accelerator_duty_cycle: Optional[int] = None,
spot: bool = False,
enable_access_logging=False,
disable_container_logging: bool = False,
private_service_connect_config: Optional[
PrivateEndpoint.PrivateServiceConnectConfig
] = None,
deployment_resource_pool: Optional[DeploymentResourcePool] = None,
fast_tryout_enabled: bool = False,
system_labels: Optional[Dict[str, str]] = None,
required_replica_count: Optional[int] = 0,
) -> Union[Endpoint, PrivateEndpoint]:
"""Deploys model to endpoint. Endpoint will be created if unspecified.
Args:
endpoint (Union[Endpoint, PrivateEndpoint]):
Optional. Public or private Endpoint to deploy model to. If not specified,
                the endpoint display name will be the model display name + '_endpoint'.
deployed_model_display_name (str):
Optional. The display name of the DeployedModel. If not provided
upon creation, the Model's display_name is used.
traffic_percentage (int):
Optional. Desired traffic to newly deployed model. Defaults to
0 if there are pre-existing deployed models. Defaults to 100 if
there are no pre-existing deployed models. Negative values should
not be provided. Traffic of previously deployed models at the endpoint
will be scaled down to accommodate new deployed model's traffic.
Should not be provided if traffic_split is provided.
traffic_split (Dict[str, int]):
Optional. A map from a DeployedModel's ID to the percentage of
this Endpoint's traffic that should be forwarded to that DeployedModel.
If a DeployedModel's ID is not listed in this map, then it receives
no traffic. The traffic percentage values must add up to 100, or
map must be empty if the Endpoint is to not accept any traffic at
the moment. Key for model being deployed is "0". Should not be
provided if traffic_percentage is provided.
machine_type (str):
Optional. The type of machine. Not specifying machine type will
                result in the model being deployed with automatic resources.
min_replica_count (int):
Optional. The minimum number of machine replicas this deployed
                model will always be deployed on. If traffic against it increases,
it may dynamically be deployed onto more replicas, and as traffic
decreases, some of these extra replicas may be freed.
max_replica_count (int):
Optional. The maximum number of replicas this deployed model may
be deployed on when the traffic against it increases. If requested
value is too large, the deployment will error, but if deployment
succeeds then the ability to scale the model to that many replicas
is guaranteed (barring service outages). If traffic against the
deployed model increases beyond what its replicas at maximum may
handle, a portion of the traffic will be dropped. If this value
is not provided, the smaller value of min_replica_count or 1 will
be used.
accelerator_type (str):
Optional. Hardware accelerator type. Must also set accelerator_count if used.
One of ACCELERATOR_TYPE_UNSPECIFIED, NVIDIA_TESLA_K80, NVIDIA_TESLA_P100,
NVIDIA_TESLA_V100, NVIDIA_TESLA_P4, NVIDIA_TESLA_T4
accelerator_count (int):
Optional. The number of accelerators to attach to a worker replica.
tpu_topology (str):
Optional. The TPU topology to use for the DeployedModel.
                Required for Cloud TPU multihost deployments.
reservation_affinity_type (str):
Optional. The type of reservation affinity.
One of NO_RESERVATION, ANY_RESERVATION, SPECIFIC_RESERVATION,
SPECIFIC_THEN_ANY_RESERVATION, SPECIFIC_THEN_NO_RESERVATION
reservation_affinity_key (str):
Optional. Corresponds to the label key of a reservation resource.
To target a SPECIFIC_RESERVATION by name, use `compute.googleapis.com/reservation-name` as the key
and specify the name of your reservation as its value.
reservation_affinity_values (List[str]):
Optional. Corresponds to the label values of a reservation resource.
This must be the full resource name of the reservation.
Format: 'projects/{project_id_or_number}/zones/{zone}/reservations/{reservation_name}'
service_account (str):
The service account that the DeployedModel's container runs as. Specify the
email address of the service account. If this service account is not
specified, the container runs as a service account that doesn't have access
to the resource project.
Users deploying the Model must have the `iam.serviceAccounts.actAs`
permission on this service account.
explanation_spec (aiplatform.explain.ExplanationSpec):
Optional. Specification of Model explanation.
metadata (Sequence[Tuple[str, str]]):
Optional. Strings which should be sent along with the request as
metadata.
encryption_spec_key_name (Optional[str]):
Optional. The Cloud KMS resource identifier of the customer
managed encryption key used to protect the model. Has the
form:
``projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key``.
The key needs to be in the same region as where the compute
resource is created.
If set, this Model and all sub-resources of this Model will be secured by this key.
Overrides encryption_spec_key_name set in aiplatform.init
network (str):
Optional. The full name of the Compute Engine network to which
the Endpoint, if created, will be peered to. E.g. "projects/12345/global/networks/myVPC".
Private services access must already be configured for the network.
Read more about PrivateEndpoints
[in the documentation](https://cloud.google.com/vertex-ai/docs/predictions/using-private-endpoints).
Cannot be set together with private_service_connect_config.
sync (bool):
Whether to execute this method synchronously. If False, this method
will be executed in concurrent Future and any downstream object will
be immediately returned and synced when the Future has completed.
deploy_request_timeout (float):
Optional. The timeout for the deploy request in seconds.
autoscaling_target_cpu_utilization (int):
Optional. Target CPU Utilization to use for Autoscaling Replicas.
A default value of 60 will be used if not specified.
autoscaling_target_accelerator_duty_cycle (int):
Optional. Target Accelerator Duty Cycle.
Must also set accelerator_type and accelerator_count if specified.
A default value of 60 will be used if not specified.
spot (bool):
Optional. Whether to schedule the deployment workload on spot VMs.
enable_access_logging (bool):
Whether to enable endpoint access logging. Defaults to False.
disable_container_logging (bool):
If True, container logs from the deployed model will not be
written to Cloud Logging. Defaults to False.
private_service_connect_config (PrivateEndpoint.PrivateServiceConnectConfig):
                If provided, the endpoint will be accessible via [Private Service Connect](https://cloud.google.com/vpc/docs/private-service-connect).
Cannot be set together with network.
deployment_resource_pool (DeploymentResourcePool):
Optional. Resource pool where the model will be deployed. All models that
are deployed to the same DeploymentResourcePool will be hosted in
a shared model server. If provided, will override replica count
arguments.
fast_tryout_enabled (bool):
Optional. Defaults to False.
If True, model will be deployed using faster deployment path.
Useful for quick experiments. Not for production workloads. Only
available for most popular models with certain machine types.
system_labels (Dict[str, str]):
Optional. System labels to apply to Model Garden deployments.
System labels are managed by Google for internal use only.
required_replica_count (int):
Optional. Number of required available replicas for the
deployment to succeed. This field is only needed when partial
model deployment/mutation is desired, with a value greater than
                or equal to 1 and less than or equal to min_replica_count. If
set, the model deploy/mutate operation will succeed once
available_replica_count reaches required_replica_count, and the
rest of the replicas will be retried.
Returns:
endpoint (Union[Endpoint, PrivateEndpoint]):
Endpoint with the deployed model.
"""
if endpoint is None:
display_name = self.display_name[:118] + "_endpoint"
if not network and not private_service_connect_config:
endpoint = Endpoint.create(
display_name=display_name,
project=self.project,
location=self.location,
credentials=self.credentials,
encryption_spec_key_name=encryption_spec_key_name,
dedicated_endpoint_enabled=self._should_enable_dedicated_endpoint(
fast_tryout_enabled
),
)
else:
endpoint = PrivateEndpoint.create(
display_name=display_name,
network=network,
project=self.project,
location=self.location,
credentials=self.credentials,
encryption_spec_key_name=encryption_spec_key_name,
private_service_connect_config=private_service_connect_config,
)
_LOGGER.log_action_start_against_resource("Deploying model to", "", endpoint)
endpoint._deploy_call(
endpoint.api_client,
endpoint.resource_name,
self,
endpoint._gca_resource.traffic_split,
network=network or endpoint.network,
deployed_model_display_name=deployed_model_display_name,
traffic_percentage=traffic_percentage,
traffic_split=traffic_split,
machine_type=machine_type,
min_replica_count=min_replica_count,
max_replica_count=max_replica_count,
accelerator_type=accelerator_type,
accelerator_count=accelerator_count,
tpu_topology=tpu_topology,
reservation_affinity_type=reservation_affinity_type,
reservation_affinity_key=reservation_affinity_key,
reservation_affinity_values=reservation_affinity_values,
service_account=service_account,
explanation_spec=explanation_spec,
metadata=metadata,
deploy_request_timeout=deploy_request_timeout,
autoscaling_target_cpu_utilization=autoscaling_target_cpu_utilization,
autoscaling_target_accelerator_duty_cycle=autoscaling_target_accelerator_duty_cycle,
spot=spot,
enable_access_logging=enable_access_logging,
disable_container_logging=disable_container_logging,
deployment_resource_pool=deployment_resource_pool,
fast_tryout_enabled=fast_tryout_enabled,
system_labels=system_labels,
required_replica_count=required_replica_count,
)
_LOGGER.log_action_completed_against_resource("model", "deployed", endpoint)
endpoint._sync_gca_resource()
return endpoint
def batch_predict(
self,
job_display_name: Optional[str] = None,
gcs_source: Optional[Union[str, Sequence[str]]] = None,
bigquery_source: Optional[str] = None,
instances_format: str = "jsonl",
gcs_destination_prefix: Optional[str] = None,
bigquery_destination_prefix: Optional[str] = None,
predictions_format: str = "jsonl",
model_parameters: Optional[Dict] = None,
machine_type: Optional[str] = None,
accelerator_type: Optional[str] = None,
accelerator_count: Optional[int] = None,
starting_replica_count: Optional[int] = None,
max_replica_count: Optional[int] = None,
generate_explanation: Optional[bool] = False,
explanation_metadata: Optional[aiplatform.explain.ExplanationMetadata] = None,
explanation_parameters: Optional[
aiplatform.explain.ExplanationParameters
] = None,
labels: Optional[Dict[str, str]] = None,
credentials: Optional[auth_credentials.Credentials] = None,
encryption_spec_key_name: Optional[str] = None,
sync: bool = True,
create_request_timeout: Optional[float] = None,
batch_size: Optional[int] = None,
service_account: Optional[str] = None,
) -> jobs.BatchPredictionJob:
"""Creates a batch prediction job using this Model and outputs
prediction results to the provided destination prefix in the specified
`predictions_format`. One source and one destination prefix are
required.
Example usage:
my_model.batch_predict(
job_display_name="prediction-123",
gcs_source="gs://example-bucket/instances.csv",
instances_format="csv",
bigquery_destination_prefix="projectId.bqDatasetId.bqTableId"
)
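        Example using a BigQuery source and a Cloud Storage destination (the
        table, bucket, and machine type below are illustrative placeholders):
            my_model.batch_predict(
                job_display_name="prediction-456",
                bigquery_source="bq://projectId.bqDatasetId.bqTableId",
                gcs_destination_prefix="gs://example-bucket/output/",
                machine_type="n1-standard-4",
            )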
Args:
job_display_name (str):
Optional. The user-defined name of the BatchPredictionJob.
                The name can be up to 128 characters long and can consist
of any UTF-8 characters.
            gcs_source (Union[str, Sequence[str]]):
                Optional. Google Cloud Storage URI(-s) to your instances to run
batch prediction on. They must match `instances_format`.
            bigquery_source (str):
                Optional. BigQuery URI to a table, up to 2000 characters long. For example:
`bq://projectId.bqDatasetId.bqTableId`
            instances_format (str):
The format in which instances are provided. Must be one
of the formats listed in `Model.supported_input_storage_formats`.
Default is "jsonl" when using `gcs_source`. If a `bigquery_source`
is provided, this is overridden to "bigquery".
            gcs_destination_prefix (str):
                Optional. The Google Cloud Storage location of the directory where the
output is to be written to. In the given directory a new
directory is created. Its name is
``prediction-<model-display-name>-<job-create-time>``, where
timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format.
Inside of it files ``predictions_0001.<extension>``,
``predictions_0002.<extension>``, ...,
``predictions_N.<extension>`` are created where
``<extension>`` depends on chosen ``predictions_format``,
and N may equal 0001 and depends on the total number of
successfully predicted instances. If the Model has both
``instance`` and ``prediction`` schemata defined then each such
file contains predictions as per the ``predictions_format``.
If prediction for any instance failed (partially or
completely), then an additional ``errors_0001.<extension>``,
``errors_0002.<extension>``,..., ``errors_N.<extension>``
files are created (N depends on total number of failed
predictions). These files contain the failed instances, as
per their schema, followed by an additional ``error`` field
                which as value has ``google.rpc.Status``
containing only ``code`` and ``message`` fields.
            bigquery_destination_prefix (str):
                Optional. The BigQuery URI to a project or table, up to 2000 characters long.
                When only the project is specified, the Dataset and Table are created.
When the full table reference is specified, the Dataset must exist and
table must not exist. Accepted forms: ``bq://projectId`` or
``bq://projectId.bqDatasetId``. If no Dataset is specified,
a new one is created with the name
``prediction_<model-display-name>_<job-create-time>``
where the table name is made BigQuery-dataset-name compatible
(for example, most special characters become underscores), and
timestamp is in YYYY_MM_DDThh_mm_ss_sssZ "based on ISO-8601"
format. In the dataset two tables will be created, ``predictions``,
and ``errors``. If the Model has both ``instance`` and
``prediction`` schemata defined then the tables have columns as
follows: The ``predictions`` table contains instances for which
the prediction succeeded, it has columns as per a concatenation
of the Model's instance and prediction schemata. The ``errors``
table contains rows for which the prediction has failed, it has
instance columns, as per the instance schema, followed by a single
"errors" column, which as values has ```google.rpc.Status`` <Status>`__
represented as a STRUCT, and containing only ``code`` and ``message``.
            predictions_format (str):
                Optional. The format in which Vertex AI outputs the
predictions, must be one of the formats specified in
`Model.supported_output_storage_formats`.
Default is "jsonl" when using `gcs_destination_prefix`. If a
`bigquery_destination_prefix` is provided, this is overridden to
"bigquery".
            model_parameters (Dict):
Optional. The parameters that govern the predictions. The schema of
the parameters may be specified via the Model's `parameters_schema_uri`.
            machine_type (str):
Optional. The type of machine for running batch prediction on
dedicated resources. Not specifying machine type will result in
batch prediction job being run with automatic resources.
            accelerator_type (str):
Optional. The type of accelerator(s) that may be attached
to the machine as per `accelerator_count`. Only used if
`machine_type` is set.
            accelerator_count (int):
Optional. The number of accelerators to attach to the
`machine_type`. Only used if `machine_type` is set.
            starting_replica_count (int):
                Optional. The number of machine replicas used at the start of the batch
                operation. If not set, Vertex AI decides the starting number, not
greater than `max_replica_count`. Only used if `machine_type` is
set.
            max_replica_count (int):
                Optional. The maximum number of machine replicas the batch operation may
be scaled to. Only used if `machine_type` is set.
Default is 10.
generate_explanation (bool):
Optional. Generate explanation along with the batch prediction
results. This will cause the batch prediction output to include
explanations based on the `prediction_format`:
- `bigquery`: output includes a column named `explanation`. The value
is a struct that conforms to the [aiplatform.gapic.Explanation] object.
- `jsonl`: The JSON objects on each line include an additional entry
keyed `explanation`. The value of the entry is a JSON object that
conforms to the [aiplatform.gapic.Explanation] object.
- `csv`: Generating explanations for CSV format is not supported.
explanation_metadata (aiplatform.explain.ExplanationMetadata):
Optional. Explanation metadata configuration for this BatchPredictionJob.
Can be specified only if `generate_explanation` is set to `True`.
This value overrides the value of `Model.explanation_metadata`.
All fields of `explanation_metadata` are optional in the request. If
a field of the `explanation_metadata` object is not populated, the
corresponding field of the `Model.explanation_metadata` object is inherited.
For more details, see `Ref docs <http://tinyurl.com/1igh60kt>`
explanation_parameters (aiplatform.explain.ExplanationParameters):
Optional. Parameters to configure explaining for Model's predictions.
Can be specified only if `generate_explanation` is set to `True`.
This value overrides the value of `Model.explanation_parameters`.
All fields of `explanation_parameters` are optional in the request. If
a field of the `explanation_parameters` object is not populated, the
corresponding field of the `Model.explanation_parameters` object is inherited.
For more details, see `Ref docs <http://tinyurl.com/1an4zake>`
            labels (Dict[str, str]):
Optional. The labels with user-defined metadata to organize your
BatchPredictionJobs. Label keys and values can be no longer than
64 characters (Unicode codepoints), can only contain lowercase
letters, numeric characters, underscores and dashes.
International characters are allowed. See https://goo.gl/xmQnxf
for more information and examples of labels.
            credentials (auth_credentials.Credentials):
Optional. Custom credentials to use to create this batch prediction
job. Overrides credentials set in aiplatform.init.
encryption_spec_key_name (Optional[str]):
Optional. The Cloud KMS resource identifier of the customer
managed encryption key used to protect the model. Has the
form:
``projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key``.
The key needs to be in the same region as where the compute
resource is created.
If set, this Model and all sub-resources of this Model will be secured by this key.
Overrides encryption_spec_key_name set in aiplatform.init.
create_request_timeout (float):
Optional. The timeout for the create request in seconds.
batch_size (int):
                Optional. The number of records (e.g. instances) of the operation given in each batch
                to a machine replica. Machine type and size of a single record should be considered
                when setting this parameter: a higher value speeds up the batch operation's execution,
                but a value that is too high will result in a whole batch not fitting in a machine's memory,
                and the whole operation will fail.
The default value is 64.
service_account (str):
Optional. Specifies the service account for workload run-as account.
Users submitting jobs must have act-as permission on this run-as account.
Returns:
job (jobs.BatchPredictionJob):
Instantiated representation of the created batch prediction job.
"""
return jobs.BatchPredictionJob.create(
job_display_name=job_display_name,
model_name=self,
instances_format=instances_format,
predictions_format=predictions_format,
gcs_source=gcs_source,
bigquery_source=bigquery_source,
gcs_destination_prefix=gcs_destination_prefix,
bigquery_destination_prefix=bigquery_destination_prefix,
model_parameters=model_parameters,
machine_type=machine_type,
accelerator_type=accelerator_type,
accelerator_count=accelerator_count,
starting_replica_count=starting_replica_count,
max_replica_count=max_replica_count,
batch_size=batch_size,
generate_explanation=generate_explanation,
explanation_metadata=explanation_metadata,
explanation_parameters=explanation_parameters,
labels=labels,
project=self.project,
location=self.location,
credentials=credentials or self.credentials,
encryption_spec_key_name=encryption_spec_key_name,
sync=sync,
create_request_timeout=create_request_timeout,
service_account=service_account,
)
@classmethod
def list(
cls,
filter: Optional[str] = None,
order_by: Optional[str] = None,
project: Optional[str] = None,
location: Optional[str] = None,
credentials: Optional[auth_credentials.Credentials] = None,
) -> List["models.Model"]:
"""List all Model resource instances.
Example Usage:
aiplatform.Model.list(
filter='labels.my_label="my_label_value" AND display_name="my_model"',
)
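        Example of ordering results by creation time, newest first (the
        ordering expression below is illustrative):
            aiplatform.Model.list(
                order_by="create_time desc",
            )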
Args:
filter (str):
Optional. An expression for filtering the results of the request.
For field names both snake_case and camelCase are supported.
order_by (str):
Optional. A comma-separated list of fields to order by, sorted in
ascending order. Use "desc" after a field name for descending.
Supported fields: `display_name`, `create_time`, `update_time`
project (str):
Optional. Project to retrieve list from. If not set, project
set in aiplatform.init will be used.
location (str):
Optional. Location to retrieve list from. If not set, location
set in aiplatform.init will be used.
credentials (auth_credentials.Credentials):
Optional. Custom credentials to use to retrieve list. Overrides
credentials set in aiplatform.init.
Returns:
List[models.Model]:
A list of Model resource objects
"""
return cls._list(
filter=filter,
order_by=order_by,
project=project,
location=location,
credentials=credentials,
)
@classmethod
def _construct_sdk_resource_from_gapic(
cls,
gapic_resource: gca_model_compat.Model,
project: Optional[str] = None,
location: Optional[str] = None,
credentials: Optional[auth_credentials.Credentials] = None,
) -> "Model":
"""Override base._construct_sdk_resource_from_gapic to allow for setting
a ModelRegistry and resource_id_validator.
Args:
gapic_resource (gca_model_compat.Model):
A GAPIC representation of a Model resource.
project (str):
Optional. Project to construct SDK object from. If not set,
project set in aiplatform.init will be used.
location (str):
Optional. Location to construct SDK object from. If not set,
location set in aiplatform.init will be used.
credentials (auth_credentials.Credentials):
Optional. Custom credentials to use to construct SDK object.
Overrides credentials set in aiplatform.init.
Returns:
Model:
An initialized SDK Model object that represents the Model GAPIC type.
"""
sdk_resource = super()._construct_sdk_resource_from_gapic(
gapic_resource=gapic_resource,
project=project,
location=location,
credentials=credentials,
)
sdk_resource._resource_id_validator = super()._revisioned_resource_id_validator
sdk_resource._registry = ModelRegistry(
sdk_resource.resource_name,
location=location,
project=project,
credentials=credentials,
)
return sdk_resource
@base.optional_sync()
def _wait_on_export(self, operation_future: operation.Operation, sync=True) -> None:
operation_future.result()
def export_model(
self,
export_format_id: str,
artifact_destination: Optional[str] = None,
image_destination: Optional[str] = None,
sync: bool = True,
) -> Dict[str, str]:
"""Exports a trained, exportable Model to a location specified by the user.
A Model is considered to be exportable if it has at least one `supported_export_formats`.
Either `artifact_destination` or `image_destination` must be provided.
Example Usage:
            my_model.export_model(
export_format_id="tf-saved-model",
artifact_destination="gs://my-bucket/models/"
)
or
            my_model.export_model(
export_format_id="custom-model",
image_destination="us-central1-docker.pkg.dev/projectId/repo/image"
)
Args:
export_format_id (str):
Required. The ID of the format in which the Model must be exported.
The list of export formats that this Model supports can be found
by calling `Model.supported_export_formats`.
artifact_destination (str):
The Cloud Storage location where the Model artifact is to be
written to. Under the directory given as the destination a
new one with name
"``model-export-<model-display-name>-<timestamp-of-export-call>``",
where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601
format, will be created. Inside, the Model and any of its
supporting files will be written.
This field should only be set when, in [Model.supported_export_formats],
the value for the key given in `export_format_id` contains ``ARTIFACT``.
image_destination (str):
The Google Container Registry or Artifact Registry URI where
the Model container image will be copied to. Accepted forms:
- Google Container Registry path. For example:
``gcr.io/projectId/imageName:tag``.
- Artifact Registry path. For example:
``us-central1-docker.pkg.dev/projectId/repoName/imageName:tag``.
This field should only be set when, in [Model.supported_export_formats],
the value for the key given in `export_format_id` contains ``IMAGE``.
sync (bool):
Whether to execute this export synchronously. If False, this method
will be executed in concurrent Future and any downstream object will
be immediately returned and synced when the Future has completed.
Returns:
output_info (Dict[str, str]):
Details of the completed export with output destination paths to
the artifacts or container image.
Raises:
ValueError: If model does not support exporting.
ValueError: If invalid arguments or export formats are provided.
"""
self.wait()
# Model does not support exporting
if not self.supported_export_formats:
raise ValueError(f"The model `{self.resource_name}` is not exportable.")
# No destination provided
if not any((artifact_destination, image_destination)):
raise ValueError(
"Please provide an `artifact_destination` or `image_destination`."
)
export_format_id = export_format_id.lower()
# Unsupported export type
if export_format_id not in self.supported_export_formats:
raise ValueError(
f"'{export_format_id}' is not a supported export format for this model. "
f"Choose one of the following: {self.supported_export_formats}"
)
content_types = gca_model_compat.Model.ExportFormat.ExportableContent
supported_content_types = self.supported_export_formats[export_format_id]
if (
artifact_destination
and content_types.ARTIFACT not in supported_content_types
):
raise ValueError(
"This model can not be exported as an artifact in '{export_format_id}' format. "
"Try exporting as a container image by passing the `image_destination` argument."
)
if image_destination and content_types.IMAGE not in supported_content_types:
raise ValueError(
"This model can not be exported as a container image in '{export_format_id}' format. "
"Try exporting the model artifacts by passing a `artifact_destination` argument."
)
# Construct request payload
output_config = gca_model_service_compat.ExportModelRequest.OutputConfig(
export_format_id=export_format_id
)
if artifact_destination:
output_config.artifact_destination = gca_io_compat.GcsDestination(
output_uri_prefix=artifact_destination
)
if image_destination:
output_config.image_destination = (
gca_io_compat.ContainerRegistryDestination(output_uri=image_destination)
)
_LOGGER.log_action_start_against_resource("Exporting", "model", self)
model_name = self.versioned_resource_name
operation_future = self.api_client.export_model(
name=model_name, output_config=output_config
)
_LOGGER.log_action_started_against_resource_with_lro(
"Export", "model", self.__class__, operation_future
)
# Block before returning
self._wait_on_export(operation_future=operation_future, sync=sync)
_LOGGER.log_action_completed_against_resource("model", "exported", self)
return json_format.MessageToDict(operation_future.metadata.output_info._pb)
@classmethod
@base.optional_sync()
def upload_xgboost_model_file(
cls,
model_file_path: str,
xgboost_version: Optional[str] = None,
display_name: Optional[str] = None,
description: Optional[str] = None,
model_id: Optional[str] = None,
parent_model: Optional[str] = None,
is_default_version: Optional[bool] = True,
version_aliases: Optional[Sequence[str]] = None,
version_description: Optional[str] = None,
instance_schema_uri: Optional[str] = None,
parameters_schema_uri: Optional[str] = None,
prediction_schema_uri: Optional[str] = None,
explanation_metadata: Optional[aiplatform.explain.ExplanationMetadata] = None,
explanation_parameters: Optional[
aiplatform.explain.ExplanationParameters
] = None,
project: Optional[str] = None,
location: Optional[str] = None,
credentials: Optional[auth_credentials.Credentials] = None,
labels: Optional[Dict[str, str]] = None,
encryption_spec_key_name: Optional[str] = None,
staging_bucket: Optional[str] = None,
sync=True,
upload_request_timeout: Optional[float] = None,
) -> "Model":
"""Uploads a model and returns a Model representing the uploaded Model
resource.
Example usage:
my_model = Model.upload_xgboost_model_file(
model_file_path="iris.xgboost_model.bst"
)
Args:
model_file_path (str): Required. Local file path of the model.
xgboost_version (str): Optional. The version of the XGBoost serving container.
Supported versions: ["0.82", "0.90", "1.1", "1.2", "1.3", "1.4"].
If the version is not specified, the latest version is used.
display_name (str):
Optional. The display name of the Model. The name can be up to 128
                characters long and can consist of any UTF-8 characters.
description (str):
The description of the model.
model_id (str):
Optional. The ID to use for the uploaded Model, which will
become the final component of the model resource name.
This value may be up to 63 characters, and valid characters
are `[a-z0-9_-]`. The first character cannot be a number or hyphen.
parent_model (str):
Optional. The resource name or model ID of an existing model that the
newly-uploaded model will be a version of.
Only set this field when uploading a new version of an existing model.
is_default_version (bool):
Optional. When set to True, the newly uploaded model version will
automatically have alias "default" included. Subsequent uses of
this model without a version specified will use this "default" version.
When set to False, the "default" alias will not be moved.
Actions targeting the newly-uploaded model version will need
to specifically reference this version by ID or alias.
New model uploads, i.e. version 1, will always be "default" aliased.
version_aliases (Sequence[str]):
Optional. User provided version aliases so that a model version
can be referenced via alias instead of auto-generated version ID.
A default version alias will be created for the first version of the model.
The format is [a-z][a-zA-Z0-9-]{0,126}[a-z0-9]
version_description (str):
Optional. The description of the model version being uploaded.
instance_schema_uri (str):
Optional. Points to a YAML file stored on Google Cloud
Storage describing the format of a single instance, which
are used in
``PredictRequest.instances``,
``ExplainRequest.instances``
and
``BatchPredictionJob.input_config``.
The schema is defined as an OpenAPI 3.0.2 `Schema
Object <https://tinyurl.com/y538mdwt#schema-object>`__.
AutoML Models always have this field populated by AI
Platform. Note: The URI given on output will be immutable
and probably different, including the URI scheme, than the
one given on input. The output URI will point to a location
where the user only has a read access.
parameters_schema_uri (str):
Optional. Points to a YAML file stored on Google Cloud
Storage describing the parameters of prediction and
explanation via
``PredictRequest.parameters``,
``ExplainRequest.parameters``
and
``BatchPredictionJob.model_parameters``.
The schema is defined as an OpenAPI 3.0.2 `Schema
Object <https://tinyurl.com/y538mdwt#schema-object>`__.
AutoML Models always have this field populated by AI
Platform, if no parameters are supported it is set to an
empty string. Note: The URI given on output will be
immutable and probably different, including the URI scheme,
than the one given on input. The output URI will point to a
location where the user only has a read access.
prediction_schema_uri (str):
Optional. Points to a YAML file stored on Google Cloud
Storage describing the format of a single prediction
produced by this Model, which are returned via
``PredictResponse.predictions``,
``ExplainResponse.explanations``,
and
``BatchPredictionJob.output_config``.
The schema is defined as an OpenAPI 3.0.2 `Schema
Object <https://tinyurl.com/y538mdwt#schema-object>`__.
AutoML Models always have this field populated by AI
Platform. Note: The URI given on output will be immutable
and probably different, including the URI scheme, than the
one given on input. The output URI will point to a location
where the user only has a read access.
explanation_metadata (aiplatform.explain.ExplanationMetadata):
Optional. Metadata describing the Model's input and output for explanation.
`explanation_metadata` is optional while `explanation_parameters` must be
specified when used.
For more details, see `Ref docs <http://tinyurl.com/1igh60kt>`
explanation_parameters (aiplatform.explain.ExplanationParameters):
Optional. Parameters to configure explaining for Model's predictions.
For more details, see `Ref docs <http://tinyurl.com/1an4zake>`
project: Optional[str]=None,
Project to upload this model to. Overrides project set in
aiplatform.init.
location: Optional[str]=None,
Location to upload this model to. Overrides location set in
aiplatform.init.
credentials: Optional[auth_credentials.Credentials]=None,
Custom credentials to use to upload this model. Overrides credentials
set in aiplatform.init.
labels (Dict[str, str]):
Optional. The labels with user-defined metadata to
organize your Models.
Label keys and values can be no longer than 64
characters (Unicode codepoints), can only
contain lowercase letters, numeric characters,
underscores and dashes. International characters
are allowed.
See https://goo.gl/xmQnxf for more information
and examples of labels.
encryption_spec_key_name (Optional[str]):
Optional. The Cloud KMS resource identifier of the customer
managed encryption key used to protect the model. Has the
form:
``projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key``.
The key needs to be in the same region as where the compute
resource is created.
If set, this Model and all sub-resources of this Model will be secured by this key.
Overrides encryption_spec_key_name set in aiplatform.init.
staging_bucket (str):
Optional. Bucket to stage local model artifacts. Overrides
staging_bucket set in aiplatform.init.
            sync (bool):
                Whether to execute this method synchronously. If False, this method
                will be executed in concurrent Future and any downstream object will
                be immediately returned and synced when the Future has completed.
            upload_request_timeout (float):
                Optional. The timeout for the upload request in seconds.
Returns:
model (aiplatform.Model):
Instantiated representation of the uploaded model resource.
Raises:
            ValueError: If `model_file_path` does not point to an existing file.
"""
if not display_name:
display_name = cls._generate_display_name("XGBoost model")
XGBOOST_SUPPORTED_MODEL_FILE_EXTENSIONS = [
".pkl",
".joblib",
".bst",
]
container_image_uri = aiplatform.helpers.get_prebuilt_prediction_container_uri(
region=location,
framework="xgboost",
framework_version=xgboost_version or "1.4",
accelerator="cpu",
)
model_file_path_obj = pathlib.Path(model_file_path)
if not model_file_path_obj.is_file():
raise ValueError(
f"model_file_path path must point to a file: '{model_file_path}'"
)
model_file_extension = model_file_path_obj.suffix
if model_file_extension not in XGBOOST_SUPPORTED_MODEL_FILE_EXTENSIONS:
_LOGGER.warning(
f"Only the following XGBoost model file extensions are currently supported: '{XGBOOST_SUPPORTED_MODEL_FILE_EXTENSIONS}'"
)
_LOGGER.warning(
"Treating the model file as a binary serialized XGBoost Booster."
)
model_file_extension = ".bst"
# Preparing model directory
# We cannot clean up the directory immediately after calling Model.upload since
# that call may be asynchronous and return before the model file has been read.
# To work around this, we make this method asynchronous (decorate with @base.optional_sync)
# but call Model.upload with sync=True.
with tempfile.TemporaryDirectory() as prepared_model_dir:
prepared_model_file_path = pathlib.Path(prepared_model_dir) / (
"model" + model_file_extension
)
shutil.copy(model_file_path_obj, prepared_model_file_path)
return cls.upload(
serving_container_image_uri=container_image_uri,
artifact_uri=prepared_model_dir,
display_name=display_name,
description=description,
model_id=model_id,
parent_model=parent_model,
is_default_version=is_default_version,
version_aliases=version_aliases,
version_description=version_description,
instance_schema_uri=instance_schema_uri,
parameters_schema_uri=parameters_schema_uri,
prediction_schema_uri=prediction_schema_uri,
explanation_metadata=explanation_metadata,
explanation_parameters=explanation_parameters,
project=project,
location=location,
credentials=credentials,
labels=labels,
encryption_spec_key_name=encryption_spec_key_name,
staging_bucket=staging_bucket,
sync=True,
upload_request_timeout=upload_request_timeout,
)
@classmethod
@base.optional_sync()
def upload_scikit_learn_model_file(
cls,
model_file_path: str,
sklearn_version: Optional[str] = None,
display_name: Optional[str] = None,
description: Optional[str] = None,
model_id: Optional[str] = None,
parent_model: Optional[str] = None,
is_default_version: Optional[bool] = True,
version_aliases: Optional[Sequence[str]] = None,
version_description: Optional[str] = None,
instance_schema_uri: Optional[str] = None,
parameters_schema_uri: Optional[str] = None,
prediction_schema_uri: Optional[str] = None,
explanation_metadata: Optional[aiplatform.explain.ExplanationMetadata] = None,
explanation_parameters: Optional[
aiplatform.explain.ExplanationParameters
] = None,
project: Optional[str] = None,
location: Optional[str] = None,
credentials: Optional[auth_credentials.Credentials] = None,
labels: Optional[Dict[str, str]] = None,
encryption_spec_key_name: Optional[str] = None,
staging_bucket: Optional[str] = None,
sync=True,
upload_request_timeout: Optional[float] = None,
) -> "Model":
"""Uploads a model and returns a Model representing the uploaded Model
resource.
Example usage:
my_model = Model.upload_scikit_learn_model_file(
model_file_path="iris.sklearn_model.joblib"
)
Args:
model_file_path (str): Required. Local file path of the model.
sklearn_version (str):
Optional. The version of the Scikit-learn serving container.
Supported versions: ["0.20", "0.22", "0.23", "0.24", "1.0"].
If the version is not specified, the latest version is used.
display_name (str):
Optional. The display name of the Model. The name can be up to 128
                characters long and can consist of any UTF-8 characters.
description (str):
The description of the model.
model_id (str):
Optional. The ID to use for the uploaded Model, which will
become the final component of the model resource name.
This value may be up to 63 characters, and valid characters
are `[a-z0-9_-]`. The first character cannot be a number or hyphen.
parent_model (str):
Optional. The resource name or model ID of an existing model that the
newly-uploaded model will be a version of.
Only set this field when uploading a new version of an existing model.
is_default_version (bool):
Optional. When set to True, the newly uploaded model version will
automatically have alias "default" included. Subsequent uses of
this model without a version specified will use this "default" version.
When set to False, the "default" alias will not be moved.
Actions targeting the newly-uploaded model version will need
to specifically reference this version by ID or alias.
New model uploads, i.e. version 1, will always be "default" aliased.
version_aliases (Sequence[str]):
Optional. User provided version aliases so that a model version
can be referenced via alias instead of auto-generated version ID.
A default version alias will be created for the first version of the model.
The format is [a-z][a-zA-Z0-9-]{0,126}[a-z0-9]
version_description (str):
Optional. The description of the model version being uploaded.
instance_schema_uri (str):
Optional. Points to a YAML file stored on Google Cloud
Storage describing the format of a single instance, which
are used in
``PredictRequest.instances``,
``ExplainRequest.instances``
and
``BatchPredictionJob.input_config``.
The schema is defined as an OpenAPI 3.0.2 `Schema
Object <https://tinyurl.com/y538mdwt#schema-object>`__.
AutoML Models always have this field populated by AI
Platform. Note: The URI given on output will be immutable
and probably different, including the URI scheme, than the
one given on input. The output URI will point to a location
where the user only has a read access.
parameters_schema_uri (str):
Optional. Points to a YAML file stored on Google Cloud
Storage describing the parameters of prediction and
explanation via
``PredictRequest.parameters``,
``ExplainRequest.parameters``
and
``BatchPredictionJob.model_parameters``.
The schema is defined as an OpenAPI 3.0.2 `Schema
Object <https://tinyurl.com/y538mdwt#schema-object>`__.
AutoML Models always have this field populated by AI
Platform, if no parameters are supported it is set to an
empty string. Note: The URI given on output will be
immutable and probably different, including the URI scheme,
than the one given on input. The output URI will point to a
location where the user only has a read access.
prediction_schema_uri (str):
Optional. Points to a YAML file stored on Google Cloud
Storage describing the format of a single prediction
produced by this Model, which are returned via
``PredictResponse.predictions``,
``ExplainResponse.explanations``,
and
``BatchPredictionJob.output_config``.
The schema is defined as an OpenAPI 3.0.2 `Schema
Object <https://tinyurl.com/y538mdwt#schema-object>`__.
AutoML Models always have this field populated by AI
Platform. Note: The URI given on output will be immutable
and probably different, including the URI scheme, than the
one given on input. The output URI will point to a location
where the user only has a read access.
explanation_metadata (aiplatform.explain.ExplanationMetadata):
Optional. Metadata describing the Model's input and output for explanation.
`explanation_metadata` is optional while `explanation_parameters` must be
specified when used.
For more details, see `Ref docs <http://tinyurl.com/1igh60kt>`
explanation_parameters (aiplatform.explain.ExplanationParameters):
Optional. Parameters to configure explaining for Model's predictions.
For more details, see `Ref docs <http://tinyurl.com/1an4zake>`
project: Optional[str]=None,
Project to upload this model to. Overrides project set in
aiplatform.init.
location: Optional[str]=None,
Location to upload this model to. Overrides location set in
aiplatform.init.
credentials: Optional[auth_credentials.Credentials]=None,
Custom credentials to use to upload this model. Overrides credentials
set in aiplatform.init.
labels (Dict[str, str]):
Optional. The labels with user-defined metadata to
organize your Models.
Label keys and values can be no longer than 64
characters (Unicode codepoints), can only
contain lowercase letters, numeric characters,
underscores and dashes. International characters
are allowed.
See https://goo.gl/xmQnxf for more information
and examples of labels.
encryption_spec_key_name (Optional[str]):
Optional. The Cloud KMS resource identifier of the customer
managed encryption key used to protect the model. Has the
form:
``projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key``.
The key needs to be in the same region as where the compute
resource is created.
If set, this Model and all sub-resources of this Model will be secured by this key.
Overrides encryption_spec_key_name set in aiplatform.init.
staging_bucket (str):
Optional. Bucket to stage local model artifacts. Overrides
staging_bucket set in aiplatform.init.
sync (bool):
Whether to execute this method synchronously. If False, this method
will be executed in concurrent Future and any downstream object will
be immediately returned and synced when the Future has completed.
upload_request_timeout (float):
Optional. The timeout for the upload request in seconds.
Returns:
model (aiplatform.Model):
Instantiated representation of the uploaded model resource.
Raises:
            ValueError: If explanation_metadata is specified while explanation_parameters
                is not, or if `model_file_path` does not point to an existing file.
"""
if not display_name:
display_name = cls._generate_display_name("Scikit-Learn model")
SKLEARN_SUPPORTED_MODEL_FILE_EXTENSIONS = [
".pkl",
".joblib",
]
container_image_uri = aiplatform.helpers.get_prebuilt_prediction_container_uri(
region=location,
framework="sklearn",
framework_version=sklearn_version or "1.0",
accelerator="cpu",
)
model_file_path_obj = pathlib.Path(model_file_path)
if not model_file_path_obj.is_file():
raise ValueError(
f"model_file_path path must point to a file: '{model_file_path}'"
)
model_file_extension = model_file_path_obj.suffix
if model_file_extension not in SKLEARN_SUPPORTED_MODEL_FILE_EXTENSIONS:
_LOGGER.warning(
f"Only the following Scikit-learn model file extensions are currently supported: '{SKLEARN_SUPPORTED_MODEL_FILE_EXTENSIONS}'"
)
_LOGGER.warning(
"Treating the model file as a pickle serialized Scikit-learn model."
)
model_file_extension = ".pkl"
# Preparing model directory
# We cannot clean up the directory immediately after calling Model.upload since
# that call may be asynchronous and return before the model file has been read.
# To work around this, we make this method asynchronous (decorate with @base.optional_sync)
# but call Model.upload with sync=True.
with tempfile.TemporaryDirectory() as prepared_model_dir:
prepared_model_file_path = pathlib.Path(prepared_model_dir) / (
"model" + model_file_extension
)
shutil.copy(model_file_path_obj, prepared_model_file_path)
return cls.upload(
serving_container_image_uri=container_image_uri,
artifact_uri=prepared_model_dir,
display_name=display_name,
description=description,
model_id=model_id,
parent_model=parent_model,
is_default_version=is_default_version,
version_aliases=version_aliases,
version_description=version_description,
instance_schema_uri=instance_schema_uri,
parameters_schema_uri=parameters_schema_uri,
prediction_schema_uri=prediction_schema_uri,
explanation_metadata=explanation_metadata,
explanation_parameters=explanation_parameters,
project=project,
location=location,
credentials=credentials,
labels=labels,
encryption_spec_key_name=encryption_spec_key_name,
staging_bucket=staging_bucket,
sync=True,
upload_request_timeout=upload_request_timeout,
)
@classmethod
def upload_tensorflow_saved_model(
cls,
saved_model_dir: str,
tensorflow_version: Optional[str] = None,
use_gpu: bool = False,
display_name: Optional[str] = None,
description: Optional[str] = None,
model_id: Optional[str] = None,
parent_model: Optional[str] = None,
is_default_version: Optional[bool] = True,
version_aliases: Optional[Sequence[str]] = None,
version_description: Optional[str] = None,
instance_schema_uri: Optional[str] = None,
parameters_schema_uri: Optional[str] = None,
prediction_schema_uri: Optional[str] = None,
explanation_metadata: Optional[aiplatform.explain.ExplanationMetadata] = None,
explanation_parameters: Optional[
aiplatform.explain.ExplanationParameters
] = None,
project: Optional[str] = None,
location: Optional[str] = None,
credentials: Optional[auth_credentials.Credentials] = None,
labels: Optional[Dict[str, str]] = None,
encryption_spec_key_name: Optional[str] = None,
staging_bucket: Optional[str] = None,
sync=True,
        upload_request_timeout: Optional[float] = None,
) -> "Model":
"""Uploads a model and returns a Model representing the uploaded Model
resource.
Example usage:
            my_model = Model.upload_tensorflow_saved_model(
                saved_model_dir="iris.tensorflow_model"
            )
Args:
saved_model_dir (str): Required.
Local directory of the Tensorflow SavedModel.
tensorflow_version (str):
Optional. The version of the Tensorflow serving container.
Supported versions: ["0.15", "2.1", "2.2", "2.3", "2.4", "2.5", "2.6", "2.7"].
If the version is not specified, the latest version is used.
use_gpu (bool): Whether to use GPU for model serving.
display_name (str):
Optional. The display name of the Model. The name can be up to 128
                characters long and can consist of any UTF-8 characters.
description (str):
The description of the model.
model_id (str):
Optional. The ID to use for the uploaded Model, which will
become the final component of the model resource name.
This value may be up to 63 characters, and valid characters
are `[a-z0-9_-]`. The first character cannot be a number or hyphen.
parent_model (str):
Optional. The resource name or model ID of an existing model that the
newly-uploaded model will be a version of.
Only set this field when uploading a new version of an existing model.
is_default_version (bool):
Optional. When set to True, the newly uploaded model version will
automatically have alias "default" included. Subsequent uses of
this model without a version specified will use this "default" version.
When set to False, the "default" alias will not be moved.
Actions targeting the newly-uploaded model version will need
to specifically reference this version by ID or alias.
New model uploads, i.e. version 1, will always be "default" aliased.
version_aliases (Sequence[str]):
Optional. User provided version aliases so that a model version
can be referenced via alias instead of auto-generated version ID.
A default version alias will be created for the first version of the model.
The format is [a-z][a-zA-Z0-9-]{0,126}[a-z0-9]
version_description (str):
Optional. The description of the model version being uploaded.
instance_schema_uri (str):
Optional. Points to a YAML file stored on Google Cloud
Storage describing the format of a single instance, which
are used in
``PredictRequest.instances``,
``ExplainRequest.instances``
and
``BatchPredictionJob.input_config``.
The schema is defined as an OpenAPI 3.0.2 `Schema
Object <https://tinyurl.com/y538mdwt#schema-object>`__.
AutoML Models always have this field populated by AI
Platform. Note: The URI given on output will be immutable
and probably different, including the URI scheme, than the
one given on input. The output URI will point to a location
where the user only has a read access.
parameters_schema_uri (str):
Optional. Points to a YAML file stored on Google Cloud
Storage describing the parameters of prediction and
explanation via
``PredictRequest.parameters``,
``ExplainRequest.parameters``
and
``BatchPredictionJob.model_parameters``.
The schema is defined as an OpenAPI 3.0.2 `Schema
Object <https://tinyurl.com/y538mdwt#schema-object>`__.
AutoML Models always have this field populated by AI
Platform, if no parameters are supported it is set to an
empty string. Note: The URI given on output will be
immutable and probably different, including the URI scheme,
than the one given on input. The output URI will point to a
location where the user only has a read access.
prediction_schema_uri (str):
Optional. Points to a YAML file stored on Google Cloud
Storage describing the format of a single prediction
produced by this Model, which are returned via
``PredictResponse.predictions``,
``ExplainResponse.explanations``,
and
``BatchPredictionJob.output_config``.
The schema is defined as an OpenAPI 3.0.2 `Schema
Object <https://tinyurl.com/y538mdwt#schema-object>`__.
AutoML Models always have this field populated by AI
Platform. Note: The URI given on output will be immutable
and probably different, including the URI scheme, than the
one given on input. The output URI will point to a location
where the user only has a read access.
explanation_metadata (aiplatform.explain.ExplanationMetadata):
Optional. Metadata describing the Model's input and output for explanation.
`explanation_metadata` is optional while `explanation_parameters` must be
specified when used.
For more details, see `Ref docs <http://tinyurl.com/1igh60kt>`
explanation_parameters (aiplatform.explain.ExplanationParameters):
Optional. Parameters to configure explaining for Model's predictions.
For more details, see `Ref docs <http://tinyurl.com/1an4zake>`
project: Optional[str]=None,
Project to upload this model to. Overrides project set in
aiplatform.init.
location: Optional[str]=None,
Location to upload this model to. Overrides location set in
aiplatform.init.
credentials: Optional[auth_credentials.Credentials]=None,
Custom credentials to use to upload this model. Overrides credentials
set in aiplatform.init.
labels (Dict[str, str]):
Optional. The labels with user-defined metadata to
organize your Models.
Label keys and values can be no longer than 64
characters (Unicode codepoints), can only
contain lowercase letters, numeric characters,
underscores and dashes. International characters
are allowed.
See https://goo.gl/xmQnxf for more information
and examples of labels.
encryption_spec_key_name (Optional[str]):
Optional. The Cloud KMS resource identifier of the customer
managed encryption key used to protect the model. Has the
form:
``projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key``.
The key needs to be in the same region as where the compute
resource is created.
If set, this Model and all sub-resources of this Model will be secured by this key.
Overrides encryption_spec_key_name set in aiplatform.init.
staging_bucket (str):
Optional. Bucket to stage local model artifacts. Overrides
staging_bucket set in aiplatform.init.
sync (bool):
Whether to execute this method synchronously. If False, this method
will be executed in concurrent Future and any downstream object will
be immediately returned and synced when the Future has completed.
upload_request_timeout (float):
Optional. The timeout for the upload request in seconds.
Returns:
model (aiplatform.Model):
Instantiated representation of the uploaded model resource.
Raises:
ValueError: If explanation_metadata is specified while explanation_parameters
is not. Also if model directory does not contain a supported model file.
"""
if not display_name:
display_name = cls._generate_display_name("Tensorflow model")
container_image_uri = aiplatform.helpers.get_prebuilt_prediction_container_uri(
region=location,
framework="tensorflow",
framework_version=tensorflow_version or "2.7",
accelerator="gpu" if use_gpu else "cpu",
)
return cls.upload(
serving_container_image_uri=container_image_uri,
artifact_uri=saved_model_dir,
display_name=display_name,
description=description,
model_id=model_id,
parent_model=parent_model,
is_default_version=is_default_version,
version_aliases=version_aliases,
version_description=version_description,
instance_schema_uri=instance_schema_uri,
parameters_schema_uri=parameters_schema_uri,
prediction_schema_uri=prediction_schema_uri,
explanation_metadata=explanation_metadata,
explanation_parameters=explanation_parameters,
project=project,
location=location,
credentials=credentials,
labels=labels,
encryption_spec_key_name=encryption_spec_key_name,
staging_bucket=staging_bucket,
sync=sync,
upload_request_timeout=upload_request_timeout,
)
# TODO(b/273499620): Add async support.
def copy(
self,
destination_location: str,
destination_model_id: Optional[str] = None,
destination_parent_model: Optional[str] = None,
encryption_spec_key_name: Optional[str] = None,
copy_request_timeout: Optional[float] = None,
) -> "Model":
"""Copys a model and returns a Model representing the copied Model
resource. This method is a blocking call.
Example usage:
copied_model = my_model.copy(
destination_location="us-central1"
)
Args:
destination_location (str):
The destination location to copy the model to.
destination_model_id (str):
Optional. The ID to use for the copied Model, which will
become the final component of the model resource name.
This value may be up to 63 characters, and valid characters
are `[a-z0-9_-]`. The first character cannot be a number or hyphen.
Only set this field when copying as a new model. If this field is not set,
a numeric model id will be generated.
destination_parent_model (str):
Optional. The resource name or model ID of an existing model that the
newly-copied model will be a version of.
Only set this field when copying as a new version of an existing model.
encryption_spec_key_name (Optional[str]):
Optional. The Cloud KMS resource identifier of the customer
managed encryption key used to protect the model. Has the
form:
``projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key``.
The key needs to be in the same region as where the compute
resource is created.
If set, this Model and all sub-resources of this Model will be secured by this key.
Overrides encryption_spec_key_name set in aiplatform.init.
copy_request_timeout (float):
Optional. The timeout for the copy request in seconds.
Returns:
model (aiplatform.Model):
Instantiated representation of the copied model resource.
Raises:
ValueError: If both `destination_model_id` and `destination_parent_model` are set.
"""
if destination_model_id is not None and destination_parent_model is not None:
raise ValueError(
"`destination_model_id` and `destination_parent_model` can not be set together."
)
parent = initializer.global_config.common_location_path(
initializer.global_config.project, destination_location
)
source_model = self.versioned_resource_name
destination_parent_model = ModelRegistry._get_true_version_parent(
parent_model=destination_parent_model,
project=initializer.global_config.project,
location=destination_location,
)
encryption_spec = initializer.global_config.get_encryption_spec(
encryption_spec_key_name=encryption_spec_key_name,
)
if destination_model_id is not None:
request = gca_model_service_compat.CopyModelRequest(
parent=parent,
source_model=source_model,
model_id=destination_model_id,
encryption_spec=encryption_spec,
)
else:
request = gca_model_service_compat.CopyModelRequest(
parent=parent,
source_model=source_model,
parent_model=destination_parent_model,
encryption_spec=encryption_spec,
)
api_client = initializer.global_config.create_client(
client_class=utils.ModelClientWithOverride,
location_override=destination_location,
credentials=initializer.global_config.credentials,
)
_LOGGER.log_action_start_against_resource("Copying", "", self)
lro = api_client.copy_model(
request=request,
timeout=copy_request_timeout,
)
_LOGGER.log_action_started_against_resource_with_lro(
"Copy", "", self.__class__, lro
)
model_copy_response = lro.result(timeout=None)
this_model = models.Model(
model_copy_response.model,
version=model_copy_response.model_version_id,
location=destination_location,
)
_LOGGER.log_action_completed_against_resource("", "copied", this_model)
return this_model
def list_model_evaluations(
self,
) -> List["model_evaluation.ModelEvaluation"]:
"""List all Model Evaluation resources associated with this model.
If this Model resource was instantiated with a version, the Model
Evaluation resources for that version will be returned. If no version
was provided when the Model resource was instantiated, Model Evaluation
resources will be returned for the default version.
Example Usage:
my_model = Model(
model_name="projects/123/locations/us-central1/models/456@1"
)
my_evaluations = my_model.list_model_evaluations()
Returns:
List[model_evaluation.ModelEvaluation]:
List of ModelEvaluation resources for the model.
"""
return model_evaluation.ModelEvaluation._list(
parent=self.versioned_resource_name,
credentials=self.credentials,
)
def get_model_evaluation(
self,
evaluation_id: Optional[str] = None,
) -> Optional[model_evaluation.ModelEvaluation]:
"""Returns a ModelEvaluation resource and instantiates its representation.
If no evaluation_id is passed, it will return the first evaluation associated
with this model. If the aiplatform.Model resource was instantiated with a
version, this will return a Model Evaluation from that version. If no version
was specified when instantiating the Model resource, this will return an
Evaluation from the default version.
Example usage:
my_model = Model(
model_name="projects/123/locations/us-central1/models/456"
)
my_evaluation = my_model.get_model_evaluation(
evaluation_id="789"
)
# If no arguments are passed, this method returns the first evaluation for the model
my_evaluation = my_model.get_model_evaluation()
Args:
evaluation_id (str):
Optional. The ID of the model evaluation to retrieve.
Returns:
model_evaluation.ModelEvaluation:
Instantiated representation of the ModelEvaluation resource.
"""
evaluations = self.list_model_evaluations()
if not evaluation_id:
if len(evaluations) > 1:
_LOGGER.warning(
f"Your model has more than one model evaluation, this is returning only one evaluation resource: {evaluations[0].resource_name}"
)
_ipython_utils.display_model_evaluation_button(evaluations[0])
return evaluations[0]
else:
resource_uri_parts = self._parse_resource_name(self.resource_name)
evaluation_resource_name = (
model_evaluation.ModelEvaluation._format_resource_name(
**resource_uri_parts,
evaluation=evaluation_id,
)
)
evaluation = model_evaluation.ModelEvaluation(
evaluation_name=evaluation_resource_name,
credentials=self.credentials,
)
_ipython_utils.display_model_evaluation_button(evaluation)
return evaluation
def evaluate(
self,
prediction_type: str,
target_field_name: str,
gcs_source_uris: Optional[List[str]] = None,
bigquery_source_uri: Optional[str] = None,
bigquery_destination_output_uri: Optional[str] = None,
class_labels: Optional[List[str]] = None,
prediction_label_column: Optional[str] = None,
prediction_score_column: Optional[str] = None,
staging_bucket: Optional[str] = None,
service_account: Optional[str] = None,
generate_feature_attributions: bool = False,
evaluation_pipeline_display_name: Optional[str] = None,
evaluation_metrics_display_name: Optional[str] = None,
network: Optional[str] = None,
encryption_spec_key_name: Optional[str] = None,
experiment: Optional[Union[str, "aiplatform.Experiment"]] = None,
enable_caching: Optional[bool] = None,
) -> "model_evaluation._ModelEvaluationJob":
"""Creates a model evaluation job running on Vertex Pipelines and returns the resulting
ModelEvaluationJob resource.
Example usage:
```
my_model = Model(
model_name="projects/123/locations/us-central1/models/456"
)
my_evaluation_job = my_model.evaluate(
prediction_type="classification",
target_field_name="type",
data_source_uris=["gs://sdk-model-eval/my-prediction-data.csv"],
staging_bucket="gs://my-staging-bucket/eval_pipeline_root",
)
my_evaluation_job.wait()
my_evaluation = my_evaluation_job.get_model_evaluation()
my_evaluation.metrics
```
Args:
prediction_type (str):
Required. The problem type being addressed by this evaluation run. 'classification' and 'regression'
are the currently supported problem types.
target_field_name (str):
Required. The column name of the field containing the label for this prediction task.
gcs_source_uris (List[str]):
Optional. A list of Cloud Storage data files containing the ground truth data to use for this
evaluation job. These files should contain your model's prediction column. Currently only Google Cloud Storage
urls are supported, for example: "gs://path/to/your/data.csv". The provided data files must be
either CSV or JSONL. One of `gcs_source_uris` or `bigquery_source_uri` is required.
bigquery_source_uri (str):
Optional. A bigquery table URI containing the ground truth data to use for this evaluation job. This uri should
be in the format 'bq://my-project-id.dataset.table'. One of `gcs_source_uris` or `bigquery_source_uri` is
required.
bigquery_destination_output_uri (str):
Optional. A bigquery table URI where the Batch Prediction job associated with your Model Evaluation will write
prediction output. This can be a BigQuery URI to a project ('bq://my-project'), a dataset
('bq://my-project.my-dataset'), or a table ('bq://my-project.my-dataset.my-table'). Required if `bigquery_source_uri`
is provided.
class_labels (List[str]):
Optional. For custom (non-AutoML) classification models, a list of possible class names, in the
same order that predictions are generated. This argument is required when prediction_type is 'classification'.
For example, in a classification model with 3 possible classes that are outputted in the format: [0.97, 0.02, 0.01]
with the class names "cat", "dog", and "fish", the value of `class_labels` should be `["cat", "dog", "fish"]` where
the class "cat" corresponds with 0.97 in the example above.
prediction_label_column (str):
Optional. The column name of the field containing classes the model is scoring. Formatted to be able to find nested
columns, delimited by `.`. If not set, defaulted to `prediction.classes` for classification.
prediction_score_column (str):
Optional. The column name of the field containing batch prediction scores. Formatted to be able to find nested columns,
delimited by `.`. If not set, defaulted to `prediction.scores` for a `classification` problem_type, `prediction.value`
for a `regression` problem_type.
staging_bucket (str):
Optional. The GCS directory to use for staging files from this evaluation job. Defaults to the value set in
aiplatform.init(staging_bucket=...) if not provided. Required if staging_bucket is not set in aiplatform.init().
service_account (str):
Specifies the service account for workload run-as account for this Model Evaluation PipelineJob.
Users submitting jobs must have act-as permission on this run-as account. The service account running
this Model Evaluation job needs the following permissions: Dataflow Worker, Storage Admin,
Vertex AI Administrator, and Vertex AI Service Agent.
generate_feature_attributions (boolean):
Optional. Whether the model evaluation job should generate feature attributions. Defaults to False if not specified.
evaluation_pipeline_display_name (str):
Optional. The display name of your model evaluation job. This is the display name that will be applied to the
Vertex Pipeline run for your evaluation job. If not set, a display name will be generated automatically.
evaluation_metrics_display_name (str):
Optional. The display name of the model evaluation resource uploaded to Vertex from your Model Evaluation pipeline.
network (str):
The full name of the Compute Engine network to which the job
should be peered. For example, projects/12345/global/networks/myVPC.
Private services access must already be configured for the network.
If left unspecified, the job is not peered with any network.
encryption_spec_key_name (str):
Optional. The Cloud KMS resource identifier of the customer managed encryption key used to protect the job. Has the
form: ``projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key``. The key needs to be in the same
region as where the compute resource is created. If this is set, then all
resources created by the PipelineJob for this Model Evaluation will be encrypted with the provided encryption key.
If not specified, encryption_spec of original PipelineJob will be used.
experiment (Union[str, experiments_resource.Experiment]):
Optional. The Vertex AI experiment name or instance to associate to the PipelineJob executing
this model evaluation job. Metrics produced by the PipelineJob as system.Metric Artifacts
will be associated as metrics to the provided experiment, and parameters from this PipelineJob
will be associated as parameters to the provided experiment.
enable_caching (bool):
Optional. Whether to turn on caching for the run.
If this is not set, defaults to the compile time settings, which
are True for all tasks by default, while users may specify
different caching options for individual tasks.
If this is set, the setting applies to all tasks in the pipeline.
Overrides the compile time settings.
Returns:
model_evaluation.ModelEvaluationJob: Instantiated representation of the
_ModelEvaluationJob.
Raises:
ValueError:
If staging_bucket was not set in aiplatform.init() and staging_bucket was not provided.
If the provided `prediction_type` is not valid.
                If the provided `gcs_source_uris` don't start with 'gs://'.
"""
if (gcs_source_uris is None) == (bigquery_source_uri is None):
raise ValueError(
"Exactly one of `gcs_source_uris` or `bigquery_source_uri` must be provided."
)
if isinstance(gcs_source_uris, str):
gcs_source_uris = [gcs_source_uris]
if bigquery_source_uri and not isinstance(bigquery_source_uri, str):
raise ValueError("The provided `bigquery_source_uri` must be a string.")
if bigquery_source_uri and not bigquery_destination_output_uri:
raise ValueError(
"`bigquery_destination_output_uri` must be provided if `bigquery_source_uri` is used as the data source."
)
if gcs_source_uris is not None and not all(
uri.startswith("gs://") for uri in gcs_source_uris
):
raise ValueError("`gcs_source_uris` must start with 'gs://'.")
if bigquery_source_uri is not None and not bigquery_source_uri.startswith(
"bq://"
):
raise ValueError(
"`bigquery_source_uri` and `bigquery_destination_output_uri` must start with 'bq://'"
)
if (
bigquery_destination_output_uri is not None
and not bigquery_destination_output_uri.startswith("bq://")
):
raise ValueError(
"`bigquery_source_uri` and `bigquery_destination_output_uri` must start with 'bq://'"
)
SUPPORTED_INSTANCES_FORMAT_FILE_EXTENSIONS = [".jsonl", ".csv"]
if not staging_bucket and initializer.global_config.staging_bucket:
staging_bucket = initializer.global_config.staging_bucket
elif not staging_bucket and not initializer.global_config.staging_bucket:
raise ValueError(
"Please provide `evaluation_staging_bucket` when calling evaluate or set one using aiplatform.init(staging_bucket=...)"
)
if prediction_type not in _SUPPORTED_EVAL_PREDICTION_TYPES:
raise ValueError(
f"Please provide a supported model prediction type, one of: {_SUPPORTED_EVAL_PREDICTION_TYPES}."
)
if generate_feature_attributions:
if not self._gca_resource.explanation_spec:
raise ValueError(
"To generate feature attributions with your evaluation, call evaluate on a model with an explanation spec. To run evaluation on the current model, call evaluate with `generate_feature_attributions=False`."
)
instances_format = None
if gcs_source_uris:
data_file_path_obj = pathlib.Path(gcs_source_uris[0])
data_file_extension = data_file_path_obj.suffix
if data_file_extension not in SUPPORTED_INSTANCES_FORMAT_FILE_EXTENSIONS:
_LOGGER.warning(
f"Only the following data file extensions are currently supported: '{SUPPORTED_INSTANCES_FORMAT_FILE_EXTENSIONS}'"
)
else:
instances_format = data_file_extension[1:]
elif bigquery_source_uri:
instances_format = "bigquery"
if (
self._gca_resource.metadata_schema_uri
== "https://storage.googleapis.com/google-cloud-aiplatform/schema/model/metadata/automl_tabular_1.0.0.yaml"
):
model_type = "automl_tabular"
else:
model_type = "other"
if (
model_type == "other"
and prediction_type == "classification"
and not class_labels
):
raise ValueError(
"Please provide `class_labels` when running evaluation on a custom classification model."
)
return model_evaluation._ModelEvaluationJob.submit(
model_name=self.versioned_resource_name,
prediction_type=prediction_type,
target_field_name=target_field_name,
gcs_source_uris=gcs_source_uris,
bigquery_source_uri=bigquery_source_uri,
batch_predict_bigquery_destination_output_uri=bigquery_destination_output_uri,
class_labels=class_labels,
prediction_label_column=prediction_label_column,
prediction_score_column=prediction_score_column,
service_account=service_account,
pipeline_root=staging_bucket,
instances_format=instances_format,
model_type=model_type,
generate_feature_attributions=generate_feature_attributions,
evaluation_pipeline_display_name=evaluation_pipeline_display_name,
evaluation_metrics_display_name=evaluation_metrics_display_name,
network=network,
encryption_spec_key_name=encryption_spec_key_name,
credentials=self.credentials,
experiment=experiment,
enable_caching=enable_caching,
)
# TODO (b/232546878): Async support
class ModelRegistry:
def __init__(
self,
model: Union[Model, str],
location: Optional[str] = None,
project: Optional[str] = None,
credentials: Optional[auth_credentials.Credentials] = None,
):
"""Creates a ModelRegistry instance for version management of a registered model.
Args:
model (Union[Model, str]):
Required. One of the following:
1. A Model instance
2. A fully-qualified model resource name
3. A model ID. A location and project must be provided.
location (str):
Optional. The model location. Used when passing a model name as model.
                If not set, location set in aiplatform.init will be used.
project (str):
Optional. The model project. Used when passing a model name as model.
If not set, project set in aiplatform.init will be used.
credentials (auth_credentials.Credentials):
Optional. Custom credentials to use with model access. If not set,
credentials set in aiplatform.init will be used.
"""
if isinstance(model, Model):
self.model_resource_name = model.resource_name
else:
self.model_resource_name = utils.full_resource_name(
resource_name=model,
resource_noun="models",
parse_resource_name_method=Model._parse_resource_name,
format_resource_name_method=Model._format_resource_name,
project=project,
location=location,
resource_id_validator=base.VertexAiResourceNoun._revisioned_resource_id_validator,
)
self.credentials = credentials or (
model.credentials
if isinstance(model, Model)
else initializer.global_config.credentials
)
self.client = Model._instantiate_client(location, self.credentials)
def get_model(
self,
version: Optional[str] = None,
) -> Model:
"""Gets a registered model with optional version.
Args:
version (str):
Optional. A model version ID or alias to target.
Defaults to the model with the "default" alias.
Returns:
Model: An instance of a Model from this ModelRegistry.
"""
return Model(
self.model_resource_name, version=version, credentials=self.credentials
)
def list_versions(
self,
filter: Optional[str] = None,
) -> List[VersionInfo]:
"""Lists the versions and version info of a model.
Args:
filter (str):
Optional. An expression for filtering the results of the request.
For field names both snake_case and camelCase are supported.
- `labels` supports general map functions that is:
- `labels.key=value` - key:value equality
                - `labels.key:*` or `labels:key` - key existence
- A key including a space must be quoted.
`labels."a key"`.
Some examples:
- `labels.myKey="myValue"`
Returns:
List[VersionInfo]:
A list of VersionInfo, each containing
info about specific model versions.
"""
_LOGGER.info(f"Getting versions for {self.model_resource_name}")
request = gca_model_service_compat.ListModelVersionsRequest(
name=self.model_resource_name,
filter=filter,
)
page_result = self.client.list_model_versions(
request=request,
)
versions = [
VersionInfo(
version_id=model.version_id,
version_create_time=model.version_create_time,
version_update_time=model.version_update_time,
model_display_name=model.display_name,
model_resource_name=self._parse_versioned_name(model.name)[0],
version_aliases=model.version_aliases,
version_description=model.version_description,
)
for model in page_result
]
return versions
def get_version_info(
self,
version: str,
) -> VersionInfo:
"""Gets information about a specific model version.
Args:
version (str): Required. The model version to obtain info for.
Returns:
VersionInfo: Contains info about the model version.
"""
_LOGGER.info(f"Getting version {version} info for {self.model_resource_name}")
model = self.client.get_model(
name=self._get_versioned_name(self.model_resource_name, version),
)
return VersionInfo(
version_id=model.version_id,
version_create_time=model.version_create_time,
version_update_time=model.version_update_time,
model_display_name=model.display_name,
model_resource_name=self._parse_versioned_name(model.name)[0],
version_aliases=model.version_aliases,
version_description=model.version_description,
)
def delete_version(
self,
version: str,
) -> None:
"""Deletes a model version from the registry.
Cannot delete a version if it is the last remaining version.
Use Model.delete() in that case.
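        Example usage (illustrative; version "2" is a placeholder):
            registry.delete_version("2")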
Args:
version (str): Required. The model version ID or alias to delete.
"""
lro = self.client.delete_model_version(
name=self._get_versioned_name(self.model_resource_name, version),
)
_LOGGER.info(f"Deleting version {version} for {self.model_resource_name}")
lro.result()
_LOGGER.info(f"Deleted version {version} for {self.model_resource_name}")
def update_version(
self,
version: str,
version_description: Optional[str] = None,
labels: Optional[Dict[str, str]] = None,
) -> None:
"""Updates a model version.
Args:
            version (str): Required. The model version ID to update.
version_description (str):
The description of the model version.
labels (Dict[str, str]):
Optional. The labels with user-defined metadata to
organize your Model versions.
Label keys and values can be no longer than 64
characters (Unicode codepoints), can only
contain lowercase letters, numeric characters,
underscores and dashes. International characters
are allowed.
See https://goo.gl/xmQnxf for more information
and examples of labels.
Raises:
ValueError: If `labels` is not the correct format.
"""
current_model_proto = self.get_model(version).gca_resource
copied_model_proto = current_model_proto.__class__(current_model_proto)
update_mask: List[str] = []
if version_description:
copied_model_proto.version_description = version_description
update_mask.append("version_description")
if labels:
utils.validate_labels(labels)
copied_model_proto.labels = labels
update_mask.append("labels")
update_mask = field_mask_pb2.FieldMask(paths=update_mask)
versioned_name = self._get_versioned_name(self.model_resource_name, version)
_LOGGER.info(f"Updating model {versioned_name}")
self.client.update_model(
model=copied_model_proto,
update_mask=update_mask,
)
_LOGGER.info(f"Completed updating model {versioned_name}")
def add_version_aliases(
self,
new_aliases: List[str],
version: str,
) -> None:
"""Adds version alias(es) to a model version.
Args:
new_aliases (List[str]): Required. The alias(es) to add to a model version.
version (str): Required. The version ID to receive the new alias(es).
"""
self._merge_version_aliases(
version_aliases=new_aliases,
version=version,
)
def remove_version_aliases(
self,
target_aliases: List[str],
version: str,
) -> None:
"""Removes version alias(es) from a model version.
Args:
target_aliases (List[str]): Required. The alias(es) to remove from a model version.
version (str): Required. The version ID to be stripped of the target alias(es).
"""
self._merge_version_aliases(
version_aliases=[f"-{alias}" for alias in target_aliases],
version=version,
)
def _merge_version_aliases(
self,
version_aliases: List[str],
version: str,
) -> None:
"""Merges a list of version aliases with a model's existing alias list.
Args:
version_aliases (List[str]): Required. The version alias change list.
version (str): Required. The version ID to have its alias list changed.
"""
_LOGGER.info(f"Merging version aliases for {self.model_resource_name}")
self.client.merge_version_aliases(
name=self._get_versioned_name(self.model_resource_name, version),
version_aliases=version_aliases,
)
_LOGGER.info(
f"Completed merging version aliases for {self.model_resource_name}"
)
@staticmethod
def _get_versioned_name(
resource_name: str,
version: Optional[str] = None,
) -> str:
"""Creates a versioned form of a model resource name.
Args:
resource_name (str): Required. A fully-qualified resource name or resource ID.
version (str): Optional. The version or alias of the resource.
Returns:
versioned_name (str): The versioned resource name in revisioned format.
"""
if version:
return f"{resource_name}@{version}"
return resource_name
@staticmethod
def _parse_versioned_name(
model_name: str,
) -> Tuple[str, Optional[str]]:
"""Return a model name and, if included in the model name, a model version.
Args:
model_name (str): Required. A fully-qualified model name or model ID,
optionally with an included version.
Returns:
parsed_version_name (Tuple[str, Optional[str]]):
A tuple containing the model name or ID as the first element,
and the model version as the second element, if present in `model_name`.
Raises:
ValueError: If the `model_name` is invalid and contains too many '@' symbols.
"""
if "@" not in model_name:
return model_name, None
elif model_name.count("@") > 1:
raise ValueError(
f"Received an invalid model_name with too many `@`s: {model_name}"
)
else:
return model_name.split("@")
@staticmethod
def _get_true_version_parent(
parent_model: Optional[str] = None,
project: Optional[str] = None,
location: Optional[str] = None,
) -> Optional[str]:
"""Gets the true `parent_model` with full resource name.
Args:
parent_model (str): Optional. A fully-qualified resource name or resource ID
of the model that would be the parent of another model.
project (str): Optional. The project of `parent_model`, if not included in `parent_model`.
location (str): Optional. The location of `parent_model`, if not included in `parent_model`.
Returns:
true_parent_model (str):
Optional. The true resource name of the parent model, if one should exist.
"""
if parent_model:
existing_resource = utils.full_resource_name(
resource_name=parent_model,
resource_noun="models",
parse_resource_name_method=Model._parse_resource_name,
format_resource_name_method=Model._format_resource_name,
project=project,
location=location,
)
parent_model = existing_resource
return parent_model
@staticmethod
def _get_true_alias_list(
version_aliases: Optional[Sequence[str]] = None,
is_default_version: bool = True,
) -> Optional[Sequence[str]]:
"""Gets the true `version_aliases` list based on `is_default_version`.
Args:
version_aliases (Sequence[str]): Optional. The user-provided list of model aliases.
is_default_version (bool):
Optional. When set, includes the "default" alias in `version_aliases`.
Defaults to True.
Returns:
true_alias_list (Sequence[str]):
                Optional. The true alias list, should one exist,
containing "default" if specified.
"""
if is_default_version:
if version_aliases and "default" not in version_aliases:
version_aliases.append("default")
elif not version_aliases:
version_aliases = ["default"]
return version_aliases