# -*- coding: utf-8 -*- # Copyright 2023 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # import itertools import json import pathlib import re import shutil import tempfile import requests from typing import ( Any, Dict, Iterator, List, NamedTuple, Optional, Sequence, Tuple, TYPE_CHECKING, Union, ) from google.api_core import operation from google.api_core import exceptions as api_exceptions from google.auth import credentials as auth_credentials from google.auth.transport import requests as google_auth_requests from google.protobuf import duration_pb2 import proto from google.cloud import aiplatform from google.cloud.aiplatform import base from google.cloud.aiplatform import constants from google.cloud.aiplatform import explain from google.cloud.aiplatform import initializer from google.cloud.aiplatform import jobs from google.cloud.aiplatform import models from google.cloud.aiplatform import utils from google.cloud.aiplatform.utils import gcs_utils from google.cloud.aiplatform.utils import _explanation_utils from google.cloud.aiplatform.utils import _ipython_utils from google.cloud.aiplatform import model_evaluation from google.cloud.aiplatform.compat.services import endpoint_service_client from google.cloud.aiplatform.compat.services import ( deployment_resource_pool_service_client, ) from google.cloud.aiplatform.compat.types import ( deployment_resource_pool as gca_deployment_resource_pool_compat, deployed_model_ref as gca_deployed_model_ref_compat, encryption_spec as gca_encryption_spec, endpoint as gca_endpoint_compat, explanation as gca_explanation_compat, io as gca_io_compat, machine_resources as gca_machine_resources_compat, model as gca_model_compat, model_service as gca_model_service_compat, env_var as gca_env_var_compat, service_networking as gca_service_networking, ) from google.cloud.aiplatform.constants import ( prediction as prediction_constants, ) from google.cloud.aiplatform_v1.types import model as model_v1 from google.protobuf import field_mask_pb2, timestamp_pb2 from google.protobuf import json_format if TYPE_CHECKING: from google.cloud.aiplatform.prediction import LocalModel _DEFAULT_MACHINE_TYPE = "n1-standard-2" _DEPLOYING_MODEL_TRAFFIC_SPLIT_KEY = "0" _SUCCESSFUL_HTTP_RESPONSE = 300 _RAW_PREDICT_DEPLOYED_MODEL_ID_KEY = "X-Vertex-AI-Deployed-Model-Id" _RAW_PREDICT_MODEL_RESOURCE_KEY = "X-Vertex-AI-Model" _RAW_PREDICT_MODEL_VERSION_ID_KEY = "X-Vertex-AI-Model-Version-Id" _LOGGER = base.Logger(__name__) _SUPPORTED_MODEL_FILE_NAMES = [ "model.pkl", "model.joblib", "model.bst", "model.mar", "saved_model.pb", "saved_model.pbtxt", ] _SUPPORTED_EVAL_PREDICTION_TYPES = [ "classification", "regression", ] class VersionInfo(NamedTuple): """VersionInfo class envelopes returned Model version information. Attributes: version_id: The version ID of the model. create_time: Timestamp when this Model version was uploaded into Vertex AI. update_time: Timestamp when this Model version was most recently updated. 
model_display_name: The user-defined name of the model this version belongs to. model_resource_name: The fully-qualified model resource name. e.g. projects/{project}/locations/{location}/models/{model_display_name} version_aliases: User provided version aliases so that a model version can be referenced via alias (i.e. projects/{project}/locations/{location}/models/{model_display_name}@{version_alias}). Default is None. version_description: The description of this version. Default is None. """ version_id: str version_create_time: timestamp_pb2.Timestamp version_update_time: timestamp_pb2.Timestamp model_display_name: str model_resource_name: str version_aliases: Optional[Sequence[str]] = None version_description: Optional[str] = None class Prediction(NamedTuple): """Prediction class envelopes returned Model predictions and the Model id. Attributes: predictions: The predictions that are the output of the predictions call. The schema of any single prediction may be specified via Endpoint's DeployedModels' [Model's][google.cloud.aiplatform.v1beta1.DeployedModel.model] [PredictSchemata's][google.cloud.aiplatform.v1beta1.Model.predict_schemata] deployed_model_id: ID of the Endpoint's DeployedModel that served this prediction. metadata: The metadata that is the output of the predictions call. model_version_id: ID of the DeployedModel's version that served this prediction. model_resource_name: The fully-qualified resource name of the model that served this prediction. explanations: The explanations of the Model's predictions. It has the same number of elements as instances to be explained. Default is None. """ predictions: List[Any] deployed_model_id: str metadata: Optional[Any] = None model_version_id: Optional[str] = None model_resource_name: Optional[str] = None explanations: Optional[Sequence[gca_explanation_compat.Explanation]] = None class DeploymentResourcePool(base.VertexAiResourceNounWithFutureManager): client_class = utils.DeploymentResourcePoolClientWithOverride _resource_noun = "deploymentResourcePools" _getter_method = "get_deployment_resource_pool" _list_method = "list_deployment_resource_pools" _delete_method = "delete_deployment_resource_pool" _parse_resource_name_method = "parse_deployment_resource_pool_path" _format_resource_name_method = "deployment_resource_pool_path" def __init__( self, deployment_resource_pool_name: str, project: Optional[str] = None, location: Optional[str] = None, credentials: Optional[auth_credentials.Credentials] = None, ): """Retrieves a DeploymentResourcePool. Args: deployment_resource_pool_name (str): Required. The fully-qualified resource name or ID of the deployment resource pool. Example: "projects/123/locations/us-central1/deploymentResourcePools/456" or "456" when project and location are initialized or passed. project (str): Optional. Project containing the deployment resource pool to retrieve. If not set, the project given to `aiplatform.init` will be used. location (str): Optional. Location containing the deployment resource pool to retrieve. If not set, the location given to `aiplatform.init` will be used. credentials: Optional[auth_credentials.Credentials]=None, Custom credentials to use to retrieve the deployment resource pool. If not set, the credentials given to `aiplatform.init` will be used. 
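        Example (an illustrative sketch, not part of the API surface; the
        project, location, and pool ID below are placeholder values):

            from google.cloud import aiplatform
            from google.cloud.aiplatform import models

            aiplatform.init(project="my-project", location="us-central1")

            # Retrieve an existing pool by ID; the full resource name
            # "projects/.../locations/.../deploymentResourcePools/456" also works.
            pool = models.DeploymentResourcePool(
                deployment_resource_pool_name="456"
            )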
""" super().__init__( project=project, location=location, credentials=credentials, resource_name=deployment_resource_pool_name, ) deployment_resource_pool_name = utils.full_resource_name( resource_name=deployment_resource_pool_name, resource_noun=self._resource_noun, parse_resource_name_method=self._parse_resource_name, format_resource_name_method=self._format_resource_name, project=project, location=location, ) self._gca_resource = self._get_gca_resource( resource_name=deployment_resource_pool_name ) @classmethod def create( cls, deployment_resource_pool_id: str, project: Optional[str] = None, location: Optional[str] = None, metadata: Sequence[Tuple[str, str]] = (), credentials: Optional[auth_credentials.Credentials] = None, machine_type: Optional[str] = None, min_replica_count: int = 1, max_replica_count: int = 1, accelerator_type: Optional[str] = None, accelerator_count: Optional[int] = None, autoscaling_target_cpu_utilization: Optional[int] = None, autoscaling_target_accelerator_duty_cycle: Optional[int] = None, sync=True, create_request_timeout: Optional[float] = None, reservation_affinity_type: Optional[str] = None, reservation_affinity_key: Optional[str] = None, reservation_affinity_values: Optional[List[str]] = None, spot: bool = False, required_replica_count: Optional[int] = 0, ) -> "DeploymentResourcePool": """Creates a new DeploymentResourcePool. Args: deployment_resource_pool_id (str): Required. User-specified name for the new deployment resource pool. project (str): Optional. Project containing the deployment resource pool to retrieve. If not set, the project given to `aiplatform.init` will be used. location (str): Optional. Location containing the deployment resource pool to retrieve. If not set, the location given to `aiplatform.init` will be used. metadata (Sequence[Tuple[str, str]]): Optional. Strings which should be sent along with the request as metadata. credentials: Optional[auth_credentials.Credentials]=None, Optional. Custom credentials to use to retrieve the deployment resource pool. If not set, the credentials given to `aiplatform.init` will be used. machine_type (str): Optional. Machine type to use for the deployment resource pool. If not set, the default machine type of `n1-standard-2` is used. min_replica_count (int): Optional. The minimum replica count of the new deployment resource pool. Each replica serves a copy of each model deployed on the deployment resource pool. If this value is less than `max_replica_count`, then autoscaling is enabled, and the actual number of replicas will be adjusted to bring resource usage in line with the autoscaling targets. max_replica_count (int): Optional. The maximum replica count of the new deployment resource pool. accelerator_type (str): Optional. Hardware accelerator type. Must also set accelerator_ count if used. One of NVIDIA_TESLA_K80, NVIDIA_TESLA_P100, NVIDIA_TESLA_V100, NVIDIA_TESLA_P4, NVIDIA_TESLA_T4, or NVIDIA_TESLA_A100. accelerator_count (int): Optional. The number of accelerators attached to each replica. autoscaling_target_cpu_utilization (int): Optional. Target CPU utilization value for autoscaling. A default value of 60 will be used if not specified. autoscaling_target_accelerator_duty_cycle (int): Optional. Target accelerator duty cycle percentage to use for autoscaling. Must also set accelerator_type and accelerator count if specified. A default value of 60 will be used if accelerators are requested and this is not specified. sync (bool): Optional. Whether to execute this method synchronously. 
If False, this method will be executed in a concurrent Future and any downstream object will be immediately returned and synced when the Future has completed. create_request_timeout (float): Optional. The create request timeout in seconds. reservation_affinity_type (str): Optional. The type of reservation affinity. One of NO_RESERVATION, ANY_RESERVATION, SPECIFIC_RESERVATION, SPECIFIC_THEN_ANY_RESERVATION, SPECIFIC_THEN_NO_RESERVATION reservation_affinity_key (str): Optional. Corresponds to the label key of a reservation resource. To target a SPECIFIC_RESERVATION by name, use `compute.googleapis.com/reservation-name` as the key and specify the name of your reservation as its value. reservation_affinity_values (List[str]): Optional. Corresponds to the label values of a reservation resource. This must be the full resource name of the reservation. Format: 'projects/{project_id_or_number}/zones/{zone}/reservations/{reservation_name}' spot (bool): Optional. Whether to schedule the deployment workload on spot VMs. required_replica_count (int): Optional. Number of required available replicas for the deployment to succeed. This field is only needed when partial model deployment/mutation is desired, with a value greater than or equal to 1 and fewer than or equal to min_replica_count. If set, the model deploy/mutate operation will succeed once available_replica_count reaches required_replica_count, and the rest of the replicas will be retried. Returns: DeploymentResourcePool """ api_client = cls._instantiate_client(location=location, credentials=credentials) project = project or initializer.global_config.project location = location or initializer.global_config.location return cls._create( api_client=api_client, deployment_resource_pool_id=deployment_resource_pool_id, project=project, location=location, metadata=metadata, credentials=credentials, machine_type=machine_type, min_replica_count=min_replica_count, max_replica_count=max_replica_count, accelerator_type=accelerator_type, accelerator_count=accelerator_count, reservation_affinity_type=reservation_affinity_type, reservation_affinity_key=reservation_affinity_key, reservation_affinity_values=reservation_affinity_values, autoscaling_target_cpu_utilization=autoscaling_target_cpu_utilization, autoscaling_target_accelerator_duty_cycle=autoscaling_target_accelerator_duty_cycle, spot=spot, sync=sync, create_request_timeout=create_request_timeout, required_replica_count=required_replica_count, ) @classmethod @base.optional_sync() def _create( cls, api_client: deployment_resource_pool_service_client.DeploymentResourcePoolServiceClient, deployment_resource_pool_id: str, project: Optional[str] = None, location: Optional[str] = None, metadata: Sequence[Tuple[str, str]] = (), credentials: Optional[auth_credentials.Credentials] = None, machine_type: Optional[str] = None, min_replica_count: int = 1, max_replica_count: int = 1, accelerator_type: Optional[str] = None, accelerator_count: Optional[int] = None, reservation_affinity_type: Optional[str] = None, reservation_affinity_key: Optional[str] = None, reservation_affinity_values: Optional[List[str]] = None, autoscaling_target_cpu_utilization: Optional[int] = None, autoscaling_target_accelerator_duty_cycle: Optional[int] = None, spot: bool = False, sync=True, create_request_timeout: Optional[float] = None, required_replica_count: Optional[int] = 0, ) -> "DeploymentResourcePool": """Creates a new DeploymentResourcePool. Args: api_client (DeploymentResourcePoolServiceClient): Required. 
DeploymentResourcePoolServiceClient used to make the underlying CreateDeploymentResourcePool API call. deployment_resource_pool_id (str): Required. User-specified name for the new deployment resource pool. project (str): Optional. Project containing the deployment resource pool to retrieve. If not set, the project given to `aiplatform.init` will be used. location (str): Optional. Location containing the deployment resource pool to retrieve. If not set, the location given to `aiplatform.init` will be used. metadata (Sequence[Tuple[str, str]]): Optional. Strings which should be sent along with the request as metadata. credentials: Optional[auth_credentials.Credentials]=None, Optional. Custom credentials to use to retrieve the deployment resource pool. If not set, the credentials given to `aiplatform.init` will be used. machine_type (str): Optional. Machine type to use for the deployment resource pool. If not set, the default machine type of `n1-standard-2` is used. min_replica_count (int): Optional. The minimum replica count of the new deployment resource pool. Each replica serves a copy of each model deployed on the deployment resource pool. If this value is less than `max_replica_count`, then autoscaling is enabled, and the actual number of replicas will be adjusted to bring resource usage in line with the autoscaling targets. max_replica_count (int): Optional. The maximum replica count of the new deployment resource pool. accelerator_type (str): Optional. Hardware accelerator type. Must also set accelerator_ count if used. One of NVIDIA_TESLA_K80, NVIDIA_TESLA_P100, NVIDIA_TESLA_V100, NVIDIA_TESLA_P4, NVIDIA_TESLA_T4, or NVIDIA_TESLA_A100. accelerator_count (int): Optional. The number of accelerators attached to each replica. reservation_affinity_type (str): Optional. The type of reservation affinity. One of NO_RESERVATION, ANY_RESERVATION, SPECIFIC_RESERVATION, SPECIFIC_THEN_ANY_RESERVATION, SPECIFIC_THEN_NO_RESERVATION reservation_affinity_key (str): Optional. Corresponds to the label key of a reservation resource. To target a SPECIFIC_RESERVATION by name, use `compute.googleapis.com/reservation-name` as the key and specify the name of your reservation as its value. reservation_affinity_values (List[str]): Optional. Corresponds to the label values of a reservation resource. This must be the full resource name of the reservation. Format: 'projects/{project_id_or_number}/zones/{zone}/reservations/{reservation_name}' autoscaling_target_cpu_utilization (int): Optional. Target CPU utilization value for autoscaling. A default value of 60 will be used if not specified. autoscaling_target_accelerator_duty_cycle (int): Optional. Target accelerator duty cycle percentage to use for autoscaling. Must also set accelerator_type and accelerator count if specified. A default value of 60 will be used if accelerators are requested and this is not specified. spot (bool): Optional. Whether to schedule the deployment workload on spot VMs. sync (bool): Optional. Whether to execute this method synchronously. If False, this method will be executed in a concurrent Future and any downstream object will be immediately returned and synced when the Future has completed. create_request_timeout (float): Optional. The create request timeout in seconds. required_replica_count (int): Optional. Number of required available replicas for the deployment to succeed. 
This field is only needed when partial model deployment/mutation is desired, with a value greater than or equal to 1 and fewer than or equal to min_replica_count. If set, the model deploy/mutate operation will succeed once available_replica_count reaches required_replica_count, and the rest of the replicas will be retried. Returns: DeploymentResourcePool """ parent = initializer.global_config.common_location_path( project=project, location=location ) dedicated_resources = gca_machine_resources_compat.DedicatedResources( min_replica_count=min_replica_count, max_replica_count=max_replica_count, spot=spot, required_replica_count=required_replica_count, ) machine_spec = gca_machine_resources_compat.MachineSpec( machine_type=machine_type ) if autoscaling_target_cpu_utilization: autoscaling_metric_spec = ( gca_machine_resources_compat.AutoscalingMetricSpec( metric_name=( "aiplatform.googleapis.com/prediction/online/cpu/utilization" ), target=autoscaling_target_cpu_utilization, ) ) dedicated_resources.autoscaling_metric_specs.extend( [autoscaling_metric_spec] ) if accelerator_type and accelerator_count: utils.validate_accelerator_type(accelerator_type) machine_spec.accelerator_type = accelerator_type machine_spec.accelerator_count = accelerator_count if autoscaling_target_accelerator_duty_cycle: autoscaling_metric_spec = gca_machine_resources_compat.AutoscalingMetricSpec( metric_name="aiplatform.googleapis.com/prediction/online/accelerator/duty_cycle", target=autoscaling_target_accelerator_duty_cycle, ) dedicated_resources.autoscaling_metric_specs.extend( [autoscaling_metric_spec] ) if reservation_affinity_type: machine_spec.reservation_affinity = utils.get_reservation_affinity( reservation_affinity_type, reservation_affinity_key, reservation_affinity_values, ) dedicated_resources.machine_spec = machine_spec gapic_drp = gca_deployment_resource_pool_compat.DeploymentResourcePool( dedicated_resources=dedicated_resources ) operation_future = api_client.create_deployment_resource_pool( parent=parent, deployment_resource_pool=gapic_drp, deployment_resource_pool_id=deployment_resource_pool_id, metadata=metadata, timeout=create_request_timeout, ) _LOGGER.log_create_with_lro(cls, operation_future) created_drp = operation_future.result() _LOGGER.log_create_complete(cls, created_drp, "deployment resource pool") return cls._construct_sdk_resource_from_gapic( gapic_resource=created_drp, project=project, location=location, credentials=credentials, ) def query_deployed_models( self, project: Optional[str] = None, location: Optional[str] = None, credentials: Optional[auth_credentials.Credentials] = None, ) -> List[gca_deployed_model_ref_compat.DeployedModelRef]: """Lists the deployed models using this resource pool. Args: project (str): Optional. Project to retrieve list from. If not set, project set in aiplatform.init will be used. location (str): Optional. Location to retrieve list from. If not set, location set in aiplatform.init will be used. credentials (auth_credentials.Credentials): Optional. Custom credentials to use to retrieve list. Overrides credentials set in aiplatform.init. Returns: List of DeployedModelRef objects containing the endpoint ID and deployed model ID of the deployed models using this resource pool. 
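        Example (an illustrative sketch; the pool ID, machine type, and
        project/location values below are placeholders):

            from google.cloud import aiplatform
            from google.cloud.aiplatform import models

            aiplatform.init(project="my-project", location="us-central1")

            pool = models.DeploymentResourcePool.create(
                deployment_resource_pool_id="my-shared-pool",
                machine_type="n1-standard-4",
                min_replica_count=1,
                max_replica_count=2,
            )

            # List which endpoint / deployed-model pairs currently share this pool.
            for ref in pool.query_deployed_models():
                print(ref.endpoint, ref.deployed_model_id)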
""" location = location or initializer.global_config.location api_client = DeploymentResourcePool._instantiate_client( location=location, credentials=credentials ) response = api_client.query_deployed_models( deployment_resource_pool=self.resource_name ) return list( itertools.chain(page.deployed_model_refs for page in response.pages) ) @classmethod def list( cls, filter: Optional[str] = None, order_by: Optional[str] = None, project: Optional[str] = None, location: Optional[str] = None, credentials: Optional[auth_credentials.Credentials] = None, ) -> List["models.DeploymentResourcePool"]: """Lists the deployment resource pools. filter (str): Optional. An expression for filtering the results of the request. For field names both snake_case and camelCase are supported. order_by (str): Optional. A comma-separated list of fields to order by, sorted in ascending order. Use "desc" after a field name for descending. Supported fields: `display_name`, `create_time`, `update_time` project (str): Optional. Project to retrieve list from. If not set, project set in aiplatform.init will be used. location (str): Optional. Location to retrieve list from. If not set, location set in aiplatform.init will be used. credentials (auth_credentials.Credentials): Optional. Custom credentials to use to retrieve list. Overrides credentials set in aiplatform.init. Returns: List of deployment resource pools. """ return cls._list( filter=filter, order_by=order_by, project=project, location=location, credentials=credentials, ) class Endpoint(base.VertexAiResourceNounWithFutureManager, base.PreviewMixin): client_class = utils.EndpointClientWithOverride _resource_noun = "endpoints" _getter_method = "get_endpoint" _list_method = "list_endpoints" _delete_method = "delete_endpoint" _parse_resource_name_method = "parse_endpoint_path" _format_resource_name_method = "endpoint_path" _preview_class = "google.cloud.aiplatform.aiplatform.preview.models.Endpoint" @property def preview(self): """Return an Endpoint instance with preview features enabled.""" from google.cloud.aiplatform.preview import models as preview_models if not hasattr(self, "_preview_instance"): self._preview_instance = preview_models.Endpoint( self.resource_name, credentials=self.credentials ) return self._preview_instance def __init__( self, endpoint_name: str, project: Optional[str] = None, location: Optional[str] = None, credentials: Optional[auth_credentials.Credentials] = None, ): """Retrieves an endpoint resource. Args: endpoint_name (str): Required. A fully-qualified endpoint resource name or endpoint ID. Example: "projects/123/locations/us-central1/endpoints/456" or "456" when project and location are initialized or passed. project (str): Optional. Project to retrieve endpoint from. If not set, project set in aiplatform.init will be used. location (str): Optional. Location to retrieve endpoint from. If not set, location set in aiplatform.init will be used. credentials (auth_credentials.Credentials): Optional. Custom credentials to use to upload this model. Overrides credentials set in aiplatform.init. 
""" super().__init__( project=project, location=location, credentials=credentials, resource_name=endpoint_name, ) endpoint_name = utils.full_resource_name( resource_name=endpoint_name, resource_noun="endpoints", parse_resource_name_method=self._parse_resource_name, format_resource_name_method=self._format_resource_name, project=project, location=location, ) # Lazy load the Endpoint gca_resource until needed self._gca_resource = gca_endpoint_compat.Endpoint(name=endpoint_name) self.authorized_session = None self.raw_predict_request_url = None self.stream_raw_predict_request_url = None @property def _prediction_client(self) -> utils.PredictionClientWithOverride: # The attribute might not exist due to issues in # `VertexAiResourceNounWithFutureManager._sync_object_with_future_result` # We should switch to @functools.cached_property once its available. if not getattr(self, "_prediction_client_value", None): self._prediction_client_value = initializer.global_config.create_client( client_class=utils.PredictionClientWithOverride, credentials=self.credentials, location_override=self.location, prediction_client=True, ) return self._prediction_client_value @property def _prediction_async_client(self) -> utils.PredictionAsyncClientWithOverride: # The attribute might not exist due to issues in # `VertexAiResourceNounWithFutureManager._sync_object_with_future_result` # We should switch to @functools.cached_property once its available. if not getattr(self, "_prediction_async_client_value", None): self._prediction_async_client_value = ( initializer.global_config.create_client( client_class=utils.PredictionAsyncClientWithOverride, credentials=self.credentials, location_override=self.location, prediction_client=True, ) ) return self._prediction_async_client_value def _skipped_getter_call(self) -> bool: """Check if GAPIC resource was populated by call to get/list API methods Returns False if `_gca_resource` is None or fully populated. Returns True if `_gca_resource` is partially populated """ return self._gca_resource and not self._gca_resource.create_time def _sync_gca_resource_if_skipped(self) -> None: """Sync GAPIC service representation of Endpoint class resource only if get_endpoint() was never called.""" if self._skipped_getter_call(): self._gca_resource = self._get_gca_resource( resource_name=self._gca_resource.name ) def _assert_gca_resource_is_available(self) -> None: """Ensures Endpoint getter was called at least once before asserting on gca_resource's availability.""" super()._assert_gca_resource_is_available() self._sync_gca_resource_if_skipped() @property def traffic_split(self) -> Dict[str, int]: """A map from a DeployedModel's ID to the percentage of this Endpoint's traffic that should be forwarded to that DeployedModel. If a DeployedModel's ID is not listed in this map, then it receives no traffic. The traffic percentage values must add up to 100, or map must be empty if the Endpoint is to not accept any traffic at a moment. """ self._sync_gca_resource() return dict(self._gca_resource.traffic_split) @property def network(self) -> Optional[str]: """The full name of the Google Compute Engine [network](https://cloud.google.com/vpc/docs/vpc#networks) to which this Endpoint should be peered. Takes the format `projects/{project}/global/networks/{network}`. Where {project} is a project number, as in `12345`, and {network} is a network name. Private services access must already be configured for the network. If left unspecified, the Endpoint is not peered with any network. 
""" self._assert_gca_resource_is_available() return getattr(self._gca_resource, "network", None) @property def private_service_connect_config( self, ) -> Optional[gca_service_networking.PrivateServiceConnectConfig]: """The Private Service Connect configuration for this Endpoint.""" self._assert_gca_resource_is_available() return self._gca_resource.private_service_connect_config @classmethod def create( cls, display_name: Optional[str] = None, description: Optional[str] = None, labels: Optional[Dict[str, str]] = None, metadata: Optional[Sequence[Tuple[str, str]]] = (), project: Optional[str] = None, location: Optional[str] = None, credentials: Optional[auth_credentials.Credentials] = None, encryption_spec_key_name: Optional[str] = None, sync=True, create_request_timeout: Optional[float] = None, endpoint_id: Optional[str] = None, enable_request_response_logging=False, request_response_logging_sampling_rate: Optional[float] = None, request_response_logging_bq_destination_table: Optional[str] = None, dedicated_endpoint_enabled=False, inference_timeout: Optional[int] = None, ) -> "Endpoint": """Creates a new endpoint. Args: display_name (str): Optional. The user-defined name of the Endpoint. The name can be up to 128 characters long and can be consist of any UTF-8 characters. description (str): Optional. The description of the Endpoint. labels (Dict[str, str]): Optional. The labels with user-defined metadata to organize your Endpoints. Label keys and values can be no longer than 64 characters (Unicode codepoints), can only contain lowercase letters, numeric characters, underscores and dashes. International characters are allowed. See https://goo.gl/xmQnxf for more information and examples of labels. metadata (Sequence[Tuple[str, str]]): Optional. Strings which should be sent along with the request as metadata. project (str): Optional. Project to retrieve endpoint from. If not set, project set in aiplatform.init will be used. location (str): Optional. Location to retrieve endpoint from. If not set, location set in aiplatform.init will be used. credentials (auth_credentials.Credentials): Optional. Custom credentials to use to upload this model. Overrides credentials set in aiplatform.init. encryption_spec_key_name (str): Optional. The Cloud KMS resource identifier of the customer managed encryption key used to protect the model. Has the form: ``projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key``. The key needs to be in the same region as where the compute resource is created. If set, this Endpoint and all sub-resources of this Endpoint will be secured by this key. Overrides encryption_spec_key_name set in aiplatform.init. sync (bool): Whether to execute this method synchronously. If False, this method will be executed in concurrent Future and any downstream object will be immediately returned and synced when the Future has completed. create_request_timeout (float): Optional. The timeout for the create request in seconds. endpoint_id (str): Optional. The ID to use for endpoint, which will become the final component of the endpoint resource name. If not provided, Vertex AI will generate a value for this ID. This value should be 1-10 characters, and valid characters are /[0-9]/. When using HTTP/JSON, this field is populated based on a query string argument, such as ``?endpoint_id=12345``. This is the fallback for fields that are not included in either the URI or the body. enable_request_response_logging (bool): Optional. 
Whether to enable request & response logging for this endpoint. request_response_logging_sampling_rate (float): Optional. The request response logging sampling rate. If not set, default is 0.0. request_response_logging_bq_destination_table (str): Optional. The request response logging bigquery destination. If not set, will create a table with name: ``bq://{project_id}.logging_{endpoint_display_name}_{endpoint_id}.request_response_logging``. dedicated_endpoint_enabled (bool): Optional. If enabled, a dedicated dns will be created and your traffic will be fully isolated from other customers' traffic and latency will be reduced. inference_timeout (int): Optional. It defines the prediction timeout, in seconds, for online predictions using cloud-based endpoints. This applies to either PSC endpoints, when private_service_connect_config is set, or dedicated endpoints, when dedicated_endpoint_enabled is true. Returns: endpoint (aiplatform.Endpoint): Created endpoint. """ api_client = cls._instantiate_client(location=location, credentials=credentials) if not display_name: display_name = cls._generate_display_name() utils.validate_display_name(display_name) if labels: utils.validate_labels(labels) project = project or initializer.global_config.project location = location or initializer.global_config.location predict_request_response_logging_config = None if enable_request_response_logging: predict_request_response_logging_config = ( gca_endpoint_compat.PredictRequestResponseLoggingConfig( enabled=True, sampling_rate=request_response_logging_sampling_rate, bigquery_destination=gca_io_compat.BigQueryDestination( output_uri=request_response_logging_bq_destination_table ), ) ) client_connection_config = None if ( inference_timeout is not None and inference_timeout > 0 and dedicated_endpoint_enabled ): client_connection_config = gca_endpoint_compat.ClientConnectionConfig( inference_timeout=duration_pb2.Duration(seconds=inference_timeout) ) return cls._create( api_client=api_client, display_name=display_name, project=project, location=location, description=description, labels=labels, metadata=metadata, credentials=credentials, encryption_spec=initializer.global_config.get_encryption_spec( encryption_spec_key_name=encryption_spec_key_name ), sync=sync, create_request_timeout=create_request_timeout, endpoint_id=endpoint_id, predict_request_response_logging_config=predict_request_response_logging_config, dedicated_endpoint_enabled=dedicated_endpoint_enabled, client_connection_config=client_connection_config, ) @classmethod @base.optional_sync() def _create( cls, api_client: endpoint_service_client.EndpointServiceClient, display_name: str, project: str, location: str, description: Optional[str] = None, labels: Optional[Dict[str, str]] = None, metadata: Optional[Sequence[Tuple[str, str]]] = (), credentials: Optional[auth_credentials.Credentials] = None, encryption_spec: Optional[gca_encryption_spec.EncryptionSpec] = None, network: Optional[str] = None, sync=True, create_request_timeout: Optional[float] = None, endpoint_id: Optional[str] = None, predict_request_response_logging_config: Optional[ gca_endpoint_compat.PredictRequestResponseLoggingConfig ] = None, private_service_connect_config: Optional[ gca_service_networking.PrivateServiceConnectConfig ] = None, dedicated_endpoint_enabled=False, client_connection_config: Optional[ gca_endpoint_compat.ClientConnectionConfig ] = None, ) -> "Endpoint": """Creates a new endpoint by calling the API client. Args: api_client (EndpointServiceClient): Required. 
An instance of EndpointServiceClient with the correct api_endpoint already set based on user's preferences. display_name (str): Required. The user-defined name of the Endpoint. The name can be up to 128 characters long and can be consist of any UTF-8 characters. project (str): Required. Project to retrieve endpoint from. location (str): Required. Location to retrieve endpoint from. description (str): Optional. The description of the Endpoint. labels (Dict[str, str]): Optional. The labels with user-defined metadata to organize your Endpoints. Label keys and values can be no longer than 64 characters (Unicode codepoints), can only contain lowercase letters, numeric characters, underscores and dashes. International characters are allowed. See https://goo.gl/xmQnxf for more information and examples of labels. metadata (Sequence[Tuple[str, str]]): Optional. Strings which should be sent along with the request as metadata. credentials (auth_credentials.Credentials): Optional. Custom credentials to use to upload this model. Overrides credentials set in aiplatform.init. encryption_spec (gca_encryption_spec.EncryptionSpec): Optional. The Cloud KMS customer managed encryption key used to protect the dataset. The key needs to be in the same region as where the compute resource is created. If set, this Dataset and all sub-resources of this Dataset will be secured by this key. network (str): Optional. The full name of the Compute Engine network to which this Endpoint will be peered. E.g. "projects/12345/global/networks/myVPC". Private services access must already be configured for the network. Cannot be specified when private_service_connect is enabled. Read more about PrivateEndpoints [in the documentation](https://cloud.google.com/vertex-ai/docs/predictions/using-private-endpoints) sync (bool): Whether to create this endpoint synchronously. create_request_timeout (float): Optional. The timeout for the create request in seconds. endpoint_id (str): Optional. The ID to use for endpoint, which will become the final component of the endpoint resource name. If not provided, Vertex AI will generate a value for this ID. This value should be 1-10 characters, and valid characters are /[0-9]/. When using HTTP/JSON, this field is populated based on a query string argument, such as ``?endpoint_id=12345``. This is the fallback for fields that are not included in either the URI or the body. predict_request_response_logging_config (aiplatform.endpoint.PredictRequestResponseLoggingConfig): Optional. The request response logging configuration for online prediction. private_service_connect_config (aiplatform.service_network.PrivateServiceConnectConfig): If enabled, the endpoint can be accessible via [Private Service Connect](https://cloud.google.com/vpc/docs/private-service-connect). Cannot be enabled when network is specified. dedicated_endpoint_enabled (bool): Optional. If enabled, a dedicated dns will be created and your traffic will be fully isolated from other customers' traffic and latency will be reduced. client_connection_config (aiplatform.endpoint.ClientConnectionConfig): Optional. The inference timeout which is applied on cloud-based (PSC, or dedicated) endpoints for online prediction. Returns: endpoint (aiplatform.Endpoint): Created endpoint. 
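        Example (an illustrative sketch of the public ``Endpoint.create()`` call
        that delegates to this helper; the display name, labels, and BigQuery
        table below are placeholders):

            from google.cloud import aiplatform

            aiplatform.init(project="my-project", location="us-central1")

            endpoint = aiplatform.Endpoint.create(
                display_name="my-endpoint",
                labels={"team": "demo"},
                enable_request_response_logging=True,
                request_response_logging_sampling_rate=0.1,
                request_response_logging_bq_destination_table=(
                    "bq://my-project.my_dataset.request_response_logging"
                ),
            )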
""" parent = initializer.global_config.common_location_path( project=project, location=location ) gapic_endpoint = gca_endpoint_compat.Endpoint( display_name=display_name, description=description, labels=labels, encryption_spec=encryption_spec, network=network, predict_request_response_logging_config=predict_request_response_logging_config, private_service_connect_config=private_service_connect_config, dedicated_endpoint_enabled=dedicated_endpoint_enabled, client_connection_config=client_connection_config, ) operation_future = api_client.create_endpoint( parent=parent, endpoint=gapic_endpoint, endpoint_id=endpoint_id, metadata=metadata, timeout=create_request_timeout, ) _LOGGER.log_create_with_lro(cls, operation_future) created_endpoint = operation_future.result() _LOGGER.log_create_complete(cls, created_endpoint, "endpoint") return cls._construct_sdk_resource_from_gapic( gapic_resource=created_endpoint, project=project, location=location, credentials=credentials, ) @classmethod def _construct_sdk_resource_from_gapic( cls, gapic_resource: proto.Message, project: Optional[str] = None, location: Optional[str] = None, credentials: Optional[auth_credentials.Credentials] = None, ) -> "Endpoint": """Given a GAPIC Endpoint object, return the SDK representation. Args: gapic_resource (proto.Message): A GAPIC representation of a Endpoint resource, usually retrieved by a get_* or in a list_* API call. project (str): Optional. Project to construct Endpoint object from. If not set, project set in aiplatform.init will be used. location (str): Optional. Location to construct Endpoint object from. If not set, location set in aiplatform.init will be used. credentials (auth_credentials.Credentials): Optional. Custom credentials to use to construct Endpoint. Overrides credentials set in aiplatform.init. Returns: Endpoint (aiplatform.Endpoint): An initialized Endpoint resource. """ endpoint = super()._construct_sdk_resource_from_gapic( gapic_resource=gapic_resource, project=project, location=location, credentials=credentials, ) endpoint.authorized_session = None endpoint.raw_predict_request_url = None endpoint.stream_raw_predict_request_url = None return endpoint @staticmethod def _allocate_traffic( traffic_split: Dict[str, int], traffic_percentage: int, ) -> Dict[str, int]: """Allocates desired traffic to new deployed model and scales traffic of older deployed models. Args: traffic_split (Dict[str, int]): Required. Current traffic split of deployed models in endpoint. traffic_percentage (int): Required. Desired traffic to new deployed model. Returns: new_traffic_split (Dict[str, int]): Traffic split to use. """ new_traffic_split = {} old_models_traffic = 100 - traffic_percentage if old_models_traffic: unallocated_traffic = old_models_traffic for deployed_model in traffic_split: current_traffic = traffic_split[deployed_model] new_traffic = int(current_traffic / 100 * old_models_traffic) new_traffic_split[deployed_model] = new_traffic unallocated_traffic -= new_traffic # will likely under-allocate. make total 100. for deployed_model in new_traffic_split: if unallocated_traffic == 0: break new_traffic_split[deployed_model] += 1 unallocated_traffic -= 1 new_traffic_split[_DEPLOYING_MODEL_TRAFFIC_SPLIT_KEY] = traffic_percentage return new_traffic_split @staticmethod def _unallocate_traffic( traffic_split: Dict[str, int], deployed_model_id: str, ) -> Dict[str, int]: """Sets deployed model id's traffic to 0 and scales the traffic of other deployed models. Args: traffic_split (Dict[str, int]): Required. 
Current traffic split of deployed models in endpoint. deployed_model_id (str): Required. Desired traffic to new deployed model. Returns: new_traffic_split (Dict[str, int]): Traffic split to use. """ new_traffic_split = traffic_split.copy() del new_traffic_split[deployed_model_id] deployed_model_id_traffic = traffic_split[deployed_model_id] traffic_percent_left = 100 - deployed_model_id_traffic if traffic_percent_left: unallocated_traffic = 100 for deployed_model in new_traffic_split: current_traffic = traffic_split[deployed_model] new_traffic = int(current_traffic / traffic_percent_left * 100) new_traffic_split[deployed_model] = new_traffic unallocated_traffic -= new_traffic # will likely under-allocate. make total 100. for deployed_model in new_traffic_split: if unallocated_traffic == 0: break new_traffic_split[deployed_model] += 1 unallocated_traffic -= 1 new_traffic_split[deployed_model_id] = 0 return new_traffic_split @staticmethod def _validate_deploy_args( min_replica_count: Optional[int], max_replica_count: Optional[int], accelerator_type: Optional[str], deployed_model_display_name: Optional[str], traffic_split: Optional[Dict[str, int]], traffic_percentage: Optional[int], deployment_resource_pool: Optional[DeploymentResourcePool], required_replica_count: Optional[int], ): """Helper method to validate deploy arguments. Args: min_replica_count (int): Required. The minimum number of machine replicas this deployed model will be always deployed on. If traffic against it increases, it may dynamically be deployed onto more replicas, and as traffic decreases, some of these extra replicas may be freed. max_replica_count (int): Required. The maximum number of replicas this deployed model may be deployed on when the traffic against it increases. If requested value is too large, the deployment will error, but if deployment succeeds then the ability to scale the model to that many replicas is guaranteed (barring service outages). If traffic against the deployed model increases beyond what its replicas at maximum may handle, a portion of the traffic will be dropped. If this value is not provided, the larger value of min_replica_count or 1 will be used. If value provided is smaller than min_replica_count, it will automatically be increased to be min_replica_count. accelerator_type (str): Required. Hardware accelerator type. One of ACCELERATOR_TYPE_UNSPECIFIED, NVIDIA_TESLA_K80, NVIDIA_TESLA_P100, NVIDIA_TESLA_V100, NVIDIA_TESLA_P4, NVIDIA_TESLA_T4 deployed_model_display_name (str): Required. The display name of the DeployedModel. If not provided upon creation, the Model's display_name is used. traffic_split (Dict[str, int]): Optional. A map from a DeployedModel's ID to the percentage of this Endpoint's traffic that should be forwarded to that DeployedModel. If a DeployedModel's ID is not listed in this map, then it receives no traffic. The traffic percentage values must add up to 100, or map must be empty if the Endpoint is to not accept any traffic at the moment. Key for model being deployed is "0". Should not be provided if traffic_percentage is provided. traffic_percentage (int): Optional. Desired traffic to newly deployed model. Defaults to 0 if there are pre-existing deployed models. Defaults to 100 if there are no pre-existing deployed models. Negative values should not be provided. Traffic of previously deployed models at the endpoint will be scaled down to accommodate new deployed model's traffic. Should not be provided if traffic_split is provided. 
            deployment_resource_pool (DeploymentResourcePool): Optional.
                Resource pool where the model will be deployed. All models that
                are deployed to the same DeploymentResourcePool will be hosted
                in a shared model server. If provided, will override replica
                count arguments.
            required_replica_count (int): Optional.
                Number of required available replicas for the deployment to
                succeed. This field is only needed when partial model
                deployment/mutation is desired, with a value greater than or
                equal to 1 and less than or equal to min_replica_count. If set,
                the model deploy/mutate operation will succeed once
                available_replica_count reaches required_replica_count, and the
                rest of the replicas will be retried.

        Raises:
            ValueError: If min or max replica count is negative, if the traffic
                percentage is greater than 100 or negative, or if traffic_split
                does not sum to 100.
        """
        if deployment_resource_pool:
            # Validate that replica count and deployment resource pool are not
            # both specified.
            if (
                min_replica_count
                and min_replica_count != 1
                or max_replica_count
                and max_replica_count != 1
                or required_replica_count
                and required_replica_count != 0
            ):
                raise ValueError(
                    "Explicitly specified replica counts are not supported "
                    "when deployment_resource_pool is also given."
                )

            if accelerator_type:
                raise ValueError(
                    "Conflicting deployment parameters were given. "
                    "deployment_resource_pool may not be specified at the same "
                    "time as accelerator_type."
                )
        else:
            # Validate that a non-negative replica count is given, and validate
            # the accelerator type.
            if min_replica_count < 0:
                raise ValueError("Min replica cannot be negative.")

            if max_replica_count < 0:
                raise ValueError("Max replica cannot be negative.")

            if required_replica_count and required_replica_count < 0:
                raise ValueError("Required replica cannot be negative.")

            if accelerator_type:
                utils.validate_accelerator_type(accelerator_type)

        if deployed_model_display_name is not None:
            utils.validate_display_name(deployed_model_display_name)

        if traffic_split is None:
            if traffic_percentage > 100:
                raise ValueError("Traffic percentage cannot be greater than 100.")
            if traffic_percentage < 0:
                raise ValueError("Traffic percentage cannot be negative.")
        elif traffic_split:
            if sum(traffic_split.values()) != 100:
                raise ValueError(
                    "Sum of all traffic within traffic split needs to be 100."
) def deploy( self, model: "Model", deployed_model_display_name: Optional[str] = None, traffic_percentage: int = 0, traffic_split: Optional[Dict[str, int]] = None, machine_type: Optional[str] = None, min_replica_count: int = 1, max_replica_count: int = 1, accelerator_type: Optional[str] = None, accelerator_count: Optional[int] = None, tpu_topology: Optional[str] = None, service_account: Optional[str] = None, explanation_metadata: Optional[aiplatform.explain.ExplanationMetadata] = None, explanation_parameters: Optional[ aiplatform.explain.ExplanationParameters ] = None, metadata: Optional[Sequence[Tuple[str, str]]] = (), sync=True, deploy_request_timeout: Optional[float] = None, autoscaling_target_cpu_utilization: Optional[int] = None, autoscaling_target_accelerator_duty_cycle: Optional[int] = None, enable_access_logging=False, disable_container_logging: bool = False, deployment_resource_pool: Optional[DeploymentResourcePool] = None, reservation_affinity_type: Optional[str] = None, reservation_affinity_key: Optional[str] = None, reservation_affinity_values: Optional[List[str]] = None, spot: bool = False, fast_tryout_enabled: bool = False, system_labels: Optional[Dict[str, str]] = None, required_replica_count: Optional[int] = 0, ) -> None: """Deploys a Model to the Endpoint. Args: model (aiplatform.Model): Required. Model to be deployed. deployed_model_display_name (str): Optional. The display name of the DeployedModel. If not provided upon creation, the Model's display_name is used. traffic_percentage (int): Optional. Desired traffic to newly deployed model. Defaults to 0 if there are pre-existing deployed models. Defaults to 100 if there are no pre-existing deployed models. Negative values should not be provided. Traffic of previously deployed models at the endpoint will be scaled down to accommodate new deployed model's traffic. Should not be provided if traffic_split is provided. traffic_split (Dict[str, int]): Optional. A map from a DeployedModel's ID to the percentage of this Endpoint's traffic that should be forwarded to that DeployedModel. If a DeployedModel's ID is not listed in this map, then it receives no traffic. The traffic percentage values must add up to 100, or map must be empty if the Endpoint is to not accept any traffic at the moment. Key for model being deployed is "0". Should not be provided if traffic_percentage is provided. machine_type (str): Optional. The type of machine. Not specifying machine type will result in model to be deployed with automatic resources. min_replica_count (int): Optional. The minimum number of machine replicas this deployed model will be always deployed on. If traffic against it increases, it may dynamically be deployed onto more replicas, and as traffic decreases, some of these extra replicas may be freed. max_replica_count (int): Optional. The maximum number of replicas this deployed model may be deployed on when the traffic against it increases. If requested value is too large, the deployment will error, but if deployment succeeds then the ability to scale the model to that many replicas is guaranteed (barring service outages). If traffic against the deployed model increases beyond what its replicas at maximum may handle, a portion of the traffic will be dropped. If this value is not provided, the larger value of min_replica_count or 1 will be used. If value provided is smaller than min_replica_count, it will automatically be increased to be min_replica_count. accelerator_type (str): Optional. Hardware accelerator type. 
Must also set accelerator_count if used. One of ACCELERATOR_TYPE_UNSPECIFIED, NVIDIA_TESLA_K80, NVIDIA_TESLA_P100, NVIDIA_TESLA_V100, NVIDIA_TESLA_P4, NVIDIA_TESLA_T4 accelerator_count (int): Optional. The number of accelerators to attach to a worker replica. tpu_topology (str): Optional. The TPU topology to use for the DeployedModel. Required for CloudTPU multihost deployments. service_account (str): The service account that the DeployedModel's container runs as. Specify the email address of the service account. If this service account is not specified, the container runs as a service account that doesn't have access to the resource project. Users deploying the Model must have the `iam.serviceAccounts.actAs` permission on this service account. explanation_metadata (aiplatform.explain.ExplanationMetadata): Optional. Metadata describing the Model's input and output for explanation. `explanation_metadata` is optional while `explanation_parameters` must be specified when used. For more details, see `Ref docs ` explanation_parameters (aiplatform.explain.ExplanationParameters): Optional. Parameters to configure explaining for Model's predictions. For more details, see `Ref docs ` metadata (Sequence[Tuple[str, str]]): Optional. Strings which should be sent along with the request as metadata. sync (bool): Whether to execute this method synchronously. If False, this method will be executed in concurrent Future and any downstream object will be immediately returned and synced when the Future has completed. deploy_request_timeout (float): Optional. The timeout for the deploy request in seconds. autoscaling_target_cpu_utilization (int): Target CPU Utilization to use for Autoscaling Replicas. A default value of 60 will be used if not specified. autoscaling_target_accelerator_duty_cycle (int): Target Accelerator Duty Cycle. Must also set accelerator_type and accelerator_count if specified. A default value of 60 will be used if not specified. enable_access_logging (bool): Whether to enable endpoint access logging. Defaults to False. disable_container_logging (bool): If True, container logs from the deployed model will not be written to Cloud Logging. Defaults to False. deployment_resource_pool (DeploymentResourcePool): Resource pool where the model will be deployed. All models that are deployed to the same DeploymentResourcePool will be hosted in a shared model server. If provided, will override replica count arguments. reservation_affinity_type (str): Optional. The type of reservation affinity. One of NO_RESERVATION, ANY_RESERVATION, SPECIFIC_RESERVATION, SPECIFIC_THEN_ANY_RESERVATION, SPECIFIC_THEN_NO_RESERVATION reservation_affinity_key (str): Optional. Corresponds to the label key of a reservation resource. To target a SPECIFIC_RESERVATION by name, use `compute.googleapis.com/reservation-name` as the key and specify the name of your reservation as its value. reservation_affinity_values (List[str]): Optional. Corresponds to the label values of a reservation resource. This must be the full resource name of the reservation. Format: 'projects/{project_id_or_number}/zones/{zone}/reservations/{reservation_name}' spot (bool): Optional. Whether to schedule the deployment workload on spot VMs. fast_tryout_enabled (bool): Optional. Defaults to False. If True, model will be deployed using faster deployment path. Useful for quick experiments. Not for production workloads. Only available for most popular models with certain machine types. system_labels (Dict[str, str]): Optional. 
System labels to apply to Model Garden deployments. System labels are managed by Google for internal use only. required_replica_count (int): Optional. Number of required available replicas for the deployment to succeed. This field is only needed when partial model deployment/mutation is desired, with a value greater than or equal to 1 and fewer than or equal to min_replica_count. If set, the model deploy/mutate operation will succeed once available_replica_count reaches required_replica_count, and the rest of the replicas will be retried. """ self._sync_gca_resource_if_skipped() self._validate_deploy_args( min_replica_count=min_replica_count, max_replica_count=max_replica_count, accelerator_type=accelerator_type, deployed_model_display_name=deployed_model_display_name, traffic_split=traffic_split, traffic_percentage=traffic_percentage, deployment_resource_pool=deployment_resource_pool, required_replica_count=required_replica_count, ) explanation_spec = _explanation_utils.create_and_validate_explanation_spec( explanation_metadata=explanation_metadata, explanation_parameters=explanation_parameters, ) self._deploy( model=model, deployed_model_display_name=deployed_model_display_name, traffic_percentage=traffic_percentage, traffic_split=traffic_split, machine_type=machine_type, min_replica_count=min_replica_count, max_replica_count=max_replica_count, accelerator_type=accelerator_type, accelerator_count=accelerator_count, tpu_topology=tpu_topology, reservation_affinity_type=reservation_affinity_type, reservation_affinity_key=reservation_affinity_key, reservation_affinity_values=reservation_affinity_values, service_account=service_account, explanation_spec=explanation_spec, metadata=metadata, sync=sync, deploy_request_timeout=deploy_request_timeout, autoscaling_target_cpu_utilization=autoscaling_target_cpu_utilization, autoscaling_target_accelerator_duty_cycle=autoscaling_target_accelerator_duty_cycle, spot=spot, enable_access_logging=enable_access_logging, disable_container_logging=disable_container_logging, deployment_resource_pool=deployment_resource_pool, fast_tryout_enabled=fast_tryout_enabled, system_labels=system_labels, required_replica_count=required_replica_count, ) @base.optional_sync() def _deploy( self, model: "Model", deployed_model_display_name: Optional[str] = None, traffic_percentage: Optional[int] = 0, traffic_split: Optional[Dict[str, int]] = None, machine_type: Optional[str] = None, min_replica_count: int = 1, max_replica_count: int = 1, accelerator_type: Optional[str] = None, accelerator_count: Optional[int] = None, tpu_topology: Optional[str] = None, reservation_affinity_type: Optional[str] = None, reservation_affinity_key: Optional[str] = None, reservation_affinity_values: Optional[List[str]] = None, service_account: Optional[str] = None, explanation_spec: Optional[aiplatform.explain.ExplanationSpec] = None, metadata: Optional[Sequence[Tuple[str, str]]] = (), sync=True, deploy_request_timeout: Optional[float] = None, autoscaling_target_cpu_utilization: Optional[int] = None, autoscaling_target_accelerator_duty_cycle: Optional[int] = None, spot: bool = False, enable_access_logging=False, disable_container_logging: bool = False, deployment_resource_pool: Optional[DeploymentResourcePool] = None, fast_tryout_enabled: bool = False, system_labels: Optional[Dict[str, str]] = None, required_replica_count: Optional[int] = 0, ) -> None: """Deploys a Model to the Endpoint. Args: model (aiplatform.Model): Required. Model to be deployed. deployed_model_display_name (str): Optional. 
The display name of the DeployedModel. If not provided upon creation, the Model's display_name is used. traffic_percentage (int): Optional. Desired traffic to newly deployed model. Defaults to 0 if there are pre-existing deployed models. Defaults to 100 if there are no pre-existing deployed models. Negative values should not be provided. Traffic of previously deployed models at the endpoint will be scaled down to accommodate new deployed model's traffic. Should not be provided if traffic_split is provided. traffic_split (Dict[str, int]): Optional. A map from a DeployedModel's ID to the percentage of this Endpoint's traffic that should be forwarded to that DeployedModel. If a DeployedModel's ID is not listed in this map, then it receives no traffic. The traffic percentage values must add up to 100, or map must be empty if the Endpoint is to not accept any traffic at the moment. Key for model being deployed is "0". Should not be provided if traffic_percentage is provided. machine_type (str): Optional. The type of machine. Not specifying machine type will result in model to be deployed with automatic resources. min_replica_count (int): Optional. The minimum number of machine replicas this deployed model will be always deployed on. If traffic against it increases, it may dynamically be deployed onto more replicas, and as traffic decreases, some of these extra replicas may be freed. max_replica_count (int): Optional. The maximum number of replicas this deployed model may be deployed on when the traffic against it increases. If requested value is too large, the deployment will error, but if deployment succeeds then the ability to scale the model to that many replicas is guaranteed (barring service outages). If traffic against the deployed model increases beyond what its replicas at maximum may handle, a portion of the traffic will be dropped. If this value is not provided, the larger value of min_replica_count or 1 will be used. If value provided is smaller than min_replica_count, it will automatically be increased to be min_replica_count. accelerator_type (str): Optional. Hardware accelerator type. Must also set accelerator_count if used. One of ACCELERATOR_TYPE_UNSPECIFIED, NVIDIA_TESLA_K80, NVIDIA_TESLA_P100, NVIDIA_TESLA_V100, NVIDIA_TESLA_P4, NVIDIA_TESLA_T4 accelerator_count (int): Optional. The number of accelerators to attach to a worker replica. tpu_topology (str): Optional. The TPU topology to use for the DeployedModel. Required for CloudTPU multihost deployments. reservation_affinity_type (str): Optional. The type of reservation affinity. One of NO_RESERVATION, ANY_RESERVATION, SPECIFIC_RESERVATION, SPECIFIC_THEN_ANY_RESERVATION, SPECIFIC_THEN_NO_RESERVATION reservation_affinity_key (str): Optional. Corresponds to the label key of a reservation resource. To target a SPECIFIC_RESERVATION by name, use `compute.googleapis.com/reservation-name` as the key and specify the name of your reservation as its value. reservation_affinity_values (List[str]): Optional. Corresponds to the label values of a reservation resource. This must be the full resource name of the reservation. Format: 'projects/{project_id_or_number}/zones/{zone}/reservations/{reservation_name}' service_account (str): The service account that the DeployedModel's container runs as. Specify the email address of the service account. If this service account is not specified, the container runs as a service account that doesn't have access to the resource project. 
Users deploying the Model must have the `iam.serviceAccounts.actAs` permission on this service account. explanation_spec (aiplatform.explain.ExplanationSpec): Optional. Specification of Model explanation. metadata (Sequence[Tuple[str, str]]): Optional. Strings which should be sent along with the request as metadata. sync (bool): Whether to execute this method synchronously. If False, this method will be executed in concurrent Future and any downstream object will be immediately returned and synced when the Future has completed. deploy_request_timeout (float): Optional. The timeout for the deploy request in seconds. autoscaling_target_cpu_utilization (int): Target CPU Utilization to use for Autoscaling Replicas. A default value of 60 will be used if not specified. autoscaling_target_accelerator_duty_cycle (int): Target Accelerator Duty Cycle. Must also set accelerator_type and accelerator_count if specified. A default value of 60 will be used if not specified. spot (bool): Optional. Whether to schedule the deployment workload on spot VMs. enable_access_logging (bool): Whether to enable endpoint access logging. Defaults to False. disable_container_logging (bool): If True, container logs from the deployed model will not be written to Cloud Logging. Defaults to False. deployment_resource_pool (DeploymentResourcePool): Resource pool where the model will be deployed. All models that are deployed to the same DeploymentResourcePool will be hosted in a shared model server. If provided, will override replica count arguments. fast_tryout_enabled (bool): Optional. Defaults to False. If True, model will be deployed using faster deployment path. Useful for quick experiments. Not for production workloads. Only available for most popular models with certain machine types. system_labels (Dict[str, str]): Optional. System labels to apply to Model Garden deployments. System labels are managed by Google for internal use only. required_replica_count (int): Optional. Number of required available replicas for the deployment to succeed. This field is only needed when partial model deployment/mutation is desired, with a value greater than or equal to 1 and fewer than or equal to min_replica_count. If set, the model deploy/mutate operation will succeed once available_replica_count reaches required_replica_count, and the rest of the replicas will be retried. 
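Example (an illustrative sketch only; ``my_endpoint``, ``my_model``, and the machine and replica values below are placeholder assumptions). Deployment is normally initiated through the public ``Endpoint.deploy`` method, which validates the arguments and forwards them to this helper:

    my_endpoint.deploy(
        model=my_model,
        deployed_model_display_name="my-deployed-model",
        machine_type="n1-standard-4",
        min_replica_count=1,
        max_replica_count=2,
        traffic_percentage=100,
    )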
""" _LOGGER.log_action_start_against_resource( f"Deploying Model {model.resource_name} to", "", self ) self._deploy_call( api_client=self.api_client, endpoint_resource_name=self.resource_name, model=model, endpoint_resource_traffic_split=self._gca_resource.traffic_split, network=self.network, deployed_model_display_name=deployed_model_display_name, traffic_percentage=traffic_percentage, traffic_split=traffic_split, machine_type=machine_type, min_replica_count=min_replica_count, max_replica_count=max_replica_count, accelerator_type=accelerator_type, accelerator_count=accelerator_count, tpu_topology=tpu_topology, reservation_affinity_type=reservation_affinity_type, reservation_affinity_key=reservation_affinity_key, reservation_affinity_values=reservation_affinity_values, service_account=service_account, explanation_spec=explanation_spec, metadata=metadata, deploy_request_timeout=deploy_request_timeout, autoscaling_target_cpu_utilization=autoscaling_target_cpu_utilization, autoscaling_target_accelerator_duty_cycle=autoscaling_target_accelerator_duty_cycle, spot=spot, enable_access_logging=enable_access_logging, disable_container_logging=disable_container_logging, deployment_resource_pool=deployment_resource_pool, fast_tryout_enabled=fast_tryout_enabled, system_labels=system_labels, required_replica_count=required_replica_count, ) _LOGGER.log_action_completed_against_resource("model", "deployed", self) self._sync_gca_resource() @classmethod def _deploy_call( cls, api_client: endpoint_service_client.EndpointServiceClient, endpoint_resource_name: str, model: "Model", endpoint_resource_traffic_split: Optional[proto.MapField] = None, network: Optional[str] = None, deployed_model_display_name: Optional[str] = None, traffic_percentage: Optional[int] = 0, traffic_split: Optional[Dict[str, int]] = None, machine_type: Optional[str] = None, min_replica_count: int = 1, max_replica_count: int = 1, accelerator_type: Optional[str] = None, accelerator_count: Optional[int] = None, tpu_topology: Optional[str] = None, reservation_affinity_type: Optional[str] = None, reservation_affinity_key: Optional[str] = None, reservation_affinity_values: Optional[List[str]] = None, service_account: Optional[str] = None, explanation_spec: Optional[aiplatform.explain.ExplanationSpec] = None, metadata: Optional[Sequence[Tuple[str, str]]] = (), deploy_request_timeout: Optional[float] = None, autoscaling_target_cpu_utilization: Optional[int] = None, autoscaling_target_accelerator_duty_cycle: Optional[int] = None, autoscaling_target_request_count_per_minute: Optional[int] = None, spot: bool = False, enable_access_logging=False, disable_container_logging: bool = False, deployment_resource_pool: Optional[DeploymentResourcePool] = None, fast_tryout_enabled: bool = False, system_labels: Optional[Dict[str, str]] = None, required_replica_count: Optional[int] = 0, ) -> None: """Helper method to deploy model to endpoint. Args: api_client (endpoint_service_client.EndpointServiceClient): Required. endpoint_service_client.EndpointServiceClient to make call. endpoint_resource_name (str): Required. Endpoint resource name to deploy model to. model (aiplatform.Model): Required. Model to be deployed. endpoint_resource_traffic_split (proto.MapField): Optional. Endpoint current resource traffic split. network (str): Optional. The full name of the Compute Engine network to which this Endpoint will be peered. E.g. "projects/123/global/networks/my_vpc". Private services access must already be configured for the network. 
deployed_model_display_name (str): Optional. The display name of the DeployedModel. If not provided upon creation, the Model's display_name is used. traffic_percentage (int): Optional. Desired traffic to newly deployed model. Defaults to 0 if there are pre-existing deployed models. Defaults to 100 if there are no pre-existing deployed models. Negative values should not be provided. Traffic of previously deployed models at the endpoint will be scaled down to accommodate new deployed model's traffic. Should not be provided if traffic_split is provided. traffic_split (Dict[str, int]): Optional. A map from a DeployedModel's ID to the percentage of this Endpoint's traffic that should be forwarded to that DeployedModel. If a DeployedModel's ID is not listed in this map, then it receives no traffic. The traffic percentage values must add up to 100, or map must be empty if the Endpoint is to not accept any traffic at the moment. Key for model being deployed is "0". Should not be provided if traffic_percentage is provided. machine_type (str): Optional. The type of machine. Not specifying machine type will result in model to be deployed with automatic resources. min_replica_count (int): Optional. The minimum number of machine replicas this deployed model will be always deployed on. If traffic against it increases, it may dynamically be deployed onto more replicas, and as traffic decreases, some of these extra replicas may be freed. max_replica_count (int): Optional. The maximum number of replicas this deployed model may be deployed on when the traffic against it increases. If requested value is too large, the deployment will error, but if deployment succeeds then the ability to scale the model to that many replicas is guaranteed (barring service outages). If traffic against the deployed model increases beyond what its replicas at maximum may handle, a portion of the traffic will be dropped. If this value is not provided, the larger value of min_replica_count or 1 will be used. If value provided is smaller than min_replica_count, it will automatically be increased to be min_replica_count. accelerator_type (str): Optional. Hardware accelerator type. Must also set accelerator_count if used. One of ACCELERATOR_TYPE_UNSPECIFIED, NVIDIA_TESLA_K80, NVIDIA_TESLA_P100, NVIDIA_TESLA_V100, NVIDIA_TESLA_P4, NVIDIA_TESLA_T4 accelerator_count (int): Optional. The number of accelerators to attach to a worker replica. tpu_topology (str): Optional. The TPU topology to use for the DeployedModel. Required for CloudTPU multihost deployments. reservation_affinity_type (str): Optional. The type of reservation affinity. One of NO_RESERVATION, ANY_RESERVATION, SPECIFIC_RESERVATION, SPECIFIC_THEN_ANY_RESERVATION, SPECIFIC_THEN_NO_RESERVATION reservation_affinity_key (str): Optional. Corresponds to the label key of a reservation resource. To target a SPECIFIC_RESERVATION by name, use `compute.googleapis.com/reservation-name` as the key and specify the name of your reservation as its value. reservation_affinity_values (List[str]): Optional. Corresponds to the label values of a reservation resource. This must be the full resource name of the reservation. Format: 'projects/{project_id_or_number}/zones/{zone}/reservations/{reservation_name}' service_account (str): The service account that the DeployedModel's container runs as. Specify the email address of the service account. If this service account is not specified, the container runs as a service account that doesn't have access to the resource project. 
Users deploying the Model must have the `iam.serviceAccounts.actAs` permission on this service account. If not specified, uses the service account set in aiplatform.init. explanation_spec (aiplatform.explain.ExplanationSpec): Optional. Specification of Model explanation. metadata (Sequence[Tuple[str, str]]): Optional. Strings which should be sent along with the request as metadata. deploy_request_timeout (float): Optional. The timeout for the deploy request in seconds. autoscaling_target_cpu_utilization (int): Optional. Target CPU Utilization to use for Autoscaling Replicas. A default value of 60 will be used if not specified. autoscaling_target_accelerator_duty_cycle (int): Optional. Target Accelerator Duty Cycle. Must also set accelerator_type and accelerator_count if specified. A default value of 60 will be used if not specified. autoscaling_target_request_count_per_minute (int): Optional. Target request count per minute per instance. spot (bool): Optional. Whether to schedule the deployment workload on spot VMs. enable_access_logging (bool): Whether to enable endpoint access logging. Defaults to False. disable_container_logging (bool): If True, container logs from the deployed model will not be written to Cloud Logging. Defaults to False. deployment_resource_pool (DeploymentResourcePool): Resource pool where the model will be deployed. All models that are deployed to the same DeploymentResourcePool will be hosted in a shared model server. If provided, will override replica count arguments. fast_tryout_enabled (bool): Optional. Defaults to False. If True, model will be deployed using faster deployment path. Useful for quick experiments. Not for production workloads. Only available for most popular models with certain machine types. system_labels (Dict[str, str]): Optional. System labels to apply to Model Garden deployments. System labels are managed by Google for internal use only. required_replica_count (int): Optional. Number of required available replicas for the deployment to succeed. This field is only needed when partial model deployment/mutation is desired, with a value greater than or equal to 1 and fewer than or equal to min_replica_count. If set, the model deploy/mutate operation will succeed once available_replica_count reaches required_replica_count, and the rest of the replicas will be retried. Raises: ValueError: If only `accelerator_type` or `accelerator_count` is specified. ValueError: If model does not support deployment. ValueError: If there is not current traffic split and traffic percentage is not 0 or 100. ValueError: If `deployment_resource_pool` and a custom machine spec are both present. ValueError: If both `explanation_spec` and `deployment_resource_pool` are present. """ service_account = service_account or initializer.global_config.service_account if deployment_resource_pool: deployed_model = gca_endpoint_compat.DeployedModel( model=model.versioned_resource_name, display_name=deployed_model_display_name, service_account=service_account, disable_container_logging=disable_container_logging, ) if system_labels: deployed_model.system_labels = system_labels supports_shared_resources = ( gca_model_compat.Model.DeploymentResourcesType.SHARED_RESOURCES in model.supported_deployment_resources_types ) if not supports_shared_resources: raise ValueError( "`deployment_resource_pool` may only be specified for models " " which support shared resources." 
) provided_custom_machine_spec = ( machine_type or accelerator_type or accelerator_count or autoscaling_target_accelerator_duty_cycle or autoscaling_target_cpu_utilization or autoscaling_target_request_count_per_minute ) if provided_custom_machine_spec: raise ValueError( "Conflicting parameters in deployment request. " "The machine_type, accelerator_type and accelerator_count, " "autoscaling_target_accelerator_duty_cycle, " "autoscaling_target_cpu_utilization, " "autoscaling_target_request_count_per_minute parameters " "may not be set when `deployment_resource_pool` is " "specified." ) deployed_model.shared_resources = deployment_resource_pool.resource_name if explanation_spec: raise ValueError( "Model explanation is not supported for deployments using " "shared resources." ) else: max_replica_count = max(min_replica_count, max_replica_count) if bool(accelerator_type) != bool(accelerator_count): raise ValueError( "Both `accelerator_type` and `accelerator_count` should be specified or None." ) if autoscaling_target_accelerator_duty_cycle is not None and ( not accelerator_type or not accelerator_count ): raise ValueError( "Both `accelerator_type` and `accelerator_count` should be set " "when specifying autoscaling_target_accelerator_duty_cycle`" ) deployed_model = gca_endpoint_compat.DeployedModel( model=model.versioned_resource_name, display_name=deployed_model_display_name, service_account=service_account, enable_access_logging=enable_access_logging, disable_container_logging=disable_container_logging, ) if system_labels: deployed_model.system_labels = system_labels supports_automatic_resources = ( gca_model_compat.Model.DeploymentResourcesType.AUTOMATIC_RESOURCES in model.supported_deployment_resources_types ) supports_dedicated_resources = ( gca_model_compat.Model.DeploymentResourcesType.DEDICATED_RESOURCES in model.supported_deployment_resources_types ) provided_custom_machine_spec = ( machine_type or accelerator_type or accelerator_count or autoscaling_target_accelerator_duty_cycle or autoscaling_target_cpu_utilization or autoscaling_target_request_count_per_minute ) # If the model supports both automatic and dedicated deployment resources, # decide based on the presence of machine spec customizations use_dedicated_resources = supports_dedicated_resources and ( not supports_automatic_resources or provided_custom_machine_spec ) if provided_custom_machine_spec and not use_dedicated_resources: _LOGGER.info( "Model does not support dedicated deployment resources. " "The machine_type, accelerator_type and accelerator_count, " "autoscaling_target_accelerator_duty_cycle, " "autoscaling_target_cpu_utilization, " "autoscaling_target_request_count_per_minute parameters " "are ignored." 
) if use_dedicated_resources and not machine_type: machine_type = _DEFAULT_MACHINE_TYPE _LOGGER.info(f"Using default machine_type: {machine_type}") if use_dedicated_resources: dedicated_resources = gca_machine_resources_compat.DedicatedResources( min_replica_count=min_replica_count, max_replica_count=max_replica_count, spot=spot, required_replica_count=required_replica_count, ) machine_spec = gca_machine_resources_compat.MachineSpec( machine_type=machine_type ) if autoscaling_target_cpu_utilization: autoscaling_metric_spec = gca_machine_resources_compat.AutoscalingMetricSpec( metric_name="aiplatform.googleapis.com/prediction/online/cpu/utilization", target=autoscaling_target_cpu_utilization, ) dedicated_resources.autoscaling_metric_specs.extend( [autoscaling_metric_spec] ) if accelerator_type and accelerator_count: utils.validate_accelerator_type(accelerator_type) machine_spec.accelerator_type = accelerator_type machine_spec.accelerator_count = accelerator_count if autoscaling_target_accelerator_duty_cycle: autoscaling_metric_spec = gca_machine_resources_compat.AutoscalingMetricSpec( metric_name="aiplatform.googleapis.com/prediction/online/accelerator/duty_cycle", target=autoscaling_target_accelerator_duty_cycle, ) dedicated_resources.autoscaling_metric_specs.extend( [autoscaling_metric_spec] ) if autoscaling_target_request_count_per_minute: autoscaling_metric_spec = ( gca_machine_resources_compat.AutoscalingMetricSpec( metric_name=( "aiplatform.googleapis.com/prediction/online/" "request_count" ), target=autoscaling_target_request_count_per_minute, ) ) dedicated_resources.autoscaling_metric_specs.extend( [autoscaling_metric_spec] ) if reservation_affinity_type: machine_spec.reservation_affinity = utils.get_reservation_affinity( reservation_affinity_type, reservation_affinity_key, reservation_affinity_values, ) if tpu_topology is not None: machine_spec.tpu_topology = tpu_topology dedicated_resources.machine_spec = machine_spec deployed_model.dedicated_resources = dedicated_resources if fast_tryout_enabled: deployed_model.faster_deployment_config = ( gca_endpoint_compat.FasterDeploymentConfig( fast_tryout_enabled=fast_tryout_enabled ) ) elif supports_automatic_resources: deployed_model.automatic_resources = ( gca_machine_resources_compat.AutomaticResources( min_replica_count=min_replica_count, max_replica_count=max_replica_count, ) ) else: _LOGGER.warning( "Model does not support deployment. 
" "See https://cloud.google.com/vertex-ai/docs/reference/rpc/google.cloud.aiplatform.v1#google.cloud.aiplatform.v1.Model.FIELDS.repeated.google.cloud.aiplatform.v1.Model.DeploymentResourcesType.google.cloud.aiplatform.v1.Model.supported_deployment_resources_types" ) deployed_model.explanation_spec = explanation_spec # Checking if traffic percentage is valid # TODO(b/221059294) PrivateEndpoint should support traffic split if traffic_split is None and not network: # new model traffic needs to be 100 if no pre-existing models if not endpoint_resource_traffic_split: # default scenario if traffic_percentage == 0: traffic_percentage = 100 # verify user specified 100 elif traffic_percentage < 100: raise ValueError( """There are currently no deployed models so the traffic percentage for this deployed model needs to be 100.""" ) traffic_split = cls._allocate_traffic( traffic_split=dict(endpoint_resource_traffic_split), traffic_percentage=traffic_percentage, ) operation_future = api_client.deploy_model( endpoint=endpoint_resource_name, deployed_model=deployed_model, traffic_split=traffic_split, metadata=metadata, timeout=deploy_request_timeout, ) _LOGGER.log_action_started_against_resource_with_lro( "Deploy", "model", cls, operation_future ) operation_future.result(timeout=None) def undeploy( self, deployed_model_id: str, traffic_split: Optional[Dict[str, int]] = None, metadata: Optional[Sequence[Tuple[str, str]]] = (), sync=True, ) -> None: """Undeploys a deployed model. The model to be undeployed should have no traffic or user must provide a new traffic_split with the remaining deployed models. Refer to `Endpoint.traffic_split` for the current traffic split mapping. Args: deployed_model_id (str): Required. The ID of the DeployedModel to be undeployed from the Endpoint. traffic_split (Dict[str, int]): Optional. A map of DeployedModel IDs to the percentage of this Endpoint's traffic that should be forwarded to that DeployedModel. Required if undeploying a model with non-zero traffic from an Endpoint with multiple deployed models. The traffic percentage values must add up to 100, or map must be empty if the Endpoint is to not accept any traffic at the moment. If a DeployedModel's ID is not listed in this map, then it receives no traffic. metadata (Sequence[Tuple[str, str]]): Optional. Strings which should be sent along with the request as metadata. """ self._sync_gca_resource_if_skipped() if traffic_split is not None: if deployed_model_id in traffic_split and traffic_split[deployed_model_id]: raise ValueError("Model being undeployed should have 0 traffic.") if sum(traffic_split.values()) != 100: raise ValueError( "Sum of all traffic within traffic split needs to be 100." ) # Two or more models deployed to Endpoint and remaining traffic will be zero elif ( len(self.traffic_split) > 1 and deployed_model_id in self._gca_resource.traffic_split and self._gca_resource.traffic_split[deployed_model_id] == 100 ): raise ValueError( f"Undeploying deployed model '{deployed_model_id}' would leave the remaining " "traffic split at 0%. Traffic split must add up to 100% when models are " "deployed. Please undeploy the other models first or provide an updated " "traffic_split." ) self._undeploy( deployed_model_id=deployed_model_id, traffic_split=traffic_split, metadata=metadata, sync=sync, ) @base.optional_sync() def _undeploy( self, deployed_model_id: str, traffic_split: Optional[Dict[str, int]] = None, metadata: Optional[Sequence[Tuple[str, str]]] = (), sync=True, ) -> None: """Undeploys a deployed model. 
Proportionally adjusts the traffic_split among the remaining deployed models of the endpoint. Args: deployed_model_id (str): Required. The ID of the DeployedModel to be undeployed from the Endpoint. traffic_split (Dict[str, int]): Optional. A map from a DeployedModel's ID to the percentage of this Endpoint's traffic that should be forwarded to that DeployedModel. If a DeployedModel's ID is not listed in this map, then it receives no traffic. The traffic percentage values must add up to 100, or map must be empty if the Endpoint is to not accept any traffic at the moment. Key for model being deployed is "0". Should not be provided if traffic_percentage is provided. metadata (Sequence[Tuple[str, str]]): Optional. Strings which should be sent along with the request as metadata. """ self._sync_gca_resource_if_skipped() current_traffic_split = traffic_split or dict(self._gca_resource.traffic_split) if deployed_model_id in current_traffic_split: current_traffic_split = self._unallocate_traffic( traffic_split=current_traffic_split, deployed_model_id=deployed_model_id, ) current_traffic_split.pop(deployed_model_id) _LOGGER.log_action_start_against_resource("Undeploying", "model", self) operation_future = self.api_client.undeploy_model( endpoint=self.resource_name, deployed_model_id=deployed_model_id, traffic_split=current_traffic_split, metadata=metadata, ) _LOGGER.log_action_started_against_resource_with_lro( "Undeploy", "model", self.__class__, operation_future ) # block before returning operation_future.result() _LOGGER.log_action_completed_against_resource("model", "undeployed", self) # update local resource self._sync_gca_resource() def update( self, display_name: Optional[str] = None, description: Optional[str] = None, labels: Optional[Dict[str, str]] = None, traffic_split: Optional[Dict[str, int]] = None, request_metadata: Optional[Sequence[Tuple[str, str]]] = (), update_request_timeout: Optional[float] = None, ) -> "Endpoint": """Updates an endpoint. Example usage: my_endpoint = my_endpoint.update( display_name='my-updated-endpoint', description='my updated description', labels={'key': 'value'}, traffic_split={ '123456': 20, '234567': 80, }, ) Args: display_name (str): Optional. The display name of the Endpoint. The name can be up to 128 characters long and can be consist of any UTF-8 characters. description (str): Optional. The description of the Endpoint. labels (Dict[str, str]): Optional. The labels with user-defined metadata to organize your Endpoints. Label keys and values can be no longer than 64 characters (Unicode codepoints), can only contain lowercase letters, numeric characters, underscores and dashes. International characters are allowed. See https://goo.gl/xmQnxf for more information and examples of labels. traffic_split (Dict[str, int]): Optional. A map from a DeployedModel's ID to the percentage of this Endpoint's traffic that should be forwarded to that DeployedModel. If a DeployedModel's ID is not listed in this map, then it receives no traffic. The traffic percentage values must add up to 100, or map must be empty if the Endpoint is to not accept any traffic at a moment. request_metadata (Sequence[Tuple[str, str]]): Optional. Strings which should be sent along with the request as metadata. update_request_timeout (float): Optional. The timeout for the update request in seconds. Returns: Endpoint (aiplatform.Prediction): Updated endpoint resource. Raises: ValueError: If `labels` is not the correct format. 
""" self.wait() current_endpoint_proto = self.gca_resource copied_endpoint_proto = current_endpoint_proto.__class__(current_endpoint_proto) update_mask: List[str] = [] if display_name: utils.validate_display_name(display_name) copied_endpoint_proto.display_name = display_name update_mask.append("display_name") if description: copied_endpoint_proto.description = description update_mask.append("description") if labels: utils.validate_labels(labels) copied_endpoint_proto.labels = labels update_mask.append("labels") if traffic_split: update_mask.append("traffic_split") copied_endpoint_proto.traffic_split = traffic_split update_mask = field_mask_pb2.FieldMask(paths=update_mask) _LOGGER.log_action_start_against_resource( "Updating", "endpoint", self, ) self._gca_resource = self.api_client.update_endpoint( endpoint=copied_endpoint_proto, update_mask=update_mask, metadata=request_metadata, timeout=update_request_timeout, ) _LOGGER.log_action_completed_against_resource("endpoint", "updated", self) return self def predict( self, instances: List, parameters: Optional[Dict] = None, timeout: Optional[float] = None, use_raw_predict: Optional[bool] = False, *, use_dedicated_endpoint: Optional[bool] = False, ) -> Prediction: """Make a prediction against this Endpoint. For dedicated endpoint, set use_dedicated_endpoint = True: ``` response = my_endpoint.predict(instances=[...], use_dedicated_endpoint=True) my_predictions = response.predictions ``` Args: instances (List): Required. The instances that are the input to the prediction call. A DeployedModel may have an upper limit on the number of instances it supports per request, and when it is exceeded the prediction call errors in case of AutoML Models, or, in case of customer created Models, the behaviour is as documented by that Model. The schema of any single instance may be specified via Endpoint's DeployedModels' [Model's][google.cloud.aiplatform.v1beta1.DeployedModel.model] [PredictSchemata's][google.cloud.aiplatform.v1beta1.Model.predict_schemata] ``instance_schema_uri``. parameters (Dict): The parameters that govern the prediction. The schema of the parameters may be specified via Endpoint's DeployedModels' [Model's ][google.cloud.aiplatform.v1beta1.DeployedModel.model] [PredictSchemata's][google.cloud.aiplatform.v1beta1.Model.predict_schemata] ``parameters_schema_uri``. timeout (float): Optional. The timeout for this request in seconds. use_raw_predict (bool): Optional. Default value is False. If set to True, the underlying prediction call will be made against Endpoint.raw_predict(). use_dedicated_endpoint (bool): Optional. Default value is False. If set to True, the underlying prediction call will be made using the dedicated endpoint dns. Returns: prediction (aiplatform.Prediction): Prediction with returned predictions and Model ID. Raises: ImportError: If there is an issue importing the `TCPKeepAliveAdapter` package. 
""" self.wait() if use_raw_predict: raw_predict_response = self.raw_predict( body=json.dumps({"instances": instances, "parameters": parameters}), headers={"Content-Type": "application/json"}, use_dedicated_endpoint=use_dedicated_endpoint, timeout=timeout, ) json_response = raw_predict_response.json() return Prediction( predictions=json_response["predictions"], metadata=json_response.get("metadata"), deployed_model_id=raw_predict_response.headers[ _RAW_PREDICT_DEPLOYED_MODEL_ID_KEY ], model_resource_name=raw_predict_response.headers[ _RAW_PREDICT_MODEL_RESOURCE_KEY ], model_version_id=raw_predict_response.headers.get( _RAW_PREDICT_MODEL_VERSION_ID_KEY, None ), ) if use_dedicated_endpoint: self._sync_gca_resource_if_skipped() if ( not self._gca_resource.dedicated_endpoint_enabled or self._gca_resource.dedicated_endpoint_dns is None ): raise ValueError( "Dedicated endpoint is not enabled or DNS is empty." "Please make sure endpoint has dedicated endpoint enabled" "and model are ready before making a prediction." ) try: from requests_toolbelt.adapters.socket_options import ( TCPKeepAliveAdapter, ) except ImportError: raise ImportError( "Cannot import the requests-toolbelt library. Please install requests-toolbelt." ) if not self.authorized_session: self.credentials._scopes = constants.base.DEFAULT_AUTHED_SCOPES self.authorized_session = google_auth_requests.AuthorizedSession( self.credentials ) headers = { "Content-Type": "application/json", } url = f"https://{self._gca_resource.dedicated_endpoint_dns}/v1/{self.resource_name}:predict" # count * interval need to be larger than 1 hr (3600s) keep_alive = TCPKeepAliveAdapter(idle=120, count=100, interval=100) self.authorized_session.mount("https://", keep_alive) response = self.authorized_session.post( url=url, data=json.dumps( { "instances": instances, "parameters": parameters, } ), headers=headers, timeout=timeout, ) prediction_response = json.loads(response.text) return Prediction( predictions=prediction_response.get("predictions"), metadata=prediction_response.get("metadata"), deployed_model_id=prediction_response.get("deployedModelId"), model_resource_name=prediction_response.get("model"), model_version_id=prediction_response.get("modelVersionId"), ) else: prediction_response = self._prediction_client.predict( endpoint=self._gca_resource.name, instances=instances, parameters=parameters, timeout=timeout, ) if prediction_response._pb.metadata: metadata = json_format.MessageToDict(prediction_response._pb.metadata) else: metadata = None return Prediction( predictions=[ json_format.MessageToDict(item) for item in prediction_response.predictions.pb ], metadata=metadata, deployed_model_id=prediction_response.deployed_model_id, model_version_id=prediction_response.model_version_id, model_resource_name=prediction_response.model, ) async def predict_async( self, instances: List, *, parameters: Optional[Dict] = None, timeout: Optional[float] = None, ) -> Prediction: """Make an asynchronous prediction against this Endpoint. Example usage: ``` response = await my_endpoint.predict_async(instances=[...]) my_predictions = response.predictions ``` Args: instances (List): Required. The instances that are the input to the prediction call. A DeployedModel may have an upper limit on the number of instances it supports per request, and when it is exceeded the prediction call errors in case of AutoML Models, or, in case of customer created Models, the behaviour is as documented by that Model. 
The schema of any single instance may be specified via Endpoint's DeployedModels' [Model's][google.cloud.aiplatform.v1beta1.DeployedModel.model] [PredictSchemata's][google.cloud.aiplatform.v1beta1.Model.predict_schemata] ``instance_schema_uri``. parameters (Dict): Optional. The parameters that govern the prediction. The schema of the parameters may be specified via Endpoint's DeployedModels' [Model's ][google.cloud.aiplatform.v1beta1.DeployedModel.model] [PredictSchemata's][google.cloud.aiplatform.v1beta1.Model.predict_schemata] ``parameters_schema_uri``. timeout (float): Optional. The timeout for this request in seconds. Returns: prediction (aiplatform.Prediction): Prediction with returned predictions and Model ID. """ self.wait() prediction_response = await self._prediction_async_client.predict( endpoint=self._gca_resource.name, instances=instances, parameters=parameters, timeout=timeout, ) if prediction_response._pb.metadata: metadata = json_format.MessageToDict(prediction_response._pb.metadata) else: metadata = None return Prediction( predictions=[ json_format.MessageToDict(item) for item in prediction_response.predictions.pb ], metadata=metadata, deployed_model_id=prediction_response.deployed_model_id, model_version_id=prediction_response.model_version_id, model_resource_name=prediction_response.model, ) def raw_predict( self, body: bytes, headers: Dict[str, str], *, use_dedicated_endpoint: Optional[bool] = False, timeout: Optional[float] = None, ) -> requests.models.Response: """Makes a prediction request using arbitrary headers. Example usage: my_endpoint = aiplatform.Endpoint(ENDPOINT_ID) response = my_endpoint.raw_predict( body = b'{"instances":[{"feat_1":val_1, "feat_2":val_2}]}', headers = {'Content-Type':'application/json'} ) # For dedicated endpoint: response = my_endpoint.raw_predict( body = b'{"instances":[{"feat_1":val_1, "feat_2":val_2}]}', headers = {'Content-Type':'application/json'}, use_dedicated_endpoint=True, ) status_code = response.status_code results = json.loads(response.text) Args: body (bytes): The body of the prediction request in bytes. This must not exceed 1.5 mb per request. headers (Dict[str, str]): The header of the request as a dictionary. There are no restrictions on the header. use_dedicated_endpoint (bool): Optional. Default value is False. If set to True, the underlying prediction call will be made using the dedicated endpoint dns. timeout (float): Optional. The timeout for this request in seconds. Returns: A requests.models.Response object containing the status code and prediction results. Raises: ImportError: If there is an issue importing the `TCPKeepAliveAdapter` package. """ if not self.authorized_session: self.credentials._scopes = constants.base.DEFAULT_AUTHED_SCOPES self.authorized_session = google_auth_requests.AuthorizedSession( self.credentials ) if self.raw_predict_request_url is None: self.raw_predict_request_url = f"https://{self.location}-{constants.base.API_BASE_PATH}/v1/projects/{self.project}/locations/{self.location}/endpoints/{self.name}:rawPredict" url = self.raw_predict_request_url if use_dedicated_endpoint: try: from requests_toolbelt.adapters.socket_options import ( TCPKeepAliveAdapter, ) except ImportError: raise ImportError( "Cannot import the requests-toolbelt library. Please install requests-toolbelt." ) self._sync_gca_resource_if_skipped() if ( not self._gca_resource.dedicated_endpoint_enabled or self._gca_resource.dedicated_endpoint_dns is None ): raise ValueError( "Dedicated endpoint is not enabled or DNS is empty."
"Please make sure endpoint has dedicated endpoint enabled" "and model are ready before making a prediction." ) url = f"https://{self._gca_resource.dedicated_endpoint_dns}/v1/{self.resource_name}:rawPredict" # count * interval need to be larger than 1 hr (3600s) keep_alive = TCPKeepAliveAdapter(idle=120, count=100, interval=100) self.authorized_session.mount("https://", keep_alive) return self.authorized_session.post( url=url, data=body, headers=headers, timeout=timeout ) def stream_raw_predict( self, body: bytes, headers: Dict[str, str], *, use_dedicated_endpoint: Optional[bool] = False, timeout: Optional[float] = None, ) -> Iterator[requests.models.Response]: """Makes a streaming prediction request using arbitrary headers. For custom model, this method is only supported for dedicated endpoint. Example usage: ``` my_endpoint = aiplatform.Endpoint(ENDPOINT_ID) for stream_response in my_endpoint.stream_raw_predict( body = b'{"instances":[{"feat_1":val_1, "feat_2":val_2}]}' headers = {'Content-Type':'application/json'} ): status_code = response.status_code stream_result = json.dumps(response.text) ``` For dedicated endpoint: ``` my_endpoint = aiplatform.Endpoint(ENDPOINT_ID) for stream_response in my_endpoint.stream_raw_predict( body = b'{"instances":[{"feat_1":val_1, "feat_2":val_2}]}', headers = {'Content-Type':'application/json'}, use_dedicated_endpoint=True, ): status_code = response.status_code stream_result = json.dumps(response.text) ``` Args: body (bytes): The body of the prediction request in bytes. This must not exceed 10 mb per request. headers (Dict[str, str]): The header of the request as a dictionary. There are no restrictions on the header. use_dedicated_endpoint (bool): Optional. Default value is False. If set to True, the underlying prediction call will be made using the dedicated endpoint dns. timeout (float): Optional. The timeout for this request in seconds. Yields: predictions (Iterator[requests.models.Response]): The streaming prediction results. """ if not self.authorized_session: self.credentials._scopes = constants.base.DEFAULT_AUTHED_SCOPES self.authorized_session = google_auth_requests.AuthorizedSession( self.credentials ) if self.stream_raw_predict_request_url is None: self.stream_raw_predict_request_url = f"https://{self.location}-{constants.base.API_BASE_PATH}/v1/projects/{self.project}/locations/{self.location}/endpoints/{self.name}:streamRawPredict" url = self.stream_raw_predict_request_url if use_dedicated_endpoint: self._sync_gca_resource_if_skipped() if ( not self._gca_resource.dedicated_endpoint_enabled or self._gca_resource.dedicated_endpoint_dns is None ): raise ValueError( "Dedicated endpoint is not enabled or DNS is empty." "Please make sure endpoint has dedicated endpoint enabled" "and model are ready before making a prediction." ) url = f"https://{self._gca_resource.dedicated_endpoint_dns}/v1/{self.resource_name}:streamRawPredict" with self.authorized_session.post( url=url, data=body, headers=headers, timeout=timeout, stream=True, ) as resp: for line in resp.iter_lines(): yield line def direct_predict( self, inputs: List, parameters: Optional[Dict] = None, timeout: Optional[float] = None, ) -> Prediction: """Makes a direct (gRPC) prediction against this Endpoint for a pre-built image. Args: inputs (List): Required. The inputs that are the input to the prediction call. 
A DeployedModel may have an upper limit on the number of instances it supports per request, and when it is exceeded the prediction call errors in case of AutoML Models, or, in case of customer created Models, the behaviour is as documented by that Model. The schema of any single instance may be specified via Endpoint's DeployedModels' [Model's][google.cloud.aiplatform.v1beta1.DeployedModel.model] [PredictSchemata's][google.cloud.aiplatform.v1beta1.Model.predict_schemata] ``instance_schema_uri``. parameters (Dict): Optional. The parameters that govern the prediction. The schema of the parameters may be specified via Endpoint's DeployedModels' [Model's][google.cloud.aiplatform.v1beta1.DeployedModel.model] [PredictSchemata's][google.cloud.aiplatform.v1beta1.Model.predict_schemata] ``parameters_schema_uri``. timeout (Optional[float]): Optional. The timeout for this request in seconds. Returns: prediction (aiplatform.Prediction): The resulting prediction. """ self.wait() prediction_response = self._prediction_client.direct_predict( request={ "endpoint": self._gca_resource.name, "inputs": inputs, "parameters": parameters, }, timeout=timeout, ) return Prediction( predictions=[ json_format.MessageToDict(item) for item in prediction_response.outputs.pb ], metadata=None, deployed_model_id=None, model_version_id=None, model_resource_name=None, ) async def direct_predict_async( self, inputs: List, *, parameters: Optional[Dict] = None, timeout: Optional[float] = None, ) -> Prediction: """Makes an asynchronous direct (gRPC) prediction against this Endpoint for a pre-built image. Example usage: ``` response = await my_endpoint.direct_predict_async(inputs=[...]) my_predictions = response.predictions ``` Args: inputs (List): Required. The inputs that are the input to the prediction call. A DeployedModel may have an upper limit on the number of instances it supports per request, and when it is exceeded the prediction call errors in case of AutoML Models, or, in case of customer created Models, the behaviour is as documented by that Model. The schema of any single instance may be specified via Endpoint's DeployedModels' [Model's][google.cloud.aiplatform.v1beta1.DeployedModel.model] [PredictSchemata's][google.cloud.aiplatform.v1beta1.Model.predict_schemata] ``instance_schema_uri``. parameters (Dict): Optional. The parameters that govern the prediction. The schema of the parameters may be specified via Endpoint's DeployedModels' [Model's][google.cloud.aiplatform.v1beta1.DeployedModel.model] [PredictSchemata's][google.cloud.aiplatform.v1beta1.Model.predict_schemata] ``parameters_schema_uri``. timeout (Optional[float]): Optional. The timeout for this request in seconds. Returns: prediction (aiplatform.Prediction): The resulting prediction. """ self.wait() prediction_response = await self._prediction_async_client.direct_predict( request={ "endpoint": self._gca_resource.name, "inputs": inputs, "parameters": parameters, }, timeout=timeout, ) return Prediction( predictions=[ json_format.MessageToDict(item) for item in prediction_response.outputs.pb ], metadata=None, deployed_model_id=None, model_version_id=None, model_resource_name=None, ) def stream_direct_predict( self, inputs_iterator: Iterator[List], parameters: Optional[Dict] = None, timeout: Optional[float] = None, ) -> Iterator[Prediction]: """Makes a streaming direct (gRPC) prediction against this Endpoint for a pre-built image. Args: inputs_iterator (Iterator[List]): Required. An iterator of the inputs that are the input to the prediction call. 
A DeployedModel may have an upper limit on the number of instances it supports per request, and when it is exceeded the prediction call errors in case of AutoML Models, or, in case of customer created Models, the behaviour is as documented by that Model. The schema of any single instance may be specified via Endpoint's DeployedModels' [Model's][google.cloud.aiplatform.v1beta1.DeployedModel.model] [PredictSchemata's][google.cloud.aiplatform.v1beta1.Model.predict_schemata] ``instance_schema_uri``. parameters (Dict): Optional. The parameters that govern the prediction. The schema of the parameters may be specified via Endpoint's DeployedModels' [Model's][google.cloud.aiplatform.v1beta1.DeployedModel.model] [PredictSchemata's][google.cloud.aiplatform.v1beta1.Model.predict_schemata] ``parameters_schema_uri``. timeout (Optional[float]): Optional. The timeout for this request in seconds. Yields: predictions (Iterator[aiplatform.Prediction]): The resulting streamed predictions. """ self.wait() for resp in self._prediction_client.stream_direct_predict( requests=( { "endpoint": self._gca_resource.name, "inputs": inputs, "parameters": parameters, } for inputs in inputs_iterator ), timeout=timeout, ): yield Prediction( predictions=[ json_format.MessageToDict(item) for item in resp.outputs.pb ], metadata=None, deployed_model_id=None, model_version_id=None, model_resource_name=None, ) def direct_raw_predict( self, method_name: str, request: bytes, timeout: Optional[float] = None, ) -> Prediction: """Makes a direct (gRPC) prediction request using arbitrary headers for a custom container. Example usage: ``` my_endpoint = aiplatform.Endpoint(ENDPOINT_ID) response = my_endpoint.direct_raw_predict(request=b'...') ``` Args: method_name (str): Fully qualified name of the API method being invoked to perform prediction. request (bytes): The body of the prediction request in bytes. timeout (Optional[float]): Optional. The timeout for this request in seconds. Returns: prediction (aiplatform.Prediction): The resulting prediction. """ self.wait() prediction_response = self._prediction_client.direct_raw_predict( request={ "endpoint": self._gca_resource.name, "method_name": method_name, "input": request, }, timeout=timeout, ) return Prediction( predictions=prediction_response.output, metadata=None, deployed_model_id=None, model_version_id=None, model_resource_name=None, ) async def direct_raw_predict_async( self, method_name: str, request: bytes, timeout: Optional[float] = None, ) -> Prediction: """Makes a direct (gRPC) prediction request for a custom container. Example usage: ``` my_endpoint = aiplatform.Endpoint(ENDPOINT_ID) response = await my_endpoint.direct_raw_predict(request=b'...') ``` Args: method_name (str): Fully qualified name of the API method being invoked to perform prediction. request (bytes): The body of the prediction request in bytes. timeout (Optional[float]): Optional. The timeout for this request in seconds. Returns: prediction (aiplatform.Prediction): The resulting prediction. 
""" self.wait() prediction_response = await self._prediction_async_client.direct_raw_predict( request={ "endpoint": self._gca_resource.name, "method_name": method_name, "input": request, }, timeout=timeout, ) return Prediction( predictions=prediction_response.output, metadata=None, deployed_model_id=None, model_version_id=None, model_resource_name=None, ) def stream_direct_raw_predict( self, method_name: str, requests: Iterator[bytes], timeout: Optional[float] = None, ) -> Iterator[Prediction]: """Makes a direct (gRPC) streaming prediction request for a custom container. Example usage: ``` my_endpoint = aiplatform.Endpoint(ENDPOINT_ID) for stream_response in my_endpoint.stream_direct_raw_predict( request=b'...' ): yield stream_response ``` Args: method_name (str): Fully qualified name of the API method being invoked to perform prediction. requests (Iterator[bytes]): The body of the prediction requests in bytes. timeout (Optional[float]): Optional. The timeout for this request in seconds. Yields: predictions (Iterator[aiplatform.Prediction]): The resulting streamed predictions. """ self.wait() for resp in self._prediction_client.stream_direct_raw_predict( requests=( { "endpoint": self._gca_resource.name, "method_name": method_name, "input": request, } for request in requests ), timeout=timeout, ): yield Prediction( predictions=resp.output, metadata=None, deployed_model_id=None, model_version_id=None, model_resource_name=None, ) def explain( self, instances: List[Dict], parameters: Optional[Dict] = None, deployed_model_id: Optional[str] = None, timeout: Optional[float] = None, ) -> Prediction: """Make a prediction with explanations against this Endpoint. Example usage: response = my_endpoint.explain(instances=[...]) my_explanations = response.explanations Args: instances (List): Required. The instances that are the input to the prediction call. A DeployedModel may have an upper limit on the number of instances it supports per request, and when it is exceeded the prediction call errors in case of AutoML Models, or, in case of customer created Models, the behaviour is as documented by that Model. The schema of any single instance may be specified via Endpoint's DeployedModels' [Model's][google.cloud.aiplatform.v1beta1.DeployedModel.model] [PredictSchemata's][google.cloud.aiplatform.v1beta1.Model.predict_schemata] ``instance_schema_uri``. parameters (Dict): The parameters that govern the prediction. The schema of the parameters may be specified via Endpoint's DeployedModels' [Model's ][google.cloud.aiplatform.v1beta1.DeployedModel.model] [PredictSchemata's][google.cloud.aiplatform.v1beta1.Model.predict_schemata] ``parameters_schema_uri``. deployed_model_id (str): Optional. If specified, this ExplainRequest will be served by the chosen DeployedModel, overriding this Endpoint's traffic split. timeout (float): Optional. The timeout for this request in seconds. Returns: prediction (aiplatform.Prediction): Prediction with returned predictions, explanations, and Model ID. 
""" self.wait() explain_response = self._prediction_client.explain( endpoint=self.resource_name, instances=instances, parameters=parameters, deployed_model_id=deployed_model_id, timeout=timeout, ) return Prediction( predictions=[ json_format.MessageToDict(item) for item in explain_response.predictions.pb ], deployed_model_id=explain_response.deployed_model_id, explanations=explain_response.explanations, ) async def explain_async( self, instances: List[Dict], *, parameters: Optional[Dict] = None, deployed_model_id: Optional[str] = None, timeout: Optional[float] = None, ) -> Prediction: """Make a prediction with explanations against this Endpoint. Example usage: ``` response = await my_endpoint.explain_async(instances=[...]) my_explanations = response.explanations ``` Args: instances (List): Required. The instances that are the input to the prediction call. A DeployedModel may have an upper limit on the number of instances it supports per request, and when it is exceeded the prediction call errors in case of AutoML Models, or, in case of customer created Models, the behaviour is as documented by that Model. The schema of any single instance may be specified via Endpoint's DeployedModels' [Model's][google.cloud.aiplatform.v1beta1.DeployedModel.model] [PredictSchemata's][google.cloud.aiplatform.v1beta1.Model.predict_schemata] ``instance_schema_uri``. parameters (Dict): The parameters that govern the prediction. The schema of the parameters may be specified via Endpoint's DeployedModels' [Model's ][google.cloud.aiplatform.v1beta1.DeployedModel.model] [PredictSchemata's][google.cloud.aiplatform.v1beta1.Model.predict_schemata] ``parameters_schema_uri``. deployed_model_id (str): Optional. If specified, this ExplainRequest will be served by the chosen DeployedModel, overriding this Endpoint's traffic split. timeout (float): Optional. The timeout for this request in seconds. Returns: prediction (aiplatform.Prediction): Prediction with returned predictions, explanations, and Model ID. """ self.wait() explain_response = await self._prediction_async_client.explain( endpoint=self.resource_name, instances=instances, parameters=parameters, deployed_model_id=deployed_model_id, timeout=timeout, ) return Prediction( predictions=[ json_format.MessageToDict(item) for item in explain_response.predictions.pb ], deployed_model_id=explain_response.deployed_model_id, explanations=explain_response.explanations, ) @classmethod def list( cls, filter: Optional[str] = None, order_by: Optional[str] = None, project: Optional[str] = None, location: Optional[str] = None, credentials: Optional[auth_credentials.Credentials] = None, ) -> List["models.Endpoint"]: """List all Endpoint resource instances. Example Usage: aiplatform.Endpoint.list( filter='labels.my_label="my_label_value" OR display_name=!"old_endpoint"', ) Args: filter (str): Optional. An expression for filtering the results of the request. For field names both snake_case and camelCase are supported. order_by (str): Optional. A comma-separated list of fields to order by, sorted in ascending order. Use "desc" after a field name for descending. Supported fields: `display_name`, `create_time`, `update_time` project (str): Optional. Project to retrieve list from. If not set, project set in aiplatform.init will be used. location (str): Optional. Location to retrieve list from. If not set, location set in aiplatform.init will be used. credentials (auth_credentials.Credentials): Optional. Custom credentials to use to retrieve list. 
Overrides credentials set in aiplatform.init. Returns: List[models.Endpoint]: A list of Endpoint resource objects """ return cls._list_with_local_order( cls_filter=lambda ep: not bool(ep.network) and not bool(ep.private_service_connect_config), # `network` is empty and private_service_connect is not enabled for public Endpoints filter=filter, order_by=order_by, project=project, location=location, credentials=credentials, ) def list_models(self) -> List[gca_endpoint_compat.DeployedModel]: """Returns a list of the models deployed to this Endpoint. Returns: deployed_models (List[aiplatform.gapic.DeployedModel]): A list of the models deployed in this Endpoint. """ self._sync_gca_resource() return list(self._gca_resource.deployed_models) def undeploy_all(self, sync: bool = True) -> "Endpoint": """Undeploys every model deployed to this Endpoint. Args: sync (bool): Whether to execute this method synchronously. If False, this method will be executed in concurrent Future and any downstream object will be immediately returned and synced when the Future has completed. """ self._sync_gca_resource() models_in_traffic_split = sorted( # Undeploy zero traffic models first self._gca_resource.traffic_split.keys(), key=lambda id: self._gca_resource.traffic_split[id], ) # Some deployed models may not in the traffic_split dict. # These models have 0% traffic and should be undeployed first. models_not_in_traffic_split = [ deployed_model.id for deployed_model in self._gca_resource.deployed_models if deployed_model.id not in models_in_traffic_split ] models_to_undeploy = models_not_in_traffic_split + models_in_traffic_split for deployed_model in models_to_undeploy: self._undeploy(deployed_model_id=deployed_model, sync=sync) return self def delete(self, force: bool = False, sync: bool = True) -> None: """Deletes this Vertex AI Endpoint resource. If force is set to True, all models on this Endpoint will be undeployed prior to deletion. Args: force (bool): Required. If force is set to True, all deployed models on this Endpoint will be undeployed first. Default is False. sync (bool): Whether to execute this method synchronously. If False, this method will be executed in concurrent Future and any downstream object will be immediately returned and synced when the Future has completed. Raises: FailedPrecondition: If models are deployed on this Endpoint and force = False. """ if force: self.undeploy_all(sync=sync) super().delete(sync=sync) class PrivateEndpoint(Endpoint): """ Represents a Vertex AI PrivateEndpoint resource. Read more [about private endpoints in the documentation.](https://cloud.google.com/vertex-ai/docs/predictions/using-private-endpoints) """ def __init__( self, endpoint_name: str, project: Optional[str] = None, location: Optional[str] = None, credentials: Optional[auth_credentials.Credentials] = None, ): """Retrieves a PrivateEndpoint resource. Example usage: my_private_endpoint = aiplatform.PrivateEndpoint( endpoint_name="projects/123/locations/us-central1/endpoints/1234567891234567890" ) or (when project and location are initialized) my_private_endpoint = aiplatform.PrivateEndpoint( endpoint_name="1234567891234567890" ) Args: endpoint_name (str): Required. A fully-qualified endpoint resource name or endpoint ID. Example: "projects/123/locations/us-central1/endpoints/my_endpoint_id" or "my_endpoint_id" when project and location are initialized or passed. project (str): Optional. Project to retrieve endpoint from. If not set, project set in aiplatform.init will be used. location (str): Optional. 
Location to retrieve endpoint from. If not set, location set in aiplatform.init will be used. credentials (auth_credentials.Credentials): Optional. Custom credentials to use to upload this model. Overrides credentials set in aiplatform.init. Raises: ValueError: If the Endpoint being retrieved is not a PrivateEndpoint. ImportError: If there is an issue importing the `urllib3` package. """ try: import urllib3 except ImportError: raise ImportError( "Cannot import the urllib3 HTTP client. Please install google-cloud-aiplatform[private_endpoints]." ) super().__init__( endpoint_name=endpoint_name, project=project, location=location, credentials=credentials, ) if not self.network and not self.private_service_connect_config: raise ValueError( "Please ensure the Endpoint being retrieved is a PrivateEndpoint." ) self._http_client = urllib3.PoolManager(cert_reqs="CERT_NONE") @property def predict_http_uri(self) -> Optional[str]: """HTTP path to send prediction requests to, used when calling `PrivateEndpoint.predict()`""" if not self._gca_resource.deployed_models: return None return self._gca_resource.deployed_models[0].private_endpoints.predict_http_uri @property def explain_http_uri(self) -> Optional[str]: """HTTP path to send explain requests to, used when calling `PrivateEndpoint.explain()`""" if not self._gca_resource.deployed_models: return None return self._gca_resource.deployed_models[0].private_endpoints.explain_http_uri @property def health_http_uri(self) -> Optional[str]: """HTTP path to send health check requests to, used when calling `PrivateEndpoint.health_check()`""" if not self._gca_resource.deployed_models: return None return self._gca_resource.deployed_models[0].private_endpoints.health_http_uri class PrivateServiceConnectConfig: """Represents a Vertex AI PrivateServiceConnectConfig resource.""" _gapic_private_service_connect_config: gca_service_networking.PrivateServiceConnectConfig def __init__( self, project_allowlist: Optional[Sequence[str]] = None, ): """PrivateServiceConnectConfig for a PrivateEndpoint. Args: project_allowlist (Sequence[str]): Optional. List of projects from which traffic can be accepted by the endpoint via [ServiceAttachment](https://cloud.google.com/vpc/docs/private-service-connect#service-attachments). If not set, the endpoint's project will be used. """ self._gapic_private_service_connect_config = ( gca_service_networking.PrivateServiceConnectConfig( enable_private_service_connect=True, project_allowlist=project_allowlist, ) ) @classmethod def create( cls, display_name: str, project: Optional[str] = None, location: Optional[str] = None, network: Optional[str] = None, description: Optional[str] = None, labels: Optional[Dict[str, str]] = None, credentials: Optional[auth_credentials.Credentials] = None, encryption_spec_key_name: Optional[str] = None, sync=True, private_service_connect_config: Optional[PrivateServiceConnectConfig] = None, enable_request_response_logging=False, request_response_logging_sampling_rate: Optional[float] = None, request_response_logging_bq_destination_table: Optional[str] = None, inference_timeout: Optional[int] = None, ) -> "PrivateEndpoint": """Creates a new PrivateEndpoint. 
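A PrivateEndpoint is reachable either over VPC network peering (Private Services Access, configured with ``network``) or over Private Service Connect (configured with ``private_service_connect_config``); exactly one of the two must be provided, as the examples below illustrate.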
Example usage: For PSA based private endpoint: my_private_endpoint = aiplatform.PrivateEndpoint.create( display_name="my_endpoint_name", project="my_project_id", location="us-central1", network="projects/123456789123/global/networks/my_vpc" ) or (when project and location are initialized) my_private_endpoint = aiplatform.PrivateEndpoint.create( display_name="my_endpoint_name", network="projects/123456789123/global/networks/my_vpc" ) For PSC based private endpoint: my_private_endpoint = aiplatform.PrivateEndpoint.create( display_name="my_endpoint_name", project="my_project_id", location="us-central1", private_service_connect=aiplatform.PrivateEndpoint.PrivateServiceConnectConfig( project_allowlist=["test-project"]), ) or (when project and location are initialized) my_private_endpoint = aiplatform.PrivateEndpoint.create( display_name="my_endpoint_name", private_service_connect=aiplatform.PrivateEndpoint.PrivateServiceConnectConfig( project_allowlist=["test-project"]), ) Args: display_name (str): Required. The user-defined name of the Endpoint. The name can be up to 128 characters long and can be consist of any UTF-8 characters. project (str): Optional. Project to retrieve endpoint from. If not set, project set in aiplatform.init will be used. location (str): Optional. Location to retrieve endpoint from. If not set, location set in aiplatform.init will be used. network (str): Optional. The full name of the Compute Engine network to which this Endpoint will be peered. E.g. "projects/123456789123/global/networks/my_vpc". Private services access must already be configured for the network. If left unspecified, the network set with aiplatform.init will be used. Cannot be set together with private_service_connect_config. description (str): Optional. The description of the Endpoint. labels (Dict[str, str]): Optional. The labels with user-defined metadata to organize your Endpoints. Label keys and values can be no longer than 64 characters (Unicode codepoints), can only contain lowercase letters, numeric characters, underscores and dashes. International characters are allowed. See https://goo.gl/xmQnxf for more information and examples of labels. credentials (auth_credentials.Credentials): Optional. Custom credentials to use to upload this model. Overrides credentials set in aiplatform.init. encryption_spec_key_name (str): Optional. The Cloud KMS resource identifier of the customer managed encryption key used to protect the model. Has the form: ``projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key``. The key needs to be in the same region as where the compute resource is created. If set, this Model and all sub-resources of this Model will be secured by this key. Overrides encryption_spec_key_name set in aiplatform.init. sync (bool): Whether to execute this method synchronously. If False, this method will be executed in concurrent Future and any downstream object will be immediately returned and synced when the Future has completed. private_service_connect_config (aiplatform.PrivateEndpoint.PrivateServiceConnectConfig): [Private Service Connect](https://cloud.google.com/vpc/docs/private-service-connect) configuration for the endpoint. Cannot be set when network is specified. enable_request_response_logging (bool): Optional. Whether to enable request & response logging for this endpoint. request_response_logging_sampling_rate (float): Optional. The request response logging sampling rate. If not set, default is 0.0. request_response_logging_bq_destination_table (str): Optional. 
                The request response logging BigQuery destination. If not set, a table
                will be created with the name:
                ``bq://{project_id}.logging_{endpoint_display_name}_{endpoint_id}.request_response_logging``.
            inference_timeout (int):
                Optional. It defines the prediction timeout, in seconds, for online
                predictions using cloud-based endpoints. This applies to either PSC
                endpoints, when private_service_connect_config is set, or dedicated
                endpoints, when dedicated_endpoint_enabled is true.

        Returns:
            endpoint (aiplatform.PrivateEndpoint):
                Created endpoint.

        Raises:
            ValueError: If neither `network` nor `private_service_connect_config`
                is provided, or if both are provided.
        """
        api_client = cls._instantiate_client(location=location, credentials=credentials)

        utils.validate_display_name(display_name)
        if labels:
            utils.validate_labels(labels)

        project = project or initializer.global_config.project
        location = location or initializer.global_config.location
        network = network or initializer.global_config.network

        if not network and not private_service_connect_config:
            raise ValueError(
                "Please provide the required argument `network` or "
                "`private_service_connect_config`. You can also set the network "
                "using aiplatform.init(network=...)."
            )

        if network and private_service_connect_config:
            raise ValueError(
                "Arguments `network` and `private_service_connect_config` are"
                " mutually exclusive. You can only set one of them."
            )

        config = None
        if private_service_connect_config:
            config = (
                private_service_connect_config._gapic_private_service_connect_config
            )

        predict_request_response_logging_config = None
        if enable_request_response_logging:
            predict_request_response_logging_config = (
                gca_endpoint_compat.PredictRequestResponseLoggingConfig(
                    enabled=True,
                    sampling_rate=request_response_logging_sampling_rate,
                    bigquery_destination=gca_io_compat.BigQueryDestination(
                        output_uri=request_response_logging_bq_destination_table
                    ),
                )
            )

        client_connection_config = None
        if private_service_connect_config and inference_timeout:
            client_connection_config = gca_endpoint_compat.ClientConnectionConfig(
                inference_timeout=duration_pb2.Duration(seconds=inference_timeout)
            )

        return cls._create(
            api_client=api_client,
            display_name=display_name,
            project=project,
            location=location,
            description=description,
            labels=labels,
            credentials=credentials,
            encryption_spec=initializer.global_config.get_encryption_spec(
                encryption_spec_key_name=encryption_spec_key_name
            ),
            network=network,
            sync=sync,
            private_service_connect_config=config,
            predict_request_response_logging_config=predict_request_response_logging_config,
            client_connection_config=client_connection_config,
        )

    @classmethod
    def _construct_sdk_resource_from_gapic(
        cls,
        gapic_resource: proto.Message,
        project: Optional[str] = None,
        location: Optional[str] = None,
        credentials: Optional[auth_credentials.Credentials] = None,
    ) -> "PrivateEndpoint":
        """Given a GAPIC PrivateEndpoint object, return the SDK representation.

        Args:
            gapic_resource (proto.Message):
                A GAPIC representation of a PrivateEndpoint resource, usually
                retrieved by a get_* or in a list_* API call.
            project (str):
                Optional. Project to construct Endpoint object from. If not set,
                project set in aiplatform.init will be used.
            location (str):
                Optional. Location to construct Endpoint object from. If not set,
                location set in aiplatform.init will be used.
            credentials (auth_credentials.Credentials):
                Optional. Custom credentials to use to construct Endpoint.
                Overrides credentials set in aiplatform.init.

        Returns:
            endpoint (aiplatform.PrivateEndpoint):
                An initialized PrivateEndpoint resource.
Raises: ImportError: If there is an issue importing the `urllib3` package. """ try: import urllib3 except ImportError: raise ImportError( "Cannot import the urllib3 HTTP client. Please install google-cloud-aiplatform[private_endpoints]." ) endpoint = super()._construct_sdk_resource_from_gapic( gapic_resource=gapic_resource, project=project, location=location, credentials=credentials, ) endpoint._http_client = urllib3.PoolManager(cert_reqs="CERT_NONE") return endpoint def _http_request( self, method: str, url: str, body: Optional[Dict[Any, Any]] = None, headers: Optional[Dict[str, str]] = None, ) -> "urllib3.response.HTTPResponse": # type: ignore # noqa: F821 """Helper function used to perform HTTP requests for PrivateEndpoint. Args: method (str): Required. The HTTP request method to use. Example: "POST" or "GET" url (str): Required. The url used to send requests and get responses from. body (Dict[Any, Any]): Optional. Data sent to the url in the HTTP request. For a PrivateEndpoint, an instance is sent and a prediction response is expected. headers (Dict[str, str]): Optional. Header in the HTTP request. Returns: urllib3.response.HTTPResponse: A HTTP Response container. Raises: ImportError: If there is an issue importing the `urllib3` package. RuntimeError: If a HTTP request could not be made. RuntimeError: A connection could not be established with the PrivateEndpoint and a HTTP request could not be made. """ try: import urllib3 except ImportError: raise ImportError( "Cannot import the urllib3 HTTP client. Please install google-cloud-aiplatform[private_endpoints]." ) try: response = self._http_client.request( method=method, url=url, body=body, headers=headers ) if response.status < _SUCCESSFUL_HTTP_RESPONSE: return response else: raise RuntimeError( f"{response.status} - Failed to make request, see response: " + response.data.decode("utf-8") ) except urllib3.exceptions.MaxRetryError as exc: raise RuntimeError( f"Failed to make a {method} request to this URI, make sure: " " this call is being made inside the network this PrivateEndpoint is peered to " f"({self._gca_resource.network}), calling health_check() returns True, " f"and that {url} is a valid URL." ) from exc def _validate_endpoint_override(self, endpoint_override: str) -> bool: regex = re.compile("^[a-zA-Z0-9-.]+$") return regex.match(endpoint_override) is not None def predict( self, instances: List, parameters: Optional[Dict] = None, endpoint_override: Optional[str] = None, ) -> Prediction: """Make a prediction against this PrivateEndpoint using a HTTP request. For PSA based private endpoint, this method must be called within the network the PrivateEndpoint is peered to. Otherwise, the predict() call will fail with error code 404. To check, use `PrivateEndpoint.network`. For PSC based priviate endpoint, the project where caller credential are from must be allowlisted. Example usage: PSA based private endpoint: response = my_private_endpoint.predict(instances=[...], parameters={...}) my_predictions = response.predictions PSC based private endpoint: After creating PSC Endpoint pointing to the endpoint's ServiceAttachment, use the PSC Endpoint IP Address or DNS as endpoint_override. psc_endpoint_address = "10.0.1.23" or psc_endpoint_address = "test.my.prediction" response = my_private_endpoint.predict(instances=[...], endpoint_override=psc_endpoint_address) my_predictions = response.predictions Args: instances (List): Required. The instances that are the input to the prediction call. Instance types mut be JSON serializable. 
A DeployedModel may have an upper limit on the number of instances it supports per request, and when it is exceeded the prediction call errors in case of AutoML Models, or, in case of customer created Models, the behaviour is as documented by that Model. The schema of any single instance may be specified via Endpoint's DeployedModels' [Model's][google.cloud.aiplatform.v1beta1.DeployedModel.model] [PredictSchemata's][google.cloud.aiplatform.v1beta1.Model.predict_schemata] ``instance_schema_uri``. parameters (Dict): The parameters that govern the prediction. The schema of the parameters may be specified via Endpoint's DeployedModels' [Model's ][google.cloud.aiplatform.v1beta1.DeployedModel.model] [PredictSchemata's][google.cloud.aiplatform.v1beta1.Model.predict_schemata] ``parameters_schema_uri``. endpoint_override (Optional[str]): The Private Service Connect endpoint's IP address or DNS that points to the endpoint's service attachment. Returns: prediction (aiplatform.Prediction): Prediction object with returned predictions and Model ID. Raises: RuntimeError: If a model has not been deployed a request cannot be made for PSA based endpoint. ValueError: If a endpoint override is not provided for PSC based endpoint. ValueError: If a endpoint override is invalid for PSC based endpoint. """ self.wait() self._sync_gca_resource_if_skipped() if self.network: if not self._gca_resource.deployed_models: raise RuntimeError( "Cannot make a predict request because a model has not been" "deployed on this Private Endpoint. Please ensure a model" "has been deployed." ) response = self._http_request( method="POST", url=self.predict_http_uri, body=json.dumps({"instances": instances, "parameters": parameters}), headers={"Content-Type": "application/json"}, ) prediction_response = json.loads(response.data) return Prediction( predictions=prediction_response.get("predictions"), metadata=prediction_response.get("metadata"), deployed_model_id=self._gca_resource.deployed_models[0].id, ) if self.private_service_connect_config: if not endpoint_override: raise ValueError( "Cannot make a predict request because endpoint override is" "not provided. Please ensure an endpoint override is" "provided." ) if not self._validate_endpoint_override(endpoint_override): raise ValueError( "Invalid endpoint override provided. Please only use IP" "address or DNS." ) if not self.credentials.valid: self.credentials.refresh(google_auth_requests.Request()) token = self.credentials.token headers = { "Authorization": f"Bearer {token}", "Content-Type": "application/json", } url = f"https://{endpoint_override}/v1/projects/{self.project}/locations/{self.location}/endpoints/{self.name}:predict" response = self._http_request( method="POST", url=url, body=json.dumps({"instances": instances, "parameters": parameters}), headers=headers, ) prediction_response = json.loads(response.data) return Prediction( predictions=prediction_response.get("predictions"), metadata=prediction_response.get("metadata"), deployed_model_id=prediction_response.get("deployedModelId"), model_resource_name=prediction_response.get("model"), model_version_id=prediction_response.get("modelVersionId"), ) def raw_predict( self, body: bytes, headers: Dict[str, str], endpoint_override: Optional[str] = None, ) -> requests.models.Response: """Make a prediction request using arbitrary headers. This method must be called within the network the PrivateEndpoint is peered to. Otherwise, the predict() call will fail with error code 404. To check, use `PrivateEndpoint.network`. 
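For a PSC based private endpoint, pass the Private Service Connect endpoint address (IP address or DNS) as ``endpoint_override``; the caller's credentials are attached to the request as a Bearer token, as the examples below show.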
Example usage: my_endpoint = aiplatform.PrivateEndpoint(ENDPOINT_ID) # PSA based private endpint response = my_endpoint.raw_predict( body = b'{"instances":[{"feat_1":val_1, "feat_2":val_2}]}', headers = {'Content-Type':'application/json'} ) # PSC based private endpoint response = my_endpoint.raw_predict( body = b'{"instances":[{"feat_1":val_1, "feat_2":val_2}]}', headers = {'Content-Type':'application/json'}, endpoint_override = "10.1.0.23" ) status_code = response.status_code results = json.dumps(response.text) Args: body (bytes): The body of the prediction request in bytes. This must not exceed 1.5 mb per request. headers (Dict[str, str]): The header of the request as a dictionary. There are no restrictions on the header. endpoint_override (Optional[str]): The Private Service Connect endpoint's IP address or DNS that points to the endpoint's service attachment. Returns: A requests.models.Response object containing the status code and prediction results. Raises: ValueError: If a endpoint override is not provided for PSC based endpoint. ValueError: If a endpoint override is invalid for PSC based endpoint. """ self.wait() if self.network: return self._http_request( method="POST", url=self.predict_http_uri, body=body, headers=headers, ) if self.private_service_connect_config: if not endpoint_override: raise ValueError( "Cannot make a predict request because endpoint override is" "not provided. Please ensure an endpoint override is" "provided." ) if not self._validate_endpoint_override(endpoint_override): raise ValueError( "Invalid endpoint override provided. Please only use IP" "address or DNS." ) if not self.credentials.valid: self.credentials.refresh(google_auth_requests.Request()) token = self.credentials.token headers_with_token = dict(headers) headers_with_token["Authorization"] = f"Bearer {token}" url = f"https://{endpoint_override}/v1/projects/{self.project}/locations/{self.location}/endpoints/{self.name}:rawPredict" return self._http_request( method="POST", url=url, body=body, headers=headers_with_token, ) def stream_raw_predict( self, body: bytes, headers: Dict[str, str], endpoint_override: Optional[str] = None, ) -> Iterator[bytes]: """Make a streaming prediction request using arbitrary headers. Example usage: my_endpoint = aiplatform.PrivateEndpoint(ENDPOINT_ID) # Prepare the request body request_body = json.dumps({...}).encode('utf-8') # Define the headers headers = { 'Content-Type': 'application/json', } # Use stream_raw_predict to send the request and process the response for stream_response in psc_endpoint.stream_raw_predict( body=request_body, headers=headers, endpoint_override="10.128.0.26" # Replace with your actual endpoint ): stream_response_text = stream_response.decode('utf-8') Args: body (bytes): The body of the prediction request in bytes. This must not exceed 10 mb per request. headers (Dict[str, str]): The header of the request as a dictionary. There are no restrictions on the header. endpoint_override (Optional[str]): The Private Service Connect endpoint's IP address or DNS that points to the endpoint's service attachment. Yields: predictions (Iterator[bytes]): The streaming prediction results as lines of bytes. Raises: ValueError: If a endpoint override is not provided for PSC based endpoint. ValueError: If a endpoint override is invalid for PSC based endpoint. """ self.wait() if self.network or not self.private_service_connect_config: raise ValueError( "PSA based private endpoint does not support streaming prediction." 
) if self.private_service_connect_config: if not endpoint_override: raise ValueError( "Cannot make a predict request because endpoint override is" "not provided. Please ensure an endpoint override is" "provided." ) if not self._validate_endpoint_override(endpoint_override): raise ValueError( "Invalid endpoint override provided. Please only use IP" "address or DNS." ) if not self.credentials.valid: self.credentials.refresh(google_auth_requests.Request()) token = self.credentials.token headers_with_token = dict(headers) headers_with_token["Authorization"] = f"Bearer {token}" if not self.authorized_session: self.credentials._scopes = constants.base.DEFAULT_AUTHED_SCOPES self.authorized_session = google_auth_requests.AuthorizedSession( self.credentials ) url = f"https://{endpoint_override}/v1/projects/{self.project}/locations/{self.location}/endpoints/{self.name}:streamRawPredict" with self.authorized_session.post( url=url, data=body, headers=headers_with_token, stream=True, verify=False, ) as resp: for line in resp.iter_lines(): yield line def explain(self): raise NotImplementedError( f"{self.__class__.__name__} class does not support 'explain' as of now." ) def health_check(self) -> bool: """ Makes a request to this PrivateEndpoint's health check URI. Must be within network that this PrivateEndpoint is in. This is only supported by PSA based private endpoint. Example Usage: if my_private_endpoint.health_check(): print("PrivateEndpoint is healthy!") Returns: bool: Checks if calls can be made to this PrivateEndpoint. Raises: RuntimeError: If a model has not been deployed a request cannot be made. RuntimeError: If the endpoint is PSC based private endpoint. """ self.wait() self._sync_gca_resource_if_skipped() if self.private_service_connect_config: raise RuntimeError( "Health check request is not supported on PSC based Private Endpoint." ) if not self._gca_resource.deployed_models: raise RuntimeError( "Cannot make a health check request because a model has not been deployed on this Private" "Endpoint. Please ensure a model has been deployed." ) response = self._http_request( method="GET", url=self.health_http_uri, ) return response.status < _SUCCESSFUL_HTTP_RESPONSE @classmethod def list( cls, filter: Optional[str] = None, order_by: Optional[str] = None, project: Optional[str] = None, location: Optional[str] = None, credentials: Optional[auth_credentials.Credentials] = None, ) -> List["models.PrivateEndpoint"]: """List all PrivateEndpoint resource instances. Example Usage: my_private_endpoints = aiplatform.PrivateEndpoint.list() or my_private_endpoints = aiplatform.PrivateEndpoint.list( filter='labels.my_label="my_label_value" OR display_name=!"old_endpoint"', ) Args: filter (str): Optional. An expression for filtering the results of the request. For field names both snake_case and camelCase are supported. order_by (str): Optional. A comma-separated list of fields to order by, sorted in ascending order. Use "desc" after a field name for descending. Supported fields: `display_name`, `create_time`, `update_time` project (str): Optional. Project to retrieve list from. If not set, project set in aiplatform.init will be used. location (str): Optional. Location to retrieve list from. If not set, location set in aiplatform.init will be used. credentials (auth_credentials.Credentials): Optional. Custom credentials to use to retrieve list. Overrides credentials set in aiplatform.init. Returns: List[models.PrivateEndpoint]: A list of PrivateEndpoint resource objects. 
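Example of separating the results by connectivity type (an illustrative sketch; assumes private endpoints of both kinds exist in the project):

    private_endpoints = aiplatform.PrivateEndpoint.list()
    psc_endpoints = [
        ep for ep in private_endpoints if ep.private_service_connect_config
    ]
    psa_endpoints = [ep for ep in private_endpoints if ep.network]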
""" return cls._list_with_local_order( cls_filter=lambda ep: bool(ep.network) or bool(ep.private_service_connect_config), # Only PrivateEndpoints have a network or private_service_connect_config filter=filter, order_by=order_by, project=project, location=location, credentials=credentials, ) def deploy( self, model: "Model", deployed_model_display_name: Optional[str] = None, machine_type: Optional[str] = None, min_replica_count: int = 1, max_replica_count: int = 1, accelerator_type: Optional[str] = None, accelerator_count: Optional[int] = None, tpu_topology: Optional[str] = None, service_account: Optional[str] = None, explanation_metadata: Optional[aiplatform.explain.ExplanationMetadata] = None, explanation_parameters: Optional[ aiplatform.explain.ExplanationParameters ] = None, metadata: Optional[Sequence[Tuple[str, str]]] = (), sync=True, disable_container_logging: bool = False, traffic_percentage: Optional[int] = 0, traffic_split: Optional[Dict[str, int]] = None, reservation_affinity_type: Optional[str] = None, reservation_affinity_key: Optional[str] = None, reservation_affinity_values: Optional[List[str]] = None, spot: bool = False, system_labels: Optional[Dict[str, str]] = None, required_replica_count: Optional[int] = 0, ) -> None: """Deploys a Model to the PrivateEndpoint. Example Usage: PSA based private endpoint my_private_endpoint.deploy( model=my_model ) PSC based private endpoint psc_endpoint.deploy( model=first_model, ) psc_endpoint.deploy( model=second_model, traffic_percentage=50, ) psc_endpoint.deploy( model=third_model, traffic_percentage={ 'first_model_id': 40, 'second_model_id': 30, 'third_model_id': 30 }, ) Args: model (aiplatform.Model): Required. Model to be deployed. deployed_model_display_name (str): Optional. The display name of the DeployedModel. If not provided upon creation, the Model's display_name is used. machine_type (str): Optional. The type of machine. Not specifying machine type will result in model to be deployed with automatic resources. min_replica_count (int): Optional. The minimum number of machine replicas this deployed model will be always deployed on. If traffic against it increases, it may dynamically be deployed onto more replicas, and as traffic decreases, some of these extra replicas may be freed. max_replica_count (int): Optional. The maximum number of replicas this deployed model may be deployed on when the traffic against it increases. If requested value is too large, the deployment will error, but if deployment succeeds then the ability to scale the model to that many replicas is guaranteed (barring service outages). If traffic against the deployed model increases beyond what its replicas at maximum may handle, a portion of the traffic will be dropped. If this value is not provided, the larger value of min_replica_count or 1 will be used. If value provided is smaller than min_replica_count, it will automatically be increased to be min_replica_count. accelerator_type (str): Optional. Hardware accelerator type. Must also set accelerator_count if used. One of ACCELERATOR_TYPE_UNSPECIFIED, NVIDIA_TESLA_K80, NVIDIA_TESLA_P100, NVIDIA_TESLA_V100, NVIDIA_TESLA_P4, NVIDIA_TESLA_T4 accelerator_count (int): Optional. The number of accelerators to attach to a worker replica. tpu_topology (str): Optional. The TPU topology to use for the DeployedModel. Required for CloudTPU multihost deployments. service_account (str): The service account that the DeployedModel's container runs as. Specify the email address of the service account. 
If this service account is not specified, the container runs as a service account that doesn't have access to the resource project. Users deploying the Model must have the `iam.serviceAccounts.actAs` permission on this service account. explanation_metadata (aiplatform.explain.ExplanationMetadata): Optional. Metadata describing the Model's input and output for explanation. `explanation_metadata` is optional while `explanation_parameters` must be specified when used. For more details, see `Ref docs ` explanation_parameters (aiplatform.explain.ExplanationParameters): Optional. Parameters to configure explaining for Model's predictions. For more details, see `Ref docs ` metadata (Sequence[Tuple[str, str]]): Optional. Strings which should be sent along with the request as metadata. sync (bool): Whether to execute this method synchronously. If False, this method will be executed in concurrent Future and any downstream object will be immediately returned and synced when the Future has completed. traffic_percentage (int): Optional. Desired traffic to newly deployed model. Defaults to 0 if there are pre-existing deployed models. Defaults to 100 if there are no pre-existing deployed models. Defaults to 100 for PSA based private endpoint. Negative values should not be provided. Traffic of previously deployed models at the endpoint will be scaled down to accommodate new deployed model's traffic. Should not be provided if traffic_split is provided. traffic_split (Dict[str, int]): Optional. Only supported by PSC base private endpoint. A map from a DeployedModel's ID to the percentage of this Endpoint's traffic that should be forwarded to that DeployedModel. If a DeployedModel's ID is not listed in this map, then it receives no traffic. The traffic percentage values must add up to 100, or map must be empty if the Endpoint is to not accept any traffic at the moment. Key for model being deployed is "0". Should not be provided if traffic_percentage is provided. reservation_affinity_type (str): Optional. The type of reservation affinity. One of NO_RESERVATION, ANY_RESERVATION, SPECIFIC_RESERVATION, SPECIFIC_THEN_ANY_RESERVATION, SPECIFIC_THEN_NO_RESERVATION reservation_affinity_key (str): Optional. Corresponds to the label key of a reservation resource. To target a SPECIFIC_RESERVATION by name, use `compute.googleapis.com/reservation-name` as the key and specify the name of your reservation as its value. reservation_affinity_values (List[str]): Optional. Corresponds to the label values of a reservation resource. This must be the full resource name of the reservation. Format: 'projects/{project_id_or_number}/zones/{zone}/reservations/{reservation_name}' spot (bool): Optional. Whether to schedule the deployment workload on spot VMs. system_labels (Dict[str, str]): Optional. System labels to apply to Model Garden deployments. System labels are managed by Google for internal use only. required_replica_count (int): Optional. Number of required available replicas for the deployment to succeed. This field is only needed when partial model deployment/mutation is desired, with a value greater than or equal to 1 and fewer than or equal to min_replica_count. If set, the model deploy/mutate operation will succeed once available_replica_count reaches required_replica_count, and the rest of the replicas will be retried. """ if self.network: if traffic_split is not None: raise ValueError( "Traffic split is not supported for PSA based PrivateEndpoint." 
) traffic_percentage = 100 self._validate_deploy_args( min_replica_count=min_replica_count, max_replica_count=max_replica_count, accelerator_type=accelerator_type, deployed_model_display_name=deployed_model_display_name, traffic_split=traffic_split, traffic_percentage=traffic_percentage, deployment_resource_pool=None, required_replica_count=required_replica_count, ) explanation_spec = _explanation_utils.create_and_validate_explanation_spec( explanation_metadata=explanation_metadata, explanation_parameters=explanation_parameters, ) self._deploy( model=model, deployed_model_display_name=deployed_model_display_name, traffic_percentage=traffic_percentage, traffic_split=traffic_split, machine_type=machine_type, min_replica_count=min_replica_count, max_replica_count=max_replica_count, accelerator_type=accelerator_type, accelerator_count=accelerator_count, tpu_topology=tpu_topology, reservation_affinity_type=reservation_affinity_type, reservation_affinity_key=reservation_affinity_key, reservation_affinity_values=reservation_affinity_values, service_account=service_account, explanation_spec=explanation_spec, metadata=metadata, sync=sync, spot=spot, disable_container_logging=disable_container_logging, system_labels=system_labels, required_replica_count=required_replica_count, ) def update( self, display_name: Optional[str] = None, description: Optional[str] = None, labels: Optional[Dict[str, str]] = None, traffic_split: Optional[Dict[str, int]] = None, request_metadata: Optional[Sequence[Tuple[str, str]]] = (), update_request_timeout: Optional[float] = None, ) -> "PrivateEndpoint": """Updates a PrivateEndpoint. Example usage: PSC based private endpoint my_endpoint = my_endpoint.update( display_name='my-updated-endpoint', description='my updated description', labels={'key': 'value'}, traffic_split={ '123456': 20, '234567': 80, }, ) Args: display_name (str): Optional. The display name of the Endpoint. The name can be up to 128 characters long and can be consist of any UTF-8 characters. description (str): Optional. The description of the Endpoint. labels (Dict[str, str]): Optional. The labels with user-defined metadata to organize your Endpoints. Label keys and values can be no longer than 64 characters (Unicode codepoints), can only contain lowercase letters, numeric characters, underscores and dashes. International characters are allowed. See https://goo.gl/xmQnxf for more information and examples of labels. traffic_split (Dict[str, int]): Optional. Only supported by PSC based private endpoint A map from a DeployedModel's ID to the percentage of this Endpoint's traffic that should be forwarded to that DeployedModel. If a DeployedModel's ID is not listed in this map, then it receives no traffic. The traffic percentage values must add up to 100, or map must be empty if the Endpoint is to not accept any traffic at a moment. request_metadata (Sequence[Tuple[str, str]]): Optional. Strings which should be sent along with the request as metadata. update_request_timeout (float): Optional. The timeout for the update request in seconds. Returns: Endpoint (aiplatform.Prediction): Updated endpoint resource. Raises: ValueError: If `traffic_split` is set for PSA based private endpoint. """ if self.network: if traffic_split is not None: raise ValueError( "Traffic split is not supported for PSA based Private Endpoint." 
) super().update( display_name=display_name, description=description, labels=labels, traffic_split=traffic_split, request_metadata=request_metadata, update_request_timeout=update_request_timeout, ) return self def undeploy( self, deployed_model_id: str, sync=True, traffic_split: Optional[Dict[str, int]] = None, ) -> None: """Undeploys a deployed model from the PrivateEndpoint. Example Usage: PSA based private endpoint: my_private_endpoint.undeploy( deployed_model_id="1234567891232567891" ) or my_deployed_model_id = my_private_endpoint.list_models()[0].id my_private_endpoint.undeploy( deployed_model_id=my_deployed_model_id ) Args: deployed_model_id (str): Required. The ID of the DeployedModel to be undeployed from the PrivateEndpoint. Use PrivateEndpoint.list_models() to get the deployed model ID. sync (bool): Whether to execute this method synchronously. If False, this method will be executed in concurrent Future and any downstream object will be immediately returned and synced when the Future has completed. traffic_split (Dict[str, int]): Optional. Only supported by PSC based private endpoint. A map of DeployedModel IDs to the percentage of this Endpoint's traffic that should be forwarded to that DeployedModel. Required if undeploying a model with non-zero traffic from an Endpoint with multiple deployed models. The traffic percentage values must add up to 100, or map must be empty if the Endpoint is to not accept any traffic at the moment. If a DeployedModel's ID is not listed in this map, then it receives no traffic. """ self._sync_gca_resource_if_skipped() if self.network: if traffic_split is not None: raise ValueError( "Traffic split is not supported for PSA based PrivateEndpoint." ) # PSA based private endpoint self._undeploy( deployed_model_id=deployed_model_id, traffic_split=None, sync=sync, ) # PSC based private endpoint if self.private_service_connect_config: super().undeploy( deployed_model_id=deployed_model_id, traffic_split=traffic_split, sync=sync, ) def undeploy_all(self, sync: bool = True) -> "PrivateEndpoint": """Undeploys every model deployed to this PrivateEndpoint. Args: sync (bool): Whether to execute this method synchronously. If False, this method will be executed in concurrent Future and any downstream object will be immediately returned and synced when the Future has completed. """ if self.network: self._sync_gca_resource() # PSA based private endpoint self._undeploy( deployed_model_id=self._gca_resource.deployed_models[0].id, traffic_split=None, sync=sync, ) if self.private_service_connect_config: # PSC based private endpoint super().undeploy_all(sync=sync) return self def delete(self, force: bool = False, sync: bool = True) -> None: """Deletes this Vertex AI PrivateEndpoint resource. If force is set to True, all models on this PrivateEndpoint will be undeployed prior to deletion. Args: force (bool): Required. If force is set to True, all deployed models on this Endpoint will be undeployed first. Default is False. sync (bool): Whether to execute this method synchronously. If False, this method will be executed in concurrent Future and any downstream object will be immediately returned and synced when the Future has completed. Raises: FailedPrecondition: If models are deployed on this Endpoint and force = False. 
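Example usage (an illustrative sketch; assumes models may still be deployed to this PrivateEndpoint):

    my_private_endpoint.delete(force=True)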
""" if force and self._gca_resource.deployed_models: self.undeploy_all(sync=sync) super().delete(force=False, sync=sync) class Model(base.VertexAiResourceNounWithFutureManager, base.PreviewMixin): client_class = utils.ModelClientWithOverride _resource_noun = "models" _getter_method = "get_model" _list_method = "list_models" _delete_method = "delete_model" _parse_resource_name_method = "parse_model_path" _format_resource_name_method = "model_path" _preview_class = "google.cloud.aiplatform.aiplatform.preview.models.Model" @property def preview(self): """Return a Model instance with preview features enabled.""" from google.cloud.aiplatform.preview import models as preview_models if not hasattr(self, "_preview_instance"): self._preview_instance = preview_models.Model( self.resource_name, credentials=self.credentials ) return self._preview_instance @property def uri(self) -> Optional[str]: """Path to the directory containing the Model artifact and any of its supporting files. Not present for AutoML Models.""" self._assert_gca_resource_is_available() return self._gca_resource.artifact_uri or None @property def description(self) -> str: """Description of the model.""" self._assert_gca_resource_is_available() return self._gca_resource.description @property def supported_export_formats( self, ) -> Dict[str, List[gca_model_compat.Model.ExportFormat.ExportableContent]]: """The formats and content types in which this Model may be exported. If empty, this Model is not available for export. For example, if this model can be exported as a Tensorflow SavedModel and have the artifacts written to Cloud Storage, the expected value would be: {'tf-saved-model': []} """ self._assert_gca_resource_is_available() return { export_format.id: [ gca_model_compat.Model.ExportFormat.ExportableContent(content) for content in export_format.exportable_contents ] for export_format in self._gca_resource.supported_export_formats } @property def supported_deployment_resources_types( self, ) -> List[model_v1.Model.DeploymentResourcesType]: """List of deployment resource types accepted for this Model. When this Model is deployed, its prediction resources are described by the `prediction_resources` field of the objects returned by `Endpoint.list_models()`. Because not all Models support all resource configuration types, the configuration types this Model supports are listed here. If no configuration types are listed, the Model cannot be deployed to an `Endpoint` and does not support online predictions (`Endpoint.predict()` or `Endpoint.explain()`). Such a Model can serve predictions by using a `BatchPredictionJob`, if it has at least one entry each in `Model.supported_input_storage_formats` and `Model.supported_output_storage_formats`.""" self._assert_gca_resource_is_available() return list(self._gca_resource.supported_deployment_resources_types) @property def supported_input_storage_formats(self) -> List[str]: """The formats this Model supports in the `input_config` field of a `BatchPredictionJob`. If `Model.predict_schemata.instance_schema_uri` exists, the instances should be given as per that schema. [Read the docs for more on batch prediction formats](https://cloud.google.com/vertex-ai/docs/predictions/batch-predictions#batch_request_input) If this Model doesn't support any of these formats it means it cannot be used with a `BatchPredictionJob`. However, if it has `supported_deployment_resources_types`, it could serve online predictions by using `Endpoint.predict()` or `Endpoint.explain()`. 
""" self._assert_gca_resource_is_available() return list(self._gca_resource.supported_input_storage_formats) @property def supported_output_storage_formats(self) -> List[str]: """The formats this Model supports in the `output_config` field of a `BatchPredictionJob`. If both `Model.predict_schemata.instance_schema_uri` and `Model.predict_schemata.prediction_schema_uri` exist, the predictions are returned together with their instances. In other words, the prediction has the original instance data first, followed by the actual prediction content (as per the schema). [Read the docs for more on batch prediction formats](https://cloud.google.com/vertex-ai/docs/predictions/batch-predictions) If this Model doesn't support any of these formats it means it cannot be used with a `BatchPredictionJob`. However, if it has `supported_deployment_resources_types`, it could serve online predictions by using `Endpoint.predict()` or `Endpoint.explain()`. """ self._assert_gca_resource_is_available() return list(self._gca_resource.supported_output_storage_formats) @property def predict_schemata(self) -> Optional[model_v1.PredictSchemata]: """The schemata that describe formats of the Model's predictions and explanations, if available.""" self._assert_gca_resource_is_available() return getattr(self._gca_resource, "predict_schemata") @property def training_job(self) -> Optional["aiplatform.training_jobs._TrainingJob"]: """The TrainingJob that uploaded this Model, if any. Raises: api_core.exceptions.NotFound: If the Model's training job resource cannot be found on the Vertex service. """ self._assert_gca_resource_is_available() job_name = getattr(self._gca_resource, "training_pipeline") if not job_name: return None try: return aiplatform.training_jobs._TrainingJob._get_and_return_subclass( resource_name=job_name, project=self.project, location=self.location, credentials=self.credentials, ) except api_exceptions.NotFound as exc: raise api_exceptions.NotFound( f"The training job used to create this model could not be found: {job_name}" ) from exc @property def container_spec(self) -> Optional[model_v1.ModelContainerSpec]: """The specification of the container that is to be used when deploying this Model. Not present for AutoML Models.""" self._assert_gca_resource_is_available() return getattr(self._gca_resource, "container_spec") @property def version_id(self) -> str: """The version ID of the model. A new version is committed when a new model version is uploaded or trained under an existing model id. It is an auto-incrementing decimal number in string representation.""" self._assert_gca_resource_is_available() return getattr(self._gca_resource, "version_id") @property def version_aliases(self) -> Sequence[str]: """User provided version aliases so that a model version can be referenced via alias (i.e. projects/{project}/locations/{location}/models/{model_id}@{version_alias} instead of auto-generated version id (i.e. projects/{project}/locations/{location}/models/{model_id}@{version_id}). The format is [a-z][a-zA-Z0-9-]{0,126}[a-z0-9] to distinguish from version_id. A default version alias will be created for the first version of the model, and there must be exactly one default version alias for a model. 
""" self._assert_gca_resource_is_available() return getattr(self._gca_resource, "version_aliases") @property def version_create_time(self) -> timestamp_pb2.Timestamp: """Timestamp when this version was created.""" self._assert_gca_resource_is_available() return getattr(self._gca_resource, "version_create_time") @property def version_update_time(self) -> timestamp_pb2.Timestamp: """Timestamp when this version was updated.""" self._assert_gca_resource_is_available() return getattr(self._gca_resource, "version_update_time") @property def version_description(self) -> str: """The description of this version.""" self._assert_gca_resource_is_available() return getattr(self._gca_resource, "version_description") @property def resource_name(self) -> str: """Full qualified resource name, without any version ID.""" self._assert_gca_resource_is_available() return ModelRegistry._parse_versioned_name(self._gca_resource.name)[0] @property def name(self) -> str: """Name of this resource.""" self._assert_gca_resource_is_available() return ModelRegistry._parse_versioned_name(super().name)[0] @property def versioned_resource_name(self) -> str: """The fully-qualified resource name, including the version ID. For example, projects/{project}/locations/{location}/models/{model_id}@{version_id} """ self._assert_gca_resource_is_available() return ModelRegistry._get_versioned_name( self.resource_name, self.version_id, ) @property def versioning_registry(self) -> "ModelRegistry": """The registry of model versions associated with this Model instance.""" return self._registry def __init__( self, model_name: str, project: Optional[str] = None, location: Optional[str] = None, credentials: Optional[auth_credentials.Credentials] = None, version: Optional[str] = None, ): """Retrieves the model resource and instantiates its representation. Args: model_name (str): Required. A fully-qualified model resource name or model ID. Example: "projects/123/locations/us-central1/models/456" or "456" when project and location are initialized or passed. May optionally contain a version ID or version alias in {model_name}@{version} form. See version arg. project (str): Optional project to retrieve model from. If not set, project set in aiplatform.init will be used. location (str): Optional location to retrieve model from. If not set, location set in aiplatform.init will be used. credentials: Optional[auth_credentials.Credentials]=None, Custom credentials to use to upload this model. If not set, credentials set in aiplatform.init will be used. version (str): Optional. Version ID or version alias. When set, the specified model version will be targeted unless overridden in method calls. When not set, the model with the "default" alias will be targeted unless overridden in method calls. No behavior change if only one version of a model exists. Raises: ValueError: If `version` is passed alongside a model_name referencing a different version. """ # If the version was passed in model_name, parse it model_name, parsed_version = ModelRegistry._parse_versioned_name(model_name) if parsed_version: if version and version != parsed_version: raise ValueError( f"A version of {version} was passed that conflicts with the version of {parsed_version} in the model_name." ) version = parsed_version super().__init__( project=project, location=location, credentials=credentials, resource_name=model_name, ) # Model versions can include @{version} in the resource name. 
self._resource_id_validator = super()._revisioned_resource_id_validator # Create a versioned model_name, if it exists, for getting the GCA model versioned_model_name = ModelRegistry._get_versioned_name(model_name, version) self._gca_resource = self._get_gca_resource(resource_name=versioned_model_name) # Create ModelRegistry with the unversioned resource name self._registry = ModelRegistry( self.resource_name, location=location, project=project, credentials=credentials, ) def update( self, display_name: Optional[str] = None, description: Optional[str] = None, labels: Optional[Dict[str, str]] = None, ) -> "Model": """Updates a model. Example usage: my_model = my_model.update( display_name="my-model", description="my description", labels={'key': 'value'}, ) Args: display_name (str): The display name of the Model. The name can be up to 128 characters long and can be consist of any UTF-8 characters. description (str): The description of the model. labels (Dict[str, str]): Optional. The labels with user-defined metadata to organize your Models. Label keys and values can be no longer than 64 characters (Unicode codepoints), can only contain lowercase letters, numeric characters, underscores and dashes. International characters are allowed. See https://goo.gl/xmQnxf for more information and examples of labels. Returns: model (aiplatform.Model): Updated model resource. Raises: ValueError: If `labels` is not the correct format. """ self.wait() current_model_proto = self.gca_resource copied_model_proto = current_model_proto.__class__(current_model_proto) update_mask: List[str] = [] # Updates to base model properties cannot occur if a versioned model is passed. # Use the unversioned model resource name. copied_model_proto.name = self.resource_name if display_name: utils.validate_display_name(display_name) copied_model_proto.display_name = display_name update_mask.append("display_name") if description: copied_model_proto.description = description update_mask.append("description") if labels: utils.validate_labels(labels) copied_model_proto.labels = labels update_mask.append("labels") update_mask = field_mask_pb2.FieldMask(paths=update_mask) self.api_client.update_model(model=copied_model_proto, update_mask=update_mask) self._sync_gca_resource() return self # TODO(b/170979926) Add support for metadata and metadata schema @classmethod @base.optional_sync() def upload( cls, serving_container_image_uri: Optional[str] = None, *, artifact_uri: Optional[str] = None, model_id: Optional[str] = None, parent_model: Optional[str] = None, is_default_version: bool = True, version_aliases: Optional[Sequence[str]] = None, version_description: Optional[str] = None, serving_container_predict_route: Optional[str] = None, serving_container_health_route: Optional[str] = None, description: Optional[str] = None, serving_container_command: Optional[Sequence[str]] = None, serving_container_args: Optional[Sequence[str]] = None, serving_container_environment_variables: Optional[Dict[str, str]] = None, serving_container_ports: Optional[Sequence[int]] = None, serving_container_grpc_ports: Optional[Sequence[int]] = None, local_model: Optional["LocalModel"] = None, instance_schema_uri: Optional[str] = None, parameters_schema_uri: Optional[str] = None, prediction_schema_uri: Optional[str] = None, explanation_metadata: Optional[explain.ExplanationMetadata] = None, explanation_parameters: Optional[explain.ExplanationParameters] = None, display_name: Optional[str] = None, project: Optional[str] = None, location: Optional[str] = None, 
credentials: Optional[auth_credentials.Credentials] = None, labels: Optional[Dict[str, str]] = None, encryption_spec_key_name: Optional[str] = None, staging_bucket: Optional[str] = None, sync=True, upload_request_timeout: Optional[float] = None, serving_container_deployment_timeout: Optional[int] = None, serving_container_shared_memory_size_mb: Optional[int] = None, serving_container_startup_probe_exec: Optional[Sequence[str]] = None, serving_container_startup_probe_period_seconds: Optional[int] = None, serving_container_startup_probe_timeout_seconds: Optional[int] = None, serving_container_health_probe_exec: Optional[Sequence[str]] = None, serving_container_health_probe_period_seconds: Optional[int] = None, serving_container_health_probe_timeout_seconds: Optional[int] = None, model_garden_source_model_name: Optional[str] = None, ) -> "Model": """Uploads a model and returns a Model representing the uploaded Model resource. Example usage: my_model = Model.upload( display_name="my-model", artifact_uri="gs://my-model/saved-model", serving_container_image_uri="tensorflow/serving" ) Args: serving_container_image_uri (str): Optional. The URI of the Model serving container. This parameter is required if the parameter `local_model` is not specified. artifact_uri (str): Optional. The path to the directory containing the Model artifact and any of its supporting files. Leave blank for custom container prediction. Not present for AutoML Models. model_id (str): Optional. The ID to use for the uploaded Model, which will become the final component of the model resource name. This value may be up to 63 characters, and valid characters are `[a-z0-9_-]`. The first character cannot be a number or hyphen. parent_model (str): Optional. The resource name or model ID of an existing model that the newly-uploaded model will be a version of. Only set this field when uploading a new version of an existing model. is_default_version (bool): Optional. When set to True, the newly uploaded model version will automatically have alias "default" included. Subsequent uses of this model without a version specified will use this "default" version. When set to False, the "default" alias will not be moved. Actions targeting the newly-uploaded model version will need to specifically reference this version by ID or alias. New model uploads, i.e. version 1, will always be "default" aliased. version_aliases (Sequence[str]): Optional. User provided version aliases so that a model version can be referenced via alias instead of auto-generated version ID. A default version alias will be created for the first version of the model. The format is [a-z][a-zA-Z0-9-]{0,126}[a-z0-9] version_description (str): Optional. The description of the model version being uploaded. serving_container_predict_route (str): Optional. An HTTP path to send prediction requests to the container, and which must be supported by it. If not specified a default HTTP path will be used by Vertex AI. serving_container_health_route (str): Optional. An HTTP path to send health check requests to the container, and which must be supported by it. If not specified a standard HTTP path will be used by Vertex AI. description (str): The description of the model. serving_container_command: Optional[Sequence[str]]=None, The command with which the container is run. Not executed within a shell. The Docker image's ENTRYPOINT is used if this is not provided. Variable references $(VAR_NAME) are expanded using the container's environment. 
If a variable cannot be resolved, the reference in the input string will be unchanged. The $(VAR_NAME) syntax can be escaped with a double $$, ie: $$(VAR_NAME). Escaped references will never be expanded, regardless of whether the variable exists or not. serving_container_args: Optional[Sequence[str]]=None, The arguments to the command. The Docker image's CMD is used if this is not provided. Variable references $(VAR_NAME) are expanded using the container's environment. If a variable cannot be resolved, the reference in the input string will be unchanged. The $(VAR_NAME) syntax can be escaped with a double $$, ie: $$(VAR_NAME). Escaped references will never be expanded, regardless of whether the variable exists or not. serving_container_environment_variables: Optional[Dict[str, str]]=None, The environment variables that are to be present in the container. Should be a dictionary where keys are environment variable names and values are environment variable values for those names. serving_container_ports: Optional[Sequence[int]]=None, Declaration of ports that are exposed by the container. This field is primarily informational, it gives Vertex AI information about the network connections the container uses. Listing or not a port here has no impact on whether the port is actually exposed, any port listening on the default "0.0.0.0" address inside a container will be accessible from the network. serving_container_grpc_ports: Optional[Sequence[int]]=None, Declaration of ports that are exposed by the container. Vertex AI sends gRPC prediction requests that it receives to the first port on this list. Vertex AI also sends liveness and health checks to this port. If you do not specify this field, gRPC requests to the container will be disabled. Vertex AI does not use ports other than the first one listed. This field corresponds to the `ports` field of the Kubernetes Containers v1 core API. local_model (Optional[LocalModel]): Optional. A LocalModel instance that includes a `serving_container_spec`. If provided, the `serving_container_spec` of the LocalModel instance will overwrite the values of all other serving container parameters. instance_schema_uri (str): Optional. Points to a YAML file stored on Google Cloud Storage describing the format of a single instance, which are used in ``PredictRequest.instances``, ``ExplainRequest.instances`` and ``BatchPredictionJob.input_config``. The schema is defined as an OpenAPI 3.0.2 `Schema Object `__. AutoML Models always have this field populated by AI Platform. Note: The URI given on output will be immutable and probably different, including the URI scheme, than the one given on input. The output URI will point to a location where the user only has a read access. parameters_schema_uri (str): Optional. Points to a YAML file stored on Google Cloud Storage describing the parameters of prediction and explanation via ``PredictRequest.parameters``, ``ExplainRequest.parameters`` and ``BatchPredictionJob.model_parameters``. The schema is defined as an OpenAPI 3.0.2 `Schema Object `__. AutoML Models always have this field populated by AI Platform, if no parameters are supported it is set to an empty string. Note: The URI given on output will be immutable and probably different, including the URI scheme, than the one given on input. The output URI will point to a location where the user only has a read access. prediction_schema_uri (str): Optional. 
Points to a YAML file stored on Google Cloud Storage describing the format of a single prediction produced by this Model, which are returned via ``PredictResponse.predictions``, ``ExplainResponse.explanations``, and ``BatchPredictionJob.output_config``. The schema is defined as an OpenAPI 3.0.2 `Schema Object `__. AutoML Models always have this field populated by AI Platform. Note: The URI given on output will be immutable and probably different, including the URI scheme, than the one given on input. The output URI will point to a location where the user only has a read access. explanation_metadata (aiplatform.explain.ExplanationMetadata): Optional. Metadata describing the Model's input and output for explanation. `explanation_metadata` is optional while `explanation_parameters` must be specified when used. For more details, see `Ref docs ` explanation_parameters (aiplatform.explain.ExplanationParameters): Optional. Parameters to configure explaining for Model's predictions. For more details, see `Ref docs ` display_name (str): Optional. The display name of the Model. The name can be up to 128 characters long and can be consist of any UTF-8 characters. project: Optional[str]=None, Project to upload this model to. Overrides project set in aiplatform.init. location: Optional[str]=None, Location to upload this model to. Overrides location set in aiplatform.init. credentials: Optional[auth_credentials.Credentials]=None, Custom credentials to use to upload this model. Overrides credentials set in aiplatform.init. labels (Dict[str, str]): Optional. The labels with user-defined metadata to organize your Models. Label keys and values can be no longer than 64 characters (Unicode codepoints), can only contain lowercase letters, numeric characters, underscores and dashes. International characters are allowed. See https://goo.gl/xmQnxf for more information and examples of labels. encryption_spec_key_name (Optional[str]): Optional. The Cloud KMS resource identifier of the customer managed encryption key used to protect the model. Has the form: ``projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key``. The key needs to be in the same region as where the compute resource is created. If set, this Model and all sub-resources of this Model will be secured by this key. Overrides encryption_spec_key_name set in aiplatform.init. staging_bucket (str): Optional. Bucket to stage local model artifacts. Overrides staging_bucket set in aiplatform.init. upload_request_timeout (float): Optional. The timeout for the upload request in seconds. serving_container_deployment_timeout (int): Optional. Deployment timeout in seconds. serving_container_shared_memory_size_mb (int): Optional. The amount of the VM memory to reserve as the shared memory for the model in megabytes. serving_container_startup_probe_exec (Sequence[str]): Optional. Exec specifies the action to take. Used by startup probe. An example of this argument would be ["cat", "/tmp/healthy"] serving_container_startup_probe_period_seconds (int): Optional. How often (in seconds) to perform the startup probe. Default to 10 seconds. Minimum value is 1. serving_container_startup_probe_timeout_seconds (int): Optional. Number of seconds after which the startup probe times out. Defaults to 1 second. Minimum value is 1. serving_container_health_probe_exec (Sequence[str]): Optional. Exec specifies the action to take. Used by health probe. 
An example of this argument would be ["cat", "/tmp/healthy"] serving_container_health_probe_period_seconds (int): Optional. How often (in seconds) to perform the health probe. Default to 10 seconds. Minimum value is 1. serving_container_health_probe_timeout_seconds (int): Optional. Number of seconds after which the health probe times out. Defaults to 1 second. Minimum value is 1. model_garden_source_model_name: Optional. The model garden source model resource name if the model is from Vertex Model Garden. Returns: model (aiplatform.Model): Instantiated representation of the uploaded model resource. Raises: ValueError: If explanation_metadata is specified while explanation_parameters is not. Also if model directory does not contain a supported model file. If `local_model` is specified but `serving_container_spec.image_uri` in the `local_model` is None. If `local_model` is not specified and `serving_container_image_uri` is None. """ if not display_name: display_name = cls._generate_display_name() utils.validate_display_name(display_name) if labels: utils.validate_labels(labels) appended_user_agent = None if local_model: container_spec = local_model.get_serving_container_spec() appended_user_agent = [prediction_constants.CUSTOM_PREDICTION_ROUTINES] elif not serving_container_image_uri and not artifact_uri: # It's a referenced/place holder model. container_spec = None else: if not serving_container_image_uri: raise ValueError( "The parameter `serving_container_image_uri` is required " "if no `local_model` is provided." ) env = None ports = None grpc_ports = None deployment_timeout = ( duration_pb2.Duration(seconds=serving_container_deployment_timeout) if serving_container_deployment_timeout else None ) startup_probe = None health_probe = None if serving_container_environment_variables: env = [ gca_env_var_compat.EnvVar(name=str(key), value=str(value)) for key, value in serving_container_environment_variables.items() ] if serving_container_ports: ports = [ gca_model_compat.Port(container_port=port) for port in serving_container_ports ] if serving_container_grpc_ports: grpc_ports = [ gca_model_compat.Port(container_port=port) for port in serving_container_grpc_ports ] if ( serving_container_startup_probe_exec or serving_container_startup_probe_period_seconds or serving_container_startup_probe_timeout_seconds ): startup_probe_exec = None if serving_container_startup_probe_exec: startup_probe_exec = gca_model_compat.Probe.ExecAction( command=serving_container_startup_probe_exec ) startup_probe = gca_model_compat.Probe( exec=startup_probe_exec, period_seconds=serving_container_startup_probe_period_seconds, timeout_seconds=serving_container_startup_probe_timeout_seconds, ) if ( serving_container_health_probe_exec or serving_container_health_probe_period_seconds or serving_container_health_probe_timeout_seconds ): health_probe_exec = None if serving_container_health_probe_exec: health_probe_exec = gca_model_compat.Probe.ExecAction( command=serving_container_health_probe_exec ) health_probe = gca_model_compat.Probe( exec=health_probe_exec, period_seconds=serving_container_health_probe_period_seconds, timeout_seconds=serving_container_health_probe_timeout_seconds, ) container_spec = gca_model_compat.ModelContainerSpec( image_uri=serving_container_image_uri, command=serving_container_command, args=serving_container_args, env=env, ports=ports, grpc_ports=grpc_ports, predict_route=serving_container_predict_route, health_route=serving_container_health_route, deployment_timeout=deployment_timeout, 
shared_memory_size_mb=serving_container_shared_memory_size_mb, startup_probe=startup_probe, health_probe=health_probe, ) model_predict_schemata = None if any([instance_schema_uri, parameters_schema_uri, prediction_schema_uri]): model_predict_schemata = gca_model_compat.PredictSchemata( instance_schema_uri=instance_schema_uri, parameters_schema_uri=parameters_schema_uri, prediction_schema_uri=prediction_schema_uri, ) # TODO(b/182388545) initializer.global_config.get_encryption_spec from a sync function encryption_spec = initializer.global_config.get_encryption_spec( encryption_spec_key_name=encryption_spec_key_name, ) parent_model = ModelRegistry._get_true_version_parent( location=location, project=project, parent_model=parent_model ) version_aliases = ModelRegistry._get_true_alias_list( version_aliases=version_aliases, is_default_version=is_default_version ) base_model_source = None if model_garden_source_model_name: base_model_source = gca_model_compat.Model.BaseModelSource( model_garden_source=gca_model_compat.ModelGardenSource( public_model_name=model_garden_source_model_name ) ) managed_model = gca_model_compat.Model( display_name=display_name, description=description, version_aliases=version_aliases, version_description=version_description, container_spec=container_spec, predict_schemata=model_predict_schemata, labels=labels, encryption_spec=encryption_spec, base_model_source=base_model_source, ) if artifact_uri and not artifact_uri.startswith("gs://"): model_dir = pathlib.Path(artifact_uri) # Validating the model directory if not model_dir.exists(): raise ValueError(f"artifact_uri path does not exist: '{artifact_uri}'") PREBUILT_IMAGE_RE = "(us|europe|asia)-docker.pkg.dev/vertex-ai/prediction/" if serving_container_image_uri and re.match( PREBUILT_IMAGE_RE, serving_container_image_uri ): if not model_dir.is_dir(): raise ValueError( f"artifact_uri path must be a directory: '{artifact_uri}' when using prebuilt image '{serving_container_image_uri}'" ) if not any( (model_dir / file_name).exists() for file_name in _SUPPORTED_MODEL_FILE_NAMES ): raise ValueError( "artifact_uri directory does not contain any supported model files. 
" f"When using a prebuilt serving image, the upload method only supports the following model files: '{_SUPPORTED_MODEL_FILE_NAMES}'" ) # Uploading the model staged_data_uri = gcs_utils.stage_local_data_in_gcs( data_path=str(model_dir), staging_gcs_dir=staging_bucket, project=project, location=location, credentials=credentials, ) artifact_uri = staged_data_uri if artifact_uri: managed_model.artifact_uri = artifact_uri managed_model.explanation_spec = ( _explanation_utils.create_and_validate_explanation_spec( explanation_metadata=explanation_metadata, explanation_parameters=explanation_parameters, ) ) request = gca_model_service_compat.UploadModelRequest( parent=initializer.global_config.common_location_path(project, location), model=managed_model, parent_model=parent_model, model_id=model_id, ) api_client = cls._instantiate_client( location, credentials, appended_user_agent=appended_user_agent ) lro = api_client.upload_model( request=request, timeout=upload_request_timeout, ) _LOGGER.log_create_with_lro(cls, lro) model_upload_response = lro.result() this_model = cls( model_upload_response.model, version=model_upload_response.model_version_id ) _LOGGER.log_create_complete(cls, this_model._gca_resource, "model") return this_model def deploy( self, endpoint: Optional[Union["Endpoint", "PrivateEndpoint"]] = None, deployed_model_display_name: Optional[str] = None, traffic_percentage: Optional[int] = 0, traffic_split: Optional[Dict[str, int]] = None, machine_type: Optional[str] = None, min_replica_count: int = 1, max_replica_count: int = 1, accelerator_type: Optional[str] = None, accelerator_count: Optional[int] = None, tpu_topology: Optional[str] = None, service_account: Optional[str] = None, explanation_metadata: Optional[aiplatform.explain.ExplanationMetadata] = None, explanation_parameters: Optional[ aiplatform.explain.ExplanationParameters ] = None, metadata: Optional[Sequence[Tuple[str, str]]] = (), encryption_spec_key_name: Optional[str] = None, network: Optional[str] = None, sync=True, deploy_request_timeout: Optional[float] = None, autoscaling_target_cpu_utilization: Optional[int] = None, autoscaling_target_accelerator_duty_cycle: Optional[int] = None, enable_access_logging=False, disable_container_logging: bool = False, private_service_connect_config: Optional[ PrivateEndpoint.PrivateServiceConnectConfig ] = None, deployment_resource_pool: Optional[DeploymentResourcePool] = None, reservation_affinity_type: Optional[str] = None, reservation_affinity_key: Optional[str] = None, reservation_affinity_values: Optional[List[str]] = None, spot: bool = False, fast_tryout_enabled: bool = False, system_labels: Optional[Dict[str, str]] = None, required_replica_count: Optional[int] = 0, ) -> Union[Endpoint, PrivateEndpoint]: """Deploys model to endpoint. Endpoint will be created if unspecified. Args: endpoint (Union[Endpoint, PrivateEndpoint]): Optional. Public or private Endpoint to deploy model to. If not specified, endpoint display name will be model display name+'_endpoint'. deployed_model_display_name (str): Optional. The display name of the DeployedModel. If not provided upon creation, the Model's display_name is used. traffic_percentage (int): Optional. Desired traffic to newly deployed model. Defaults to 0 if there are pre-existing deployed models. Defaults to 100 if there are no pre-existing deployed models. Negative values should not be provided. Traffic of previously deployed models at the endpoint will be scaled down to accommodate new deployed model's traffic. 
Should not be provided if traffic_split is provided. traffic_split (Dict[str, int]): Optional. A map from a DeployedModel's ID to the percentage of this Endpoint's traffic that should be forwarded to that DeployedModel. If a DeployedModel's ID is not listed in this map, then it receives no traffic. The traffic percentage values must add up to 100, or the map must be empty if the Endpoint is to not accept any traffic at the moment. Key for model being deployed is "0". Should not be provided if traffic_percentage is provided. machine_type (str): Optional. The type of machine. Not specifying machine type will result in the model being deployed with automatic resources. min_replica_count (int): Optional. The minimum number of machine replicas this deployed model will always be deployed on. If traffic against it increases, it may dynamically be deployed onto more replicas, and as traffic decreases, some of these extra replicas may be freed. max_replica_count (int): Optional. The maximum number of replicas this deployed model may be deployed on when the traffic against it increases. If the requested value is too large, the deployment will error, but if deployment succeeds then the ability to scale the model to that many replicas is guaranteed (barring service outages). If traffic against the deployed model increases beyond what its replicas at maximum may handle, a portion of the traffic will be dropped. If this value is not provided, the smaller value of min_replica_count or 1 will be used. accelerator_type (str): Optional. Hardware accelerator type. Must also set accelerator_count if used. One of ACCELERATOR_TYPE_UNSPECIFIED, NVIDIA_TESLA_K80, NVIDIA_TESLA_P100, NVIDIA_TESLA_V100, NVIDIA_TESLA_P4, NVIDIA_TESLA_T4 accelerator_count (int): Optional. The number of accelerators to attach to a worker replica. tpu_topology (str): Optional. The TPU topology to use for the DeployedModel. Required for Cloud TPU multihost deployments. service_account (str): The service account that the DeployedModel's container runs as. Specify the email address of the service account. If this service account is not specified, the container runs as a service account that doesn't have access to the resource project. Users deploying the Model must have the `iam.serviceAccounts.actAs` permission on this service account. explanation_metadata (aiplatform.explain.ExplanationMetadata): Optional. Metadata describing the Model's input and output for explanation. `explanation_metadata` is optional while `explanation_parameters` must be specified when used. For more details, see `Ref docs ` explanation_parameters (aiplatform.explain.ExplanationParameters): Optional. Parameters to configure explaining for Model's predictions. For more details, see `Ref docs ` metadata (Sequence[Tuple[str, str]]): Optional. Strings which should be sent along with the request as metadata. encryption_spec_key_name (Optional[str]): Optional. The Cloud KMS resource identifier of the customer managed encryption key used to protect the model. Has the form: ``projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key``. The key needs to be in the same region as where the compute resource is created. If set, this Endpoint and all sub-resources of this Endpoint will be secured by this key. Overrides encryption_spec_key_name set in aiplatform.init. network (str): Optional. The full name of the Compute Engine network to which the Endpoint, if created, will be peered to. E.g.
"projects/12345/global/networks/myVPC" Private services access must already be configured for the network. If set or aiplatform.init(network=...) has been set, a PrivateEndpoint will be created. If left unspecified, an Endpoint will be created. Read more about PrivateEndpoints [in the documentation](https://cloud.google.com/vertex-ai/docs/predictions/using-private-endpoints). Cannot be set together with private_service_connect_config. sync (bool): Whether to execute this method synchronously. If False, this method will be executed in concurrent Future and any downstream object will be immediately returned and synced when the Future has completed. deploy_request_timeout (float): Optional. The timeout for the deploy request in seconds. autoscaling_target_cpu_utilization (int): Optional. Target CPU Utilization to use for Autoscaling Replicas. A default value of 60 will be used if not specified. autoscaling_target_accelerator_duty_cycle (int): Optional. Target Accelerator Duty Cycle. Must also set accelerator_type and accelerator_count if specified. A default value of 60 will be used if not specified. enable_access_logging (bool): Whether to enable endpoint access logging. Defaults to False. disable_container_logging (bool): If True, container logs from the deployed model will not be written to Cloud Logging. Defaults to False. private_service_connect_config (PrivateEndpoint.PrivateServiceConnectConfig): If true, the endpoint can be accessible via [Private Service Connect](https://cloud.google.com/vpc/docs/private-service-connect). Cannot be set together with network. deployment_resource_pool (DeploymentResourcePool): Resource pool where the model will be deployed. All models that are deployed to the same DeploymentResourcePool will be hosted in a shared model server. If provided, will override replica count arguments. reservation_affinity_type (str): Optional. The type of reservation affinity. One of NO_RESERVATION, ANY_RESERVATION, SPECIFIC_RESERVATION, SPECIFIC_THEN_ANY_RESERVATION, SPECIFIC_THEN_NO_RESERVATION reservation_affinity_key (str): Optional. Corresponds to the label key of a reservation resource. To target a SPECIFIC_RESERVATION by name, use `compute.googleapis.com/reservation-name` as the key and specify the name of your reservation as its value. reservation_affinity_values (List[str]): Optional. Corresponds to the label values of a reservation resource. This must be the full resource name of the reservation. Format: 'projects/{project_id_or_number}/zones/{zone}/reservations/{reservation_name}' spot (bool): Optional. Whether to schedule the deployment workload on spot VMs. fast_tryout_enabled (bool): Optional. Defaults to False. If True, model will be deployed using faster deployment path. Useful for quick experiments. Not for production workloads. Only available for most popular models with certain machine types. system_labels (Dict[str, str]): Optional. System labels to apply to Model Garden deployments. System labels are managed by Google for internal use only. required_replica_count (int): Optional. Number of required available replicas for the deployment to succeed. This field is only needed when partial model deployment/mutation is desired, with a value greater than or equal to 1 and fewer than or equal to min_replica_count. If set, the model deploy/mutate operation will succeed once available_replica_count reaches required_replica_count, and the rest of the replicas will be retried. Returns: endpoint (Union[Endpoint, PrivateEndpoint]): Endpoint with the deployed model. 
Raises: ValueError: If `traffic_split` is set for PrivateEndpoint. """ network = network or initializer.global_config.network Endpoint._validate_deploy_args( min_replica_count=min_replica_count, max_replica_count=max_replica_count, accelerator_type=accelerator_type, deployed_model_display_name=deployed_model_display_name, traffic_split=traffic_split, traffic_percentage=traffic_percentage, deployment_resource_pool=deployment_resource_pool, required_replica_count=required_replica_count, ) if isinstance(endpoint, PrivateEndpoint): if deployment_resource_pool: raise ValueError( "Model co-hosting is not supported for PrivateEndpoint. " "Try calling deploy() without providing `deployment_resource_pool`." ) if traffic_split and endpoint.network: raise ValueError( "Traffic splitting is not yet supported for PSA based PrivateEndpoint. " "Try calling deploy() without providing `traffic_split`. " "A maximum of one model can be deployed to each private Endpoint." ) explanation_spec = _explanation_utils.create_and_validate_explanation_spec( explanation_metadata=explanation_metadata, explanation_parameters=explanation_parameters, ) return self._deploy( endpoint=endpoint, deployed_model_display_name=deployed_model_display_name, traffic_percentage=traffic_percentage, traffic_split=traffic_split, machine_type=machine_type, min_replica_count=min_replica_count, max_replica_count=max_replica_count, accelerator_type=accelerator_type, accelerator_count=accelerator_count, tpu_topology=tpu_topology, reservation_affinity_type=reservation_affinity_type, reservation_affinity_key=reservation_affinity_key, reservation_affinity_values=reservation_affinity_values, service_account=service_account, explanation_spec=explanation_spec, metadata=metadata, encryption_spec_key_name=encryption_spec_key_name or initializer.global_config.encryption_spec_key_name, network=network, sync=sync, deploy_request_timeout=deploy_request_timeout, autoscaling_target_cpu_utilization=autoscaling_target_cpu_utilization, autoscaling_target_accelerator_duty_cycle=autoscaling_target_accelerator_duty_cycle, spot=spot, enable_access_logging=enable_access_logging, disable_container_logging=disable_container_logging, private_service_connect_config=private_service_connect_config, deployment_resource_pool=deployment_resource_pool, fast_tryout_enabled=fast_tryout_enabled, system_labels=system_labels, required_replica_count=required_replica_count, ) def _should_enable_dedicated_endpoint(self, fast_tryout_enabled: bool) -> bool: """Check if dedicated endpoint should be enabled for this endpoint. Returns True if endpoint should be a dedicated endpoint. 
""" return fast_tryout_enabled @base.optional_sync(return_input_arg="endpoint", bind_future_to_self=False) def _deploy( self, endpoint: Optional[Union["Endpoint", "PrivateEndpoint"]] = None, deployed_model_display_name: Optional[str] = None, traffic_percentage: Optional[int] = 0, traffic_split: Optional[Dict[str, int]] = None, machine_type: Optional[str] = None, min_replica_count: int = 1, max_replica_count: int = 1, accelerator_type: Optional[str] = None, accelerator_count: Optional[int] = None, tpu_topology: Optional[str] = None, reservation_affinity_type: Optional[str] = None, reservation_affinity_key: Optional[str] = None, reservation_affinity_values: Optional[List[str]] = None, service_account: Optional[str] = None, explanation_spec: Optional[aiplatform.explain.ExplanationSpec] = None, metadata: Optional[Sequence[Tuple[str, str]]] = (), encryption_spec_key_name: Optional[str] = None, network: Optional[str] = None, sync: bool = True, deploy_request_timeout: Optional[float] = None, autoscaling_target_cpu_utilization: Optional[int] = None, autoscaling_target_accelerator_duty_cycle: Optional[int] = None, spot: bool = False, enable_access_logging=False, disable_container_logging: bool = False, private_service_connect_config: Optional[ PrivateEndpoint.PrivateServiceConnectConfig ] = None, deployment_resource_pool: Optional[DeploymentResourcePool] = None, fast_tryout_enabled: bool = False, system_labels: Optional[Dict[str, str]] = None, required_replica_count: Optional[int] = 0, ) -> Union[Endpoint, PrivateEndpoint]: """Deploys model to endpoint. Endpoint will be created if unspecified. Args: endpoint (Union[Endpoint, PrivateEndpoint]): Optional. Public or private Endpoint to deploy model to. If not specified, endpoint display name will be model display name+'_endpoint'. deployed_model_display_name (str): Optional. The display name of the DeployedModel. If not provided upon creation, the Model's display_name is used. traffic_percentage (int): Optional. Desired traffic to newly deployed model. Defaults to 0 if there are pre-existing deployed models. Defaults to 100 if there are no pre-existing deployed models. Negative values should not be provided. Traffic of previously deployed models at the endpoint will be scaled down to accommodate new deployed model's traffic. Should not be provided if traffic_split is provided. traffic_split (Dict[str, int]): Optional. A map from a DeployedModel's ID to the percentage of this Endpoint's traffic that should be forwarded to that DeployedModel. If a DeployedModel's ID is not listed in this map, then it receives no traffic. The traffic percentage values must add up to 100, or map must be empty if the Endpoint is to not accept any traffic at the moment. Key for model being deployed is "0". Should not be provided if traffic_percentage is provided. machine_type (str): Optional. The type of machine. Not specifying machine type will result in model to be deployed with automatic resources. min_replica_count (int): Optional. The minimum number of machine replicas this deployed model will be always deployed on. If traffic against it increases, it may dynamically be deployed onto more replicas, and as traffic decreases, some of these extra replicas may be freed. max_replica_count (int): Optional. The maximum number of replicas this deployed model may be deployed on when the traffic against it increases. 
If the requested value is too large, the deployment will error, but if deployment succeeds then the ability to scale the model to that many replicas is guaranteed (barring service outages). If traffic against the deployed model increases beyond what its replicas at maximum may handle, a portion of the traffic will be dropped. If this value is not provided, the smaller value of min_replica_count or 1 will be used. accelerator_type (str): Optional. Hardware accelerator type. Must also set accelerator_count if used. One of ACCELERATOR_TYPE_UNSPECIFIED, NVIDIA_TESLA_K80, NVIDIA_TESLA_P100, NVIDIA_TESLA_V100, NVIDIA_TESLA_P4, NVIDIA_TESLA_T4 accelerator_count (int): Optional. The number of accelerators to attach to a worker replica. tpu_topology (str): Optional. The TPU topology to use for the DeployedModel. Required for Cloud TPU multihost deployments. reservation_affinity_type (str): Optional. The type of reservation affinity. One of NO_RESERVATION, ANY_RESERVATION, SPECIFIC_RESERVATION, SPECIFIC_THEN_ANY_RESERVATION, SPECIFIC_THEN_NO_RESERVATION reservation_affinity_key (str): Optional. Corresponds to the label key of a reservation resource. To target a SPECIFIC_RESERVATION by name, use `compute.googleapis.com/reservation-name` as the key and specify the name of your reservation as its value. reservation_affinity_values (List[str]): Optional. Corresponds to the label values of a reservation resource. This must be the full resource name of the reservation. Format: 'projects/{project_id_or_number}/zones/{zone}/reservations/{reservation_name}' service_account (str): The service account that the DeployedModel's container runs as. Specify the email address of the service account. If this service account is not specified, the container runs as a service account that doesn't have access to the resource project. Users deploying the Model must have the `iam.serviceAccounts.actAs` permission on this service account. explanation_spec (aiplatform.explain.ExplanationSpec): Optional. Specification of Model explanation. metadata (Sequence[Tuple[str, str]]): Optional. Strings which should be sent along with the request as metadata. encryption_spec_key_name (Optional[str]): Optional. The Cloud KMS resource identifier of the customer managed encryption key used to protect the model. Has the form: ``projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key``. The key needs to be in the same region as where the compute resource is created. If set, this Model and all sub-resources of this Model will be secured by this key. Overrides encryption_spec_key_name set in aiplatform.init. network (str): Optional. The full name of the Compute Engine network to which the Endpoint, if created, will be peered to. E.g. "projects/12345/global/networks/myVPC". Private services access must already be configured for the network. Read more about PrivateEndpoints [in the documentation](https://cloud.google.com/vertex-ai/docs/predictions/using-private-endpoints). Cannot be set together with private_service_connect_config. sync (bool): Whether to execute this method synchronously. If False, this method will be executed in concurrent Future and any downstream object will be immediately returned and synced when the Future has completed. deploy_request_timeout (float): Optional. The timeout for the deploy request in seconds. autoscaling_target_cpu_utilization (int): Optional. Target CPU Utilization to use for Autoscaling Replicas. A default value of 60 will be used if not specified.
autoscaling_target_accelerator_duty_cycle (int): Optional. Target Accelerator Duty Cycle. Must also set accelerator_type and accelerator_count if specified. A default value of 60 will be used if not specified. spot (bool): Optional. Whether to schedule the deployment workload on spot VMs. enable_access_logging (bool): Whether to enable endpoint access logging. Defaults to False. disable_container_logging (bool): If True, container logs from the deployed model will not be written to Cloud Logging. Defaults to False. private_service_connect_config (PrivateEndpoint.PrivateServiceConnectConfig): If true, the endpoint can be accessible via [Private Service Connect](https://cloud.google.com/vpc/docs/private-service-connect). Cannot be set together with network. deployment_resource_pool (DeploymentResourcePool): Optional. Resource pool where the model will be deployed. All models that are deployed to the same DeploymentResourcePool will be hosted in a shared model server. If provided, will override replica count arguments. fast_tryout_enabled (bool): Optional. Defaults to False. If True, model will be deployed using faster deployment path. Useful for quick experiments. Not for production workloads. Only available for most popular models with certain machine types. system_labels (Dict[str, str]): Optional. System labels to apply to Model Garden deployments. System labels are managed by Google for internal use only. required_replica_count (int): Optional. Number of required available replicas for the deployment to succeed. This field is only needed when partial model deployment/mutation is desired, with a value greater than or equal to 1 and fewer than or equal to min_replica_count. If set, the model deploy/mutate operation will succeed once available_replica_count reaches required_replica_count, and the rest of the replicas will be retried. Returns: endpoint (Union[Endpoint, PrivateEndpoint]): Endpoint with the deployed model. 
""" if endpoint is None: display_name = self.display_name[:118] + "_endpoint" if not network and not private_service_connect_config: endpoint = Endpoint.create( display_name=display_name, project=self.project, location=self.location, credentials=self.credentials, encryption_spec_key_name=encryption_spec_key_name, dedicated_endpoint_enabled=self._should_enable_dedicated_endpoint( fast_tryout_enabled ), ) else: endpoint = PrivateEndpoint.create( display_name=display_name, network=network, project=self.project, location=self.location, credentials=self.credentials, encryption_spec_key_name=encryption_spec_key_name, private_service_connect_config=private_service_connect_config, ) _LOGGER.log_action_start_against_resource("Deploying model to", "", endpoint) endpoint._deploy_call( endpoint.api_client, endpoint.resource_name, self, endpoint._gca_resource.traffic_split, network=network or endpoint.network, deployed_model_display_name=deployed_model_display_name, traffic_percentage=traffic_percentage, traffic_split=traffic_split, machine_type=machine_type, min_replica_count=min_replica_count, max_replica_count=max_replica_count, accelerator_type=accelerator_type, accelerator_count=accelerator_count, tpu_topology=tpu_topology, reservation_affinity_type=reservation_affinity_type, reservation_affinity_key=reservation_affinity_key, reservation_affinity_values=reservation_affinity_values, service_account=service_account, explanation_spec=explanation_spec, metadata=metadata, deploy_request_timeout=deploy_request_timeout, autoscaling_target_cpu_utilization=autoscaling_target_cpu_utilization, autoscaling_target_accelerator_duty_cycle=autoscaling_target_accelerator_duty_cycle, spot=spot, enable_access_logging=enable_access_logging, disable_container_logging=disable_container_logging, deployment_resource_pool=deployment_resource_pool, fast_tryout_enabled=fast_tryout_enabled, system_labels=system_labels, required_replica_count=required_replica_count, ) _LOGGER.log_action_completed_against_resource("model", "deployed", endpoint) endpoint._sync_gca_resource() return endpoint def batch_predict( self, job_display_name: Optional[str] = None, gcs_source: Optional[Union[str, Sequence[str]]] = None, bigquery_source: Optional[str] = None, instances_format: str = "jsonl", gcs_destination_prefix: Optional[str] = None, bigquery_destination_prefix: Optional[str] = None, predictions_format: str = "jsonl", model_parameters: Optional[Dict] = None, machine_type: Optional[str] = None, accelerator_type: Optional[str] = None, accelerator_count: Optional[int] = None, starting_replica_count: Optional[int] = None, max_replica_count: Optional[int] = None, generate_explanation: Optional[bool] = False, explanation_metadata: Optional[aiplatform.explain.ExplanationMetadata] = None, explanation_parameters: Optional[ aiplatform.explain.ExplanationParameters ] = None, labels: Optional[Dict[str, str]] = None, credentials: Optional[auth_credentials.Credentials] = None, encryption_spec_key_name: Optional[str] = None, sync: bool = True, create_request_timeout: Optional[float] = None, batch_size: Optional[int] = None, service_account: Optional[str] = None, ) -> jobs.BatchPredictionJob: """Creates a batch prediction job using this Model and outputs prediction results to the provided destination prefix in the specified `predictions_format`. One source and one destination prefix are required. 
Example usage: my_model.batch_predict( job_display_name="prediction-123", gcs_source="gs://example-bucket/instances.csv", instances_format="csv", bigquery_destination_prefix="projectId.bqDatasetId.bqTableId" ) Args: job_display_name (str): Optional. The user-defined name of the BatchPredictionJob. The name can be up to 128 characters long and can be consist of any UTF-8 characters. gcs_source: Optional[Sequence[str]] = None Google Cloud Storage URI(-s) to your instances to run batch prediction on. They must match `instances_format`. bigquery_source: Optional[str] = None BigQuery URI to a table, up to 2000 characters long. For example: `bq://projectId.bqDatasetId.bqTableId` instances_format: str = "jsonl" The format in which instances are provided. Must be one of the formats listed in `Model.supported_input_storage_formats`. Default is "jsonl" when using `gcs_source`. If a `bigquery_source` is provided, this is overridden to "bigquery". gcs_destination_prefix: Optional[str] = None The Google Cloud Storage location of the directory where the output is to be written to. In the given directory a new directory is created. Its name is ``prediction--``, where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files ``predictions_0001.``, ``predictions_0002.``, ..., ``predictions_N.`` are created where ```` depends on chosen ``predictions_format``, and N may equal 0001 and depends on the total number of successfully predicted instances. If the Model has both ``instance`` and ``prediction`` schemata defined then each such file contains predictions as per the ``predictions_format``. If prediction for any instance failed (partially or completely), then an additional ``errors_0001.``, ``errors_0002.``,..., ``errors_N.`` files are created (N depends on total number of failed predictions). These files contain the failed instances, as per their schema, followed by an additional ``error`` field which as value has ```google.rpc.Status`` `__ containing only ``code`` and ``message`` fields. bigquery_destination_prefix: Optional[str] = None The BigQuery URI to a project or table, up to 2000 characters long. When only the project is specified, the Dataset and Table is created. When the full table reference is specified, the Dataset must exist and table must not exist. Accepted forms: ``bq://projectId`` or ``bq://projectId.bqDatasetId``. If no Dataset is specified, a new one is created with the name ``prediction__`` where the table name is made BigQuery-dataset-name compatible (for example, most special characters become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ "based on ISO-8601" format. In the dataset two tables will be created, ``predictions``, and ``errors``. If the Model has both ``instance`` and ``prediction`` schemata defined then the tables have columns as follows: The ``predictions`` table contains instances for which the prediction succeeded, it has columns as per a concatenation of the Model's instance and prediction schemata. The ``errors`` table contains rows for which the prediction has failed, it has instance columns, as per the instance schema, followed by a single "errors" column, which as values has ```google.rpc.Status`` `__ represented as a STRUCT, and containing only ``code`` and ``message``. predictions_format: str = "jsonl" Required. The format in which Vertex AI outputs the predictions, must be one of the formats specified in `Model.supported_output_storage_formats`. Default is "jsonl" when using `gcs_destination_prefix`. 
If a `bigquery_destination_prefix` is provided, this is overridden to "bigquery". model_parameters: Optional[Dict] = None Optional. The parameters that govern the predictions. The schema of the parameters may be specified via the Model's `parameters_schema_uri`. machine_type: Optional[str] = None Optional. The type of machine for running batch prediction on dedicated resources. Not specifying machine type will result in batch prediction job being run with automatic resources. accelerator_type: Optional[str] = None Optional. The type of accelerator(s) that may be attached to the machine as per `accelerator_count`. Only used if `machine_type` is set. accelerator_count: Optional[int] = None Optional. The number of accelerators to attach to the `machine_type`. Only used if `machine_type` is set. starting_replica_count: Optional[int] = None The number of machine replicas used at the start of the batch operation. If not set, Vertex AI decides starting number, not greater than `max_replica_count`. Only used if `machine_type` is set. max_replica_count: Optional[int] = None The maximum number of machine replicas the batch operation may be scaled to. Only used if `machine_type` is set. Default is 10. generate_explanation (bool): Optional. Generate explanation along with the batch prediction results. This will cause the batch prediction output to include explanations based on the `prediction_format`: - `bigquery`: output includes a column named `explanation`. The value is a struct that conforms to the [aiplatform.gapic.Explanation] object. - `jsonl`: The JSON objects on each line include an additional entry keyed `explanation`. The value of the entry is a JSON object that conforms to the [aiplatform.gapic.Explanation] object. - `csv`: Generating explanations for CSV format is not supported. explanation_metadata (aiplatform.explain.ExplanationMetadata): Optional. Explanation metadata configuration for this BatchPredictionJob. Can be specified only if `generate_explanation` is set to `True`. This value overrides the value of `Model.explanation_metadata`. All fields of `explanation_metadata` are optional in the request. If a field of the `explanation_metadata` object is not populated, the corresponding field of the `Model.explanation_metadata` object is inherited. For more details, see `Ref docs ` explanation_parameters (aiplatform.explain.ExplanationParameters): Optional. Parameters to configure explaining for Model's predictions. Can be specified only if `generate_explanation` is set to `True`. This value overrides the value of `Model.explanation_parameters`. All fields of `explanation_parameters` are optional in the request. If a field of the `explanation_parameters` object is not populated, the corresponding field of the `Model.explanation_parameters` object is inherited. For more details, see `Ref docs ` labels: Optional[Dict[str, str]] = None Optional. The labels with user-defined metadata to organize your BatchPredictionJobs. Label keys and values can be no longer than 64 characters (Unicode codepoints), can only contain lowercase letters, numeric characters, underscores and dashes. International characters are allowed. See https://goo.gl/xmQnxf for more information and examples of labels. credentials: Optional[auth_credentials.Credentials] = None Optional. Custom credentials to use to create this batch prediction job. Overrides credentials set in aiplatform.init. encryption_spec_key_name (Optional[str]): Optional. 
The Cloud KMS resource identifier of the customer managed encryption key used to protect the model. Has the form: ``projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key``. The key needs to be in the same region as where the compute resource is created. If set, this Model and all sub-resources of this Model will be secured by this key. Overrides encryption_spec_key_name set in aiplatform.init. create_request_timeout (float): Optional. The timeout for the create request in seconds. batch_size (int): Optional. The number of the records (e.g. instances) of the operation given in each batch to a machine replica. Machine type, and size of a single record should be considered when setting this parameter, higher value speeds up the batch operation's execution, but too high value will result in a whole batch not fitting in a machine's memory, and the whole operation will fail. The default value is 64. service_account (str): Optional. Specifies the service account for workload run-as account. Users submitting jobs must have act-as permission on this run-as account. Returns: job (jobs.BatchPredictionJob): Instantiated representation of the created batch prediction job. """ return jobs.BatchPredictionJob.create( job_display_name=job_display_name, model_name=self, instances_format=instances_format, predictions_format=predictions_format, gcs_source=gcs_source, bigquery_source=bigquery_source, gcs_destination_prefix=gcs_destination_prefix, bigquery_destination_prefix=bigquery_destination_prefix, model_parameters=model_parameters, machine_type=machine_type, accelerator_type=accelerator_type, accelerator_count=accelerator_count, starting_replica_count=starting_replica_count, max_replica_count=max_replica_count, batch_size=batch_size, generate_explanation=generate_explanation, explanation_metadata=explanation_metadata, explanation_parameters=explanation_parameters, labels=labels, project=self.project, location=self.location, credentials=credentials or self.credentials, encryption_spec_key_name=encryption_spec_key_name, sync=sync, create_request_timeout=create_request_timeout, service_account=service_account, ) @classmethod def list( cls, filter: Optional[str] = None, order_by: Optional[str] = None, project: Optional[str] = None, location: Optional[str] = None, credentials: Optional[auth_credentials.Credentials] = None, ) -> List["models.Model"]: """List all Model resource instances. Example Usage: aiplatform.Model.list( filter='labels.my_label="my_label_value" AND display_name="my_model"', ) Args: filter (str): Optional. An expression for filtering the results of the request. For field names both snake_case and camelCase are supported. order_by (str): Optional. A comma-separated list of fields to order by, sorted in ascending order. Use "desc" after a field name for descending. Supported fields: `display_name`, `create_time`, `update_time` project (str): Optional. Project to retrieve list from. If not set, project set in aiplatform.init will be used. location (str): Optional. Location to retrieve list from. If not set, location set in aiplatform.init will be used. credentials (auth_credentials.Credentials): Optional. Custom credentials to use to retrieve list. Overrides credentials set in aiplatform.init. 
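As a brief illustration (the display_name filter value is a placeholder), results can be filtered and ordered in a single call:

    models = aiplatform.Model.list(
        filter='display_name="my_model"',
        order_by="create_time desc",
    )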
Returns: List[models.Model]: A list of Model resource objects """ return cls._list( filter=filter, order_by=order_by, project=project, location=location, credentials=credentials, ) @classmethod def _construct_sdk_resource_from_gapic( cls, gapic_resource: gca_model_compat.Model, project: Optional[str] = None, location: Optional[str] = None, credentials: Optional[auth_credentials.Credentials] = None, ) -> "Model": """Override base._construct_sdk_resource_from_gapic to allow for setting a ModelRegistry and resource_id_validator. Args: gapic_resource (gca_model_compat.Model): A GAPIC representation of a Model resource. project (str): Optional. Project to construct SDK object from. If not set, project set in aiplatform.init will be used. location (str): Optional. Location to construct SDK object from. If not set, location set in aiplatform.init will be used. credentials (auth_credentials.Credentials): Optional. Custom credentials to use to construct SDK object. Overrides credentials set in aiplatform.init. Returns: Model: An initialized SDK Model object that represents the Model GAPIC type. """ sdk_resource = super()._construct_sdk_resource_from_gapic( gapic_resource=gapic_resource, project=project, location=location, credentials=credentials, ) sdk_resource._resource_id_validator = super()._revisioned_resource_id_validator sdk_resource._registry = ModelRegistry( sdk_resource.resource_name, location=location, project=project, credentials=credentials, ) return sdk_resource @base.optional_sync() def _wait_on_export(self, operation_future: operation.Operation, sync=True) -> None: operation_future.result() def export_model( self, export_format_id: str, artifact_destination: Optional[str] = None, image_destination: Optional[str] = None, sync: bool = True, ) -> Dict[str, str]: """Exports a trained, exportable Model to a location specified by the user. A Model is considered to be exportable if it has at least one `supported_export_formats`. Either `artifact_destination` or `image_destination` must be provided. Example Usage: my_model.export( export_format_id="tf-saved-model", artifact_destination="gs://my-bucket/models/" ) or my_model.export( export_format_id="custom-model", image_destination="us-central1-docker.pkg.dev/projectId/repo/image" ) Args: export_format_id (str): Required. The ID of the format in which the Model must be exported. The list of export formats that this Model supports can be found by calling `Model.supported_export_formats`. artifact_destination (str): The Cloud Storage location where the Model artifact is to be written to. Under the directory given as the destination a new one with name "``model-export--``", where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format, will be created. Inside, the Model and any of its supporting files will be written. This field should only be set when, in [Model.supported_export_formats], the value for the key given in `export_format_id` contains ``ARTIFACT``. image_destination (str): The Google Container Registry or Artifact Registry URI where the Model container image will be copied to. Accepted forms: - Google Container Registry path. For example: ``gcr.io/projectId/imageName:tag``. - Artifact Registry path. For example: ``us-central1-docker.pkg.dev/projectId/repoName/imageName:tag``. This field should only be set when, in [Model.supported_export_formats], the value for the key given in `export_format_id` contains ``IMAGE``. sync (bool): Whether to execute this export synchronously. 
If False, this method will be executed in concurrent Future and any downstream object will be immediately returned and synced when the Future has completed. Returns: output_info (Dict[str, str]): Details of the completed export with output destination paths to the artifacts or container image. Raises: ValueError: If model does not support exporting. ValueError: If invalid arguments or export formats are provided. """ self.wait() # Model does not support exporting if not self.supported_export_formats: raise ValueError(f"The model `{self.resource_name}` is not exportable.") # No destination provided if not any((artifact_destination, image_destination)): raise ValueError( "Please provide an `artifact_destination` or `image_destination`." ) export_format_id = export_format_id.lower() # Unsupported export type if export_format_id not in self.supported_export_formats: raise ValueError( f"'{export_format_id}' is not a supported export format for this model. " f"Choose one of the following: {self.supported_export_formats}" ) content_types = gca_model_compat.Model.ExportFormat.ExportableContent supported_content_types = self.supported_export_formats[export_format_id] if ( artifact_destination and content_types.ARTIFACT not in supported_content_types ): raise ValueError( "This model can not be exported as an artifact in '{export_format_id}' format. " "Try exporting as a container image by passing the `image_destination` argument." ) if image_destination and content_types.IMAGE not in supported_content_types: raise ValueError( "This model can not be exported as a container image in '{export_format_id}' format. " "Try exporting the model artifacts by passing a `artifact_destination` argument." ) # Construct request payload output_config = gca_model_service_compat.ExportModelRequest.OutputConfig( export_format_id=export_format_id ) if artifact_destination: output_config.artifact_destination = gca_io_compat.GcsDestination( output_uri_prefix=artifact_destination ) if image_destination: output_config.image_destination = ( gca_io_compat.ContainerRegistryDestination(output_uri=image_destination) ) _LOGGER.log_action_start_against_resource("Exporting", "model", self) model_name = self.versioned_resource_name operation_future = self.api_client.export_model( name=model_name, output_config=output_config ) _LOGGER.log_action_started_against_resource_with_lro( "Export", "model", self.__class__, operation_future ) # Block before returning self._wait_on_export(operation_future=operation_future, sync=sync) _LOGGER.log_action_completed_against_resource("model", "exported", self) return json_format.MessageToDict(operation_future.metadata.output_info._pb) @classmethod @base.optional_sync() def upload_xgboost_model_file( cls, model_file_path: str, xgboost_version: Optional[str] = None, display_name: Optional[str] = None, description: Optional[str] = None, model_id: Optional[str] = None, parent_model: Optional[str] = None, is_default_version: Optional[bool] = True, version_aliases: Optional[Sequence[str]] = None, version_description: Optional[str] = None, instance_schema_uri: Optional[str] = None, parameters_schema_uri: Optional[str] = None, prediction_schema_uri: Optional[str] = None, explanation_metadata: Optional[aiplatform.explain.ExplanationMetadata] = None, explanation_parameters: Optional[ aiplatform.explain.ExplanationParameters ] = None, project: Optional[str] = None, location: Optional[str] = None, credentials: Optional[auth_credentials.Credentials] = None, labels: Optional[Dict[str, str]] = None, 
encryption_spec_key_name: Optional[str] = None, staging_bucket: Optional[str] = None, sync=True, upload_request_timeout: Optional[float] = None, ) -> "Model": """Uploads a model and returns a Model representing the uploaded Model resource. Example usage: my_model = Model.upload_xgboost_model_file( model_file_path="iris.xgboost_model.bst" ) Args: model_file_path (str): Required. Local file path of the model. xgboost_version (str): Optional. The version of the XGBoost serving container. Supported versions: ["0.82", "0.90", "1.1", "1.2", "1.3", "1.4"]. If the version is not specified, the latest version is used. display_name (str): Optional. The display name of the Model. The name can be up to 128 characters long and can be consist of any UTF-8 characters. description (str): The description of the model. model_id (str): Optional. The ID to use for the uploaded Model, which will become the final component of the model resource name. This value may be up to 63 characters, and valid characters are `[a-z0-9_-]`. The first character cannot be a number or hyphen. parent_model (str): Optional. The resource name or model ID of an existing model that the newly-uploaded model will be a version of. Only set this field when uploading a new version of an existing model. is_default_version (bool): Optional. When set to True, the newly uploaded model version will automatically have alias "default" included. Subsequent uses of this model without a version specified will use this "default" version. When set to False, the "default" alias will not be moved. Actions targeting the newly-uploaded model version will need to specifically reference this version by ID or alias. New model uploads, i.e. version 1, will always be "default" aliased. version_aliases (Sequence[str]): Optional. User provided version aliases so that a model version can be referenced via alias instead of auto-generated version ID. A default version alias will be created for the first version of the model. The format is [a-z][a-zA-Z0-9-]{0,126}[a-z0-9] version_description (str): Optional. The description of the model version being uploaded. instance_schema_uri (str): Optional. Points to a YAML file stored on Google Cloud Storage describing the format of a single instance, which are used in ``PredictRequest.instances``, ``ExplainRequest.instances`` and ``BatchPredictionJob.input_config``. The schema is defined as an OpenAPI 3.0.2 `Schema Object `__. AutoML Models always have this field populated by AI Platform. Note: The URI given on output will be immutable and probably different, including the URI scheme, than the one given on input. The output URI will point to a location where the user only has a read access. parameters_schema_uri (str): Optional. Points to a YAML file stored on Google Cloud Storage describing the parameters of prediction and explanation via ``PredictRequest.parameters``, ``ExplainRequest.parameters`` and ``BatchPredictionJob.model_parameters``. The schema is defined as an OpenAPI 3.0.2 `Schema Object `__. AutoML Models always have this field populated by AI Platform, if no parameters are supported it is set to an empty string. Note: The URI given on output will be immutable and probably different, including the URI scheme, than the one given on input. The output URI will point to a location where the user only has a read access. prediction_schema_uri (str): Optional. 
Points to a YAML file stored on Google Cloud Storage describing the format of a single prediction produced by this Model, which are returned via ``PredictResponse.predictions``, ``ExplainResponse.explanations``, and ``BatchPredictionJob.output_config``. The schema is defined as an OpenAPI 3.0.2 `Schema Object `__. AutoML Models always have this field populated by AI Platform. Note: The URI given on output will be immutable and probably different, including the URI scheme, than the one given on input. The output URI will point to a location where the user only has a read access. explanation_metadata (aiplatform.explain.ExplanationMetadata): Optional. Metadata describing the Model's input and output for explanation. `explanation_metadata` is optional while `explanation_parameters` must be specified when used. For more details, see `Ref docs ` explanation_parameters (aiplatform.explain.ExplanationParameters): Optional. Parameters to configure explaining for Model's predictions. For more details, see `Ref docs ` project: Optional[str]=None, Project to upload this model to. Overrides project set in aiplatform.init. location: Optional[str]=None, Location to upload this model to. Overrides location set in aiplatform.init. credentials: Optional[auth_credentials.Credentials]=None, Custom credentials to use to upload this model. Overrides credentials set in aiplatform.init. labels (Dict[str, str]): Optional. The labels with user-defined metadata to organize your Models. Label keys and values can be no longer than 64 characters (Unicode codepoints), can only contain lowercase letters, numeric characters, underscores and dashes. International characters are allowed. See https://goo.gl/xmQnxf for more information and examples of labels. encryption_spec_key_name (Optional[str]): Optional. The Cloud KMS resource identifier of the customer managed encryption key used to protect the model. Has the form: ``projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key``. The key needs to be in the same region as where the compute resource is created. If set, this Model and all sub-resources of this Model will be secured by this key. Overrides encryption_spec_key_name set in aiplatform.init. staging_bucket (str): Optional. Bucket to stage local model artifacts. Overrides staging_bucket set in aiplatform.init. upload_request_timeout (float): Optional. The timeout for the upload request in seconds. Returns: model (aiplatform.Model): Instantiated representation of the uploaded model resource. Raises: ValueError: If model directory does not contain a supported model file. """ if not display_name: display_name = cls._generate_display_name("XGBoost model") XGBOOST_SUPPORTED_MODEL_FILE_EXTENSIONS = [ ".pkl", ".joblib", ".bst", ] container_image_uri = aiplatform.helpers.get_prebuilt_prediction_container_uri( region=location, framework="xgboost", framework_version=xgboost_version or "1.4", accelerator="cpu", ) model_file_path_obj = pathlib.Path(model_file_path) if not model_file_path_obj.is_file(): raise ValueError( f"model_file_path path must point to a file: '{model_file_path}'" ) model_file_extension = model_file_path_obj.suffix if model_file_extension not in XGBOOST_SUPPORTED_MODEL_FILE_EXTENSIONS: _LOGGER.warning( f"Only the following XGBoost model file extensions are currently supported: '{XGBOOST_SUPPORTED_MODEL_FILE_EXTENSIONS}'" ) _LOGGER.warning( "Treating the model file as a binary serialized XGBoost Booster." 
) model_file_extension = ".bst" # Preparing model directory # We cannot clean up the directory immediately after calling Model.upload since # that call may be asynchronous and return before the model file has been read. # To work around this, we make this method asynchronous (decorate with @base.optional_sync) # but call Model.upload with sync=True. with tempfile.TemporaryDirectory() as prepared_model_dir: prepared_model_file_path = pathlib.Path(prepared_model_dir) / ( "model" + model_file_extension ) shutil.copy(model_file_path_obj, prepared_model_file_path) return cls.upload( serving_container_image_uri=container_image_uri, artifact_uri=prepared_model_dir, display_name=display_name, description=description, model_id=model_id, parent_model=parent_model, is_default_version=is_default_version, version_aliases=version_aliases, version_description=version_description, instance_schema_uri=instance_schema_uri, parameters_schema_uri=parameters_schema_uri, prediction_schema_uri=prediction_schema_uri, explanation_metadata=explanation_metadata, explanation_parameters=explanation_parameters, project=project, location=location, credentials=credentials, labels=labels, encryption_spec_key_name=encryption_spec_key_name, staging_bucket=staging_bucket, sync=True, upload_request_timeout=upload_request_timeout, ) @classmethod @base.optional_sync() def upload_scikit_learn_model_file( cls, model_file_path: str, sklearn_version: Optional[str] = None, display_name: Optional[str] = None, description: Optional[str] = None, model_id: Optional[str] = None, parent_model: Optional[str] = None, is_default_version: Optional[bool] = True, version_aliases: Optional[Sequence[str]] = None, version_description: Optional[str] = None, instance_schema_uri: Optional[str] = None, parameters_schema_uri: Optional[str] = None, prediction_schema_uri: Optional[str] = None, explanation_metadata: Optional[aiplatform.explain.ExplanationMetadata] = None, explanation_parameters: Optional[ aiplatform.explain.ExplanationParameters ] = None, project: Optional[str] = None, location: Optional[str] = None, credentials: Optional[auth_credentials.Credentials] = None, labels: Optional[Dict[str, str]] = None, encryption_spec_key_name: Optional[str] = None, staging_bucket: Optional[str] = None, sync=True, upload_request_timeout: Optional[float] = None, ) -> "Model": """Uploads a model and returns a Model representing the uploaded Model resource. Example usage: my_model = Model.upload_scikit_learn_model_file( model_file_path="iris.sklearn_model.joblib" ) Args: model_file_path (str): Required. Local file path of the model. sklearn_version (str): Optional. The version of the Scikit-learn serving container. Supported versions: ["0.20", "0.22", "0.23", "0.24", "1.0"]. If the version is not specified, the latest version is used. display_name (str): Optional. The display name of the Model. The name can be up to 128 characters long and can be consist of any UTF-8 characters. description (str): The description of the model. model_id (str): Optional. The ID to use for the uploaded Model, which will become the final component of the model resource name. This value may be up to 63 characters, and valid characters are `[a-z0-9_-]`. The first character cannot be a number or hyphen. parent_model (str): Optional. The resource name or model ID of an existing model that the newly-uploaded model will be a version of. Only set this field when uploading a new version of an existing model. is_default_version (bool): Optional. 
When set to True, the newly uploaded model version will automatically have alias "default" included. Subsequent uses of this model without a version specified will use this "default" version. When set to False, the "default" alias will not be moved. Actions targeting the newly-uploaded model version will need to specifically reference this version by ID or alias. New model uploads, i.e. version 1, will always be "default" aliased. version_aliases (Sequence[str]): Optional. User provided version aliases so that a model version can be referenced via alias instead of auto-generated version ID. A default version alias will be created for the first version of the model. The format is [a-z][a-zA-Z0-9-]{0,126}[a-z0-9] version_description (str): Optional. The description of the model version being uploaded. instance_schema_uri (str): Optional. Points to a YAML file stored on Google Cloud Storage describing the format of a single instance, which are used in ``PredictRequest.instances``, ``ExplainRequest.instances`` and ``BatchPredictionJob.input_config``. The schema is defined as an OpenAPI 3.0.2 `Schema Object `__. AutoML Models always have this field populated by AI Platform. Note: The URI given on output will be immutable and probably different, including the URI scheme, than the one given on input. The output URI will point to a location where the user only has a read access. parameters_schema_uri (str): Optional. Points to a YAML file stored on Google Cloud Storage describing the parameters of prediction and explanation via ``PredictRequest.parameters``, ``ExplainRequest.parameters`` and ``BatchPredictionJob.model_parameters``. The schema is defined as an OpenAPI 3.0.2 `Schema Object `__. AutoML Models always have this field populated by AI Platform, if no parameters are supported it is set to an empty string. Note: The URI given on output will be immutable and probably different, including the URI scheme, than the one given on input. The output URI will point to a location where the user only has a read access. prediction_schema_uri (str): Optional. Points to a YAML file stored on Google Cloud Storage describing the format of a single prediction produced by this Model, which are returned via ``PredictResponse.predictions``, ``ExplainResponse.explanations``, and ``BatchPredictionJob.output_config``. The schema is defined as an OpenAPI 3.0.2 `Schema Object `__. AutoML Models always have this field populated by AI Platform. Note: The URI given on output will be immutable and probably different, including the URI scheme, than the one given on input. The output URI will point to a location where the user only has a read access. explanation_metadata (aiplatform.explain.ExplanationMetadata): Optional. Metadata describing the Model's input and output for explanation. `explanation_metadata` is optional while `explanation_parameters` must be specified when used. For more details, see `Ref docs ` explanation_parameters (aiplatform.explain.ExplanationParameters): Optional. Parameters to configure explaining for Model's predictions. For more details, see `Ref docs ` project: Optional[str]=None, Project to upload this model to. Overrides project set in aiplatform.init. location: Optional[str]=None, Location to upload this model to. Overrides location set in aiplatform.init. credentials: Optional[auth_credentials.Credentials]=None, Custom credentials to use to upload this model. Overrides credentials set in aiplatform.init. labels (Dict[str, str]): Optional. 
The labels with user-defined metadata to organize your Models. Label keys and values can be no longer than 64 characters (Unicode codepoints), can only contain lowercase letters, numeric characters, underscores and dashes. International characters are allowed. See https://goo.gl/xmQnxf for more information and examples of labels. encryption_spec_key_name (Optional[str]): Optional. The Cloud KMS resource identifier of the customer managed encryption key used to protect the model. Has the form: ``projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key``. The key needs to be in the same region as where the compute resource is created. If set, this Model and all sub-resources of this Model will be secured by this key. Overrides encryption_spec_key_name set in aiplatform.init. staging_bucket (str): Optional. Bucket to stage local model artifacts. Overrides staging_bucket set in aiplatform.init. sync (bool): Whether to execute this method synchronously. If False, this method will be executed in concurrent Future and any downstream object will be immediately returned and synced when the Future has completed. upload_request_timeout (float): Optional. The timeout for the upload request in seconds. Returns: model (aiplatform.Model): Instantiated representation of the uploaded model resource. Raises: ValueError: If explanation_metadata is specified while explanation_parameters is not. Also if model directory does not contain a supported model file. """ if not display_name: display_name = cls._generate_display_name("Scikit-Learn model") SKLEARN_SUPPORTED_MODEL_FILE_EXTENSIONS = [ ".pkl", ".joblib", ] container_image_uri = aiplatform.helpers.get_prebuilt_prediction_container_uri( region=location, framework="sklearn", framework_version=sklearn_version or "1.0", accelerator="cpu", ) model_file_path_obj = pathlib.Path(model_file_path) if not model_file_path_obj.is_file(): raise ValueError( f"model_file_path path must point to a file: '{model_file_path}'" ) model_file_extension = model_file_path_obj.suffix if model_file_extension not in SKLEARN_SUPPORTED_MODEL_FILE_EXTENSIONS: _LOGGER.warning( f"Only the following Scikit-learn model file extensions are currently supported: '{SKLEARN_SUPPORTED_MODEL_FILE_EXTENSIONS}'" ) _LOGGER.warning( "Treating the model file as a pickle serialized Scikit-learn model." ) model_file_extension = ".pkl" # Preparing model directory # We cannot clean up the directory immediately after calling Model.upload since # that call may be asynchronous and return before the model file has been read. # To work around this, we make this method asynchronous (decorate with @base.optional_sync) # but call Model.upload with sync=True. 
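# Illustration of the staging step (hypothetical file name): given
# model_file_path="my_classifier.joblib", the file is copied below into a
# temporary directory as "<tmpdir>/model.joblib", and that directory is passed
# to Model.upload as artifact_uri so the prebuilt sklearn serving container can
# locate an artifact named "model.pkl" or "model.joblib".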
with tempfile.TemporaryDirectory() as prepared_model_dir: prepared_model_file_path = pathlib.Path(prepared_model_dir) / ( "model" + model_file_extension ) shutil.copy(model_file_path_obj, prepared_model_file_path) return cls.upload( serving_container_image_uri=container_image_uri, artifact_uri=prepared_model_dir, display_name=display_name, description=description, model_id=model_id, parent_model=parent_model, is_default_version=is_default_version, version_aliases=version_aliases, version_description=version_description, instance_schema_uri=instance_schema_uri, parameters_schema_uri=parameters_schema_uri, prediction_schema_uri=prediction_schema_uri, explanation_metadata=explanation_metadata, explanation_parameters=explanation_parameters, project=project, location=location, credentials=credentials, labels=labels, encryption_spec_key_name=encryption_spec_key_name, staging_bucket=staging_bucket, sync=True, upload_request_timeout=upload_request_timeout, ) @classmethod def upload_tensorflow_saved_model( cls, saved_model_dir: str, tensorflow_version: Optional[str] = None, use_gpu: bool = False, display_name: Optional[str] = None, description: Optional[str] = None, model_id: Optional[str] = None, parent_model: Optional[str] = None, is_default_version: Optional[bool] = True, version_aliases: Optional[Sequence[str]] = None, version_description: Optional[str] = None, instance_schema_uri: Optional[str] = None, parameters_schema_uri: Optional[str] = None, prediction_schema_uri: Optional[str] = None, explanation_metadata: Optional[aiplatform.explain.ExplanationMetadata] = None, explanation_parameters: Optional[ aiplatform.explain.ExplanationParameters ] = None, project: Optional[str] = None, location: Optional[str] = None, credentials: Optional[auth_credentials.Credentials] = None, labels: Optional[Dict[str, str]] = None, encryption_spec_key_name: Optional[str] = None, staging_bucket: Optional[str] = None, sync=True, upload_request_timeout: Optional[float] = None, ) -> "Model": """Uploads a model and returns a Model representing the uploaded Model resource. Example usage: my_model = Model.upload_tensorflow_saved_model( saved_model_dir="iris.tensorflow_model.SavedModel" ) Args: saved_model_dir (str): Required. Local directory of the Tensorflow SavedModel. tensorflow_version (str): Optional. The version of the Tensorflow serving container. Supported versions: ["0.15", "2.1", "2.2", "2.3", "2.4", "2.5", "2.6", "2.7"]. If the version is not specified, the latest version is used. use_gpu (bool): Whether to use GPU for model serving. display_name (str): Optional. The display name of the Model. The name can be up to 128 characters long and can consist of any UTF-8 characters. description (str): The description of the model. model_id (str): Optional. The ID to use for the uploaded Model, which will become the final component of the model resource name. This value may be up to 63 characters, and valid characters are `[a-z0-9_-]`. The first character cannot be a number or hyphen. parent_model (str): Optional. The resource name or model ID of an existing model that the newly-uploaded model will be a version of. Only set this field when uploading a new version of an existing model. is_default_version (bool): Optional. When set to True, the newly uploaded model version will automatically have alias "default" included. Subsequent uses of this model without a version specified will use this "default" version. When set to False, the "default" alias will not be moved.
Actions targeting the newly-uploaded model version will need to specifically reference this version by ID or alias. New model uploads, i.e. version 1, will always be "default" aliased. version_aliases (Sequence[str]): Optional. User provided version aliases so that a model version can be referenced via alias instead of auto-generated version ID. A default version alias will be created for the first version of the model. The format is [a-z][a-zA-Z0-9-]{0,126}[a-z0-9] version_description (str): Optional. The description of the model version being uploaded. instance_schema_uri (str): Optional. Points to a YAML file stored on Google Cloud Storage describing the format of a single instance, which are used in ``PredictRequest.instances``, ``ExplainRequest.instances`` and ``BatchPredictionJob.input_config``. The schema is defined as an OpenAPI 3.0.2 `Schema Object `__. AutoML Models always have this field populated by AI Platform. Note: The URI given on output will be immutable and probably different, including the URI scheme, than the one given on input. The output URI will point to a location where the user only has a read access. parameters_schema_uri (str): Optional. Points to a YAML file stored on Google Cloud Storage describing the parameters of prediction and explanation via ``PredictRequest.parameters``, ``ExplainRequest.parameters`` and ``BatchPredictionJob.model_parameters``. The schema is defined as an OpenAPI 3.0.2 `Schema Object `__. AutoML Models always have this field populated by AI Platform, if no parameters are supported it is set to an empty string. Note: The URI given on output will be immutable and probably different, including the URI scheme, than the one given on input. The output URI will point to a location where the user only has a read access. prediction_schema_uri (str): Optional. Points to a YAML file stored on Google Cloud Storage describing the format of a single prediction produced by this Model, which are returned via ``PredictResponse.predictions``, ``ExplainResponse.explanations``, and ``BatchPredictionJob.output_config``. The schema is defined as an OpenAPI 3.0.2 `Schema Object `__. AutoML Models always have this field populated by AI Platform. Note: The URI given on output will be immutable and probably different, including the URI scheme, than the one given on input. The output URI will point to a location where the user only has a read access. explanation_metadata (aiplatform.explain.ExplanationMetadata): Optional. Metadata describing the Model's input and output for explanation. `explanation_metadata` is optional while `explanation_parameters` must be specified when used. For more details, see `Ref docs ` explanation_parameters (aiplatform.explain.ExplanationParameters): Optional. Parameters to configure explaining for Model's predictions. For more details, see `Ref docs ` project: Optional[str]=None, Project to upload this model to. Overrides project set in aiplatform.init. location: Optional[str]=None, Location to upload this model to. Overrides location set in aiplatform.init. credentials: Optional[auth_credentials.Credentials]=None, Custom credentials to use to upload this model. Overrides credentials set in aiplatform.init. labels (Dict[str, str]): Optional. The labels with user-defined metadata to organize your Models. Label keys and values can be no longer than 64 characters (Unicode codepoints), can only contain lowercase letters, numeric characters, underscores and dashes. International characters are allowed. 
See https://goo.gl/xmQnxf for more information and examples of labels. encryption_spec_key_name (Optional[str]): Optional. The Cloud KMS resource identifier of the customer managed encryption key used to protect the model. Has the form: ``projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key``. The key needs to be in the same region as where the compute resource is created. If set, this Model and all sub-resources of this Model will be secured by this key. Overrides encryption_spec_key_name set in aiplatform.init. staging_bucket (str): Optional. Bucket to stage local model artifacts. Overrides staging_bucket set in aiplatform.init. sync (bool): Whether to execute this method synchronously. If False, this method will be executed in concurrent Future and any downstream object will be immediately returned and synced when the Future has completed. upload_request_timeout (float): Optional. The timeout for the upload request in seconds. Returns: model (aiplatform.Model): Instantiated representation of the uploaded model resource. Raises: ValueError: If explanation_metadata is specified while explanation_parameters is not. Also if model directory does not contain a supported model file. """ if not display_name: display_name = cls._generate_display_name("Tensorflow model") container_image_uri = aiplatform.helpers.get_prebuilt_prediction_container_uri( region=location, framework="tensorflow", framework_version=tensorflow_version or "2.7", accelerator="gpu" if use_gpu else "cpu", ) return cls.upload( serving_container_image_uri=container_image_uri, artifact_uri=saved_model_dir, display_name=display_name, description=description, model_id=model_id, parent_model=parent_model, is_default_version=is_default_version, version_aliases=version_aliases, version_description=version_description, instance_schema_uri=instance_schema_uri, parameters_schema_uri=parameters_schema_uri, prediction_schema_uri=prediction_schema_uri, explanation_metadata=explanation_metadata, explanation_parameters=explanation_parameters, project=project, location=location, credentials=credentials, labels=labels, encryption_spec_key_name=encryption_spec_key_name, staging_bucket=staging_bucket, sync=sync, upload_request_timeout=upload_request_timeout, ) # TODO(b/273499620): Add async support. def copy( self, destination_location: str, destination_model_id: Optional[str] = None, destination_parent_model: Optional[str] = None, encryption_spec_key_name: Optional[str] = None, copy_request_timeout: Optional[float] = None, ) -> "Model": """Copys a model and returns a Model representing the copied Model resource. This method is a blocking call. Example usage: copied_model = my_model.copy( destination_location="us-central1" ) Args: destination_location (str): The destination location to copy the model to. destination_model_id (str): Optional. The ID to use for the copied Model, which will become the final component of the model resource name. This value may be up to 63 characters, and valid characters are `[a-z0-9_-]`. The first character cannot be a number or hyphen. Only set this field when copying as a new model. If this field is not set, a numeric model id will be generated. destination_parent_model (str): Optional. The resource name or model ID of an existing model that the newly-copied model will be a version of. Only set this field when copying as a new version of an existing model. encryption_spec_key_name (Optional[str]): Optional. 
The Cloud KMS resource identifier of the customer managed encryption key used to protect the model. Has the form: ``projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key``. The key needs to be in the same region as where the compute resource is created. If set, this Model and all sub-resources of this Model will be secured by this key. Overrides encryption_spec_key_name set in aiplatform.init. copy_request_timeout (float): Optional. The timeout for the copy request in seconds. Returns: model (aiplatform.Model): Instantiated representation of the copied model resource. Raises: ValueError: If both `destination_model_id` and `destination_parent_model` are set. """ if destination_model_id is not None and destination_parent_model is not None: raise ValueError( "`destination_model_id` and `destination_parent_model` can not be set together." ) parent = initializer.global_config.common_location_path( initializer.global_config.project, destination_location ) source_model = self.versioned_resource_name destination_parent_model = ModelRegistry._get_true_version_parent( parent_model=destination_parent_model, project=initializer.global_config.project, location=destination_location, ) encryption_spec = initializer.global_config.get_encryption_spec( encryption_spec_key_name=encryption_spec_key_name, ) if destination_model_id is not None: request = gca_model_service_compat.CopyModelRequest( parent=parent, source_model=source_model, model_id=destination_model_id, encryption_spec=encryption_spec, ) else: request = gca_model_service_compat.CopyModelRequest( parent=parent, source_model=source_model, parent_model=destination_parent_model, encryption_spec=encryption_spec, ) api_client = initializer.global_config.create_client( client_class=utils.ModelClientWithOverride, location_override=destination_location, credentials=initializer.global_config.credentials, ) _LOGGER.log_action_start_against_resource("Copying", "", self) lro = api_client.copy_model( request=request, timeout=copy_request_timeout, ) _LOGGER.log_action_started_against_resource_with_lro( "Copy", "", self.__class__, lro ) model_copy_response = lro.result(timeout=None) this_model = models.Model( model_copy_response.model, version=model_copy_response.model_version_id, location=destination_location, ) _LOGGER.log_action_completed_against_resource("", "copied", this_model) return this_model def list_model_evaluations( self, ) -> List["model_evaluation.ModelEvaluation"]: """List all Model Evaluation resources associated with this model. If this Model resource was instantiated with a version, the Model Evaluation resources for that version will be returned. If no version was provided when the Model resource was instantiated, Model Evaluation resources will be returned for the default version. Example Usage: my_model = Model( model_name="projects/123/locations/us-central1/models/456@1" ) my_evaluations = my_model.list_model_evaluations() Returns: List[model_evaluation.ModelEvaluation]: List of ModelEvaluation resources for the model. """ return model_evaluation.ModelEvaluation._list( parent=self.versioned_resource_name, credentials=self.credentials, ) def get_model_evaluation( self, evaluation_id: Optional[str] = None, ) -> Optional[model_evaluation.ModelEvaluation]: """Returns a ModelEvaluation resource and instantiates its representation. If no evaluation_id is passed, it will return the first evaluation associated with this model. 
If the aiplatform.Model resource was instantiated with a version, this will return a Model Evaluation from that version. If no version was specified when instantiating the Model resource, this will return an Evaluation from the default version. Example usage: my_model = Model( model_name="projects/123/locations/us-central1/models/456" ) my_evaluation = my_model.get_model_evaluation( evaluation_id="789" ) # If no arguments are passed, this method returns the first evaluation for the model my_evaluation = my_model.get_model_evaluation() Args: evaluation_id (str): Optional. The ID of the model evaluation to retrieve. Returns: model_evaluation.ModelEvaluation: Instantiated representation of the ModelEvaluation resource. """ evaluations = self.list_model_evaluations() if not evaluation_id: if len(evaluations) > 1: _LOGGER.warning( f"Your model has more than one model evaluation, this is returning only one evaluation resource: {evaluations[0].resource_name}" ) _ipython_utils.display_model_evaluation_button(evaluations[0]) return evaluations[0] else: resource_uri_parts = self._parse_resource_name(self.resource_name) evaluation_resource_name = ( model_evaluation.ModelEvaluation._format_resource_name( **resource_uri_parts, evaluation=evaluation_id, ) ) evaluation = model_evaluation.ModelEvaluation( evaluation_name=evaluation_resource_name, credentials=self.credentials, ) _ipython_utils.display_model_evaluation_button(evaluation) return evaluation def evaluate( self, prediction_type: str, target_field_name: str, gcs_source_uris: Optional[List[str]] = None, bigquery_source_uri: Optional[str] = None, bigquery_destination_output_uri: Optional[str] = None, class_labels: Optional[List[str]] = None, prediction_label_column: Optional[str] = None, prediction_score_column: Optional[str] = None, staging_bucket: Optional[str] = None, service_account: Optional[str] = None, generate_feature_attributions: bool = False, evaluation_pipeline_display_name: Optional[str] = None, evaluation_metrics_display_name: Optional[str] = None, network: Optional[str] = None, encryption_spec_key_name: Optional[str] = None, experiment: Optional[Union[str, "aiplatform.Experiment"]] = None, enable_caching: Optional[bool] = None, ) -> "model_evaluation._ModelEvaluationJob": """Creates a model evaluation job running on Vertex Pipelines and returns the resulting ModelEvaluationJob resource. Example usage: ``` my_model = Model( model_name="projects/123/locations/us-central1/models/456" ) my_evaluation_job = my_model.evaluate( prediction_type="classification", target_field_name="type", gcs_source_uris=["gs://sdk-model-eval/my-prediction-data.csv"], staging_bucket="gs://my-staging-bucket/eval_pipeline_root", ) my_evaluation_job.wait() my_evaluation = my_evaluation_job.get_model_evaluation() my_evaluation.metrics ``` Args: prediction_type (str): Required. The problem type being addressed by this evaluation run. 'classification' and 'regression' are the currently supported problem types. target_field_name (str): Required. The column name of the field containing the label for this prediction task. gcs_source_uris (List[str]): Optional. A list of Cloud Storage data files containing the ground truth data to use for this evaluation job. These files should contain your model's prediction column. Currently only Google Cloud Storage urls are supported, for example: "gs://path/to/your/data.csv". The provided data files must be either CSV or JSONL. One of `gcs_source_uris` or `bigquery_source_uri` is required. bigquery_source_uri (str): Optional.
A bigquery table URI containing the ground truth data to use for this evaluation job. This uri should be in the format 'bq://my-project-id.dataset.table'. One of `gcs_source_uris` or `bigquery_source_uri` is required. bigquery_destination_output_uri (str): Optional. A bigquery table URI where the Batch Prediction job associated with your Model Evaluation will write prediction output. This can be a BigQuery URI to a project ('bq://my-project'), a dataset ('bq://my-project.my-dataset'), or a table ('bq://my-project.my-dataset.my-table'). Required if `bigquery_source_uri` is provided. class_labels (List[str]): Optional. For custom (non-AutoML) classification models, a list of possible class names, in the same order that predictions are generated. This argument is required when prediction_type is 'classification'. For example, in a classification model with 3 possible classes that are outputted in the format: [0.97, 0.02, 0.01] with the class names "cat", "dog", and "fish", the value of `class_labels` should be `["cat", "dog", "fish"]` where the class "cat" corresponds with 0.97 in the example above. prediction_label_column (str): Optional. The column name of the field containing classes the model is scoring. Formatted to be able to find nested columns, delimited by `.`. If not set, defaulted to `prediction.classes` for classification. prediction_score_column (str): Optional. The column name of the field containing batch prediction scores. Formatted to be able to find nested columns, delimited by `.`. If not set, defaulted to `prediction.scores` for a `classification` problem_type, `prediction.value` for a `regression` problem_type. staging_bucket (str): Optional. The GCS directory to use for staging files from this evaluation job. Defaults to the value set in aiplatform.init(staging_bucket=...) if not provided. Required if staging_bucket is not set in aiplatform.init(). service_account (str): Specifies the service account for workload run-as account for this Model Evaluation PipelineJob. Users submitting jobs must have act-as permission on this run-as account. The service account running this Model Evaluation job needs the following permissions: Dataflow Worker, Storage Admin, Vertex AI Administrator, and Vertex AI Service Agent. generate_feature_attributions (boolean): Optional. Whether the model evaluation job should generate feature attributions. Defaults to False if not specified. evaluation_pipeline_display_name (str): Optional. The display name of your model evaluation job. This is the display name that will be applied to the Vertex Pipeline run for your evaluation job. If not set, a display name will be generated automatically. evaluation_metrics_display_name (str): Optional. The display name of the model evaluation resource uploaded to Vertex from your Model Evaluation pipeline. network (str): The full name of the Compute Engine network to which the job should be peered. For example, projects/12345/global/networks/myVPC. Private services access must already be configured for the network. If left unspecified, the job is not peered with any network. encryption_spec_key_name (str): Optional. The Cloud KMS resource identifier of the customer managed encryption key used to protect the job. Has the form: ``projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key``. The key needs to be in the same region as where the compute resource is created. 
If this is set, then all resources created by the PipelineJob for this Model Evaluation will be encrypted with the provided encryption key. If not specified, the encryption_spec of the original PipelineJob will be used. experiment (Union[str, experiments_resource.Experiment]): Optional. The Vertex AI experiment name or instance to associate to the PipelineJob executing this model evaluation job. Metrics produced by the PipelineJob as system.Metric Artifacts will be associated as metrics to the provided experiment, and parameters from this PipelineJob will be associated as parameters to the provided experiment. enable_caching (bool): Optional. Whether to turn on caching for the run. If this is not set, defaults to the compile time settings, which are True for all tasks by default, while users may specify different caching options for individual tasks. If this is set, the setting applies to all tasks in the pipeline. Overrides the compile time settings. Returns: model_evaluation.ModelEvaluationJob: Instantiated representation of the _ModelEvaluationJob. Raises: ValueError: If staging_bucket was not set in aiplatform.init() and staging_bucket was not provided. If the provided `prediction_type` is not valid. If the provided `gcs_source_uris` don't start with 'gs://'. """ if (gcs_source_uris is None) == (bigquery_source_uri is None): raise ValueError( "Exactly one of `gcs_source_uris` or `bigquery_source_uri` must be provided." ) if isinstance(gcs_source_uris, str): gcs_source_uris = [gcs_source_uris] if bigquery_source_uri and not isinstance(bigquery_source_uri, str): raise ValueError("The provided `bigquery_source_uri` must be a string.") if bigquery_source_uri and not bigquery_destination_output_uri: raise ValueError( "`bigquery_destination_output_uri` must be provided if `bigquery_source_uri` is used as the data source." ) if gcs_source_uris is not None and not all( uri.startswith("gs://") for uri in gcs_source_uris ): raise ValueError("`gcs_source_uris` must start with 'gs://'.") if bigquery_source_uri is not None and not bigquery_source_uri.startswith( "bq://" ): raise ValueError( "`bigquery_source_uri` and `bigquery_destination_output_uri` must start with 'bq://'" ) if ( bigquery_destination_output_uri is not None and not bigquery_destination_output_uri.startswith("bq://") ): raise ValueError( "`bigquery_source_uri` and `bigquery_destination_output_uri` must start with 'bq://'" ) SUPPORTED_INSTANCES_FORMAT_FILE_EXTENSIONS = [".jsonl", ".csv"] if not staging_bucket and initializer.global_config.staging_bucket: staging_bucket = initializer.global_config.staging_bucket elif not staging_bucket and not initializer.global_config.staging_bucket: raise ValueError( "Please provide `staging_bucket` when calling evaluate or set one using aiplatform.init(staging_bucket=...)" ) if prediction_type not in _SUPPORTED_EVAL_PREDICTION_TYPES: raise ValueError( f"Please provide a supported model prediction type, one of: {_SUPPORTED_EVAL_PREDICTION_TYPES}." ) if generate_feature_attributions: if not self._gca_resource.explanation_spec: raise ValueError( "To generate feature attributions with your evaluation, call evaluate on a model with an explanation spec. To run evaluation on the current model, call evaluate with `generate_feature_attributions=False`."
) instances_format = None if gcs_source_uris: data_file_path_obj = pathlib.Path(gcs_source_uris[0]) data_file_extension = data_file_path_obj.suffix if data_file_extension not in SUPPORTED_INSTANCES_FORMAT_FILE_EXTENSIONS: _LOGGER.warning( f"Only the following data file extensions are currently supported: '{SUPPORTED_INSTANCES_FORMAT_FILE_EXTENSIONS}'" ) else: instances_format = data_file_extension[1:] elif bigquery_source_uri: instances_format = "bigquery" if ( self._gca_resource.metadata_schema_uri == "https://storage.googleapis.com/google-cloud-aiplatform/schema/model/metadata/automl_tabular_1.0.0.yaml" ): model_type = "automl_tabular" else: model_type = "other" if ( model_type == "other" and prediction_type == "classification" and not class_labels ): raise ValueError( "Please provide `class_labels` when running evaluation on a custom classification model." ) return model_evaluation._ModelEvaluationJob.submit( model_name=self.versioned_resource_name, prediction_type=prediction_type, target_field_name=target_field_name, gcs_source_uris=gcs_source_uris, bigquery_source_uri=bigquery_source_uri, batch_predict_bigquery_destination_output_uri=bigquery_destination_output_uri, class_labels=class_labels, prediction_label_column=prediction_label_column, prediction_score_column=prediction_score_column, service_account=service_account, pipeline_root=staging_bucket, instances_format=instances_format, model_type=model_type, generate_feature_attributions=generate_feature_attributions, evaluation_pipeline_display_name=evaluation_pipeline_display_name, evaluation_metrics_display_name=evaluation_metrics_display_name, network=network, encryption_spec_key_name=encryption_spec_key_name, credentials=self.credentials, experiment=experiment, enable_caching=enable_caching, ) # TODO (b/232546878): Async support class ModelRegistry: def __init__( self, model: Union[Model, str], location: Optional[str] = None, project: Optional[str] = None, credentials: Optional[auth_credentials.Credentials] = None, ): """Creates a ModelRegistry instance for version management of a registered model. Args: model (Union[Model, str]): Required. One of the following: 1. A Model instance 2. A fully-qualified model resource name 3. A model ID. A location and project must be provided. location (str): Optional. The model location. Used when passing a model name as model. If not set, project set in aiplatform.init will be used. project (str): Optional. The model project. Used when passing a model name as model. If not set, project set in aiplatform.init will be used. credentials (auth_credentials.Credentials): Optional. Custom credentials to use with model access. If not set, credentials set in aiplatform.init will be used. """ if isinstance(model, Model): self.model_resource_name = model.resource_name else: self.model_resource_name = utils.full_resource_name( resource_name=model, resource_noun="models", parse_resource_name_method=Model._parse_resource_name, format_resource_name_method=Model._format_resource_name, project=project, location=location, resource_id_validator=base.VertexAiResourceNoun._revisioned_resource_id_validator, ) self.credentials = credentials or ( model.credentials if isinstance(model, Model) else initializer.global_config.credentials ) self.client = Model._instantiate_client(location, self.credentials) def get_model( self, version: Optional[str] = None, ) -> Model: """Gets a registered model with optional version. Args: version (str): Optional. A model version ID or alias to target. 
Defaults to the model with the "default" alias. Returns: Model: An instance of a Model from this ModelRegistry. """ return Model( self.model_resource_name, version=version, credentials=self.credentials ) def list_versions( self, filter: Optional[str] = None, ) -> List[VersionInfo]: """Lists the versions and version info of a model. Args: filter (str): Optional. An expression for filtering the results of the request. For field names both snake_case and camelCase are supported. - `labels` supports general map functions that is: - `labels.key=value` - key:value equality - `labels.key:* or labels:key - key existence - A key including a space must be quoted. `labels."a key"`. Some examples: - `labels.myKey="myValue"` Returns: List[VersionInfo]: A list of VersionInfo, each containing info about specific model versions. """ _LOGGER.info(f"Getting versions for {self.model_resource_name}") request = gca_model_service_compat.ListModelVersionsRequest( name=self.model_resource_name, filter=filter, ) page_result = self.client.list_model_versions( request=request, ) versions = [ VersionInfo( version_id=model.version_id, version_create_time=model.version_create_time, version_update_time=model.version_update_time, model_display_name=model.display_name, model_resource_name=self._parse_versioned_name(model.name)[0], version_aliases=model.version_aliases, version_description=model.version_description, ) for model in page_result ] return versions def get_version_info( self, version: str, ) -> VersionInfo: """Gets information about a specific model version. Args: version (str): Required. The model version to obtain info for. Returns: VersionInfo: Contains info about the model version. """ _LOGGER.info(f"Getting version {version} info for {self.model_resource_name}") model = self.client.get_model( name=self._get_versioned_name(self.model_resource_name, version), ) return VersionInfo( version_id=model.version_id, version_create_time=model.version_create_time, version_update_time=model.version_update_time, model_display_name=model.display_name, model_resource_name=self._parse_versioned_name(model.name)[0], version_aliases=model.version_aliases, version_description=model.version_description, ) def delete_version( self, version: str, ) -> None: """Deletes a model version from the registry. Cannot delete a version if it is the last remaining version. Use Model.delete() in that case. Args: version (str): Required. The model version ID or alias to delete. """ lro = self.client.delete_model_version( name=self._get_versioned_name(self.model_resource_name, version), ) _LOGGER.info(f"Deleting version {version} for {self.model_resource_name}") lro.result() _LOGGER.info(f"Deleted version {version} for {self.model_resource_name}") def update_version( self, version: str, version_description: Optional[str] = None, labels: Optional[Dict[str, str]] = None, ) -> None: """Updates a model version. Args: version (str): Required. The version ID to receive the new alias(es). version_description (str): The description of the model version. labels (Dict[str, str]): Optional. The labels with user-defined metadata to organize your Model versions. Label keys and values can be no longer than 64 characters (Unicode codepoints), can only contain lowercase letters, numeric characters, underscores and dashes. International characters are allowed. See https://goo.gl/xmQnxf for more information and examples of labels. Raises: ValueError: If `labels` is not the correct format. 
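Example usage (illustrative; the model resource name, version ID, description, and labels below are placeholders): registry = ModelRegistry( model="projects/123/locations/us-central1/models/456" ) registry.update_version( version="2", version_description="My updated description", labels={"team": "my-team"}, )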
""" current_model_proto = self.get_model(version).gca_resource copied_model_proto = current_model_proto.__class__(current_model_proto) update_mask: List[str] = [] if version_description: copied_model_proto.version_description = version_description update_mask.append("version_description") if labels: utils.validate_labels(labels) copied_model_proto.labels = labels update_mask.append("labels") update_mask = field_mask_pb2.FieldMask(paths=update_mask) versioned_name = self._get_versioned_name(self.model_resource_name, version) _LOGGER.info(f"Updating model {versioned_name}") self.client.update_model( model=copied_model_proto, update_mask=update_mask, ) _LOGGER.info(f"Completed updating model {versioned_name}") def add_version_aliases( self, new_aliases: List[str], version: str, ) -> None: """Adds version alias(es) to a model version. Args: new_aliases (List[str]): Required. The alias(es) to add to a model version. version (str): Required. The version ID to receive the new alias(es). """ self._merge_version_aliases( version_aliases=new_aliases, version=version, ) def remove_version_aliases( self, target_aliases: List[str], version: str, ) -> None: """Removes version alias(es) from a model version. Args: target_aliases (List[str]): Required. The alias(es) to remove from a model version. version (str): Required. The version ID to be stripped of the target alias(es). """ self._merge_version_aliases( version_aliases=[f"-{alias}" for alias in target_aliases], version=version, ) def _merge_version_aliases( self, version_aliases: List[str], version: str, ) -> None: """Merges a list of version aliases with a model's existing alias list. Args: version_aliases (List[str]): Required. The version alias change list. version (str): Required. The version ID to have its alias list changed. """ _LOGGER.info(f"Merging version aliases for {self.model_resource_name}") self.client.merge_version_aliases( name=self._get_versioned_name(self.model_resource_name, version), version_aliases=version_aliases, ) _LOGGER.info( f"Completed merging version aliases for {self.model_resource_name}" ) @staticmethod def _get_versioned_name( resource_name: str, version: Optional[str] = None, ) -> str: """Creates a versioned form of a model resource name. Args: resource_name (str): Required. A fully-qualified resource name or resource ID. version (str): Optional. The version or alias of the resource. Returns: versioned_name (str): The versioned resource name in revisioned format. """ if version: return f"{resource_name}@{version}" return resource_name @staticmethod def _parse_versioned_name( model_name: str, ) -> Tuple[str, Optional[str]]: """Return a model name and, if included in the model name, a model version. Args: model_name (str): Required. A fully-qualified model name or model ID, optionally with an included version. Returns: parsed_version_name (Tuple[str, Optional[str]]): A tuple containing the model name or ID as the first element, and the model version as the second element, if present in `model_name`. Raises: ValueError: If the `model_name` is invalid and contains too many '@' symbols. """ if "@" not in model_name: return model_name, None elif model_name.count("@") > 1: raise ValueError( f"Received an invalid model_name with too many `@`s: {model_name}" ) else: return model_name.split("@") @staticmethod def _get_true_version_parent( parent_model: Optional[str] = None, project: Optional[str] = None, location: Optional[str] = None, ) -> Optional[str]: """Gets the true `parent_model` with full resource name. 
Args: parent_model (str): Optional. A fully-qualified resource name or resource ID of the model that would be the parent of another model. project (str): Optional. The project of `parent_model`, if not included in `parent_model`. location (str): Optional. The location of `parent_model`, if not included in `parent_model`. Returns: true_parent_model (str): Optional. The true resource name of the parent model, if one should exist. """ if parent_model: existing_resource = utils.full_resource_name( resource_name=parent_model, resource_noun="models", parse_resource_name_method=Model._parse_resource_name, format_resource_name_method=Model._format_resource_name, project=project, location=location, ) parent_model = existing_resource return parent_model @staticmethod def _get_true_alias_list( version_aliases: Optional[Sequence[str]] = None, is_default_version: bool = True, ) -> Optional[Sequence[str]]: """Gets the true `version_aliases` list based on `is_default_version`. Args: version_aliases (Sequence[str]): Optional. The user-provided list of model aliases. is_default_version (bool): Optional. When set, includes the "default" alias in `version_aliases`. Defaults to True. Returns: true_alias_list (Sequence[str]): Optional: The true alias list, should one exist, containing "default" if specified. """ if is_default_version: if version_aliases and "default" not in version_aliases: version_aliases.append("default") elif not version_aliases: version_aliases = ["default"] return version_aliases
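# Usage sketch (illustrative only; the model resource name, version ID, and
# alias are placeholders): managing version aliases through ModelRegistry.
# Both calls below go through _merge_version_aliases above, where a removal
# is expressed by prefixing the alias with "-".
#
#   registry = ModelRegistry(
#       model="projects/123/locations/us-central1/models/456"
#   )
#   registry.add_version_aliases(new_aliases=["prod"], version="2")
#   registry.remove_version_aliases(target_aliases=["prod"], version="2")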