evo-ai/.venv/lib/python3.10/site-packages/google/cloud/aiplatform/models.py

# -*- coding: utf-8 -*-
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import itertools
import json
import pathlib
import re
import shutil
import tempfile
import requests
from typing import (
Any,
Dict,
Iterator,
List,
NamedTuple,
Optional,
Sequence,
Tuple,
TYPE_CHECKING,
Union,
)
from google.api_core import operation
from google.api_core import exceptions as api_exceptions
from google.auth import credentials as auth_credentials
from google.auth.transport import requests as google_auth_requests
from google.protobuf import duration_pb2
import proto
from google.cloud import aiplatform
from google.cloud.aiplatform import base
from google.cloud.aiplatform import constants
from google.cloud.aiplatform import explain
from google.cloud.aiplatform import initializer
from google.cloud.aiplatform import jobs
from google.cloud.aiplatform import models
from google.cloud.aiplatform import utils
from google.cloud.aiplatform.utils import gcs_utils
from google.cloud.aiplatform.utils import _explanation_utils
from google.cloud.aiplatform.utils import _ipython_utils
from google.cloud.aiplatform import model_evaluation
from google.cloud.aiplatform.compat.services import endpoint_service_client
from google.cloud.aiplatform.compat.services import (
deployment_resource_pool_service_client,
)
from google.cloud.aiplatform.compat.types import (
deployment_resource_pool as gca_deployment_resource_pool_compat,
deployed_model_ref as gca_deployed_model_ref_compat,
encryption_spec as gca_encryption_spec,
endpoint as gca_endpoint_compat,
explanation as gca_explanation_compat,
io as gca_io_compat,
machine_resources as gca_machine_resources_compat,
model as gca_model_compat,
model_service as gca_model_service_compat,
env_var as gca_env_var_compat,
service_networking as gca_service_networking,
)
from google.cloud.aiplatform.constants import (
prediction as prediction_constants,
)
from google.cloud.aiplatform_v1.types import model as model_v1
from google.protobuf import field_mask_pb2, timestamp_pb2
from google.protobuf import json_format
if TYPE_CHECKING:
from google.cloud.aiplatform.prediction import LocalModel
_DEFAULT_MACHINE_TYPE = "n1-standard-2"
_DEPLOYING_MODEL_TRAFFIC_SPLIT_KEY = "0"
_SUCCESSFUL_HTTP_RESPONSE = 300
_RAW_PREDICT_DEPLOYED_MODEL_ID_KEY = "X-Vertex-AI-Deployed-Model-Id"
_RAW_PREDICT_MODEL_RESOURCE_KEY = "X-Vertex-AI-Model"
_RAW_PREDICT_MODEL_VERSION_ID_KEY = "X-Vertex-AI-Model-Version-Id"
_LOGGER = base.Logger(__name__)
_SUPPORTED_MODEL_FILE_NAMES = [
"model.pkl",
"model.joblib",
"model.bst",
"model.mar",
"saved_model.pb",
"saved_model.pbtxt",
]
_SUPPORTED_EVAL_PREDICTION_TYPES = [
"classification",
"regression",
]
class VersionInfo(NamedTuple):
"""VersionInfo class envelopes returned Model version information.
Attributes:
version_id:
The version ID of the model.
version_create_time:
Timestamp when this Model version was uploaded into Vertex AI.
version_update_time:
Timestamp when this Model version was most recently updated.
model_display_name:
The user-defined name of the model this version belongs to.
model_resource_name:
The fully-qualified model resource name.
e.g. projects/{project}/locations/{location}/models/{model_display_name}
version_aliases:
User provided version aliases so that a model version can be referenced via
alias (i.e. projects/{project}/locations/{location}/models/{model_display_name}@{version_alias}).
Default is None.
version_description:
The description of this version.
Default is None.
"""
version_id: str
version_create_time: timestamp_pb2.Timestamp
version_update_time: timestamp_pb2.Timestamp
model_display_name: str
model_resource_name: str
version_aliases: Optional[Sequence[str]] = None
version_description: Optional[str] = None
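# Illustrative usage sketch (not part of the library source): reading VersionInfo
# entries through the model registry. The project, location, and model ID below
# are placeholders, and the model-registry accessors are assumed from the public
# SDK surface.
#
#   from google.cloud import aiplatform
#   aiplatform.init(project="my-project", location="us-central1")
#   model = aiplatform.Model("1234567890")
#   for version in model.versioning_registry.list_versions():
#       print(version.version_id, version.version_create_time)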
class Prediction(NamedTuple):
"""Prediction class envelopes returned Model predictions and the Model id.
Attributes:
predictions:
The predictions that are the output of the predictions
call. The schema of any single prediction may be specified via
Endpoint's DeployedModels' [Model's][google.cloud.aiplatform.v1beta1.DeployedModel.model]
[PredictSchemata's][google.cloud.aiplatform.v1beta1.Model.predict_schemata]
deployed_model_id:
ID of the Endpoint's DeployedModel that served this prediction.
metadata:
The metadata that is the output of the predictions call.
model_version_id:
ID of the DeployedModel's version that served this prediction.
model_resource_name:
The fully-qualified resource name of the model that served this prediction.
explanations:
The explanations of the Model's predictions. It has the same number
of elements as instances to be explained. Default is None.
"""
predictions: List[Any]
deployed_model_id: str
metadata: Optional[Any] = None
model_version_id: Optional[str] = None
model_resource_name: Optional[str] = None
explanations: Optional[Sequence[gca_explanation_compat.Explanation]] = None
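# Illustrative usage sketch (not part of the library source): consuming a
# Prediction returned by Endpoint.predict(). The endpoint resource name and
# instance payload are placeholders.
#
#   endpoint = aiplatform.Endpoint("projects/123/locations/us-central1/endpoints/456")
#   prediction = endpoint.predict(instances=[{"feature_1": 1.0, "feature_2": 2.0}])
#   print(prediction.predictions)
#   print(prediction.deployed_model_id, prediction.model_version_id)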
class DeploymentResourcePool(base.VertexAiResourceNounWithFutureManager):
client_class = utils.DeploymentResourcePoolClientWithOverride
_resource_noun = "deploymentResourcePools"
_getter_method = "get_deployment_resource_pool"
_list_method = "list_deployment_resource_pools"
_delete_method = "delete_deployment_resource_pool"
_parse_resource_name_method = "parse_deployment_resource_pool_path"
_format_resource_name_method = "deployment_resource_pool_path"
def __init__(
self,
deployment_resource_pool_name: str,
project: Optional[str] = None,
location: Optional[str] = None,
credentials: Optional[auth_credentials.Credentials] = None,
):
"""Retrieves a DeploymentResourcePool.
Args:
deployment_resource_pool_name (str):
Required. The fully-qualified resource name or ID of the
deployment resource pool. Example:
"projects/123/locations/us-central1/deploymentResourcePools/456"
or "456" when project and location are initialized or passed.
project (str):
Optional. Project containing the deployment resource pool to
retrieve. If not set, the project given to `aiplatform.init`
will be used.
location (str):
Optional. Location containing the deployment resource pool to
retrieve. If not set, the location given to `aiplatform.init`
will be used.
credentials: Optional[auth_credentials.Credentials]=None,
Custom credentials to use to retrieve the deployment resource
pool. If not set, the credentials given to `aiplatform.init`
will be used.
"""
super().__init__(
project=project,
location=location,
credentials=credentials,
resource_name=deployment_resource_pool_name,
)
deployment_resource_pool_name = utils.full_resource_name(
resource_name=deployment_resource_pool_name,
resource_noun=self._resource_noun,
parse_resource_name_method=self._parse_resource_name,
format_resource_name_method=self._format_resource_name,
project=project,
location=location,
)
self._gca_resource = self._get_gca_resource(
resource_name=deployment_resource_pool_name
)
@classmethod
def create(
cls,
deployment_resource_pool_id: str,
project: Optional[str] = None,
location: Optional[str] = None,
metadata: Sequence[Tuple[str, str]] = (),
credentials: Optional[auth_credentials.Credentials] = None,
machine_type: Optional[str] = None,
min_replica_count: int = 1,
max_replica_count: int = 1,
accelerator_type: Optional[str] = None,
accelerator_count: Optional[int] = None,
autoscaling_target_cpu_utilization: Optional[int] = None,
autoscaling_target_accelerator_duty_cycle: Optional[int] = None,
sync=True,
create_request_timeout: Optional[float] = None,
reservation_affinity_type: Optional[str] = None,
reservation_affinity_key: Optional[str] = None,
reservation_affinity_values: Optional[List[str]] = None,
spot: bool = False,
required_replica_count: Optional[int] = 0,
) -> "DeploymentResourcePool":
"""Creates a new DeploymentResourcePool.
Args:
deployment_resource_pool_id (str):
Required. User-specified name for the new deployment resource
pool.
project (str):
Optional. Project containing the deployment resource pool to
retrieve. If not set, the project given to `aiplatform.init`
will be used.
location (str):
Optional. Location containing the deployment resource pool to
retrieve. If not set, the location given to `aiplatform.init`
will be used.
metadata (Sequence[Tuple[str, str]]):
Optional. Strings which should be sent along with the request as
metadata.
credentials: Optional[auth_credentials.Credentials]=None,
Optional. Custom credentials to use to retrieve the deployment
resource pool. If not set, the credentials given to
`aiplatform.init` will be used.
machine_type (str):
Optional. Machine type to use for the deployment resource pool.
If not set, the default machine type of `n1-standard-2` is
used.
min_replica_count (int):
Optional. The minimum replica count of the new deployment
resource pool. Each replica serves a copy of each model deployed
on the deployment resource pool. If this value is less than
`max_replica_count`, then autoscaling is enabled, and the actual
number of replicas will be adjusted to bring resource usage in
line with the autoscaling targets.
max_replica_count (int):
Optional. The maximum replica count of the new deployment
resource pool.
accelerator_type (str):
Optional. Hardware accelerator type. Must also set
accelerator_count if used. One of NVIDIA_TESLA_K80, NVIDIA_TESLA_P100,
NVIDIA_TESLA_V100, NVIDIA_TESLA_P4, NVIDIA_TESLA_T4, or
NVIDIA_TESLA_A100.
accelerator_count (int):
Optional. The number of accelerators attached to each replica.
autoscaling_target_cpu_utilization (int):
Optional. Target CPU utilization value for autoscaling. A
default value of 60 will be used if not specified.
autoscaling_target_accelerator_duty_cycle (int):
Optional. Target accelerator duty cycle percentage to use for
autoscaling. Must also set accelerator_type and
accelerator_count if specified. A default value of 60 will be used if
accelerators are requested and this is not specified.
sync (bool):
Optional. Whether to execute this method synchronously. If
False, this method will be executed in a concurrent Future and
any downstream object will be immediately returned and synced
when the Future has completed.
create_request_timeout (float):
Optional. The create request timeout in seconds.
reservation_affinity_type (str):
Optional. The type of reservation affinity.
One of NO_RESERVATION, ANY_RESERVATION, SPECIFIC_RESERVATION,
SPECIFIC_THEN_ANY_RESERVATION, SPECIFIC_THEN_NO_RESERVATION
reservation_affinity_key (str):
Optional. Corresponds to the label key of a reservation resource.
To target a SPECIFIC_RESERVATION by name, use `compute.googleapis.com/reservation-name` as the key
and specify the name of your reservation as its value.
reservation_affinity_values (List[str]):
Optional. Corresponds to the label values of a reservation resource.
This must be the full resource name of the reservation.
Format: 'projects/{project_id_or_number}/zones/{zone}/reservations/{reservation_name}'
spot (bool):
Optional. Whether to schedule the deployment workload on spot VMs.
required_replica_count (int):
Optional. Number of required available replicas for the
deployment to succeed. This field is only needed when partial
model deployment/mutation is desired, with a value greater than
or equal to 1 and less than or equal to min_replica_count. If
set, the model deploy/mutate operation will succeed once
available_replica_count reaches required_replica_count, and the
rest of the replicas will be retried.
Returns:
DeploymentResourcePool
"""
api_client = cls._instantiate_client(location=location, credentials=credentials)
project = project or initializer.global_config.project
location = location or initializer.global_config.location
return cls._create(
api_client=api_client,
deployment_resource_pool_id=deployment_resource_pool_id,
project=project,
location=location,
metadata=metadata,
credentials=credentials,
machine_type=machine_type,
min_replica_count=min_replica_count,
max_replica_count=max_replica_count,
accelerator_type=accelerator_type,
accelerator_count=accelerator_count,
reservation_affinity_type=reservation_affinity_type,
reservation_affinity_key=reservation_affinity_key,
reservation_affinity_values=reservation_affinity_values,
autoscaling_target_cpu_utilization=autoscaling_target_cpu_utilization,
autoscaling_target_accelerator_duty_cycle=autoscaling_target_accelerator_duty_cycle,
spot=spot,
sync=sync,
create_request_timeout=create_request_timeout,
required_replica_count=required_replica_count,
)
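# Illustrative usage sketch (not part of the library source): creating a resource
# pool that several models can later share. The pool ID and machine settings are
# placeholders.
#
#   pool = aiplatform.models.DeploymentResourcePool.create(
#       deployment_resource_pool_id="my-shared-pool",
#       machine_type="n1-standard-4",
#       min_replica_count=1,
#       max_replica_count=2,
#   )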
@classmethod
@base.optional_sync()
def _create(
cls,
api_client: deployment_resource_pool_service_client.DeploymentResourcePoolServiceClient,
deployment_resource_pool_id: str,
project: Optional[str] = None,
location: Optional[str] = None,
metadata: Sequence[Tuple[str, str]] = (),
credentials: Optional[auth_credentials.Credentials] = None,
machine_type: Optional[str] = None,
min_replica_count: int = 1,
max_replica_count: int = 1,
accelerator_type: Optional[str] = None,
accelerator_count: Optional[int] = None,
reservation_affinity_type: Optional[str] = None,
reservation_affinity_key: Optional[str] = None,
reservation_affinity_values: Optional[List[str]] = None,
autoscaling_target_cpu_utilization: Optional[int] = None,
autoscaling_target_accelerator_duty_cycle: Optional[int] = None,
spot: bool = False,
sync=True,
create_request_timeout: Optional[float] = None,
required_replica_count: Optional[int] = 0,
) -> "DeploymentResourcePool":
"""Creates a new DeploymentResourcePool.
Args:
api_client (DeploymentResourcePoolServiceClient):
Required. DeploymentResourcePoolServiceClient used to make the
underlying CreateDeploymentResourcePool API call.
deployment_resource_pool_id (str):
Required. User-specified name for the new deployment resource
pool.
project (str):
Optional. Project containing the deployment resource pool to
retrieve. If not set, the project given to `aiplatform.init`
will be used.
location (str):
Optional. Location containing the deployment resource pool to
retrieve. If not set, the location given to `aiplatform.init`
will be used.
metadata (Sequence[Tuple[str, str]]):
Optional. Strings which should be sent along with the request as
metadata.
credentials: Optional[auth_credentials.Credentials]=None,
Optional. Custom credentials to use to retrieve the deployment
resource pool. If not set, the credentials given to
`aiplatform.init` will be used.
machine_type (str):
Optional. Machine type to use for the deployment resource pool.
If not set, the default machine type of `n1-standard-2` is
used.
min_replica_count (int):
Optional. The minimum replica count of the new deployment
resource pool. Each replica serves a copy of each model deployed
on the deployment resource pool. If this value is less than
`max_replica_count`, then autoscaling is enabled, and the actual
number of replicas will be adjusted to bring resource usage in
line with the autoscaling targets.
max_replica_count (int):
Optional. The maximum replica count of the new deployment
resource pool.
accelerator_type (str):
Optional. Hardware accelerator type. Must also set
accelerator_count if used. One of NVIDIA_TESLA_K80, NVIDIA_TESLA_P100,
NVIDIA_TESLA_V100, NVIDIA_TESLA_P4, NVIDIA_TESLA_T4, or
NVIDIA_TESLA_A100.
accelerator_count (int):
Optional. The number of accelerators attached to each replica.
reservation_affinity_type (str):
Optional. The type of reservation affinity.
One of NO_RESERVATION, ANY_RESERVATION, SPECIFIC_RESERVATION,
SPECIFIC_THEN_ANY_RESERVATION, SPECIFIC_THEN_NO_RESERVATION
reservation_affinity_key (str):
Optional. Corresponds to the label key of a reservation resource.
To target a SPECIFIC_RESERVATION by name, use `compute.googleapis.com/reservation-name` as the key
and specify the name of your reservation as its value.
reservation_affinity_values (List[str]):
Optional. Corresponds to the label values of a reservation resource.
This must be the full resource name of the reservation.
Format: 'projects/{project_id_or_number}/zones/{zone}/reservations/{reservation_name}'
autoscaling_target_cpu_utilization (int):
Optional. Target CPU utilization value for autoscaling. A
default value of 60 will be used if not specified.
autoscaling_target_accelerator_duty_cycle (int):
Optional. Target accelerator duty cycle percentage to use for
autoscaling. Must also set accelerator_type and
accelerator_count if specified. A default value of 60 will be used if
accelerators are requested and this is not specified.
spot (bool):
Optional. Whether to schedule the deployment workload on spot VMs.
sync (bool):
Optional. Whether to execute this method synchronously. If
False, this method will be executed in a concurrent Future and
any downstream object will be immediately returned and synced
when the Future has completed.
create_request_timeout (float):
Optional. The create request timeout in seconds.
required_replica_count (int):
Optional. Number of required available replicas for the
deployment to succeed. This field is only needed when partial
model deployment/mutation is desired, with a value greater than
or equal to 1 and less than or equal to min_replica_count. If
set, the model deploy/mutate operation will succeed once
available_replica_count reaches required_replica_count, and the
rest of the replicas will be retried.
Returns:
DeploymentResourcePool
"""
parent = initializer.global_config.common_location_path(
project=project, location=location
)
dedicated_resources = gca_machine_resources_compat.DedicatedResources(
min_replica_count=min_replica_count,
max_replica_count=max_replica_count,
spot=spot,
required_replica_count=required_replica_count,
)
machine_spec = gca_machine_resources_compat.MachineSpec(
machine_type=machine_type
)
if autoscaling_target_cpu_utilization:
autoscaling_metric_spec = (
gca_machine_resources_compat.AutoscalingMetricSpec(
metric_name=(
"aiplatform.googleapis.com/prediction/online/cpu/utilization"
),
target=autoscaling_target_cpu_utilization,
)
)
dedicated_resources.autoscaling_metric_specs.extend(
[autoscaling_metric_spec]
)
if accelerator_type and accelerator_count:
utils.validate_accelerator_type(accelerator_type)
machine_spec.accelerator_type = accelerator_type
machine_spec.accelerator_count = accelerator_count
if autoscaling_target_accelerator_duty_cycle:
autoscaling_metric_spec = gca_machine_resources_compat.AutoscalingMetricSpec(
metric_name="aiplatform.googleapis.com/prediction/online/accelerator/duty_cycle",
target=autoscaling_target_accelerator_duty_cycle,
)
dedicated_resources.autoscaling_metric_specs.extend(
[autoscaling_metric_spec]
)
if reservation_affinity_type:
machine_spec.reservation_affinity = utils.get_reservation_affinity(
reservation_affinity_type,
reservation_affinity_key,
reservation_affinity_values,
)
dedicated_resources.machine_spec = machine_spec
gapic_drp = gca_deployment_resource_pool_compat.DeploymentResourcePool(
dedicated_resources=dedicated_resources
)
operation_future = api_client.create_deployment_resource_pool(
parent=parent,
deployment_resource_pool=gapic_drp,
deployment_resource_pool_id=deployment_resource_pool_id,
metadata=metadata,
timeout=create_request_timeout,
)
_LOGGER.log_create_with_lro(cls, operation_future)
created_drp = operation_future.result()
_LOGGER.log_create_complete(cls, created_drp, "deployment resource pool")
return cls._construct_sdk_resource_from_gapic(
gapic_resource=created_drp,
project=project,
location=location,
credentials=credentials,
)
def query_deployed_models(
self,
project: Optional[str] = None,
location: Optional[str] = None,
credentials: Optional[auth_credentials.Credentials] = None,
) -> List[gca_deployed_model_ref_compat.DeployedModelRef]:
"""Lists the deployed models using this resource pool.
Args:
project (str):
Optional. Project to retrieve list from. If not set, project
set in aiplatform.init will be used.
location (str):
Optional. Location to retrieve list from. If not set, location
set in aiplatform.init will be used.
credentials (auth_credentials.Credentials):
Optional. Custom credentials to use to retrieve list. Overrides
credentials set in aiplatform.init.
Returns:
List of DeployedModelRef objects containing the endpoint ID and
deployed model ID of the deployed models using this resource pool.
"""
location = location or initializer.global_config.location
api_client = DeploymentResourcePool._instantiate_client(
location=location, credentials=credentials
)
response = api_client.query_deployed_models(
deployment_resource_pool=self.resource_name
)
return list(
itertools.chain.from_iterable(
page.deployed_model_refs for page in response.pages
)
)
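# Illustrative usage sketch (not part of the library source): inspecting which
# deployed models currently share the pool created above.
#
#   for ref in pool.query_deployed_models():
#       print(ref.endpoint, ref.deployed_model_id)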
@classmethod
def list(
cls,
filter: Optional[str] = None,
order_by: Optional[str] = None,
project: Optional[str] = None,
location: Optional[str] = None,
credentials: Optional[auth_credentials.Credentials] = None,
) -> List["models.DeploymentResourcePool"]:
"""Lists the deployment resource pools.
Args:
filter (str):
Optional. An expression for filtering the results of the request.
For field names both snake_case and camelCase are supported.
order_by (str):
Optional. A comma-separated list of fields to order by, sorted in
ascending order. Use "desc" after a field name for descending.
Supported fields: `display_name`, `create_time`, `update_time`
project (str):
Optional. Project to retrieve list from. If not set, project
set in aiplatform.init will be used.
location (str):
Optional. Location to retrieve list from. If not set, location
set in aiplatform.init will be used.
credentials (auth_credentials.Credentials):
Optional. Custom credentials to use to retrieve list. Overrides
credentials set in aiplatform.init.
Returns:
List of deployment resource pools.
"""
return cls._list(
filter=filter,
order_by=order_by,
project=project,
location=location,
credentials=credentials,
)
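# Illustrative usage sketch (not part of the library source): listing pools in
# the initialized project and location, newest first.
#
#   pools = aiplatform.models.DeploymentResourcePool.list(order_by="create_time desc")
#   for pool in pools:
#       print(pool.resource_name)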
class Endpoint(base.VertexAiResourceNounWithFutureManager, base.PreviewMixin):
client_class = utils.EndpointClientWithOverride
_resource_noun = "endpoints"
_getter_method = "get_endpoint"
_list_method = "list_endpoints"
_delete_method = "delete_endpoint"
_parse_resource_name_method = "parse_endpoint_path"
_format_resource_name_method = "endpoint_path"
_preview_class = "google.cloud.aiplatform.aiplatform.preview.models.Endpoint"
@property
def preview(self):
"""Return an Endpoint instance with preview features enabled."""
from google.cloud.aiplatform.preview import models as preview_models
if not hasattr(self, "_preview_instance"):
self._preview_instance = preview_models.Endpoint(
self.resource_name, credentials=self.credentials
)
return self._preview_instance
def __init__(
self,
endpoint_name: str,
project: Optional[str] = None,
location: Optional[str] = None,
credentials: Optional[auth_credentials.Credentials] = None,
):
"""Retrieves an endpoint resource.
Args:
endpoint_name (str):
Required. A fully-qualified endpoint resource name or endpoint ID.
Example: "projects/123/locations/us-central1/endpoints/456" or
"456" when project and location are initialized or passed.
project (str):
Optional. Project to retrieve endpoint from. If not set, project
set in aiplatform.init will be used.
location (str):
Optional. Location to retrieve endpoint from. If not set, location
set in aiplatform.init will be used.
credentials (auth_credentials.Credentials):
Optional. Custom credentials to use to upload this model. Overrides
credentials set in aiplatform.init.
"""
super().__init__(
project=project,
location=location,
credentials=credentials,
resource_name=endpoint_name,
)
endpoint_name = utils.full_resource_name(
resource_name=endpoint_name,
resource_noun="endpoints",
parse_resource_name_method=self._parse_resource_name,
format_resource_name_method=self._format_resource_name,
project=project,
location=location,
)
# Lazy load the Endpoint gca_resource until needed
self._gca_resource = gca_endpoint_compat.Endpoint(name=endpoint_name)
self.authorized_session = None
self.raw_predict_request_url = None
self.stream_raw_predict_request_url = None
@property
def _prediction_client(self) -> utils.PredictionClientWithOverride:
# The attribute might not exist due to issues in
# `VertexAiResourceNounWithFutureManager._sync_object_with_future_result`
# We should switch to @functools.cached_property once it's available.
if not getattr(self, "_prediction_client_value", None):
self._prediction_client_value = initializer.global_config.create_client(
client_class=utils.PredictionClientWithOverride,
credentials=self.credentials,
location_override=self.location,
prediction_client=True,
)
return self._prediction_client_value
@property
def _prediction_async_client(self) -> utils.PredictionAsyncClientWithOverride:
# The attribute might not exist due to issues in
# `VertexAiResourceNounWithFutureManager._sync_object_with_future_result`
# We should switch to @functools.cached_property once it's available.
if not getattr(self, "_prediction_async_client_value", None):
self._prediction_async_client_value = (
initializer.global_config.create_client(
client_class=utils.PredictionAsyncClientWithOverride,
credentials=self.credentials,
location_override=self.location,
prediction_client=True,
)
)
return self._prediction_async_client_value
def _skipped_getter_call(self) -> bool:
"""Check if GAPIC resource was populated by call to get/list API methods
Returns False if `_gca_resource` is None or fully populated. Returns True
if `_gca_resource` is partially populated
"""
return self._gca_resource and not self._gca_resource.create_time
def _sync_gca_resource_if_skipped(self) -> None:
"""Sync GAPIC service representation of Endpoint class resource only if
get_endpoint() was never called."""
if self._skipped_getter_call():
self._gca_resource = self._get_gca_resource(
resource_name=self._gca_resource.name
)
def _assert_gca_resource_is_available(self) -> None:
"""Ensures Endpoint getter was called at least once before
asserting on gca_resource's availability."""
super()._assert_gca_resource_is_available()
self._sync_gca_resource_if_skipped()
@property
def traffic_split(self) -> Dict[str, int]:
"""A map from a DeployedModel's ID to the percentage of this Endpoint's
traffic that should be forwarded to that DeployedModel.
If a DeployedModel's ID is not listed in this map, then it receives no traffic.
The traffic percentage values must add up to 100, or map must be empty if
the Endpoint is to not accept any traffic at the moment.
"""
self._sync_gca_resource()
return dict(self._gca_resource.traffic_split)
@property
def network(self) -> Optional[str]:
"""The full name of the Google Compute Engine
[network](https://cloud.google.com/vpc/docs/vpc#networks) to which this
Endpoint should be peered.
Takes the format `projects/{project}/global/networks/{network}`. Where
{project} is a project number, as in `12345`, and {network} is a network name.
Private services access must already be configured for the network. If left
unspecified, the Endpoint is not peered with any network.
"""
self._assert_gca_resource_is_available()
return getattr(self._gca_resource, "network", None)
@property
def private_service_connect_config(
self,
) -> Optional[gca_service_networking.PrivateServiceConnectConfig]:
"""The Private Service Connect configuration for this Endpoint."""
self._assert_gca_resource_is_available()
return self._gca_resource.private_service_connect_config
@classmethod
def create(
cls,
display_name: Optional[str] = None,
description: Optional[str] = None,
labels: Optional[Dict[str, str]] = None,
metadata: Optional[Sequence[Tuple[str, str]]] = (),
project: Optional[str] = None,
location: Optional[str] = None,
credentials: Optional[auth_credentials.Credentials] = None,
encryption_spec_key_name: Optional[str] = None,
sync=True,
create_request_timeout: Optional[float] = None,
endpoint_id: Optional[str] = None,
enable_request_response_logging=False,
request_response_logging_sampling_rate: Optional[float] = None,
request_response_logging_bq_destination_table: Optional[str] = None,
dedicated_endpoint_enabled=False,
inference_timeout: Optional[int] = None,
) -> "Endpoint":
"""Creates a new endpoint.
Args:
display_name (str):
Optional. The user-defined name of the Endpoint.
The name can be up to 128 characters long and can consist
of any UTF-8 characters.
description (str):
Optional. The description of the Endpoint.
labels (Dict[str, str]):
Optional. The labels with user-defined metadata to
organize your Endpoints.
Label keys and values can be no longer than 64
characters (Unicode codepoints), can only
contain lowercase letters, numeric characters,
underscores and dashes. International characters
are allowed.
See https://goo.gl/xmQnxf for more information
and examples of labels.
metadata (Sequence[Tuple[str, str]]):
Optional. Strings which should be sent along with the request as
metadata.
project (str):
Optional. Project to retrieve endpoint from. If not set, project
set in aiplatform.init will be used.
location (str):
Optional. Location to retrieve endpoint from. If not set, location
set in aiplatform.init will be used.
credentials (auth_credentials.Credentials):
Optional. Custom credentials to use to upload this model. Overrides
credentials set in aiplatform.init.
encryption_spec_key_name (str):
Optional. The Cloud KMS resource identifier of the customer
managed encryption key used to protect the model. Has the
form:
``projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key``.
The key needs to be in the same region as where the compute
resource is created.
If set, this Endpoint and all sub-resources of this Endpoint will be secured by this key.
Overrides encryption_spec_key_name set in aiplatform.init.
sync (bool):
Whether to execute this method synchronously. If False, this method
will be executed in concurrent Future and any downstream object will
be immediately returned and synced when the Future has completed.
create_request_timeout (float):
Optional. The timeout for the create request in seconds.
endpoint_id (str):
Optional. The ID to use for endpoint, which will become
the final component of the endpoint resource name. If
not provided, Vertex AI will generate a value for this
ID.
This value should be 1-10 characters, and valid
characters are /[0-9]/. When using HTTP/JSON, this field
is populated based on a query string argument, such as
``?endpoint_id=12345``. This is the fallback for fields
that are not included in either the URI or the body.
enable_request_response_logging (bool):
Optional. Whether to enable request & response logging for this endpoint.
request_response_logging_sampling_rate (float):
Optional. The request response logging sampling rate. If not set, default is 0.0.
request_response_logging_bq_destination_table (str):
Optional. The request response logging bigquery destination. If not set, will create a table with name:
``bq://{project_id}.logging_{endpoint_display_name}_{endpoint_id}.request_response_logging``.
dedicated_endpoint_enabled (bool):
Optional. If enabled, a dedicated DNS will be created and your
traffic will be fully isolated from other customers' traffic and
latency will be reduced.
inference_timeout (int):
Optional. It defines the prediction timeout, in seconds, for online predictions using cloud-based endpoints. This applies to either PSC endpoints, when private_service_connect_config is set, or dedicated endpoints, when dedicated_endpoint_enabled is true.
Returns:
endpoint (aiplatform.Endpoint):
Created endpoint.
"""
api_client = cls._instantiate_client(location=location, credentials=credentials)
if not display_name:
display_name = cls._generate_display_name()
utils.validate_display_name(display_name)
if labels:
utils.validate_labels(labels)
project = project or initializer.global_config.project
location = location or initializer.global_config.location
predict_request_response_logging_config = None
if enable_request_response_logging:
predict_request_response_logging_config = (
gca_endpoint_compat.PredictRequestResponseLoggingConfig(
enabled=True,
sampling_rate=request_response_logging_sampling_rate,
bigquery_destination=gca_io_compat.BigQueryDestination(
output_uri=request_response_logging_bq_destination_table
),
)
)
client_connection_config = None
if (
inference_timeout is not None
and inference_timeout > 0
and dedicated_endpoint_enabled
):
client_connection_config = gca_endpoint_compat.ClientConnectionConfig(
inference_timeout=duration_pb2.Duration(seconds=inference_timeout)
)
return cls._create(
api_client=api_client,
display_name=display_name,
project=project,
location=location,
description=description,
labels=labels,
metadata=metadata,
credentials=credentials,
encryption_spec=initializer.global_config.get_encryption_spec(
encryption_spec_key_name=encryption_spec_key_name
),
sync=sync,
create_request_timeout=create_request_timeout,
endpoint_id=endpoint_id,
predict_request_response_logging_config=predict_request_response_logging_config,
dedicated_endpoint_enabled=dedicated_endpoint_enabled,
client_connection_config=client_connection_config,
)
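# Illustrative usage sketch (not part of the library source): creating a
# dedicated endpoint with a 10-minute inference timeout. The display name and
# timeout are placeholders.
#
#   endpoint = aiplatform.Endpoint.create(
#       display_name="my-endpoint",
#       dedicated_endpoint_enabled=True,
#       inference_timeout=600,
#   )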
@classmethod
@base.optional_sync()
def _create(
cls,
api_client: endpoint_service_client.EndpointServiceClient,
display_name: str,
project: str,
location: str,
description: Optional[str] = None,
labels: Optional[Dict[str, str]] = None,
metadata: Optional[Sequence[Tuple[str, str]]] = (),
credentials: Optional[auth_credentials.Credentials] = None,
encryption_spec: Optional[gca_encryption_spec.EncryptionSpec] = None,
network: Optional[str] = None,
sync=True,
create_request_timeout: Optional[float] = None,
endpoint_id: Optional[str] = None,
predict_request_response_logging_config: Optional[
gca_endpoint_compat.PredictRequestResponseLoggingConfig
] = None,
private_service_connect_config: Optional[
gca_service_networking.PrivateServiceConnectConfig
] = None,
dedicated_endpoint_enabled=False,
client_connection_config: Optional[
gca_endpoint_compat.ClientConnectionConfig
] = None,
) -> "Endpoint":
"""Creates a new endpoint by calling the API client.
Args:
api_client (EndpointServiceClient):
Required. An instance of EndpointServiceClient with the correct
api_endpoint already set based on user's preferences.
display_name (str):
Required. The user-defined name of the Endpoint.
The name can be up to 128 characters long and can consist
of any UTF-8 characters.
project (str):
Required. Project to retrieve endpoint from.
location (str):
Required. Location to retrieve endpoint from.
description (str):
Optional. The description of the Endpoint.
labels (Dict[str, str]):
Optional. The labels with user-defined metadata to
organize your Endpoints.
Label keys and values can be no longer than 64
characters (Unicode codepoints), can only
contain lowercase letters, numeric characters,
underscores and dashes. International characters
are allowed.
See https://goo.gl/xmQnxf for more information
and examples of labels.
metadata (Sequence[Tuple[str, str]]):
Optional. Strings which should be sent along with the request as
metadata.
credentials (auth_credentials.Credentials):
Optional. Custom credentials to use to upload this model. Overrides
credentials set in aiplatform.init.
encryption_spec (gca_encryption_spec.EncryptionSpec):
Optional. The Cloud KMS customer managed encryption key used to protect the dataset.
The key needs to be in the same region as where the compute
resource is created.
If set, this Dataset and all sub-resources of this Dataset will be secured by this key.
network (str):
Optional. The full name of the Compute Engine network to which
this Endpoint will be peered. E.g. "projects/12345/global/networks/myVPC".
Private services access must already be configured for the network.
Cannot be specified when private_service_connect is enabled.
Read more about PrivateEndpoints
[in the documentation](https://cloud.google.com/vertex-ai/docs/predictions/using-private-endpoints)
sync (bool):
Whether to create this endpoint synchronously.
create_request_timeout (float):
Optional. The timeout for the create request in seconds.
endpoint_id (str):
Optional. The ID to use for endpoint, which will become
the final component of the endpoint resource name. If
not provided, Vertex AI will generate a value for this
ID.
This value should be 1-10 characters, and valid
characters are /[0-9]/. When using HTTP/JSON, this field
is populated based on a query string argument, such as
``?endpoint_id=12345``. This is the fallback for fields
that are not included in either the URI or the body.
predict_request_response_logging_config (aiplatform.endpoint.PredictRequestResponseLoggingConfig):
Optional. The request response logging configuration for online prediction.
private_service_connect_config (aiplatform.service_network.PrivateServiceConnectConfig):
If enabled, the endpoint can be accessible via [Private Service Connect](https://cloud.google.com/vpc/docs/private-service-connect).
Cannot be enabled when network is specified.
dedicated_endpoint_enabled (bool):
Optional. If enabled, a dedicated DNS will be created and your
traffic will be fully isolated from other customers' traffic and
latency will be reduced.
client_connection_config (aiplatform.endpoint.ClientConnectionConfig):
Optional. The inference timeout which is applied on cloud-based (PSC, or dedicated) endpoints for online prediction.
Returns:
endpoint (aiplatform.Endpoint):
Created endpoint.
"""
parent = initializer.global_config.common_location_path(
project=project, location=location
)
gapic_endpoint = gca_endpoint_compat.Endpoint(
display_name=display_name,
description=description,
labels=labels,
encryption_spec=encryption_spec,
network=network,
predict_request_response_logging_config=predict_request_response_logging_config,
private_service_connect_config=private_service_connect_config,
dedicated_endpoint_enabled=dedicated_endpoint_enabled,
client_connection_config=client_connection_config,
)
operation_future = api_client.create_endpoint(
parent=parent,
endpoint=gapic_endpoint,
endpoint_id=endpoint_id,
metadata=metadata,
timeout=create_request_timeout,
)
_LOGGER.log_create_with_lro(cls, operation_future)
created_endpoint = operation_future.result()
_LOGGER.log_create_complete(cls, created_endpoint, "endpoint")
return cls._construct_sdk_resource_from_gapic(
gapic_resource=created_endpoint,
project=project,
location=location,
credentials=credentials,
)
@classmethod
def _construct_sdk_resource_from_gapic(
cls,
gapic_resource: proto.Message,
project: Optional[str] = None,
location: Optional[str] = None,
credentials: Optional[auth_credentials.Credentials] = None,
) -> "Endpoint":
"""Given a GAPIC Endpoint object, return the SDK representation.
Args:
gapic_resource (proto.Message):
A GAPIC representation of a Endpoint resource, usually
retrieved by a get_* or in a list_* API call.
project (str):
Optional. Project to construct Endpoint object from. If not set,
project set in aiplatform.init will be used.
location (str):
Optional. Location to construct Endpoint object from. If not set,
location set in aiplatform.init will be used.
credentials (auth_credentials.Credentials):
Optional. Custom credentials to use to construct Endpoint.
Overrides credentials set in aiplatform.init.
Returns:
Endpoint (aiplatform.Endpoint):
An initialized Endpoint resource.
"""
endpoint = super()._construct_sdk_resource_from_gapic(
gapic_resource=gapic_resource,
project=project,
location=location,
credentials=credentials,
)
endpoint.authorized_session = None
endpoint.raw_predict_request_url = None
endpoint.stream_raw_predict_request_url = None
return endpoint
@staticmethod
def _allocate_traffic(
traffic_split: Dict[str, int],
traffic_percentage: int,
) -> Dict[str, int]:
"""Allocates desired traffic to new deployed model and scales traffic
of older deployed models.
Args:
traffic_split (Dict[str, int]):
Required. Current traffic split of deployed models in endpoint.
traffic_percentage (int):
Required. Desired traffic to new deployed model.
Returns:
new_traffic_split (Dict[str, int]):
Traffic split to use.
"""
new_traffic_split = {}
old_models_traffic = 100 - traffic_percentage
if old_models_traffic:
unallocated_traffic = old_models_traffic
for deployed_model in traffic_split:
current_traffic = traffic_split[deployed_model]
new_traffic = int(current_traffic / 100 * old_models_traffic)
new_traffic_split[deployed_model] = new_traffic
unallocated_traffic -= new_traffic
# will likely under-allocate. make total 100.
for deployed_model in new_traffic_split:
if unallocated_traffic == 0:
break
new_traffic_split[deployed_model] += 1
unallocated_traffic -= 1
new_traffic_split[_DEPLOYING_MODEL_TRAFFIC_SPLIT_KEY] = traffic_percentage
return new_traffic_split
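# Worked example of the allocation above: for an existing split
# {"m1": 60, "m2": 40} and traffic_percentage=30, the remaining 70% is scaled
# proportionally (m1 -> int(60 / 100 * 70) = 42, m2 -> int(40 / 100 * 70) = 28)
# and the deploying model (key "0") receives 30, giving a total of 100.
#
#   Endpoint._allocate_traffic({"m1": 60, "m2": 40}, traffic_percentage=30)
#   # -> {"m1": 42, "m2": 28, "0": 30}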
@staticmethod
def _unallocate_traffic(
traffic_split: Dict[str, int],
deployed_model_id: str,
) -> Dict[str, int]:
"""Sets deployed model id's traffic to 0 and scales the traffic of
other deployed models.
Args:
traffic_split (Dict[str, int]):
Required. Current traffic split of deployed models in endpoint.
deployed_model_id (str):
Required. ID of the deployed model whose traffic will be set to zero.
Returns:
new_traffic_split (Dict[str, int]):
Traffic split to use.
"""
new_traffic_split = traffic_split.copy()
del new_traffic_split[deployed_model_id]
deployed_model_id_traffic = traffic_split[deployed_model_id]
traffic_percent_left = 100 - deployed_model_id_traffic
if traffic_percent_left:
unallocated_traffic = 100
for deployed_model in new_traffic_split:
current_traffic = traffic_split[deployed_model]
new_traffic = int(current_traffic / traffic_percent_left * 100)
new_traffic_split[deployed_model] = new_traffic
unallocated_traffic -= new_traffic
# will likely under-allocate. make total 100.
for deployed_model in new_traffic_split:
if unallocated_traffic == 0:
break
new_traffic_split[deployed_model] += 1
unallocated_traffic -= 1
new_traffic_split[deployed_model_id] = 0
return new_traffic_split
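# Worked example of the unallocation above: removing "m3" from
# {"m1": 40, "m2": 40, "m3": 20} rescales the remaining 80% back to 100
# (m1 -> int(40 / 80 * 100) = 50, m2 -> 50) and pins "m3" to zero.
#
#   Endpoint._unallocate_traffic({"m1": 40, "m2": 40, "m3": 20}, deployed_model_id="m3")
#   # -> {"m1": 50, "m2": 50, "m3": 0}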
@staticmethod
def _validate_deploy_args(
min_replica_count: Optional[int],
max_replica_count: Optional[int],
accelerator_type: Optional[str],
deployed_model_display_name: Optional[str],
traffic_split: Optional[Dict[str, int]],
traffic_percentage: Optional[int],
deployment_resource_pool: Optional[DeploymentResourcePool],
required_replica_count: Optional[int],
):
"""Helper method to validate deploy arguments.
Args:
min_replica_count (int):
Required. The minimum number of machine replicas this deployed
model will be always deployed on. If traffic against it increases,
it may dynamically be deployed onto more replicas, and as traffic
decreases, some of these extra replicas may be freed.
max_replica_count (int):
Required. The maximum number of replicas this deployed model may
be deployed on when the traffic against it increases. If requested
value is too large, the deployment will error, but if deployment
succeeds then the ability to scale the model to that many replicas
is guaranteed (barring service outages). If traffic against the
deployed model increases beyond what its replicas at maximum may
handle, a portion of the traffic will be dropped. If this value
is not provided, the larger value of min_replica_count or 1 will
be used. If value provided is smaller than min_replica_count, it
will automatically be increased to be min_replica_count.
accelerator_type (str):
Required. Hardware accelerator type. One of ACCELERATOR_TYPE_UNSPECIFIED,
NVIDIA_TESLA_K80, NVIDIA_TESLA_P100, NVIDIA_TESLA_V100, NVIDIA_TESLA_P4,
NVIDIA_TESLA_T4
deployed_model_display_name (str):
Required. The display name of the DeployedModel. If not provided
upon creation, the Model's display_name is used.
traffic_split (Dict[str, int]):
Optional. A map from a DeployedModel's ID to the percentage of
this Endpoint's traffic that should be forwarded to that DeployedModel.
If a DeployedModel's ID is not listed in this map, then it receives
no traffic. The traffic percentage values must add up to 100, or
map must be empty if the Endpoint is to not accept any traffic at
the moment. Key for model being deployed is "0". Should not be
provided if traffic_percentage is provided.
traffic_percentage (int):
Optional. Desired traffic to newly deployed model. Defaults to
0 if there are pre-existing deployed models. Defaults to 100 if
there are no pre-existing deployed models. Negative values should
not be provided. Traffic of previously deployed models at the endpoint
will be scaled down to accommodate new deployed model's traffic.
Should not be provided if traffic_split is provided.
deployment_resource_pool (DeploymentResourcePool): Optional.
Resource pool where the model will be deployed. All models that
are deployed to the same DeploymentResourcePool will be hosted in
a shared model server. If provided, will override replica count
arguments.
required_replica_count (int):
Optional. Number of required available replicas for the
deployment to succeed. This field is only needed when partial
model deployment/mutation is desired, with a value greater than
or equal to 1 and less than or equal to min_replica_count. If
set, the model deploy/mutate operation will succeed once
available_replica_count reaches required_replica_count, and the
rest of the replicas will be retried.
Raises:
ValueError: If min or max replica count is negative, if traffic percentage is
greater than 100 or less than 0, or if traffic_split does not sum to 100.
"""
if deployment_resource_pool:
# Validate that replica count and deployment resource pool are not
# both specified.
if (
min_replica_count
and min_replica_count != 1
or max_replica_count
and max_replica_count != 1
or required_replica_count
and required_replica_count != 0
):
raise ValueError(
"Ignoring explicitly specified replica counts, "
"since deployment_resource_pool was also given."
)
if accelerator_type:
raise ValueError(
"Conflicting deployment parameters were given. "
"deployment_resource_pool may not be specified at the same "
"time as accelerator_type."
)
else:
# Validate that a non-negative replica count is given, and validate
# the accelerator type.
if min_replica_count < 0:
raise ValueError("Min replica cannot be negative.")
if max_replica_count < 0:
raise ValueError("Max replica cannot be negative.")
if required_replica_count and required_replica_count < 0:
raise ValueError("Required replica cannot be negative.")
if accelerator_type:
utils.validate_accelerator_type(accelerator_type)
if deployed_model_display_name is not None:
utils.validate_display_name(deployed_model_display_name)
if traffic_split is None:
if traffic_percentage > 100:
raise ValueError("Traffic percentage cannot be greater than 100.")
if traffic_percentage < 0:
raise ValueError("Traffic percentage cannot be negative.")
elif traffic_split:
if sum(traffic_split.values()) != 100:
raise ValueError(
"Sum of all traffic within traffic split needs to be 100."
)
def deploy(
self,
model: "Model",
deployed_model_display_name: Optional[str] = None,
traffic_percentage: int = 0,
traffic_split: Optional[Dict[str, int]] = None,
machine_type: Optional[str] = None,
min_replica_count: int = 1,
max_replica_count: int = 1,
accelerator_type: Optional[str] = None,
accelerator_count: Optional[int] = None,
tpu_topology: Optional[str] = None,
service_account: Optional[str] = None,
explanation_metadata: Optional[aiplatform.explain.ExplanationMetadata] = None,
explanation_parameters: Optional[
aiplatform.explain.ExplanationParameters
] = None,
metadata: Optional[Sequence[Tuple[str, str]]] = (),
sync=True,
deploy_request_timeout: Optional[float] = None,
autoscaling_target_cpu_utilization: Optional[int] = None,
autoscaling_target_accelerator_duty_cycle: Optional[int] = None,
enable_access_logging=False,
disable_container_logging: bool = False,
deployment_resource_pool: Optional[DeploymentResourcePool] = None,
reservation_affinity_type: Optional[str] = None,
reservation_affinity_key: Optional[str] = None,
reservation_affinity_values: Optional[List[str]] = None,
spot: bool = False,
fast_tryout_enabled: bool = False,
system_labels: Optional[Dict[str, str]] = None,
required_replica_count: Optional[int] = 0,
) -> None:
"""Deploys a Model to the Endpoint.
Args:
model (aiplatform.Model):
Required. Model to be deployed.
deployed_model_display_name (str):
Optional. The display name of the DeployedModel. If not provided
upon creation, the Model's display_name is used.
traffic_percentage (int):
Optional. Desired traffic to newly deployed model. Defaults to
0 if there are pre-existing deployed models. Defaults to 100 if
there are no pre-existing deployed models. Negative values should
not be provided. Traffic of previously deployed models at the endpoint
will be scaled down to accommodate new deployed model's traffic.
Should not be provided if traffic_split is provided.
traffic_split (Dict[str, int]):
Optional. A map from a DeployedModel's ID to the percentage of
this Endpoint's traffic that should be forwarded to that DeployedModel.
If a DeployedModel's ID is not listed in this map, then it receives
no traffic. The traffic percentage values must add up to 100, or
map must be empty if the Endpoint is to not accept any traffic at
the moment. Key for model being deployed is "0". Should not be
provided if traffic_percentage is provided.
machine_type (str):
Optional. The type of machine. Not specifying machine type will
result in model to be deployed with automatic resources.
min_replica_count (int):
Optional. The minimum number of machine replicas this deployed
model will be always deployed on. If traffic against it increases,
it may dynamically be deployed onto more replicas, and as traffic
decreases, some of these extra replicas may be freed.
max_replica_count (int):
Optional. The maximum number of replicas this deployed model may
be deployed on when the traffic against it increases. If requested
value is too large, the deployment will error, but if deployment
succeeds then the ability to scale the model to that many replicas
is guaranteed (barring service outages). If traffic against the
deployed model increases beyond what its replicas at maximum may
handle, a portion of the traffic will be dropped. If this value
is not provided, the larger value of min_replica_count or 1 will
be used. If value provided is smaller than min_replica_count, it
will automatically be increased to be min_replica_count.
accelerator_type (str):
Optional. Hardware accelerator type. Must also set accelerator_count if used.
One of ACCELERATOR_TYPE_UNSPECIFIED, NVIDIA_TESLA_K80, NVIDIA_TESLA_P100,
NVIDIA_TESLA_V100, NVIDIA_TESLA_P4, NVIDIA_TESLA_T4
accelerator_count (int):
Optional. The number of accelerators to attach to a worker replica.
tpu_topology (str):
Optional. The TPU topology to use for the DeployedModel.
Required for CloudTPU multihost deployments.
service_account (str):
The service account that the DeployedModel's container runs as. Specify the
email address of the service account. If this service account is not
specified, the container runs as a service account that doesn't have access
to the resource project.
Users deploying the Model must have the `iam.serviceAccounts.actAs`
permission on this service account.
explanation_metadata (aiplatform.explain.ExplanationMetadata):
Optional. Metadata describing the Model's input and output for explanation.
`explanation_metadata` is optional while `explanation_parameters` must be
specified when used.
For more details, see `Ref docs <http://tinyurl.com/1igh60kt>`
explanation_parameters (aiplatform.explain.ExplanationParameters):
Optional. Parameters to configure explaining for Model's predictions.
For more details, see `Ref docs <http://tinyurl.com/1an4zake>`
metadata (Sequence[Tuple[str, str]]):
Optional. Strings which should be sent along with the request as
metadata.
sync (bool):
Whether to execute this method synchronously. If False, this method
will be executed in concurrent Future and any downstream object will
be immediately returned and synced when the Future has completed.
deploy_request_timeout (float):
Optional. The timeout for the deploy request in seconds.
autoscaling_target_cpu_utilization (int):
Target CPU Utilization to use for Autoscaling Replicas.
A default value of 60 will be used if not specified.
autoscaling_target_accelerator_duty_cycle (int):
Target Accelerator Duty Cycle.
Must also set accelerator_type and accelerator_count if specified.
A default value of 60 will be used if not specified.
enable_access_logging (bool):
Whether to enable endpoint access logging. Defaults to False.
disable_container_logging (bool):
If True, container logs from the deployed model will not be
written to Cloud Logging. Defaults to False.
deployment_resource_pool (DeploymentResourcePool):
Resource pool where the model will be deployed. All models that
are deployed to the same DeploymentResourcePool will be hosted in
a shared model server. If provided, will override replica count
arguments.
reservation_affinity_type (str):
Optional. The type of reservation affinity.
One of NO_RESERVATION, ANY_RESERVATION, SPECIFIC_RESERVATION,
SPECIFIC_THEN_ANY_RESERVATION, SPECIFIC_THEN_NO_RESERVATION
reservation_affinity_key (str):
Optional. Corresponds to the label key of a reservation resource.
To target a SPECIFIC_RESERVATION by name, use `compute.googleapis.com/reservation-name` as the key
and specify the name of your reservation as its value.
reservation_affinity_values (List[str]):
Optional. Corresponds to the label values of a reservation resource.
This must be the full resource name of the reservation.
Format: 'projects/{project_id_or_number}/zones/{zone}/reservations/{reservation_name}'
spot (bool):
Optional. Whether to schedule the deployment workload on spot VMs.
fast_tryout_enabled (bool):
Optional. Defaults to False.
If True, model will be deployed using faster deployment path.
Useful for quick experiments. Not for production workloads. Only
available for most popular models with certain machine types.
system_labels (Dict[str, str]):
Optional. System labels to apply to Model Garden deployments.
System labels are managed by Google for internal use only.
required_replica_count (int):
Optional. Number of required available replicas for the
deployment to succeed. This field is only needed when partial
model deployment/mutation is desired, with a value greater than
or equal to 1 and less than or equal to min_replica_count. If
set, the model deploy/mutate operation will succeed once
available_replica_count reaches required_replica_count, and the
rest of the replicas will be retried.
"""
self._sync_gca_resource_if_skipped()
self._validate_deploy_args(
min_replica_count=min_replica_count,
max_replica_count=max_replica_count,
accelerator_type=accelerator_type,
deployed_model_display_name=deployed_model_display_name,
traffic_split=traffic_split,
traffic_percentage=traffic_percentage,
deployment_resource_pool=deployment_resource_pool,
required_replica_count=required_replica_count,
)
explanation_spec = _explanation_utils.create_and_validate_explanation_spec(
explanation_metadata=explanation_metadata,
explanation_parameters=explanation_parameters,
)
self._deploy(
model=model,
deployed_model_display_name=deployed_model_display_name,
traffic_percentage=traffic_percentage,
traffic_split=traffic_split,
machine_type=machine_type,
min_replica_count=min_replica_count,
max_replica_count=max_replica_count,
accelerator_type=accelerator_type,
accelerator_count=accelerator_count,
tpu_topology=tpu_topology,
reservation_affinity_type=reservation_affinity_type,
reservation_affinity_key=reservation_affinity_key,
reservation_affinity_values=reservation_affinity_values,
service_account=service_account,
explanation_spec=explanation_spec,
metadata=metadata,
sync=sync,
deploy_request_timeout=deploy_request_timeout,
autoscaling_target_cpu_utilization=autoscaling_target_cpu_utilization,
autoscaling_target_accelerator_duty_cycle=autoscaling_target_accelerator_duty_cycle,
spot=spot,
enable_access_logging=enable_access_logging,
disable_container_logging=disable_container_logging,
deployment_resource_pool=deployment_resource_pool,
fast_tryout_enabled=fast_tryout_enabled,
system_labels=system_labels,
required_replica_count=required_replica_count,
)
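# Illustrative usage sketch (not part of the library source): deploying a model
# to this endpoint with autoscaling between one and two replicas. The model
# resource name and machine settings are placeholders.
#
#   model = aiplatform.Model("projects/123/locations/us-central1/models/789")
#   endpoint.deploy(
#       model=model,
#       machine_type="n1-standard-4",
#       min_replica_count=1,
#       max_replica_count=2,
#       traffic_percentage=100,
#   )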
@base.optional_sync()
def _deploy(
self,
model: "Model",
deployed_model_display_name: Optional[str] = None,
traffic_percentage: Optional[int] = 0,
traffic_split: Optional[Dict[str, int]] = None,
machine_type: Optional[str] = None,
min_replica_count: int = 1,
max_replica_count: int = 1,
accelerator_type: Optional[str] = None,
accelerator_count: Optional[int] = None,
tpu_topology: Optional[str] = None,
reservation_affinity_type: Optional[str] = None,
reservation_affinity_key: Optional[str] = None,
reservation_affinity_values: Optional[List[str]] = None,
service_account: Optional[str] = None,
explanation_spec: Optional[aiplatform.explain.ExplanationSpec] = None,
metadata: Optional[Sequence[Tuple[str, str]]] = (),
sync=True,
deploy_request_timeout: Optional[float] = None,
autoscaling_target_cpu_utilization: Optional[int] = None,
autoscaling_target_accelerator_duty_cycle: Optional[int] = None,
spot: bool = False,
enable_access_logging=False,
disable_container_logging: bool = False,
deployment_resource_pool: Optional[DeploymentResourcePool] = None,
fast_tryout_enabled: bool = False,
system_labels: Optional[Dict[str, str]] = None,
required_replica_count: Optional[int] = 0,
) -> None:
"""Deploys a Model to the Endpoint.
Args:
model (aiplatform.Model):
Required. Model to be deployed.
deployed_model_display_name (str):
Optional. The display name of the DeployedModel. If not provided
upon creation, the Model's display_name is used.
traffic_percentage (int):
Optional. Desired traffic to newly deployed model. Defaults to
0 if there are pre-existing deployed models. Defaults to 100 if
there are no pre-existing deployed models. Negative values should
not be provided. Traffic of previously deployed models at the endpoint
will be scaled down to accommodate new deployed model's traffic.
Should not be provided if traffic_split is provided.
traffic_split (Dict[str, int]):
Optional. A map from a DeployedModel's ID to the percentage of
this Endpoint's traffic that should be forwarded to that DeployedModel.
If a DeployedModel's ID is not listed in this map, then it receives
no traffic. The traffic percentage values must add up to 100, or
map must be empty if the Endpoint is to not accept any traffic at
the moment. Key for model being deployed is "0". Should not be
provided if traffic_percentage is provided.
machine_type (str):
Optional. The type of machine. Not specifying machine type will
                result in the model being deployed with automatic resources.
min_replica_count (int):
Optional. The minimum number of machine replicas this deployed
                model will always be deployed on. If traffic against it increases,
it may dynamically be deployed onto more replicas, and as traffic
decreases, some of these extra replicas may be freed.
max_replica_count (int):
Optional. The maximum number of replicas this deployed model may
be deployed on when the traffic against it increases. If requested
value is too large, the deployment will error, but if deployment
succeeds then the ability to scale the model to that many replicas
is guaranteed (barring service outages). If traffic against the
deployed model increases beyond what its replicas at maximum may
handle, a portion of the traffic will be dropped. If this value
is not provided, the larger value of min_replica_count or 1 will
be used. If value provided is smaller than min_replica_count, it
will automatically be increased to be min_replica_count.
accelerator_type (str):
Optional. Hardware accelerator type. Must also set accelerator_count if used.
One of ACCELERATOR_TYPE_UNSPECIFIED, NVIDIA_TESLA_K80, NVIDIA_TESLA_P100,
NVIDIA_TESLA_V100, NVIDIA_TESLA_P4, NVIDIA_TESLA_T4
accelerator_count (int):
Optional. The number of accelerators to attach to a worker replica.
tpu_topology (str):
Optional. The TPU topology to use for the DeployedModel.
Required for CloudTPU multihost deployments.
reservation_affinity_type (str):
Optional. The type of reservation affinity.
One of NO_RESERVATION, ANY_RESERVATION, SPECIFIC_RESERVATION,
SPECIFIC_THEN_ANY_RESERVATION, SPECIFIC_THEN_NO_RESERVATION
reservation_affinity_key (str):
Optional. Corresponds to the label key of a reservation resource.
To target a SPECIFIC_RESERVATION by name, use `compute.googleapis.com/reservation-name` as the key
and specify the name of your reservation as its value.
reservation_affinity_values (List[str]):
Optional. Corresponds to the label values of a reservation resource.
This must be the full resource name of the reservation.
Format: 'projects/{project_id_or_number}/zones/{zone}/reservations/{reservation_name}'
service_account (str):
The service account that the DeployedModel's container runs as. Specify the
email address of the service account. If this service account is not
specified, the container runs as a service account that doesn't have access
to the resource project.
Users deploying the Model must have the `iam.serviceAccounts.actAs`
permission on this service account.
explanation_spec (aiplatform.explain.ExplanationSpec):
Optional. Specification of Model explanation.
metadata (Sequence[Tuple[str, str]]):
Optional. Strings which should be sent along with the request as
metadata.
sync (bool):
Whether to execute this method synchronously. If False, this method
will be executed in concurrent Future and any downstream object will
be immediately returned and synced when the Future has completed.
deploy_request_timeout (float):
Optional. The timeout for the deploy request in seconds.
autoscaling_target_cpu_utilization (int):
Target CPU Utilization to use for Autoscaling Replicas.
A default value of 60 will be used if not specified.
autoscaling_target_accelerator_duty_cycle (int):
Target Accelerator Duty Cycle.
Must also set accelerator_type and accelerator_count if specified.
A default value of 60 will be used if not specified.
spot (bool):
Optional. Whether to schedule the deployment workload on spot VMs.
enable_access_logging (bool):
Whether to enable endpoint access logging. Defaults to False.
disable_container_logging (bool):
If True, container logs from the deployed model will not be
written to Cloud Logging. Defaults to False.
deployment_resource_pool (DeploymentResourcePool):
Resource pool where the model will be deployed. All models that
are deployed to the same DeploymentResourcePool will be hosted in
a shared model server. If provided, will override replica count
arguments.
fast_tryout_enabled (bool):
Optional. Defaults to False.
If True, model will be deployed using faster deployment path.
Useful for quick experiments. Not for production workloads. Only
available for most popular models with certain machine types.
system_labels (Dict[str, str]):
Optional. System labels to apply to Model Garden deployments.
System labels are managed by Google for internal use only.
required_replica_count (int):
Optional. Number of required available replicas for the
deployment to succeed. This field is only needed when partial
model deployment/mutation is desired, with a value greater than
                or equal to 1 and less than or equal to min_replica_count. If
set, the model deploy/mutate operation will succeed once
available_replica_count reaches required_replica_count, and the
rest of the replicas will be retried.
"""
_LOGGER.log_action_start_against_resource(
f"Deploying Model {model.resource_name} to", "", self
)
self._deploy_call(
api_client=self.api_client,
endpoint_resource_name=self.resource_name,
model=model,
endpoint_resource_traffic_split=self._gca_resource.traffic_split,
network=self.network,
deployed_model_display_name=deployed_model_display_name,
traffic_percentage=traffic_percentage,
traffic_split=traffic_split,
machine_type=machine_type,
min_replica_count=min_replica_count,
max_replica_count=max_replica_count,
accelerator_type=accelerator_type,
accelerator_count=accelerator_count,
tpu_topology=tpu_topology,
reservation_affinity_type=reservation_affinity_type,
reservation_affinity_key=reservation_affinity_key,
reservation_affinity_values=reservation_affinity_values,
service_account=service_account,
explanation_spec=explanation_spec,
metadata=metadata,
deploy_request_timeout=deploy_request_timeout,
autoscaling_target_cpu_utilization=autoscaling_target_cpu_utilization,
autoscaling_target_accelerator_duty_cycle=autoscaling_target_accelerator_duty_cycle,
spot=spot,
enable_access_logging=enable_access_logging,
disable_container_logging=disable_container_logging,
deployment_resource_pool=deployment_resource_pool,
fast_tryout_enabled=fast_tryout_enabled,
system_labels=system_labels,
required_replica_count=required_replica_count,
)
_LOGGER.log_action_completed_against_resource("model", "deployed", self)
self._sync_gca_resource()
@classmethod
def _deploy_call(
cls,
api_client: endpoint_service_client.EndpointServiceClient,
endpoint_resource_name: str,
model: "Model",
endpoint_resource_traffic_split: Optional[proto.MapField] = None,
network: Optional[str] = None,
deployed_model_display_name: Optional[str] = None,
traffic_percentage: Optional[int] = 0,
traffic_split: Optional[Dict[str, int]] = None,
machine_type: Optional[str] = None,
min_replica_count: int = 1,
max_replica_count: int = 1,
accelerator_type: Optional[str] = None,
accelerator_count: Optional[int] = None,
tpu_topology: Optional[str] = None,
reservation_affinity_type: Optional[str] = None,
reservation_affinity_key: Optional[str] = None,
reservation_affinity_values: Optional[List[str]] = None,
service_account: Optional[str] = None,
explanation_spec: Optional[aiplatform.explain.ExplanationSpec] = None,
metadata: Optional[Sequence[Tuple[str, str]]] = (),
deploy_request_timeout: Optional[float] = None,
autoscaling_target_cpu_utilization: Optional[int] = None,
autoscaling_target_accelerator_duty_cycle: Optional[int] = None,
autoscaling_target_request_count_per_minute: Optional[int] = None,
spot: bool = False,
enable_access_logging=False,
disable_container_logging: bool = False,
deployment_resource_pool: Optional[DeploymentResourcePool] = None,
fast_tryout_enabled: bool = False,
system_labels: Optional[Dict[str, str]] = None,
required_replica_count: Optional[int] = 0,
) -> None:
"""Helper method to deploy model to endpoint.
Args:
api_client (endpoint_service_client.EndpointServiceClient):
Required. endpoint_service_client.EndpointServiceClient to make call.
endpoint_resource_name (str):
Required. Endpoint resource name to deploy model to.
model (aiplatform.Model):
Required. Model to be deployed.
endpoint_resource_traffic_split (proto.MapField):
Optional. Endpoint current resource traffic split.
network (str):
Optional. The full name of the Compute Engine network to which
this Endpoint will be peered. E.g. "projects/123/global/networks/my_vpc".
Private services access must already be configured for the network.
deployed_model_display_name (str):
Optional. The display name of the DeployedModel. If not provided
upon creation, the Model's display_name is used.
traffic_percentage (int):
Optional. Desired traffic to newly deployed model. Defaults to
0 if there are pre-existing deployed models. Defaults to 100 if
there are no pre-existing deployed models. Negative values should
not be provided. Traffic of previously deployed models at the endpoint
will be scaled down to accommodate new deployed model's traffic.
Should not be provided if traffic_split is provided.
traffic_split (Dict[str, int]):
Optional. A map from a DeployedModel's ID to the percentage of
this Endpoint's traffic that should be forwarded to that DeployedModel.
If a DeployedModel's ID is not listed in this map, then it receives
no traffic. The traffic percentage values must add up to 100, or
map must be empty if the Endpoint is to not accept any traffic at
the moment. Key for model being deployed is "0". Should not be
provided if traffic_percentage is provided.
machine_type (str):
Optional. The type of machine. Not specifying machine type will
                result in the model being deployed with automatic resources.
min_replica_count (int):
Optional. The minimum number of machine replicas this deployed
                model will always be deployed on. If traffic against it increases,
it may dynamically be deployed onto more replicas, and as traffic
decreases, some of these extra replicas may be freed.
max_replica_count (int):
Optional. The maximum number of replicas this deployed model may
be deployed on when the traffic against it increases. If requested
value is too large, the deployment will error, but if deployment
succeeds then the ability to scale the model to that many replicas
is guaranteed (barring service outages). If traffic against the
deployed model increases beyond what its replicas at maximum may
handle, a portion of the traffic will be dropped. If this value
is not provided, the larger value of min_replica_count or 1 will
be used. If value provided is smaller than min_replica_count, it
will automatically be increased to be min_replica_count.
accelerator_type (str):
Optional. Hardware accelerator type. Must also set accelerator_count if used.
One of ACCELERATOR_TYPE_UNSPECIFIED, NVIDIA_TESLA_K80, NVIDIA_TESLA_P100,
NVIDIA_TESLA_V100, NVIDIA_TESLA_P4, NVIDIA_TESLA_T4
accelerator_count (int):
Optional. The number of accelerators to attach to a worker replica.
tpu_topology (str):
Optional. The TPU topology to use for the DeployedModel.
Required for CloudTPU multihost deployments.
reservation_affinity_type (str):
Optional. The type of reservation affinity.
One of NO_RESERVATION, ANY_RESERVATION, SPECIFIC_RESERVATION,
SPECIFIC_THEN_ANY_RESERVATION, SPECIFIC_THEN_NO_RESERVATION
reservation_affinity_key (str):
Optional. Corresponds to the label key of a reservation resource.
To target a SPECIFIC_RESERVATION by name, use `compute.googleapis.com/reservation-name` as the key
and specify the name of your reservation as its value.
reservation_affinity_values (List[str]):
Optional. Corresponds to the label values of a reservation resource.
This must be the full resource name of the reservation.
Format: 'projects/{project_id_or_number}/zones/{zone}/reservations/{reservation_name}'
service_account (str):
The service account that the DeployedModel's container runs as. Specify the
email address of the service account. If this service account is not
specified, the container runs as a service account that doesn't have access
to the resource project.
Users deploying the Model must have the `iam.serviceAccounts.actAs`
permission on this service account.
If not specified, uses the service account set in aiplatform.init.
explanation_spec (aiplatform.explain.ExplanationSpec):
Optional. Specification of Model explanation.
metadata (Sequence[Tuple[str, str]]):
Optional. Strings which should be sent along with the request as
metadata.
deploy_request_timeout (float):
Optional. The timeout for the deploy request in seconds.
autoscaling_target_cpu_utilization (int):
Optional. Target CPU Utilization to use for Autoscaling Replicas.
A default value of 60 will be used if not specified.
autoscaling_target_accelerator_duty_cycle (int):
Optional. Target Accelerator Duty Cycle.
Must also set accelerator_type and accelerator_count if specified.
A default value of 60 will be used if not specified.
autoscaling_target_request_count_per_minute (int):
Optional. Target request count per minute per instance.
spot (bool):
Optional. Whether to schedule the deployment workload on spot VMs.
enable_access_logging (bool):
Whether to enable endpoint access logging. Defaults to False.
disable_container_logging (bool):
If True, container logs from the deployed model will not be
written to Cloud Logging. Defaults to False.
deployment_resource_pool (DeploymentResourcePool):
Resource pool where the model will be deployed. All models that
are deployed to the same DeploymentResourcePool will be hosted in
a shared model server. If provided, will override replica count
arguments.
fast_tryout_enabled (bool):
Optional. Defaults to False.
If True, model will be deployed using faster deployment path.
Useful for quick experiments. Not for production workloads. Only
available for most popular models with certain machine types.
system_labels (Dict[str, str]):
Optional. System labels to apply to Model Garden deployments.
System labels are managed by Google for internal use only.
required_replica_count (int):
Optional. Number of required available replicas for the
deployment to succeed. This field is only needed when partial
model deployment/mutation is desired, with a value greater than
                or equal to 1 and less than or equal to min_replica_count. If
set, the model deploy/mutate operation will succeed once
available_replica_count reaches required_replica_count, and the
rest of the replicas will be retried.
Raises:
ValueError: If only `accelerator_type` or `accelerator_count` is specified.
ValueError: If model does not support deployment.
            ValueError: If there is no current traffic split and traffic percentage
is not 0 or 100.
ValueError: If `deployment_resource_pool` and a custom machine spec
are both present.
ValueError: If both `explanation_spec` and `deployment_resource_pool`
are present.
"""
service_account = service_account or initializer.global_config.service_account
if deployment_resource_pool:
deployed_model = gca_endpoint_compat.DeployedModel(
model=model.versioned_resource_name,
display_name=deployed_model_display_name,
service_account=service_account,
disable_container_logging=disable_container_logging,
)
if system_labels:
deployed_model.system_labels = system_labels
supports_shared_resources = (
gca_model_compat.Model.DeploymentResourcesType.SHARED_RESOURCES
in model.supported_deployment_resources_types
)
if not supports_shared_resources:
                raise ValueError(
                    "`deployment_resource_pool` may only be specified for models "
                    "which support shared resources."
                )
provided_custom_machine_spec = (
machine_type
or accelerator_type
or accelerator_count
or autoscaling_target_accelerator_duty_cycle
or autoscaling_target_cpu_utilization
or autoscaling_target_request_count_per_minute
)
if provided_custom_machine_spec:
raise ValueError(
"Conflicting parameters in deployment request. "
"The machine_type, accelerator_type and accelerator_count, "
"autoscaling_target_accelerator_duty_cycle, "
"autoscaling_target_cpu_utilization, "
"autoscaling_target_request_count_per_minute parameters "
"may not be set when `deployment_resource_pool` is "
"specified."
)
deployed_model.shared_resources = deployment_resource_pool.resource_name
if explanation_spec:
raise ValueError(
"Model explanation is not supported for deployments using "
"shared resources."
)
else:
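            # If max_replica_count was left smaller than min_replica_count, raise it
            # to min_replica_count, matching the documented behavior above.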
max_replica_count = max(min_replica_count, max_replica_count)
if bool(accelerator_type) != bool(accelerator_count):
raise ValueError(
"Both `accelerator_type` and `accelerator_count` should be specified or None."
)
if autoscaling_target_accelerator_duty_cycle is not None and (
not accelerator_type or not accelerator_count
):
                raise ValueError(
                    "Both `accelerator_type` and `accelerator_count` should be set "
                    "when specifying `autoscaling_target_accelerator_duty_cycle`."
                )
deployed_model = gca_endpoint_compat.DeployedModel(
model=model.versioned_resource_name,
display_name=deployed_model_display_name,
service_account=service_account,
enable_access_logging=enable_access_logging,
disable_container_logging=disable_container_logging,
)
if system_labels:
deployed_model.system_labels = system_labels
supports_automatic_resources = (
gca_model_compat.Model.DeploymentResourcesType.AUTOMATIC_RESOURCES
in model.supported_deployment_resources_types
)
supports_dedicated_resources = (
gca_model_compat.Model.DeploymentResourcesType.DEDICATED_RESOURCES
in model.supported_deployment_resources_types
)
provided_custom_machine_spec = (
machine_type
or accelerator_type
or accelerator_count
or autoscaling_target_accelerator_duty_cycle
or autoscaling_target_cpu_utilization
or autoscaling_target_request_count_per_minute
)
# If the model supports both automatic and dedicated deployment resources,
# decide based on the presence of machine spec customizations
use_dedicated_resources = supports_dedicated_resources and (
not supports_automatic_resources or provided_custom_machine_spec
)
if provided_custom_machine_spec and not use_dedicated_resources:
_LOGGER.info(
"Model does not support dedicated deployment resources. "
"The machine_type, accelerator_type and accelerator_count, "
"autoscaling_target_accelerator_duty_cycle, "
"autoscaling_target_cpu_utilization, "
"autoscaling_target_request_count_per_minute parameters "
"are ignored."
)
if use_dedicated_resources and not machine_type:
machine_type = _DEFAULT_MACHINE_TYPE
_LOGGER.info(f"Using default machine_type: {machine_type}")
if use_dedicated_resources:
dedicated_resources = gca_machine_resources_compat.DedicatedResources(
min_replica_count=min_replica_count,
max_replica_count=max_replica_count,
spot=spot,
required_replica_count=required_replica_count,
)
machine_spec = gca_machine_resources_compat.MachineSpec(
machine_type=machine_type
)
if autoscaling_target_cpu_utilization:
autoscaling_metric_spec = gca_machine_resources_compat.AutoscalingMetricSpec(
metric_name="aiplatform.googleapis.com/prediction/online/cpu/utilization",
target=autoscaling_target_cpu_utilization,
)
dedicated_resources.autoscaling_metric_specs.extend(
[autoscaling_metric_spec]
)
if accelerator_type and accelerator_count:
utils.validate_accelerator_type(accelerator_type)
machine_spec.accelerator_type = accelerator_type
machine_spec.accelerator_count = accelerator_count
if autoscaling_target_accelerator_duty_cycle:
autoscaling_metric_spec = gca_machine_resources_compat.AutoscalingMetricSpec(
metric_name="aiplatform.googleapis.com/prediction/online/accelerator/duty_cycle",
target=autoscaling_target_accelerator_duty_cycle,
)
dedicated_resources.autoscaling_metric_specs.extend(
[autoscaling_metric_spec]
)
if autoscaling_target_request_count_per_minute:
autoscaling_metric_spec = (
gca_machine_resources_compat.AutoscalingMetricSpec(
metric_name=(
"aiplatform.googleapis.com/prediction/online/"
"request_count"
),
target=autoscaling_target_request_count_per_minute,
)
)
dedicated_resources.autoscaling_metric_specs.extend(
[autoscaling_metric_spec]
)
if reservation_affinity_type:
machine_spec.reservation_affinity = utils.get_reservation_affinity(
reservation_affinity_type,
reservation_affinity_key,
reservation_affinity_values,
)
if tpu_topology is not None:
machine_spec.tpu_topology = tpu_topology
dedicated_resources.machine_spec = machine_spec
deployed_model.dedicated_resources = dedicated_resources
if fast_tryout_enabled:
deployed_model.faster_deployment_config = (
gca_endpoint_compat.FasterDeploymentConfig(
fast_tryout_enabled=fast_tryout_enabled
)
)
elif supports_automatic_resources:
deployed_model.automatic_resources = (
gca_machine_resources_compat.AutomaticResources(
min_replica_count=min_replica_count,
max_replica_count=max_replica_count,
)
)
else:
_LOGGER.warning(
"Model does not support deployment. "
"See https://cloud.google.com/vertex-ai/docs/reference/rpc/google.cloud.aiplatform.v1#google.cloud.aiplatform.v1.Model.FIELDS.repeated.google.cloud.aiplatform.v1.Model.DeploymentResourcesType.google.cloud.aiplatform.v1.Model.supported_deployment_resources_types"
)
deployed_model.explanation_spec = explanation_spec
# Checking if traffic percentage is valid
# TODO(b/221059294) PrivateEndpoint should support traffic split
if traffic_split is None and not network:
# new model traffic needs to be 100 if no pre-existing models
if not endpoint_resource_traffic_split:
# default scenario
if traffic_percentage == 0:
traffic_percentage = 100
# verify user specified 100
elif traffic_percentage < 100:
raise ValueError(
"""There are currently no deployed models so the traffic
percentage for this deployed model needs to be 100."""
)
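            # Give the newly deployed model ("0") traffic_percentage of the traffic
            # and scale down previously deployed models' traffic to accommodate it.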
traffic_split = cls._allocate_traffic(
traffic_split=dict(endpoint_resource_traffic_split),
traffic_percentage=traffic_percentage,
)
operation_future = api_client.deploy_model(
endpoint=endpoint_resource_name,
deployed_model=deployed_model,
traffic_split=traffic_split,
metadata=metadata,
timeout=deploy_request_timeout,
)
_LOGGER.log_action_started_against_resource_with_lro(
"Deploy", "model", cls, operation_future
)
operation_future.result(timeout=None)
def undeploy(
self,
deployed_model_id: str,
traffic_split: Optional[Dict[str, int]] = None,
metadata: Optional[Sequence[Tuple[str, str]]] = (),
sync=True,
) -> None:
"""Undeploys a deployed model.
        The model to be undeployed should have no traffic, or the user must provide
a new traffic_split with the remaining deployed models. Refer
to `Endpoint.traffic_split` for the current traffic split mapping.
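        Example usage (a minimal sketch; the deployed model ID below is a
        placeholder):
            ```
            my_endpoint.undeploy(deployed_model_id="1234567890")
            ```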
Args:
deployed_model_id (str):
Required. The ID of the DeployedModel to be undeployed from the
Endpoint.
traffic_split (Dict[str, int]):
Optional. A map of DeployedModel IDs to the percentage of
this Endpoint's traffic that should be forwarded to that DeployedModel.
Required if undeploying a model with non-zero traffic from an Endpoint
with multiple deployed models. The traffic percentage values must add
up to 100, or map must be empty if the Endpoint is to not accept any traffic
at the moment. If a DeployedModel's ID is not listed in this map, then it
receives no traffic.
metadata (Sequence[Tuple[str, str]]):
Optional. Strings which should be sent along with the request as
metadata.
"""
self._sync_gca_resource_if_skipped()
if traffic_split is not None:
if deployed_model_id in traffic_split and traffic_split[deployed_model_id]:
raise ValueError("Model being undeployed should have 0 traffic.")
if sum(traffic_split.values()) != 100:
raise ValueError(
"Sum of all traffic within traffic split needs to be 100."
)
# Two or more models deployed to Endpoint and remaining traffic will be zero
elif (
len(self.traffic_split) > 1
and deployed_model_id in self._gca_resource.traffic_split
and self._gca_resource.traffic_split[deployed_model_id] == 100
):
raise ValueError(
f"Undeploying deployed model '{deployed_model_id}' would leave the remaining "
"traffic split at 0%. Traffic split must add up to 100% when models are "
"deployed. Please undeploy the other models first or provide an updated "
"traffic_split."
)
self._undeploy(
deployed_model_id=deployed_model_id,
traffic_split=traffic_split,
metadata=metadata,
sync=sync,
)
@base.optional_sync()
def _undeploy(
self,
deployed_model_id: str,
traffic_split: Optional[Dict[str, int]] = None,
metadata: Optional[Sequence[Tuple[str, str]]] = (),
sync=True,
) -> None:
"""Undeploys a deployed model.
Proportionally adjusts the traffic_split among the remaining deployed
models of the endpoint.
Args:
deployed_model_id (str):
Required. The ID of the DeployedModel to be undeployed from the
Endpoint.
traffic_split (Dict[str, int]):
Optional. A map from a DeployedModel's ID to the percentage of
this Endpoint's traffic that should be forwarded to that DeployedModel.
If a DeployedModel's ID is not listed in this map, then it receives
no traffic. The traffic percentage values must add up to 100, or
map must be empty if the Endpoint is to not accept any traffic at
                the moment.
metadata (Sequence[Tuple[str, str]]):
Optional. Strings which should be sent along with the request as
metadata.
"""
self._sync_gca_resource_if_skipped()
current_traffic_split = traffic_split or dict(self._gca_resource.traffic_split)
if deployed_model_id in current_traffic_split:
current_traffic_split = self._unallocate_traffic(
traffic_split=current_traffic_split,
deployed_model_id=deployed_model_id,
)
current_traffic_split.pop(deployed_model_id)
_LOGGER.log_action_start_against_resource("Undeploying", "model", self)
operation_future = self.api_client.undeploy_model(
endpoint=self.resource_name,
deployed_model_id=deployed_model_id,
traffic_split=current_traffic_split,
metadata=metadata,
)
_LOGGER.log_action_started_against_resource_with_lro(
"Undeploy", "model", self.__class__, operation_future
)
# block before returning
operation_future.result()
_LOGGER.log_action_completed_against_resource("model", "undeployed", self)
# update local resource
self._sync_gca_resource()
def update(
self,
display_name: Optional[str] = None,
description: Optional[str] = None,
labels: Optional[Dict[str, str]] = None,
traffic_split: Optional[Dict[str, int]] = None,
request_metadata: Optional[Sequence[Tuple[str, str]]] = (),
update_request_timeout: Optional[float] = None,
) -> "Endpoint":
"""Updates an endpoint.
Example usage:
my_endpoint = my_endpoint.update(
display_name='my-updated-endpoint',
description='my updated description',
labels={'key': 'value'},
traffic_split={
'123456': 20,
'234567': 80,
},
)
Args:
display_name (str):
Optional. The display name of the Endpoint.
                The name can be up to 128 characters long and can consist of any UTF-8
characters.
description (str):
Optional. The description of the Endpoint.
labels (Dict[str, str]):
Optional. The labels with user-defined metadata to organize your Endpoints.
Label keys and values can be no longer than 64 characters
(Unicode codepoints), can only contain lowercase letters, numeric
characters, underscores and dashes. International characters are allowed.
See https://goo.gl/xmQnxf for more information and examples of labels.
traffic_split (Dict[str, int]):
Optional. A map from a DeployedModel's ID to the percentage of this Endpoint's
traffic that should be forwarded to that DeployedModel.
If a DeployedModel's ID is not listed in this map, then it receives no traffic.
The traffic percentage values must add up to 100, or map must be empty if
                the Endpoint is to not accept any traffic at the moment.
request_metadata (Sequence[Tuple[str, str]]):
Optional. Strings which should be sent along with the request as metadata.
update_request_timeout (float):
Optional. The timeout for the update request in seconds.
Returns:
            Endpoint (aiplatform.Endpoint):
Updated endpoint resource.
Raises:
ValueError: If `labels` is not the correct format.
"""
self.wait()
current_endpoint_proto = self.gca_resource
copied_endpoint_proto = current_endpoint_proto.__class__(current_endpoint_proto)
update_mask: List[str] = []
if display_name:
utils.validate_display_name(display_name)
copied_endpoint_proto.display_name = display_name
update_mask.append("display_name")
if description:
copied_endpoint_proto.description = description
update_mask.append("description")
if labels:
utils.validate_labels(labels)
copied_endpoint_proto.labels = labels
update_mask.append("labels")
if traffic_split:
update_mask.append("traffic_split")
copied_endpoint_proto.traffic_split = traffic_split
update_mask = field_mask_pb2.FieldMask(paths=update_mask)
_LOGGER.log_action_start_against_resource(
"Updating",
"endpoint",
self,
)
self._gca_resource = self.api_client.update_endpoint(
endpoint=copied_endpoint_proto,
update_mask=update_mask,
metadata=request_metadata,
timeout=update_request_timeout,
)
_LOGGER.log_action_completed_against_resource("endpoint", "updated", self)
return self
def predict(
self,
instances: List,
parameters: Optional[Dict] = None,
timeout: Optional[float] = None,
use_raw_predict: Optional[bool] = False,
*,
use_dedicated_endpoint: Optional[bool] = False,
) -> Prediction:
"""Make a prediction against this Endpoint.
For dedicated endpoint, set use_dedicated_endpoint = True:
```
response = my_endpoint.predict(instances=[...],
use_dedicated_endpoint=True)
my_predictions = response.predictions
```
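        For a standard (non-dedicated) endpoint, a minimal sketch (the instances
        below are placeholders):
            ```
            response = my_endpoint.predict(instances=[...])
            my_predictions = response.predictions
            ```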
Args:
instances (List):
Required. The instances that are the input to the
prediction call. A DeployedModel may have an upper limit
on the number of instances it supports per request, and
when it is exceeded the prediction call errors in case
of AutoML Models, or, in case of customer created
Models, the behaviour is as documented by that Model.
The schema of any single instance may be specified via
Endpoint's DeployedModels'
[Model's][google.cloud.aiplatform.v1beta1.DeployedModel.model]
[PredictSchemata's][google.cloud.aiplatform.v1beta1.Model.predict_schemata]
``instance_schema_uri``.
parameters (Dict):
The parameters that govern the prediction. The schema of
the parameters may be specified via Endpoint's
DeployedModels' [Model's
][google.cloud.aiplatform.v1beta1.DeployedModel.model]
[PredictSchemata's][google.cloud.aiplatform.v1beta1.Model.predict_schemata]
``parameters_schema_uri``.
timeout (float): Optional. The timeout for this request in seconds.
use_raw_predict (bool):
Optional. Default value is False. If set to True, the underlying prediction call will be made
against Endpoint.raw_predict().
use_dedicated_endpoint (bool):
Optional. Default value is False. If set to True, the underlying prediction call will be made
using the dedicated endpoint dns.
Returns:
prediction (aiplatform.Prediction):
Prediction with returned predictions and Model ID.
Raises:
ImportError: If there is an issue importing the `TCPKeepAliveAdapter` package.
"""
self.wait()
if use_raw_predict:
raw_predict_response = self.raw_predict(
body=json.dumps({"instances": instances, "parameters": parameters}),
headers={"Content-Type": "application/json"},
use_dedicated_endpoint=use_dedicated_endpoint,
timeout=timeout,
)
json_response = raw_predict_response.json()
return Prediction(
predictions=json_response["predictions"],
metadata=json_response.get("metadata"),
deployed_model_id=raw_predict_response.headers[
_RAW_PREDICT_DEPLOYED_MODEL_ID_KEY
],
model_resource_name=raw_predict_response.headers[
_RAW_PREDICT_MODEL_RESOURCE_KEY
],
model_version_id=raw_predict_response.headers.get(
_RAW_PREDICT_MODEL_VERSION_ID_KEY, None
),
)
if use_dedicated_endpoint:
self._sync_gca_resource_if_skipped()
if (
not self._gca_resource.dedicated_endpoint_enabled
or self._gca_resource.dedicated_endpoint_dns is None
):
                raise ValueError(
                    "Dedicated endpoint is not enabled or DNS is empty. "
                    "Please make sure the endpoint has dedicated endpoint "
                    "enabled and the model is ready before making a prediction."
                )
try:
from requests_toolbelt.adapters.socket_options import (
TCPKeepAliveAdapter,
)
except ImportError:
raise ImportError(
"Cannot import the requests-toolbelt library. Please install requests-toolbelt."
)
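            # Lazily create an authorized session (with the default authorized
            # scopes) used to call the dedicated endpoint DNS over HTTPS.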
if not self.authorized_session:
self.credentials._scopes = constants.base.DEFAULT_AUTHED_SCOPES
self.authorized_session = google_auth_requests.AuthorizedSession(
self.credentials
)
headers = {
"Content-Type": "application/json",
}
url = f"https://{self._gca_resource.dedicated_endpoint_dns}/v1/{self.resource_name}:predict"
            # count * interval needs to be larger than 1 hr (3600s)
keep_alive = TCPKeepAliveAdapter(idle=120, count=100, interval=100)
self.authorized_session.mount("https://", keep_alive)
response = self.authorized_session.post(
url=url,
data=json.dumps(
{
"instances": instances,
"parameters": parameters,
}
),
headers=headers,
timeout=timeout,
)
prediction_response = json.loads(response.text)
return Prediction(
predictions=prediction_response.get("predictions"),
metadata=prediction_response.get("metadata"),
deployed_model_id=prediction_response.get("deployedModelId"),
model_resource_name=prediction_response.get("model"),
model_version_id=prediction_response.get("modelVersionId"),
)
else:
prediction_response = self._prediction_client.predict(
endpoint=self._gca_resource.name,
instances=instances,
parameters=parameters,
timeout=timeout,
)
if prediction_response._pb.metadata:
metadata = json_format.MessageToDict(prediction_response._pb.metadata)
else:
metadata = None
return Prediction(
predictions=[
json_format.MessageToDict(item)
for item in prediction_response.predictions.pb
],
metadata=metadata,
deployed_model_id=prediction_response.deployed_model_id,
model_version_id=prediction_response.model_version_id,
model_resource_name=prediction_response.model,
)
async def predict_async(
self,
instances: List,
*,
parameters: Optional[Dict] = None,
timeout: Optional[float] = None,
) -> Prediction:
"""Make an asynchronous prediction against this Endpoint.
Example usage:
```
response = await my_endpoint.predict_async(instances=[...])
my_predictions = response.predictions
```
Args:
instances (List):
Required. The instances that are the input to the
prediction call. A DeployedModel may have an upper limit
on the number of instances it supports per request, and
when it is exceeded the prediction call errors in case
of AutoML Models, or, in case of customer created
Models, the behaviour is as documented by that Model.
The schema of any single instance may be specified via
Endpoint's DeployedModels'
[Model's][google.cloud.aiplatform.v1beta1.DeployedModel.model]
[PredictSchemata's][google.cloud.aiplatform.v1beta1.Model.predict_schemata]
``instance_schema_uri``.
parameters (Dict):
Optional. The parameters that govern the prediction. The schema of
the parameters may be specified via Endpoint's
DeployedModels' [Model's
][google.cloud.aiplatform.v1beta1.DeployedModel.model]
[PredictSchemata's][google.cloud.aiplatform.v1beta1.Model.predict_schemata]
``parameters_schema_uri``.
timeout (float): Optional. The timeout for this request in seconds.
Returns:
prediction (aiplatform.Prediction):
Prediction with returned predictions and Model ID.
"""
self.wait()
prediction_response = await self._prediction_async_client.predict(
endpoint=self._gca_resource.name,
instances=instances,
parameters=parameters,
timeout=timeout,
)
if prediction_response._pb.metadata:
metadata = json_format.MessageToDict(prediction_response._pb.metadata)
else:
metadata = None
return Prediction(
predictions=[
json_format.MessageToDict(item)
for item in prediction_response.predictions.pb
],
metadata=metadata,
deployed_model_id=prediction_response.deployed_model_id,
model_version_id=prediction_response.model_version_id,
model_resource_name=prediction_response.model,
)
def raw_predict(
self,
body: bytes,
headers: Dict[str, str],
*,
use_dedicated_endpoint: Optional[bool] = False,
timeout: Optional[float] = None,
) -> requests.models.Response:
"""Makes a prediction request using arbitrary headers.
Example usage:
my_endpoint = aiplatform.Endpoint(ENDPOINT_ID)
response = my_endpoint.raw_predict(
                body = b'{"instances":[{"feat_1":val_1, "feat_2":val_2}]}',
headers = {'Content-Type':'application/json'}
)
# For dedicated endpoint:
response = my_endpoint.raw_predict(
body = b'{"instances":[{"feat_1":val_1, "feat_2":val_2}]}',
headers = {'Content-Type':'application/json'},
                use_dedicated_endpoint=True,
)
status_code = response.status_code
            results = json.loads(response.text)
Args:
body (bytes):
                The body of the prediction request in bytes. This must not exceed 1.5 MB per request.
headers (Dict[str, str]):
The header of the request as a dictionary. There are no restrictions on the header.
use_dedicated_endpoint (bool):
Optional. Default value is False. If set to True, the underlying prediction call will be made
using the dedicated endpoint dns.
timeout (float): Optional. The timeout for this request in seconds.
Returns:
A requests.models.Response object containing the status code and prediction results.
Raises:
ImportError: If there is an issue importing the `TCPKeepAliveAdapter` package.
"""
if not self.authorized_session:
self.credentials._scopes = constants.base.DEFAULT_AUTHED_SCOPES
self.authorized_session = google_auth_requests.AuthorizedSession(
self.credentials
)
if self.raw_predict_request_url is None:
self.raw_predict_request_url = f"https://{self.location}-{constants.base.API_BASE_PATH}/v1/projects/{self.project}/locations/{self.location}/endpoints/{self.name}:rawPredict"
url = self.raw_predict_request_url
if use_dedicated_endpoint:
try:
from requests_toolbelt.adapters.socket_options import (
TCPKeepAliveAdapter,
)
except ImportError:
raise ImportError(
"Cannot import the requests-toolbelt library. Please install requests-toolbelt."
)
self._sync_gca_resource_if_skipped()
if (
not self._gca_resource.dedicated_endpoint_enabled
or self._gca_resource.dedicated_endpoint_dns is None
):
                raise ValueError(
                    "Dedicated endpoint is not enabled or DNS is empty. "
                    "Please make sure the endpoint has dedicated endpoint "
                    "enabled and the model is ready before making a prediction."
                )
url = f"https://{self._gca_resource.dedicated_endpoint_dns}/v1/{self.resource_name}:rawPredict"
            # count * interval needs to be larger than 1 hr (3600s)
keep_alive = TCPKeepAliveAdapter(idle=120, count=100, interval=100)
self.authorized_session.mount("https://", keep_alive)
return self.authorized_session.post(
url=url, data=body, headers=headers, timeout=timeout
)
def stream_raw_predict(
self,
body: bytes,
headers: Dict[str, str],
*,
use_dedicated_endpoint: Optional[bool] = False,
timeout: Optional[float] = None,
) -> Iterator[requests.models.Response]:
"""Makes a streaming prediction request using arbitrary headers.
        For custom models, this method is only supported for dedicated endpoints.
Example usage:
```
my_endpoint = aiplatform.Endpoint(ENDPOINT_ID)
for stream_response in my_endpoint.stream_raw_predict(
                body = b'{"instances":[{"feat_1":val_1, "feat_2":val_2}]}',
headers = {'Content-Type':'application/json'}
):
                status_code = stream_response.status_code
                stream_result = json.dumps(stream_response.text)
```
For dedicated endpoint:
```
my_endpoint = aiplatform.Endpoint(ENDPOINT_ID)
for stream_response in my_endpoint.stream_raw_predict(
body = b'{"instances":[{"feat_1":val_1, "feat_2":val_2}]}',
headers = {'Content-Type':'application/json'},
use_dedicated_endpoint=True,
):
                status_code = stream_response.status_code
                stream_result = json.dumps(stream_response.text)
```
Args:
body (bytes):
The body of the prediction request in bytes. This must not
                exceed 10 MB per request.
headers (Dict[str, str]):
The header of the request as a dictionary. There are no
restrictions on the header.
use_dedicated_endpoint (bool):
Optional. Default value is False. If set to True, the underlying prediction call will be made
using the dedicated endpoint dns.
timeout (float): Optional. The timeout for this request in seconds.
Yields:
predictions (Iterator[requests.models.Response]):
The streaming prediction results.
"""
if not self.authorized_session:
self.credentials._scopes = constants.base.DEFAULT_AUTHED_SCOPES
self.authorized_session = google_auth_requests.AuthorizedSession(
self.credentials
)
if self.stream_raw_predict_request_url is None:
self.stream_raw_predict_request_url = f"https://{self.location}-{constants.base.API_BASE_PATH}/v1/projects/{self.project}/locations/{self.location}/endpoints/{self.name}:streamRawPredict"
url = self.stream_raw_predict_request_url
if use_dedicated_endpoint:
self._sync_gca_resource_if_skipped()
if (
not self._gca_resource.dedicated_endpoint_enabled
or self._gca_resource.dedicated_endpoint_dns is None
):
                raise ValueError(
                    "Dedicated endpoint is not enabled or DNS is empty. "
                    "Please make sure the endpoint has dedicated endpoint "
                    "enabled and the model is ready before making a prediction."
                )
url = f"https://{self._gca_resource.dedicated_endpoint_dns}/v1/{self.resource_name}:streamRawPredict"
with self.authorized_session.post(
url=url,
data=body,
headers=headers,
timeout=timeout,
stream=True,
) as resp:
for line in resp.iter_lines():
yield line
def direct_predict(
self,
inputs: List,
parameters: Optional[Dict] = None,
timeout: Optional[float] = None,
) -> Prediction:
"""Makes a direct (gRPC) prediction against this Endpoint for a pre-built image.
Args:
inputs (List):
Required. The inputs that are the input to the prediction call.
A DeployedModel may have an upper limit on the number of
instances it supports per request, and when it is exceeded the
prediction call errors in case of AutoML Models, or, in case of
customer created Models, the behaviour is as documented by that
Model. The schema of any single instance may be specified via
Endpoint's DeployedModels'
[Model's][google.cloud.aiplatform.v1beta1.DeployedModel.model]
[PredictSchemata's][google.cloud.aiplatform.v1beta1.Model.predict_schemata]
``instance_schema_uri``.
parameters (Dict):
Optional. The parameters that govern the prediction. The schema
of the parameters may be specified via Endpoint's
DeployedModels'
[Model's][google.cloud.aiplatform.v1beta1.DeployedModel.model]
[PredictSchemata's][google.cloud.aiplatform.v1beta1.Model.predict_schemata]
``parameters_schema_uri``.
timeout (Optional[float]):
Optional. The timeout for this request in seconds.
Returns:
prediction (aiplatform.Prediction):
The resulting prediction.
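        Example usage (a minimal sketch; the inputs below are placeholders):
            ```
            response = my_endpoint.direct_predict(inputs=[...])
            my_predictions = response.predictions
            ```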
"""
self.wait()
prediction_response = self._prediction_client.direct_predict(
request={
"endpoint": self._gca_resource.name,
"inputs": inputs,
"parameters": parameters,
},
timeout=timeout,
)
return Prediction(
predictions=[
json_format.MessageToDict(item)
for item in prediction_response.outputs.pb
],
metadata=None,
deployed_model_id=None,
model_version_id=None,
model_resource_name=None,
)
async def direct_predict_async(
self,
inputs: List,
*,
parameters: Optional[Dict] = None,
timeout: Optional[float] = None,
) -> Prediction:
"""Makes an asynchronous direct (gRPC) prediction against this Endpoint for a pre-built image.
Example usage:
```
response = await my_endpoint.direct_predict_async(inputs=[...])
my_predictions = response.predictions
```
Args:
inputs (List):
Required. The inputs that are the input to the prediction call.
A DeployedModel may have an upper limit on the number of
instances it supports per request, and when it is exceeded the
prediction call errors in case of AutoML Models, or, in case of
customer created Models, the behaviour is as documented by that
Model. The schema of any single instance may be specified via
Endpoint's DeployedModels'
[Model's][google.cloud.aiplatform.v1beta1.DeployedModel.model]
[PredictSchemata's][google.cloud.aiplatform.v1beta1.Model.predict_schemata]
``instance_schema_uri``.
parameters (Dict):
Optional. The parameters that govern the prediction. The schema
of the parameters may be specified via Endpoint's
DeployedModels'
[Model's][google.cloud.aiplatform.v1beta1.DeployedModel.model]
[PredictSchemata's][google.cloud.aiplatform.v1beta1.Model.predict_schemata]
``parameters_schema_uri``.
timeout (Optional[float]):
Optional. The timeout for this request in seconds.
Returns:
prediction (aiplatform.Prediction):
The resulting prediction.
"""
self.wait()
prediction_response = await self._prediction_async_client.direct_predict(
request={
"endpoint": self._gca_resource.name,
"inputs": inputs,
"parameters": parameters,
},
timeout=timeout,
)
return Prediction(
predictions=[
json_format.MessageToDict(item)
for item in prediction_response.outputs.pb
],
metadata=None,
deployed_model_id=None,
model_version_id=None,
model_resource_name=None,
)
def stream_direct_predict(
self,
inputs_iterator: Iterator[List],
parameters: Optional[Dict] = None,
timeout: Optional[float] = None,
) -> Iterator[Prediction]:
"""Makes a streaming direct (gRPC) prediction against this Endpoint for a pre-built image.
Args:
inputs_iterator (Iterator[List]):
Required. An iterator of the inputs that are the input to the
prediction call. A DeployedModel may have an upper limit on the
number of instances it supports per request, and when it is
exceeded the prediction call errors in case of AutoML Models, or,
in case of customer created Models, the behaviour is as
documented by that Model. The schema of any single instance may
be specified via Endpoint's DeployedModels'
[Model's][google.cloud.aiplatform.v1beta1.DeployedModel.model]
[PredictSchemata's][google.cloud.aiplatform.v1beta1.Model.predict_schemata]
``instance_schema_uri``.
parameters (Dict):
Optional. The parameters that govern the prediction. The schema
of the parameters may be specified via Endpoint's
DeployedModels'
[Model's][google.cloud.aiplatform.v1beta1.DeployedModel.model]
[PredictSchemata's][google.cloud.aiplatform.v1beta1.Model.predict_schemata]
``parameters_schema_uri``.
timeout (Optional[float]):
Optional. The timeout for this request in seconds.
Yields:
predictions (Iterator[aiplatform.Prediction]):
The resulting streamed predictions.
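        Example usage (a minimal sketch; the inputs iterator below is a
        placeholder):
            ```
            for prediction in my_endpoint.stream_direct_predict(
                inputs_iterator=iter([[...], [...]]),
            ):
                my_predictions = prediction.predictions
            ```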
"""
self.wait()
for resp in self._prediction_client.stream_direct_predict(
requests=(
{
"endpoint": self._gca_resource.name,
"inputs": inputs,
"parameters": parameters,
}
for inputs in inputs_iterator
),
timeout=timeout,
):
yield Prediction(
predictions=[
json_format.MessageToDict(item) for item in resp.outputs.pb
],
metadata=None,
deployed_model_id=None,
model_version_id=None,
model_resource_name=None,
)
def direct_raw_predict(
self,
method_name: str,
request: bytes,
timeout: Optional[float] = None,
) -> Prediction:
"""Makes a direct (gRPC) prediction request using arbitrary headers for a custom container.
Example usage:
```
my_endpoint = aiplatform.Endpoint(ENDPOINT_ID)
response = my_endpoint.direct_raw_predict(request=b'...')
```
Args:
method_name (str):
Fully qualified name of the API method being invoked to perform
prediction.
request (bytes):
The body of the prediction request in bytes.
timeout (Optional[float]):
Optional. The timeout for this request in seconds.
Returns:
prediction (aiplatform.Prediction):
The resulting prediction.
"""
self.wait()
prediction_response = self._prediction_client.direct_raw_predict(
request={
"endpoint": self._gca_resource.name,
"method_name": method_name,
"input": request,
},
timeout=timeout,
)
return Prediction(
predictions=prediction_response.output,
metadata=None,
deployed_model_id=None,
model_version_id=None,
model_resource_name=None,
)
async def direct_raw_predict_async(
self,
method_name: str,
request: bytes,
timeout: Optional[float] = None,
) -> Prediction:
"""Makes a direct (gRPC) prediction request for a custom container.
Example usage:
```
my_endpoint = aiplatform.Endpoint(ENDPOINT_ID)
            response = await my_endpoint.direct_raw_predict_async(request=b'...')
```
Args:
method_name (str):
Fully qualified name of the API method being invoked to perform
prediction.
request (bytes):
The body of the prediction request in bytes.
timeout (Optional[float]):
Optional. The timeout for this request in seconds.
Returns:
prediction (aiplatform.Prediction):
The resulting prediction.
"""
self.wait()
prediction_response = await self._prediction_async_client.direct_raw_predict(
request={
"endpoint": self._gca_resource.name,
"method_name": method_name,
"input": request,
},
timeout=timeout,
)
return Prediction(
predictions=prediction_response.output,
metadata=None,
deployed_model_id=None,
model_version_id=None,
model_resource_name=None,
)
def stream_direct_raw_predict(
self,
method_name: str,
requests: Iterator[bytes],
timeout: Optional[float] = None,
) -> Iterator[Prediction]:
"""Makes a direct (gRPC) streaming prediction request for a custom container.
Example usage:
```
my_endpoint = aiplatform.Endpoint(ENDPOINT_ID)
for stream_response in my_endpoint.stream_direct_raw_predict(
                requests=b'...'
):
yield stream_response
```
Args:
method_name (str):
Fully qualified name of the API method being invoked to perform
prediction.
requests (Iterator[bytes]):
The body of the prediction requests in bytes.
timeout (Optional[float]):
Optional. The timeout for this request in seconds.
Yields:
predictions (Iterator[aiplatform.Prediction]):
The resulting streamed predictions.
"""
self.wait()
for resp in self._prediction_client.stream_direct_raw_predict(
requests=(
{
"endpoint": self._gca_resource.name,
"method_name": method_name,
"input": request,
}
for request in requests
),
timeout=timeout,
):
yield Prediction(
predictions=resp.output,
metadata=None,
deployed_model_id=None,
model_version_id=None,
model_resource_name=None,
)
def explain(
self,
instances: List[Dict],
parameters: Optional[Dict] = None,
deployed_model_id: Optional[str] = None,
timeout: Optional[float] = None,
) -> Prediction:
"""Make a prediction with explanations against this Endpoint.
Example usage:
response = my_endpoint.explain(instances=[...])
my_explanations = response.explanations
Args:
instances (List):
Required. The instances that are the input to the
prediction call. A DeployedModel may have an upper limit
on the number of instances it supports per request, and
when it is exceeded the prediction call errors in case
of AutoML Models, or, in case of customer created
Models, the behaviour is as documented by that Model.
The schema of any single instance may be specified via
Endpoint's DeployedModels'
[Model's][google.cloud.aiplatform.v1beta1.DeployedModel.model]
[PredictSchemata's][google.cloud.aiplatform.v1beta1.Model.predict_schemata]
``instance_schema_uri``.
parameters (Dict):
The parameters that govern the prediction. The schema of
the parameters may be specified via Endpoint's
DeployedModels' [Model's
][google.cloud.aiplatform.v1beta1.DeployedModel.model]
[PredictSchemata's][google.cloud.aiplatform.v1beta1.Model.predict_schemata]
``parameters_schema_uri``.
deployed_model_id (str):
Optional. If specified, this ExplainRequest will be served by the
chosen DeployedModel, overriding this Endpoint's traffic split.
timeout (float): Optional. The timeout for this request in seconds.
Returns:
prediction (aiplatform.Prediction):
Prediction with returned predictions, explanations, and Model ID.
"""
self.wait()
explain_response = self._prediction_client.explain(
endpoint=self.resource_name,
instances=instances,
parameters=parameters,
deployed_model_id=deployed_model_id,
timeout=timeout,
)
return Prediction(
predictions=[
json_format.MessageToDict(item)
for item in explain_response.predictions.pb
],
deployed_model_id=explain_response.deployed_model_id,
explanations=explain_response.explanations,
)
async def explain_async(
self,
instances: List[Dict],
*,
parameters: Optional[Dict] = None,
deployed_model_id: Optional[str] = None,
timeout: Optional[float] = None,
) -> Prediction:
"""Make a prediction with explanations against this Endpoint.
Example usage:
```
response = await my_endpoint.explain_async(instances=[...])
my_explanations = response.explanations
```
Args:
instances (List):
Required. The instances that are the input to the
prediction call. A DeployedModel may have an upper limit
on the number of instances it supports per request, and
when it is exceeded the prediction call errors in case
of AutoML Models, or, in case of customer created
Models, the behaviour is as documented by that Model.
The schema of any single instance may be specified via
Endpoint's DeployedModels'
[Model's][google.cloud.aiplatform.v1beta1.DeployedModel.model]
[PredictSchemata's][google.cloud.aiplatform.v1beta1.Model.predict_schemata]
``instance_schema_uri``.
parameters (Dict):
The parameters that govern the prediction. The schema of
the parameters may be specified via Endpoint's
DeployedModels' [Model's
][google.cloud.aiplatform.v1beta1.DeployedModel.model]
[PredictSchemata's][google.cloud.aiplatform.v1beta1.Model.predict_schemata]
``parameters_schema_uri``.
deployed_model_id (str):
Optional. If specified, this ExplainRequest will be served by the
chosen DeployedModel, overriding this Endpoint's traffic split.
timeout (float): Optional. The timeout for this request in seconds.
Returns:
prediction (aiplatform.Prediction):
Prediction with returned predictions, explanations, and Model ID.
"""
self.wait()
explain_response = await self._prediction_async_client.explain(
endpoint=self.resource_name,
instances=instances,
parameters=parameters,
deployed_model_id=deployed_model_id,
timeout=timeout,
)
return Prediction(
predictions=[
json_format.MessageToDict(item)
for item in explain_response.predictions.pb
],
deployed_model_id=explain_response.deployed_model_id,
explanations=explain_response.explanations,
)
@classmethod
def list(
cls,
filter: Optional[str] = None,
order_by: Optional[str] = None,
project: Optional[str] = None,
location: Optional[str] = None,
credentials: Optional[auth_credentials.Credentials] = None,
) -> List["models.Endpoint"]:
"""List all Endpoint resource instances.
Example Usage:
aiplatform.Endpoint.list(
                filter='labels.my_label="my_label_value" OR display_name!="old_endpoint"',
)
Args:
filter (str):
Optional. An expression for filtering the results of the request.
For field names both snake_case and camelCase are supported.
order_by (str):
Optional. A comma-separated list of fields to order by, sorted in
ascending order. Use "desc" after a field name for descending.
Supported fields: `display_name`, `create_time`, `update_time`
project (str):
Optional. Project to retrieve list from. If not set, project
set in aiplatform.init will be used.
location (str):
Optional. Location to retrieve list from. If not set, location
set in aiplatform.init will be used.
credentials (auth_credentials.Credentials):
Optional. Custom credentials to use to retrieve list. Overrides
credentials set in aiplatform.init.
Returns:
List[models.Endpoint]:
A list of Endpoint resource objects
"""
return cls._list_with_local_order(
cls_filter=lambda ep: not bool(ep.network)
and not bool(ep.private_service_connect_config),
# `network` is empty and private_service_connect is not enabled for public Endpoints
filter=filter,
order_by=order_by,
project=project,
location=location,
credentials=credentials,
)
def list_models(self) -> List[gca_endpoint_compat.DeployedModel]:
"""Returns a list of the models deployed to this Endpoint.
Returns:
deployed_models (List[aiplatform.gapic.DeployedModel]):
A list of the models deployed in this Endpoint.
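        Example usage (a minimal sketch):
            ```
            for deployed_model in my_endpoint.list_models():
                print(deployed_model.id, deployed_model.display_name)
            ```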
"""
self._sync_gca_resource()
return list(self._gca_resource.deployed_models)
def undeploy_all(self, sync: bool = True) -> "Endpoint":
"""Undeploys every model deployed to this Endpoint.
Args:
sync (bool):
Whether to execute this method synchronously. If False, this method
will be executed in concurrent Future and any downstream object will
be immediately returned and synced when the Future has completed.
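        Example usage (a minimal sketch):
            ```
            my_endpoint.undeploy_all()
            ```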
"""
self._sync_gca_resource()
models_in_traffic_split = sorted( # Undeploy zero traffic models first
self._gca_resource.traffic_split.keys(),
key=lambda id: self._gca_resource.traffic_split[id],
)
# Some deployed models may not be in the traffic_split dict.
# These models have 0% traffic and should be undeployed first.
models_not_in_traffic_split = [
deployed_model.id
for deployed_model in self._gca_resource.deployed_models
if deployed_model.id not in models_in_traffic_split
]
models_to_undeploy = models_not_in_traffic_split + models_in_traffic_split
for deployed_model in models_to_undeploy:
self._undeploy(deployed_model_id=deployed_model, sync=sync)
return self
def delete(self, force: bool = False, sync: bool = True) -> None:
"""Deletes this Vertex AI Endpoint resource. If force is set to True,
all models on this Endpoint will be undeployed prior to deletion.
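Example usage (illustrative):
my_endpoint = aiplatform.Endpoint("1234567891234567890")
my_endpoint.delete(force=True)  # undeploys all models first, then deletes the Endpoint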
Args:
force (bool):
Required. If force is set to True, all deployed models on this
Endpoint will be undeployed first. Default is False.
sync (bool):
Whether to execute this method synchronously. If False, this method
will be executed in concurrent Future and any downstream object will
be immediately returned and synced when the Future has completed.
Raises:
FailedPrecondition: If models are deployed on this Endpoint and force = False.
"""
if force:
self.undeploy_all(sync=sync)
super().delete(sync=sync)
class PrivateEndpoint(Endpoint):
"""
Represents a Vertex AI PrivateEndpoint resource.
Read more [about private endpoints in the documentation](https://cloud.google.com/vertex-ai/docs/predictions/using-private-endpoints).
"""
def __init__(
self,
endpoint_name: str,
project: Optional[str] = None,
location: Optional[str] = None,
credentials: Optional[auth_credentials.Credentials] = None,
):
"""Retrieves a PrivateEndpoint resource.
Example usage:
my_private_endpoint = aiplatform.PrivateEndpoint(
endpoint_name="projects/123/locations/us-central1/endpoints/1234567891234567890"
)
or (when project and location are initialized)
my_private_endpoint = aiplatform.PrivateEndpoint(
endpoint_name="1234567891234567890"
)
Args:
endpoint_name (str):
Required. A fully-qualified endpoint resource name or endpoint ID.
Example: "projects/123/locations/us-central1/endpoints/my_endpoint_id" or
"my_endpoint_id" when project and location are initialized or passed.
project (str):
Optional. Project to retrieve endpoint from. If not set, project
set in aiplatform.init will be used.
location (str):
Optional. Location to retrieve endpoint from. If not set, location
set in aiplatform.init will be used.
credentials (auth_credentials.Credentials):
Optional. Custom credentials to use to upload this model. Overrides
credentials set in aiplatform.init.
Raises:
ValueError: If the Endpoint being retrieved is not a PrivateEndpoint.
ImportError: If there is an issue importing the `urllib3` package.
"""
try:
import urllib3
except ImportError:
raise ImportError(
"Cannot import the urllib3 HTTP client. Please install google-cloud-aiplatform[private_endpoints]."
)
super().__init__(
endpoint_name=endpoint_name,
project=project,
location=location,
credentials=credentials,
)
if not self.network and not self.private_service_connect_config:
raise ValueError(
"Please ensure the Endpoint being retrieved is a PrivateEndpoint."
)
self._http_client = urllib3.PoolManager(cert_reqs="CERT_NONE")
@property
def predict_http_uri(self) -> Optional[str]:
"""HTTP path to send prediction requests to, used when calling `PrivateEndpoint.predict()`"""
if not self._gca_resource.deployed_models:
return None
return self._gca_resource.deployed_models[0].private_endpoints.predict_http_uri
@property
def explain_http_uri(self) -> Optional[str]:
"""HTTP path to send explain requests to, used when calling `PrivateEndpoint.explain()`"""
if not self._gca_resource.deployed_models:
return None
return self._gca_resource.deployed_models[0].private_endpoints.explain_http_uri
@property
def health_http_uri(self) -> Optional[str]:
"""HTTP path to send health check requests to, used when calling `PrivateEndpoint.health_check()`"""
if not self._gca_resource.deployed_models:
return None
return self._gca_resource.deployed_models[0].private_endpoints.health_http_uri
class PrivateServiceConnectConfig:
"""Represents a Vertex AI PrivateServiceConnectConfig resource."""
_gapic_private_service_connect_config: gca_service_networking.PrivateServiceConnectConfig
def __init__(
self,
project_allowlist: Optional[Sequence[str]] = None,
):
"""PrivateServiceConnectConfig for a PrivateEndpoint.
Args:
project_allowlist (Sequence[str]):
Optional. List of projects from which traffic can be accepted
by the endpoint via [ServiceAttachment](https://cloud.google.com/vpc/docs/private-service-connect#service-attachments).
If not set, the endpoint's project will be used.
"""
self._gapic_private_service_connect_config = (
gca_service_networking.PrivateServiceConnectConfig(
enable_private_service_connect=True,
project_allowlist=project_allowlist,
)
)
@classmethod
def create(
cls,
display_name: str,
project: Optional[str] = None,
location: Optional[str] = None,
network: Optional[str] = None,
description: Optional[str] = None,
labels: Optional[Dict[str, str]] = None,
credentials: Optional[auth_credentials.Credentials] = None,
encryption_spec_key_name: Optional[str] = None,
sync=True,
private_service_connect_config: Optional[PrivateServiceConnectConfig] = None,
enable_request_response_logging=False,
request_response_logging_sampling_rate: Optional[float] = None,
request_response_logging_bq_destination_table: Optional[str] = None,
inference_timeout: Optional[int] = None,
) -> "PrivateEndpoint":
"""Creates a new PrivateEndpoint.
Example usage:
For PSA based private endpoint:
my_private_endpoint = aiplatform.PrivateEndpoint.create(
display_name="my_endpoint_name",
project="my_project_id",
location="us-central1",
network="projects/123456789123/global/networks/my_vpc"
)
or (when project and location are initialized)
my_private_endpoint = aiplatform.PrivateEndpoint.create(
display_name="my_endpoint_name",
network="projects/123456789123/global/networks/my_vpc"
)
For PSC based private endpoint:
my_private_endpoint = aiplatform.PrivateEndpoint.create(
display_name="my_endpoint_name",
project="my_project_id",
location="us-central1",
private_service_connect_config=aiplatform.PrivateEndpoint.PrivateServiceConnectConfig(
project_allowlist=["test-project"]),
)
or (when project and location are initialized)
my_private_endpoint = aiplatform.PrivateEndpoint.create(
display_name="my_endpoint_name",
private_service_connect_config=aiplatform.PrivateEndpoint.PrivateServiceConnectConfig(
project_allowlist=["test-project"]),
)
Args:
display_name (str): Required. The user-defined name of the Endpoint. The
name can be up to 128 characters long and can consist of any UTF-8
characters.
project (str): Optional. Project to retrieve endpoint from. If not set,
project set in aiplatform.init will be used.
location (str): Optional. Location to retrieve endpoint from. If not
set, location set in aiplatform.init will be used.
network (str): Optional. The full name of the Compute Engine network to
which this Endpoint will be peered. E.g.
"projects/123456789123/global/networks/my_vpc". Private services
access must already be configured for the network. If left
unspecified, the network set with aiplatform.init will be used. Cannot
be set together with private_service_connect_config.
description (str): Optional. The description of the Endpoint.
labels (Dict[str, str]): Optional. The labels with user-defined metadata
to organize your Endpoints. Label keys and values can be no longer
than 64 characters (Unicode codepoints), can only contain lowercase
letters, numeric characters, underscores and dashes. International
characters are allowed. See https://goo.gl/xmQnxf for more information
and examples of labels.
credentials (auth_credentials.Credentials): Optional. Custom credentials
to use to upload this model. Overrides credentials set in
aiplatform.init.
encryption_spec_key_name (str): Optional. The Cloud KMS resource
identifier of the customer managed encryption key used to protect the
model. Has the
form:
``projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key``.
The key needs to be in the same region as where the compute
resource is created. If set, this Model and all sub-resources of
this Model will be secured by this key. Overrides
encryption_spec_key_name set in aiplatform.init.
sync (bool): Whether to execute this method synchronously. If False,
this method will be executed in concurrent Future and any downstream
object will be immediately returned and synced when the Future has
completed.
private_service_connect_config (aiplatform.PrivateEndpoint.PrivateServiceConnectConfig):
[Private Service Connect](https://cloud.google.com/vpc/docs/private-service-connect)
configuration for the endpoint. Cannot be set when network is
specified.
enable_request_response_logging (bool): Optional. Whether to enable
request & response logging for this endpoint.
request_response_logging_sampling_rate (float): Optional. The request
response logging sampling rate. If not set, default is 0.0.
request_response_logging_bq_destination_table (str): Optional. The
request response logging bigquery destination. If not set, will create
a table with name:
``bq://{project_id}.logging_{endpoint_display_name}_{endpoint_id}.request_response_logging``.
inference_timeout (int): Optional. It defines the prediction timeout, in
seconds, for online predictions using cloud-based endpoints. This
applies to either PSC endpoints, when private_service_connect_config
is set, or dedicated endpoints, when dedicated_endpoint_enabled is
true.
Returns:
endpoint (aiplatform.PrivateEndpoint):
Created endpoint.
Raises:
ValueError: If neither `network` nor `private_service_connect_config`
is provided, or if both are provided.
"""
api_client = cls._instantiate_client(location=location, credentials=credentials)
utils.validate_display_name(display_name)
if labels:
utils.validate_labels(labels)
project = project or initializer.global_config.project
location = location or initializer.global_config.location
network = network or initializer.global_config.network
if not network and not private_service_connect_config:
raise ValueError(
"Please provide required argument `network` or"
"`private_service_connect_config`. You can also set network"
"using aiplatform.init(network=...)"
)
if network and private_service_connect_config:
raise ValueError(
"Argument `network` and `private_service_connect_config` enabled"
" mutually exclusive. You can only set one of them."
)
config = None
if private_service_connect_config:
config = (
private_service_connect_config._gapic_private_service_connect_config
)
predict_request_response_logging_config = None
if enable_request_response_logging:
predict_request_response_logging_config = (
gca_endpoint_compat.PredictRequestResponseLoggingConfig(
enabled=True,
sampling_rate=request_response_logging_sampling_rate,
bigquery_destination=gca_io_compat.BigQueryDestination(
output_uri=request_response_logging_bq_destination_table
),
)
)
client_connection_config = None
if private_service_connect_config and inference_timeout:
client_connection_config = gca_endpoint_compat.ClientConnectionConfig(
inference_timeout=duration_pb2.Duration(seconds=inference_timeout)
)
return cls._create(
api_client=api_client,
display_name=display_name,
project=project,
location=location,
description=description,
labels=labels,
credentials=credentials,
encryption_spec=initializer.global_config.get_encryption_spec(
encryption_spec_key_name=encryption_spec_key_name
),
network=network,
sync=sync,
private_service_connect_config=config,
predict_request_response_logging_config=predict_request_response_logging_config,
client_connection_config=client_connection_config,
)
@classmethod
def _construct_sdk_resource_from_gapic(
cls,
gapic_resource: proto.Message,
project: Optional[str] = None,
location: Optional[str] = None,
credentials: Optional[auth_credentials.Credentials] = None,
) -> "PrivateEndpoint":
"""Given a GAPIC PrivateEndpoint object, return the SDK representation.
Args:
gapic_resource (proto.Message):
A GAPIC representation of a PrivateEndpoint resource, usually
retrieved by a get_* or in a list_* API call.
project (str):
Optional. Project to construct Endpoint object from. If not set,
project set in aiplatform.init will be used.
location (str):
Optional. Location to construct Endpoint object from. If not set,
location set in aiplatform.init will be used.
credentials (auth_credentials.Credentials):
Optional. Custom credentials to use to construct Endpoint.
Overrides credentials set in aiplatform.init.
Returns:
endpoint (aiplatform.PrivateEndpoint):
An initialized PrivateEndpoint resource.
Raises:
ImportError: If there is an issue importing the `urllib3` package.
"""
try:
import urllib3
except ImportError:
raise ImportError(
"Cannot import the urllib3 HTTP client. Please install google-cloud-aiplatform[private_endpoints]."
)
endpoint = super()._construct_sdk_resource_from_gapic(
gapic_resource=gapic_resource,
project=project,
location=location,
credentials=credentials,
)
endpoint._http_client = urllib3.PoolManager(cert_reqs="CERT_NONE")
return endpoint
def _http_request(
self,
method: str,
url: str,
body: Optional[Dict[Any, Any]] = None,
headers: Optional[Dict[str, str]] = None,
) -> "urllib3.response.HTTPResponse": # type: ignore # noqa: F821
"""Helper function used to perform HTTP requests for PrivateEndpoint.
Args:
method (str):
Required. The HTTP request method to use. Example: "POST" or "GET"
url (str):
Required. The url used to send requests and get responses from.
body (Dict[Any, Any]):
Optional. Data sent to the url in the HTTP request. For a PrivateEndpoint,
an instance is sent and a prediction response is expected.
headers (Dict[str, str]):
Optional. Header in the HTTP request.
Returns:
urllib3.response.HTTPResponse:
An HTTP response container.
Raises:
ImportError: If there is an issue importing the `urllib3` package.
RuntimeError: If an HTTP request could not be made.
RuntimeError: If a connection could not be established with the PrivateEndpoint and
an HTTP request could not be made.
"""
try:
import urllib3
except ImportError:
raise ImportError(
"Cannot import the urllib3 HTTP client. Please install google-cloud-aiplatform[private_endpoints]."
)
try:
response = self._http_client.request(
method=method, url=url, body=body, headers=headers
)
if response.status < _SUCCESSFUL_HTTP_RESPONSE:
return response
else:
raise RuntimeError(
f"{response.status} - Failed to make request, see response: "
+ response.data.decode("utf-8")
)
except urllib3.exceptions.MaxRetryError as exc:
raise RuntimeError(
f"Failed to make a {method} request to this URI, make sure: "
" this call is being made inside the network this PrivateEndpoint is peered to "
f"({self._gca_resource.network}), calling health_check() returns True, "
f"and that {url} is a valid URL."
) from exc
def _validate_endpoint_override(self, endpoint_override: str) -> bool:
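"""Returns True if the endpoint override only contains characters valid
in an IP address or DNS name (letters, digits, dashes, and dots)."""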
regex = re.compile("^[a-zA-Z0-9-.]+$")
return regex.match(endpoint_override) is not None
def predict(
self,
instances: List,
parameters: Optional[Dict] = None,
endpoint_override: Optional[str] = None,
) -> Prediction:
"""Make a prediction against this PrivateEndpoint using a HTTP request.
For PSA based private endpoint, this method must be called within the
network the PrivateEndpoint is peered to. Otherwise, the predict() call
will fail with error code 404. To check, use `PrivateEndpoint.network`.
For PSC based private endpoint, the project that the caller credentials
are from must be allowlisted.
Example usage:
PSA based private endpoint:
response = my_private_endpoint.predict(instances=[...], parameters={...})
my_predictions = response.predictions
PSC based private endpoint:
After creating PSC Endpoint pointing to the endpoint's
ServiceAttachment, use the PSC Endpoint IP Address or DNS as
endpoint_override.
psc_endpoint_address = "10.0.1.23"
or
psc_endpoint_address = "test.my.prediction"
response = my_private_endpoint.predict(instances=[...],
endpoint_override=psc_endpoint_address)
my_predictions = response.predictions
Args:
instances (List):
Required. The instances that are the input to the
prediction call. Instance types must be JSON serializable.
A DeployedModel may have an upper limit
on the number of instances it supports per request, and
when it is exceeded the prediction call errors in case
of AutoML Models, or, in case of customer created
Models, the behaviour is as documented by that Model.
The schema of any single instance may be specified via
Endpoint's DeployedModels'
[Model's][google.cloud.aiplatform.v1beta1.DeployedModel.model]
[PredictSchemata's][google.cloud.aiplatform.v1beta1.Model.predict_schemata]
``instance_schema_uri``.
parameters (Dict):
The parameters that govern the prediction. The schema of
the parameters may be specified via Endpoint's
DeployedModels' [Model's
][google.cloud.aiplatform.v1beta1.DeployedModel.model]
[PredictSchemata's][google.cloud.aiplatform.v1beta1.Model.predict_schemata]
``parameters_schema_uri``.
endpoint_override (Optional[str]):
The Private Service Connect endpoint's IP address or DNS that
points to the endpoint's service attachment.
Returns:
prediction (aiplatform.Prediction):
Prediction object with returned predictions and Model ID.
Raises:
RuntimeError: If a model has not been deployed and a request cannot be
made for a PSA based endpoint.
ValueError: If an endpoint override is not provided for a PSC based
endpoint.
ValueError: If an endpoint override is invalid for a PSC based endpoint.
"""
self.wait()
self._sync_gca_resource_if_skipped()
if self.network:
if not self._gca_resource.deployed_models:
raise RuntimeError(
"Cannot make a predict request because a model has not been"
"deployed on this Private Endpoint. Please ensure a model"
"has been deployed."
)
response = self._http_request(
method="POST",
url=self.predict_http_uri,
body=json.dumps({"instances": instances, "parameters": parameters}),
headers={"Content-Type": "application/json"},
)
prediction_response = json.loads(response.data)
return Prediction(
predictions=prediction_response.get("predictions"),
metadata=prediction_response.get("metadata"),
deployed_model_id=self._gca_resource.deployed_models[0].id,
)
if self.private_service_connect_config:
if not endpoint_override:
raise ValueError(
"Cannot make a predict request because endpoint override is"
"not provided. Please ensure an endpoint override is"
"provided."
)
if not self._validate_endpoint_override(endpoint_override):
raise ValueError(
"Invalid endpoint override provided. Please only use IP"
"address or DNS."
)
if not self.credentials.valid:
self.credentials.refresh(google_auth_requests.Request())
token = self.credentials.token
headers = {
"Authorization": f"Bearer {token}",
"Content-Type": "application/json",
}
url = f"https://{endpoint_override}/v1/projects/{self.project}/locations/{self.location}/endpoints/{self.name}:predict"
response = self._http_request(
method="POST",
url=url,
body=json.dumps({"instances": instances, "parameters": parameters}),
headers=headers,
)
prediction_response = json.loads(response.data)
return Prediction(
predictions=prediction_response.get("predictions"),
metadata=prediction_response.get("metadata"),
deployed_model_id=prediction_response.get("deployedModelId"),
model_resource_name=prediction_response.get("model"),
model_version_id=prediction_response.get("modelVersionId"),
)
def raw_predict(
self,
body: bytes,
headers: Dict[str, str],
endpoint_override: Optional[str] = None,
) -> requests.models.Response:
"""Make a prediction request using arbitrary headers.
For PSA based private endpoint, this method must be called within the network the PrivateEndpoint is peered to.
Otherwise, the raw_predict() call will fail with error code 404. To check, use `PrivateEndpoint.network`.
Example usage:
my_endpoint = aiplatform.PrivateEndpoint(ENDPOINT_ID)
# PSA based private endpoint
response = my_endpoint.raw_predict(
body = b'{"instances":[{"feat_1":val_1, "feat_2":val_2}]}',
headers = {'Content-Type':'application/json'}
)
# PSC based private endpoint
response = my_endpoint.raw_predict(
body = b'{"instances":[{"feat_1":val_1, "feat_2":val_2}]}',
headers = {'Content-Type':'application/json'},
endpoint_override = "10.1.0.23"
)
status_code = response.status_code
results = json.dumps(response.text)
Args:
body (bytes):
The body of the prediction request in bytes. This must not
exceed 1.5 mb per request.
headers (Dict[str, str]):
The header of the request as a dictionary. There are no
restrictions on the header.
endpoint_override (Optional[str]):
The Private Service Connect endpoint's IP address or DNS that
points to the endpoint's service attachment.
Returns:
A requests.models.Response object containing the status code and
prediction results.
Raises:
ValueError: If an endpoint override is not provided for a PSC based
endpoint.
ValueError: If an endpoint override is invalid for a PSC based endpoint.
"""
self.wait()
if self.network:
return self._http_request(
method="POST",
url=self.predict_http_uri,
body=body,
headers=headers,
)
if self.private_service_connect_config:
if not endpoint_override:
raise ValueError(
"Cannot make a predict request because endpoint override is"
"not provided. Please ensure an endpoint override is"
"provided."
)
if not self._validate_endpoint_override(endpoint_override):
raise ValueError(
"Invalid endpoint override provided. Please only use IP"
"address or DNS."
)
if not self.credentials.valid:
self.credentials.refresh(google_auth_requests.Request())
token = self.credentials.token
headers_with_token = dict(headers)
headers_with_token["Authorization"] = f"Bearer {token}"
url = f"https://{endpoint_override}/v1/projects/{self.project}/locations/{self.location}/endpoints/{self.name}:rawPredict"
return self._http_request(
method="POST",
url=url,
body=body,
headers=headers_with_token,
)
def stream_raw_predict(
self,
body: bytes,
headers: Dict[str, str],
endpoint_override: Optional[str] = None,
) -> Iterator[bytes]:
"""Make a streaming prediction request using arbitrary headers.
Example usage:
my_endpoint = aiplatform.PrivateEndpoint(ENDPOINT_ID)
# Prepare the request body
request_body = json.dumps({...}).encode('utf-8')
# Define the headers
headers = {
'Content-Type': 'application/json',
}
# Use stream_raw_predict to send the request and process the response
for stream_response in psc_endpoint.stream_raw_predict(
body=request_body,
headers=headers,
endpoint_override="10.128.0.26" # Replace with your actual endpoint
):
stream_response_text = stream_response.decode('utf-8')
Args:
body (bytes):
The body of the prediction request in bytes. This must not
exceed 10 mb per request.
headers (Dict[str, str]):
The header of the request as a dictionary. There are no
restrictions on the header.
endpoint_override (Optional[str]):
The Private Service Connect endpoint's IP address or DNS that
points to the endpoint's service attachment.
Yields:
predictions (Iterator[bytes]):
The streaming prediction results as lines of bytes.
Raises:
ValueError: If an endpoint override is not provided for a PSC based
endpoint.
ValueError: If an endpoint override is invalid for a PSC based endpoint.
"""
self.wait()
if self.network or not self.private_service_connect_config:
raise ValueError(
"PSA based private endpoint does not support streaming prediction."
)
if self.private_service_connect_config:
if not endpoint_override:
raise ValueError(
"Cannot make a predict request because endpoint override is"
"not provided. Please ensure an endpoint override is"
"provided."
)
if not self._validate_endpoint_override(endpoint_override):
raise ValueError(
"Invalid endpoint override provided. Please only use IP"
"address or DNS."
)
if not self.credentials.valid:
self.credentials.refresh(google_auth_requests.Request())
token = self.credentials.token
headers_with_token = dict(headers)
headers_with_token["Authorization"] = f"Bearer {token}"
if not self.authorized_session:
self.credentials._scopes = constants.base.DEFAULT_AUTHED_SCOPES
self.authorized_session = google_auth_requests.AuthorizedSession(
self.credentials
)
url = f"https://{endpoint_override}/v1/projects/{self.project}/locations/{self.location}/endpoints/{self.name}:streamRawPredict"
with self.authorized_session.post(
url=url,
data=body,
headers=headers_with_token,
stream=True,
verify=False,
) as resp:
for line in resp.iter_lines():
yield line
def explain(self):
raise NotImplementedError(
f"{self.__class__.__name__} class does not support 'explain' as of now."
)
def health_check(self) -> bool:
"""
Makes a request to this PrivateEndpoint's health check URI. Must be called within
the network that this PrivateEndpoint is peered to.
This is only supported by PSA based private endpoint.
Example Usage:
if my_private_endpoint.health_check():
print("PrivateEndpoint is healthy!")
Returns:
bool:
True if the health check succeeds and calls can be made to this PrivateEndpoint.
Raises:
RuntimeError: If a model has not been deployed and a request cannot be made.
RuntimeError: If the endpoint is a PSC based private endpoint.
"""
self.wait()
self._sync_gca_resource_if_skipped()
if self.private_service_connect_config:
raise RuntimeError(
"Health check request is not supported on PSC based Private Endpoint."
)
if not self._gca_resource.deployed_models:
raise RuntimeError(
"Cannot make a health check request because a model has not been deployed on this Private"
"Endpoint. Please ensure a model has been deployed."
)
response = self._http_request(
method="GET",
url=self.health_http_uri,
)
return response.status < _SUCCESSFUL_HTTP_RESPONSE
@classmethod
def list(
cls,
filter: Optional[str] = None,
order_by: Optional[str] = None,
project: Optional[str] = None,
location: Optional[str] = None,
credentials: Optional[auth_credentials.Credentials] = None,
) -> List["models.PrivateEndpoint"]:
"""List all PrivateEndpoint resource instances.
Example Usage:
my_private_endpoints = aiplatform.PrivateEndpoint.list()
or
my_private_endpoints = aiplatform.PrivateEndpoint.list(
filter='labels.my_label="my_label_value" OR display_name=!"old_endpoint"',
)
Args:
filter (str):
Optional. An expression for filtering the results of the request.
For field names both snake_case and camelCase are supported.
order_by (str):
Optional. A comma-separated list of fields to order by, sorted in
ascending order. Use "desc" after a field name for descending.
Supported fields: `display_name`, `create_time`, `update_time`
project (str):
Optional. Project to retrieve list from. If not set, project
set in aiplatform.init will be used.
location (str):
Optional. Location to retrieve list from. If not set, location
set in aiplatform.init will be used.
credentials (auth_credentials.Credentials):
Optional. Custom credentials to use to retrieve list. Overrides
credentials set in aiplatform.init.
Returns:
List[models.PrivateEndpoint]:
A list of PrivateEndpoint resource objects.
"""
return cls._list_with_local_order(
cls_filter=lambda ep: bool(ep.network)
or bool(ep.private_service_connect_config),
# Only PrivateEndpoints have a network or private_service_connect_config
filter=filter,
order_by=order_by,
project=project,
location=location,
credentials=credentials,
)
def deploy(
self,
model: "Model",
deployed_model_display_name: Optional[str] = None,
machine_type: Optional[str] = None,
min_replica_count: int = 1,
max_replica_count: int = 1,
accelerator_type: Optional[str] = None,
accelerator_count: Optional[int] = None,
tpu_topology: Optional[str] = None,
service_account: Optional[str] = None,
explanation_metadata: Optional[aiplatform.explain.ExplanationMetadata] = None,
explanation_parameters: Optional[
aiplatform.explain.ExplanationParameters
] = None,
metadata: Optional[Sequence[Tuple[str, str]]] = (),
sync=True,
disable_container_logging: bool = False,
traffic_percentage: Optional[int] = 0,
traffic_split: Optional[Dict[str, int]] = None,
reservation_affinity_type: Optional[str] = None,
reservation_affinity_key: Optional[str] = None,
reservation_affinity_values: Optional[List[str]] = None,
spot: bool = False,
system_labels: Optional[Dict[str, str]] = None,
required_replica_count: Optional[int] = 0,
) -> None:
"""Deploys a Model to the PrivateEndpoint.
Example Usage:
PSA based private endpoint
my_private_endpoint.deploy(
model=my_model
)
PSC based private endpoint
psc_endpoint.deploy(
model=first_model,
)
psc_endpoint.deploy(
model=second_model,
traffic_percentage=50,
)
psc_endpoint.deploy(
model=third_model,
traffic_split={
'first_model_id': 40,
'second_model_id': 30,
'third_model_id': 30
},
)
Args:
model (aiplatform.Model):
Required. Model to be deployed.
deployed_model_display_name (str):
Optional. The display name of the DeployedModel. If not provided
upon creation, the Model's display_name is used.
machine_type (str):
Optional. The type of machine. Not specifying machine type will
result in model to be deployed with automatic resources.
min_replica_count (int):
Optional. The minimum number of machine replicas this deployed
model will always be deployed on. If traffic against it increases,
it may dynamically be deployed onto more replicas, and as traffic
decreases, some of these extra replicas may be freed.
max_replica_count (int):
Optional. The maximum number of replicas this deployed model may
be deployed on when the traffic against it increases. If requested
value is too large, the deployment will error, but if deployment
succeeds then the ability to scale the model to that many replicas
is guaranteed (barring service outages). If traffic against the
deployed model increases beyond what its replicas at maximum may
handle, a portion of the traffic will be dropped. If this value
is not provided, the larger value of min_replica_count or 1 will
be used. If value provided is smaller than min_replica_count, it
will automatically be increased to be min_replica_count.
accelerator_type (str):
Optional. Hardware accelerator type. Must also set accelerator_count if used.
One of ACCELERATOR_TYPE_UNSPECIFIED, NVIDIA_TESLA_K80, NVIDIA_TESLA_P100,
NVIDIA_TESLA_V100, NVIDIA_TESLA_P4, NVIDIA_TESLA_T4
accelerator_count (int):
Optional. The number of accelerators to attach to a worker replica.
tpu_topology (str):
Optional. The TPU topology to use for the DeployedModel.
Required for CloudTPU multihost deployments.
service_account (str):
The service account that the DeployedModel's container runs as. Specify the
email address of the service account. If this service account is not
specified, the container runs as a service account that doesn't have access
to the resource project.
Users deploying the Model must have the `iam.serviceAccounts.actAs`
permission on this service account.
explanation_metadata (aiplatform.explain.ExplanationMetadata):
Optional. Metadata describing the Model's input and output for explanation.
`explanation_metadata` is optional while `explanation_parameters` must be
specified when used.
For more details, see `Ref docs <http://tinyurl.com/1igh60kt>`
explanation_parameters (aiplatform.explain.ExplanationParameters):
Optional. Parameters to configure explaining for Model's predictions.
For more details, see `Ref docs <http://tinyurl.com/1an4zake>`
metadata (Sequence[Tuple[str, str]]):
Optional. Strings which should be sent along with the request as
metadata.
sync (bool):
Whether to execute this method synchronously. If False, this method
will be executed in concurrent Future and any downstream object will
be immediately returned and synced when the Future has completed.
traffic_percentage (int):
Optional. Desired traffic to newly deployed model.
Defaults to 0 if there are pre-existing deployed models.
Defaults to 100 if there are no pre-existing deployed models.
Defaults to 100 for PSA based private endpoint.
Negative values should not be provided. Traffic of previously
deployed models at the endpoint will be scaled down to
accommodate new deployed model's traffic.
Should not be provided if traffic_split is provided.
traffic_split (Dict[str, int]):
Optional. Only supported by PSC based private endpoint.
A map from a DeployedModel's ID to the percentage of
this Endpoint's traffic that should be forwarded to that DeployedModel.
If a DeployedModel's ID is not listed in this map, then it receives
no traffic. The traffic percentage values must add up to 100, or
map must be empty if the Endpoint is to not accept any traffic at
the moment. Key for model being deployed is "0". Should not be
provided if traffic_percentage is provided.
reservation_affinity_type (str):
Optional. The type of reservation affinity.
One of NO_RESERVATION, ANY_RESERVATION, SPECIFIC_RESERVATION,
SPECIFIC_THEN_ANY_RESERVATION, SPECIFIC_THEN_NO_RESERVATION
reservation_affinity_key (str):
Optional. Corresponds to the label key of a reservation resource.
To target a SPECIFIC_RESERVATION by name, use `compute.googleapis.com/reservation-name` as the key
and specify the name of your reservation as its value.
reservation_affinity_values (List[str]):
Optional. Corresponds to the label values of a reservation resource.
This must be the full resource name of the reservation.
Format: 'projects/{project_id_or_number}/zones/{zone}/reservations/{reservation_name}'
spot (bool):
Optional. Whether to schedule the deployment workload on spot VMs.
system_labels (Dict[str, str]):
Optional. System labels to apply to Model Garden deployments.
System labels are managed by Google for internal use only.
required_replica_count (int):
Optional. Number of required available replicas for the
deployment to succeed. This field is only needed when partial
model deployment/mutation is desired, with a value greater than
or equal to 1 and less than or equal to min_replica_count. If
set, the model deploy/mutate operation will succeed once
available_replica_count reaches required_replica_count, and the
rest of the replicas will be retried.
"""
if self.network:
if traffic_split is not None:
raise ValueError(
"Traffic split is not supported for PSA based PrivateEndpoint."
)
traffic_percentage = 100
self._validate_deploy_args(
min_replica_count=min_replica_count,
max_replica_count=max_replica_count,
accelerator_type=accelerator_type,
deployed_model_display_name=deployed_model_display_name,
traffic_split=traffic_split,
traffic_percentage=traffic_percentage,
deployment_resource_pool=None,
required_replica_count=required_replica_count,
)
explanation_spec = _explanation_utils.create_and_validate_explanation_spec(
explanation_metadata=explanation_metadata,
explanation_parameters=explanation_parameters,
)
self._deploy(
model=model,
deployed_model_display_name=deployed_model_display_name,
traffic_percentage=traffic_percentage,
traffic_split=traffic_split,
machine_type=machine_type,
min_replica_count=min_replica_count,
max_replica_count=max_replica_count,
accelerator_type=accelerator_type,
accelerator_count=accelerator_count,
tpu_topology=tpu_topology,
reservation_affinity_type=reservation_affinity_type,
reservation_affinity_key=reservation_affinity_key,
reservation_affinity_values=reservation_affinity_values,
service_account=service_account,
explanation_spec=explanation_spec,
metadata=metadata,
sync=sync,
spot=spot,
disable_container_logging=disable_container_logging,
system_labels=system_labels,
required_replica_count=required_replica_count,
)
def update(
self,
display_name: Optional[str] = None,
description: Optional[str] = None,
labels: Optional[Dict[str, str]] = None,
traffic_split: Optional[Dict[str, int]] = None,
request_metadata: Optional[Sequence[Tuple[str, str]]] = (),
update_request_timeout: Optional[float] = None,
) -> "PrivateEndpoint":
"""Updates a PrivateEndpoint.
Example usage:
PSC based private endpoint
my_endpoint = my_endpoint.update(
display_name='my-updated-endpoint',
description='my updated description',
labels={'key': 'value'},
traffic_split={
'123456': 20,
'234567': 80,
},
)
Args:
display_name (str):
Optional. The display name of the Endpoint.
The name can be up to 128 characters long and can consist of any UTF-8
characters.
description (str):
Optional. The description of the Endpoint.
labels (Dict[str, str]):
Optional. The labels with user-defined metadata to organize your Endpoints.
Label keys and values can be no longer than 64 characters
(Unicode codepoints), can only contain lowercase letters, numeric
characters, underscores and dashes. International characters are allowed.
See https://goo.gl/xmQnxf for more information and examples of labels.
traffic_split (Dict[str, int]):
Optional. Only supported by PSC based private endpoint
A map from a DeployedModel's ID to the percentage of this Endpoint's
traffic that should be forwarded to that DeployedModel.
If a DeployedModel's ID is not listed in this map, then it receives no traffic.
The traffic percentage values must add up to 100, or map must be empty if
the Endpoint is to not accept any traffic at the moment.
request_metadata (Sequence[Tuple[str, str]]):
Optional. Strings which should be sent along with the request as metadata.
update_request_timeout (float):
Optional. The timeout for the update request in seconds.
Returns:
PrivateEndpoint (aiplatform.PrivateEndpoint):
Updated private endpoint resource.
Raises:
ValueError: If `traffic_split` is set for PSA based private endpoint.
"""
if self.network:
if traffic_split is not None:
raise ValueError(
"Traffic split is not supported for PSA based Private Endpoint."
)
super().update(
display_name=display_name,
description=description,
labels=labels,
traffic_split=traffic_split,
request_metadata=request_metadata,
update_request_timeout=update_request_timeout,
)
return self
def undeploy(
self,
deployed_model_id: str,
sync=True,
traffic_split: Optional[Dict[str, int]] = None,
) -> None:
"""Undeploys a deployed model from the PrivateEndpoint.
Example Usage:
PSA based private endpoint:
my_private_endpoint.undeploy(
deployed_model_id="1234567891232567891"
)
or
my_deployed_model_id = my_private_endpoint.list_models()[0].id
my_private_endpoint.undeploy(
deployed_model_id=my_deployed_model_id
)
Args:
deployed_model_id (str):
Required. The ID of the DeployedModel to be undeployed from the
PrivateEndpoint. Use PrivateEndpoint.list_models() to get the
deployed model ID.
sync (bool):
Whether to execute this method synchronously. If False, this method
will be executed in concurrent Future and any downstream object will
be immediately returned and synced when the Future has completed.
traffic_split (Dict[str, int]):
Optional. Only supported by PSC based private endpoint.
A map of DeployedModel IDs to the percentage of this Endpoint's
traffic that should be forwarded to that DeployedModel.
Required if undeploying a model with non-zero traffic from an Endpoint
with multiple deployed models. The traffic percentage values must
add up to 100, or map must be empty if the Endpoint is to not
accept any traffic at the moment. If a DeployedModel's ID is not
listed in this map, then it receives no traffic.
"""
self._sync_gca_resource_if_skipped()
if self.network:
if traffic_split is not None:
raise ValueError(
"Traffic split is not supported for PSA based PrivateEndpoint."
)
# PSA based private endpoint
self._undeploy(
deployed_model_id=deployed_model_id,
traffic_split=None,
sync=sync,
)
# PSC based private endpoint
if self.private_service_connect_config:
super().undeploy(
deployed_model_id=deployed_model_id,
traffic_split=traffic_split,
sync=sync,
)
def undeploy_all(self, sync: bool = True) -> "PrivateEndpoint":
"""Undeploys every model deployed to this PrivateEndpoint.
Args:
sync (bool):
Whether to execute this method synchronously. If False, this method
will be executed in concurrent Future and any downstream object will
be immediately returned and synced when the Future has completed.
"""
if self.network:
self._sync_gca_resource()
# PSA based private endpoint
self._undeploy(
deployed_model_id=self._gca_resource.deployed_models[0].id,
traffic_split=None,
sync=sync,
)
if self.private_service_connect_config:
# PSC based private endpoint
super().undeploy_all(sync=sync)
return self
def delete(self, force: bool = False, sync: bool = True) -> None:
"""Deletes this Vertex AI PrivateEndpoint resource. If force is set to True,
all models on this PrivateEndpoint will be undeployed prior to deletion.
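Example usage (illustrative):
my_private_endpoint.delete(force=True)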
Args:
force (bool):
Required. If force is set to True, all deployed models on this
Endpoint will be undeployed first. Default is False.
sync (bool):
Whether to execute this method synchronously. If False, this method
will be executed in concurrent Future and any downstream object will
be immediately returned and synced when the Future has completed.
Raises:
FailedPrecondition: If models are deployed on this Endpoint and force = False.
"""
if force and self._gca_resource.deployed_models:
self.undeploy_all(sync=sync)
super().delete(force=False, sync=sync)
class Model(base.VertexAiResourceNounWithFutureManager, base.PreviewMixin):
client_class = utils.ModelClientWithOverride
_resource_noun = "models"
_getter_method = "get_model"
_list_method = "list_models"
_delete_method = "delete_model"
_parse_resource_name_method = "parse_model_path"
_format_resource_name_method = "model_path"
_preview_class = "google.cloud.aiplatform.aiplatform.preview.models.Model"
@property
def preview(self):
"""Return a Model instance with preview features enabled."""
from google.cloud.aiplatform.preview import models as preview_models
if not hasattr(self, "_preview_instance"):
self._preview_instance = preview_models.Model(
self.resource_name, credentials=self.credentials
)
return self._preview_instance
@property
def uri(self) -> Optional[str]:
"""Path to the directory containing the Model artifact and any of its
supporting files. Not present for AutoML Models."""
self._assert_gca_resource_is_available()
return self._gca_resource.artifact_uri or None
@property
def description(self) -> str:
"""Description of the model."""
self._assert_gca_resource_is_available()
return self._gca_resource.description
@property
def supported_export_formats(
self,
) -> Dict[str, List[gca_model_compat.Model.ExportFormat.ExportableContent]]:
"""The formats and content types in which this Model may be exported.
If empty, this Model is not available for export.
For example, if this model can be exported as a Tensorflow SavedModel and
have the artifacts written to Cloud Storage, the expected value would be:
{'tf-saved-model': [<ExportableContent.ARTIFACT: 1>]}
"""
self._assert_gca_resource_is_available()
return {
export_format.id: [
gca_model_compat.Model.ExportFormat.ExportableContent(content)
for content in export_format.exportable_contents
]
for export_format in self._gca_resource.supported_export_formats
}
@property
def supported_deployment_resources_types(
self,
) -> List[model_v1.Model.DeploymentResourcesType]:
"""List of deployment resource types accepted for this Model.
When this Model is deployed, its prediction resources are described by
the `prediction_resources` field of the objects returned by
`Endpoint.list_models()`. Because not all Models support all resource
configuration types, the configuration types this Model supports are
listed here.
If no configuration types are listed, the Model cannot be
deployed to an `Endpoint` and does not support online predictions
(`Endpoint.predict()` or `Endpoint.explain()`). Such a Model can serve
predictions by using a `BatchPredictionJob`, if it has at least one entry
each in `Model.supported_input_storage_formats` and
`Model.supported_output_storage_formats`."""
self._assert_gca_resource_is_available()
return list(self._gca_resource.supported_deployment_resources_types)
@property
def supported_input_storage_formats(self) -> List[str]:
"""The formats this Model supports in the `input_config` field of a
`BatchPredictionJob`. If `Model.predict_schemata.instance_schema_uri`
exists, the instances should be given as per that schema.
[Read the docs for more on batch prediction formats](https://cloud.google.com/vertex-ai/docs/predictions/batch-predictions#batch_request_input)
If this Model doesn't support any of these formats it means it cannot be
used with a `BatchPredictionJob`. However, if it has
`supported_deployment_resources_types`, it could serve online predictions
by using `Endpoint.predict()` or `Endpoint.explain()`.
"""
self._assert_gca_resource_is_available()
return list(self._gca_resource.supported_input_storage_formats)
@property
def supported_output_storage_formats(self) -> List[str]:
"""The formats this Model supports in the `output_config` field of a
`BatchPredictionJob`.
If both `Model.predict_schemata.instance_schema_uri` and
`Model.predict_schemata.prediction_schema_uri` exist, the predictions
are returned together with their instances. In other words, the
prediction has the original instance data first, followed by the actual
prediction content (as per the schema).
[Read the docs for more on batch prediction formats](https://cloud.google.com/vertex-ai/docs/predictions/batch-predictions)
If this Model doesn't support any of these formats it means it cannot be
used with a `BatchPredictionJob`. However, if it has
`supported_deployment_resources_types`, it could serve online predictions
by using `Endpoint.predict()` or `Endpoint.explain()`.
"""
self._assert_gca_resource_is_available()
return list(self._gca_resource.supported_output_storage_formats)
@property
def predict_schemata(self) -> Optional[model_v1.PredictSchemata]:
"""The schemata that describe formats of the Model's predictions and
explanations, if available."""
self._assert_gca_resource_is_available()
return getattr(self._gca_resource, "predict_schemata")
@property
def training_job(self) -> Optional["aiplatform.training_jobs._TrainingJob"]:
"""The TrainingJob that uploaded this Model, if any.
Raises:
api_core.exceptions.NotFound: If the Model's training job resource
cannot be found on the Vertex service.
"""
self._assert_gca_resource_is_available()
job_name = getattr(self._gca_resource, "training_pipeline")
if not job_name:
return None
try:
return aiplatform.training_jobs._TrainingJob._get_and_return_subclass(
resource_name=job_name,
project=self.project,
location=self.location,
credentials=self.credentials,
)
except api_exceptions.NotFound as exc:
raise api_exceptions.NotFound(
f"The training job used to create this model could not be found: {job_name}"
) from exc
@property
def container_spec(self) -> Optional[model_v1.ModelContainerSpec]:
"""The specification of the container that is to be used when deploying
this Model. Not present for AutoML Models."""
self._assert_gca_resource_is_available()
return getattr(self._gca_resource, "container_spec")
@property
def version_id(self) -> str:
"""The version ID of the model.
A new version is committed when a new model version is uploaded or
trained under an existing model id. It is an auto-incrementing decimal
number in string representation."""
self._assert_gca_resource_is_available()
return getattr(self._gca_resource, "version_id")
@property
def version_aliases(self) -> Sequence[str]:
"""User provided version aliases so that a model version can be referenced via
alias (i.e. projects/{project}/locations/{location}/models/{model_id}@{version_alias}
instead of auto-generated version id (i.e.
projects/{project}/locations/{location}/models/{model_id}@{version_id}).
The format is [a-z][a-zA-Z0-9-]{0,126}[a-z0-9] to distinguish from
version_id. A default version alias will be created for the first version
of the model, and there must be exactly one default version alias for a model.
"""
self._assert_gca_resource_is_available()
return getattr(self._gca_resource, "version_aliases")
@property
def version_create_time(self) -> timestamp_pb2.Timestamp:
"""Timestamp when this version was created."""
self._assert_gca_resource_is_available()
return getattr(self._gca_resource, "version_create_time")
@property
def version_update_time(self) -> timestamp_pb2.Timestamp:
"""Timestamp when this version was updated."""
self._assert_gca_resource_is_available()
return getattr(self._gca_resource, "version_update_time")
@property
def version_description(self) -> str:
"""The description of this version."""
self._assert_gca_resource_is_available()
return getattr(self._gca_resource, "version_description")
@property
def resource_name(self) -> str:
"""Full qualified resource name, without any version ID."""
self._assert_gca_resource_is_available()
return ModelRegistry._parse_versioned_name(self._gca_resource.name)[0]
@property
def name(self) -> str:
"""Name of this resource."""
self._assert_gca_resource_is_available()
return ModelRegistry._parse_versioned_name(super().name)[0]
@property
def versioned_resource_name(self) -> str:
"""The fully-qualified resource name, including the version ID. For example,
projects/{project}/locations/{location}/models/{model_id}@{version_id}
"""
self._assert_gca_resource_is_available()
return ModelRegistry._get_versioned_name(
self.resource_name,
self.version_id,
)
@property
def versioning_registry(self) -> "ModelRegistry":
"""The registry of model versions associated with this
Model instance."""
return self._registry
def __init__(
self,
model_name: str,
project: Optional[str] = None,
location: Optional[str] = None,
credentials: Optional[auth_credentials.Credentials] = None,
version: Optional[str] = None,
):
"""Retrieves the model resource and instantiates its representation.
Args:
model_name (str):
Required. A fully-qualified model resource name or model ID.
Example: "projects/123/locations/us-central1/models/456" or
"456" when project and location are initialized or passed.
May optionally contain a version ID or version alias in
{model_name}@{version} form. See version arg.
project (str):
Optional project to retrieve model from. If not set, project
set in aiplatform.init will be used.
location (str):
Optional location to retrieve model from. If not set, location
set in aiplatform.init will be used.
credentials: Optional[auth_credentials.Credentials]=None,
Custom credentials to use to upload this model. If not set,
credentials set in aiplatform.init will be used.
version (str):
Optional. Version ID or version alias.
When set, the specified model version will be targeted
unless overridden in method calls.
When not set, the model with the "default" alias will
be targeted unless overridden in method calls.
No behavior change if only one version of a model exists.
Raises:
ValueError: If `version` is passed alongside a model_name referencing a different version.
"""
# If the version was passed in model_name, parse it
model_name, parsed_version = ModelRegistry._parse_versioned_name(model_name)
if parsed_version:
if version and version != parsed_version:
raise ValueError(
f"A version of {version} was passed that conflicts with the version of {parsed_version} in the model_name."
)
version = parsed_version
super().__init__(
project=project,
location=location,
credentials=credentials,
resource_name=model_name,
)
# Model versions can include @{version} in the resource name.
self._resource_id_validator = super()._revisioned_resource_id_validator
# Create a versioned model_name, if it exists, for getting the GCA model
versioned_model_name = ModelRegistry._get_versioned_name(model_name, version)
self._gca_resource = self._get_gca_resource(resource_name=versioned_model_name)
# Create ModelRegistry with the unversioned resource name
self._registry = ModelRegistry(
self.resource_name,
location=location,
project=project,
credentials=credentials,
)
def update(
self,
display_name: Optional[str] = None,
description: Optional[str] = None,
labels: Optional[Dict[str, str]] = None,
) -> "Model":
"""Updates a model.
Example usage:
my_model = my_model.update(
display_name="my-model",
description="my description",
labels={'key': 'value'},
)
Args:
display_name (str):
The display name of the Model. The name can be up to 128
characters long and can consist of any UTF-8 characters.
description (str):
The description of the model.
labels (Dict[str, str]):
Optional. The labels with user-defined metadata to
organize your Models.
Label keys and values can be no longer than 64
characters (Unicode codepoints), can only
contain lowercase letters, numeric characters,
underscores and dashes. International characters
are allowed.
See https://goo.gl/xmQnxf for more information
and examples of labels.
Returns:
model (aiplatform.Model):
Updated model resource.
Raises:
ValueError: If `labels` is not the correct format.
"""
self.wait()
current_model_proto = self.gca_resource
copied_model_proto = current_model_proto.__class__(current_model_proto)
update_mask: List[str] = []
# Updates to base model properties cannot occur if a versioned model is passed.
# Use the unversioned model resource name.
copied_model_proto.name = self.resource_name
if display_name:
utils.validate_display_name(display_name)
copied_model_proto.display_name = display_name
update_mask.append("display_name")
if description:
copied_model_proto.description = description
update_mask.append("description")
if labels:
utils.validate_labels(labels)
copied_model_proto.labels = labels
update_mask.append("labels")
update_mask = field_mask_pb2.FieldMask(paths=update_mask)
self.api_client.update_model(model=copied_model_proto, update_mask=update_mask)
self._sync_gca_resource()
return self
# TODO(b/170979926) Add support for metadata and metadata schema
@classmethod
@base.optional_sync()
def upload(
cls,
serving_container_image_uri: Optional[str] = None,
*,
artifact_uri: Optional[str] = None,
model_id: Optional[str] = None,
parent_model: Optional[str] = None,
is_default_version: bool = True,
version_aliases: Optional[Sequence[str]] = None,
version_description: Optional[str] = None,
serving_container_predict_route: Optional[str] = None,
serving_container_health_route: Optional[str] = None,
description: Optional[str] = None,
serving_container_command: Optional[Sequence[str]] = None,
serving_container_args: Optional[Sequence[str]] = None,
serving_container_environment_variables: Optional[Dict[str, str]] = None,
serving_container_ports: Optional[Sequence[int]] = None,
serving_container_grpc_ports: Optional[Sequence[int]] = None,
local_model: Optional["LocalModel"] = None,
instance_schema_uri: Optional[str] = None,
parameters_schema_uri: Optional[str] = None,
prediction_schema_uri: Optional[str] = None,
explanation_metadata: Optional[explain.ExplanationMetadata] = None,
explanation_parameters: Optional[explain.ExplanationParameters] = None,
display_name: Optional[str] = None,
project: Optional[str] = None,
location: Optional[str] = None,
credentials: Optional[auth_credentials.Credentials] = None,
labels: Optional[Dict[str, str]] = None,
encryption_spec_key_name: Optional[str] = None,
staging_bucket: Optional[str] = None,
sync=True,
upload_request_timeout: Optional[float] = None,
serving_container_deployment_timeout: Optional[int] = None,
serving_container_shared_memory_size_mb: Optional[int] = None,
serving_container_startup_probe_exec: Optional[Sequence[str]] = None,
serving_container_startup_probe_period_seconds: Optional[int] = None,
serving_container_startup_probe_timeout_seconds: Optional[int] = None,
serving_container_health_probe_exec: Optional[Sequence[str]] = None,
serving_container_health_probe_period_seconds: Optional[int] = None,
serving_container_health_probe_timeout_seconds: Optional[int] = None,
model_garden_source_model_name: Optional[str] = None,
) -> "Model":
"""Uploads a model and returns a Model representing the uploaded Model
resource.
Example usage:
my_model = Model.upload(
display_name="my-model",
artifact_uri="gs://my-model/saved-model",
serving_container_image_uri="tensorflow/serving"
)
Args:
serving_container_image_uri (str):
Optional. The URI of the Model serving container. This parameter is required
if the parameter `local_model` is not specified.
artifact_uri (str):
Optional. The path to the directory containing the Model artifact and
any of its supporting files. Leave blank for custom container prediction.
Not present for AutoML Models.
model_id (str):
Optional. The ID to use for the uploaded Model, which will
become the final component of the model resource name.
This value may be up to 63 characters, and valid characters
are `[a-z0-9_-]`. The first character cannot be a number or hyphen.
parent_model (str):
Optional. The resource name or model ID of an existing model that the
newly-uploaded model will be a version of.
Only set this field when uploading a new version of an existing model.
is_default_version (bool):
Optional. When set to True, the newly uploaded model version will
automatically have alias "default" included. Subsequent uses of
this model without a version specified will use this "default" version.
When set to False, the "default" alias will not be moved.
Actions targeting the newly-uploaded model version will need
to specifically reference this version by ID or alias.
New model uploads, i.e. version 1, will always be "default" aliased.
version_aliases (Sequence[str]):
Optional. User provided version aliases so that a model version
can be referenced via alias instead of auto-generated version ID.
A default version alias will be created for the first version of the model.
The format is [a-z][a-zA-Z0-9-]{0,126}[a-z0-9]
version_description (str):
Optional. The description of the model version being uploaded.
serving_container_predict_route (str):
Optional. An HTTP path to send prediction requests to the container, and
which must be supported by it. If not specified a default HTTP path will
be used by Vertex AI.
serving_container_health_route (str):
Optional. An HTTP path to send health check requests to the container, and which
must be supported by it. If not specified a standard HTTP path will be
used by Vertex AI.
description (str):
The description of the model.
            serving_container_command (Sequence[str]):
                Optional. The command with which the container is run. Not executed within a
                shell. The Docker image's ENTRYPOINT is used if this is not provided.
                Variable references $(VAR_NAME) are expanded using the container's
                environment. If a variable cannot be resolved, the reference in the
                input string will be unchanged. The $(VAR_NAME) syntax can be escaped
                with a double $$, i.e. $$(VAR_NAME). Escaped references will never be
                expanded, regardless of whether the variable exists or not.
            serving_container_args (Sequence[str]):
                Optional. The arguments to the command. The Docker image's CMD is used if this is
                not provided. Variable references $(VAR_NAME) are expanded using the
                container's environment. If a variable cannot be resolved, the reference
                in the input string will be unchanged. The $(VAR_NAME) syntax can be
                escaped with a double $$, i.e. $$(VAR_NAME). Escaped references will
                never be expanded, regardless of whether the variable exists or not.
            serving_container_environment_variables (Dict[str, str]):
                Optional. The environment variables that are to be present in the container.
Should be a dictionary where keys are environment variable names
and values are environment variable values for those names.
            serving_container_ports (Sequence[int]):
                Optional. Declaration of ports that are exposed by the container. This field is
                primarily informational; it gives Vertex AI information about the
                network connections the container uses. Whether or not a port is listed
                here has no impact on whether the port is actually exposed; any port
                listening on the default "0.0.0.0" address inside a container will be
                accessible from the network.
            serving_container_grpc_ports (Sequence[int]):
                Optional. Declaration of ports that are exposed by the container. Vertex AI sends gRPC
prediction requests that it receives to the first port on this list. Vertex
AI also sends liveness and health checks to this port.
If you do not specify this field, gRPC requests to the container will be
disabled.
Vertex AI does not use ports other than the first one listed. This field
corresponds to the `ports` field of the Kubernetes Containers v1 core API.
local_model (Optional[LocalModel]):
Optional. A LocalModel instance that includes a `serving_container_spec`.
If provided, the `serving_container_spec` of the LocalModel instance
will overwrite the values of all other serving container parameters.
instance_schema_uri (str):
Optional. Points to a YAML file stored on Google Cloud
Storage describing the format of a single instance, which
are used in
``PredictRequest.instances``,
``ExplainRequest.instances``
and
``BatchPredictionJob.input_config``.
The schema is defined as an OpenAPI 3.0.2 `Schema
Object <https://tinyurl.com/y538mdwt#schema-object>`__.
AutoML Models always have this field populated by AI
Platform. Note: The URI given on output will be immutable
and probably different, including the URI scheme, than the
one given on input. The output URI will point to a location
                where the user only has read access.
parameters_schema_uri (str):
Optional. Points to a YAML file stored on Google Cloud
Storage describing the parameters of prediction and
explanation via
``PredictRequest.parameters``,
``ExplainRequest.parameters``
and
``BatchPredictionJob.model_parameters``.
The schema is defined as an OpenAPI 3.0.2 `Schema
Object <https://tinyurl.com/y538mdwt#schema-object>`__.
AutoML Models always have this field populated by AI
Platform, if no parameters are supported it is set to an
empty string. Note: The URI given on output will be
immutable and probably different, including the URI scheme,
than the one given on input. The output URI will point to a
                location where the user only has read access.
prediction_schema_uri (str):
Optional. Points to a YAML file stored on Google Cloud
Storage describing the format of a single prediction
produced by this Model, which are returned via
``PredictResponse.predictions``,
``ExplainResponse.explanations``,
and
``BatchPredictionJob.output_config``.
The schema is defined as an OpenAPI 3.0.2 `Schema
Object <https://tinyurl.com/y538mdwt#schema-object>`__.
AutoML Models always have this field populated by AI
Platform. Note: The URI given on output will be immutable
and probably different, including the URI scheme, than the
one given on input. The output URI will point to a location
                where the user only has read access.
explanation_metadata (aiplatform.explain.ExplanationMetadata):
Optional. Metadata describing the Model's input and output for explanation.
`explanation_metadata` is optional while `explanation_parameters` must be
specified when used.
For more details, see `Ref docs <http://tinyurl.com/1igh60kt>`
explanation_parameters (aiplatform.explain.ExplanationParameters):
Optional. Parameters to configure explaining for Model's predictions.
For more details, see `Ref docs <http://tinyurl.com/1an4zake>`
display_name (str):
Optional. The display name of the Model. The name can be up to 128
                characters long and can consist of any UTF-8 characters.
            project (str):
                Optional. Project to upload this model to. Overrides project set in
                aiplatform.init.
            location (str):
                Optional. Location to upload this model to. Overrides location set in
                aiplatform.init.
            credentials (auth_credentials.Credentials):
                Optional. Custom credentials to use to upload this model. Overrides credentials
                set in aiplatform.init.
labels (Dict[str, str]):
Optional. The labels with user-defined metadata to
organize your Models.
Label keys and values can be no longer than 64
characters (Unicode codepoints), can only
contain lowercase letters, numeric characters,
underscores and dashes. International characters
are allowed.
See https://goo.gl/xmQnxf for more information
and examples of labels.
encryption_spec_key_name (Optional[str]):
Optional. The Cloud KMS resource identifier of the customer
managed encryption key used to protect the model. Has the
form:
``projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key``.
The key needs to be in the same region as where the compute
resource is created.
If set, this Model and all sub-resources of this Model will be secured by this key.
Overrides encryption_spec_key_name set in aiplatform.init.
staging_bucket (str):
Optional. Bucket to stage local model artifacts. Overrides
staging_bucket set in aiplatform.init.
upload_request_timeout (float):
Optional. The timeout for the upload request in seconds.
serving_container_deployment_timeout (int):
Optional. Deployment timeout in seconds.
serving_container_shared_memory_size_mb (int):
Optional. The amount of the VM memory to reserve as the shared
memory for the model in megabytes.
serving_container_startup_probe_exec (Sequence[str]):
Optional. Exec specifies the action to take. Used by startup
probe. An example of this argument would be
["cat", "/tmp/healthy"]
serving_container_startup_probe_period_seconds (int):
Optional. How often (in seconds) to perform the startup probe.
                Defaults to 10 seconds. Minimum value is 1.
serving_container_startup_probe_timeout_seconds (int):
Optional. Number of seconds after which the startup probe times
out. Defaults to 1 second. Minimum value is 1.
serving_container_health_probe_exec (Sequence[str]):
Optional. Exec specifies the action to take. Used by health
probe. An example of this argument would be
["cat", "/tmp/healthy"]
serving_container_health_probe_period_seconds (int):
Optional. How often (in seconds) to perform the health probe.
                Defaults to 10 seconds. Minimum value is 1.
serving_container_health_probe_timeout_seconds (int):
Optional. Number of seconds after which the health probe times
out. Defaults to 1 second. Minimum value is 1.
model_garden_source_model_name:
Optional. The model garden source model resource name if the
model is from Vertex Model Garden.
Returns:
model (aiplatform.Model):
Instantiated representation of the uploaded model resource.
Raises:
ValueError: If explanation_metadata is specified while explanation_parameters
is not.
Also if model directory does not contain a supported model file.
If `local_model` is specified but `serving_container_spec.image_uri`
in the `local_model` is None.
If `local_model` is not specified and `serving_container_image_uri`
is None.
"""
if not display_name:
display_name = cls._generate_display_name()
utils.validate_display_name(display_name)
if labels:
utils.validate_labels(labels)
appended_user_agent = None
if local_model:
container_spec = local_model.get_serving_container_spec()
appended_user_agent = [prediction_constants.CUSTOM_PREDICTION_ROUTINES]
elif not serving_container_image_uri and not artifact_uri:
            # It's a referenced/placeholder model.
container_spec = None
else:
if not serving_container_image_uri:
raise ValueError(
"The parameter `serving_container_image_uri` is required "
"if no `local_model` is provided."
)
env = None
ports = None
grpc_ports = None
deployment_timeout = (
duration_pb2.Duration(seconds=serving_container_deployment_timeout)
if serving_container_deployment_timeout
else None
)
startup_probe = None
health_probe = None
if serving_container_environment_variables:
env = [
gca_env_var_compat.EnvVar(name=str(key), value=str(value))
for key, value in serving_container_environment_variables.items()
]
if serving_container_ports:
ports = [
gca_model_compat.Port(container_port=port)
for port in serving_container_ports
]
if serving_container_grpc_ports:
grpc_ports = [
gca_model_compat.Port(container_port=port)
for port in serving_container_grpc_ports
]
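            # Assemble optional startup and health probes for the serving
            # container spec; each probe is only built when at least one of its
            # exec/period/timeout arguments is provided.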
if (
serving_container_startup_probe_exec
or serving_container_startup_probe_period_seconds
or serving_container_startup_probe_timeout_seconds
):
startup_probe_exec = None
if serving_container_startup_probe_exec:
startup_probe_exec = gca_model_compat.Probe.ExecAction(
command=serving_container_startup_probe_exec
)
startup_probe = gca_model_compat.Probe(
exec=startup_probe_exec,
period_seconds=serving_container_startup_probe_period_seconds,
timeout_seconds=serving_container_startup_probe_timeout_seconds,
)
if (
serving_container_health_probe_exec
or serving_container_health_probe_period_seconds
or serving_container_health_probe_timeout_seconds
):
health_probe_exec = None
if serving_container_health_probe_exec:
health_probe_exec = gca_model_compat.Probe.ExecAction(
command=serving_container_health_probe_exec
)
health_probe = gca_model_compat.Probe(
exec=health_probe_exec,
period_seconds=serving_container_health_probe_period_seconds,
timeout_seconds=serving_container_health_probe_timeout_seconds,
)
container_spec = gca_model_compat.ModelContainerSpec(
image_uri=serving_container_image_uri,
command=serving_container_command,
args=serving_container_args,
env=env,
ports=ports,
grpc_ports=grpc_ports,
predict_route=serving_container_predict_route,
health_route=serving_container_health_route,
deployment_timeout=deployment_timeout,
shared_memory_size_mb=serving_container_shared_memory_size_mb,
startup_probe=startup_probe,
health_probe=health_probe,
)
model_predict_schemata = None
if any([instance_schema_uri, parameters_schema_uri, prediction_schema_uri]):
model_predict_schemata = gca_model_compat.PredictSchemata(
instance_schema_uri=instance_schema_uri,
parameters_schema_uri=parameters_schema_uri,
prediction_schema_uri=prediction_schema_uri,
)
# TODO(b/182388545) initializer.global_config.get_encryption_spec from a sync function
encryption_spec = initializer.global_config.get_encryption_spec(
encryption_spec_key_name=encryption_spec_key_name,
)
parent_model = ModelRegistry._get_true_version_parent(
location=location, project=project, parent_model=parent_model
)
version_aliases = ModelRegistry._get_true_alias_list(
version_aliases=version_aliases, is_default_version=is_default_version
)
base_model_source = None
if model_garden_source_model_name:
base_model_source = gca_model_compat.Model.BaseModelSource(
model_garden_source=gca_model_compat.ModelGardenSource(
public_model_name=model_garden_source_model_name
)
)
managed_model = gca_model_compat.Model(
display_name=display_name,
description=description,
version_aliases=version_aliases,
version_description=version_description,
container_spec=container_spec,
predict_schemata=model_predict_schemata,
labels=labels,
encryption_spec=encryption_spec,
base_model_source=base_model_source,
)
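        # If artifact_uri is a local path, validate it (and, when a prebuilt
        # prediction image is used, require a supported model file) before
        # staging the artifacts in GCS so the upload request references a
        # gs:// URI.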
if artifact_uri and not artifact_uri.startswith("gs://"):
model_dir = pathlib.Path(artifact_uri)
# Validating the model directory
if not model_dir.exists():
raise ValueError(f"artifact_uri path does not exist: '{artifact_uri}'")
PREBUILT_IMAGE_RE = "(us|europe|asia)-docker.pkg.dev/vertex-ai/prediction/"
if serving_container_image_uri and re.match(
PREBUILT_IMAGE_RE, serving_container_image_uri
):
if not model_dir.is_dir():
raise ValueError(
f"artifact_uri path must be a directory: '{artifact_uri}' when using prebuilt image '{serving_container_image_uri}'"
)
if not any(
(model_dir / file_name).exists()
for file_name in _SUPPORTED_MODEL_FILE_NAMES
):
raise ValueError(
"artifact_uri directory does not contain any supported model files. "
f"When using a prebuilt serving image, the upload method only supports the following model files: '{_SUPPORTED_MODEL_FILE_NAMES}'"
)
# Uploading the model
staged_data_uri = gcs_utils.stage_local_data_in_gcs(
data_path=str(model_dir),
staging_gcs_dir=staging_bucket,
project=project,
location=location,
credentials=credentials,
)
artifact_uri = staged_data_uri
if artifact_uri:
managed_model.artifact_uri = artifact_uri
managed_model.explanation_spec = (
_explanation_utils.create_and_validate_explanation_spec(
explanation_metadata=explanation_metadata,
explanation_parameters=explanation_parameters,
)
)
request = gca_model_service_compat.UploadModelRequest(
parent=initializer.global_config.common_location_path(project, location),
model=managed_model,
parent_model=parent_model,
model_id=model_id,
)
api_client = cls._instantiate_client(
location, credentials, appended_user_agent=appended_user_agent
)
lro = api_client.upload_model(
request=request,
timeout=upload_request_timeout,
)
_LOGGER.log_create_with_lro(cls, lro)
model_upload_response = lro.result()
this_model = cls(
model_upload_response.model, version=model_upload_response.model_version_id
)
_LOGGER.log_create_complete(cls, this_model._gca_resource, "model")
return this_model
def deploy(
self,
endpoint: Optional[Union["Endpoint", "PrivateEndpoint"]] = None,
deployed_model_display_name: Optional[str] = None,
traffic_percentage: Optional[int] = 0,
traffic_split: Optional[Dict[str, int]] = None,
machine_type: Optional[str] = None,
min_replica_count: int = 1,
max_replica_count: int = 1,
accelerator_type: Optional[str] = None,
accelerator_count: Optional[int] = None,
tpu_topology: Optional[str] = None,
service_account: Optional[str] = None,
explanation_metadata: Optional[aiplatform.explain.ExplanationMetadata] = None,
explanation_parameters: Optional[
aiplatform.explain.ExplanationParameters
] = None,
metadata: Optional[Sequence[Tuple[str, str]]] = (),
encryption_spec_key_name: Optional[str] = None,
network: Optional[str] = None,
sync=True,
deploy_request_timeout: Optional[float] = None,
autoscaling_target_cpu_utilization: Optional[int] = None,
autoscaling_target_accelerator_duty_cycle: Optional[int] = None,
enable_access_logging=False,
disable_container_logging: bool = False,
private_service_connect_config: Optional[
PrivateEndpoint.PrivateServiceConnectConfig
] = None,
deployment_resource_pool: Optional[DeploymentResourcePool] = None,
reservation_affinity_type: Optional[str] = None,
reservation_affinity_key: Optional[str] = None,
reservation_affinity_values: Optional[List[str]] = None,
spot: bool = False,
fast_tryout_enabled: bool = False,
system_labels: Optional[Dict[str, str]] = None,
required_replica_count: Optional[int] = 0,
) -> Union[Endpoint, PrivateEndpoint]:
"""Deploys model to endpoint. Endpoint will be created if unspecified.
Args:
endpoint (Union[Endpoint, PrivateEndpoint]):
Optional. Public or private Endpoint to deploy model to. If not specified,
                the endpoint display name will be the model display name + '_endpoint'.
deployed_model_display_name (str):
Optional. The display name of the DeployedModel. If not provided
upon creation, the Model's display_name is used.
traffic_percentage (int):
Optional. Desired traffic to newly deployed model. Defaults to
0 if there are pre-existing deployed models. Defaults to 100 if
there are no pre-existing deployed models. Negative values should
not be provided. Traffic of previously deployed models at the endpoint
will be scaled down to accommodate new deployed model's traffic.
Should not be provided if traffic_split is provided.
traffic_split (Dict[str, int]):
Optional. A map from a DeployedModel's ID to the percentage of
this Endpoint's traffic that should be forwarded to that DeployedModel.
If a DeployedModel's ID is not listed in this map, then it receives
no traffic. The traffic percentage values must add up to 100, or
map must be empty if the Endpoint is to not accept any traffic at
the moment. Key for model being deployed is "0". Should not be
provided if traffic_percentage is provided.
machine_type (str):
Optional. The type of machine. Not specifying machine type will
                result in the model being deployed with automatic resources.
min_replica_count (int):
Optional. The minimum number of machine replicas this deployed
                model will always be deployed on. If traffic against it increases,
it may dynamically be deployed onto more replicas, and as traffic
decreases, some of these extra replicas may be freed.
max_replica_count (int):
Optional. The maximum number of replicas this deployed model may
be deployed on when the traffic against it increases. If requested
value is too large, the deployment will error, but if deployment
succeeds then the ability to scale the model to that many replicas
is guaranteed (barring service outages). If traffic against the
deployed model increases beyond what its replicas at maximum may
handle, a portion of the traffic will be dropped. If this value
is not provided, the smaller value of min_replica_count or 1 will
be used.
accelerator_type (str):
Optional. Hardware accelerator type. Must also set accelerator_count if used.
One of ACCELERATOR_TYPE_UNSPECIFIED, NVIDIA_TESLA_K80, NVIDIA_TESLA_P100,
NVIDIA_TESLA_V100, NVIDIA_TESLA_P4, NVIDIA_TESLA_T4
accelerator_count (int):
Optional. The number of accelerators to attach to a worker replica.
tpu_topology (str):
Optional. The TPU topology to use for the DeployedModel.
                Required for Cloud TPU multihost deployments.
service_account (str):
The service account that the DeployedModel's container runs as. Specify the
email address of the service account. If this service account is not
specified, the container runs as a service account that doesn't have access
to the resource project.
Users deploying the Model must have the `iam.serviceAccounts.actAs`
permission on this service account.
explanation_metadata (aiplatform.explain.ExplanationMetadata):
Optional. Metadata describing the Model's input and output for explanation.
`explanation_metadata` is optional while `explanation_parameters` must be
specified when used.
For more details, see `Ref docs <http://tinyurl.com/1igh60kt>`
explanation_parameters (aiplatform.explain.ExplanationParameters):
Optional. Parameters to configure explaining for Model's predictions.
For more details, see `Ref docs <http://tinyurl.com/1an4zake>`
metadata (Sequence[Tuple[str, str]]):
Optional. Strings which should be sent along with the request as
metadata.
encryption_spec_key_name (Optional[str]):
Optional. The Cloud KMS resource identifier of the customer
managed encryption key used to protect the model. Has the
form:
``projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key``.
The key needs to be in the same region as where the compute
resource is created.
If set, this Endpoint and all sub-resources of this Endpoint will be secured by this key.
Overrides encryption_spec_key_name set in aiplatform.init.
network (str):
Optional. The full name of the Compute Engine network to which
the Endpoint, if created, will be peered to. E.g. "projects/12345/global/networks/myVPC"
Private services access must already be configured for the network.
If set or aiplatform.init(network=...) has been set, a PrivateEndpoint will be created.
If left unspecified, an Endpoint will be created. Read more about PrivateEndpoints
[in the documentation](https://cloud.google.com/vertex-ai/docs/predictions/using-private-endpoints).
Cannot be set together with private_service_connect_config.
sync (bool):
Whether to execute this method synchronously. If False, this method
will be executed in concurrent Future and any downstream object will
be immediately returned and synced when the Future has completed.
deploy_request_timeout (float):
Optional. The timeout for the deploy request in seconds.
autoscaling_target_cpu_utilization (int):
Optional. Target CPU Utilization to use for Autoscaling Replicas.
A default value of 60 will be used if not specified.
autoscaling_target_accelerator_duty_cycle (int):
Optional. Target Accelerator Duty Cycle.
Must also set accelerator_type and accelerator_count if specified.
A default value of 60 will be used if not specified.
enable_access_logging (bool):
Whether to enable endpoint access logging. Defaults to False.
disable_container_logging (bool):
If True, container logs from the deployed model will not be
written to Cloud Logging. Defaults to False.
private_service_connect_config (PrivateEndpoint.PrivateServiceConnectConfig):
                If provided, the endpoint will be accessible via [Private Service Connect](https://cloud.google.com/vpc/docs/private-service-connect).
Cannot be set together with network.
deployment_resource_pool (DeploymentResourcePool):
Resource pool where the model will be deployed. All models that
are deployed to the same DeploymentResourcePool will be hosted in
a shared model server. If provided, will override replica count
arguments.
reservation_affinity_type (str):
Optional. The type of reservation affinity.
One of NO_RESERVATION, ANY_RESERVATION, SPECIFIC_RESERVATION,
SPECIFIC_THEN_ANY_RESERVATION, SPECIFIC_THEN_NO_RESERVATION
reservation_affinity_key (str):
Optional. Corresponds to the label key of a reservation resource.
To target a SPECIFIC_RESERVATION by name, use `compute.googleapis.com/reservation-name` as the key
and specify the name of your reservation as its value.
reservation_affinity_values (List[str]):
Optional. Corresponds to the label values of a reservation resource.
This must be the full resource name of the reservation.
Format: 'projects/{project_id_or_number}/zones/{zone}/reservations/{reservation_name}'
spot (bool):
Optional. Whether to schedule the deployment workload on spot VMs.
fast_tryout_enabled (bool):
Optional. Defaults to False.
If True, model will be deployed using faster deployment path.
Useful for quick experiments. Not for production workloads. Only
available for most popular models with certain machine types.
system_labels (Dict[str, str]):
Optional. System labels to apply to Model Garden deployments.
System labels are managed by Google for internal use only.
required_replica_count (int):
Optional. Number of required available replicas for the
deployment to succeed. This field is only needed when partial
model deployment/mutation is desired, with a value greater than
                or equal to 1 and less than or equal to min_replica_count. If
set, the model deploy/mutate operation will succeed once
available_replica_count reaches required_replica_count, and the
rest of the replicas will be retried.
Returns:
endpoint (Union[Endpoint, PrivateEndpoint]):
Endpoint with the deployed model.
Raises:
ValueError: If `traffic_split` is set for PrivateEndpoint.
"""
network = network or initializer.global_config.network
Endpoint._validate_deploy_args(
min_replica_count=min_replica_count,
max_replica_count=max_replica_count,
accelerator_type=accelerator_type,
deployed_model_display_name=deployed_model_display_name,
traffic_split=traffic_split,
traffic_percentage=traffic_percentage,
deployment_resource_pool=deployment_resource_pool,
required_replica_count=required_replica_count,
)
if isinstance(endpoint, PrivateEndpoint):
if deployment_resource_pool:
raise ValueError(
"Model co-hosting is not supported for PrivateEndpoint. "
"Try calling deploy() without providing `deployment_resource_pool`."
)
if traffic_split and endpoint.network:
raise ValueError(
"Traffic splitting is not yet supported for PSA based PrivateEndpoint. "
"Try calling deploy() without providing `traffic_split`. "
"A maximum of one model can be deployed to each private Endpoint."
)
explanation_spec = _explanation_utils.create_and_validate_explanation_spec(
explanation_metadata=explanation_metadata,
explanation_parameters=explanation_parameters,
)
return self._deploy(
endpoint=endpoint,
deployed_model_display_name=deployed_model_display_name,
traffic_percentage=traffic_percentage,
traffic_split=traffic_split,
machine_type=machine_type,
min_replica_count=min_replica_count,
max_replica_count=max_replica_count,
accelerator_type=accelerator_type,
accelerator_count=accelerator_count,
tpu_topology=tpu_topology,
reservation_affinity_type=reservation_affinity_type,
reservation_affinity_key=reservation_affinity_key,
reservation_affinity_values=reservation_affinity_values,
service_account=service_account,
explanation_spec=explanation_spec,
metadata=metadata,
encryption_spec_key_name=encryption_spec_key_name
or initializer.global_config.encryption_spec_key_name,
network=network,
sync=sync,
deploy_request_timeout=deploy_request_timeout,
autoscaling_target_cpu_utilization=autoscaling_target_cpu_utilization,
autoscaling_target_accelerator_duty_cycle=autoscaling_target_accelerator_duty_cycle,
spot=spot,
enable_access_logging=enable_access_logging,
disable_container_logging=disable_container_logging,
private_service_connect_config=private_service_connect_config,
deployment_resource_pool=deployment_resource_pool,
fast_tryout_enabled=fast_tryout_enabled,
system_labels=system_labels,
required_replica_count=required_replica_count,
)
def _should_enable_dedicated_endpoint(self, fast_tryout_enabled: bool) -> bool:
"""Check if dedicated endpoint should be enabled for this endpoint.
Returns True if endpoint should be a dedicated endpoint.
"""
return fast_tryout_enabled
@base.optional_sync(return_input_arg="endpoint", bind_future_to_self=False)
def _deploy(
self,
endpoint: Optional[Union["Endpoint", "PrivateEndpoint"]] = None,
deployed_model_display_name: Optional[str] = None,
traffic_percentage: Optional[int] = 0,
traffic_split: Optional[Dict[str, int]] = None,
machine_type: Optional[str] = None,
min_replica_count: int = 1,
max_replica_count: int = 1,
accelerator_type: Optional[str] = None,
accelerator_count: Optional[int] = None,
tpu_topology: Optional[str] = None,
reservation_affinity_type: Optional[str] = None,
reservation_affinity_key: Optional[str] = None,
reservation_affinity_values: Optional[List[str]] = None,
service_account: Optional[str] = None,
explanation_spec: Optional[aiplatform.explain.ExplanationSpec] = None,
metadata: Optional[Sequence[Tuple[str, str]]] = (),
encryption_spec_key_name: Optional[str] = None,
network: Optional[str] = None,
sync: bool = True,
deploy_request_timeout: Optional[float] = None,
autoscaling_target_cpu_utilization: Optional[int] = None,
autoscaling_target_accelerator_duty_cycle: Optional[int] = None,
spot: bool = False,
enable_access_logging=False,
disable_container_logging: bool = False,
private_service_connect_config: Optional[
PrivateEndpoint.PrivateServiceConnectConfig
] = None,
deployment_resource_pool: Optional[DeploymentResourcePool] = None,
fast_tryout_enabled: bool = False,
system_labels: Optional[Dict[str, str]] = None,
required_replica_count: Optional[int] = 0,
) -> Union[Endpoint, PrivateEndpoint]:
"""Deploys model to endpoint. Endpoint will be created if unspecified.
Args:
endpoint (Union[Endpoint, PrivateEndpoint]):
Optional. Public or private Endpoint to deploy model to. If not specified,
                the endpoint display name will be the model display name + '_endpoint'.
deployed_model_display_name (str):
Optional. The display name of the DeployedModel. If not provided
upon creation, the Model's display_name is used.
traffic_percentage (int):
Optional. Desired traffic to newly deployed model. Defaults to
0 if there are pre-existing deployed models. Defaults to 100 if
there are no pre-existing deployed models. Negative values should
not be provided. Traffic of previously deployed models at the endpoint
will be scaled down to accommodate new deployed model's traffic.
Should not be provided if traffic_split is provided.
traffic_split (Dict[str, int]):
Optional. A map from a DeployedModel's ID to the percentage of
this Endpoint's traffic that should be forwarded to that DeployedModel.
If a DeployedModel's ID is not listed in this map, then it receives
no traffic. The traffic percentage values must add up to 100, or
map must be empty if the Endpoint is to not accept any traffic at
the moment. Key for model being deployed is "0". Should not be
provided if traffic_percentage is provided.
machine_type (str):
Optional. The type of machine. Not specifying machine type will
                result in the model being deployed with automatic resources.
min_replica_count (int):
Optional. The minimum number of machine replicas this deployed
                model will always be deployed on. If traffic against it increases,
it may dynamically be deployed onto more replicas, and as traffic
decreases, some of these extra replicas may be freed.
max_replica_count (int):
Optional. The maximum number of replicas this deployed model may
be deployed on when the traffic against it increases. If requested
value is too large, the deployment will error, but if deployment
succeeds then the ability to scale the model to that many replicas
is guaranteed (barring service outages). If traffic against the
deployed model increases beyond what its replicas at maximum may
handle, a portion of the traffic will be dropped. If this value
is not provided, the smaller value of min_replica_count or 1 will
be used.
accelerator_type (str):
Optional. Hardware accelerator type. Must also set accelerator_count if used.
One of ACCELERATOR_TYPE_UNSPECIFIED, NVIDIA_TESLA_K80, NVIDIA_TESLA_P100,
NVIDIA_TESLA_V100, NVIDIA_TESLA_P4, NVIDIA_TESLA_T4
accelerator_count (int):
Optional. The number of accelerators to attach to a worker replica.
tpu_topology (str):
Optional. The TPU topology to use for the DeployedModel.
                Required for Cloud TPU multihost deployments.
reservation_affinity_type (str):
Optional. The type of reservation affinity.
One of NO_RESERVATION, ANY_RESERVATION, SPECIFIC_RESERVATION,
SPECIFIC_THEN_ANY_RESERVATION, SPECIFIC_THEN_NO_RESERVATION
reservation_affinity_key (str):
Optional. Corresponds to the label key of a reservation resource.
To target a SPECIFIC_RESERVATION by name, use `compute.googleapis.com/reservation-name` as the key
and specify the name of your reservation as its value.
reservation_affinity_values (List[str]):
Optional. Corresponds to the label values of a reservation resource.
This must be the full resource name of the reservation.
Format: 'projects/{project_id_or_number}/zones/{zone}/reservations/{reservation_name}'
service_account (str):
The service account that the DeployedModel's container runs as. Specify the
email address of the service account. If this service account is not
specified, the container runs as a service account that doesn't have access
to the resource project.
Users deploying the Model must have the `iam.serviceAccounts.actAs`
permission on this service account.
explanation_spec (aiplatform.explain.ExplanationSpec):
Optional. Specification of Model explanation.
metadata (Sequence[Tuple[str, str]]):
Optional. Strings which should be sent along with the request as
metadata.
encryption_spec_key_name (Optional[str]):
Optional. The Cloud KMS resource identifier of the customer
managed encryption key used to protect the model. Has the
form:
``projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key``.
The key needs to be in the same region as where the compute
resource is created.
If set, this Model and all sub-resources of this Model will be secured by this key.
Overrides encryption_spec_key_name set in aiplatform.init
network (str):
Optional. The full name of the Compute Engine network to which
the Endpoint, if created, will be peered to. E.g. "projects/12345/global/networks/myVPC".
Private services access must already be configured for the network.
Read more about PrivateEndpoints
[in the documentation](https://cloud.google.com/vertex-ai/docs/predictions/using-private-endpoints).
Cannot be set together with private_service_connect_config.
sync (bool):
Whether to execute this method synchronously. If False, this method
will be executed in concurrent Future and any downstream object will
be immediately returned and synced when the Future has completed.
deploy_request_timeout (float):
Optional. The timeout for the deploy request in seconds.
autoscaling_target_cpu_utilization (int):
Optional. Target CPU Utilization to use for Autoscaling Replicas.
A default value of 60 will be used if not specified.
autoscaling_target_accelerator_duty_cycle (int):
Optional. Target Accelerator Duty Cycle.
Must also set accelerator_type and accelerator_count if specified.
A default value of 60 will be used if not specified.
spot (bool):
Optional. Whether to schedule the deployment workload on spot VMs.
enable_access_logging (bool):
Whether to enable endpoint access logging. Defaults to False.
disable_container_logging (bool):
If True, container logs from the deployed model will not be
written to Cloud Logging. Defaults to False.
private_service_connect_config (PrivateEndpoint.PrivateServiceConnectConfig):
                If provided, the endpoint will be accessible via [Private Service Connect](https://cloud.google.com/vpc/docs/private-service-connect).
Cannot be set together with network.
deployment_resource_pool (DeploymentResourcePool):
Optional. Resource pool where the model will be deployed. All models that
are deployed to the same DeploymentResourcePool will be hosted in
a shared model server. If provided, will override replica count
arguments.
fast_tryout_enabled (bool):
Optional. Defaults to False.
If True, model will be deployed using faster deployment path.
Useful for quick experiments. Not for production workloads. Only
available for most popular models with certain machine types.
system_labels (Dict[str, str]):
Optional. System labels to apply to Model Garden deployments.
System labels are managed by Google for internal use only.
required_replica_count (int):
Optional. Number of required available replicas for the
deployment to succeed. This field is only needed when partial
model deployment/mutation is desired, with a value greater than
                or equal to 1 and less than or equal to min_replica_count. If
set, the model deploy/mutate operation will succeed once
available_replica_count reaches required_replica_count, and the
rest of the replicas will be retried.
Returns:
endpoint (Union[Endpoint, PrivateEndpoint]):
Endpoint with the deployed model.
"""
if endpoint is None:
display_name = self.display_name[:118] + "_endpoint"
if not network and not private_service_connect_config:
endpoint = Endpoint.create(
display_name=display_name,
project=self.project,
location=self.location,
credentials=self.credentials,
encryption_spec_key_name=encryption_spec_key_name,
dedicated_endpoint_enabled=self._should_enable_dedicated_endpoint(
fast_tryout_enabled
),
)
else:
endpoint = PrivateEndpoint.create(
display_name=display_name,
network=network,
project=self.project,
location=self.location,
credentials=self.credentials,
encryption_spec_key_name=encryption_spec_key_name,
private_service_connect_config=private_service_connect_config,
)
_LOGGER.log_action_start_against_resource("Deploying model to", "", endpoint)
endpoint._deploy_call(
endpoint.api_client,
endpoint.resource_name,
self,
endpoint._gca_resource.traffic_split,
network=network or endpoint.network,
deployed_model_display_name=deployed_model_display_name,
traffic_percentage=traffic_percentage,
traffic_split=traffic_split,
machine_type=machine_type,
min_replica_count=min_replica_count,
max_replica_count=max_replica_count,
accelerator_type=accelerator_type,
accelerator_count=accelerator_count,
tpu_topology=tpu_topology,
reservation_affinity_type=reservation_affinity_type,
reservation_affinity_key=reservation_affinity_key,
reservation_affinity_values=reservation_affinity_values,
service_account=service_account,
explanation_spec=explanation_spec,
metadata=metadata,
deploy_request_timeout=deploy_request_timeout,
autoscaling_target_cpu_utilization=autoscaling_target_cpu_utilization,
autoscaling_target_accelerator_duty_cycle=autoscaling_target_accelerator_duty_cycle,
spot=spot,
enable_access_logging=enable_access_logging,
disable_container_logging=disable_container_logging,
deployment_resource_pool=deployment_resource_pool,
fast_tryout_enabled=fast_tryout_enabled,
system_labels=system_labels,
required_replica_count=required_replica_count,
)
_LOGGER.log_action_completed_against_resource("model", "deployed", endpoint)
endpoint._sync_gca_resource()
return endpoint
def batch_predict(
self,
job_display_name: Optional[str] = None,
gcs_source: Optional[Union[str, Sequence[str]]] = None,
bigquery_source: Optional[str] = None,
instances_format: str = "jsonl",
gcs_destination_prefix: Optional[str] = None,
bigquery_destination_prefix: Optional[str] = None,
predictions_format: str = "jsonl",
model_parameters: Optional[Dict] = None,
machine_type: Optional[str] = None,
accelerator_type: Optional[str] = None,
accelerator_count: Optional[int] = None,
starting_replica_count: Optional[int] = None,
max_replica_count: Optional[int] = None,
generate_explanation: Optional[bool] = False,
explanation_metadata: Optional[aiplatform.explain.ExplanationMetadata] = None,
explanation_parameters: Optional[
aiplatform.explain.ExplanationParameters
] = None,
labels: Optional[Dict[str, str]] = None,
credentials: Optional[auth_credentials.Credentials] = None,
encryption_spec_key_name: Optional[str] = None,
sync: bool = True,
create_request_timeout: Optional[float] = None,
batch_size: Optional[int] = None,
service_account: Optional[str] = None,
) -> jobs.BatchPredictionJob:
"""Creates a batch prediction job using this Model and outputs
prediction results to the provided destination prefix in the specified
`predictions_format`. One source and one destination prefix are
required.
Example usage:
my_model.batch_predict(
job_display_name="prediction-123",
gcs_source="gs://example-bucket/instances.csv",
instances_format="csv",
bigquery_destination_prefix="projectId.bqDatasetId.bqTableId"
)
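        Example using a BigQuery source and a Cloud Storage destination (the
        table, bucket, and machine type below are illustrative placeholders):
            my_model.batch_predict(
                job_display_name="prediction-456",
                bigquery_source="bq://projectId.bqDatasetId.bqTableId",
                gcs_destination_prefix="gs://example-bucket/output/",
                machine_type="n1-standard-4",
            )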
Args:
job_display_name (str):
Optional. The user-defined name of the BatchPredictionJob.
                The name can be up to 128 characters long and can consist
of any UTF-8 characters.
            gcs_source (Union[str, Sequence[str]]):
                Optional. Google Cloud Storage URI(-s) to your instances to run
batch prediction on. They must match `instances_format`.
            bigquery_source (str):
                Optional. BigQuery URI to a table, up to 2000 characters long. For example:
`bq://projectId.bqDatasetId.bqTableId`
            instances_format (str):
The format in which instances are provided. Must be one
of the formats listed in `Model.supported_input_storage_formats`.
Default is "jsonl" when using `gcs_source`. If a `bigquery_source`
is provided, this is overridden to "bigquery".
            gcs_destination_prefix (str):
                Optional. The Google Cloud Storage location of the directory where the
output is to be written to. In the given directory a new
directory is created. Its name is
``prediction-<model-display-name>-<job-create-time>``, where
timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format.
Inside of it files ``predictions_0001.<extension>``,
``predictions_0002.<extension>``, ...,
``predictions_N.<extension>`` are created where
``<extension>`` depends on chosen ``predictions_format``,
and N may equal 0001 and depends on the total number of
successfully predicted instances. If the Model has both
``instance`` and ``prediction`` schemata defined then each such
file contains predictions as per the ``predictions_format``.
If prediction for any instance failed (partially or
completely), then an additional ``errors_0001.<extension>``,
``errors_0002.<extension>``,..., ``errors_N.<extension>``
files are created (N depends on total number of failed
predictions). These files contain the failed instances, as
per their schema, followed by an additional ``error`` field
                which as value has ``google.rpc.Status``
containing only ``code`` and ``message`` fields.
            bigquery_destination_prefix (str):
                Optional. The BigQuery URI to a project or table, up to 2000 characters long.
                When only the project is specified, the Dataset and Table are created.
When the full table reference is specified, the Dataset must exist and
table must not exist. Accepted forms: ``bq://projectId`` or
``bq://projectId.bqDatasetId``. If no Dataset is specified,
a new one is created with the name
``prediction_<model-display-name>_<job-create-time>``
where the table name is made BigQuery-dataset-name compatible
(for example, most special characters become underscores), and
timestamp is in YYYY_MM_DDThh_mm_ss_sssZ "based on ISO-8601"
format. In the dataset two tables will be created, ``predictions``,
and ``errors``. If the Model has both ``instance`` and
``prediction`` schemata defined then the tables have columns as
follows: The ``predictions`` table contains instances for which
the prediction succeeded, it has columns as per a concatenation
of the Model's instance and prediction schemata. The ``errors``
table contains rows for which the prediction has failed, it has
instance columns, as per the instance schema, followed by a single
"errors" column, which as values has ```google.rpc.Status`` <Status>`__
represented as a STRUCT, and containing only ``code`` and ``message``.
            predictions_format (str):
                Optional. The format in which Vertex AI outputs the
predictions, must be one of the formats specified in
`Model.supported_output_storage_formats`.
Default is "jsonl" when using `gcs_destination_prefix`. If a
`bigquery_destination_prefix` is provided, this is overridden to
"bigquery".
            model_parameters (Dict):
Optional. The parameters that govern the predictions. The schema of
the parameters may be specified via the Model's `parameters_schema_uri`.
            machine_type (str):
Optional. The type of machine for running batch prediction on
dedicated resources. Not specifying machine type will result in
batch prediction job being run with automatic resources.
            accelerator_type (str):
Optional. The type of accelerator(s) that may be attached
to the machine as per `accelerator_count`. Only used if
`machine_type` is set.
            accelerator_count (int):
Optional. The number of accelerators to attach to the
`machine_type`. Only used if `machine_type` is set.
            starting_replica_count (int):
                Optional. The number of machine replicas used at the start of the batch
                operation. If not set, Vertex AI decides the starting number, not
greater than `max_replica_count`. Only used if `machine_type` is
set.
            max_replica_count (int):
                Optional. The maximum number of machine replicas the batch operation may
be scaled to. Only used if `machine_type` is set.
Default is 10.
generate_explanation (bool):
Optional. Generate explanation along with the batch prediction
results. This will cause the batch prediction output to include
explanations based on the `prediction_format`:
- `bigquery`: output includes a column named `explanation`. The value
is a struct that conforms to the [aiplatform.gapic.Explanation] object.
- `jsonl`: The JSON objects on each line include an additional entry
keyed `explanation`. The value of the entry is a JSON object that
conforms to the [aiplatform.gapic.Explanation] object.
- `csv`: Generating explanations for CSV format is not supported.
explanation_metadata (aiplatform.explain.ExplanationMetadata):
Optional. Explanation metadata configuration for this BatchPredictionJob.
Can be specified only if `generate_explanation` is set to `True`.
This value overrides the value of `Model.explanation_metadata`.
All fields of `explanation_metadata` are optional in the request. If
a field of the `explanation_metadata` object is not populated, the
corresponding field of the `Model.explanation_metadata` object is inherited.
For more details, see `Ref docs <http://tinyurl.com/1igh60kt>`
explanation_parameters (aiplatform.explain.ExplanationParameters):
Optional. Parameters to configure explaining for Model's predictions.
Can be specified only if `generate_explanation` is set to `True`.
This value overrides the value of `Model.explanation_parameters`.
All fields of `explanation_parameters` are optional in the request. If
a field of the `explanation_parameters` object is not populated, the
corresponding field of the `Model.explanation_parameters` object is inherited.
For more details, see `Ref docs <http://tinyurl.com/1an4zake>`
            labels (Dict[str, str]):
Optional. The labels with user-defined metadata to organize your
BatchPredictionJobs. Label keys and values can be no longer than
64 characters (Unicode codepoints), can only contain lowercase
letters, numeric characters, underscores and dashes.
International characters are allowed. See https://goo.gl/xmQnxf
for more information and examples of labels.
            credentials (auth_credentials.Credentials):
Optional. Custom credentials to use to create this batch prediction
job. Overrides credentials set in aiplatform.init.
encryption_spec_key_name (Optional[str]):
Optional. The Cloud KMS resource identifier of the customer
managed encryption key used to protect the model. Has the
form:
``projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key``.
The key needs to be in the same region as where the compute
resource is created.
If set, this Model and all sub-resources of this Model will be secured by this key.
Overrides encryption_spec_key_name set in aiplatform.init.
create_request_timeout (float):
Optional. The timeout for the create request in seconds.
batch_size (int):
                Optional. The number of records (e.g. instances) of the operation given in each batch
                to a machine replica. Machine type and size of a single record should be considered
                when setting this parameter: a higher value speeds up the batch operation's execution,
                but a value that is too high will result in a whole batch not fitting in a machine's memory,
                and the whole operation will fail.
The default value is 64.
service_account (str):
Optional. Specifies the service account for workload run-as account.
Users submitting jobs must have act-as permission on this run-as account.
Returns:
job (jobs.BatchPredictionJob):
Instantiated representation of the created batch prediction job.
"""
return jobs.BatchPredictionJob.create(
job_display_name=job_display_name,
model_name=self,
instances_format=instances_format,
predictions_format=predictions_format,
gcs_source=gcs_source,
bigquery_source=bigquery_source,
gcs_destination_prefix=gcs_destination_prefix,
bigquery_destination_prefix=bigquery_destination_prefix,
model_parameters=model_parameters,
machine_type=machine_type,
accelerator_type=accelerator_type,
accelerator_count=accelerator_count,
starting_replica_count=starting_replica_count,
max_replica_count=max_replica_count,
batch_size=batch_size,
generate_explanation=generate_explanation,
explanation_metadata=explanation_metadata,
explanation_parameters=explanation_parameters,
labels=labels,
project=self.project,
location=self.location,
credentials=credentials or self.credentials,
encryption_spec_key_name=encryption_spec_key_name,
sync=sync,
create_request_timeout=create_request_timeout,
service_account=service_account,
)
@classmethod
def list(
cls,
filter: Optional[str] = None,
order_by: Optional[str] = None,
project: Optional[str] = None,
location: Optional[str] = None,
credentials: Optional[auth_credentials.Credentials] = None,
) -> List["models.Model"]:
"""List all Model resource instances.
Example Usage:
aiplatform.Model.list(
filter='labels.my_label="my_label_value" AND display_name="my_model"',
)
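        Example of ordering results by creation time, newest first (the
        ordering expression below is illustrative):
            aiplatform.Model.list(
                order_by="create_time desc",
            )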
Args:
filter (str):
Optional. An expression for filtering the results of the request.
For field names both snake_case and camelCase are supported.
order_by (str):
Optional. A comma-separated list of fields to order by, sorted in
ascending order. Use "desc" after a field name for descending.
Supported fields: `display_name`, `create_time`, `update_time`
project (str):
Optional. Project to retrieve list from. If not set, project
set in aiplatform.init will be used.
location (str):
Optional. Location to retrieve list from. If not set, location
set in aiplatform.init will be used.
credentials (auth_credentials.Credentials):
Optional. Custom credentials to use to retrieve list. Overrides
credentials set in aiplatform.init.
Returns:
List[models.Model]:
A list of Model resource objects
"""
return cls._list(
filter=filter,
order_by=order_by,
project=project,
location=location,
credentials=credentials,
)
@classmethod
def _construct_sdk_resource_from_gapic(
cls,
gapic_resource: gca_model_compat.Model,
project: Optional[str] = None,
location: Optional[str] = None,
credentials: Optional[auth_credentials.Credentials] = None,
) -> "Model":
"""Override base._construct_sdk_resource_from_gapic to allow for setting
a ModelRegistry and resource_id_validator.
Args:
gapic_resource (gca_model_compat.Model):
A GAPIC representation of a Model resource.
project (str):
Optional. Project to construct SDK object from. If not set,
project set in aiplatform.init will be used.
location (str):
Optional. Location to construct SDK object from. If not set,
location set in aiplatform.init will be used.
credentials (auth_credentials.Credentials):
Optional. Custom credentials to use to construct SDK object.
Overrides credentials set in aiplatform.init.
Returns:
Model:
An initialized SDK Model object that represents the Model GAPIC type.
"""
sdk_resource = super()._construct_sdk_resource_from_gapic(
gapic_resource=gapic_resource,
project=project,
location=location,
credentials=credentials,
)
sdk_resource._resource_id_validator = super()._revisioned_resource_id_validator
sdk_resource._registry = ModelRegistry(
sdk_resource.resource_name,
location=location,
project=project,
credentials=credentials,
)
return sdk_resource
@base.optional_sync()
def _wait_on_export(self, operation_future: operation.Operation, sync=True) -> None:
operation_future.result()
def export_model(
self,
export_format_id: str,
artifact_destination: Optional[str] = None,
image_destination: Optional[str] = None,
sync: bool = True,
) -> Dict[str, str]:
"""Exports a trained, exportable Model to a location specified by the user.
A Model is considered to be exportable if it has at least one `supported_export_formats`.
Either `artifact_destination` or `image_destination` must be provided.
Example Usage:
            my_model.export_model(
export_format_id="tf-saved-model",
artifact_destination="gs://my-bucket/models/"
)
or
            my_model.export_model(
export_format_id="custom-model",
image_destination="us-central1-docker.pkg.dev/projectId/repo/image"
)
Args:
export_format_id (str):
Required. The ID of the format in which the Model must be exported.
The list of export formats that this Model supports can be found
by calling `Model.supported_export_formats`.
artifact_destination (str):
The Cloud Storage location where the Model artifact is to be
written to. Under the directory given as the destination a
new one with name
"``model-export-<model-display-name>-<timestamp-of-export-call>``",
where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601
format, will be created. Inside, the Model and any of its
supporting files will be written.
This field should only be set when, in [Model.supported_export_formats],
the value for the key given in `export_format_id` contains ``ARTIFACT``.
image_destination (str):
The Google Container Registry or Artifact Registry URI where
the Model container image will be copied to. Accepted forms:
- Google Container Registry path. For example:
``gcr.io/projectId/imageName:tag``.
- Artifact Registry path. For example:
``us-central1-docker.pkg.dev/projectId/repoName/imageName:tag``.
This field should only be set when, in [Model.supported_export_formats],
the value for the key given in `export_format_id` contains ``IMAGE``.
sync (bool):
Whether to execute this export synchronously. If False, this method
will be executed in concurrent Future and any downstream object will
be immediately returned and synced when the Future has completed.
Returns:
output_info (Dict[str, str]):
Details of the completed export with output destination paths to
the artifacts or container image.
Raises:
ValueError: If model does not support exporting.
ValueError: If invalid arguments or export formats are provided.
"""
self.wait()
# Model does not support exporting
if not self.supported_export_formats:
raise ValueError(f"The model `{self.resource_name}` is not exportable.")
# No destination provided
if not any((artifact_destination, image_destination)):
raise ValueError(
"Please provide an `artifact_destination` or `image_destination`."
)
export_format_id = export_format_id.lower()
# Unsupported export type
if export_format_id not in self.supported_export_formats:
raise ValueError(
f"'{export_format_id}' is not a supported export format for this model. "
f"Choose one of the following: {self.supported_export_formats}"
)
content_types = gca_model_compat.Model.ExportFormat.ExportableContent
supported_content_types = self.supported_export_formats[export_format_id]
if (
artifact_destination
and content_types.ARTIFACT not in supported_content_types
):
raise ValueError(
"This model can not be exported as an artifact in '{export_format_id}' format. "
"Try exporting as a container image by passing the `image_destination` argument."
)
if image_destination and content_types.IMAGE not in supported_content_types:
raise ValueError(
"This model can not be exported as a container image in '{export_format_id}' format. "
"Try exporting the model artifacts by passing a `artifact_destination` argument."
)
# Construct request payload
output_config = gca_model_service_compat.ExportModelRequest.OutputConfig(
export_format_id=export_format_id
)
if artifact_destination:
output_config.artifact_destination = gca_io_compat.GcsDestination(
output_uri_prefix=artifact_destination
)
if image_destination:
output_config.image_destination = (
gca_io_compat.ContainerRegistryDestination(output_uri=image_destination)
)
_LOGGER.log_action_start_against_resource("Exporting", "model", self)
model_name = self.versioned_resource_name
operation_future = self.api_client.export_model(
name=model_name, output_config=output_config
)
_LOGGER.log_action_started_against_resource_with_lro(
"Export", "model", self.__class__, operation_future
)
# Block before returning
self._wait_on_export(operation_future=operation_future, sync=sync)
_LOGGER.log_action_completed_against_resource("model", "exported", self)
return json_format.MessageToDict(operation_future.metadata.output_info._pb)
@classmethod
@base.optional_sync()
def upload_xgboost_model_file(
cls,
model_file_path: str,
xgboost_version: Optional[str] = None,
display_name: Optional[str] = None,
description: Optional[str] = None,
model_id: Optional[str] = None,
parent_model: Optional[str] = None,
is_default_version: Optional[bool] = True,
version_aliases: Optional[Sequence[str]] = None,
version_description: Optional[str] = None,
instance_schema_uri: Optional[str] = None,
parameters_schema_uri: Optional[str] = None,
prediction_schema_uri: Optional[str] = None,
explanation_metadata: Optional[aiplatform.explain.ExplanationMetadata] = None,
explanation_parameters: Optional[
aiplatform.explain.ExplanationParameters
] = None,
project: Optional[str] = None,
location: Optional[str] = None,
credentials: Optional[auth_credentials.Credentials] = None,
labels: Optional[Dict[str, str]] = None,
encryption_spec_key_name: Optional[str] = None,
staging_bucket: Optional[str] = None,
sync=True,
upload_request_timeout: Optional[float] = None,
) -> "Model":
"""Uploads a model and returns a Model representing the uploaded Model
resource.
Example usage:
my_model = Model.upload_xgboost_model_file(
model_file_path="iris.xgboost_model.bst"
)
Args:
model_file_path (str): Required. Local file path of the model.
xgboost_version (str): Optional. The version of the XGBoost serving container.
Supported versions: ["0.82", "0.90", "1.1", "1.2", "1.3", "1.4"].
If the version is not specified, the latest version is used.
display_name (str):
Optional. The display name of the Model. The name can be up to 128
                characters long and can consist of any UTF-8 characters.
description (str):
The description of the model.
model_id (str):
Optional. The ID to use for the uploaded Model, which will
become the final component of the model resource name.
This value may be up to 63 characters, and valid characters
are `[a-z0-9_-]`. The first character cannot be a number or hyphen.
parent_model (str):
Optional. The resource name or model ID of an existing model that the
newly-uploaded model will be a version of.
Only set this field when uploading a new version of an existing model.
is_default_version (bool):
Optional. When set to True, the newly uploaded model version will
automatically have alias "default" included. Subsequent uses of
this model without a version specified will use this "default" version.
When set to False, the "default" alias will not be moved.
Actions targeting the newly-uploaded model version will need
to specifically reference this version by ID or alias.
New model uploads, i.e. version 1, will always be "default" aliased.
version_aliases (Sequence[str]):
Optional. User provided version aliases so that a model version
can be referenced via alias instead of auto-generated version ID.
A default version alias will be created for the first version of the model.
The format is [a-z][a-zA-Z0-9-]{0,126}[a-z0-9]
version_description (str):
Optional. The description of the model version being uploaded.
instance_schema_uri (str):
Optional. Points to a YAML file stored on Google Cloud
Storage describing the format of a single instance, which
are used in
``PredictRequest.instances``,
``ExplainRequest.instances``
and
``BatchPredictionJob.input_config``.
The schema is defined as an OpenAPI 3.0.2 `Schema
Object <https://tinyurl.com/y538mdwt#schema-object>`__.
AutoML Models always have this field populated by AI
Platform. Note: The URI given on output will be immutable
and probably different, including the URI scheme, than the
one given on input. The output URI will point to a location
where the user only has a read access.
parameters_schema_uri (str):
Optional. Points to a YAML file stored on Google Cloud
Storage describing the parameters of prediction and
explanation via
``PredictRequest.parameters``,
``ExplainRequest.parameters``
and
``BatchPredictionJob.model_parameters``.
The schema is defined as an OpenAPI 3.0.2 `Schema
Object <https://tinyurl.com/y538mdwt#schema-object>`__.
AutoML Models always have this field populated by AI
Platform, if no parameters are supported it is set to an
empty string. Note: The URI given on output will be
immutable and probably different, including the URI scheme,
than the one given on input. The output URI will point to a
location where the user only has a read access.
prediction_schema_uri (str):
Optional. Points to a YAML file stored on Google Cloud
Storage describing the format of a single prediction
produced by this Model, which are returned via
``PredictResponse.predictions``,
``ExplainResponse.explanations``,
and
``BatchPredictionJob.output_config``.
The schema is defined as an OpenAPI 3.0.2 `Schema
Object <https://tinyurl.com/y538mdwt#schema-object>`__.
AutoML Models always have this field populated by AI
Platform. Note: The URI given on output will be immutable
and probably different, including the URI scheme, than the
one given on input. The output URI will point to a location
where the user only has a read access.
explanation_metadata (aiplatform.explain.ExplanationMetadata):
Optional. Metadata describing the Model's input and output for explanation.
`explanation_metadata` is optional while `explanation_parameters` must be
specified when used.
For more details, see `Ref docs <http://tinyurl.com/1igh60kt>`
explanation_parameters (aiplatform.explain.ExplanationParameters):
Optional. Parameters to configure explaining for Model's predictions.
For more details, see `Ref docs <http://tinyurl.com/1an4zake>`
project: Optional[str]=None,
Project to upload this model to. Overrides project set in
aiplatform.init.
location: Optional[str]=None,
Location to upload this model to. Overrides location set in
aiplatform.init.
credentials: Optional[auth_credentials.Credentials]=None,
Custom credentials to use to upload this model. Overrides credentials
set in aiplatform.init.
labels (Dict[str, str]):
Optional. The labels with user-defined metadata to
organize your Models.
Label keys and values can be no longer than 64
characters (Unicode codepoints), can only
contain lowercase letters, numeric characters,
underscores and dashes. International characters
are allowed.
See https://goo.gl/xmQnxf for more information
and examples of labels.
encryption_spec_key_name (Optional[str]):
Optional. The Cloud KMS resource identifier of the customer
managed encryption key used to protect the model. Has the
form:
``projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key``.
The key needs to be in the same region as where the compute
resource is created.
If set, this Model and all sub-resources of this Model will be secured by this key.
Overrides encryption_spec_key_name set in aiplatform.init.
staging_bucket (str):
Optional. Bucket to stage local model artifacts. Overrides
staging_bucket set in aiplatform.init.
            sync (bool):
                Whether to execute this method synchronously. If False, this method
                will be executed in concurrent Future and any downstream object will
                be immediately returned and synced when the Future has completed.
            upload_request_timeout (float):
                Optional. The timeout for the upload request in seconds.
Returns:
model (aiplatform.Model):
Instantiated representation of the uploaded model resource.
Raises:
            ValueError: If `model_file_path` does not point to an existing file.
"""
if not display_name:
display_name = cls._generate_display_name("XGBoost model")
XGBOOST_SUPPORTED_MODEL_FILE_EXTENSIONS = [
".pkl",
".joblib",
".bst",
]
container_image_uri = aiplatform.helpers.get_prebuilt_prediction_container_uri(
region=location,
framework="xgboost",
framework_version=xgboost_version or "1.4",
accelerator="cpu",
)
model_file_path_obj = pathlib.Path(model_file_path)
if not model_file_path_obj.is_file():
raise ValueError(
f"model_file_path path must point to a file: '{model_file_path}'"
)
model_file_extension = model_file_path_obj.suffix
if model_file_extension not in XGBOOST_SUPPORTED_MODEL_FILE_EXTENSIONS:
_LOGGER.warning(
f"Only the following XGBoost model file extensions are currently supported: '{XGBOOST_SUPPORTED_MODEL_FILE_EXTENSIONS}'"
)
_LOGGER.warning(
"Treating the model file as a binary serialized XGBoost Booster."
)
model_file_extension = ".bst"
# Preparing model directory
# We cannot clean up the directory immediately after calling Model.upload since
# that call may be asynchronous and return before the model file has been read.
# To work around this, we make this method asynchronous (decorate with @base.optional_sync)
# but call Model.upload with sync=True.
with tempfile.TemporaryDirectory() as prepared_model_dir:
prepared_model_file_path = pathlib.Path(prepared_model_dir) / (
"model" + model_file_extension
)
shutil.copy(model_file_path_obj, prepared_model_file_path)
return cls.upload(
serving_container_image_uri=container_image_uri,
artifact_uri=prepared_model_dir,
display_name=display_name,
description=description,
model_id=model_id,
parent_model=parent_model,
is_default_version=is_default_version,
version_aliases=version_aliases,
version_description=version_description,
instance_schema_uri=instance_schema_uri,
parameters_schema_uri=parameters_schema_uri,
prediction_schema_uri=prediction_schema_uri,
explanation_metadata=explanation_metadata,
explanation_parameters=explanation_parameters,
project=project,
location=location,
credentials=credentials,
labels=labels,
encryption_spec_key_name=encryption_spec_key_name,
staging_bucket=staging_bucket,
sync=True,
upload_request_timeout=upload_request_timeout,
)
@classmethod
@base.optional_sync()
def upload_scikit_learn_model_file(
cls,
model_file_path: str,
sklearn_version: Optional[str] = None,
display_name: Optional[str] = None,
description: Optional[str] = None,
model_id: Optional[str] = None,
parent_model: Optional[str] = None,
is_default_version: Optional[bool] = True,
version_aliases: Optional[Sequence[str]] = None,
version_description: Optional[str] = None,
instance_schema_uri: Optional[str] = None,
parameters_schema_uri: Optional[str] = None,
prediction_schema_uri: Optional[str] = None,
explanation_metadata: Optional[aiplatform.explain.ExplanationMetadata] = None,
explanation_parameters: Optional[
aiplatform.explain.ExplanationParameters
] = None,
project: Optional[str] = None,
location: Optional[str] = None,
credentials: Optional[auth_credentials.Credentials] = None,
labels: Optional[Dict[str, str]] = None,
encryption_spec_key_name: Optional[str] = None,
staging_bucket: Optional[str] = None,
sync=True,
upload_request_timeout: Optional[float] = None,
) -> "Model":
"""Uploads a model and returns a Model representing the uploaded Model
resource.
Example usage:
my_model = Model.upload_scikit_learn_model_file(
model_file_path="iris.sklearn_model.joblib"
)
Args:
model_file_path (str): Required. Local file path of the model.
sklearn_version (str):
Optional. The version of the Scikit-learn serving container.
Supported versions: ["0.20", "0.22", "0.23", "0.24", "1.0"].
If the version is not specified, the latest version is used.
display_name (str):
Optional. The display name of the Model. The name can be up to 128
                characters long and can consist of any UTF-8 characters.
description (str):
The description of the model.
model_id (str):
Optional. The ID to use for the uploaded Model, which will
become the final component of the model resource name.
This value may be up to 63 characters, and valid characters
are `[a-z0-9_-]`. The first character cannot be a number or hyphen.
parent_model (str):
Optional. The resource name or model ID of an existing model that the
newly-uploaded model will be a version of.
Only set this field when uploading a new version of an existing model.
is_default_version (bool):
Optional. When set to True, the newly uploaded model version will
automatically have alias "default" included. Subsequent uses of
this model without a version specified will use this "default" version.
When set to False, the "default" alias will not be moved.
Actions targeting the newly-uploaded model version will need
to specifically reference this version by ID or alias.
New model uploads, i.e. version 1, will always be "default" aliased.
version_aliases (Sequence[str]):
Optional. User provided version aliases so that a model version
can be referenced via alias instead of auto-generated version ID.
A default version alias will be created for the first version of the model.
The format is [a-z][a-zA-Z0-9-]{0,126}[a-z0-9]
version_description (str):
Optional. The description of the model version being uploaded.
instance_schema_uri (str):
Optional. Points to a YAML file stored on Google Cloud
Storage describing the format of a single instance, which
are used in
``PredictRequest.instances``,
``ExplainRequest.instances``
and
``BatchPredictionJob.input_config``.
The schema is defined as an OpenAPI 3.0.2 `Schema
Object <https://tinyurl.com/y538mdwt#schema-object>`__.
AutoML Models always have this field populated by AI
Platform. Note: The URI given on output will be immutable
and probably different, including the URI scheme, than the
one given on input. The output URI will point to a location
where the user only has a read access.
parameters_schema_uri (str):
Optional. Points to a YAML file stored on Google Cloud
Storage describing the parameters of prediction and
explanation via
``PredictRequest.parameters``,
``ExplainRequest.parameters``
and
``BatchPredictionJob.model_parameters``.
The schema is defined as an OpenAPI 3.0.2 `Schema
Object <https://tinyurl.com/y538mdwt#schema-object>`__.
AutoML Models always have this field populated by AI
Platform, if no parameters are supported it is set to an
empty string. Note: The URI given on output will be
immutable and probably different, including the URI scheme,
than the one given on input. The output URI will point to a
location where the user only has a read access.
prediction_schema_uri (str):
Optional. Points to a YAML file stored on Google Cloud
Storage describing the format of a single prediction
produced by this Model, which are returned via
``PredictResponse.predictions``,
``ExplainResponse.explanations``,
and
``BatchPredictionJob.output_config``.
The schema is defined as an OpenAPI 3.0.2 `Schema
Object <https://tinyurl.com/y538mdwt#schema-object>`__.
AutoML Models always have this field populated by AI
Platform. Note: The URI given on output will be immutable
and probably different, including the URI scheme, than the
one given on input. The output URI will point to a location
where the user only has a read access.
explanation_metadata (aiplatform.explain.ExplanationMetadata):
Optional. Metadata describing the Model's input and output for explanation.
`explanation_metadata` is optional while `explanation_parameters` must be
specified when used.
For more details, see `Ref docs <http://tinyurl.com/1igh60kt>`
explanation_parameters (aiplatform.explain.ExplanationParameters):
Optional. Parameters to configure explaining for Model's predictions.
For more details, see `Ref docs <http://tinyurl.com/1an4zake>`
project: Optional[str]=None,
Project to upload this model to. Overrides project set in
aiplatform.init.
location: Optional[str]=None,
Location to upload this model to. Overrides location set in
aiplatform.init.
credentials: Optional[auth_credentials.Credentials]=None,
Custom credentials to use to upload this model. Overrides credentials
set in aiplatform.init.
labels (Dict[str, str]):
Optional. The labels with user-defined metadata to
organize your Models.
Label keys and values can be no longer than 64
characters (Unicode codepoints), can only
contain lowercase letters, numeric characters,
underscores and dashes. International characters
are allowed.
See https://goo.gl/xmQnxf for more information
and examples of labels.
encryption_spec_key_name (Optional[str]):
Optional. The Cloud KMS resource identifier of the customer
managed encryption key used to protect the model. Has the
form:
``projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key``.
The key needs to be in the same region as where the compute
resource is created.
If set, this Model and all sub-resources of this Model will be secured by this key.
Overrides encryption_spec_key_name set in aiplatform.init.
staging_bucket (str):
Optional. Bucket to stage local model artifacts. Overrides
staging_bucket set in aiplatform.init.
sync (bool):
Whether to execute this method synchronously. If False, this method
will be executed in concurrent Future and any downstream object will
be immediately returned and synced when the Future has completed.
upload_request_timeout (float):
Optional. The timeout for the upload request in seconds.
Returns:
model (aiplatform.Model):
Instantiated representation of the uploaded model resource.
Raises:
            ValueError: If explanation_metadata is specified while explanation_parameters
                is not, or if `model_file_path` does not point to an existing file.
"""
if not display_name:
display_name = cls._generate_display_name("Scikit-Learn model")
SKLEARN_SUPPORTED_MODEL_FILE_EXTENSIONS = [
".pkl",
".joblib",
]
container_image_uri = aiplatform.helpers.get_prebuilt_prediction_container_uri(
region=location,
framework="sklearn",
framework_version=sklearn_version or "1.0",
accelerator="cpu",
)
model_file_path_obj = pathlib.Path(model_file_path)
if not model_file_path_obj.is_file():
raise ValueError(
f"model_file_path path must point to a file: '{model_file_path}'"
)
model_file_extension = model_file_path_obj.suffix
if model_file_extension not in SKLEARN_SUPPORTED_MODEL_FILE_EXTENSIONS:
_LOGGER.warning(
f"Only the following Scikit-learn model file extensions are currently supported: '{SKLEARN_SUPPORTED_MODEL_FILE_EXTENSIONS}'"
)
_LOGGER.warning(
"Treating the model file as a pickle serialized Scikit-learn model."
)
model_file_extension = ".pkl"
# Preparing model directory
# We cannot clean up the directory immediately after calling Model.upload since
# that call may be asynchronous and return before the model file has been read.
# To work around this, we make this method asynchronous (decorate with @base.optional_sync)
# but call Model.upload with sync=True.
with tempfile.TemporaryDirectory() as prepared_model_dir:
prepared_model_file_path = pathlib.Path(prepared_model_dir) / (
"model" + model_file_extension
)
shutil.copy(model_file_path_obj, prepared_model_file_path)
return cls.upload(
serving_container_image_uri=container_image_uri,
artifact_uri=prepared_model_dir,
display_name=display_name,
description=description,
model_id=model_id,
parent_model=parent_model,
is_default_version=is_default_version,
version_aliases=version_aliases,
version_description=version_description,
instance_schema_uri=instance_schema_uri,
parameters_schema_uri=parameters_schema_uri,
prediction_schema_uri=prediction_schema_uri,
explanation_metadata=explanation_metadata,
explanation_parameters=explanation_parameters,
project=project,
location=location,
credentials=credentials,
labels=labels,
encryption_spec_key_name=encryption_spec_key_name,
staging_bucket=staging_bucket,
sync=True,
upload_request_timeout=upload_request_timeout,
)
@classmethod
def upload_tensorflow_saved_model(
cls,
saved_model_dir: str,
tensorflow_version: Optional[str] = None,
use_gpu: bool = False,
display_name: Optional[str] = None,
description: Optional[str] = None,
model_id: Optional[str] = None,
parent_model: Optional[str] = None,
is_default_version: Optional[bool] = True,
version_aliases: Optional[Sequence[str]] = None,
version_description: Optional[str] = None,
instance_schema_uri: Optional[str] = None,
parameters_schema_uri: Optional[str] = None,
prediction_schema_uri: Optional[str] = None,
explanation_metadata: Optional[aiplatform.explain.ExplanationMetadata] = None,
explanation_parameters: Optional[
aiplatform.explain.ExplanationParameters
] = None,
project: Optional[str] = None,
location: Optional[str] = None,
credentials: Optional[auth_credentials.Credentials] = None,
labels: Optional[Dict[str, str]] = None,
encryption_spec_key_name: Optional[str] = None,
staging_bucket: Optional[str] = None,
sync=True,
        upload_request_timeout: Optional[float] = None,
) -> "Model":
"""Uploads a model and returns a Model representing the uploaded Model
resource.
Example usage:
            my_model = Model.upload_tensorflow_saved_model(
                saved_model_dir="iris.tensorflow_model"
            )
Args:
saved_model_dir (str): Required.
Local directory of the Tensorflow SavedModel.
tensorflow_version (str):
Optional. The version of the Tensorflow serving container.
Supported versions: ["0.15", "2.1", "2.2", "2.3", "2.4", "2.5", "2.6", "2.7"].
If the version is not specified, the latest version is used.
use_gpu (bool): Whether to use GPU for model serving.
display_name (str):
Optional. The display name of the Model. The name can be up to 128
                characters long and can consist of any UTF-8 characters.
description (str):
The description of the model.
model_id (str):
Optional. The ID to use for the uploaded Model, which will
become the final component of the model resource name.
This value may be up to 63 characters, and valid characters
are `[a-z0-9_-]`. The first character cannot be a number or hyphen.
parent_model (str):
Optional. The resource name or model ID of an existing model that the
newly-uploaded model will be a version of.
Only set this field when uploading a new version of an existing model.
is_default_version (bool):
Optional. When set to True, the newly uploaded model version will
automatically have alias "default" included. Subsequent uses of
this model without a version specified will use this "default" version.
When set to False, the "default" alias will not be moved.
Actions targeting the newly-uploaded model version will need
to specifically reference this version by ID or alias.
New model uploads, i.e. version 1, will always be "default" aliased.
version_aliases (Sequence[str]):
Optional. User provided version aliases so that a model version
can be referenced via alias instead of auto-generated version ID.
A default version alias will be created for the first version of the model.
The format is [a-z][a-zA-Z0-9-]{0,126}[a-z0-9]
version_description (str):
Optional. The description of the model version being uploaded.
instance_schema_uri (str):
Optional. Points to a YAML file stored on Google Cloud
Storage describing the format of a single instance, which
are used in
``PredictRequest.instances``,
``ExplainRequest.instances``
and
``BatchPredictionJob.input_config``.
The schema is defined as an OpenAPI 3.0.2 `Schema
Object <https://tinyurl.com/y538mdwt#schema-object>`__.
AutoML Models always have this field populated by AI
Platform. Note: The URI given on output will be immutable
and probably different, including the URI scheme, than the
one given on input. The output URI will point to a location
where the user only has a read access.
parameters_schema_uri (str):
Optional. Points to a YAML file stored on Google Cloud
Storage describing the parameters of prediction and
explanation via
``PredictRequest.parameters``,
``ExplainRequest.parameters``
and
``BatchPredictionJob.model_parameters``.
The schema is defined as an OpenAPI 3.0.2 `Schema
Object <https://tinyurl.com/y538mdwt#schema-object>`__.
AutoML Models always have this field populated by AI
Platform, if no parameters are supported it is set to an
empty string. Note: The URI given on output will be
immutable and probably different, including the URI scheme,
than the one given on input. The output URI will point to a
location where the user only has a read access.
prediction_schema_uri (str):
Optional. Points to a YAML file stored on Google Cloud
Storage describing the format of a single prediction
produced by this Model, which are returned via
``PredictResponse.predictions``,
``ExplainResponse.explanations``,
and
``BatchPredictionJob.output_config``.
The schema is defined as an OpenAPI 3.0.2 `Schema
Object <https://tinyurl.com/y538mdwt#schema-object>`__.
AutoML Models always have this field populated by AI
Platform. Note: The URI given on output will be immutable
and probably different, including the URI scheme, than the
one given on input. The output URI will point to a location
where the user only has a read access.
explanation_metadata (aiplatform.explain.ExplanationMetadata):
Optional. Metadata describing the Model's input and output for explanation.
`explanation_metadata` is optional while `explanation_parameters` must be
specified when used.
For more details, see `Ref docs <http://tinyurl.com/1igh60kt>`
explanation_parameters (aiplatform.explain.ExplanationParameters):
Optional. Parameters to configure explaining for Model's predictions.
For more details, see `Ref docs <http://tinyurl.com/1an4zake>`
project: Optional[str]=None,
Project to upload this model to. Overrides project set in
aiplatform.init.
location: Optional[str]=None,
Location to upload this model to. Overrides location set in
aiplatform.init.
credentials: Optional[auth_credentials.Credentials]=None,
Custom credentials to use to upload this model. Overrides credentials
set in aiplatform.init.
labels (Dict[str, str]):
Optional. The labels with user-defined metadata to
organize your Models.
Label keys and values can be no longer than 64
characters (Unicode codepoints), can only
contain lowercase letters, numeric characters,
underscores and dashes. International characters
are allowed.
See https://goo.gl/xmQnxf for more information
and examples of labels.
encryption_spec_key_name (Optional[str]):
Optional. The Cloud KMS resource identifier of the customer
managed encryption key used to protect the model. Has the
form:
``projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key``.
The key needs to be in the same region as where the compute
resource is created.
If set, this Model and all sub-resources of this Model will be secured by this key.
Overrides encryption_spec_key_name set in aiplatform.init.
staging_bucket (str):
Optional. Bucket to stage local model artifacts. Overrides
staging_bucket set in aiplatform.init.
sync (bool):
Whether to execute this method synchronously. If False, this method
will be executed in concurrent Future and any downstream object will
be immediately returned and synced when the Future has completed.
upload_request_timeout (float):
Optional. The timeout for the upload request in seconds.
Returns:
model (aiplatform.Model):
Instantiated representation of the uploaded model resource.
Raises:
ValueError: If explanation_metadata is specified while explanation_parameters
is not. Also if model directory does not contain a supported model file.
"""
if not display_name:
display_name = cls._generate_display_name("Tensorflow model")
container_image_uri = aiplatform.helpers.get_prebuilt_prediction_container_uri(
region=location,
framework="tensorflow",
framework_version=tensorflow_version or "2.7",
accelerator="gpu" if use_gpu else "cpu",
)
return cls.upload(
serving_container_image_uri=container_image_uri,
artifact_uri=saved_model_dir,
display_name=display_name,
description=description,
model_id=model_id,
parent_model=parent_model,
is_default_version=is_default_version,
version_aliases=version_aliases,
version_description=version_description,
instance_schema_uri=instance_schema_uri,
parameters_schema_uri=parameters_schema_uri,
prediction_schema_uri=prediction_schema_uri,
explanation_metadata=explanation_metadata,
explanation_parameters=explanation_parameters,
project=project,
location=location,
credentials=credentials,
labels=labels,
encryption_spec_key_name=encryption_spec_key_name,
staging_bucket=staging_bucket,
sync=sync,
upload_request_timeout=upload_request_timeout,
)
# TODO(b/273499620): Add async support.
def copy(
self,
destination_location: str,
destination_model_id: Optional[str] = None,
destination_parent_model: Optional[str] = None,
encryption_spec_key_name: Optional[str] = None,
copy_request_timeout: Optional[float] = None,
) -> "Model":
"""Copys a model and returns a Model representing the copied Model
resource. This method is a blocking call.
Example usage:
copied_model = my_model.copy(
destination_location="us-central1"
)
Args:
destination_location (str):
The destination location to copy the model to.
destination_model_id (str):
Optional. The ID to use for the copied Model, which will
become the final component of the model resource name.
This value may be up to 63 characters, and valid characters
are `[a-z0-9_-]`. The first character cannot be a number or hyphen.
Only set this field when copying as a new model. If this field is not set,
a numeric model id will be generated.
destination_parent_model (str):
Optional. The resource name or model ID of an existing model that the
newly-copied model will be a version of.
Only set this field when copying as a new version of an existing model.
encryption_spec_key_name (Optional[str]):
Optional. The Cloud KMS resource identifier of the customer
managed encryption key used to protect the model. Has the
form:
``projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key``.
The key needs to be in the same region as where the compute
resource is created.
If set, this Model and all sub-resources of this Model will be secured by this key.
Overrides encryption_spec_key_name set in aiplatform.init.
copy_request_timeout (float):
Optional. The timeout for the copy request in seconds.
Returns:
model (aiplatform.Model):
Instantiated representation of the copied model resource.
Raises:
ValueError: If both `destination_model_id` and `destination_parent_model` are set.
"""
if destination_model_id is not None and destination_parent_model is not None:
raise ValueError(
"`destination_model_id` and `destination_parent_model` can not be set together."
)
parent = initializer.global_config.common_location_path(
initializer.global_config.project, destination_location
)
source_model = self.versioned_resource_name
destination_parent_model = ModelRegistry._get_true_version_parent(
parent_model=destination_parent_model,
project=initializer.global_config.project,
location=destination_location,
)
encryption_spec = initializer.global_config.get_encryption_spec(
encryption_spec_key_name=encryption_spec_key_name,
)
if destination_model_id is not None:
request = gca_model_service_compat.CopyModelRequest(
parent=parent,
source_model=source_model,
model_id=destination_model_id,
encryption_spec=encryption_spec,
)
else:
request = gca_model_service_compat.CopyModelRequest(
parent=parent,
source_model=source_model,
parent_model=destination_parent_model,
encryption_spec=encryption_spec,
)
api_client = initializer.global_config.create_client(
client_class=utils.ModelClientWithOverride,
location_override=destination_location,
credentials=initializer.global_config.credentials,
)
_LOGGER.log_action_start_against_resource("Copying", "", self)
lro = api_client.copy_model(
request=request,
timeout=copy_request_timeout,
)
_LOGGER.log_action_started_against_resource_with_lro(
"Copy", "", self.__class__, lro
)
model_copy_response = lro.result(timeout=None)
this_model = models.Model(
model_copy_response.model,
version=model_copy_response.model_version_id,
location=destination_location,
)
_LOGGER.log_action_completed_against_resource("", "copied", this_model)
return this_model
def list_model_evaluations(
self,
) -> List["model_evaluation.ModelEvaluation"]:
"""List all Model Evaluation resources associated with this model.
If this Model resource was instantiated with a version, the Model
Evaluation resources for that version will be returned. If no version
was provided when the Model resource was instantiated, Model Evaluation
resources will be returned for the default version.
Example Usage:
my_model = Model(
model_name="projects/123/locations/us-central1/models/456@1"
)
my_evaluations = my_model.list_model_evaluations()
Returns:
List[model_evaluation.ModelEvaluation]:
List of ModelEvaluation resources for the model.
"""
return model_evaluation.ModelEvaluation._list(
parent=self.versioned_resource_name,
credentials=self.credentials,
)
def get_model_evaluation(
self,
evaluation_id: Optional[str] = None,
) -> Optional[model_evaluation.ModelEvaluation]:
"""Returns a ModelEvaluation resource and instantiates its representation.
If no evaluation_id is passed, it will return the first evaluation associated
with this model. If the aiplatform.Model resource was instantiated with a
version, this will return a Model Evaluation from that version. If no version
was specified when instantiating the Model resource, this will return an
Evaluation from the default version.
Example usage:
my_model = Model(
model_name="projects/123/locations/us-central1/models/456"
)
my_evaluation = my_model.get_model_evaluation(
evaluation_id="789"
)
# If no arguments are passed, this method returns the first evaluation for the model
my_evaluation = my_model.get_model_evaluation()
Args:
evaluation_id (str):
Optional. The ID of the model evaluation to retrieve.
Returns:
model_evaluation.ModelEvaluation:
Instantiated representation of the ModelEvaluation resource.
"""
evaluations = self.list_model_evaluations()
if not evaluation_id:
if len(evaluations) > 1:
_LOGGER.warning(
f"Your model has more than one model evaluation, this is returning only one evaluation resource: {evaluations[0].resource_name}"
)
_ipython_utils.display_model_evaluation_button(evaluations[0])
return evaluations[0]
else:
resource_uri_parts = self._parse_resource_name(self.resource_name)
evaluation_resource_name = (
model_evaluation.ModelEvaluation._format_resource_name(
**resource_uri_parts,
evaluation=evaluation_id,
)
)
evaluation = model_evaluation.ModelEvaluation(
evaluation_name=evaluation_resource_name,
credentials=self.credentials,
)
_ipython_utils.display_model_evaluation_button(evaluation)
return evaluation
def evaluate(
self,
prediction_type: str,
target_field_name: str,
gcs_source_uris: Optional[List[str]] = None,
bigquery_source_uri: Optional[str] = None,
bigquery_destination_output_uri: Optional[str] = None,
class_labels: Optional[List[str]] = None,
prediction_label_column: Optional[str] = None,
prediction_score_column: Optional[str] = None,
staging_bucket: Optional[str] = None,
service_account: Optional[str] = None,
generate_feature_attributions: bool = False,
evaluation_pipeline_display_name: Optional[str] = None,
evaluation_metrics_display_name: Optional[str] = None,
network: Optional[str] = None,
encryption_spec_key_name: Optional[str] = None,
experiment: Optional[Union[str, "aiplatform.Experiment"]] = None,
enable_caching: Optional[bool] = None,
) -> "model_evaluation._ModelEvaluationJob":
"""Creates a model evaluation job running on Vertex Pipelines and returns the resulting
ModelEvaluationJob resource.
Example usage:
```
my_model = Model(
model_name="projects/123/locations/us-central1/models/456"
)
my_evaluation_job = my_model.evaluate(
prediction_type="classification",
target_field_name="type",
data_source_uris=["gs://sdk-model-eval/my-prediction-data.csv"],
staging_bucket="gs://my-staging-bucket/eval_pipeline_root",
)
my_evaluation_job.wait()
my_evaluation = my_evaluation_job.get_model_evaluation()
my_evaluation.metrics
```
Args:
prediction_type (str):
Required. The problem type being addressed by this evaluation run. 'classification' and 'regression'
are the currently supported problem types.
target_field_name (str):
Required. The column name of the field containing the label for this prediction task.
gcs_source_uris (List[str]):
Optional. A list of Cloud Storage data files containing the ground truth data to use for this
evaluation job. These files should contain your model's prediction column. Currently only Google Cloud Storage
urls are supported, for example: "gs://path/to/your/data.csv". The provided data files must be
either CSV or JSONL. One of `gcs_source_uris` or `bigquery_source_uri` is required.
bigquery_source_uri (str):
Optional. A bigquery table URI containing the ground truth data to use for this evaluation job. This uri should
be in the format 'bq://my-project-id.dataset.table'. One of `gcs_source_uris` or `bigquery_source_uri` is
required.
bigquery_destination_output_uri (str):
Optional. A bigquery table URI where the Batch Prediction job associated with your Model Evaluation will write
prediction output. This can be a BigQuery URI to a project ('bq://my-project'), a dataset
('bq://my-project.my-dataset'), or a table ('bq://my-project.my-dataset.my-table'). Required if `bigquery_source_uri`
is provided.
class_labels (List[str]):
Optional. For custom (non-AutoML) classification models, a list of possible class names, in the
same order that predictions are generated. This argument is required when prediction_type is 'classification'.
For example, in a classification model with 3 possible classes that are outputted in the format: [0.97, 0.02, 0.01]
with the class names "cat", "dog", and "fish", the value of `class_labels` should be `["cat", "dog", "fish"]` where
the class "cat" corresponds with 0.97 in the example above.
prediction_label_column (str):
Optional. The column name of the field containing classes the model is scoring. Formatted to be able to find nested
columns, delimited by `.`. If not set, defaulted to `prediction.classes` for classification.
prediction_score_column (str):
Optional. The column name of the field containing batch prediction scores. Formatted to be able to find nested columns,
delimited by `.`. If not set, defaulted to `prediction.scores` for a `classification` problem_type, `prediction.value`
for a `regression` problem_type.
staging_bucket (str):
Optional. The GCS directory to use for staging files from this evaluation job. Defaults to the value set in
aiplatform.init(staging_bucket=...) if not provided. Required if staging_bucket is not set in aiplatform.init().
service_account (str):
Specifies the service account for workload run-as account for this Model Evaluation PipelineJob.
Users submitting jobs must have act-as permission on this run-as account. The service account running
this Model Evaluation job needs the following permissions: Dataflow Worker, Storage Admin,
Vertex AI Administrator, and Vertex AI Service Agent.
generate_feature_attributions (boolean):
Optional. Whether the model evaluation job should generate feature attributions. Defaults to False if not specified.
evaluation_pipeline_display_name (str):
Optional. The display name of your model evaluation job. This is the display name that will be applied to the
Vertex Pipeline run for your evaluation job. If not set, a display name will be generated automatically.
evaluation_metrics_display_name (str):
Optional. The display name of the model evaluation resource uploaded to Vertex from your Model Evaluation pipeline.
network (str):
The full name of the Compute Engine network to which the job
should be peered. For example, projects/12345/global/networks/myVPC.
Private services access must already be configured for the network.
If left unspecified, the job is not peered with any network.
encryption_spec_key_name (str):
Optional. The Cloud KMS resource identifier of the customer managed encryption key used to protect the job. Has the
form: ``projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key``. The key needs to be in the same
region as where the compute resource is created. If this is set, then all
resources created by the PipelineJob for this Model Evaluation will be encrypted with the provided encryption key.
If not specified, encryption_spec of original PipelineJob will be used.
experiment (Union[str, experiments_resource.Experiment]):
Optional. The Vertex AI experiment name or instance to associate to the PipelineJob executing
this model evaluation job. Metrics produced by the PipelineJob as system.Metric Artifacts
will be associated as metrics to the provided experiment, and parameters from this PipelineJob
will be associated as parameters to the provided experiment.
enable_caching (bool):
Optional. Whether to turn on caching for the run.
If this is not set, defaults to the compile time settings, which
are True for all tasks by default, while users may specify
different caching options for individual tasks.
If this is set, the setting applies to all tasks in the pipeline.
Overrides the compile time settings.
Returns:
model_evaluation.ModelEvaluationJob: Instantiated representation of the
_ModelEvaluationJob.
Raises:
ValueError:
If staging_bucket was not set in aiplatform.init() and staging_bucket was not provided.
If the provided `prediction_type` is not valid.
                If the provided `gcs_source_uris` don't start with 'gs://'.
"""
if (gcs_source_uris is None) == (bigquery_source_uri is None):
raise ValueError(
"Exactly one of `gcs_source_uris` or `bigquery_source_uri` must be provided."
)
if isinstance(gcs_source_uris, str):
gcs_source_uris = [gcs_source_uris]
if bigquery_source_uri and not isinstance(bigquery_source_uri, str):
raise ValueError("The provided `bigquery_source_uri` must be a string.")
if bigquery_source_uri and not bigquery_destination_output_uri:
raise ValueError(
"`bigquery_destination_output_uri` must be provided if `bigquery_source_uri` is used as the data source."
)
if gcs_source_uris is not None and not all(
uri.startswith("gs://") for uri in gcs_source_uris
):
raise ValueError("`gcs_source_uris` must start with 'gs://'.")
if bigquery_source_uri is not None and not bigquery_source_uri.startswith(
"bq://"
):
raise ValueError(
"`bigquery_source_uri` and `bigquery_destination_output_uri` must start with 'bq://'"
)
if (
bigquery_destination_output_uri is not None
and not bigquery_destination_output_uri.startswith("bq://")
):
raise ValueError(
"`bigquery_source_uri` and `bigquery_destination_output_uri` must start with 'bq://'"
)
SUPPORTED_INSTANCES_FORMAT_FILE_EXTENSIONS = [".jsonl", ".csv"]
if not staging_bucket and initializer.global_config.staging_bucket:
staging_bucket = initializer.global_config.staging_bucket
elif not staging_bucket and not initializer.global_config.staging_bucket:
raise ValueError(
"Please provide `evaluation_staging_bucket` when calling evaluate or set one using aiplatform.init(staging_bucket=...)"
)
if prediction_type not in _SUPPORTED_EVAL_PREDICTION_TYPES:
raise ValueError(
f"Please provide a supported model prediction type, one of: {_SUPPORTED_EVAL_PREDICTION_TYPES}."
)
if generate_feature_attributions:
if not self._gca_resource.explanation_spec:
raise ValueError(
"To generate feature attributions with your evaluation, call evaluate on a model with an explanation spec. To run evaluation on the current model, call evaluate with `generate_feature_attributions=False`."
)
instances_format = None
if gcs_source_uris:
data_file_path_obj = pathlib.Path(gcs_source_uris[0])
data_file_extension = data_file_path_obj.suffix
if data_file_extension not in SUPPORTED_INSTANCES_FORMAT_FILE_EXTENSIONS:
_LOGGER.warning(
f"Only the following data file extensions are currently supported: '{SUPPORTED_INSTANCES_FORMAT_FILE_EXTENSIONS}'"
)
else:
instances_format = data_file_extension[1:]
elif bigquery_source_uri:
instances_format = "bigquery"
if (
self._gca_resource.metadata_schema_uri
== "https://storage.googleapis.com/google-cloud-aiplatform/schema/model/metadata/automl_tabular_1.0.0.yaml"
):
model_type = "automl_tabular"
else:
model_type = "other"
if (
model_type == "other"
and prediction_type == "classification"
and not class_labels
):
raise ValueError(
"Please provide `class_labels` when running evaluation on a custom classification model."
)
return model_evaluation._ModelEvaluationJob.submit(
model_name=self.versioned_resource_name,
prediction_type=prediction_type,
target_field_name=target_field_name,
gcs_source_uris=gcs_source_uris,
bigquery_source_uri=bigquery_source_uri,
batch_predict_bigquery_destination_output_uri=bigquery_destination_output_uri,
class_labels=class_labels,
prediction_label_column=prediction_label_column,
prediction_score_column=prediction_score_column,
service_account=service_account,
pipeline_root=staging_bucket,
instances_format=instances_format,
model_type=model_type,
generate_feature_attributions=generate_feature_attributions,
evaluation_pipeline_display_name=evaluation_pipeline_display_name,
evaluation_metrics_display_name=evaluation_metrics_display_name,
network=network,
encryption_spec_key_name=encryption_spec_key_name,
credentials=self.credentials,
experiment=experiment,
enable_caching=enable_caching,
)
# TODO (b/232546878): Async support
class ModelRegistry:
def __init__(
self,
model: Union[Model, str],
location: Optional[str] = None,
project: Optional[str] = None,
credentials: Optional[auth_credentials.Credentials] = None,
):
"""Creates a ModelRegistry instance for version management of a registered model.
Args:
model (Union[Model, str]):
Required. One of the following:
1. A Model instance
2. A fully-qualified model resource name
3. A model ID. A location and project must be provided.
location (str):
Optional. The model location. Used when passing a model name as model.
                If not set, location set in aiplatform.init will be used.
project (str):
Optional. The model project. Used when passing a model name as model.
If not set, project set in aiplatform.init will be used.
credentials (auth_credentials.Credentials):
Optional. Custom credentials to use with model access. If not set,
credentials set in aiplatform.init will be used.
"""
if isinstance(model, Model):
self.model_resource_name = model.resource_name
else:
self.model_resource_name = utils.full_resource_name(
resource_name=model,
resource_noun="models",
parse_resource_name_method=Model._parse_resource_name,
format_resource_name_method=Model._format_resource_name,
project=project,
location=location,
resource_id_validator=base.VertexAiResourceNoun._revisioned_resource_id_validator,
)
self.credentials = credentials or (
model.credentials
if isinstance(model, Model)
else initializer.global_config.credentials
)
self.client = Model._instantiate_client(location, self.credentials)
def get_model(
self,
version: Optional[str] = None,
) -> Model:
"""Gets a registered model with optional version.
Args:
version (str):
Optional. A model version ID or alias to target.
Defaults to the model with the "default" alias.
Returns:
Model: An instance of a Model from this ModelRegistry.
"""
return Model(
self.model_resource_name, version=version, credentials=self.credentials
)
def list_versions(
self,
filter: Optional[str] = None,
) -> List[VersionInfo]:
"""Lists the versions and version info of a model.
Args:
filter (str):
Optional. An expression for filtering the results of the request.
For field names both snake_case and camelCase are supported.
- `labels` supports general map functions that is:
- `labels.key=value` - key:value equality
                - `labels.key:*` or `labels:key` - key existence
- A key including a space must be quoted.
`labels."a key"`.
Some examples:
- `labels.myKey="myValue"`
Returns:
List[VersionInfo]:
A list of VersionInfo, each containing
info about specific model versions.
"""
_LOGGER.info(f"Getting versions for {self.model_resource_name}")
request = gca_model_service_compat.ListModelVersionsRequest(
name=self.model_resource_name,
filter=filter,
)
page_result = self.client.list_model_versions(
request=request,
)
versions = [
VersionInfo(
version_id=model.version_id,
version_create_time=model.version_create_time,
version_update_time=model.version_update_time,
model_display_name=model.display_name,
model_resource_name=self._parse_versioned_name(model.name)[0],
version_aliases=model.version_aliases,
version_description=model.version_description,
)
for model in page_result
]
return versions
def get_version_info(
self,
version: str,
) -> VersionInfo:
"""Gets information about a specific model version.
Args:
version (str): Required. The model version to obtain info for.
Returns:
VersionInfo: Contains info about the model version.
"""
_LOGGER.info(f"Getting version {version} info for {self.model_resource_name}")
model = self.client.get_model(
name=self._get_versioned_name(self.model_resource_name, version),
)
return VersionInfo(
version_id=model.version_id,
version_create_time=model.version_create_time,
version_update_time=model.version_update_time,
model_display_name=model.display_name,
model_resource_name=self._parse_versioned_name(model.name)[0],
version_aliases=model.version_aliases,
version_description=model.version_description,
)
def delete_version(
self,
version: str,
) -> None:
"""Deletes a model version from the registry.
Cannot delete a version if it is the last remaining version.
Use Model.delete() in that case.
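        Example usage (illustrative; version "2" is a placeholder):
            registry.delete_version("2")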
Args:
version (str): Required. The model version ID or alias to delete.
"""
lro = self.client.delete_model_version(
name=self._get_versioned_name(self.model_resource_name, version),
)
_LOGGER.info(f"Deleting version {version} for {self.model_resource_name}")
lro.result()
_LOGGER.info(f"Deleted version {version} for {self.model_resource_name}")
def update_version(
self,
version: str,
version_description: Optional[str] = None,
labels: Optional[Dict[str, str]] = None,
) -> None:
"""Updates a model version.
Args:
            version (str): Required. The model version ID to update.
version_description (str):
The description of the model version.
labels (Dict[str, str]):
Optional. The labels with user-defined metadata to
organize your Model versions.
Label keys and values can be no longer than 64
characters (Unicode codepoints), can only
contain lowercase letters, numeric characters,
underscores and dashes. International characters
are allowed.
See https://goo.gl/xmQnxf for more information
and examples of labels.
Raises:
ValueError: If `labels` is not the correct format.
"""
current_model_proto = self.get_model(version).gca_resource
copied_model_proto = current_model_proto.__class__(current_model_proto)
update_mask: List[str] = []
if version_description:
copied_model_proto.version_description = version_description
update_mask.append("version_description")
if labels:
utils.validate_labels(labels)
copied_model_proto.labels = labels
update_mask.append("labels")
update_mask = field_mask_pb2.FieldMask(paths=update_mask)
versioned_name = self._get_versioned_name(self.model_resource_name, version)
_LOGGER.info(f"Updating model {versioned_name}")
self.client.update_model(
model=copied_model_proto,
update_mask=update_mask,
)
_LOGGER.info(f"Completed updating model {versioned_name}")
def add_version_aliases(
self,
new_aliases: List[str],
version: str,
) -> None:
"""Adds version alias(es) to a model version.
Args:
new_aliases (List[str]): Required. The alias(es) to add to a model version.
version (str): Required. The version ID to receive the new alias(es).
"""
self._merge_version_aliases(
version_aliases=new_aliases,
version=version,
)
def remove_version_aliases(
self,
target_aliases: List[str],
version: str,
) -> None:
"""Removes version alias(es) from a model version.
Args:
target_aliases (List[str]): Required. The alias(es) to remove from a model version.
version (str): Required. The version ID to be stripped of the target alias(es).
"""
self._merge_version_aliases(
version_aliases=[f"-{alias}" for alias in target_aliases],
version=version,
)
def _merge_version_aliases(
self,
version_aliases: List[str],
version: str,
) -> None:
"""Merges a list of version aliases with a model's existing alias list.
Args:
version_aliases (List[str]): Required. The version alias change list.
version (str): Required. The version ID to have its alias list changed.
"""
_LOGGER.info(f"Merging version aliases for {self.model_resource_name}")
self.client.merge_version_aliases(
name=self._get_versioned_name(self.model_resource_name, version),
version_aliases=version_aliases,
)
_LOGGER.info(
f"Completed merging version aliases for {self.model_resource_name}"
)
@staticmethod
def _get_versioned_name(
resource_name: str,
version: Optional[str] = None,
) -> str:
"""Creates a versioned form of a model resource name.
Args:
resource_name (str): Required. A fully-qualified resource name or resource ID.
version (str): Optional. The version or alias of the resource.
Returns:
versioned_name (str): The versioned resource name in revisioned format.
"""
if version:
return f"{resource_name}@{version}"
return resource_name
@staticmethod
def _parse_versioned_name(
model_name: str,
) -> Tuple[str, Optional[str]]:
"""Return a model name and, if included in the model name, a model version.
Args:
model_name (str): Required. A fully-qualified model name or model ID,
optionally with an included version.
Returns:
parsed_version_name (Tuple[str, Optional[str]]):
A tuple containing the model name or ID as the first element,
and the model version as the second element, if present in `model_name`.
Raises:
ValueError: If the `model_name` is invalid and contains too many '@' symbols.
"""
if "@" not in model_name:
return model_name, None
elif model_name.count("@") > 1:
raise ValueError(
f"Received an invalid model_name with too many `@`s: {model_name}"
)
else:
return model_name.split("@")
@staticmethod
def _get_true_version_parent(
parent_model: Optional[str] = None,
project: Optional[str] = None,
location: Optional[str] = None,
) -> Optional[str]:
"""Gets the true `parent_model` with full resource name.
Args:
parent_model (str): Optional. A fully-qualified resource name or resource ID
of the model that would be the parent of another model.
project (str): Optional. The project of `parent_model`, if not included in `parent_model`.
location (str): Optional. The location of `parent_model`, if not included in `parent_model`.
Returns:
true_parent_model (str):
Optional. The true resource name of the parent model, if one should exist.
"""
if parent_model:
existing_resource = utils.full_resource_name(
resource_name=parent_model,
resource_noun="models",
parse_resource_name_method=Model._parse_resource_name,
format_resource_name_method=Model._format_resource_name,
project=project,
location=location,
)
parent_model = existing_resource
return parent_model
@staticmethod
def _get_true_alias_list(
version_aliases: Optional[Sequence[str]] = None,
is_default_version: bool = True,
) -> Optional[Sequence[str]]:
"""Gets the true `version_aliases` list based on `is_default_version`.
Args:
version_aliases (Sequence[str]): Optional. The user-provided list of model aliases.
is_default_version (bool):
Optional. When set, includes the "default" alias in `version_aliases`.
Defaults to True.
Returns:
true_alias_list (Sequence[str]):
                Optional. The true alias list, should one exist,
containing "default" if specified.
"""
if is_default_version:
if version_aliases and "default" not in version_aliases:
version_aliases.append("default")
elif not version_aliases:
version_aliases = ["default"]
return version_aliases