structure saas with tools
@@ -0,0 +1,658 @@
# -*- coding: utf-8 -*-
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# pylint: disable=bad-continuation, line-too-long, protected-access
"""Class for interacting with Model Garden OSS models."""

import datetime
import functools
import re
from typing import Dict, List, Optional, Sequence

from google.cloud import aiplatform
from google.cloud.aiplatform import base
from google.cloud.aiplatform import compat
from google.cloud.aiplatform import initializer
from google.cloud.aiplatform import models as aiplatform_models
from google.cloud.aiplatform import utils
from google.cloud.aiplatform_v1beta1 import types
from google.cloud.aiplatform_v1beta1.services import model_garden_service

from google.protobuf import duration_pb2


_LOGGER = base.Logger(__name__)
_DEFAULT_VERSION = compat.V1BETA1
_DEFAULT_TIMEOUT = 2 * 60 * 60  # 2 hours, same as UI one-click deployment.
_DEFAULT_EXPORT_TIMEOUT = 1 * 60 * 60  # 1 hour.
_HF_WILDCARD_FILTER = "is_hf_wildcard(true)"
_NATIVE_MODEL_FILTER = "is_hf_wildcard(false)"
_VERIFIED_DEPLOYMENT_FILTER = (
    "labels.VERIFIED_DEPLOYMENT_CONFIG=VERIFIED_DEPLOYMENT_SUCCEED"
)


def list_deployable_models(
    *, list_hf_models: bool = False, model_filter: Optional[str] = None
) -> List[str]:
    """Lists the deployable models in Model Garden.

    Args:
        list_hf_models: Whether to list the Hugging Face models.
        model_filter: Optional. A string to filter the models by.

    Returns:
        The names of the deployable models in Model Garden in the format of
        `{publisher}/{model}@{version}`, or Hugging Face model IDs in the
        format of `{organization}/{model}`.
    """
    filter_str = _NATIVE_MODEL_FILTER
    if list_hf_models:
        filter_str = " AND ".join(
            [_HF_WILDCARD_FILTER, _VERIFIED_DEPLOYMENT_FILTER]
        )
    if model_filter:
        filter_str = (
            f'{filter_str} AND (model_user_id=~"(?i).*{model_filter}.*" OR'
            f' display_name=~"(?i).*{model_filter}.*")'
        )

    request = types.ListPublisherModelsRequest(
        parent="publishers/*",
        list_all_versions=True,
        filter=filter_str,
    )
    client = initializer.global_config.create_client(
        client_class=_ModelGardenClientWithOverride,
        credentials=initializer.global_config.credentials,
        location_override="us-central1",
    )
    response = client.list_publisher_models(request)
    output = []
    for page in response.pages:
        for model in page.publisher_models:
            if model.supported_actions.multi_deploy_vertex.multi_deploy_vertex:
                output.append(
                    re.sub(r"publishers/(hf-|)|models/", "", model.name)
                    + ("" if list_hf_models else ("@" + model.version_id))
                )
    return output
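
# Usage sketch (illustrative only; assumes `vertexai.init(project=..., location=...)`
# has been called and that this module is importable as `model_garden`):
#
#   ids = model_garden.list_deployable_models(model_filter="gemma")
#   # e.g. ["google/gemma2@gemma-2-2b-it", ...]
#   hf_ids = model_garden.list_deployable_models(
#       list_hf_models=True, model_filter="llama"
#   )
#   # e.g. ["meta-llama/llama-3.1-8b", ...]  (no version suffix for HF models)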


def _is_hugging_face_model(model_name: str) -> bool:
    """Returns whether the model is a Hugging Face model."""
    return bool(
        re.match(r"^(?P<publisher>[^/]+)/(?P<model>[^/@]+)$", model_name)
    )


def _get_publisher_model_resource_name(publisher: str, model: str) -> str:
    """Returns the publisher model resource name.

    Args:
        publisher: Publisher of the model.
        model: Model name, which may or may not include a version.

    Returns:
        The resource name in the format of
        `publishers/{publisher}/models/{model}`, where `model` may carry an
        `@{version}` suffix.
    """
    return f"publishers/{publisher}/models/{model}"


def _reconcile_model_name(model_name: str) -> str:
    """Returns the full resource name for a model name in any accepted format.

    Args:
        model_name: Model Garden model resource name in the format of
            `publishers/{publisher}/models/{model}@{version}`, or a simplified
            resource name in the format of `{publisher}/{model}@{version}`, or
            a Hugging Face model ID in the format of `{organization}/{model}`.

    Returns:
        The resource name in the format of
        `publishers/{publisher}/models/{model}@{version}`.

    Raises:
        ValueError: If the model name is not in an accepted format.
    """
    model_name = model_name.lower()  # Use lower case for Hugging Face.
    full_resource_name_match = re.match(
        r"^publishers/(?P<publisher>[^/]+)/models/(?P<model>[^@]+)@(?P<version>[^@]+)$",
        model_name,
    )
    if full_resource_name_match:
        return _get_publisher_model_resource_name(
            full_resource_name_match.group("publisher"),
            full_resource_name_match.group("model")
            + "@"
            + full_resource_name_match.group("version"),
        )
    simplified_name_match = re.match(
        r"^(?P<publisher>[^/]+)/(?P<model>[^@]+)(?:@(?P<version>.+))?$",
        model_name,
    )
    if simplified_name_match:
        model = simplified_name_match.group("model")
        if simplified_name_match.group("version"):
            model += "@" + simplified_name_match.group("version")
        return _get_publisher_model_resource_name(
            publisher=simplified_name_match.group("publisher"),
            model=model,
        )
    raise ValueError(f"`{model_name}` is not a valid Open Model name.")
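
# Illustrative examples of the three accepted spellings (model names here are
# placeholders); each reconciles to a full resource name:
#
#   _reconcile_model_name("publishers/google/models/gemma2@gemma-2-2b-it")
#   # -> "publishers/google/models/gemma2@gemma-2-2b-it"
#   _reconcile_model_name("google/gemma2@gemma-2-2b-it")
#   # -> "publishers/google/models/gemma2@gemma-2-2b-it"
#   _reconcile_model_name("meta-llama/Llama-3.1-8B")  # Hugging Face ID
#   # -> "publishers/meta-llama/models/llama-3.1-8b"  (lower-cased, no version)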


def _construct_serving_container_spec(
    serving_container_image_uri: Optional[str] = None,
    serving_container_predict_route: Optional[str] = None,
    serving_container_health_route: Optional[str] = None,
    serving_container_command: Optional[Sequence[str]] = None,
    serving_container_args: Optional[Sequence[str]] = None,
    serving_container_environment_variables: Optional[Dict[str, str]] = None,
    serving_container_ports: Optional[Sequence[int]] = None,
    serving_container_grpc_ports: Optional[Sequence[int]] = None,
    serving_container_deployment_timeout: Optional[int] = None,
    serving_container_shared_memory_size_mb: Optional[int] = None,
    serving_container_startup_probe_exec: Optional[Sequence[str]] = None,
    serving_container_startup_probe_period_seconds: Optional[int] = None,
    serving_container_startup_probe_timeout_seconds: Optional[int] = None,
    serving_container_health_probe_exec: Optional[Sequence[str]] = None,
    serving_container_health_probe_period_seconds: Optional[int] = None,
    serving_container_health_probe_timeout_seconds: Optional[int] = None,
) -> types.ModelContainerSpec:
    """Constructs a ModelContainerSpec from the serving container parameters."""
    env = None
    ports = None
    grpc_ports = None
    deployment_timeout = (
        duration_pb2.Duration(seconds=serving_container_deployment_timeout)
        if serving_container_deployment_timeout
        else None
    )
    startup_probe = None
    health_probe = None

    if serving_container_environment_variables:
        env = [
            types.EnvVar(name=str(key), value=str(value))
            for key, value in serving_container_environment_variables.items()
        ]
    if serving_container_ports:
        ports = [
            types.Port(container_port=port) for port in serving_container_ports
        ]
    if serving_container_grpc_ports:
        grpc_ports = [
            types.Port(container_port=port)
            for port in serving_container_grpc_ports
        ]
    if (
        serving_container_startup_probe_exec
        or serving_container_startup_probe_period_seconds
        or serving_container_startup_probe_timeout_seconds
    ):
        startup_probe_exec = None
        if serving_container_startup_probe_exec:
            startup_probe_exec = types.Probe.ExecAction(
                command=serving_container_startup_probe_exec
            )
        startup_probe = types.Probe(
            exec=startup_probe_exec,
            period_seconds=serving_container_startup_probe_period_seconds,
            timeout_seconds=serving_container_startup_probe_timeout_seconds,
        )
    if (
        serving_container_health_probe_exec
        or serving_container_health_probe_period_seconds
        or serving_container_health_probe_timeout_seconds
    ):
        health_probe_exec = None
        if serving_container_health_probe_exec:
            health_probe_exec = types.Probe.ExecAction(
                command=serving_container_health_probe_exec
            )
        health_probe = types.Probe(
            exec=health_probe_exec,
            period_seconds=serving_container_health_probe_period_seconds,
            timeout_seconds=serving_container_health_probe_timeout_seconds,
        )

    return types.ModelContainerSpec(
        image_uri=serving_container_image_uri,
        command=serving_container_command,
        args=serving_container_args,
        env=env,
        ports=ports,
        grpc_ports=grpc_ports,
        predict_route=serving_container_predict_route,
        health_route=serving_container_health_route,
        deployment_timeout=deployment_timeout,
        shared_memory_size_mb=serving_container_shared_memory_size_mb,
        startup_probe=startup_probe,
        health_probe=health_probe,
    )
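
# Minimal sketch of building a custom container spec (all values below are
# illustrative placeholders, not defaults of this module):
#
#   spec = _construct_serving_container_spec(
#       serving_container_image_uri="us-docker.pkg.dev/my-project/my-repo/serve:latest",
#       serving_container_ports=[8080],
#       serving_container_environment_variables={"MODEL_ID": "my-model"},
#       serving_container_deployment_timeout=1800,  # 30 minutes
#   )
#   # The resulting spec can be passed to OpenModel.deploy(serving_container_spec=spec).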


class _ModelGardenClientWithOverride(utils.ClientWithOverride):
    _is_temporary = True
    _default_version = _DEFAULT_VERSION
    _version_map = (
        (
            _DEFAULT_VERSION,
            model_garden_service.ModelGardenServiceClient,
        ),
    )


class OpenModel:
    """Represents a Model Garden Open model."""

    def __init__(
        self,
        model_name: str,
    ):
        r"""Initializes a Model Garden model.

        Usage:

        ```
        model = OpenModel("publishers/google/models/gemma2@gemma-2-2b-it")
        ```

        Args:
            model_name: Model Garden model resource name in the format of
                `publishers/{publisher}/models/{model}@{version}`, or a
                simplified resource name in the format of
                `{publisher}/{model}@{version}`, or a Hugging Face model ID in
                the format of `{organization}/{model}`.
        """
        project = initializer.global_config.project
        location = initializer.global_config.location
        credentials = initializer.global_config.credentials

        self._model_name = model_name
        self._is_hugging_face_model = _is_hugging_face_model(model_name)
        self._publisher_model_name = _reconcile_model_name(model_name)
        self._project = project
        self._location = location
        self._credentials = credentials

    @functools.cached_property
    def _model_garden_client(
        self,
    ) -> model_garden_service.ModelGardenServiceClient:
        """Returns the Model Garden client."""
        return initializer.global_config.create_client(
            client_class=_ModelGardenClientWithOverride,
            credentials=self._credentials,
            location_override=self._location,
        )

    @functools.cached_property
    def _us_central1_model_garden_client(
        self,
    ) -> model_garden_service.ModelGardenServiceClient:
        """Returns the Model Garden client in us-central1."""
        return initializer.global_config.create_client(
            client_class=_ModelGardenClientWithOverride,
            credentials=self._credentials,
            location_override="us-central1",
        )

    def export(
        self,
        target_gcs_path: str = "",
        export_request_timeout: Optional[float] = None,
    ) -> str:
        """Exports an Open Model to a Google Cloud Storage bucket.

        Args:
            target_gcs_path: The target GCS path to export the model weights
                to.
            export_request_timeout: The timeout for the export request, in
                seconds. Defaults to 1 hour.

        Returns:
            The target GCS path the model weights were exported to.

        Raises:
            ValueError: If ``target_gcs_path`` is not specified.
        """
        if not target_gcs_path:
            raise ValueError("target_gcs_path is required.")

        request = types.ExportPublisherModelRequest(
            parent=f"projects/{self._project}/locations/{self._location}",
            name=self._publisher_model_name,
            destination=types.GcsDestination(output_uri_prefix=target_gcs_path),
        )
        request_headers = [
            ("x-goog-user-project", "{}".format(initializer.global_config.project)),
        ]

        _LOGGER.info(f"Exporting model weights: {self._model_name}")

        operation_future = self._model_garden_client.export_publisher_model(
            request, metadata=request_headers
        )
        _LOGGER.info(f"LRO: {operation_future.operation.name}")

        _LOGGER.info(f"Start time: {datetime.datetime.now()}")
        export_publisher_model_response = operation_future.result(
            timeout=export_request_timeout or _DEFAULT_EXPORT_TIMEOUT
        )
        _LOGGER.info(f"End time: {datetime.datetime.now()}")
        _LOGGER.info(f"Response: {export_publisher_model_response}")

        return target_gcs_path
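
    # Usage sketch (illustrative; the bucket path is a placeholder):
    #
    #   model = OpenModel("google/gemma2@gemma-2-2b-it")
    #   gcs_path = model.export(target_gcs_path="gs://my-bucket/gemma2")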

    def deploy(
        self,
        accept_eula: bool = False,
        hugging_face_access_token: Optional[str] = None,
        machine_type: Optional[str] = None,
        min_replica_count: int = 1,
        max_replica_count: int = 1,
        accelerator_type: Optional[str] = None,
        accelerator_count: Optional[int] = None,
        spot: bool = False,
        reservation_affinity_type: Optional[str] = None,
        reservation_affinity_key: Optional[str] = None,
        reservation_affinity_values: Optional[List[str]] = None,
        use_dedicated_endpoint: Optional[bool] = False,
        fast_tryout_enabled: Optional[bool] = False,
        endpoint_display_name: Optional[str] = None,
        model_display_name: Optional[str] = None,
        deploy_request_timeout: Optional[float] = None,
        serving_container_spec: Optional[types.ModelContainerSpec] = None,
        serving_container_image_uri: Optional[str] = None,
        serving_container_predict_route: Optional[str] = None,
        serving_container_health_route: Optional[str] = None,
        serving_container_command: Optional[Sequence[str]] = None,
        serving_container_args: Optional[Sequence[str]] = None,
        serving_container_environment_variables: Optional[Dict[str, str]] = None,
        serving_container_ports: Optional[Sequence[int]] = None,
        serving_container_grpc_ports: Optional[Sequence[int]] = None,
        serving_container_deployment_timeout: Optional[int] = None,
        serving_container_shared_memory_size_mb: Optional[int] = None,
        serving_container_startup_probe_exec: Optional[Sequence[str]] = None,
        serving_container_startup_probe_period_seconds: Optional[int] = None,
        serving_container_startup_probe_timeout_seconds: Optional[int] = None,
        serving_container_health_probe_exec: Optional[Sequence[str]] = None,
        serving_container_health_probe_period_seconds: Optional[int] = None,
        serving_container_health_probe_timeout_seconds: Optional[int] = None,
    ) -> aiplatform.Endpoint:
        """Deploys an Open Model to an endpoint.

        Args:
            accept_eula (bool): Whether to accept the End User License
                Agreement.
            hugging_face_access_token (str): The access token used to access
                gated Hugging Face models.
                Reference: https://huggingface.co/docs/hub/en/security-tokens
            machine_type (str): Optional. The type of machine. If no machine
                type is specified, the model is deployed with automatic
                resources.
            min_replica_count (int): Optional. The minimum number of machine
                replicas the deployed model is always deployed on. If traffic
                against it increases, it may dynamically be deployed onto more
                replicas, and as traffic decreases, some of these extra
                replicas may be freed.
            max_replica_count (int): Optional. The maximum number of replicas
                this deployed model may be deployed on when the traffic
                against it increases. If the requested value is too large, the
                deployment will error; but if the deployment succeeds, the
                ability to scale the model to that many replicas is guaranteed
                (barring service outages). If traffic against the deployed
                model increases beyond what its replicas at maximum can
                handle, a portion of the traffic will be dropped. If this
                value is not provided, the larger of min_replica_count and 1
                is used. If the provided value is smaller than
                min_replica_count, it is automatically increased to
                min_replica_count.
            accelerator_type (str): Optional. Hardware accelerator type. Must
                also set accelerator_count if used. One of
                ACCELERATOR_TYPE_UNSPECIFIED, NVIDIA_TESLA_K80,
                NVIDIA_TESLA_P100, NVIDIA_TESLA_V100, NVIDIA_TESLA_P4,
                NVIDIA_TESLA_T4.
            accelerator_count (int): Optional. The number of accelerators to
                attach to a worker replica.
            spot (bool): Optional. Whether to schedule the deployment workload
                on spot VMs.
            reservation_affinity_type (str): Optional. The type of reservation
                affinity. One of NO_RESERVATION, ANY_RESERVATION,
                SPECIFIC_RESERVATION, SPECIFIC_THEN_ANY_RESERVATION,
                SPECIFIC_THEN_NO_RESERVATION.
            reservation_affinity_key (str): Optional. Corresponds to the label
                key of a reservation resource. To target a
                SPECIFIC_RESERVATION by name, use
                `compute.googleapis.com/reservation-name` as the key and
                specify the name of your reservation as its value.
            reservation_affinity_values (List[str]): Optional. Corresponds to
                the label values of a reservation resource. This must be the
                full resource name of the reservation. Format:
                'projects/{project_id_or_number}/zones/{zone}/reservations/{reservation_name}'
            use_dedicated_endpoint (bool): Optional. Defaults to False. If set
                to True, the underlying prediction call will be made using the
                dedicated endpoint DNS.
            fast_tryout_enabled (bool): Optional. Defaults to False. If True,
                the model will be deployed using a faster deployment path.
                Useful for quick experiments, but not for production
                workloads. Only available for the most popular models with
                certain machine types.
            endpoint_display_name (str): Optional. The display name of the
                created endpoint.
            model_display_name (str): Optional. The display name of the
                uploaded model.
            deploy_request_timeout (float): Optional. The timeout for the
                deploy request, in seconds. Defaults to 2 hours.
            serving_container_spec (types.ModelContainerSpec): Optional. The
                container specification for the model instance. This
                specification overrides the default container specification
                and the other serving container parameters.
            serving_container_image_uri (str): Optional. The URI of the Model
                serving container. Required if any other serving container
                parameter is specified without `serving_container_spec`.
            serving_container_predict_route (str): Optional. An HTTP path to
                send prediction requests to the container, which must be
                supported by it. If not specified, a default HTTP path is used
                by Vertex AI.
            serving_container_health_route (str): Optional. An HTTP path to
                send health check requests to the container, which must be
                supported by it. If not specified, a standard HTTP path is
                used by Vertex AI.
            serving_container_command (Sequence[str]): Optional. The command
                with which the container is run. Not executed within a shell.
                The Docker image's ENTRYPOINT is used if this is not provided.
                Variable references $(VAR_NAME) are expanded using the
                container's environment. If a variable cannot be resolved, the
                reference in the input string is left unchanged. The
                $(VAR_NAME) syntax can be escaped with a double $$, i.e.
                $$(VAR_NAME). Escaped references are never expanded,
                regardless of whether the variable exists.
            serving_container_args (Sequence[str]): Optional. The arguments to
                the command. The Docker image's CMD is used if this is not
                provided. Variable references are expanded in the same way as
                for serving_container_command.
            serving_container_environment_variables (Dict[str, str]):
                Optional. The environment variables to set in the container,
                as a dictionary mapping environment variable names to values.
            serving_container_ports (Sequence[int]): Optional. Declaration of
                ports that are exposed by the container. This field is
                primarily informational: it gives Vertex AI information about
                the network connections the container uses. Whether a port is
                listed here has no impact on whether it is actually exposed;
                any port listening on the default "0.0.0.0" address inside a
                container is accessible from the network.
            serving_container_grpc_ports (Sequence[int]): Optional.
                Declaration of gRPC ports that are exposed by the container.
                Vertex AI sends gRPC prediction requests that it receives to
                the first port on this list, and also sends liveness and
                health checks to it. If this field is not specified, gRPC
                requests to the container are disabled. Vertex AI does not use
                ports other than the first one listed. This field corresponds
                to the `ports` field of the Kubernetes Containers v1 core API.
            serving_container_deployment_timeout (int): Optional. Deployment
                timeout in seconds.
            serving_container_shared_memory_size_mb (int): Optional. The
                amount of VM memory to reserve as shared memory for the model,
                in megabytes.
            serving_container_startup_probe_exec (Sequence[str]): Optional.
                Exec specifies the action to take, used by the startup probe.
                An example of this argument would be ["cat", "/tmp/healthy"].
            serving_container_startup_probe_period_seconds (int): Optional.
                How often (in seconds) to perform the startup probe. Defaults
                to 10 seconds. Minimum value is 1.
            serving_container_startup_probe_timeout_seconds (int): Optional.
                Number of seconds after which the startup probe times out.
                Defaults to 1 second. Minimum value is 1.
            serving_container_health_probe_exec (Sequence[str]): Optional.
                Exec specifies the action to take, used by the health probe.
                An example of this argument would be ["cat", "/tmp/healthy"].
            serving_container_health_probe_period_seconds (int): Optional. How
                often (in seconds) to perform the health probe. Defaults to 10
                seconds. Minimum value is 1.
            serving_container_health_probe_timeout_seconds (int): Optional.
                Number of seconds after which the health probe times out.
                Defaults to 1 second. Minimum value is 1.

        Returns:
            endpoint (aiplatform.Endpoint): The created endpoint.

        Raises:
            ValueError: If ``serving_container_spec`` is specified but
                ``serving_container_spec.image_uri`` is ``None``, or if both
                ``serving_container_spec`` and ``serving_container_image_uri``
                are specified.
        """
        request = types.DeployRequest(
            destination=f"projects/{self._project}/locations/{self._location}",
        )
        if self._is_hugging_face_model:
            request.hugging_face_model_id = self._model_name.lower()
        else:
            request.publisher_model_name = self._publisher_model_name

        if endpoint_display_name:
            request.endpoint_config.endpoint_display_name = endpoint_display_name
        if model_display_name:
            request.model_config.model_display_name = model_display_name

        if accept_eula:
            request.model_config.accept_eula = accept_eula

        if hugging_face_access_token:
            request.model_config.hugging_face_access_token = (
                hugging_face_access_token
            )

        provided_custom_machine_spec = (
            machine_type or accelerator_type or accelerator_count
        )
        if provided_custom_machine_spec:
            dedicated_resources = types.DedicatedResources(
                machine_spec=types.MachineSpec(
                    machine_type=machine_type,
                    accelerator_type=accelerator_type,
                    accelerator_count=accelerator_count,
                ),
                min_replica_count=min_replica_count,
                max_replica_count=max_replica_count,
            )
            request.deploy_config.dedicated_resources = dedicated_resources
            if spot:
                request.deploy_config.dedicated_resources.spot = True

            if reservation_affinity_type:
                request.deploy_config.dedicated_resources.machine_spec.reservation_affinity.reservation_affinity_type = (
                    reservation_affinity_type
                )
                if reservation_affinity_key and reservation_affinity_values:
                    request.deploy_config.dedicated_resources.machine_spec.reservation_affinity.key = (
                        reservation_affinity_key
                    )
                    request.deploy_config.dedicated_resources.machine_spec.reservation_affinity.values = (
                        reservation_affinity_values
                    )

        if use_dedicated_endpoint:
            request.endpoint_config.dedicated_endpoint_enabled = (
                use_dedicated_endpoint
            )

        if fast_tryout_enabled:
            request.deploy_config.fast_tryout_enabled = fast_tryout_enabled

        if serving_container_spec:
            if not serving_container_spec.image_uri:
                raise ValueError(
                    "Serving container image uri is required for the serving"
                    " container spec."
                )
            if serving_container_image_uri:
                raise ValueError(
                    "Serving container image uri is already set in the serving"
                    " container spec."
                )
            request.model_config.container_spec = serving_container_spec

        if serving_container_image_uri:
            request.model_config.container_spec = _construct_serving_container_spec(
                serving_container_image_uri,
                serving_container_predict_route,
                serving_container_health_route,
                serving_container_command,
                serving_container_args,
                serving_container_environment_variables,
                serving_container_ports,
                serving_container_grpc_ports,
                serving_container_deployment_timeout,
                serving_container_shared_memory_size_mb,
                serving_container_startup_probe_exec,
                serving_container_startup_probe_period_seconds,
                serving_container_startup_probe_timeout_seconds,
                serving_container_health_probe_exec,
                serving_container_health_probe_period_seconds,
                serving_container_health_probe_timeout_seconds,
            )

        _LOGGER.info(f"Deploying model: {self._model_name}")

        operation_future = self._model_garden_client.deploy(request)
        _LOGGER.info(f"LRO: {operation_future.operation.name}")

        _LOGGER.info(f"Start time: {datetime.datetime.now()}")
        deploy_response = operation_future.result(
            timeout=deploy_request_timeout or _DEFAULT_TIMEOUT
        )
        _LOGGER.info(f"End time: {datetime.datetime.now()}")

        self._endpoint_name = deploy_response.endpoint
        _LOGGER.info(f"Endpoint: {self._endpoint_name}")
        endpoint = aiplatform.Endpoint._construct_sdk_resource_from_gapic(
            aiplatform_models.gca_endpoint_compat.Endpoint(
                name=self._endpoint_name
            ),
        )
        return endpoint
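
    # Usage sketch (illustrative; the machine shape is an assumption, taken
    # from the accelerator types listed in the docstring above; use
    # list_deploy_options() to find verified configurations for a model):
    #
    #   model = OpenModel("publishers/google/models/gemma2@gemma-2-2b-it")
    #   endpoint = model.deploy(
    #       accept_eula=True,
    #       machine_type="n1-standard-8",
    #       accelerator_type="NVIDIA_TESLA_T4",
    #       accelerator_count=1,
    #   )
    #   prediction = endpoint.predict(instances=[{"prompt": "Hello"}])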

    def list_deploy_options(
        self,
    ) -> Sequence[types.PublisherModel.CallToAction.Deploy]:
        """Lists the verified deploy options for the model."""
        request = types.GetPublisherModelRequest(
            name=self._publisher_model_name,
            is_hugging_face_model=bool(self._is_hugging_face_model),
            include_equivalent_model_garden_model_deployment_configs=True,
        )
        response = self._us_central1_model_garden_client.get_publisher_model(
            request
        )
        multi_deploy = (
            response.supported_actions.multi_deploy_vertex.multi_deploy_vertex
        )
        if not multi_deploy:
            raise ValueError(
                "Model does not support deployment. Use a deployable model"
                " instead; the list_deployable_models() method returns the"
                " models that currently support deployment."
            )
        return multi_deploy
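
    # Usage sketch (illustrative):
    #
    #   model = OpenModel("google/gemma2@gemma-2-2b-it")
    #   for option in model.list_deploy_options():
    #       spec = option.dedicated_resources.machine_spec
    #       print(spec.machine_type, spec.accelerator_type, spec.accelerator_count)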