structure saas with tools
This commit is contained in:
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
File diff suppressed because it is too large
Load Diff
Binary file not shown.
@@ -0,0 +1,282 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2022 Google LLC
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
from typing import Dict, List, Union
|
||||
|
||||
from google.cloud.aiplatform import base
|
||||
from google.cloud.aiplatform.compat.types import (
|
||||
featurestore_online_service_v1beta1 as gca_featurestore_online_service_v1beta1,
|
||||
)
|
||||
from google.cloud.aiplatform.compat.types import (
|
||||
types_v1beta1 as gca_types_v1beta1,
|
||||
)
|
||||
|
||||
from google.cloud.aiplatform.featurestore import _entity_type
|
||||
|
||||
# Module-level logger shared by everything in this file (uses the SDK's
# own Logger wrapper rather than the stdlib logging module directly).
_LOGGER = base.Logger(__name__)
|
||||
|
||||
|
||||
class EntityType(_entity_type._EntityType):
    """Preview EntityType resource for Vertex AI.

    Extends the GA ``_EntityType`` with the v1beta1 streaming-ingestion
    surface (``write_feature_values``), which can write multiple payloads
    per request.
    """

    # TODO(b/262275273): Remove preview v1beta1 implementation of `write_feature_values`
    # when GA implementation can write multiple payloads per request. Currently, GA
    # supports one payload per request.
    def write_feature_values(
        self,
        instances: Union[
            List[gca_featurestore_online_service_v1beta1.WriteFeatureValuesPayload],
            Dict[
                str,
                Dict[
                    str,
                    Union[
                        int,
                        str,
                        float,
                        bool,
                        bytes,
                        List[int],
                        List[str],
                        List[float],
                        List[bool],
                    ],
                ],
            ],
            "pd.DataFrame",  # type: ignore # noqa: F821 - skip check for undefined name 'pd'
        ],
    ) -> "EntityType":
        """Streaming ingestion. Write feature values directly to Feature Store.

        ```
        my_entity_type = aiplatform.EntityType(
            entity_type_name="my_entity_type_id",
            featurestore_id="my_featurestore_id",
        )

        # writing feature values from a pandas DataFrame
        my_dataframe = pd.DataFrame(
            data = [
                {"entity_id": "movie_01", "average_rating": 4.9},
                {"entity_id": "movie_02", "average_rating": 4.5},
            ],
            columns=["entity_id", "average_rating"],
        )
        my_dataframe = my_dataframe.set_index("entity_id")

        my_entity_type.preview.write_feature_values(
            instances=my_dataframe
        )

        # writing feature values from a Python dict
        my_data_dict = {
            "movie_03" : {"average_rating": 3.7},
            "movie_04" : {"average_rating": 2.5},
        }

        my_entity_type.preview.write_feature_values(
            instances=my_data_dict
        )

        # writing feature values from a list of WriteFeatureValuesPayload objects
        payloads = [
            gca_featurestore_online_service_v1beta1.WriteFeatureValuesPayload(
                entity_id="movie_05",
                feature_values=gca_featurestore_online_service_v1beta1.FeatureValue(
                    double_value=4.9
                )
            )
        ]

        my_entity_type.preview.write_feature_values(
            instances=payloads
        )

        # reading back written feature values
        my_entity_type.read(
            entity_ids=["movie_01", "movie_02", "movie_03", "movie_04", "movie_05"]
        )
        ```

        Args:
            instances (
                Union[
                    List[gca_featurestore_online_service_v1beta1.WriteFeatureValuesPayload],
                    Dict[str, Dict[str, Union[int, str, float, bool, bytes,
                        List[int], List[str], List[float], List[bool]]]],
                    pd.DataFrame]):
                Required. Feature values to be written to the Feature Store that
                can take the form of a list of WriteFeatureValuesPayload objects,
                a Python dict of the form {entity_id : {feature_id : feature_value}, ...},
                or a pandas DataFrame, where the index column holds the unique entity
                ID strings and each remaining column represents a feature. Each row
                in the pandas DataFrame represents an entity, which has an entity ID
                and its associated feature values.

        Returns:
            EntityType - The updated EntityType object.
        """
        if isinstance(instances, dict):
            payloads = self._generate_payloads(instances=instances)
        elif isinstance(instances, list):
            payloads = instances
        else:
            # Remaining case is a pandas DataFrame indexed by entity ID:
            # orient="index" yields {entity_id: {feature_id: value, ...}, ...}.
            instances_dict = instances.to_dict(orient="index")
            payloads = self._generate_payloads(instances=instances_dict)

        _LOGGER.log_action_start_against_resource(
            "Writing",
            "feature values",
            self,
        )

        # Pin the online-serving client to v1beta1, which accepts multiple
        # payloads in a single request (the GA surface does not yet).
        self._featurestore_online_client.select_version("v1beta1").write_feature_values(
            entity_type=self.resource_name, payloads=payloads
        )

        _LOGGER.log_action_completed_against_resource("feature values", "written", self)

        return self

    @classmethod
    def _generate_payloads(
        cls,
        instances: Dict[
            str,
            Dict[
                str,
                Union[
                    int,
                    str,
                    float,
                    bool,
                    bytes,
                    List[int],
                    List[str],
                    List[float],
                    List[bool],
                ],
            ],
        ],
    ) -> List[gca_featurestore_online_service_v1beta1.WriteFeatureValuesPayload]:
        """Helper method used to generate GAPIC WriteFeatureValuesPayloads from
        a Python dict.

        Args:
            instances (Dict[str, Dict[str, Union[int, str, float, bool, bytes,
                List[int], List[str], List[float], List[bool]]]]):
                Required. Dict mapping entity IDs to their corresponding features.

        Returns:
            List[gca_featurestore_online_service_v1beta1.WriteFeatureValuesPayload] -
                A list of WriteFeatureValuesPayload objects ready to be written to the Feature Store.
        """
        payloads = []
        for entity_id, features in instances.items():
            # Convert each Python literal (or list of literals) into its
            # GAPIC FeatureValue representation.
            feature_values = {}
            for feature_id, value in features.items():
                feature_value = cls._convert_value_to_gapic_feature_value(
                    feature_id=feature_id, value=value
                )
                feature_values[feature_id] = feature_value
            payload = gca_featurestore_online_service_v1beta1.WriteFeatureValuesPayload(
                entity_id=entity_id, feature_values=feature_values
            )
            payloads.append(payload)

        return payloads

    @classmethod
    def _convert_value_to_gapic_feature_value(
        cls,
        feature_id: str,
        value: Union[
            int, str, float, bool, bytes, List[int], List[str], List[float], List[bool]
        ],
    ) -> gca_featurestore_online_service_v1beta1.FeatureValue:
        """Helper method that converts a Python literal value or a list of
        literals to a GAPIC FeatureValue.

        Args:
            feature_id (str):
                Required. Name of a feature.
            value (Union[int, str, float, bool, bytes,
                List[int], List[str], List[float], List[bool]]]):
                Required. Python literal value or list of Python literals to
                be converted to a GAPIC FeatureValue.

        Returns:
            gca_featurestore_online_service_v1beta1.FeatureValue - GAPIC object
                that represents the value of a feature.

        Raises:
            ValueError if a list has values that are not all of the same type.
            ValueError if feature type is not supported.
        """
        # NOTE: bool must be tested before int because bool is a subclass of
        # int in Python; the same ordering is used for list elements below.
        if isinstance(value, bool):
            feature_value = gca_featurestore_online_service_v1beta1.FeatureValue(
                bool_value=value
            )
        elif isinstance(value, str):
            feature_value = gca_featurestore_online_service_v1beta1.FeatureValue(
                string_value=value
            )
        elif isinstance(value, int):
            feature_value = gca_featurestore_online_service_v1beta1.FeatureValue(
                int64_value=value
            )
        elif isinstance(value, float):
            feature_value = gca_featurestore_online_service_v1beta1.FeatureValue(
                double_value=value
            )
        elif isinstance(value, bytes):
            feature_value = gca_featurestore_online_service_v1beta1.FeatureValue(
                bytes_value=value
            )
        elif isinstance(value, list):
            if all(isinstance(item, bool) for item in value):
                feature_value = gca_featurestore_online_service_v1beta1.FeatureValue(
                    bool_array_value=gca_types_v1beta1.BoolArray(values=value)
                )
            elif all(isinstance(item, str) for item in value):
                feature_value = gca_featurestore_online_service_v1beta1.FeatureValue(
                    string_array_value=gca_types_v1beta1.StringArray(values=value)
                )
            elif all(isinstance(item, int) for item in value):
                feature_value = gca_featurestore_online_service_v1beta1.FeatureValue(
                    int64_array_value=gca_types_v1beta1.Int64Array(values=value)
                )
            elif all(isinstance(item, float) for item in value):
                feature_value = gca_featurestore_online_service_v1beta1.FeatureValue(
                    double_array_value=gca_types_v1beta1.DoubleArray(values=value)
                )
            else:
                raise ValueError(
                    f"Cannot infer feature value for feature {feature_id} with "
                    f"value {value}! Please ensure every value in the list "
                    "is the same type (either int, str, float, bool)."
                )

        else:
            raise ValueError(
                f"Cannot infer feature value for feature {feature_id} with "
                f"value {value}! {type(value)} type is not supported. "
                "Please ensure value type is an int, str, float, bool, "
                "bytes, or a list of int, str, float, bool."
            )
        return feature_value
|
||||
@@ -0,0 +1,869 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2023 Google LLC
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
from typing import Dict, List, Optional, Union
|
||||
|
||||
import copy
|
||||
import uuid
|
||||
|
||||
from google.api_core import retry
|
||||
from google.auth import credentials as auth_credentials
|
||||
from google.cloud import aiplatform
|
||||
from google.cloud.aiplatform import base
|
||||
from google.cloud.aiplatform import compat
|
||||
from google.cloud.aiplatform import initializer
|
||||
from google.cloud.aiplatform import jobs
|
||||
from google.cloud.aiplatform import utils
|
||||
from google.cloud.aiplatform.compat.types import (
|
||||
custom_job_v1beta1 as gca_custom_job_compat,
|
||||
hyperparameter_tuning_job_v1beta1 as gca_hyperparameter_tuning_job_compat,
|
||||
job_state as gca_job_state,
|
||||
job_state_v1beta1 as gca_job_state_v1beta1,
|
||||
study_v1beta1,
|
||||
)
|
||||
from google.cloud.aiplatform.compat.types import (
|
||||
execution_v1beta1 as gcs_execution_compat,
|
||||
)
|
||||
from google.cloud.aiplatform.compat.types import io_v1beta1 as gca_io_compat
|
||||
from google.cloud.aiplatform.metadata import constants as metadata_constants
|
||||
from google.cloud.aiplatform import hyperparameter_tuning
|
||||
from google.cloud.aiplatform.utils import console_utils
|
||||
import proto
|
||||
|
||||
from google.protobuf import duration_pb2 # type: ignore
|
||||
|
||||
|
||||
# Module-level logger and default retry policy shared by this file's jobs.
_LOGGER = base.Logger(__name__)
_DEFAULT_RETRY = retry.Retry()
# TODO(b/242108750): remove temporary logic once model monitoring for batch prediction is GA
# Terminal/settled states, listed for both the GA and the v1beta1 JobState
# enums because this preview module works with both client versions.
_JOB_COMPLETE_STATES = (
    gca_job_state.JobState.JOB_STATE_SUCCEEDED,
    gca_job_state.JobState.JOB_STATE_FAILED,
    gca_job_state.JobState.JOB_STATE_CANCELLED,
    gca_job_state.JobState.JOB_STATE_PAUSED,
    gca_job_state_v1beta1.JobState.JOB_STATE_SUCCEEDED,
    gca_job_state_v1beta1.JobState.JOB_STATE_FAILED,
    gca_job_state_v1beta1.JobState.JOB_STATE_CANCELLED,
    gca_job_state_v1beta1.JobState.JOB_STATE_PAUSED,
)

# Subset of the above that indicates the job did NOT complete successfully
# (again covering both GA and v1beta1 enums).
_JOB_ERROR_STATES = (
    gca_job_state.JobState.JOB_STATE_FAILED,
    gca_job_state.JobState.JOB_STATE_CANCELLED,
    gca_job_state_v1beta1.JobState.JOB_STATE_FAILED,
    gca_job_state_v1beta1.JobState.JOB_STATE_CANCELLED,
)
|
||||
|
||||
|
||||
class CustomJob(jobs.CustomJob):
    """Deprecated. Vertex AI Custom Job (preview).

    Preview variant of ``jobs.CustomJob`` that builds its resource against
    the v1beta1 API (adding ``persistent_resource_id``) and can attach an
    Experiment / ExperimentRun and scheduling options at submit time.
    """

    def __init__(
        self,
        # TODO(b/223262536): Make display_name parameter fully optional in next major release
        display_name: str,
        worker_pool_specs: Union[
            List[Dict], List[gca_custom_job_compat.WorkerPoolSpec]
        ],
        base_output_dir: Optional[str] = None,
        project: Optional[str] = None,
        location: Optional[str] = None,
        credentials: Optional[auth_credentials.Credentials] = None,
        labels: Optional[Dict[str, str]] = None,
        encryption_spec_key_name: Optional[str] = None,
        staging_bucket: Optional[str] = None,
        persistent_resource_id: Optional[str] = None,
    ):
        """Deprecated. Please use the GA (non-preview) version of this class.

        Constructs a Custom Job with Worker Pool Specs.

        ```
        Example usage:
        worker_pool_specs = [
            {
                "machine_spec": {
                    "machine_type": "n1-standard-4",
                    "accelerator_type": "NVIDIA_TESLA_K80",
                    "accelerator_count": 1,
                },
                "replica_count": 1,
                "container_spec": {
                    "image_uri": container_image_uri,
                    "command": [],
                    "args": [],
                },
            }
        ]

        my_job = aiplatform.preview.jobs.CustomJob(
            display_name='my_job',
            worker_pool_specs=worker_pool_specs,
            labels={'my_key': 'my_value'},
        )

        my_job.run()
        ```


        For more information on configuring worker pool specs please visit:
        https://cloud.google.com/ai-platform-unified/docs/training/create-custom-job


        Args:
            display_name (str):
                Required. The user-defined name of the HyperparameterTuningJob.
                The name can be up to 128 characters long and can consist
                of any UTF-8 characters.
            worker_pool_specs (Union[List[Dict], List[aiplatform.gapic.WorkerPoolSpec]]):
                Required. The spec of the worker pools including machine type and Docker image.
                Can be provided as a list of dictionaries or list of WorkerPoolSpec proto messages.
            base_output_dir (str):
                Optional. GCS output directory of job. If not provided a
                timestamped directory in the staging directory will be used.
            project (str):
                Optional. Project to run the custom job in. Overrides project set in aiplatform.init.
            location (str):
                Optional. Location to run the custom job in. Overrides location set in aiplatform.init.
            credentials (auth_credentials.Credentials):
                Optional. Custom credentials to use to run call custom job service. Overrides
                credentials set in aiplatform.init.
            labels (Dict[str, str]):
                Optional. The labels with user-defined metadata to
                organize CustomJobs.
                Label keys and values can be no longer than 64
                characters (Unicode codepoints), can only
                contain lowercase letters, numeric characters,
                underscores and dashes. International characters
                are allowed.
                See https://goo.gl/xmQnxf for more information
                and examples of labels.
            encryption_spec_key_name (str):
                Optional. Customer-managed encryption key name for a
                CustomJob. If this is set, then all resources
                created by the CustomJob will be encrypted with
                the provided encryption key.
            staging_bucket (str):
                Optional. Bucket for produced custom job artifacts. Overrides
                staging_bucket set in aiplatform.init.
            persistent_resource_id (str):
                Optional. The ID of the PersistentResource in the same Project
                and Location. If this is specified, the job will be run on
                existing machines held by the PersistentResource instead of
                on-demand short-live machines. The network and CMEK configs on
                the job should be consistent with those on the PersistentResource,
                otherwise, the job will be rejected.

        Raises:
            RuntimeError: If staging bucket was not set using aiplatform.init
                and a staging bucket was not passed in.
        """

        # The GA constructor sets up project/location/credentials and builds
        # a GA _gca_resource; that resource is intentionally rebuilt below as
        # a v1beta1 CustomJob so persistent_resource_id can be carried.
        super().__init__(
            display_name=display_name,
            worker_pool_specs=worker_pool_specs,
            base_output_dir=base_output_dir,
            project=project,
            location=location,
            credentials=credentials,
            labels=labels,
            encryption_spec_key_name=encryption_spec_key_name,
            staging_bucket=staging_bucket,
        )

        staging_bucket = staging_bucket or initializer.global_config.staging_bucket

        if not staging_bucket:
            raise RuntimeError(
                "staging_bucket should be passed to CustomJob constructor or "
                "should be set using aiplatform.init(staging_bucket='gs://my-bucket')"
            )

        if labels:
            utils.validate_labels(labels)

        # default directory if not given
        base_output_dir = base_output_dir or utils._timestamped_gcs_dir(
            staging_bucket, "aiplatform-custom-job"
        )

        if not display_name:
            display_name = self.__class__._generate_display_name()

        # Replace the GA resource built by super().__init__ with the v1beta1
        # representation (adds persistent_resource_id to the job spec).
        self._gca_resource = gca_custom_job_compat.CustomJob(
            display_name=display_name,
            job_spec=gca_custom_job_compat.CustomJobSpec(
                worker_pool_specs=worker_pool_specs,
                base_output_directory=gca_io_compat.GcsDestination(
                    output_uri_prefix=base_output_dir
                ),
                persistent_resource_id=persistent_resource_id,
            ),
            labels=labels,
            encryption_spec=initializer.global_config.get_encryption_spec(
                encryption_spec_key_name=encryption_spec_key_name,
                select_version=compat.V1BETA1,
            ),
        )

        # Experiment state is populated later by submit(); autolog is flipped
        # elsewhere (see the from_local_script reference in submit()).
        self._experiment = None
        self._experiment_run = None
        self._enable_autolog = False

    def _get_gca_resource(
        self,
        resource_name: str,
        parent_resource_name_fields: Optional[Dict[str, str]] = None,
    ) -> proto.Message:
        """Returns GAPIC service representation of client class resource.

        Overrides the base implementation to pin the getter to the v1beta1
        client version.

        Args:
            resource_name (str): Required. A fully-qualified resource name or ID.
            parent_resource_name_fields (Dict[str,str]):
                Optional. Mapping of parent resource name key to values. These
                will be used to compose the resource name if only resource ID is given.
                Should not include project and location.
        """
        resource_name = utils.full_resource_name(
            resource_name=resource_name,
            resource_noun=self._resource_noun,
            parse_resource_name_method=self._parse_resource_name,
            format_resource_name_method=self._format_resource_name,
            project=self.project,
            location=self.location,
            parent_resource_name_fields=parent_resource_name_fields,
            resource_id_validator=self._resource_id_validator,
        )

        return getattr(self.api_client.select_version("v1beta1"), self._getter_method)(
            name=resource_name, retry=_DEFAULT_RETRY
        )

    def submit(
        self,
        *,
        service_account: Optional[str] = None,
        network: Optional[str] = None,
        timeout: Optional[int] = None,
        restart_job_on_worker_restart: bool = False,
        enable_web_access: bool = False,
        experiment: Optional[Union["aiplatform.Experiment", str]] = None,
        experiment_run: Optional[Union["aiplatform.ExperimentRun", str]] = None,
        tensorboard: Optional[str] = None,
        create_request_timeout: Optional[float] = None,
        disable_retries: bool = False,
        max_wait_duration: Optional[int] = None,
    ) -> None:
        """Submit the configured CustomJob.

        Args:
            service_account (str):
                Optional. Specifies the service account for workload run-as account.
                Users submitting jobs must have act-as permission on this run-as account.
            network (str):
                Optional. The full name of the Compute Engine network to which the job
                should be peered. For example, projects/12345/global/networks/myVPC.
                Private services access must already be configured for the network.
            timeout (int):
                The maximum job running time in seconds. The default is 7 days.
            restart_job_on_worker_restart (bool):
                Restarts the entire CustomJob if a worker
                gets restarted. This feature can be used by
                distributed training jobs that are not resilient
                to workers leaving and joining a job.
            enable_web_access (bool):
                Whether you want Vertex AI to enable interactive shell access
                to training containers.
                https://cloud.google.com/vertex-ai/docs/training/monitor-debug-interactive-shell
            experiment (Union[aiplatform.Experiment, str]):
                Optional. The instance or name of an Experiment resource to which
                this CustomJob will upload training parameters and metrics.

                `service_account` is required with provided `experiment`.
                For more information on configuring your service account please visit:
                https://cloud.google.com/vertex-ai/docs/experiments/tensorboard-training
            experiment_run (Union[aiplatform.ExperimentRun, str]):
                Optional. The instance or name of an ExperimentRun resource to which
                this CustomJob will upload training parameters and metrics.
                This arg can only be set when `experiment` is set. If 'experiment'
                is set but 'experiment_run` is not, an ExperimentRun resource
                will still be auto-generated.
            tensorboard (str):
                Optional. The name of a Vertex AI
                [Tensorboard][google.cloud.aiplatform.v1beta1.Tensorboard]
                resource to which this CustomJob will upload Tensorboard
                logs. Format:
                ``projects/{project}/locations/{location}/tensorboards/{tensorboard}``

                The training script should write Tensorboard to following Vertex AI environment
                variable:

                AIP_TENSORBOARD_LOG_DIR

                `service_account` is required with provided `tensorboard`.
                For more information on configuring your service account please visit:
                https://cloud.google.com/vertex-ai/docs/experiments/tensorboard-training
            create_request_timeout (float):
                Optional. The timeout for the create request in seconds.
            disable_retries (bool):
                Indicates if the job should retry for internal errors after the
                job starts running. If True, overrides
                `restart_job_on_worker_restart` to False.
            max_wait_duration (int):
                This is the maximum duration that a job will wait for the
                requested resources to be provisioned in seconds. If set to 0,
                the job will wait indefinitely. The default is 30 minutes.

        Raises:
            ValueError:
                If both `experiment` and `tensorboard` are specified or if
                `enable_autolog` is True in `CustomJob.from_local_script` but
                `experiment` is not specified or the specified experiment
                doesn't have a backing tensorboard.
        """
        # Validate mutually-exclusive / dependent options before mutating
        # the resource proto.
        if experiment and tensorboard:
            raise ValueError("'experiment' and 'tensorboard' cannot be set together.")
        if self._enable_autolog and (not experiment):
            raise ValueError(
                "'experiment' is required since you've enabled autolog in 'from_local_script'."
            )
        if service_account:
            self._gca_resource.job_spec.service_account = service_account

        if network:
            self._gca_resource.job_spec.network = network

        # Only build a Scheduling message when at least one scheduling
        # option was supplied; note `timeout` and `max_wait_duration` are
        # rebound from int seconds to Duration protos here.
        if (
            timeout
            or restart_job_on_worker_restart
            or disable_retries
            or max_wait_duration
        ):
            timeout = duration_pb2.Duration(seconds=timeout) if timeout else None
            max_wait_duration = (
                duration_pb2.Duration(seconds=max_wait_duration)
                if max_wait_duration
                else None
            )
            self._gca_resource.job_spec.scheduling = gca_custom_job_compat.Scheduling(
                timeout=timeout,
                restart_job_on_worker_restart=restart_job_on_worker_restart,
                disable_retries=disable_retries,
                max_wait_duration=max_wait_duration,
            )

        if enable_web_access:
            self._gca_resource.job_spec.enable_web_access = enable_web_access

        if tensorboard:
            self._gca_resource.job_spec.tensorboard = tensorboard

        # TODO(b/275105711) Update implementation after experiment/run in the proto
        if experiment:
            # short-term solution to set experiment/experimentRun in SDK
            if isinstance(experiment, aiplatform.Experiment):
                self._experiment = experiment
                # convert the Experiment instance to string to be passed to env
                experiment = experiment.name
            else:
                self._experiment = aiplatform.Experiment.get(experiment_name=experiment)
                if not self._experiment:
                    raise ValueError(
                        f"Experiment '{experiment}' doesn't exist. "
                        "Please call aiplatform.init(experiment='my-exp') to create an experiment."
                    )
                elif (
                    not self._experiment.backing_tensorboard_resource_name
                    and self._enable_autolog
                ):
                    raise ValueError(
                        f"Experiment '{experiment}' doesn't have a backing tensorboard resource, "
                        "which is required by the experiment autologging feature. "
                        "Please call Experiment.assign_backing_tensorboard('my-tb-resource-name')."
                    )

            # if run name is not specified, auto-generate one
            if not experiment_run:
                experiment_run = (
                    # TODO(b/223262536)Once display_name is optional this run name
                    # might be invalid as well.
                    f"{self._gca_resource.display_name}-{uuid.uuid4().hex[0:5]}"
                )

            # get or create the experiment run for the job
            if isinstance(experiment_run, aiplatform.ExperimentRun):
                self._experiment_run = experiment_run
                # convert the ExperimentRun instance to string to be passed to env
                experiment_run = experiment_run.name
            else:
                self._experiment_run = aiplatform.ExperimentRun.get(
                    run_name=experiment_run,
                    experiment=self._experiment,
                )
                if not self._experiment_run:
                    self._experiment_run = aiplatform.ExperimentRun.create(
                        run_name=experiment_run,
                        experiment=self._experiment,
                    )
            self._experiment_run.update_state(
                gcs_execution_compat.Execution.State.RUNNING
            )

            # Inject the experiment / run names into every worker container's
            # environment so the training code can pick them up.
            worker_pool_specs = self._gca_resource.job_spec.worker_pool_specs
            for spec in worker_pool_specs:
                if not spec:
                    continue

                if "python_package_spec" in spec:
                    container_spec = spec.python_package_spec
                else:
                    container_spec = spec.container_spec

                experiment_env = [
                    {
                        "name": metadata_constants.ENV_EXPERIMENT_KEY,
                        "value": experiment,
                    },
                    {
                        "name": metadata_constants.ENV_EXPERIMENT_RUN_KEY,
                        "value": experiment_run,
                    },
                ]
                if "env" in container_spec:
                    container_spec.env.extend(experiment_env)
                else:
                    container_spec.env = experiment_env

        _LOGGER.log_create_with_lro(self.__class__)

        # Create the job via the v1beta1 API and replace the local resource
        # with the server-side representation.
        self._gca_resource = self.api_client.select_version(
            "v1beta1"
        ).create_custom_job(
            parent=self._parent,
            custom_job=self._gca_resource,
            timeout=create_request_timeout,
        )

        _LOGGER.log_create_complete_with_getter(
            self.__class__, self._gca_resource, "custom_job"
        )

        _LOGGER.info("View Custom Job:\n%s" % self._dashboard_uri())

        if tensorboard:
            _LOGGER.info(
                "View Tensorboard:\n%s"
                % console_utils.custom_job_tensorboard_console_uri(
                    tensorboard, self.resource_name
                )
            )

        # Record this job (name + console URI) on the experiment run's
        # metadata node, appending to any jobs already recorded there.
        if experiment:
            custom_job = {
                metadata_constants._CUSTOM_JOB_RESOURCE_NAME: self.resource_name,
                metadata_constants._CUSTOM_JOB_CONSOLE_URI: self._dashboard_uri(),
            }

            run_context = self._experiment_run._metadata_node
            custom_jobs = run_context._gca_resource.metadata.get(
                metadata_constants._CUSTOM_JOB_KEY
            )
            if custom_jobs:
                custom_jobs.append(custom_job)
            else:
                custom_jobs = [custom_job]
            run_context.update({metadata_constants._CUSTOM_JOB_KEY: custom_jobs})
|
||||
|
||||
|
||||
class HyperparameterTuningJob(jobs.HyperparameterTuningJob):
|
||||
"""Deprecated. Vertex AI Hyperparameter Tuning Job (preview)."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
# TODO(b/223262536): Make display_name parameter fully optional in next major release
|
||||
display_name: str,
|
||||
custom_job: CustomJob,
|
||||
metric_spec: Dict[str, str],
|
||||
parameter_spec: Dict[str, hyperparameter_tuning._ParameterSpec],
|
||||
max_trial_count: int,
|
||||
parallel_trial_count: int,
|
||||
max_failed_trial_count: int = 0,
|
||||
search_algorithm: Optional[str] = None,
|
||||
measurement_selection: Optional[str] = "best",
|
||||
project: Optional[str] = None,
|
||||
location: Optional[str] = None,
|
||||
credentials: Optional[auth_credentials.Credentials] = None,
|
||||
labels: Optional[Dict[str, str]] = None,
|
||||
encryption_spec_key_name: Optional[str] = None,
|
||||
):
|
||||
"""Deprecated. Please use the GA (non-preview) version of this class.
|
||||
|
||||
Configures a HyperparameterTuning Job.
|
||||
|
||||
Example usage:
|
||||
|
||||
```
|
||||
from google.cloud.aiplatform import hyperparameter_tuning as hpt
|
||||
|
||||
worker_pool_specs = [
|
||||
{
|
||||
"machine_spec": {
|
||||
"machine_type": "n1-standard-4",
|
||||
"accelerator_type": "NVIDIA_TESLA_K80",
|
||||
"accelerator_count": 1,
|
||||
},
|
||||
"replica_count": 1,
|
||||
"container_spec": {
|
||||
"image_uri": container_image_uri,
|
||||
"command": [],
|
||||
"args": [],
|
||||
},
|
||||
}
|
||||
]
|
||||
|
||||
custom_job = aiplatform.preview.jobs.CustomJob(
|
||||
display_name='my_job',
|
||||
worker_pool_specs=worker_pool_specs,
|
||||
labels={'my_key': 'my_value'},
|
||||
persistent_resource_id='my_persistent_resource',
|
||||
)
|
||||
|
||||
|
||||
hp_job = aiplatform.preview.jobs.HyperparameterTuningJob(
|
||||
display_name='hp-test',
|
||||
custom_job=job,
|
||||
metric_spec={
|
||||
'loss': 'minimize',
|
||||
},
|
||||
parameter_spec={
|
||||
'lr': hpt.DoubleParameterSpec(min=0.001, max=0.1, scale='log'),
|
||||
'units': hpt.IntegerParameterSpec(min=4, max=128, scale='linear'),
|
||||
'activation': hpt.CategoricalParameterSpec(values=['relu', 'selu']),
|
||||
'batch_size': hpt.DiscreteParameterSpec(values=[128, 256], scale='linear')
|
||||
},
|
||||
max_trial_count=128,
|
||||
parallel_trial_count=8,
|
||||
labels={'my_key': 'my_value'},
|
||||
)
|
||||
|
||||
hp_job.run()
|
||||
|
||||
print(hp_job.trials)
|
||||
```
|
||||
|
||||
|
||||
For more information on using hyperparameter tuning please visit:
|
||||
https://cloud.google.com/ai-platform-unified/docs/training/using-hyperparameter-tuning
|
||||
|
||||
Args:
|
||||
display_name (str):
|
||||
Required. The user-defined name of the HyperparameterTuningJob.
|
||||
The name can be up to 128 characters long and can be consist
|
||||
of any UTF-8 characters.
|
||||
custom_job (aiplatform.preview.jobs.CustomJob):
|
||||
Required. Configured CustomJob. The worker pool spec from this custom job
|
||||
applies to the CustomJobs created in all the trials. A persistent_resource_id can be
|
||||
specified on the custom job to be used when running this Hyperparameter Tuning job.
|
||||
metric_spec: Dict[str, str]
|
||||
Required. Dictionary representing metrics to optimize. The dictionary key is the metric_id,
|
||||
which is reported by your training job, and the dictionary value is the
|
||||
optimization goal of the metric('minimize' or 'maximize'). example:
|
||||
|
||||
metric_spec = {'loss': 'minimize', 'accuracy': 'maximize'}
|
||||
|
||||
parameter_spec (Dict[str, hyperparameter_tuning._ParameterSpec]):
|
||||
Required. Dictionary representing parameters to optimize. The dictionary key is the metric_id,
|
||||
which is passed into your training job as a command line key word argument, and the
|
||||
dictionary value is the parameter specification of the metric.
|
||||
|
||||
|
||||
from google.cloud.aiplatform import hyperparameter_tuning as hpt
|
||||
|
||||
parameter_spec={
|
||||
'decay': hpt.DoubleParameterSpec(min=1e-7, max=1, scale='linear'),
|
||||
'learning_rate': hpt.DoubleParameterSpec(min=1e-7, max=1, scale='linear')
|
||||
'batch_size': hpt.DiscreteParamterSpec(values=[4, 8, 16, 32, 64, 128], scale='linear')
|
||||
}
|
||||
|
||||
Supported parameter specifications can be found until aiplatform.hyperparameter_tuning.
|
||||
These parameter specification are currently supported:
|
||||
DoubleParameterSpec, IntegerParameterSpec, CategoricalParameterSpace, DiscreteParameterSpec
|
||||
|
||||
max_trial_count (int):
|
||||
Required. The desired total number of Trials.
|
||||
parallel_trial_count (int):
|
||||
Required. The desired number of Trials to run in parallel.
|
||||
max_failed_trial_count (int):
|
||||
Optional. The number of failed Trials that need to be
|
||||
seen before failing the HyperparameterTuningJob.
|
||||
If set to 0, Vertex AI decides how many Trials
|
||||
must fail before the whole job fails.
|
||||
search_algorithm (str):
|
||||
The search algorithm specified for the Study.
|
||||
Accepts one of the following:
|
||||
`None` - If you do not specify an algorithm, your job uses
|
||||
the default Vertex AI algorithm. The default algorithm
|
||||
applies Bayesian optimization to arrive at the optimal
|
||||
solution with a more effective search over the parameter space.
|
||||
|
||||
'grid' - A simple grid search within the feasible space. This
|
||||
option is particularly useful if you want to specify a quantity
|
||||
of trials that is greater than the number of points in the
|
||||
feasible space. In such cases, if you do not specify a grid
|
||||
search, the Vertex AI default algorithm may generate duplicate
|
||||
suggestions. To use grid search, all parameter specs must be
|
||||
of type `IntegerParameterSpec`, `CategoricalParameterSpace`,
|
||||
or `DiscreteParameterSpec`.
|
||||
|
||||
'random' - A simple random search within the feasible space.
|
||||
measurement_selection (str):
|
||||
This indicates which measurement to use if/when the service
|
||||
automatically selects the final measurement from previously reported
|
||||
intermediate measurements.
|
||||
|
||||
Accepts: 'best', 'last'
|
||||
|
||||
Choose this based on two considerations:
|
||||
A) Do you expect your measurements to monotonically improve? If so,
|
||||
choose 'last'. On the other hand, if you're in a situation
|
||||
where your system can "over-train" and you expect the performance to
|
||||
get better for a while but then start declining, choose
|
||||
'best'. B) Are your measurements significantly noisy
|
||||
and/or irreproducible? If so, 'best' will tend to be
|
||||
over-optimistic, and it may be better to choose 'last'. If
|
||||
both or neither of (A) and (B) apply, it doesn't matter which
|
||||
selection type is chosen.
|
||||
project (str):
|
||||
Optional. Project to run the HyperparameterTuningjob in. Overrides project set in aiplatform.init.
|
||||
location (str):
|
||||
Optional. Location to run the HyperparameterTuning in. Overrides location set in aiplatform.init.
|
||||
credentials (auth_credentials.Credentials):
|
||||
Optional. Custom credentials to use to run call HyperparameterTuning service. Overrides
|
||||
credentials set in aiplatform.init.
|
||||
labels (Dict[str, str]):
|
||||
Optional. The labels with user-defined metadata to
|
||||
organize HyperparameterTuningJobs.
|
||||
Label keys and values can be no longer than 64
|
||||
characters (Unicode codepoints), can only
|
||||
contain lowercase letters, numeric characters,
|
||||
underscores and dashes. International characters
|
||||
are allowed.
|
||||
See https://goo.gl/xmQnxf for more information
|
||||
and examples of labels.
|
||||
encryption_spec_key_name (str):
|
||||
Optional. Customer-managed encryption key options for a
|
||||
HyperparameterTuningJob. If this is set, then
|
||||
all resources created by the
|
||||
HyperparameterTuningJob will be encrypted with
|
||||
the provided encryption key.
|
||||
"""
|
||||
|
||||
super(jobs.HyperparameterTuningJob, self).__init__(
|
||||
project=project, location=location, credentials=credentials
|
||||
)
|
||||
|
||||
metrics = [
|
||||
study_v1beta1.StudySpec.MetricSpec(metric_id=metric_id, goal=goal.upper())
|
||||
for metric_id, goal in metric_spec.items()
|
||||
]
|
||||
|
||||
parameters = [
|
||||
parameter._to_parameter_spec_v1beta1(parameter_id=parameter_id)
|
||||
for parameter_id, parameter in parameter_spec.items()
|
||||
]
|
||||
|
||||
study_spec = study_v1beta1.StudySpec(
|
||||
metrics=metrics,
|
||||
parameters=parameters,
|
||||
algorithm=hyperparameter_tuning.SEARCH_ALGORITHM_TO_PROTO_VALUE[
|
||||
search_algorithm
|
||||
],
|
||||
measurement_selection_type=hyperparameter_tuning.MEASUREMENT_SELECTION_TO_PROTO_VALUE[
|
||||
measurement_selection
|
||||
],
|
||||
)
|
||||
|
||||
if not display_name:
|
||||
display_name = self.__class__._generate_display_name()
|
||||
|
||||
self._gca_resource = (
|
||||
gca_hyperparameter_tuning_job_compat.HyperparameterTuningJob(
|
||||
display_name=display_name,
|
||||
study_spec=study_spec,
|
||||
max_trial_count=max_trial_count,
|
||||
parallel_trial_count=parallel_trial_count,
|
||||
max_failed_trial_count=max_failed_trial_count,
|
||||
trial_job_spec=copy.deepcopy(custom_job.job_spec),
|
||||
labels=labels,
|
||||
encryption_spec=initializer.global_config.get_encryption_spec(
|
||||
encryption_spec_key_name=encryption_spec_key_name,
|
||||
select_version=compat.V1BETA1,
|
||||
),
|
||||
)
|
||||
)
|
||||
|
||||
def _get_gca_resource(
|
||||
self,
|
||||
resource_name: str,
|
||||
parent_resource_name_fields: Optional[Dict[str, str]] = None,
|
||||
) -> proto.Message:
|
||||
"""Returns GAPIC service representation of client class resource.
|
||||
|
||||
Args:
|
||||
resource_name (str): Required. A fully-qualified resource name or ID.
|
||||
parent_resource_name_fields (Dict[str,str]):
|
||||
Optional. Mapping of parent resource name key to values. These
|
||||
will be used to compose the resource name if only resource ID is given.
|
||||
Should not include project and location.
|
||||
"""
|
||||
resource_name = utils.full_resource_name(
|
||||
resource_name=resource_name,
|
||||
resource_noun=self._resource_noun,
|
||||
parse_resource_name_method=self._parse_resource_name,
|
||||
format_resource_name_method=self._format_resource_name,
|
||||
project=self.project,
|
||||
location=self.location,
|
||||
parent_resource_name_fields=parent_resource_name_fields,
|
||||
resource_id_validator=self._resource_id_validator,
|
||||
)
|
||||
|
||||
return getattr(self.api_client.select_version("v1beta1"), self._getter_method)(
|
||||
name=resource_name, retry=_DEFAULT_RETRY
|
||||
)
|
||||
|
||||
@base.optional_sync()
|
||||
def _run(
|
||||
self,
|
||||
service_account: Optional[str] = None,
|
||||
network: Optional[str] = None,
|
||||
timeout: Optional[int] = None, # seconds
|
||||
restart_job_on_worker_restart: bool = False,
|
||||
enable_web_access: bool = False,
|
||||
tensorboard: Optional[str] = None,
|
||||
sync: bool = True,
|
||||
create_request_timeout: Optional[float] = None,
|
||||
disable_retries: bool = False,
|
||||
max_wait_duration: Optional[int] = None,
|
||||
) -> None:
|
||||
"""Helper method to ensure network synchronization and to run the configured CustomJob.
|
||||
|
||||
Args:
|
||||
service_account (str):
|
||||
Optional. Specifies the service account for workload run-as account.
|
||||
Users submitting jobs must have act-as permission on this run-as account.
|
||||
network (str):
|
||||
Optional. The full name of the Compute Engine network to which the job
|
||||
should be peered. For example, projects/12345/global/networks/myVPC.
|
||||
Private services access must already be configured for the network.
|
||||
timeout (int):
|
||||
Optional. The maximum job running time in seconds. The default is 7 days.
|
||||
restart_job_on_worker_restart (bool):
|
||||
Restarts the entire CustomJob if a worker
|
||||
gets restarted. This feature can be used by
|
||||
distributed training jobs that are not resilient
|
||||
to workers leaving and joining a job.
|
||||
enable_web_access (bool):
|
||||
Whether you want Vertex AI to enable interactive shell access
|
||||
to training containers.
|
||||
https://cloud.google.com/vertex-ai/docs/training/monitor-debug-interactive-shell
|
||||
tensorboard (str):
|
||||
Optional. The name of a Vertex AI
|
||||
[Tensorboard][google.cloud.aiplatform.v1beta1.Tensorboard]
|
||||
resource to which this CustomJob will upload Tensorboard
|
||||
logs. Format:
|
||||
``projects/{project}/locations/{location}/tensorboards/{tensorboard}``
|
||||
|
||||
The training script should write Tensorboard to following Vertex AI environment
|
||||
variable:
|
||||
|
||||
AIP_TENSORBOARD_LOG_DIR
|
||||
|
||||
`service_account` is required with provided `tensorboard`.
|
||||
For more information on configuring your service account please visit:
|
||||
https://cloud.google.com/vertex-ai/docs/experiments/tensorboard-training
|
||||
sync (bool):
|
||||
Whether to execute this method synchronously. If False, this method
|
||||
will unblock and it will be executed in a concurrent Future.
|
||||
create_request_timeout (float):
|
||||
Optional. The timeout for the create request in seconds.
|
||||
disable_retries (bool):
|
||||
Indicates if the job should retry for internal errors after the
|
||||
job starts running. If True, overrides
|
||||
`restart_job_on_worker_restart` to False.
|
||||
max_wait_duration (int):
|
||||
This is the maximum duration that a job will wait for the
|
||||
requested resources to be provisioned in seconds. If set to 0,
|
||||
the job will wait indefinitely. The default is 30 minutes.
|
||||
"""
|
||||
if service_account:
|
||||
self._gca_resource.trial_job_spec.service_account = service_account
|
||||
|
||||
if network:
|
||||
self._gca_resource.trial_job_spec.network = network
|
||||
|
||||
if (
|
||||
timeout
|
||||
or restart_job_on_worker_restart
|
||||
or disable_retries
|
||||
or max_wait_duration
|
||||
):
|
||||
timeout = duration_pb2.Duration(seconds=timeout) if timeout else None
|
||||
max_wait_duration = (
|
||||
duration_pb2.Duration(seconds=max_wait_duration)
|
||||
if max_wait_duration
|
||||
else None
|
||||
)
|
||||
self._gca_resource.trial_job_spec.scheduling = (
|
||||
gca_custom_job_compat.Scheduling(
|
||||
timeout=timeout,
|
||||
restart_job_on_worker_restart=restart_job_on_worker_restart,
|
||||
disable_retries=disable_retries,
|
||||
max_wait_duration=max_wait_duration,
|
||||
)
|
||||
)
|
||||
|
||||
if enable_web_access:
|
||||
self._gca_resource.trial_job_spec.enable_web_access = enable_web_access
|
||||
|
||||
if tensorboard:
|
||||
self._gca_resource.trial_job_spec.tensorboard = tensorboard
|
||||
|
||||
_LOGGER.log_create_with_lro(self.__class__)
|
||||
|
||||
self._gca_resource = self.api_client.select_version(
|
||||
"v1beta1"
|
||||
).create_hyperparameter_tuning_job(
|
||||
parent=self._parent,
|
||||
hyperparameter_tuning_job=self._gca_resource,
|
||||
timeout=create_request_timeout,
|
||||
)
|
||||
|
||||
_LOGGER.log_create_complete_with_getter(
|
||||
self.__class__, self._gca_resource, "hpt_job"
|
||||
)
|
||||
|
||||
_LOGGER.info("View HyperparameterTuningJob:\n%s" % self._dashboard_uri())
|
||||
|
||||
if tensorboard:
|
||||
_LOGGER.info(
|
||||
"View Tensorboard:\n%s"
|
||||
% console_utils.custom_job_tensorboard_console_uri(
|
||||
tensorboard, self.resource_name
|
||||
)
|
||||
)
|
||||
|
||||
self._block_until_complete()
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,430 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2023 Google LLC
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
from typing import Dict, List, Optional, Union
|
||||
|
||||
from google.api_core import operation
|
||||
from google.api_core import retry
|
||||
from google.auth import credentials as auth_credentials
|
||||
from google.cloud.aiplatform import base
|
||||
from google.cloud.aiplatform import initializer
|
||||
from google.cloud.aiplatform import utils
|
||||
from google.cloud.aiplatform.compat.services import (
|
||||
persistent_resource_service_client_v1beta1 as persistent_resource_service_client_compat,
|
||||
)
|
||||
from google.cloud.aiplatform_v1beta1.types import (
|
||||
encryption_spec as gca_encryption_spec_compat,
|
||||
)
|
||||
from google.cloud.aiplatform_v1beta1.types import (
|
||||
persistent_resource as gca_persistent_resource_compat,
|
||||
)
|
||||
|
||||
from google.protobuf import timestamp_pb2 # type: ignore
|
||||
from google.rpc import status_pb2 # type: ignore
|
||||
|
||||
|
||||
_LOGGER = base.Logger(__name__)
|
||||
_DEFAULT_RETRY = retry.Retry()
|
||||
|
||||
|
||||
class PersistentResource(base.VertexAiResourceNounWithFutureManager):
|
||||
"""Managed PersistentResource feature for Vertex AI (Preview)."""
|
||||
|
||||
client_class = utils.PersistentResourceClientWithOverride
|
||||
_resource_noun = "persistentResource"
|
||||
_getter_method = "get_persistent_resource"
|
||||
_list_method = "list_persistent_resources"
|
||||
_delete_method = "delete_persistent_resource"
|
||||
_parse_resource_name_method = "parse_persistent_resource_path"
|
||||
_format_resource_name_method = "persistent_resource_path"
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
persistent_resource_id: str,
|
||||
project: Optional[str] = None,
|
||||
location: Optional[str] = None,
|
||||
credentials: Optional[auth_credentials.Credentials] = None,
|
||||
):
|
||||
"""Retrieves the PersistentResource and instantiates its representation.
|
||||
|
||||
Args:
|
||||
persistent_resource_id (str):
|
||||
Required.
|
||||
project (str):
|
||||
Project this PersistentResource is in. Overrides
|
||||
project set in aiplatform.init.
|
||||
location (str):
|
||||
Location this PersistentResource is in. Overrides
|
||||
location set in aiplatform.init.
|
||||
credentials (auth_credentials.Credentials):
|
||||
Custom credentials to use to manage this PersistentResource.
|
||||
Overrides credentials set in aiplatform.init.
|
||||
"""
|
||||
super().__init__(
|
||||
project=project,
|
||||
location=location,
|
||||
credentials=credentials,
|
||||
resource_name=persistent_resource_id,
|
||||
)
|
||||
|
||||
self._gca_resource = self._get_gca_resource(
|
||||
resource_name=persistent_resource_id
|
||||
)
|
||||
|
||||
@property
|
||||
def display_name(self) -> Optional[str]:
|
||||
"""The display name of the PersistentResource."""
|
||||
self._assert_gca_resource_is_available()
|
||||
return getattr(self._gca_resource, "display_name", None)
|
||||
|
||||
@property
|
||||
def state(self) -> gca_persistent_resource_compat.PersistentResource.State:
|
||||
"""The state of the PersistentResource.
|
||||
|
||||
Values:
|
||||
STATE_UNSPECIFIED (0):
|
||||
Not set.
|
||||
PROVISIONING (1):
|
||||
The PROVISIONING state indicates the
|
||||
persistent resources is being created.
|
||||
RUNNING (3):
|
||||
The RUNNING state indicates the persistent
|
||||
resources is healthy and fully usable.
|
||||
STOPPING (4):
|
||||
The STOPPING state indicates the persistent
|
||||
resources is being deleted.
|
||||
ERROR (5):
|
||||
The ERROR state indicates the persistent resources may be
|
||||
unusable. Details can be found in the ``error`` field.
|
||||
"""
|
||||
self._assert_gca_resource_is_available()
|
||||
return getattr(self._gca_resource, "state", None)
|
||||
|
||||
@property
|
||||
def error(self) -> Optional[status_pb2.Status]:
|
||||
"""The error status of the PersistentResource.
|
||||
|
||||
Only populated when the resource's state is ``STOPPING`` or ``ERROR``.
|
||||
|
||||
"""
|
||||
self._assert_gca_resource_is_available()
|
||||
return getattr(self._gca_resource, "error", None)
|
||||
|
||||
@property
|
||||
def create_time(self) -> Optional[timestamp_pb2.Timestamp]:
|
||||
"""Time when the PersistentResource was created."""
|
||||
self._assert_gca_resource_is_available()
|
||||
return getattr(self._gca_resource, "create_time", None)
|
||||
|
||||
@property
|
||||
def start_time(self) -> Optional[timestamp_pb2.Timestamp]:
|
||||
"""Time when the PersistentResource first entered the ``RUNNING`` state."""
|
||||
self._assert_gca_resource_is_available()
|
||||
return getattr(self._gca_resource, "start_time", None)
|
||||
|
||||
@property
|
||||
def update_time(self) -> Optional[timestamp_pb2.Timestamp]:
|
||||
"""Time when the PersistentResource was most recently updated."""
|
||||
self._assert_gca_resource_is_available()
|
||||
return getattr(self._gca_resource, "update_time", None)
|
||||
|
||||
@property
|
||||
def network(self) -> Optional[str]:
|
||||
"""The network peered with the PersistentResource.
|
||||
|
||||
The full name of the Compute Engine
|
||||
`network </compute/docs/networks-and-firewalls#networks>`__ to peered
|
||||
with Vertex AI to host the persistent resources.
|
||||
|
||||
For example, ``projects/12345/global/networks/myVPC``.
|
||||
`Format </compute/docs/reference/rest/v1/networks/insert>`__ is of the
|
||||
form ``projects/{project}/global/networks/{network}``. Where {project}
|
||||
is a project number, as in ``12345``, and {network} is a network name.
|
||||
|
||||
To specify this field, you must have already `configured VPC Network
|
||||
Peering for Vertex
|
||||
AI <https://cloud.google.com/vertex-ai/docs/general/vpc-peering>`__.
|
||||
|
||||
If this field is left unspecified, the resources aren't peered with any
|
||||
network.
|
||||
"""
|
||||
self._assert_gca_resource_is_available()
|
||||
return getattr(self._gca_resource, "network", None)
|
||||
|
||||
@classmethod
|
||||
@base.optional_sync()
|
||||
def create(
|
||||
cls,
|
||||
persistent_resource_id: str,
|
||||
resource_pools: Union[
|
||||
List[Dict], List[gca_persistent_resource_compat.ResourcePool]
|
||||
],
|
||||
display_name: Optional[str] = None,
|
||||
labels: Optional[Dict[str, str]] = None,
|
||||
network: Optional[str] = None,
|
||||
kms_key_name: Optional[str] = None,
|
||||
service_account: Optional[str] = None,
|
||||
reserved_ip_ranges: List[str] = None,
|
||||
sync: Optional[bool] = True, # pylint: disable=unused-argument
|
||||
project: Optional[str] = None,
|
||||
location: Optional[str] = None,
|
||||
credentials: Optional[auth_credentials.Credentials] = None,
|
||||
) -> "PersistentResource":
|
||||
r"""Creates a PersistentResource.
|
||||
|
||||
Args:
|
||||
persistent_resource_id (str):
|
||||
Required. The ID to use for the PersistentResource,
|
||||
which become the final component of the
|
||||
PersistentResource's resource name.
|
||||
|
||||
The maximum length is 63 characters, and valid
|
||||
characters are ``/^[a-z]([a-z0-9-]{0,61}[a-z0-9])?$/``.
|
||||
|
||||
This corresponds to the ``persistent_resource_id`` field
|
||||
on the ``request`` instance; if ``request`` is provided, this
|
||||
should not be set.
|
||||
resource_pools (MutableSequence[google.cloud.aiplatform_v1.types.ResourcePool]):
|
||||
Required. The list of resource pools to create for the
|
||||
PersistentResource.
|
||||
display_name (str):
|
||||
Optional. The display name of the
|
||||
PersistentResource. The name can be up to 128
|
||||
characters long and can consist of any UTF-8
|
||||
characters.
|
||||
labels (MutableMapping[str, str]):
|
||||
Optional. The labels with user-defined
|
||||
metadata to organize PersistentResource.
|
||||
|
||||
Label keys and values can be no longer than 64
|
||||
characters (Unicode codepoints), can only
|
||||
contain lowercase letters, numeric characters,
|
||||
underscores and dashes. International characters
|
||||
are allowed.
|
||||
|
||||
See https://goo.gl/xmQnxf for more information
|
||||
and examples of labels.
|
||||
network (str):
|
||||
Optional. The full name of the Compute Engine
|
||||
`network </compute/docs/networks-and-firewalls#networks>`__
|
||||
to peered with Vertex AI to host the persistent resources.
|
||||
For example, ``projects/12345/global/networks/myVPC``.
|
||||
`Format </compute/docs/reference/rest/v1/networks/insert>`__
|
||||
is of the form
|
||||
``projects/{project}/global/networks/{network}``. Where
|
||||
{project} is a project number, as in ``12345``, and
|
||||
{network} is a network name.
|
||||
|
||||
To specify this field, you must have already `configured VPC
|
||||
Network Peering for Vertex
|
||||
AI <https://cloud.google.com/vertex-ai/docs/general/vpc-peering>`__.
|
||||
|
||||
If this field is left unspecified, the resources aren't
|
||||
peered with any network.
|
||||
kms_key_name (str):
|
||||
Optional. Customer-managed encryption key for the
|
||||
PersistentResource. If set, this PersistentResource and all
|
||||
sub-resources of this PersistentResource will be secured by
|
||||
this key.
|
||||
service_account (str):
|
||||
Optional. Default service account that this
|
||||
PersistentResource's workloads run as. The workloads
|
||||
including
|
||||
|
||||
- Any runtime specified via ``ResourceRuntimeSpec`` on
|
||||
creation time, for example, Ray.
|
||||
- Jobs submitted to PersistentResource, if no other service
|
||||
account specified in the job specs.
|
||||
|
||||
Only works when custom service account is enabled and users
|
||||
have the ``iam.serviceAccounts.actAs`` permission on this
|
||||
service account.
|
||||
reserved_ip_ranges (MutableSequence[str]):
|
||||
Optional. A list of names for the reserved IP ranges under
|
||||
the VPC network that can be used for this persistent
|
||||
resource.
|
||||
|
||||
If set, we will deploy the persistent resource within the
|
||||
provided IP ranges. Otherwise, the persistent resource is
|
||||
deployed to any IP ranges under the provided VPC network.
|
||||
|
||||
Example ['vertex-ai-ip-range'].
|
||||
sync (bool):
|
||||
Whether to execute this method synchonously. If False, this
|
||||
method will be executed in concurrent Future and any downstream
|
||||
object will be immediately returned and synced when the Future
|
||||
has completed.
|
||||
project (str):
|
||||
Project to create this PersistentResource in. Overrides project
|
||||
set in aiplatform.init.
|
||||
location (str):
|
||||
Location to create this PersistentResource in. Overrides
|
||||
location set in aiplatform.init.
|
||||
credentials (auth_credentials.Credentials):
|
||||
Custom credentials to use to create this PersistentResource.
|
||||
Overrides credentials set in aiplatform.init.
|
||||
|
||||
Returns:
|
||||
persistent_resource (PersistentResource):
|
||||
The object representation of the newly created
|
||||
PersistentResource.
|
||||
"""
|
||||
|
||||
if labels:
|
||||
utils.validate_labels(labels)
|
||||
|
||||
gca_persistent_resource = gca_persistent_resource_compat.PersistentResource(
|
||||
name=persistent_resource_id,
|
||||
display_name=display_name,
|
||||
resource_pools=resource_pools,
|
||||
labels=labels,
|
||||
network=network,
|
||||
reserved_ip_ranges=reserved_ip_ranges,
|
||||
)
|
||||
|
||||
if kms_key_name:
|
||||
gca_persistent_resource.encryption_spec = (
|
||||
gca_encryption_spec_compat.EncryptionSpec(kms_key_name=kms_key_name)
|
||||
)
|
||||
|
||||
if service_account:
|
||||
service_account_spec = gca_persistent_resource_compat.ServiceAccountSpec(
|
||||
enable_custom_service_account=True, service_account=service_account
|
||||
)
|
||||
gca_persistent_resource.resource_runtime_spec = (
|
||||
gca_persistent_resource_compat.ResourceRuntimeSpec(
|
||||
service_account_spec=service_account_spec
|
||||
)
|
||||
)
|
||||
|
||||
api_client = cls._instantiate_client(location, credentials).select_version(
|
||||
"v1beta1"
|
||||
)
|
||||
create_lro = cls._create(
|
||||
api_client=api_client,
|
||||
parent=initializer.global_config.common_location_path(
|
||||
project=project, location=location
|
||||
),
|
||||
persistent_resource=gca_persistent_resource,
|
||||
persistent_resource_id=persistent_resource_id,
|
||||
)
|
||||
|
||||
_LOGGER.log_create_with_lro(cls, create_lro)
|
||||
|
||||
create_lro.result(timeout=None)
|
||||
persistent_resource_result = cls(
|
||||
persistent_resource_id=persistent_resource_id,
|
||||
project=project,
|
||||
location=location,
|
||||
credentials=credentials,
|
||||
)
|
||||
|
||||
_LOGGER.log_create_complete(
|
||||
cls, persistent_resource_result._gca_resource, "persistent resource"
|
||||
)
|
||||
|
||||
return persistent_resource_result
|
||||
|
||||
@classmethod
|
||||
def _create(
|
||||
cls,
|
||||
api_client: (
|
||||
persistent_resource_service_client_compat.PersistentResourceServiceClient
|
||||
),
|
||||
parent: str,
|
||||
persistent_resource: gca_persistent_resource_compat.PersistentResource,
|
||||
persistent_resource_id: str,
|
||||
create_request_timeout: Optional[float] = None,
|
||||
) -> operation.Operation:
|
||||
"""Creates a PersistentResource directly calling the API client.
|
||||
|
||||
Args:
|
||||
api_client (PersistentResourceServiceClient):
|
||||
An instance of PersistentResourceServiceClient with the correct
|
||||
api_endpoint already set based on user's preferences.
|
||||
parent (str):
|
||||
Required. Also known as common location path, that usually contains the
|
||||
project and location that the user provided to the upstream method.
|
||||
IE "projects/my-project/locations/us-central1"
|
||||
persistent_resource (gca_persistent_resource_compat.PersistentResource):
|
||||
Required. The PersistentResource object to use for the create request.
|
||||
persistent_resource_id (str):
|
||||
Required. The ID to use for the PersistentResource,
|
||||
which become the final component of the
|
||||
PersistentResource's resource name.
|
||||
|
||||
The maximum length is 63 characters, and valid
|
||||
characters are ``/^[a-z]([a-z0-9-]{0,61}[a-z0-9])?$/``.
|
||||
|
||||
This corresponds to the ``persistent_resource_id`` field
|
||||
on the ``request`` instance; if ``request`` is provided, this
|
||||
should not be set.
|
||||
create_request_timeout (float):
|
||||
Optional. The timeout for the create request in seconds.
|
||||
|
||||
Returns:
|
||||
operation (Operation):
|
||||
The long-running operation returned by the Persistent Resource
|
||||
create call.
|
||||
"""
|
||||
return api_client.create_persistent_resource(
|
||||
parent=parent,
|
||||
persistent_resource_id=persistent_resource_id,
|
||||
persistent_resource=persistent_resource,
|
||||
timeout=create_request_timeout,
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def list(
|
||||
cls,
|
||||
filter: Optional[str] = None,
|
||||
order_by: Optional[str] = None,
|
||||
project: Optional[str] = None,
|
||||
location: Optional[str] = None,
|
||||
credentials: Optional[auth_credentials.Credentials] = None,
|
||||
) -> List["PersistentResource"]:
|
||||
"""Lists a Persistent Resources on the provided project and region.
|
||||
|
||||
Args:
|
||||
filter (str):
|
||||
Optional. An expression for filtering the results of the request.
|
||||
For field names both snake_case and camelCase are supported.
|
||||
order_by (str):
|
||||
Optional. A comma-separated list of fields to order by, sorted in
|
||||
ascending order. Use "desc" after a field name for descending.
|
||||
Supported fields: `display_name`, `create_time`, `update_time`
|
||||
project (str):
|
||||
Optional. Project to retrieve list from. If not set, project
|
||||
set in aiplatform.init will be used.
|
||||
location (str):
|
||||
Optional. Location to retrieve list from. If not set, location
|
||||
set in aiplatform.init will be used.
|
||||
credentials (auth_credentials.Credentials):
|
||||
Optional. Custom credentials to use to retrieve list. Overrides
|
||||
credentials set in aiplatform.init.
|
||||
|
||||
Returns:
|
||||
List[PersistentResource]
|
||||
A list of PersistentResource objects.
|
||||
"""
|
||||
return cls._list_with_local_order(
|
||||
filter=filter,
|
||||
order_by=order_by,
|
||||
project=project,
|
||||
location=location,
|
||||
credentials=credentials,
|
||||
)
|
||||
Binary file not shown.
@@ -0,0 +1,615 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2023 Google LLC
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
import datetime
|
||||
import re
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from google.auth import credentials as auth_credentials
|
||||
from google.cloud import aiplatform_v1beta1
|
||||
from google.cloud.aiplatform import base
|
||||
from google.cloud.aiplatform import compat
|
||||
from google.cloud.aiplatform import initializer
|
||||
from google.cloud.aiplatform import pipeline_job_schedules
|
||||
from google.cloud.aiplatform import utils
|
||||
from google.cloud.aiplatform.constants import pipeline as pipeline_constants
|
||||
from google.cloud.aiplatform.metadata import constants as metadata_constants
|
||||
from google.cloud.aiplatform.metadata import experiment_resources
|
||||
from google.cloud.aiplatform.pipeline_jobs import (
|
||||
PipelineJob as PipelineJobGa,
|
||||
)
|
||||
from google.cloud.aiplatform_v1.services.pipeline_service import (
|
||||
PipelineServiceClient as PipelineServiceClientGa,
|
||||
)
|
||||
|
||||
from google.protobuf import json_format
|
||||
|
||||
|
||||
_LOGGER = base.Logger(__name__)
|
||||
|
||||
# Pattern for valid names used as a Vertex resource name.
|
||||
_VALID_NAME_PATTERN = pipeline_constants._VALID_NAME_PATTERN
|
||||
|
||||
# Pattern for an Artifact Registry URL.
|
||||
_VALID_AR_URL = pipeline_constants._VALID_AR_URL
|
||||
|
||||
# Pattern for any JSON or YAML file over HTTPS.
|
||||
_VALID_HTTPS_URL = pipeline_constants._VALID_HTTPS_URL
|
||||
|
||||
|
||||
def _get_current_time() -> datetime.datetime:
|
||||
"""Gets the current timestamp."""
|
||||
return datetime.datetime.now()
|
||||
|
||||
|
||||
def _set_enable_caching_value(
|
||||
pipeline_spec: Dict[str, Any], enable_caching: bool
|
||||
) -> None:
|
||||
"""Sets pipeline tasks caching options.
|
||||
|
||||
Args:
|
||||
pipeline_spec (Dict[str, Any]):
|
||||
Required. The dictionary of pipeline spec.
|
||||
enable_caching (bool):
|
||||
Required. Whether to enable caching.
|
||||
"""
|
||||
for component in [pipeline_spec["root"]] + list(
|
||||
pipeline_spec["components"].values()
|
||||
):
|
||||
if "dag" in component:
|
||||
for task in component["dag"]["tasks"].values():
|
||||
task["cachingOptions"] = {"enableCache": enable_caching}
|
||||
|
||||
|
||||
class _PipelineJob(
    PipelineJobGa,
    # Registers pipeline runs under the Experiments pipeline-run schema so
    # this preview job can be logged against a Vertex AI Experiment.
    experiment_loggable_schemas=(
        experiment_resources._ExperimentLoggableSchema(
            title=metadata_constants.SYSTEM_PIPELINE_RUN
        ),
    ),
):
    """Preview PipelineJob resource for Vertex AI.

    Extends the GA ``PipelineJob`` with v1beta1-only features: preflight
    validations, a pipeline-level default runtime, batch delete, and rerun.
    """

    def __init__(
        self,
        display_name: str,
        template_path: str,
        job_id: Optional[str] = None,
        pipeline_root: Optional[str] = None,
        parameter_values: Optional[Dict[str, Any]] = None,
        input_artifacts: Optional[Dict[str, str]] = None,
        enable_caching: Optional[bool] = None,
        encryption_spec_key_name: Optional[str] = None,
        labels: Optional[Dict[str, str]] = None,
        credentials: Optional[auth_credentials.Credentials] = None,
        project: Optional[str] = None,
        location: Optional[str] = None,
        failure_policy: Optional[str] = None,
        enable_preflight_validations: Optional[bool] = False,
        default_runtime: Optional[Dict[str, Any]] = None,
    ):
        """Retrieves a PipelineJob resource and instantiates its
        representation.

        Args:
            display_name (str):
                Required. The user-defined name of this Pipeline.
            template_path (str):
                Required. The path of PipelineJob or PipelineSpec JSON or YAML file. It
                can be a local path, a Google Cloud Storage URI (e.g. "gs://project.name"),
                an Artifact Registry URI (e.g.
                "https://us-central1-kfp.pkg.dev/proj/repo/pack/latest"), or an HTTPS URI.
            job_id (str):
                Optional. The unique ID of the job run.
                If not specified, pipeline name + timestamp will be used.
            pipeline_root (str):
                Optional. The root of the pipeline outputs. If not set, the staging bucket
                set in aiplatform.init will be used. If that's not set a pipeline-specific
                artifacts bucket will be used.
            parameter_values (Dict[str, Any]):
                Optional. The mapping from runtime parameter names to its values that
                control the pipeline run.
            input_artifacts (Dict[str, str]):
                Optional. The mapping from the runtime parameter name for this artifact to its resource id.
                For example: "vertex_model":"456". Note: full resource name ("projects/123/locations/us-central1/metadataStores/default/artifacts/456") cannot be used.
            enable_caching (bool):
                Optional. Whether to turn on caching for the run.

                If this is not set, defaults to the compile time settings, which
                are True for all tasks by default, while users may specify
                different caching options for individual tasks.

                If this is set, the setting applies to all tasks in the pipeline.

                Overrides the compile time settings.
            encryption_spec_key_name (str):
                Optional. The Cloud KMS resource identifier of the customer
                managed encryption key used to protect the job. Has the
                form:
                ``projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key``.
                The key needs to be in the same region as where the compute
                resource is created.

                If this is set, then all
                resources created by the PipelineJob will
                be encrypted with the provided encryption key.

                Overrides encryption_spec_key_name set in aiplatform.init.
            labels (Dict[str, str]):
                Optional. The user defined metadata to organize PipelineJob.
            credentials (auth_credentials.Credentials):
                Optional. Custom credentials to use to create this PipelineJob.
                Overrides credentials set in aiplatform.init.
            project (str):
                Optional. The project that you want to run this PipelineJob in. If not set,
                the project set in aiplatform.init will be used.
            location (str):
                Optional. Location to create PipelineJob. If not set,
                location set in aiplatform.init will be used.
            failure_policy (str):
                Optional. The failure policy - "slow" or "fast".
                Currently, the default of a pipeline is that the pipeline will continue to
                run until no more tasks can be executed, also known as
                PIPELINE_FAILURE_POLICY_FAIL_SLOW (corresponds to "slow").
                However, if a pipeline is set to
                PIPELINE_FAILURE_POLICY_FAIL_FAST (corresponds to "fast"),
                it will stop scheduling any new tasks when a task has failed. Any
                scheduled tasks will continue to completion.
            enable_preflight_validations (bool):
                Optional. Whether to enable preflight validations or not.
            default_runtime (Dict[str, Any]):
                Optional. Specifies the runtime for the entire pipeline.
                All tasks will use the configured runtime unless overridden at the task level.
                If not provided, Vertex Training Custom Job (on-demand) will be used as the default runtime.

                Supported Runtimes:
                - Custom Job(On-Demand) Runtime: Default if default_runtime is not provided or None.
                - Persistent Resource Runtime: To use a persistent resource as the runtime, see reference configuration below:
                    default_runtime = {
                        "persistentResourceRuntimeDetail": {
                            "persistentResourceName": "projects/my-project/locations/my-location/persistentResources/my-persistent",
                            "taskResourceUnavailableWaitTimeMs": 1000,  # Time (ms) to wait if resource is unavailable
                            "taskResourceUnavailableTimeoutBehavior": "FAIL",  # Behavior if resource is unavailable after wait
                        }
                    }
                For more information, please see https://cloud.google.com/vertex-ai/docs/reference/rest/v1beta1/projects.locations.pipelineJobs#PipelineJob.DefaultRuntime.

        Raises:
            ValueError: If job_id or labels have incorrect format.
        """
        # The GA constructor builds the v1 resource; preview-only fields
        # (preflight validations, default runtime) are not passed through.
        super().__init__(
            display_name=display_name,
            template_path=template_path,
            job_id=job_id,
            pipeline_root=pipeline_root,
            parameter_values=parameter_values,
            input_artifacts=input_artifacts,
            enable_caching=enable_caching,
            encryption_spec_key_name=encryption_spec_key_name,
            labels=labels,
            credentials=credentials,
            project=project,
            location=location,
            failure_policy=failure_policy,
        )

        # needs to rebuild the v1beta version of pipeline_job and runtime_config
        pipeline_json = utils.yaml_utils.load_yaml(
            template_path, self.project, self.credentials
        )

        # Pipeline_json can be either PipelineJob or PipelineSpec.
        if pipeline_json.get("pipelineSpec") is not None:
            pipeline_job = pipeline_json
            # NOTE(review): assumes a "runtimeConfig" key exists whenever
            # "pipelineSpec" does — a PipelineJob JSON without runtimeConfig
            # would raise KeyError here; TODO confirm against the template schema.
            pipeline_root = (
                pipeline_root
                or pipeline_job["pipelineSpec"].get("defaultPipelineRoot")
                or pipeline_job["runtimeConfig"].get("gcsOutputDirectory")
                or initializer.global_config.staging_bucket
            )
        else:
            # Bare PipelineSpec: wrap it into a PipelineJob-shaped dict.
            pipeline_job = {
                "pipelineSpec": pipeline_json,
                "runtimeConfig": {},
            }
            pipeline_root = (
                pipeline_root
                or pipeline_job["pipelineSpec"].get("defaultPipelineRoot")
                or initializer.global_config.staging_bucket
            )
        # Last resort: derive a pipeline-specific GCS artifacts directory.
        pipeline_root = (
            pipeline_root
            or utils.gcs_utils.generate_gcs_directory_for_pipeline_artifacts(
                project=project,
                location=location,
            )
        )
        # Build the runtime config dict, then parse it into the v1beta1 proto.
        builder = utils.pipeline_utils.PipelineRuntimeConfigBuilder.from_job_spec_json(
            pipeline_job
        )
        builder.update_pipeline_root(pipeline_root)
        builder.update_runtime_parameters(parameter_values)
        builder.update_input_artifacts(input_artifacts)

        builder.update_failure_policy(failure_policy)
        builder.update_default_runtime(default_runtime)
        runtime_config_dict = builder.build()
        runtime_config = aiplatform_v1beta1.PipelineJob.RuntimeConfig()._pb
        json_format.ParseDict(runtime_config_dict, runtime_config)

        pipeline_name = pipeline_job["pipelineSpec"]["pipelineInfo"]["name"]
        # Derive a job ID from the sanitized pipeline name plus a timestamp
        # when the caller did not supply one.
        self.job_id = job_id or "{pipeline_name}-{timestamp}".format(
            pipeline_name=re.sub("[^-0-9a-z]+", "-", pipeline_name.lower())
            .lstrip("-")
            .rstrip("-"),
            timestamp=_get_current_time().strftime("%Y%m%d%H%M%S"),
        )
        if not _VALID_NAME_PATTERN.match(self.job_id):
            raise ValueError(
                f"Generated job ID: {self.job_id} is illegal as a Vertex pipelines job ID. "
                "Expecting an ID following the regex pattern "
                f'"{_VALID_NAME_PATTERN.pattern[1:-1]}"'
            )

        # Only override task caching when the caller explicitly set it;
        # None preserves the compile-time per-task settings.
        if enable_caching is not None:
            _set_enable_caching_value(pipeline_job["pipelineSpec"], enable_caching)

        pipeline_job_args = {
            "display_name": display_name,
            "pipeline_spec": pipeline_job["pipelineSpec"],
            "labels": labels,
            "runtime_config": runtime_config,
            "encryption_spec": initializer.global_config.get_encryption_spec(
                encryption_spec_key_name=encryption_spec_key_name
            ),
            "preflight_validations": enable_preflight_validations,
        }

        # For AR/HTTPS templates, record the source URI on the job resource.
        if _VALID_AR_URL.match(template_path) or _VALID_HTTPS_URL.match(template_path):
            pipeline_job_args["template_uri"] = template_path

        # v1beta1 twin of the GA resource, used by submit()/rerun().
        self._v1_beta1_pipeline_job = aiplatform_v1beta1.PipelineJob(
            **pipeline_job_args
        )

    def create_schedule(
        self,
        cron_expression: str,
        display_name: str,
        start_time: Optional[str] = None,
        end_time: Optional[str] = None,
        allow_queueing: bool = False,
        max_run_count: Optional[int] = None,
        max_concurrent_run_count: int = 1,
        service_account: Optional[str] = None,
        network: Optional[str] = None,
        create_request_timeout: Optional[float] = None,
    ) -> "pipeline_job_schedules.PipelineJobSchedule":  # noqa: F821
        """Creates a PipelineJobSchedule directly from a PipelineJob.

        Example Usage:

        pipeline_job = aiplatform.PipelineJob(
            display_name='job_display_name',
            template_path='your_pipeline.yaml',
        )
        pipeline_job.run()
        pipeline_job_schedule = pipeline_job.create_schedule(
            cron_expression='* * * * *',
            display_name='schedule_display_name',
        )

        Args:
            cron_expression (str):
                Required. Time specification (cron schedule expression) to launch scheduled runs.
                To explicitly set a timezone to the cron tab, apply a prefix: "CRON_TZ=${IANA_TIME_ZONE}" or "TZ=${IANA_TIME_ZONE}".
                The ${IANA_TIME_ZONE} may only be a valid string from IANA time zone database.
                For example, "CRON_TZ=America/New_York 1 * * * *", or "TZ=America/New_York 1 * * * *".
            display_name (str):
                Required. The user-defined name of this PipelineJobSchedule.
            start_time (str):
                Optional. Timestamp after which the first run can be scheduled.
                If unspecified, it defaults to the schedule creation timestamp.
            end_time (str):
                Optional. Timestamp after which no more runs will be scheduled.
                If unspecified, then runs will be scheduled indefinitely.
            allow_queueing (bool):
                Optional. Whether new scheduled runs can be queued when max_concurrent_runs limit is reached.
            max_run_count (int):
                Optional. Maximum run count of the schedule.
                If specified, The schedule will be completed when either started_run_count >= max_run_count or when end_time is reached.
                Must be positive and <= 2^63-1.
            max_concurrent_run_count (int):
                Optional. Maximum number of runs that can be started concurrently for this PipelineJobSchedule.
            service_account (str):
                Optional. Specifies the service account for workload run-as account.
                Users submitting jobs must have act-as permission on this run-as account.
            network (str):
                Optional. The full name of the Compute Engine network to which the job
                should be peered. For example, projects/12345/global/networks/myVPC.
                Private services access must already be configured for the network.
                If left unspecified, the network set in aiplatform.init will be used.
                Otherwise, the job is not peered with any network.
            create_request_timeout (float):
                Optional. The timeout for the create request in seconds.

        Returns:
            A Vertex AI PipelineJobSchedule.
        """
        # Preview keeps the `cron_expression` parameter name; the GA base
        # method takes it as `cron`.
        return super().create_schedule(
            cron=cron_expression,
            display_name=display_name,
            start_time=start_time,
            end_time=end_time,
            allow_queueing=allow_queueing,
            max_run_count=max_run_count,
            max_concurrent_run_count=max_concurrent_run_count,
            service_account=service_account,
            network=network,
            create_request_timeout=create_request_timeout,
        )

    @classmethod
    def batch_delete(
        cls,
        names: List[str],
        project: Optional[str] = None,
        location: Optional[str] = None,
    ) -> aiplatform_v1beta1.BatchDeletePipelineJobsResponse:
        """
        Example Usage:
            aiplatform.init(
                project='your_project_name',
                location='your_location',
            )
            aiplatform.PipelineJob.batch_delete(
                names=['pipeline_job_name', 'pipeline_job_name2']
            )

        Args:
            names (List[str]):
                Required. The fully-qualified resource name or ID of the
                Pipeline Jobs to batch delete. Example:
                "projects/123/locations/us-central1/pipelineJobs/456"
                or "456" when project and location are initialized or passed.
            project (str):
                Optional. Project containing the Pipeline Jobs to
                batch delete. If not set, the project given to `aiplatform.init`
                will be used.
            location (str):
                Optional. Location containing the Pipeline Jobs to
                batch delete. If not set, the location given to `aiplatform.init`
                will be used.

        Returns:
            BatchDeletePipelineJobsResponse contains PipelineJobs deleted.
        """
        user_project = project or initializer.global_config.project
        user_location = location or initializer.global_config.location
        parent = initializer.global_config.common_location_path(
            project=user_project, location=user_location
        )
        # Normalize each name (bare ID or full path) to a full resource name.
        pipeline_jobs_names = [
            utils.full_resource_name(
                resource_name=name,
                resource_noun="pipelineJobs",
                parse_resource_name_method=PipelineServiceClientGa.parse_pipeline_job_path,
                format_resource_name_method=PipelineServiceClientGa.pipeline_job_path,
                project=user_project,
                location=user_location,
            )
            for name in names
        ]
        request = aiplatform_v1beta1.BatchDeletePipelineJobsRequest(
            parent=parent, names=pipeline_jobs_names
        )
        client = cls._instantiate_client(
            location=user_location,
            appended_user_agent=["preview-pipeline-jobs-batch-delete"],
        )
        v1beta1_client = client.select_version(compat.V1BETA1)
        # batch_delete_pipeline_jobs returns an LRO; block until it finishes.
        operation = v1beta1_client.batch_delete_pipeline_jobs(request)
        return operation.result()

    def submit(
        self,
        service_account: Optional[str] = None,
        network: Optional[str] = None,
        reserved_ip_ranges: Optional[List[str]] = None,
        create_request_timeout: Optional[float] = None,
        job_id: Optional[str] = None,
    ) -> None:
        """Run this configured PipelineJob.

        Args:
            service_account (str):
                Optional. Specifies the service account for workload run-as account.
                Users submitting jobs must have act-as permission on this run-as account.
            network (str):
                Optional. The full name of the Compute Engine network to which the job
                should be peered. For example, projects/12345/global/networks/myVPC.

                Private services access must already be configured for the network.
                If left unspecified, the network set in aiplatform.init will be used.
                Otherwise, the job is not peered with any network.
            reserved_ip_ranges (List[str]):
                Optional. A list of names for the reserved IP ranges under the VPC
                network that can be used for this PipelineJob's workload. For example: ['vertex-ai-ip-range'].

                If left unspecified, the job will be deployed to any IP ranges under
                the provided VPC network.
            create_request_timeout (float):
                Optional. The timeout for the create request in seconds.
            job_id (str):
                Optional. The ID to use for the PipelineJob, which will become the final
                component of the PipelineJob name. If not provided, an ID will be
                automatically generated.
        """
        network = network or initializer.global_config.network
        service_account = service_account or initializer.global_config.service_account
        # NOTE(review): "gca_resouce" is a typo for "gca_resource" (local
        # variable only; also present in rerun()) — rename in a follow-up.
        gca_resouce = self._v1_beta1_pipeline_job

        # Only set optional resource fields when a value was resolved.
        if service_account:
            gca_resouce.service_account = service_account

        if network:
            gca_resouce.network = network

        if reserved_ip_ranges:
            gca_resouce.reserved_ip_ranges = reserved_ip_ranges
        user_project = initializer.global_config.project
        user_location = initializer.global_config.location
        parent = initializer.global_config.common_location_path(
            project=user_project, location=user_location
        )

        client = self._instantiate_client(
            location=user_location,
            appended_user_agent=["preview-pipeline-job-submit"],
        )
        v1beta1_client = client.select_version(compat.V1BETA1)

        _LOGGER.log_create_with_lro(self.__class__)

        request = aiplatform_v1beta1.CreatePipelineJobRequest(
            parent=parent,
            pipeline_job=self._v1_beta1_pipeline_job,
            pipeline_job_id=job_id or self.job_id,
        )

        response = v1beta1_client.create_pipeline_job(request=request)

        self._gca_resource = response

        _LOGGER.log_create_complete_with_getter(
            self.__class__, self._gca_resource, "pipeline_job"
        )

        _LOGGER.info("View Pipeline Job:\n%s" % self._dashboard_uri())

    def rerun(
        self,
        original_pipelinejob_name: str,
        pipeline_task_rerun_configs: Optional[
            List[aiplatform_v1beta1.PipelineTaskRerunConfig]
        ] = None,
        parameter_values: Optional[Dict[str, Any]] = None,
        job_id: Optional[str] = None,
        service_account: Optional[str] = None,
        network: Optional[str] = None,
        reserved_ip_ranges: Optional[List[str]] = None,
    ) -> None:
        """Rerun a PipelineJob.

        Args:
            original_pipelinejob_name (str):
                Required. The name of the original PipelineJob.
            pipeline_task_rerun_configs (List[aiplatform_v1beta1.PipelineTaskRerunConfig]):
                Optional. The list of PipelineTaskRerunConfig to specify the tasks to rerun.
            parameter_values (Dict[str, Any]):
                Optional. The parameter values to override the original PipelineJob.
            job_id (str):
                Optional. The ID to use for the PipelineJob, which will become the final
                component of the PipelineJob name. If not provided, an ID will be
                automatically generated.
            service_account (str):
                Optional. Specifies the service account for workload run-as account.
                Users submitting jobs must have act-as permission on this run-as account.
            network (str):
                Optional. The full name of the Compute Engine network to which the job
                should be peered. For example, projects/12345/global/networks/myVPC.

                Private services access must already be configured for the network.
                If left unspecified, the network set in aiplatform.init will be used.
                Otherwise, the job is not peered with any network.
            reserved_ip_ranges (List[str]):
                Optional. A list of names for the reserved IP ranges under the VPC
                network that can be used for this PipelineJob's workload. For example: ['vertex-ai-ip-range'].

                If left unspecified, the job will be deployed to any IP ranges under
                the provided VPC network.
        """
        network = network or initializer.global_config.network
        service_account = service_account or initializer.global_config.service_account
        # NOTE(review): "gca_resouce" typo mirrors submit(); see note there.
        gca_resouce = self._v1_beta1_pipeline_job

        if service_account:
            gca_resouce.service_account = service_account

        if network:
            gca_resouce.network = network

        if reserved_ip_ranges:
            gca_resouce.reserved_ip_ranges = reserved_ip_ranges
        user_project = initializer.global_config.project
        user_location = initializer.global_config.location
        parent = initializer.global_config.common_location_path(
            project=user_project, location=user_location
        )

        # NOTE(review): reuses the "preview-pipeline-job-submit" user agent
        # rather than a rerun-specific one — presumably intentional; confirm.
        client = self._instantiate_client(
            location=user_location,
            appended_user_agent=["preview-pipeline-job-submit"],
        )
        v1beta1_client = client.select_version(compat.V1BETA1)

        _LOGGER.log_create_with_lro(self.__class__)

        pipeline_job = self._v1_beta1_pipeline_job
        try:
            # Fetch the original job to link this rerun to its billing run ID,
            # stored as an integer in a well-known label.
            get_request = aiplatform_v1beta1.GetPipelineJobRequest(
                name=original_pipelinejob_name
            )
            original_pipeline_job = v1beta1_client.get_pipeline_job(request=get_request)
            pipeline_job.original_pipeline_job_id = int(
                original_pipeline_job.labels["vertex-ai-pipelines-run-billing-id"]
            )
        except Exception as e:
            raise ValueError(
                f"Failed to get original pipeline job: {original_pipelinejob_name}"
            ) from e

        pipeline_job.pipeline_task_rerun_configs = pipeline_task_rerun_configs

        # Parameter overrides replace the runtime config's parameter values.
        if parameter_values:
            runtime_config = self._v1_beta1_pipeline_job.runtime_config
            runtime_config.parameter_values = parameter_values

        pipeline_name = self._v1_beta1_pipeline_job.display_name

        # Same ID-generation scheme as __init__: sanitized name + timestamp.
        job_id = job_id or "{pipeline_name}-{timestamp}".format(
            pipeline_name=re.sub("[^-0-9a-z]+", "-", pipeline_name.lower())
            .lstrip("-")
            .rstrip("-"),
            timestamp=_get_current_time().strftime("%Y%m%d%H%M%S"),
        )

        request = aiplatform_v1beta1.CreatePipelineJobRequest(
            parent=parent,
            pipeline_job=self._v1_beta1_pipeline_job,
            pipeline_job_id=job_id,
        )

        response = v1beta1_client.create_pipeline_job(request=request)

        self._gca_resource = response

        _LOGGER.log_create_complete_with_getter(
            self.__class__, self._gca_resource, "pipeline_job"
        )

        _LOGGER.info("View Pipeline Job:\n%s" % self._dashboard_uri())
Binary file not shown.
@@ -0,0 +1,237 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2023 Google LLC
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
from typing import List, Optional
|
||||
|
||||
from google.auth import credentials as auth_credentials
|
||||
from google.cloud.aiplatform import (
|
||||
PipelineJob,
|
||||
)
|
||||
from google.cloud.aiplatform.pipeline_job_schedules import (
|
||||
PipelineJobSchedule as PipelineJobScheduleGa,
|
||||
)
|
||||
from google.cloud.aiplatform.preview.schedule.schedules import (
|
||||
_Schedule as _SchedulePreview,
|
||||
)
|
||||
|
||||
|
||||
class PipelineJobSchedule(
    PipelineJobScheduleGa,
    _SchedulePreview,
):
    """Preview PipelineJobSchedule resource for Vertex AI.

    Thin wrapper over the GA ``PipelineJobSchedule`` that exposes the
    preview-era ``cron_expression`` parameter name (mapped to the GA
    ``cron`` argument) while mixing in the preview ``_Schedule`` behavior.
    """

    def __init__(
        self,
        pipeline_job: PipelineJob,
        display_name: str,
        credentials: Optional[auth_credentials.Credentials] = None,
        project: Optional[str] = None,
        location: Optional[str] = None,
    ):
        """Retrieves a PipelineJobSchedule resource and instantiates its
        representation.

        Args:
            pipeline_job (PipelineJob):
                Required. PipelineJob used to init the schedule.
            display_name (str):
                Required. The user-defined name of this PipelineJobSchedule.
            credentials (auth_credentials.Credentials):
                Optional. Custom credentials to use to create this PipelineJobSchedule.
                Overrides credentials set in aiplatform.init.
            project (str):
                Optional. The project that you want to run this PipelineJobSchedule in.
                If not set, the project set in aiplatform.init will be used.
            location (str):
                Optional. Location to create PipelineJobSchedule. If not set,
                location set in aiplatform.init will be used.
        """
        # Pure delegation to the GA constructor; no preview-only state.
        super().__init__(
            pipeline_job=pipeline_job,
            display_name=display_name,
            credentials=credentials,
            project=project,
            location=location,
        )

    def create(
        self,
        cron_expression: str,
        start_time: Optional[str] = None,
        end_time: Optional[str] = None,
        allow_queueing: bool = False,
        max_run_count: Optional[int] = None,
        max_concurrent_run_count: int = 1,
        service_account: Optional[str] = None,
        network: Optional[str] = None,
        create_request_timeout: Optional[float] = None,
    ) -> None:
        """Create a PipelineJobSchedule.

        Args:
            cron_expression (str):
                Required. Time specification (cron schedule expression) to launch scheduled runs.
                To explicitly set a timezone to the cron tab, apply a prefix: "CRON_TZ=${IANA_TIME_ZONE}" or "TZ=${IANA_TIME_ZONE}".
                The ${IANA_TIME_ZONE} may only be a valid string from IANA time zone database.
                For example, "CRON_TZ=America/New_York 1 * * * *", or "TZ=America/New_York 1 * * * *".
            start_time (str):
                Optional. Timestamp after which the first run can be scheduled.
                If unspecified, it defaults to the schedule creation timestamp.
            end_time (str):
                Optional. Timestamp after which no more runs will be scheduled.
                If unspecified, then runs will be scheduled indefinitely.
            allow_queueing (bool):
                Optional. Whether new scheduled runs can be queued when max_concurrent_runs limit is reached.
            max_run_count (int):
                Optional. Maximum run count of the schedule.
                If specified, The schedule will be completed when either started_run_count >= max_run_count or when end_time is reached.
                Must be positive and <= 2^63-1.
            max_concurrent_run_count (int):
                Optional. Maximum number of runs that can be started concurrently for this PipelineJobSchedule.
            service_account (str):
                Optional. Specifies the service account for workload run-as account.
                Users submitting jobs must have act-as permission on this run-as account.
            network (str):
                Optional. The full name of the Compute Engine network to which the job
                should be peered. For example, projects/12345/global/networks/myVPC.
                Private services access must already be configured for the network.
                If left unspecified, the network set in aiplatform.init will be used.
                Otherwise, the job is not peered with any network.
            create_request_timeout (float):
                Optional. The timeout for the create request in seconds.
        """
        # Map the preview `cron_expression` name to the GA `cron` argument.
        super().create(
            cron=cron_expression,
            start_time=start_time,
            end_time=end_time,
            allow_queueing=allow_queueing,
            max_run_count=max_run_count,
            max_concurrent_run_count=max_concurrent_run_count,
            service_account=service_account,
            network=network,
            create_request_timeout=create_request_timeout,
        )

    def list_jobs(
        self,
        filter: Optional[str] = None,
        order_by: Optional[str] = None,
        enable_simple_view: bool = False,
        project: Optional[str] = None,
        location: Optional[str] = None,
        credentials: Optional[auth_credentials.Credentials] = None,
    ) -> List[PipelineJob]:
        """List all PipelineJob 's created by this PipelineJobSchedule.

        Example usage:

        pipeline_job_schedule.list_jobs(order_by='create_time_desc')

        Args:
            filter (str):
                Optional. An expression for filtering the results of the request.
                For field names both snake_case and camelCase are supported.
            order_by (str):
                Optional. A comma-separated list of fields to order by, sorted in
                ascending order. Use "desc" after a field name for descending.
                Supported fields: `display_name`, `create_time`, `update_time`
            enable_simple_view (bool):
                Optional. Whether to pass the `read_mask` parameter to the list call.
                Defaults to False if not provided. This will improve the performance of calling
                list(). However, the returned PipelineJob list will not include all fields for
                each PipelineJob. Setting this to True will exclude the following fields in your
                response: `runtime_config`, `service_account`, `network`, and some subfields of
                `pipeline_spec` and `job_detail`. The following fields will be included in
                each PipelineJob resource in your response: `state`, `display_name`,
                `pipeline_spec.pipeline_info`, `create_time`, `start_time`, `end_time`,
                `update_time`, `labels`, `template_uri`, `template_metadata.version`,
                `job_detail.pipeline_run_context`, `job_detail.pipeline_context`.
            project (str):
                Optional. Project to retrieve list from. If not set, project
                set in aiplatform.init will be used.
            location (str):
                Optional. Location to retrieve list from. If not set, location
                set in aiplatform.init will be used.
            credentials (auth_credentials.Credentials):
                Optional. Custom credentials to use to retrieve list. Overrides
                credentials set in aiplatform.init.

        Returns:
            List[PipelineJob] - A list of PipelineJob resource objects.
        """
        # Straight pass-through; the GA implementation does all the work.
        return super().list_jobs(
            filter=filter,
            order_by=order_by,
            enable_simple_view=enable_simple_view,
            project=project,
            location=location,
            credentials=credentials,
        )

    def update(
        self,
        display_name: Optional[str] = None,
        cron_expression: Optional[str] = None,
        start_time: Optional[str] = None,
        end_time: Optional[str] = None,
        allow_queueing: Optional[bool] = None,
        max_run_count: Optional[int] = None,
        max_concurrent_run_count: Optional[int] = None,
    ) -> None:
        """Update an existing PipelineJobSchedule.

        Example usage:

        pipeline_job_schedule.update(
            display_name='updated-display-name',
            cron_expression='* * * * *',
        )

        Args:
            display_name (str):
                Optional. The user-defined name of this PipelineJobSchedule.
            cron_expression (str):
                Optional. Time specification (cron schedule expression) to launch scheduled runs.
                To explicitly set a timezone to the cron tab, apply a prefix: "CRON_TZ=${IANA_TIME_ZONE}" or "TZ=${IANA_TIME_ZONE}".
                The ${IANA_TIME_ZONE} may only be a valid string from IANA time zone database.
                For example, "CRON_TZ=America/New_York 1 * * * *", or "TZ=America/New_York 1 * * * *".
            start_time (str):
                Optional. Timestamp after which the first run can be scheduled.
                If unspecified, it defaults to the schedule creation timestamp.
            end_time (str):
                Optional. Timestamp after which no more runs will be scheduled.
                If unspecified, then runs will be scheduled indefinitely.
            allow_queueing (bool):
                Optional. Whether new scheduled runs can be queued when max_concurrent_runs limit is reached.
            max_run_count (int):
                Optional. Maximum run count of the schedule.
                If specified, The schedule will be completed when either started_run_count >= max_run_count or when end_time is reached.
                Must be positive and <= 2^63-1.
            max_concurrent_run_count (int):
                Optional. Maximum number of runs that can be started concurrently for this PipelineJobSchedule.

        Raises:
            RuntimeError: User tried to call update() before create().
        """
        # Map the preview `cron_expression` name to the GA `cron` argument.
        super().update(
            display_name=display_name,
            cron=cron_expression,
            start_time=start_time,
            end_time=end_time,
            allow_queueing=allow_queueing,
            max_run_count=max_run_count,
            max_concurrent_run_count=max_concurrent_run_count,
        )
@@ -0,0 +1,94 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# Copyright 2023 Google LLC
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
from typing import NamedTuple, Optional, Dict, Union
|
||||
|
||||
from google.cloud.aiplatform import utils
|
||||
from google.cloud.aiplatform.compat.types import (
|
||||
accelerator_type_v1beta1 as gca_accelerator_type_compat,
|
||||
)
|
||||
|
||||
|
||||
class _ResourcePool(NamedTuple):
|
||||
"""Specification container for Worker Pool specs used for distributed training.
|
||||
|
||||
Usage:
|
||||
|
||||
resource_pool = _ResourcePool(
|
||||
replica_count=1,
|
||||
machine_type='n1-standard-4',
|
||||
accelerator_count=1,
|
||||
accelerator_type='NVIDIA_TESLA_K80',
|
||||
boot_disk_type='pd-ssd',
|
||||
boot_disk_size_gb=100,
|
||||
)
|
||||
|
||||
Note that container and python package specs are not stored with this spec.
|
||||
"""
|
||||
|
||||
replica_count: int = 1
|
||||
machine_type: str = "n1-standard-4"
|
||||
accelerator_count: int = 0
|
||||
accelerator_type: str = "ACCELERATOR_TYPE_UNSPECIFIED"
|
||||
boot_disk_type: str = "pd-ssd"
|
||||
boot_disk_size_gb: int = 100
|
||||
|
||||
def _get_accelerator_type(self) -> Optional[str]:
|
||||
"""Validates accelerator_type and returns the name of the accelerator.
|
||||
|
||||
Returns:
|
||||
None if no accelerator or valid accelerator name.
|
||||
|
||||
Raise:
|
||||
ValueError if accelerator type is invalid.
|
||||
"""
|
||||
|
||||
# Raises ValueError if invalid accelerator_type
|
||||
utils.validate_accelerator_type(self.accelerator_type)
|
||||
|
||||
accelerator_enum = getattr(
|
||||
gca_accelerator_type_compat.AcceleratorType, self.accelerator_type
|
||||
)
|
||||
|
||||
if (
|
||||
accelerator_enum
|
||||
!= gca_accelerator_type_compat.AcceleratorType.ACCELERATOR_TYPE_UNSPECIFIED
|
||||
):
|
||||
return self.accelerator_type
|
||||
|
||||
@property
|
||||
def spec_dict(self) -> Dict[str, Union[int, str, Dict[str, Union[int, str]]]]:
|
||||
"""Return specification as a Dict."""
|
||||
spec = {
|
||||
"machine_spec": {"machine_type": self.machine_type},
|
||||
"replica_count": self.replica_count,
|
||||
"disk_spec": {
|
||||
"boot_disk_type": self.boot_disk_type,
|
||||
"boot_disk_size_gb": self.boot_disk_size_gb,
|
||||
},
|
||||
}
|
||||
|
||||
accelerator_type = self._get_accelerator_type()
|
||||
if accelerator_type and self.accelerator_count:
|
||||
spec["machine_spec"]["accelerator_type"] = accelerator_type
|
||||
spec["machine_spec"]["accelerator_count"] = self.accelerator_count
|
||||
|
||||
return spec
|
||||
|
||||
@property
|
||||
def is_empty(self) -> bool:
|
||||
"""Returns True is replica_count > 0 False otherwise."""
|
||||
return self.replica_count <= 0
|
||||
Binary file not shown.
@@ -0,0 +1,55 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2023 Google LLC
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
from google.auth import credentials as auth_credentials
|
||||
|
||||
from google.cloud.aiplatform.schedules import _Schedule as _ScheduleGa
|
||||
|
||||
|
||||
class _Schedule(
|
||||
_ScheduleGa,
|
||||
):
|
||||
"""Preview Schedule resource for Vertex AI."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
credentials: auth_credentials.Credentials,
|
||||
project: str,
|
||||
location: str,
|
||||
):
|
||||
"""Retrieves a Schedule resource and instantiates its representation.
|
||||
Args:
|
||||
credentials (auth_credentials.Credentials):
|
||||
Optional. Custom credentials to use to create this Schedule.
|
||||
Overrides credentials set in aiplatform.init.
|
||||
project (str):
|
||||
Optional. The project that you want to run this Schedule in.
|
||||
If not set, the project set in aiplatform.init will be used.
|
||||
location (str):
|
||||
Optional. Location to create Schedule. If not set,
|
||||
location set in aiplatform.init will be used.
|
||||
"""
|
||||
super().__init__(project=project, location=location, credentials=credentials)
|
||||
|
||||
@property
|
||||
def cron_expression(self) -> str:
|
||||
"""Current Schedule cron expression.
|
||||
|
||||
Returns:
|
||||
Schedule cron expression.
|
||||
"""
|
||||
return super().cron
|
||||
@@ -0,0 +1,64 @@
|
||||
"""Ray on Vertex AI."""
|
||||
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2025 Google LLC
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
import sys
|
||||
|
||||
from google.cloud.aiplatform.vertex_ray.bigquery_datasource import (
|
||||
_BigQueryDatasource,
|
||||
)
|
||||
from google.cloud.aiplatform.vertex_ray.client_builder import (
|
||||
VertexRayClientBuilder as ClientBuilder,
|
||||
)
|
||||
|
||||
from google.cloud.aiplatform.vertex_ray.cluster_init import (
|
||||
create_ray_cluster,
|
||||
delete_ray_cluster,
|
||||
get_ray_cluster,
|
||||
list_ray_clusters,
|
||||
update_ray_cluster,
|
||||
)
|
||||
|
||||
from google.cloud.aiplatform.vertex_ray import data
|
||||
|
||||
from google.cloud.aiplatform.vertex_ray.util.resources import (
|
||||
Resources,
|
||||
NodeImages,
|
||||
)
|
||||
|
||||
from google.cloud.aiplatform.vertex_ray.dashboard_sdk import (
|
||||
get_job_submission_client_cluster_info,
|
||||
)
|
||||
|
||||
if sys.version_info[1] not in (10, 11):
|
||||
print(
|
||||
"[Ray on Vertex]: The client environment with Python version 3.10 or 3.11 is required."
|
||||
)
|
||||
|
||||
__all__ = (
|
||||
"_BigQueryDatasource",
|
||||
"data",
|
||||
"ClientBuilder",
|
||||
"get_job_submission_client_cluster_info",
|
||||
"create_ray_cluster",
|
||||
"delete_ray_cluster",
|
||||
"get_ray_cluster",
|
||||
"list_ray_clusters",
|
||||
"update_ray_cluster",
|
||||
"Resources",
|
||||
"NodeImages",
|
||||
)
|
||||
Binary file not shown.
@@ -0,0 +1,18 @@
|
||||
"""Ray on Vertex AI Prediction."""
|
||||
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2023 Google LLC
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
Binary file not shown.
@@ -0,0 +1,24 @@
|
||||
"""Ray on Vertex AI Prediction Tensorflow."""
|
||||
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2023 Google LLC
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
from google.cloud.aiplatform.vertex_ray.predict.sklearn import (
|
||||
register_sklearn,
|
||||
)
|
||||
|
||||
__all__ = ("register_sklearn",)
|
||||
Binary file not shown.
@@ -0,0 +1,24 @@
|
||||
"""Ray on Vertex AI Prediction Tensorflow."""
|
||||
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2023 Google LLC
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
from google.cloud.aiplatform.vertex_ray.predict.tensorflow import (
|
||||
register_tensorflow,
|
||||
)
|
||||
|
||||
__all__ = ("register_tensorflow",)
|
||||
Binary file not shown.
@@ -0,0 +1,24 @@
|
||||
"""Ray on Vertex AI Prediction Tensorflow."""
|
||||
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2023 Google LLC
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
from google.cloud.aiplatform.vertex_ray.predict.torch import (
|
||||
get_pytorch_model_from,
|
||||
)
|
||||
|
||||
__all__ = ("get_pytorch_model_from",)
|
||||
Binary file not shown.
@@ -0,0 +1,24 @@
|
||||
"""Ray on Vertex AI Prediction Tensorflow."""
|
||||
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2023 Google LLC
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
from google.cloud.aiplatform.vertex_ray.predict.xgboost import (
|
||||
register_xgboost,
|
||||
)
|
||||
|
||||
__all__ = ("register_xgboost",)
|
||||
Binary file not shown.
Reference in New Issue
Block a user