structure saas with tools
This commit is contained in:
217
.venv/lib/python3.10/site-packages/vertex_ray/util/resources.py
Normal file
217
.venv/lib/python3.10/site-packages/vertex_ray/util/resources.py
Normal file
@@ -0,0 +1,217 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2023 Google LLC
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
import dataclasses
|
||||
from typing import Dict, List, Optional
|
||||
from google.cloud.aiplatform_v1beta1.types import PersistentResource
|
||||
|
||||
|
||||
@dataclasses.dataclass
class AutoscalingSpec:
    """Autoscaling configuration for a Ray cluster node pool.

    Attributes:
        min_replica_count: Lower bound on the number of replicas in the
            cluster (default 1).
        max_replica_count: Upper bound on the number of replicas in the
            cluster (default 2).
    """

    min_replica_count: int = 1
    max_replica_count: int = 2
|
||||
|
||||
|
||||
@dataclasses.dataclass
class Resources:
    """Machine resources for one Ray cluster node pool.

    Attributes:
        machine_type: Compute Engine machine type; see
            https://cloud.google.com/vertex-ai/docs/training/configure-compute#machine-types
        node_count: How many nodes of this type to start for the Ray
            cluster.
        accelerator_type: GPU type, e.g. "NVIDIA_TESLA_P4". Supported types:
            https://cloud.google.com/vertex-ai/docs/training/configure-compute#specifying_gpus
        accelerator_count: Number of accelerators attached to each machine.
        boot_disk_type: Boot disk type, either "pd-ssd" (Persistent Disk
            Solid State Drive) or "pd-standard" (Persistent Disk Hard Disk
            Drive). Defaults to "pd-ssd".
        boot_disk_size_gb: Boot disk size in GB; either unspecified or in
            the range [100, 64000]. Defaults to 100.
        custom_image: Custom container image for this node pool (e.g.
            us-docker.pkg.dev/my-project/ray-gpu.2-9.py310-tf:latest).
        autoscaling_spec: Optional autoscaling configuration for this
            node pool.
    """

    machine_type: Optional[str] = "n1-standard-16"
    node_count: Optional[int] = 1
    accelerator_type: Optional[str] = None
    accelerator_count: Optional[int] = 0
    boot_disk_type: Optional[str] = "pd-ssd"
    boot_disk_size_gb: Optional[int] = 100
    custom_image: Optional[str] = None
    autoscaling_spec: Optional[AutoscalingSpec] = None
|
||||
|
||||
|
||||
@dataclasses.dataclass
class NodeImages:
    """Custom images for a ray cluster.

    We currently support Ray v2.9, v2.33, v2.42 and python v3.10.
    We also support python v3.11 for Ray v2.42.
    The custom images must be extended from the following base images:
    "{region}-docker.pkg.dev/vertex-ai/training/ray-cpu.2-9.py310:latest",
    "{region}-docker.pkg.dev/vertex-ai/training/ray-gpu.2-9.py310:latest",
    "{region}-docker.pkg.dev/vertex-ai/training/ray-cpu.2-33.py310:latest",
    "{region}-docker.pkg.dev/vertex-ai/training/ray-gpu.2-33.py310:latest",
    "{region}-docker.pkg.dev/vertex-ai/training/ray-cpu.2-42.py310:latest",
    "{region}-docker.pkg.dev/vertex-ai/training/ray-gpu.2-42.py310:latest",
    "{region}-docker.pkg.dev/vertex-ai/training/ray-cpu.2-42.py311:latest", or
    "{region}-docker.pkg.dev/vertex-ai/training/ray-gpu.2-42.py311:latest". In
    order to use custom images, need to specify both head and worker images.

    Attributes:
        head: image for head node (eg. us-docker.pkg.dev/my-project/ray-cpu.2-33.py310-tf:latest).
        worker: image for all worker nodes (eg. us-docker.pkg.dev/my-project/ray-gpu.2-33.py310-tf:latest).
    """

    # Fix: fields defaulted to None were annotated as bare `str`; PEP 484
    # disallows implicit Optional, so annotate explicitly. Runtime behavior
    # (defaults, __init__ signature) is unchanged.
    head: Optional[str] = None
    worker: Optional[str] = None
|
||||
|
||||
|
||||
@dataclasses.dataclass
class PscIConfig:
    """PSC-I config.

    Attributes:
        network_attachment: Optional. The name or full name of the Compute Engine
            `network attachment <https://cloud.google.com/vpc/docs/about-network-attachments>`
            to attach to the resource. It has a format:
            ``projects/{project}/regions/{region}/networkAttachments/{networkAttachment}``.
            Where {project} is a project number, as in ``12345``, and
            {networkAttachment} is a network attachment name. To specify
            this field, you must have already [created a network
            attachment]
            (https://cloud.google.com/vpc/docs/create-manage-network-attachments#create-network-attachments).
            This field is only used for resources using PSC-I. Make sure you do not
            specify the network here for VPC peering.
    """

    # Fix: the field defaults to None, so the annotation must be
    # Optional[str] (PEP 484 disallows implicit Optional). No runtime change.
    network_attachment: Optional[str] = None
|
||||
|
||||
|
||||
@dataclasses.dataclass
class NfsMount:
    """NFS mount.

    Attributes:
        server: Required. IP address of the NFS server.
        path: Required. Source path exported from NFS server. Has to start
            with '/', and combined with the ip address, it indicates the
            source mount path in the form of ``server:path``.
        mount_point: Required. Destination mount path. The NFS will be mounted
            for the user under /mnt/nfs/<mount_point>.
    """

    # Fix: fields defaulted to None were annotated as bare `str`; PEP 484
    # disallows implicit Optional. Defaults are kept as None for backward
    # compatibility even though the fields are documented as required —
    # presumably validated downstream at cluster-creation time (not visible
    # here; confirm against callers).
    server: Optional[str] = None
    path: Optional[str] = None
    mount_point: Optional[str] = None
|
||||
|
||||
|
||||
@dataclasses.dataclass
class Cluster:
    """Ray cluster (output only).

    Attributes:
        cluster_resource_name: It has a format:
            "projects/<project_num>/locations/<region>/persistentResources/<pr_id>".
        network: Virtual private cloud (VPC) network. It has a format:
            "projects/<project_num>/global/networks/<network_name>".
            For Ray Client, VPC peering is required to connect to the cluster
            managed in the Vertex API service. For Ray Job API, VPC network is
            not required because cluster connection can be accessed through
            dashboard address.
        reserved_ip_ranges: A list of names for the reserved IP ranges under
            the VPC network that can be used for this cluster. If set, we will
            deploy the cluster within the provided IP ranges. Otherwise, the
            cluster is deployed to any IP ranges under the provided VPC network.
            Example: ["vertex-ai-ip-range"].
        service_account: Service account to be used for running Ray programs on
            the cluster.
        state: Describes the cluster state (defined in PersistentResource.State).
        python_version: Python version for the ray cluster (e.g. "3.10").
        ray_version: Ray version for the ray cluster (e.g. "2.33").
        head_node_type: The head node resource. Resources.node_count must be 1.
            If not set, by default it is a CPU node with machine_type of n1-standard-8.
        worker_node_types: The list of Resources of the worker nodes. Should not
            duplicate the elements in the list.
        dashboard_address: For Ray Job API (JobSubmissionClient), with this
            cluster connection doesn't require VPC peering.
        ray_metric_enabled: Presumably toggles Ray metric collection for the
            cluster (default True) — not documented in the original; confirm
            against the cluster-creation code.
        ray_logs_enabled: Presumably toggles Ray log export for the cluster
            (default True) — not documented in the original; confirm against
            the cluster-creation code.
        psc_interface_config: PSC-I configuration for the cluster, if any.
        labels:
            The labels with user-defined metadata to organize Ray cluster.

            Label keys and values can be no longer than 64 characters (Unicode
            codepoints), can only contain lowercase letters, numeric characters,
            underscores and dashes. International characters are allowed.

            See https://goo.gl/xmQnxf for more information and examples of labels.
    """

    # Fix: every field defaulting to None was annotated with a non-Optional
    # type (e.g. `str = None`, `List[str] = None`); PEP 484 disallows
    # implicit Optional. Annotations made explicit; defaults and field order
    # are unchanged, so the generated __init__ is backward-compatible.
    cluster_resource_name: Optional[str] = None
    network: Optional[str] = None
    reserved_ip_ranges: Optional[List[str]] = None
    service_account: Optional[str] = None
    state: Optional[PersistentResource.State] = None
    python_version: Optional[str] = None
    ray_version: Optional[str] = None
    head_node_type: Optional[Resources] = None
    worker_node_types: Optional[List[Resources]] = None
    dashboard_address: Optional[str] = None
    ray_metric_enabled: bool = True
    ray_logs_enabled: bool = True
    psc_interface_config: Optional[PscIConfig] = None
    labels: Optional[Dict[str, str]] = None
|
||||
|
||||
|
||||
def _check_machine_spec_identical(
    node_type_1: Resources,
    node_type_2: Resources,
) -> int:
    """Check if node_type_1 and node_type_2 have the same machine_spec.

    Two node types are considered identical when their machine_type,
    accelerator_type, and accelerator_count all match. In that case the
    boot disks must also match, otherwise a ValueError is raised.

    Returns:
        node_type_2's node_count when the machine specs are identical
        (the additional replica count), 0 otherwise.

    Raises:
        ValueError: If the machine specs match but boot disk type or
            boot disk size differ.
    """
    same_machine_spec = (
        node_type_1.machine_type == node_type_2.machine_type
        and node_type_1.accelerator_type == node_type_2.accelerator_type
        and node_type_1.accelerator_count == node_type_2.accelerator_count
    )
    # Different machine specs: nothing to consolidate.
    if not same_machine_spec:
        return 0

    # Identical machine specs must also share identical boot disks.
    if node_type_1.boot_disk_type != node_type_2.boot_disk_type:
        raise ValueError(
            "Worker disk type must match the head node's disk type if"
            " sharing the same machine_type, accelerator_type, and"
            " accelerator_count"
        )
    if node_type_1.boot_disk_size_gb != node_type_2.boot_disk_size_gb:
        raise ValueError(
            "Worker disk size must match the head node's disk size if"
            " sharing the same machine_type, accelerator_type, and"
            " accelerator_count"
        )
    return node_type_2.node_count
|
||||
Reference in New Issue
Block a user