structure saas with tools

This commit is contained in:
Davidson Gomes
2025-04-25 15:30:54 -03:00
commit 1aef473937
16434 changed files with 6584257 additions and 0 deletions

View File

@@ -0,0 +1,288 @@
# -*- coding: utf-8 -*-
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import datetime
import logging
import time
from typing import Optional
from google.api_core import exceptions
from google.cloud.aiplatform import initializer
from google.cloud.aiplatform.utils import (
PersistentResourceClientWithOverride,
)
from google.cloud.aiplatform.vertex_ray.util import _validation_utils
from google.cloud.aiplatform.vertex_ray.util.resources import (
AutoscalingSpec,
Cluster,
PscIConfig,
Resources,
)
from google.cloud.aiplatform_v1beta1.types.persistent_resource import (
PersistentResource,
)
from google.cloud.aiplatform_v1beta1.types.persistent_resource_service import (
GetPersistentResourceRequest,
)
# Path fragments used to recognize Vertex-provided (non-custom) Ray images.
# The region prefix (e.g. "us") precedes the leading "-", so these are tested
# with substring membership against a full image uri.
_PRIVATE_PREVIEW_IMAGE = "-docker.pkg.dev/vertex-ai/training/tf-"
_OFFICIAL_IMAGE = "-docker.pkg.dev/vertex-ai/training/ray-"
def create_persistent_resource_client():
    """Build a v1beta1 client for the PersistentResource service.

    The project and location are inherited from the global configuration
    established by aiplatform.init().
    """
    client = initializer.global_config.create_client(
        client_class=PersistentResourceClientWithOverride,
        appended_gapic_version="vertex_ray",
    )
    return client.select_version("v1beta1")
def polling_delay(num_attempts: int, time_scale: float) -> datetime.timedelta:
    """Compute the delay before the next poll of the Vertex service.

    Implements bounded exponential decay: the interval starts at
    max(time_scale, 30s) and shrinks by a factor of 0.765 per attempt,
    flattening out after 6 attempts (polling starts slow, then speeds up).

    Args:
        num_attempts: How many times we have already polled without finding
            the desired result.
        time_scale: The longest initial polling interval, in seconds, or
            zero. Zero (or anything below 30) is clamped up to 30 seconds.

    Returns:
        A recommended delay interval as a datetime.timedelta.
    """
    floor_seconds = 30.0
    base = max(time_scale, floor_seconds)
    decay = 0.765 ** min(num_attempts, 6)
    return datetime.timedelta(seconds=base * decay)
def get_persistent_resource(
    persistent_resource_name: str, tolerance: Optional[int] = 0
):
    """Poll the Vertex service until a persistent resource is RUNNING.

    Args:
        persistent_resource_name:
            "projects/<project_num>/locations/<region>/persistentResources/<pr_id>".
        tolerance: Number of attempts to get the persistent resource; 404
            responses are tolerated until this many polls have happened.

    Returns:
        aiplatform_v1.PersistentResource if state is RUNNING.

    Raises:
        ValueError: Invalid cluster resource name (still 404 after
            `tolerance` attempts).
        RuntimeError: Service returns error.
        RuntimeError: Cluster resource state is STOPPING.
        RuntimeError: Cluster resource state is ERROR.
    """
    client = create_persistent_resource_client()
    request = GetPersistentResourceRequest(name=persistent_resource_name)
    # TODO(b/277117901): Add test cases for polling and error handling
    num_attempts = 0
    while True:
        try:
            response = client.get_persistent_resource(request)
        except exceptions.NotFound:
            # The resource may not be visible immediately after creation;
            # only fail once the caller-supplied tolerance is exceeded.
            response = None
            if num_attempts >= tolerance:
                raise ValueError(
                    "[Ray on Vertex AI]: Invalid cluster_resource_name (404 not found)."
                )
        if response:
            if response.error.message:
                logging.error("[Ray on Vertex AI]: %s" % response.error.message)
                raise RuntimeError("[Ray on Vertex AI]: Cluster returned an error.")
            print("[Ray on Vertex AI]: Cluster State =", response.state)
            if response.state == PersistentResource.State.RUNNING:
                return response
            elif response.state == PersistentResource.State.STOPPING:
                raise RuntimeError("[Ray on Vertex AI]: The cluster is stopping.")
            elif response.state == PersistentResource.State.ERROR:
                raise RuntimeError(
                    "[Ray on Vertex AI]: The cluster encountered an error."
                )
        # Polling decay: sleep between polls, with the interval shrinking on
        # each attempt (see polling_delay).
        sleep_time = polling_delay(num_attempts=num_attempts, time_scale=150.0)
        num_attempts += 1
        print(
            "Waiting for cluster provisioning; attempt {}; sleeping for {} seconds".format(
                num_attempts, sleep_time
            )
        )
        time.sleep(sleep_time.total_seconds())
def persistent_resource_to_cluster(
    persistent_resource: PersistentResource,
) -> Optional[Cluster]:
    """Convert a PersistentResource proto into a vertex_ray Cluster.

    Args:
        persistent_resource: PersistentResource.

    Returns:
        Cluster, or None when the persistent resource has no RaySpec or is
        built on an outdated private-preview image.
    """
    dashboard_address = persistent_resource.resource_runtime.access_uris.get(
        "RAY_DASHBOARD_URI"
    )
    cluster = Cluster(
        cluster_resource_name=persistent_resource.name,
        network=persistent_resource.network,
        reserved_ip_ranges=persistent_resource.reserved_ip_ranges,
        state=persistent_resource.state.name,
        labels=persistent_resource.labels,
        dashboard_address=dashboard_address,
    )
    if not persistent_resource.resource_runtime_spec.ray_spec:
        # skip PersistentResource without RaySpec
        logging.info(
            "[Ray on Vertex AI]: Cluster %s does not have Ray installed."
            % persistent_resource.name,
        )
        return
    if persistent_resource.psc_interface_config:
        cluster.psc_interface_config = PscIConfig(
            network_attachment=persistent_resource.psc_interface_config.network_attachment
        )
    resource_pools = persistent_resource.resource_pools
    # The first resource pool is always the head node's pool.
    head_resource_pool = resource_pools[0]
    head_id = head_resource_pool.id
    head_image_uri = (
        persistent_resource.resource_runtime_spec.ray_spec.resource_pool_images[head_id]
    )
    if persistent_resource.resource_runtime_spec.service_account_spec.service_account:
        cluster.service_account = (
            persistent_resource.resource_runtime_spec.service_account_spec.service_account
        )
    if not head_image_uri:
        # Fall back to the cluster-wide image when no per-pool image is set.
        head_image_uri = persistent_resource.resource_runtime_spec.ray_spec.image_uri
    try:
        python_version, ray_version = _validation_utils.get_versions_from_image_uri(
            head_image_uri
        )
    except IndexError:
        # IndexError means the image label did not follow the official
        # "ray-{cpu,gpu}.<ray>.py<python>" naming scheme.
        if _PRIVATE_PREVIEW_IMAGE in head_image_uri:
            # If using outdated images
            logging.info(
                "[Ray on Vertex AI]: The image of cluster %s is outdated."
                " It is recommended to delete and recreate the cluster to obtain"
                " the latest image." % persistent_resource.name
            )
            return None
        else:
            # Custom image might also cause IndexError
            python_version = None
            ray_version = None
    cluster.python_version = python_version
    cluster.ray_version = ray_version
    # The proto stores "disabled" flags; the Cluster exposes "enabled" flags.
    cluster.ray_metric_enabled = not (
        persistent_resource.resource_runtime_spec.ray_spec.ray_metric_spec.disabled
    )
    cluster.ray_logs_enabled = not (
        persistent_resource.resource_runtime_spec.ray_spec.ray_logs_spec.disabled
    )
    accelerator_type = head_resource_pool.machine_spec.accelerator_type
    if accelerator_type.value != 0:
        accelerator_type = accelerator_type.name
    else:
        # Enum value 0 is the unspecified accelerator type — treat as none.
        accelerator_type = None
    if _OFFICIAL_IMAGE in head_image_uri:
        # Official training image is not custom
        head_image_uri = None
    head_node_type = Resources(
        machine_type=head_resource_pool.machine_spec.machine_type,
        accelerator_type=accelerator_type,
        accelerator_count=head_resource_pool.machine_spec.accelerator_count,
        boot_disk_type=head_resource_pool.disk_spec.boot_disk_type,
        boot_disk_size_gb=head_resource_pool.disk_spec.boot_disk_size_gb,
        node_count=1,
        custom_image=head_image_uri,
    )
    worker_node_types = []
    if head_resource_pool.replica_count > 1:
        # head_node_type.node_count must be 1. If the head_resource_pool (the first
        # resource pool) has replica_count > 1, the rest replica are worker nodes.
        worker_node_count = head_resource_pool.replica_count - 1
        worker_node_types.append(
            Resources(
                machine_type=head_resource_pool.machine_spec.machine_type,
                accelerator_type=accelerator_type,
                accelerator_count=head_resource_pool.machine_spec.accelerator_count,
                boot_disk_type=head_resource_pool.disk_spec.boot_disk_type,
                boot_disk_size_gb=head_resource_pool.disk_spec.boot_disk_size_gb,
                node_count=worker_node_count,
                custom_image=head_image_uri,
            )
        )
        if head_resource_pool.autoscaling_spec:
            worker_node_types[0].autoscaling_spec = AutoscalingSpec(
                min_replica_count=head_resource_pool.autoscaling_spec.min_replica_count,
                max_replica_count=head_resource_pool.autoscaling_spec.max_replica_count,
            )
    for i in range(len(resource_pools) - 1):
        # Convert the second and more resource pools to vertex_ray.Resources,
        # and append them to worker_node_types.
        accelerator_type = resource_pools[i + 1].machine_spec.accelerator_type
        if accelerator_type.value != 0:
            accelerator_type = accelerator_type.name
        else:
            accelerator_type = None
        worker_image_uri = (
            persistent_resource.resource_runtime_spec.ray_spec.resource_pool_images[
                resource_pools[i + 1].id
            ]
        )
        if _OFFICIAL_IMAGE in worker_image_uri:
            # Official training image is not custom
            worker_image_uri = None
        resource = Resources(
            machine_type=resource_pools[i + 1].machine_spec.machine_type,
            accelerator_type=accelerator_type,
            accelerator_count=resource_pools[i + 1].machine_spec.accelerator_count,
            boot_disk_type=resource_pools[i + 1].disk_spec.boot_disk_type,
            boot_disk_size_gb=resource_pools[i + 1].disk_spec.boot_disk_size_gb,
            node_count=resource_pools[i + 1].replica_count,
            custom_image=worker_image_uri,
        )
        if resource_pools[i + 1].autoscaling_spec:
            resource.autoscaling_spec = AutoscalingSpec(
                min_replica_count=resource_pools[
                    i + 1
                ].autoscaling_spec.min_replica_count,
                max_replica_count=resource_pools[
                    i + 1
                ].autoscaling_spec.max_replica_count,
            )
        worker_node_types.append(resource)
    cluster.head_node_type = head_node_type
    cluster.worker_node_types = worker_node_types
    return cluster

View File

@@ -0,0 +1,167 @@
# -*- coding: utf-8 -*-
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import google.auth
import google.auth.transport.requests
import logging
import ray
import re
from immutabledict import immutabledict
from google.cloud.aiplatform import initializer
from google.cloud.aiplatform.utils import resource_manager_utils
# Ray minor version -> full version of the official images.
SUPPORTED_RAY_VERSIONS = immutabledict(
    {"2.9": "2.9.3", "2.33": "2.33.0", "2.42": "2.42.0"}
)
# Python version -> tuple of Ray minor versions supported for it.
SUPPORTED_RAY_VERSIONS_FROM_PYTHON_VERSIONS = immutabledict(
    {
        "3.10": ("2.9", "2.33", "2.42"),
        # NOTE: must be a one-element tuple. `("2.42")` is just the string
        # "2.42", and `x in "2.42"` does substring matching, so e.g. "2.4"
        # would incorrectly be accepted as supported for Python 3.11.
        "3.11": ("2.42",),
    }
)
_V2_4_WARNING_MESSAGE = (
    "After google-cloud-aiplatform>1.53.0, using Ray version = 2.4 will result"
    " in an error. Please use Ray version = 2.33.0 or 2.42.0 (default) instead."
)
_V2_9_WARNING_MESSAGE = (
    "In March 2025, using Ray version = 2.9 will result in an error. "
    "Please use Ray version = 2.33.0 or 2.42.0 (default) instead."
)
# Artifact Repository available regions.
_AVAILABLE_REGIONS = ["us", "europe", "asia"]
# If region is not available, assume using the default region.
_DEFAULT_REGION = "us"
_PERSISTENT_RESOURCE_NAME_PATTERN = "projects/{}/locations/{}/persistentResources/{}"
_VALID_RESOURCE_NAME_REGEX = "[a-z][a-zA-Z0-9._-]{0,127}"
_DASHBOARD_URI_SUFFIX = "aiplatform-training.googleusercontent.com"
def valid_resource_name(resource_name):
    """Raise ValueError unless *resource_name* is a full persistent resource name."""
    parts = resource_name.split("/")
    well_formed = (
        len(parts) == 6
        and parts[0] == "projects"
        and parts[2] == "locations"
        and parts[4] == "persistentResources"
    )
    if not well_formed:
        raise ValueError(
            "[Ray on Vertex AI]: Address must be in the following "
            "format: vertex_ray://projects/<project_num>/locations/<region>/persistentResources/<pr_id> "
            "or vertex_ray://<pr_id>."
        )
def maybe_reconstruct_resource_name(address) -> str:
    """Expand a bare persistent resource id into a full resource name.

    An address that is already a full
    "projects/.../locations/.../persistentResources/..." path (or anything
    not matching the bare-id pattern) is returned unchanged.
    """
    if not re.match("^{}$".format(_VALID_RESOURCE_NAME_REGEX), address):
        return address
    # Only the cluster name (persistent resource id) was supplied; rebuild
    # the full name from the globally-configured project and location.
    logging.info(
        "[Ray on Vertex AI]: Cluster name was given as address, reconstructing full resource name"
    )
    project_number = resource_manager_utils.get_project_number(
        initializer.global_config.project
    )
    return _PERSISTENT_RESOURCE_NAME_PATTERN.format(
        project_number,
        initializer.global_config.location,
        address,
    )
def get_local_ray_version():
    """Return the locally-installed Ray version, trimmed to "major.minor"."""
    parts = ray.__version__.split(".")
    # A standard "X.Y.Z" version is trimmed to "X.Y"; anything else is
    # returned as-is.
    return ".".join(parts[:2] if len(parts) == 3 else parts)
def get_image_uri(ray_version, python_version, enable_cuda):
    """Image uri for a given ray version and python version.

    Args:
        ray_version: Ray minor version, e.g. "2.42".
        python_version: Python version, e.g. "3.10".
        enable_cuda: Whether to return the GPU (cuda) image variant.

    Returns:
        The Artifact Registry uri of the matching official Ray image.

    Raises:
        ValueError: If the Ray version, the Python version, or their
            combination is unsupported.
    """
    if ray_version not in SUPPORTED_RAY_VERSIONS:
        # Build the message from the mapping instead of hard-coding the first
        # two entries, so every supported version (including 2.42) is listed
        # and the message stays correct as versions are added.
        supported = " and ".join(
            "%s (%s)" % (minor, full)
            for minor, full in SUPPORTED_RAY_VERSIONS.items()
        )
        raise ValueError(
            "[Ray on Vertex AI]: The supported Ray versions are %s." % supported
        )
    if python_version not in SUPPORTED_RAY_VERSIONS_FROM_PYTHON_VERSIONS:
        raise ValueError(
            "[Ray on Vertex AI]: The supported Python versions are 3.10 or 3.11."
        )
    if ray_version not in SUPPORTED_RAY_VERSIONS_FROM_PYTHON_VERSIONS[python_version]:
        raise ValueError(
            "[Ray on Vertex AI]: The supported Ray version(s) for Python version %s: %s."
            % (
                python_version,
                SUPPORTED_RAY_VERSIONS_FROM_PYTHON_VERSIONS[python_version],
            )
        )
    # Artifact Registry repos exist only in a few multi-regions; fall back to
    # the default when the configured location is not one of them.
    location = initializer.global_config.location
    region = location.split("-")[0]
    if region not in _AVAILABLE_REGIONS:
        region = _DEFAULT_REGION
    # Image labels use "2-42" / "py310" style tokens.
    ray_version = ray_version.replace(".", "-")
    python_version = python_version.replace(".", "")
    if enable_cuda:
        return f"{region}-docker.pkg.dev/vertex-ai/training/ray-gpu.{ray_version}.py{python_version}:latest"
    else:
        return f"{region}-docker.pkg.dev/vertex-ai/training/ray-cpu.{ray_version}.py{python_version}:latest"
def get_versions_from_image_uri(image_uri):
    """Get ray version and python version from image uri."""
    logging.info(f"[Ray on Vertex AI]: Getting versions from image uri: {image_uri}")
    # Label is the path's last component without the tag,
    # e.g. "ray-cpu.2-42.py310".
    image_label = image_uri.split("/")[-1].split(":")[0]
    py_version = "{}.{}".format(image_label[-3], image_label[-2:])
    ray_version = image_label.split(".")[1].replace("-", ".")
    supported = SUPPORTED_RAY_VERSIONS_FROM_PYTHON_VERSIONS.get(py_version)
    if supported is not None and ray_version in supported:
        return py_version, ray_version
    # Custom images may not follow the official naming scheme, so the
    # versions cannot be determined.
    return None, None
def valid_dashboard_address(address):
    """Return whether *address* ends with the Vertex dashboard domain."""
    suffix_start = len(address) - len(_DASHBOARD_URI_SUFFIX)
    return suffix_start >= 0 and address[suffix_start:] == _DASHBOARD_URI_SUFFIX
def get_bearer_token():
    """Get bearer token through Application Default Credentials."""
    credentials, _ = google.auth.default(
        scopes=["https://www.googleapis.com/auth/cloud-platform"]
    )
    # Freshly-obtained credentials carry no token yet (creds.valid is False
    # and creds.token is None); a refresh populates both.
    credentials.refresh(google.auth.transport.requests.Request())
    return credentials.token

View File

@@ -0,0 +1,217 @@
# -*- coding: utf-8 -*-
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import dataclasses
from typing import Dict, List, Optional
from google.cloud.aiplatform_v1beta1.types import PersistentResource
@dataclasses.dataclass
class AutoscalingSpec:
    """Autoscaling spec for a ray cluster node.

    Attributes:
        min_replica_count: The minimum number of replicas in the cluster.
        max_replica_count: The maximum number of replicas in the cluster.
    """

    min_replica_count: int = 1
    max_replica_count: int = 2
@dataclasses.dataclass
class Resources:
    """Resources for a ray cluster node.

    Attributes:
        machine_type: See the list of machine types:
            https://cloud.google.com/vertex-ai/docs/training/configure-compute#machine-types
        node_count: This argument represents how many nodes to start for the
            ray cluster.
        accelerator_type: e.g. "NVIDIA_TESLA_P4".
            Vertex AI supports the following types of GPU:
            https://cloud.google.com/vertex-ai/docs/training/configure-compute#specifying_gpus
        accelerator_count: The number of accelerators to attach to the machine.
        boot_disk_type: Type of the boot disk (default is "pd-ssd").
            Valid values: "pd-ssd" (Persistent Disk Solid State Drive) or
            "pd-standard" (Persistent Disk Hard Disk Drive).
        boot_disk_size_gb: Size in GB of the boot disk (default is 100GB). Must
            be either unspecified or within the range of [100, 64000].
        custom_image: Custom image for this resource (e.g.
            us-docker.pkg.dev/my-project/ray-gpu.2-9.py310-tf:latest).
        autoscaling_spec: Autoscaling spec for this resource.
    """

    machine_type: Optional[str] = "n1-standard-16"
    node_count: Optional[int] = 1
    accelerator_type: Optional[str] = None
    # 0 means no accelerators attached.
    accelerator_count: Optional[int] = 0
    boot_disk_type: Optional[str] = "pd-ssd"
    boot_disk_size_gb: Optional[int] = 100
    # None means the official base image is used.
    custom_image: Optional[str] = None
    # None means fixed-size (no autoscaling).
    autoscaling_spec: Optional[AutoscalingSpec] = None
@dataclasses.dataclass
class NodeImages:
    """Custom images for a ray cluster.

    We currently support Ray v2.9, v2.33, v2.42 and python v3.10.
    We also support python v3.11 for Ray v2.42.
    The custom images must be extended from the following base images:
    "{region}-docker.pkg.dev/vertex-ai/training/ray-cpu.2-9.py310:latest",
    "{region}-docker.pkg.dev/vertex-ai/training/ray-gpu.2-9.py310:latest",
    "{region}-docker.pkg.dev/vertex-ai/training/ray-cpu.2-33.py310:latest",
    "{region}-docker.pkg.dev/vertex-ai/training/ray-gpu.2-33.py310:latest",
    "{region}-docker.pkg.dev/vertex-ai/training/ray-cpu.2-42.py310:latest",
    "{region}-docker.pkg.dev/vertex-ai/training/ray-gpu.2-42.py310:latest",
    "{region}-docker.pkg.dev/vertex-ai/training/ray-cpu.2-42.py311:latest", or
    "{region}-docker.pkg.dev/vertex-ai/training/ray-gpu.2-42.py311:latest". In
    order to use custom images, need to specify both head and worker images.

    Attributes:
        head: image for head node (eg. us-docker.pkg.dev/my-project/ray-cpu.2-33.py310-tf:latest).
        worker: image for all worker nodes (eg. us-docker.pkg.dev/my-project/ray-gpu.2-33.py310-tf:latest).
    """

    # Per the class docstring, head and worker must be specified together.
    head: Optional[str] = None
    worker: Optional[str] = None
@dataclasses.dataclass
class PscIConfig:
    """PSC-I config.

    Attributes:
        network_attachment: Optional. The name or full name of the Compute Engine
            `network attachment <https://cloud.google.com/vpc/docs/about-network-attachments>`
            to attach to the resource. It has a format:
            ``projects/{project}/regions/{region}/networkAttachments/{networkAttachment}``.
            Where {project} is a project number, as in ``12345``, and
            {networkAttachment} is a network attachment name. To specify
            this field, you must have already [created a network
            attachment]
            (https://cloud.google.com/vpc/docs/create-manage-network-attachments#create-network-attachments).
            This field is only used for resources using PSC-I. Make sure you do not
            specify the network here for VPC peering.
    """

    network_attachment: Optional[str] = None
@dataclasses.dataclass
class NfsMount:
    """NFS mount.

    Attributes:
        server: Required. IP address of the NFS server.
        path: Required. Source path exported from NFS server. Has to start
            with '/', and combined with the ip address, it indicates the
            source mount path in the form of ``server:path``.
        mount_point: Required. Destination mount path. The NFS will be mounted
            for the user under /mnt/nfs/<mount_point>.
    """

    server: Optional[str] = None
    path: Optional[str] = None
    mount_point: Optional[str] = None
@dataclasses.dataclass
class Cluster:
    """Ray cluster (output only).

    Attributes:
        cluster_resource_name: It has a format:
            "projects/<project_num>/locations/<region>/persistentResources/<pr_id>".
        network: Virtual private cloud (VPC) network. It has a format:
            "projects/<project_num>/global/networks/<network_name>".
            For Ray Client, VPC peering is required to connect to the cluster
            managed in the Vertex API service. For Ray Job API, VPC network is
            not required because cluster connection can be accessed through
            dashboard address.
        reserved_ip_ranges: A list of names for the reserved IP ranges under
            the VPC network that can be used for this cluster. If set, we will
            deploy the cluster within the provided IP ranges. Otherwise, the
            cluster is deployed to any IP ranges under the provided VPC network.
            Example: ["vertex-ai-ip-range"].
        service_account: Service account to be used for running Ray programs on
            the cluster.
        state: Describes the cluster state (defined in PersistentResource.State).
        python_version: Python version for the ray cluster (e.g. "3.10").
        ray_version: Ray version for the ray cluster (e.g. "2.33").
        head_node_type: The head node resource. Resources.node_count must be 1.
            If not set, by default it is a CPU node with machine_type of n1-standard-8.
        worker_node_types: The list of Resources of the worker nodes. Should not
            duplicate the elements in the list.
        dashboard_address: For Ray Job API (JobSubmissionClient), with this
            cluster connection doesn't require VPC peering.
        labels:
            The labels with user-defined metadata to organize Ray cluster.
            Label keys and values can be no longer than 64 characters (Unicode
            codepoints), can only contain lowercase letters, numeric characters,
            underscores and dashes. International characters are allowed.
            See https://goo.gl/xmQnxf for more information and examples of labels.
    """

    cluster_resource_name: Optional[str] = None
    network: Optional[str] = None
    reserved_ip_ranges: Optional[List[str]] = None
    service_account: Optional[str] = None
    state: Optional[PersistentResource.State] = None
    python_version: Optional[str] = None
    ray_version: Optional[str] = None
    head_node_type: Optional[Resources] = None
    worker_node_types: Optional[List[Resources]] = None
    dashboard_address: Optional[str] = None
    ray_metric_enabled: bool = True
    ray_logs_enabled: bool = True
    psc_interface_config: Optional[PscIConfig] = None
    labels: Optional[Dict[str, str]] = None
def _check_machine_spec_identical(
    node_type_1: Resources,
    node_type_2: Resources,
) -> int:
    """Check if node_type_1 and node_type_2 have the same machine_spec.

    If the machine specs (machine type, accelerator type, accelerator count)
    match, validate that the disk specs also match and return node_type_2's
    node_count as the additional replica count; otherwise return 0.
    """
    same_machine_spec = (
        node_type_1.machine_type == node_type_2.machine_type
        and node_type_1.accelerator_type == node_type_2.accelerator_type
        and node_type_1.accelerator_count == node_type_2.accelerator_count
    )
    if not same_machine_spec:
        return 0
    # Nodes sharing one machine spec must also share their disk spec.
    if node_type_1.boot_disk_type != node_type_2.boot_disk_type:
        raise ValueError(
            "Worker disk type must match the head node's disk type if"
            " sharing the same machine_type, accelerator_type, and"
            " accelerator_count"
        )
    if node_type_1.boot_disk_size_gb != node_type_2.boot_disk_size_gb:
        raise ValueError(
            "Worker disk size must match the head node's disk size if"
            " sharing the same machine_type, accelerator_type, and"
            " accelerator_count"
        )
    return node_type_2.node_count