structure saas with tools
.venv/lib/python3.10/site-packages/vertex_ray/data.py (new file, +192 lines)
@@ -0,0 +1,192 @@
# -*- coding: utf-8 -*-

# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import warnings
import ray.data
from ray.data.dataset import Dataset
from typing import Any, Dict, Optional

from google.cloud.aiplatform.vertex_ray.bigquery_datasource import (
    _BigQueryDatasource,
)

try:
    from google.cloud.aiplatform.vertex_ray.bigquery_datasink import (
        _BigQueryDatasink,
    )
except ImportError:
    _BigQueryDatasink = None

from google.cloud.aiplatform.vertex_ray.util._validation_utils import (
    _V2_4_WARNING_MESSAGE,
    _V2_9_WARNING_MESSAGE,
)


def read_bigquery(
    project_id: Optional[str] = None,
    dataset: Optional[str] = None,
    query: Optional[str] = None,
    *,
    parallelism: int = -1,
    ray_remote_args: Optional[Dict[str, Any]] = None,
    concurrency: Optional[int] = None,
    override_num_blocks: Optional[int] = None,
) -> Dataset:
"""Create a dataset from BigQuery.
|
||||
|
||||
The data to read from is specified via the ``project_id``, ``dataset``
|
||||
and/or ``query`` parameters.
|
||||
|
||||
Args:
|
||||
project_id: The name of the associated Google Cloud Project that hosts
|
||||
the dataset to read.
|
||||
dataset: The name of the dataset hosted in BigQuery in the format of
|
||||
``dataset_id.table_id``. Both the dataset_id and table_id must exist
|
||||
otherwise an exception will be raised.
|
||||
query: The query to execute. The dataset is created from the results of
|
||||
executing the query if provided. Otherwise, the entire dataset is read.
|
||||
For query syntax guidelines, see
|
||||
https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax
|
||||
parallelism: 2.33.0, 2.42.0: This argument is deprecated. Use
|
||||
``override_num_blocks`` argument. 2.9.3: The requested parallelism of
|
||||
the read. If -1, it will be automatically chosen based on the available
|
||||
cluster resources and estimated in-memory data size.
|
||||
ray_remote_args: kwargs passed to ray.remote in the read tasks.
|
||||
concurrency: Supported for 2.33.0 and 2.42.0 only: The maximum number of
|
||||
Ray tasks to run concurrently. Set this to control number of tasks to
|
||||
run concurrently. This doesn't change the total number of tasks run or
|
||||
the total number of output blocks. By default, concurrency is
|
||||
dynamically decided based on the available resources.
|
||||
override_num_blocks: Supported for 2.33.0 and 2.42.0 only: Override the
|
||||
number of output blocks from all read tasks. By default, the number of
|
||||
output blocks is dynamically decided based on input data size and
|
||||
available resources. You shouldn't manually set this value in most
|
||||
cases.
|
||||
|
||||
Returns:
|
||||
Dataset producing rows from the results of executing the query
|
||||
or reading the entire dataset on the specified BigQuery dataset.
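
    Example (an illustrative sketch, not from the original module; the
    project and table names are placeholders, and the import path assumes
    this module is importable as ``vertex_ray.data``):

    .. code-block:: python

        from vertex_ray import data

        # Read an entire existing table.
        ds = data.read_bigquery(
            project_id="my-project",          # placeholder project
            dataset="my_dataset.my_table",    # placeholder dataset_id.table_id
        )

        # Or read the result of a query instead of a full table.
        ds = data.read_bigquery(
            project_id="my-project",
            query="SELECT * FROM `my-project.my_dataset.my_table` LIMIT 100",
        )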
    """
    datasource = _BigQueryDatasource(
        project_id=project_id,
        dataset=dataset,
        query=query,
    )

    if ray.__version__ == "2.9.3":
        warnings.warn(_V2_9_WARNING_MESSAGE, DeprecationWarning, stacklevel=1)
        # Concurrency and override_num_blocks are not supported in 2.9.3.
        return ray.data.read_datasource(
            datasource=datasource,
            parallelism=parallelism,
            ray_remote_args=ray_remote_args,
        )
    elif ray.__version__ in ("2.33.0", "2.42.0"):
        return ray.data.read_datasource(
            datasource=datasource,
            parallelism=parallelism,
            ray_remote_args=ray_remote_args,
            concurrency=concurrency,
            override_num_blocks=override_num_blocks,
        )
    else:
        raise ImportError(
            f"[Ray on Vertex AI]: Unsupported version {ray.__version__}. "
            "Only 2.42.0, 2.33.0, and 2.9.3 are supported."
        )


def write_bigquery(
    ds: Dataset,
    project_id: Optional[str] = None,
    dataset: Optional[str] = None,
    max_retry_cnt: int = 10,
    ray_remote_args: Optional[Dict[str, Any]] = None,
    overwrite_table: Optional[bool] = True,
    concurrency: Optional[int] = None,
) -> Any:
    """Write the dataset to a BigQuery dataset table.

    Args:
        ds: The dataset to write.
        project_id: The name of the associated Google Cloud Project that hosts
            the dataset table to write to.
        dataset: The name of the dataset table hosted in BigQuery in the
            format of ``dataset_id.table_id``. The dataset table is created if
            it doesn't already exist. In 2.9.3, the table_id is overwritten if
            it exists.
        max_retry_cnt: The maximum number of times an individual block write
            is retried due to BigQuery rate-limiting errors. The default
            number of retries is 10.
        ray_remote_args: kwargs passed to ``ray.remote`` in the write tasks.
        overwrite_table: Not supported in 2.9.3. 2.33.0 and 2.42.0: Whether
            the write will overwrite the table if it already exists. The
            default behavior is to overwrite the table. If false, the write
            will append to the table if it exists.
        concurrency: Not supported in 2.9.3. 2.33.0 and 2.42.0: The maximum
            number of Ray tasks to run concurrently. Set this to control the
            number of tasks run concurrently. This doesn't change the total
            number of tasks run or the total number of output blocks. By
            default, concurrency is decided dynamically based on the available
            resources.
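
    Example (an illustrative sketch, not from the original module; names are
    placeholders and the import path assumes ``vertex_ray.data``):

    .. code-block:: python

        from vertex_ray import data

        ds = data.read_bigquery(
            project_id="my-project",
            dataset="my_dataset.source_table",
        )
        data.write_bigquery(
            ds,
            project_id="my-project",
            dataset="my_dataset.destination_table",
            overwrite_table=False,  # append instead of overwriting
        )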
    """
if ray.__version__ == "2.4.0":
        raise RuntimeError(_V2_4_WARNING_MESSAGE)

    elif ray.__version__ in ("2.9.3", "2.33.0", "2.42.0"):
        if ray.__version__ == "2.9.3":
            warnings.warn(_V2_9_WARNING_MESSAGE, DeprecationWarning, stacklevel=1)
        if ray_remote_args is None:
            ray_remote_args = {}

        # Each write task will launch individual remote tasks to write each
        # block. To avoid duplicate block writes, the write task should not
        # be retried.
        if ray_remote_args.get("max_retries", 0) != 0:
            print(
                "[Ray on Vertex AI]: The max_retries of a BigQuery Write "
                "Task should be set to 0 to avoid duplicate writes."
            )
        else:
            ray_remote_args["max_retries"] = 0

        if ray.__version__ == "2.9.3":
            # Concurrency and overwrite_table are not supported in 2.9.3.
            datasink = _BigQueryDatasink(
                project_id=project_id,
                dataset=dataset,
                max_retry_cnt=max_retry_cnt,
            )
            return ds.write_datasink(
                datasink=datasink,
                ray_remote_args=ray_remote_args,
            )
        elif ray.__version__ in ("2.33.0", "2.42.0"):
            datasink = _BigQueryDatasink(
                project_id=project_id,
                dataset=dataset,
                max_retry_cnt=max_retry_cnt,
                overwrite_table=overwrite_table,
            )
            return ds.write_datasink(
                datasink=datasink,
                ray_remote_args=ray_remote_args,
                concurrency=concurrency,
            )
    else:
        raise ImportError(
            f"[Ray on Vertex AI]: Unsupported version {ray.__version__}. "
            "Only 2.42.0, 2.33.0, and 2.9.3 are supported."
        )