structure saas with tools

Davidson Gomes
2025-04-25 15:30:54 -03:00
commit 1aef473937
16434 changed files with 6584257 additions and 0 deletions

@@ -0,0 +1,656 @@
# -*- coding: utf-8 -*-
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import re
from typing import Any, Dict, Optional, Sequence, Union
from google.cloud.aiplatform_v1.types import api_auth
from google.cloud.aiplatform_v1 import (
RagEmbeddingModelConfig as GapicRagEmbeddingModelConfig,
GoogleDriveSource,
ImportRagFilesConfig,
ImportRagFilesRequest,
RagFileChunkingConfig,
RagFileParsingConfig,
RagFileTransformationConfig,
RagCorpus as GapicRagCorpus,
RagFile as GapicRagFile,
SharePointSources as GapicSharePointSources,
SlackSource as GapicSlackSource,
JiraSource as GapicJiraSource,
RagVectorDbConfig as GapicRagVectorDbConfig,
VertexAiSearchConfig as GapicVertexAiSearchConfig,
)
from google.cloud.aiplatform import initializer
from google.cloud.aiplatform.utils import (
VertexRagDataAsyncClientWithOverride,
VertexRagDataClientWithOverride,
VertexRagClientWithOverride,
)
from vertexai.rag.utils.resources import (
LayoutParserConfig,
Pinecone,
RagCorpus,
RagEmbeddingModelConfig,
RagFile,
RagManagedDb,
RagVectorDbConfig,
SharePointSources,
SlackChannelsSource,
TransformationConfig,
JiraSource,
VertexAiSearchConfig,
VertexVectorSearch,
VertexPredictionEndpoint,
)
_VALID_RESOURCE_NAME_REGEX = "[a-z][a-zA-Z0-9._-]{0,127}"
_VALID_DOCUMENT_AI_PROCESSOR_NAME_REGEX = (
r"projects/[^/]+/locations/[^/]+/processors/[^/]+(?:/processorVersions/[^/]+)?"
)
def create_rag_data_service_client():
return initializer.global_config.create_client(
client_class=VertexRagDataClientWithOverride,
).select_version("v1")
def create_rag_data_service_async_client():
return initializer.global_config.create_client(
client_class=VertexRagDataAsyncClientWithOverride,
).select_version("v1")
def create_rag_service_client():
return initializer.global_config.create_client(
client_class=VertexRagClientWithOverride,
).select_version("v1")
def convert_gapic_to_rag_embedding_model_config(
gapic_embedding_model_config: GapicRagEmbeddingModelConfig,
) -> RagEmbeddingModelConfig:
"""Convert GapicRagEmbeddingModelConfig to RagEmbeddingModelConfig."""
embedding_model_config = RagEmbeddingModelConfig()
path = gapic_embedding_model_config.vertex_prediction_endpoint.endpoint
publisher_model = re.match(
r"^projects/(?P<project>.+?)/locations/(?P<location>.+?)/publishers/google/models/(?P<model_id>.+?)$",
path,
)
endpoint = re.match(
r"^projects/(?P<project>.+?)/locations/(?P<location>.+?)/endpoints/(?P<endpoint>.+?)$",
path,
)
if publisher_model:
embedding_model_config.vertex_prediction_endpoint = VertexPredictionEndpoint(
publisher_model=path
)
    elif endpoint:
embedding_model_config.vertex_prediction_endpoint = VertexPredictionEndpoint(
endpoint=path,
model=gapic_embedding_model_config.vertex_prediction_endpoint.model,
model_version_id=gapic_embedding_model_config.vertex_prediction_endpoint.model_version_id,
)
return embedding_model_config
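# The helpers below probe which oneof field of GapicRagVectorDbConfig is set.
# proto-plus messages implement ``__contains__`` for field-presence checks;
# when that method is missing (the ``AttributeError`` branch, e.g. on a raw
# protobuf message), we fall back to testing whether the submessage is
# non-empty via ``ByteSize()``.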
def _check_weaviate(gapic_vector_db: GapicRagVectorDbConfig) -> bool:
try:
return gapic_vector_db.__contains__("weaviate")
except AttributeError:
return gapic_vector_db.weaviate.ByteSize() > 0
def _check_rag_managed_db(gapic_vector_db: GapicRagVectorDbConfig) -> bool:
try:
return gapic_vector_db.__contains__("rag_managed_db")
except AttributeError:
return gapic_vector_db.rag_managed_db.ByteSize() > 0
def _check_vertex_feature_store(gapic_vector_db: GapicRagVectorDbConfig) -> bool:
try:
return gapic_vector_db.__contains__("vertex_feature_store")
except AttributeError:
return gapic_vector_db.vertex_feature_store.ByteSize() > 0
def _check_pinecone(gapic_vector_db: GapicRagVectorDbConfig) -> bool:
try:
return gapic_vector_db.__contains__("pinecone")
except AttributeError:
return gapic_vector_db.pinecone.ByteSize() > 0
def _check_vertex_vector_search(gapic_vector_db: GapicRagVectorDbConfig) -> bool:
try:
return gapic_vector_db.__contains__("vertex_vector_search")
except AttributeError:
return gapic_vector_db.vertex_vector_search.ByteSize() > 0
def _check_rag_embedding_model_config(
gapic_vector_db: GapicRagVectorDbConfig,
) -> bool:
try:
return gapic_vector_db.__contains__("rag_embedding_model_config")
except AttributeError:
return gapic_vector_db.rag_embedding_model_config.ByteSize() > 0
def convert_gapic_to_backend_config(
gapic_vector_db: GapicRagVectorDbConfig,
) -> RagVectorDbConfig:
"""Convert Gapic RagVectorDbConfig to VertexVectorSearch, Pinecone, or RagManagedDb."""
vector_config = RagVectorDbConfig()
if _check_pinecone(gapic_vector_db):
vector_config.vector_db = Pinecone(
index_name=gapic_vector_db.pinecone.index_name,
api_key=gapic_vector_db.api_auth.api_key_config.api_key_secret_version,
)
elif _check_vertex_vector_search(gapic_vector_db):
vector_config.vector_db = VertexVectorSearch(
index_endpoint=gapic_vector_db.vertex_vector_search.index_endpoint,
index=gapic_vector_db.vertex_vector_search.index,
)
elif _check_rag_managed_db(gapic_vector_db):
vector_config.vector_db = RagManagedDb()
if _check_rag_embedding_model_config(gapic_vector_db):
vector_config.rag_embedding_model_config = (
convert_gapic_to_rag_embedding_model_config(
gapic_vector_db.rag_embedding_model_config
)
)
return vector_config
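# Illustrative sketch (hypothetical resource names): a gapic config with the
# ``pinecone`` field set converts to a RagVectorDbConfig backed by Pinecone:
#
#   gapic = GapicRagVectorDbConfig(
#       pinecone={"index_name": "my-index"},
#       api_auth={"api_key_config": {
#           "api_key_secret_version": "projects/p/secrets/s/versions/1"}},
#   )
#   convert_gapic_to_backend_config(gapic).vector_db
#   # -> Pinecone(index_name="my-index",
#   #             api_key="projects/p/secrets/s/versions/1")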
def convert_gapic_to_vertex_ai_search_config(
    gapic_vertex_ai_search_config: GapicVertexAiSearchConfig,
) -> Optional[VertexAiSearchConfig]:
    """Convert GapicVertexAiSearchConfig to VertexAiSearchConfig."""
if gapic_vertex_ai_search_config.serving_config:
return VertexAiSearchConfig(
serving_config=gapic_vertex_ai_search_config.serving_config,
)
return None
def convert_gapic_to_rag_corpus(gapic_rag_corpus: GapicRagCorpus) -> RagCorpus:
"""Convert GapicRagCorpus to RagCorpus."""
rag_corpus = RagCorpus(
name=gapic_rag_corpus.name,
display_name=gapic_rag_corpus.display_name,
description=gapic_rag_corpus.description,
vertex_ai_search_config=convert_gapic_to_vertex_ai_search_config(
gapic_rag_corpus.vertex_ai_search_config
),
backend_config=convert_gapic_to_backend_config(
gapic_rag_corpus.vector_db_config
),
)
return rag_corpus
def convert_gapic_to_rag_corpus_no_embedding_model_config(
gapic_rag_corpus: GapicRagCorpus,
) -> RagCorpus:
"""Convert GapicRagCorpus without embedding model config (for UpdateRagCorpus) to RagCorpus."""
rag_vector_db_config_no_embedding_model_config = gapic_rag_corpus.vector_db_config
rag_vector_db_config_no_embedding_model_config.rag_embedding_model_config = None
rag_corpus = RagCorpus(
name=gapic_rag_corpus.name,
display_name=gapic_rag_corpus.display_name,
description=gapic_rag_corpus.description,
vertex_ai_search_config=convert_gapic_to_vertex_ai_search_config(
gapic_rag_corpus.vertex_ai_search_config
),
backend_config=convert_gapic_to_backend_config(
rag_vector_db_config_no_embedding_model_config
),
)
return rag_corpus
def convert_gapic_to_rag_file(gapic_rag_file: GapicRagFile) -> RagFile:
"""Convert GapicRagFile to RagFile."""
rag_file = RagFile(
name=gapic_rag_file.name,
display_name=gapic_rag_file.display_name,
description=gapic_rag_file.description,
)
return rag_file
def convert_json_to_rag_file(upload_rag_file_response: Dict[str, Any]) -> RagFile:
    """Converts a JSON response to a RagFile."""
    rag_file_json = upload_rag_file_response.get("ragFile", {})
    rag_file = RagFile(
        name=rag_file_json.get("name"),
        display_name=rag_file_json.get("displayName"),
        description=rag_file_json.get("description"),
    )
    return rag_file
def convert_path_to_resource_id(
path: str,
) -> Union[str, GoogleDriveSource.ResourceId]:
"""Converts a path to a Google Cloud storage uri or GoogleDriveSource.ResourceId."""
if path.startswith("gs://"):
# Google Cloud Storage source
return path
elif path.startswith("https://drive.google.com/"):
# Google Drive source
path_list = path.split("/")
if "file" in path_list:
index = path_list.index("file") + 2
resource_id = path_list[index].split("?")[0]
resource_type = GoogleDriveSource.ResourceId.ResourceType.RESOURCE_TYPE_FILE
elif "folders" in path_list:
index = path_list.index("folders") + 1
resource_id = path_list[index].split("?")[0]
resource_type = (
GoogleDriveSource.ResourceId.ResourceType.RESOURCE_TYPE_FOLDER
)
else:
raise ValueError("path %s is not a valid Google Drive url.", path)
return GoogleDriveSource.ResourceId(
resource_id=resource_id,
resource_type=resource_type,
)
else:
raise ValueError(
"path must be a Google Cloud Storage uri or a Google Drive url."
)
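# Illustrative sketch (hypothetical urls): GCS uris pass through unchanged,
# while Google Drive sharing urls are reduced to a typed ResourceId:
#
#   convert_path_to_resource_id("gs://my-bucket/docs/")
#   # -> "gs://my-bucket/docs/"
#   convert_path_to_resource_id("https://drive.google.com/file/d/abc123/view")
#   # -> GoogleDriveSource.ResourceId(resource_id="abc123",
#   #        resource_type=ResourceType.RESOURCE_TYPE_FILE)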
def convert_source_for_rag_import(
    source: Union[SlackChannelsSource, JiraSource, SharePointSources]
) -> Union[GapicSlackSource, GapicJiraSource, GapicSharePointSources]:
    """Converts a SlackChannelsSource, JiraSource, or SharePointSources to its gapic equivalent."""
if isinstance(source, SlackChannelsSource):
result_source_channels = []
for channel in source.channels:
api_key = channel.api_key
cid = channel.channel_id
start_time = channel.start_time
end_time = channel.end_time
result_channels = GapicSlackSource.SlackChannels(
channels=[
GapicSlackSource.SlackChannels.SlackChannel(
channel_id=cid,
start_time=start_time,
end_time=end_time,
)
],
api_key_config=api_auth.ApiAuth.ApiKeyConfig(
api_key_secret_version=api_key
),
)
result_source_channels.append(result_channels)
return GapicSlackSource(
channels=result_source_channels,
)
elif isinstance(source, JiraSource):
result_source_queries = []
for query in source.queries:
api_key = query.api_key
custom_queries = query.custom_queries
projects = query.jira_projects
email = query.email
server_uri = query.server_uri
result_query = GapicJiraSource.JiraQueries(
custom_queries=custom_queries,
projects=projects,
email=email,
server_uri=server_uri,
api_key_config=api_auth.ApiAuth.ApiKeyConfig(
api_key_secret_version=api_key
),
)
result_source_queries.append(result_query)
return GapicJiraSource(
jira_queries=result_source_queries,
)
elif isinstance(source, SharePointSources):
result_source_share_point_sources = []
for share_point_source in source.share_point_sources:
sharepoint_folder_path = share_point_source.sharepoint_folder_path
sharepoint_folder_id = share_point_source.sharepoint_folder_id
drive_name = share_point_source.drive_name
drive_id = share_point_source.drive_id
client_id = share_point_source.client_id
client_secret = share_point_source.client_secret
tenant_id = share_point_source.tenant_id
sharepoint_site_name = share_point_source.sharepoint_site_name
result_share_point_source = GapicSharePointSources.SharePointSource(
client_id=client_id,
client_secret=api_auth.ApiAuth.ApiKeyConfig(
api_key_secret_version=client_secret
),
tenant_id=tenant_id,
sharepoint_site_name=sharepoint_site_name,
)
if sharepoint_folder_path is not None and sharepoint_folder_id is not None:
raise ValueError(
"sharepoint_folder_path and sharepoint_folder_id cannot both be set."
)
elif sharepoint_folder_path is not None:
result_share_point_source.sharepoint_folder_path = (
sharepoint_folder_path
)
elif sharepoint_folder_id is not None:
result_share_point_source.sharepoint_folder_id = sharepoint_folder_id
if drive_name is not None and drive_id is not None:
raise ValueError("drive_name and drive_id cannot both be set.")
elif drive_name is not None:
result_share_point_source.drive_name = drive_name
elif drive_id is not None:
result_share_point_source.drive_id = drive_id
else:
raise ValueError("Either drive_name and drive_id must be set.")
result_source_share_point_sources.append(result_share_point_source)
return GapicSharePointSources(
share_point_sources=result_source_share_point_sources,
)
else:
raise TypeError(
"source must be a SlackChannelsSource or JiraSource or SharePointSources."
)
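# Illustrative sketch (hypothetical ids and secret names): each SlackChannel
# becomes its own GapicSlackSource.SlackChannels entry, so per-channel API
# keys survive the conversion:
#
#   gapic_source = convert_source_for_rag_import(
#       SlackChannelsSource(channels=[
#           SlackChannel(channel_id="C123",
#                        api_key="projects/p/secrets/slack/versions/1"),
#       ])
#   )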
def prepare_import_files_request(
corpus_name: str,
paths: Optional[Sequence[str]] = None,
source: Optional[Union[SlackChannelsSource, JiraSource, SharePointSources]] = None,
transformation_config: Optional[TransformationConfig] = None,
max_embedding_requests_per_min: int = 1000,
import_result_sink: Optional[str] = None,
partial_failures_sink: Optional[str] = None,
parser: Optional[LayoutParserConfig] = None,
) -> ImportRagFilesRequest:
if len(corpus_name.split("/")) != 6:
raise ValueError(
"corpus_name must be of the format `projects/{project}/locations/{location}/ragCorpora/{rag_corpus}`"
)
rag_file_parsing_config = RagFileParsingConfig()
if parser is not None:
if (
re.fullmatch(_VALID_DOCUMENT_AI_PROCESSOR_NAME_REGEX, parser.processor_name)
is None
):
            raise ValueError(
                "processor_name must be of the format "
                "`projects/{project_id}/locations/{location}/processors/{processor_id}` "
                "or "
                "`projects/{project_id}/locations/{location}/processors/{processor_id}/processorVersions/{processor_version_id}`, "
                f"got {parser.processor_name!r}"
            )
rag_file_parsing_config.layout_parser = RagFileParsingConfig.LayoutParser(
processor_name=parser.processor_name,
max_parsing_requests_per_min=parser.max_parsing_requests_per_min,
)
chunk_size = 1024
chunk_overlap = 200
if transformation_config and transformation_config.chunking_config:
chunk_size = transformation_config.chunking_config.chunk_size
chunk_overlap = transformation_config.chunking_config.chunk_overlap
rag_file_transformation_config = RagFileTransformationConfig(
rag_file_chunking_config=RagFileChunkingConfig(
fixed_length_chunking=RagFileChunkingConfig.FixedLengthChunking(
chunk_size=chunk_size,
chunk_overlap=chunk_overlap,
),
),
)
import_rag_files_config = ImportRagFilesConfig(
rag_file_transformation_config=rag_file_transformation_config,
rag_file_parsing_config=rag_file_parsing_config,
max_embedding_requests_per_min=max_embedding_requests_per_min,
)
import_result_sink = import_result_sink or partial_failures_sink
if import_result_sink is not None:
if import_result_sink.startswith("gs://"):
import_rag_files_config.partial_failure_gcs_sink.output_uri_prefix = (
import_result_sink
)
elif import_result_sink.startswith("bq://"):
import_rag_files_config.partial_failure_bigquery_sink.output_uri = (
import_result_sink
)
else:
raise ValueError(
"import_result_sink must be a GCS path or a BigQuery table."
)
if source is not None:
gapic_source = convert_source_for_rag_import(source)
        if isinstance(gapic_source, GapicSlackSource):
            import_rag_files_config.slack_source = gapic_source
        elif isinstance(gapic_source, GapicJiraSource):
            import_rag_files_config.jira_source = gapic_source
        elif isinstance(gapic_source, GapicSharePointSources):
            import_rag_files_config.share_point_sources = gapic_source
    else:
        if not paths:
            raise ValueError("Either paths or source must be provided.")
        uris = []
        resource_ids = []
for p in paths:
output = convert_path_to_resource_id(p)
if isinstance(output, str):
uris.append(p)
else:
resource_ids.append(output)
if uris:
import_rag_files_config.gcs_source.uris = uris
if resource_ids:
google_drive_source = GoogleDriveSource(
resource_ids=resource_ids,
)
import_rag_files_config.google_drive_source = google_drive_source
request = ImportRagFilesRequest(
parent=corpus_name, import_rag_files_config=import_rag_files_config
)
return request
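# Illustrative sketch (hypothetical resource names): a minimal GCS import
# request using the default chunking (chunk_size=1024, chunk_overlap=200):
#
#   request = prepare_import_files_request(
#       corpus_name="projects/p/locations/us-central1/ragCorpora/c",
#       paths=["gs://my-bucket/docs/"],
#   )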
def get_corpus_name(
name: str,
) -> str:
if name:
client = create_rag_data_service_client()
if client.parse_rag_corpus_path(name):
return name
elif re.match("^{}$".format(_VALID_RESOURCE_NAME_REGEX), name):
return client.rag_corpus_path(
project=initializer.global_config.project,
location=initializer.global_config.location,
rag_corpus=name,
)
else:
raise ValueError(
"name must be of the format `projects/{project}/locations/{location}/ragCorpora/{rag_corpus}` or `{rag_corpus}`"
)
return name
def get_file_name(
name: str,
corpus_name: str,
) -> str:
client = create_rag_data_service_client()
if client.parse_rag_file_path(name):
return name
elif re.match("^{}$".format(_VALID_RESOURCE_NAME_REGEX), name):
if not corpus_name:
            raise ValueError(
                "corpus_name must be provided if name is a `{rag_file}`, not a "
                "full resource name "
                "(`projects/{project}/locations/{location}/ragCorpora/{rag_corpus}/ragFiles/{rag_file}`)."
            )
return client.rag_file_path(
project=initializer.global_config.project,
location=initializer.global_config.location,
rag_corpus=get_corpus_name(corpus_name),
rag_file=name,
)
else:
raise ValueError(
"name must be of the format `projects/{project}/locations/{location}/ragCorpora/{rag_corpus}/ragFiles/{rag_file}` or `{rag_file}`"
)
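# Illustrative sketch: bare ids are expanded using the initialized project and
# location, while full resource names are returned unchanged:
#
#   get_corpus_name("my-corpus")
#   # -> "projects/{project}/locations/{location}/ragCorpora/my-corpus"
#   get_file_name("my-file", corpus_name="my-corpus")
#   # -> "projects/{project}/locations/{location}/ragCorpora/my-corpus/ragFiles/my-file"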
def set_embedding_model_config(
embedding_model_config: RagEmbeddingModelConfig,
rag_corpus: GapicRagCorpus,
) -> None:
if embedding_model_config.vertex_prediction_endpoint is None:
return
if (
embedding_model_config.vertex_prediction_endpoint.publisher_model
and embedding_model_config.vertex_prediction_endpoint.endpoint
):
raise ValueError("publisher_model and endpoint cannot be set at the same time.")
if (
not embedding_model_config.vertex_prediction_endpoint.publisher_model
and not embedding_model_config.vertex_prediction_endpoint.endpoint
):
raise ValueError("At least one of publisher_model and endpoint must be set.")
parent = initializer.global_config.common_location_path(project=None, location=None)
if embedding_model_config.vertex_prediction_endpoint.publisher_model:
publisher_model = (
embedding_model_config.vertex_prediction_endpoint.publisher_model
)
full_resource_name = re.match(
r"^projects/(?P<project>.+?)/locations/(?P<location>.+?)/publishers/google/models/(?P<model_id>.+?)$",
publisher_model,
)
resource_name = re.match(
r"^publishers/google/models/(?P<model_id>.+?)$",
publisher_model,
)
if full_resource_name:
rag_corpus.vector_db_config.rag_embedding_model_config.vertex_prediction_endpoint.endpoint = (
publisher_model
)
elif resource_name:
rag_corpus.vector_db_config.rag_embedding_model_config.vertex_prediction_endpoint.endpoint = (
parent + "/" + publisher_model
)
else:
raise ValueError(
"publisher_model must be of the format `projects/{project}/locations/{location}/publishers/google/models/{model_id}` or `publishers/google/models/{model_id}`"
)
if embedding_model_config.vertex_prediction_endpoint.endpoint:
endpoint = embedding_model_config.vertex_prediction_endpoint.endpoint
full_resource_name = re.match(
r"^projects/(?P<project>.+?)/locations/(?P<location>.+?)/endpoints/(?P<endpoint>.+?)$",
endpoint,
)
resource_name = re.match(
r"^endpoints/(?P<endpoint>.+?)$",
endpoint,
)
if full_resource_name:
rag_corpus.vector_db_config.rag_embedding_model_config.vertex_prediction_endpoint.endpoint = (
endpoint
)
elif resource_name:
rag_corpus.vector_db_config.rag_embedding_model_config.vertex_prediction_endpoint.endpoint = (
parent + "/" + endpoint
)
else:
raise ValueError(
"endpoint must be of the format `projects/{project}/locations/{location}/endpoints/{endpoint}` or `endpoints/{endpoint}`"
)
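# Illustrative sketch (hypothetical model name): a short publisher-model name
# is prefixed with the initialized project/location before being written to
# rag_corpus.vector_db_config:
#
#   set_embedding_model_config(
#       RagEmbeddingModelConfig(
#           vertex_prediction_endpoint=VertexPredictionEndpoint(
#               publisher_model="publishers/google/models/text-embedding-005")),
#       gapic_rag_corpus,
#   )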
def set_backend_config(
    backend_config: Optional[RagVectorDbConfig],
    rag_corpus: GapicRagCorpus,
) -> None:
"""Sets the vector db configuration for the rag corpus."""
if backend_config is None:
return
if backend_config.vector_db is not None:
vector_config = backend_config.vector_db
        if isinstance(vector_config, RagManagedDb):
rag_corpus.vector_db_config.rag_managed_db.CopyFrom(
GapicRagVectorDbConfig.RagManagedDb()
)
elif isinstance(vector_config, VertexVectorSearch):
index_endpoint = vector_config.index_endpoint
index = vector_config.index
rag_corpus.vector_db_config.vertex_vector_search.index_endpoint = (
index_endpoint
)
rag_corpus.vector_db_config.vertex_vector_search.index = index
elif isinstance(vector_config, Pinecone):
index_name = vector_config.index_name
api_key = vector_config.api_key
rag_corpus.vector_db_config.pinecone.index_name = index_name
rag_corpus.vector_db_config.api_auth.api_key_config.api_key_secret_version = (
api_key
)
        else:
            raise TypeError(
                "vector_db must be a VertexVectorSearch, "
                "RagManagedDb, or Pinecone."
            )
if backend_config.rag_embedding_model_config:
set_embedding_model_config(
backend_config.rag_embedding_model_config, rag_corpus
)
def set_vertex_ai_search_config(
vertex_ai_search_config: VertexAiSearchConfig,
rag_corpus: GapicRagCorpus,
) -> None:
if not vertex_ai_search_config.serving_config:
raise ValueError("serving_config must be set.")
engine_resource_name = re.match(
r"^projects/(?P<project>.+?)/locations/(?P<location>.+?)/collections/(?P<collection>.+?)/engines/(?P<engine>.+?)/servingConfigs/(?P<serving_config>.+?)$",
vertex_ai_search_config.serving_config,
)
data_store_resource_name = re.match(
r"^projects/(?P<project>.+?)/locations/(?P<location>.+?)/collections/(?P<collection>.+?)/dataStores/(?P<data_store>.+?)/servingConfigs/(?P<serving_config>.+?)$",
vertex_ai_search_config.serving_config,
)
if engine_resource_name or data_store_resource_name:
rag_corpus.vertex_ai_search_config = GapicVertexAiSearchConfig(
serving_config=vertex_ai_search_config.serving_config,
)
else:
raise ValueError(
"serving_config must be of the format `projects/{project}/locations/{location}/collections/{collection}/engines/{engine}/servingConfigs/{serving_config}` or `projects/{project}/locations/{location}/collections/{collection}/dataStores/{data_store}/servingConfigs/{serving_config}`"
)
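# Illustrative sketch (hypothetical serving config): engine- and data-store-
# based serving configs are both accepted:
#
#   set_vertex_ai_search_config(
#       VertexAiSearchConfig(
#           serving_config="projects/p/locations/global/collections/c"
#                          "/engines/e/servingConfigs/default_config"),
#       gapic_rag_corpus,
#   )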

@@ -0,0 +1,447 @@
# -*- coding: utf-8 -*-
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import dataclasses
from typing import List, Optional, Sequence, Union
from google.protobuf import timestamp_pb2
@dataclasses.dataclass
class RagFile:
"""RAG file (output only).
Attributes:
name: Generated resource name. Format:
``projects/{project}/locations/{location}/ragCorpora/{rag_corpus_id}/ragFiles/{rag_file}``
display_name: Display name that was configured at client side.
description: The description of the RagFile.
"""
name: Optional[str] = None
display_name: Optional[str] = None
description: Optional[str] = None
@dataclasses.dataclass
class VertexPredictionEndpoint:
"""VertexPredictionEndpoint.
Attributes:
publisher_model: 1P publisher model resource name. Format:
``publishers/google/models/{model}`` or
``projects/{project}/locations/{location}/publishers/google/models/{model}``
endpoint: 1P fine tuned embedding model resource name. Format:
``endpoints/{endpoint}`` or
``projects/{project}/locations/{location}/endpoints/{endpoint}``.
model:
Output only. The resource name of the model that is deployed
on the endpoint. Present only when the endpoint is not a
publisher model. Pattern:
``projects/{project}/locations/{location}/models/{model}``
model_version_id:
Output only. Version ID of the model that is
deployed on the endpoint. Present only when the
endpoint is not a publisher model.
"""
endpoint: Optional[str] = None
publisher_model: Optional[str] = None
model: Optional[str] = None
model_version_id: Optional[str] = None
@dataclasses.dataclass
class RagEmbeddingModelConfig:
"""RagEmbeddingModelConfig.
Attributes:
vertex_prediction_endpoint: The Vertex AI Prediction Endpoint resource
name. Format:
``projects/{project}/locations/{location}/endpoints/{endpoint}``
"""
vertex_prediction_endpoint: Optional[VertexPredictionEndpoint] = None
@dataclasses.dataclass
class Weaviate:
"""Weaviate.
Attributes:
weaviate_http_endpoint: The Weaviate DB instance HTTP endpoint
collection_name: The corresponding Weaviate collection this corpus maps to
api_key: The SecretManager resource name for the Weaviate DB API token. Format:
``projects/{project}/secrets/{secret}/versions/{version}``
"""
weaviate_http_endpoint: Optional[str] = None
collection_name: Optional[str] = None
api_key: Optional[str] = None
@dataclasses.dataclass
class VertexFeatureStore:
"""VertexFeatureStore.
Attributes:
resource_name: The resource name of the FeatureView. Format:
``projects/{project}/locations/{location}/featureOnlineStores/
{feature_online_store}/featureViews/{feature_view}``
"""
resource_name: Optional[str] = None
@dataclasses.dataclass
class VertexVectorSearch:
"""VertexVectorSearch.
Attributes:
index_endpoint (str):
The resource name of the Index Endpoint. Format:
``projects/{project}/locations/{location}/indexEndpoints/{index_endpoint}``
index (str):
The resource name of the Index. Format:
``projects/{project}/locations/{location}/indexes/{index}``
"""
index_endpoint: Optional[str] = None
index: Optional[str] = None
@dataclasses.dataclass
class RagManagedDb:
"""RagManagedDb."""
@dataclasses.dataclass
class Pinecone:
"""Pinecone.
Attributes:
index_name: The Pinecone index name.
api_key: The SecretManager resource name for the Pinecone DB API token. Format:
``projects/{project}/secrets/{secret}/versions/{version}``
"""
index_name: Optional[str] = None
api_key: Optional[str] = None
@dataclasses.dataclass
class VertexAiSearchConfig:
"""VertexAiSearchConfig.
Attributes:
serving_config: The resource name of the Vertex AI Search serving config.
Format:
``projects/{project}/locations/{location}/collections/{collection}/engines/{engine}/servingConfigs/{serving_config}``
or
``projects/{project}/locations/{location}/collections/{collection}/dataStores/{data_store}/servingConfigs/{serving_config}``
"""
serving_config: Optional[str] = None
@dataclasses.dataclass
class RagVectorDbConfig:
"""RagVectorDbConfig.
Attributes:
vector_db: Can be one of the following: RagManagedDb, Pinecone,
VertexVectorSearch.
rag_embedding_model_config: The embedding model config of the Vector DB.
"""
vector_db: Optional[
Union[
VertexVectorSearch,
Pinecone,
RagManagedDb,
]
] = None
rag_embedding_model_config: Optional[RagEmbeddingModelConfig] = None
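# Illustrative sketch (hypothetical names): a Pinecone-backed config with an
# explicit embedding model:
#
#   config = RagVectorDbConfig(
#       vector_db=Pinecone(index_name="my-index",
#                          api_key="projects/p/secrets/s/versions/1"),
#       rag_embedding_model_config=RagEmbeddingModelConfig(
#           vertex_prediction_endpoint=VertexPredictionEndpoint(
#               publisher_model="publishers/google/models/text-embedding-005")),
#   )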
@dataclasses.dataclass
class RagCorpus:
"""RAG corpus(output only).
Attributes:
name: Generated resource name. Format:
``projects/{project}/locations/{location}/ragCorpora/{rag_corpus_id}``
display_name: Display name that was configured at client side.
description: The description of the RagCorpus.
vertex_ai_search_config: The Vertex AI Search config of the RagCorpus.
backend_config: The backend config of the RagCorpus. It can be a data
store and/or retrieval engine.
"""
name: Optional[str] = None
display_name: Optional[str] = None
description: Optional[str] = None
vertex_ai_search_config: Optional[VertexAiSearchConfig] = None
    backend_config: Optional[RagVectorDbConfig] = None
@dataclasses.dataclass
class RagResource:
"""RagResource.
    The representation of the RAG source. It can be used to specify a corpus
    only, or RAG files within a corpus. Currently only one corpus, or multiple
    files from a single corpus, is supported; multiple-corpora support may be
    added in the future.
Attributes:
rag_corpus: A Rag corpus resource name or corpus id. Format:
``projects/{project}/locations/{location}/ragCorpora/{rag_corpus_id}``
or ``{rag_corpus_id}``.
        rag_file_ids: List of RAG file resource names or file IDs in the same
            corpus. Format: ``{rag_file}``.
"""
rag_corpus: Optional[str] = None
rag_file_ids: Optional[List[str]] = None
@dataclasses.dataclass
class SlackChannel:
"""SlackChannel.
Attributes:
channel_id: The Slack channel ID.
api_key: The SecretManager resource name for the Slack API token. Format:
``projects/{project}/secrets/{secret}/versions/{version}``
See: https://api.slack.com/tutorials/tracks/getting-a-token.
start_time: The starting timestamp for messages to import.
end_time: The ending timestamp for messages to import.
"""
channel_id: str
api_key: str
start_time: Optional[timestamp_pb2.Timestamp] = None
end_time: Optional[timestamp_pb2.Timestamp] = None
@dataclasses.dataclass
class SlackChannelsSource:
"""SlackChannelsSource.
Attributes:
channels: The Slack channels.
"""
channels: Sequence[SlackChannel]
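# Illustrative sketch (hypothetical ids and secret names):
#
#   source = SlackChannelsSource(channels=[
#       SlackChannel(channel_id="C0123456789",
#                    api_key="projects/p/secrets/slack-token/versions/1"),
#   ])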
@dataclasses.dataclass
class JiraQuery:
"""JiraQuery.
Attributes:
email: The Jira email address.
jira_projects: A list of Jira projects to import in their entirety.
custom_queries: A list of custom JQL Jira queries to import.
api_key: The SecretManager version resource name for Jira API access. Format:
``projects/{project}/secrets/{secret}/versions/{version}``
See: https://support.atlassian.com/atlassian-account/docs/manage-api-tokens-for-your-atlassian-account/
server_uri: The Jira server URI. Format:
``{server}.atlassian.net``
"""
email: str
jira_projects: Sequence[str]
custom_queries: Sequence[str]
api_key: str
server_uri: str
@dataclasses.dataclass
class JiraSource:
"""JiraSource.
Attributes:
queries: The Jira queries.
"""
queries: Sequence[JiraQuery]
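# Illustrative sketch (hypothetical values):
#
#   source = JiraSource(queries=[
#       JiraQuery(email="user@example.com",
#                 jira_projects=["PROJ"],
#                 custom_queries=[],
#                 api_key="projects/p/secrets/jira-token/versions/1",
#                 server_uri="example.atlassian.net"),
#   ])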
@dataclasses.dataclass
class SharePointSource:
"""SharePointSource.
Attributes:
sharepoint_folder_path: The path of the SharePoint folder to download
from.
sharepoint_folder_id: The ID of the SharePoint folder to download
from.
drive_name: The name of the drive to download from.
drive_id: The ID of the drive to download from.
client_id: The Application ID for the app registered in
Microsoft Azure Portal. The application must
also be configured with MS Graph permissions
"Files.ReadAll", "Sites.ReadAll" and
BrowserSiteLists.Read.All.
client_secret: The application secret for the app registered
in Azure.
tenant_id: Unique identifier of the Azure Active
Directory Instance.
sharepoint_site_name: The name of the SharePoint site to download
from. This can be the site name or the site id.
"""
sharepoint_folder_path: Optional[str] = None
sharepoint_folder_id: Optional[str] = None
drive_name: Optional[str] = None
drive_id: Optional[str] = None
    client_id: Optional[str] = None
    client_secret: Optional[str] = None
    tenant_id: Optional[str] = None
    sharepoint_site_name: Optional[str] = None
@dataclasses.dataclass
class SharePointSources:
"""SharePointSources.
Attributes:
share_point_sources: The SharePoint sources.
"""
share_point_sources: Sequence[SharePointSource]
@dataclasses.dataclass
class Filter:
"""Filter.
Attributes:
vector_distance_threshold: Only returns contexts with vector
distance smaller than the threshold.
vector_similarity_threshold: Only returns contexts with vector
similarity larger than the threshold.
metadata_filter: String for metadata filtering.
"""
vector_distance_threshold: Optional[float] = None
vector_similarity_threshold: Optional[float] = None
metadata_filter: Optional[str] = None
@dataclasses.dataclass
class LlmRanker:
"""LlmRanker.
Attributes:
model_name: The model name used for ranking. Only Gemini models are
supported for now.
"""
model_name: Optional[str] = None
@dataclasses.dataclass
class RankService:
"""RankService.
Attributes:
model_name: The model name of the rank service. Format:
``semantic-ranker-512@latest``
"""
model_name: Optional[str] = None
@dataclasses.dataclass
class Ranking:
"""Ranking.
Attributes:
rank_service: Config for Rank Service.
llm_ranker: Config for LlmRanker.
"""
rank_service: Optional[RankService] = None
llm_ranker: Optional[LlmRanker] = None
@dataclasses.dataclass
class RagRetrievalConfig:
"""RagRetrievalConfig.
Attributes:
top_k: The number of contexts to retrieve.
filter: Config for filters.
ranking: Config for ranking.
"""
top_k: Optional[int] = None
filter: Optional[Filter] = None
ranking: Optional[Ranking] = None
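# Illustrative sketch: retrieve the five closest contexts, filtered by vector
# distance and re-ranked by a rank service:
#
#   config = RagRetrievalConfig(
#       top_k=5,
#       filter=Filter(vector_distance_threshold=0.5),
#       ranking=Ranking(
#           rank_service=RankService(model_name="semantic-ranker-512@latest")),
#   )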
@dataclasses.dataclass
class ChunkingConfig:
"""ChunkingConfig.
Attributes:
chunk_size: The size of each chunk.
chunk_overlap: The size of the overlap between chunks.
"""
chunk_size: int
chunk_overlap: int
@dataclasses.dataclass
class TransformationConfig:
"""TransformationConfig.
Attributes:
chunking_config: The chunking config.
"""
chunking_config: Optional[ChunkingConfig] = None
@dataclasses.dataclass
class LayoutParserConfig:
"""Configuration for the Document AI Layout Parser Processor.
Attributes:
processor_name: The full resource name of a Document AI processor or
processor version. The processor must have type
`LAYOUT_PARSER_PROCESSOR`.
Format must be one of the following:
- `projects/{project_id}/locations/{location}/processors/{processor_id}`
- `projects/{project_id}/locations/{location}/processors/{processor_id}/processorVersions/{processor_version_id}`
max_parsing_requests_per_min: The maximum number of requests the job is
allowed to make to the Document AI processor per minute. Consult
https://cloud.google.com/document-ai/quotas and the Quota page for
your project to set an appropriate value here. If unspecified, a
default value of 120 QPM will be used.
"""
processor_name: str
max_parsing_requests_per_min: Optional[int] = None
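# Illustrative sketch (hypothetical processor name): fixed-length chunking
# combined with a Document AI layout parser:
#
#   transformation = TransformationConfig(
#       chunking_config=ChunkingConfig(chunk_size=512, chunk_overlap=100))
#   parser = LayoutParserConfig(
#       processor_name="projects/p/locations/us/processors/my-processor",
#       max_parsing_requests_per_min=120)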