structure saas with tools
@@ -0,0 +1,130 @@
# -*- coding: utf-8 -*-

# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""Base classes for evaluation."""


import dataclasses
from typing import Any, Dict, List, Optional, Union, TYPE_CHECKING

from google.cloud.aiplatform_v1beta1.services import (
    evaluation_service as gapic_evaluation_services,
)
from google.cloud.aiplatform_v1beta1.types import (
    evaluation_service as gapic_eval_service_types,
)
from vertexai.preview.evaluation.metrics import (
    _base as metrics_base,
)


if TYPE_CHECKING:
    import pandas as pd

AutoraterConfig = gapic_eval_service_types.AutoraterConfig


@dataclasses.dataclass
class EvaluationRunConfig:
    """Evaluation Run Configurations.

    Attributes:
      dataset: The dataset to evaluate.
      metrics: The list of metric names, or Metric instances to evaluate.
      metric_column_mapping: An optional dictionary that maps the metric prompt
        template input variable names to the corresponding evaluation dataset
        column names, used during evaluation. For example, if the
        input_variables of the metric prompt template are ["context",
        "reference"], the metric_column_mapping can be {"context":
        "news_context", "reference": "ground_truth", "response":
        "model_1_response"} if the dataset has columns "news_context",
        "ground_truth" and "model_1_response".
      client: The evaluation service client.
      evaluation_service_qps: The custom QPS limit for the evaluation service.
      retry_timeout: How long to keep retrying the evaluation requests, in
        seconds.
      autorater_config: The autorater config for model-based evaluation.
    """

    dataset: "pd.DataFrame"
    metrics: List[Union[str, metrics_base._Metric]]
    metric_column_mapping: Dict[str, str]
    client: gapic_evaluation_services.EvaluationServiceClient
    evaluation_service_qps: float
    retry_timeout: float
    autorater_config: Optional[AutoraterConfig] = None

    def validate_dataset_column(self, column_name: str) -> None:
        """Validates that a mapped column name exists in the dataset.

        Args:
          column_name: The column name to validate.

        Raises:
          KeyError: If the column name, after applying metric_column_mapping,
            is not in the dataset.
        """
        if (
            self.metric_column_mapping.get(column_name, column_name)
            not in self.dataset.columns
        ):
            raise KeyError(
                "Required column"
                f" `{self.metric_column_mapping.get(column_name, column_name)}` not"
                " found in the evaluation dataset. The columns in the evaluation"
                f" dataset are {list(self.dataset.columns)}."
            )
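

# Usage sketch (illustrative; not part of the original commit): how an
# EvaluationRunConfig might be wired up and how validate_dataset_column
# resolves names through metric_column_mapping. The helper name, column
# names, and QPS/timeout values below are hypothetical.
def _example_run_config(
    client: gapic_evaluation_services.EvaluationServiceClient,
) -> None:
    import pandas as pd

    dataset = pd.DataFrame(
        {"news_context": ["..."], "ground_truth": ["..."]}
    )
    run_config = EvaluationRunConfig(
        dataset=dataset,
        metrics=["exact_match"],
        metric_column_mapping={
            "context": "news_context",
            "reference": "ground_truth",
        },
        client=client,
        evaluation_service_qps=1.0,
        retry_timeout=600.0,
    )
    # "context" maps to "news_context", which exists, so this passes silently.
    run_config.validate_dataset_column("context")
    try:
        # "response" is unmapped and absent from the dataset, so this raises.
        run_config.validate_dataset_column("response")
    except KeyError as err:
        print(err)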


@dataclasses.dataclass
class EvalResult:
    """Evaluation result.

    Attributes:
      summary_metrics: A dictionary of summary evaluation metrics for an
        evaluation run.
      metrics_table: A pandas.DataFrame table containing evaluation dataset
        inputs, predictions, explanations, and metric results per row.
      metadata: The metadata for the evaluation run.
    """

    summary_metrics: Dict[str, float]
    metrics_table: Optional["pd.DataFrame"] = None
    metadata: Optional[Dict[str, str]] = None
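

# Usage sketch (illustrative; not part of the original commit): EvalResult is
# a plain dataclass, so results can be assembled directly. The metric names,
# scores, and metadata keys below are hypothetical.
def _example_eval_result() -> EvalResult:
    import pandas as pd

    return EvalResult(
        summary_metrics={"exact_match/mean": 0.75, "row_count": 4.0},
        metrics_table=pd.DataFrame(
            {"response": ["..."], "exact_match/score": [1.0]}
        ),
        metadata={"experiment": "demo-run"},
    )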


@dataclasses.dataclass
class AutoraterEvalResult:
    """Evaluation result for autorater evaluation."""

    def __init__(
        self,
        eval_result: Optional[List[Dict[str, Any]]],
        eval_dataset_metadata: Optional[Dict[str, Any]],
        autorater_config: Optional[AutoraterConfig],
        **kwargs,
    ):
        """Initializes an AutoraterEvalResult.

        Args:
          eval_result: Evaluation result from an evaluation run.
          eval_dataset_metadata: Evaluation dataset metadata.
          autorater_config: Autorater configuration.
          **kwargs: Additional arguments added to AutoraterEvalResult.
        """
        self.eval_result = eval_result
        self.eval_dataset_metadata = eval_dataset_metadata
        self.autorater_config = autorater_config
        self.__dict__.update(kwargs)
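

# Usage sketch (illustrative; not part of the original commit). Note that the
# hand-written __init__ above takes precedence: dataclasses.dataclass does not
# replace methods already defined in the class body, and extra keyword
# arguments become instance attributes via __dict__. The field values below
# are hypothetical.
def _example_autorater_result() -> AutoraterEvalResult:
    result = AutoraterEvalResult(
        eval_result=[{"metric": "fluency", "score": 4.0}],
        eval_dataset_metadata={"dataset_uri": "gs://bucket/eval.jsonl"},
        autorater_config=AutoraterConfig(),
        extra_note="tuned autorater",  # stored as result.extra_note
    )
    return result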