structure saas with tools
This commit is contained in:
@@ -0,0 +1,172 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2024 Google LLC
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
"""Base classes for evaluation metrics."""
|
||||
|
||||
import abc
|
||||
from typing import Any, Callable, Dict, Literal, Union
|
||||
|
||||
from vertexai.evaluation import constants
|
||||
from vertexai.evaluation.metrics import (
|
||||
metric_prompt_template as metric_prompt_template_base,
|
||||
)
|
||||
|
||||
|
||||
class _Metric(abc.ABC):
|
||||
"""The abstract class for evaluation metric."""
|
||||
|
||||
def __init__(self, metric: str):
|
||||
self._metric = metric
|
||||
|
||||
def __str__(self):
|
||||
return self.metric_name
|
||||
|
||||
@property
|
||||
def metric_name(self) -> str:
|
||||
return self._metric
|
||||
|
||||
|
||||
class _ModelBasedMetric(_Metric):
|
||||
"""A Model-based Metric.
|
||||
|
||||
An evaluation metric that evaluates generative AI model responses with
|
||||
another generative model as a judge. This metric can be used to evaluate a
|
||||
single model, or two models side-by-side.
|
||||
|
||||
For more details on when to use model-based metrics, see
|
||||
[Evaluation methods and metrics](https://cloud.google.com/vertex-ai/generative-ai/docs/models/determine-eval).
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
*,
|
||||
metric: str,
|
||||
metric_prompt_template: Union[
|
||||
metric_prompt_template_base.PointwiseMetricPromptTemplate,
|
||||
metric_prompt_template_base.PairwiseMetricPromptTemplate,
|
||||
str,
|
||||
],
|
||||
):
|
||||
"""Initializes the model-based evaluation metric.
|
||||
|
||||
Args:
|
||||
metric: Generic model based metric name.
|
||||
metric_prompt_template: A metric prompt template for performing
|
||||
the model-based evaluation. A freeform string is also accepted.
|
||||
"""
|
||||
super().__init__(metric=metric)
|
||||
self._raw_metric_prompt_template = metric_prompt_template
|
||||
self.metric_prompt_template = str(metric_prompt_template)
|
||||
|
||||
|
||||
class CustomMetric(_Metric):
|
||||
"""The custom evaluation metric.
|
||||
|
||||
A fully-customized CustomMetric that can be used to evaluate a single model
|
||||
by defining a metric function for a computation-based metric. The
|
||||
CustomMetric is computed on the client-side using the user-defined metric
|
||||
function in SDK only, not by the Vertex Gen AI Evaluation Service.
|
||||
|
||||
Attributes:
|
||||
name: The name of the metric.
|
||||
metric_function: The user-defined evaluation function to compute a metric
|
||||
score. Must use the dataset row dictionary as the metric function
|
||||
input and return per-instance metric result as a dictionary output.
|
||||
The metric score must mapped to the name of the CustomMetric as key.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
name: str,
|
||||
metric_function: Callable[
|
||||
[Dict[str, Any]],
|
||||
Dict[str, Any],
|
||||
],
|
||||
):
|
||||
"""Initializes the evaluation metric."""
|
||||
super().__init__(name)
|
||||
self.name = name
|
||||
self.metric_function = metric_function
|
||||
|
||||
|
||||
class _AutomaticMetric(_Metric):
|
||||
"""An automatic metric that computes deterministic score based on reference.
|
||||
|
||||
An lexicon-based evaluation metric that evaluate a generative model's
|
||||
response on the given evaluation task with reference ground truth answers.
|
||||
It is a type of pointwise evaluation metric.
|
||||
|
||||
For more details on when to use automatic metrics, see
|
||||
[Evaluation methods and
|
||||
metrics](https://cloud.google.com/vertex-ai/generative-ai/docs/models/determine-eval).
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
metric: Literal[constants.Metric.ROUGE],
|
||||
):
|
||||
"""Initializes the automatic evaluation metric.
|
||||
|
||||
Args:
|
||||
metric: The automatic evaluation metric name.
|
||||
"""
|
||||
super().__init__(metric=metric)
|
||||
|
||||
|
||||
class _TranslationMetric(_Metric):
|
||||
"""A Translation Metric.
|
||||
|
||||
Evaluates a score for the given instance using an underlying machine
|
||||
learning model.
|
||||
For now, only COMET and MetricX are supported.
|
||||
|
||||
For more details on how to evaluate translation, see
|
||||
[Evaluation a translation
|
||||
model](https://cloud.google.com/vertex-ai/generative-ai/docs/models/run-evaluation#translation).
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
name: str,
|
||||
version: str,
|
||||
source_language: str,
|
||||
target_language: str,
|
||||
):
|
||||
"""Initializes the Translation metric.
|
||||
|
||||
Args:
|
||||
name: The name of the metric.
|
||||
version: The version to use for evaluation.
|
||||
source_language: The source language of the translation.
|
||||
target_language: The target language of the translation.
|
||||
"""
|
||||
self._version = version
|
||||
self._source_language = source_language
|
||||
self._target_language = target_language
|
||||
|
||||
super().__init__(metric=name)
|
||||
|
||||
@property
|
||||
def version(self) -> str:
|
||||
return self._version
|
||||
|
||||
@property
|
||||
def source_language(self) -> str:
|
||||
return self._source_language
|
||||
|
||||
@property
|
||||
def target_language(self) -> str:
|
||||
return self._target_language
|
||||
Reference in New Issue
Block a user