evo-ai/.venv/lib/python3.10/site-packages/vertexai/evaluation/metrics/_base.py

# -*- coding: utf-8 -*-
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""Base classes for evaluation metrics."""
import abc
from typing import Any, Callable, Dict, Literal, Union
from vertexai.evaluation import constants
from vertexai.evaluation.metrics import (
metric_prompt_template as metric_prompt_template_base,
)
class _Metric(abc.ABC):
"""The abstract class for evaluation metric."""
def __init__(self, metric: str):
self._metric = metric
def __str__(self):
return self.metric_name
@property
def metric_name(self) -> str:
return self._metric
class _ModelBasedMetric(_Metric):
"""A Model-based Metric.
    An evaluation metric that evaluates generative AI model responses using
    another generative model as a judge. This metric can be used to evaluate
    a single model or to compare two models side by side.
For more details on when to use model-based metrics, see
[Evaluation methods and metrics](https://cloud.google.com/vertex-ai/generative-ai/docs/models/determine-eval).
"""
def __init__(
self,
*,
metric: str,
metric_prompt_template: Union[
metric_prompt_template_base.PointwiseMetricPromptTemplate,
metric_prompt_template_base.PairwiseMetricPromptTemplate,
str,
],
):
"""Initializes the model-based evaluation metric.
Args:
          metric: Generic model-based metric name.
metric_prompt_template: A metric prompt template for performing
the model-based evaluation. A freeform string is also accepted.
"""
super().__init__(metric=metric)
self._raw_metric_prompt_template = metric_prompt_template
self.metric_prompt_template = str(metric_prompt_template)
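
# Editorial sketch (not part of the original module): a minimal example of how
# a model-based metric is built from a freeform prompt template string. The
# metric name "fluency" and the criteria text are illustrative assumptions; in
# practice this base class is reached through its public subclasses
# (e.g. PointwiseMetric).
def _example_model_based_metric() -> _ModelBasedMetric:
    """Illustrative only: builds a pointwise-style fluency metric."""
    return _ModelBasedMetric(
        metric="fluency",
        metric_prompt_template=(
            "Rate the fluency of the response on a scale from 1 to 5.\n"
            "Response: {response}"
        ),
    )
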
class CustomMetric(_Metric):
"""The custom evaluation metric.
    A fully-customized CustomMetric that can be used to evaluate a single
    model by defining a metric function for a computation-based metric. The
    CustomMetric is computed on the client side using the user-defined metric
    function in the SDK only, not by the Vertex Gen AI Evaluation Service.
    Attributes:
      name: The name of the metric.
      metric_function: The user-defined evaluation function to compute a
        metric score. It must take a dataset row dictionary as input and
        return the per-instance metric result as a dictionary. The metric
        score must be keyed by the name of the CustomMetric.
    """
def __init__(
self,
name: str,
metric_function: Callable[
[Dict[str, Any]],
Dict[str, Any],
],
):
"""Initializes the evaluation metric."""
super().__init__(name)
self.name = name
self.metric_function = metric_function
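
# Editorial sketch (not part of the original module): a minimal example of a
# client-side CustomMetric. The "exact_match" name and the row keys
# "response"/"reference" are hypothetical dataset columns.
def _example_custom_metric() -> CustomMetric:
    """Illustrative only: builds an exact-match CustomMetric."""

    def exact_match(row: Dict[str, Any]) -> Dict[str, Any]:
        # The function receives one dataset row as a dict and must return the
        # per-instance result keyed by the metric's name.
        return {"exact_match": float(row["response"] == row["reference"])}

    return CustomMetric(name="exact_match", metric_function=exact_match)
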
class _AutomaticMetric(_Metric):
"""An automatic metric that computes deterministic score based on reference.
An lexicon-based evaluation metric that evaluate a generative model's
response on the given evaluation task with reference ground truth answers.
It is a type of pointwise evaluation metric.
For more details on when to use automatic metrics, see
[Evaluation methods and
metrics](https://cloud.google.com/vertex-ai/generative-ai/docs/models/determine-eval).
"""
def __init__(
self,
metric: Literal[constants.Metric.ROUGE],
):
"""Initializes the automatic evaluation metric.
Args:
metric: The automatic evaluation metric name.
"""
super().__init__(metric=metric)
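
# Editorial sketch (not part of the original module): the only automatic metric
# accepted by the Literal type above is ROUGE, via `constants.Metric.ROUGE`.
def _example_automatic_metric() -> _AutomaticMetric:
    """Illustrative only: builds the ROUGE automatic metric."""
    return _AutomaticMetric(metric=constants.Metric.ROUGE)
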
class _TranslationMetric(_Metric):
"""A Translation Metric.
Evaluates a score for the given instance using an underlying machine
learning model.
For now, only COMET and MetricX are supported.
For more details on how to evaluate translation, see
    [Evaluate a translation
model](https://cloud.google.com/vertex-ai/generative-ai/docs/models/run-evaluation#translation).
"""
def __init__(
self,
name: str,
version: str,
source_language: str,
target_language: str,
):
"""Initializes the Translation metric.
Args:
name: The name of the metric.
version: The version to use for evaluation.
source_language: The source language of the translation.
target_language: The target language of the translation.
"""
self._version = version
self._source_language = source_language
self._target_language = target_language
super().__init__(metric=name)
@property
def version(self) -> str:
return self._version
@property
def source_language(self) -> str:
return self._source_language
@property
def target_language(self) -> str:
return self._target_language
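
# Editorial sketch (not part of the original module): the class docstring says
# only COMET and MetricX are supported. The version string and language codes
# below are hypothetical placeholders, not documented identifiers.
def _example_translation_metric() -> _TranslationMetric:
    """Illustrative only: builds a COMET-style translation metric."""
    return _TranslationMetric(
        name="comet",
        version="comet-22",  # hypothetical version identifier
        source_language="en",
        target_language="es",
    )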