# -*- coding: utf-8 -*-
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""Base classes for evaluation metrics."""
|
|
|
|
import abc
|
|
from typing import Any, Callable, Dict, Literal, Union
|
|
|
|
from vertexai.evaluation import constants
|
|
from vertexai.evaluation.metrics import (
|
|
metric_prompt_template as metric_prompt_template_base,
|
|
)
|
|
|
|
|
|
class _Metric(abc.ABC):
  """The abstract base class for an evaluation metric."""

  def __init__(self, metric: str):
    self._metric = metric

  def __str__(self):
    return self.metric_name

  @property
  def metric_name(self) -> str:
    return self._metric


class _ModelBasedMetric(_Metric):
  """A model-based metric.

  An evaluation metric that evaluates generative AI model responses with
  another generative model as a judge. This metric can be used to evaluate
  a single model, or two models side-by-side.

  For more details on when to use model-based metrics, see
  [Evaluation methods and metrics](https://cloud.google.com/vertex-ai/generative-ai/docs/models/determine-eval).
  """

  def __init__(
      self,
      *,
      metric: str,
      metric_prompt_template: Union[
          metric_prompt_template_base.PointwiseMetricPromptTemplate,
          metric_prompt_template_base.PairwiseMetricPromptTemplate,
          str,
      ],
  ):
    """Initializes the model-based evaluation metric.

    Args:
      metric: The generic model-based metric name.
      metric_prompt_template: A metric prompt template for performing the
        model-based evaluation. A freeform string is also accepted.
    """
    super().__init__(metric=metric)
    self._raw_metric_prompt_template = metric_prompt_template
    self.metric_prompt_template = str(metric_prompt_template)
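
# A minimal usage sketch (illustrative, not part of this module): model-based
# metrics are normally constructed through public subclasses of
# _ModelBasedMetric defined elsewhere in this package (e.g. a pointwise
# metric), passing either a metric prompt template object or a freeform
# template string. The metric name and template below are hypothetical.
#
#   fluency = PointwiseMetric(
#       metric="fluency",
#       metric_prompt_template=(
#           "Rate the fluency of the response on a scale of 1 to 5.\n"
#           "Response: {response}"
#       ),
#   )

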
class CustomMetric(_Metric):
  """The custom evaluation metric.

  A fully customized metric that can be used to evaluate a single model by
  defining a metric function for a computation-based metric. The
  CustomMetric is computed on the client side using the user-defined metric
  function in the SDK only, not by the Vertex Gen AI Evaluation Service.

  Attributes:
    name: The name of the metric.
    metric_function: The user-defined evaluation function to compute a
      metric score. It must take the dataset row dictionary as input and
      return the per-instance metric result as a dictionary, with the
      metric score mapped to the name of the CustomMetric as its key.
  """

  def __init__(
      self,
      name: str,
      metric_function: Callable[
          [Dict[str, Any]],
          Dict[str, Any],
      ],
  ):
    """Initializes the custom evaluation metric."""
    super().__init__(name)
    self.name = name
    self.metric_function = metric_function
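
# A minimal usage sketch (illustrative, not part of this module): the metric
# function receives one dataset row as a dictionary and returns the score
# keyed by the metric's name. The row fields and function below are
# hypothetical examples.
#
#   def exact_match_fn(row: Dict[str, Any]) -> Dict[str, Any]:
#     return {
#         "exact_match": 1.0 if row["response"] == row["reference"] else 0.0
#     }
#
#   exact_match = CustomMetric(
#       name="exact_match",
#       metric_function=exact_match_fn,
#   )

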
class _AutomaticMetric(_Metric):
  """An automatic metric that computes a deterministic score from a reference.

  A lexicon-based evaluation metric that evaluates a generative model's
  response on the given evaluation task against reference ground-truth
  answers. It is a type of pointwise evaluation metric.

  For more details on when to use automatic metrics, see
  [Evaluation methods and
  metrics](https://cloud.google.com/vertex-ai/generative-ai/docs/models/determine-eval).
  """

  def __init__(
      self,
      metric: Literal[constants.Metric.ROUGE],
  ):
    """Initializes the automatic evaluation metric.

    Args:
      metric: The automatic evaluation metric name.
    """
    super().__init__(metric=metric)
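
# A minimal usage sketch (illustrative, not part of this module): ROUGE is
# the only metric name this constructor's type hint currently accepts, taken
# from the shared constants module.
#
#   rouge = _AutomaticMetric(metric=constants.Metric.ROUGE)

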
class _TranslationMetric(_Metric):
  """A translation metric.

  Evaluates a score for the given instance using an underlying machine
  learning model. For now, only COMET and MetricX are supported.

  For more details on how to evaluate translation, see
  [Evaluate a translation
  model](https://cloud.google.com/vertex-ai/generative-ai/docs/models/run-evaluation#translation).
  """

  def __init__(
      self,
      name: str,
      version: str,
      source_language: str,
      target_language: str,
  ):
    """Initializes the translation metric.

    Args:
      name: The name of the metric.
      version: The model version to use for evaluation.
      source_language: The source language of the translation.
      target_language: The target language of the translation.
    """
    self._version = version
    self._source_language = source_language
    self._target_language = target_language

    super().__init__(metric=name)

  @property
  def version(self) -> str:
    return self._version

  @property
  def source_language(self) -> str:
    return self._source_language

  @property
  def target_language(self) -> str:
    return self._target_language
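
# A minimal usage sketch (illustrative, not part of this module): a
# COMET-style translation metric for English-to-German evaluation. The name
# and version strings below are hypothetical placeholders, not verified
# model identifiers.
#
#   comet = _TranslationMetric(
#       name="comet",
#       version="comet-22",
#       source_language="en",
#       target_language="de",
#   )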