# -*- coding: utf-8 -*-

# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""Utility functions for metrics."""
|
|
|
|
import datetime
import io
from typing import Any, Callable, Dict, List, Optional, Set, Union

from google.cloud.aiplatform import initializer
from google.cloud.aiplatform_v1beta1.types import (
    evaluation_service as gapic_eval_service_types,
)
from vertexai.preview.evaluation import metrics
from vertexai.preview.evaluation import prompt_template
from vertexai.preview.evaluation import utils
from vertexai.preview.evaluation.metrics import _schema
from vertexai.preview.evaluation.metrics import (
    custom_output_config,
)
from vertexai import generative_models
from vertexai.generative_models import _generative_models
import jsonschema
import ruamel.yaml
from ruamel.yaml import scalarstring


AutoraterConfig = gapic_eval_service_types.AutoraterConfig
GenerativeModel = generative_models.GenerativeModel
CustomOutputConfig = custom_output_config.CustomOutputConfig
PairwiseMetric = metrics.PairwiseMetric
PointwiseMetric = metrics.PointwiseMetric
RubricBasedMetric = metrics.RubricBasedMetric
RubricGenerationConfig = metrics.RubricGenerationConfig
PromptTemplate = prompt_template.PromptTemplate

# Initialize the schema validator. The parsed schema gets its own name so it
# does not shadow the imported `_schema` module.
_parsed_schema = ruamel.yaml.YAML(typ="safe").load(_schema.AUTORATER_METRIC_SCHEMA)
_schema_validator = jsonschema.Draft202012Validator(schema=_parsed_schema)


def dump(
    metric: Union[PointwiseMetric, PairwiseMetric, RubricBasedMetric],
    file_path: str,
    version: Optional[str] = None,
):
    """Dumps a metric object to a YAML file.

    Args:
        metric: The metric to be dumped to a file.
        file_path: The path to the file. Local and GCS files are supported.
        version: Optional. The version of the metric. Defaults to the timestamp
            when the metric file is created.
    """
    yaml_data = dumps(metric, version)
    if file_path.startswith(utils._GCS_PREFIX):
        utils._upload_string_to_gcs(file_path, yaml_data)
    else:
        with open(file_path, "w") as f:
            f.write(yaml_data)

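
# A minimal usage sketch for `dump` (the metric name, prompt template, and
# GCS path below are hypothetical):
#
#     metric = PointwiseMetric(
#         metric="fluency",
#         metric_prompt_template="Rate the fluency of {response} from 1 to 5.",
#     )
#     dump(metric, "gs://my-bucket/metrics/fluency.yaml", version="v1")
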
def dumps(
    metric: Union[PointwiseMetric, PairwiseMetric, RubricBasedMetric],
    version: Optional[str] = None,
) -> str:
    """Dumps a metric object to YAML data.

    Args:
        metric: The metric to be dumped to YAML data.
        version: Optional. The version of the metric. Defaults to the timestamp
            when the metric file is created.

    Returns:
        The YAML data of the metric.
    """
    steps = []
    metric_name = None
    if isinstance(metric, (PointwiseMetric, PairwiseMetric)):
        metric_name = metric.metric_name
        steps.append(_dump_metric(metric))
    elif isinstance(metric, RubricBasedMetric):
        metric_name = metric.critique_metric.metric_name
        steps.append(_dump_rubric(metric.generation_config))
        steps.append(_dump_metric(metric.critique_metric))
    metadata = {
        "name": metric_name,
        "version": (
            datetime.datetime.now().strftime("%Y%m%d%H%M%S")
            if version is None
            else version
        ),
        "required_inputs": _parse_required_inputs(metric),
    }
    metric_config = {
        "metadata": metadata,
        "steps": steps,
    }
    yaml = ruamel.yaml.YAML()
    yaml.indent(sequence=4, offset=2)
    with io.StringIO() as s:
        yaml.dump(metric_config, s)
        return s.getvalue()

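
# For reference, the YAML emitted by `dumps` for a rubric-based metric looks
# roughly like this (names, version, templates, and model are illustrative):
#
#     metadata:
#       name: my_metric
#       version: '20250101000000'
#       required_inputs:
#         - prompt
#         - response
#     steps:
#       - type: rubric
#         prompt:
#           template: |-
#             Generate evaluation rubrics for {prompt}.
#       - type: pointwise_metric
#         prompt:
#           template: |-
#             Evaluate {response} against the generated rubrics.
#         model:
#           model_name_or_endpoint: projects/my-project/locations/us-central1/publishers/google/models/gemini-2.0-flash
#         options:
#           sample_count: 4
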
def _dump_metric(metric: Union[PointwiseMetric, PairwiseMetric]) -> Dict[str, Any]:
    """Dumps a metric object to the autorater metric schema."""
    output_type = None
    if metric.custom_output_config and metric.custom_output_config.return_raw_output:
        output_type = "raw"
    step = {
        "type": (
            "pairwise_metric"
            if isinstance(metric, PairwiseMetric)
            else "pointwise_metric"
        ),
        "prompt": {
            "template": scalarstring.preserve_literal(metric.metric_prompt_template),
        },
    }
    if metric.system_instruction:
        step["prompt"]["system_instruction"] = metric.system_instruction
    if output_type:
        step["output"] = {
            "type": output_type,
        }
    if metric.autorater_config:
        step["model"] = {
            "model_name_or_endpoint": metric.autorater_config.autorater_model,
        }
        options = {}
        if metric.autorater_config.flip_enabled:
            options["flip_enabled"] = metric.autorater_config.flip_enabled
        if metric.autorater_config.sampling_count:
            options["sample_count"] = metric.autorater_config.sampling_count
        if options:
            step["options"] = options
    return step

def _dump_rubric(generation_config: RubricGenerationConfig) -> Dict[str, Any]:
    """Dumps a rubric generation config to the autorater metric schema."""
    # TODO: b/396217889 - add support for custom output.
    step = {
        "type": "rubric",
        "prompt": {
            "template": scalarstring.preserve_literal(
                generation_config.prompt_template
            ),
        },
    }
    if isinstance(generation_config.model, GenerativeModel):
        step["model"] = {
            "model_name_or_endpoint": generation_config.model._model_name,
        }
    return step

def _parse_required_inputs(
    metric: Union[PointwiseMetric, PairwiseMetric, RubricBasedMetric],
) -> List[str]:
    """Parses required inputs from a metric object."""
    if isinstance(metric, (PointwiseMetric, PairwiseMetric)):
        return list(PromptTemplate(metric.metric_prompt_template).variables)
    elif isinstance(metric, RubricBasedMetric):
        met = PromptTemplate(metric.critique_metric.metric_prompt_template).variables
        gen = PromptTemplate(metric.generation_config.prompt_template).variables
        return list(met.union(gen))
    else:
        raise ValueError(f"Unsupported metric type: {type(metric)}")

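
# For instance, `_parse_required_inputs` on a pointwise metric whose template
# is "Rate the {response} to {prompt}." (an illustrative template) returns
# ["response", "prompt"]; list order is not guaranteed because the template
# variables are collected as a set.
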
def load(
    file_path: str,
    baseline_model: Optional[Union[GenerativeModel, Callable[[str], str]]] = None,
) -> Union[PointwiseMetric, PairwiseMetric, RubricBasedMetric]:
    """Loads a metric object from a YAML file.

    Args:
        file_path: Path to the file containing the autorater metric
            configuration. Local and GCS files are supported.
        baseline_model: Optional. The baseline model to use for pairwise
            metrics.

    Returns:
        The metric object loaded from the file.
    """
    if file_path.startswith(utils._GCS_PREFIX):
        file_contents = utils._read_gcs_file_contents(file_path)
        return loads(file_contents, baseline_model)
    with open(file_path, "r") as f:
        return loads(f.read(), baseline_model)

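
# A minimal usage sketch for `load` (paths and the baseline model name are
# hypothetical; a baseline model is only accepted for pairwise configs):
#
#     pointwise = load("gs://my-bucket/metrics/fluency.yaml")
#     pairwise = load(
#         "gs://my-bucket/metrics/preference.yaml",
#         baseline_model=GenerativeModel("gemini-2.0-flash"),
#     )
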
def loads(
    yaml_data: str,
    baseline_model: Optional[Union[GenerativeModel, Callable[[str], str]]] = None,
) -> Union[PointwiseMetric, PairwiseMetric, RubricBasedMetric]:
    """Loads a metric object from YAML data.

    Args:
        yaml_data: YAML data containing the autorater metric configuration.
        baseline_model: Optional. The baseline model to use for pairwise
            metrics.

    Returns:
        The metric object loaded from the YAML data.
    """
    yaml = ruamel.yaml.YAML(typ="safe")
    yaml_obj = yaml.load(yaml_data)
    try:
        _schema_validator.validate(yaml_obj)
    except jsonschema.exceptions.ValidationError as e:
        # Report the full path to the offending element; popping a single path
        # element would fail for errors at the document root.
        raise ValueError(
            f"Invalid autorater metric config: {e.message} at {list(e.path)}"
        ) from e
    metadata = yaml_obj["metadata"]
    steps = yaml_obj["steps"]
    required_inputs = set(metadata["required_inputs"])
    metric = None
    rubric = None
    for step in steps:
        _validate_template(step["prompt"]["template"], required_inputs)
        model_name = None
        flip = None
        sampling = None
        if "model" in step:
            model_name = _parse_model_name(step["model"]["model_name_or_endpoint"])
        if "options" in step:
            flip = step["options"].get("flip_enabled", False)
            sampling = step["options"].get("sample_count", 1)
        autorater = None
        if model_name:
            autorater = AutoraterConfig(
                autorater_model=model_name,
                flip_enabled=flip,
                sampling_count=sampling,
            )
        system_instruction = step["prompt"].get("system_instruction")
        custom_output = None
        if "output" in step and step["output"]["type"] == "raw":
            custom_output = CustomOutputConfig(return_raw_output=True)
        if step["type"] == "pointwise_metric":
            if metric is not None:
                raise ValueError("Only one metric step is supported.")
            if baseline_model:
                raise ValueError(
                    "Baseline model is not supported for pointwise metrics."
                )
            metric = PointwiseMetric(
                metric=metadata["name"],
                metric_prompt_template=step["prompt"]["template"],
                system_instruction=system_instruction,
                autorater_config=autorater,
                custom_output_config=custom_output,
            )
        elif step["type"] == "pairwise_metric":
            if metric is not None:
                raise ValueError("Only one metric step is supported.")
            metric = PairwiseMetric(
                metric=metadata["name"],
                metric_prompt_template=step["prompt"]["template"],
                system_instruction=system_instruction,
                baseline_model=baseline_model,
                autorater_config=autorater,
                custom_output_config=custom_output,
            )
        elif step["type"] == "rubric":
            if rubric is not None:
                raise ValueError("Only one rubric step is supported.")
            model = None
            if model_name:
                model = generative_models.GenerativeModel(model_name=model_name)
            rubric = RubricGenerationConfig(
                prompt_template=step["prompt"]["template"],
                model=model,
            )
    if metric is None:
        raise ValueError("A metric step must be provided.")
    if rubric is not None:
        return RubricBasedMetric(
            generation_config=rubric,
            critique_metric=metric,
        )
    return metric

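
# A minimal round-trip sketch for `loads` (the metric is hypothetical):
#
#     original = PointwiseMetric(
#         metric="fluency",
#         metric_prompt_template="Rate the fluency of {response}.",
#     )
#     restored = loads(dumps(original, version="v1"))
#     assert isinstance(restored, PointwiseMetric)
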
def _parse_model_name(model_name_or_endpoint: str) -> str:
    """Parses a model name or endpoint into a full model resource name.

    Args:
        model_name_or_endpoint: A Model Garden model name or a tuned model
            endpoint resource name.

    Returns:
        The model resource name.
    """
    project = initializer.global_config.project
    location = initializer.global_config.location
    model_name = _generative_models._reconcile_model_name(
        model_name_or_endpoint, project, location
    )
    return _generative_models._get_resource_name_from_model_name(
        model_name, project, location
    )

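
# For example, with the SDK initialized for project "my-project" in
# "us-central1" (hypothetical values), a short name like "gemini-2.0-flash"
# resolves to a full resource name along the lines of
# "projects/my-project/locations/us-central1/publishers/google/models/gemini-2.0-flash".
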
def _validate_template(template: str, required_inputs: Set[str]) -> None:
    """Validates that the template contains only required inputs."""
    placeholders = PromptTemplate(template).variables
    if not placeholders.issubset(required_inputs):
        raise ValueError(
            "Template contains placeholders that are not in required inputs:"
            f" {placeholders - required_inputs}"
        )
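
# For instance, validating the (illustrative) template "Score {answer}."
# against required inputs {"prompt", "response"} raises:
#     ValueError: Template contains placeholders that are not in required
#     inputs: {'answer'}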