feat: Added implementation for get_eval_case, update_eval_case and delete_eval_case for the local eval sets manager.

PiperOrigin-RevId: 766383391
This commit is contained in:
Ankur Sharma 2025-06-02 16:13:57 -07:00 committed by Copybara-Service
parent da4bc0efc0
commit a7575e078a
3 changed files with 197 additions and 10 deletions

View File

@ -57,6 +57,7 @@ from ..agents.llm_agent import Agent
from ..agents.run_config import StreamingMode
from ..artifacts.gcs_artifact_service import GcsArtifactService
from ..artifacts.in_memory_artifact_service import InMemoryArtifactService
from ..errors.not_found_error import NotFoundError
from ..evaluation.eval_case import EvalCase
from ..evaluation.eval_case import SessionInput
from ..evaluation.eval_metrics import EvalMetric
@ -487,8 +488,66 @@ def get_fast_api_app(
"""Lists all evals in an eval set."""
eval_set_data = eval_sets_manager.get_eval_set(app_name, eval_set_id)
if not eval_set_data:
raise HTTPException(
status_code=400, detail=f"Eval set `{eval_set_id}` not found."
)
return sorted([x.eval_id for x in eval_set_data.eval_cases])
@app.get(
    "/apps/{app_name}/eval_sets/{eval_set_id}/evals/{eval_case_id}",
    response_model_exclude_none=True,
)
def get_eval(app_name: str, eval_set_id: str, eval_case_id: str) -> EvalCase:
  """Gets an eval case in an eval set."""
  # The manager reports "not found" by returning a falsy value; it does not
  # tell us whether the eval set or the eval case was the missing piece, so
  # the 404 detail names both.
  found_case = eval_sets_manager.get_eval_case(
      app_name, eval_set_id, eval_case_id
  )
  if not found_case:
    raise HTTPException(
        status_code=404,
        detail=f"Eval set `{eval_set_id}` or Eval `{eval_case_id}` not found.",
    )
  return found_case
@app.put(
    "/apps/{app_name}/eval_sets/{eval_set_id}/evals/{eval_case_id}",
    response_model_exclude_none=True,
)
def update_eval(
    app_name: str,
    eval_set_id: str,
    eval_case_id: str,
    updated_eval_case: EvalCase,
):
  """Updates an eval case in an eval set."""
  # The route is the source of truth for the id. A body that names a
  # different, non-empty id is rejected with 400 rather than silently
  # rewritten.
  body_eval_id = updated_eval_case.eval_id
  if body_eval_id and body_eval_id != eval_case_id:
    raise HTTPException(
        status_code=400,
        detail=(
            "Eval id in EvalCase should match the eval id in the API route."
        ),
    )

  # Overwrite the value. We are either overwriting the same value or an empty
  # field.
  updated_eval_case.eval_id = eval_case_id

  # A missing eval set or eval case surfaces from the manager as
  # NotFoundError; translate it into a 404 carrying the manager's message.
  try:
    eval_sets_manager.update_eval_case(
        app_name, eval_set_id, updated_eval_case
    )
  except NotFoundError as nfe:
    raise HTTPException(status_code=404, detail=str(nfe)) from nfe
@app.delete("/apps/{app_name}/eval_sets/{eval_set_id}/evals/{eval_case_id}")
def delete_eval(app_name: str, eval_set_id: str, eval_case_id: str):
  """Deletes an eval case from an eval set."""
  # A missing eval set or eval case surfaces from the manager as
  # NotFoundError; translate it into a 404 carrying the manager's message.
  try:
    eval_sets_manager.delete_eval_case(app_name, eval_set_id, eval_case_id)
  except NotFoundError as nfe:
    raise HTTPException(status_code=404, detail=str(nfe)) from nfe
@app.post(
"/apps/{app_name}/eval_sets/{eval_set_id}/run_eval",
response_model_exclude_none=True,
@ -503,6 +562,11 @@ def get_fast_api_app(
# run.
eval_set = eval_sets_manager.get_eval_set(app_name, eval_set_id)
if not eval_set:
raise HTTPException(
status_code=400, detail=f"Eval set `{eval_set_id}` not found."
)
if req.eval_ids:
eval_cases = [e for e in eval_set.eval_cases if e.eval_id in req.eval_ids]
eval_set_to_evals = {eval_set_id: eval_cases}

View File

@ -12,9 +12,13 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations
from abc import ABC
from abc import abstractmethod
from typing import Optional
from ..errors.not_found_error import NotFoundError
from .eval_case import EvalCase
from .eval_set import EvalSet
@ -23,21 +27,47 @@ class EvalSetsManager(ABC):
"""An interface to manage an Eval Sets."""
@abstractmethod
def get_eval_set(self, app_name: str, eval_set_id: str) -> EvalSet:
def get_eval_set(self, app_name: str, eval_set_id: str) -> Optional[EvalSet]:
"""Returns an EvalSet identified by an app_name and eval_set_id."""
raise NotImplementedError()
@abstractmethod
def create_eval_set(self, app_name: str, eval_set_id: str):
"""Creates an empty EvalSet given the app_name and eval_set_id.

Args:
  app_name: Name of the app the new eval set belongs to.
  eval_set_id: Identifier of the eval set to create.
"""
raise NotImplementedError()
@abstractmethod
def list_eval_sets(self, app_name: str) -> list[str]:
"""Returns a list of EvalSets that belong to the given app_name.

Args:
  app_name: Name of the app whose eval sets are listed.

Returns:
  A list of strings, one per eval set. NOTE(review): given the `list[str]`
  annotation these are presumably eval set ids rather than EvalSet objects;
  confirm against the implementations.
"""
raise NotImplementedError()
@abstractmethod
def get_eval_case(
self, app_name: str, eval_set_id: str, eval_case_id: str
) -> Optional[EvalCase]:
"""Returns an EvalCase if found, otherwise None.

Unlike the update and delete operations, a missing eval case is reported
through the None return value rather than by raising.

Args:
  app_name: Name of the app the eval set belongs to.
  eval_set_id: Identifier of the eval set to search.
  eval_case_id: Identifier of the eval case to look up.
"""
@abstractmethod
def add_eval_case(self, app_name: str, eval_set_id: str, eval_case: EvalCase):
"""Adds the given EvalCase to an existing EvalSet identified by app_name and eval_set_id."""
raise NotImplementedError()
"""Adds the given EvalCase to an existing EvalSet identified by app_name and eval_set_id.
Raises:
NotFoundError: If the eval set is not found.
"""
@abstractmethod
def update_eval_case(
self, app_name: str, eval_set_id: str, updated_eval_case: EvalCase
):
"""Updates an existing EvalCase given the app_name and eval_set_id.

Args:
  app_name: Name of the app the eval set belongs to.
  eval_set_id: Identifier of the eval set that holds the eval case.
  updated_eval_case: The new contents of the eval case.

Raises:
NotFoundError: If the eval set or the eval case is not found.
"""
@abstractmethod
def delete_eval_case(
self, app_name: str, eval_set_id: str, eval_case_id: str
):
"""Deletes the given EvalCase identified by app_name, eval_set_id and eval_case_id.

Args:
  app_name: Name of the app the eval set belongs to.
  eval_set_id: Identifier of the eval set that holds the eval case.
  eval_case_id: Identifier of the eval case to delete.

Raises:
NotFoundError: If the eval set or the eval case to delete is not found.
"""

View File

@ -20,12 +20,14 @@ import os
import re
import time
from typing import Any
from typing import Optional
import uuid
from google.genai import types as genai_types
from pydantic import ValidationError
from typing_extensions import override
from ..errors.not_found_error import NotFoundError
from .eval_case import EvalCase
from .eval_case import IntermediateData
from .eval_case import Invocation
@ -188,11 +190,14 @@ class LocalEvalSetsManager(EvalSetsManager):
self._agents_dir = agents_dir
@override
def get_eval_set(self, app_name: str, eval_set_id: str) -> EvalSet:
def get_eval_set(self, app_name: str, eval_set_id: str) -> Optional[EvalSet]:
"""Returns an EvalSet identified by an app_name and eval_set_id."""
# Load the eval set file data
eval_set_file_path = self._get_eval_set_file_path(app_name, eval_set_id)
return load_eval_set_from_file(eval_set_file_path, eval_set_id)
try:
eval_set_file_path = self._get_eval_set_file_path(app_name, eval_set_id)
return load_eval_set_from_file(eval_set_file_path, eval_set_id)
except FileNotFoundError:
return None
@override
def create_eval_set(self, app_name: str, eval_set_id: str):
@ -230,12 +235,19 @@ class LocalEvalSetsManager(EvalSetsManager):
@override
def add_eval_case(self, app_name: str, eval_set_id: str, eval_case: EvalCase):
"""Adds the given EvalCase to an existing EvalSet identified by app_name and eval_set_id."""
"""Adds the given EvalCase to an existing EvalSet identified by app_name and eval_set_id.
Raises:
NotFoundError: If the eval set is not found.
"""
eval_case_id = eval_case.eval_id
self._validate_id(id_name="Eval Case Id", id_value=eval_case_id)
eval_set = self.get_eval_set(app_name, eval_set_id)
if not eval_set:
raise NotFoundError(f"Eval set `{eval_set_id}` not found.")
if [x for x in eval_set.eval_cases if x.eval_id == eval_case_id]:
raise ValueError(
f"Eval id `{eval_case_id}` already exists in `{eval_set_id}`"
@ -247,6 +259,87 @@ class LocalEvalSetsManager(EvalSetsManager):
eval_set_file_path = self._get_eval_set_file_path(app_name, eval_set_id)
self._write_eval_set(eval_set_file_path, eval_set)
@override
def get_eval_case(
self, app_name: str, eval_set_id: str, eval_case_id: str
) -> Optional[EvalCase]:
"""Returns an EvalCase if found, otherwise None."""
eval_set = self.get_eval_set(app_name, eval_set_id)
if not eval_set:
return None
eval_case_to_find = None
# Look up the eval case by eval_case_id
for eval_case in eval_set.eval_cases:
if eval_case.eval_id == eval_case_id:
eval_case_to_find = eval_case
break
return eval_case_to_find
@override
def update_eval_case(
self, app_name: str, eval_set_id: str, updated_eval_case: EvalCase
):
"""Updates an existing EvalCase give the app_name and eval_set_id.
Raises:
NotFoundError: If the eval set or the eval case is not found.
"""
eval_case_id = updated_eval_case.eval_id
# Find the eval case to be updated.
eval_case_to_update = self.get_eval_case(
app_name, eval_set_id, eval_case_id
)
if eval_case_to_update:
# Remove the eval case from the existing eval set.
eval_set = self.get_eval_set(app_name, eval_set_id)
eval_set.eval_cases.remove(eval_case_to_update)
# Add the updated eval case to the existing eval set.
eval_set.eval_cases.append(updated_eval_case)
# Persit the eval set.
eval_set_file_path = self._get_eval_set_file_path(app_name, eval_set_id)
self._write_eval_set(eval_set_file_path, eval_set)
else:
raise NotFoundError(
f"Eval Set `{eval_set_id}` or Eval id `{eval_case_id}` not found.",
)
@override
def delete_eval_case(
    self, app_name: str, eval_set_id: str, eval_case_id: str
):
  """Deletes the given EvalCase identified by app_name, eval_set_id and eval_case_id.

  The first eval case in the eval set whose `eval_id` equals `eval_case_id`
  is removed and the eval set is written back to disk.

  Raises:
    NotFoundError: If the eval set or the eval case to delete is not found.
  """
  # Load the eval set exactly once and work on that single snapshot. Looking
  # the case up through a separate load would leave a window in which the
  # file could change (or vanish) between the lookup and the removal.
  eval_set = self.get_eval_set(app_name, eval_set_id)
  eval_cases = eval_set.eval_cases if eval_set else []
  position = next(
      (
          index
          for index, eval_case in enumerate(eval_cases)
          if eval_case.eval_id == eval_case_id
      ),
      None,
  )
  if position is None:
    raise NotFoundError(
        f"Eval Set `{eval_set_id}` or Eval id `{eval_case_id}` not found.",
    )

  logger.info(
      "EvalCase`%s` was found in the eval set. It will be removed"
      " permanently.",
      eval_case_id,
  )
  del eval_cases[position]

  # Persist the eval set without the removed case.
  eval_set_file_path = self._get_eval_set_file_path(app_name, eval_set_id)
  self._write_eval_set(eval_set_file_path, eval_set)
def _get_eval_set_file_path(self, app_name: str, eval_set_id: str) -> str:
return os.path.join(
self._agents_dir,