feat: Added implementation for get_eval_case, update_eval_case and delete_eval_case for the local eval sets manager.

PiperOrigin-RevId: 766383391
This commit is contained in:
Ankur Sharma 2025-06-02 16:13:57 -07:00 committed by Copybara-Service
parent da4bc0efc0
commit a7575e078a
3 changed files with 197 additions and 10 deletions

View File

@ -57,6 +57,7 @@ from ..agents.llm_agent import Agent
from ..agents.run_config import StreamingMode from ..agents.run_config import StreamingMode
from ..artifacts.gcs_artifact_service import GcsArtifactService from ..artifacts.gcs_artifact_service import GcsArtifactService
from ..artifacts.in_memory_artifact_service import InMemoryArtifactService from ..artifacts.in_memory_artifact_service import InMemoryArtifactService
from ..errors.not_found_error import NotFoundError
from ..evaluation.eval_case import EvalCase from ..evaluation.eval_case import EvalCase
from ..evaluation.eval_case import SessionInput from ..evaluation.eval_case import SessionInput
from ..evaluation.eval_metrics import EvalMetric from ..evaluation.eval_metrics import EvalMetric
@ -487,8 +488,66 @@ def get_fast_api_app(
"""Lists all evals in an eval set.""" """Lists all evals in an eval set."""
eval_set_data = eval_sets_manager.get_eval_set(app_name, eval_set_id) eval_set_data = eval_sets_manager.get_eval_set(app_name, eval_set_id)
if not eval_set_data:
raise HTTPException(
status_code=400, detail=f"Eval set `{eval_set_id}` not found."
)
return sorted([x.eval_id for x in eval_set_data.eval_cases]) return sorted([x.eval_id for x in eval_set_data.eval_cases])
@app.get(
    "/apps/{app_name}/eval_sets/{eval_set_id}/evals/{eval_case_id}",
    response_model_exclude_none=True,
)
def get_eval(app_name: str, eval_set_id: str, eval_case_id: str) -> EvalCase:
  """Gets an eval case in an eval set.

  Args:
    app_name: App name taken from the route.
    eval_set_id: Id of the eval set taken from the route.
    eval_case_id: Id of the eval case taken from the route.

  Returns:
    The eval case returned by the eval sets manager.

  Raises:
    HTTPException: With status 404 when the eval sets manager returns nothing
      for the requested eval set / eval case combination.
  """
  found_case = eval_sets_manager.get_eval_case(
      app_name, eval_set_id, eval_case_id
  )

  # The manager does not say which of the two ids was unknown, so the error
  # message mentions both.
  if not found_case:
    raise HTTPException(
        status_code=404,
        detail=f"Eval set `{eval_set_id}` or Eval `{eval_case_id}` not found.",
    )

  return found_case
@app.put(
    "/apps/{app_name}/eval_sets/{eval_set_id}/evals/{eval_case_id}",
    response_model_exclude_none=True,
)
def update_eval(
    app_name: str,
    eval_set_id: str,
    eval_case_id: str,
    updated_eval_case: EvalCase,
):
  """Updates an eval case in an eval set.

  Args:
    app_name: App name taken from the route.
    eval_set_id: Id of the eval set taken from the route.
    eval_case_id: Id of the eval case taken from the route.
    updated_eval_case: The new content for the eval case. Its `eval_id` may be
      empty; if set, it has to agree with `eval_case_id`.

  Raises:
    HTTPException: With status 400 when the eval id in the body disagrees with
      the eval id in the route, or with status 404 when the eval sets manager
      raises NotFoundError.
  """
  body_eval_id = updated_eval_case.eval_id
  ids_disagree = bool(body_eval_id) and body_eval_id != eval_case_id
  if ids_disagree:
    raise HTTPException(
        status_code=400,
        detail=(
            "Eval id in EvalCase should match the eval id in the API route."
        ),
    )

  # From here on the body id is either empty or already equal to the route id,
  # so this assignment never replaces a different non-empty value.
  updated_eval_case.eval_id = eval_case_id

  try:
    eval_sets_manager.update_eval_case(
        app_name, eval_set_id, updated_eval_case
    )
  except NotFoundError as not_found:
    raise HTTPException(status_code=404, detail=str(not_found)) from not_found
@app.delete("/apps/{app_name}/eval_sets/{eval_set_id}/evals/{eval_case_id}")
def delete_eval(app_name: str, eval_set_id: str, eval_case_id: str):
  """Deletes an eval case from an eval set.

  Args:
    app_name: App name taken from the route.
    eval_set_id: Id of the eval set taken from the route.
    eval_case_id: Id of the eval case taken from the route.

  Raises:
    HTTPException: With status 404 when the eval sets manager raises
      NotFoundError.
  """
  try:
    eval_sets_manager.delete_eval_case(app_name, eval_set_id, eval_case_id)
  except NotFoundError as not_found:
    # Surface the manager's own message as the HTTP error detail.
    raise HTTPException(status_code=404, detail=str(not_found)) from not_found
@app.post( @app.post(
"/apps/{app_name}/eval_sets/{eval_set_id}/run_eval", "/apps/{app_name}/eval_sets/{eval_set_id}/run_eval",
response_model_exclude_none=True, response_model_exclude_none=True,
@ -503,6 +562,11 @@ def get_fast_api_app(
# run. # run.
eval_set = eval_sets_manager.get_eval_set(app_name, eval_set_id) eval_set = eval_sets_manager.get_eval_set(app_name, eval_set_id)
if not eval_set:
raise HTTPException(
status_code=400, detail=f"Eval set `{eval_set_id}` not found."
)
if req.eval_ids: if req.eval_ids:
eval_cases = [e for e in eval_set.eval_cases if e.eval_id in req.eval_ids] eval_cases = [e for e in eval_set.eval_cases if e.eval_id in req.eval_ids]
eval_set_to_evals = {eval_set_id: eval_cases} eval_set_to_evals = {eval_set_id: eval_cases}

View File

@ -12,9 +12,13 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from __future__ import annotations
from abc import ABC from abc import ABC
from abc import abstractmethod from abc import abstractmethod
from typing import Optional
from ..errors.not_found_error import NotFoundError
from .eval_case import EvalCase from .eval_case import EvalCase
from .eval_set import EvalSet from .eval_set import EvalSet
@ -23,21 +27,47 @@ class EvalSetsManager(ABC):
"""An interface to manage an Eval Sets.""" """An interface to manage an Eval Sets."""
@abstractmethod @abstractmethod
def get_eval_set(self, app_name: str, eval_set_id: str) -> EvalSet: def get_eval_set(self, app_name: str, eval_set_id: str) -> Optional[EvalSet]:
"""Returns an EvalSet identified by an app_name and eval_set_id.""" """Returns an EvalSet identified by an app_name and eval_set_id."""
raise NotImplementedError()
@abstractmethod @abstractmethod
def create_eval_set(self, app_name: str, eval_set_id: str): def create_eval_set(self, app_name: str, eval_set_id: str):
"""Creates an empty EvalSet given the app_name and eval_set_id.""" """Creates an empty EvalSet given the app_name and eval_set_id."""
raise NotImplementedError()
@abstractmethod @abstractmethod
def list_eval_sets(self, app_name: str) -> list[str]: def list_eval_sets(self, app_name: str) -> list[str]:
"""Returns a list of EvalSets that belong to the given app_name.""" """Returns a list of EvalSets that belong to the given app_name."""
raise NotImplementedError()
@abstractmethod
def get_eval_case(
self, app_name: str, eval_set_id: str, eval_case_id: str
) -> Optional[EvalCase]:
"""Returns an EvalCase if found, otherwise None.

Args:
app_name: Name of the app the eval set belongs to.
eval_set_id: Id of the eval set to look in.
eval_case_id: Id of the eval case to return.
"""
@abstractmethod @abstractmethod
def add_eval_case(self, app_name: str, eval_set_id: str, eval_case: EvalCase): def add_eval_case(self, app_name: str, eval_set_id: str, eval_case: EvalCase):
"""Adds the given EvalCase to an existing EvalSet identified by app_name and eval_set_id.""" """Adds the given EvalCase to an existing EvalSet identified by app_name and eval_set_id.
raise NotImplementedError()
Raises:
NotFoundError: If the eval set is not found.
"""
@abstractmethod
def update_eval_case(
self, app_name: str, eval_set_id: str, updated_eval_case: EvalCase
):
"""Updates an existing EvalCase given the app_name and eval_set_id.

Args:
app_name: Name of the app the eval set belongs to.
eval_set_id: Id of the eval set that holds the eval case.
updated_eval_case: The new content for the eval case.

Raises:
NotFoundError: If the eval set or the eval case is not found.
"""
@abstractmethod
def delete_eval_case(
self, app_name: str, eval_set_id: str, eval_case_id: str
):
"""Deletes the given EvalCase identified by app_name, eval_set_id and eval_case_id.

Args:
app_name: Name of the app the eval set belongs to.
eval_set_id: Id of the eval set that holds the eval case.
eval_case_id: Id of the eval case to delete.

Raises:
NotFoundError: If the eval set or the eval case to delete is not found.
"""

View File

@ -20,12 +20,14 @@ import os
import re import re
import time import time
from typing import Any from typing import Any
from typing import Optional
import uuid import uuid
from google.genai import types as genai_types from google.genai import types as genai_types
from pydantic import ValidationError from pydantic import ValidationError
from typing_extensions import override from typing_extensions import override
from ..errors.not_found_error import NotFoundError
from .eval_case import EvalCase from .eval_case import EvalCase
from .eval_case import IntermediateData from .eval_case import IntermediateData
from .eval_case import Invocation from .eval_case import Invocation
@ -188,11 +190,14 @@ class LocalEvalSetsManager(EvalSetsManager):
self._agents_dir = agents_dir self._agents_dir = agents_dir
@override @override
def get_eval_set(self, app_name: str, eval_set_id: str) -> EvalSet: def get_eval_set(self, app_name: str, eval_set_id: str) -> Optional[EvalSet]:
"""Returns an EvalSet identified by an app_name and eval_set_id.""" """Returns an EvalSet identified by an app_name and eval_set_id."""
# Load the eval set file data # Load the eval set file data
eval_set_file_path = self._get_eval_set_file_path(app_name, eval_set_id) try:
return load_eval_set_from_file(eval_set_file_path, eval_set_id) eval_set_file_path = self._get_eval_set_file_path(app_name, eval_set_id)
return load_eval_set_from_file(eval_set_file_path, eval_set_id)
except FileNotFoundError:
return None
@override @override
def create_eval_set(self, app_name: str, eval_set_id: str): def create_eval_set(self, app_name: str, eval_set_id: str):
@ -230,12 +235,19 @@ class LocalEvalSetsManager(EvalSetsManager):
@override @override
def add_eval_case(self, app_name: str, eval_set_id: str, eval_case: EvalCase): def add_eval_case(self, app_name: str, eval_set_id: str, eval_case: EvalCase):
"""Adds the given EvalCase to an existing EvalSet identified by app_name and eval_set_id.""" """Adds the given EvalCase to an existing EvalSet identified by app_name and eval_set_id.
Raises:
NotFoundError: If the eval set is not found.
"""
eval_case_id = eval_case.eval_id eval_case_id = eval_case.eval_id
self._validate_id(id_name="Eval Case Id", id_value=eval_case_id) self._validate_id(id_name="Eval Case Id", id_value=eval_case_id)
eval_set = self.get_eval_set(app_name, eval_set_id) eval_set = self.get_eval_set(app_name, eval_set_id)
if not eval_set:
raise NotFoundError(f"Eval set `{eval_set_id}` not found.")
if [x for x in eval_set.eval_cases if x.eval_id == eval_case_id]: if [x for x in eval_set.eval_cases if x.eval_id == eval_case_id]:
raise ValueError( raise ValueError(
f"Eval id `{eval_case_id}` already exists in `{eval_set_id}`" f"Eval id `{eval_case_id}` already exists in `{eval_set_id}`"
@ -247,6 +259,87 @@ class LocalEvalSetsManager(EvalSetsManager):
eval_set_file_path = self._get_eval_set_file_path(app_name, eval_set_id) eval_set_file_path = self._get_eval_set_file_path(app_name, eval_set_id)
self._write_eval_set(eval_set_file_path, eval_set) self._write_eval_set(eval_set_file_path, eval_set)
@override
def get_eval_case(
    self, app_name: str, eval_set_id: str, eval_case_id: str
) -> Optional[EvalCase]:
  """Returns an EvalCase if found, otherwise None.

  None is returned both when the eval set cannot be loaded and when the eval
  set has no eval case whose `eval_id` equals `eval_case_id`.
  """
  eval_set = self.get_eval_set(app_name, eval_set_id)
  if not eval_set:
    return None

  # First eval case with a matching id wins; None when nothing matches.
  matching_cases = (
      candidate
      for candidate in eval_set.eval_cases
      if candidate.eval_id == eval_case_id
  )
  return next(matching_cases, None)
@override
def update_eval_case(
    self, app_name: str, eval_set_id: str, updated_eval_case: EvalCase
):
  """Updates an existing EvalCase given the app_name and eval_set_id.

  The eval case to replace is the first one in the eval set whose `eval_id`
  equals `updated_eval_case.eval_id`. The stale entry is removed and the
  updated eval case is appended at the end of the eval set, which is then
  written back to its file.

  Args:
    app_name: Name of the app the eval set belongs to.
    eval_set_id: Id of the eval set that holds the eval case.
    updated_eval_case: The new content for the eval case.

  Raises:
    NotFoundError: If the eval set or the eval case is not found.
  """
  eval_case_id = updated_eval_case.eval_id
  not_found_message = (
      f"Eval Set `{eval_set_id}` or Eval id `{eval_case_id}` not found."
  )

  # Load the eval set exactly once. Looking the case up through
  # `get_eval_case` and then re-loading the set would read the file twice and
  # would dereference None if the file disappeared between the two reads.
  eval_set = self.get_eval_set(app_name, eval_set_id)
  if not eval_set:
    raise NotFoundError(not_found_message)

  # Locate the stale entry by id within the set that is about to be modified.
  stale_index = next(
      (
          index
          for index, eval_case in enumerate(eval_set.eval_cases)
          if eval_case.eval_id == eval_case_id
      ),
      None,
  )
  if stale_index is None:
    raise NotFoundError(not_found_message)

  # Remove the stale eval case and add the updated one to the eval set.
  del eval_set.eval_cases[stale_index]
  eval_set.eval_cases.append(updated_eval_case)

  # Persist the eval set.
  eval_set_file_path = self._get_eval_set_file_path(app_name, eval_set_id)
  self._write_eval_set(eval_set_file_path, eval_set)
@override
def delete_eval_case(
    self, app_name: str, eval_set_id: str, eval_case_id: str
):
  """Deletes the given EvalCase identified by app_name, eval_set_id and eval_case_id.

  The first eval case in the eval set whose `eval_id` equals `eval_case_id`
  is removed, and the eval set is then written back to its file.

  Args:
    app_name: Name of the app the eval set belongs to.
    eval_set_id: Id of the eval set that holds the eval case.
    eval_case_id: Id of the eval case to delete.

  Raises:
    NotFoundError: If the eval set or the eval case to delete is not found.
  """
  not_found_message = (
      f"Eval Set `{eval_set_id}` or Eval id `{eval_case_id}` not found."
  )

  # Load the eval set exactly once. Looking the case up through
  # `get_eval_case` and then re-loading the set would read the file twice and
  # would dereference None if the file disappeared between the two reads.
  eval_set = self.get_eval_set(app_name, eval_set_id)
  if not eval_set:
    raise NotFoundError(not_found_message)

  # Locate the entry by id within the set that is about to be modified.
  doomed_index = next(
      (
          index
          for index, eval_case in enumerate(eval_set.eval_cases)
          if eval_case.eval_id == eval_case_id
      ),
      None,
  )
  if doomed_index is None:
    raise NotFoundError(not_found_message)

  logger.info(
      "EvalCase`%s` was found in the eval set. It will be removed"
      " permanently.",
      eval_case_id,
  )
  del eval_set.eval_cases[doomed_index]

  # Persist the eval set without the deleted eval case.
  eval_set_file_path = self._get_eval_set_file_path(app_name, eval_set_id)
  self._write_eval_set(eval_set_file_path, eval_set)
def _get_eval_set_file_path(self, app_name: str, eval_set_id: str) -> str: def _get_eval_set_file_path(self, app_name: str, eval_set_id: str) -> str:
return os.path.join( return os.path.join(
self._agents_dir, self._agents_dir,