From 9928cafe3234935994c92d0a9d8be585ed63afb7 Mon Sep 17 00:00:00 2001 From: Google Team Member Date: Wed, 21 May 2025 11:29:02 -0700 Subject: [PATCH] Refactor eval results reporting with Eval Set Results manager. PiperOrigin-RevId: 761601525 --- src/google/adk/cli/cli_eval.py | 2 +- src/google/adk/cli/fast_api.py | 67 ++--------- .../evaluation/eval_set_results_manager.py | 44 ++++++++ .../local_eval_set_results_manager.py | 104 ++++++++++++++++++ 4 files changed, 161 insertions(+), 56 deletions(-) create mode 100644 src/google/adk/evaluation/eval_set_results_manager.py create mode 100644 src/google/adk/evaluation/local_eval_set_results_manager.py diff --git a/src/google/adk/cli/cli_eval.py b/src/google/adk/cli/cli_eval.py index 297321a..c0c5651 100644 --- a/src/google/adk/cli/cli_eval.py +++ b/src/google/adk/cli/cli_eval.py @@ -82,7 +82,7 @@ class EvalCaseResult(common.BaseModel): """The eval case id.""" final_eval_status: EvalStatus - """Final evalu status for this eval case.""" + """Final eval status for this eval case.""" eval_metric_results: list[tuple[EvalMetric, EvalMetricResult]] = Field( deprecated=True, diff --git a/src/google/adk/cli/fast_api.py b/src/google/adk/cli/fast_api.py index 9ba608e..51cbf25 100644 --- a/src/google/adk/cli/fast_api.py +++ b/src/google/adk/cli/fast_api.py @@ -64,6 +64,7 @@ from ..agents.run_config import StreamingMode from ..artifacts.in_memory_artifact_service import InMemoryArtifactService from ..evaluation.eval_case import EvalCase from ..evaluation.eval_case import SessionInput +from ..evaluation.local_eval_set_results_manager import LocalEvalSetResultsManager from ..evaluation.local_eval_sets_manager import LocalEvalSetsManager from ..events.event import Event from ..memory.in_memory_memory_service import InMemoryMemoryService @@ -322,6 +323,7 @@ def get_fast_api_app( memory_service = InMemoryMemoryService() eval_sets_manager = LocalEvalSetsManager(agent_dir=agent_dir) + eval_set_results_manager = 
LocalEvalSetResultsManager(agent_dir=agent_dir) # Build the Session service agent_engine_id = "" @@ -594,32 +596,10 @@ def get_fast_api_app( ) eval_case_results.append(eval_case_result) - timestamp = time.time() - eval_set_result_name = app_name + "_" + eval_set_id + "_" + str(timestamp) - eval_set_result = EvalSetResult( - eval_set_result_id=eval_set_result_name, - eval_set_result_name=eval_set_result_name, - eval_set_id=eval_set_id, - eval_case_results=eval_case_results, - creation_timestamp=timestamp, + eval_set_results_manager.save_eval_set_result( + app_name, eval_set_id, eval_case_results ) - # Write eval result file, with eval_set_result_name. - app_eval_history_dir = os.path.join( - agent_dir, app_name, ".adk", "eval_history" - ) - if not os.path.exists(app_eval_history_dir): - os.makedirs(app_eval_history_dir) - # Convert to json and write to file. - eval_set_result_json = eval_set_result.model_dump_json() - eval_set_result_file_path = os.path.join( - app_eval_history_dir, - eval_set_result_name + _EVAL_SET_RESULT_FILE_EXTENSION, - ) - logger.info("Writing eval result to file: %s", eval_set_result_file_path) - with open(eval_set_result_file_path, "w") as f: - f.write(json.dumps(eval_set_result_json, indent=2)) - return run_eval_results @app.get( @@ -631,25 +611,14 @@ def get_fast_api_app( eval_result_id: str, ) -> EvalSetResult: """Gets the eval result for the given eval id.""" - # Load the eval set file data - maybe_eval_result_file_path = ( - os.path.join( - agent_dir, app_name, ".adk", "eval_history", eval_result_id - ) - + _EVAL_SET_RESULT_FILE_EXTENSION - ) - if not os.path.exists(maybe_eval_result_file_path): - raise HTTPException( - status_code=404, - detail=f"Eval result `{eval_result_id}` not found.", - ) - with open(maybe_eval_result_file_path, "r") as file: - eval_result_data = json.load(file) # Load JSON into a list try: - eval_result = EvalSetResult.model_validate_json(eval_result_data) - return eval_result - except ValidationError as e: - 
logger.exception("get_eval_result validation error: %s", e) + return eval_set_results_manager.get_eval_set_result( + app_name, eval_result_id + ) + except ValueError as ve: + raise HTTPException(status_code=404, detail=str(ve)) from ve + except ValidationError as ve: + raise HTTPException(status_code=500, detail=str(ve)) from ve @app.get( "/apps/{app_name}/eval_results", @@ -657,19 +626,7 @@ def get_fast_api_app( ) def list_eval_results(app_name: str) -> list[str]: """Lists all eval results for the given app.""" - app_eval_history_directory = os.path.join( - agent_dir, app_name, ".adk", "eval_history" - ) - - if not os.path.exists(app_eval_history_directory): - return [] - - eval_result_files = [ - file.removesuffix(_EVAL_SET_RESULT_FILE_EXTENSION) - for file in os.listdir(app_eval_history_directory) - if file.endswith(_EVAL_SET_RESULT_FILE_EXTENSION) - ] - return eval_result_files + return eval_set_results_manager.list_eval_set_results(app_name) @app.delete("/apps/{app_name}/users/{user_id}/sessions/{session_id}") async def delete_session(app_name: str, user_id: str, session_id: str): diff --git a/src/google/adk/evaluation/eval_set_results_manager.py b/src/google/adk/evaluation/eval_set_results_manager.py new file mode 100644 index 0000000..5c907f0 --- /dev/null +++ b/src/google/adk/evaluation/eval_set_results_manager.py @@ -0,0 +1,44 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
from abc import ABC, abstractmethod

from ..cli.cli_eval import EvalCaseResult
from ..cli.cli_eval import EvalSetResult


class EvalSetResultsManager(ABC):
    """Abstract interface for storing and retrieving Eval Set Results.

    Concrete subclasses decide where results live; this class only fixes
    the three operations every backend has to offer.
    """

    @abstractmethod
    def save_eval_set_result(
        self,
        app_name: str,
        eval_set_id: str,
        eval_case_results: list[EvalCaseResult],
    ) -> None:
        """Builds a new EvalSetResult from `eval_case_results` and stores it.

        Args:
            app_name: Name of the app the results belong to.
            eval_set_id: Id of the eval set that produced the results.
            eval_case_results: Per-case results to bundle into the new
                EvalSetResult.
        """
        raise NotImplementedError

    @abstractmethod
    def get_eval_set_result(
        self, app_name: str, eval_set_result_id: str
    ) -> EvalSetResult:
        """Fetches the EvalSetResult stored under the given app and result id.

        Args:
            app_name: Name of the app the result belongs to.
            eval_set_result_id: Id of the stored eval set result.

        Returns:
            The matching EvalSetResult.
        """
        raise NotImplementedError

    @abstractmethod
    def list_eval_set_results(self, app_name: str) -> list[str]:
        """Lists the ids of all eval set results stored for `app_name`."""
        raise NotImplementedError


# ---------------------------------------------------------------------------
# Patch boundary retained from the original diff (next new file):
#   diff --git a/src/google/adk/evaluation/local_eval_set_results_manager.py b/src/google/adk/evaluation/local_eval_set_results_manager.py
#   new file mode 100644
#   index 0000000..a7538f1
#   --- /dev/null
#   +++ b/src/google/adk/evaluation/local_eval_set_results_manager.py
#   @@ -0,0 +1,104 @@
# ---------------------------------------------------------------------------
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
import logging
import os
import time

from typing_extensions import override

from ..cli.cli_eval import EvalCaseResult
from ..cli.cli_eval import EvalSetResult
from .eval_set_results_manager import EvalSetResultsManager

logger = logging.getLogger("google_adk." + __name__)

_ADK_EVAL_HISTORY_DIR = ".adk/eval_history"
_EVAL_SET_RESULT_FILE_EXTENSION = ".evalset_result.json"


class LocalEvalSetResultsManager(EvalSetResultsManager):
    """An EvalSetResult manager that stores eval set results locally on disk.

    Results of an app are kept in `<agent_dir>/<app_name>/.adk/eval_history`,
    one `<eval_set_result_id>.evalset_result.json` file per result.
    """

    def __init__(self, agent_dir: str):
        """Stores the root directory under which the per-app folders live.

        Args:
            agent_dir: Directory that contains one sub-directory per app.
        """
        self._agent_dir = agent_dir

    @override
    def save_eval_set_result(
        self,
        app_name: str,
        eval_set_id: str,
        eval_case_results: list[EvalCaseResult],
    ) -> None:
        """Creates and saves a new EvalSetResult given eval_case_results.

        The result id (also used as its name and file stem) is
        `<app_name>_<eval_set_id>_<creation timestamp>`.

        Args:
            app_name: Name of the app the results belong to.
            eval_set_id: Id of the eval set that produced the results.
            eval_case_results: Per-case results to store.
        """
        timestamp = time.time()
        eval_set_result_name = f"{app_name}_{eval_set_id}_{timestamp}"
        eval_set_result = EvalSetResult(
            eval_set_result_id=eval_set_result_name,
            eval_set_result_name=eval_set_result_name,
            eval_set_id=eval_set_id,
            eval_case_results=eval_case_results,
            creation_timestamp=timestamp,
        )

        app_eval_history_dir = self._get_eval_history_dir(app_name)
        # exist_ok=True removes the check-then-create race: two concurrent
        # saves for a new app no longer fail with FileExistsError.
        os.makedirs(app_eval_history_dir, exist_ok=True)

        eval_set_result_file_path = os.path.join(
            app_eval_history_dir,
            eval_set_result_name + _EVAL_SET_RESULT_FILE_EXTENSION,
        )
        logger.info("Writing eval result to file: %s", eval_set_result_file_path)

        # NOTE: model_dump_json() already returns a JSON string, so the
        # json.dumps() call below wraps it a second time and the file holds a
        # single JSON string literal. This is kept on purpose:
        # get_eval_set_result() and result files written before this class
        # existed use exactly this layout.
        eval_set_result_json = eval_set_result.model_dump_json()
        with open(eval_set_result_file_path, "w", encoding="utf-8") as f:
            f.write(json.dumps(eval_set_result_json, indent=2))

    @override
    def get_eval_set_result(
        self, app_name: str, eval_set_result_id: str
    ) -> EvalSetResult:
        """Returns an EvalSetResult identified by app_name and eval_set_result_id.

        Args:
            app_name: Name of the app the result belongs to.
            eval_set_result_id: Id of the stored result (file name without the
                `.evalset_result.json` extension).

        Returns:
            The EvalSetResult parsed from the stored file.

        Raises:
            ValueError: If no result file exists for `eval_set_result_id`.
        """
        maybe_eval_result_file_path = (
            os.path.join(
                self._get_eval_history_dir(app_name),
                eval_set_result_id,
            )
            + _EVAL_SET_RESULT_FILE_EXTENSION
        )
        # Open directly instead of checking os.path.exists() first, so a file
        # removed between the check and the open still maps to ValueError.
        try:
            with open(maybe_eval_result_file_path, "r", encoding="utf-8") as file:
                # The file holds a JSON string literal (see
                # save_eval_set_result), so json.load() yields the inner JSON
                # text, which is then validated.
                eval_result_data = json.load(file)
        except FileNotFoundError as err:
            raise ValueError(
                f"Eval set result `{eval_set_result_id}` does not exist."
            ) from err
        return EvalSetResult.model_validate_json(eval_result_data)

    @override
    def list_eval_set_results(self, app_name: str) -> list[str]:
        """Returns the eval result ids that belong to the given app_name.

        Args:
            app_name: Name of the app whose results are listed.

        Returns:
            The ids (file names without the `.evalset_result.json` extension)
            in sorted order; an empty list if the app has no eval history
            directory.
        """
        app_eval_history_directory = self._get_eval_history_dir(app_name)
        try:
            entries = os.listdir(app_eval_history_directory)
        except FileNotFoundError:
            # No eval has been saved for this app yet.
            return []

        # os.listdir() order is arbitrary; sort for a deterministic result.
        return sorted(
            entry.removesuffix(_EVAL_SET_RESULT_FILE_EXTENSION)
            for entry in entries
            if entry.endswith(_EVAL_SET_RESULT_FILE_EXTENSION)
        )

    def _get_eval_history_dir(self, app_name: str) -> str:
        """Returns the directory holding the eval result files of `app_name`."""
        return os.path.join(self._agent_dir, app_name, _ADK_EVAL_HISTORY_DIR)