Refactor eval results reporting with Eval Set Results manager.

PiperOrigin-RevId: 761601525
2025-07-13 15:14:50 -06:00 · 2025-05-21 11:29:02 -07:00 · 2025-05-21 11:29:02 -07:00 · 9928cafe32
commit 9928cafe32
parent c5a0437745
4 changed files with 161 additions and 56 deletions
--- a/src/google/adk/cli/cli_eval.py
+++ b/src/google/adk/cli/cli_eval.py
@ -82,7 +82,7 @@ class EvalCaseResult(common.BaseModel):
  """The eval case id."""

  final_eval_status: EvalStatus
-  """Final evalu status for this eval case."""
+  """Final eval status for this eval case."""

  eval_metric_results: list[tuple[EvalMetric, EvalMetricResult]] = Field(
      deprecated=True,
--- a/src/google/adk/cli/fast_api.py
+++ b/src/google/adk/cli/fast_api.py
@ -64,6 +64,7 @@ from ..agents.run_config import StreamingMode
 from ..artifacts.in_memory_artifact_service import InMemoryArtifactService
 from ..evaluation.eval_case import EvalCase
 from ..evaluation.eval_case import SessionInput
+from ..evaluation.local_eval_set_results_manager import LocalEvalSetResultsManager
 from ..evaluation.local_eval_sets_manager import LocalEvalSetsManager
 from ..events.event import Event
 from ..memory.in_memory_memory_service import InMemoryMemoryService
@ -322,6 +323,7 @@ def get_fast_api_app(
  memory_service = InMemoryMemoryService()

  eval_sets_manager = LocalEvalSetsManager(agent_dir=agent_dir)
+  eval_set_results_manager = LocalEvalSetResultsManager(agent_dir=agent_dir)

  # Build the Session service
  agent_engine_id = ""
@ -594,32 +596,10 @@ def get_fast_api_app(
      )
      eval_case_results.append(eval_case_result)

-    timestamp = time.time()
-    eval_set_result_name = app_name + "_" + eval_set_id + "_" + str(timestamp)
-    eval_set_result = EvalSetResult(
-        eval_set_result_id=eval_set_result_name,
-        eval_set_result_name=eval_set_result_name,
-        eval_set_id=eval_set_id,
-        eval_case_results=eval_case_results,
-        creation_timestamp=timestamp,
+    eval_set_results_manager.save_eval_set_result(
+        app_name, eval_set_id, eval_case_results
    )

-    # Write eval result file, with eval_set_result_name.
-    app_eval_history_dir = os.path.join(
-        agent_dir, app_name, ".adk", "eval_history"
-    )
-    if not os.path.exists(app_eval_history_dir):
-      os.makedirs(app_eval_history_dir)
-    # Convert to json and write to file.
-    eval_set_result_json = eval_set_result.model_dump_json()
-    eval_set_result_file_path = os.path.join(
-        app_eval_history_dir,
-        eval_set_result_name + _EVAL_SET_RESULT_FILE_EXTENSION,
-    )
-    logger.info("Writing eval result to file: %s", eval_set_result_file_path)
-    with open(eval_set_result_file_path, "w") as f:
-      f.write(json.dumps(eval_set_result_json, indent=2))
-
    return run_eval_results

  @app.get(
@ -631,25 +611,14 @@ def get_fast_api_app(
      eval_result_id: str,
  ) -> EvalSetResult:
    """Gets the eval result for the given eval id."""
-    # Load the eval set file data
-    maybe_eval_result_file_path = (
-        os.path.join(
-            agent_dir, app_name, ".adk", "eval_history", eval_result_id
-        )
-        + _EVAL_SET_RESULT_FILE_EXTENSION
-    )
-    if not os.path.exists(maybe_eval_result_file_path):
-      raise HTTPException(
-          status_code=404,
-          detail=f"Eval result `{eval_result_id}` not found.",
-      )
-    with open(maybe_eval_result_file_path, "r") as file:
-      eval_result_data = json.load(file)  # Load JSON into a list
    try:
-      eval_result = EvalSetResult.model_validate_json(eval_result_data)
-      return eval_result
-    except ValidationError as e:
-      logger.exception("get_eval_result validation error: %s", e)
+      return eval_set_results_manager.get_eval_set_result(
+          app_name, eval_result_id
+      )
+    except ValueError as ve:
+      raise HTTPException(status_code=404, detail=str(ve)) from ve
+    except ValidationError as ve:
+      raise HTTPException(status_code=500, detail=str(ve)) from ve

  @app.get(
      "/apps/{app_name}/eval_results",
@ -657,19 +626,7 @@ def get_fast_api_app(
  )
  def list_eval_results(app_name: str) -> list[str]:
    """Lists all eval results for the given app."""
-    app_eval_history_directory = os.path.join(
-        agent_dir, app_name, ".adk", "eval_history"
-    )
-
-    if not os.path.exists(app_eval_history_directory):
-      return []
-
-    eval_result_files = [
-        file.removesuffix(_EVAL_SET_RESULT_FILE_EXTENSION)
-        for file in os.listdir(app_eval_history_directory)
-        if file.endswith(_EVAL_SET_RESULT_FILE_EXTENSION)
-    ]
-    return eval_result_files
+    return eval_set_results_manager.list_eval_set_results(app_name)

  @app.delete("/apps/{app_name}/users/{user_id}/sessions/{session_id}")
  async def delete_session(app_name: str, user_id: str, session_id: str):
--- a/src/google/adk/evaluation/eval_set_results_manager.py
+++ b/src/google/adk/evaluation/eval_set_results_manager.py
@ -0,0 +1,44 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from abc import ABC, abstractmethod
+
+from ..cli.cli_eval import EvalCaseResult
+from ..cli.cli_eval import EvalSetResult
+
+
+class EvalSetResultsManager(ABC):
+  """Abstract interface for saving, fetching and listing eval set results."""
+
+  @abstractmethod
+  def save_eval_set_result(
+      self,
+      app_name: str,
+      eval_set_id: str,
+      eval_case_results: list[EvalCaseResult],
+  ) -> None:
+    """Builds a new EvalSetResult from eval_case_results and stores it."""
+    raise NotImplementedError
+
+  @abstractmethod
+  def get_eval_set_result(
+      self, app_name: str, eval_set_result_id: str
+  ) -> EvalSetResult:
+    """Returns the EvalSetResult stored for app_name under eval_set_result_id."""
+    raise NotImplementedError
+
+  @abstractmethod
+  def list_eval_set_results(self, app_name: str) -> list[str]:
+    """Returns the ids of the eval set results that belong to app_name."""
+    raise NotImplementedError
--- a/src/google/adk/evaluation/local_eval_set_results_manager.py
+++ b/src/google/adk/evaluation/local_eval_set_results_manager.py
@ -0,0 +1,104 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import json
+import logging
+import os
+import time
+from typing_extensions import override
+from ..cli.cli_eval import EvalCaseResult
+from ..cli.cli_eval import EvalSetResult
+from .eval_set_results_manager import EvalSetResultsManager
+
+logger = logging.getLogger("google_adk." + __name__)
+
+_ADK_EVAL_HISTORY_DIR = ".adk/eval_history"
+_EVAL_SET_RESULT_FILE_EXTENSION = ".evalset_result.json"
+
+
+class LocalEvalSetResultsManager(EvalSetResultsManager):
+  """An EvalSetResult manager that stores eval set results locally on disk.
+
+  Each result is one file named `<eval_set_result_id>.evalset_result.json`
+  inside `<agent_dir>/<app_name>/.adk/eval_history`.
+  """
+
+  def __init__(self, agent_dir: str):
+    """Stores agent_dir, the base directory the per-app paths are built from."""
+    self._agent_dir = agent_dir
+
+  @override
+  def save_eval_set_result(
+      self,
+      app_name: str,
+      eval_set_id: str,
+      eval_case_results: list[EvalCaseResult],
+  ) -> None:
+    """Creates and saves a new EvalSetResult given eval_case_results.
+
+    The result's id and name (and the stem of the file it is written to) are
+    `<app_name>_<eval_set_id>_<creation timestamp>`.
+    """
+    timestamp = time.time()
+    eval_set_result_name = f"{app_name}_{eval_set_id}_{timestamp}"
+    eval_set_result = EvalSetResult(
+        eval_set_result_id=eval_set_result_name,
+        eval_set_result_name=eval_set_result_name,
+        eval_set_id=eval_set_id,
+        eval_case_results=eval_case_results,
+        creation_timestamp=timestamp,
+    )
+    app_eval_history_dir = self._get_eval_history_dir(app_name)
+    # exist_ok=True: do not fail if the directory shows up between a separate
+    # existence check and the create call.
+    os.makedirs(app_eval_history_dir, exist_ok=True)
+    eval_set_result_file_path = os.path.join(
+        app_eval_history_dir,
+        eval_set_result_name + _EVAL_SET_RESULT_FILE_EXTENSION,
+    )
+    logger.info("Writing eval result to file: %s", eval_set_result_file_path)
+    # On-disk format: a JSON-encoded *string* whose value is the model's JSON
+    # dump. get_eval_set_result decodes it in the mirror order, so the format
+    # is kept as is to stay readable alongside previously written files.
+    eval_set_result_json = eval_set_result.model_dump_json()
+    with open(eval_set_result_file_path, "w", encoding="utf-8") as f:
+      f.write(json.dumps(eval_set_result_json, indent=2))
+
+  @override
+  def get_eval_set_result(
+      self, app_name: str, eval_set_result_id: str
+  ) -> EvalSetResult:
+    """Returns an EvalSetResult identified by app_name and eval_set_result_id.
+
+    Raises:
+      ValueError: If no result file exists for eval_set_result_id, or the id
+        contains a directory component.
+    """
+    # Ids are plain file stems (list_eval_set_results only ever returns such
+    # names). Reject anything with a directory part so a caller-supplied id
+    # cannot address files outside the app's history directory.
+    if os.path.basename(eval_set_result_id) != eval_set_result_id:
+      raise ValueError(
+          f"Eval set result `{eval_set_result_id}` does not exist."
+      )
+    eval_result_file_path = os.path.join(
+        self._get_eval_history_dir(app_name),
+        eval_set_result_id + _EVAL_SET_RESULT_FILE_EXTENSION,
+    )
+    try:
+      with open(eval_result_file_path, "r", encoding="utf-8") as file:
+        # Yields the JSON string written by save_eval_set_result.
+        eval_result_data = json.load(file)
+    except FileNotFoundError as e:
+      raise ValueError(
+          f"Eval set result `{eval_set_result_id}` does not exist."
+      ) from e
+    # NOTE(review): validation failures raised here propagate to the caller
+    # unchanged - presumably pydantic's ValidationError; confirm.
+    return EvalSetResult.model_validate_json(eval_result_data)
+
+  @override
+  def list_eval_set_results(self, app_name: str) -> list[str]:
+    """Returns the eval result ids that belong to the given app_name.
+
+    Returns an empty list when the app has no eval history directory.
+    """
+    try:
+      file_names = os.listdir(self._get_eval_history_dir(app_name))
+    except FileNotFoundError:
+      return []
+    return [
+        name.removesuffix(_EVAL_SET_RESULT_FILE_EXTENSION)
+        for name in file_names
+        if name.endswith(_EVAL_SET_RESULT_FILE_EXTENSION)
+    ]
+
+  def _get_eval_history_dir(self, app_name: str) -> str:
+    """Returns the directory holding app_name's eval set result files."""
+    return os.path.join(self._agent_dir, app_name, _ADK_EVAL_HISTORY_DIR)