ADK changes

PiperOrigin-RevId: 764776124
2025-07-13 23:17:35 -06:00 · 2025-05-29 10:03:49 -07:00 · 2025-05-29 10:03:49 -07:00 · ac52eab88e
commit ac52eab88e
parent a9345a072e
1 changed files with 228 additions and 0 deletions
--- a/tests/unittests/fast_api/test_fast_api.py
+++ b/tests/unittests/fast_api/test_fast_api.py
@ -21,6 +21,10 @@ from unittest.mock import patch
 from fastapi.testclient import TestClient
 from google.adk.agents.base_agent import BaseAgent
 from google.adk.agents.run_config import RunConfig
+from google.adk.evaluation.eval_case import EvalCase
+from google.adk.evaluation.eval_case import Invocation
+from google.adk.evaluation.eval_set import EvalSet
+from google.adk.evaluation.eval_result import EvalSetResult
 from google.adk.cli.fast_api import get_fast_api_app
 from google.adk.events import Event
 from google.adk.runners import Runner
@ -281,12 +285,107 @@ def mock_memory_service():
  return MagicMock()


+@pytest.fixture
+def mock_eval_sets_manager():
+  """Create a mock eval sets manager."""
+
+  # Storage for eval sets.
+  eval_sets = {}
+
+  class MockEvalSetsManager:
+    """Mock eval sets manager."""
+
+    def create_eval_set(self, app_name, eval_set_id):
+      """Create an eval set."""
+      if app_name not in eval_sets:
+        eval_sets[app_name] = {}
+
+      if eval_set_id in eval_sets[app_name]:
+        raise ValueError(f"Eval set {eval_set_id} already exists.")
+
+      eval_sets[app_name][eval_set_id] = EvalSet(
+          eval_set_id=eval_set_id, eval_cases=[]
+      )
+      return eval_set_id
+
+    def get_eval_set(self, app_name, eval_set_id):
+      """Get an eval set."""
+      if app_name not in eval_sets:
+        raise ValueError(f"App {app_name} not found.")
+      if eval_set_id not in eval_sets[app_name]:
+        raise ValueError(f"Eval set {eval_set_id} not found in app {app_name}.")
+      return eval_sets[app_name][eval_set_id]
+
+    def list_eval_sets(self, app_name):
+      """List eval sets."""
+      if app_name not in eval_sets:
+        raise ValueError(f"App {app_name} not found.")
+      return list(eval_sets[app_name].keys())
+
+    def add_eval_case(self, app_name, eval_set_id, eval_case):
+      """Add an eval case to an eval set."""
+      if app_name not in eval_sets:
+        raise ValueError(f"App {app_name} not found.")
+      if eval_set_id not in eval_sets[app_name]:
+        raise ValueError(f"Eval set {eval_set_id} not found in app {app_name}.")
+      eval_sets[app_name][eval_set_id].eval_cases.append(eval_case)
+
+  return MockEvalSetsManager()
+
+
+@pytest.fixture
+def mock_eval_set_results_manager():
+  """Create a mock eval set results manager."""
+
+  # Storage for eval set results.
+  eval_set_results = {}
+
+  class MockEvalSetResultsManager:
+    """Mock eval set results manager."""
+
+    def save_eval_set_result(
+        self, app_name, eval_set_id, eval_case_results
+    ):
+      if app_name not in eval_set_results:
+        eval_set_results[app_name] = {}
+      eval_set_result_id = f"{app_name}_{eval_set_id}_eval_result"
+      eval_set_result = EvalSetResult(
+          eval_set_result_id=eval_set_result_id,
+          eval_set_result_name=eval_set_result_id,
+          eval_set_id=eval_set_id,
+          eval_case_results=eval_case_results,
+      )
+      if eval_set_result_id not in eval_set_results[app_name]:
+        eval_set_results[app_name][eval_set_result_id] = eval_set_result
+      else:
+        eval_set_results[app_name][eval_set_result_id].append(eval_set_result)
+
+    def get_eval_set_result(self, app_name, eval_set_result_id):
+      if app_name not in eval_set_results:
+        raise ValueError(f"App {app_name} not found.")
+      if eval_set_result_id not in eval_set_results[app_name]:
+        raise ValueError(
+            f"Eval set result {eval_set_result_id} not found in app {app_name}."
+        )
+      return eval_set_results[app_name][eval_set_result_id]
+
+    def list_eval_set_results(self, app_name):
+      """List eval set results."""
+      if app_name not in eval_set_results:
+        raise ValueError(f"App {app_name} not found.")
+      return list(eval_set_results[app_name].keys())
+
+  return MockEvalSetResultsManager()
+
+
@pytest.fixture
 def test_app(
    mock_session_service,
    mock_artifact_service,
    mock_memory_service,
    mock_agent_loader,
+    mock_eval_sets_manager,
+    mock_eval_set_results_manager,
 ):
  """Create a TestClient for the FastAPI app without starting a server."""

@ -309,6 +408,14 @@ def test_app(
          "google.adk.cli.fast_api.AgentLoader",
          return_value=mock_agent_loader,
      ),
+      patch(
+          "google.adk.cli.fast_api.LocalEvalSetsManager",
+          return_value=mock_eval_sets_manager,
+      ),
+      patch(
+          "google.adk.cli.fast_api.LocalEvalSetResultsManager",
+          return_value=mock_eval_set_results_manager,
+      ),
  ):
    # Get the FastAPI app, but don't actually run it
    app = get_fast_api_app(
@ -339,6 +446,35 @@ async def create_test_session(
  return test_session_info


+@pytest.fixture
+async def create_test_eval_set(
+    test_app, test_session_info, mock_eval_sets_manager
+):
+  """Create a test eval set using the mocked eval sets manager."""
+  _ = mock_eval_sets_manager.create_eval_set(
+      app_name=test_session_info["app_name"],
+      eval_set_id="test_eval_set_id",
+  )
+  test_eval_case = EvalCase(
+      eval_id="test_eval_case_id",
+      conversation=[
+          Invocation(
+              invocation_id="test_invocation_id",
+              user_content=types.Content(
+                  parts=[types.Part(text="test_user_content")],
+                  role="user",
+              ),
+          )
+      ],
+  )
+  _ = mock_eval_sets_manager.add_eval_case(
+      app_name=test_session_info["app_name"],
+      eval_set_id="test_eval_set_id",
+      eval_case=test_eval_case,
+  )
+  return test_session_info
+
+
 #################################################
 # Test Cases
 #################################################
@ -479,6 +615,98 @@ def test_list_artifact_names(test_app, create_test_session):
  logger.info(f"Listed {len(data)} artifacts")


+def test_get_eval_set_not_found(test_app):
+  """Test getting an eval set that doesn't exist."""
+  url = "/apps/test_app_name/eval_sets/test_eval_set_id_not_found"
+  response = test_app.get(url)
+  assert response.status_code == 404
+
+
+def test_create_eval_set(test_app, test_session_info):
+  """Test creating an eval set."""
+  url = f"/apps/{test_session_info['app_name']}/eval_sets/test_eval_set_id"
+  response = test_app.post(url)
+
+  # Verify the response
+  assert response.status_code == 200
+
+
+def test_list_eval_sets(test_app, create_test_eval_set):
+  """Test get eval set."""
+  info = create_test_eval_set
+  url = f"/apps/{info['app_name']}/eval_sets"
+  response = test_app.get(url)
+
+  # Verify the response
+  assert response.status_code == 200
+  data = response.json()
+  assert isinstance(data, list)
+  assert len(data) == 1
+  assert data[0] == "test_eval_set_id"
+
+
+def test_get_eval_set_result_not_found(test_app):
+  """Test getting an eval set result that doesn't exist."""
+  url = "/apps/test_app_name/eval_results/test_eval_result_id_not_found"
+  response = test_app.get(url)
+  assert response.status_code == 404
+
+
+def test_run_eval(test_app, create_test_eval_set):
+  """Test running an eval."""
+
+  # Helper function to verify eval case result.
+  def verify_eval_case_result(actual_eval_case_result):
+    expected_eval_case_result = {
+        "evalSetId": "test_eval_set_id",
+        "evalId": "test_eval_case_id",
+        "finalEvalStatus": 1,
+        "overallEvalMetricResults": [{
+            "metricName": "tool_trajectory_avg_score",
+            "threshold": 0.5,
+            "score": 1.0,
+            "evalStatus": 1,
+        }],
+    }
+    for k, v in expected_eval_case_result.items():
+      assert actual_eval_case_result[k] == v
+
+  info = create_test_eval_set
+  url = f"/apps/{info['app_name']}/eval_sets/test_eval_set_id/run_eval"
+  payload = {
+      "eval_ids": ["test_eval_case_id"],
+      "eval_metrics": [
+          {"metric_name": "tool_trajectory_avg_score", "threshold": 0.5}
+      ],
+  }
+  response = test_app.post(url, json=payload)
+
+  # Verify the response
+  assert response.status_code == 200
+
+  data = response.json()
+  assert len(data) == 1
+  verify_eval_case_result(data[0])
+
+  # Verify the eval set result is saved via get_eval_result endpoint.
+  url = f"/apps/{info['app_name']}/eval_results/{info['app_name']}_test_eval_set_id_eval_result"
+  response = test_app.get(url)
+  assert response.status_code == 200
+  data = response.json()
+  assert isinstance(data, dict)
+  assert data["evalSetId"] == "test_eval_set_id"
+  assert data["evalSetResultId"] == f"{info['app_name']}_test_eval_set_id_eval_result"
+  assert len(data["evalCaseResults"]) == 1
+  verify_eval_case_result(data["evalCaseResults"][0])
+
+  # Verify the eval set result is saved via list_eval_results endpoint.
+  url = f"/apps/{info['app_name']}/eval_results"
+  response = test_app.get(url)
+  assert response.status_code == 200
+  data = response.json()
+  assert data == [f"{info['app_name']}_test_eval_set_id_eval_result"]
+
+
 def test_debug_trace(test_app):
  """Test the debug trace endpoint."""
  # This test will likely return 404 since we haven't set up trace data,