fix eval unittest failures

PiperOrigin-RevId: 757872670
Authored by Xiang (Sean) Zhou on 2025-05-12 12:41:44 -07:00; committed by Copybara-Service
parent c39f24f072
commit 5462862795
2 changed files with 233 additions and 183 deletions


@@ -269,25 +269,20 @@ def cli_eval(
   eval_set_to_evals = parse_and_get_evals_to_run(eval_set_file_path)
 
-  async def _collect_async_gen(
-      async_gen_coroutine: Coroutine[
-          AsyncGenerator[EvalResult, None], None, None
-      ],
-  ) -> list[EvalResult]:
-    return [result async for result in async_gen_coroutine]
+  async def _collect_eval_results() -> list[EvalResult]:
+    return [
+        result
+        async for result in run_evals(
+            eval_set_to_evals,
+            root_agent,
+            reset_func,
+            eval_metrics,
+            print_detailed_results=print_detailed_results,
+        )
+    ]
 
   try:
-    eval_results = asyncio.run(
-        _collect_async_gen(
-            run_evals(
-                eval_set_to_evals,
-                root_agent,
-                reset_func,
-                eval_metrics,
-                print_detailed_results=print_detailed_results,
-            )
-        )
-    )
+    eval_results = asyncio.run(_collect_eval_results())
   except ModuleNotFoundError:
     raise click.ClickException(MISSING_EVAL_DEPENDENCIES_MESSAGE)
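Note on the hunk above: asyncio.run() accepts a coroutine, not an async generator, so the collection logic now lives in a plain async helper that iterates run_evals directly instead of passing the generator through a wrapper. A minimal standalone sketch of the same pattern follows; fake_run_evals and collect_results are hypothetical stand-ins, not the ADK API.

import asyncio
from typing import AsyncGenerator


async def fake_run_evals() -> AsyncGenerator[str, None]:
  # Hypothetical stand-in for run_evals: an async generator of eval statuses.
  yield "PASSED"
  yield "FAILED"


async def collect_results() -> list[str]:
  # asyncio.run() needs a coroutine, so the async-for comprehension lives in
  # this helper, mirroring _collect_eval_results above.
  return [status async for status in fake_run_evals()]


if __name__ == "__main__":
  print(asyncio.run(collect_results()))  # ['PASSED', 'FAILED']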


@@ -18,241 +18,296 @@
 from __future__ import annotations
 
 import builtins
+from pathlib import Path
+from types import SimpleNamespace
+from typing import Any
+from typing import Dict
+from typing import List
+from typing import Tuple
+
 import click
+from click.testing import CliRunner
+from google.adk.cli import cli_tools_click
 import pytest
-from google.adk.cli import cli_tools_click
-from pathlib import Path
-from typing import Any, Dict, List, Tuple
-from types import SimpleNamespace
-from click.testing import CliRunner
 
 
 # Helpers
 class _Recorder:
   """Callable that records every invocation."""
 
   def __init__(self) -> None:
     self.calls: List[Tuple[Tuple[Any, ...], Dict[str, Any]]] = []
 
   def __call__(self, *args: Any, **kwargs: Any) -> None:  # noqa: D401
     self.calls.append((args, kwargs))
 
 
 # Fixtures
 @pytest.fixture(autouse=True)
 def _mute_click(monkeypatch: pytest.MonkeyPatch) -> None:
   """Suppress click output during tests."""
   monkeypatch.setattr(click, "echo", lambda *a, **k: None)
   monkeypatch.setattr(click, "secho", lambda *a, **k: None)
 
 
 # validate_exclusive
 def test_validate_exclusive_allows_single() -> None:
   """Providing exactly one exclusive option should pass."""
   ctx = click.Context(cli_tools_click.main)
   param = SimpleNamespace(name="replay")
-  assert cli_tools_click.validate_exclusive(ctx, param, "file.json") == "file.json"
+  assert (
+      cli_tools_click.validate_exclusive(ctx, param, "file.json") == "file.json"
+  )
 
 
 def test_validate_exclusive_blocks_multiple() -> None:
   """Providing two exclusive options should raise UsageError."""
   ctx = click.Context(cli_tools_click.main)
   param1 = SimpleNamespace(name="replay")
   param2 = SimpleNamespace(name="resume")
 
   # First option registers fine
   cli_tools_click.validate_exclusive(ctx, param1, "replay.json")
 
   # Second option triggers conflict
   with pytest.raises(click.UsageError):
     cli_tools_click.validate_exclusive(ctx, param2, "resume.json")
 
 
 # cli create
-def test_cli_create_cmd_invokes_run_cmd(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None:
+def test_cli_create_cmd_invokes_run_cmd(
+    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
+) -> None:
   """`adk create` should forward arguments to cli_create.run_cmd."""
   rec = _Recorder()
   monkeypatch.setattr(cli_tools_click.cli_create, "run_cmd", rec)
 
   app_dir = tmp_path / "my_app"
   runner = CliRunner()
   result = runner.invoke(
       cli_tools_click.main,
       ["create", "--model", "gemini", "--api_key", "key123", str(app_dir)],
   )
   assert result.exit_code == 0
   assert rec.calls, "cli_create.run_cmd must be called"
 
 
 # cli run
 @pytest.mark.asyncio
-async def test_cli_run_invokes_run_cli(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None:
+async def test_cli_run_invokes_run_cli(
+    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
+) -> None:
   """`adk run` should call run_cli via asyncio.run with correct parameters."""
   rec = _Recorder()
   monkeypatch.setattr(cli_tools_click, "run_cli", lambda **kwargs: rec(kwargs))
-  monkeypatch.setattr(cli_tools_click.asyncio, "run", lambda coro: coro)  # pass-through
+  monkeypatch.setattr(
+      cli_tools_click.asyncio, "run", lambda coro: coro
+  )  # pass-through
 
   # create dummy agent directory
   agent_dir = tmp_path / "agent"
   agent_dir.mkdir()
   (agent_dir / "__init__.py").touch()
   (agent_dir / "agent.py").touch()
 
   runner = CliRunner()
   result = runner.invoke(cli_tools_click.main, ["run", str(agent_dir)])
   assert result.exit_code == 0
   assert rec.calls and rec.calls[0][0][0]["agent_folder_name"] == "agent"
 
 
 # cli deploy cloud_run
-def test_cli_deploy_cloud_run_success(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None:
+def test_cli_deploy_cloud_run_success(
+    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
+) -> None:
   """Successful path should call cli_deploy.to_cloud_run once."""
   rec = _Recorder()
   monkeypatch.setattr(cli_tools_click.cli_deploy, "to_cloud_run", rec)
 
   agent_dir = tmp_path / "agent2"
   agent_dir.mkdir()
   runner = CliRunner()
   result = runner.invoke(
       cli_tools_click.main,
       [
           "deploy",
           "cloud_run",
           "--project",
           "proj",
           "--region",
           "asia-northeast1",
           str(agent_dir),
       ],
   )
   assert result.exit_code == 0
   assert rec.calls, "cli_deploy.to_cloud_run must be invoked"
 
 
-def test_cli_deploy_cloud_run_failure(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None:
+def test_cli_deploy_cloud_run_failure(
+    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
+) -> None:
   """Exception from to_cloud_run should be caught and surfaced via click.secho."""
 
   def _boom(*_a: Any, **_k: Any) -> None:  # noqa: D401
     raise RuntimeError("boom")
 
   monkeypatch.setattr(cli_tools_click.cli_deploy, "to_cloud_run", _boom)
 
   # intercept click.secho(error=True) output
   captured: List[str] = []
   monkeypatch.setattr(click, "secho", lambda msg, **__: captured.append(msg))
 
   agent_dir = tmp_path / "agent3"
   agent_dir.mkdir()
   runner = CliRunner()
-  result = runner.invoke(cli_tools_click.main, ["deploy", "cloud_run", str(agent_dir)])
+  result = runner.invoke(
+      cli_tools_click.main, ["deploy", "cloud_run", str(agent_dir)]
+  )
   assert result.exit_code == 0
   assert any("Deploy failed: boom" in m for m in captured)
 
 
 # cli eval
-def test_cli_eval_missing_deps_raises(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None:
+def test_cli_eval_missing_deps_raises(
+    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
+) -> None:
   """If cli_eval sub-module is missing, command should raise ClickException."""
   # Ensure .cli_eval is not importable
   orig_import = builtins.__import__
 
   def _fake_import(name: str, *a: Any, **k: Any):
     if name.endswith(".cli_eval") or name == "google.adk.cli.cli_eval":
       raise ModuleNotFoundError()
     return orig_import(name, *a, **k)
 
   monkeypatch.setattr(builtins, "__import__", _fake_import)
 
 
 # cli web & api_server (uvicorn patched)
 @pytest.fixture()
 def _patch_uvicorn(monkeypatch: pytest.MonkeyPatch) -> _Recorder:
   """Patch uvicorn.Config/Server to avoid real network operations."""
   rec = _Recorder()
 
   class _DummyServer:
-    def __init__(self, *a: Any, **k: Any) -> None: ...
+
+    def __init__(self, *a: Any, **k: Any) -> None:
+      ...
+
     def run(self) -> None:
       rec()
 
-  monkeypatch.setattr(cli_tools_click.uvicorn, "Config", lambda *a, **k: object())
-  monkeypatch.setattr(cli_tools_click.uvicorn, "Server", lambda *_a, **_k: _DummyServer())
-  monkeypatch.setattr(cli_tools_click, "get_fast_api_app", lambda **_k: object())
+  monkeypatch.setattr(
+      cli_tools_click.uvicorn, "Config", lambda *a, **k: object()
+  )
+  monkeypatch.setattr(
+      cli_tools_click.uvicorn, "Server", lambda *_a, **_k: _DummyServer()
+  )
+  monkeypatch.setattr(
+      cli_tools_click, "get_fast_api_app", lambda **_k: object()
+  )
   return rec
 
 
-def test_cli_web_invokes_uvicorn(tmp_path: Path, _patch_uvicorn: _Recorder) -> None:
+def test_cli_web_invokes_uvicorn(
+    tmp_path: Path, _patch_uvicorn: _Recorder
+) -> None:
   """`adk web` should configure and start uvicorn.Server.run."""
   agents_dir = tmp_path / "agents"
   agents_dir.mkdir()
   runner = CliRunner()
   result = runner.invoke(cli_tools_click.main, ["web", str(agents_dir)])
   assert result.exit_code == 0
   assert _patch_uvicorn.calls, "uvicorn.Server.run must be called"
 
 
-def test_cli_api_server_invokes_uvicorn(tmp_path: Path, _patch_uvicorn: _Recorder) -> None:
+def test_cli_api_server_invokes_uvicorn(
+    tmp_path: Path, _patch_uvicorn: _Recorder
+) -> None:
   """`adk api_server` should configure and start uvicorn.Server.run."""
   agents_dir = tmp_path / "agents_api"
   agents_dir.mkdir()
   runner = CliRunner()
   result = runner.invoke(cli_tools_click.main, ["api_server", str(agents_dir)])
   assert result.exit_code == 0
   assert _patch_uvicorn.calls, "uvicorn.Server.run must be called"
 
 
-def test_cli_eval_success_path(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None:
+def test_cli_eval_success_path(
+    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
+) -> None:
   """Test the success path of `adk eval` by fully executing it with a stub module, up to summary generation."""
-  import sys, types
+  import asyncio
+  import sys
+  import types
 
   # stub cli_eval module
   stub = types.ModuleType("google.adk.cli.cli_eval")
 
   class _EvalMetric:
-    def __init__(self, metric_name: str, threshold: float) -> None: ...
+
+    def __init__(self, metric_name: str, threshold: float) -> None:
+      ...
 
   class _EvalResult:
+
     def __init__(self, eval_set_file: str, final_eval_status: str) -> None:
       self.eval_set_file = eval_set_file
       self.final_eval_status = final_eval_status
 
   # minimal enum-like namespace
   _EvalStatus = types.SimpleNamespace(PASSED="PASSED", FAILED="FAILED")
 
   # helper funcs
   stub.EvalMetric = _EvalMetric
   stub.EvalResult = _EvalResult
   stub.EvalStatus = _EvalStatus
   stub.MISSING_EVAL_DEPENDENCIES_MESSAGE = "stub msg"
   stub.get_evaluation_criteria_or_default = lambda _p: {"foo": 1.0}
   stub.get_root_agent = lambda _p: object()
   stub.try_get_reset_func = lambda _p: None
   stub.parse_and_get_evals_to_run = lambda _paths: {"set1.json": ["e1", "e2"]}
-  stub.run_evals = lambda *_a, **_k: iter(
-      [_EvalResult("set1.json", "PASSED"), _EvalResult("set1.json", "FAILED")]
-  )
-  monkeypatch.setattr(cli_tools_click.asyncio, "run", lambda coro: list(coro))
+
+  # Create an async generator function for run_evals
+  async def mock_run_evals(*_a, **_k):
+    yield _EvalResult("set1.json", "PASSED")
+    yield _EvalResult("set1.json", "FAILED")
+
+  stub.run_evals = mock_run_evals
+
+  # Replace asyncio.run with a function that properly handles coroutines
+  def mock_asyncio_run(coro):
+    # Create a new event loop
+    loop = asyncio.new_event_loop()
+    try:
+      return loop.run_until_complete(coro)
+    finally:
+      loop.close()
+
+  monkeypatch.setattr(cli_tools_click.asyncio, "run", mock_asyncio_run)
 
   # inject stub
   sys.modules["google.adk.cli.cli_eval"] = stub
 
   # create dummy agent directory
   agent_dir = tmp_path / "agent5"
   agent_dir.mkdir()
   (agent_dir / "__init__.py").touch()
 
   # inject monkeypatch
-  monkeypatch.setattr(cli_tools_click.envs, "load_dotenv_for_agent", lambda *a, **k: None)
+  monkeypatch.setattr(
+      cli_tools_click.envs, "load_dotenv_for_agent", lambda *a, **k: None
+  )
   runner = CliRunner()
   result = runner.invoke(
       cli_tools_click.main,
       ["eval", str(agent_dir), str(tmp_path / "dummy_eval.json")],
   )
 
   assert result.exit_code == 0
   assert "Eval Run Summary" in result.output
   assert "Tests passed: 1" in result.output
   assert "Tests failed: 1" in result.output