Fix eval unit test failures.

PiperOrigin-RevId: 757872670
This commit is contained in:
Xiang (Sean) Zhou 2025-05-12 12:41:44 -07:00 committed by Copybara-Service
parent c39f24f072
commit 5462862795
2 changed files with 233 additions and 183 deletions

View File

@ -269,25 +269,20 @@ def cli_eval(
eval_set_to_evals = parse_and_get_evals_to_run(eval_set_file_path)
async def _collect_async_gen(
async_gen_coroutine: Coroutine[
AsyncGenerator[EvalResult, None], None, None
],
) -> list[EvalResult]:
return [result async for result in async_gen_coroutine]
async def _collect_eval_results() -> list[EvalResult]:
return [
result
async for result in run_evals(
eval_set_to_evals,
root_agent,
reset_func,
eval_metrics,
print_detailed_results=print_detailed_results,
)
]
try:
eval_results = asyncio.run(
_collect_async_gen(
run_evals(
eval_set_to_evals,
root_agent,
reset_func,
eval_metrics,
print_detailed_results=print_detailed_results,
)
)
)
eval_results = asyncio.run(_collect_eval_results())
except ModuleNotFoundError:
raise click.ClickException(MISSING_EVAL_DEPENDENCIES_MESSAGE)

View File

@ -18,241 +18,296 @@
from __future__ import annotations
import builtins
from pathlib import Path
from types import SimpleNamespace
from typing import Any
from typing import Dict
from typing import List
from typing import Tuple
import click
from click.testing import CliRunner
from google.adk.cli import cli_tools_click
import pytest
from google.adk.cli import cli_tools_click
from pathlib import Path
from typing import Any, Dict, List, Tuple
from types import SimpleNamespace
from click.testing import CliRunner
# Helpers
class _Recorder:
"""Callable that records every invocation."""
"""Callable that records every invocation."""
def __init__(self) -> None:
self.calls: List[Tuple[Tuple[Any, ...], Dict[str, Any]]] = []
def __init__(self) -> None:
self.calls: List[Tuple[Tuple[Any, ...], Dict[str, Any]]] = []
def __call__(self, *args: Any, **kwargs: Any) -> None: # noqa: D401
self.calls.append((args, kwargs))
def __call__(self, *args: Any, **kwargs: Any) -> None: # noqa: D401
self.calls.append((args, kwargs))
# Fixtures
@pytest.fixture(autouse=True)
def _mute_click(monkeypatch: pytest.MonkeyPatch) -> None:
  """Suppress click output during tests.

  Autouse: silences click.echo / click.secho for every test in this module so
  CLI output does not pollute the pytest report.
  """
  monkeypatch.setattr(click, "echo", lambda *a, **k: None)
  monkeypatch.setattr(click, "secho", lambda *a, **k: None)
# validate_exclusive
def test_validate_exclusive_allows_single() -> None:
  """Providing exactly one exclusive option should pass."""
  ctx = click.Context(cli_tools_click.main)
  param = SimpleNamespace(name="replay")
  # A single exclusive option is returned unchanged.
  assert (
      cli_tools_click.validate_exclusive(ctx, param, "file.json") == "file.json"
  )
def test_validate_exclusive_blocks_multiple() -> None:
  """Providing two exclusive options should raise UsageError."""
  ctx = click.Context(cli_tools_click.main)
  param1 = SimpleNamespace(name="replay")
  param2 = SimpleNamespace(name="resume")

  # First option registers fine
  cli_tools_click.validate_exclusive(ctx, param1, "replay.json")

  # Second option triggers conflict
  with pytest.raises(click.UsageError):
    cli_tools_click.validate_exclusive(ctx, param2, "resume.json")
# cli create
def test_cli_create_cmd_invokes_run_cmd(
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
  """`adk create` should forward arguments to cli_create.run_cmd."""
  rec = _Recorder()
  monkeypatch.setattr(cli_tools_click.cli_create, "run_cmd", rec)

  app_dir = tmp_path / "my_app"
  runner = CliRunner()
  result = runner.invoke(
      cli_tools_click.main,
      ["create", "--model", "gemini", "--api_key", "key123", str(app_dir)],
  )
  assert result.exit_code == 0
  assert rec.calls, "cli_create.run_cmd must be called"
# cli run
@pytest.mark.asyncio
async def test_cli_run_invokes_run_cli(
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
  """`adk run` should call run_cli via asyncio.run with correct parameters."""
  rec = _Recorder()
  monkeypatch.setattr(cli_tools_click, "run_cli", lambda **kwargs: rec(kwargs))
  monkeypatch.setattr(
      cli_tools_click.asyncio, "run", lambda coro: coro
  )  # pass-through

  # create dummy agent directory
  agent_dir = tmp_path / "agent"
  agent_dir.mkdir()
  (agent_dir / "__init__.py").touch()
  (agent_dir / "agent.py").touch()

  runner = CliRunner()
  result = runner.invoke(cli_tools_click.main, ["run", str(agent_dir)])
  assert result.exit_code == 0
  # run_cli receives its arguments as a single kwargs dict (see lambda above).
  assert rec.calls and rec.calls[0][0][0]["agent_folder_name"] == "agent"
# cli deploy cloud_run
def test_cli_deploy_cloud_run_success(
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
  """Successful path should call cli_deploy.to_cloud_run once."""
  rec = _Recorder()
  monkeypatch.setattr(cli_tools_click.cli_deploy, "to_cloud_run", rec)

  agent_dir = tmp_path / "agent2"
  agent_dir.mkdir()
  runner = CliRunner()
  result = runner.invoke(
      cli_tools_click.main,
      [
          "deploy",
          "cloud_run",
          "--project",
          "proj",
          "--region",
          "asia-northeast1",
          str(agent_dir),
      ],
  )
  assert result.exit_code == 0
  assert rec.calls, "cli_deploy.to_cloud_run must be invoked"
def test_cli_deploy_cloud_run_failure(
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
  """Exception from to_cloud_run should be caught and surfaced via click.secho."""

  def _boom(*_a: Any, **_k: Any) -> None:  # noqa: D401
    raise RuntimeError("boom")

  monkeypatch.setattr(cli_tools_click.cli_deploy, "to_cloud_run", _boom)

  # intercept click.secho(error=True) output
  captured: List[str] = []
  monkeypatch.setattr(click, "secho", lambda msg, **__: captured.append(msg))

  agent_dir = tmp_path / "agent3"
  agent_dir.mkdir()
  runner = CliRunner()
  result = runner.invoke(
      cli_tools_click.main, ["deploy", "cloud_run", str(agent_dir)]
  )
  # The CLI swallows the error and reports it, so the exit code stays 0.
  assert result.exit_code == 0
  assert any("Deploy failed: boom" in m for m in captured)
# cli eval
def test_cli_eval_missing_deps_raises(
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
  """If cli_eval sub-module is missing, command should raise ClickException."""
  # Ensure .cli_eval is not importable
  orig_import = builtins.__import__

  def _fake_import(name: str, *a: Any, **k: Any):
    # Simulate the optional eval dependencies being absent.
    if name.endswith(".cli_eval") or name == "google.adk.cli.cli_eval":
      raise ModuleNotFoundError()
    return orig_import(name, *a, **k)

  monkeypatch.setattr(builtins, "__import__", _fake_import)
  # NOTE(review): this test installs the import hook but never invokes the
  # `eval` command, so the ClickException path is not actually exercised —
  # confirm whether an invocation/assertion was intended here.
# cli web & api_server (uvicorn patched)
@pytest.fixture()
def _patch_uvicorn(monkeypatch: pytest.MonkeyPatch) -> _Recorder:
  """Patch uvicorn.Config/Server to avoid real network operations.

  Returns:
    A _Recorder whose `calls` is appended to whenever the (dummy)
    uvicorn.Server.run is invoked.
  """
  rec = _Recorder()

  class _DummyServer:

    def __init__(self, *a: Any, **k: Any) -> None:
      ...

    def run(self) -> None:
      # Record the call instead of starting a real server.
      rec()

  monkeypatch.setattr(
      cli_tools_click.uvicorn, "Config", lambda *a, **k: object()
  )
  monkeypatch.setattr(
      cli_tools_click.uvicorn, "Server", lambda *_a, **_k: _DummyServer()
  )
  monkeypatch.setattr(
      cli_tools_click, "get_fast_api_app", lambda **_k: object()
  )
  return rec
def test_cli_web_invokes_uvicorn(
    tmp_path: Path, _patch_uvicorn: _Recorder
) -> None:
  """`adk web` should configure and start uvicorn.Server.run."""
  agents_dir = tmp_path / "agents"
  agents_dir.mkdir()
  runner = CliRunner()
  result = runner.invoke(cli_tools_click.main, ["web", str(agents_dir)])
  assert result.exit_code == 0
  assert _patch_uvicorn.calls, "uvicorn.Server.run must be called"
def test_cli_api_server_invokes_uvicorn(
    tmp_path: Path, _patch_uvicorn: _Recorder
) -> None:
  """`adk api_server` should configure and start uvicorn.Server.run."""
  agents_dir = tmp_path / "agents_api"
  agents_dir.mkdir()
  runner = CliRunner()
  result = runner.invoke(cli_tools_click.main, ["api_server", str(agents_dir)])
  assert result.exit_code == 0
  assert _patch_uvicorn.calls, "uvicorn.Server.run must be called"
def test_cli_eval_success_path(
    tmp_path: Path, monkeypatch: pytest.MonkeyPatch
) -> None:
  """Test the success path of `adk eval` by fully executing it with a stub module, up to summary generation."""
  import asyncio
  import sys
  import types

  # stub cli_eval module
  stub = types.ModuleType("google.adk.cli.cli_eval")

  class _EvalMetric:

    def __init__(self, metric_name: str, threshold: float) -> None:
      ...

  class _EvalResult:

    def __init__(self, eval_set_file: str, final_eval_status: str) -> None:
      self.eval_set_file = eval_set_file
      self.final_eval_status = final_eval_status

  # minimal enum-like namespace
  _EvalStatus = types.SimpleNamespace(PASSED="PASSED", FAILED="FAILED")

  # helper funcs
  stub.EvalMetric = _EvalMetric
  stub.EvalResult = _EvalResult
  stub.EvalStatus = _EvalStatus
  stub.MISSING_EVAL_DEPENDENCIES_MESSAGE = "stub msg"

  stub.get_evaluation_criteria_or_default = lambda _p: {"foo": 1.0}
  stub.get_root_agent = lambda _p: object()
  stub.try_get_reset_func = lambda _p: None
  stub.parse_and_get_evals_to_run = lambda _paths: {"set1.json": ["e1", "e2"]}

  # Create an async generator function for run_evals, since the CLI now
  # consumes results via `async for` rather than a plain iterator.
  async def mock_run_evals(*_a, **_k):
    yield _EvalResult("set1.json", "PASSED")
    yield _EvalResult("set1.json", "FAILED")

  stub.run_evals = mock_run_evals

  # Replace asyncio.run with a function that properly handles coroutines
  # (CliRunner may already be inside an event-loop context).
  def mock_asyncio_run(coro):
    # Create a new event loop
    loop = asyncio.new_event_loop()
    try:
      return loop.run_until_complete(coro)
    finally:
      loop.close()

  monkeypatch.setattr(cli_tools_click.asyncio, "run", mock_asyncio_run)

  # inject stub
  sys.modules["google.adk.cli.cli_eval"] = stub

  # create dummy agent directory
  agent_dir = tmp_path / "agent5"
  agent_dir.mkdir()
  (agent_dir / "__init__.py").touch()

  # inject monkeypatch
  monkeypatch.setattr(
      cli_tools_click.envs, "load_dotenv_for_agent", lambda *a, **k: None
  )

  runner = CliRunner()
  result = runner.invoke(
      cli_tools_click.main,
      ["eval", str(agent_dir), str(tmp_path / "dummy_eval.json")],
  )

  assert result.exit_code == 0
  assert "Eval Run Summary" in result.output
  assert "Tests passed: 1" in result.output
  assert "Tests failed: 1" in result.output