refactor: refactor evaluation so that the cli module depends on the evaluation module. Modules outside of the cli module should not reference the cli module.

PiperOrigin-RevId: 763577749
2025-07-13 15:14:50 -06:00 · 2025-05-26 19:18:07 -07:00 · 2025-05-26 19:18:07 -07:00 · 7c2df7e4f6
commit 7c2df7e4f6
parent d43c80b718
6 changed files with 178 additions and 92 deletions
--- a/src/google/adk/cli/cli_eval.py
+++ b/src/google/adk/cli/cli_eval.py
@ -12,6 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+from __future__ import annotations
+
 import importlib.util
 import json
 import logging
@ -22,98 +24,20 @@ from typing import AsyncGenerator
 from typing import Optional
 import uuid

-from pydantic import Field
-
 from ..agents import Agent
 from ..artifacts.base_artifact_service import BaseArtifactService
 from ..evaluation.eval_case import EvalCase
-from ..evaluation.eval_case import Invocation
+from ..evaluation.eval_metrics import EvalMetric
+from ..evaluation.eval_metrics import EvalMetricResult
+from ..evaluation.eval_metrics import EvalMetricResultPerInvocation
+from ..evaluation.eval_result import EvalCaseResult
 from ..evaluation.evaluator import EvalStatus
 from ..evaluation.evaluator import Evaluator
 from ..sessions.base_session_service import BaseSessionService
-from ..sessions.session import Session
-from .utils import common

 logger = logging.getLogger("google_adk." + __name__)


-class EvalMetric(common.BaseModel):
-  """A metric used to evaluate a particular aspect of an eval case."""
-
-  metric_name: str
-  """The name of the metric."""
-
-  threshold: float
-  """A threshold value. Each metric decides how to interpret this threshold."""
-
-
-class EvalMetricResult(EvalMetric):
-  """The actual computed score/value of a particular EvalMetric."""
-
-  score: Optional[float] = None
-  eval_status: EvalStatus
-
-
-class EvalMetricResultPerInvocation(common.BaseModel):
-  """Eval metric results per invocation."""
-
-  actual_invocation: Invocation
-  """The actual invocation, usually obtained by inferencing the agent."""
-
-  expected_invocation: Invocation
-  """The expected invocation, usually the reference or golden invocation."""
-
-  eval_metric_results: list[EvalMetricResult] = []
-  """Eval resutls for each applicable metric."""
-
-
-class EvalCaseResult(common.BaseModel):
-  """Case-level evaluation results."""
-
-  eval_set_file: str = Field(
-      deprecated=True,
-      description="This field is deprecated, use eval_set_id instead.",
-  )
-  eval_set_id: str = ""
-  """The eval set id."""
-
-  eval_id: str = ""
-  """The eval case id."""
-
-  final_eval_status: EvalStatus
-  """Final eval status for this eval case."""
-
-  eval_metric_results: list[tuple[EvalMetric, EvalMetricResult]] = Field(
-      deprecated=True,
-      description=(
-          "This field is deprecated, use overall_eval_metric_results instead."
-      ),
-  )
-
-  overall_eval_metric_results: list[EvalMetricResult]
-  """Overall result for each metric for the entire eval case."""
-
-  eval_metric_result_per_invocation: list[EvalMetricResultPerInvocation]
-  """Result for each metric on a per invocation basis."""
-
-  session_id: str
-  """Session id of the session generated as result of inferencing/scraping stage of the eval."""
-
-  session_details: Optional[Session] = None
-  """Session generated as result of inferencing/scraping stage of the eval."""
-
-  user_id: Optional[str] = None
-  """User id used during inferencing/scraping stage of the eval."""
-
-
-class EvalSetResult(common.BaseModel):
-  eval_set_result_id: str
-  eval_set_result_name: str
-  eval_set_id: str
-  eval_case_results: list[EvalCaseResult] = Field(default_factory=list)
-  creation_timestamp: float = 0.0
-
-
 MISSING_EVAL_DEPENDENCIES_MESSAGE = (
    "Eval module is not installed, please install via `pip install"
    " google-adk[eval]`."
@ -227,8 +151,6 @@ async def run_evals(
  """
  try:
    from ..evaluation.agent_evaluator import EvaluationGenerator
-    from ..evaluation.response_evaluator import ResponseEvaluator
-    from ..evaluation.trajectory_evaluator import TrajectoryEvaluator
  except ModuleNotFoundError as e:
    raise ModuleNotFoundError(MISSING_EVAL_DEPENDENCIES_MESSAGE) from e

--- a/src/google/adk/cli/fast_api.py
+++ b/src/google/adk/cli/fast_api.py
@ -13,6 +13,8 @@
 # limitations under the License.


+from __future__ import annotations
+
 import asyncio
 from contextlib import asynccontextmanager
 import importlib
@ -59,6 +61,10 @@ from ..agents.run_config import StreamingMode
 from ..artifacts.in_memory_artifact_service import InMemoryArtifactService
 from ..evaluation.eval_case import EvalCase
 from ..evaluation.eval_case import SessionInput
+from ..evaluation.eval_metrics import EvalMetric
+from ..evaluation.eval_metrics import EvalMetricResult
+from ..evaluation.eval_metrics import EvalMetricResultPerInvocation
+from ..evaluation.eval_result import EvalSetResult
 from ..evaluation.local_eval_set_results_manager import LocalEvalSetResultsManager
 from ..evaluation.local_eval_sets_manager import LocalEvalSetsManager
 from ..events.event import Event
@ -69,10 +75,6 @@ from ..sessions.in_memory_session_service import InMemorySessionService
 from ..sessions.session import Session
 from ..sessions.vertex_ai_session_service import VertexAiSessionService
 from .cli_eval import EVAL_SESSION_ID_PREFIX
-from .cli_eval import EvalMetric
-from .cli_eval import EvalMetricResult
-from .cli_eval import EvalMetricResultPerInvocation
-from .cli_eval import EvalSetResult
 from .cli_eval import EvalStatus
 from .utils import cleanup
 from .utils import common
--- a/src/google/adk/evaluation/eval_metrics.py
+++ b/src/google/adk/evaluation/eval_metrics.py
@ -0,0 +1,72 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import annotations
+
+from typing import Optional
+
+from pydantic import alias_generators
+from pydantic import BaseModel
+from pydantic import ConfigDict
+
+from .eval_case import Invocation
+from .evaluator import EvalStatus
+
+
+class EvalMetric(BaseModel):
+  """A metric used to evaluate a particular aspect of an eval case."""
+
+  model_config = ConfigDict(
+      alias_generator=alias_generators.to_camel,
+      populate_by_name=True,
+  )
+
+  model_config = ConfigDict(
+      alias_generator=alias_generators.to_camel,
+      populate_by_name=True,
+  )
+
+  metric_name: str
+  """The name of the metric."""
+
+  threshold: float
+  """A threshold value. Each metric decides how to interpret this threshold."""
+
+
+class EvalMetricResult(EvalMetric):
+  """The actual computed score/value of a particular EvalMetric."""
+
+  model_config = ConfigDict(
+      alias_generator=alias_generators.to_camel,
+      populate_by_name=True,
+  )
+  model_config = ConfigDict(
+      alias_generator=alias_generators.to_camel,
+      populate_by_name=True,
+  )
+  score: Optional[float] = None
+  eval_status: EvalStatus
+
+
+class EvalMetricResultPerInvocation(BaseModel):
+  """Eval metric results per invocation."""
+
+  actual_invocation: Invocation
+  """The actual invocation, usually obtained by inferencing the agent."""
+
+  expected_invocation: Invocation
+  """The expected invocation, usually the reference or golden invocation."""
+
+  eval_metric_results: list[EvalMetricResult] = []
+  """Eval results for each applicable metric."""
--- a/src/google/adk/evaluation/eval_result.py
+++ b/src/google/adk/evaluation/eval_result.py
@ -0,0 +1,86 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import annotations
+
+from typing import Optional
+
+from pydantic import alias_generators
+from pydantic import BaseModel
+from pydantic import ConfigDict
+from pydantic import Field
+
+from ..sessions.session import Session
+from .eval_metrics import EvalMetric
+from .eval_metrics import EvalMetricResult
+from .eval_metrics import EvalMetricResultPerInvocation
+from .evaluator import EvalStatus
+
+
+class EvalCaseResult(BaseModel):
+  """Case level evaluation results."""
+
+  model_config = ConfigDict(
+      alias_generator=alias_generators.to_camel,
+      populate_by_name=True,
+  )
+
+  eval_set_file: str = Field(
+      deprecated=True,
+      description="This field is deprecated, use eval_set_id instead.",
+  )
+  eval_set_id: str = ""
+  """The eval set id."""
+
+  eval_id: str = ""
+  """The eval case id."""
+
+  final_eval_status: EvalStatus
+  """Final eval status for this eval case."""
+
+  eval_metric_results: list[tuple[EvalMetric, EvalMetricResult]] = Field(
+      deprecated=True,
+      description=(
+          "This field is deprecated, use overall_eval_metric_results instead."
+      ),
+  )
+
+  overall_eval_metric_results: list[EvalMetricResult]
+  """Overall result for each metric for the entire eval case."""
+
+  eval_metric_result_per_invocation: list[EvalMetricResultPerInvocation]
+  """Result for each metric on a per invocation basis."""
+
+  session_id: str
+  """Session id of the session generated as result of inferencing/scraping stage of the eval."""
+
+  session_details: Optional[Session] = None
+  """Session generated as result of inferencing/scraping stage of the eval."""
+
+  user_id: Optional[str] = None
+  """User id used during inferencing/scraping stage of the eval."""
+
+
+class EvalSetResult(BaseModel):
+  """Eval set level evaluation results."""
+
+  model_config = ConfigDict(
+      alias_generator=alias_generators.to_camel,
+      populate_by_name=True,
+  )
+  eval_set_result_id: str
+  eval_set_result_name: str
+  eval_set_id: str
+  eval_case_results: list[EvalCaseResult] = Field(default_factory=list)
+  creation_timestamp: float = 0.0
--- a/src/google/adk/evaluation/eval_set_results_manager.py
+++ b/src/google/adk/evaluation/eval_set_results_manager.py
@ -12,11 +12,13 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+from __future__ import annotations
+
 from abc import ABC
 from abc import abstractmethod

-from ..cli.cli_eval import EvalCaseResult
-from ..cli.cli_eval import EvalSetResult
+from .eval_result import EvalCaseResult
+from .eval_result import EvalSetResult


 class EvalSetResultsManager(ABC):
--- a/src/google/adk/evaluation/local_eval_set_results_manager.py
+++ b/src/google/adk/evaluation/local_eval_set_results_manager.py
@ -12,6 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+from __future__ import annotations
+
 import json
 import logging
 import os
@ -19,8 +21,8 @@ import time

 from typing_extensions import override

-from ..cli.cli_eval import EvalCaseResult
-from ..cli.cli_eval import EvalSetResult
+from .eval_result import EvalCaseResult
+from .eval_result import EvalSetResult
 from .eval_set_results_manager import EvalSetResultsManager

 logger = logging.getLogger("google_adk." + __name__)