Add Pydantic data models for eval set and eval case.

PiperOrigin-RevId: 757920694
This commit is contained in:
Ankur Sharma 2025-05-12 14:47:24 -07:00 committed by Copybara-Service
parent 993f9971bb
commit 1237d5334f
2 changed files with 123 additions and 0 deletions

View File

@ -0,0 +1,86 @@
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Any, Optional
from google.genai import types as genai_types
from pydantic import BaseModel
from pydantic import Field
class IntermediateData(BaseModel):
  """Container for intermediate data that an agent would generate as it
  responds with a final answer."""

  tool_uses: list[genai_types.FunctionCall] = Field(default_factory=list)
  """Tool use trajectory in chronological order."""

  intermediate_responses: list[genai_types.Part] = Field(default_factory=list)
  """Intermediate responses generated by sub-agents to convey progress or status
  in a multi-agent system, distinct from the final response."""
class Invocation(BaseModel):
  """Represents a single invocation."""

  invocation_id: str = ''
  """Unique identifier for the invocation."""

  user_content: genai_types.Content
  """Content provided by the user in this invocation."""

  # Pydantic v2 treats `Optional[X]` with no default as a *required* field,
  # so a default of None is needed for this field to actually be omittable.
  final_response: Optional[genai_types.Content] = None
  """Final response from the agent that acts as a reference or benchmark."""

  intermediate_data: Optional[IntermediateData] = None
  """Reference intermediate steps generated as a part of Agent execution.

  For a multi-agent system, it is also helpful to inspect the route that
  the agent took to generate final response.
  """

  creation_timestamp: float = 0.0
  """Timestamp for the current invocation, primarily intended for debugging
  purposes."""
class SessionInput(BaseModel):
  """Values that help initialize a Session."""

  # The name of the app.
  app_name: str

  # The user id.
  user_id: str

  # The state of the session; starts empty unless provided.
  state: dict[str, Any] = Field(default_factory=dict)
class EvalCase(BaseModel):
  """An eval case."""

  eval_id: str
  """Unique identifier for the evaluation case."""

  conversation: list[Invocation]
  """A conversation between the user and the Agent. The conversation can have
  any number of invocations."""

  # Optional with a None default: many eval cases need no session bootstrap,
  # and a required field would force every author to construct one.
  session_input: Optional[SessionInput] = None
  """Session input that will be passed on to the Agent during eval.

  It is common for an Agent's state to be initialized to some initial/default
  value, for example, your agent may need to know today's date.
  """

  creation_timestamp: float = 0.0
  """The time at which this eval case was created."""

View File

@ -0,0 +1,37 @@
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Optional
from pydantic import BaseModel
from .eval_case import EvalCase
class EvalSet(BaseModel):
  """A set of eval cases."""

  eval_set_id: str
  """Unique identifier for the eval set."""

  # Pydantic v2 treats `Optional[str]` with no default as required; default
  # to None so name/description can genuinely be omitted.
  name: Optional[str] = None
  """Name of the dataset."""

  description: Optional[str] = None
  """Description of the dataset."""

  eval_cases: list[EvalCase]
  """List of eval cases in the dataset. Each case represents a single
  interaction to be evaluated."""

  creation_timestamp: float = 0.0
  """The time at which this eval set was created."""