diff --git a/src/google/adk/evaluation/eval_case.py b/src/google/adk/evaluation/eval_case.py
new file mode 100644
index 0000000..9966a2d
--- /dev/null
+++ b/src/google/adk/evaluation/eval_case.py
@@ -0,0 +1,86 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+from typing import Any, Optional
+
+from google.genai import types as genai_types
+from pydantic import BaseModel
+from pydantic import Field
+
+
+class IntermediateData(BaseModel):
+  """Container for intermediate data that an agent would generate as it responds with a final answer."""
+
+  tool_uses: list[genai_types.FunctionCall]
+  """Tool use trajectory in chronological order."""
+
+  intermediate_responses: list[genai_types.Part]
+  """Intermediate responses generated by sub-agents to convey progress or status
+  in a multi-agent system, distinct from the final response."""
+
+
+class Invocation(BaseModel):
+  """Represents a single invocation."""
+
+  invocation_id: str = ''
+  """Unique identifier for the invocation."""
+
+  user_content: genai_types.Content
+  """Content provided by the user in this invocation."""
+
+  final_response: Optional[genai_types.Content] = None
+  """Final agent response that acts as a reference or benchmark; defaults to None."""
+
+  intermediate_data: IntermediateData
+  """Reference intermediate steps generated as a part of Agent execution.
+
+  For a multi-agent system, it is also helpful to inspect the route that
+  the agent took to generate the final response.
+  """
+
+  creation_timestamp: float = 0.0
+  """Timestamp for the current invocation, primarily intended for debugging purposes."""
+
+
+class SessionInput(BaseModel):
+  """Values that help initialize a Session."""
+
+  app_name: str
+  """The name of the app."""
+
+  user_id: str
+  """The user id."""
+
+  state: dict[str, Any] = Field(default_factory=dict)
+  """The state of the session; a fresh empty dict is created when omitted."""
+
+
+class EvalCase(BaseModel):
+  """An eval case."""
+
+  eval_id: str
+  """Unique identifier for the evaluation case."""
+
+  conversation: list[Invocation]
+  """A conversation between the user and the Agent. The conversation can have any number of invocations."""
+
+  session_input: SessionInput
+  """Session input that will be passed on to the Agent during eval.
+     It is common for an Agent's state to be initialized to some initial/default
+     value; for example, your agent may need to know today's date.
+  """
+
+  creation_timestamp: float = 0.0
+  """The time at which this eval case was created."""
diff --git a/src/google/adk/evaluation/eval_set.py b/src/google/adk/evaluation/eval_set.py
new file mode 100644
index 0000000..b17593a
--- /dev/null
+++ b/src/google/adk/evaluation/eval_set.py
@@ -0,0 +1,37 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Optional
+from pydantic import BaseModel
+from .eval_case import EvalCase
+
+
+class EvalSet(BaseModel):
+  """A set of eval cases."""
+
+  eval_set_id: str
+  """Unique identifier for the eval set."""
+
+  name: Optional[str] = None
+  """Name of the dataset; defaults to None when omitted."""
+
+  description: Optional[str] = None
+  """Description of the dataset; defaults to None when omitted."""
+
+  eval_cases: list[EvalCase]
+  """List of eval cases in the dataset. Each case represents a single
+  interaction to be evaluated."""
+
+  creation_timestamp: float = 0.0
+  """The time at which this eval set was created."""