mirror of
https://github.com/EvolutionAPI/adk-python.git
synced 2025-12-18 11:22:22 -06:00
No public description
PiperOrigin-RevId: 748777998
This commit is contained in:
committed by
hangfei
parent
290058eb05
commit
61d4be2d76
13
tests/unittests/cli/utils/__init__.py
Normal file
13
tests/unittests/cli/utils/__init__.py
Normal file
@@ -0,0 +1,13 @@
|
||||
# Copyright 2025 Google LLC
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
434
tests/unittests/cli/utils/test_evals.py
Normal file
434
tests/unittests/cli/utils/test_evals.py
Normal file
@@ -0,0 +1,434 @@
|
||||
# Copyright 2025 Google LLC
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
"""Tests for utilities in eval."""
|
||||
|
||||
|
||||
from google.adk.cli.utils.evals import convert_session_to_eval_format
|
||||
from google.adk.events.event import Event
|
||||
from google.adk.sessions.session import Session
|
||||
from google.genai import types
|
||||
|
||||
|
||||
def build_event(author: str, parts_content: list[dict]) -> Event:
|
||||
"""Builds an Event object with specified parts."""
|
||||
parts = []
|
||||
for p_data in parts_content:
|
||||
part_args = {}
|
||||
if "text" in p_data:
|
||||
part_args["text"] = p_data["text"]
|
||||
if "func_name" in p_data:
|
||||
part_args["function_call"] = types.FunctionCall(
|
||||
name=p_data.get("func_name"), args=p_data.get("func_args")
|
||||
)
|
||||
# Add other part types here if needed for future tests
|
||||
parts.append(types.Part(**part_args))
|
||||
return Event(author=author, content=types.Content(parts=parts))
|
||||
|
||||
|
||||
def test_convert_empty_session():
|
||||
"""Test conversion function with empty events list in Session."""
|
||||
# Pydantic models require mandatory fields for instantiation
|
||||
session_empty_events = Session(
|
||||
id="s1", app_name="app", user_id="u1", events=[]
|
||||
)
|
||||
assert not convert_session_to_eval_format(session_empty_events)
|
||||
|
||||
|
||||
def test_convert_none_session():
|
||||
"""Test conversion function with None Session."""
|
||||
assert not convert_session_to_eval_format(None)
|
||||
|
||||
|
||||
def test_convert_session_skips_initial_non_user_events():
|
||||
"""Test conversion function with only user events."""
|
||||
events = [
|
||||
build_event("model", [{"text": "Hello"}]),
|
||||
build_event("user", [{"text": "How are you?"}]),
|
||||
]
|
||||
session = Session(id="s1", app_name="app", user_id="u1", events=events)
|
||||
expected = [
|
||||
{
|
||||
"query": "How are you?",
|
||||
"expected_tool_use": [],
|
||||
"expected_intermediate_agent_responses": [],
|
||||
"reference": "",
|
||||
},
|
||||
]
|
||||
assert convert_session_to_eval_format(session) == expected
|
||||
|
||||
|
||||
def test_convert_single_turn_text_only():
|
||||
"""Test a single user query followed by a single agent text response."""
|
||||
events = [
|
||||
build_event("user", [{"text": "What is the time?"}]),
|
||||
build_event("root_agent", [{"text": "It is 3 PM."}]),
|
||||
]
|
||||
session = Session(id="s1", app_name="app", user_id="u1", events=events)
|
||||
expected = [{
|
||||
"query": "What is the time?",
|
||||
"expected_tool_use": [],
|
||||
"expected_intermediate_agent_responses": [],
|
||||
"reference": "It is 3 PM.",
|
||||
}]
|
||||
assert convert_session_to_eval_format(session) == expected
|
||||
|
||||
|
||||
def test_convert_single_turn_tool_only():
|
||||
"""Test a single user query followed by a single agent tool call."""
|
||||
events = [
|
||||
build_event("user", [{"text": "Get weather for Seattle"}]),
|
||||
build_event(
|
||||
"root_agent",
|
||||
[{"func_name": "get_weather", "func_args": {"city": "Seattle"}}],
|
||||
),
|
||||
]
|
||||
session = Session(id="s1", app_name="app", user_id="u1", events=events)
|
||||
expected = [{
|
||||
"query": "Get weather for Seattle",
|
||||
"expected_tool_use": [
|
||||
{"tool_name": "get_weather", "tool_input": {"city": "Seattle"}}
|
||||
],
|
||||
"expected_intermediate_agent_responses": [],
|
||||
"reference": "",
|
||||
}]
|
||||
assert convert_session_to_eval_format(session) == expected
|
||||
|
||||
|
||||
def test_convert_single_turn_multiple_tools_and_texts():
|
||||
"""Test a turn with multiple agent responses (tools and text)."""
|
||||
events = [
|
||||
build_event("user", [{"text": "Do task A then task B"}]),
|
||||
build_event(
|
||||
"root_agent", [{"text": "Okay, starting task A."}]
|
||||
), # Intermediate Text 1
|
||||
build_event(
|
||||
"root_agent", [{"func_name": "task_A", "func_args": {"param": 1}}]
|
||||
), # Tool 1
|
||||
build_event(
|
||||
"root_agent", [{"text": "Task A done. Now starting task B."}]
|
||||
), # Intermediate Text 2
|
||||
build_event(
|
||||
"another_agent", [{"func_name": "task_B", "func_args": {}}]
|
||||
), # Tool 2
|
||||
build_event(
|
||||
"root_agent", [{"text": "All tasks completed."}]
|
||||
), # Final Text (Reference)
|
||||
]
|
||||
session = Session(id="s1", app_name="app", user_id="u1", events=events)
|
||||
expected = [{
|
||||
"query": "Do task A then task B",
|
||||
"expected_tool_use": [
|
||||
{"tool_name": "task_A", "tool_input": {"param": 1}},
|
||||
{"tool_name": "task_B", "tool_input": {}},
|
||||
],
|
||||
"expected_intermediate_agent_responses": [
|
||||
{"author": "root_agent", "text": "Okay, starting task A."},
|
||||
{
|
||||
"author": "root_agent",
|
||||
"text": "Task A done. Now starting task B.",
|
||||
},
|
||||
],
|
||||
"reference": "All tasks completed.",
|
||||
}]
|
||||
assert convert_session_to_eval_format(session) == expected
|
||||
|
||||
|
||||
def test_convert_multi_turn_session():
|
||||
"""Test a session with multiple user/agent turns."""
|
||||
events = [
|
||||
build_event("user", [{"text": "Query 1"}]),
|
||||
build_event("agent", [{"text": "Response 1"}]),
|
||||
build_event("user", [{"text": "Query 2"}]),
|
||||
build_event("agent", [{"func_name": "tool_X", "func_args": {}}]),
|
||||
build_event("agent", [{"text": "Response 2"}]),
|
||||
]
|
||||
session = Session(id="s1", app_name="app", user_id="u1", events=events)
|
||||
expected = [
|
||||
{ # Turn 1
|
||||
"query": "Query 1",
|
||||
"expected_tool_use": [],
|
||||
"expected_intermediate_agent_responses": [],
|
||||
"reference": "Response 1",
|
||||
},
|
||||
{ # Turn 2
|
||||
"query": "Query 2",
|
||||
"expected_tool_use": [{"tool_name": "tool_X", "tool_input": {}}],
|
||||
"expected_intermediate_agent_responses": [],
|
||||
"reference": "Response 2",
|
||||
},
|
||||
]
|
||||
assert convert_session_to_eval_format(session) == expected
|
||||
|
||||
|
||||
def test_convert_agent_event_multiple_parts():
|
||||
"""Test an agent event with both text and tool call parts."""
|
||||
events = [
|
||||
build_event("user", [{"text": "Do something complex"}]),
|
||||
# Build event with multiple dicts in parts_content list
|
||||
build_event(
|
||||
"agent",
|
||||
[
|
||||
{"text": "Okay, doing it."},
|
||||
{"func_name": "complex_tool", "func_args": {"value": True}},
|
||||
],
|
||||
),
|
||||
build_event("agent", [{"text": "Finished."}]),
|
||||
]
|
||||
session = Session(id="s1", app_name="app", user_id="u1", events=events)
|
||||
expected = [{
|
||||
"query": "Do something complex",
|
||||
"expected_tool_use": [
|
||||
{"tool_name": "complex_tool", "tool_input": {"value": True}}
|
||||
],
|
||||
"expected_intermediate_agent_responses": [{
|
||||
"author": "agent",
|
||||
"text": "Okay, doing it.",
|
||||
}], # Text from first part of agent event
|
||||
"reference": "Finished.", # Text from second agent event
|
||||
}]
|
||||
assert convert_session_to_eval_format(session) == expected
|
||||
|
||||
|
||||
def test_convert_handles_missing_content_or_parts():
|
||||
"""Test that events missing content or parts are skipped gracefully."""
|
||||
events = [
|
||||
build_event("user", [{"text": "Query 1"}]),
|
||||
Event(author="agent", content=None), # Agent event missing content
|
||||
build_event("agent", [{"text": "Response 1"}]),
|
||||
Event(author="user", content=None), # User event missing content
|
||||
build_event("user", [{"text": "Query 2"}]),
|
||||
Event(
|
||||
author="agent", content=types.Content(parts=[])
|
||||
), # Agent event with empty parts list
|
||||
build_event("agent", [{"text": "Response 2"}]),
|
||||
# User event with content but no parts (or None parts)
|
||||
Event(author="user", content=types.Content(parts=None)),
|
||||
build_event("user", [{"text": "Query 3"}]),
|
||||
build_event("agent", [{"text": "Response 3"}]),
|
||||
]
|
||||
session = Session(id="s1", app_name="app", user_id="u1", events=events)
|
||||
expected = [
|
||||
{ # Turn 1 (from Query 1)
|
||||
"query": "Query 1",
|
||||
"expected_tool_use": [],
|
||||
"expected_intermediate_agent_responses": [],
|
||||
"reference": "Response 1",
|
||||
},
|
||||
{ # Turn 2 (from Query 2 - user event with None content was skipped)
|
||||
"query": "Query 2",
|
||||
"expected_tool_use": [],
|
||||
"expected_intermediate_agent_responses": [],
|
||||
"reference": "Response 2",
|
||||
},
|
||||
{ # Turn 3 (from Query 3 - user event with None parts was skipped)
|
||||
"query": "Query 3",
|
||||
"expected_tool_use": [],
|
||||
"expected_intermediate_agent_responses": [],
|
||||
"reference": "Response 3",
|
||||
},
|
||||
]
|
||||
assert convert_session_to_eval_format(session) == expected
|
||||
|
||||
|
||||
def test_convert_handles_missing_tool_name_or_args():
|
||||
"""Test tool calls with missing name or args."""
|
||||
events = [
|
||||
build_event("user", [{"text": "Call tools"}]),
|
||||
# Event where FunctionCall has name=None
|
||||
Event(
|
||||
author="agent",
|
||||
content=types.Content(
|
||||
parts=[
|
||||
types.Part(
|
||||
function_call=types.FunctionCall(name=None, args={"a": 1})
|
||||
)
|
||||
]
|
||||
),
|
||||
),
|
||||
# Event where FunctionCall has args=None
|
||||
Event(
|
||||
author="agent",
|
||||
content=types.Content(
|
||||
parts=[
|
||||
types.Part(
|
||||
function_call=types.FunctionCall(name="tool_B", args=None)
|
||||
)
|
||||
]
|
||||
),
|
||||
),
|
||||
# Event where FunctionCall part exists but FunctionCall object is None
|
||||
# (should skip)
|
||||
Event(
|
||||
author="agent",
|
||||
content=types.Content(
|
||||
parts=[types.Part(function_call=None, text="some text")]
|
||||
),
|
||||
),
|
||||
build_event("agent", [{"text": "Done"}]),
|
||||
]
|
||||
session = Session(id="s1", app_name="app", user_id="u1", events=events)
|
||||
expected = [{
|
||||
"query": "Call tools",
|
||||
"expected_tool_use": [
|
||||
{"tool_name": "", "tool_input": {"a": 1}}, # Defaults name to ""
|
||||
{"tool_name": "tool_B", "tool_input": {}}, # Defaults args to {}
|
||||
],
|
||||
"expected_intermediate_agent_responses": [{
|
||||
"author": "agent",
|
||||
"text": "some text",
|
||||
}], # Text part from the event where function_call was None
|
||||
"reference": "Done",
|
||||
}]
|
||||
assert convert_session_to_eval_format(session) == expected
|
||||
|
||||
|
||||
def test_convert_handles_missing_user_query_text():
|
||||
"""Test user event where the first part has no text."""
|
||||
events = [
|
||||
# Event where user part has text=None
|
||||
Event(
|
||||
author="user", content=types.Content(parts=[types.Part(text=None)])
|
||||
),
|
||||
build_event("agent", [{"text": "Response 1"}]),
|
||||
# Event where user part has text=""
|
||||
build_event("user", [{"text": ""}]),
|
||||
build_event("agent", [{"text": "Response 2"}]),
|
||||
]
|
||||
session = Session(id="s1", app_name="app", user_id="u1", events=events)
|
||||
expected = [
|
||||
{
|
||||
"query": "", # Defaults to "" if text is None
|
||||
"expected_tool_use": [],
|
||||
"expected_intermediate_agent_responses": [],
|
||||
"reference": "Response 1",
|
||||
},
|
||||
{
|
||||
"query": "", # Defaults to "" if text is ""
|
||||
"expected_tool_use": [],
|
||||
"expected_intermediate_agent_responses": [],
|
||||
"reference": "Response 2",
|
||||
},
|
||||
]
|
||||
assert convert_session_to_eval_format(session) == expected
|
||||
|
||||
|
||||
def test_convert_handles_empty_agent_text():
|
||||
"""Test agent responses with empty string text."""
|
||||
events = [
|
||||
build_event("user", [{"text": "Query"}]),
|
||||
build_event("agent", [{"text": "Okay"}]),
|
||||
build_event("agent", [{"text": ""}]), # Empty text
|
||||
build_event("agent", [{"text": "Done"}]),
|
||||
]
|
||||
session = Session(id="s1", app_name="app", user_id="u1", events=events)
|
||||
expected = [{
|
||||
"query": "Query",
|
||||
"expected_tool_use": [],
|
||||
"expected_intermediate_agent_responses": [
|
||||
{"author": "agent", "text": "Okay"},
|
||||
],
|
||||
"reference": "Done",
|
||||
}]
|
||||
assert convert_session_to_eval_format(session) == expected
|
||||
|
||||
|
||||
def test_convert_complex_sample_session():
|
||||
"""Test using the complex sample session provided earlier."""
|
||||
events = [
|
||||
build_event("user", [{"text": "What can you do?"}]),
|
||||
build_event(
|
||||
"root_agent",
|
||||
[{"text": "I can roll dice and check if numbers are prime. \n"}],
|
||||
),
|
||||
build_event(
|
||||
"user",
|
||||
[{
|
||||
"text": (
|
||||
"Roll a 8 sided dice and then check if 90 is a prime number"
|
||||
" or not."
|
||||
)
|
||||
}],
|
||||
),
|
||||
build_event(
|
||||
"root_agent",
|
||||
[{
|
||||
"func_name": "transfer_to_agent",
|
||||
"func_args": {"agent_name": "roll_agent"},
|
||||
}],
|
||||
),
|
||||
# Skipping FunctionResponse events as they don't have text/functionCall
|
||||
# parts used by converter
|
||||
build_event(
|
||||
"roll_agent", [{"func_name": "roll_die", "func_args": {"sides": 8}}]
|
||||
),
|
||||
# Skipping FunctionResponse
|
||||
build_event(
|
||||
"roll_agent",
|
||||
[
|
||||
{"text": "I rolled a 2. Now, I'll check if 90 is prime. \n\n"},
|
||||
{
|
||||
"func_name": "transfer_to_agent",
|
||||
"func_args": {"agent_name": "prime_agent"},
|
||||
},
|
||||
],
|
||||
),
|
||||
# Skipping FunctionResponse
|
||||
build_event(
|
||||
"prime_agent",
|
||||
[{"func_name": "check_prime", "func_args": {"nums": [90]}}],
|
||||
),
|
||||
# Skipping FunctionResponse
|
||||
build_event("prime_agent", [{"text": "90 is not a prime number. \n"}]),
|
||||
]
|
||||
session = Session(
|
||||
id="some_id",
|
||||
app_name="hello_world_ma",
|
||||
user_id="user",
|
||||
events=events,
|
||||
)
|
||||
expected = [
|
||||
{
|
||||
"query": "What can you do?",
|
||||
"expected_tool_use": [],
|
||||
"expected_intermediate_agent_responses": [],
|
||||
"reference": "I can roll dice and check if numbers are prime. \n",
|
||||
},
|
||||
{
|
||||
"query": (
|
||||
"Roll a 8 sided dice and then check if 90 is a prime number or"
|
||||
" not."
|
||||
),
|
||||
"expected_tool_use": [
|
||||
{
|
||||
"tool_name": "transfer_to_agent",
|
||||
"tool_input": {"agent_name": "roll_agent"},
|
||||
},
|
||||
{"tool_name": "roll_die", "tool_input": {"sides": 8}},
|
||||
{
|
||||
"tool_name": "transfer_to_agent",
|
||||
"tool_input": {"agent_name": "prime_agent"},
|
||||
}, # From combined event
|
||||
{"tool_name": "check_prime", "tool_input": {"nums": [90]}},
|
||||
],
|
||||
"expected_intermediate_agent_responses": [{
|
||||
"author": "roll_agent",
|
||||
"text": "I rolled a 2. Now, I'll check if 90 is prime. \n\n",
|
||||
}], # Text from combined event
|
||||
"reference": "90 is not a prime number. \n",
|
||||
},
|
||||
]
|
||||
|
||||
actual = convert_session_to_eval_format(session)
|
||||
assert actual == expected
|
||||
Reference in New Issue
Block a user