Updated test cases to use the new EvalSet schema for storing test data. Also added a utility to help migrate existing test files to the new schema.

Additionally, migrated the existing test files to the new schema and deleted the test session files, as they are no longer needed.

PiperOrigin-RevId: 759318735
This commit is contained in:
Ankur Sharma
2025-05-15 15:09:30 -07:00
committed by Copybara-Service
parent a71d9ea9a1
commit 1c23556225
17 changed files with 1110 additions and 231 deletions

View File

@@ -19,7 +19,7 @@ import pytest
@pytest.mark.asyncio
async def test_simple_multi_turn_conversation():
"""Test a simple multi-turn conversation."""
AgentEvaluator.evaluate(
await AgentEvaluator.evaluate(
agent_module="tests.integration.fixture.home_automation_agent",
eval_dataset_file_path_or_dir="tests/integration/fixture/home_automation_agent/test_files/simple_multi_turn_conversation.test.json",
num_runs=4,
@@ -29,7 +29,7 @@ async def test_simple_multi_turn_conversation():
@pytest.mark.asyncio
async def test_dependent_tool_calls():
"""Test subsequent tool calls that are dependent on previous tool calls."""
AgentEvaluator.evaluate(
await AgentEvaluator.evaluate(
agent_module="tests.integration.fixture.home_automation_agent",
eval_dataset_file_path_or_dir="tests/integration/fixture/home_automation_agent/test_files/dependent_tool_calls.test.json",
num_runs=4,
@@ -39,8 +39,7 @@ async def test_dependent_tool_calls():
@pytest.mark.asyncio
async def test_memorizing_past_events():
"""Test memorizing past events."""
AgentEvaluator.evaluate(
await AgentEvaluator.evaluate(
agent_module="tests.integration.fixture.home_automation_agent",
eval_dataset_file_path_or_dir="tests/integration/fixture/home_automation_agent/test_files/memorizing_past_events/eval_data.test.json",
num_runs=4,