mirror of
https://github.com/EvolutionAPI/adk-python.git
synced 2025-12-18 11:22:22 -06:00
Updated test cases to use the new EvalSet schema to store test data. Also, added a utility to help migrate existing tests files to the new schema.
Also, migrated existing test files to the new schema and deleted test session files as they are no longer needed. PiperOrigin-RevId: 759318735
This commit is contained in:
committed by
Copybara-Service
parent
a71d9ea9a1
commit
1c23556225
@@ -1,5 +1,65 @@
|
||||
[{
|
||||
"query": "Turn off device_2 in the Bedroom.",
|
||||
"expected_tool_use": [{"tool_name": "set_device_info", "tool_input": {"location": "Bedroom", "device_id": "device_2", "status": "OFF"}}],
|
||||
"reference": "I have set the device_2 status to off."
|
||||
}]
|
||||
{
|
||||
"eval_set_id": "b305bd06-38c5-4796-b9c7-d9c7454338b9",
|
||||
"name": "b305bd06-38c5-4796-b9c7-d9c7454338b9",
|
||||
"description": null,
|
||||
"eval_cases": [
|
||||
{
|
||||
"eval_id": "tests/integration/fixture/home_automation_agent/simple_test.test.json",
|
||||
"conversation": [
|
||||
{
|
||||
"invocation_id": "b7982664-0ab6-47cc-ab13-326656afdf75",
|
||||
"user_content": {
|
||||
"parts": [
|
||||
{
|
||||
"video_metadata": null,
|
||||
"thought": null,
|
||||
"code_execution_result": null,
|
||||
"executable_code": null,
|
||||
"file_data": null,
|
||||
"function_call": null,
|
||||
"function_response": null,
|
||||
"inline_data": null,
|
||||
"text": "Turn off device_2 in the Bedroom."
|
||||
}
|
||||
],
|
||||
"role": "user"
|
||||
},
|
||||
"final_response": {
|
||||
"parts": [
|
||||
{
|
||||
"video_metadata": null,
|
||||
"thought": null,
|
||||
"code_execution_result": null,
|
||||
"executable_code": null,
|
||||
"file_data": null,
|
||||
"function_call": null,
|
||||
"function_response": null,
|
||||
"inline_data": null,
|
||||
"text": "I have set the device_2 status to off."
|
||||
}
|
||||
],
|
||||
"role": "model"
|
||||
},
|
||||
"intermediate_data": {
|
||||
"tool_uses": [
|
||||
{
|
||||
"id": null,
|
||||
"args": {
|
||||
"location": "Bedroom",
|
||||
"device_id": "device_2",
|
||||
"status": "OFF"
|
||||
},
|
||||
"name": "set_device_info"
|
||||
}
|
||||
],
|
||||
"intermediate_responses": []
|
||||
},
|
||||
"creation_timestamp": 1747337309.2360144
|
||||
}
|
||||
],
|
||||
"session_input": null,
|
||||
"creation_timestamp": 1747337309.2360282
|
||||
}
|
||||
],
|
||||
"creation_timestamp": 1747337309.2360387
|
||||
}
|
||||
@@ -1,18 +1,113 @@
|
||||
[
|
||||
{
|
||||
"query": "Turn off device_2 in the Bedroom.",
|
||||
"expected_tool_use": [{
|
||||
"tool_name": "set_device_info",
|
||||
"tool_input": {"location": "Bedroom", "status": "OFF", "device_id": "device_2"}
|
||||
}],
|
||||
"reference": "I have set the device 2 status to off."
|
||||
},
|
||||
{
|
||||
"query": "What's the status of device_2 in the Bedroom?",
|
||||
"expected_tool_use": [{
|
||||
"tool_name": "get_device_info",
|
||||
"tool_input": {"device_id": "device_2"}
|
||||
}],
|
||||
"reference": "Status of device_2 is off."
|
||||
}
|
||||
]
|
||||
{
|
||||
"eval_set_id": "1be50511-ff75-4d68-b2d7-2165cbdc1044",
|
||||
"name": "1be50511-ff75-4d68-b2d7-2165cbdc1044",
|
||||
"description": null,
|
||||
"eval_cases": [
|
||||
{
|
||||
"eval_id": "tests/integration/fixture/home_automation_agent/test_files/dependent_tool_calls.test.json",
|
||||
"conversation": [
|
||||
{
|
||||
"invocation_id": "cbece1c0-3811-45c0-96fc-9a4279075483",
|
||||
"user_content": {
|
||||
"parts": [
|
||||
{
|
||||
"video_metadata": null,
|
||||
"thought": null,
|
||||
"code_execution_result": null,
|
||||
"executable_code": null,
|
||||
"file_data": null,
|
||||
"function_call": null,
|
||||
"function_response": null,
|
||||
"inline_data": null,
|
||||
"text": "Turn off device_2 in the Bedroom."
|
||||
}
|
||||
],
|
||||
"role": "user"
|
||||
},
|
||||
"final_response": {
|
||||
"parts": [
|
||||
{
|
||||
"video_metadata": null,
|
||||
"thought": null,
|
||||
"code_execution_result": null,
|
||||
"executable_code": null,
|
||||
"file_data": null,
|
||||
"function_call": null,
|
||||
"function_response": null,
|
||||
"inline_data": null,
|
||||
"text": "I have set the device 2 status to off."
|
||||
}
|
||||
],
|
||||
"role": "model"
|
||||
},
|
||||
"intermediate_data": {
|
||||
"tool_uses": [
|
||||
{
|
||||
"id": null,
|
||||
"args": {
|
||||
"location": "Bedroom",
|
||||
"status": "OFF",
|
||||
"device_id": "device_2"
|
||||
},
|
||||
"name": "set_device_info"
|
||||
}
|
||||
],
|
||||
"intermediate_responses": []
|
||||
},
|
||||
"creation_timestamp": 1747340826.1082227
|
||||
},
|
||||
{
|
||||
"invocation_id": "cc85cdae-4258-4b94-8fe7-a985b8356190",
|
||||
"user_content": {
|
||||
"parts": [
|
||||
{
|
||||
"video_metadata": null,
|
||||
"thought": null,
|
||||
"code_execution_result": null,
|
||||
"executable_code": null,
|
||||
"file_data": null,
|
||||
"function_call": null,
|
||||
"function_response": null,
|
||||
"inline_data": null,
|
||||
"text": "What's the status of device_2 in the Bedroom?"
|
||||
}
|
||||
],
|
||||
"role": "user"
|
||||
},
|
||||
"final_response": {
|
||||
"parts": [
|
||||
{
|
||||
"video_metadata": null,
|
||||
"thought": null,
|
||||
"code_execution_result": null,
|
||||
"executable_code": null,
|
||||
"file_data": null,
|
||||
"function_call": null,
|
||||
"function_response": null,
|
||||
"inline_data": null,
|
||||
"text": "Status of device_2 is off."
|
||||
}
|
||||
],
|
||||
"role": "model"
|
||||
},
|
||||
"intermediate_data": {
|
||||
"tool_uses": [
|
||||
{
|
||||
"id": null,
|
||||
"args": {
|
||||
"device_id": "device_2"
|
||||
},
|
||||
"name": "get_device_info"
|
||||
}
|
||||
],
|
||||
"intermediate_responses": []
|
||||
},
|
||||
"creation_timestamp": 1747340826.1082554
|
||||
}
|
||||
],
|
||||
"session_input": null,
|
||||
"creation_timestamp": 1747340826.108262
|
||||
}
|
||||
],
|
||||
"creation_timestamp": 1747340826.108275
|
||||
}
|
||||
@@ -1,17 +1,105 @@
|
||||
[
|
||||
{
|
||||
"query": "Turn off device_2 in the Bedroom.",
|
||||
"expected_tool_use": [
|
||||
{
|
||||
"tool_name": "set_device_info",
|
||||
"tool_input": {"location": "Bedroom", "device_id": "device_2", "status": "OFF"}
|
||||
}
|
||||
{
|
||||
"eval_set_id": "94553685-5f19-492b-bc44-f3bc775955e9",
|
||||
"name": "94553685-5f19-492b-bc44-f3bc775955e9",
|
||||
"description": null,
|
||||
"eval_cases": [
|
||||
{
|
||||
"eval_id": "tests/integration/fixture/home_automation_agent/test_files/memorizing_past_events/eval_data.test.json",
|
||||
"conversation": [
|
||||
{
|
||||
"invocation_id": "a958b622-21d3-4a6c-9c15-1274bbb8a6b6",
|
||||
"user_content": {
|
||||
"parts": [
|
||||
{
|
||||
"video_metadata": null,
|
||||
"thought": null,
|
||||
"code_execution_result": null,
|
||||
"executable_code": null,
|
||||
"file_data": null,
|
||||
"function_call": null,
|
||||
"function_response": null,
|
||||
"inline_data": null,
|
||||
"text": "Turn off device_2 in the Bedroom."
|
||||
}
|
||||
],
|
||||
"role": "user"
|
||||
},
|
||||
"final_response": {
|
||||
"parts": [
|
||||
{
|
||||
"video_metadata": null,
|
||||
"thought": null,
|
||||
"code_execution_result": null,
|
||||
"executable_code": null,
|
||||
"file_data": null,
|
||||
"function_call": null,
|
||||
"function_response": null,
|
||||
"inline_data": null,
|
||||
"text": "OK. I've turned off device_2 in the Bedroom. Anything else?\n"
|
||||
}
|
||||
],
|
||||
"role": "model"
|
||||
},
|
||||
"intermediate_data": {
|
||||
"tool_uses": [
|
||||
{
|
||||
"id": null,
|
||||
"args": {
|
||||
"location": "Bedroom",
|
||||
"device_id": "device_2",
|
||||
"status": "OFF"
|
||||
},
|
||||
"name": "set_device_info"
|
||||
}
|
||||
],
|
||||
"intermediate_responses": []
|
||||
},
|
||||
"creation_timestamp": 1747340865.7043095
|
||||
},
|
||||
{
|
||||
"invocation_id": "1c07123d-4bed-4eb0-9e55-c7f80c70dadf",
|
||||
"user_content": {
|
||||
"parts": [
|
||||
{
|
||||
"video_metadata": null,
|
||||
"thought": null,
|
||||
"code_execution_result": null,
|
||||
"executable_code": null,
|
||||
"file_data": null,
|
||||
"function_call": null,
|
||||
"function_response": null,
|
||||
"inline_data": null,
|
||||
"text": "What's the command I just issued?"
|
||||
}
|
||||
],
|
||||
"role": "user"
|
||||
},
|
||||
"final_response": {
|
||||
"parts": [
|
||||
{
|
||||
"video_metadata": null,
|
||||
"thought": null,
|
||||
"code_execution_result": null,
|
||||
"executable_code": null,
|
||||
"file_data": null,
|
||||
"function_call": null,
|
||||
"function_response": null,
|
||||
"inline_data": null,
|
||||
"text": "You asked me to turn off device_2 in the Bedroom.\n"
|
||||
}
|
||||
],
|
||||
"role": "model"
|
||||
},
|
||||
"intermediate_data": {
|
||||
"tool_uses": [],
|
||||
"intermediate_responses": []
|
||||
},
|
||||
"creation_timestamp": 1747340865.7043421
|
||||
}
|
||||
],
|
||||
"reference": "OK. I've turned off device_2 in the Bedroom. Anything else?\n"
|
||||
},
|
||||
{
|
||||
"query": "What's the command I just issued?",
|
||||
"expected_tool_use": [],
|
||||
"reference": "You asked me to turn off device_2 in the Bedroom.\n"
|
||||
}
|
||||
]
|
||||
"session_input": null,
|
||||
"creation_timestamp": 1747340865.7043483
|
||||
}
|
||||
],
|
||||
"creation_timestamp": 1747340865.704361
|
||||
}
|
||||
@@ -1,18 +1,115 @@
|
||||
[
|
||||
{
|
||||
"eval_set_id": "4412cca6-dfcd-43ab-bbc5-9155380c7137",
|
||||
"name": "4412cca6-dfcd-43ab-bbc5-9155380c7137",
|
||||
"description": null,
|
||||
"eval_cases": [
|
||||
{
|
||||
"query": "Turn off device_2 in the Bedroom.",
|
||||
"expected_tool_use": [{
|
||||
"tool_name": "set_device_info",
|
||||
"tool_input": {"location": "Bedroom", "device_id": "device_2", "status": "OFF"}
|
||||
}],
|
||||
"reference": "I have set the device 2 status to off."
|
||||
},
|
||||
{
|
||||
"query": "Turn on device_2 in the Bedroom.",
|
||||
"expected_tool_use": [{
|
||||
"tool_name": "set_device_info",
|
||||
"tool_input": {"location": "Bedroom", "status": "ON", "device_id": "device_2"}
|
||||
}],
|
||||
"reference": "I have set the device 2 status to on."
|
||||
"eval_id": "tests/integration/fixture/home_automation_agent/test_files/simple_multi_turn_conversation.test.json",
|
||||
"conversation": [
|
||||
{
|
||||
"invocation_id": "9f51a1ac-56a4-4b4a-9878-36ff1ae312ce",
|
||||
"user_content": {
|
||||
"parts": [
|
||||
{
|
||||
"video_metadata": null,
|
||||
"thought": null,
|
||||
"code_execution_result": null,
|
||||
"executable_code": null,
|
||||
"file_data": null,
|
||||
"function_call": null,
|
||||
"function_response": null,
|
||||
"inline_data": null,
|
||||
"text": "Turn off device_2 in the Bedroom."
|
||||
}
|
||||
],
|
||||
"role": "user"
|
||||
},
|
||||
"final_response": {
|
||||
"parts": [
|
||||
{
|
||||
"video_metadata": null,
|
||||
"thought": null,
|
||||
"code_execution_result": null,
|
||||
"executable_code": null,
|
||||
"file_data": null,
|
||||
"function_call": null,
|
||||
"function_response": null,
|
||||
"inline_data": null,
|
||||
"text": "I have set the device 2 status to off."
|
||||
}
|
||||
],
|
||||
"role": "model"
|
||||
},
|
||||
"intermediate_data": {
|
||||
"tool_uses": [
|
||||
{
|
||||
"id": null,
|
||||
"args": {
|
||||
"location": "Bedroom",
|
||||
"device_id": "device_2",
|
||||
"status": "OFF"
|
||||
},
|
||||
"name": "set_device_info"
|
||||
}
|
||||
],
|
||||
"intermediate_responses": []
|
||||
},
|
||||
"creation_timestamp": 1747340791.7353904
|
||||
},
|
||||
{
|
||||
"invocation_id": "c82d54d0-5fa8-4f79-a6dc-692090f0d42b",
|
||||
"user_content": {
|
||||
"parts": [
|
||||
{
|
||||
"video_metadata": null,
|
||||
"thought": null,
|
||||
"code_execution_result": null,
|
||||
"executable_code": null,
|
||||
"file_data": null,
|
||||
"function_call": null,
|
||||
"function_response": null,
|
||||
"inline_data": null,
|
||||
"text": "Turn on device_2 in the Bedroom."
|
||||
}
|
||||
],
|
||||
"role": "user"
|
||||
},
|
||||
"final_response": {
|
||||
"parts": [
|
||||
{
|
||||
"video_metadata": null,
|
||||
"thought": null,
|
||||
"code_execution_result": null,
|
||||
"executable_code": null,
|
||||
"file_data": null,
|
||||
"function_call": null,
|
||||
"function_response": null,
|
||||
"inline_data": null,
|
||||
"text": "I have set the device 2 status to on."
|
||||
}
|
||||
],
|
||||
"role": "model"
|
||||
},
|
||||
"intermediate_data": {
|
||||
"tool_uses": [
|
||||
{
|
||||
"id": null,
|
||||
"args": {
|
||||
"location": "Bedroom",
|
||||
"status": "ON",
|
||||
"device_id": "device_2"
|
||||
},
|
||||
"name": "set_device_info"
|
||||
}
|
||||
],
|
||||
"intermediate_responses": []
|
||||
},
|
||||
"creation_timestamp": 1747340791.7354295
|
||||
}
|
||||
],
|
||||
"session_input": null,
|
||||
"creation_timestamp": 1747340791.7354348
|
||||
}
|
||||
]
|
||||
],
|
||||
"creation_timestamp": 1747340791.735446
|
||||
}
|
||||
@@ -1,17 +1,105 @@
|
||||
[
|
||||
{
|
||||
"query": "Turn off device_2 in the Bedroom.",
|
||||
"expected_tool_use": [
|
||||
{
|
||||
"tool_name": "set_device_info",
|
||||
"tool_input": {"location": "Bedroom", "device_id": "device_2", "status": "OFF"}
|
||||
}
|
||||
{
|
||||
"eval_set_id": "9100bfc9-cc28-4ab9-b920-2dc72e138997",
|
||||
"name": "9100bfc9-cc28-4ab9-b920-2dc72e138997",
|
||||
"description": null,
|
||||
"eval_cases": [
|
||||
{
|
||||
"eval_id": "tests/integration/fixture/home_automation_agent/test_files/simple_test.test.json",
|
||||
"conversation": [
|
||||
{
|
||||
"invocation_id": "9f5e8d91-8e51-41d6-addf-196a828168c5",
|
||||
"user_content": {
|
||||
"parts": [
|
||||
{
|
||||
"video_metadata": null,
|
||||
"thought": null,
|
||||
"code_execution_result": null,
|
||||
"executable_code": null,
|
||||
"file_data": null,
|
||||
"function_call": null,
|
||||
"function_response": null,
|
||||
"inline_data": null,
|
||||
"text": "Turn off device_2 in the Bedroom."
|
||||
}
|
||||
],
|
||||
"role": "user"
|
||||
},
|
||||
"final_response": {
|
||||
"parts": [
|
||||
{
|
||||
"video_metadata": null,
|
||||
"thought": null,
|
||||
"code_execution_result": null,
|
||||
"executable_code": null,
|
||||
"file_data": null,
|
||||
"function_call": null,
|
||||
"function_response": null,
|
||||
"inline_data": null,
|
||||
"text": "OK. I've turned off device_2 in the Bedroom. Anything else?\n"
|
||||
}
|
||||
],
|
||||
"role": "model"
|
||||
},
|
||||
"intermediate_data": {
|
||||
"tool_uses": [
|
||||
{
|
||||
"id": null,
|
||||
"args": {
|
||||
"location": "Bedroom",
|
||||
"device_id": "device_2",
|
||||
"status": "OFF"
|
||||
},
|
||||
"name": "set_device_info"
|
||||
}
|
||||
],
|
||||
"intermediate_responses": []
|
||||
},
|
||||
"creation_timestamp": 1747340849.0429707
|
||||
},
|
||||
{
|
||||
"invocation_id": "767b2451-5f7b-4c73-aeaf-a82c71e15788",
|
||||
"user_content": {
|
||||
"parts": [
|
||||
{
|
||||
"video_metadata": null,
|
||||
"thought": null,
|
||||
"code_execution_result": null,
|
||||
"executable_code": null,
|
||||
"file_data": null,
|
||||
"function_call": null,
|
||||
"function_response": null,
|
||||
"inline_data": null,
|
||||
"text": "What's the command I just issued?"
|
||||
}
|
||||
],
|
||||
"role": "user"
|
||||
},
|
||||
"final_response": {
|
||||
"parts": [
|
||||
{
|
||||
"video_metadata": null,
|
||||
"thought": null,
|
||||
"code_execution_result": null,
|
||||
"executable_code": null,
|
||||
"file_data": null,
|
||||
"function_call": null,
|
||||
"function_response": null,
|
||||
"inline_data": null,
|
||||
"text": "You asked me to turn off device_2 in the Bedroom.\n"
|
||||
}
|
||||
],
|
||||
"role": "model"
|
||||
},
|
||||
"intermediate_data": {
|
||||
"tool_uses": [],
|
||||
"intermediate_responses": []
|
||||
},
|
||||
"creation_timestamp": 1747340849.0429986
|
||||
}
|
||||
],
|
||||
"reference": "OK. I've turned off device_2 in the Bedroom. Anything else?\n"
|
||||
},
|
||||
{
|
||||
"query": "What's the command I just issued?",
|
||||
"expected_tool_use": [],
|
||||
"reference": "You asked me to turn off device_2 in the Bedroom.\n"
|
||||
}
|
||||
]
|
||||
"session_input": null,
|
||||
"creation_timestamp": 1747340849.0430045
|
||||
}
|
||||
],
|
||||
"creation_timestamp": 1747340849.0430162
|
||||
}
|
||||
@@ -1,5 +1,65 @@
|
||||
[{
|
||||
"query": "Turn off device_3 in the Bedroom.",
|
||||
"expected_tool_use": [{"tool_name": "set_device_info", "tool_input": {"location": "Bedroom", "device_id": "device_3", "status": "OFF"}}],
|
||||
"reference": "I have set the device_3 status to off."
|
||||
}]
|
||||
{
|
||||
"eval_set_id": "e141f90b-9e7e-4f06-94d7-bbe7e8080ead",
|
||||
"name": "e141f90b-9e7e-4f06-94d7-bbe7e8080ead",
|
||||
"description": null,
|
||||
"eval_cases": [
|
||||
{
|
||||
"eval_id": "tests/integration/fixture/home_automation_agent/test_files/simple_test2.test.json",
|
||||
"conversation": [
|
||||
{
|
||||
"invocation_id": "c35582f7-838a-460f-b783-039e278165e0",
|
||||
"user_content": {
|
||||
"parts": [
|
||||
{
|
||||
"video_metadata": null,
|
||||
"thought": null,
|
||||
"code_execution_result": null,
|
||||
"executable_code": null,
|
||||
"file_data": null,
|
||||
"function_call": null,
|
||||
"function_response": null,
|
||||
"inline_data": null,
|
||||
"text": "Turn off device_3 in the Bedroom."
|
||||
}
|
||||
],
|
||||
"role": "user"
|
||||
},
|
||||
"final_response": {
|
||||
"parts": [
|
||||
{
|
||||
"video_metadata": null,
|
||||
"thought": null,
|
||||
"code_execution_result": null,
|
||||
"executable_code": null,
|
||||
"file_data": null,
|
||||
"function_call": null,
|
||||
"function_response": null,
|
||||
"inline_data": null,
|
||||
"text": "I have set the device_3 status to off."
|
||||
}
|
||||
],
|
||||
"role": "model"
|
||||
},
|
||||
"intermediate_data": {
|
||||
"tool_uses": [
|
||||
{
|
||||
"id": null,
|
||||
"args": {
|
||||
"location": "Bedroom",
|
||||
"device_id": "device_3",
|
||||
"status": "OFF"
|
||||
},
|
||||
"name": "set_device_info"
|
||||
}
|
||||
],
|
||||
"intermediate_responses": []
|
||||
},
|
||||
"creation_timestamp": 1747340814.8645504
|
||||
}
|
||||
],
|
||||
"session_input": null,
|
||||
"creation_timestamp": 1747340814.86456
|
||||
}
|
||||
],
|
||||
"creation_timestamp": 1747340814.864572
|
||||
}
|
||||
Reference in New Issue
Block a user