Updated test cases to use the new EvalSet schema to store test data. Also, added a utility to help migrate existing test files to the new schema.

Also, migrated existing test files to the new schema and deleted test session files as they are no longer needed.

PiperOrigin-RevId: 759318735
This commit is contained in:
Ankur Sharma
2025-05-15 15:09:30 -07:00
committed by Copybara-Service
parent a71d9ea9a1
commit 1c23556225
17 changed files with 1110 additions and 231 deletions

View File

@@ -1,69 +1,229 @@
[
{
"query": "Send an email to user user_a whose email address is alice@example.com",
"expected_tool_use": [
{
"tool_name": "send_email",
"tool_input": {
"email": "alice@example.com",
"user_id": "user_a"
{
"eval_set_id": "a1157c01-851f-48a8-b956-83cf7f463510",
"name": "a1157c01-851f-48a8-b956-83cf7f463510",
"description": null,
"eval_cases": [
{
"eval_id": "tests/integration/fixture/ecommerce_customer_service_agent/order_query.test.json",
"conversation": [
{
"invocation_id": "38d54523-d789-4873-8cc0-d38826c7feb4",
"user_content": {
"parts": [
{
"video_metadata": null,
"thought": null,
"code_execution_result": null,
"executable_code": null,
"file_data": null,
"function_call": null,
"function_response": null,
"inline_data": null,
"text": "Send an email to user user_a whose email address is alice@example.com"
}
],
"role": "user"
},
"final_response": {
"parts": [
{
"video_metadata": null,
"thought": null,
"code_execution_result": null,
"executable_code": null,
"file_data": null,
"function_call": null,
"function_response": null,
"inline_data": null,
"text": "Email sent to alice@example.com for user id user_a."
}
],
"role": "model"
},
"intermediate_data": {
"tool_uses": [
{
"id": null,
"args": {
"email": "alice@example.com",
"user_id": "user_a"
},
"name": "send_email"
}
],
"intermediate_responses": []
},
"creation_timestamp": 1747341706.6240807
},
{
"invocation_id": "916393ab-0bce-4cb0-98de-6573d4e8e25c",
"user_content": {
"parts": [
{
"video_metadata": null,
"thought": null,
"code_execution_result": null,
"executable_code": null,
"file_data": null,
"function_call": null,
"function_response": null,
"inline_data": null,
"text": "Can you tell me the status of my order with ID 1?"
}
],
"role": "user"
},
"final_response": {
"parts": [
{
"video_metadata": null,
"thought": null,
"code_execution_result": null,
"executable_code": null,
"file_data": null,
"function_call": null,
"function_response": null,
"inline_data": null,
"text": "Your order with ID 1 is FINISHED."
}
],
"role": "model"
},
"intermediate_data": {
"tool_uses": [
{
"id": null,
"args": {
"order_id": "1"
},
"name": "get_order_status"
}
],
"intermediate_responses": []
},
"creation_timestamp": 1747341706.6241167
},
{
"invocation_id": "511b23d9-56f9-423b-9c31-7626f3411c32",
"user_content": {
"parts": [
{
"video_metadata": null,
"thought": null,
"code_execution_result": null,
"executable_code": null,
"file_data": null,
"function_call": null,
"function_response": null,
"inline_data": null,
"text": "Cancel all pending order for the user with user id user_a"
}
],
"role": "user"
},
"final_response": {
"parts": [
{
"video_metadata": null,
"thought": null,
"code_execution_result": null,
"executable_code": null,
"file_data": null,
"function_call": null,
"function_response": null,
"inline_data": null,
"text": "I have checked your orders and order 4 was in pending status, so I have cancelled it. Order 1 was already finished and couldn't be cancelled.\n"
}
],
"role": "model"
},
"intermediate_data": {
"tool_uses": [
{
"id": null,
"args": {
"user_id": "user_a"
},
"name": "get_order_ids_for_user"
},
{
"id": null,
"args": {
"order_id": "1"
},
"name": "get_order_status"
},
{
"id": null,
"args": {
"order_id": "4"
},
"name": "get_order_status"
},
{
"id": null,
"args": {
"order_id": "4"
},
"name": "cancel_order"
}
],
"intermediate_responses": []
},
"creation_timestamp": 1747341706.6241703
},
{
"invocation_id": "dcdf4b6d-96dd-4602-8c14-0563c6f6b5d0",
"user_content": {
"parts": [
{
"video_metadata": null,
"thought": null,
"code_execution_result": null,
"executable_code": null,
"file_data": null,
"function_call": null,
"function_response": null,
"inline_data": null,
"text": "What orders have I placed under the username user_b?"
}
],
"role": "user"
},
"final_response": {
"parts": [
{
"video_metadata": null,
"thought": null,
"code_execution_result": null,
"executable_code": null,
"file_data": null,
"function_call": null,
"function_response": null,
"inline_data": null,
"text": "User user_b has placed one order with order ID 2.\n"
}
],
"role": "model"
},
"intermediate_data": {
"tool_uses": [
{
"id": null,
"args": {
"user_id": "user_b"
},
"name": "get_order_ids_for_user"
}
],
"intermediate_responses": []
},
"creation_timestamp": 1747341706.624196
}
}
],
"reference": "Email sent to alice@example.com for user id user_a."
},
{
"query": "Can you tell me the status of my order with ID 1?",
"expected_tool_use": [
{
"tool_name": "get_order_status",
"tool_input": {
"order_id": "1"
}
}
],
"reference": "Your order with ID 1 is FINISHED."
},
{
"query": "Cancel all pending order for the user with user id user_a",
"expected_tool_use": [
{
"tool_name": "get_order_ids_for_user",
"tool_input": {
"user_id": "user_a"
}
},
{
"tool_name": "get_order_status",
"tool_input": {
"order_id": "1"
}
},
{
"tool_name": "get_order_status",
"tool_input": {
"order_id": "4"
}
},
{
"tool_name": "cancel_order",
"tool_input": {
"order_id": "4"
}
}
],
"reference": "I have checked your orders and order 4 was in pending status, so I have cancelled it. Order 1 was already finished and couldn't be cancelled.\n"
},
{
"query": "What orders have I placed under the username user_b?",
"expected_tool_use": [
{
"tool_name": "get_order_ids_for_user",
"tool_input": {
"user_id": "user_b"
}
}
],
"reference": "User user_b has placed one order with order ID 2.\n"
}
]
],
"session_input": null,
"creation_timestamp": 1747341706.6242023
}
],
"creation_timestamp": 1747341706.6242158
}

View File

@@ -1,24 +1,143 @@
[
{
"query": "Hi who are you?",
"expected_tool_use": [],
"reference": "I am a data processing agent. I can roll dice and check if the results are prime numbers. What would you like me to do? \n"
},
{
"query": "What can you do?",
"expected_tool_use": [],
"reference": "I can roll dice for you of different sizes, and I can check if the results are prime numbers. I can also remember previous rolls if you'd like to check those for primes as well. What would you like me to do? \n"
},
{
"query": "Can you roll a die with 6 sides",
"expected_tool_use": [
{
"tool_name": "roll_die",
"tool_input": {
"sides": 6
{
"eval_set_id": "56540925-a5ff-49fe-a4e1-589fe78066f2",
"name": "56540925-a5ff-49fe-a4e1-589fe78066f2",
"description": null,
"eval_cases": [
{
"eval_id": "tests/integration/fixture/hello_world_agent/roll_die.test.json",
"conversation": [
{
"invocation_id": "b01f67f0-9f23-44d6-bbe4-36ea235cb9fb",
"user_content": {
"parts": [
{
"video_metadata": null,
"thought": null,
"code_execution_result": null,
"executable_code": null,
"file_data": null,
"function_call": null,
"function_response": null,
"inline_data": null,
"text": "Hi who are you?"
}
],
"role": "user"
},
"final_response": {
"parts": [
{
"video_metadata": null,
"thought": null,
"code_execution_result": null,
"executable_code": null,
"file_data": null,
"function_call": null,
"function_response": null,
"inline_data": null,
"text": "I am a data processing agent. I can roll dice and check if the results are prime numbers. What would you like me to do? \n"
}
],
"role": "model"
},
"intermediate_data": {
"tool_uses": [],
"intermediate_responses": []
},
"creation_timestamp": 1747341775.8937013
},
{
"invocation_id": "13be0093-ac29-4828-98c6-5bbd570c010c",
"user_content": {
"parts": [
{
"video_metadata": null,
"thought": null,
"code_execution_result": null,
"executable_code": null,
"file_data": null,
"function_call": null,
"function_response": null,
"inline_data": null,
"text": "What can you do?"
}
],
"role": "user"
},
"final_response": {
"parts": [
{
"video_metadata": null,
"thought": null,
"code_execution_result": null,
"executable_code": null,
"file_data": null,
"function_call": null,
"function_response": null,
"inline_data": null,
"text": "I can roll dice for you of different sizes, and I can check if the results are prime numbers. I can also remember previous rolls if you'd like to check those for primes as well. What would you like me to do? \n"
}
],
"role": "model"
},
"intermediate_data": {
"tool_uses": [],
"intermediate_responses": []
},
"creation_timestamp": 1747341775.8937378
},
{
"invocation_id": "7deda353-c936-4c21-b242-9fa75e45b6a7",
"user_content": {
"parts": [
{
"video_metadata": null,
"thought": null,
"code_execution_result": null,
"executable_code": null,
"file_data": null,
"function_call": null,
"function_response": null,
"inline_data": null,
"text": "Can you roll a die with 6 sides"
}
],
"role": "user"
},
"final_response": {
"parts": [
{
"video_metadata": null,
"thought": null,
"code_execution_result": null,
"executable_code": null,
"file_data": null,
"function_call": null,
"function_response": null,
"inline_data": null,
"text": null
}
],
"role": "model"
},
"intermediate_data": {
"tool_uses": [
{
"id": null,
"args": {
"sides": 6
},
"name": "roll_die"
}
],
"intermediate_responses": []
},
"creation_timestamp": 1747341775.8937788
}
}
],
"reference": null
}
]
],
"session_input": null,
"creation_timestamp": 1747341775.8937826
}
],
"creation_timestamp": 1747341775.8937957
}

View File

@@ -1,5 +1,65 @@
[{
"query": "Turn off device_2 in the Bedroom.",
"expected_tool_use": [{"tool_name": "set_device_info", "tool_input": {"location": "Bedroom", "device_id": "device_2", "status": "OFF"}}],
"reference": "I have set the device_2 status to off."
}]
{
"eval_set_id": "b305bd06-38c5-4796-b9c7-d9c7454338b9",
"name": "b305bd06-38c5-4796-b9c7-d9c7454338b9",
"description": null,
"eval_cases": [
{
"eval_id": "tests/integration/fixture/home_automation_agent/simple_test.test.json",
"conversation": [
{
"invocation_id": "b7982664-0ab6-47cc-ab13-326656afdf75",
"user_content": {
"parts": [
{
"video_metadata": null,
"thought": null,
"code_execution_result": null,
"executable_code": null,
"file_data": null,
"function_call": null,
"function_response": null,
"inline_data": null,
"text": "Turn off device_2 in the Bedroom."
}
],
"role": "user"
},
"final_response": {
"parts": [
{
"video_metadata": null,
"thought": null,
"code_execution_result": null,
"executable_code": null,
"file_data": null,
"function_call": null,
"function_response": null,
"inline_data": null,
"text": "I have set the device_2 status to off."
}
],
"role": "model"
},
"intermediate_data": {
"tool_uses": [
{
"id": null,
"args": {
"location": "Bedroom",
"device_id": "device_2",
"status": "OFF"
},
"name": "set_device_info"
}
],
"intermediate_responses": []
},
"creation_timestamp": 1747337309.2360144
}
],
"session_input": null,
"creation_timestamp": 1747337309.2360282
}
],
"creation_timestamp": 1747337309.2360387
}

View File

@@ -1,18 +1,113 @@
[
{
"query": "Turn off device_2 in the Bedroom.",
"expected_tool_use": [{
"tool_name": "set_device_info",
"tool_input": {"location": "Bedroom", "status": "OFF", "device_id": "device_2"}
}],
"reference": "I have set the device 2 status to off."
},
{
"query": "What's the status of device_2 in the Bedroom?",
"expected_tool_use": [{
"tool_name": "get_device_info",
"tool_input": {"device_id": "device_2"}
}],
"reference": "Status of device_2 is off."
}
]
{
"eval_set_id": "1be50511-ff75-4d68-b2d7-2165cbdc1044",
"name": "1be50511-ff75-4d68-b2d7-2165cbdc1044",
"description": null,
"eval_cases": [
{
"eval_id": "tests/integration/fixture/home_automation_agent/test_files/dependent_tool_calls.test.json",
"conversation": [
{
"invocation_id": "cbece1c0-3811-45c0-96fc-9a4279075483",
"user_content": {
"parts": [
{
"video_metadata": null,
"thought": null,
"code_execution_result": null,
"executable_code": null,
"file_data": null,
"function_call": null,
"function_response": null,
"inline_data": null,
"text": "Turn off device_2 in the Bedroom."
}
],
"role": "user"
},
"final_response": {
"parts": [
{
"video_metadata": null,
"thought": null,
"code_execution_result": null,
"executable_code": null,
"file_data": null,
"function_call": null,
"function_response": null,
"inline_data": null,
"text": "I have set the device 2 status to off."
}
],
"role": "model"
},
"intermediate_data": {
"tool_uses": [
{
"id": null,
"args": {
"location": "Bedroom",
"status": "OFF",
"device_id": "device_2"
},
"name": "set_device_info"
}
],
"intermediate_responses": []
},
"creation_timestamp": 1747340826.1082227
},
{
"invocation_id": "cc85cdae-4258-4b94-8fe7-a985b8356190",
"user_content": {
"parts": [
{
"video_metadata": null,
"thought": null,
"code_execution_result": null,
"executable_code": null,
"file_data": null,
"function_call": null,
"function_response": null,
"inline_data": null,
"text": "What's the status of device_2 in the Bedroom?"
}
],
"role": "user"
},
"final_response": {
"parts": [
{
"video_metadata": null,
"thought": null,
"code_execution_result": null,
"executable_code": null,
"file_data": null,
"function_call": null,
"function_response": null,
"inline_data": null,
"text": "Status of device_2 is off."
}
],
"role": "model"
},
"intermediate_data": {
"tool_uses": [
{
"id": null,
"args": {
"device_id": "device_2"
},
"name": "get_device_info"
}
],
"intermediate_responses": []
},
"creation_timestamp": 1747340826.1082554
}
],
"session_input": null,
"creation_timestamp": 1747340826.108262
}
],
"creation_timestamp": 1747340826.108275
}

View File

@@ -1,17 +1,105 @@
[
{
"query": "Turn off device_2 in the Bedroom.",
"expected_tool_use": [
{
"tool_name": "set_device_info",
"tool_input": {"location": "Bedroom", "device_id": "device_2", "status": "OFF"}
}
{
"eval_set_id": "94553685-5f19-492b-bc44-f3bc775955e9",
"name": "94553685-5f19-492b-bc44-f3bc775955e9",
"description": null,
"eval_cases": [
{
"eval_id": "tests/integration/fixture/home_automation_agent/test_files/memorizing_past_events/eval_data.test.json",
"conversation": [
{
"invocation_id": "a958b622-21d3-4a6c-9c15-1274bbb8a6b6",
"user_content": {
"parts": [
{
"video_metadata": null,
"thought": null,
"code_execution_result": null,
"executable_code": null,
"file_data": null,
"function_call": null,
"function_response": null,
"inline_data": null,
"text": "Turn off device_2 in the Bedroom."
}
],
"role": "user"
},
"final_response": {
"parts": [
{
"video_metadata": null,
"thought": null,
"code_execution_result": null,
"executable_code": null,
"file_data": null,
"function_call": null,
"function_response": null,
"inline_data": null,
"text": "OK. I've turned off device_2 in the Bedroom. Anything else?\n"
}
],
"role": "model"
},
"intermediate_data": {
"tool_uses": [
{
"id": null,
"args": {
"location": "Bedroom",
"device_id": "device_2",
"status": "OFF"
},
"name": "set_device_info"
}
],
"intermediate_responses": []
},
"creation_timestamp": 1747340865.7043095
},
{
"invocation_id": "1c07123d-4bed-4eb0-9e55-c7f80c70dadf",
"user_content": {
"parts": [
{
"video_metadata": null,
"thought": null,
"code_execution_result": null,
"executable_code": null,
"file_data": null,
"function_call": null,
"function_response": null,
"inline_data": null,
"text": "What's the command I just issued?"
}
],
"role": "user"
},
"final_response": {
"parts": [
{
"video_metadata": null,
"thought": null,
"code_execution_result": null,
"executable_code": null,
"file_data": null,
"function_call": null,
"function_response": null,
"inline_data": null,
"text": "You asked me to turn off device_2 in the Bedroom.\n"
}
],
"role": "model"
},
"intermediate_data": {
"tool_uses": [],
"intermediate_responses": []
},
"creation_timestamp": 1747340865.7043421
}
],
"reference": "OK. I've turned off device_2 in the Bedroom. Anything else?\n"
},
{
"query": "What's the command I just issued?",
"expected_tool_use": [],
"reference": "You asked me to turn off device_2 in the Bedroom.\n"
}
]
"session_input": null,
"creation_timestamp": 1747340865.7043483
}
],
"creation_timestamp": 1747340865.704361
}

View File

@@ -1,18 +1,115 @@
[
{
"eval_set_id": "4412cca6-dfcd-43ab-bbc5-9155380c7137",
"name": "4412cca6-dfcd-43ab-bbc5-9155380c7137",
"description": null,
"eval_cases": [
{
"query": "Turn off device_2 in the Bedroom.",
"expected_tool_use": [{
"tool_name": "set_device_info",
"tool_input": {"location": "Bedroom", "device_id": "device_2", "status": "OFF"}
}],
"reference": "I have set the device 2 status to off."
},
{
"query": "Turn on device_2 in the Bedroom.",
"expected_tool_use": [{
"tool_name": "set_device_info",
"tool_input": {"location": "Bedroom", "status": "ON", "device_id": "device_2"}
}],
"reference": "I have set the device 2 status to on."
"eval_id": "tests/integration/fixture/home_automation_agent/test_files/simple_multi_turn_conversation.test.json",
"conversation": [
{
"invocation_id": "9f51a1ac-56a4-4b4a-9878-36ff1ae312ce",
"user_content": {
"parts": [
{
"video_metadata": null,
"thought": null,
"code_execution_result": null,
"executable_code": null,
"file_data": null,
"function_call": null,
"function_response": null,
"inline_data": null,
"text": "Turn off device_2 in the Bedroom."
}
],
"role": "user"
},
"final_response": {
"parts": [
{
"video_metadata": null,
"thought": null,
"code_execution_result": null,
"executable_code": null,
"file_data": null,
"function_call": null,
"function_response": null,
"inline_data": null,
"text": "I have set the device 2 status to off."
}
],
"role": "model"
},
"intermediate_data": {
"tool_uses": [
{
"id": null,
"args": {
"location": "Bedroom",
"device_id": "device_2",
"status": "OFF"
},
"name": "set_device_info"
}
],
"intermediate_responses": []
},
"creation_timestamp": 1747340791.7353904
},
{
"invocation_id": "c82d54d0-5fa8-4f79-a6dc-692090f0d42b",
"user_content": {
"parts": [
{
"video_metadata": null,
"thought": null,
"code_execution_result": null,
"executable_code": null,
"file_data": null,
"function_call": null,
"function_response": null,
"inline_data": null,
"text": "Turn on device_2 in the Bedroom."
}
],
"role": "user"
},
"final_response": {
"parts": [
{
"video_metadata": null,
"thought": null,
"code_execution_result": null,
"executable_code": null,
"file_data": null,
"function_call": null,
"function_response": null,
"inline_data": null,
"text": "I have set the device 2 status to on."
}
],
"role": "model"
},
"intermediate_data": {
"tool_uses": [
{
"id": null,
"args": {
"location": "Bedroom",
"status": "ON",
"device_id": "device_2"
},
"name": "set_device_info"
}
],
"intermediate_responses": []
},
"creation_timestamp": 1747340791.7354295
}
],
"session_input": null,
"creation_timestamp": 1747340791.7354348
}
]
],
"creation_timestamp": 1747340791.735446
}

View File

@@ -1,17 +1,105 @@
[
{
"query": "Turn off device_2 in the Bedroom.",
"expected_tool_use": [
{
"tool_name": "set_device_info",
"tool_input": {"location": "Bedroom", "device_id": "device_2", "status": "OFF"}
}
{
"eval_set_id": "9100bfc9-cc28-4ab9-b920-2dc72e138997",
"name": "9100bfc9-cc28-4ab9-b920-2dc72e138997",
"description": null,
"eval_cases": [
{
"eval_id": "tests/integration/fixture/home_automation_agent/test_files/simple_test.test.json",
"conversation": [
{
"invocation_id": "9f5e8d91-8e51-41d6-addf-196a828168c5",
"user_content": {
"parts": [
{
"video_metadata": null,
"thought": null,
"code_execution_result": null,
"executable_code": null,
"file_data": null,
"function_call": null,
"function_response": null,
"inline_data": null,
"text": "Turn off device_2 in the Bedroom."
}
],
"role": "user"
},
"final_response": {
"parts": [
{
"video_metadata": null,
"thought": null,
"code_execution_result": null,
"executable_code": null,
"file_data": null,
"function_call": null,
"function_response": null,
"inline_data": null,
"text": "OK. I've turned off device_2 in the Bedroom. Anything else?\n"
}
],
"role": "model"
},
"intermediate_data": {
"tool_uses": [
{
"id": null,
"args": {
"location": "Bedroom",
"device_id": "device_2",
"status": "OFF"
},
"name": "set_device_info"
}
],
"intermediate_responses": []
},
"creation_timestamp": 1747340849.0429707
},
{
"invocation_id": "767b2451-5f7b-4c73-aeaf-a82c71e15788",
"user_content": {
"parts": [
{
"video_metadata": null,
"thought": null,
"code_execution_result": null,
"executable_code": null,
"file_data": null,
"function_call": null,
"function_response": null,
"inline_data": null,
"text": "What's the command I just issued?"
}
],
"role": "user"
},
"final_response": {
"parts": [
{
"video_metadata": null,
"thought": null,
"code_execution_result": null,
"executable_code": null,
"file_data": null,
"function_call": null,
"function_response": null,
"inline_data": null,
"text": "You asked me to turn off device_2 in the Bedroom.\n"
}
],
"role": "model"
},
"intermediate_data": {
"tool_uses": [],
"intermediate_responses": []
},
"creation_timestamp": 1747340849.0429986
}
],
"reference": "OK. I've turned off device_2 in the Bedroom. Anything else?\n"
},
{
"query": "What's the command I just issued?",
"expected_tool_use": [],
"reference": "You asked me to turn off device_2 in the Bedroom.\n"
}
]
"session_input": null,
"creation_timestamp": 1747340849.0430045
}
],
"creation_timestamp": 1747340849.0430162
}

View File

@@ -1,5 +1,65 @@
[{
"query": "Turn off device_3 in the Bedroom.",
"expected_tool_use": [{"tool_name": "set_device_info", "tool_input": {"location": "Bedroom", "device_id": "device_3", "status": "OFF"}}],
"reference": "I have set the device_3 status to off."
}]
{
"eval_set_id": "e141f90b-9e7e-4f06-94d7-bbe7e8080ead",
"name": "e141f90b-9e7e-4f06-94d7-bbe7e8080ead",
"description": null,
"eval_cases": [
{
"eval_id": "tests/integration/fixture/home_automation_agent/test_files/simple_test2.test.json",
"conversation": [
{
"invocation_id": "c35582f7-838a-460f-b783-039e278165e0",
"user_content": {
"parts": [
{
"video_metadata": null,
"thought": null,
"code_execution_result": null,
"executable_code": null,
"file_data": null,
"function_call": null,
"function_response": null,
"inline_data": null,
"text": "Turn off device_3 in the Bedroom."
}
],
"role": "user"
},
"final_response": {
"parts": [
{
"video_metadata": null,
"thought": null,
"code_execution_result": null,
"executable_code": null,
"file_data": null,
"function_call": null,
"function_response": null,
"inline_data": null,
"text": "I have set the device_3 status to off."
}
],
"role": "model"
},
"intermediate_data": {
"tool_uses": [
{
"id": null,
"args": {
"location": "Bedroom",
"device_id": "device_3",
"status": "OFF"
},
"name": "set_device_info"
}
],
"intermediate_responses": []
},
"creation_timestamp": 1747340814.8645504
}
],
"session_input": null,
"creation_timestamp": 1747340814.86456
}
],
"creation_timestamp": 1747340814.864572
}

View File

@@ -1,13 +0,0 @@
{
"id": "test_id",
"app_name": "trip_planner_agent",
"user_id": "test_user",
"state": {
"origin": "San Francisco",
"interests": "Food, Shopping, Museums",
"range": "1000 miles",
"cities": ""
},
"events": [],
"last_update_time": 1741218714.258285
}

View File

@@ -1,7 +1,64 @@
[
{
"query": "Based on my interests, where should I go, Yosemite national park or Los Angeles?",
"expected_tool_use": [],
"reference": "Given your interests in food, shopping, and museums, Los Angeles would be a better choice than Yosemite National Park. Yosemite is primarily focused on outdoor activities and natural landscapes, while Los Angeles offers a diverse range of culinary experiences, shopping districts, and world-class museums. I will now gather information to create an in-depth guide for your trip to Los Angeles.\n"
}
]
{
"eval_set_id": "189d6856-9b90-4b9c-bda8-7cec899507ae",
"name": "189d6856-9b90-4b9c-bda8-7cec899507ae",
"description": null,
"eval_cases": [
{
"eval_id": "tests/integration/fixture/trip_planner_agent/test_files/trip_inquiry_sub_agent.test.json",
"conversation": [
{
"invocation_id": "1c2e8003-d19c-4912-b0ae-17b9d568f8fb",
"user_content": {
"parts": [
{
"video_metadata": null,
"thought": null,
"code_execution_result": null,
"executable_code": null,
"file_data": null,
"function_call": null,
"function_response": null,
"inline_data": null,
"text": "Based on my interests, where should I go, Yosemite national park or Los Angeles?"
}
],
"role": "user"
},
"final_response": {
"parts": [
{
"video_metadata": null,
"thought": null,
"code_execution_result": null,
"executable_code": null,
"file_data": null,
"function_call": null,
"function_response": null,
"inline_data": null,
"text": "Given your interests in food, shopping, and museums, Los Angeles would be a better choice than Yosemite National Park. Yosemite is primarily focused on outdoor activities and natural landscapes, while Los Angeles offers a diverse range of culinary experiences, shopping districts, and world-class museums. I will now gather information to create an in-depth guide for your trip to Los Angeles.\n"
}
],
"role": "model"
},
"intermediate_data": {
"tool_uses": [],
"intermediate_responses": []
},
"creation_timestamp": 1747339378.484014
}
],
"session_input": {
"app_name": "trip_planner_agent",
"user_id": "test_user",
"state": {
"origin": "San Francisco",
"interests": "Food, Shopping, Museums",
"range": "1000 miles",
"cities": ""
}
},
"creation_timestamp": 1747339378.484044
}
],
"creation_timestamp": 1747339378.484056
}