mirror of
https://github.com/EvolutionAPI/adk-python.git
synced 2026-02-05 06:16:24 -06:00
Update Eval Run and TrajectoryEvaluator to use the new schema.
PiperOrigin-RevId: 758927160
This commit is contained in:
committed by
Copybara-Service
parent
2cb74dd20e
commit
ee674ce0ef
@@ -296,6 +296,7 @@ def cli_eval(
|
||||
from .cli_eval import parse_and_get_evals_to_run
|
||||
from .cli_eval import run_evals
|
||||
from .cli_eval import try_get_reset_func
|
||||
from ..evaluation.local_eval_sets_manager import load_eval_set_from_file
|
||||
except ModuleNotFoundError:
|
||||
raise click.ClickException(MISSING_EVAL_DEPENDENCIES_MESSAGE)
|
||||
|
||||
@@ -311,17 +312,27 @@ def cli_eval(
|
||||
root_agent = get_root_agent(agent_module_file_path)
|
||||
reset_func = try_get_reset_func(agent_module_file_path)
|
||||
|
||||
eval_set_to_evals = parse_and_get_evals_to_run(eval_set_file_path)
|
||||
eval_set_file_path_to_evals = parse_and_get_evals_to_run(eval_set_file_path)
|
||||
eval_set_id_to_eval_cases = {}
|
||||
|
||||
# Read the eval_set files and get the cases.
|
||||
for eval_set_file_path, eval_case_ids in eval_set_file_path_to_evals.items():
|
||||
eval_set = load_eval_set_from_file(eval_set_file_path, eval_set_file_path)
|
||||
eval_cases = eval_set.eval_cases
|
||||
|
||||
if eval_case_ids:
|
||||
# There are eval_ids that we should select.
|
||||
eval_cases = [
|
||||
e for e in eval_set.eval_cases if e.eval_id in eval_case_ids
|
||||
]
|
||||
|
||||
eval_set_id_to_eval_cases[eval_set_file_path] = eval_cases
|
||||
|
||||
async def _collect_eval_results() -> list[EvalCaseResult]:
|
||||
return [
|
||||
result
|
||||
async for result in run_evals(
|
||||
eval_set_to_evals,
|
||||
root_agent,
|
||||
reset_func,
|
||||
eval_metrics,
|
||||
print_detailed_results=print_detailed_results,
|
||||
eval_set_id_to_eval_cases, root_agent, reset_func, eval_metrics
|
||||
)
|
||||
]
|
||||
|
||||
@@ -336,20 +347,28 @@ def cli_eval(
|
||||
for eval_result in eval_results:
|
||||
eval_result: EvalCaseResult
|
||||
|
||||
if eval_result.eval_set_file not in eval_run_summary:
|
||||
eval_run_summary[eval_result.eval_set_file] = [0, 0]
|
||||
if eval_result.eval_set_id not in eval_run_summary:
|
||||
eval_run_summary[eval_result.eval_set_id] = [0, 0]
|
||||
|
||||
if eval_result.final_eval_status == EvalStatus.PASSED:
|
||||
eval_run_summary[eval_result.eval_set_file][0] += 1
|
||||
eval_run_summary[eval_result.eval_set_id][0] += 1
|
||||
else:
|
||||
eval_run_summary[eval_result.eval_set_file][1] += 1
|
||||
eval_run_summary[eval_result.eval_set_id][1] += 1
|
||||
print("Eval Run Summary")
|
||||
for eval_set_file, pass_fail_count in eval_run_summary.items():
|
||||
for eval_set_id, pass_fail_count in eval_run_summary.items():
|
||||
print(
|
||||
f"{eval_set_file}:\n Tests passed: {pass_fail_count[0]}\n Tests"
|
||||
f"{eval_set_id}:\n Tests passed: {pass_fail_count[0]}\n Tests"
|
||||
f" failed: {pass_fail_count[1]}"
|
||||
)
|
||||
|
||||
if print_detailed_results:
|
||||
for eval_result in eval_results:
|
||||
eval_result: EvalCaseResult
|
||||
print(
|
||||
"*********************************************************************"
|
||||
)
|
||||
print(eval_result.model_dump_json(indent=2))
|
||||
|
||||
|
||||
@main.command("web")
|
||||
@click.option(
|
||||
|
||||
Reference in New Issue
Block a user