mirror of
https://github.com/EvolutionAPI/adk-python.git
synced 2025-07-13 15:14:50 -06:00
Write eval results locally from adk eval cli.
PiperOrigin-RevId: 762499588
This commit is contained in:
parent
33921d524f
commit
79681e3513
@ -13,11 +13,14 @@
|
||||
# limitations under the License.
|
||||
|
||||
import asyncio
|
||||
import collections
|
||||
from contextlib import asynccontextmanager
|
||||
from datetime import datetime
|
||||
import logging
|
||||
import os
|
||||
import tempfile
|
||||
from typing import AsyncGenerator
|
||||
from typing import Coroutine
|
||||
from typing import Optional
|
||||
|
||||
import click
|
||||
@ -27,6 +30,8 @@ import uvicorn
|
||||
from . import cli_create
|
||||
from . import cli_deploy
|
||||
from .. import version
|
||||
from ..evaluation.local_eval_set_results_manager import LocalEvalSetResultsManager
|
||||
from ..sessions.in_memory_session_service import InMemorySessionService
|
||||
from .cli import run_cli
|
||||
from .cli_eval import MISSING_EVAL_DEPENDENCIES_MESSAGE
|
||||
from .fast_api import get_fast_api_app
|
||||
@ -306,7 +311,7 @@ def cli_eval(
|
||||
EvalMetric(metric_name=metric_name, threshold=threshold)
|
||||
)
|
||||
|
||||
print(f"Using evaluation creiteria: {evaluation_criteria}")
|
||||
print(f"Using evaluation criteria: {evaluation_criteria}")
|
||||
|
||||
root_agent = get_root_agent(agent_module_file_path)
|
||||
reset_func = try_get_reset_func(agent_module_file_path)
|
||||
@ -325,21 +330,47 @@ def cli_eval(
|
||||
e for e in eval_set.eval_cases if e.eval_id in eval_case_ids
|
||||
]
|
||||
|
||||
eval_set_id_to_eval_cases[eval_set_file_path] = eval_cases
|
||||
eval_set_id_to_eval_cases[eval_set.eval_set_id] = eval_cases
|
||||
|
||||
async def _collect_eval_results() -> list[EvalCaseResult]:
|
||||
return [
|
||||
result
|
||||
async for result in run_evals(
|
||||
eval_set_id_to_eval_cases, root_agent, reset_func, eval_metrics
|
||||
)
|
||||
]
|
||||
session_service = InMemorySessionService()
|
||||
eval_case_results = []
|
||||
async for eval_case_result in run_evals(
|
||||
eval_set_id_to_eval_cases,
|
||||
root_agent,
|
||||
reset_func,
|
||||
eval_metrics,
|
||||
session_service=session_service,
|
||||
):
|
||||
eval_case_result.session_details = await session_service.get_session(
|
||||
app_name=os.path.basename(agent_module_file_path),
|
||||
user_id=eval_case_result.user_id,
|
||||
session_id=eval_case_result.session_id,
|
||||
)
|
||||
eval_case_results.append(eval_case_result)
|
||||
return eval_case_results
|
||||
|
||||
try:
|
||||
eval_results = asyncio.run(_collect_eval_results())
|
||||
except ModuleNotFoundError:
|
||||
raise click.ClickException(MISSING_EVAL_DEPENDENCIES_MESSAGE)
|
||||
|
||||
# Write eval set results.
|
||||
local_eval_set_results_manager = LocalEvalSetResultsManager(
|
||||
agent_dir=os.path.dirname(agent_module_file_path)
|
||||
)
|
||||
eval_set_id_to_eval_results = collections.defaultdict(list)
|
||||
for eval_case_result in eval_results:
|
||||
eval_set_id = eval_case_result.eval_set_id
|
||||
eval_set_id_to_eval_results[eval_set_id].append(eval_case_result)
|
||||
|
||||
for eval_set_id, eval_case_results in eval_set_id_to_eval_results.items():
|
||||
local_eval_set_results_manager.save_eval_set_result(
|
||||
app_name=os.path.basename(agent_module_file_path),
|
||||
eval_set_id=eval_set_id,
|
||||
eval_case_results=eval_case_results,
|
||||
)
|
||||
|
||||
print("*********************************************************************")
|
||||
eval_run_summary = {}
|
||||
|
||||
|
@ -29,6 +29,10 @@ _ADK_EVAL_HISTORY_DIR = ".adk/eval_history"
|
||||
_EVAL_SET_RESULT_FILE_EXTENSION = ".evalset_result.json"
|
||||
|
||||
|
||||
def _sanitize_eval_set_result_name(eval_set_result_name: str) -> str:
|
||||
return eval_set_result_name.replace("/", "_")
|
||||
|
||||
|
||||
class LocalEvalSetResultsManager(EvalSetResultsManager):
|
||||
"""An EvalSetResult manager that stores eval set results locally on disk."""
|
||||
|
||||
@ -44,9 +48,10 @@ class LocalEvalSetResultsManager(EvalSetResultsManager):
|
||||
) -> None:
|
||||
"""Creates and saves a new EvalSetResult given eval_case_results."""
|
||||
timestamp = time.time()
|
||||
eval_set_result_name = app_name + "_" + eval_set_id + "_" + str(timestamp)
|
||||
eval_set_result_id = app_name + "_" + eval_set_id + "_" + str(timestamp)
|
||||
eval_set_result_name = _sanitize_eval_set_result_name(eval_set_result_id)
|
||||
eval_set_result = EvalSetResult(
|
||||
eval_set_result_id=eval_set_result_name,
|
||||
eval_set_result_id=eval_set_result_id,
|
||||
eval_set_result_name=eval_set_result_name,
|
||||
eval_set_id=eval_set_id,
|
||||
eval_case_results=eval_case_results,
|
||||
|
Loading…
Reference in New Issue
Block a user