mirror of
https://github.com/EvolutionAPI/adk-python.git
synced 2025-07-15 11:42:54 -06:00
Add 'get_eval_report' and 'list_eval_reports' endpoints.
PiperOrigin-RevId: 757936497
This commit is contained in:
parent
df0892a7b8
commit
05a0c6b307
@ -57,6 +57,7 @@ class EvalCaseResult(BaseModel):
|
||||
eval_metric_results: list[tuple[EvalMetric, EvalMetricResult]]
|
||||
session_id: str
|
||||
session_details: Optional[Session] = None
|
||||
user_id: Optional[str] = None
|
||||
|
||||
|
||||
class EvalSetResult(BaseModel):
|
||||
@ -185,6 +186,7 @@ async def run_evals(
|
||||
eval_name = eval_item["name"]
|
||||
eval_data = eval_item["data"]
|
||||
initial_session = eval_item.get("initial_session", {})
|
||||
user_id = initial_session.get("user_id", "test_user_id")
|
||||
|
||||
if evals_to_run and eval_name not in evals_to_run:
|
||||
continue
|
||||
@ -267,6 +269,7 @@ async def run_evals(
|
||||
final_eval_status=final_eval_status,
|
||||
eval_metric_results=eval_metric_results,
|
||||
session_id=session_id,
|
||||
user_id=user_id,
|
||||
)
|
||||
|
||||
if final_eval_status == EvalStatus.PASSED:
|
||||
|
@ -22,13 +22,13 @@ import os
|
||||
from pathlib import Path
|
||||
import re
|
||||
import sys
|
||||
import time
|
||||
import traceback
|
||||
import typing
|
||||
from typing import Any
|
||||
from typing import List
|
||||
from typing import Literal
|
||||
from typing import Optional
|
||||
from typing import Union
|
||||
|
||||
import click
|
||||
from fastapi import FastAPI
|
||||
@ -71,8 +71,10 @@ from ..sessions.session import Session
|
||||
from ..sessions.vertex_ai_session_service import VertexAiSessionService
|
||||
from ..tools.base_toolset import BaseToolset
|
||||
from .cli_eval import EVAL_SESSION_ID_PREFIX
|
||||
from .cli_eval import EvalCaseResult
|
||||
from .cli_eval import EvalMetric
|
||||
from .cli_eval import EvalMetricResult
|
||||
from .cli_eval import EvalSetResult
|
||||
from .cli_eval import EvalStatus
|
||||
from .utils import create_empty_state
|
||||
from .utils import envs
|
||||
@ -81,6 +83,7 @@ from .utils import evals
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_EVAL_SET_FILE_EXTENSION = ".evalset.json"
|
||||
_EVAL_SET_RESULT_FILE_EXTENSION = ".evalset_result.json"
|
||||
|
||||
|
||||
class ApiServerSpanExporter(export.SpanExporter):
|
||||
@ -137,10 +140,12 @@ class RunEvalResult(BaseModel):
|
||||
populate_by_name=True,
|
||||
)
|
||||
|
||||
eval_set_file: str
|
||||
eval_set_id: str
|
||||
eval_id: str
|
||||
final_eval_status: EvalStatus
|
||||
eval_metric_results: list[tuple[EvalMetric, EvalMetricResult]]
|
||||
user_id: str
|
||||
session_id: str
|
||||
|
||||
|
||||
@ -484,24 +489,117 @@ def get_fast_api_app(
|
||||
"Eval ids to run list is empty. We will all evals in the eval set."
|
||||
)
|
||||
root_agent = await _get_root_agent_async(app_name)
|
||||
return [
|
||||
RunEvalResult(
|
||||
app_name=app_name,
|
||||
eval_set_id=eval_set_id,
|
||||
eval_id=eval_result.eval_id,
|
||||
final_eval_status=eval_result.final_eval_status,
|
||||
eval_metric_results=eval_result.eval_metric_results,
|
||||
session_id=eval_result.session_id,
|
||||
)
|
||||
async for eval_result in run_evals(
|
||||
eval_set_to_evals,
|
||||
root_agent,
|
||||
getattr(root_agent, "reset_data", None),
|
||||
req.eval_metrics,
|
||||
session_service=session_service,
|
||||
artifact_service=artifact_service,
|
||||
run_eval_results = []
|
||||
eval_case_results = []
|
||||
async for eval_result in run_evals(
|
||||
eval_set_to_evals,
|
||||
root_agent,
|
||||
getattr(root_agent, "reset_data", None),
|
||||
req.eval_metrics,
|
||||
session_service=session_service,
|
||||
artifact_service=artifact_service,
|
||||
):
|
||||
run_eval_results.append(
|
||||
RunEvalResult(
|
||||
app_name=app_name,
|
||||
eval_set_file=eval_result.eval_set_file,
|
||||
eval_set_id=eval_set_id,
|
||||
eval_id=eval_result.eval_id,
|
||||
final_eval_status=eval_result.final_eval_status,
|
||||
eval_metric_results=eval_result.eval_metric_results,
|
||||
user_id=eval_result.user_id,
|
||||
session_id=eval_result.session_id,
|
||||
)
|
||||
)
|
||||
session = session_service.get_session(
|
||||
app_name=app_name,
|
||||
user_id=eval_result.user_id,
|
||||
session_id=eval_result.session_id,
|
||||
)
|
||||
eval_case_results.append(
|
||||
EvalCaseResult(
|
||||
eval_set_file=eval_result.eval_set_file,
|
||||
eval_id=eval_result.eval_id,
|
||||
final_eval_status=eval_result.final_eval_status,
|
||||
eval_metric_results=eval_result.eval_metric_results,
|
||||
session_id=eval_result.session_id,
|
||||
session_details=session,
|
||||
user_id=eval_result.user_id,
|
||||
)
|
||||
)
|
||||
|
||||
timestamp = time.time()
|
||||
eval_set_result_name = app_name + "_" + eval_set_id + "_" + str(timestamp)
|
||||
eval_set_result = EvalSetResult(
|
||||
eval_set_result_id=eval_set_result_name,
|
||||
eval_set_result_name=eval_set_result_name,
|
||||
eval_set_id=eval_set_id,
|
||||
eval_case_results=eval_case_results,
|
||||
creation_timestamp=timestamp,
|
||||
)
|
||||
|
||||
# Write eval result file, with eval_set_result_name.
|
||||
app_eval_history_dir = os.path.join(
|
||||
agent_dir, app_name, ".adk", "eval_history"
|
||||
)
|
||||
if not os.path.exists(app_eval_history_dir):
|
||||
os.makedirs(app_eval_history_dir)
|
||||
# Convert to json and write to file.
|
||||
eval_set_result_json = eval_set_result.model_dump_json()
|
||||
eval_set_result_file_path = os.path.join(
|
||||
app_eval_history_dir,
|
||||
eval_set_result_name + _EVAL_SET_RESULT_FILE_EXTENSION,
|
||||
)
|
||||
logger.info("Writing eval result to file: %s", eval_set_result_file_path)
|
||||
with open(eval_set_result_file_path, "w") as f:
|
||||
f.write(json.dumps(eval_set_result_json, indent=2))
|
||||
|
||||
return run_eval_results
|
||||
|
||||
@app.get(
|
||||
"/apps/{app_name}/eval_results/{eval_result_id}",
|
||||
response_model_exclude_none=True,
|
||||
)
|
||||
def get_eval_result(
|
||||
app_name: str,
|
||||
eval_result_id: str,
|
||||
) -> EvalSetResult:
|
||||
"""Gets the eval result for the given eval id."""
|
||||
# Load the eval set file data
|
||||
maybe_eval_result_file_path = (
|
||||
os.path.join(
|
||||
agent_dir, app_name, ".adk", "eval_history", eval_result_id
|
||||
)
|
||||
+ _EVAL_SET_RESULT_FILE_EXTENSION
|
||||
)
|
||||
if not os.path.exists(maybe_eval_result_file_path):
|
||||
raise HTTPException(
|
||||
status_code=404,
|
||||
detail=f"Eval result `{eval_result_id}` not found.",
|
||||
)
|
||||
with open(maybe_eval_result_file_path, "r") as file:
|
||||
eval_result_data = json.load(file) # Load JSON into a list
|
||||
try:
|
||||
eval_result = EvalSetResult.model_validate_json(eval_result_data)
|
||||
return eval_result
|
||||
except ValidationError as e:
|
||||
logger.exception("get_eval_result validation error: %s", e)
|
||||
|
||||
@app.get(
|
||||
"/apps/{app_name}/eval_results",
|
||||
response_model_exclude_none=True,
|
||||
)
|
||||
def list_eval_results(app_name: str) -> list[str]:
|
||||
"""Lists all eval results for the given app."""
|
||||
app_eval_history_directory = os.path.join(
|
||||
agent_dir, app_name, ".adk", "eval_history"
|
||||
)
|
||||
eval_result_files = [
|
||||
file.removesuffix(_EVAL_SET_RESULT_FILE_EXTENSION)
|
||||
for file in os.listdir(app_eval_history_directory)
|
||||
if file.endswith(_EVAL_SET_RESULT_FILE_EXTENSION)
|
||||
]
|
||||
return eval_result_files
|
||||
|
||||
@app.delete("/apps/{app_name}/users/{user_id}/sessions/{session_id}")
|
||||
def delete_session(app_name: str, user_id: str, session_id: str):
|
||||
|
Loading…
Reference in New Issue
Block a user