mirror of
https://github.com/EvolutionAPI/adk-python.git
synced 2025-07-16 04:02:55 -06:00
Refactor eval results reporting with Eval Set Results manager.
PiperOrigin-RevId: 761601525
This commit is contained in:
parent
c5a0437745
commit
9928cafe32
@ -82,7 +82,7 @@ class EvalCaseResult(common.BaseModel):
|
|||||||
"""The eval case id."""
|
"""The eval case id."""
|
||||||
|
|
||||||
final_eval_status: EvalStatus
|
final_eval_status: EvalStatus
|
||||||
"""Final evalu status for this eval case."""
|
"""Final eval status for this eval case."""
|
||||||
|
|
||||||
eval_metric_results: list[tuple[EvalMetric, EvalMetricResult]] = Field(
|
eval_metric_results: list[tuple[EvalMetric, EvalMetricResult]] = Field(
|
||||||
deprecated=True,
|
deprecated=True,
|
||||||
|
@ -64,6 +64,7 @@ from ..agents.run_config import StreamingMode
|
|||||||
from ..artifacts.in_memory_artifact_service import InMemoryArtifactService
|
from ..artifacts.in_memory_artifact_service import InMemoryArtifactService
|
||||||
from ..evaluation.eval_case import EvalCase
|
from ..evaluation.eval_case import EvalCase
|
||||||
from ..evaluation.eval_case import SessionInput
|
from ..evaluation.eval_case import SessionInput
|
||||||
|
from ..evaluation.local_eval_set_results_manager import LocalEvalSetResultsManager
|
||||||
from ..evaluation.local_eval_sets_manager import LocalEvalSetsManager
|
from ..evaluation.local_eval_sets_manager import LocalEvalSetsManager
|
||||||
from ..events.event import Event
|
from ..events.event import Event
|
||||||
from ..memory.in_memory_memory_service import InMemoryMemoryService
|
from ..memory.in_memory_memory_service import InMemoryMemoryService
|
||||||
@ -322,6 +323,7 @@ def get_fast_api_app(
|
|||||||
memory_service = InMemoryMemoryService()
|
memory_service = InMemoryMemoryService()
|
||||||
|
|
||||||
eval_sets_manager = LocalEvalSetsManager(agent_dir=agent_dir)
|
eval_sets_manager = LocalEvalSetsManager(agent_dir=agent_dir)
|
||||||
|
eval_set_results_manager = LocalEvalSetResultsManager(agent_dir=agent_dir)
|
||||||
|
|
||||||
# Build the Session service
|
# Build the Session service
|
||||||
agent_engine_id = ""
|
agent_engine_id = ""
|
||||||
@ -594,32 +596,10 @@ def get_fast_api_app(
|
|||||||
)
|
)
|
||||||
eval_case_results.append(eval_case_result)
|
eval_case_results.append(eval_case_result)
|
||||||
|
|
||||||
timestamp = time.time()
|
eval_set_results_manager.save_eval_set_result(
|
||||||
eval_set_result_name = app_name + "_" + eval_set_id + "_" + str(timestamp)
|
app_name, eval_set_id, eval_case_results
|
||||||
eval_set_result = EvalSetResult(
|
|
||||||
eval_set_result_id=eval_set_result_name,
|
|
||||||
eval_set_result_name=eval_set_result_name,
|
|
||||||
eval_set_id=eval_set_id,
|
|
||||||
eval_case_results=eval_case_results,
|
|
||||||
creation_timestamp=timestamp,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
# Write eval result file, with eval_set_result_name.
|
|
||||||
app_eval_history_dir = os.path.join(
|
|
||||||
agent_dir, app_name, ".adk", "eval_history"
|
|
||||||
)
|
|
||||||
if not os.path.exists(app_eval_history_dir):
|
|
||||||
os.makedirs(app_eval_history_dir)
|
|
||||||
# Convert to json and write to file.
|
|
||||||
eval_set_result_json = eval_set_result.model_dump_json()
|
|
||||||
eval_set_result_file_path = os.path.join(
|
|
||||||
app_eval_history_dir,
|
|
||||||
eval_set_result_name + _EVAL_SET_RESULT_FILE_EXTENSION,
|
|
||||||
)
|
|
||||||
logger.info("Writing eval result to file: %s", eval_set_result_file_path)
|
|
||||||
with open(eval_set_result_file_path, "w") as f:
|
|
||||||
f.write(json.dumps(eval_set_result_json, indent=2))
|
|
||||||
|
|
||||||
return run_eval_results
|
return run_eval_results
|
||||||
|
|
||||||
@app.get(
|
@app.get(
|
||||||
@ -631,25 +611,14 @@ def get_fast_api_app(
|
|||||||
eval_result_id: str,
|
eval_result_id: str,
|
||||||
) -> EvalSetResult:
|
) -> EvalSetResult:
|
||||||
"""Gets the eval result for the given eval id."""
|
"""Gets the eval result for the given eval id."""
|
||||||
# Load the eval set file data
|
|
||||||
maybe_eval_result_file_path = (
|
|
||||||
os.path.join(
|
|
||||||
agent_dir, app_name, ".adk", "eval_history", eval_result_id
|
|
||||||
)
|
|
||||||
+ _EVAL_SET_RESULT_FILE_EXTENSION
|
|
||||||
)
|
|
||||||
if not os.path.exists(maybe_eval_result_file_path):
|
|
||||||
raise HTTPException(
|
|
||||||
status_code=404,
|
|
||||||
detail=f"Eval result `{eval_result_id}` not found.",
|
|
||||||
)
|
|
||||||
with open(maybe_eval_result_file_path, "r") as file:
|
|
||||||
eval_result_data = json.load(file) # Load JSON into a list
|
|
||||||
try:
|
try:
|
||||||
eval_result = EvalSetResult.model_validate_json(eval_result_data)
|
return eval_set_results_manager.get_eval_set_result(
|
||||||
return eval_result
|
app_name, eval_result_id
|
||||||
except ValidationError as e:
|
)
|
||||||
logger.exception("get_eval_result validation error: %s", e)
|
except ValueError as ve:
|
||||||
|
raise HTTPException(status_code=404, detail=str(ve)) from ve
|
||||||
|
except ValidationError as ve:
|
||||||
|
raise HTTPException(status_code=500, detail=str(ve)) from ve
|
||||||
|
|
||||||
@app.get(
|
@app.get(
|
||||||
"/apps/{app_name}/eval_results",
|
"/apps/{app_name}/eval_results",
|
||||||
@ -657,19 +626,7 @@ def get_fast_api_app(
|
|||||||
)
|
)
|
||||||
def list_eval_results(app_name: str) -> list[str]:
|
def list_eval_results(app_name: str) -> list[str]:
|
||||||
"""Lists all eval results for the given app."""
|
"""Lists all eval results for the given app."""
|
||||||
app_eval_history_directory = os.path.join(
|
return eval_set_results_manager.list_eval_set_results(app_name)
|
||||||
agent_dir, app_name, ".adk", "eval_history"
|
|
||||||
)
|
|
||||||
|
|
||||||
if not os.path.exists(app_eval_history_directory):
|
|
||||||
return []
|
|
||||||
|
|
||||||
eval_result_files = [
|
|
||||||
file.removesuffix(_EVAL_SET_RESULT_FILE_EXTENSION)
|
|
||||||
for file in os.listdir(app_eval_history_directory)
|
|
||||||
if file.endswith(_EVAL_SET_RESULT_FILE_EXTENSION)
|
|
||||||
]
|
|
||||||
return eval_result_files
|
|
||||||
|
|
||||||
@app.delete("/apps/{app_name}/users/{user_id}/sessions/{session_id}")
|
@app.delete("/apps/{app_name}/users/{user_id}/sessions/{session_id}")
|
||||||
async def delete_session(app_name: str, user_id: str, session_id: str):
|
async def delete_session(app_name: str, user_id: str, session_id: str):
|
||||||
|
44
src/google/adk/evaluation/eval_set_results_manager.py
Normal file
44
src/google/adk/evaluation/eval_set_results_manager.py
Normal file
@ -0,0 +1,44 @@
|
|||||||
|
# Copyright 2025 Google LLC
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
from abc import ABC, abstractmethod
|
||||||
|
|
||||||
|
from ..cli.cli_eval import EvalCaseResult
|
||||||
|
from ..cli.cli_eval import EvalSetResult
|
||||||
|
|
||||||
|
|
||||||
|
class EvalSetResultsManager(ABC):
    """Abstract interface for storing and retrieving Eval Set Results.

    Concrete managers decide where results are kept; this class only fixes
    the three operations every backend has to provide.
    """

    @abstractmethod
    def save_eval_set_result(
        self,
        app_name: str,
        eval_set_id: str,
        eval_case_results: list[EvalCaseResult],
    ) -> None:
        """Build a new EvalSetResult from ``eval_case_results`` and persist it.

        Args:
            app_name: Name of the app the result belongs to.
            eval_set_id: Id of the eval set that produced the results.
            eval_case_results: Per-case results to bundle into the new result.
        """
        raise NotImplementedError()

    @abstractmethod
    def get_eval_set_result(
        self, app_name: str, eval_set_result_id: str
    ) -> EvalSetResult:
        """Fetch the EvalSetResult identified by app name and result id.

        Args:
            app_name: Name of the app the result belongs to.
            eval_set_result_id: Id of the stored eval set result.

        Returns:
            The matching EvalSetResult.
        """
        raise NotImplementedError()

    @abstractmethod
    def list_eval_set_results(self, app_name: str) -> list[str]:
        """List the eval result ids that belong to ``app_name``.

        Args:
            app_name: Name of the app whose results are listed.

        Returns:
            The eval result ids stored for that app.
        """
        raise NotImplementedError()
|
104
src/google/adk/evaluation/local_eval_set_results_manager.py
Normal file
104
src/google/adk/evaluation/local_eval_set_results_manager.py
Normal file
@ -0,0 +1,104 @@
|
|||||||
|
# Copyright 2025 Google LLC
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import time
|
||||||
|
from typing_extensions import override
|
||||||
|
from ..cli.cli_eval import EvalCaseResult
|
||||||
|
from ..cli.cli_eval import EvalSetResult
|
||||||
|
from .eval_set_results_manager import EvalSetResultsManager
|
||||||
|
|
||||||
|
logger = logging.getLogger("google_adk." + __name__)
|
||||||
|
|
||||||
|
_ADK_EVAL_HISTORY_DIR = ".adk/eval_history"
|
||||||
|
_EVAL_SET_RESULT_FILE_EXTENSION = ".evalset_result.json"
|
||||||
|
|
||||||
|
|
||||||
|
class LocalEvalSetResultsManager(EvalSetResultsManager):
    """An EvalSetResult manager that stores eval set results locally on disk.

    Each result is written as its own file inside the per-app eval history
    directory (``<agent_dir>/<app_name>/<_ADK_EVAL_HISTORY_DIR>``). The file
    name is the eval set result id followed by
    ``_EVAL_SET_RESULT_FILE_EXTENSION``.
    """

    def __init__(self, agent_dir: str):
        """Remember the root directory under which every app's folder lives.

        Args:
            agent_dir: Directory that contains one sub-directory per app.
        """
        self._agent_dir = agent_dir

    @override
    def save_eval_set_result(
        self,
        app_name: str,
        eval_set_id: str,
        eval_case_results: list[EvalCaseResult],
    ) -> None:
        """Creates and saves a new EvalSetResult given eval_case_results.

        The result id/name is ``<app_name>_<eval_set_id>_<timestamp>``, where
        the timestamp is the current ``time.time()`` value; the same value is
        stored as the creation timestamp.

        Args:
            app_name: Name of the app the result belongs to.
            eval_set_id: Id of the eval set that produced the results.
            eval_case_results: Per-case results to store in the new result.
        """
        timestamp = time.time()
        eval_set_result_name = f"{app_name}_{eval_set_id}_{timestamp}"
        eval_set_result = EvalSetResult(
            eval_set_result_id=eval_set_result_name,
            eval_set_result_name=eval_set_result_name,
            eval_set_id=eval_set_id,
            eval_case_results=eval_case_results,
            creation_timestamp=timestamp,
        )

        # exist_ok avoids the check-then-create race when two evals for the
        # same app finish at about the same time.
        app_eval_history_dir = self._get_eval_history_dir(app_name)
        os.makedirs(app_eval_history_dir, exist_ok=True)

        eval_set_result_file_path = os.path.join(
            app_eval_history_dir,
            eval_set_result_name + _EVAL_SET_RESULT_FILE_EXTENSION,
        )
        logger.info("Writing eval result to file: %s", eval_set_result_file_path)

        # NOTE: the payload is deliberately double-encoded: the model's JSON
        # text is itself dumped as a JSON string. get_eval_set_result()
        # reverses exactly this, so the on-disk format is kept unchanged to
        # stay readable alongside previously written result files.
        eval_set_result_json = eval_set_result.model_dump_json()
        with open(eval_set_result_file_path, "w", encoding="utf-8") as f:
            f.write(json.dumps(eval_set_result_json, indent=2))

    @override
    def get_eval_set_result(
        self, app_name: str, eval_set_result_id: str
    ) -> EvalSetResult:
        """Returns an EvalSetResult identified by app_name and eval_set_result_id.

        Args:
            app_name: Name of the app the result belongs to.
            eval_set_result_id: Id of the stored eval set result.

        Returns:
            The EvalSetResult parsed from the stored file.

        Raises:
            ValueError: If no result file exists for ``eval_set_result_id``.
        """
        maybe_eval_result_file_path = (
            os.path.join(
                self._get_eval_history_dir(app_name),
                eval_set_result_id,
            )
            + _EVAL_SET_RESULT_FILE_EXTENSION
        )
        # Open directly instead of probing with os.path.exists() first, so a
        # file removed between the check and the open still surfaces as the
        # ValueError callers expect.
        try:
            with open(maybe_eval_result_file_path, "r", encoding="utf-8") as file:
                # Yields the inner JSON text (see save_eval_set_result).
                eval_result_data = json.load(file)
        except FileNotFoundError as e:
            raise ValueError(
                f"Eval set result `{eval_set_result_id}` does not exist."
            ) from e
        return EvalSetResult.model_validate_json(eval_result_data)

    @override
    def list_eval_set_results(self, app_name: str) -> list[str]:
        """Returns the eval result ids that belong to the given app_name.

        Args:
            app_name: Name of the app whose results are listed.

        Returns:
            The result ids (file names with the result-file extension
            stripped), or an empty list when the app has no eval history
            directory.
        """
        app_eval_history_directory = self._get_eval_history_dir(app_name)

        try:
            file_names = os.listdir(app_eval_history_directory)
        except FileNotFoundError:
            # No history directory means nothing has been saved for this app.
            return []

        return [
            file_name.removesuffix(_EVAL_SET_RESULT_FILE_EXTENSION)
            for file_name in file_names
            if file_name.endswith(_EVAL_SET_RESULT_FILE_EXTENSION)
        ]

    def _get_eval_history_dir(self, app_name: str) -> str:
        """Return the eval history directory path for ``app_name``."""
        return os.path.join(self._agent_dir, app_name, _ADK_EVAL_HISTORY_DIR)
|
Loading…
Reference in New Issue
Block a user