diff --git a/src/google/adk/cli/cli_eval.py b/src/google/adk/cli/cli_eval.py
index fc9d5af..7a21cf8 100644
--- a/src/google/adk/cli/cli_eval.py
+++ b/src/google/adk/cli/cli_eval.py
@@ -20,7 +20,7 @@
 import os
 import sys
 import traceback
 from typing import Any
-from typing import Generator
+from typing import AsyncGenerator
 from typing import Optional
 import uuid
@@ -146,7 +146,7 @@ def parse_and_get_evals_to_run(
   return eval_set_to_evals
 
 
-def run_evals(
+async def run_evals(
     eval_set_to_evals: dict[str, list[str]],
     root_agent: Agent,
     reset_func: Optional[Any],
@@ -154,7 +154,7 @@
     session_service=None,
     artifact_service=None,
     print_detailed_results=False,
-) -> Generator[EvalResult, None, None]:
+) -> AsyncGenerator[EvalResult, None]:
   try:
     from ..evaluation.agent_evaluator import EvaluationGenerator
     from ..evaluation.response_evaluator import ResponseEvaluator
@@ -181,14 +181,16 @@
       print(f"Running Eval: {eval_set_file}:{eval_name}")
       session_id = f"{EVAL_SESSION_ID_PREFIX}{str(uuid.uuid4())}"
 
-      scrape_result = EvaluationGenerator._process_query_with_root_agent(
-          data=eval_data,
-          root_agent=root_agent,
-          reset_func=reset_func,
-          initial_session=initial_session,
-          session_id=session_id,
-          session_service=session_service,
-          artifact_service=artifact_service,
+      scrape_result = (
+          await EvaluationGenerator._process_query_with_root_agent(
+              data=eval_data,
+              root_agent=root_agent,
+              reset_func=reset_func,
+              initial_session=initial_session,
+              session_id=session_id,
+              session_service=session_service,
+              artifact_service=artifact_service,
+          )
       )
 
       eval_metric_results = []
diff --git a/src/google/adk/cli/cli_tools_click.py b/src/google/adk/cli/cli_tools_click.py
index 83a2c3f..2898673 100644
--- a/src/google/adk/cli/cli_tools_click.py
+++ b/src/google/adk/cli/cli_tools_click.py
@@ -258,12 +258,14 @@ def cli_eval(
 
   try:
     eval_results = list(
-        run_evals(
-            eval_set_to_evals,
-            root_agent,
-            reset_func,
-            eval_metrics,
-            print_detailed_results=print_detailed_results,
+        asyncio.run(
+            run_evals(
+                eval_set_to_evals,
+                root_agent,
+                reset_func,
+                eval_metrics,
+                print_detailed_results=print_detailed_results,
+            )
         )
     )
   except ModuleNotFoundError:
diff --git a/src/google/adk/cli/fast_api.py b/src/google/adk/cli/fast_api.py
index ed663a5..aa3de52 100644
--- a/src/google/adk/cli/fast_api.py
+++ b/src/google/adk/cli/fast_api.py
@@ -467,7 +467,7 @@ def get_fast_api_app(
     )
     root_agent = await _get_root_agent_async(app_name)
     eval_results = list(
-        run_evals(
+        await run_evals(
            eval_set_to_evals,
            root_agent,
            getattr(root_agent, "reset_data", None),
diff --git a/src/google/adk/evaluation/agent_evaluator.py b/src/google/adk/evaluation/agent_evaluator.py
index 1de087f..d97cd1f 100644
--- a/src/google/adk/evaluation/agent_evaluator.py
+++ b/src/google/adk/evaluation/agent_evaluator.py
@@ -76,7 +76,7 @@
     return DEFAULT_CRITERIA
 
   @staticmethod
-  def evaluate(
+  async def evaluate(
       agent_module,
       eval_dataset_file_path_or_dir,
       num_runs=NUM_RUNS,
@@ -120,7 +120,7 @@
 
       AgentEvaluator._validate_input([dataset], criteria)
 
-      evaluation_response = AgentEvaluator._generate_responses(
+      evaluation_response = await AgentEvaluator._generate_responses(
           agent_module,
           [dataset],
           num_runs,
@@ -246,7 +246,7 @@
     return inferred_criteria
 
   @staticmethod
-  def _generate_responses(
+  async def _generate_responses(
       agent_module, eval_dataset, num_runs, agent_name=None, initial_session={}
   ):
     """Generates evaluation responses by running the agent module multiple times."""
diff --git a/src/google/adk/evaluation/evaluation_generator.py b/src/google/adk/evaluation/evaluation_generator.py
index 3a43098..779c8ad 100644
--- a/src/google/adk/evaluation/evaluation_generator.py
+++ b/src/google/adk/evaluation/evaluation_generator.py
@@ -32,7 +32,7 @@ class EvaluationGenerator:
   """Generates evaluation responses for agents."""
 
   @staticmethod
-  def generate_responses(
+  async def generate_responses(
       eval_dataset,
       agent_module_path,
       repeat_num=3,
@@ -107,7 +107,7 @@ class EvaluationGenerator:
     )
 
   @staticmethod
-  def _process_query_with_root_agent(
+  async def _process_query_with_root_agent(
       data,
       root_agent,
       reset_func,
@@ -128,7 +128,7 @@ class EvaluationGenerator:
         all_mock_tools.add(expected[EvalConstants.TOOL_NAME])
 
     eval_data_copy = data.copy()
-    EvaluationGenerator.apply_before_tool_callback(
+    await EvaluationGenerator.apply_before_tool_callback(
         root_agent,
         lambda *args: EvaluationGenerator.before_tool_callback(
            *args, eval_dataset=eval_data_copy
@@ -247,7 +247,7 @@ class EvaluationGenerator:
     return None
 
   @staticmethod
-  def apply_before_tool_callback(
+  async def apply_before_tool_callback(
       agent: BaseAgent,
       callback: BeforeToolCallback,
       all_mock_tools: set[str],
@@ -265,6 +265,6 @@ class EvaluationGenerator:
 
     # Apply recursively to subagents if they exist
     for sub_agent in agent.sub_agents:
-      EvaluationGenerator.apply_before_tool_callback(
+      await EvaluationGenerator.apply_before_tool_callback(
           sub_agent, callback, all_mock_tools
       )
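Note on the two `run_evals` call sites above: the new annotation declares `run_evals` an async generator, but `asyncio.run()` in `cli_tools_click.py` expects a coroutine, and `await` in `fast_api.py` only works on awaitables. If `run_evals` still yields `EvalResult` objects one at a time (as the `AsyncGenerator` annotation suggests), a synchronous caller would need a small coroutine wrapper to drain it. A minimal sketch of that pattern; the helper name `_drain_run_evals` is illustrative, not part of the ADK API:

```python
import asyncio

from google.adk.cli.cli_eval import run_evals


async def _drain_run_evals(*args, **kwargs) -> list:
  # run_evals(...) returns an async generator object; iterate it inside a
  # coroutine so a synchronous caller can execute it with asyncio.run().
  return [result async for result in run_evals(*args, **kwargs)]


# Synchronous call site, e.g. inside the `adk eval` Click command:
# eval_results = asyncio.run(
#     _drain_run_evals(
#         eval_set_to_evals,
#         root_agent,
#         reset_func,
#         eval_metrics,
#         print_detailed_results=print_detailed_results,
#     )
# )
```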
""" - AgentEvaluator.evaluate( + await AgentEvaluator.evaluate( agent_module=agent_name, eval_dataset_file_path_or_dir=evalfile, initial_session_file=initial_session_file, diff --git a/tests/integration/test_multi_agent.py b/tests/integration/test_multi_agent.py index 48b5897..81beed1 100644 --- a/tests/integration/test_multi_agent.py +++ b/tests/integration/test_multi_agent.py @@ -15,7 +15,8 @@ from google.adk.evaluation import AgentEvaluator -def test_eval_agent(): +@pytest.mark.asyncio +async def test_eval_agent(): AgentEvaluator.evaluate( agent_module="tests.integration.fixture.trip_planner_agent", eval_dataset_file_path_or_dir=( diff --git a/tests/integration/test_multi_turn.py b/tests/integration/test_multi_turn.py index 0281082..ce56ede 100644 --- a/tests/integration/test_multi_turn.py +++ b/tests/integration/test_multi_turn.py @@ -15,7 +15,8 @@ from google.adk.evaluation import AgentEvaluator -def test_simple_multi_turn_conversation(): +@pytest.mark.asyncio +async def test_simple_multi_turn_conversation(): """Test a simple multi-turn conversation.""" AgentEvaluator.evaluate( agent_module="tests.integration.fixture.home_automation_agent", @@ -24,7 +25,8 @@ def test_simple_multi_turn_conversation(): ) -def test_dependent_tool_calls(): +@pytest.mark.asyncio +async def test_dependent_tool_calls(): """Test subsequent tool calls that are dependent on previous tool calls.""" AgentEvaluator.evaluate( agent_module="tests.integration.fixture.home_automation_agent", @@ -33,8 +35,10 @@ def test_dependent_tool_calls(): ) -def test_memorizing_past_events(): +@pytest.mark.asyncio +async def test_memorizing_past_events(): """Test memorizing past events.""" + AgentEvaluator.evaluate( agent_module="tests.integration.fixture.home_automation_agent", eval_dataset_file_path_or_dir="tests/integration/fixture/home_automation_agent/test_files/memorizing_past_events/eval_data.test.json", diff --git a/tests/integration/test_single_agent.py b/tests/integration/test_single_agent.py index 5c7c26d..cb18ce8 100644 --- a/tests/integration/test_single_agent.py +++ b/tests/integration/test_single_agent.py @@ -15,7 +15,8 @@ from google.adk.evaluation import AgentEvaluator -def test_eval_agent(): +@pytest.mark.asyncio +async def test_eval_agent(): AgentEvaluator.evaluate( agent_module="tests.integration.fixture.home_automation_agent", eval_dataset_file_path_or_dir="tests/integration/fixture/home_automation_agent/simple_test.test.json", diff --git a/tests/integration/test_sub_agent.py b/tests/integration/test_sub_agent.py index 27646bf..6eb7192 100644 --- a/tests/integration/test_sub_agent.py +++ b/tests/integration/test_sub_agent.py @@ -15,7 +15,8 @@ from google.adk.evaluation import AgentEvaluator -def test_eval_agent(): +@pytest.mark.asyncio +async def test_eval_agent(): """Test hotel sub agent in a multi-agent system.""" AgentEvaluator.evaluate( agent_module="tests.integration.fixture.trip_planner_agent", diff --git a/tests/integration/test_with_test_file.py b/tests/integration/test_with_test_file.py index 2024e1d..68c9ba3 100644 --- a/tests/integration/test_with_test_file.py +++ b/tests/integration/test_with_test_file.py @@ -15,7 +15,8 @@ from google.adk.evaluation import AgentEvaluator -def test_with_single_test_file(): +@pytest.mark.asyncio +async def test_with_single_test_file(): """Test the agent's basic ability via session file.""" AgentEvaluator.evaluate( agent_module="tests.integration.fixture.home_automation_agent", @@ -23,7 +24,8 @@ def test_with_single_test_file(): ) -def 
diff --git a/tests/integration/test_with_test_file.py b/tests/integration/test_with_test_file.py
index 2024e1d..68c9ba3 100644
--- a/tests/integration/test_with_test_file.py
+++ b/tests/integration/test_with_test_file.py
@@ -15,7 +15,10 @@
+import pytest
+
 from google.adk.evaluation import AgentEvaluator
 
 
-def test_with_single_test_file():
+@pytest.mark.asyncio
+async def test_with_single_test_file():
   """Test the agent's basic ability via session file."""
-  AgentEvaluator.evaluate(
+  await AgentEvaluator.evaluate(
       agent_module="tests.integration.fixture.home_automation_agent",
@@ -23,7 +26,8 @@
 )
 
 
-def test_with_folder_of_test_files_long_running():
+@pytest.mark.asyncio
+async def test_with_folder_of_test_files_long_running():
   """Test the agent's basic ability via a folder of session files."""
-  AgentEvaluator.evaluate(
+  await AgentEvaluator.evaluate(
       agent_module="tests.integration.fixture.home_automation_agent",
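Outside pytest, any caller of `AgentEvaluator.evaluate` now needs its own event loop. A sketch of a one-off driver script, reusing the fixture paths from the tests above:

```python
import asyncio

from google.adk.evaluation import AgentEvaluator


async def main() -> None:
  # evaluate() is a coroutine after this change; forgetting the await
  # would trigger "coroutine was never awaited" and evaluate nothing.
  await AgentEvaluator.evaluate(
      agent_module="tests.integration.fixture.home_automation_agent",
      eval_dataset_file_path_or_dir=(
          "tests/integration/fixture/home_automation_agent/simple_test.test.json"
      ),
  )


if __name__ == "__main__":
  asyncio.run(main())
```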