mirror of
https://github.com/EvolutionAPI/adk-python.git
synced 2025-12-18 19:32:21 -06:00
feat(live): Support live mode of sequential agent
Add a `task_completed` function to the agent so that when a model finishes the task, it can send a signal and the program knows it can go to the next agent. This CL includes: * Implements `_run_live_impl` in `sequential_agent` so it can handle the live case. * Adds an example for sequential agent. * Improves the error message for unimplemented `_run_live_impl` in other agents. Note: 1. Compared to the non-live case, live agents process a continuous stream of audio or video, so there is no native way to tell whether an agent has finished and should pass control to the next agent. We therefore introduce a `task_completed()` function that the model can call to signal that it has finished the task, so we can move on to the next agent. 2. Live agents don't seem to be very useful or natural in parallel or loop agents, so we don't implement those for now. If there is user demand, we can implement them easily using a similar approach. PiperOrigin-RevId: 758315430
This commit is contained in:
committed by
Copybara-Service
parent
39f78dc28f
commit
4188673b0f
@@ -58,5 +58,5 @@ class LoopAgent(BaseAgent):
|
||||
async def _run_live_impl(
    self, ctx: InvocationContext
) -> AsyncGenerator[Event, None]:
  """Live (bidi-streaming) run is not supported for LoopAgent.

  Args:
    ctx: The invocation context of the agent.

  Raises:
    NotImplementedError: always — live mode is not implemented for LoopAgent.
  """
  raise NotImplementedError('This is not supported yet for LoopAgent.')
  yield  # AsyncGenerator requires having at least one yield statement
|
||||
|
||||
@@ -94,3 +94,10 @@ class ParallelAgent(BaseAgent):
|
||||
agent_runs = [agent.run_async(ctx) for agent in self.sub_agents]
|
||||
async for event in _merge_agent_run(agent_runs):
|
||||
yield event
|
||||
|
||||
@override
async def _run_live_impl(
    self, ctx: InvocationContext
) -> AsyncGenerator[Event, None]:
  """Live (bidi-streaming) run is not supported for ParallelAgent.

  Args:
    ctx: The invocation context of the agent.

  Raises:
    NotImplementedError: always — live mode is not implemented for
      ParallelAgent.
  """
  raise NotImplementedError("This is not supported yet for ParallelAgent.")
  yield  # AsyncGenerator requires having at least one yield statement
|
||||
|
||||
@@ -23,6 +23,7 @@ from typing_extensions import override
|
||||
from ..agents.invocation_context import InvocationContext
|
||||
from ..events.event import Event
|
||||
from .base_agent import BaseAgent
|
||||
from .llm_agent import LlmAgent
|
||||
|
||||
|
||||
class SequentialAgent(BaseAgent):
|
||||
@@ -40,6 +41,36 @@ class SequentialAgent(BaseAgent):
|
||||
async def _run_live_impl(
    self, ctx: InvocationContext
) -> AsyncGenerator[Event, None]:
  """Implementation for live SequentialAgent.

  Compared to the non-live case, live agents process a continuous stream of
  audio or video, so a sub-agent has no inherent way to tell whether it has
  finished and should hand off to the next agent. We therefore inject a
  task_completed() tool so the model can call it to signal that it has
  finished its task, letting us move on to the next agent.

  Args:
    ctx: The invocation context of the agent.

  Yields:
    Events produced by each sub-agent, run one after another.
  """

  # Defined once, outside the loop — the same callable is shared by all
  # LLM sub-agents so the function-name dedupe below is meaningful.
  def task_completed():
    """Signals that the model has successfully completed the user's question
    or task.
    """
    return 'Task completion signaled.'

  # There is no way to know whether we're running live during the init
  # phase, so the tool has to be injected here, at run time.
  for sub_agent in self.sub_agents:
    if isinstance(sub_agent, LlmAgent):
      # Use the function name to dedupe. Checking membership against the
      # tools list itself would compare a str with callables and never
      # match, re-appending the tool (and instruction) on every run.
      # NOTE(review): assumes entries in sub_agent.tools expose __name__
      # (plain callables) — confirm whether tool objects can appear here.
      existing_tool_names = {
          getattr(tool, '__name__', None) for tool in sub_agent.tools
      }
      if task_completed.__name__ not in existing_tool_names:
        sub_agent.tools.append(task_completed)
        sub_agent.instruction += f"""If you finished the user's request
according to its description, call the {task_completed.__name__} function
to exit so the next agents can take over. When calling this function,
do not generate any text other than the function call."""

  # Run the sub-agents sequentially, forwarding their live events.
  for sub_agent in self.sub_agents:
    async for event in sub_agent.run_live(ctx):
      yield event
|
||||
|
||||
@@ -135,6 +135,18 @@ class BaseLlmFlow(ABC):
|
||||
# cancel the tasks that belongs to the closed connection.
|
||||
send_task.cancel()
|
||||
await llm_connection.close()
|
||||
if (
|
||||
event.content
|
||||
and event.content.parts
|
||||
and event.content.parts[0].function_response
|
||||
and event.content.parts[0].function_response.name
|
||||
== 'task_completed'
|
||||
):
|
||||
# this is used for sequential agent to signal the end of the agent.
|
||||
await asyncio.sleep(1)
|
||||
# cancel the tasks that belongs to the closed connection.
|
||||
send_task.cancel()
|
||||
return
|
||||
finally:
|
||||
# Clean up
|
||||
if not send_task.done():
|
||||
@@ -237,7 +249,7 @@ class BaseLlmFlow(ABC):
|
||||
if (
|
||||
event.content
|
||||
and event.content.parts
|
||||
and event.content.parts[0].text
|
||||
and event.content.parts[0].inline_data is None
|
||||
and not event.partial
|
||||
):
|
||||
# This can be either user data or transcription data.
|
||||
|
||||
@@ -254,13 +254,13 @@ class Runner:
|
||||
"""Runs the agent in live mode (experimental feature).
|
||||
|
||||
Args:
|
||||
session: The session to use. This parameter is deprecated, please use
|
||||
`user_id` and `session_id` instead.
|
||||
user_id: The user ID for the session. Required if `session` is None.
|
||||
session_id: The session ID for the session. Required if `session` is
|
||||
None.
|
||||
live_request_queue: The queue for live requests.
|
||||
run_config: The run config for the agent.
|
||||
session: The session to use. This parameter is deprecated, please use
|
||||
`user_id` and `session_id` instead.
|
||||
|
||||
Yields:
|
||||
AsyncGenerator[Event, None]: An asynchronous generator that yields
|
||||
@@ -302,22 +302,24 @@ class Runner:
|
||||
|
||||
invocation_context.active_streaming_tools = {}
|
||||
# TODO(hangfei): switch to use canonical_tools.
|
||||
for tool in invocation_context.agent.tools:
|
||||
# replicate a LiveRequestQueue for streaming tools that relis on
|
||||
# LiveRequestQueue
|
||||
from typing import get_type_hints
|
||||
# for shell agents, there is no tools associated with it so we should skip.
|
||||
if hasattr(invocation_context.agent, 'tools'):
|
||||
for tool in invocation_context.agent.tools:
|
||||
# replicate a LiveRequestQueue for streaming tools that relis on
|
||||
# LiveRequestQueue
|
||||
from typing import get_type_hints
|
||||
|
||||
type_hints = get_type_hints(tool)
|
||||
for arg_type in type_hints.values():
|
||||
if arg_type is LiveRequestQueue:
|
||||
if not invocation_context.active_streaming_tools:
|
||||
invocation_context.active_streaming_tools = {}
|
||||
active_streaming_tools = ActiveStreamingTool(
|
||||
stream=LiveRequestQueue()
|
||||
)
|
||||
invocation_context.active_streaming_tools[tool.__name__] = (
|
||||
active_streaming_tools
|
||||
)
|
||||
type_hints = get_type_hints(tool)
|
||||
for arg_type in type_hints.values():
|
||||
if arg_type is LiveRequestQueue:
|
||||
if not invocation_context.active_streaming_tools:
|
||||
invocation_context.active_streaming_tools = {}
|
||||
active_streaming_tools = ActiveStreamingTool(
|
||||
stream=LiveRequestQueue()
|
||||
)
|
||||
invocation_context.active_streaming_tools[tool.__name__] = (
|
||||
active_streaming_tools
|
||||
)
|
||||
|
||||
async for event in invocation_context.agent.run_live(invocation_context):
|
||||
self.session_service.append_event(session=session, event=event)
|
||||
|
||||
Reference in New Issue
Block a user