Add token usage to Gemini (streaming), LiteLLM, and Anthropic

Also add a token_usage sample that showcases the token usage of sub-agents running different models under a parent agent.

PiperOrigin-RevId: 759347015
Selcuk Gun
2025-05-15 16:22:04 -07:00
committed by Copybara-Service
parent 4d5760917d
commit 509db3f9fb
8 changed files with 515 additions and 45 deletions
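
The model-side changes (Gemini streaming, LiteLLM, Anthropic) are among the 8 changed files, but only the new token_usage sample is reproduced below. As a rough, hedged sketch of the kind of mapping such an integration needs, assuming an OpenAI-style usage object like the one LiteLLM reports and a hypothetical helper name `_to_usage_metadata`, the provider's counters get copied onto google.genai's usage-metadata type, which is what the sample later reads back from each event's usage_metadata:

# Hedged sketch, not the committed implementation. Assumes an OpenAI-style
# usage object (prompt_tokens / completion_tokens / total_tokens), which is
# the shape LiteLLM reports.
from typing import Any, Optional

from google.genai import types


def _to_usage_metadata(
    usage: Any,
) -> Optional[types.GenerateContentResponseUsageMetadata]:
  if usage is None:
    return None
  return types.GenerateContentResponseUsageMetadata(
      prompt_token_count=getattr(usage, 'prompt_tokens', None),
      candidates_token_count=getattr(usage, 'completion_tokens', None),
      total_token_count=getattr(usage, 'total_tokens', None),
  )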

@@ -0,0 +1,15 @@
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import agent

@@ -0,0 +1,97 @@
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import random
from google.adk import Agent
from google.adk.agents.llm_agent import LlmAgent
from google.adk.agents.sequential_agent import SequentialAgent
from google.adk.models.anthropic_llm import Claude
from google.adk.models.lite_llm import LiteLlm
from google.adk.planners import BuiltInPlanner
from google.adk.planners import PlanReActPlanner
from google.adk.tools.tool_context import ToolContext
from google.genai import types


def roll_die(sides: int, tool_context: ToolContext) -> int:
  """Roll a die and return the rolled result.

  Args:
    sides: The integer number of sides the die has.

  Returns:
    An integer of the result of rolling the die.
  """
  result = random.randint(1, sides)
  if 'rolls' not in tool_context.state:
    tool_context.state['rolls'] = []
  tool_context.state['rolls'] = tool_context.state['rolls'] + [result]
  return result


# Each sub-agent handles the same dice-rolling task with a different model
# backend, so a single run reports token usage from OpenAI (via LiteLLM),
# Anthropic Claude, Claude on Vertex AI (via LiteLLM), and Gemini.
roll_agent_with_openai = LlmAgent(
    model=LiteLlm(model='openai/gpt-4o'),
    description='Handles rolling dice of different sizes.',
    name='roll_agent_with_openai',
    instruction="""
      You are responsible for rolling dice based on the user's request.
      When asked to roll a die, you must call the roll_die tool with the number of sides as an integer.
    """,
    tools=[roll_die],
)

roll_agent_with_claude = LlmAgent(
    model=Claude(model='claude-3-7-sonnet@20250219'),
    description='Handles rolling dice of different sizes.',
    name='roll_agent_with_claude',
    instruction="""
      You are responsible for rolling dice based on the user's request.
      When asked to roll a die, you must call the roll_die tool with the number of sides as an integer.
    """,
    tools=[roll_die],
)

roll_agent_with_litellm_claude = LlmAgent(
    model=LiteLlm(model='vertex_ai/claude-3-7-sonnet'),
    description='Handles rolling dice of different sizes.',
    name='roll_agent_with_litellm_claude',
    instruction="""
      You are responsible for rolling dice based on the user's request.
      When asked to roll a die, you must call the roll_die tool with the number of sides as an integer.
    """,
    tools=[roll_die],
)

roll_agent_with_gemini = LlmAgent(
    model='gemini-2.0-flash',
    description='Handles rolling dice of different sizes.',
    name='roll_agent_with_gemini',
    instruction="""
      You are responsible for rolling dice based on the user's request.
      When asked to roll a die, you must call the roll_die tool with the number of sides as an integer.
    """,
    tools=[roll_die],
)

# The parent agent runs each sub-agent in sequence.
root_agent = SequentialAgent(
    name='code_pipeline_agent',
    sub_agents=[
        roll_agent_with_openai,
        roll_agent_with_claude,
        roll_agent_with_litellm_claude,
        roll_agent_with_gemini,
    ],
)
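
agent.py also imports BuiltInPlanner and PlanReActPlanner without wiring them up. As a hedged variation (not part of this sample), a thinking-enabled planner could be attached to a Gemini agent; with a thinking-capable model, thinking tokens would then typically show up in the usage metadata as well (thoughts_token_count on the genai usage type). The model name below is an assumption:

# Hedged variation, not in the commit: a Gemini sub-agent with a thinking
# planner, so thinking tokens are also reflected in event.usage_metadata.
roll_agent_with_gemini_thinking = LlmAgent(
    model='gemini-2.5-flash',  # assumption: any thinking-capable Gemini model
    description='Handles rolling dice of different sizes.',
    name='roll_agent_with_gemini_thinking',
    planner=BuiltInPlanner(
        thinking_config=types.ThinkingConfig(include_thoughts=True)
    ),
    instruction="""
      You are responsible for rolling dice based on the user's request.
      When asked to roll a die, you must call the roll_die tool with the number of sides as an integer.
    """,
    tools=[roll_die],
)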

@@ -0,0 +1,102 @@
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import asyncio
import time
import warnings
import agent
from dotenv import load_dotenv
from google.adk import Runner
from google.adk.agents.run_config import RunConfig
from google.adk.artifacts import InMemoryArtifactService
from google.adk.cli.utils import logs
from google.adk.sessions import InMemorySessionService
from google.adk.sessions import Session
from google.genai import types

load_dotenv(override=True)
warnings.filterwarnings('ignore', category=UserWarning)
logs.log_to_tmp_folder()


async def main():
  app_name = 'my_app'
  user_id_1 = 'user1'
  session_service = InMemorySessionService()
  artifact_service = InMemoryArtifactService()
  runner = Runner(
      app_name=app_name,
      agent=agent.root_agent,
      artifact_service=artifact_service,
      session_service=session_service,
  )
  session_11 = session_service.create_session(
      app_name=app_name, user_id=user_id_1
  )

  # Running totals accumulated across every turn in this session.
  total_prompt_tokens = 0
  total_candidate_tokens = 0
  total_tokens = 0

  async def run_prompt(session: Session, new_message: str):
    nonlocal total_prompt_tokens
    nonlocal total_candidate_tokens
    nonlocal total_tokens
    content = types.Content(
        role='user', parts=[types.Part.from_text(text=new_message)]
    )
    print('** User says:', content.model_dump(exclude_none=True))
    async for event in runner.run_async(
        user_id=user_id_1,
        session_id=session.id,
        new_message=content,
    ):
      if event.content.parts and event.content.parts[0].text:
        print(f'** {event.author}: {event.content.parts[0].text}')
      if event.usage_metadata:
        # Each event carries the token counts reported by the model that
        # produced it; add them to the session totals.
        total_prompt_tokens += event.usage_metadata.prompt_token_count or 0
        total_candidate_tokens += (
            event.usage_metadata.candidates_token_count or 0
        )
        total_tokens += event.usage_metadata.total_token_count or 0
        print(
            'Turn tokens:'
            f' {event.usage_metadata.total_token_count} (prompt={event.usage_metadata.prompt_token_count},'
            f' candidates={event.usage_metadata.candidates_token_count})'
        )
    print(
        f'Session tokens: {total_tokens} (prompt={total_prompt_tokens},'
        f' candidates={total_candidate_tokens})'
    )

  start_time = time.time()
  print('Start time:', start_time)
  print('------------------------------------')
  await run_prompt(session_11, 'Hi')
  await run_prompt(session_11, 'Roll a die with 100 sides')
  print(
      await artifact_service.list_artifact_keys(
          app_name=app_name, user_id=user_id_1, session_id=session_11.id
      )
  )
  end_time = time.time()
  print('------------------------------------')
  print('End time:', end_time)
  print('Total time:', end_time - start_time)


if __name__ == '__main__':
  asyncio.run(main())
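
A possible follow-up to the session-wide totals above (not part of this commit): since each event names its emitting agent via event.author, the same usage_metadata fields can be bucketed per sub-agent to see how many tokens each model consumed. A minimal sketch, assuming the same Runner and Session objects the sample already creates; run_prompt_with_breakdown is a hypothetical helper:

# Hedged sketch: per-sub-agent token totals for one turn, keyed by the
# author recorded on each event.
from collections import defaultdict

from google.adk import Runner
from google.adk.sessions import Session
from google.genai import types


async def run_prompt_with_breakdown(
    runner: Runner, user_id: str, session: Session, new_message: str
) -> dict[str, int]:
  content = types.Content(
      role='user', parts=[types.Part.from_text(text=new_message)]
  )
  tokens_by_author: dict[str, int] = defaultdict(int)
  async for event in runner.run_async(
      user_id=user_id, session_id=session.id, new_message=content
  ):
    if event.usage_metadata:
      tokens_by_author[event.author] += (
          event.usage_metadata.total_token_count or 0
      )
  return dict(tokens_by_author)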