Add token usage to Gemini (streaming), LiteLLM, and Anthropic

Also add a token_usage sample that showcases the token usage of sub-agents running different models under a parent agent.

PiperOrigin-RevId: 759347015
Selcuk Gun
2025-05-15 16:22:04 -07:00
committed by Copybara-Service
parent 4d5760917d
commit 509db3f9fb
8 changed files with 515 additions and 45 deletions
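
The model-side changes (Gemini streaming, LiteLLM, Anthropic) are among the 8 changed files, but only the new token_usage sample is reproduced below. As a rough, hedged sketch of the kind of mapping such an integration needs, assuming an OpenAI-style usage object like the one LiteLLM reports and a hypothetical helper name `_to_usage_metadata`, the provider's counters get copied onto google.genai's usage-metadata type, which is what the sample later reads back from each event's usage_metadata:

# Hedged sketch, not the committed implementation. Assumes an OpenAI-style
# usage object (prompt_tokens / completion_tokens / total_tokens), which is
# the shape LiteLLM reports.
from typing import Any, Optional

from google.genai import types


def _to_usage_metadata(
    usage: Any,
) -> Optional[types.GenerateContentResponseUsageMetadata]:
  if usage is None:
    return None
  return types.GenerateContentResponseUsageMetadata(
      prompt_token_count=getattr(usage, 'prompt_tokens', None),
      candidates_token_count=getattr(usage, 'completion_tokens', None),
      total_token_count=getattr(usage, 'total_tokens', None),
  )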

@@ -0,0 +1,15 @@
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import agent

@@ -0,0 +1,97 @@
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import random
from google.adk import Agent
from google.adk.agents.llm_agent import LlmAgent
from google.adk.agents.sequential_agent import SequentialAgent
from google.adk.models.anthropic_llm import Claude
from google.adk.models.lite_llm import LiteLlm
from google.adk.planners import BuiltInPlanner
from google.adk.planners import PlanReActPlanner
from google.adk.tools.tool_context import ToolContext
from google.genai import types


def roll_die(sides: int, tool_context: ToolContext) -> int:
  """Roll a die and return the rolled result.

  Args:
    sides: The integer number of sides the die has.

  Returns:
    An integer of the result of rolling the die.
  """
  result = random.randint(1, sides)
  if 'rolls' not in tool_context.state:
    tool_context.state['rolls'] = []
  tool_context.state['rolls'] = tool_context.state['rolls'] + [result]
  return result


# Each sub-agent handles the same dice-rolling task with a different model
# backend, so a single run reports token usage from OpenAI (via LiteLLM),
# Anthropic Claude, Claude on Vertex AI (via LiteLLM), and Gemini.
roll_agent_with_openai = LlmAgent(
    model=LiteLlm(model='openai/gpt-4o'),
    description='Handles rolling dice of different sizes.',
    name='roll_agent_with_openai',
    instruction="""
      You are responsible for rolling dice based on the user's request.
      When asked to roll a die, you must call the roll_die tool with the number of sides as an integer.
    """,
    tools=[roll_die],
)

roll_agent_with_claude = LlmAgent(
    model=Claude(model='claude-3-7-sonnet@20250219'),
    description='Handles rolling dice of different sizes.',
    name='roll_agent_with_claude',
    instruction="""
      You are responsible for rolling dice based on the user's request.
      When asked to roll a die, you must call the roll_die tool with the number of sides as an integer.
    """,
    tools=[roll_die],
)

roll_agent_with_litellm_claude = LlmAgent(
    model=LiteLlm(model='vertex_ai/claude-3-7-sonnet'),
    description='Handles rolling dice of different sizes.',
    name='roll_agent_with_litellm_claude',
    instruction="""
      You are responsible for rolling dice based on the user's request.
      When asked to roll a die, you must call the roll_die tool with the number of sides as an integer.
    """,
    tools=[roll_die],
)

roll_agent_with_gemini = LlmAgent(
    model='gemini-2.0-flash',
    description='Handles rolling dice of different sizes.',
    name='roll_agent_with_gemini',
    instruction="""
      You are responsible for rolling dice based on the user's request.
      When asked to roll a die, you must call the roll_die tool with the number of sides as an integer.
    """,
    tools=[roll_die],
)

# The parent agent runs each sub-agent in sequence.
root_agent = SequentialAgent(
    name='code_pipeline_agent',
    sub_agents=[
        roll_agent_with_openai,
        roll_agent_with_claude,
        roll_agent_with_litellm_claude,
        roll_agent_with_gemini,
    ],
)
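
agent.py also imports BuiltInPlanner and PlanReActPlanner without wiring them up. As a hedged variation (not part of this sample), a thinking-enabled planner could be attached to a Gemini agent; with a thinking-capable model, thinking tokens would then typically show up in the usage metadata as well (thoughts_token_count on the genai usage type). The model name below is an assumption:

# Hedged variation, not in the commit: a Gemini sub-agent with a thinking
# planner, so thinking tokens are also reflected in event.usage_metadata.
roll_agent_with_gemini_thinking = LlmAgent(
    model='gemini-2.5-flash',  # assumption: any thinking-capable Gemini model
    description='Handles rolling dice of different sizes.',
    name='roll_agent_with_gemini_thinking',
    planner=BuiltInPlanner(
        thinking_config=types.ThinkingConfig(include_thoughts=True)
    ),
    instruction="""
      You are responsible for rolling dice based on the user's request.
      When asked to roll a die, you must call the roll_die tool with the number of sides as an integer.
    """,
    tools=[roll_die],
)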

@@ -0,0 +1,102 @@
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import asyncio
import time
import warnings
import agent
from dotenv import load_dotenv
from google.adk import Runner
from google.adk.agents.run_config import RunConfig
from google.adk.artifacts import InMemoryArtifactService
from google.adk.cli.utils import logs
from google.adk.sessions import InMemorySessionService
from google.adk.sessions import Session
from google.genai import types

load_dotenv(override=True)
warnings.filterwarnings('ignore', category=UserWarning)
logs.log_to_tmp_folder()


async def main():
  app_name = 'my_app'
  user_id_1 = 'user1'
  session_service = InMemorySessionService()
  artifact_service = InMemoryArtifactService()
  runner = Runner(
      app_name=app_name,
      agent=agent.root_agent,
      artifact_service=artifact_service,
      session_service=session_service,
  )
  session_11 = session_service.create_session(
      app_name=app_name, user_id=user_id_1
  )

  # Running totals accumulated across every turn in this session.
  total_prompt_tokens = 0
  total_candidate_tokens = 0
  total_tokens = 0

  async def run_prompt(session: Session, new_message: str):
    nonlocal total_prompt_tokens
    nonlocal total_candidate_tokens
    nonlocal total_tokens
    content = types.Content(
        role='user', parts=[types.Part.from_text(text=new_message)]
    )
    print('** User says:', content.model_dump(exclude_none=True))
    async for event in runner.run_async(
        user_id=user_id_1,
        session_id=session.id,
        new_message=content,
    ):
      if event.content.parts and event.content.parts[0].text:
        print(f'** {event.author}: {event.content.parts[0].text}')
      if event.usage_metadata:
        # Each event carries the token counts reported by the model that
        # produced it; add them to the session totals.
        total_prompt_tokens += event.usage_metadata.prompt_token_count or 0
        total_candidate_tokens += (
            event.usage_metadata.candidates_token_count or 0
        )
        total_tokens += event.usage_metadata.total_token_count or 0
        print(
            'Turn tokens:'
            f' {event.usage_metadata.total_token_count} (prompt={event.usage_metadata.prompt_token_count},'
            f' candidates={event.usage_metadata.candidates_token_count})'
        )
    print(
        f'Session tokens: {total_tokens} (prompt={total_prompt_tokens},'
        f' candidates={total_candidate_tokens})'
    )

  start_time = time.time()
  print('Start time:', start_time)
  print('------------------------------------')
  await run_prompt(session_11, 'Hi')
  await run_prompt(session_11, 'Roll a die with 100 sides')
  print(
      await artifact_service.list_artifact_keys(
          app_name=app_name, user_id=user_id_1, session_id=session_11.id
      )
  )
  end_time = time.time()
  print('------------------------------------')
  print('End time:', end_time)
  print('Total time:', end_time - start_time)


if __name__ == '__main__':
  asyncio.run(main())
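
A possible follow-up to the session-wide totals above (not part of this commit): since each event names its emitting agent via event.author, the same usage_metadata fields can be bucketed per sub-agent to see how many tokens each model consumed. A minimal sketch, assuming the same Runner and Session objects the sample already creates; run_prompt_with_breakdown is a hypothetical helper:

# Hedged sketch: per-sub-agent token totals for one turn, keyed by the
# author recorded on each event.
from collections import defaultdict

from google.adk import Runner
from google.adk.sessions import Session
from google.genai import types


async def run_prompt_with_breakdown(
    runner: Runner, user_id: str, session: Session, new_message: str
) -> dict[str, int]:
  content = types.Content(
      role='user', parts=[types.Part.from_text(text=new_message)]
  )
  tokens_by_author: dict[str, int] = defaultdict(int)
  async for event in runner.run_async(
      user_id=user_id, session_id=session.id, new_message=content
  ):
    if event.usage_metadata:
      tokens_by_author[event.author] += (
          event.usage_metadata.total_token_count or 0
      )
  return dict(tokens_by_author)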