Mirror of https://github.com/EvolutionAPI/adk-python.git (synced 2026-02-05 06:16:24 -06:00)
Agent Development Kit (ADK)
An easy-to-use and powerful framework to build AI agents.
@@ -0,0 +1,200 @@
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import logging
from typing import AsyncGenerator

from google.genai import live
from google.genai import types

from .base_llm_connection import BaseLlmConnection
from .llm_response import LlmResponse

logger = logging.getLogger(__name__)


class GeminiLlmConnection(BaseLlmConnection):
  """The Gemini model connection."""

  def __init__(self, gemini_session: live.AsyncSession):
    self._gemini_session = gemini_session

  async def send_history(self, history: list[types.Content]):
    """Sends the conversation history to the gemini model.

    You call this method right after setting up the model connection.
    The model will respond if the last content is from the user; otherwise it
    will wait for new user input before responding.

    Args:
      history: The conversation history to send to the model.
    """

    # TODO: Remove this filter and translate unary contents to streaming
    # contents properly.

    # We ignore any audio from the user during the agent transfer phase.
    contents = [
        content
        for content in history
        if content.parts and content.parts[0].text
    ]

    if contents:
      await self._gemini_session.send(
          input=types.LiveClientContent(
              turns=contents,
              turn_complete=contents[-1].role == 'user',
          ),
      )
    else:
      logger.info('no content is sent')

  async def send_content(self, content: types.Content):
    """Sends a user content to the gemini model.

    The model will respond immediately upon receiving the content.
    If you send function responses, all parts in the content should be
    function responses.

    Args:
      content: The content to send to the model.
    """

    assert content.parts
    if content.parts[0].function_response:
      # All parts have to be function responses.
      function_responses = [part.function_response for part in content.parts]
      logger.debug('Sending LLM function response: %s', function_responses)
      await self._gemini_session.send(
          input=types.LiveClientToolResponse(
              function_responses=function_responses
          ),
      )
    else:
      logger.debug('Sending LLM new content %s', content)
      await self._gemini_session.send(
          input=types.LiveClientContent(
              turns=[content],
              turn_complete=True,
          )
      )

  async def send_realtime(self, blob: types.Blob):
    """Sends a chunk of audio or a frame of video to the model in realtime.

    Args:
      blob: The blob to send to the model.
    """

    input_blob = blob.model_dump()
    logger.debug('Sending LLM Blob: %s', input_blob)
    await self._gemini_session.send(input=input_blob)

  def __build_full_text_response(self, text: str):
    """Builds a full text response.

    The text should not be partial, and the returned LlmResponse is not
    partial.

    Args:
      text: The text to be included in the response.

    Returns:
      An LlmResponse containing the full text.
    """
    return LlmResponse(
        content=types.Content(
            role='model',
            parts=[types.Part.from_text(text=text)],
        ),
    )

  async def receive(self) -> AsyncGenerator[LlmResponse, None]:
    """Receives the model response using the llm server connection.

    Yields:
      LlmResponse: The model response.
    """

    text = ''
    async for message in self._gemini_session.receive():
      logger.debug('Got LLM Live message: %s', message)
      if message.server_content:
        content = message.server_content.model_turn
        if content and content.parts:
          llm_response = LlmResponse(
              content=content, interrupted=message.server_content.interrupted
          )
          if content.parts[0].text:
            text += content.parts[0].text
            llm_response.partial = True
          # Don't yield the merged text event when receiving audio data.
          elif text and not content.parts[0].inline_data:
            yield self.__build_full_text_response(text)
            text = ''
          yield llm_response

        if (
            message.server_content.output_transcription
            and message.server_content.output_transcription.text
        ):
          # TODO: Right now, we just support output_transcription without
          # changing the interface and data protocol. Later, we can consider
          # supporting output_transcription as a separate field in LlmResponse.

          # Transcription is always considered a partial event.
          # We rely on other control signals to determine when to yield the
          # full text response (turn_complete, interrupted, or tool_call).
          text += message.server_content.output_transcription.text
          parts = [
              types.Part.from_text(
                  text=message.server_content.output_transcription.text
              )
          ]
          llm_response = LlmResponse(
              content=types.Content(role='model', parts=parts), partial=True
          )
          yield llm_response

        if message.server_content.turn_complete:
          if text:
            yield self.__build_full_text_response(text)
            text = ''
          yield LlmResponse(
              turn_complete=True, interrupted=message.server_content.interrupted
          )
          break
        # In case of empty content or parts, we still surface it.
        # If it's an interrupted message, we merge the previous partial text;
        # otherwise we don't merge, because content can be None when the model
        # safety threshold is triggered.
        if message.server_content.interrupted and text:
          yield self.__build_full_text_response(text)
          text = ''
        yield LlmResponse(interrupted=message.server_content.interrupted)
      if message.tool_call:
        if text:
          yield self.__build_full_text_response(text)
          text = ''
        parts = [
            types.Part(function_call=function_call)
            for function_call in message.tool_call.function_calls
        ]
        yield LlmResponse(content=types.Content(role='model', parts=parts))

  async def close(self):
    """Closes the llm server connection."""

    await self._gemini_session.close()
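
For context, a minimal usage sketch (not part of this commit): it assumes the google-genai live API (client.aio.live.connect) yields the live.AsyncSession this class wraps, that GeminiLlmConnection is importable from google.adk.models.gemini_llm_connection, and that credentials for genai.Client() are configured; the model name and config are illustrative only. In practice the ADK runner drives this connection, but the sketch shows the intended call order: wrap the session, send history, then iterate receive().

import asyncio

from google import genai
from google.genai import types

from google.adk.models.gemini_llm_connection import GeminiLlmConnection


async def main():
  # Assumption: GOOGLE_API_KEY (or Vertex AI credentials) is configured.
  client = genai.Client()
  config = types.LiveConnectConfig(response_modalities=['TEXT'])
  # 'gemini-2.0-flash-live-001' is an illustrative live-capable model name.
  async with client.aio.live.connect(
      model='gemini-2.0-flash-live-001', config=config
  ) as session:
    connection = GeminiLlmConnection(session)
    # The last (and only) turn is from the user, so the model replies at once.
    await connection.send_history([
        types.Content(
            role='user', parts=[types.Part.from_text(text='Hello there!')]
        )
    ])
    async for llm_response in connection.receive():
      if llm_response.turn_complete:
        break
      # Partial chunks stream in first; the merged full text arrives as a
      # non-partial response right before turn_complete.
      if (
          not llm_response.partial
          and llm_response.content
          and llm_response.content.parts
          and llm_response.content.parts[0].text
      ):
        print(llm_response.content.parts[0].text)


if __name__ == '__main__':
  asyncio.run(main())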