Mirror of https://github.com/EvolutionAPI/adk-python.git (synced 2025-07-13 15:14:50 -06:00)
Add input transcription support for live/streaming.
Copybara import of the project:

-- d481e0604a79470e2c1308827b3ecb78bfb5327e by Alan B <alan@nerds.ai>:
   feat: 🚧 catch user transcription

-- bba436bb76d1d2f9d5ba969fce38ff8b8a443254 by Alan B <alan@nerds.ai>:
   feat: ✨ send user transcription event as llm_response

-- ad2abf540c60895b79c50f9051a6289ce394b98d by Alan B <death1027@outlook.com>:
   style: 💄 update lint problems

-- 744703c06716300c0f9f41633d3bafdf4cb180a1 by Hangfei Lin <hangfeilin@gmail.com>:
   fix: set right order for input transcription

-- 31a5d42d6155b0e5caad0c73c8df43255322016f by Hangfei Lin <hangfeilin@gmail.com>:
   remove print

-- 59e5d9c72060f97d124883150989315401a4c1b5 by Hangfei Lin <hangfeilin@gmail.com>:
   remove api version

COPYBARA_INTEGRATE_REVIEW=https://github.com/google/adk-python/pull/495 from BloodBoy21:main ea29015af041f9785abaa8583e2c767f9d8c8bc8
PiperOrigin-RevId: 755401615
parent 905c20dad6
commit fcca0afdac
@@ -33,7 +33,7 @@ dependencies = [
   "google-cloud-secret-manager>=2.22.0", # Fetching secrets in RestAPI Tool
   "google-cloud-speech>=2.30.0", # For Audio Transcription
   "google-cloud-storage>=2.18.0, <3.0.0", # For GCS Artifact service
-  "google-genai>=1.11.0", # Google GenAI SDK
+  "google-genai>=1.12.1", # Google GenAI SDK
   "graphviz>=0.20.2", # Graphviz for graph rendering
   "mcp>=1.5.0;python_version>='3.10'", # For MCP Toolset
   "opentelemetry-api>=1.31.0", # OpenTelemetry
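The floor on google-genai moves from 1.11.0 to 1.12.1, since the Live API transcription types used further down ship in newer SDK releases. A minimal, hypothetical startup guard mirroring the pyproject.toml constraint (the 1.12.1 floor comes from this diff; `packaging` is an assumed extra dependency):

from importlib.metadata import version

from packaging.version import Version  # assumption: packaging is installed

# Fail fast if the installed SDK predates the transcription fields.
if Version(version("google-genai")) < Version("1.12.1"):
  raise RuntimeError(
      "input audio transcription requires google-genai>=1.12.1"
  )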
@@ -65,6 +65,9 @@ class RunConfig(BaseModel):
   output_audio_transcription: Optional[types.AudioTranscriptionConfig] = None
   """Output transcription for live agents with audio response."""

+  input_audio_transcription: Optional[types.AudioTranscriptionConfig] = None
+  """Input transcription for live agents with audio input from user."""
+
   max_llm_calls: int = 500
   """
   A limit on the total number of llm calls for a given run.
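With the new field in place, callers can request transcription of both audio directions on a run. A minimal sketch, assuming `RunConfig` and `StreamingMode` are importable from `google.adk.agents.run_config` and that `types.AudioTranscriptionConfig` takes no required arguments:

from google.genai import types

from google.adk.agents.run_config import RunConfig, StreamingMode

run_config = RunConfig(
    streaming_mode=StreamingMode.BIDI,  # live/bidirectional streaming
    # Transcribe the user's spoken input into text events.
    input_audio_transcription=types.AudioTranscriptionConfig(),
    # Transcribe the model's audio replies as well.
    output_audio_transcription=types.AudioTranscriptionConfig(),
)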
@@ -190,6 +190,16 @@ class BaseLlmFlow(ABC):
       llm_request: LlmRequest,
   ) -> AsyncGenerator[Event, None]:
     """Receive data from model and process events using BaseLlmConnection."""

+    def get_author(llm_response):
+      """Get the author of the event.
+
+      When the model returns transcription, the author is "user". Otherwise,
+      the author is the agent.
+      """
+      if llm_response and llm_response.content and llm_response.content.role == "user":
+        return "user"
+      else:
+        return invocation_context.agent.name

     assert invocation_context.live_request_queue
     try:
       while True:
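The attribution rule is small enough to check in isolation. An illustrative, self-contained restatement (the `author_for` helper and the sample agent name are hypothetical; only `get_author` inside the flow is real):

from google.genai import types

from google.adk.models.llm_response import LlmResponse

def author_for(llm_response, agent_name: str) -> str:
  # Transcribed user speech comes back with content role "user".
  if llm_response and llm_response.content and llm_response.content.role == "user":
    return "user"
  return agent_name

transcription = LlmResponse(
    content=types.Content(role="user", parts=[types.Part.from_text(text="hi")])
)
model_reply = LlmResponse(
    content=types.Content(role="model", parts=[types.Part.from_text(text="Hello!")])
)
assert author_for(transcription, "weather_agent") == "user"
assert author_for(model_reply, "weather_agent") == "weather_agent"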
@@ -197,7 +207,7 @@ class BaseLlmFlow(ABC):
         model_response_event = Event(
             id=Event.new_id(),
             invocation_id=invocation_context.invocation_id,
-            author=invocation_context.agent.name,
+            author=get_author(llm_response),
         )
         async for event in self._postprocess_live(
             invocation_context,
@@ -62,6 +62,9 @@ class _BasicLlmRequestProcessor(BaseLlmRequestProcessor):
     llm_request.live_connect_config.output_audio_transcription = (
         invocation_context.run_config.output_audio_transcription
    )
+    llm_request.live_connect_config.input_audio_transcription = (
+        invocation_context.run_config.input_audio_transcription
+    )

     # TODO: handle tool append here, instead of in BaseTool.process_llm_request.
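For reference, this request processor is effectively populating the config that google-genai's Live API consumes. A rough standalone sketch of the equivalent config, assuming `LiveConnectConfig` in google-genai >= 1.12.1 exposes both transcription fields:

from google.genai import types

live_config = types.LiveConnectConfig(
    response_modalities=["AUDIO"],
    # Mirrors run_config.input_audio_transcription.
    input_audio_transcription=types.AudioTranscriptionConfig(),
    # Mirrors run_config.output_audio_transcription.
    output_audio_transcription=types.AudioTranscriptionConfig(),
)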
@@ -145,7 +145,20 @@ class GeminiLlmConnection(BaseLlmConnection):
             yield self.__build_full_text_response(text)
             text = ''
           yield llm_response

+        if (
+            message.server_content.input_transcription
+            and message.server_content.input_transcription.text
+        ):
+          user_text = message.server_content.input_transcription.text
+          parts = [
+              types.Part.from_text(
+                  text=user_text,
+              )
+          ]
+          llm_response = LlmResponse(
+              content=types.Content(role='user', parts=parts)
+          )
+          yield llm_response
         if (
             message.server_content.output_transcription
             and message.server_content.output_transcription.text
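Downstream, transcribed user speech now surfaces as ordinary events authored "user". A hypothetical consumer loop (the `runner`, `session`, and `queue` objects are assumed to be set up elsewhere, and `run_live`'s exact signature may differ by version):

# Assumption: runner is a google.adk Runner with an active live session.
async for event in runner.run_live(
    session=session,
    live_request_queue=queue,
    run_config=run_config,
):
  if event.author == "user" and event.content and event.content.parts:
    # Input transcription of the user's audio, yielded by the connection.
    print("user said:", event.content.parts[0].text)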