structure saas with tools

Davidson Gomes
2025-04-25 15:30:54 -03:00
commit 1aef473937
16434 changed files with 6584257 additions and 0 deletions

View File

@@ -0,0 +1,115 @@
"""
Handler for transforming Responses API requests into litellm.completion requests
"""
from typing import Any, Coroutine, Optional, Union
import litellm
from litellm.responses.litellm_completion_transformation.streaming_iterator import (
LiteLLMCompletionStreamingIterator,
)
from litellm.responses.litellm_completion_transformation.transformation import (
LiteLLMCompletionResponsesConfig,
)
from litellm.responses.streaming_iterator import BaseResponsesAPIStreamingIterator
from litellm.types.llms.openai import (
ResponseInputParam,
ResponsesAPIOptionalRequestParams,
ResponsesAPIResponse,
)
from litellm.types.utils import ModelResponse
class LiteLLMCompletionTransformationHandler:
def response_api_handler(
self,
model: str,
input: Union[str, ResponseInputParam],
responses_api_request: ResponsesAPIOptionalRequestParams,
custom_llm_provider: Optional[str] = None,
_is_async: bool = False,
stream: Optional[bool] = None,
**kwargs,
) -> Union[
ResponsesAPIResponse,
BaseResponsesAPIStreamingIterator,
Coroutine[
Any, Any, Union[ResponsesAPIResponse, BaseResponsesAPIStreamingIterator]
],
]:
litellm_completion_request: dict = (
LiteLLMCompletionResponsesConfig.transform_responses_api_request_to_chat_completion_request(
model=model,
input=input,
responses_api_request=responses_api_request,
custom_llm_provider=custom_llm_provider,
stream=stream,
**kwargs,
)
)
if _is_async:
return self.async_response_api_handler(
litellm_completion_request=litellm_completion_request,
request_input=input,
responses_api_request=responses_api_request,
**kwargs,
)
litellm_completion_response: Union[
ModelResponse, litellm.CustomStreamWrapper
] = litellm.completion(
**litellm_completion_request,
**kwargs,
)
if isinstance(litellm_completion_response, ModelResponse):
responses_api_response: ResponsesAPIResponse = (
LiteLLMCompletionResponsesConfig.transform_chat_completion_response_to_responses_api_response(
chat_completion_response=litellm_completion_response,
request_input=input,
responses_api_request=responses_api_request,
)
)
return responses_api_response
elif isinstance(litellm_completion_response, litellm.CustomStreamWrapper):
return LiteLLMCompletionStreamingIterator(
litellm_custom_stream_wrapper=litellm_completion_response,
request_input=input,
responses_api_request=responses_api_request,
)
async def async_response_api_handler(
self,
litellm_completion_request: dict,
request_input: Union[str, ResponseInputParam],
responses_api_request: ResponsesAPIOptionalRequestParams,
**kwargs,
) -> Union[ResponsesAPIResponse, BaseResponsesAPIStreamingIterator]:
litellm_completion_response: Union[
ModelResponse, litellm.CustomStreamWrapper
] = await litellm.acompletion(
**litellm_completion_request,
**kwargs,
)
if isinstance(litellm_completion_response, ModelResponse):
responses_api_response: ResponsesAPIResponse = (
LiteLLMCompletionResponsesConfig.transform_chat_completion_response_to_responses_api_response(
chat_completion_response=litellm_completion_response,
request_input=request_input,
responses_api_request=responses_api_request,
)
)
return responses_api_response
elif isinstance(litellm_completion_response, litellm.CustomStreamWrapper):
return LiteLLMCompletionStreamingIterator(
litellm_custom_stream_wrapper=litellm_completion_response,
request_input=request_input,
responses_api_request=responses_api_request,
)

View File

@@ -0,0 +1,59 @@
"""
Responses API has previous_response_id, which is the id of the previous response.
LiteLLM needs to maintain a cache of the previous response input, output, previous_response_id, and model.
This class handles that cache.
"""
from typing import List, Optional, Tuple, Union
from typing_extensions import TypedDict
from litellm.caching import InMemoryCache
from litellm.types.llms.openai import ResponseInputParam, ResponsesAPIResponse
RESPONSES_API_PREVIOUS_RESPONSES_CACHE = InMemoryCache()
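# Cap on how many cached responses are walked when reconstructing a session chain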
MAX_PREV_SESSION_INPUTS = 50
class ResponsesAPISessionElement(TypedDict, total=False):
input: Union[str, ResponseInputParam]
output: ResponsesAPIResponse
response_id: str
previous_response_id: Optional[str]
class SessionHandler:
def add_completed_response_to_cache(
self, response_id: str, session_element: ResponsesAPISessionElement
):
RESPONSES_API_PREVIOUS_RESPONSES_CACHE.set_cache(
key=response_id, value=session_element
)
def get_chain_of_previous_input_output_pairs(
self, previous_response_id: str
) -> List[Tuple[ResponseInputParam, ResponsesAPIResponse]]:
response_api_inputs: List[Tuple[ResponseInputParam, ResponsesAPIResponse]] = []
current_previous_response_id = previous_response_id
count_session_elements = 0
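# Walk the previous_response_id chain backwards (newest first), collecting
# (input, output) pairs until the chain ends or MAX_PREV_SESSION_INPUTS is reached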
while current_previous_response_id:
if count_session_elements > MAX_PREV_SESSION_INPUTS:
break
session_element = RESPONSES_API_PREVIOUS_RESPONSES_CACHE.get_cache(
key=current_previous_response_id
)
if session_element:
response_api_inputs.append(
(session_element.get("input"), session_element.get("output"))
)
current_previous_response_id = session_element.get(
"previous_response_id"
)
else:
break
count_session_elements += 1
return response_api_inputs
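
To make the session flow above concrete, here is a minimal sketch using the classes defined in this file; the ids, inputs, and the first_response / second_response objects are placeholders, not real values.

session_handler = SessionHandler()
session_handler.add_completed_response_to_cache(
    response_id="resp_1",
    session_element=ResponsesAPISessionElement(
        input="What is LiteLLM?",
        output=first_response,  # placeholder ResponsesAPIResponse
        response_id="resp_1",
        previous_response_id=None,
    ),
)
session_handler.add_completed_response_to_cache(
    response_id="resp_2",
    session_element=ResponsesAPISessionElement(
        input="Does it support streaming?",
        output=second_response,  # placeholder ResponsesAPIResponse
        response_id="resp_2",
        previous_response_id="resp_1",
    ),
)
# Walking back from the newest id returns (input, output) pairs newest-first:
# [(input_2, output_2), (input_1, output_1)]
pairs = session_handler.get_chain_of_previous_input_output_pairs(
    previous_response_id="resp_2"
)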

View File

@@ -0,0 +1,157 @@
from typing import List, Optional, Union
import litellm
from litellm.main import stream_chunk_builder
from litellm.responses.litellm_completion_transformation.transformation import (
LiteLLMCompletionResponsesConfig,
)
from litellm.responses.streaming_iterator import ResponsesAPIStreamingIterator
from litellm.types.llms.openai import (
OutputTextDeltaEvent,
ResponseCompletedEvent,
ResponseInputParam,
ResponsesAPIOptionalRequestParams,
ResponsesAPIStreamEvents,
ResponsesAPIStreamingResponse,
)
from litellm.types.utils import Delta as ChatCompletionDelta
from litellm.types.utils import (
ModelResponse,
ModelResponseStream,
StreamingChoices,
TextCompletionResponse,
)
class LiteLLMCompletionStreamingIterator(ResponsesAPIStreamingIterator):
"""
Iterator (sync and async) that adapts a litellm chat completion stream into Responses API streaming events.
"""
def __init__(
self,
litellm_custom_stream_wrapper: litellm.CustomStreamWrapper,
request_input: Union[str, ResponseInputParam],
responses_api_request: ResponsesAPIOptionalRequestParams,
):
self.litellm_custom_stream_wrapper: litellm.CustomStreamWrapper = (
litellm_custom_stream_wrapper
)
self.request_input: Union[str, ResponseInputParam] = request_input
self.responses_api_request: ResponsesAPIOptionalRequestParams = (
responses_api_request
)
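# Chunks are accumulated so the final ResponseCompletedEvent can be rebuilt
# with stream_chunk_builder once the underlying stream is exhausted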
self.collected_chat_completion_chunks: List[ModelResponseStream] = []
self.finished: bool = False
async def __anext__(
self,
) -> Union[ResponsesAPIStreamingResponse, ResponseCompletedEvent]:
try:
while True:
if self.finished is True:
raise StopAsyncIteration
# Get the next chunk from the stream
try:
chunk = await self.litellm_custom_stream_wrapper.__anext__()
self.collected_chat_completion_chunks.append(chunk)
response_api_chunk = (
self._transform_chat_completion_chunk_to_response_api_chunk(
chunk
)
)
if response_api_chunk:
return response_api_chunk
except StopAsyncIteration:
self.finished = True
response_completed_event = self._emit_response_completed_event()
if response_completed_event:
return response_completed_event
else:
raise StopAsyncIteration
except Exception as e:
# Handle HTTP errors
self.finished = True
raise e
def __iter__(self):
return self
def __next__(
self,
) -> Union[ResponsesAPIStreamingResponse, ResponseCompletedEvent]:
try:
while True:
if self.finished is True:
raise StopIteration
# Get the next chunk from the stream
try:
chunk = self.litellm_custom_stream_wrapper.__next__()
self.collected_chat_completion_chunks.append(chunk)
response_api_chunk = (
self._transform_chat_completion_chunk_to_response_api_chunk(
chunk
)
)
if response_api_chunk:
return response_api_chunk
except StopIteration:
self.finished = True
response_completed_event = self._emit_response_completed_event()
if response_completed_event:
return response_completed_event
else:
raise StopIteration
except Exception as e:
# Handle HTTP errors
self.finished = True
raise e
def _transform_chat_completion_chunk_to_response_api_chunk(
self, chunk: ModelResponseStream
) -> Optional[ResponsesAPIStreamingResponse]:
"""
Transform a chat completion chunk to a response API chunk.
This currently only emits the OutputTextDeltaEvent, which is what tools built on the Responses API consume.
"""
return OutputTextDeltaEvent(
type=ResponsesAPIStreamEvents.OUTPUT_TEXT_DELTA,
item_id=chunk.id,
output_index=0,
content_index=0,
delta=self._get_delta_string_from_streaming_choices(chunk.choices),
)
def _get_delta_string_from_streaming_choices(
self, choices: List[StreamingChoices]
) -> str:
"""
Get the delta string from the streaming choices
For now this collects the first choice's delta string.
It's unclear how users expect LiteLLM to translate multiple choices per chunk into Responses API output.
"""
choice = choices[0]
chat_completion_delta: ChatCompletionDelta = choice.delta
return chat_completion_delta.content or ""
def _emit_response_completed_event(self) -> Optional[ResponseCompletedEvent]:
litellm_model_response: Optional[
Union[ModelResponse, TextCompletionResponse]
] = stream_chunk_builder(chunks=self.collected_chat_completion_chunks)
if litellm_model_response and isinstance(litellm_model_response, ModelResponse):
return ResponseCompletedEvent(
type=ResponsesAPIStreamEvents.RESPONSE_COMPLETED,
response=LiteLLMCompletionResponsesConfig.transform_chat_completion_response_to_responses_api_response(
request_input=self.request_input,
chat_completion_response=litellm_model_response,
responses_api_request=self.responses_api_request,
),
)
else:
return None
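
For orientation, a hypothetical consumer of the iterator above; stream_wrapper is a placeholder for the litellm.CustomStreamWrapper returned by litellm.completion(..., stream=True).

iterator = LiteLLMCompletionStreamingIterator(
    litellm_custom_stream_wrapper=stream_wrapper,  # placeholder stream
    request_input="hi",
    responses_api_request={},
)
for event in iterator:
    if event.type == ResponsesAPIStreamEvents.OUTPUT_TEXT_DELTA:
        print(event.delta, end="")  # incremental text
    elif event.type == ResponsesAPIStreamEvents.RESPONSE_COMPLETED:
        final_response = event.response  # ResponsesAPIResponse rebuilt from all chunks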

View File

@@ -0,0 +1,664 @@
"""
Handles transforming from Responses API -> LiteLLM completion (Chat Completion API)
"""
from typing import Any, Dict, List, Optional, Union
from openai.types.responses.tool_param import FunctionToolParam
from litellm.caching import InMemoryCache
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
from litellm.responses.litellm_completion_transformation.session_handler import (
ResponsesAPISessionElement,
SessionHandler,
)
from litellm.types.llms.openai import (
AllMessageValues,
ChatCompletionResponseMessage,
ChatCompletionSystemMessage,
ChatCompletionToolCallChunk,
ChatCompletionToolCallFunctionChunk,
ChatCompletionToolMessage,
ChatCompletionToolParam,
ChatCompletionToolParamFunctionChunk,
ChatCompletionUserMessage,
GenericChatCompletionMessage,
Reasoning,
ResponseAPIUsage,
ResponseInputParam,
ResponsesAPIOptionalRequestParams,
ResponsesAPIResponse,
ResponseTextConfig,
)
from litellm.types.responses.main import (
GenericResponseOutputItem,
GenericResponseOutputItemContentAnnotation,
OutputFunctionToolCall,
OutputText,
)
from litellm.types.utils import (
ChatCompletionAnnotation,
ChatCompletionMessageToolCall,
Choices,
Function,
Message,
ModelResponse,
Usage,
)
########### Initialize Classes used for Responses API ###########
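# Maps tool_call_id -> the original tool call definition, so tool outputs sent in a
# later request can be re-paired with the call that produced them (providers like
# Anthropic require the tool use definition alongside the tool output)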
TOOL_CALLS_CACHE = InMemoryCache()
RESPONSES_API_SESSION_HANDLER = SessionHandler()
########### End of Initialize Classes used for Responses API ###########
class LiteLLMCompletionResponsesConfig:
@staticmethod
def get_supported_openai_params(model: str) -> list:
"""
The LiteLLM adapter from the OpenAI Responses API to the Chat Completion API supports a subset of the OpenAI Responses API params
"""
return [
"input",
"model",
"instructions",
"max_output_tokens",
"metadata",
"parallel_tool_calls",
"previous_response_id",
"stream",
"temperature",
"tool_choice",
"tools",
"top_p",
"user",
]
@staticmethod
def transform_responses_api_request_to_chat_completion_request(
model: str,
input: Union[str, ResponseInputParam],
responses_api_request: ResponsesAPIOptionalRequestParams,
custom_llm_provider: Optional[str] = None,
stream: Optional[bool] = None,
**kwargs,
) -> dict:
"""
Transform a Responses API request into a Chat Completion request
"""
litellm_completion_request: dict = {
"messages": LiteLLMCompletionResponsesConfig.transform_responses_api_input_to_messages(
input=input,
responses_api_request=responses_api_request,
previous_response_id=responses_api_request.get("previous_response_id"),
),
"model": model,
"tool_choice": responses_api_request.get("tool_choice"),
"tools": LiteLLMCompletionResponsesConfig.transform_responses_api_tools_to_chat_completion_tools(
responses_api_request.get("tools") or [] # type: ignore
),
"top_p": responses_api_request.get("top_p"),
"user": responses_api_request.get("user"),
"temperature": responses_api_request.get("temperature"),
"parallel_tool_calls": responses_api_request.get("parallel_tool_calls"),
"max_tokens": responses_api_request.get("max_output_tokens"),
"stream": stream,
"metadata": kwargs.get("metadata"),
"service_tier": kwargs.get("service_tier"),
# litellm specific params
"custom_llm_provider": custom_llm_provider,
}
# Responses API `Completed` events require usage, so we pass `stream_options` to litellm.completion to include usage in the stream
if stream is True:
stream_options = {
"include_usage": True,
}
litellm_completion_request["stream_options"] = stream_options
litellm_logging_obj: Optional[LiteLLMLoggingObj] = kwargs.get(
"litellm_logging_obj"
)
if litellm_logging_obj:
litellm_logging_obj.stream_options = stream_options
# only pass non-None values
litellm_completion_request = {
k: v for k, v in litellm_completion_request.items() if v is not None
}
return litellm_completion_request
@staticmethod
def transform_responses_api_input_to_messages(
input: Union[str, ResponseInputParam],
responses_api_request: ResponsesAPIOptionalRequestParams,
previous_response_id: Optional[str] = None,
) -> List[
Union[
AllMessageValues,
GenericChatCompletionMessage,
ChatCompletionMessageToolCall,
ChatCompletionResponseMessage,
]
]:
"""
Transform a Responses API input into a list of messages
"""
messages: List[
Union[
AllMessageValues,
GenericChatCompletionMessage,
ChatCompletionMessageToolCall,
ChatCompletionResponseMessage,
]
] = []
if responses_api_request.get("instructions"):
messages.append(
LiteLLMCompletionResponsesConfig.transform_instructions_to_system_message(
responses_api_request.get("instructions")
)
)
if previous_response_id:
previous_response_pairs = (
RESPONSES_API_SESSION_HANDLER.get_chain_of_previous_input_output_pairs(
previous_response_id=previous_response_id
)
)
if previous_response_pairs:
for previous_response_pair in previous_response_pairs:
chat_completion_input_messages = LiteLLMCompletionResponsesConfig._transform_response_input_param_to_chat_completion_message(
input=previous_response_pair[0],
)
chat_completion_output_messages = LiteLLMCompletionResponsesConfig._transform_responses_api_outputs_to_chat_completion_messages(
responses_api_output=previous_response_pair[1],
)
messages.extend(chat_completion_input_messages)
messages.extend(chat_completion_output_messages)
messages.extend(
LiteLLMCompletionResponsesConfig._transform_response_input_param_to_chat_completion_message(
input=input,
)
)
return messages
@staticmethod
def _transform_response_input_param_to_chat_completion_message(
input: Union[str, ResponseInputParam],
) -> List[
Union[
AllMessageValues,
GenericChatCompletionMessage,
ChatCompletionMessageToolCall,
ChatCompletionResponseMessage,
]
]:
"""
Transform a ResponseInputParam into a Chat Completion message
"""
messages: List[
Union[
AllMessageValues,
GenericChatCompletionMessage,
ChatCompletionMessageToolCall,
ChatCompletionResponseMessage,
]
] = []
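# Tool call outputs are buffered separately and appended after the other input
# items (see the extend at the end of this method)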
tool_call_output_messages: List[
Union[
AllMessageValues,
GenericChatCompletionMessage,
ChatCompletionMessageToolCall,
ChatCompletionResponseMessage,
]
] = []
if isinstance(input, str):
messages.append(ChatCompletionUserMessage(role="user", content=input))
elif isinstance(input, list):
for _input in input:
chat_completion_messages = LiteLLMCompletionResponsesConfig._transform_responses_api_input_item_to_chat_completion_message(
input_item=_input
)
if LiteLLMCompletionResponsesConfig._is_input_item_tool_call_output(
input_item=_input
):
tool_call_output_messages.extend(chat_completion_messages)
else:
messages.extend(chat_completion_messages)
messages.extend(tool_call_output_messages)
return messages
@staticmethod
def _ensure_tool_call_output_has_corresponding_tool_call(
messages: List[Union[AllMessageValues, GenericChatCompletionMessage]],
) -> bool:
"""
If any tool call output is present, ensure there is a corresponding tool call/tool_use block
"""
for message in messages:
if message.get("role") == "tool":
return True
return False
@staticmethod
def _transform_responses_api_input_item_to_chat_completion_message(
input_item: Any,
) -> List[
Union[
AllMessageValues,
GenericChatCompletionMessage,
ChatCompletionResponseMessage,
]
]:
"""
Transform a Responses API input item into a Chat Completion message
- EasyInputMessageParam
- Message
- ResponseOutputMessageParam
- ResponseFileSearchToolCallParam
- ResponseComputerToolCallParam
- ComputerCallOutput
- ResponseFunctionWebSearchParam
- ResponseFunctionToolCallParam
- FunctionCallOutput
- ResponseReasoningItemParam
- ItemReference
"""
if LiteLLMCompletionResponsesConfig._is_input_item_tool_call_output(input_item):
# handle executed tool call results
return LiteLLMCompletionResponsesConfig._transform_responses_api_tool_call_output_to_chat_completion_message(
tool_call_output=input_item
)
else:
return [
GenericChatCompletionMessage(
role=input_item.get("role") or "user",
content=LiteLLMCompletionResponsesConfig._transform_responses_api_content_to_chat_completion_content(
input_item.get("content")
),
)
]
@staticmethod
def _is_input_item_tool_call_output(input_item: Any) -> bool:
"""
Check if the input item is a tool call output
"""
return input_item.get("type") in [
"function_call_output",
"web_search_call",
"computer_call_output",
]
@staticmethod
def _transform_responses_api_tool_call_output_to_chat_completion_message(
tool_call_output: Dict[str, Any],
) -> List[
Union[
AllMessageValues,
GenericChatCompletionMessage,
ChatCompletionResponseMessage,
]
]:
"""
ChatCompletionToolMessage is used to indicate the output from a tool call
"""
tool_output_message = ChatCompletionToolMessage(
role="tool",
content=tool_call_output.get("output") or "",
tool_call_id=tool_call_output.get("call_id") or "",
)
_tool_use_definition = TOOL_CALLS_CACHE.get_cache(
key=tool_call_output.get("call_id") or "",
)
if _tool_use_definition:
"""
Append the tool use definition to the list of messages.
Providers like Anthropic require the tool use definition to be included with the tool output.
- Input:
{'function':
arguments:'{"command": ["echo","<html>\\n<head>\\n <title>Hello</title>\\n</head>\\n<body>\\n <h1>Hi</h1>\\n</body>\\n</html>",">","index.html"]}',
name='shell',
'id': 'toolu_018KFWsEySHjdKZPdUzXpymJ',
'type': 'function'
}
- Output:
{
"id": "toolu_018KFWsEySHjdKZPdUzXpymJ",
"type": "function",
"function": {
"name": "get_weather",
"arguments": "{\"latitude\":48.8566,\"longitude\":2.3522}"
}
}
"""
function: dict = _tool_use_definition.get("function") or {}
tool_call_chunk = ChatCompletionToolCallChunk(
id=_tool_use_definition.get("id") or "",
type=_tool_use_definition.get("type") or "function",
function=ChatCompletionToolCallFunctionChunk(
name=function.get("name") or "",
arguments=function.get("arguments") or "",
),
index=0,
)
chat_completion_response_message = ChatCompletionResponseMessage(
tool_calls=[tool_call_chunk],
role="assistant",
)
return [chat_completion_response_message, tool_output_message]
return [tool_output_message]
@staticmethod
def _transform_responses_api_content_to_chat_completion_content(
content: Any,
) -> Union[str, List[Union[str, Dict[str, Any]]]]:
"""
Transform Responses API content into Chat Completion content
"""
if isinstance(content, str):
return content
elif isinstance(content, list):
content_list: List[Union[str, Dict[str, Any]]] = []
for item in content:
if isinstance(item, str):
content_list.append(item)
elif isinstance(item, dict):
content_list.append(
{
"type": LiteLLMCompletionResponsesConfig._get_chat_completion_request_content_type(
item.get("type") or "text"
),
"text": item.get("text"),
}
)
return content_list
else:
raise ValueError(f"Invalid content type: {type(content)}")
@staticmethod
def _get_chat_completion_request_content_type(content_type: str) -> str:
"""
Get the Chat Completion request content type
"""
# Responses API content has `input_` prefix, if it exists, remove it
if content_type.startswith("input_"):
return content_type[len("input_") :]
else:
return content_type
@staticmethod
def transform_instructions_to_system_message(
instructions: Optional[str],
) -> ChatCompletionSystemMessage:
"""
Transform an instructions string into a system message
"""
return ChatCompletionSystemMessage(role="system", content=instructions or "")
@staticmethod
def transform_responses_api_tools_to_chat_completion_tools(
tools: Optional[List[FunctionToolParam]],
) -> List[ChatCompletionToolParam]:
"""
Transform Responses API tools into Chat Completion tools
"""
if tools is None:
return []
chat_completion_tools: List[ChatCompletionToolParam] = []
for tool in tools:
chat_completion_tools.append(
ChatCompletionToolParam(
type="function",
function=ChatCompletionToolParamFunctionChunk(
name=tool["name"],
description=tool.get("description") or "",
parameters=tool.get("parameters", {}),
strict=tool.get("strict", False),
),
)
)
return chat_completion_tools
@staticmethod
def transform_chat_completion_tools_to_responses_tools(
chat_completion_response: ModelResponse,
) -> List[OutputFunctionToolCall]:
"""
Transform Chat Completion tool calls into Responses API tool calls
"""
all_chat_completion_tools: List[ChatCompletionMessageToolCall] = []
for choice in chat_completion_response.choices:
if isinstance(choice, Choices):
if choice.message.tool_calls:
all_chat_completion_tools.extend(choice.message.tool_calls)
for tool_call in choice.message.tool_calls:
TOOL_CALLS_CACHE.set_cache(
key=tool_call.id,
value=tool_call,
)
responses_tools: List[OutputFunctionToolCall] = []
for tool in all_chat_completion_tools:
if tool.type == "function":
function_definition = tool.function
responses_tools.append(
OutputFunctionToolCall(
name=function_definition.name or "",
arguments=function_definition.get("arguments") or "",
call_id=tool.id or "",
id=tool.id or "",
type="function_call", # critical this is "function_call" to work with tools like openai codex
status=function_definition.get("status") or "completed",
)
)
return responses_tools
@staticmethod
def transform_chat_completion_response_to_responses_api_response(
request_input: Union[str, ResponseInputParam],
responses_api_request: ResponsesAPIOptionalRequestParams,
chat_completion_response: ModelResponse,
) -> ResponsesAPIResponse:
"""
Transform a Chat Completion response into a Responses API response
"""
responses_api_response: ResponsesAPIResponse = ResponsesAPIResponse(
id=chat_completion_response.id,
created_at=chat_completion_response.created,
model=chat_completion_response.model,
object=chat_completion_response.object,
error=getattr(chat_completion_response, "error", None),
incomplete_details=getattr(
chat_completion_response, "incomplete_details", None
),
instructions=getattr(chat_completion_response, "instructions", None),
metadata=getattr(chat_completion_response, "metadata", {}),
output=LiteLLMCompletionResponsesConfig._transform_chat_completion_choices_to_responses_output(
chat_completion_response=chat_completion_response,
choices=getattr(chat_completion_response, "choices", []),
),
parallel_tool_calls=getattr(
chat_completion_response, "parallel_tool_calls", False
),
temperature=getattr(chat_completion_response, "temperature", 0),
tool_choice=getattr(chat_completion_response, "tool_choice", "auto"),
tools=getattr(chat_completion_response, "tools", []),
top_p=getattr(chat_completion_response, "top_p", None),
max_output_tokens=getattr(
chat_completion_response, "max_output_tokens", None
),
previous_response_id=getattr(
chat_completion_response, "previous_response_id", None
),
reasoning=Reasoning(),
status=getattr(chat_completion_response, "status", "completed"),
text=ResponseTextConfig(),
truncation=getattr(chat_completion_response, "truncation", None),
usage=LiteLLMCompletionResponsesConfig._transform_chat_completion_usage_to_responses_usage(
chat_completion_response=chat_completion_response
),
user=getattr(chat_completion_response, "user", None),
)
RESPONSES_API_SESSION_HANDLER.add_completed_response_to_cache(
response_id=responses_api_response.id,
session_element=ResponsesAPISessionElement(
input=request_input,
output=responses_api_response,
response_id=responses_api_response.id,
previous_response_id=responses_api_request.get("previous_response_id"),
),
)
return responses_api_response
@staticmethod
def _transform_chat_completion_choices_to_responses_output(
chat_completion_response: ModelResponse,
choices: List[Choices],
) -> List[Union[GenericResponseOutputItem, OutputFunctionToolCall]]:
responses_output: List[
Union[GenericResponseOutputItem, OutputFunctionToolCall]
] = []
for choice in choices:
responses_output.append(
GenericResponseOutputItem(
type="message",
id=chat_completion_response.id,
status=choice.finish_reason,
role=choice.message.role,
content=[
LiteLLMCompletionResponsesConfig._transform_chat_message_to_response_output_text(
choice.message
)
],
)
)
tool_calls = LiteLLMCompletionResponsesConfig.transform_chat_completion_tools_to_responses_tools(
chat_completion_response=chat_completion_response
)
responses_output.extend(tool_calls)
return responses_output
@staticmethod
def _transform_responses_api_outputs_to_chat_completion_messages(
responses_api_output: ResponsesAPIResponse,
) -> List[
Union[
AllMessageValues,
GenericChatCompletionMessage,
ChatCompletionMessageToolCall,
]
]:
messages: List[
Union[
AllMessageValues,
GenericChatCompletionMessage,
ChatCompletionMessageToolCall,
]
] = []
output_items = responses_api_output.output
for _output_item in output_items:
output_item: dict = dict(_output_item)
if output_item.get("type") == "function_call":
# handle function call output
messages.append(
LiteLLMCompletionResponsesConfig._transform_responses_output_tool_call_to_chat_completion_output_tool_call(
tool_call=output_item
)
)
else:
# transform as generic ResponseOutputItem
messages.append(
GenericChatCompletionMessage(
role=str(output_item.get("role") or "user"),
content=LiteLLMCompletionResponsesConfig._transform_responses_api_content_to_chat_completion_content(
output_item.get("content")
),
)
)
return messages
@staticmethod
def _transform_responses_output_tool_call_to_chat_completion_output_tool_call(
tool_call: dict,
) -> ChatCompletionMessageToolCall:
return ChatCompletionMessageToolCall(
id=tool_call.get("id") or "",
type="function",
function=Function(
name=tool_call.get("name") or "",
arguments=tool_call.get("arguments") or "",
),
)
@staticmethod
def _transform_chat_message_to_response_output_text(
message: Message,
) -> OutputText:
return OutputText(
type="output_text",
text=message.content,
annotations=LiteLLMCompletionResponsesConfig._transform_chat_completion_annotations_to_response_output_annotations(
annotations=getattr(message, "annotations", None)
),
)
@staticmethod
def _transform_chat_completion_annotations_to_response_output_annotations(
annotations: Optional[List[ChatCompletionAnnotation]],
) -> List[GenericResponseOutputItemContentAnnotation]:
response_output_annotations: List[
GenericResponseOutputItemContentAnnotation
] = []
if annotations is None:
return response_output_annotations
for annotation in annotations:
annotation_type = annotation.get("type")
if annotation_type == "url_citation" and "url_citation" in annotation:
url_citation = annotation["url_citation"]
response_output_annotations.append(
GenericResponseOutputItemContentAnnotation(
type=annotation_type,
start_index=url_citation.get("start_index"),
end_index=url_citation.get("end_index"),
url=url_citation.get("url"),
title=url_citation.get("title"),
)
)
# Handle other annotation types here
return response_output_annotations
@staticmethod
def _transform_chat_completion_usage_to_responses_usage(
chat_completion_response: ModelResponse,
) -> ResponseAPIUsage:
usage: Optional[Usage] = getattr(chat_completion_response, "usage", None)
if usage is None:
return ResponseAPIUsage(
input_tokens=0,
output_tokens=0,
total_tokens=0,
)
return ResponseAPIUsage(
input_tokens=usage.prompt_tokens,
output_tokens=usage.completion_tokens,
total_tokens=usage.total_tokens,
)
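
As a rough sketch of the request-side transform above (the model name and parameter values are illustrative), a Responses API request collapses into a litellm.completion-style dict:

chat_request = LiteLLMCompletionResponsesConfig.transform_responses_api_request_to_chat_completion_request(
    model="gpt-4o",
    input="Tell me a joke",
    responses_api_request={
        "instructions": "You are terse.",
        "temperature": 0.2,
    },
    stream=False,
)
# chat_request roughly contains:
#   "messages": [{"role": "system", "content": "You are terse."},
#                {"role": "user", "content": "Tell me a joke"}],
#   "model": "gpt-4o",
#   "temperature": 0.2,
#   plus any other non-None fields (tools, stream, ...)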

View File

@@ -0,0 +1,436 @@
import asyncio
import contextvars
from functools import partial
from typing import Any, Coroutine, Dict, Iterable, List, Literal, Optional, Union
import httpx
import litellm
from litellm.constants import request_timeout
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
from litellm.llms.base_llm.responses.transformation import BaseResponsesAPIConfig
from litellm.llms.custom_httpx.llm_http_handler import BaseLLMHTTPHandler
from litellm.responses.litellm_completion_transformation.handler import (
LiteLLMCompletionTransformationHandler,
)
from litellm.responses.utils import ResponsesAPIRequestUtils
from litellm.types.llms.openai import (
Reasoning,
ResponseIncludable,
ResponseInputParam,
ResponsesAPIOptionalRequestParams,
ResponsesAPIResponse,
ResponseTextConfigParam,
ToolChoice,
ToolParam,
)
from litellm.types.responses.main import *
from litellm.types.router import GenericLiteLLMParams
from litellm.utils import ProviderConfigManager, client
from .streaming_iterator import BaseResponsesAPIStreamingIterator
####### ENVIRONMENT VARIABLES ###################
# Initialize any necessary instances or variables here
base_llm_http_handler = BaseLLMHTTPHandler()
litellm_completion_transformation_handler = LiteLLMCompletionTransformationHandler()
#################################################
@client
async def aresponses(
input: Union[str, ResponseInputParam],
model: str,
include: Optional[List[ResponseIncludable]] = None,
instructions: Optional[str] = None,
max_output_tokens: Optional[int] = None,
metadata: Optional[Dict[str, Any]] = None,
parallel_tool_calls: Optional[bool] = None,
previous_response_id: Optional[str] = None,
reasoning: Optional[Reasoning] = None,
store: Optional[bool] = None,
stream: Optional[bool] = None,
temperature: Optional[float] = None,
text: Optional[ResponseTextConfigParam] = None,
tool_choice: Optional[ToolChoice] = None,
tools: Optional[Iterable[ToolParam]] = None,
top_p: Optional[float] = None,
truncation: Optional[Literal["auto", "disabled"]] = None,
user: Optional[str] = None,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Optional[Dict[str, Any]] = None,
extra_query: Optional[Dict[str, Any]] = None,
extra_body: Optional[Dict[str, Any]] = None,
timeout: Optional[Union[float, httpx.Timeout]] = None,
# LiteLLM specific params,
custom_llm_provider: Optional[str] = None,
**kwargs,
) -> Union[ResponsesAPIResponse, BaseResponsesAPIStreamingIterator]:
"""
Async: Handles responses API requests by reusing the synchronous function
"""
local_vars = locals()
try:
loop = asyncio.get_event_loop()
kwargs["aresponses"] = True
# get custom llm provider so we can use this for mapping exceptions
if custom_llm_provider is None:
_, custom_llm_provider, _, _ = litellm.get_llm_provider(
model=model, api_base=local_vars.get("base_url", None)
)
func = partial(
responses,
input=input,
model=model,
include=include,
instructions=instructions,
max_output_tokens=max_output_tokens,
metadata=metadata,
parallel_tool_calls=parallel_tool_calls,
previous_response_id=previous_response_id,
reasoning=reasoning,
store=store,
stream=stream,
temperature=temperature,
text=text,
tool_choice=tool_choice,
tools=tools,
top_p=top_p,
truncation=truncation,
user=user,
extra_headers=extra_headers,
extra_query=extra_query,
extra_body=extra_body,
timeout=timeout,
custom_llm_provider=custom_llm_provider,
**kwargs,
)
ctx = contextvars.copy_context()
func_with_context = partial(ctx.run, func)
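# The sync `responses` entrypoint runs in an executor; with kwargs["aresponses"]
# set it may return a coroutine, which is awaited below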
init_response = await loop.run_in_executor(None, func_with_context)
if asyncio.iscoroutine(init_response):
response = await init_response
else:
response = init_response
# Update the responses_api_response_id with the model_id
if isinstance(response, ResponsesAPIResponse):
response = ResponsesAPIRequestUtils._update_responses_api_response_id_with_model_id(
responses_api_response=response,
litellm_metadata=kwargs.get("litellm_metadata", {}),
custom_llm_provider=custom_llm_provider,
)
return response
except Exception as e:
raise litellm.exception_type(
model=model,
custom_llm_provider=custom_llm_provider,
original_exception=e,
completion_kwargs=local_vars,
extra_kwargs=kwargs,
)
@client
def responses(
input: Union[str, ResponseInputParam],
model: str,
include: Optional[List[ResponseIncludable]] = None,
instructions: Optional[str] = None,
max_output_tokens: Optional[int] = None,
metadata: Optional[Dict[str, Any]] = None,
parallel_tool_calls: Optional[bool] = None,
previous_response_id: Optional[str] = None,
reasoning: Optional[Reasoning] = None,
store: Optional[bool] = None,
stream: Optional[bool] = None,
temperature: Optional[float] = None,
text: Optional[ResponseTextConfigParam] = None,
tool_choice: Optional[ToolChoice] = None,
tools: Optional[Iterable[ToolParam]] = None,
top_p: Optional[float] = None,
truncation: Optional[Literal["auto", "disabled"]] = None,
user: Optional[str] = None,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Optional[Dict[str, Any]] = None,
extra_query: Optional[Dict[str, Any]] = None,
extra_body: Optional[Dict[str, Any]] = None,
timeout: Optional[Union[float, httpx.Timeout]] = None,
# LiteLLM specific params,
custom_llm_provider: Optional[str] = None,
**kwargs,
):
"""
Synchronous version of the Responses API.
Uses the synchronous HTTP handler to make requests.
"""
local_vars = locals()
try:
litellm_logging_obj: LiteLLMLoggingObj = kwargs.get("litellm_logging_obj") # type: ignore
litellm_call_id: Optional[str] = kwargs.get("litellm_call_id", None)
_is_async = kwargs.pop("aresponses", False) is True
# get llm provider logic
litellm_params = GenericLiteLLMParams(**kwargs)
(
model,
custom_llm_provider,
dynamic_api_key,
dynamic_api_base,
) = litellm.get_llm_provider(
model=model,
custom_llm_provider=custom_llm_provider,
api_base=litellm_params.api_base,
api_key=litellm_params.api_key,
)
# get provider config
responses_api_provider_config: Optional[BaseResponsesAPIConfig] = (
ProviderConfigManager.get_provider_responses_api_config(
model=model,
provider=litellm.LlmProviders(custom_llm_provider),
)
)
local_vars.update(kwargs)
# Get ResponsesAPIOptionalRequestParams with only valid parameters
response_api_optional_params: ResponsesAPIOptionalRequestParams = (
ResponsesAPIRequestUtils.get_requested_response_api_optional_param(
local_vars
)
)
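# Providers without a native Responses API config are bridged through
# litellm.completion (Chat Completions)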
if responses_api_provider_config is None:
return litellm_completion_transformation_handler.response_api_handler(
model=model,
input=input,
responses_api_request=response_api_optional_params,
custom_llm_provider=custom_llm_provider,
_is_async=_is_async,
stream=stream,
**kwargs,
)
# Get optional parameters for the responses API
responses_api_request_params: Dict = (
ResponsesAPIRequestUtils.get_optional_params_responses_api(
model=model,
responses_api_provider_config=responses_api_provider_config,
response_api_optional_params=response_api_optional_params,
)
)
# Pre Call logging
litellm_logging_obj.update_environment_variables(
model=model,
user=user,
optional_params=dict(responses_api_request_params),
litellm_params={
"litellm_call_id": litellm_call_id,
**responses_api_request_params,
},
custom_llm_provider=custom_llm_provider,
)
# Call the handler with _is_async flag instead of directly calling the async handler
response = base_llm_http_handler.response_api_handler(
model=model,
input=input,
responses_api_provider_config=responses_api_provider_config,
response_api_optional_request_params=responses_api_request_params,
custom_llm_provider=custom_llm_provider,
litellm_params=litellm_params,
logging_obj=litellm_logging_obj,
extra_headers=extra_headers,
extra_body=extra_body,
timeout=timeout or request_timeout,
_is_async=_is_async,
client=kwargs.get("client"),
fake_stream=responses_api_provider_config.should_fake_stream(
model=model, stream=stream, custom_llm_provider=custom_llm_provider
),
litellm_metadata=kwargs.get("litellm_metadata", {}),
)
# Update the responses_api_response_id with the model_id
if isinstance(response, ResponsesAPIResponse):
response = ResponsesAPIRequestUtils._update_responses_api_response_id_with_model_id(
responses_api_response=response,
litellm_metadata=kwargs.get("litellm_metadata", {}),
custom_llm_provider=custom_llm_provider,
)
return response
except Exception as e:
raise litellm.exception_type(
model=model,
custom_llm_provider=custom_llm_provider,
original_exception=e,
completion_kwargs=local_vars,
extra_kwargs=kwargs,
)
@client
async def adelete_responses(
response_id: str,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Optional[Dict[str, Any]] = None,
extra_query: Optional[Dict[str, Any]] = None,
extra_body: Optional[Dict[str, Any]] = None,
timeout: Optional[Union[float, httpx.Timeout]] = None,
# LiteLLM specific params,
custom_llm_provider: Optional[str] = None,
**kwargs,
) -> DeleteResponseResult:
"""
Async version of the DELETE Responses API
DELETE /v1/responses/{response_id} endpoint in the responses API
"""
local_vars = locals()
try:
loop = asyncio.get_event_loop()
kwargs["adelete_responses"] = True
# get custom llm provider from response_id
decoded_response_id: DecodedResponseId = (
ResponsesAPIRequestUtils._decode_responses_api_response_id(
response_id=response_id,
)
)
response_id = decoded_response_id.get("response_id") or response_id
custom_llm_provider = (
decoded_response_id.get("custom_llm_provider") or custom_llm_provider
)
func = partial(
delete_responses,
response_id=response_id,
custom_llm_provider=custom_llm_provider,
extra_headers=extra_headers,
extra_query=extra_query,
extra_body=extra_body,
timeout=timeout,
**kwargs,
)
ctx = contextvars.copy_context()
func_with_context = partial(ctx.run, func)
init_response = await loop.run_in_executor(None, func_with_context)
if asyncio.iscoroutine(init_response):
response = await init_response
else:
response = init_response
return response
except Exception as e:
raise litellm.exception_type(
model=None,
custom_llm_provider=custom_llm_provider,
original_exception=e,
completion_kwargs=local_vars,
extra_kwargs=kwargs,
)
@client
def delete_responses(
response_id: str,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Optional[Dict[str, Any]] = None,
extra_query: Optional[Dict[str, Any]] = None,
extra_body: Optional[Dict[str, Any]] = None,
timeout: Optional[Union[float, httpx.Timeout]] = None,
# LiteLLM specific params,
custom_llm_provider: Optional[str] = None,
**kwargs,
) -> Union[DeleteResponseResult, Coroutine[Any, Any, DeleteResponseResult]]:
"""
Synchronous version of the DELETE Responses API
DELETE /v1/responses/{response_id} endpoint in the responses API
"""
local_vars = locals()
try:
litellm_logging_obj: LiteLLMLoggingObj = kwargs.get("litellm_logging_obj") # type: ignore
litellm_call_id: Optional[str] = kwargs.get("litellm_call_id", None)
_is_async = kwargs.pop("adelete_responses", False) is True
# get llm provider logic
litellm_params = GenericLiteLLMParams(**kwargs)
# get custom llm provider from response_id
decoded_response_id: DecodedResponseId = (
ResponsesAPIRequestUtils._decode_responses_api_response_id(
response_id=response_id,
)
)
response_id = decoded_response_id.get("response_id") or response_id
custom_llm_provider = (
decoded_response_id.get("custom_llm_provider") or custom_llm_provider
)
if custom_llm_provider is None:
raise ValueError("custom_llm_provider is required but passed as None")
# get provider config
responses_api_provider_config: Optional[BaseResponsesAPIConfig] = (
ProviderConfigManager.get_provider_responses_api_config(
model=None,
provider=litellm.LlmProviders(custom_llm_provider),
)
)
if responses_api_provider_config is None:
raise ValueError(
f"DELETE responses is not supported for {custom_llm_provider}"
)
local_vars.update(kwargs)
# Pre Call logging
litellm_logging_obj.update_environment_variables(
model=None,
optional_params={
"response_id": response_id,
},
litellm_params={
"litellm_call_id": litellm_call_id,
},
custom_llm_provider=custom_llm_provider,
)
# Call the handler with _is_async flag instead of directly calling the async handler
response = base_llm_http_handler.delete_response_api_handler(
response_id=response_id,
custom_llm_provider=custom_llm_provider,
responses_api_provider_config=responses_api_provider_config,
litellm_params=litellm_params,
logging_obj=litellm_logging_obj,
extra_headers=extra_headers,
extra_body=extra_body,
timeout=timeout or request_timeout,
_is_async=_is_async,
client=kwargs.get("client"),
)
return response
except Exception as e:
raise litellm.exception_type(
model=None,
custom_llm_provider=custom_llm_provider,
original_exception=e,
completion_kwargs=local_vars,
extra_kwargs=kwargs,
)
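
For orientation, a hypothetical end-to-end call through the entrypoints above; the model name is a placeholder, and litellm.responses assumes the function is re-exported at the package root (otherwise import it from the module above).

import litellm

# Providers without a native Responses API config are served by the
# litellm.completion bridge added in this commit.
response = litellm.responses(
    model="anthropic/claude-3-sonnet-20240229",
    input="Write a haiku about caching",
    instructions="Answer in exactly three lines.",
)
print(response.output)  # list of message / tool-call output items

# Follow-up turn, chained through the session cache via previous_response_id
follow_up = litellm.responses(
    model="anthropic/claude-3-sonnet-20240229",
    input="Now translate it to French",
    previous_response_id=response.id,
)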

View File

@@ -0,0 +1,336 @@
import asyncio
import json
from datetime import datetime
from typing import Any, Dict, Optional
import httpx
from litellm.constants import STREAM_SSE_DONE_STRING
from litellm.litellm_core_utils.asyncify import run_async_function
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
from litellm.litellm_core_utils.thread_pool_executor import executor
from litellm.llms.base_llm.responses.transformation import BaseResponsesAPIConfig
from litellm.responses.utils import ResponsesAPIRequestUtils
from litellm.types.llms.openai import (
OutputTextDeltaEvent,
ResponseCompletedEvent,
ResponsesAPIResponse,
ResponsesAPIStreamEvents,
ResponsesAPIStreamingResponse,
)
from litellm.utils import CustomStreamWrapper
class BaseResponsesAPIStreamingIterator:
"""
Base class for streaming iterators that process responses from the Responses API.
This class contains shared logic for both synchronous and asynchronous iterators.
"""
def __init__(
self,
response: httpx.Response,
model: str,
responses_api_provider_config: BaseResponsesAPIConfig,
logging_obj: LiteLLMLoggingObj,
litellm_metadata: Optional[Dict[str, Any]] = None,
custom_llm_provider: Optional[str] = None,
):
self.response = response
self.model = model
self.logging_obj = logging_obj
self.finished = False
self.responses_api_provider_config = responses_api_provider_config
self.completed_response: Optional[ResponsesAPIStreamingResponse] = None
self.start_time = datetime.now()
# set request kwargs
self.litellm_metadata = litellm_metadata
self.custom_llm_provider = custom_llm_provider
def _process_chunk(self, chunk) -> Optional[ResponsesAPIStreamingResponse]:
"""Process a single chunk of data from the stream"""
if not chunk:
return None
# Handle SSE format (data: {...})
chunk = CustomStreamWrapper._strip_sse_data_from_chunk(chunk)
if chunk is None:
return None
# Handle "[DONE]" marker
if chunk == STREAM_SSE_DONE_STRING:
self.finished = True
return None
try:
# Parse the JSON chunk
parsed_chunk = json.loads(chunk)
# Format as ResponsesAPIStreamingResponse
if isinstance(parsed_chunk, dict):
openai_responses_api_chunk = (
self.responses_api_provider_config.transform_streaming_response(
model=self.model,
parsed_chunk=parsed_chunk,
logging_obj=self.logging_obj,
)
)
# if "response" in parsed_chunk, then encode litellm specific information like custom_llm_provider
response_object = getattr(openai_responses_api_chunk, "response", None)
if response_object:
response = ResponsesAPIRequestUtils._update_responses_api_response_id_with_model_id(
responses_api_response=response_object,
litellm_metadata=self.litellm_metadata,
custom_llm_provider=self.custom_llm_provider,
)
setattr(openai_responses_api_chunk, "response", response)
# Store the completed response
if (
openai_responses_api_chunk
and openai_responses_api_chunk.type
== ResponsesAPIStreamEvents.RESPONSE_COMPLETED
):
self.completed_response = openai_responses_api_chunk
self._handle_logging_completed_response()
return openai_responses_api_chunk
return None
except json.JSONDecodeError:
# If we can't parse the chunk, continue
return None
def _handle_logging_completed_response(self):
"""Base implementation - should be overridden by subclasses"""
pass
class ResponsesAPIStreamingIterator(BaseResponsesAPIStreamingIterator):
"""
Async iterator for processing streaming responses from the Responses API.
"""
def __init__(
self,
response: httpx.Response,
model: str,
responses_api_provider_config: BaseResponsesAPIConfig,
logging_obj: LiteLLMLoggingObj,
litellm_metadata: Optional[Dict[str, Any]] = None,
custom_llm_provider: Optional[str] = None,
):
super().__init__(
response,
model,
responses_api_provider_config,
logging_obj,
litellm_metadata,
custom_llm_provider,
)
self.stream_iterator = response.aiter_lines()
def __aiter__(self):
return self
async def __anext__(self) -> ResponsesAPIStreamingResponse:
try:
while True:
# Get the next chunk from the stream
try:
chunk = await self.stream_iterator.__anext__()
except StopAsyncIteration:
self.finished = True
raise StopAsyncIteration
result = self._process_chunk(chunk)
if self.finished:
raise StopAsyncIteration
elif result is not None:
return result
# If result is None, continue the loop to get the next chunk
except httpx.HTTPError as e:
# Handle HTTP errors
self.finished = True
raise e
def _handle_logging_completed_response(self):
"""Handle logging for completed responses in async context"""
asyncio.create_task(
self.logging_obj.async_success_handler(
result=self.completed_response,
start_time=self.start_time,
end_time=datetime.now(),
cache_hit=None,
)
)
executor.submit(
self.logging_obj.success_handler,
result=self.completed_response,
cache_hit=None,
start_time=self.start_time,
end_time=datetime.now(),
)
class SyncResponsesAPIStreamingIterator(BaseResponsesAPIStreamingIterator):
"""
Synchronous iterator for processing streaming responses from the Responses API.
"""
def __init__(
self,
response: httpx.Response,
model: str,
responses_api_provider_config: BaseResponsesAPIConfig,
logging_obj: LiteLLMLoggingObj,
litellm_metadata: Optional[Dict[str, Any]] = None,
custom_llm_provider: Optional[str] = None,
):
super().__init__(
response,
model,
responses_api_provider_config,
logging_obj,
litellm_metadata,
custom_llm_provider,
)
self.stream_iterator = response.iter_lines()
def __iter__(self):
return self
def __next__(self):
try:
while True:
# Get the next chunk from the stream
try:
chunk = next(self.stream_iterator)
except StopIteration:
self.finished = True
raise StopIteration
result = self._process_chunk(chunk)
if self.finished:
raise StopIteration
elif result is not None:
return result
# If result is None, continue the loop to get the next chunk
except httpx.HTTPError as e:
# Handle HTTP errors
self.finished = True
raise e
def _handle_logging_completed_response(self):
"""Handle logging for completed responses in sync context"""
run_async_function(
async_function=self.logging_obj.async_success_handler,
result=self.completed_response,
start_time=self.start_time,
end_time=datetime.now(),
cache_hit=None,
)
executor.submit(
self.logging_obj.success_handler,
result=self.completed_response,
cache_hit=None,
start_time=self.start_time,
end_time=datetime.now(),
)
class MockResponsesAPIStreamingIterator(BaseResponsesAPIStreamingIterator):
"""
Mock iterator: fakes a stream by slicing the full response text into
5-character deltas, then emits a completed event.
Models like o1-pro don't support streaming, so we fake it.
"""
CHUNK_SIZE = 5
def __init__(
self,
response: httpx.Response,
model: str,
responses_api_provider_config: BaseResponsesAPIConfig,
logging_obj: LiteLLMLoggingObj,
litellm_metadata: Optional[Dict[str, Any]] = None,
custom_llm_provider: Optional[str] = None,
):
super().__init__(
response=response,
model=model,
responses_api_provider_config=responses_api_provider_config,
logging_obj=logging_obj,
litellm_metadata=litellm_metadata,
custom_llm_provider=custom_llm_provider,
)
# one-time transform
transformed = (
self.responses_api_provider_config.transform_response_api_response(
model=self.model,
raw_response=response,
logging_obj=logging_obj,
)
)
full_text = self._collect_text(transformed)
# build a list of 5-character delta events
deltas = [
OutputTextDeltaEvent(
type=ResponsesAPIStreamEvents.OUTPUT_TEXT_DELTA,
delta=full_text[i : i + self.CHUNK_SIZE],
item_id=transformed.id,
output_index=0,
content_index=0,
)
for i in range(0, len(full_text), self.CHUNK_SIZE)
]
# append the completed event
self._events = deltas + [
ResponseCompletedEvent(
type=ResponsesAPIStreamEvents.RESPONSE_COMPLETED,
response=transformed,
)
]
self._idx = 0
def __aiter__(self):
return self
async def __anext__(self) -> ResponsesAPIStreamingResponse:
if self._idx >= len(self._events):
raise StopAsyncIteration
evt = self._events[self._idx]
self._idx += 1
return evt
def __iter__(self):
return self
def __next__(self) -> ResponsesAPIStreamingResponse:
if self._idx >= len(self._events):
raise StopIteration
evt = self._events[self._idx]
self._idx += 1
return evt
def _collect_text(self, resp: ResponsesAPIResponse) -> str:
out = ""
for out_item in resp.output:
if out_item.type == "message":
for c in getattr(out_item, "content", []):
out += c.text
return out

View File

@@ -0,0 +1,204 @@
import base64
from typing import Any, Dict, Optional, Union, cast, get_type_hints
import litellm
from litellm._logging import verbose_logger
from litellm.llms.base_llm.responses.transformation import BaseResponsesAPIConfig
from litellm.types.llms.openai import (
ResponseAPIUsage,
ResponsesAPIOptionalRequestParams,
ResponsesAPIResponse,
)
from litellm.types.responses.main import DecodedResponseId
from litellm.types.utils import SpecialEnums, Usage
class ResponsesAPIRequestUtils:
"""Helper utils for constructing ResponseAPI requests"""
@staticmethod
def get_optional_params_responses_api(
model: str,
responses_api_provider_config: BaseResponsesAPIConfig,
response_api_optional_params: ResponsesAPIOptionalRequestParams,
) -> Dict:
"""
Get optional parameters for the responses API.
Args:
model: The model name
responses_api_provider_config: The provider configuration for the responses API
response_api_optional_params: The requested optional parameters to validate and map
Returns:
A dictionary of supported parameters for the responses API
"""
# Remove None values and internal parameters
# Get supported parameters for the model
supported_params = responses_api_provider_config.get_supported_openai_params(
model
)
# Check for unsupported parameters
unsupported_params = [
param
for param in response_api_optional_params
if param not in supported_params
]
if unsupported_params:
raise litellm.UnsupportedParamsError(
model=model,
message=f"The following parameters are not supported for model {model}: {', '.join(unsupported_params)}",
)
# Map parameters to provider-specific format
mapped_params = responses_api_provider_config.map_openai_params(
response_api_optional_params=response_api_optional_params,
model=model,
drop_params=litellm.drop_params,
)
return mapped_params
@staticmethod
def get_requested_response_api_optional_param(
params: Dict[str, Any],
) -> ResponsesAPIOptionalRequestParams:
"""
Filter parameters to only include those defined in ResponsesAPIOptionalRequestParams.
Args:
params: Dictionary of parameters to filter
Returns:
ResponsesAPIOptionalRequestParams instance with only the valid parameters
"""
valid_keys = get_type_hints(ResponsesAPIOptionalRequestParams).keys()
filtered_params = {
k: v for k, v in params.items() if k in valid_keys and v is not None
}
return cast(ResponsesAPIOptionalRequestParams, filtered_params)
@staticmethod
def _update_responses_api_response_id_with_model_id(
responses_api_response: ResponsesAPIResponse,
custom_llm_provider: Optional[str],
litellm_metadata: Optional[Dict[str, Any]] = None,
) -> ResponsesAPIResponse:
"""
Update the responses_api_response_id with model_id and custom_llm_provider
This builds a composite ID containing the custom LLM provider, model ID, and original response ID
"""
litellm_metadata = litellm_metadata or {}
model_info: Dict[str, Any] = litellm_metadata.get("model_info", {}) or {}
model_id = model_info.get("id")
updated_id = ResponsesAPIRequestUtils._build_responses_api_response_id(
model_id=model_id,
custom_llm_provider=custom_llm_provider,
response_id=responses_api_response.id,
)
responses_api_response.id = updated_id
return responses_api_response
@staticmethod
def _build_responses_api_response_id(
custom_llm_provider: Optional[str],
model_id: Optional[str],
response_id: str,
) -> str:
"""Build the responses_api_response_id"""
assembled_id: str = str(
SpecialEnums.LITELLM_MANAGED_RESPONSE_COMPLETE_STR.value
).format(custom_llm_provider, model_id, response_id)
base64_encoded_id: str = base64.b64encode(assembled_id.encode("utf-8")).decode(
"utf-8"
)
return f"resp_{base64_encoded_id}"
@staticmethod
def _decode_responses_api_response_id(
response_id: str,
) -> DecodedResponseId:
"""
Decode the responses_api_response_id
Returns:
DecodedResponseId: Structured mapping with custom_llm_provider, model_id, and response_id
"""
try:
# Remove prefix and decode
cleaned_id = response_id.replace("resp_", "")
decoded_id = base64.b64decode(cleaned_id.encode("utf-8")).decode("utf-8")
# Parse components using known prefixes
if ";" not in decoded_id:
return DecodedResponseId(
custom_llm_provider=None,
model_id=None,
response_id=response_id,
)
parts = decoded_id.split(";")
# Format: litellm:custom_llm_provider:{};model_id:{};response_id:{}
custom_llm_provider = None
model_id = None
if (
len(parts) >= 3
): # Full format with custom_llm_provider, model_id, and response_id
custom_llm_provider_part = parts[0]
model_id_part = parts[1]
response_part = parts[2]
custom_llm_provider = custom_llm_provider_part.replace(
"litellm:custom_llm_provider:", ""
)
model_id = model_id_part.replace("model_id:", "")
decoded_response_id = response_part.replace("response_id:", "")
else:
decoded_response_id = response_id
return DecodedResponseId(
custom_llm_provider=custom_llm_provider,
model_id=model_id,
response_id=decoded_response_id,
)
except Exception as e:
verbose_logger.debug(f"Error decoding response_id '{response_id}': {e}")
return DecodedResponseId(
custom_llm_provider=None,
model_id=None,
response_id=response_id,
)
class ResponseAPILoggingUtils:
@staticmethod
def _is_response_api_usage(usage: Union[dict, ResponseAPIUsage]) -> bool:
"""returns True if usage is from OpenAI Response API"""
if isinstance(usage, ResponseAPIUsage):
return True
if "input_tokens" in usage and "output_tokens" in usage:
return True
return False
@staticmethod
def _transform_response_api_usage_to_chat_usage(
usage: Union[dict, ResponseAPIUsage],
) -> Usage:
"""Tranforms the ResponseAPIUsage object to a Usage object"""
response_api_usage: ResponseAPIUsage = (
ResponseAPIUsage(**usage) if isinstance(usage, dict) else usage
)
prompt_tokens: int = response_api_usage.input_tokens or 0
completion_tokens: int = response_api_usage.output_tokens or 0
return Usage(
prompt_tokens=prompt_tokens,
completion_tokens=completion_tokens,
total_tokens=prompt_tokens + completion_tokens,
)
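
Finally, a rough round trip through the composite response-id helpers above; the provider and model id values are illustrative, and the assembled format is assumed to match the one shown in the decode comment.

composite_id = ResponsesAPIRequestUtils._build_responses_api_response_id(
    custom_llm_provider="openai",
    model_id="my-deployment-123",
    response_id="resp_abc",
)
# composite_id is "resp_" + base64 of
# "litellm:custom_llm_provider:openai;model_id:my-deployment-123;response_id:resp_abc"

decoded = ResponsesAPIRequestUtils._decode_responses_api_response_id(composite_id)
assert decoded.get("custom_llm_provider") == "openai"
assert decoded.get("model_id") == "my-deployment-123"
assert decoded.get("response_id") == "resp_abc"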