structure saas with tools
.venv/lib/python3.10/site-packages/litellm/responses/litellm_completion_transformation/handler.py
@@ -0,0 +1,115 @@
"""
Handler for transforming responses api requests to litellm.completion requests
"""

from typing import Any, Coroutine, Optional, Union

import litellm
from litellm.responses.litellm_completion_transformation.streaming_iterator import (
    LiteLLMCompletionStreamingIterator,
)
from litellm.responses.litellm_completion_transformation.transformation import (
    LiteLLMCompletionResponsesConfig,
)
from litellm.responses.streaming_iterator import BaseResponsesAPIStreamingIterator
from litellm.types.llms.openai import (
    ResponseInputParam,
    ResponsesAPIOptionalRequestParams,
    ResponsesAPIResponse,
)
from litellm.types.utils import ModelResponse


class LiteLLMCompletionTransformationHandler:

    def response_api_handler(
        self,
        model: str,
        input: Union[str, ResponseInputParam],
        responses_api_request: ResponsesAPIOptionalRequestParams,
        custom_llm_provider: Optional[str] = None,
        _is_async: bool = False,
        stream: Optional[bool] = None,
        **kwargs,
    ) -> Union[
        ResponsesAPIResponse,
        BaseResponsesAPIStreamingIterator,
        Coroutine[
            Any, Any, Union[ResponsesAPIResponse, BaseResponsesAPIStreamingIterator]
        ],
    ]:
        litellm_completion_request: dict = (
            LiteLLMCompletionResponsesConfig.transform_responses_api_request_to_chat_completion_request(
                model=model,
                input=input,
                responses_api_request=responses_api_request,
                custom_llm_provider=custom_llm_provider,
                stream=stream,
                **kwargs,
            )
        )

        if _is_async:
            return self.async_response_api_handler(
                litellm_completion_request=litellm_completion_request,
                request_input=input,
                responses_api_request=responses_api_request,
                **kwargs,
            )

        litellm_completion_response: Union[
            ModelResponse, litellm.CustomStreamWrapper
        ] = litellm.completion(
            **litellm_completion_request,
            **kwargs,
        )

        if isinstance(litellm_completion_response, ModelResponse):
            responses_api_response: ResponsesAPIResponse = (
                LiteLLMCompletionResponsesConfig.transform_chat_completion_response_to_responses_api_response(
                    chat_completion_response=litellm_completion_response,
                    request_input=input,
                    responses_api_request=responses_api_request,
                )
            )

            return responses_api_response

        elif isinstance(litellm_completion_response, litellm.CustomStreamWrapper):
            return LiteLLMCompletionStreamingIterator(
                litellm_custom_stream_wrapper=litellm_completion_response,
                request_input=input,
                responses_api_request=responses_api_request,
            )

    async def async_response_api_handler(
        self,
        litellm_completion_request: dict,
        request_input: Union[str, ResponseInputParam],
        responses_api_request: ResponsesAPIOptionalRequestParams,
        **kwargs,
    ) -> Union[ResponsesAPIResponse, BaseResponsesAPIStreamingIterator]:
        litellm_completion_response: Union[
            ModelResponse, litellm.CustomStreamWrapper
        ] = await litellm.acompletion(
            **litellm_completion_request,
            **kwargs,
        )

        if isinstance(litellm_completion_response, ModelResponse):
            responses_api_response: ResponsesAPIResponse = (
                LiteLLMCompletionResponsesConfig.transform_chat_completion_response_to_responses_api_response(
                    chat_completion_response=litellm_completion_response,
                    request_input=request_input,
                    responses_api_request=responses_api_request,
                )
            )

            return responses_api_response

        elif isinstance(litellm_completion_response, litellm.CustomStreamWrapper):
            return LiteLLMCompletionStreamingIterator(
                litellm_custom_stream_wrapper=litellm_completion_response,
                request_input=request_input,
                responses_api_request=responses_api_request,
            )
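
Illustrative usage (not part of the committed file): a minimal sketch of driving the handler above directly on the synchronous, non-streaming path. It assumes a provider API key is already configured for litellm.completion; the model name and the plain-dict request params are placeholders.

from litellm.responses.litellm_completion_transformation.handler import (
    LiteLLMCompletionTransformationHandler,
)

handler = LiteLLMCompletionTransformationHandler()
# The Responses API request is rewritten into a litellm.completion() call,
# and the resulting chat completion is mapped back to a ResponsesAPIResponse.
result = handler.response_api_handler(
    model="gpt-4o-mini",                         # placeholder model name
    input="Say hello",
    responses_api_request={"temperature": 0.0},  # placeholder optional params
    stream=False,
)
print(result.output)  # Responses API output items built from the chat completion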
.venv/lib/python3.10/site-packages/litellm/responses/litellm_completion_transformation/session_handler.py
@@ -0,0 +1,59 @@
"""
Responses API has previous_response_id, which is the id of the previous response.

LiteLLM needs to maintain a cache of the previous response input, output, previous_response_id, and model.

This class handles that cache.
"""

from typing import List, Optional, Tuple, Union

from typing_extensions import TypedDict

from litellm.caching import InMemoryCache
from litellm.types.llms.openai import ResponseInputParam, ResponsesAPIResponse

RESPONSES_API_PREVIOUS_RESPONSES_CACHE = InMemoryCache()
MAX_PREV_SESSION_INPUTS = 50


class ResponsesAPISessionElement(TypedDict, total=False):
    input: Union[str, ResponseInputParam]
    output: ResponsesAPIResponse
    response_id: str
    previous_response_id: Optional[str]


class SessionHandler:

    def add_completed_response_to_cache(
        self, response_id: str, session_element: ResponsesAPISessionElement
    ):
        RESPONSES_API_PREVIOUS_RESPONSES_CACHE.set_cache(
            key=response_id, value=session_element
        )

    def get_chain_of_previous_input_output_pairs(
        self, previous_response_id: str
    ) -> List[Tuple[ResponseInputParam, ResponsesAPIResponse]]:
        response_api_inputs: List[Tuple[ResponseInputParam, ResponsesAPIResponse]] = []
        current_previous_response_id = previous_response_id

        count_session_elements = 0
        while current_previous_response_id:
            if count_session_elements > MAX_PREV_SESSION_INPUTS:
                break
            session_element = RESPONSES_API_PREVIOUS_RESPONSES_CACHE.get_cache(
                key=current_previous_response_id
            )
            if session_element:
                response_api_inputs.append(
                    (session_element.get("input"), session_element.get("output"))
                )
                current_previous_response_id = session_element.get(
                    "previous_response_id"
                )
            else:
                break
            count_session_elements += 1
        return response_api_inputs
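
Illustrative usage (not part of the committed file): how the session cache above is meant to be exercised. `first_response` stands in for a ResponsesAPIResponse returned by an earlier call, and the response ids are placeholders.

from litellm.responses.litellm_completion_transformation.session_handler import (
    ResponsesAPISessionElement,
    SessionHandler,
)

session_handler = SessionHandler()
session_handler.add_completed_response_to_cache(
    response_id="resp_2",
    session_element=ResponsesAPISessionElement(
        input="And tomorrow?",
        output=first_response,          # ResponsesAPIResponse from the prior turn (placeholder)
        response_id="resp_2",
        previous_response_id="resp_1",  # links this turn to the one before it
    ),
)

# Walks previous_response_id links back through the in-memory cache and returns
# (input, output) pairs, stopping after MAX_PREV_SESSION_INPUTS entries.
pairs = session_handler.get_chain_of_previous_input_output_pairs(
    previous_response_id="resp_2"
)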
.venv/lib/python3.10/site-packages/litellm/responses/litellm_completion_transformation/streaming_iterator.py
@@ -0,0 +1,157 @@
from typing import List, Optional, Union

import litellm
from litellm.main import stream_chunk_builder
from litellm.responses.litellm_completion_transformation.transformation import (
    LiteLLMCompletionResponsesConfig,
)
from litellm.responses.streaming_iterator import ResponsesAPIStreamingIterator
from litellm.types.llms.openai import (
    OutputTextDeltaEvent,
    ResponseCompletedEvent,
    ResponseInputParam,
    ResponsesAPIOptionalRequestParams,
    ResponsesAPIStreamEvents,
    ResponsesAPIStreamingResponse,
)
from litellm.types.utils import Delta as ChatCompletionDelta
from litellm.types.utils import (
    ModelResponse,
    ModelResponseStream,
    StreamingChoices,
    TextCompletionResponse,
)


class LiteLLMCompletionStreamingIterator(ResponsesAPIStreamingIterator):
    """
    Async iterator for processing streaming responses from the Responses API.
    """

    def __init__(
        self,
        litellm_custom_stream_wrapper: litellm.CustomStreamWrapper,
        request_input: Union[str, ResponseInputParam],
        responses_api_request: ResponsesAPIOptionalRequestParams,
    ):
        self.litellm_custom_stream_wrapper: litellm.CustomStreamWrapper = (
            litellm_custom_stream_wrapper
        )
        self.request_input: Union[str, ResponseInputParam] = request_input
        self.responses_api_request: ResponsesAPIOptionalRequestParams = (
            responses_api_request
        )
        self.collected_chat_completion_chunks: List[ModelResponseStream] = []
        self.finished: bool = False

    async def __anext__(
        self,
    ) -> Union[ResponsesAPIStreamingResponse, ResponseCompletedEvent]:
        try:
            while True:
                if self.finished is True:
                    raise StopAsyncIteration
                # Get the next chunk from the stream
                try:
                    chunk = await self.litellm_custom_stream_wrapper.__anext__()
                    self.collected_chat_completion_chunks.append(chunk)
                    response_api_chunk = (
                        self._transform_chat_completion_chunk_to_response_api_chunk(
                            chunk
                        )
                    )
                    if response_api_chunk:
                        return response_api_chunk
                except StopAsyncIteration:
                    self.finished = True
                    response_completed_event = self._emit_response_completed_event()
                    if response_completed_event:
                        return response_completed_event
                    else:
                        raise StopAsyncIteration

        except Exception as e:
            # Handle HTTP errors
            self.finished = True
            raise e

    def __iter__(self):
        return self

    def __next__(
        self,
    ) -> Union[ResponsesAPIStreamingResponse, ResponseCompletedEvent]:
        try:
            while True:
                if self.finished is True:
                    raise StopIteration
                # Get the next chunk from the stream
                try:
                    chunk = self.litellm_custom_stream_wrapper.__next__()
                    self.collected_chat_completion_chunks.append(chunk)
                    response_api_chunk = (
                        self._transform_chat_completion_chunk_to_response_api_chunk(
                            chunk
                        )
                    )
                    if response_api_chunk:
                        return response_api_chunk
                except StopIteration:
                    self.finished = True
                    response_completed_event = self._emit_response_completed_event()
                    if response_completed_event:
                        return response_completed_event
                    else:
                        raise StopIteration

        except Exception as e:
            # Handle HTTP errors
            self.finished = True
            raise e

    def _transform_chat_completion_chunk_to_response_api_chunk(
        self, chunk: ModelResponseStream
    ) -> Optional[ResponsesAPIStreamingResponse]:
        """
        Transform a chat completion chunk to a response API chunk.

        This currently only handles emitting the OutputTextDeltaEvent, which is used by other tools using the responses API.
        """
        return OutputTextDeltaEvent(
            type=ResponsesAPIStreamEvents.OUTPUT_TEXT_DELTA,
            item_id=chunk.id,
            output_index=0,
            content_index=0,
            delta=self._get_delta_string_from_streaming_choices(chunk.choices),
        )

    def _get_delta_string_from_streaming_choices(
        self, choices: List[StreamingChoices]
    ) -> str:
        """
        Get the delta string from the streaming choices

        For now this collects the first choice's delta string.

        It's unclear how users expect litellm to translate multiple-choices-per-chunk to the responses API output.
        """
        choice = choices[0]
        chat_completion_delta: ChatCompletionDelta = choice.delta
        return chat_completion_delta.content or ""

    def _emit_response_completed_event(self) -> Optional[ResponseCompletedEvent]:
        litellm_model_response: Optional[
            Union[ModelResponse, TextCompletionResponse]
        ] = stream_chunk_builder(chunks=self.collected_chat_completion_chunks)
        if litellm_model_response and isinstance(litellm_model_response, ModelResponse):

            return ResponseCompletedEvent(
                type=ResponsesAPIStreamEvents.RESPONSE_COMPLETED,
                response=LiteLLMCompletionResponsesConfig.transform_chat_completion_response_to_responses_api_response(
                    request_input=self.request_input,
                    chat_completion_response=litellm_model_response,
                    responses_api_request=self.responses_api_request,
                ),
            )
        else:
            return None
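
Illustrative usage (not part of the committed file): what a consumer of this iterator sees. Assuming `stream` is a Responses API streaming iterator obtained from litellm.responses(..., stream=True) for a provider that is bridged through litellm.completion, text arrives as OutputTextDeltaEvent chunks and the final chunk is a ResponseCompletedEvent assembled by stream_chunk_builder.

from litellm.types.llms.openai import ResponsesAPIStreamEvents

for event in stream:  # `stream` is a placeholder for the iterator described above
    if event.type == ResponsesAPIStreamEvents.OUTPUT_TEXT_DELTA:
        print(event.delta, end="")               # incremental text
    elif event.type == ResponsesAPIStreamEvents.RESPONSE_COMPLETED:
        print("\nusage:", event.response.usage)  # usage from the rebuilt ModelResponse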
.venv/lib/python3.10/site-packages/litellm/responses/litellm_completion_transformation/transformation.py
@@ -0,0 +1,664 @@
"""
Handles transforming from Responses API -> LiteLLM completion (Chat Completion API)
"""

from typing import Any, Dict, List, Optional, Union

from openai.types.responses.tool_param import FunctionToolParam

from litellm.caching import InMemoryCache
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
from litellm.responses.litellm_completion_transformation.session_handler import (
    ResponsesAPISessionElement,
    SessionHandler,
)
from litellm.types.llms.openai import (
    AllMessageValues,
    ChatCompletionResponseMessage,
    ChatCompletionSystemMessage,
    ChatCompletionToolCallChunk,
    ChatCompletionToolCallFunctionChunk,
    ChatCompletionToolMessage,
    ChatCompletionToolParam,
    ChatCompletionToolParamFunctionChunk,
    ChatCompletionUserMessage,
    GenericChatCompletionMessage,
    Reasoning,
    ResponseAPIUsage,
    ResponseInputParam,
    ResponsesAPIOptionalRequestParams,
    ResponsesAPIResponse,
    ResponseTextConfig,
)
from litellm.types.responses.main import (
    GenericResponseOutputItem,
    GenericResponseOutputItemContentAnnotation,
    OutputFunctionToolCall,
    OutputText,
)
from litellm.types.utils import (
    ChatCompletionAnnotation,
    ChatCompletionMessageToolCall,
    Choices,
    Function,
    Message,
    ModelResponse,
    Usage,
)

########### Initialize Classes used for Responses API ###########
TOOL_CALLS_CACHE = InMemoryCache()
RESPONSES_API_SESSION_HANDLER = SessionHandler()
########### End of Initialize Classes used for Responses API ###########


class LiteLLMCompletionResponsesConfig:
    @staticmethod
    def get_supported_openai_params(model: str) -> list:
        """
        LiteLLM Adapter from OpenAI Responses API to Chat Completion API supports a subset of OpenAI Responses API params
        """
        return [
            "input",
            "model",
            "instructions",
            "max_output_tokens",
            "metadata",
            "parallel_tool_calls",
            "previous_response_id",
            "stream",
            "temperature",
            "tool_choice",
            "tools",
            "top_p",
            "user",
        ]

    @staticmethod
    def transform_responses_api_request_to_chat_completion_request(
        model: str,
        input: Union[str, ResponseInputParam],
        responses_api_request: ResponsesAPIOptionalRequestParams,
        custom_llm_provider: Optional[str] = None,
        stream: Optional[bool] = None,
        **kwargs,
    ) -> dict:
        """
        Transform a Responses API request into a Chat Completion request
        """
        litellm_completion_request: dict = {
            "messages": LiteLLMCompletionResponsesConfig.transform_responses_api_input_to_messages(
                input=input,
                responses_api_request=responses_api_request,
                previous_response_id=responses_api_request.get("previous_response_id"),
            ),
            "model": model,
            "tool_choice": responses_api_request.get("tool_choice"),
            "tools": LiteLLMCompletionResponsesConfig.transform_responses_api_tools_to_chat_completion_tools(
                responses_api_request.get("tools") or []  # type: ignore
            ),
            "top_p": responses_api_request.get("top_p"),
            "user": responses_api_request.get("user"),
            "temperature": responses_api_request.get("temperature"),
            "parallel_tool_calls": responses_api_request.get("parallel_tool_calls"),
            "max_tokens": responses_api_request.get("max_output_tokens"),
            "stream": stream,
            "metadata": kwargs.get("metadata"),
            "service_tier": kwargs.get("service_tier"),
            # litellm specific params
            "custom_llm_provider": custom_llm_provider,
        }

        # Responses API `Completed` events require usage, we pass `stream_options` to litellm.completion to include usage
        if stream is True:
            stream_options = {
                "include_usage": True,
            }
            litellm_completion_request["stream_options"] = stream_options
            litellm_logging_obj: Optional[LiteLLMLoggingObj] = kwargs.get(
                "litellm_logging_obj"
            )
            if litellm_logging_obj:
                litellm_logging_obj.stream_options = stream_options

        # only pass non-None values
        litellm_completion_request = {
            k: v for k, v in litellm_completion_request.items() if v is not None
        }

        return litellm_completion_request

    @staticmethod
    def transform_responses_api_input_to_messages(
        input: Union[str, ResponseInputParam],
        responses_api_request: ResponsesAPIOptionalRequestParams,
        previous_response_id: Optional[str] = None,
    ) -> List[
        Union[
            AllMessageValues,
            GenericChatCompletionMessage,
            ChatCompletionMessageToolCall,
            ChatCompletionResponseMessage,
        ]
    ]:
        """
        Transform a Responses API input into a list of messages
        """
        messages: List[
            Union[
                AllMessageValues,
                GenericChatCompletionMessage,
                ChatCompletionMessageToolCall,
                ChatCompletionResponseMessage,
            ]
        ] = []
        if responses_api_request.get("instructions"):
            messages.append(
                LiteLLMCompletionResponsesConfig.transform_instructions_to_system_message(
                    responses_api_request.get("instructions")
                )
            )

        if previous_response_id:
            previous_response_pairs = (
                RESPONSES_API_SESSION_HANDLER.get_chain_of_previous_input_output_pairs(
                    previous_response_id=previous_response_id
                )
            )
            if previous_response_pairs:
                for previous_response_pair in previous_response_pairs:
                    chat_completion_input_messages = LiteLLMCompletionResponsesConfig._transform_response_input_param_to_chat_completion_message(
                        input=previous_response_pair[0],
                    )
                    chat_completion_output_messages = LiteLLMCompletionResponsesConfig._transform_responses_api_outputs_to_chat_completion_messages(
                        responses_api_output=previous_response_pair[1],
                    )

                    messages.extend(chat_completion_input_messages)
                    messages.extend(chat_completion_output_messages)

        messages.extend(
            LiteLLMCompletionResponsesConfig._transform_response_input_param_to_chat_completion_message(
                input=input,
            )
        )

        return messages

    @staticmethod
    def _transform_response_input_param_to_chat_completion_message(
        input: Union[str, ResponseInputParam],
    ) -> List[
        Union[
            AllMessageValues,
            GenericChatCompletionMessage,
            ChatCompletionMessageToolCall,
            ChatCompletionResponseMessage,
        ]
    ]:
        """
        Transform a ResponseInputParam into a Chat Completion message
        """
        messages: List[
            Union[
                AllMessageValues,
                GenericChatCompletionMessage,
                ChatCompletionMessageToolCall,
                ChatCompletionResponseMessage,
            ]
        ] = []
        tool_call_output_messages: List[
            Union[
                AllMessageValues,
                GenericChatCompletionMessage,
                ChatCompletionMessageToolCall,
                ChatCompletionResponseMessage,
            ]
        ] = []

        if isinstance(input, str):
            messages.append(ChatCompletionUserMessage(role="user", content=input))
        elif isinstance(input, list):
            for _input in input:
                chat_completion_messages = LiteLLMCompletionResponsesConfig._transform_responses_api_input_item_to_chat_completion_message(
                    input_item=_input
                )
                if LiteLLMCompletionResponsesConfig._is_input_item_tool_call_output(
                    input_item=_input
                ):
                    tool_call_output_messages.extend(chat_completion_messages)
                else:
                    messages.extend(chat_completion_messages)

        messages.extend(tool_call_output_messages)
        return messages

    @staticmethod
    def _ensure_tool_call_output_has_corresponding_tool_call(
        messages: List[Union[AllMessageValues, GenericChatCompletionMessage]],
    ) -> bool:
        """
        If any tool call output is present, ensure there is a corresponding tool call/tool_use block
        """
        for message in messages:
            if message.get("role") == "tool":
                return True
        return False

    @staticmethod
    def _transform_responses_api_input_item_to_chat_completion_message(
        input_item: Any,
    ) -> List[
        Union[
            AllMessageValues,
            GenericChatCompletionMessage,
            ChatCompletionResponseMessage,
        ]
    ]:
        """
        Transform a Responses API input item into a Chat Completion message

        - EasyInputMessageParam
        - Message
        - ResponseOutputMessageParam
        - ResponseFileSearchToolCallParam
        - ResponseComputerToolCallParam
        - ComputerCallOutput
        - ResponseFunctionWebSearchParam
        - ResponseFunctionToolCallParam
        - FunctionCallOutput
        - ResponseReasoningItemParam
        - ItemReference
        """
        if LiteLLMCompletionResponsesConfig._is_input_item_tool_call_output(input_item):
            # handle executed tool call results
            return LiteLLMCompletionResponsesConfig._transform_responses_api_tool_call_output_to_chat_completion_message(
                tool_call_output=input_item
            )
        else:
            return [
                GenericChatCompletionMessage(
                    role=input_item.get("role") or "user",
                    content=LiteLLMCompletionResponsesConfig._transform_responses_api_content_to_chat_completion_content(
                        input_item.get("content")
                    ),
                )
            ]

    @staticmethod
    def _is_input_item_tool_call_output(input_item: Any) -> bool:
        """
        Check if the input item is a tool call output
        """
        return input_item.get("type") in [
            "function_call_output",
            "web_search_call",
            "computer_call_output",
        ]

    @staticmethod
    def _transform_responses_api_tool_call_output_to_chat_completion_message(
        tool_call_output: Dict[str, Any],
    ) -> List[
        Union[
            AllMessageValues,
            GenericChatCompletionMessage,
            ChatCompletionResponseMessage,
        ]
    ]:
        """
        ChatCompletionToolMessage is used to indicate the output from a tool call
        """
        tool_output_message = ChatCompletionToolMessage(
            role="tool",
            content=tool_call_output.get("output") or "",
            tool_call_id=tool_call_output.get("call_id") or "",
        )

        _tool_use_definition = TOOL_CALLS_CACHE.get_cache(
            key=tool_call_output.get("call_id") or "",
        )
        if _tool_use_definition:
            """
            Append the tool use definition to the list of messages

            Providers like Anthropic require the tool use definition to be included with the tool output

            - Input:
                {'function':
                    arguments:'{"command": ["echo","<html>\\n<head>\\n  <title>Hello</title>\\n</head>\\n<body>\\n  <h1>Hi</h1>\\n</body>\\n</html>",">","index.html"]}',
                    name='shell',
                    'id': 'toolu_018KFWsEySHjdKZPdUzXpymJ',
                    'type': 'function'
                }
            - Output:
                {
                    "id": "toolu_018KFWsEySHjdKZPdUzXpymJ",
                    "type": "function",
                    "function": {
                        "name": "get_weather",
                        "arguments": "{\"latitude\":48.8566,\"longitude\":2.3522}"
                    }
                }
            """
            function: dict = _tool_use_definition.get("function") or {}
            tool_call_chunk = ChatCompletionToolCallChunk(
                id=_tool_use_definition.get("id") or "",
                type=_tool_use_definition.get("type") or "function",
                function=ChatCompletionToolCallFunctionChunk(
                    name=function.get("name") or "",
                    arguments=function.get("arguments") or "",
                ),
                index=0,
            )
            chat_completion_response_message = ChatCompletionResponseMessage(
                tool_calls=[tool_call_chunk],
                role="assistant",
            )
            return [chat_completion_response_message, tool_output_message]

        return [tool_output_message]

    @staticmethod
    def _transform_responses_api_content_to_chat_completion_content(
        content: Any,
    ) -> Union[str, List[Union[str, Dict[str, Any]]]]:
        """
        Transform a Responses API content into a Chat Completion content
        """

        if isinstance(content, str):
            return content
        elif isinstance(content, list):
            content_list: List[Union[str, Dict[str, Any]]] = []
            for item in content:
                if isinstance(item, str):
                    content_list.append(item)
                elif isinstance(item, dict):
                    content_list.append(
                        {
                            "type": LiteLLMCompletionResponsesConfig._get_chat_completion_request_content_type(
                                item.get("type") or "text"
                            ),
                            "text": item.get("text"),
                        }
                    )
            return content_list
        else:
            raise ValueError(f"Invalid content type: {type(content)}")

    @staticmethod
    def _get_chat_completion_request_content_type(content_type: str) -> str:
        """
        Get the Chat Completion request content type
        """
        # Responses API content has `input_` prefix, if it exists, remove it
        if content_type.startswith("input_"):
            return content_type[len("input_") :]
        else:
            return content_type

    @staticmethod
    def transform_instructions_to_system_message(
        instructions: Optional[str],
    ) -> ChatCompletionSystemMessage:
        """
        Transform instructions into a system message
        """
        return ChatCompletionSystemMessage(role="system", content=instructions or "")

    @staticmethod
    def transform_responses_api_tools_to_chat_completion_tools(
        tools: Optional[List[FunctionToolParam]],
    ) -> List[ChatCompletionToolParam]:
        """
        Transform Responses API tools into Chat Completion tools
        """
        if tools is None:
            return []
        chat_completion_tools: List[ChatCompletionToolParam] = []
        for tool in tools:
            chat_completion_tools.append(
                ChatCompletionToolParam(
                    type="function",
                    function=ChatCompletionToolParamFunctionChunk(
                        name=tool["name"],
                        description=tool.get("description") or "",
                        parameters=tool.get("parameters", {}),
                        strict=tool.get("strict", False),
                    ),
                )
            )
        return chat_completion_tools

    @staticmethod
    def transform_chat_completion_tools_to_responses_tools(
        chat_completion_response: ModelResponse,
    ) -> List[OutputFunctionToolCall]:
        """
        Transform Chat Completion tool calls into Responses API tool calls
        """
        all_chat_completion_tools: List[ChatCompletionMessageToolCall] = []
        for choice in chat_completion_response.choices:
            if isinstance(choice, Choices):
                if choice.message.tool_calls:
                    all_chat_completion_tools.extend(choice.message.tool_calls)
                    for tool_call in choice.message.tool_calls:
                        TOOL_CALLS_CACHE.set_cache(
                            key=tool_call.id,
                            value=tool_call,
                        )

        responses_tools: List[OutputFunctionToolCall] = []
        for tool in all_chat_completion_tools:
            if tool.type == "function":
                function_definition = tool.function
                responses_tools.append(
                    OutputFunctionToolCall(
                        name=function_definition.name or "",
                        arguments=function_definition.get("arguments") or "",
                        call_id=tool.id or "",
                        id=tool.id or "",
                        type="function_call",  # critical this is "function_call" to work with tools like openai codex
                        status=function_definition.get("status") or "completed",
                    )
                )
        return responses_tools

    @staticmethod
    def transform_chat_completion_response_to_responses_api_response(
        request_input: Union[str, ResponseInputParam],
        responses_api_request: ResponsesAPIOptionalRequestParams,
        chat_completion_response: ModelResponse,
    ) -> ResponsesAPIResponse:
        """
        Transform a Chat Completion response into a Responses API response
        """
        responses_api_response: ResponsesAPIResponse = ResponsesAPIResponse(
            id=chat_completion_response.id,
            created_at=chat_completion_response.created,
            model=chat_completion_response.model,
            object=chat_completion_response.object,
            error=getattr(chat_completion_response, "error", None),
            incomplete_details=getattr(
                chat_completion_response, "incomplete_details", None
            ),
            instructions=getattr(chat_completion_response, "instructions", None),
            metadata=getattr(chat_completion_response, "metadata", {}),
            output=LiteLLMCompletionResponsesConfig._transform_chat_completion_choices_to_responses_output(
                chat_completion_response=chat_completion_response,
                choices=getattr(chat_completion_response, "choices", []),
            ),
            parallel_tool_calls=getattr(
                chat_completion_response, "parallel_tool_calls", False
            ),
            temperature=getattr(chat_completion_response, "temperature", 0),
            tool_choice=getattr(chat_completion_response, "tool_choice", "auto"),
            tools=getattr(chat_completion_response, "tools", []),
            top_p=getattr(chat_completion_response, "top_p", None),
            max_output_tokens=getattr(
                chat_completion_response, "max_output_tokens", None
            ),
            previous_response_id=getattr(
                chat_completion_response, "previous_response_id", None
            ),
            reasoning=Reasoning(),
            status=getattr(chat_completion_response, "status", "completed"),
            text=ResponseTextConfig(),
            truncation=getattr(chat_completion_response, "truncation", None),
            usage=LiteLLMCompletionResponsesConfig._transform_chat_completion_usage_to_responses_usage(
                chat_completion_response=chat_completion_response
            ),
            user=getattr(chat_completion_response, "user", None),
        )

        RESPONSES_API_SESSION_HANDLER.add_completed_response_to_cache(
            response_id=responses_api_response.id,
            session_element=ResponsesAPISessionElement(
                input=request_input,
                output=responses_api_response,
                response_id=responses_api_response.id,
                previous_response_id=responses_api_request.get("previous_response_id"),
            ),
        )
        return responses_api_response

    @staticmethod
    def _transform_chat_completion_choices_to_responses_output(
        chat_completion_response: ModelResponse,
        choices: List[Choices],
    ) -> List[Union[GenericResponseOutputItem, OutputFunctionToolCall]]:
        responses_output: List[
            Union[GenericResponseOutputItem, OutputFunctionToolCall]
        ] = []
        for choice in choices:
            responses_output.append(
                GenericResponseOutputItem(
                    type="message",
                    id=chat_completion_response.id,
                    status=choice.finish_reason,
                    role=choice.message.role,
                    content=[
                        LiteLLMCompletionResponsesConfig._transform_chat_message_to_response_output_text(
                            choice.message
                        )
                    ],
                )
            )

        tool_calls = LiteLLMCompletionResponsesConfig.transform_chat_completion_tools_to_responses_tools(
            chat_completion_response=chat_completion_response
        )
        responses_output.extend(tool_calls)
        return responses_output

    @staticmethod
    def _transform_responses_api_outputs_to_chat_completion_messages(
        responses_api_output: ResponsesAPIResponse,
    ) -> List[
        Union[
            AllMessageValues,
            GenericChatCompletionMessage,
            ChatCompletionMessageToolCall,
        ]
    ]:
        messages: List[
            Union[
                AllMessageValues,
                GenericChatCompletionMessage,
                ChatCompletionMessageToolCall,
            ]
        ] = []
        output_items = responses_api_output.output
        for _output_item in output_items:
            output_item: dict = dict(_output_item)
            if output_item.get("type") == "function_call":
                # handle function call output
                messages.append(
                    LiteLLMCompletionResponsesConfig._transform_responses_output_tool_call_to_chat_completion_output_tool_call(
                        tool_call=output_item
                    )
                )
            else:
                # transform as generic ResponseOutputItem
                messages.append(
                    GenericChatCompletionMessage(
                        role=str(output_item.get("role")) or "user",
                        content=LiteLLMCompletionResponsesConfig._transform_responses_api_content_to_chat_completion_content(
                            output_item.get("content")
                        ),
                    )
                )
        return messages

    @staticmethod
    def _transform_responses_output_tool_call_to_chat_completion_output_tool_call(
        tool_call: dict,
    ) -> ChatCompletionMessageToolCall:
        return ChatCompletionMessageToolCall(
            id=tool_call.get("id") or "",
            type="function",
            function=Function(
                name=tool_call.get("name") or "",
                arguments=tool_call.get("arguments") or "",
            ),
        )

    @staticmethod
    def _transform_chat_message_to_response_output_text(
        message: Message,
    ) -> OutputText:
        return OutputText(
            type="output_text",
            text=message.content,
            annotations=LiteLLMCompletionResponsesConfig._transform_chat_completion_annotations_to_response_output_annotations(
                annotations=getattr(message, "annotations", None)
            ),
        )

    @staticmethod
    def _transform_chat_completion_annotations_to_response_output_annotations(
        annotations: Optional[List[ChatCompletionAnnotation]],
    ) -> List[GenericResponseOutputItemContentAnnotation]:
        response_output_annotations: List[
            GenericResponseOutputItemContentAnnotation
        ] = []

        if annotations is None:
            return response_output_annotations

        for annotation in annotations:
            annotation_type = annotation.get("type")
            if annotation_type == "url_citation" and "url_citation" in annotation:
                url_citation = annotation["url_citation"]
                response_output_annotations.append(
                    GenericResponseOutputItemContentAnnotation(
                        type=annotation_type,
                        start_index=url_citation.get("start_index"),
                        end_index=url_citation.get("end_index"),
                        url=url_citation.get("url"),
                        title=url_citation.get("title"),
                    )
                )
            # Handle other annotation types here

        return response_output_annotations

    @staticmethod
    def _transform_chat_completion_usage_to_responses_usage(
        chat_completion_response: ModelResponse,
    ) -> ResponseAPIUsage:
        usage: Optional[Usage] = getattr(chat_completion_response, "usage", None)
        if usage is None:
            return ResponseAPIUsage(
                input_tokens=0,
                output_tokens=0,
                total_tokens=0,
            )
        return ResponseAPIUsage(
            input_tokens=usage.prompt_tokens,
            output_tokens=usage.completion_tokens,
            total_tokens=usage.total_tokens,
        )
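
Illustrative usage (not part of the committed file): the request-shape mapping performed by the transformation above, with a placeholder model and prompt. The commented dict is the approximate result; keys whose value is None are dropped, and instructions become a leading system message.

from litellm.responses.litellm_completion_transformation.transformation import (
    LiteLLMCompletionResponsesConfig,
)

chat_request = LiteLLMCompletionResponsesConfig.transform_responses_api_request_to_chat_completion_request(
    model="gpt-4o-mini",
    input="What is the capital of France?",
    responses_api_request={
        "instructions": "Answer in one word.",
        "max_output_tokens": 16,
    },
)
# Roughly:
# {
#     "messages": [
#         {"role": "system", "content": "Answer in one word."},
#         {"role": "user", "content": "What is the capital of France?"},
#     ],
#     "model": "gpt-4o-mini",
#     "tools": [],
#     "max_tokens": 16,
# }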
.venv/lib/python3.10/site-packages/litellm/responses/main.py
@@ -0,0 +1,436 @@
import asyncio
import contextvars
from functools import partial
from typing import Any, Coroutine, Dict, Iterable, List, Literal, Optional, Union

import httpx

import litellm
from litellm.constants import request_timeout
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
from litellm.llms.base_llm.responses.transformation import BaseResponsesAPIConfig
from litellm.llms.custom_httpx.llm_http_handler import BaseLLMHTTPHandler
from litellm.responses.litellm_completion_transformation.handler import (
    LiteLLMCompletionTransformationHandler,
)
from litellm.responses.utils import ResponsesAPIRequestUtils
from litellm.types.llms.openai import (
    Reasoning,
    ResponseIncludable,
    ResponseInputParam,
    ResponsesAPIOptionalRequestParams,
    ResponsesAPIResponse,
    ResponseTextConfigParam,
    ToolChoice,
    ToolParam,
)
from litellm.types.responses.main import *
from litellm.types.router import GenericLiteLLMParams
from litellm.utils import ProviderConfigManager, client

from .streaming_iterator import BaseResponsesAPIStreamingIterator

####### ENVIRONMENT VARIABLES ###################
# Initialize any necessary instances or variables here
base_llm_http_handler = BaseLLMHTTPHandler()
litellm_completion_transformation_handler = LiteLLMCompletionTransformationHandler()
#################################################


@client
async def aresponses(
    input: Union[str, ResponseInputParam],
    model: str,
    include: Optional[List[ResponseIncludable]] = None,
    instructions: Optional[str] = None,
    max_output_tokens: Optional[int] = None,
    metadata: Optional[Dict[str, Any]] = None,
    parallel_tool_calls: Optional[bool] = None,
    previous_response_id: Optional[str] = None,
    reasoning: Optional[Reasoning] = None,
    store: Optional[bool] = None,
    stream: Optional[bool] = None,
    temperature: Optional[float] = None,
    text: Optional[ResponseTextConfigParam] = None,
    tool_choice: Optional[ToolChoice] = None,
    tools: Optional[Iterable[ToolParam]] = None,
    top_p: Optional[float] = None,
    truncation: Optional[Literal["auto", "disabled"]] = None,
    user: Optional[str] = None,
    # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
    # The extra values given here take precedence over values defined on the client or passed to this method.
    extra_headers: Optional[Dict[str, Any]] = None,
    extra_query: Optional[Dict[str, Any]] = None,
    extra_body: Optional[Dict[str, Any]] = None,
    timeout: Optional[Union[float, httpx.Timeout]] = None,
    # LiteLLM specific params,
    custom_llm_provider: Optional[str] = None,
    **kwargs,
) -> Union[ResponsesAPIResponse, BaseResponsesAPIStreamingIterator]:
    """
    Async: Handles responses API requests by reusing the synchronous function
    """
    local_vars = locals()
    try:
        loop = asyncio.get_event_loop()
        kwargs["aresponses"] = True

        # get custom llm provider so we can use this for mapping exceptions
        if custom_llm_provider is None:
            _, custom_llm_provider, _, _ = litellm.get_llm_provider(
                model=model, api_base=local_vars.get("base_url", None)
            )

        func = partial(
            responses,
            input=input,
            model=model,
            include=include,
            instructions=instructions,
            max_output_tokens=max_output_tokens,
            metadata=metadata,
            parallel_tool_calls=parallel_tool_calls,
            previous_response_id=previous_response_id,
            reasoning=reasoning,
            store=store,
            stream=stream,
            temperature=temperature,
            text=text,
            tool_choice=tool_choice,
            tools=tools,
            top_p=top_p,
            truncation=truncation,
            user=user,
            extra_headers=extra_headers,
            extra_query=extra_query,
            extra_body=extra_body,
            timeout=timeout,
            custom_llm_provider=custom_llm_provider,
            **kwargs,
        )

        ctx = contextvars.copy_context()
        func_with_context = partial(ctx.run, func)
        init_response = await loop.run_in_executor(None, func_with_context)

        if asyncio.iscoroutine(init_response):
            response = await init_response
        else:
            response = init_response

        # Update the responses_api_response_id with the model_id
        if isinstance(response, ResponsesAPIResponse):
            response = ResponsesAPIRequestUtils._update_responses_api_response_id_with_model_id(
                responses_api_response=response,
                litellm_metadata=kwargs.get("litellm_metadata", {}),
                custom_llm_provider=custom_llm_provider,
            )
        return response
    except Exception as e:
        raise litellm.exception_type(
            model=model,
            custom_llm_provider=custom_llm_provider,
            original_exception=e,
            completion_kwargs=local_vars,
            extra_kwargs=kwargs,
        )


@client
def responses(
    input: Union[str, ResponseInputParam],
    model: str,
    include: Optional[List[ResponseIncludable]] = None,
    instructions: Optional[str] = None,
    max_output_tokens: Optional[int] = None,
    metadata: Optional[Dict[str, Any]] = None,
    parallel_tool_calls: Optional[bool] = None,
    previous_response_id: Optional[str] = None,
    reasoning: Optional[Reasoning] = None,
    store: Optional[bool] = None,
    stream: Optional[bool] = None,
    temperature: Optional[float] = None,
    text: Optional[ResponseTextConfigParam] = None,
    tool_choice: Optional[ToolChoice] = None,
    tools: Optional[Iterable[ToolParam]] = None,
    top_p: Optional[float] = None,
    truncation: Optional[Literal["auto", "disabled"]] = None,
    user: Optional[str] = None,
    # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
    # The extra values given here take precedence over values defined on the client or passed to this method.
    extra_headers: Optional[Dict[str, Any]] = None,
    extra_query: Optional[Dict[str, Any]] = None,
    extra_body: Optional[Dict[str, Any]] = None,
    timeout: Optional[Union[float, httpx.Timeout]] = None,
    # LiteLLM specific params,
    custom_llm_provider: Optional[str] = None,
    **kwargs,
):
    """
    Synchronous version of the Responses API.
    Uses the synchronous HTTP handler to make requests.
    """
    local_vars = locals()
    try:
        litellm_logging_obj: LiteLLMLoggingObj = kwargs.get("litellm_logging_obj")  # type: ignore
        litellm_call_id: Optional[str] = kwargs.get("litellm_call_id", None)
        _is_async = kwargs.pop("aresponses", False) is True

        # get llm provider logic
        litellm_params = GenericLiteLLMParams(**kwargs)
        (
            model,
            custom_llm_provider,
            dynamic_api_key,
            dynamic_api_base,
        ) = litellm.get_llm_provider(
            model=model,
            custom_llm_provider=custom_llm_provider,
            api_base=litellm_params.api_base,
            api_key=litellm_params.api_key,
        )

        # get provider config
        responses_api_provider_config: Optional[BaseResponsesAPIConfig] = (
            ProviderConfigManager.get_provider_responses_api_config(
                model=model,
                provider=litellm.LlmProviders(custom_llm_provider),
            )
        )

        local_vars.update(kwargs)
        # Get ResponsesAPIOptionalRequestParams with only valid parameters
        response_api_optional_params: ResponsesAPIOptionalRequestParams = (
            ResponsesAPIRequestUtils.get_requested_response_api_optional_param(
                local_vars
            )
        )

        if responses_api_provider_config is None:
            return litellm_completion_transformation_handler.response_api_handler(
                model=model,
                input=input,
                responses_api_request=response_api_optional_params,
                custom_llm_provider=custom_llm_provider,
                _is_async=_is_async,
                stream=stream,
                **kwargs,
            )

        # Get optional parameters for the responses API
        responses_api_request_params: Dict = (
            ResponsesAPIRequestUtils.get_optional_params_responses_api(
                model=model,
                responses_api_provider_config=responses_api_provider_config,
                response_api_optional_params=response_api_optional_params,
            )
        )

        # Pre Call logging
        litellm_logging_obj.update_environment_variables(
            model=model,
            user=user,
            optional_params=dict(responses_api_request_params),
            litellm_params={
                "litellm_call_id": litellm_call_id,
                **responses_api_request_params,
            },
            custom_llm_provider=custom_llm_provider,
        )

        # Call the handler with _is_async flag instead of directly calling the async handler
        response = base_llm_http_handler.response_api_handler(
            model=model,
            input=input,
            responses_api_provider_config=responses_api_provider_config,
            response_api_optional_request_params=responses_api_request_params,
            custom_llm_provider=custom_llm_provider,
            litellm_params=litellm_params,
            logging_obj=litellm_logging_obj,
            extra_headers=extra_headers,
            extra_body=extra_body,
            timeout=timeout or request_timeout,
            _is_async=_is_async,
            client=kwargs.get("client"),
            fake_stream=responses_api_provider_config.should_fake_stream(
                model=model, stream=stream, custom_llm_provider=custom_llm_provider
            ),
            litellm_metadata=kwargs.get("litellm_metadata", {}),
        )

        # Update the responses_api_response_id with the model_id
        if isinstance(response, ResponsesAPIResponse):
            response = ResponsesAPIRequestUtils._update_responses_api_response_id_with_model_id(
                responses_api_response=response,
                litellm_metadata=kwargs.get("litellm_metadata", {}),
                custom_llm_provider=custom_llm_provider,
            )

        return response
    except Exception as e:
        raise litellm.exception_type(
            model=model,
            custom_llm_provider=custom_llm_provider,
            original_exception=e,
            completion_kwargs=local_vars,
            extra_kwargs=kwargs,
        )


@client
async def adelete_responses(
    response_id: str,
    # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
    # The extra values given here take precedence over values defined on the client or passed to this method.
    extra_headers: Optional[Dict[str, Any]] = None,
    extra_query: Optional[Dict[str, Any]] = None,
    extra_body: Optional[Dict[str, Any]] = None,
    timeout: Optional[Union[float, httpx.Timeout]] = None,
    # LiteLLM specific params,
    custom_llm_provider: Optional[str] = None,
    **kwargs,
) -> DeleteResponseResult:
    """
    Async version of the DELETE Responses API

    DELETE /v1/responses/{response_id} endpoint in the responses API
    """
    local_vars = locals()
    try:
        loop = asyncio.get_event_loop()
        kwargs["adelete_responses"] = True

        # get custom llm provider from response_id
        decoded_response_id: DecodedResponseId = (
            ResponsesAPIRequestUtils._decode_responses_api_response_id(
                response_id=response_id,
            )
        )
        response_id = decoded_response_id.get("response_id") or response_id
        custom_llm_provider = (
            decoded_response_id.get("custom_llm_provider") or custom_llm_provider
        )

        func = partial(
            delete_responses,
            response_id=response_id,
            custom_llm_provider=custom_llm_provider,
            extra_headers=extra_headers,
            extra_query=extra_query,
            extra_body=extra_body,
            timeout=timeout,
            **kwargs,
        )

        ctx = contextvars.copy_context()
        func_with_context = partial(ctx.run, func)
        init_response = await loop.run_in_executor(None, func_with_context)

        if asyncio.iscoroutine(init_response):
            response = await init_response
        else:
            response = init_response
        return response
    except Exception as e:
        raise litellm.exception_type(
            model=None,
            custom_llm_provider=custom_llm_provider,
            original_exception=e,
            completion_kwargs=local_vars,
            extra_kwargs=kwargs,
        )


@client
def delete_responses(
    response_id: str,
    # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
    # The extra values given here take precedence over values defined on the client or passed to this method.
    extra_headers: Optional[Dict[str, Any]] = None,
    extra_query: Optional[Dict[str, Any]] = None,
    extra_body: Optional[Dict[str, Any]] = None,
    timeout: Optional[Union[float, httpx.Timeout]] = None,
    # LiteLLM specific params,
    custom_llm_provider: Optional[str] = None,
    **kwargs,
) -> Union[DeleteResponseResult, Coroutine[Any, Any, DeleteResponseResult]]:
    """
    Synchronous version of the DELETE Responses API

    DELETE /v1/responses/{response_id} endpoint in the responses API
    """
    local_vars = locals()
    try:
        litellm_logging_obj: LiteLLMLoggingObj = kwargs.get("litellm_logging_obj")  # type: ignore
        litellm_call_id: Optional[str] = kwargs.get("litellm_call_id", None)
        _is_async = kwargs.pop("adelete_responses", False) is True

        # get llm provider logic
        litellm_params = GenericLiteLLMParams(**kwargs)

        # get custom llm provider from response_id
        decoded_response_id: DecodedResponseId = (
            ResponsesAPIRequestUtils._decode_responses_api_response_id(
                response_id=response_id,
            )
        )
        response_id = decoded_response_id.get("response_id") or response_id
        custom_llm_provider = (
            decoded_response_id.get("custom_llm_provider") or custom_llm_provider
        )

        if custom_llm_provider is None:
            raise ValueError("custom_llm_provider is required but passed as None")

        # get provider config
        responses_api_provider_config: Optional[BaseResponsesAPIConfig] = (
            ProviderConfigManager.get_provider_responses_api_config(
                model=None,
                provider=litellm.LlmProviders(custom_llm_provider),
            )
        )

        if responses_api_provider_config is None:
            raise ValueError(
                f"DELETE responses is not supported for {custom_llm_provider}"
            )

        local_vars.update(kwargs)

        # Pre Call logging
        litellm_logging_obj.update_environment_variables(
            model=None,
            optional_params={
                "response_id": response_id,
            },
            litellm_params={
                "litellm_call_id": litellm_call_id,
            },
            custom_llm_provider=custom_llm_provider,
        )

        # Call the handler with _is_async flag instead of directly calling the async handler
        response = base_llm_http_handler.delete_response_api_handler(
            response_id=response_id,
            custom_llm_provider=custom_llm_provider,
            responses_api_provider_config=responses_api_provider_config,
            litellm_params=litellm_params,
            logging_obj=litellm_logging_obj,
            extra_headers=extra_headers,
            extra_body=extra_body,
            timeout=timeout or request_timeout,
            _is_async=_is_async,
            client=kwargs.get("client"),
        )

        return response
    except Exception as e:
        raise litellm.exception_type(
            model=None,
            custom_llm_provider=custom_llm_provider,
            original_exception=e,
            completion_kwargs=local_vars,
            extra_kwargs=kwargs,
        )
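
Illustrative usage (not part of the committed file): calling the public entry points defined above. This assumes `responses` and `aresponses` are exported at the litellm package root, as other top-level litellm APIs are, and that an API key for the placeholder model's provider is set in the environment.

import asyncio

import litellm

# Synchronous, non-streaming
resp = litellm.responses(model="gpt-4o-mini", input="Say hello")
print(resp.id, resp.usage)

async def main():
    # Async variant; with stream=True this returns a streaming iterator instead
    aresp = await litellm.aresponses(model="gpt-4o-mini", input="Say hello again")
    print(aresp.output)

asyncio.run(main())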
@@ -0,0 +1,336 @@
|
||||
import asyncio
|
||||
import json
|
||||
from datetime import datetime
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
import httpx
|
||||
|
||||
from litellm.constants import STREAM_SSE_DONE_STRING
|
||||
from litellm.litellm_core_utils.asyncify import run_async_function
|
||||
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
|
||||
from litellm.litellm_core_utils.thread_pool_executor import executor
|
||||
from litellm.llms.base_llm.responses.transformation import BaseResponsesAPIConfig
|
||||
from litellm.responses.utils import ResponsesAPIRequestUtils
|
||||
from litellm.types.llms.openai import (
|
||||
OutputTextDeltaEvent,
|
||||
ResponseCompletedEvent,
|
||||
ResponsesAPIResponse,
|
||||
ResponsesAPIStreamEvents,
|
||||
ResponsesAPIStreamingResponse,
|
||||
)
|
||||
from litellm.utils import CustomStreamWrapper
|
||||
|
||||
|
||||
class BaseResponsesAPIStreamingIterator:
|
||||
"""
|
||||
Base class for streaming iterators that process responses from the Responses API.
|
||||
|
||||
This class contains shared logic for both synchronous and asynchronous iterators.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
response: httpx.Response,
|
||||
model: str,
|
||||
responses_api_provider_config: BaseResponsesAPIConfig,
|
||||
logging_obj: LiteLLMLoggingObj,
|
||||
litellm_metadata: Optional[Dict[str, Any]] = None,
|
||||
custom_llm_provider: Optional[str] = None,
|
||||
):
|
||||
self.response = response
|
||||
self.model = model
|
||||
self.logging_obj = logging_obj
|
||||
self.finished = False
|
||||
self.responses_api_provider_config = responses_api_provider_config
|
||||
self.completed_response: Optional[ResponsesAPIStreamingResponse] = None
|
||||
self.start_time = datetime.now()
|
||||
|
||||
# set request kwargs
|
||||
self.litellm_metadata = litellm_metadata
|
||||
self.custom_llm_provider = custom_llm_provider
|
||||
|
||||
def _process_chunk(self, chunk) -> Optional[ResponsesAPIStreamingResponse]:
|
||||
"""Process a single chunk of data from the stream"""
|
||||
if not chunk:
|
||||
return None
|
||||
|
||||
# Handle SSE format (data: {...})
|
||||
chunk = CustomStreamWrapper._strip_sse_data_from_chunk(chunk)
|
||||
if chunk is None:
|
||||
return None
|
||||
|
||||
# Handle "[DONE]" marker
|
||||
if chunk == STREAM_SSE_DONE_STRING:
|
||||
self.finished = True
|
||||
return None
|
||||
|
||||
try:
|
||||
# Parse the JSON chunk
|
||||
parsed_chunk = json.loads(chunk)
|
||||
|
||||
# Format as ResponsesAPIStreamingResponse
|
||||
if isinstance(parsed_chunk, dict):
|
||||
openai_responses_api_chunk = (
|
||||
self.responses_api_provider_config.transform_streaming_response(
|
||||
model=self.model,
|
||||
parsed_chunk=parsed_chunk,
|
||||
logging_obj=self.logging_obj,
|
||||
)
|
||||
)
|
||||
|
||||
# if "response" in parsed_chunk, then encode litellm specific information like custom_llm_provider
|
||||
response_object = getattr(openai_responses_api_chunk, "response", None)
|
||||
if response_object:
|
||||
response = ResponsesAPIRequestUtils._update_responses_api_response_id_with_model_id(
|
||||
responses_api_response=response_object,
|
||||
litellm_metadata=self.litellm_metadata,
|
||||
custom_llm_provider=self.custom_llm_provider,
|
||||
)
|
||||
setattr(openai_responses_api_chunk, "response", response)
|
||||
|
||||
# Store the completed response
|
||||
if (
|
||||
openai_responses_api_chunk
|
||||
and openai_responses_api_chunk.type
|
||||
== ResponsesAPIStreamEvents.RESPONSE_COMPLETED
|
||||
):
|
||||
self.completed_response = openai_responses_api_chunk
|
||||
self._handle_logging_completed_response()
|
||||
|
||||
return openai_responses_api_chunk
|
||||
|
||||
return None
|
||||
except json.JSONDecodeError:
|
||||
# If we can't parse the chunk, continue
|
||||
return None
|
||||
|
||||
def _handle_logging_completed_response(self):
|
||||
"""Base implementation - should be overridden by subclasses"""
|
||||
pass
|
||||
|
||||
|
||||
class ResponsesAPIStreamingIterator(BaseResponsesAPIStreamingIterator):
    """
    Async iterator for processing streaming responses from the Responses API.
    """

    def __init__(
        self,
        response: httpx.Response,
        model: str,
        responses_api_provider_config: BaseResponsesAPIConfig,
        logging_obj: LiteLLMLoggingObj,
        litellm_metadata: Optional[Dict[str, Any]] = None,
        custom_llm_provider: Optional[str] = None,
    ):
        super().__init__(
            response,
            model,
            responses_api_provider_config,
            logging_obj,
            litellm_metadata,
            custom_llm_provider,
        )
        self.stream_iterator = response.aiter_lines()

    def __aiter__(self):
        return self

    async def __anext__(self) -> ResponsesAPIStreamingResponse:
        try:
            while True:
                # Get the next chunk from the stream
                try:
                    chunk = await self.stream_iterator.__anext__()
                except StopAsyncIteration:
                    self.finished = True
                    raise StopAsyncIteration

                result = self._process_chunk(chunk)

                if self.finished:
                    raise StopAsyncIteration
                elif result is not None:
                    return result
                # If result is None, continue the loop to get the next chunk

        except httpx.HTTPError as e:
            # Handle HTTP errors
            self.finished = True
            raise e

    def _handle_logging_completed_response(self):
        """Handle logging for completed responses in async context"""
        asyncio.create_task(
            self.logging_obj.async_success_handler(
                result=self.completed_response,
                start_time=self.start_time,
                end_time=datetime.now(),
                cache_hit=None,
            )
        )

        executor.submit(
            self.logging_obj.success_handler,
            result=self.completed_response,
            cache_hit=None,
            start_time=self.start_time,
            end_time=datetime.now(),
        )

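# ---------------------------------------------------------------------------
# Editor's illustrative sketch (not part of this module): one way to consume the
# async iterator above. `stream` is assumed to be a ResponsesAPIStreamingIterator
# obtained from a streaming Responses API call; the helper name is hypothetical.
# ---------------------------------------------------------------------------
async def _sketch_collect_streamed_text(stream) -> str:
    """Accumulate output-text deltas until the stream reports completion."""
    collected = ""
    async for event in stream:
        if event.type == ResponsesAPIStreamEvents.OUTPUT_TEXT_DELTA:
            collected += event.delta
        elif event.type == ResponsesAPIStreamEvents.RESPONSE_COMPLETED:
            break
    return collected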
class SyncResponsesAPIStreamingIterator(BaseResponsesAPIStreamingIterator):
    """
    Synchronous iterator for processing streaming responses from the Responses API.
    """

    def __init__(
        self,
        response: httpx.Response,
        model: str,
        responses_api_provider_config: BaseResponsesAPIConfig,
        logging_obj: LiteLLMLoggingObj,
        litellm_metadata: Optional[Dict[str, Any]] = None,
        custom_llm_provider: Optional[str] = None,
    ):
        super().__init__(
            response,
            model,
            responses_api_provider_config,
            logging_obj,
            litellm_metadata,
            custom_llm_provider,
        )
        self.stream_iterator = response.iter_lines()

    def __iter__(self):
        return self

    def __next__(self):
        try:
            while True:
                # Get the next chunk from the stream
                try:
                    chunk = next(self.stream_iterator)
                except StopIteration:
                    self.finished = True
                    raise StopIteration

                result = self._process_chunk(chunk)

                if self.finished:
                    raise StopIteration
                elif result is not None:
                    return result
                # If result is None, continue the loop to get the next chunk

        except httpx.HTTPError as e:
            # Handle HTTP errors
            self.finished = True
            raise e

    def _handle_logging_completed_response(self):
        """Handle logging for completed responses in sync context"""
        run_async_function(
            async_function=self.logging_obj.async_success_handler,
            result=self.completed_response,
            start_time=self.start_time,
            end_time=datetime.now(),
            cache_hit=None,
        )

        executor.submit(
            self.logging_obj.success_handler,
            result=self.completed_response,
            cache_hit=None,
            start_time=self.start_time,
            end_time=datetime.now(),
        )

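# ---------------------------------------------------------------------------
# Editor's illustrative sketch (not part of this module): the synchronous
# counterpart of the pattern above, assuming `stream` is a
# SyncResponsesAPIStreamingIterator.
# ---------------------------------------------------------------------------
def _sketch_collect_streamed_text_sync(stream) -> str:
    """Accumulate output-text deltas from a synchronous Responses API stream."""
    collected = ""
    for event in stream:
        if event.type == ResponsesAPIStreamEvents.OUTPUT_TEXT_DELTA:
            collected += event.delta
    return collected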
class MockResponsesAPIStreamingIterator(BaseResponsesAPIStreamingIterator):
    """
    Mock iterator that fakes a stream by slicing the full response text into
    5-character deltas, then emitting a completed event.

    Models like o1-pro don't support streaming, so we fake it.
    """

    CHUNK_SIZE = 5

    def __init__(
        self,
        response: httpx.Response,
        model: str,
        responses_api_provider_config: BaseResponsesAPIConfig,
        logging_obj: LiteLLMLoggingObj,
        litellm_metadata: Optional[Dict[str, Any]] = None,
        custom_llm_provider: Optional[str] = None,
    ):
        super().__init__(
            response=response,
            model=model,
            responses_api_provider_config=responses_api_provider_config,
            logging_obj=logging_obj,
            litellm_metadata=litellm_metadata,
            custom_llm_provider=custom_llm_provider,
        )

        # one-time transform of the full (non-streaming) response
        transformed = (
            self.responses_api_provider_config.transform_response_api_response(
                model=self.model,
                raw_response=response,
                logging_obj=logging_obj,
            )
        )
        full_text = self._collect_text(transformed)

        # build a list of 5-char delta events
        deltas = [
            OutputTextDeltaEvent(
                type=ResponsesAPIStreamEvents.OUTPUT_TEXT_DELTA,
                delta=full_text[i : i + self.CHUNK_SIZE],
                item_id=transformed.id,
                output_index=0,
                content_index=0,
            )
            for i in range(0, len(full_text), self.CHUNK_SIZE)
        ]

        # append the completed event
        self._events = deltas + [
            ResponseCompletedEvent(
                type=ResponsesAPIStreamEvents.RESPONSE_COMPLETED,
                response=transformed,
            )
        ]
        self._idx = 0

    def __aiter__(self):
        return self

    async def __anext__(self) -> ResponsesAPIStreamingResponse:
        if self._idx >= len(self._events):
            raise StopAsyncIteration
        evt = self._events[self._idx]
        self._idx += 1
        return evt

    def __iter__(self):
        return self

    def __next__(self) -> ResponsesAPIStreamingResponse:
        if self._idx >= len(self._events):
            raise StopIteration
        evt = self._events[self._idx]
        self._idx += 1
        return evt

    def _collect_text(self, resp: ResponsesAPIResponse) -> str:
        out = ""
        for out_item in resp.output:
            if out_item.type == "message":
                for c in getattr(out_item, "content", []):
                    out += c.text
        return out

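# ---------------------------------------------------------------------------
# Editor's illustrative sketch (not part of this module): the slicing behaviour
# MockResponsesAPIStreamingIterator relies on, shown on a plain string. With
# CHUNK_SIZE = 5, "Hello world" becomes ["Hello", " worl", "d"]; each piece is
# wrapped in an OutputTextDeltaEvent before the final RESPONSE_COMPLETED event.
# ---------------------------------------------------------------------------
def _sketch_fake_stream_deltas(full_text: str, chunk_size: int = 5) -> list:
    """Split response text the same way the mock iterator builds its delta events."""
    return [
        full_text[i : i + chunk_size] for i in range(0, len(full_text), chunk_size)
    ]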
204 .venv/lib/python3.10/site-packages/litellm/responses/utils.py Normal file
@@ -0,0 +1,204 @@
import base64
from typing import Any, Dict, Optional, Union, cast, get_type_hints

import litellm
from litellm._logging import verbose_logger
from litellm.llms.base_llm.responses.transformation import BaseResponsesAPIConfig
from litellm.types.llms.openai import (
    ResponseAPIUsage,
    ResponsesAPIOptionalRequestParams,
    ResponsesAPIResponse,
)
from litellm.types.responses.main import DecodedResponseId
from litellm.types.utils import SpecialEnums, Usage


class ResponsesAPIRequestUtils:
    """Helper utils for constructing ResponseAPI requests"""

    @staticmethod
    def get_optional_params_responses_api(
        model: str,
        responses_api_provider_config: BaseResponsesAPIConfig,
        response_api_optional_params: ResponsesAPIOptionalRequestParams,
    ) -> Dict:
        """
        Get optional parameters for the responses API.

        Args:
            model: The model name
            responses_api_provider_config: The provider configuration for the responses API
            response_api_optional_params: The requested optional parameters

        Returns:
            A dictionary of supported parameters for the responses API
        """
        # Get supported parameters for the model
        supported_params = responses_api_provider_config.get_supported_openai_params(
            model
        )

        # Check for unsupported parameters
        unsupported_params = [
            param
            for param in response_api_optional_params
            if param not in supported_params
        ]

        if unsupported_params:
            raise litellm.UnsupportedParamsError(
                model=model,
                message=f"The following parameters are not supported for model {model}: {', '.join(unsupported_params)}",
            )

        # Map parameters to provider-specific format
        mapped_params = responses_api_provider_config.map_openai_params(
            response_api_optional_params=response_api_optional_params,
            model=model,
            drop_params=litellm.drop_params,
        )

        return mapped_params

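# ---------------------------------------------------------------------------
# Editor's illustrative sketch (not part of this module): the validation pattern
# used above, reduced to plain Python. Anything the provider config does not
# list as supported raises before the request is mapped.
# ---------------------------------------------------------------------------
def _sketch_check_unsupported(requested: Dict[str, Any], supported: list) -> None:
    """Raise if the caller requested params the provider does not support."""
    unsupported = [param for param in requested if param not in supported]
    if unsupported:
        raise ValueError(f"Unsupported params: {', '.join(unsupported)}")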
    @staticmethod
    def get_requested_response_api_optional_param(
        params: Dict[str, Any],
    ) -> ResponsesAPIOptionalRequestParams:
        """
        Filter parameters to only include those defined in ResponsesAPIOptionalRequestParams.

        Args:
            params: Dictionary of parameters to filter

        Returns:
            ResponsesAPIOptionalRequestParams instance with only the valid parameters
        """
        valid_keys = get_type_hints(ResponsesAPIOptionalRequestParams).keys()
        filtered_params = {
            k: v for k, v in params.items() if k in valid_keys and v is not None
        }
        return cast(ResponsesAPIOptionalRequestParams, filtered_params)

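# ---------------------------------------------------------------------------
# Editor's illustrative sketch (not part of this module): the effect of the
# filtering above, assuming "temperature" is declared on
# ResponsesAPIOptionalRequestParams and "my_internal_flag" is not. None values
# are dropped as well.
# ---------------------------------------------------------------------------
# ResponsesAPIRequestUtils.get_requested_response_api_optional_param(
#     {"temperature": 0.2, "my_internal_flag": True, "max_output_tokens": None}
# )
# -> {"temperature": 0.2}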
    @staticmethod
    def _update_responses_api_response_id_with_model_id(
        responses_api_response: ResponsesAPIResponse,
        custom_llm_provider: Optional[str],
        litellm_metadata: Optional[Dict[str, Any]] = None,
    ) -> ResponsesAPIResponse:
        """
        Update the responses_api_response_id with model_id and custom_llm_provider.

        This builds a composite ID containing the custom LLM provider, model ID, and original response ID.
        """
        litellm_metadata = litellm_metadata or {}
        model_info: Dict[str, Any] = litellm_metadata.get("model_info", {}) or {}
        model_id = model_info.get("id")
        updated_id = ResponsesAPIRequestUtils._build_responses_api_response_id(
            model_id=model_id,
            custom_llm_provider=custom_llm_provider,
            response_id=responses_api_response.id,
        )

        responses_api_response.id = updated_id
        return responses_api_response

    @staticmethod
    def _build_responses_api_response_id(
        custom_llm_provider: Optional[str],
        model_id: Optional[str],
        response_id: str,
    ) -> str:
        """Build the composite responses_api_response_id"""
        assembled_id: str = str(
            SpecialEnums.LITELLM_MANAGED_RESPONSE_COMPLETE_STR.value
        ).format(custom_llm_provider, model_id, response_id)
        base64_encoded_id: str = base64.b64encode(assembled_id.encode("utf-8")).decode(
            "utf-8"
        )
        return f"resp_{base64_encoded_id}"

    @staticmethod
    def _decode_responses_api_response_id(
        response_id: str,
    ) -> DecodedResponseId:
        """
        Decode the responses_api_response_id

        Returns:
            DecodedResponseId: Structured object with custom_llm_provider, model_id, and response_id
        """
        try:
            # Remove prefix and decode
            cleaned_id = response_id.replace("resp_", "")
            decoded_id = base64.b64decode(cleaned_id.encode("utf-8")).decode("utf-8")

            # Parse components using known prefixes
            if ";" not in decoded_id:
                return DecodedResponseId(
                    custom_llm_provider=None,
                    model_id=None,
                    response_id=response_id,
                )

            parts = decoded_id.split(";")

            # Format: litellm:custom_llm_provider:{};model_id:{};response_id:{}
            custom_llm_provider = None
            model_id = None

            if (
                len(parts) >= 3
            ):  # Full format with custom_llm_provider, model_id, and response_id
                custom_llm_provider_part = parts[0]
                model_id_part = parts[1]
                response_part = parts[2]

                custom_llm_provider = custom_llm_provider_part.replace(
                    "litellm:custom_llm_provider:", ""
                )
                model_id = model_id_part.replace("model_id:", "")
                decoded_response_id = response_part.replace("response_id:", "")
            else:
                decoded_response_id = response_id

            return DecodedResponseId(
                custom_llm_provider=custom_llm_provider,
                model_id=model_id,
                response_id=decoded_response_id,
            )
        except Exception as e:
            verbose_logger.debug(f"Error decoding response_id '{response_id}': {e}")
            return DecodedResponseId(
                custom_llm_provider=None,
                model_id=None,
                response_id=response_id,
            )

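# ---------------------------------------------------------------------------
# Editor's illustrative sketch (not part of this module): the intended round
# trip between _build_responses_api_response_id and
# _decode_responses_api_response_id. Provider, model id, and response id values
# here are hypothetical.
# ---------------------------------------------------------------------------
def _sketch_response_id_round_trip() -> DecodedResponseId:
    """Encode a composite response id, then decode it back into its parts."""
    composite_id = ResponsesAPIRequestUtils._build_responses_api_response_id(
        custom_llm_provider="openai",
        model_id="model-123",
        response_id="resp_abc",
    )
    # composite_id is "resp_<base64>"; decoding should recover the three parts
    return ResponsesAPIRequestUtils._decode_responses_api_response_id(composite_id)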
class ResponseAPILoggingUtils:
    @staticmethod
    def _is_response_api_usage(usage: Union[dict, ResponseAPIUsage]) -> bool:
        """Returns True if usage is from the OpenAI Responses API"""
        if isinstance(usage, ResponseAPIUsage):
            return True
        if "input_tokens" in usage and "output_tokens" in usage:
            return True
        return False

    @staticmethod
    def _transform_response_api_usage_to_chat_usage(
        usage: Union[dict, ResponseAPIUsage],
    ) -> Usage:
        """Transforms a ResponseAPIUsage object into a chat-completion Usage object"""
        response_api_usage: ResponseAPIUsage = (
            ResponseAPIUsage(**usage) if isinstance(usage, dict) else usage
        )
        prompt_tokens: int = response_api_usage.input_tokens or 0
        completion_tokens: int = response_api_usage.output_tokens or 0
        return Usage(
            prompt_tokens=prompt_tokens,
            completion_tokens=completion_tokens,
            total_tokens=prompt_tokens + completion_tokens,
        )

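# ---------------------------------------------------------------------------
# Editor's illustrative sketch (not part of this module): how Responses API
# usage maps onto chat-completion usage. Token counts are made up, and any
# other ResponseAPIUsage fields are omitted for brevity.
# ---------------------------------------------------------------------------
# ResponseAPILoggingUtils._transform_response_api_usage_to_chat_usage(
#     {"input_tokens": 12, "output_tokens": 30}
# )
# -> Usage(prompt_tokens=12, completion_tokens=30, total_tokens=42)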