structure saas with tools

Davidson Gomes
2025-04-25 15:30:54 -03:00
commit 1aef473937
16434 changed files with 6584257 additions and 0 deletions

View File

@@ -0,0 +1,115 @@
"""
Handler for transforming Responses API requests into litellm.completion requests
"""
from typing import Any, Coroutine, Optional, Union
import litellm
from litellm.responses.litellm_completion_transformation.streaming_iterator import (
LiteLLMCompletionStreamingIterator,
)
from litellm.responses.litellm_completion_transformation.transformation import (
LiteLLMCompletionResponsesConfig,
)
from litellm.responses.streaming_iterator import BaseResponsesAPIStreamingIterator
from litellm.types.llms.openai import (
ResponseInputParam,
ResponsesAPIOptionalRequestParams,
ResponsesAPIResponse,
)
from litellm.types.utils import ModelResponse
class LiteLLMCompletionTransformationHandler:
def response_api_handler(
self,
model: str,
input: Union[str, ResponseInputParam],
responses_api_request: ResponsesAPIOptionalRequestParams,
custom_llm_provider: Optional[str] = None,
_is_async: bool = False,
stream: Optional[bool] = None,
**kwargs,
) -> Union[
ResponsesAPIResponse,
BaseResponsesAPIStreamingIterator,
Coroutine[
Any, Any, Union[ResponsesAPIResponse, BaseResponsesAPIStreamingIterator]
],
]:
litellm_completion_request: dict = (
LiteLLMCompletionResponsesConfig.transform_responses_api_request_to_chat_completion_request(
model=model,
input=input,
responses_api_request=responses_api_request,
custom_llm_provider=custom_llm_provider,
stream=stream,
**kwargs,
)
)
if _is_async:
return self.async_response_api_handler(
litellm_completion_request=litellm_completion_request,
request_input=input,
responses_api_request=responses_api_request,
**kwargs,
)
litellm_completion_response: Union[
ModelResponse, litellm.CustomStreamWrapper
] = litellm.completion(
**litellm_completion_request,
**kwargs,
)
if isinstance(litellm_completion_response, ModelResponse):
responses_api_response: ResponsesAPIResponse = (
LiteLLMCompletionResponsesConfig.transform_chat_completion_response_to_responses_api_response(
chat_completion_response=litellm_completion_response,
request_input=input,
responses_api_request=responses_api_request,
)
)
return responses_api_response
elif isinstance(litellm_completion_response, litellm.CustomStreamWrapper):
return LiteLLMCompletionStreamingIterator(
litellm_custom_stream_wrapper=litellm_completion_response,
request_input=input,
responses_api_request=responses_api_request,
)
async def async_response_api_handler(
self,
litellm_completion_request: dict,
request_input: Union[str, ResponseInputParam],
responses_api_request: ResponsesAPIOptionalRequestParams,
**kwargs,
) -> Union[ResponsesAPIResponse, BaseResponsesAPIStreamingIterator]:
litellm_completion_response: Union[
ModelResponse, litellm.CustomStreamWrapper
] = await litellm.acompletion(
**litellm_completion_request,
**kwargs,
)
if isinstance(litellm_completion_response, ModelResponse):
responses_api_response: ResponsesAPIResponse = (
LiteLLMCompletionResponsesConfig.transform_chat_completion_response_to_responses_api_response(
chat_completion_response=litellm_completion_response,
request_input=request_input,
responses_api_request=responses_api_request,
)
)
return responses_api_response
elif isinstance(litellm_completion_response, litellm.CustomStreamWrapper):
return LiteLLMCompletionStreamingIterator(
litellm_custom_stream_wrapper=litellm_completion_response,
request_input=request_input,
responses_api_request=responses_api_request,
)

View File

@@ -0,0 +1,59 @@
"""
Responses API has previous_response_id, which is the id of the previous response.
LiteLLM needs to maintain a cache of the previous response input, output, previous_response_id, and model.
This class handles that cache.
"""
from typing import List, Optional, Tuple, Union
from typing_extensions import TypedDict
from litellm.caching import InMemoryCache
from litellm.types.llms.openai import ResponseInputParam, ResponsesAPIResponse
RESPONSES_API_PREVIOUS_RESPONSES_CACHE = InMemoryCache()
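# Cap on how many cached responses are walked when reconstructing a session chain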
MAX_PREV_SESSION_INPUTS = 50
class ResponsesAPISessionElement(TypedDict, total=False):
input: Union[str, ResponseInputParam]
output: ResponsesAPIResponse
response_id: str
previous_response_id: Optional[str]
class SessionHandler:
def add_completed_response_to_cache(
self, response_id: str, session_element: ResponsesAPISessionElement
):
RESPONSES_API_PREVIOUS_RESPONSES_CACHE.set_cache(
key=response_id, value=session_element
)
def get_chain_of_previous_input_output_pairs(
self, previous_response_id: str
) -> List[Tuple[ResponseInputParam, ResponsesAPIResponse]]:
response_api_inputs: List[Tuple[ResponseInputParam, ResponsesAPIResponse]] = []
current_previous_response_id = previous_response_id
count_session_elements = 0
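# Walk the previous_response_id chain backwards (newest first), collecting
# (input, output) pairs until the chain ends or MAX_PREV_SESSION_INPUTS is reached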
while current_previous_response_id:
if count_session_elements > MAX_PREV_SESSION_INPUTS:
break
session_element = RESPONSES_API_PREVIOUS_RESPONSES_CACHE.get_cache(
key=current_previous_response_id
)
if session_element:
response_api_inputs.append(
(session_element.get("input"), session_element.get("output"))
)
current_previous_response_id = session_element.get(
"previous_response_id"
)
else:
break
count_session_elements += 1
return response_api_inputs
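
To make the session flow above concrete, here is a minimal sketch using the classes defined in this file; the ids, inputs, and the first_response / second_response objects are placeholders, not real values.

session_handler = SessionHandler()
session_handler.add_completed_response_to_cache(
    response_id="resp_1",
    session_element=ResponsesAPISessionElement(
        input="What is LiteLLM?",
        output=first_response,  # placeholder ResponsesAPIResponse
        response_id="resp_1",
        previous_response_id=None,
    ),
)
session_handler.add_completed_response_to_cache(
    response_id="resp_2",
    session_element=ResponsesAPISessionElement(
        input="Does it support streaming?",
        output=second_response,  # placeholder ResponsesAPIResponse
        response_id="resp_2",
        previous_response_id="resp_1",
    ),
)
# Walking back from the newest id returns (input, output) pairs newest-first:
# [(input_2, output_2), (input_1, output_1)]
pairs = session_handler.get_chain_of_previous_input_output_pairs(
    previous_response_id="resp_2"
)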

View File

@@ -0,0 +1,157 @@
from typing import List, Optional, Union
import litellm
from litellm.main import stream_chunk_builder
from litellm.responses.litellm_completion_transformation.transformation import (
LiteLLMCompletionResponsesConfig,
)
from litellm.responses.streaming_iterator import ResponsesAPIStreamingIterator
from litellm.types.llms.openai import (
OutputTextDeltaEvent,
ResponseCompletedEvent,
ResponseInputParam,
ResponsesAPIOptionalRequestParams,
ResponsesAPIStreamEvents,
ResponsesAPIStreamingResponse,
)
from litellm.types.utils import Delta as ChatCompletionDelta
from litellm.types.utils import (
ModelResponse,
ModelResponseStream,
StreamingChoices,
TextCompletionResponse,
)
class LiteLLMCompletionStreamingIterator(ResponsesAPIStreamingIterator):
"""
Iterator (sync and async) that adapts a litellm chat completion stream into Responses API streaming events.
"""
def __init__(
self,
litellm_custom_stream_wrapper: litellm.CustomStreamWrapper,
request_input: Union[str, ResponseInputParam],
responses_api_request: ResponsesAPIOptionalRequestParams,
):
self.litellm_custom_stream_wrapper: litellm.CustomStreamWrapper = (
litellm_custom_stream_wrapper
)
self.request_input: Union[str, ResponseInputParam] = request_input
self.responses_api_request: ResponsesAPIOptionalRequestParams = (
responses_api_request
)
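# Chunks are accumulated so the final ResponseCompletedEvent can be rebuilt
# with stream_chunk_builder once the underlying stream is exhausted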
self.collected_chat_completion_chunks: List[ModelResponseStream] = []
self.finished: bool = False
async def __anext__(
self,
) -> Union[ResponsesAPIStreamingResponse, ResponseCompletedEvent]:
try:
while True:
if self.finished is True:
raise StopAsyncIteration
# Get the next chunk from the stream
try:
chunk = await self.litellm_custom_stream_wrapper.__anext__()
self.collected_chat_completion_chunks.append(chunk)
response_api_chunk = (
self._transform_chat_completion_chunk_to_response_api_chunk(
chunk
)
)
if response_api_chunk:
return response_api_chunk
except StopAsyncIteration:
self.finished = True
response_completed_event = self._emit_response_completed_event()
if response_completed_event:
return response_completed_event
else:
raise StopAsyncIteration
except Exception as e:
# Handle HTTP errors
self.finished = True
raise e
def __iter__(self):
return self
def __next__(
self,
) -> Union[ResponsesAPIStreamingResponse, ResponseCompletedEvent]:
try:
while True:
if self.finished is True:
raise StopIteration
# Get the next chunk from the stream
try:
chunk = self.litellm_custom_stream_wrapper.__next__()
self.collected_chat_completion_chunks.append(chunk)
response_api_chunk = (
self._transform_chat_completion_chunk_to_response_api_chunk(
chunk
)
)
if response_api_chunk:
return response_api_chunk
except StopIteration:
self.finished = True
response_completed_event = self._emit_response_completed_event()
if response_completed_event:
return response_completed_event
else:
raise StopIteration
except Exception as e:
# Handle HTTP errors
self.finished = True
raise e
def _transform_chat_completion_chunk_to_response_api_chunk(
self, chunk: ModelResponseStream
) -> Optional[ResponsesAPIStreamingResponse]:
"""
Transform a chat completion chunk to a response API chunk.
This currently only emits the OutputTextDeltaEvent, which is what tools built on the Responses API consume.
"""
return OutputTextDeltaEvent(
type=ResponsesAPIStreamEvents.OUTPUT_TEXT_DELTA,
item_id=chunk.id,
output_index=0,
content_index=0,
delta=self._get_delta_string_from_streaming_choices(chunk.choices),
)
def _get_delta_string_from_streaming_choices(
self, choices: List[StreamingChoices]
) -> str:
"""
Get the delta string from the streaming choices
For now this collects the first choice's delta string.
It's unclear how users expect LiteLLM to translate multiple choices per chunk into Responses API output.
"""
choice = choices[0]
chat_completion_delta: ChatCompletionDelta = choice.delta
return chat_completion_delta.content or ""
def _emit_response_completed_event(self) -> Optional[ResponseCompletedEvent]:
litellm_model_response: Optional[
Union[ModelResponse, TextCompletionResponse]
] = stream_chunk_builder(chunks=self.collected_chat_completion_chunks)
if litellm_model_response and isinstance(litellm_model_response, ModelResponse):
return ResponseCompletedEvent(
type=ResponsesAPIStreamEvents.RESPONSE_COMPLETED,
response=LiteLLMCompletionResponsesConfig.transform_chat_completion_response_to_responses_api_response(
request_input=self.request_input,
chat_completion_response=litellm_model_response,
responses_api_request=self.responses_api_request,
),
)
else:
return None
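
For orientation, a hypothetical consumer of the iterator above; stream_wrapper is a placeholder for the litellm.CustomStreamWrapper returned by litellm.completion(..., stream=True).

iterator = LiteLLMCompletionStreamingIterator(
    litellm_custom_stream_wrapper=stream_wrapper,  # placeholder stream
    request_input="hi",
    responses_api_request={},
)
for event in iterator:
    if event.type == ResponsesAPIStreamEvents.OUTPUT_TEXT_DELTA:
        print(event.delta, end="")  # incremental text
    elif event.type == ResponsesAPIStreamEvents.RESPONSE_COMPLETED:
        final_response = event.response  # ResponsesAPIResponse rebuilt from all chunks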

View File

@@ -0,0 +1,664 @@
"""
Handles transforming from Responses API -> LiteLLM completion (Chat Completion API)
"""
from typing import Any, Dict, List, Optional, Union
from openai.types.responses.tool_param import FunctionToolParam
from litellm.caching import InMemoryCache
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
from litellm.responses.litellm_completion_transformation.session_handler import (
ResponsesAPISessionElement,
SessionHandler,
)
from litellm.types.llms.openai import (
AllMessageValues,
ChatCompletionResponseMessage,
ChatCompletionSystemMessage,
ChatCompletionToolCallChunk,
ChatCompletionToolCallFunctionChunk,
ChatCompletionToolMessage,
ChatCompletionToolParam,
ChatCompletionToolParamFunctionChunk,
ChatCompletionUserMessage,
GenericChatCompletionMessage,
Reasoning,
ResponseAPIUsage,
ResponseInputParam,
ResponsesAPIOptionalRequestParams,
ResponsesAPIResponse,
ResponseTextConfig,
)
from litellm.types.responses.main import (
GenericResponseOutputItem,
GenericResponseOutputItemContentAnnotation,
OutputFunctionToolCall,
OutputText,
)
from litellm.types.utils import (
ChatCompletionAnnotation,
ChatCompletionMessageToolCall,
Choices,
Function,
Message,
ModelResponse,
Usage,
)
########### Initialize Classes used for Responses API ###########
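# Maps tool_call_id -> the original tool call definition, so tool outputs sent in a
# later request can be re-paired with the call that produced them (providers like
# Anthropic require the tool use definition alongside the tool output)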
TOOL_CALLS_CACHE = InMemoryCache()
RESPONSES_API_SESSION_HANDLER = SessionHandler()
########### End of Initialize Classes used for Responses API ###########
class LiteLLMCompletionResponsesConfig:
@staticmethod
def get_supported_openai_params(model: str) -> list:
"""
The LiteLLM adapter from the OpenAI Responses API to the Chat Completion API supports a subset of the OpenAI Responses API params
"""
return [
"input",
"model",
"instructions",
"max_output_tokens",
"metadata",
"parallel_tool_calls",
"previous_response_id",
"stream",
"temperature",
"tool_choice",
"tools",
"top_p",
"user",
]
@staticmethod
def transform_responses_api_request_to_chat_completion_request(
model: str,
input: Union[str, ResponseInputParam],
responses_api_request: ResponsesAPIOptionalRequestParams,
custom_llm_provider: Optional[str] = None,
stream: Optional[bool] = None,
**kwargs,
) -> dict:
"""
Transform a Responses API request into a Chat Completion request
"""
litellm_completion_request: dict = {
"messages": LiteLLMCompletionResponsesConfig.transform_responses_api_input_to_messages(
input=input,
responses_api_request=responses_api_request,
previous_response_id=responses_api_request.get("previous_response_id"),
),
"model": model,
"tool_choice": responses_api_request.get("tool_choice"),
"tools": LiteLLMCompletionResponsesConfig.transform_responses_api_tools_to_chat_completion_tools(
responses_api_request.get("tools") or [] # type: ignore
),
"top_p": responses_api_request.get("top_p"),
"user": responses_api_request.get("user"),
"temperature": responses_api_request.get("temperature"),
"parallel_tool_calls": responses_api_request.get("parallel_tool_calls"),
"max_tokens": responses_api_request.get("max_output_tokens"),
"stream": stream,
"metadata": kwargs.get("metadata"),
"service_tier": kwargs.get("service_tier"),
# litellm specific params
"custom_llm_provider": custom_llm_provider,
}
# Responses API `Completed` events require usage, so we pass `stream_options` to litellm.completion to include usage in the stream
if stream is True:
stream_options = {
"include_usage": True,
}
litellm_completion_request["stream_options"] = stream_options
litellm_logging_obj: Optional[LiteLLMLoggingObj] = kwargs.get(
"litellm_logging_obj"
)
if litellm_logging_obj:
litellm_logging_obj.stream_options = stream_options
# only pass non-None values
litellm_completion_request = {
k: v for k, v in litellm_completion_request.items() if v is not None
}
return litellm_completion_request
@staticmethod
def transform_responses_api_input_to_messages(
input: Union[str, ResponseInputParam],
responses_api_request: ResponsesAPIOptionalRequestParams,
previous_response_id: Optional[str] = None,
) -> List[
Union[
AllMessageValues,
GenericChatCompletionMessage,
ChatCompletionMessageToolCall,
ChatCompletionResponseMessage,
]
]:
"""
Transform a Responses API input into a list of messages
"""
messages: List[
Union[
AllMessageValues,
GenericChatCompletionMessage,
ChatCompletionMessageToolCall,
ChatCompletionResponseMessage,
]
] = []
if responses_api_request.get("instructions"):
messages.append(
LiteLLMCompletionResponsesConfig.transform_instructions_to_system_message(
responses_api_request.get("instructions")
)
)
if previous_response_id:
previous_response_pairs = (
RESPONSES_API_SESSION_HANDLER.get_chain_of_previous_input_output_pairs(
previous_response_id=previous_response_id
)
)
if previous_response_pairs:
for previous_response_pair in previous_response_pairs:
chat_completion_input_messages = LiteLLMCompletionResponsesConfig._transform_response_input_param_to_chat_completion_message(
input=previous_response_pair[0],
)
chat_completion_output_messages = LiteLLMCompletionResponsesConfig._transform_responses_api_outputs_to_chat_completion_messages(
responses_api_output=previous_response_pair[1],
)
messages.extend(chat_completion_input_messages)
messages.extend(chat_completion_output_messages)
messages.extend(
LiteLLMCompletionResponsesConfig._transform_response_input_param_to_chat_completion_message(
input=input,
)
)
return messages
@staticmethod
def _transform_response_input_param_to_chat_completion_message(
input: Union[str, ResponseInputParam],
) -> List[
Union[
AllMessageValues,
GenericChatCompletionMessage,
ChatCompletionMessageToolCall,
ChatCompletionResponseMessage,
]
]:
"""
Transform a ResponseInputParam into a Chat Completion message
"""
messages: List[
Union[
AllMessageValues,
GenericChatCompletionMessage,
ChatCompletionMessageToolCall,
ChatCompletionResponseMessage,
]
] = []
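# Tool call outputs are buffered separately and appended after the other input
# items (see the extend at the end of this method)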
tool_call_output_messages: List[
Union[
AllMessageValues,
GenericChatCompletionMessage,
ChatCompletionMessageToolCall,
ChatCompletionResponseMessage,
]
] = []
if isinstance(input, str):
messages.append(ChatCompletionUserMessage(role="user", content=input))
elif isinstance(input, list):
for _input in input:
chat_completion_messages = LiteLLMCompletionResponsesConfig._transform_responses_api_input_item_to_chat_completion_message(
input_item=_input
)
if LiteLLMCompletionResponsesConfig._is_input_item_tool_call_output(
input_item=_input
):
tool_call_output_messages.extend(chat_completion_messages)
else:
messages.extend(chat_completion_messages)
messages.extend(tool_call_output_messages)
return messages
@staticmethod
def _ensure_tool_call_output_has_corresponding_tool_call(
messages: List[Union[AllMessageValues, GenericChatCompletionMessage]],
) -> bool:
"""
If any tool call output is present, ensure there is a corresponding tool call/tool_use block
"""
for message in messages:
if message.get("role") == "tool":
return True
return False
@staticmethod
def _transform_responses_api_input_item_to_chat_completion_message(
input_item: Any,
) -> List[
Union[
AllMessageValues,
GenericChatCompletionMessage,
ChatCompletionResponseMessage,
]
]:
"""
Transform a Responses API input item into a Chat Completion message
- EasyInputMessageParam
- Message
- ResponseOutputMessageParam
- ResponseFileSearchToolCallParam
- ResponseComputerToolCallParam
- ComputerCallOutput
- ResponseFunctionWebSearchParam
- ResponseFunctionToolCallParam
- FunctionCallOutput
- ResponseReasoningItemParam
- ItemReference
"""
if LiteLLMCompletionResponsesConfig._is_input_item_tool_call_output(input_item):
# handle executed tool call results
return LiteLLMCompletionResponsesConfig._transform_responses_api_tool_call_output_to_chat_completion_message(
tool_call_output=input_item
)
else:
return [
GenericChatCompletionMessage(
role=input_item.get("role") or "user",
content=LiteLLMCompletionResponsesConfig._transform_responses_api_content_to_chat_completion_content(
input_item.get("content")
),
)
]
@staticmethod
def _is_input_item_tool_call_output(input_item: Any) -> bool:
"""
Check if the input item is a tool call output
"""
return input_item.get("type") in [
"function_call_output",
"web_search_call",
"computer_call_output",
]
@staticmethod
def _transform_responses_api_tool_call_output_to_chat_completion_message(
tool_call_output: Dict[str, Any],
) -> List[
Union[
AllMessageValues,
GenericChatCompletionMessage,
ChatCompletionResponseMessage,
]
]:
"""
ChatCompletionToolMessage is used to indicate the output from a tool call
"""
tool_output_message = ChatCompletionToolMessage(
role="tool",
content=tool_call_output.get("output") or "",
tool_call_id=tool_call_output.get("call_id") or "",
)
_tool_use_definition = TOOL_CALLS_CACHE.get_cache(
key=tool_call_output.get("call_id") or "",
)
if _tool_use_definition:
"""
Append the tool use definition to the list of messages.
Providers like Anthropic require the tool use definition to be included with the tool output.
- Input:
{'function':
arguments:'{"command": ["echo","<html>\\n<head>\\n <title>Hello</title>\\n</head>\\n<body>\\n <h1>Hi</h1>\\n</body>\\n</html>",">","index.html"]}',
name='shell',
'id': 'toolu_018KFWsEySHjdKZPdUzXpymJ',
'type': 'function'
}
- Output:
{
"id": "toolu_018KFWsEySHjdKZPdUzXpymJ",
"type": "function",
"function": {
"name": "get_weather",
"arguments": "{\"latitude\":48.8566,\"longitude\":2.3522}"
}
}
"""
function: dict = _tool_use_definition.get("function") or {}
tool_call_chunk = ChatCompletionToolCallChunk(
id=_tool_use_definition.get("id") or "",
type=_tool_use_definition.get("type") or "function",
function=ChatCompletionToolCallFunctionChunk(
name=function.get("name") or "",
arguments=function.get("arguments") or "",
),
index=0,
)
chat_completion_response_message = ChatCompletionResponseMessage(
tool_calls=[tool_call_chunk],
role="assistant",
)
return [chat_completion_response_message, tool_output_message]
return [tool_output_message]
@staticmethod
def _transform_responses_api_content_to_chat_completion_content(
content: Any,
) -> Union[str, List[Union[str, Dict[str, Any]]]]:
"""
Transform Responses API content into Chat Completion content
"""
if isinstance(content, str):
return content
elif isinstance(content, list):
content_list: List[Union[str, Dict[str, Any]]] = []
for item in content:
if isinstance(item, str):
content_list.append(item)
elif isinstance(item, dict):
content_list.append(
{
"type": LiteLLMCompletionResponsesConfig._get_chat_completion_request_content_type(
item.get("type") or "text"
),
"text": item.get("text"),
}
)
return content_list
else:
raise ValueError(f"Invalid content type: {type(content)}")
@staticmethod
def _get_chat_completion_request_content_type(content_type: str) -> str:
"""
Get the Chat Completion request content type
"""
# Responses API content has `input_` prefix, if it exists, remove it
if content_type.startswith("input_"):
return content_type[len("input_") :]
else:
return content_type
@staticmethod
def transform_instructions_to_system_message(
instructions: Optional[str],
) -> ChatCompletionSystemMessage:
"""
Transform an instructions string into a system message
"""
return ChatCompletionSystemMessage(role="system", content=instructions or "")
@staticmethod
def transform_responses_api_tools_to_chat_completion_tools(
tools: Optional[List[FunctionToolParam]],
) -> List[ChatCompletionToolParam]:
"""
Transform Responses API tools into Chat Completion tools
"""
if tools is None:
return []
chat_completion_tools: List[ChatCompletionToolParam] = []
for tool in tools:
chat_completion_tools.append(
ChatCompletionToolParam(
type="function",
function=ChatCompletionToolParamFunctionChunk(
name=tool["name"],
description=tool.get("description") or "",
parameters=tool.get("parameters", {}),
strict=tool.get("strict", False),
),
)
)
return chat_completion_tools
@staticmethod
def transform_chat_completion_tools_to_responses_tools(
chat_completion_response: ModelResponse,
) -> List[OutputFunctionToolCall]:
"""
Transform Chat Completion tool calls into Responses API tool calls
"""
all_chat_completion_tools: List[ChatCompletionMessageToolCall] = []
for choice in chat_completion_response.choices:
if isinstance(choice, Choices):
if choice.message.tool_calls:
all_chat_completion_tools.extend(choice.message.tool_calls)
for tool_call in choice.message.tool_calls:
TOOL_CALLS_CACHE.set_cache(
key=tool_call.id,
value=tool_call,
)
responses_tools: List[OutputFunctionToolCall] = []
for tool in all_chat_completion_tools:
if tool.type == "function":
function_definition = tool.function
responses_tools.append(
OutputFunctionToolCall(
name=function_definition.name or "",
arguments=function_definition.get("arguments") or "",
call_id=tool.id or "",
id=tool.id or "",
type="function_call", # critical this is "function_call" to work with tools like openai codex
status=function_definition.get("status") or "completed",
)
)
return responses_tools
@staticmethod
def transform_chat_completion_response_to_responses_api_response(
request_input: Union[str, ResponseInputParam],
responses_api_request: ResponsesAPIOptionalRequestParams,
chat_completion_response: ModelResponse,
) -> ResponsesAPIResponse:
"""
Transform a Chat Completion response into a Responses API response
"""
responses_api_response: ResponsesAPIResponse = ResponsesAPIResponse(
id=chat_completion_response.id,
created_at=chat_completion_response.created,
model=chat_completion_response.model,
object=chat_completion_response.object,
error=getattr(chat_completion_response, "error", None),
incomplete_details=getattr(
chat_completion_response, "incomplete_details", None
),
instructions=getattr(chat_completion_response, "instructions", None),
metadata=getattr(chat_completion_response, "metadata", {}),
output=LiteLLMCompletionResponsesConfig._transform_chat_completion_choices_to_responses_output(
chat_completion_response=chat_completion_response,
choices=getattr(chat_completion_response, "choices", []),
),
parallel_tool_calls=getattr(
chat_completion_response, "parallel_tool_calls", False
),
temperature=getattr(chat_completion_response, "temperature", 0),
tool_choice=getattr(chat_completion_response, "tool_choice", "auto"),
tools=getattr(chat_completion_response, "tools", []),
top_p=getattr(chat_completion_response, "top_p", None),
max_output_tokens=getattr(
chat_completion_response, "max_output_tokens", None
),
previous_response_id=getattr(
chat_completion_response, "previous_response_id", None
),
reasoning=Reasoning(),
status=getattr(chat_completion_response, "status", "completed"),
text=ResponseTextConfig(),
truncation=getattr(chat_completion_response, "truncation", None),
usage=LiteLLMCompletionResponsesConfig._transform_chat_completion_usage_to_responses_usage(
chat_completion_response=chat_completion_response
),
user=getattr(chat_completion_response, "user", None),
)
RESPONSES_API_SESSION_HANDLER.add_completed_response_to_cache(
response_id=responses_api_response.id,
session_element=ResponsesAPISessionElement(
input=request_input,
output=responses_api_response,
response_id=responses_api_response.id,
previous_response_id=responses_api_request.get("previous_response_id"),
),
)
return responses_api_response
@staticmethod
def _transform_chat_completion_choices_to_responses_output(
chat_completion_response: ModelResponse,
choices: List[Choices],
) -> List[Union[GenericResponseOutputItem, OutputFunctionToolCall]]:
responses_output: List[
Union[GenericResponseOutputItem, OutputFunctionToolCall]
] = []
for choice in choices:
responses_output.append(
GenericResponseOutputItem(
type="message",
id=chat_completion_response.id,
status=choice.finish_reason,
role=choice.message.role,
content=[
LiteLLMCompletionResponsesConfig._transform_chat_message_to_response_output_text(
choice.message
)
],
)
)
tool_calls = LiteLLMCompletionResponsesConfig.transform_chat_completion_tools_to_responses_tools(
chat_completion_response=chat_completion_response
)
responses_output.extend(tool_calls)
return responses_output
@staticmethod
def _transform_responses_api_outputs_to_chat_completion_messages(
responses_api_output: ResponsesAPIResponse,
) -> List[
Union[
AllMessageValues,
GenericChatCompletionMessage,
ChatCompletionMessageToolCall,
]
]:
messages: List[
Union[
AllMessageValues,
GenericChatCompletionMessage,
ChatCompletionMessageToolCall,
]
] = []
output_items = responses_api_output.output
for _output_item in output_items:
output_item: dict = dict(_output_item)
if output_item.get("type") == "function_call":
# handle function call output
messages.append(
LiteLLMCompletionResponsesConfig._transform_responses_output_tool_call_to_chat_completion_output_tool_call(
tool_call=output_item
)
)
else:
# transform as generic ResponseOutputItem
messages.append(
GenericChatCompletionMessage(
role=str(output_item.get("role") or "user"),
content=LiteLLMCompletionResponsesConfig._transform_responses_api_content_to_chat_completion_content(
output_item.get("content")
),
)
)
return messages
@staticmethod
def _transform_responses_output_tool_call_to_chat_completion_output_tool_call(
tool_call: dict,
) -> ChatCompletionMessageToolCall:
return ChatCompletionMessageToolCall(
id=tool_call.get("id") or "",
type="function",
function=Function(
name=tool_call.get("name") or "",
arguments=tool_call.get("arguments") or "",
),
)
@staticmethod
def _transform_chat_message_to_response_output_text(
message: Message,
) -> OutputText:
return OutputText(
type="output_text",
text=message.content,
annotations=LiteLLMCompletionResponsesConfig._transform_chat_completion_annotations_to_response_output_annotations(
annotations=getattr(message, "annotations", None)
),
)
@staticmethod
def _transform_chat_completion_annotations_to_response_output_annotations(
annotations: Optional[List[ChatCompletionAnnotation]],
) -> List[GenericResponseOutputItemContentAnnotation]:
response_output_annotations: List[
GenericResponseOutputItemContentAnnotation
] = []
if annotations is None:
return response_output_annotations
for annotation in annotations:
annotation_type = annotation.get("type")
if annotation_type == "url_citation" and "url_citation" in annotation:
url_citation = annotation["url_citation"]
response_output_annotations.append(
GenericResponseOutputItemContentAnnotation(
type=annotation_type,
start_index=url_citation.get("start_index"),
end_index=url_citation.get("end_index"),
url=url_citation.get("url"),
title=url_citation.get("title"),
)
)
# Handle other annotation types here
return response_output_annotations
@staticmethod
def _transform_chat_completion_usage_to_responses_usage(
chat_completion_response: ModelResponse,
) -> ResponseAPIUsage:
usage: Optional[Usage] = getattr(chat_completion_response, "usage", None)
if usage is None:
return ResponseAPIUsage(
input_tokens=0,
output_tokens=0,
total_tokens=0,
)
return ResponseAPIUsage(
input_tokens=usage.prompt_tokens,
output_tokens=usage.completion_tokens,
total_tokens=usage.total_tokens,
)
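
As a rough sketch of the request-side transform above (the model name and parameter values are illustrative), a Responses API request collapses into a litellm.completion-style dict:

chat_request = LiteLLMCompletionResponsesConfig.transform_responses_api_request_to_chat_completion_request(
    model="gpt-4o",
    input="Tell me a joke",
    responses_api_request={
        "instructions": "You are terse.",
        "temperature": 0.2,
    },
    stream=False,
)
# chat_request roughly contains:
#   "messages": [{"role": "system", "content": "You are terse."},
#                {"role": "user", "content": "Tell me a joke"}],
#   "model": "gpt-4o",
#   "temperature": 0.2,
#   plus any other non-None fields (tools, stream, ...)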

View File

@@ -0,0 +1,436 @@
import asyncio
import contextvars
from functools import partial
from typing import Any, Coroutine, Dict, Iterable, List, Literal, Optional, Union
import httpx
import litellm
from litellm.constants import request_timeout
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
from litellm.llms.base_llm.responses.transformation import BaseResponsesAPIConfig
from litellm.llms.custom_httpx.llm_http_handler import BaseLLMHTTPHandler
from litellm.responses.litellm_completion_transformation.handler import (
LiteLLMCompletionTransformationHandler,
)
from litellm.responses.utils import ResponsesAPIRequestUtils
from litellm.types.llms.openai import (
Reasoning,
ResponseIncludable,
ResponseInputParam,
ResponsesAPIOptionalRequestParams,
ResponsesAPIResponse,
ResponseTextConfigParam,
ToolChoice,
ToolParam,
)
from litellm.types.responses.main import *
from litellm.types.router import GenericLiteLLMParams
from litellm.utils import ProviderConfigManager, client
from .streaming_iterator import BaseResponsesAPIStreamingIterator
####### ENVIRONMENT VARIABLES ###################
# Initialize any necessary instances or variables here
base_llm_http_handler = BaseLLMHTTPHandler()
litellm_completion_transformation_handler = LiteLLMCompletionTransformationHandler()
#################################################
@client
async def aresponses(
input: Union[str, ResponseInputParam],
model: str,
include: Optional[List[ResponseIncludable]] = None,
instructions: Optional[str] = None,
max_output_tokens: Optional[int] = None,
metadata: Optional[Dict[str, Any]] = None,
parallel_tool_calls: Optional[bool] = None,
previous_response_id: Optional[str] = None,
reasoning: Optional[Reasoning] = None,
store: Optional[bool] = None,
stream: Optional[bool] = None,
temperature: Optional[float] = None,
text: Optional[ResponseTextConfigParam] = None,
tool_choice: Optional[ToolChoice] = None,
tools: Optional[Iterable[ToolParam]] = None,
top_p: Optional[float] = None,
truncation: Optional[Literal["auto", "disabled"]] = None,
user: Optional[str] = None,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Optional[Dict[str, Any]] = None,
extra_query: Optional[Dict[str, Any]] = None,
extra_body: Optional[Dict[str, Any]] = None,
timeout: Optional[Union[float, httpx.Timeout]] = None,
# LiteLLM specific params,
custom_llm_provider: Optional[str] = None,
**kwargs,
) -> Union[ResponsesAPIResponse, BaseResponsesAPIStreamingIterator]:
"""
Async: Handles responses API requests by reusing the synchronous function
"""
local_vars = locals()
try:
loop = asyncio.get_event_loop()
kwargs["aresponses"] = True
# get custom llm provider so we can use this for mapping exceptions
if custom_llm_provider is None:
_, custom_llm_provider, _, _ = litellm.get_llm_provider(
model=model, api_base=local_vars.get("base_url", None)
)
func = partial(
responses,
input=input,
model=model,
include=include,
instructions=instructions,
max_output_tokens=max_output_tokens,
metadata=metadata,
parallel_tool_calls=parallel_tool_calls,
previous_response_id=previous_response_id,
reasoning=reasoning,
store=store,
stream=stream,
temperature=temperature,
text=text,
tool_choice=tool_choice,
tools=tools,
top_p=top_p,
truncation=truncation,
user=user,
extra_headers=extra_headers,
extra_query=extra_query,
extra_body=extra_body,
timeout=timeout,
custom_llm_provider=custom_llm_provider,
**kwargs,
)
ctx = contextvars.copy_context()
func_with_context = partial(ctx.run, func)
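# The sync `responses` entrypoint runs in an executor; with kwargs["aresponses"]
# set it may return a coroutine, which is awaited below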
init_response = await loop.run_in_executor(None, func_with_context)
if asyncio.iscoroutine(init_response):
response = await init_response
else:
response = init_response
# Update the responses_api_response_id with the model_id
if isinstance(response, ResponsesAPIResponse):
response = ResponsesAPIRequestUtils._update_responses_api_response_id_with_model_id(
responses_api_response=response,
litellm_metadata=kwargs.get("litellm_metadata", {}),
custom_llm_provider=custom_llm_provider,
)
return response
except Exception as e:
raise litellm.exception_type(
model=model,
custom_llm_provider=custom_llm_provider,
original_exception=e,
completion_kwargs=local_vars,
extra_kwargs=kwargs,
)
@client
def responses(
input: Union[str, ResponseInputParam],
model: str,
include: Optional[List[ResponseIncludable]] = None,
instructions: Optional[str] = None,
max_output_tokens: Optional[int] = None,
metadata: Optional[Dict[str, Any]] = None,
parallel_tool_calls: Optional[bool] = None,
previous_response_id: Optional[str] = None,
reasoning: Optional[Reasoning] = None,
store: Optional[bool] = None,
stream: Optional[bool] = None,
temperature: Optional[float] = None,
text: Optional[ResponseTextConfigParam] = None,
tool_choice: Optional[ToolChoice] = None,
tools: Optional[Iterable[ToolParam]] = None,
top_p: Optional[float] = None,
truncation: Optional[Literal["auto", "disabled"]] = None,
user: Optional[str] = None,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Optional[Dict[str, Any]] = None,
extra_query: Optional[Dict[str, Any]] = None,
extra_body: Optional[Dict[str, Any]] = None,
timeout: Optional[Union[float, httpx.Timeout]] = None,
# LiteLLM specific params,
custom_llm_provider: Optional[str] = None,
**kwargs,
):
"""
Synchronous version of the Responses API.
Uses the synchronous HTTP handler to make requests.
"""
local_vars = locals()
try:
litellm_logging_obj: LiteLLMLoggingObj = kwargs.get("litellm_logging_obj") # type: ignore
litellm_call_id: Optional[str] = kwargs.get("litellm_call_id", None)
_is_async = kwargs.pop("aresponses", False) is True
# get llm provider logic
litellm_params = GenericLiteLLMParams(**kwargs)
(
model,
custom_llm_provider,
dynamic_api_key,
dynamic_api_base,
) = litellm.get_llm_provider(
model=model,
custom_llm_provider=custom_llm_provider,
api_base=litellm_params.api_base,
api_key=litellm_params.api_key,
)
# get provider config
responses_api_provider_config: Optional[BaseResponsesAPIConfig] = (
ProviderConfigManager.get_provider_responses_api_config(
model=model,
provider=litellm.LlmProviders(custom_llm_provider),
)
)
local_vars.update(kwargs)
# Get ResponsesAPIOptionalRequestParams with only valid parameters
response_api_optional_params: ResponsesAPIOptionalRequestParams = (
ResponsesAPIRequestUtils.get_requested_response_api_optional_param(
local_vars
)
)
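# Providers without a native Responses API config are bridged through
# litellm.completion (Chat Completions)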
if responses_api_provider_config is None:
return litellm_completion_transformation_handler.response_api_handler(
model=model,
input=input,
responses_api_request=response_api_optional_params,
custom_llm_provider=custom_llm_provider,
_is_async=_is_async,
stream=stream,
**kwargs,
)
# Get optional parameters for the responses API
responses_api_request_params: Dict = (
ResponsesAPIRequestUtils.get_optional_params_responses_api(
model=model,
responses_api_provider_config=responses_api_provider_config,
response_api_optional_params=response_api_optional_params,
)
)
# Pre Call logging
litellm_logging_obj.update_environment_variables(
model=model,
user=user,
optional_params=dict(responses_api_request_params),
litellm_params={
"litellm_call_id": litellm_call_id,
**responses_api_request_params,
},
custom_llm_provider=custom_llm_provider,
)
# Call the handler with _is_async flag instead of directly calling the async handler
response = base_llm_http_handler.response_api_handler(
model=model,
input=input,
responses_api_provider_config=responses_api_provider_config,
response_api_optional_request_params=responses_api_request_params,
custom_llm_provider=custom_llm_provider,
litellm_params=litellm_params,
logging_obj=litellm_logging_obj,
extra_headers=extra_headers,
extra_body=extra_body,
timeout=timeout or request_timeout,
_is_async=_is_async,
client=kwargs.get("client"),
fake_stream=responses_api_provider_config.should_fake_stream(
model=model, stream=stream, custom_llm_provider=custom_llm_provider
),
litellm_metadata=kwargs.get("litellm_metadata", {}),
)
# Update the responses_api_response_id with the model_id
if isinstance(response, ResponsesAPIResponse):
response = ResponsesAPIRequestUtils._update_responses_api_response_id_with_model_id(
responses_api_response=response,
litellm_metadata=kwargs.get("litellm_metadata", {}),
custom_llm_provider=custom_llm_provider,
)
return response
except Exception as e:
raise litellm.exception_type(
model=model,
custom_llm_provider=custom_llm_provider,
original_exception=e,
completion_kwargs=local_vars,
extra_kwargs=kwargs,
)
@client
async def adelete_responses(
response_id: str,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Optional[Dict[str, Any]] = None,
extra_query: Optional[Dict[str, Any]] = None,
extra_body: Optional[Dict[str, Any]] = None,
timeout: Optional[Union[float, httpx.Timeout]] = None,
# LiteLLM specific params,
custom_llm_provider: Optional[str] = None,
**kwargs,
) -> DeleteResponseResult:
"""
Async version of the DELETE Responses API
DELETE /v1/responses/{response_id} endpoint in the responses API
"""
local_vars = locals()
try:
loop = asyncio.get_event_loop()
kwargs["adelete_responses"] = True
# get custom llm provider from response_id
decoded_response_id: DecodedResponseId = (
ResponsesAPIRequestUtils._decode_responses_api_response_id(
response_id=response_id,
)
)
response_id = decoded_response_id.get("response_id") or response_id
custom_llm_provider = (
decoded_response_id.get("custom_llm_provider") or custom_llm_provider
)
func = partial(
delete_responses,
response_id=response_id,
custom_llm_provider=custom_llm_provider,
extra_headers=extra_headers,
extra_query=extra_query,
extra_body=extra_body,
timeout=timeout,
**kwargs,
)
ctx = contextvars.copy_context()
func_with_context = partial(ctx.run, func)
init_response = await loop.run_in_executor(None, func_with_context)
if asyncio.iscoroutine(init_response):
response = await init_response
else:
response = init_response
return response
except Exception as e:
raise litellm.exception_type(
model=None,
custom_llm_provider=custom_llm_provider,
original_exception=e,
completion_kwargs=local_vars,
extra_kwargs=kwargs,
)
@client
def delete_responses(
response_id: str,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Optional[Dict[str, Any]] = None,
extra_query: Optional[Dict[str, Any]] = None,
extra_body: Optional[Dict[str, Any]] = None,
timeout: Optional[Union[float, httpx.Timeout]] = None,
# LiteLLM specific params,
custom_llm_provider: Optional[str] = None,
**kwargs,
) -> Union[DeleteResponseResult, Coroutine[Any, Any, DeleteResponseResult]]:
"""
Synchronous version of the DELETE Responses API
DELETE /v1/responses/{response_id} endpoint in the responses API
"""
local_vars = locals()
try:
litellm_logging_obj: LiteLLMLoggingObj = kwargs.get("litellm_logging_obj") # type: ignore
litellm_call_id: Optional[str] = kwargs.get("litellm_call_id", None)
_is_async = kwargs.pop("adelete_responses", False) is True
# get llm provider logic
litellm_params = GenericLiteLLMParams(**kwargs)
# get custom llm provider from response_id
decoded_response_id: DecodedResponseId = (
ResponsesAPIRequestUtils._decode_responses_api_response_id(
response_id=response_id,
)
)
response_id = decoded_response_id.get("response_id") or response_id
custom_llm_provider = (
decoded_response_id.get("custom_llm_provider") or custom_llm_provider
)
if custom_llm_provider is None:
raise ValueError("custom_llm_provider is required but passed as None")
# get provider config
responses_api_provider_config: Optional[BaseResponsesAPIConfig] = (
ProviderConfigManager.get_provider_responses_api_config(
model=None,
provider=litellm.LlmProviders(custom_llm_provider),
)
)
if responses_api_provider_config is None:
raise ValueError(
f"DELETE responses is not supported for {custom_llm_provider}"
)
local_vars.update(kwargs)
# Pre Call logging
litellm_logging_obj.update_environment_variables(
model=None,
optional_params={
"response_id": response_id,
},
litellm_params={
"litellm_call_id": litellm_call_id,
},
custom_llm_provider=custom_llm_provider,
)
# Call the handler with _is_async flag instead of directly calling the async handler
response = base_llm_http_handler.delete_response_api_handler(
response_id=response_id,
custom_llm_provider=custom_llm_provider,
responses_api_provider_config=responses_api_provider_config,
litellm_params=litellm_params,
logging_obj=litellm_logging_obj,
extra_headers=extra_headers,
extra_body=extra_body,
timeout=timeout or request_timeout,
_is_async=_is_async,
client=kwargs.get("client"),
)
return response
except Exception as e:
raise litellm.exception_type(
model=None,
custom_llm_provider=custom_llm_provider,
original_exception=e,
completion_kwargs=local_vars,
extra_kwargs=kwargs,
)
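
For orientation, a hypothetical end-to-end call through the entrypoints above; the model name is a placeholder, and litellm.responses assumes the function is re-exported at the package root (otherwise import it from the module above).

import litellm

# Providers without a native Responses API config are served by the
# litellm.completion bridge added in this commit.
response = litellm.responses(
    model="anthropic/claude-3-sonnet-20240229",
    input="Write a haiku about caching",
    instructions="Answer in exactly three lines.",
)
print(response.output)  # list of message / tool-call output items

# Follow-up turn, chained through the session cache via previous_response_id
follow_up = litellm.responses(
    model="anthropic/claude-3-sonnet-20240229",
    input="Now translate it to French",
    previous_response_id=response.id,
)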

View File

@@ -0,0 +1,336 @@
import asyncio
import json
from datetime import datetime
from typing import Any, Dict, Optional
import httpx
from litellm.constants import STREAM_SSE_DONE_STRING
from litellm.litellm_core_utils.asyncify import run_async_function
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
from litellm.litellm_core_utils.thread_pool_executor import executor
from litellm.llms.base_llm.responses.transformation import BaseResponsesAPIConfig
from litellm.responses.utils import ResponsesAPIRequestUtils
from litellm.types.llms.openai import (
OutputTextDeltaEvent,
ResponseCompletedEvent,
ResponsesAPIResponse,
ResponsesAPIStreamEvents,
ResponsesAPIStreamingResponse,
)
from litellm.utils import CustomStreamWrapper
class BaseResponsesAPIStreamingIterator:
"""
Base class for streaming iterators that process responses from the Responses API.
This class contains shared logic for both synchronous and asynchronous iterators.
"""
def __init__(
self,
response: httpx.Response,
model: str,
responses_api_provider_config: BaseResponsesAPIConfig,
logging_obj: LiteLLMLoggingObj,
litellm_metadata: Optional[Dict[str, Any]] = None,
custom_llm_provider: Optional[str] = None,
):
self.response = response
self.model = model
self.logging_obj = logging_obj
self.finished = False
self.responses_api_provider_config = responses_api_provider_config
self.completed_response: Optional[ResponsesAPIStreamingResponse] = None
self.start_time = datetime.now()
# set request kwargs
self.litellm_metadata = litellm_metadata
self.custom_llm_provider = custom_llm_provider
def _process_chunk(self, chunk) -> Optional[ResponsesAPIStreamingResponse]:
"""Process a single chunk of data from the stream"""
if not chunk:
return None
# Handle SSE format (data: {...})
chunk = CustomStreamWrapper._strip_sse_data_from_chunk(chunk)
if chunk is None:
return None
# Handle "[DONE]" marker
if chunk == STREAM_SSE_DONE_STRING:
self.finished = True
return None
try:
# Parse the JSON chunk
parsed_chunk = json.loads(chunk)
# Format as ResponsesAPIStreamingResponse
if isinstance(parsed_chunk, dict):
openai_responses_api_chunk = (
self.responses_api_provider_config.transform_streaming_response(
model=self.model,
parsed_chunk=parsed_chunk,
logging_obj=self.logging_obj,
)
)
# if "response" in parsed_chunk, then encode litellm specific information like custom_llm_provider
response_object = getattr(openai_responses_api_chunk, "response", None)
if response_object:
response = ResponsesAPIRequestUtils._update_responses_api_response_id_with_model_id(
responses_api_response=response_object,
litellm_metadata=self.litellm_metadata,
custom_llm_provider=self.custom_llm_provider,
)
setattr(openai_responses_api_chunk, "response", response)
# Store the completed response
if (
openai_responses_api_chunk
and openai_responses_api_chunk.type
== ResponsesAPIStreamEvents.RESPONSE_COMPLETED
):
self.completed_response = openai_responses_api_chunk
self._handle_logging_completed_response()
return openai_responses_api_chunk
return None
except json.JSONDecodeError:
# If we can't parse the chunk, continue
return None
def _handle_logging_completed_response(self):
"""Base implementation - should be overridden by subclasses"""
pass
class ResponsesAPIStreamingIterator(BaseResponsesAPIStreamingIterator):
"""
Async iterator for processing streaming responses from the Responses API.
"""
def __init__(
self,
response: httpx.Response,
model: str,
responses_api_provider_config: BaseResponsesAPIConfig,
logging_obj: LiteLLMLoggingObj,
litellm_metadata: Optional[Dict[str, Any]] = None,
custom_llm_provider: Optional[str] = None,
):
super().__init__(
response,
model,
responses_api_provider_config,
logging_obj,
litellm_metadata,
custom_llm_provider,
)
self.stream_iterator = response.aiter_lines()
def __aiter__(self):
return self
async def __anext__(self) -> ResponsesAPIStreamingResponse:
try:
while True:
# Get the next chunk from the stream
try:
chunk = await self.stream_iterator.__anext__()
except StopAsyncIteration:
self.finished = True
raise StopAsyncIteration
result = self._process_chunk(chunk)
if self.finished:
raise StopAsyncIteration
elif result is not None:
return result
# If result is None, continue the loop to get the next chunk
except httpx.HTTPError as e:
# Handle HTTP errors
self.finished = True
raise e
def _handle_logging_completed_response(self):
"""Handle logging for completed responses in async context"""
asyncio.create_task(
self.logging_obj.async_success_handler(
result=self.completed_response,
start_time=self.start_time,
end_time=datetime.now(),
cache_hit=None,
)
)
executor.submit(
self.logging_obj.success_handler,
result=self.completed_response,
cache_hit=None,
start_time=self.start_time,
end_time=datetime.now(),
)
class SyncResponsesAPIStreamingIterator(BaseResponsesAPIStreamingIterator):
"""
Synchronous iterator for processing streaming responses from the Responses API.
"""
def __init__(
self,
response: httpx.Response,
model: str,
responses_api_provider_config: BaseResponsesAPIConfig,
logging_obj: LiteLLMLoggingObj,
litellm_metadata: Optional[Dict[str, Any]] = None,
custom_llm_provider: Optional[str] = None,
):
super().__init__(
response,
model,
responses_api_provider_config,
logging_obj,
litellm_metadata,
custom_llm_provider,
)
self.stream_iterator = response.iter_lines()
def __iter__(self):
return self
def __next__(self):
try:
while True:
# Get the next chunk from the stream
try:
chunk = next(self.stream_iterator)
except StopIteration:
self.finished = True
raise StopIteration
result = self._process_chunk(chunk)
if self.finished:
raise StopIteration
elif result is not None:
return result
# If result is None, continue the loop to get the next chunk
except httpx.HTTPError as e:
# Handle HTTP errors
self.finished = True
raise e
def _handle_logging_completed_response(self):
"""Handle logging for completed responses in sync context"""
run_async_function(
async_function=self.logging_obj.async_success_handler,
result=self.completed_response,
start_time=self.start_time,
end_time=datetime.now(),
cache_hit=None,
)
executor.submit(
self.logging_obj.success_handler,
result=self.completed_response,
cache_hit=None,
start_time=self.start_time,
end_time=datetime.now(),
)
class MockResponsesAPIStreamingIterator(BaseResponsesAPIStreamingIterator):
"""
Mock iterator: fakes a stream by slicing the full response text into
5-character deltas, then emits a completed event.
Models like o1-pro don't support streaming, so we fake it.
"""
CHUNK_SIZE = 5
def __init__(
self,
response: httpx.Response,
model: str,
responses_api_provider_config: BaseResponsesAPIConfig,
logging_obj: LiteLLMLoggingObj,
litellm_metadata: Optional[Dict[str, Any]] = None,
custom_llm_provider: Optional[str] = None,
):
super().__init__(
response=response,
model=model,
responses_api_provider_config=responses_api_provider_config,
logging_obj=logging_obj,
litellm_metadata=litellm_metadata,
custom_llm_provider=custom_llm_provider,
)
# one-time transform
transformed = (
self.responses_api_provider_config.transform_response_api_response(
model=self.model,
raw_response=response,
logging_obj=logging_obj,
)
)
full_text = self._collect_text(transformed)
# build a list of 5-character delta events
deltas = [
OutputTextDeltaEvent(
type=ResponsesAPIStreamEvents.OUTPUT_TEXT_DELTA,
delta=full_text[i : i + self.CHUNK_SIZE],
item_id=transformed.id,
output_index=0,
content_index=0,
)
for i in range(0, len(full_text), self.CHUNK_SIZE)
]
# append the completed event
self._events = deltas + [
ResponseCompletedEvent(
type=ResponsesAPIStreamEvents.RESPONSE_COMPLETED,
response=transformed,
)
]
self._idx = 0
def __aiter__(self):
return self
async def __anext__(self) -> ResponsesAPIStreamingResponse:
if self._idx >= len(self._events):
raise StopAsyncIteration
evt = self._events[self._idx]
self._idx += 1
return evt
def __iter__(self):
return self
def __next__(self) -> ResponsesAPIStreamingResponse:
if self._idx >= len(self._events):
raise StopIteration
evt = self._events[self._idx]
self._idx += 1
return evt
def _collect_text(self, resp: ResponsesAPIResponse) -> str:
out = ""
for out_item in resp.output:
if out_item.type == "message":
for c in getattr(out_item, "content", []):
out += c.text
return out

View File

@@ -0,0 +1,204 @@
import base64
from typing import Any, Dict, Optional, Union, cast, get_type_hints
import litellm
from litellm._logging import verbose_logger
from litellm.llms.base_llm.responses.transformation import BaseResponsesAPIConfig
from litellm.types.llms.openai import (
ResponseAPIUsage,
ResponsesAPIOptionalRequestParams,
ResponsesAPIResponse,
)
from litellm.types.responses.main import DecodedResponseId
from litellm.types.utils import SpecialEnums, Usage
class ResponsesAPIRequestUtils:
"""Helper utils for constructing ResponseAPI requests"""
@staticmethod
def get_optional_params_responses_api(
model: str,
responses_api_provider_config: BaseResponsesAPIConfig,
response_api_optional_params: ResponsesAPIOptionalRequestParams,
) -> Dict:
"""
Get optional parameters for the responses API.
Args:
model: The model name
responses_api_provider_config: The provider configuration for the responses API
response_api_optional_params: The requested optional parameters to validate and map
Returns:
A dictionary of supported parameters for the responses API
"""
# Remove None values and internal parameters
# Get supported parameters for the model
supported_params = responses_api_provider_config.get_supported_openai_params(
model
)
# Check for unsupported parameters
unsupported_params = [
param
for param in response_api_optional_params
if param not in supported_params
]
if unsupported_params:
raise litellm.UnsupportedParamsError(
model=model,
message=f"The following parameters are not supported for model {model}: {', '.join(unsupported_params)}",
)
# Map parameters to provider-specific format
mapped_params = responses_api_provider_config.map_openai_params(
response_api_optional_params=response_api_optional_params,
model=model,
drop_params=litellm.drop_params,
)
return mapped_params
@staticmethod
def get_requested_response_api_optional_param(
params: Dict[str, Any],
) -> ResponsesAPIOptionalRequestParams:
"""
Filter parameters to only include those defined in ResponsesAPIOptionalRequestParams.
Args:
params: Dictionary of parameters to filter
Returns:
ResponsesAPIOptionalRequestParams instance with only the valid parameters
"""
valid_keys = get_type_hints(ResponsesAPIOptionalRequestParams).keys()
filtered_params = {
k: v for k, v in params.items() if k in valid_keys and v is not None
}
return cast(ResponsesAPIOptionalRequestParams, filtered_params)
@staticmethod
def _update_responses_api_response_id_with_model_id(
responses_api_response: ResponsesAPIResponse,
custom_llm_provider: Optional[str],
litellm_metadata: Optional[Dict[str, Any]] = None,
) -> ResponsesAPIResponse:
"""
Update the responses_api_response_id with model_id and custom_llm_provider
This builds a composite ID containing the custom LLM provider, model ID, and original response ID
"""
litellm_metadata = litellm_metadata or {}
model_info: Dict[str, Any] = litellm_metadata.get("model_info", {}) or {}
model_id = model_info.get("id")
updated_id = ResponsesAPIRequestUtils._build_responses_api_response_id(
model_id=model_id,
custom_llm_provider=custom_llm_provider,
response_id=responses_api_response.id,
)
responses_api_response.id = updated_id
return responses_api_response
@staticmethod
def _build_responses_api_response_id(
custom_llm_provider: Optional[str],
model_id: Optional[str],
response_id: str,
) -> str:
"""Build the responses_api_response_id"""
assembled_id: str = str(
SpecialEnums.LITELLM_MANAGED_RESPONSE_COMPLETE_STR.value
).format(custom_llm_provider, model_id, response_id)
base64_encoded_id: str = base64.b64encode(assembled_id.encode("utf-8")).decode(
"utf-8"
)
return f"resp_{base64_encoded_id}"
@staticmethod
def _decode_responses_api_response_id(
response_id: str,
) -> DecodedResponseId:
"""
Decode the responses_api_response_id
Returns:
DecodedResponseId: Structured mapping with custom_llm_provider, model_id, and response_id
"""
try:
# Remove prefix and decode
cleaned_id = response_id.replace("resp_", "")
decoded_id = base64.b64decode(cleaned_id.encode("utf-8")).decode("utf-8")
# Parse components using known prefixes
if ";" not in decoded_id:
return DecodedResponseId(
custom_llm_provider=None,
model_id=None,
response_id=response_id,
)
parts = decoded_id.split(";")
# Format: litellm:custom_llm_provider:{};model_id:{};response_id:{}
custom_llm_provider = None
model_id = None
if (
len(parts) >= 3
): # Full format with custom_llm_provider, model_id, and response_id
custom_llm_provider_part = parts[0]
model_id_part = parts[1]
response_part = parts[2]
custom_llm_provider = custom_llm_provider_part.replace(
"litellm:custom_llm_provider:", ""
)
model_id = model_id_part.replace("model_id:", "")
decoded_response_id = response_part.replace("response_id:", "")
else:
decoded_response_id = response_id
return DecodedResponseId(
custom_llm_provider=custom_llm_provider,
model_id=model_id,
response_id=decoded_response_id,
)
except Exception as e:
verbose_logger.debug(f"Error decoding response_id '{response_id}': {e}")
return DecodedResponseId(
custom_llm_provider=None,
model_id=None,
response_id=response_id,
)
class ResponseAPILoggingUtils:
@staticmethod
def _is_response_api_usage(usage: Union[dict, ResponseAPIUsage]) -> bool:
"""returns True if usage is from OpenAI Response API"""
if isinstance(usage, ResponseAPIUsage):
return True
if "input_tokens" in usage and "output_tokens" in usage:
return True
return False
@staticmethod
def _transform_response_api_usage_to_chat_usage(
usage: Union[dict, ResponseAPIUsage],
) -> Usage:
"""Tranforms the ResponseAPIUsage object to a Usage object"""
response_api_usage: ResponseAPIUsage = (
ResponseAPIUsage(**usage) if isinstance(usage, dict) else usage
)
prompt_tokens: int = response_api_usage.input_tokens or 0
completion_tokens: int = response_api_usage.output_tokens or 0
return Usage(
prompt_tokens=prompt_tokens,
completion_tokens=completion_tokens,
total_tokens=prompt_tokens + completion_tokens,
)
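
Finally, a rough round trip through the composite response-id helpers above; the provider and model id values are illustrative, and the assembled format is assumed to match the one shown in the decode comment.

composite_id = ResponsesAPIRequestUtils._build_responses_api_response_id(
    custom_llm_provider="openai",
    model_id="my-deployment-123",
    response_id="resp_abc",
)
# composite_id is "resp_" + base64 of
# "litellm:custom_llm_provider:openai;model_id:my-deployment-123;response_id:resp_abc"

decoded = ResponsesAPIRequestUtils._decode_responses_api_response_id(composite_id)
assert decoded.get("custom_llm_provider") == "openai"
assert decoded.get("model_id") == "my-deployment-123"
assert decoded.get("response_id") == "resp_abc"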