structure saas with tools

Davidson Gomes
2025-04-25 15:30:54 -03:00
commit 1aef473937
16434 changed files with 6584257 additions and 0 deletions


@@ -0,0 +1 @@
from .handler import AnthropicChatCompletion, ModelResponseIterator


@@ -0,0 +1,846 @@
"""
Calling + translation logic for anthropic's `/v1/messages` endpoint
"""
import copy
import json
from typing import Any, Callable, Dict, List, Optional, Tuple, Union, cast
import httpx # type: ignore
import litellm
import litellm.litellm_core_utils
import litellm.types
import litellm.types.utils
from litellm import LlmProviders
from litellm.litellm_core_utils.core_helpers import map_finish_reason
from litellm.llms.base_llm.chat.transformation import BaseConfig
from litellm.llms.custom_httpx.http_handler import (
AsyncHTTPHandler,
HTTPHandler,
get_async_httpx_client,
)
from litellm.types.llms.anthropic import (
ContentBlockDelta,
ContentBlockStart,
ContentBlockStop,
MessageBlockDelta,
MessageStartBlock,
UsageDelta,
)
from litellm.types.llms.openai import (
ChatCompletionRedactedThinkingBlock,
ChatCompletionThinkingBlock,
ChatCompletionToolCallChunk,
)
from litellm.types.utils import (
Delta,
GenericStreamingChunk,
ModelResponseStream,
StreamingChoices,
Usage,
)
from litellm.utils import CustomStreamWrapper, ModelResponse, ProviderConfigManager
from ...base import BaseLLM
from ..common_utils import AnthropicError, process_anthropic_headers
from .transformation import AnthropicConfig
async def make_call(
client: Optional[AsyncHTTPHandler],
api_base: str,
headers: dict,
data: str,
model: str,
messages: list,
logging_obj,
timeout: Optional[Union[float, httpx.Timeout]],
json_mode: bool,
) -> Tuple[Any, httpx.Headers]:
if client is None:
client = litellm.module_level_aclient
try:
response = await client.post(
api_base, headers=headers, data=data, stream=True, timeout=timeout
)
except httpx.HTTPStatusError as e:
error_headers = getattr(e, "headers", None)
error_response = getattr(e, "response", None)
if error_headers is None and error_response:
error_headers = getattr(error_response, "headers", None)
raise AnthropicError(
status_code=e.response.status_code,
message=await e.response.aread(),
headers=error_headers,
)
except Exception as e:
for exception in litellm.LITELLM_EXCEPTION_TYPES:
if isinstance(e, exception):
raise e
raise AnthropicError(status_code=500, message=str(e))
completion_stream = ModelResponseIterator(
streaming_response=response.aiter_lines(),
sync_stream=False,
json_mode=json_mode,
)
# LOGGING
logging_obj.post_call(
input=messages,
api_key="",
original_response=completion_stream, # Pass the completion stream for logging
additional_args={"complete_input_dict": data},
)
return completion_stream, response.headers
def make_sync_call(
client: Optional[HTTPHandler],
api_base: str,
headers: dict,
data: str,
model: str,
messages: list,
logging_obj,
timeout: Optional[Union[float, httpx.Timeout]],
json_mode: bool,
) -> Tuple[Any, httpx.Headers]:
if client is None:
client = litellm.module_level_client # re-use a module level client
try:
response = client.post(
api_base, headers=headers, data=data, stream=True, timeout=timeout
)
except httpx.HTTPStatusError as e:
error_headers = getattr(e, "headers", None)
error_response = getattr(e, "response", None)
if error_headers is None and error_response:
error_headers = getattr(error_response, "headers", None)
raise AnthropicError(
status_code=e.response.status_code,
message=e.response.read(),
headers=error_headers,
)
except Exception as e:
for exception in litellm.LITELLM_EXCEPTION_TYPES:
if isinstance(e, exception):
raise e
raise AnthropicError(status_code=500, message=str(e))
if response.status_code != 200:
response_headers = getattr(response, "headers", None)
raise AnthropicError(
status_code=response.status_code,
message=response.read(),
headers=response_headers,
)
completion_stream = ModelResponseIterator(
streaming_response=response.iter_lines(), sync_stream=True, json_mode=json_mode
)
# LOGGING
logging_obj.post_call(
input=messages,
api_key="",
original_response="first stream response received",
additional_args={"complete_input_dict": data},
)
return completion_stream, response.headers
class AnthropicChatCompletion(BaseLLM):
def __init__(self) -> None:
super().__init__()
async def acompletion_stream_function(
self,
model: str,
messages: list,
api_base: str,
custom_prompt_dict: dict,
model_response: ModelResponse,
print_verbose: Callable,
timeout: Union[float, httpx.Timeout],
client: Optional[AsyncHTTPHandler],
encoding,
api_key,
logging_obj,
stream,
_is_function_call,
data: dict,
json_mode: bool,
optional_params=None,
litellm_params=None,
logger_fn=None,
headers={},
):
data["stream"] = True
completion_stream, headers = await make_call(
client=client,
api_base=api_base,
headers=headers,
data=json.dumps(data),
model=model,
messages=messages,
logging_obj=logging_obj,
timeout=timeout,
json_mode=json_mode,
)
streamwrapper = CustomStreamWrapper(
completion_stream=completion_stream,
model=model,
custom_llm_provider="anthropic",
logging_obj=logging_obj,
_response_headers=process_anthropic_headers(headers),
)
return streamwrapper
async def acompletion_function(
self,
model: str,
messages: list,
api_base: str,
custom_prompt_dict: dict,
model_response: ModelResponse,
print_verbose: Callable,
timeout: Union[float, httpx.Timeout],
encoding,
api_key,
logging_obj,
stream,
_is_function_call,
data: dict,
optional_params: dict,
json_mode: bool,
litellm_params: dict,
provider_config: BaseConfig,
logger_fn=None,
headers={},
client: Optional[AsyncHTTPHandler] = None,
) -> Union[ModelResponse, CustomStreamWrapper]:
async_handler = client or get_async_httpx_client(
llm_provider=litellm.LlmProviders.ANTHROPIC
)
try:
response = await async_handler.post(
api_base, headers=headers, json=data, timeout=timeout
)
except Exception as e:
## LOGGING
logging_obj.post_call(
input=messages,
api_key=api_key,
original_response=str(e),
additional_args={"complete_input_dict": data},
)
status_code = getattr(e, "status_code", 500)
error_headers = getattr(e, "headers", None)
error_text = getattr(e, "text", str(e))
error_response = getattr(e, "response", None)
if error_headers is None and error_response:
error_headers = getattr(error_response, "headers", None)
if error_response and hasattr(error_response, "text"):
error_text = getattr(error_response, "text", error_text)
raise AnthropicError(
message=error_text,
status_code=status_code,
headers=error_headers,
)
return provider_config.transform_response(
model=model,
raw_response=response,
model_response=model_response,
logging_obj=logging_obj,
api_key=api_key,
request_data=data,
messages=messages,
optional_params=optional_params,
litellm_params=litellm_params,
encoding=encoding,
json_mode=json_mode,
)
def completion(
self,
model: str,
messages: list,
api_base: str,
custom_llm_provider: str,
custom_prompt_dict: dict,
model_response: ModelResponse,
print_verbose: Callable,
encoding,
api_key,
logging_obj,
optional_params: dict,
timeout: Union[float, httpx.Timeout],
litellm_params: dict,
acompletion=None,
logger_fn=None,
headers={},
client=None,
):
optional_params = copy.deepcopy(optional_params)
stream = optional_params.pop("stream", None)
json_mode: bool = optional_params.pop("json_mode", False)
is_vertex_request: bool = optional_params.pop("is_vertex_request", False)
_is_function_call = False
messages = copy.deepcopy(messages)
headers = AnthropicConfig().validate_environment(
api_key=api_key,
headers=headers,
model=model,
messages=messages,
optional_params={**optional_params, "is_vertex_request": is_vertex_request},
litellm_params=litellm_params,
)
config = ProviderConfigManager.get_provider_chat_config(
model=model,
provider=LlmProviders(custom_llm_provider),
)
if config is None:
raise ValueError(
f"Provider config not found for model: {model} and provider: {custom_llm_provider}"
)
data = config.transform_request(
model=model,
messages=messages,
optional_params=optional_params,
litellm_params=litellm_params,
headers=headers,
)
## LOGGING
logging_obj.pre_call(
input=messages,
api_key=api_key,
additional_args={
"complete_input_dict": data,
"api_base": api_base,
"headers": headers,
},
)
print_verbose(f"_is_function_call: {_is_function_call}")
if acompletion is True:
if (
stream is True
): # make an async streaming request
print_verbose("makes async anthropic streaming POST request")
data["stream"] = stream
return self.acompletion_stream_function(
model=model,
messages=messages,
data=data,
api_base=api_base,
custom_prompt_dict=custom_prompt_dict,
model_response=model_response,
print_verbose=print_verbose,
encoding=encoding,
api_key=api_key,
logging_obj=logging_obj,
optional_params=optional_params,
stream=stream,
_is_function_call=_is_function_call,
json_mode=json_mode,
litellm_params=litellm_params,
logger_fn=logger_fn,
headers=headers,
timeout=timeout,
client=(
client
if client is not None and isinstance(client, AsyncHTTPHandler)
else None
),
)
else:
return self.acompletion_function(
model=model,
messages=messages,
data=data,
api_base=api_base,
custom_prompt_dict=custom_prompt_dict,
model_response=model_response,
print_verbose=print_verbose,
encoding=encoding,
api_key=api_key,
provider_config=config,
logging_obj=logging_obj,
optional_params=optional_params,
stream=stream,
_is_function_call=_is_function_call,
litellm_params=litellm_params,
logger_fn=logger_fn,
headers=headers,
client=client,
json_mode=json_mode,
timeout=timeout,
)
else:
## COMPLETION CALL
if (
stream is True
): # make a sync streaming request
data["stream"] = stream
completion_stream, headers = make_sync_call(
client=client,
api_base=api_base,
headers=headers, # type: ignore
data=json.dumps(data),
model=model,
messages=messages,
logging_obj=logging_obj,
timeout=timeout,
json_mode=json_mode,
)
return CustomStreamWrapper(
completion_stream=completion_stream,
model=model,
custom_llm_provider="anthropic",
logging_obj=logging_obj,
_response_headers=process_anthropic_headers(headers),
)
else:
if client is None or not isinstance(client, HTTPHandler):
client = HTTPHandler(timeout=timeout) # type: ignore
try:
response = client.post(
api_base,
headers=headers,
data=json.dumps(data),
timeout=timeout,
)
except Exception as e:
status_code = getattr(e, "status_code", 500)
error_headers = getattr(e, "headers", None)
error_text = getattr(e, "text", str(e))
error_response = getattr(e, "response", None)
if error_headers is None and error_response:
error_headers = getattr(error_response, "headers", None)
if error_response and hasattr(error_response, "text"):
error_text = getattr(error_response, "text", error_text)
raise AnthropicError(
message=error_text,
status_code=status_code,
headers=error_headers,
)
return config.transform_response(
model=model,
raw_response=response,
model_response=model_response,
logging_obj=logging_obj,
api_key=api_key,
request_data=data,
messages=messages,
optional_params=optional_params,
litellm_params=litellm_params,
encoding=encoding,
json_mode=json_mode,
)
def embedding(self):
# logic for parsing in - calling - parsing out model embedding calls
pass
class ModelResponseIterator:
def __init__(
self, streaming_response, sync_stream: bool, json_mode: Optional[bool] = False
):
self.streaming_response = streaming_response
self.response_iterator = self.streaming_response
self.content_blocks: List[ContentBlockDelta] = []
self.tool_index = -1
self.json_mode = json_mode
def check_empty_tool_call_args(self) -> bool:
"""
Check whether the tool-call arguments accumulated so far amount to an empty string
"""
args = ""
# if text content block -> skip
if len(self.content_blocks) == 0:
return False
if (
self.content_blocks[0]["delta"]["type"] == "text_delta"
or self.content_blocks[0]["delta"]["type"] == "thinking_delta"
):
return False
for block in self.content_blocks:
if block["delta"]["type"] == "input_json_delta":
args += block["delta"].get("partial_json", "") # type: ignore
if len(args) == 0:
return True
return False
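# Illustrative walk-through (chunk shape follows ContentBlockDelta above): a
# tool call whose streamed `partial_json` fragments are all empty accumulates
# args == "", so this returns True and the `content_block_stop` branch in
# chunk_parser emits "{}" as the final arguments.
#
#   self.content_blocks = [
#       {"type": "content_block_delta", "index": 0,
#        "delta": {"type": "input_json_delta", "partial_json": ""}},
#   ]
#   self.check_empty_tool_call_args()  # -> True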
def _handle_usage(self, anthropic_usage_chunk: Union[dict, UsageDelta]) -> Usage:
return AnthropicConfig().calculate_usage(
usage_object=cast(dict, anthropic_usage_chunk), reasoning_content=None
)
def _content_block_delta_helper(
self, chunk: dict
) -> Tuple[
str,
Optional[ChatCompletionToolCallChunk],
List[Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]],
Dict[str, Any],
]:
"""
Parse a `content_block_delta` chunk into text, tool use, thinking blocks, and provider-specific fields
"""
text = ""
tool_use: Optional[ChatCompletionToolCallChunk] = None
provider_specific_fields = {}
content_block = ContentBlockDelta(**chunk) # type: ignore
thinking_blocks: List[
Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]
] = []
self.content_blocks.append(content_block)
if "text" in content_block["delta"]:
text = content_block["delta"]["text"]
elif "partial_json" in content_block["delta"]:
tool_use = {
"id": None,
"type": "function",
"function": {
"name": None,
"arguments": content_block["delta"]["partial_json"],
},
"index": self.tool_index,
}
elif "citation" in content_block["delta"]:
provider_specific_fields["citation"] = content_block["delta"]["citation"]
elif (
"thinking" in content_block["delta"]
or "signature" in content_block["delta"]
):
thinking_blocks = [
ChatCompletionThinkingBlock(
type="thinking",
thinking=content_block["delta"].get("thinking") or "",
signature=content_block["delta"].get("signature"),
)
]
provider_specific_fields["thinking_blocks"] = thinking_blocks
return text, tool_use, thinking_blocks, provider_specific_fields
def _handle_reasoning_content(
self,
thinking_blocks: List[
Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]
],
) -> Optional[str]:
"""
Concatenate the `thinking` text from all thinking blocks into a single reasoning string
"""
reasoning_content = None
for block in thinking_blocks:
thinking_content = cast(Optional[str], block.get("thinking"))
if reasoning_content is None:
reasoning_content = ""
if thinking_content is not None:
reasoning_content += thinking_content
return reasoning_content
def chunk_parser(self, chunk: dict) -> ModelResponseStream:
try:
type_chunk = chunk.get("type", "") or ""
text = ""
tool_use: Optional[ChatCompletionToolCallChunk] = None
finish_reason = ""
usage: Optional[Usage] = None
provider_specific_fields: Dict[str, Any] = {}
reasoning_content: Optional[str] = None
thinking_blocks: Optional[
List[
Union[
ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock
]
]
] = None
index = int(chunk.get("index", 0))
if type_chunk == "content_block_delta":
"""
Anthropic content chunk
chunk = {'type': 'content_block_delta', 'index': 0, 'delta': {'type': 'text_delta', 'text': 'Hello'}}
"""
(
text,
tool_use,
thinking_blocks,
provider_specific_fields,
) = self._content_block_delta_helper(chunk=chunk)
if thinking_blocks:
reasoning_content = self._handle_reasoning_content(
thinking_blocks=thinking_blocks
)
elif type_chunk == "content_block_start":
"""
event: content_block_start
data: {"type":"content_block_start","index":1,"content_block":{"type":"tool_use","id":"toolu_01T1x1fJ34qAmk2tNTrN7Up6","name":"get_weather","input":{}}}
"""
content_block_start = ContentBlockStart(**chunk) # type: ignore
self.content_blocks = [] # reset content blocks when new block starts
if content_block_start["content_block"]["type"] == "text":
text = content_block_start["content_block"]["text"]
elif content_block_start["content_block"]["type"] == "tool_use":
self.tool_index += 1
tool_use = {
"id": content_block_start["content_block"]["id"],
"type": "function",
"function": {
"name": content_block_start["content_block"]["name"],
"arguments": "",
},
"index": self.tool_index,
}
elif (
content_block_start["content_block"]["type"] == "redacted_thinking"
):
thinking_blocks = [
ChatCompletionRedactedThinkingBlock(
type="redacted_thinking",
data=content_block_start["content_block"]["data"],
)
]
elif type_chunk == "content_block_stop":
ContentBlockStop(**chunk) # type: ignore
# check if tool call content block
is_empty = self.check_empty_tool_call_args()
if is_empty:
tool_use = {
"id": None,
"type": "function",
"function": {
"name": None,
"arguments": "{}",
},
"index": self.tool_index,
}
elif type_chunk == "message_delta":
"""
Anthropic
chunk = {'type': 'message_delta', 'delta': {'stop_reason': 'max_tokens', 'stop_sequence': None}, 'usage': {'output_tokens': 10}}
"""
# usage for this chunk is parsed below and attached to the response
message_delta = MessageBlockDelta(**chunk) # type: ignore
finish_reason = map_finish_reason(
finish_reason=message_delta["delta"].get("stop_reason", "stop")
or "stop"
)
usage = self._handle_usage(anthropic_usage_chunk=message_delta["usage"])
elif type_chunk == "message_start":
"""
Anthropic
chunk = {
"type": "message_start",
"message": {
"id": "msg_vrtx_011PqREFEMzd3REdCoUFAmdG",
"type": "message",
"role": "assistant",
"model": "claude-3-sonnet-20240229",
"content": [],
"stop_reason": null,
"stop_sequence": null,
"usage": {
"input_tokens": 270,
"output_tokens": 1
}
}
}
"""
message_start_block = MessageStartBlock(**chunk) # type: ignore
if "usage" in message_start_block["message"]:
usage = self._handle_usage(
anthropic_usage_chunk=message_start_block["message"]["usage"]
)
elif type_chunk == "error":
"""
{"type":"error","error":{"details":null,"type":"api_error","message":"Internal server error"} }
"""
_error_dict = chunk.get("error", {}) or {}
message = _error_dict.get("message", None) or str(chunk)
raise AnthropicError(
message=message,
status_code=500, # it looks like Anthropic API does not return a status code in the chunk error - default to 500
)
text, tool_use = self._handle_json_mode_chunk(text=text, tool_use=tool_use)
returned_chunk = ModelResponseStream(
choices=[
StreamingChoices(
index=index,
delta=Delta(
content=text,
tool_calls=[tool_use] if tool_use is not None else None,
provider_specific_fields=(
provider_specific_fields
if provider_specific_fields
else None
),
thinking_blocks=(
thinking_blocks if thinking_blocks else None
),
reasoning_content=reasoning_content,
),
finish_reason=finish_reason,
)
],
usage=usage,
)
return returned_chunk
except json.JSONDecodeError:
raise ValueError(f"Failed to decode JSON from chunk: {chunk}")
def _handle_json_mode_chunk(
self, text: str, tool_use: Optional[ChatCompletionToolCallChunk]
) -> Tuple[str, Optional[ChatCompletionToolCallChunk]]:
"""
If JSON mode is enabled, convert the tool call to a message.
Anthropic returns the JSON response as part of the tool call; OpenAI returns it as part of the content. This handles placing the tool-call arguments in the content.
Args:
text: str
tool_use: Optional[ChatCompletionToolCallChunk]
Returns:
Tuple[str, Optional[ChatCompletionToolCallChunk]]
text: The text to use in the content
tool_use: The ChatCompletionToolCallChunk to use in the chunk response
"""
if self.json_mode is True and tool_use is not None:
message = AnthropicConfig._convert_tool_response_to_message(
tool_calls=[tool_use]
)
if message is not None:
text = message.content or ""
tool_use = None
return text, tool_use
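# Sketch (values illustrative): with json_mode=True, a streamed tool-call
# fragment is surfaced as plain content instead of a tool call.
#
#   tool_use = {"id": None, "type": "function", "index": 0,
#               "function": {"name": None, "arguments": '{"answer": 42}'}}
#   text, tool_use = self._handle_json_mode_chunk(text="", tool_use=tool_use)
#   # text == '{"answer": 42}' (via _convert_tool_response_to_message),
#   # tool_use is None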
# Sync iterator
def __iter__(self):
return self
def __next__(self):
try:
chunk = self.response_iterator.__next__()
except StopIteration:
raise StopIteration
except ValueError as e:
raise RuntimeError(f"Error receiving chunk from stream: {e}")
try:
str_line = chunk
if isinstance(chunk, bytes): # Handle binary data
str_line = chunk.decode("utf-8") # Convert bytes to string
index = str_line.find("data:")
if index != -1:
str_line = str_line[index:]
if str_line.startswith("data:"):
data_json = json.loads(str_line[5:])
return self.chunk_parser(chunk=data_json)
else:
return GenericStreamingChunk(
text="",
is_finished=False,
finish_reason="",
usage=None,
index=0,
tool_use=None,
)
except StopIteration:
raise StopIteration
except ValueError as e:
raise RuntimeError(f"Error parsing chunk: {e},\nReceived chunk: {chunk}")
# Async iterator
def __aiter__(self):
self.async_response_iterator = self.streaming_response.__aiter__()
return self
async def __anext__(self):
try:
chunk = await self.async_response_iterator.__anext__()
except StopAsyncIteration:
raise StopAsyncIteration
except ValueError as e:
raise RuntimeError(f"Error receiving chunk from stream: {e}")
try:
str_line = chunk
if isinstance(chunk, bytes): # Handle binary data
str_line = chunk.decode("utf-8") # Convert bytes to string
index = str_line.find("data:")
if index != -1:
str_line = str_line[index:]
if str_line.startswith("data:"):
data_json = json.loads(str_line[5:])
return self.chunk_parser(chunk=data_json)
else:
return GenericStreamingChunk(
text="",
is_finished=False,
finish_reason="",
usage=None,
index=0,
tool_use=None,
)
except StopAsyncIteration:
raise StopAsyncIteration
except ValueError as e:
raise RuntimeError(f"Error parsing chunk: {e},\nReceived chunk: {chunk}")
def convert_str_chunk_to_generic_chunk(self, chunk: str) -> ModelResponseStream:
"""
Convert a string chunk to a ModelResponseStream.
Note: this is used for Anthropic pass-through streaming logging.
We can move __anext__ and __next__ to use this function, since it's common logic.
They were not migrated here to minimize the changes made in one PR.
"""
str_line = chunk
if isinstance(chunk, bytes): # Handle binary data
str_line = chunk.decode("utf-8") # Convert bytes to string
index = str_line.find("data:")
if index != -1:
str_line = str_line[index:]
if str_line.startswith("data:"):
data_json = json.loads(str_line[5:])
return self.chunk_parser(chunk=data_json)
else:
return ModelResponseStream()
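# Usage sketch for the handler above, via litellm's public entrypoint
# (model id illustrative; assumes ANTHROPIC_API_KEY is set in the env):
#
#   import litellm
#
#   resp = litellm.completion(
#       model="anthropic/claude-3-5-sonnet-20240620",
#       messages=[{"role": "user", "content": "Hello"}],
#       stream=True,
#   )
#   for chunk in resp:
#       # each chunk is produced by ModelResponseIterator and wrapped by
#       # CustomStreamWrapper
#       print(chunk.choices[0].delta.content or "", end="")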


@@ -0,0 +1,823 @@
import json
import time
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union, cast
import httpx
import litellm
from litellm.constants import (
DEFAULT_ANTHROPIC_CHAT_MAX_TOKENS,
DEFAULT_REASONING_EFFORT_HIGH_THINKING_BUDGET,
DEFAULT_REASONING_EFFORT_LOW_THINKING_BUDGET,
DEFAULT_REASONING_EFFORT_MEDIUM_THINKING_BUDGET,
RESPONSE_FORMAT_TOOL_NAME,
)
from litellm.litellm_core_utils.core_helpers import map_finish_reason
from litellm.litellm_core_utils.prompt_templates.factory import anthropic_messages_pt
from litellm.llms.base_llm.base_utils import type_to_response_format_param
from litellm.llms.base_llm.chat.transformation import BaseConfig, BaseLLMException
from litellm.types.llms.anthropic import (
AllAnthropicToolsValues,
AnthropicComputerTool,
AnthropicHostedTools,
AnthropicInputSchema,
AnthropicMessagesTool,
AnthropicMessagesToolChoice,
AnthropicSystemMessageContent,
AnthropicThinkingParam,
)
from litellm.types.llms.openai import (
REASONING_EFFORT,
AllMessageValues,
ChatCompletionCachedContent,
ChatCompletionRedactedThinkingBlock,
ChatCompletionSystemMessage,
ChatCompletionThinkingBlock,
ChatCompletionToolCallChunk,
ChatCompletionToolCallFunctionChunk,
ChatCompletionToolParam,
)
from litellm.types.utils import CompletionTokensDetailsWrapper
from litellm.types.utils import Message as LitellmMessage
from litellm.types.utils import PromptTokensDetailsWrapper
from litellm.utils import (
ModelResponse,
Usage,
add_dummy_tool,
has_tool_call_blocks,
token_counter,
)
from ..common_utils import AnthropicError, AnthropicModelInfo, process_anthropic_headers
if TYPE_CHECKING:
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
LoggingClass = LiteLLMLoggingObj
else:
LoggingClass = Any
class AnthropicConfig(AnthropicModelInfo, BaseConfig):
"""
Reference: https://docs.anthropic.com/claude/reference/messages_post
To pass metadata to Anthropic, use {"user_id": "any-relevant-information"}.
"""
max_tokens: Optional[
int
] = DEFAULT_ANTHROPIC_CHAT_MAX_TOKENS # anthropic requires a default value (Opus, Sonnet, and Haiku have the same default)
stop_sequences: Optional[list] = None
temperature: Optional[int] = None
top_p: Optional[int] = None
top_k: Optional[int] = None
metadata: Optional[dict] = None
system: Optional[str] = None
def __init__(
self,
max_tokens: Optional[
int
] = DEFAULT_ANTHROPIC_CHAT_MAX_TOKENS, # You can pass in a value yourself or use the default value 4096
stop_sequences: Optional[list] = None,
temperature: Optional[int] = None,
top_p: Optional[int] = None,
top_k: Optional[int] = None,
metadata: Optional[dict] = None,
system: Optional[str] = None,
) -> None:
locals_ = locals().copy()
for key, value in locals_.items():
if key != "self" and value is not None:
setattr(self.__class__, key, value)
@classmethod
def get_config(cls):
return super().get_config()
def get_supported_openai_params(self, model: str):
params = [
"stream",
"stop",
"temperature",
"top_p",
"max_tokens",
"max_completion_tokens",
"tools",
"tool_choice",
"extra_headers",
"parallel_tool_calls",
"response_format",
"user",
"reasoning_effort",
]
if "claude-3-7-sonnet" in model:
params.append("thinking")
return params
def get_json_schema_from_pydantic_object(
self, response_format: Union[Any, Dict, None]
) -> Optional[dict]:
return type_to_response_format_param(
response_format, ref_template="/$defs/{model}"
) # Relevant issue: https://github.com/BerriAI/litellm/issues/7755
def get_cache_control_headers(self) -> dict:
return {
"anthropic-version": "2023-06-01",
"anthropic-beta": "prompt-caching-2024-07-31",
}
def _map_tool_choice(
self, tool_choice: Optional[str], parallel_tool_use: Optional[bool]
) -> Optional[AnthropicMessagesToolChoice]:
_tool_choice: Optional[AnthropicMessagesToolChoice] = None
if tool_choice == "auto":
_tool_choice = AnthropicMessagesToolChoice(
type="auto",
)
elif tool_choice == "required":
_tool_choice = AnthropicMessagesToolChoice(type="any")
elif isinstance(tool_choice, dict):
_tool_name = tool_choice.get("function", {}).get("name")
_tool_choice = AnthropicMessagesToolChoice(type="tool")
if _tool_name is not None:
_tool_choice["name"] = _tool_name
if parallel_tool_use is not None:
# Anthropic uses 'disable_parallel_tool_use' flag to determine if parallel tool use is allowed
# this is the inverse of the openai flag.
if _tool_choice is not None:
_tool_choice["disable_parallel_tool_use"] = not parallel_tool_use
else: # use anthropic defaults and make sure to send the disable_parallel_tool_use flag
_tool_choice = AnthropicMessagesToolChoice(
type="auto",
disable_parallel_tool_use=not parallel_tool_use,
)
return _tool_choice
def _map_tool_helper(
self, tool: ChatCompletionToolParam
) -> AllAnthropicToolsValues:
returned_tool: Optional[AllAnthropicToolsValues] = None
if tool["type"] == "function" or tool["type"] == "custom":
_input_schema: dict = tool["function"].get(
"parameters",
{
"type": "object",
"properties": {},
},
)
input_schema: AnthropicInputSchema = AnthropicInputSchema(**_input_schema)
_tool = AnthropicMessagesTool(
name=tool["function"]["name"],
input_schema=input_schema,
)
_description = tool["function"].get("description")
if _description is not None:
_tool["description"] = _description
returned_tool = _tool
elif tool["type"].startswith("computer_"):
## check if all required 'display_' params are given
if "parameters" not in tool["function"]:
raise ValueError("Missing required parameter: parameters")
_display_width_px: Optional[int] = tool["function"]["parameters"].get(
"display_width_px"
)
_display_height_px: Optional[int] = tool["function"]["parameters"].get(
"display_height_px"
)
if _display_width_px is None or _display_height_px is None:
raise ValueError(
"Missing required parameter: display_width_px or display_height_px"
)
_computer_tool = AnthropicComputerTool(
type=tool["type"],
name=tool["function"].get("name", "computer"),
display_width_px=_display_width_px,
display_height_px=_display_height_px,
)
_display_number = tool["function"]["parameters"].get("display_number")
if _display_number is not None:
_computer_tool["display_number"] = _display_number
returned_tool = _computer_tool
elif tool["type"].startswith("bash_") or tool["type"].startswith(
"text_editor_"
):
function_name = tool["function"].get("name")
if function_name is None:
raise ValueError("Missing required parameter: name")
returned_tool = AnthropicHostedTools(
type=tool["type"],
name=function_name,
)
if returned_tool is None:
raise ValueError(f"Unsupported tool type: {tool['type']}")
## check if cache_control is set in the tool
_cache_control = tool.get("cache_control", None)
_cache_control_function = tool.get("function", {}).get("cache_control", None)
if _cache_control is not None:
returned_tool["cache_control"] = _cache_control
elif _cache_control_function is not None and isinstance(
_cache_control_function, dict
):
returned_tool["cache_control"] = ChatCompletionCachedContent(
**_cache_control_function # type: ignore
)
return returned_tool
def _map_tools(self, tools: List) -> List[AllAnthropicToolsValues]:
anthropic_tools = []
for tool in tools:
if "input_schema" in tool: # assume in anthropic format
anthropic_tools.append(tool)
else: # assume openai tool call
new_tool = self._map_tool_helper(tool)
anthropic_tools.append(new_tool)
return anthropic_tools
def _map_stop_sequences(
self, stop: Optional[Union[str, List[str]]]
) -> Optional[List[str]]:
new_stop: Optional[List[str]] = None
if isinstance(stop, str):
if (
stop.isspace() and litellm.drop_params is True
): # anthropic doesn't allow whitespace characters as stop-sequences
return new_stop
new_stop = [stop]
elif isinstance(stop, list):
new_v = []
for v in stop:
if (
v.isspace() and litellm.drop_params is True
): # anthropic doesn't allow whitespace characters as stop-sequences
continue
new_v.append(v)
if len(new_v) > 0:
new_stop = new_v
return new_stop
@staticmethod
def _map_reasoning_effort(
reasoning_effort: Optional[Union[REASONING_EFFORT, str]]
) -> Optional[AnthropicThinkingParam]:
if reasoning_effort is None:
return None
elif reasoning_effort == "low":
return AnthropicThinkingParam(
type="enabled",
budget_tokens=DEFAULT_REASONING_EFFORT_LOW_THINKING_BUDGET,
)
elif reasoning_effort == "medium":
return AnthropicThinkingParam(
type="enabled",
budget_tokens=DEFAULT_REASONING_EFFORT_MEDIUM_THINKING_BUDGET,
)
elif reasoning_effort == "high":
return AnthropicThinkingParam(
type="enabled",
budget_tokens=DEFAULT_REASONING_EFFORT_HIGH_THINKING_BUDGET,
)
else:
raise ValueError(f"Unmapped reasoning effort: {reasoning_effort}")
def map_response_format_to_anthropic_tool(
self, value: Optional[dict], optional_params: dict, is_thinking_enabled: bool
) -> Optional[AnthropicMessagesTool]:
ignore_response_format_types = ["text"]
if (
value is None or value["type"] in ignore_response_format_types
): # value is a no-op
return None
json_schema: Optional[dict] = None
if "response_schema" in value:
json_schema = value["response_schema"]
elif "json_schema" in value:
json_schema = value["json_schema"]["schema"]
"""
When using tools in this way (see https://docs.anthropic.com/en/docs/build-with-claude/tool-use#json-mode):
- You usually want to provide a single tool
- You should set tool_choice (see Forcing tool use) to instruct the model to explicitly use that tool
- Remember that the model will pass the input to the tool, so the tool's name and description should be written from the model's perspective.
"""
_tool = self._create_json_tool_call_for_response_format(
json_schema=json_schema,
)
return _tool
def map_openai_params(
self,
non_default_params: dict,
optional_params: dict,
model: str,
drop_params: bool,
) -> dict:
is_thinking_enabled = self.is_thinking_enabled(
non_default_params=non_default_params
)
for param, value in non_default_params.items():
if param == "max_tokens":
optional_params["max_tokens"] = value
if param == "max_completion_tokens":
optional_params["max_tokens"] = value
if param == "tools":
# check if optional params already has tools
tool_value = self._map_tools(value)
optional_params = self._add_tools_to_optional_params(
optional_params=optional_params, tools=tool_value
)
if param == "tool_choice" or param == "parallel_tool_calls":
_tool_choice: Optional[
AnthropicMessagesToolChoice
] = self._map_tool_choice(
tool_choice=non_default_params.get("tool_choice"),
parallel_tool_use=non_default_params.get("parallel_tool_calls"),
)
if _tool_choice is not None:
optional_params["tool_choice"] = _tool_choice
if param == "stream" and value is True:
optional_params["stream"] = value
if param == "stop" and (isinstance(value, str) or isinstance(value, list)):
_value = self._map_stop_sequences(value)
if _value is not None:
optional_params["stop_sequences"] = _value
if param == "temperature":
optional_params["temperature"] = value
if param == "top_p":
optional_params["top_p"] = value
if param == "response_format" and isinstance(value, dict):
_tool = self.map_response_format_to_anthropic_tool(
value, optional_params, is_thinking_enabled
)
if _tool is None:
continue
if not is_thinking_enabled:
_tool_choice = {"name": RESPONSE_FORMAT_TOOL_NAME, "type": "tool"}
optional_params["tool_choice"] = _tool_choice
optional_params["json_mode"] = True
optional_params = self._add_tools_to_optional_params(
optional_params=optional_params, tools=[_tool]
)
if param == "user":
optional_params["metadata"] = {"user_id": value}
if param == "thinking":
optional_params["thinking"] = value
elif param == "reasoning_effort" and isinstance(value, str):
optional_params["thinking"] = AnthropicConfig._map_reasoning_effort(
value
)
## handle thinking tokens
self.update_optional_params_with_thinking_tokens(
non_default_params=non_default_params, optional_params=optional_params
)
return optional_params
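# Sketch (model id illustrative, assuming no thinking params are set) of the
# OpenAI -> Anthropic param mapping:
#
#   AnthropicConfig().map_openai_params(
#       non_default_params={"max_tokens": 100, "tool_choice": "auto"},
#       optional_params={},
#       model="claude-3-5-sonnet-20240620",
#       drop_params=False,
#   )
#   # -> {"max_tokens": 100, "tool_choice": {"type": "auto"}}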
def _create_json_tool_call_for_response_format(
self,
json_schema: Optional[dict] = None,
) -> AnthropicMessagesTool:
"""
Handles creating a tool call for getting responses in JSON format.
Args:
json_schema (Optional[dict]): The JSON schema the response should be in
Returns:
AnthropicMessagesTool: The tool call to send to Anthropic API to get responses in JSON format
"""
_input_schema: AnthropicInputSchema = AnthropicInputSchema(
type="object",
)
if json_schema is None:
# Anthropic raises a 400 BadRequest error if properties is passed as None
# see usage with additionalProperties (Example 5) https://github.com/anthropics/anthropic-cookbook/blob/main/tool_use/extracting_structured_json.ipynb
_input_schema["additionalProperties"] = True
_input_schema["properties"] = {}
else:
_input_schema.update(cast(AnthropicInputSchema, json_schema))
_tool = AnthropicMessagesTool(
name=RESPONSE_FORMAT_TOOL_NAME, input_schema=_input_schema
)
return _tool
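# Example of the tool this produces for a simple schema (shape per
# AnthropicMessagesTool above):
#
#   self._create_json_tool_call_for_response_format(
#       json_schema={"type": "object",
#                    "properties": {"answer": {"type": "string"}}},
#   )
#   # -> {"name": RESPONSE_FORMAT_TOOL_NAME,
#   #     "input_schema": {"type": "object",
#   #                      "properties": {"answer": {"type": "string"}}}}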
def translate_system_message(
self, messages: List[AllMessageValues]
) -> List[AnthropicSystemMessageContent]:
"""
Translate system messages to Anthropic format.
Removes system messages from the original list and returns a new list of Anthropic system message content blocks.
"""
system_prompt_indices = []
anthropic_system_message_list: List[AnthropicSystemMessageContent] = []
for idx, message in enumerate(messages):
if message["role"] == "system":
valid_content: bool = False
system_message_block = ChatCompletionSystemMessage(**message)
if isinstance(system_message_block["content"], str):
anthropic_system_message_content = AnthropicSystemMessageContent(
type="text",
text=system_message_block["content"],
)
if "cache_control" in system_message_block:
anthropic_system_message_content[
"cache_control"
] = system_message_block["cache_control"]
anthropic_system_message_list.append(
anthropic_system_message_content
)
valid_content = True
elif isinstance(message["content"], list):
for _content in message["content"]:
anthropic_system_message_content = (
AnthropicSystemMessageContent(
type=_content.get("type"),
text=_content.get("text"),
)
)
if "cache_control" in _content:
anthropic_system_message_content[
"cache_control"
] = _content["cache_control"]
anthropic_system_message_list.append(
anthropic_system_message_content
)
valid_content = True
if valid_content:
system_prompt_indices.append(idx)
if len(system_prompt_indices) > 0:
for idx in reversed(system_prompt_indices):
messages.pop(idx)
return anthropic_system_message_list
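# Example (note that `messages` is mutated in place):
#
#   messages = [
#       {"role": "system", "content": "You are helpful."},
#       {"role": "user", "content": "Hi"},
#   ]
#   AnthropicConfig().translate_system_message(messages=messages)
#   # -> [{"type": "text", "text": "You are helpful."}]
#   # messages now only contains the user turn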
def transform_request(
self,
model: str,
messages: List[AllMessageValues],
optional_params: dict,
litellm_params: dict,
headers: dict,
) -> dict:
"""
Translate messages to anthropic format.
"""
## VALIDATE REQUEST
"""
Anthropic doesn't support tool calling without `tools=` param specified.
"""
if (
"tools" not in optional_params
and messages is not None
and has_tool_call_blocks(messages)
):
if litellm.modify_params:
optional_params["tools"] = self._map_tools(
add_dummy_tool(custom_llm_provider="anthropic")
)
else:
raise litellm.UnsupportedParamsError(
message="Anthropic doesn't support tool calling without `tools=` param specified. Pass `tools=` param OR set `litellm.modify_params = True` // `litellm_settings::modify_params: True` to add dummy tool to the request.",
model="",
llm_provider="anthropic",
)
# Separate system prompt from rest of message
anthropic_system_message_list = self.translate_system_message(messages=messages)
# Handling anthropic API Prompt Caching
if len(anthropic_system_message_list) > 0:
optional_params["system"] = anthropic_system_message_list
# Format rest of message according to anthropic guidelines
try:
anthropic_messages = anthropic_messages_pt(
model=model,
messages=messages,
llm_provider="anthropic",
)
except Exception as e:
raise AnthropicError(
status_code=400,
message="{}\nReceived Messages={}".format(str(e), messages),
) # don't use verbose_logger.exception, if exception is raised
## Load Config
config = litellm.AnthropicConfig.get_config()
for k, v in config.items():
if (
k not in optional_params
): # completion(top_k=3) > anthropic_config(top_k=3) <- allows for dynamic variables to be passed in
optional_params[k] = v
## Handle user_id in metadata
_litellm_metadata = litellm_params.get("metadata", None)
if (
_litellm_metadata
and isinstance(_litellm_metadata, dict)
and "user_id" in _litellm_metadata
):
optional_params["metadata"] = {"user_id": _litellm_metadata["user_id"]}
data = {
"model": model,
"messages": anthropic_messages,
**optional_params,
}
return data
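# Sketch of the resulting request body (model id illustrative; class-level
# defaults from get_config() are merged into optional_params):
#
#   AnthropicConfig().transform_request(
#       model="claude-3-5-sonnet-20240620",
#       messages=[{"role": "user", "content": "Hi"}],
#       optional_params={"max_tokens": 256},
#       litellm_params={},
#       headers={},
#   )
#   # -> {"model": "claude-3-5-sonnet-20240620",
#   #     "messages": [...anthropic-format messages...],
#   #     "max_tokens": 256}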
def _transform_response_for_json_mode(
self,
json_mode: Optional[bool],
tool_calls: List[ChatCompletionToolCallChunk],
) -> Optional[LitellmMessage]:
_message: Optional[LitellmMessage] = None
if json_mode is True and len(tool_calls) == 1:
# check if tool name is the default tool name
json_mode_content_str: Optional[str] = None
if (
"name" in tool_calls[0]["function"]
and tool_calls[0]["function"]["name"] == RESPONSE_FORMAT_TOOL_NAME
):
json_mode_content_str = tool_calls[0]["function"].get("arguments")
if json_mode_content_str is not None:
_message = AnthropicConfig._convert_tool_response_to_message(
tool_calls=tool_calls,
)
return _message
def extract_response_content(
self, completion_response: dict
) -> Tuple[
str,
Optional[List[Any]],
Optional[
List[
Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]
]
],
Optional[str],
List[ChatCompletionToolCallChunk],
]:
text_content = ""
citations: Optional[List[Any]] = None
thinking_blocks: Optional[
List[
Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]
]
] = None
reasoning_content: Optional[str] = None
tool_calls: List[ChatCompletionToolCallChunk] = []
for idx, content in enumerate(completion_response["content"]):
if content["type"] == "text":
text_content += content["text"]
## TOOL CALLING
elif content["type"] == "tool_use":
tool_calls.append(
ChatCompletionToolCallChunk(
id=content["id"],
type="function",
function=ChatCompletionToolCallFunctionChunk(
name=content["name"],
arguments=json.dumps(content["input"]),
),
index=idx,
)
)
elif content.get("thinking", None) is not None:
if thinking_blocks is None:
thinking_blocks = []
thinking_blocks.append(cast(ChatCompletionThinkingBlock, content))
elif content["type"] == "redacted_thinking":
if thinking_blocks is None:
thinking_blocks = []
thinking_blocks.append(
cast(ChatCompletionRedactedThinkingBlock, content)
)
## CITATIONS
if content.get("citations") is not None:
if citations is None:
citations = []
citations.append(content["citations"])
if thinking_blocks is not None:
reasoning_content = ""
for block in thinking_blocks:
thinking_content = cast(Optional[str], block.get("thinking"))
if thinking_content is not None:
reasoning_content += thinking_content
return text_content, citations, thinking_blocks, reasoning_content, tool_calls
def calculate_usage(
self, usage_object: dict, reasoning_content: Optional[str]
) -> Usage:
prompt_tokens = usage_object.get("input_tokens", 0)
completion_tokens = usage_object.get("output_tokens", 0)
_usage = usage_object
cache_creation_input_tokens: int = 0
cache_read_input_tokens: int = 0
if "cache_creation_input_tokens" in _usage:
cache_creation_input_tokens = _usage["cache_creation_input_tokens"]
if "cache_read_input_tokens" in _usage:
cache_read_input_tokens = _usage["cache_read_input_tokens"]
prompt_tokens += cache_read_input_tokens
prompt_tokens_details = PromptTokensDetailsWrapper(
cached_tokens=cache_read_input_tokens
)
completion_token_details = (
CompletionTokensDetailsWrapper(
reasoning_tokens=token_counter(
text=reasoning_content, count_response_tokens=True
)
)
if reasoning_content
else None
)
total_tokens = prompt_tokens + completion_tokens
usage = Usage(
prompt_tokens=prompt_tokens,
completion_tokens=completion_tokens,
total_tokens=total_tokens,
prompt_tokens_details=prompt_tokens_details,
cache_creation_input_tokens=cache_creation_input_tokens,
cache_read_input_tokens=cache_read_input_tokens,
completion_tokens_details=completion_token_details,
)
return usage
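# Worked example: cache-read tokens are counted toward prompt_tokens here, so
#
#   self.calculate_usage(
#       usage_object={"input_tokens": 270, "output_tokens": 10,
#                     "cache_read_input_tokens": 100},
#       reasoning_content=None,
#   )
#   # -> prompt_tokens = 270 + 100 = 370, completion_tokens = 10,
#   #    total_tokens = 380, prompt_tokens_details.cached_tokens = 100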
def transform_response(
self,
model: str,
raw_response: httpx.Response,
model_response: ModelResponse,
logging_obj: LoggingClass,
request_data: Dict,
messages: List[AllMessageValues],
optional_params: Dict,
litellm_params: dict,
encoding: Any,
api_key: Optional[str] = None,
json_mode: Optional[bool] = None,
) -> ModelResponse:
_hidden_params: Dict = {}
_hidden_params["additional_headers"] = process_anthropic_headers(
dict(raw_response.headers)
)
## LOGGING
logging_obj.post_call(
input=messages,
api_key=api_key,
original_response=raw_response.text,
additional_args={"complete_input_dict": request_data},
)
## RESPONSE OBJECT
try:
completion_response = raw_response.json()
except Exception as e:
response_headers = getattr(raw_response, "headers", None)
raise AnthropicError(
message="Unable to get json response - {}, Original Response: {}".format(
str(e), raw_response.text
),
status_code=raw_response.status_code,
headers=response_headers,
)
if "error" in completion_response:
response_headers = getattr(raw_response, "headers", None)
raise AnthropicError(
message=str(completion_response["error"]),
status_code=raw_response.status_code,
headers=response_headers,
)
else:
text_content = ""
citations: Optional[List[Any]] = None
thinking_blocks: Optional[
List[
Union[
ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock
]
]
] = None
reasoning_content: Optional[str] = None
tool_calls: List[ChatCompletionToolCallChunk] = []
(
text_content,
citations,
thinking_blocks,
reasoning_content,
tool_calls,
) = self.extract_response_content(completion_response=completion_response)
_message = litellm.Message(
tool_calls=tool_calls,
content=text_content or None,
provider_specific_fields={
"citations": citations,
"thinking_blocks": thinking_blocks,
},
thinking_blocks=thinking_blocks,
reasoning_content=reasoning_content,
)
## HANDLE JSON MODE - anthropic returns single function call
json_mode_message = self._transform_response_for_json_mode(
json_mode=json_mode,
tool_calls=tool_calls,
)
if json_mode_message is not None:
completion_response["stop_reason"] = "stop"
_message = json_mode_message
model_response.choices[0].message = _message # type: ignore
model_response._hidden_params["original_response"] = completion_response[
"content"
] # allow user to access raw anthropic tool calling response
model_response.choices[0].finish_reason = map_finish_reason(
completion_response["stop_reason"]
)
## CALCULATING USAGE
usage = self.calculate_usage(
usage_object=completion_response["usage"],
reasoning_content=reasoning_content,
)
setattr(model_response, "usage", usage) # type: ignore
model_response.created = int(time.time())
model_response.model = completion_response["model"]
model_response._hidden_params = _hidden_params
return model_response
@staticmethod
def _convert_tool_response_to_message(
tool_calls: List[ChatCompletionToolCallChunk],
) -> Optional[LitellmMessage]:
"""
In JSON mode, the Anthropic API returns the JSON response as a tool call; we convert it to a message to follow the OpenAI format.
"""
## HANDLE JSON MODE - anthropic returns single function call
json_mode_content_str: Optional[str] = tool_calls[0]["function"].get(
"arguments"
)
try:
if json_mode_content_str is not None:
args = json.loads(json_mode_content_str)
if (
isinstance(args, dict)
and (values := args.get("values")) is not None
):
_message = litellm.Message(content=json.dumps(values))
return _message
else:
# often the `values` key is not present in the tool response
# relevant issue: https://github.com/BerriAI/litellm/issues/6741
_message = litellm.Message(content=json.dumps(args))
return _message
except json.JSONDecodeError:
# json decode error does occur, return the original tool response str
return litellm.Message(content=json_mode_content_str)
return None
def get_error_class(
self, error_message: str, status_code: int, headers: Union[Dict, httpx.Headers]
) -> BaseLLMException:
return AnthropicError(
status_code=status_code,
message=error_message,
headers=cast(httpx.Headers, headers),
)


@@ -0,0 +1,221 @@
"""
This file contains common utils for anthropic calls.
"""
from typing import Dict, List, Optional, Union
import httpx
import litellm
from litellm.llms.base_llm.base_utils import BaseLLMModelInfo
from litellm.llms.base_llm.chat.transformation import BaseLLMException
from litellm.secret_managers.main import get_secret_str
from litellm.types.llms.anthropic import AllAnthropicToolsValues
from litellm.types.llms.openai import AllMessageValues
class AnthropicError(BaseLLMException):
def __init__(
self,
status_code: int,
message,
headers: Optional[httpx.Headers] = None,
):
super().__init__(status_code=status_code, message=message, headers=headers)
class AnthropicModelInfo(BaseLLMModelInfo):
def is_cache_control_set(self, messages: List[AllMessageValues]) -> bool:
"""
Return if {"cache_control": ..} in message content block
Used to check if anthropic prompt caching headers need to be set.
"""
for message in messages:
if message.get("cache_control", None) is not None:
return True
_message_content = message.get("content")
if _message_content is not None and isinstance(_message_content, list):
for content in _message_content:
if "cache_control" in content:
return True
return False
def is_computer_tool_used(
self, tools: Optional[List[AllAnthropicToolsValues]]
) -> bool:
if tools is None:
return False
for tool in tools:
if "type" in tool and tool["type"].startswith("computer_"):
return True
return False
def is_pdf_used(self, messages: List[AllMessageValues]) -> bool:
"""
Return True if non-text media (e.g. a PDF) is passed in the messages.
"""
for message in messages:
if (
"content" in message
and message["content"] is not None
and isinstance(message["content"], list)
):
for content in message["content"]:
if "type" in content and content["type"] != "text":
return True
return False
def _get_user_anthropic_beta_headers(
self, anthropic_beta_header: Optional[str]
) -> Optional[List[str]]:
if anthropic_beta_header is None:
return None
return anthropic_beta_header.split(",")
def get_anthropic_headers(
self,
api_key: str,
anthropic_version: Optional[str] = None,
computer_tool_used: bool = False,
prompt_caching_set: bool = False,
pdf_used: bool = False,
is_vertex_request: bool = False,
user_anthropic_beta_headers: Optional[List[str]] = None,
) -> dict:
betas = set()
if prompt_caching_set:
betas.add("prompt-caching-2024-07-31")
if computer_tool_used:
betas.add("computer-use-2024-10-22")
if pdf_used:
betas.add("pdfs-2024-09-25")
headers = {
"anthropic-version": anthropic_version or "2023-06-01",
"x-api-key": api_key,
"accept": "application/json",
"content-type": "application/json",
}
if user_anthropic_beta_headers is not None:
betas.update(user_anthropic_beta_headers)
# Don't send any beta headers to Vertex, Vertex has failed requests when they are sent
if is_vertex_request is True:
pass
elif len(betas) > 0:
headers["anthropic-beta"] = ",".join(betas)
return headers
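# Example output (key illustrative), called on a config instance:
#
#   self.get_anthropic_headers(api_key="sk-ant-xxx", prompt_caching_set=True)
#   # -> {"anthropic-version": "2023-06-01", "x-api-key": "sk-ant-xxx",
#   #     "accept": "application/json", "content-type": "application/json",
#   #     "anthropic-beta": "prompt-caching-2024-07-31"}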
def validate_environment(
self,
headers: dict,
model: str,
messages: List[AllMessageValues],
optional_params: dict,
litellm_params: dict,
api_key: Optional[str] = None,
api_base: Optional[str] = None,
) -> Dict:
if api_key is None:
raise litellm.AuthenticationError(
message="Missing Anthropic API Key - A call is being made to anthropic but no key is set either in the environment variables or via params. Please set `ANTHROPIC_API_KEY` in your environment vars",
llm_provider="anthropic",
model=model,
)
tools = optional_params.get("tools")
prompt_caching_set = self.is_cache_control_set(messages=messages)
computer_tool_used = self.is_computer_tool_used(tools=tools)
pdf_used = self.is_pdf_used(messages=messages)
user_anthropic_beta_headers = self._get_user_anthropic_beta_headers(
anthropic_beta_header=headers.get("anthropic-beta")
)
anthropic_headers = self.get_anthropic_headers(
computer_tool_used=computer_tool_used,
prompt_caching_set=prompt_caching_set,
pdf_used=pdf_used,
api_key=api_key,
is_vertex_request=optional_params.get("is_vertex_request", False),
user_anthropic_beta_headers=user_anthropic_beta_headers,
)
headers = {**headers, **anthropic_headers}
return headers
@staticmethod
def get_api_base(api_base: Optional[str] = None) -> Optional[str]:
return (
api_base
or get_secret_str("ANTHROPIC_API_BASE")
or "https://api.anthropic.com"
)
@staticmethod
def get_api_key(api_key: Optional[str] = None) -> Optional[str]:
return api_key or get_secret_str("ANTHROPIC_API_KEY")
@staticmethod
def get_base_model(model: Optional[str] = None) -> Optional[str]:
return model.replace("anthropic/", "") if model else None
def get_models(
self, api_key: Optional[str] = None, api_base: Optional[str] = None
) -> List[str]:
api_base = AnthropicModelInfo.get_api_base(api_base)
api_key = AnthropicModelInfo.get_api_key(api_key)
if api_base is None or api_key is None:
raise ValueError(
"ANTHROPIC_API_BASE or ANTHROPIC_API_KEY is not set. Please set the environment variable, to query Anthropic's `/models` endpoint."
)
response = litellm.module_level_client.get(
url=f"{api_base}/v1/models",
headers={"x-api-key": api_key, "anthropic-version": "2023-06-01"},
)
try:
response.raise_for_status()
except httpx.HTTPStatusError:
raise Exception(
f"Failed to fetch models from Anthropic. Status code: {response.status_code}, Response: {response.text}"
)
models = response.json()["data"]
litellm_model_names = []
for model in models:
stripped_model_name = model["id"]
litellm_model_name = "anthropic/" + stripped_model_name
litellm_model_names.append(litellm_model_name)
return litellm_model_names
def process_anthropic_headers(headers: Union[httpx.Headers, dict]) -> dict:
openai_headers = {}
if "anthropic-ratelimit-requests-limit" in headers:
openai_headers["x-ratelimit-limit-requests"] = headers[
"anthropic-ratelimit-requests-limit"
]
if "anthropic-ratelimit-requests-remaining" in headers:
openai_headers["x-ratelimit-remaining-requests"] = headers[
"anthropic-ratelimit-requests-remaining"
]
if "anthropic-ratelimit-tokens-limit" in headers:
openai_headers["x-ratelimit-limit-tokens"] = headers[
"anthropic-ratelimit-tokens-limit"
]
if "anthropic-ratelimit-tokens-remaining" in headers:
openai_headers["x-ratelimit-remaining-tokens"] = headers[
"anthropic-ratelimit-tokens-remaining"
]
llm_response_headers = {
"{}-{}".format("llm_provider", k): v for k, v in headers.items()
}
additional_headers = {**llm_response_headers, **openai_headers}
return additional_headers
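# Example mapping (values illustrative): every header is prefixed with
# "llm_provider-", and known Anthropic rate-limit headers are additionally
# exposed under their OpenAI-style names.
#
#   process_anthropic_headers(
#       {"anthropic-ratelimit-requests-remaining": "99", "request-id": "req_1"}
#   )
#   # -> {"llm_provider-anthropic-ratelimit-requests-remaining": "99",
#   #     "llm_provider-request-id": "req_1",
#   #     "x-ratelimit-remaining-requests": "99"}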


@@ -0,0 +1,5 @@
"""
Anthropic /complete API - uses `llm_http_handler.py` to make httpx requests
Request/Response transformation is handled in `transformation.py`
"""


@@ -0,0 +1,310 @@
"""
Translation logic for anthropic's `/v1/complete` endpoint
Litellm provider slug: `anthropic_text/<model_name>`
"""
import json
import time
from typing import AsyncIterator, Dict, Iterator, List, Optional, Union
import httpx
import litellm
from litellm.constants import DEFAULT_MAX_TOKENS
from litellm.litellm_core_utils.prompt_templates.factory import (
custom_prompt,
prompt_factory,
)
from litellm.llms.base_llm.base_model_iterator import BaseModelResponseIterator
from litellm.llms.base_llm.chat.transformation import (
BaseConfig,
BaseLLMException,
LiteLLMLoggingObj,
)
from litellm.types.llms.openai import AllMessageValues
from litellm.types.utils import (
ChatCompletionToolCallChunk,
ChatCompletionUsageBlock,
GenericStreamingChunk,
ModelResponse,
Usage,
)
class AnthropicTextError(BaseLLMException):
def __init__(self, status_code, message):
self.status_code = status_code
self.message = message
self.request = httpx.Request(
method="POST", url="https://api.anthropic.com/v1/complete"
)
self.response = httpx.Response(status_code=status_code, request=self.request)
super().__init__(
message=self.message,
status_code=self.status_code,
request=self.request,
response=self.response,
) # Call the base class constructor with the parameters it needs
class AnthropicTextConfig(BaseConfig):
"""
Reference: https://docs.anthropic.com/claude/reference/complete_post
To pass metadata to Anthropic, use {"user_id": "any-relevant-information"}.
"""
max_tokens_to_sample: Optional[
int
] = litellm.max_tokens # anthropic requires a default
stop_sequences: Optional[list] = None
temperature: Optional[int] = None
top_p: Optional[int] = None
top_k: Optional[int] = None
metadata: Optional[dict] = None
def __init__(
self,
max_tokens_to_sample: Optional[
int
] = DEFAULT_MAX_TOKENS, # anthropic requires a default
stop_sequences: Optional[list] = None,
temperature: Optional[int] = None,
top_p: Optional[int] = None,
top_k: Optional[int] = None,
metadata: Optional[dict] = None,
) -> None:
locals_ = locals().copy()
for key, value in locals_.items():
if key != "self" and value is not None:
setattr(self.__class__, key, value)
# makes headers for API call
def validate_environment(
self,
headers: dict,
model: str,
messages: List[AllMessageValues],
optional_params: dict,
litellm_params: dict,
api_key: Optional[str] = None,
api_base: Optional[str] = None,
) -> dict:
if api_key is None:
raise ValueError(
"Missing Anthropic API Key - A call is being made to anthropic but no key is set either in the environment variables or via params"
)
_headers = {
"accept": "application/json",
"anthropic-version": "2023-06-01",
"content-type": "application/json",
"x-api-key": api_key,
}
headers.update(_headers)
return headers
def transform_request(
self,
model: str,
messages: List[AllMessageValues],
optional_params: dict,
litellm_params: dict,
headers: dict,
) -> dict:
prompt = self._get_anthropic_text_prompt_from_messages(
messages=messages, model=model
)
## Load Config
config = litellm.AnthropicTextConfig.get_config()
for k, v in config.items():
if (
k not in optional_params
): # completion(top_k=3) > anthropic_config(top_k=3) <- allows for dynamic variables to be passed in
optional_params[k] = v
data = {
"model": model,
"prompt": prompt,
**optional_params,
}
return data
def get_supported_openai_params(self, model: str):
"""
Anthropic /complete API Ref: https://docs.anthropic.com/en/api/complete
"""
return [
"stream",
"max_tokens",
"max_completion_tokens",
"stop",
"temperature",
"top_p",
"extra_headers",
"user",
]
def map_openai_params(
self,
non_default_params: dict,
optional_params: dict,
model: str,
drop_params: bool,
) -> dict:
"""
Follows the same logic as AnthropicConfig.map_openai_params (the Anthropic /messages API).
Note: the only difference is in the supported OpenAI params between AnthropicConfig and AnthropicTextConfig.
API Ref: https://docs.anthropic.com/en/api/complete
"""
for param, value in non_default_params.items():
if param == "max_tokens":
optional_params["max_tokens_to_sample"] = value
if param == "max_completion_tokens":
optional_params["max_tokens_to_sample"] = value
if param == "stream" and value is True:
optional_params["stream"] = value
if param == "stop" and (isinstance(value, str) or isinstance(value, list)):
_value = litellm.AnthropicConfig()._map_stop_sequences(value)
if _value is not None:
optional_params["stop_sequences"] = _value
if param == "temperature":
optional_params["temperature"] = value
if param == "top_p":
optional_params["top_p"] = value
if param == "user":
optional_params["metadata"] = {"user_id": value}
return optional_params
def transform_response(
self,
model: str,
raw_response: httpx.Response,
model_response: ModelResponse,
logging_obj: LiteLLMLoggingObj,
request_data: dict,
messages: List[AllMessageValues],
optional_params: dict,
litellm_params: dict,
encoding: str,
api_key: Optional[str] = None,
json_mode: Optional[bool] = None,
) -> ModelResponse:
try:
completion_response = raw_response.json()
except Exception:
raise AnthropicTextError(
message=raw_response.text, status_code=raw_response.status_code
)
prompt = self._get_anthropic_text_prompt_from_messages(
messages=messages, model=model
)
if "error" in completion_response:
raise AnthropicTextError(
message=str(completion_response["error"]),
status_code=raw_response.status_code,
)
else:
if len(completion_response["completion"]) > 0:
model_response.choices[0].message.content = completion_response[ # type: ignore
"completion"
]
model_response.choices[0].finish_reason = completion_response["stop_reason"]
## CALCULATING USAGE
prompt_tokens = len(
encoding.encode(prompt)
) ##[TODO] use the anthropic tokenizer here
completion_tokens = len(
encoding.encode(model_response["choices"][0]["message"].get("content", ""))
) ##[TODO] use the anthropic tokenizer here
model_response.created = int(time.time())
model_response.model = model
usage = Usage(
prompt_tokens=prompt_tokens,
completion_tokens=completion_tokens,
total_tokens=prompt_tokens + completion_tokens,
)
setattr(model_response, "usage", usage)
return model_response
def get_error_class(
self, error_message: str, status_code: int, headers: Union[Dict, httpx.Headers]
) -> BaseLLMException:
return AnthropicTextError(
status_code=status_code,
message=error_message,
)
    @staticmethod
    def _is_anthropic_text_model(model: str) -> bool:
        return model in ("claude-2", "claude-instant-1")
def _get_anthropic_text_prompt_from_messages(
self, messages: List[AllMessageValues], model: str
) -> str:
custom_prompt_dict = litellm.custom_prompt_dict
if model in custom_prompt_dict:
# check if the model has a registered custom prompt
model_prompt_details = custom_prompt_dict[model]
prompt = custom_prompt(
role_dict=model_prompt_details["roles"],
initial_prompt_value=model_prompt_details["initial_prompt_value"],
final_prompt_value=model_prompt_details["final_prompt_value"],
messages=messages,
)
else:
prompt = prompt_factory(
model=model, messages=messages, custom_llm_provider="anthropic"
)
return str(prompt)
def get_model_response_iterator(
self,
streaming_response: Union[Iterator[str], AsyncIterator[str], ModelResponse],
sync_stream: bool,
json_mode: Optional[bool] = False,
):
return AnthropicTextCompletionResponseIterator(
streaming_response=streaming_response,
sync_stream=sync_stream,
json_mode=json_mode,
)
class AnthropicTextCompletionResponseIterator(BaseModelResponseIterator):
def chunk_parser(self, chunk: dict) -> GenericStreamingChunk:
try:
text = ""
tool_use: Optional[ChatCompletionToolCallChunk] = None
is_finished = False
finish_reason = ""
usage: Optional[ChatCompletionUsageBlock] = None
provider_specific_fields = None
index = int(chunk.get("index", 0))
_chunk_text = chunk.get("completion", None)
if _chunk_text is not None and isinstance(_chunk_text, str):
text = _chunk_text
            _stop_reason = chunk.get("stop_reason", None)
            if _stop_reason is not None:
                finish_reason = _stop_reason
                is_finished = True
returned_chunk = GenericStreamingChunk(
text=text,
tool_use=tool_use,
is_finished=is_finished,
finish_reason=finish_reason,
usage=usage,
index=index,
provider_specific_fields=provider_specific_fields,
)
return returned_chunk
except json.JSONDecodeError:
raise ValueError(f"Failed to decode JSON from chunk: {chunk}")
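# A sketch of how a raw /complete stream chunk maps onto GenericStreamingChunk,
# assuming a final chunk that carries a stop_reason:
#
#   {"completion": " world", "stop_reason": "stop_sequence", "index": 0}
#   -> GenericStreamingChunk(text=" world", is_finished=True,
#                            finish_reason="stop_sequence", index=0, ...)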

View File

@@ -0,0 +1,25 @@
"""
Helper util for handling anthropic-specific cost calculation
- e.g.: prompt caching
"""
from typing import Tuple
from litellm.litellm_core_utils.llm_cost_calc.utils import generic_cost_per_token
from litellm.types.utils import Usage
def cost_per_token(model: str, usage: Usage) -> Tuple[float, float]:
"""
Calculates the cost per token for a given model, prompt tokens, and completion tokens.
Input:
- model: str, the model name without provider prefix
- usage: LiteLLM Usage block, containing anthropic caching information
Returns:
Tuple[float, float] - prompt_cost_in_usd, completion_cost_in_usd
"""
return generic_cost_per_token(
model=model, usage=usage, custom_llm_provider="anthropic"
)
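# A minimal usage sketch - the token counts are made up, and the returned costs
# depend on the model's per-token pricing in litellm's model cost map:
#
#   usage = Usage(prompt_tokens=100, completion_tokens=20, total_tokens=120)
#   prompt_cost, completion_cost = cost_per_token(
#       model="claude-3-5-sonnet-20240620", usage=usage
#   )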

View File

@@ -0,0 +1,197 @@
"""
- call /messages on Anthropic API
- Make streaming + non-streaming requests - pass them straight through to Anthropic; no special handling is needed here
- Ensure requests are logged in the DB - stream + non-stream
"""
import json
from typing import AsyncIterator, Dict, List, Optional, Union, cast
import httpx
import litellm
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
from litellm.llms.base_llm.anthropic_messages.transformation import (
BaseAnthropicMessagesConfig,
)
from litellm.llms.custom_httpx.http_handler import (
AsyncHTTPHandler,
get_async_httpx_client,
)
from litellm.types.llms.anthropic_messages.anthropic_response import (
AnthropicMessagesResponse,
)
from litellm.types.router import GenericLiteLLMParams
from litellm.types.utils import ProviderSpecificHeader
from litellm.utils import ProviderConfigManager, client
class AnthropicMessagesHandler:
@staticmethod
async def _handle_anthropic_streaming(
response: httpx.Response,
request_body: dict,
litellm_logging_obj: LiteLLMLoggingObj,
) -> AsyncIterator:
"""Helper function to handle Anthropic streaming responses using the existing logging handlers"""
from datetime import datetime
from litellm.proxy.pass_through_endpoints.streaming_handler import (
PassThroughStreamingHandler,
)
from litellm.proxy.pass_through_endpoints.success_handler import (
PassThroughEndpointLogging,
)
from litellm.types.passthrough_endpoints.pass_through_endpoints import (
EndpointType,
)
# Create success handler object
passthrough_success_handler_obj = PassThroughEndpointLogging()
# Use the existing streaming handler for Anthropic
start_time = datetime.now()
return PassThroughStreamingHandler.chunk_processor(
response=response,
request_body=request_body,
litellm_logging_obj=litellm_logging_obj,
endpoint_type=EndpointType.ANTHROPIC,
start_time=start_time,
passthrough_success_handler_obj=passthrough_success_handler_obj,
url_route="/v1/messages",
)
@client
async def anthropic_messages(
max_tokens: int,
messages: List[Dict],
model: str,
metadata: Optional[Dict] = None,
stop_sequences: Optional[List[str]] = None,
stream: Optional[bool] = False,
system: Optional[str] = None,
temperature: Optional[float] = None,
thinking: Optional[Dict] = None,
tool_choice: Optional[Dict] = None,
tools: Optional[List[Dict]] = None,
top_k: Optional[int] = None,
top_p: Optional[float] = None,
api_key: Optional[str] = None,
api_base: Optional[str] = None,
client: Optional[AsyncHTTPHandler] = None,
custom_llm_provider: Optional[str] = None,
**kwargs,
) -> Union[AnthropicMessagesResponse, AsyncIterator]:
"""
    Makes Anthropic `/v1/messages` API calls, following the Anthropic API spec
"""
    optional_params = GenericLiteLLMParams(**kwargs)
(
model,
_custom_llm_provider,
dynamic_api_key,
dynamic_api_base,
) = litellm.get_llm_provider(
model=model,
custom_llm_provider=custom_llm_provider,
api_base=optional_params.api_base,
api_key=optional_params.api_key,
)
anthropic_messages_provider_config: Optional[BaseAnthropicMessagesConfig] = (
ProviderConfigManager.get_provider_anthropic_messages_config(
model=model,
provider=litellm.LlmProviders(_custom_llm_provider),
)
)
if anthropic_messages_provider_config is None:
raise ValueError(
f"Anthropic messages provider config not found for model: {model}"
)
    # Use the provided client or create a new one
    if client is None or not isinstance(client, AsyncHTTPHandler):
async_httpx_client = get_async_httpx_client(
llm_provider=litellm.LlmProviders.ANTHROPIC
)
else:
async_httpx_client = client
    litellm_logging_obj: LiteLLMLoggingObj = kwargs.get(
        "litellm_logging_obj", None
    )  # injected by the @client decorator
# Prepare headers
provider_specific_header = cast(
Optional[ProviderSpecificHeader], kwargs.get("provider_specific_header", None)
)
extra_headers = (
provider_specific_header.get("extra_headers", {})
if provider_specific_header
else {}
)
headers = anthropic_messages_provider_config.validate_environment(
headers=extra_headers or {},
model=model,
api_key=api_key,
)
litellm_logging_obj.update_environment_variables(
model=model,
optional_params=dict(optional_params),
litellm_params={
"metadata": kwargs.get("metadata", {}),
"preset_cache_key": None,
"stream_response": {},
**optional_params.model_dump(exclude_unset=True),
},
custom_llm_provider=_custom_llm_provider,
)
# Prepare request body
request_body = locals().copy()
request_body = {
k: v
for k, v in request_body.items()
if k
in anthropic_messages_provider_config.get_supported_anthropic_messages_params(
model=model
)
and v is not None
}
request_body["stream"] = stream
request_body["model"] = model
litellm_logging_obj.stream = stream
litellm_logging_obj.model_call_details.update(request_body)
# Make the request
request_url = anthropic_messages_provider_config.get_complete_url(
api_base=api_base, model=model
)
litellm_logging_obj.pre_call(
input=[{"role": "user", "content": json.dumps(request_body)}],
api_key="",
additional_args={
"complete_input_dict": request_body,
"api_base": str(request_url),
"headers": headers,
},
)
response = await async_httpx_client.post(
url=request_url,
headers=headers,
data=json.dumps(request_body),
stream=stream or False,
)
response.raise_for_status()
# used for logging + cost tracking
litellm_logging_obj.model_call_details["httpx_response"] = response
if stream:
return await AnthropicMessagesHandler._handle_anthropic_streaming(
response=response,
request_body=request_body,
litellm_logging_obj=litellm_logging_obj,
)
else:
return response.json()
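# A minimal usage sketch (assumes ANTHROPIC_API_KEY is set in the environment;
# the model name is illustrative):
#
#   import asyncio
#
#   response = asyncio.run(
#       anthropic_messages(
#           max_tokens=100,
#           messages=[{"role": "user", "content": "Hello"}],
#           model="anthropic/claude-3-5-sonnet-20240620",
#       )
#   )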

View File

@@ -0,0 +1,47 @@
from typing import Optional
from litellm.llms.base_llm.anthropic_messages.transformation import (
BaseAnthropicMessagesConfig,
)
DEFAULT_ANTHROPIC_API_BASE = "https://api.anthropic.com"
DEFAULT_ANTHROPIC_API_VERSION = "2023-06-01"
class AnthropicMessagesConfig(BaseAnthropicMessagesConfig):
def get_supported_anthropic_messages_params(self, model: str) -> list:
return [
"messages",
"model",
"system",
"max_tokens",
"stop_sequences",
"temperature",
"top_p",
"top_k",
"tools",
"tool_choice",
"thinking",
# TODO: Add Anthropic `metadata` support
# "metadata",
]
def get_complete_url(self, api_base: Optional[str], model: str) -> str:
api_base = api_base or DEFAULT_ANTHROPIC_API_BASE
if not api_base.endswith("/v1/messages"):
api_base = f"{api_base}/v1/messages"
return api_base
def validate_environment(
self,
headers: dict,
model: str,
api_key: Optional[str] = None,
) -> dict:
if "x-api-key" not in headers:
headers["x-api-key"] = api_key
if "anthropic-version" not in headers:
headers["anthropic-version"] = DEFAULT_ANTHROPIC_API_VERSION
if "content-type" not in headers:
headers["content-type"] = "application/json"
return headers
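# A short sketch of how this config resolves URLs and headers - the api_key
# value is a placeholder:
#
#   config = AnthropicMessagesConfig()
#   config.get_complete_url(api_base=None, model="claude-3-5-sonnet-20240620")
#   # -> "https://api.anthropic.com/v1/messages"
#   config.validate_environment(headers={}, model="claude-3-5-sonnet-20240620", api_key="sk-ant-...")
#   # -> {"x-api-key": "sk-ant-...", "anthropic-version": "2023-06-01", "content-type": "application/json"}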