structure saas with tools

This commit is contained in:
Davidson Gomes
2025-04-25 15:30:54 -03:00
commit 1aef473937
16434 changed files with 6584257 additions and 0 deletions

View File

@@ -0,0 +1 @@
from .handler import AnthropicChatCompletion, ModelResponseIterator

View File

@@ -0,0 +1,846 @@
"""
Calling + translation logic for anthropic's `/v1/messages` endpoint
"""
import copy
import json
from typing import Any, Callable, Dict, List, Optional, Tuple, Union, cast
import httpx # type: ignore
import litellm
import litellm.litellm_core_utils
import litellm.types
import litellm.types.utils
from litellm import LlmProviders
from litellm.litellm_core_utils.core_helpers import map_finish_reason
from litellm.llms.base_llm.chat.transformation import BaseConfig
from litellm.llms.custom_httpx.http_handler import (
AsyncHTTPHandler,
HTTPHandler,
get_async_httpx_client,
)
from litellm.types.llms.anthropic import (
ContentBlockDelta,
ContentBlockStart,
ContentBlockStop,
MessageBlockDelta,
MessageStartBlock,
UsageDelta,
)
from litellm.types.llms.openai import (
ChatCompletionRedactedThinkingBlock,
ChatCompletionThinkingBlock,
ChatCompletionToolCallChunk,
)
from litellm.types.utils import (
Delta,
GenericStreamingChunk,
ModelResponseStream,
StreamingChoices,
Usage,
)
from litellm.utils import CustomStreamWrapper, ModelResponse, ProviderConfigManager
from ...base import BaseLLM
from ..common_utils import AnthropicError, process_anthropic_headers
from .transformation import AnthropicConfig
async def make_call(
client: Optional[AsyncHTTPHandler],
api_base: str,
headers: dict,
data: str,
model: str,
messages: list,
logging_obj,
timeout: Optional[Union[float, httpx.Timeout]],
json_mode: bool,
) -> Tuple[Any, httpx.Headers]:
if client is None:
client = litellm.module_level_aclient
try:
response = await client.post(
api_base, headers=headers, data=data, stream=True, timeout=timeout
)
except httpx.HTTPStatusError as e:
error_headers = getattr(e, "headers", None)
error_response = getattr(e, "response", None)
if error_headers is None and error_response:
error_headers = getattr(error_response, "headers", None)
raise AnthropicError(
status_code=e.response.status_code,
message=await e.response.aread(),
headers=error_headers,
)
except Exception as e:
for exception in litellm.LITELLM_EXCEPTION_TYPES:
if isinstance(e, exception):
raise e
raise AnthropicError(status_code=500, message=str(e))
completion_stream = ModelResponseIterator(
streaming_response=response.aiter_lines(),
sync_stream=False,
json_mode=json_mode,
)
# LOGGING
logging_obj.post_call(
input=messages,
api_key="",
original_response=completion_stream, # Pass the completion stream for logging
additional_args={"complete_input_dict": data},
)
return completion_stream, response.headers
def make_sync_call(
client: Optional[HTTPHandler],
api_base: str,
headers: dict,
data: str,
model: str,
messages: list,
logging_obj,
timeout: Optional[Union[float, httpx.Timeout]],
json_mode: bool,
) -> Tuple[Any, httpx.Headers]:
if client is None:
client = litellm.module_level_client # re-use a module level client
try:
response = client.post(
api_base, headers=headers, data=data, stream=True, timeout=timeout
)
except httpx.HTTPStatusError as e:
error_headers = getattr(e, "headers", None)
error_response = getattr(e, "response", None)
if error_headers is None and error_response:
error_headers = getattr(error_response, "headers", None)
raise AnthropicError(
status_code=e.response.status_code,
message=e.response.read(),
headers=error_headers,
)
except Exception as e:
for exception in litellm.LITELLM_EXCEPTION_TYPES:
if isinstance(e, exception):
raise e
raise AnthropicError(status_code=500, message=str(e))
if response.status_code != 200:
response_headers = getattr(response, "headers", None)
raise AnthropicError(
status_code=response.status_code,
message=response.read(),
headers=response_headers,
)
completion_stream = ModelResponseIterator(
streaming_response=response.iter_lines(), sync_stream=True, json_mode=json_mode
)
# LOGGING
logging_obj.post_call(
input=messages,
api_key="",
original_response="first stream response received",
additional_args={"complete_input_dict": data},
)
return completion_stream, response.headers
class AnthropicChatCompletion(BaseLLM):
def __init__(self) -> None:
super().__init__()
async def acompletion_stream_function(
self,
model: str,
messages: list,
api_base: str,
custom_prompt_dict: dict,
model_response: ModelResponse,
print_verbose: Callable,
timeout: Union[float, httpx.Timeout],
client: Optional[AsyncHTTPHandler],
encoding,
api_key,
logging_obj,
stream,
_is_function_call,
data: dict,
json_mode: bool,
optional_params=None,
litellm_params=None,
logger_fn=None,
headers={},
):
data["stream"] = True
completion_stream, headers = await make_call(
client=client,
api_base=api_base,
headers=headers,
data=json.dumps(data),
model=model,
messages=messages,
logging_obj=logging_obj,
timeout=timeout,
json_mode=json_mode,
)
streamwrapper = CustomStreamWrapper(
completion_stream=completion_stream,
model=model,
custom_llm_provider="anthropic",
logging_obj=logging_obj,
_response_headers=process_anthropic_headers(headers),
)
return streamwrapper
async def acompletion_function(
self,
model: str,
messages: list,
api_base: str,
custom_prompt_dict: dict,
model_response: ModelResponse,
print_verbose: Callable,
timeout: Union[float, httpx.Timeout],
encoding,
api_key,
logging_obj,
stream,
_is_function_call,
data: dict,
optional_params: dict,
json_mode: bool,
litellm_params: dict,
provider_config: BaseConfig,
logger_fn=None,
headers={},
client: Optional[AsyncHTTPHandler] = None,
) -> Union[ModelResponse, CustomStreamWrapper]:
async_handler = client or get_async_httpx_client(
llm_provider=litellm.LlmProviders.ANTHROPIC
)
try:
response = await async_handler.post(
api_base, headers=headers, json=data, timeout=timeout
)
except Exception as e:
## LOGGING
logging_obj.post_call(
input=messages,
api_key=api_key,
original_response=str(e),
additional_args={"complete_input_dict": data},
)
status_code = getattr(e, "status_code", 500)
error_headers = getattr(e, "headers", None)
error_text = getattr(e, "text", str(e))
error_response = getattr(e, "response", None)
if error_headers is None and error_response:
error_headers = getattr(error_response, "headers", None)
if error_response and hasattr(error_response, "text"):
error_text = getattr(error_response, "text", error_text)
raise AnthropicError(
message=error_text,
status_code=status_code,
headers=error_headers,
)
return provider_config.transform_response(
model=model,
raw_response=response,
model_response=model_response,
logging_obj=logging_obj,
api_key=api_key,
request_data=data,
messages=messages,
optional_params=optional_params,
litellm_params=litellm_params,
encoding=encoding,
json_mode=json_mode,
)
def completion(
self,
model: str,
messages: list,
api_base: str,
custom_llm_provider: str,
custom_prompt_dict: dict,
model_response: ModelResponse,
print_verbose: Callable,
encoding,
api_key,
logging_obj,
optional_params: dict,
timeout: Union[float, httpx.Timeout],
litellm_params: dict,
acompletion=None,
logger_fn=None,
headers={},
client=None,
):
optional_params = copy.deepcopy(optional_params)
stream = optional_params.pop("stream", None)
json_mode: bool = optional_params.pop("json_mode", False)
is_vertex_request: bool = optional_params.pop("is_vertex_request", False)
_is_function_call = False
messages = copy.deepcopy(messages)
headers = AnthropicConfig().validate_environment(
api_key=api_key,
headers=headers,
model=model,
messages=messages,
optional_params={**optional_params, "is_vertex_request": is_vertex_request},
litellm_params=litellm_params,
)
config = ProviderConfigManager.get_provider_chat_config(
model=model,
provider=LlmProviders(custom_llm_provider),
)
if config is None:
raise ValueError(
f"Provider config not found for model: {model} and provider: {custom_llm_provider}"
)
data = config.transform_request(
model=model,
messages=messages,
optional_params=optional_params,
litellm_params=litellm_params,
headers=headers,
)
## LOGGING
logging_obj.pre_call(
input=messages,
api_key=api_key,
additional_args={
"complete_input_dict": data,
"api_base": api_base,
"headers": headers,
},
)
print_verbose(f"_is_function_call: {_is_function_call}")
if acompletion is True:
if (
stream is True
): # if function call - fake the streaming (need complete blocks for output parsing in openai format)
print_verbose("makes async anthropic streaming POST request")
data["stream"] = stream
return self.acompletion_stream_function(
model=model,
messages=messages,
data=data,
api_base=api_base,
custom_prompt_dict=custom_prompt_dict,
model_response=model_response,
print_verbose=print_verbose,
encoding=encoding,
api_key=api_key,
logging_obj=logging_obj,
optional_params=optional_params,
stream=stream,
_is_function_call=_is_function_call,
json_mode=json_mode,
litellm_params=litellm_params,
logger_fn=logger_fn,
headers=headers,
timeout=timeout,
client=(
client
if client is not None and isinstance(client, AsyncHTTPHandler)
else None
),
)
else:
return self.acompletion_function(
model=model,
messages=messages,
data=data,
api_base=api_base,
custom_prompt_dict=custom_prompt_dict,
model_response=model_response,
print_verbose=print_verbose,
encoding=encoding,
api_key=api_key,
provider_config=config,
logging_obj=logging_obj,
optional_params=optional_params,
stream=stream,
_is_function_call=_is_function_call,
litellm_params=litellm_params,
logger_fn=logger_fn,
headers=headers,
client=client,
json_mode=json_mode,
timeout=timeout,
)
else:
## COMPLETION CALL
if (
stream is True
): # if function call - fake the streaming (need complete blocks for output parsing in openai format)
data["stream"] = stream
completion_stream, headers = make_sync_call(
client=client,
api_base=api_base,
headers=headers, # type: ignore
data=json.dumps(data),
model=model,
messages=messages,
logging_obj=logging_obj,
timeout=timeout,
json_mode=json_mode,
)
return CustomStreamWrapper(
completion_stream=completion_stream,
model=model,
custom_llm_provider="anthropic",
logging_obj=logging_obj,
_response_headers=process_anthropic_headers(headers),
)
else:
if client is None or not isinstance(client, HTTPHandler):
client = HTTPHandler(timeout=timeout) # type: ignore
else:
client = client
try:
response = client.post(
api_base,
headers=headers,
data=json.dumps(data),
timeout=timeout,
)
except Exception as e:
status_code = getattr(e, "status_code", 500)
error_headers = getattr(e, "headers", None)
error_text = getattr(e, "text", str(e))
error_response = getattr(e, "response", None)
if error_headers is None and error_response:
error_headers = getattr(error_response, "headers", None)
if error_response and hasattr(error_response, "text"):
error_text = getattr(error_response, "text", error_text)
raise AnthropicError(
message=error_text,
status_code=status_code,
headers=error_headers,
)
return config.transform_response(
model=model,
raw_response=response,
model_response=model_response,
logging_obj=logging_obj,
api_key=api_key,
request_data=data,
messages=messages,
optional_params=optional_params,
litellm_params=litellm_params,
encoding=encoding,
json_mode=json_mode,
)
def embedding(self):
# logic for parsing in - calling - parsing out model embedding calls
pass
class ModelResponseIterator:
def __init__(
self, streaming_response, sync_stream: bool, json_mode: Optional[bool] = False
):
self.streaming_response = streaming_response
self.response_iterator = self.streaming_response
self.content_blocks: List[ContentBlockDelta] = []
self.tool_index = -1
self.json_mode = json_mode
def check_empty_tool_call_args(self) -> bool:
"""
Check if the tool call block so far has been an empty string
"""
args = ""
# if text content block -> skip
if len(self.content_blocks) == 0:
return False
if (
self.content_blocks[0]["delta"]["type"] == "text_delta"
or self.content_blocks[0]["delta"]["type"] == "thinking_delta"
):
return False
for block in self.content_blocks:
if block["delta"]["type"] == "input_json_delta":
args += block["delta"].get("partial_json", "") # type: ignore
if len(args) == 0:
return True
return False
def _handle_usage(self, anthropic_usage_chunk: Union[dict, UsageDelta]) -> Usage:
return AnthropicConfig().calculate_usage(
usage_object=cast(dict, anthropic_usage_chunk), reasoning_content=None
)
def _content_block_delta_helper(
self, chunk: dict
) -> Tuple[
str,
Optional[ChatCompletionToolCallChunk],
List[Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]],
Dict[str, Any],
]:
"""
Helper function to handle the content block delta
"""
text = ""
tool_use: Optional[ChatCompletionToolCallChunk] = None
provider_specific_fields = {}
content_block = ContentBlockDelta(**chunk) # type: ignore
thinking_blocks: List[
Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]
] = []
self.content_blocks.append(content_block)
if "text" in content_block["delta"]:
text = content_block["delta"]["text"]
elif "partial_json" in content_block["delta"]:
tool_use = {
"id": None,
"type": "function",
"function": {
"name": None,
"arguments": content_block["delta"]["partial_json"],
},
"index": self.tool_index,
}
elif "citation" in content_block["delta"]:
provider_specific_fields["citation"] = content_block["delta"]["citation"]
elif (
"thinking" in content_block["delta"]
or "signature" in content_block["delta"]
):
thinking_blocks = [
ChatCompletionThinkingBlock(
type="thinking",
thinking=content_block["delta"].get("thinking") or "",
signature=content_block["delta"].get("signature"),
)
]
provider_specific_fields["thinking_blocks"] = thinking_blocks
return text, tool_use, thinking_blocks, provider_specific_fields
def _handle_reasoning_content(
self,
thinking_blocks: List[
Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]
],
) -> Optional[str]:
"""
Handle the reasoning content
"""
reasoning_content = None
for block in thinking_blocks:
thinking_content = cast(Optional[str], block.get("thinking"))
if reasoning_content is None:
reasoning_content = ""
if thinking_content is not None:
reasoning_content += thinking_content
return reasoning_content
def chunk_parser(self, chunk: dict) -> ModelResponseStream:
try:
type_chunk = chunk.get("type", "") or ""
text = ""
tool_use: Optional[ChatCompletionToolCallChunk] = None
finish_reason = ""
usage: Optional[Usage] = None
provider_specific_fields: Dict[str, Any] = {}
reasoning_content: Optional[str] = None
thinking_blocks: Optional[
List[
Union[
ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock
]
]
] = None
index = int(chunk.get("index", 0))
if type_chunk == "content_block_delta":
"""
Anthropic content chunk
chunk = {'type': 'content_block_delta', 'index': 0, 'delta': {'type': 'text_delta', 'text': 'Hello'}}
"""
(
text,
tool_use,
thinking_blocks,
provider_specific_fields,
) = self._content_block_delta_helper(chunk=chunk)
if thinking_blocks:
reasoning_content = self._handle_reasoning_content(
thinking_blocks=thinking_blocks
)
elif type_chunk == "content_block_start":
"""
event: content_block_start
data: {"type":"content_block_start","index":1,"content_block":{"type":"tool_use","id":"toolu_01T1x1fJ34qAmk2tNTrN7Up6","name":"get_weather","input":{}}}
"""
content_block_start = ContentBlockStart(**chunk) # type: ignore
self.content_blocks = [] # reset content blocks when new block starts
if content_block_start["content_block"]["type"] == "text":
text = content_block_start["content_block"]["text"]
elif content_block_start["content_block"]["type"] == "tool_use":
self.tool_index += 1
tool_use = {
"id": content_block_start["content_block"]["id"],
"type": "function",
"function": {
"name": content_block_start["content_block"]["name"],
"arguments": "",
},
"index": self.tool_index,
}
elif (
content_block_start["content_block"]["type"] == "redacted_thinking"
):
thinking_blocks = [
ChatCompletionRedactedThinkingBlock(
type="redacted_thinking",
data=content_block_start["content_block"]["data"],
)
]
elif type_chunk == "content_block_stop":
ContentBlockStop(**chunk) # type: ignore
# check if tool call content block
is_empty = self.check_empty_tool_call_args()
if is_empty:
tool_use = {
"id": None,
"type": "function",
"function": {
"name": None,
"arguments": "{}",
},
"index": self.tool_index,
}
elif type_chunk == "message_delta":
"""
Anthropic
chunk = {'type': 'message_delta', 'delta': {'stop_reason': 'max_tokens', 'stop_sequence': None}, 'usage': {'output_tokens': 10}}
"""
# TODO - get usage from this chunk, set in response
message_delta = MessageBlockDelta(**chunk) # type: ignore
finish_reason = map_finish_reason(
finish_reason=message_delta["delta"].get("stop_reason", "stop")
or "stop"
)
usage = self._handle_usage(anthropic_usage_chunk=message_delta["usage"])
elif type_chunk == "message_start":
"""
Anthropic
chunk = {
"type": "message_start",
"message": {
"id": "msg_vrtx_011PqREFEMzd3REdCoUFAmdG",
"type": "message",
"role": "assistant",
"model": "claude-3-sonnet-20240229",
"content": [],
"stop_reason": null,
"stop_sequence": null,
"usage": {
"input_tokens": 270,
"output_tokens": 1
}
}
}
"""
message_start_block = MessageStartBlock(**chunk) # type: ignore
if "usage" in message_start_block["message"]:
usage = self._handle_usage(
anthropic_usage_chunk=message_start_block["message"]["usage"]
)
elif type_chunk == "error":
"""
{"type":"error","error":{"details":null,"type":"api_error","message":"Internal server error"} }
"""
_error_dict = chunk.get("error", {}) or {}
message = _error_dict.get("message", None) or str(chunk)
raise AnthropicError(
message=message,
status_code=500, # it looks like Anthropic API does not return a status code in the chunk error - default to 500
)
text, tool_use = self._handle_json_mode_chunk(text=text, tool_use=tool_use)
returned_chunk = ModelResponseStream(
choices=[
StreamingChoices(
index=index,
delta=Delta(
content=text,
tool_calls=[tool_use] if tool_use is not None else None,
provider_specific_fields=(
provider_specific_fields
if provider_specific_fields
else None
),
thinking_blocks=(
thinking_blocks if thinking_blocks else None
),
reasoning_content=reasoning_content,
),
finish_reason=finish_reason,
)
],
usage=usage,
)
return returned_chunk
except json.JSONDecodeError:
raise ValueError(f"Failed to decode JSON from chunk: {chunk}")
def _handle_json_mode_chunk(
self, text: str, tool_use: Optional[ChatCompletionToolCallChunk]
) -> Tuple[str, Optional[ChatCompletionToolCallChunk]]:
"""
If JSON mode is enabled, convert the tool call to a message.
Anthropic returns the JSON schema as part of the tool call
OpenAI returns the JSON schema as part of the content, this handles placing it in the content
Args:
text: str
tool_use: Optional[ChatCompletionToolCallChunk]
Returns:
Tuple[str, Optional[ChatCompletionToolCallChunk]]
text: The text to use in the content
tool_use: The ChatCompletionToolCallChunk to use in the chunk response
"""
if self.json_mode is True and tool_use is not None:
message = AnthropicConfig._convert_tool_response_to_message(
tool_calls=[tool_use]
)
if message is not None:
text = message.content or ""
tool_use = None
return text, tool_use
# Sync iterator
def __iter__(self):
return self
def __next__(self):
try:
chunk = self.response_iterator.__next__()
except StopIteration:
raise StopIteration
except ValueError as e:
raise RuntimeError(f"Error receiving chunk from stream: {e}")
try:
str_line = chunk
if isinstance(chunk, bytes): # Handle binary data
str_line = chunk.decode("utf-8") # Convert bytes to string
index = str_line.find("data:")
if index != -1:
str_line = str_line[index:]
if str_line.startswith("data:"):
data_json = json.loads(str_line[5:])
return self.chunk_parser(chunk=data_json)
else:
return GenericStreamingChunk(
text="",
is_finished=False,
finish_reason="",
usage=None,
index=0,
tool_use=None,
)
except StopIteration:
raise StopIteration
except ValueError as e:
raise RuntimeError(f"Error parsing chunk: {e},\nReceived chunk: {chunk}")
# Async iterator
def __aiter__(self):
self.async_response_iterator = self.streaming_response.__aiter__()
return self
async def __anext__(self):
try:
chunk = await self.async_response_iterator.__anext__()
except StopAsyncIteration:
raise StopAsyncIteration
except ValueError as e:
raise RuntimeError(f"Error receiving chunk from stream: {e}")
try:
str_line = chunk
if isinstance(chunk, bytes): # Handle binary data
str_line = chunk.decode("utf-8") # Convert bytes to string
index = str_line.find("data:")
if index != -1:
str_line = str_line[index:]
if str_line.startswith("data:"):
data_json = json.loads(str_line[5:])
return self.chunk_parser(chunk=data_json)
else:
return GenericStreamingChunk(
text="",
is_finished=False,
finish_reason="",
usage=None,
index=0,
tool_use=None,
)
except StopAsyncIteration:
raise StopAsyncIteration
except ValueError as e:
raise RuntimeError(f"Error parsing chunk: {e},\nReceived chunk: {chunk}")
def convert_str_chunk_to_generic_chunk(self, chunk: str) -> ModelResponseStream:
"""
Convert a string chunk to a GenericStreamingChunk
Note: This is used for Anthropic pass through streaming logging
We can move __anext__, and __next__ to use this function since it's common logic.
Did not migrate them to minmize changes made in 1 PR.
"""
str_line = chunk
if isinstance(chunk, bytes): # Handle binary data
str_line = chunk.decode("utf-8") # Convert bytes to string
index = str_line.find("data:")
if index != -1:
str_line = str_line[index:]
if str_line.startswith("data:"):
data_json = json.loads(str_line[5:])
return self.chunk_parser(chunk=data_json)
else:
return ModelResponseStream()

View File

@@ -0,0 +1,823 @@
import json
import time
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union, cast
import httpx
import litellm
from litellm.constants import (
DEFAULT_ANTHROPIC_CHAT_MAX_TOKENS,
DEFAULT_REASONING_EFFORT_HIGH_THINKING_BUDGET,
DEFAULT_REASONING_EFFORT_LOW_THINKING_BUDGET,
DEFAULT_REASONING_EFFORT_MEDIUM_THINKING_BUDGET,
RESPONSE_FORMAT_TOOL_NAME,
)
from litellm.litellm_core_utils.core_helpers import map_finish_reason
from litellm.litellm_core_utils.prompt_templates.factory import anthropic_messages_pt
from litellm.llms.base_llm.base_utils import type_to_response_format_param
from litellm.llms.base_llm.chat.transformation import BaseConfig, BaseLLMException
from litellm.types.llms.anthropic import (
AllAnthropicToolsValues,
AnthropicComputerTool,
AnthropicHostedTools,
AnthropicInputSchema,
AnthropicMessagesTool,
AnthropicMessagesToolChoice,
AnthropicSystemMessageContent,
AnthropicThinkingParam,
)
from litellm.types.llms.openai import (
REASONING_EFFORT,
AllMessageValues,
ChatCompletionCachedContent,
ChatCompletionRedactedThinkingBlock,
ChatCompletionSystemMessage,
ChatCompletionThinkingBlock,
ChatCompletionToolCallChunk,
ChatCompletionToolCallFunctionChunk,
ChatCompletionToolParam,
)
from litellm.types.utils import CompletionTokensDetailsWrapper
from litellm.types.utils import Message as LitellmMessage
from litellm.types.utils import PromptTokensDetailsWrapper
from litellm.utils import (
ModelResponse,
Usage,
add_dummy_tool,
has_tool_call_blocks,
token_counter,
)
from ..common_utils import AnthropicError, AnthropicModelInfo, process_anthropic_headers
if TYPE_CHECKING:
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
LoggingClass = LiteLLMLoggingObj
else:
LoggingClass = Any
class AnthropicConfig(AnthropicModelInfo, BaseConfig):
"""
Reference: https://docs.anthropic.com/claude/reference/messages_post
to pass metadata to anthropic, it's {"user_id": "any-relevant-information"}
"""
max_tokens: Optional[
int
] = DEFAULT_ANTHROPIC_CHAT_MAX_TOKENS # anthropic requires a default value (Opus, Sonnet, and Haiku have the same default)
stop_sequences: Optional[list] = None
temperature: Optional[int] = None
top_p: Optional[int] = None
top_k: Optional[int] = None
metadata: Optional[dict] = None
system: Optional[str] = None
def __init__(
self,
max_tokens: Optional[
int
] = DEFAULT_ANTHROPIC_CHAT_MAX_TOKENS, # You can pass in a value yourself or use the default value 4096
stop_sequences: Optional[list] = None,
temperature: Optional[int] = None,
top_p: Optional[int] = None,
top_k: Optional[int] = None,
metadata: Optional[dict] = None,
system: Optional[str] = None,
) -> None:
locals_ = locals().copy()
for key, value in locals_.items():
if key != "self" and value is not None:
setattr(self.__class__, key, value)
@classmethod
def get_config(cls):
return super().get_config()
def get_supported_openai_params(self, model: str):
params = [
"stream",
"stop",
"temperature",
"top_p",
"max_tokens",
"max_completion_tokens",
"tools",
"tool_choice",
"extra_headers",
"parallel_tool_calls",
"response_format",
"user",
"reasoning_effort",
]
if "claude-3-7-sonnet" in model:
params.append("thinking")
return params
def get_json_schema_from_pydantic_object(
self, response_format: Union[Any, Dict, None]
) -> Optional[dict]:
return type_to_response_format_param(
response_format, ref_template="/$defs/{model}"
) # Relevant issue: https://github.com/BerriAI/litellm/issues/7755
def get_cache_control_headers(self) -> dict:
return {
"anthropic-version": "2023-06-01",
"anthropic-beta": "prompt-caching-2024-07-31",
}
def _map_tool_choice(
self, tool_choice: Optional[str], parallel_tool_use: Optional[bool]
) -> Optional[AnthropicMessagesToolChoice]:
_tool_choice: Optional[AnthropicMessagesToolChoice] = None
if tool_choice == "auto":
_tool_choice = AnthropicMessagesToolChoice(
type="auto",
)
elif tool_choice == "required":
_tool_choice = AnthropicMessagesToolChoice(type="any")
elif isinstance(tool_choice, dict):
_tool_name = tool_choice.get("function", {}).get("name")
_tool_choice = AnthropicMessagesToolChoice(type="tool")
if _tool_name is not None:
_tool_choice["name"] = _tool_name
if parallel_tool_use is not None:
# Anthropic uses 'disable_parallel_tool_use' flag to determine if parallel tool use is allowed
# this is the inverse of the openai flag.
if _tool_choice is not None:
_tool_choice["disable_parallel_tool_use"] = not parallel_tool_use
else: # use anthropic defaults and make sure to send the disable_parallel_tool_use flag
_tool_choice = AnthropicMessagesToolChoice(
type="auto",
disable_parallel_tool_use=not parallel_tool_use,
)
return _tool_choice
def _map_tool_helper(
self, tool: ChatCompletionToolParam
) -> AllAnthropicToolsValues:
returned_tool: Optional[AllAnthropicToolsValues] = None
if tool["type"] == "function" or tool["type"] == "custom":
_input_schema: dict = tool["function"].get(
"parameters",
{
"type": "object",
"properties": {},
},
)
input_schema: AnthropicInputSchema = AnthropicInputSchema(**_input_schema)
_tool = AnthropicMessagesTool(
name=tool["function"]["name"],
input_schema=input_schema,
)
_description = tool["function"].get("description")
if _description is not None:
_tool["description"] = _description
returned_tool = _tool
elif tool["type"].startswith("computer_"):
## check if all required 'display_' params are given
if "parameters" not in tool["function"]:
raise ValueError("Missing required parameter: parameters")
_display_width_px: Optional[int] = tool["function"]["parameters"].get(
"display_width_px"
)
_display_height_px: Optional[int] = tool["function"]["parameters"].get(
"display_height_px"
)
if _display_width_px is None or _display_height_px is None:
raise ValueError(
"Missing required parameter: display_width_px or display_height_px"
)
_computer_tool = AnthropicComputerTool(
type=tool["type"],
name=tool["function"].get("name", "computer"),
display_width_px=_display_width_px,
display_height_px=_display_height_px,
)
_display_number = tool["function"]["parameters"].get("display_number")
if _display_number is not None:
_computer_tool["display_number"] = _display_number
returned_tool = _computer_tool
elif tool["type"].startswith("bash_") or tool["type"].startswith(
"text_editor_"
):
function_name = tool["function"].get("name")
if function_name is None:
raise ValueError("Missing required parameter: name")
returned_tool = AnthropicHostedTools(
type=tool["type"],
name=function_name,
)
if returned_tool is None:
raise ValueError(f"Unsupported tool type: {tool['type']}")
## check if cache_control is set in the tool
_cache_control = tool.get("cache_control", None)
_cache_control_function = tool.get("function", {}).get("cache_control", None)
if _cache_control is not None:
returned_tool["cache_control"] = _cache_control
elif _cache_control_function is not None and isinstance(
_cache_control_function, dict
):
returned_tool["cache_control"] = ChatCompletionCachedContent(
**_cache_control_function # type: ignore
)
return returned_tool
def _map_tools(self, tools: List) -> List[AllAnthropicToolsValues]:
anthropic_tools = []
for tool in tools:
if "input_schema" in tool: # assume in anthropic format
anthropic_tools.append(tool)
else: # assume openai tool call
new_tool = self._map_tool_helper(tool)
anthropic_tools.append(new_tool)
return anthropic_tools
def _map_stop_sequences(
self, stop: Optional[Union[str, List[str]]]
) -> Optional[List[str]]:
new_stop: Optional[List[str]] = None
if isinstance(stop, str):
if (
stop.isspace() and litellm.drop_params is True
): # anthropic doesn't allow whitespace characters as stop-sequences
return new_stop
new_stop = [stop]
elif isinstance(stop, list):
new_v = []
for v in stop:
if (
v.isspace() and litellm.drop_params is True
): # anthropic doesn't allow whitespace characters as stop-sequences
continue
new_v.append(v)
if len(new_v) > 0:
new_stop = new_v
return new_stop
@staticmethod
def _map_reasoning_effort(
reasoning_effort: Optional[Union[REASONING_EFFORT, str]]
) -> Optional[AnthropicThinkingParam]:
if reasoning_effort is None:
return None
elif reasoning_effort == "low":
return AnthropicThinkingParam(
type="enabled",
budget_tokens=DEFAULT_REASONING_EFFORT_LOW_THINKING_BUDGET,
)
elif reasoning_effort == "medium":
return AnthropicThinkingParam(
type="enabled",
budget_tokens=DEFAULT_REASONING_EFFORT_MEDIUM_THINKING_BUDGET,
)
elif reasoning_effort == "high":
return AnthropicThinkingParam(
type="enabled",
budget_tokens=DEFAULT_REASONING_EFFORT_HIGH_THINKING_BUDGET,
)
else:
raise ValueError(f"Unmapped reasoning effort: {reasoning_effort}")
def map_response_format_to_anthropic_tool(
self, value: Optional[dict], optional_params: dict, is_thinking_enabled: bool
) -> Optional[AnthropicMessagesTool]:
ignore_response_format_types = ["text"]
if (
value is None or value["type"] in ignore_response_format_types
): # value is a no-op
return None
json_schema: Optional[dict] = None
if "response_schema" in value:
json_schema = value["response_schema"]
elif "json_schema" in value:
json_schema = value["json_schema"]["schema"]
"""
When using tools in this way: - https://docs.anthropic.com/en/docs/build-with-claude/tool-use#json-mode
- You usually want to provide a single tool
- You should set tool_choice (see Forcing tool use) to instruct the model to explicitly use that tool
- Remember that the model will pass the input to the tool, so the name of the tool and description should be from the models perspective.
"""
_tool = self._create_json_tool_call_for_response_format(
json_schema=json_schema,
)
return _tool
def map_openai_params(
self,
non_default_params: dict,
optional_params: dict,
model: str,
drop_params: bool,
) -> dict:
is_thinking_enabled = self.is_thinking_enabled(
non_default_params=non_default_params
)
for param, value in non_default_params.items():
if param == "max_tokens":
optional_params["max_tokens"] = value
if param == "max_completion_tokens":
optional_params["max_tokens"] = value
if param == "tools":
# check if optional params already has tools
tool_value = self._map_tools(value)
optional_params = self._add_tools_to_optional_params(
optional_params=optional_params, tools=tool_value
)
if param == "tool_choice" or param == "parallel_tool_calls":
_tool_choice: Optional[
AnthropicMessagesToolChoice
] = self._map_tool_choice(
tool_choice=non_default_params.get("tool_choice"),
parallel_tool_use=non_default_params.get("parallel_tool_calls"),
)
if _tool_choice is not None:
optional_params["tool_choice"] = _tool_choice
if param == "stream" and value is True:
optional_params["stream"] = value
if param == "stop" and (isinstance(value, str) or isinstance(value, list)):
_value = self._map_stop_sequences(value)
if _value is not None:
optional_params["stop_sequences"] = _value
if param == "temperature":
optional_params["temperature"] = value
if param == "top_p":
optional_params["top_p"] = value
if param == "response_format" and isinstance(value, dict):
_tool = self.map_response_format_to_anthropic_tool(
value, optional_params, is_thinking_enabled
)
if _tool is None:
continue
if not is_thinking_enabled:
_tool_choice = {"name": RESPONSE_FORMAT_TOOL_NAME, "type": "tool"}
optional_params["tool_choice"] = _tool_choice
optional_params["json_mode"] = True
optional_params = self._add_tools_to_optional_params(
optional_params=optional_params, tools=[_tool]
)
if param == "user":
optional_params["metadata"] = {"user_id": value}
if param == "thinking":
optional_params["thinking"] = value
elif param == "reasoning_effort" and isinstance(value, str):
optional_params["thinking"] = AnthropicConfig._map_reasoning_effort(
value
)
## handle thinking tokens
self.update_optional_params_with_thinking_tokens(
non_default_params=non_default_params, optional_params=optional_params
)
return optional_params
def _create_json_tool_call_for_response_format(
self,
json_schema: Optional[dict] = None,
) -> AnthropicMessagesTool:
"""
Handles creating a tool call for getting responses in JSON format.
Args:
json_schema (Optional[dict]): The JSON schema the response should be in
Returns:
AnthropicMessagesTool: The tool call to send to Anthropic API to get responses in JSON format
"""
_input_schema: AnthropicInputSchema = AnthropicInputSchema(
type="object",
)
if json_schema is None:
# Anthropic raises a 400 BadRequest error if properties is passed as None
# see usage with additionalProperties (Example 5) https://github.com/anthropics/anthropic-cookbook/blob/main/tool_use/extracting_structured_json.ipynb
_input_schema["additionalProperties"] = True
_input_schema["properties"] = {}
else:
_input_schema.update(cast(AnthropicInputSchema, json_schema))
_tool = AnthropicMessagesTool(
name=RESPONSE_FORMAT_TOOL_NAME, input_schema=_input_schema
)
return _tool
def translate_system_message(
self, messages: List[AllMessageValues]
) -> List[AnthropicSystemMessageContent]:
"""
Translate system message to anthropic format.
Removes system message from the original list and returns a new list of anthropic system message content.
"""
system_prompt_indices = []
anthropic_system_message_list: List[AnthropicSystemMessageContent] = []
for idx, message in enumerate(messages):
if message["role"] == "system":
valid_content: bool = False
system_message_block = ChatCompletionSystemMessage(**message)
if isinstance(system_message_block["content"], str):
anthropic_system_message_content = AnthropicSystemMessageContent(
type="text",
text=system_message_block["content"],
)
if "cache_control" in system_message_block:
anthropic_system_message_content[
"cache_control"
] = system_message_block["cache_control"]
anthropic_system_message_list.append(
anthropic_system_message_content
)
valid_content = True
elif isinstance(message["content"], list):
for _content in message["content"]:
anthropic_system_message_content = (
AnthropicSystemMessageContent(
type=_content.get("type"),
text=_content.get("text"),
)
)
if "cache_control" in _content:
anthropic_system_message_content[
"cache_control"
] = _content["cache_control"]
anthropic_system_message_list.append(
anthropic_system_message_content
)
valid_content = True
if valid_content:
system_prompt_indices.append(idx)
if len(system_prompt_indices) > 0:
for idx in reversed(system_prompt_indices):
messages.pop(idx)
return anthropic_system_message_list
def transform_request(
self,
model: str,
messages: List[AllMessageValues],
optional_params: dict,
litellm_params: dict,
headers: dict,
) -> dict:
"""
Translate messages to anthropic format.
"""
## VALIDATE REQUEST
"""
Anthropic doesn't support tool calling without `tools=` param specified.
"""
if (
"tools" not in optional_params
and messages is not None
and has_tool_call_blocks(messages)
):
if litellm.modify_params:
optional_params["tools"] = self._map_tools(
add_dummy_tool(custom_llm_provider="anthropic")
)
else:
raise litellm.UnsupportedParamsError(
message="Anthropic doesn't support tool calling without `tools=` param specified. Pass `tools=` param OR set `litellm.modify_params = True` // `litellm_settings::modify_params: True` to add dummy tool to the request.",
model="",
llm_provider="anthropic",
)
# Separate system prompt from rest of message
anthropic_system_message_list = self.translate_system_message(messages=messages)
# Handling anthropic API Prompt Caching
if len(anthropic_system_message_list) > 0:
optional_params["system"] = anthropic_system_message_list
# Format rest of message according to anthropic guidelines
try:
anthropic_messages = anthropic_messages_pt(
model=model,
messages=messages,
llm_provider="anthropic",
)
except Exception as e:
raise AnthropicError(
status_code=400,
message="{}\nReceived Messages={}".format(str(e), messages),
) # don't use verbose_logger.exception, if exception is raised
## Load Config
config = litellm.AnthropicConfig.get_config()
for k, v in config.items():
if (
k not in optional_params
): # completion(top_k=3) > anthropic_config(top_k=3) <- allows for dynamic variables to be passed in
optional_params[k] = v
## Handle user_id in metadata
_litellm_metadata = litellm_params.get("metadata", None)
if (
_litellm_metadata
and isinstance(_litellm_metadata, dict)
and "user_id" in _litellm_metadata
):
optional_params["metadata"] = {"user_id": _litellm_metadata["user_id"]}
data = {
"model": model,
"messages": anthropic_messages,
**optional_params,
}
return data
def _transform_response_for_json_mode(
self,
json_mode: Optional[bool],
tool_calls: List[ChatCompletionToolCallChunk],
) -> Optional[LitellmMessage]:
_message: Optional[LitellmMessage] = None
if json_mode is True and len(tool_calls) == 1:
# check if tool name is the default tool name
json_mode_content_str: Optional[str] = None
if (
"name" in tool_calls[0]["function"]
and tool_calls[0]["function"]["name"] == RESPONSE_FORMAT_TOOL_NAME
):
json_mode_content_str = tool_calls[0]["function"].get("arguments")
if json_mode_content_str is not None:
_message = AnthropicConfig._convert_tool_response_to_message(
tool_calls=tool_calls,
)
return _message
def extract_response_content(
self, completion_response: dict
) -> Tuple[
str,
Optional[List[Any]],
Optional[
List[
Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]
]
],
Optional[str],
List[ChatCompletionToolCallChunk],
]:
text_content = ""
citations: Optional[List[Any]] = None
thinking_blocks: Optional[
List[
Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]
]
] = None
reasoning_content: Optional[str] = None
tool_calls: List[ChatCompletionToolCallChunk] = []
for idx, content in enumerate(completion_response["content"]):
if content["type"] == "text":
text_content += content["text"]
## TOOL CALLING
elif content["type"] == "tool_use":
tool_calls.append(
ChatCompletionToolCallChunk(
id=content["id"],
type="function",
function=ChatCompletionToolCallFunctionChunk(
name=content["name"],
arguments=json.dumps(content["input"]),
),
index=idx,
)
)
elif content.get("thinking", None) is not None:
if thinking_blocks is None:
thinking_blocks = []
thinking_blocks.append(cast(ChatCompletionThinkingBlock, content))
elif content["type"] == "redacted_thinking":
if thinking_blocks is None:
thinking_blocks = []
thinking_blocks.append(
cast(ChatCompletionRedactedThinkingBlock, content)
)
## CITATIONS
if content.get("citations") is not None:
if citations is None:
citations = []
citations.append(content["citations"])
if thinking_blocks is not None:
reasoning_content = ""
for block in thinking_blocks:
thinking_content = cast(Optional[str], block.get("thinking"))
if thinking_content is not None:
reasoning_content += thinking_content
return text_content, citations, thinking_blocks, reasoning_content, tool_calls
def calculate_usage(
self, usage_object: dict, reasoning_content: Optional[str]
) -> Usage:
prompt_tokens = usage_object.get("input_tokens", 0)
completion_tokens = usage_object.get("output_tokens", 0)
_usage = usage_object
cache_creation_input_tokens: int = 0
cache_read_input_tokens: int = 0
if "cache_creation_input_tokens" in _usage:
cache_creation_input_tokens = _usage["cache_creation_input_tokens"]
if "cache_read_input_tokens" in _usage:
cache_read_input_tokens = _usage["cache_read_input_tokens"]
prompt_tokens += cache_read_input_tokens
prompt_tokens_details = PromptTokensDetailsWrapper(
cached_tokens=cache_read_input_tokens
)
completion_token_details = (
CompletionTokensDetailsWrapper(
reasoning_tokens=token_counter(
text=reasoning_content, count_response_tokens=True
)
)
if reasoning_content
else None
)
total_tokens = prompt_tokens + completion_tokens
usage = Usage(
prompt_tokens=prompt_tokens,
completion_tokens=completion_tokens,
total_tokens=total_tokens,
prompt_tokens_details=prompt_tokens_details,
cache_creation_input_tokens=cache_creation_input_tokens,
cache_read_input_tokens=cache_read_input_tokens,
completion_tokens_details=completion_token_details,
)
return usage
def transform_response(
self,
model: str,
raw_response: httpx.Response,
model_response: ModelResponse,
logging_obj: LoggingClass,
request_data: Dict,
messages: List[AllMessageValues],
optional_params: Dict,
litellm_params: dict,
encoding: Any,
api_key: Optional[str] = None,
json_mode: Optional[bool] = None,
) -> ModelResponse:
_hidden_params: Dict = {}
_hidden_params["additional_headers"] = process_anthropic_headers(
dict(raw_response.headers)
)
## LOGGING
logging_obj.post_call(
input=messages,
api_key=api_key,
original_response=raw_response.text,
additional_args={"complete_input_dict": request_data},
)
## RESPONSE OBJECT
try:
completion_response = raw_response.json()
except Exception as e:
response_headers = getattr(raw_response, "headers", None)
raise AnthropicError(
message="Unable to get json response - {}, Original Response: {}".format(
str(e), raw_response.text
),
status_code=raw_response.status_code,
headers=response_headers,
)
if "error" in completion_response:
response_headers = getattr(raw_response, "headers", None)
raise AnthropicError(
message=str(completion_response["error"]),
status_code=raw_response.status_code,
headers=response_headers,
)
else:
text_content = ""
citations: Optional[List[Any]] = None
thinking_blocks: Optional[
List[
Union[
ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock
]
]
] = None
reasoning_content: Optional[str] = None
tool_calls: List[ChatCompletionToolCallChunk] = []
(
text_content,
citations,
thinking_blocks,
reasoning_content,
tool_calls,
) = self.extract_response_content(completion_response=completion_response)
_message = litellm.Message(
tool_calls=tool_calls,
content=text_content or None,
provider_specific_fields={
"citations": citations,
"thinking_blocks": thinking_blocks,
},
thinking_blocks=thinking_blocks,
reasoning_content=reasoning_content,
)
## HANDLE JSON MODE - anthropic returns single function call
json_mode_message = self._transform_response_for_json_mode(
json_mode=json_mode,
tool_calls=tool_calls,
)
if json_mode_message is not None:
completion_response["stop_reason"] = "stop"
_message = json_mode_message
model_response.choices[0].message = _message # type: ignore
model_response._hidden_params["original_response"] = completion_response[
"content"
] # allow user to access raw anthropic tool calling response
model_response.choices[0].finish_reason = map_finish_reason(
completion_response["stop_reason"]
)
## CALCULATING USAGE
usage = self.calculate_usage(
usage_object=completion_response["usage"],
reasoning_content=reasoning_content,
)
setattr(model_response, "usage", usage) # type: ignore
model_response.created = int(time.time())
model_response.model = completion_response["model"]
model_response._hidden_params = _hidden_params
return model_response
@staticmethod
def _convert_tool_response_to_message(
tool_calls: List[ChatCompletionToolCallChunk],
) -> Optional[LitellmMessage]:
"""
In JSON mode, Anthropic API returns JSON schema as a tool call, we need to convert it to a message to follow the OpenAI format
"""
## HANDLE JSON MODE - anthropic returns single function call
json_mode_content_str: Optional[str] = tool_calls[0]["function"].get(
"arguments"
)
try:
if json_mode_content_str is not None:
args = json.loads(json_mode_content_str)
if (
isinstance(args, dict)
and (values := args.get("values")) is not None
):
_message = litellm.Message(content=json.dumps(values))
return _message
else:
# a lot of the times the `values` key is not present in the tool response
# relevant issue: https://github.com/BerriAI/litellm/issues/6741
_message = litellm.Message(content=json.dumps(args))
return _message
except json.JSONDecodeError:
# json decode error does occur, return the original tool response str
return litellm.Message(content=json_mode_content_str)
return None
def get_error_class(
self, error_message: str, status_code: int, headers: Union[Dict, httpx.Headers]
) -> BaseLLMException:
return AnthropicError(
status_code=status_code,
message=error_message,
headers=cast(httpx.Headers, headers),
)