structure saas with tools

Davidson Gomes
2025-04-25 15:30:54 -03:00
commit 1aef473937
16434 changed files with 6584257 additions and 0 deletions

View File

@@ -0,0 +1,59 @@
"""
Support for OpenAI's `/v1/chat/completions` endpoint.
Calls done in OpenAI/openai.py as TogetherAI is openai-compatible.
Docs: https://docs.together.ai/reference/completions-1
"""
from typing import Optional

from litellm import get_model_info, verbose_logger

from ..openai.chat.gpt_transformation import OpenAIGPTConfig


class TogetherAIConfig(OpenAIGPTConfig):
    def get_supported_openai_params(self, model: str) -> list:
        """
        Only some together models support response_format / tool calling

        Docs: https://docs.together.ai/docs/json-mode
        """
        supports_function_calling: Optional[bool] = None
        try:
            model_info = get_model_info(model, custom_llm_provider="together_ai")
            supports_function_calling = model_info.get(
                "supports_function_calling", False
            )
        except Exception as e:
            verbose_logger.debug(f"Error getting supported openai params: {e}")

        optional_params = super().get_supported_openai_params(model)
        if supports_function_calling is not True:
            verbose_logger.debug(
                "Only some together models support function calling/response_format. Docs - https://docs.together.ai/docs/function-calling"
            )
            optional_params.remove("tools")
            optional_params.remove("tool_choice")
            optional_params.remove("function_call")
            optional_params.remove("response_format")
        return optional_params

    def map_openai_params(
        self,
        non_default_params: dict,
        optional_params: dict,
        model: str,
        drop_params: bool,
    ) -> dict:
        mapped_openai_params = super().map_openai_params(
            non_default_params, optional_params, model, drop_params
        )

        if (
            "response_format" in mapped_openai_params
            and mapped_openai_params["response_format"] == {"type": "text"}
        ):
            mapped_openai_params.pop("response_format")
        return mapped_openai_params
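
A quick usage sketch of the config above (illustrative: the import path and model name are assumptions, and the resulting list depends on litellm's model map):

# Illustrative sketch - import path and model name are assumptions.
from litellm.llms.together_ai.chat import TogetherAIConfig

config = TogetherAIConfig()
params = config.get_supported_openai_params(model="together_ai/example-13b-model")
# If the model isn't marked as supporting function calling (or isn't in the
# model map at all), "tools", "tool_choice", "function_call" and
# "response_format" are stripped from the returned list.
print("tools" in params)  # expected: False for such models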

View File

@@ -0,0 +1,3 @@
"""
Uses openai's `/completion` handler.py
"""

View File

@@ -0,0 +1,61 @@
"""
Translates calls from OpenAI's `/v1/completions` endpoint to TogetherAI's `/v1/completions` endpoint.
Calls done in OpenAI/openai.py as TogetherAI is openai-compatible.
Docs: https://docs.together.ai/reference/completions-1
"""
from typing import List, Union, cast

from litellm.llms.openai.completion.utils import is_tokens_or_list_of_tokens
from litellm.types.llms.openai import (
    AllMessageValues,
    AllPromptValues,
    OpenAITextCompletionUserMessage,
)

from ...openai.completion.transformation import OpenAITextCompletionConfig
from ...openai.completion.utils import _transform_prompt


class TogetherAITextCompletionConfig(OpenAITextCompletionConfig):
    def _transform_prompt(
        self,
        messages: Union[List[AllMessageValues], List[OpenAITextCompletionUserMessage]],
    ) -> AllPromptValues:
        """
        TogetherAI expects a string prompt.
        """
        initial_prompt: AllPromptValues = _transform_prompt(messages)

        ## TOGETHER AI SPECIFIC VALIDATION ##
        if isinstance(initial_prompt, list) and is_tokens_or_list_of_tokens(
            value=initial_prompt
        ):
            raise ValueError("TogetherAI does not support integers as input")
        if (
            isinstance(initial_prompt, list)
            and len(initial_prompt) == 1
            and isinstance(initial_prompt[0], str)
        ):
            together_prompt = initial_prompt[0]
        elif isinstance(initial_prompt, list):
            raise ValueError("TogetherAI does not support multiple prompts.")
        else:
            together_prompt = cast(str, initial_prompt)

        return together_prompt

    def transform_text_completion_request(
        self,
        model: str,
        messages: Union[List[AllMessageValues], List[OpenAITextCompletionUserMessage]],
        optional_params: dict,
        headers: dict,
    ) -> dict:
        prompt = self._transform_prompt(messages)
        return {
            "model": model,
            "prompt": prompt,
            **optional_params,
        }
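
For instance, a single user message collapses to a plain string prompt, while token arrays and multi-prompt lists are rejected by the validation above (a hedged sketch, assuming the class above is in scope; the model name is a placeholder):

config = TogetherAITextCompletionConfig()
request = config.transform_text_completion_request(
    model="together_ai/example-model",  # placeholder
    messages=[{"role": "user", "content": "Say hello"}],
    optional_params={"max_tokens": 16},
    headers={},
)
# Expected shape: {"model": "together_ai/example-model", "prompt": ..., "max_tokens": 16}
# Token-array inputs raise ValueError("TogetherAI does not support integers as input");
# lists of multiple string prompts raise ValueError("TogetherAI does not support multiple prompts.")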

View File

@@ -0,0 +1,89 @@
"""
Handles calculating cost for together ai models
"""
import re

from litellm.constants import (
    TOGETHER_AI_4_B,
    TOGETHER_AI_8_B,
    TOGETHER_AI_21_B,
    TOGETHER_AI_41_B,
    TOGETHER_AI_80_B,
    TOGETHER_AI_110_B,
    TOGETHER_AI_EMBEDDING_150_M,
    TOGETHER_AI_EMBEDDING_350_M,
)
from litellm.types.utils import CallTypes


# Extract the number of billion parameters from the model name
# (only used for together_computer LLMs)
def get_model_params_and_category(model_name: str, call_type: CallTypes) -> str:
    """
    Helper function for calculating together ai pricing.

    Returns
    - str - model pricing category if mapped, else the received model name
    """
    if call_type == CallTypes.embedding or call_type == CallTypes.aembedding:
        return get_model_params_and_category_embeddings(model_name=model_name)
    model_name = model_name.lower()
    re_params_match = re.search(
        r"(\d+b)", model_name
    )  # catch parameter counts like 3b, 70b, etc.
    category = None
    if re_params_match is not None:
        params_match = str(re_params_match.group(1))
        params_match = params_match.replace("b", "")
        if params_match is not None:
            params_billion = float(params_match)
        else:
            return model_name

        # Determine the category based on the number of parameters
        if params_billion <= TOGETHER_AI_4_B:
            category = "together-ai-up-to-4b"
        elif params_billion <= TOGETHER_AI_8_B:
            category = "together-ai-4.1b-8b"
        elif params_billion <= TOGETHER_AI_21_B:
            category = "together-ai-8.1b-21b"
        elif params_billion <= TOGETHER_AI_41_B:
            category = "together-ai-21.1b-41b"
        elif params_billion <= TOGETHER_AI_80_B:
            category = "together-ai-41.1b-80b"
        elif params_billion <= TOGETHER_AI_110_B:
            category = "together-ai-81.1b-110b"
        if category is not None:
            return category

    return model_name


def get_model_params_and_category_embeddings(model_name: str) -> str:
    """
    Helper function for calculating together ai embedding pricing.

    Returns
    - str - model pricing category if mapped, else the received model name
    """
    model_name = model_name.lower()
    re_params_match = re.search(
        r"(\d+m)", model_name
    )  # catch parameter counts like 100m, 200m, etc.
    category = None
    if re_params_match is not None:
        params_match = str(re_params_match.group(1))
        params_match = params_match.replace("m", "")
        if params_match is not None:
            params_million = float(params_match)
        else:
            return model_name

        # Determine the category based on the number of parameters
        if params_million <= TOGETHER_AI_EMBEDDING_150_M:
            category = "together-ai-embedding-up-to-150m"
        elif params_million <= TOGETHER_AI_EMBEDDING_350_M:
            category = "together-ai-embedding-151m-to-350m"
        if category is not None:
            return category

    return model_name
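
As a worked example of the bucketing (hedged: this assumes the threshold constants are 4, 8, 21, 41, 80, and 110 billion respectively; the model names are placeholders):

from litellm.types.utils import CallTypes

# "7b" matches the regex -> 7.0 <= TOGETHER_AI_8_B -> second bucket
print(get_model_params_and_category("together_ai/llama-2-7b", CallTypes.completion))
# expected: "together-ai-4.1b-8b"

# No "<digits>b" token in the name -> the raw model name comes back
print(get_model_params_and_category("mixtral-moe", CallTypes.completion))
# expected: "mixtral-moe"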

View File

@@ -0,0 +1,7 @@
"""
Support for OpenAI's `/v1/embeddings` endpoint.
Calls done in OpenAI/openai.py as TogetherAI is openai-compatible.
Docs: https://docs.together.ai/reference/completions-1
"""

View File

@@ -0,0 +1,92 @@
"""
Re rank api
LiteLLM supports the re rank API format, no paramter transformation occurs
"""
from typing import Any, Dict, List, Optional, Union

import litellm
from litellm.llms.base import BaseLLM
from litellm.llms.custom_httpx.http_handler import (
    _get_httpx_client,
    get_async_httpx_client,
)
from litellm.llms.together_ai.rerank.transformation import TogetherAIRerankConfig
from litellm.types.rerank import RerankRequest, RerankResponse


class TogetherAIRerank(BaseLLM):
    def rerank(
        self,
        model: str,
        api_key: str,
        query: str,
        documents: List[Union[str, Dict[str, Any]]],
        top_n: Optional[int] = None,
        rank_fields: Optional[List[str]] = None,
        return_documents: Optional[bool] = True,
        max_chunks_per_doc: Optional[int] = None,
        _is_async: Optional[bool] = False,
    ) -> RerankResponse:
        client = _get_httpx_client()

        request_data = RerankRequest(
            model=model,
            query=query,
            top_n=top_n,
            documents=documents,
            rank_fields=rank_fields,
            return_documents=return_documents,
        )

        # exclude None values from request_data
        request_data_dict = request_data.dict(exclude_none=True)
        if max_chunks_per_doc is not None:
            raise ValueError("TogetherAI does not support max_chunks_per_doc")

        if _is_async:
            return self.async_rerank(request_data_dict, api_key)  # type: ignore  # Call async method

        response = client.post(
            "https://api.together.xyz/v1/rerank",
            headers={
                "accept": "application/json",
                "content-type": "application/json",
                "authorization": f"Bearer {api_key}",
            },
            json=request_data_dict,
        )

        if response.status_code != 200:
            raise Exception(response.text)

        _json_response = response.json()

        return TogetherAIRerankConfig()._transform_response(_json_response)

    async def async_rerank(  # New async method
        self,
        request_data_dict: Dict[str, Any],
        api_key: str,
    ) -> RerankResponse:
        client = get_async_httpx_client(
            llm_provider=litellm.LlmProviders.TOGETHER_AI
        )  # Use async client

        response = await client.post(
            "https://api.together.xyz/v1/rerank",
            headers={
                "accept": "application/json",
                "content-type": "application/json",
                "authorization": f"Bearer {api_key}",
            },
            json=request_data_dict,
        )

        if response.status_code != 200:
            raise Exception(response.text)

        _json_response = response.json()

        return TogetherAIRerankConfig()._transform_response(_json_response)
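
A minimal synchronous usage sketch (the model name and API key are placeholders, and this performs a real HTTP call to Together's `/v1/rerank` endpoint):

handler = TogetherAIRerank()
response = handler.rerank(
    model="example/rerank-model",     # placeholder
    api_key="your-together-api-key",  # placeholder
    query="What is the capital of France?",
    documents=[
        "Paris is the capital of France.",
        "Berlin is the capital of Germany.",
    ],
    top_n=1,
)
print(response.results)  # ranked results as returned by the API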

View File

@@ -0,0 +1,62 @@
"""
Transformation logic from Cohere's /v1/rerank format to Together AI's `/v1/rerank` format.
Why separate file? Make it easy to see how transformation works
"""
import uuid
from typing import List, Optional

from litellm.types.rerank import (
    RerankBilledUnits,
    RerankResponse,
    RerankResponseDocument,
    RerankResponseMeta,
    RerankResponseResult,
    RerankTokens,
)


class TogetherAIRerankConfig:
    def _transform_response(self, response: dict) -> RerankResponse:
        _billed_units = RerankBilledUnits(**response.get("usage", {}))
        _tokens = RerankTokens(**response.get("usage", {}))
        rerank_meta = RerankResponseMeta(billed_units=_billed_units, tokens=_tokens)

        _results: Optional[List[dict]] = response.get("results")

        if _results is None:
            raise ValueError(f"No results found in the response={response}")

        rerank_results: List[RerankResponseResult] = []

        for result in _results:
            # Validate required fields exist
            if not all(key in result for key in ["index", "relevance_score"]):
                raise ValueError(f"Missing required fields in the result={result}")

            # Get document data if it exists
            document_data = result.get("document", {})
            document = (
                RerankResponseDocument(text=str(document_data.get("text", "")))
                if document_data
                else None
            )

            # Create typed result
            rerank_result = RerankResponseResult(
                index=int(result["index"]),
                relevance_score=float(result["relevance_score"]),
            )

            # Only add document if it exists
            if document:
                rerank_result["document"] = document

            rerank_results.append(rerank_result)

        return RerankResponse(
            id=response.get("id") or str(uuid.uuid4()),
            results=rerank_results,
            meta=rerank_meta,
        )  # Return response
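
To see the transformation end to end, here is a fabricated Together-style payload run through it (illustrative only; the field values are made up, not an official response schema):

raw_response = {
    "id": "rerank-abc123",
    "results": [
        {
            "index": 0,
            "relevance_score": 0.97,
            "document": {"text": "Paris is the capital of France."},
        }
    ],
    "usage": {"total_tokens": 16},
}

rerank_response = TogetherAIRerankConfig()._transform_response(raw_response)
print(rerank_response.id)       # "rerank-abc123"
print(rerank_response.results)  # one result with index 0 and score 0.97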