structure saas with tools
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,59 @@
"""
Support for OpenAI's `/v1/chat/completions` endpoint.

Calls done in OpenAI/openai.py as TogetherAI is openai-compatible.

Docs: https://docs.together.ai/reference/completions-1
"""

from typing import Optional

from litellm import get_model_info, verbose_logger

from ..openai.chat.gpt_transformation import OpenAIGPTConfig


class TogetherAIConfig(OpenAIGPTConfig):
    def get_supported_openai_params(self, model: str) -> list:
        """
        Only some Together models support response_format / tool calling.

        Docs: https://docs.together.ai/docs/json-mode
        """
        supports_function_calling: Optional[bool] = None
        try:
            model_info = get_model_info(model, custom_llm_provider="together_ai")
            supports_function_calling = model_info.get(
                "supports_function_calling", False
            )
        except Exception as e:
            verbose_logger.debug(f"Error getting supported openai params: {e}")

        optional_params = super().get_supported_openai_params(model)
        if supports_function_calling is not True:
            verbose_logger.debug(
                "Only some together models support function calling/response_format. Docs - https://docs.together.ai/docs/function-calling"
            )
            optional_params.remove("tools")
            optional_params.remove("tool_choice")
            optional_params.remove("function_call")
            optional_params.remove("response_format")
        return optional_params

    def map_openai_params(
        self,
        non_default_params: dict,
        optional_params: dict,
        model: str,
        drop_params: bool,
    ) -> dict:
        mapped_openai_params = super().map_openai_params(
            non_default_params, optional_params, model, drop_params
        )

        # Drop the no-op default `{"type": "text"}` response_format before sending.
        if "response_format" in mapped_openai_params and mapped_openai_params[
            "response_format"
        ] == {"type": "text"}:
            mapped_openai_params.pop("response_format")
        return mapped_openai_params
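A minimal usage sketch for the config above (the import path follows this commit's file layout and the model name is illustrative; both are assumptions):

# Hypothetical usage sketch -- import path and model name are assumptions.
from litellm.llms.together_ai.chat import TogetherAIConfig

config = TogetherAIConfig()
supported = config.get_supported_openai_params(
    model="mistralai/Mixtral-8x7B-Instruct-v0.1"
)
# For a model without function-calling support, "tools", "tool_choice",
# "function_call", and "response_format" are removed from the returned list.
print("tools" in supported)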
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,3 @@
"""
Uses openai's `/completion` handler.py
"""
@@ -0,0 +1,61 @@
"""
Translates calls from OpenAI's `/v1/completions` endpoint to TogetherAI's `/v1/completions` endpoint.

Calls done in OpenAI/openai.py as TogetherAI is openai-compatible.

Docs: https://docs.together.ai/reference/completions-1
"""

from typing import List, Union, cast

from litellm.llms.openai.completion.utils import is_tokens_or_list_of_tokens
from litellm.types.llms.openai import (
    AllMessageValues,
    AllPromptValues,
    OpenAITextCompletionUserMessage,
)

from ...openai.completion.transformation import OpenAITextCompletionConfig
from ...openai.completion.utils import _transform_prompt


class TogetherAITextCompletionConfig(OpenAITextCompletionConfig):
    def _transform_prompt(
        self,
        messages: Union[List[AllMessageValues], List[OpenAITextCompletionUserMessage]],
    ) -> AllPromptValues:
        """
        TogetherAI expects a string prompt.
        """
        initial_prompt: AllPromptValues = _transform_prompt(messages)
        ## TOGETHER AI SPECIFIC VALIDATION ##
        if isinstance(initial_prompt, list) and is_tokens_or_list_of_tokens(
            value=initial_prompt
        ):
            raise ValueError("TogetherAI does not support integers as input")
        if (
            isinstance(initial_prompt, list)
            and len(initial_prompt) == 1
            and isinstance(initial_prompt[0], str)
        ):
            together_prompt = initial_prompt[0]
        elif isinstance(initial_prompt, list):
            raise ValueError("TogetherAI does not support multiple prompts.")
        else:
            together_prompt = cast(str, initial_prompt)

        return together_prompt

    def transform_text_completion_request(
        self,
        model: str,
        messages: Union[List[AllMessageValues], List[OpenAITextCompletionUserMessage]],
        optional_params: dict,
        headers: dict,
    ) -> dict:
        prompt = self._transform_prompt(messages)
        return {
            "model": model,
            "prompt": prompt,
            **optional_params,
        }
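A sketch of how the validation above behaves (hypothetical import path; the model name is illustrative):

# Hypothetical sketch -- import path and model name are assumptions.
from litellm.llms.together_ai.completion.transformation import (
    TogetherAITextCompletionConfig,
)

config = TogetherAITextCompletionConfig()

# A single user message is expected to collapse into one string prompt.
request = config.transform_text_completion_request(
    model="meta-llama/Llama-2-7b-hf",
    messages=[{"role": "user", "content": "Hello"}],
    optional_params={"max_tokens": 16},
    headers={},
)
print(request)  # expected: {'model': ..., 'prompt': 'Hello', 'max_tokens': 16}

# Token-ID prompts (lists of integers) raise ValueError, as do multiple prompts.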
@@ -0,0 +1,89 @@
"""
Handles calculating cost for together ai models
"""

import re

from litellm.constants import (
    TOGETHER_AI_4_B,
    TOGETHER_AI_8_B,
    TOGETHER_AI_21_B,
    TOGETHER_AI_41_B,
    TOGETHER_AI_80_B,
    TOGETHER_AI_110_B,
    TOGETHER_AI_EMBEDDING_150_M,
    TOGETHER_AI_EMBEDDING_350_M,
)
from litellm.types.utils import CallTypes


# Extract the number of billion parameters from the model name
# only used for together_computer LLMs
def get_model_params_and_category(model_name, call_type: CallTypes) -> str:
    """
    Helper function for calculating together ai pricing.

    Returns
    - str - model pricing category if mapped, else the received model name
    """
    if call_type == CallTypes.embedding or call_type == CallTypes.aembedding:
        return get_model_params_and_category_embeddings(model_name=model_name)
    model_name = model_name.lower()
    re_params_match = re.search(
        r"(\d+b)", model_name
    )  # catch parameter sizes like 3b, 70b, etc.
    category = None
    if re_params_match is not None:
        params_match = str(re_params_match.group(1))
        params_match = params_match.replace("b", "")
        if params_match is not None:
            params_billion = float(params_match)
        else:
            return model_name
        # Determine the category based on the number of parameters
        if params_billion <= TOGETHER_AI_4_B:
            category = "together-ai-up-to-4b"
        elif params_billion <= TOGETHER_AI_8_B:
            category = "together-ai-4.1b-8b"
        elif params_billion <= TOGETHER_AI_21_B:
            category = "together-ai-8.1b-21b"
        elif params_billion <= TOGETHER_AI_41_B:
            category = "together-ai-21.1b-41b"
        elif params_billion <= TOGETHER_AI_80_B:
            category = "together-ai-41.1b-80b"
        elif params_billion <= TOGETHER_AI_110_B:
            category = "together-ai-81.1b-110b"
        if category is not None:
            return category

    return model_name


def get_model_params_and_category_embeddings(model_name) -> str:
    """
    Helper function for calculating together ai embedding pricing.

    Returns
    - str - model pricing category if mapped, else the received model name
    """
    model_name = model_name.lower()
    re_params_match = re.search(
        r"(\d+m)", model_name
    )  # catch sizes like 100m, 200m, etc.
    category = None
    if re_params_match is not None:
        params_match = str(re_params_match.group(1))
        params_match = params_match.replace("m", "")
        if params_match is not None:
            params_million = float(params_match)
        else:
            return model_name
        # Determine the category based on the number of parameters
        if params_million <= TOGETHER_AI_EMBEDDING_150_M:
            category = "together-ai-embedding-up-to-150m"
        elif params_million <= TOGETHER_AI_EMBEDDING_350_M:
            category = "together-ai-embedding-151m-to-350m"
        if category is not None:
            return category

    return model_name
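A sanity-check sketch of the bucketing above (assuming the TOGETHER_AI_* constants carry their face values, e.g. TOGETHER_AI_80_B == 80; model names are illustrative):

from litellm.types.utils import CallTypes

# "70b" in the name -> params_billion = 70.0 -> the 41.1b-80b bucket.
print(
    get_model_params_and_category(
        "together_ai/meta-llama/Llama-2-70b-chat-hf", CallTypes.completion
    )
)  # expected: "together-ai-41.1b-80b"

# No "<digits>b" token in the name -> the lowercased name is returned unchanged.
print(get_model_params_and_category("together_ai/some-model", CallTypes.completion))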
@@ -0,0 +1,7 @@
"""
Support for OpenAI's `/v1/embeddings` endpoint.

Calls done in OpenAI/openai.py as TogetherAI is openai-compatible.

Docs: https://docs.together.ai/reference/completions-1
"""
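For context, a typical end-user call that routes through this module might look like the following sketch (the model name is illustrative; a real API key must be configured):

import litellm

# Embedding request routed through the openai-compatible handler.
response = litellm.embedding(
    model="together_ai/togethercomputer/m2-bert-80M-8k-retrieval",
    input=["hello from litellm"],
)
print(len(response.data[0]["embedding"]))  # embedding vector length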
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,92 @@
"""
Re-rank API

LiteLLM supports the re-rank API format; no parameter transformation occurs.
"""

from typing import Any, Dict, List, Optional, Union

import litellm
from litellm.llms.base import BaseLLM
from litellm.llms.custom_httpx.http_handler import (
    _get_httpx_client,
    get_async_httpx_client,
)
from litellm.llms.together_ai.rerank.transformation import TogetherAIRerankConfig
from litellm.types.rerank import RerankRequest, RerankResponse


class TogetherAIRerank(BaseLLM):
    def rerank(
        self,
        model: str,
        api_key: str,
        query: str,
        documents: List[Union[str, Dict[str, Any]]],
        top_n: Optional[int] = None,
        rank_fields: Optional[List[str]] = None,
        return_documents: Optional[bool] = True,
        max_chunks_per_doc: Optional[int] = None,
        _is_async: Optional[bool] = False,
    ) -> RerankResponse:
        client = _get_httpx_client()

        request_data = RerankRequest(
            model=model,
            query=query,
            top_n=top_n,
            documents=documents,
            rank_fields=rank_fields,
            return_documents=return_documents,
        )

        # exclude None values from request_data
        request_data_dict = request_data.dict(exclude_none=True)
        if max_chunks_per_doc is not None:
            raise ValueError("TogetherAI does not support max_chunks_per_doc")

        if _is_async:
            return self.async_rerank(request_data_dict, api_key)  # type: ignore # Call async method

        response = client.post(
            "https://api.together.xyz/v1/rerank",
            headers={
                "accept": "application/json",
                "content-type": "application/json",
                "authorization": f"Bearer {api_key}",
            },
            json=request_data_dict,
        )

        if response.status_code != 200:
            raise Exception(response.text)

        _json_response = response.json()

        return TogetherAIRerankConfig()._transform_response(_json_response)

    async def async_rerank(  # New async method
        self,
        request_data_dict: Dict[str, Any],
        api_key: str,
    ) -> RerankResponse:
        client = get_async_httpx_client(
            llm_provider=litellm.LlmProviders.TOGETHER_AI
        )  # Use async client

        response = await client.post(
            "https://api.together.xyz/v1/rerank",
            headers={
                "accept": "application/json",
                "content-type": "application/json",
                "authorization": f"Bearer {api_key}",
            },
            json=request_data_dict,
        )

        if response.status_code != 200:
            raise Exception(response.text)

        _json_response = response.json()

        return TogetherAIRerankConfig()._transform_response(_json_response)
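A call sketch for the handler above (the API key is a placeholder and the model name is illustrative):

# Hypothetical usage sketch -- placeholder key, illustrative model name.
handler = TogetherAIRerank()
response = handler.rerank(
    model="Salesforce/Llama-Rank-V1",
    api_key="sk-...",  # placeholder
    query="What is the capital of France?",
    documents=[
        "Paris is the capital of France.",
        "Berlin is the capital of Germany.",
    ],
    top_n=1,
)
print(response.results)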
@@ -0,0 +1,62 @@
"""
Transformation logic from Cohere's /v1/rerank format to Together AI's `/v1/rerank` format.

Why separate file? Make it easy to see how transformation works
"""

import uuid
from typing import List, Optional

from litellm.types.rerank import (
    RerankBilledUnits,
    RerankResponse,
    RerankResponseDocument,
    RerankResponseMeta,
    RerankResponseResult,
    RerankTokens,
)


class TogetherAIRerankConfig:
    def _transform_response(self, response: dict) -> RerankResponse:
        _billed_units = RerankBilledUnits(**response.get("usage", {}))
        _tokens = RerankTokens(**response.get("usage", {}))
        rerank_meta = RerankResponseMeta(billed_units=_billed_units, tokens=_tokens)

        _results: Optional[List[dict]] = response.get("results")

        if _results is None:
            raise ValueError(f"No results found in the response={response}")

        rerank_results: List[RerankResponseResult] = []

        for result in _results:
            # Validate required fields exist
            if not all(key in result for key in ["index", "relevance_score"]):
                raise ValueError(f"Missing required fields in the result={result}")

            # Get document data if it exists
            document_data = result.get("document", {})
            document = (
                RerankResponseDocument(text=str(document_data.get("text", "")))
                if document_data
                else None
            )

            # Create typed result
            rerank_result = RerankResponseResult(
                index=int(result["index"]),
                relevance_score=float(result["relevance_score"]),
            )

            # Only add document if it exists
            if document:
                rerank_result["document"] = document

            rerank_results.append(rerank_result)

        return RerankResponse(
            id=response.get("id") or str(uuid.uuid4()),
            results=rerank_results,
            meta=rerank_meta,
        )  # Return response
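A round-trip sketch through the transformation above, using a raw payload shaped like the fields this transformer reads (values are illustrative):

# Hypothetical raw /v1/rerank payload -- field values are illustrative.
raw = {
    "id": "rerank-123",
    "results": [
        {
            "index": 0,
            "relevance_score": 0.91,
            "document": {"text": "Paris is the capital of France."},
        }
    ],
    "usage": {"total_tokens": 12},
}

response = TogetherAIRerankConfig()._transform_response(raw)
print(response.id)  # "rerank-123"
print(response.results[0]["relevance_score"])  # 0.91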