structure saas with tools
@@ -0,0 +1,137 @@
from typing import Dict, List, Optional

import litellm
from litellm.litellm_core_utils.prompt_templates.factory import (
    convert_generic_image_chunk_to_openai_image_obj,
    convert_to_anthropic_image_obj,
)
from litellm.types.llms.openai import AllMessageValues
from litellm.types.llms.vertex_ai import ContentType, PartType
from litellm.utils import supports_reasoning

from ...vertex_ai.gemini.transformation import _gemini_convert_messages_with_history
from ...vertex_ai.gemini.vertex_and_google_ai_studio_gemini import VertexGeminiConfig


class GoogleAIStudioGeminiConfig(VertexGeminiConfig):
    """
    Reference: https://ai.google.dev/api/rest/v1beta/GenerationConfig

    The class `GoogleAIStudioGeminiConfig` provides configuration for the Google AI Studio Gemini API interface. Below are the parameters:

    - `temperature` (float): Controls the degree of randomness in token selection.

    - `max_output_tokens` (int): Maximum number of tokens in the text output. Default is 256.

    - `top_p` (float): Tokens are selected from most to least probable until the sum of their probabilities equals the `top_p` value. Default is 0.95.

    - `top_k` (int): How many of the most probable tokens are considered at each selection step. For example, a `top_k` of 1 means the selected token is the most probable among all tokens. Default is 40.

    - `response_mime_type` (str): MIME type of the response. Default is 'text/plain'; `application/json` is also supported.

    - `response_schema` (dict): Optional. Output schema of the generated candidate text, for response MIME types that can carry a schema. The schema can use objects, primitives, or arrays and is a subset of the OpenAPI schema. If set, a compatible `response_mime_type` must also be set (`application/json` for JSON responses).

    - `candidate_count` (int): Number of generated responses to return.

    - `stop_sequences` (List[str]): Up to 5 character sequences that stop output generation. If specified, the API stops at the first appearance of a stop sequence; the stop sequence is not included in the response.

    Note: Adjust the default parameters as required for your use case.
    """

    temperature: Optional[float] = None
    max_output_tokens: Optional[int] = None
    top_p: Optional[float] = None
    top_k: Optional[int] = None
    response_mime_type: Optional[str] = None
    response_schema: Optional[dict] = None
    candidate_count: Optional[int] = None
    stop_sequences: Optional[list] = None

    def __init__(
        self,
        temperature: Optional[float] = None,
        max_output_tokens: Optional[int] = None,
        top_p: Optional[float] = None,
        top_k: Optional[int] = None,
        response_mime_type: Optional[str] = None,
        response_schema: Optional[dict] = None,
        candidate_count: Optional[int] = None,
        stop_sequences: Optional[list] = None,
    ) -> None:
        # Any explicitly passed (non-None) argument overrides the
        # corresponding class-level default.
        locals_ = locals().copy()
        for key, value in locals_.items():
            if key != "self" and value is not None:
                setattr(self.__class__, key, value)

    @classmethod
    def get_config(cls):
        return super().get_config()

    def get_supported_openai_params(self, model: str) -> List[str]:
        supported_params = [
            "temperature",
            "top_p",
            "max_tokens",
            "max_completion_tokens",
            "stream",
            "tools",
            "tool_choice",
            "functions",
            "response_format",
            "n",
            "stop",
            "logprobs",
            "frequency_penalty",
            "modalities",
        ]
        if supports_reasoning(model):
            supported_params.append("reasoning_effort")
            supported_params.append("thinking")
        return supported_params

    def map_openai_params(
        self,
        non_default_params: Dict,
        optional_params: Dict,
        model: str,
        drop_params: bool,
    ) -> Dict:
        if litellm.vertex_ai_safety_settings is not None:
            optional_params["safety_settings"] = litellm.vertex_ai_safety_settings
        return super().map_openai_params(
            model=model,
            non_default_params=non_default_params,
            optional_params=optional_params,
            drop_params=drop_params,
        )

    def _transform_messages(
        self, messages: List[AllMessageValues]
    ) -> List[ContentType]:
        """
        Google AI Studio Gemini does not support image urls in messages -
        convert any https:// image urls to base64 data before handing off.
        """
        for message in messages:
            _message_content = message.get("content")
            if _message_content is not None and isinstance(_message_content, list):
                _parts: List[PartType] = []
                for element in _message_content:
                    if element.get("type") == "image_url":
                        img_element = element
                        _image_url: Optional[str] = None
                        format: Optional[str] = None
                        if isinstance(img_element.get("image_url"), dict):
                            _image_url = img_element["image_url"].get("url")  # type: ignore
                            format = img_element["image_url"].get("format")  # type: ignore
                        else:
                            _image_url = img_element.get("image_url")  # type: ignore
                        if _image_url and "https://" in _image_url:
                            image_obj = convert_to_anthropic_image_obj(
                                _image_url, format=format
                            )
                            img_element["image_url"] = (  # type: ignore
                                convert_generic_image_chunk_to_openai_image_obj(
                                    image_obj
                                )
                            )
        return _gemini_convert_messages_with_history(messages=messages)

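For context, a minimal usage sketch (not part of the commit) showing how the parameters mapped above surface through litellm's public `completion()` API. The model name and prompt are placeholders; it assumes `GEMINI_API_KEY` is set in the environment.

    # hypothetical example - assumes a valid GEMINI_API_KEY
    import litellm

    response = litellm.completion(
        model="gemini/gemini-1.5-flash",  # "gemini/" prefix routes to Google AI Studio
        messages=[{"role": "user", "content": "Say hello in one word."}],
        temperature=0.2,   # mapped onto generationConfig temperature
        max_tokens=64,     # mapped onto max_output_tokens
        stop=["\n\n"],     # mapped onto stop_sequences
    )
    print(response.choices[0].message.content)
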
@@ -0,0 +1,84 @@
from typing import List, Optional, Union

import httpx

import litellm
from litellm.llms.base_llm.base_utils import BaseLLMModelInfo
from litellm.llms.base_llm.chat.transformation import BaseLLMException
from litellm.secret_managers.main import get_secret_str
from litellm.types.llms.openai import AllMessageValues


class GeminiError(BaseLLMException):
    pass


class GeminiModelInfo(BaseLLMModelInfo):
    def validate_environment(
        self,
        headers: dict,
        model: str,
        messages: List[AllMessageValues],
        optional_params: dict,
        litellm_params: dict,
        api_key: Optional[str] = None,
        api_base: Optional[str] = None,
    ) -> dict:
        """Google AI Studio sends the API key in query params, so no auth headers are added here."""
        return headers

    @property
    def api_version(self) -> str:
        return "v1beta"

    @staticmethod
    def get_api_base(api_base: Optional[str] = None) -> Optional[str]:
        return (
            api_base
            or get_secret_str("GEMINI_API_BASE")
            or "https://generativelanguage.googleapis.com"
        )

    @staticmethod
    def get_api_key(api_key: Optional[str] = None) -> Optional[str]:
        return api_key or get_secret_str("GEMINI_API_KEY")

    @staticmethod
    def get_base_model(model: str) -> Optional[str]:
        return model.replace("gemini/", "")

    def get_models(
        self, api_key: Optional[str] = None, api_base: Optional[str] = None
    ) -> List[str]:
        api_base = GeminiModelInfo.get_api_base(api_base)
        api_key = GeminiModelInfo.get_api_key(api_key)
        endpoint = f"/{self.api_version}/models"
        if api_base is None or api_key is None:
            raise ValueError(
                "GEMINI_API_BASE or GEMINI_API_KEY is not set. Please set the environment variable to query Gemini's `/models` endpoint."
            )

        response = litellm.module_level_client.get(
            url=f"{api_base}{endpoint}?key={api_key}",
        )

        if response.status_code != 200:
            raise ValueError(
                f"Failed to fetch models from Gemini. Status code: {response.status_code}, Response: {response.json()}"
            )

        models = response.json()["models"]

        litellm_model_names = []
        for model in models:
            # Use replace() rather than strip(): str.strip("models/") removes
            # *characters* from the set {m, o, d, e, l, s, /} at both ends,
            # which can mangle model names (e.g. trailing "o" in "...-pro").
            stripped_model_name = model["name"].replace("models/", "", 1)
            litellm_model_name = "gemini/" + stripped_model_name
            litellm_model_names.append(litellm_model_name)
        return litellm_model_names

    def get_error_class(
        self, error_message: str, status_code: int, headers: Union[dict, httpx.Headers]
    ) -> BaseLLMException:
        return GeminiError(
            status_code=status_code, message=error_message, headers=headers
        )
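A minimal sketch (not part of the commit) of listing models through this handler. The module path is assumed from the diff's relative imports and may differ; it requires `GEMINI_API_KEY` in the environment.

    # hypothetical example - module path assumed
    from litellm.llms.gemini.common_utils import GeminiModelInfo

    model_info = GeminiModelInfo()
    for name in model_info.get_models():
        print(name)  # e.g. "gemini/gemini-1.5-pro"
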
@@ -0,0 +1 @@
[Go here for the Gemini Context Caching code](../../vertex_ai/context_caching/)
@@ -0,0 +1,21 @@
"""
Calculates the cost of Gemini API calls.

Handles context caching for the Gemini API.
"""

from typing import Tuple

from litellm.litellm_core_utils.llm_cost_calc.utils import generic_cost_per_token
from litellm.types.utils import Usage


def cost_per_token(model: str, usage: Usage) -> Tuple[float, float]:
    """
    Calculates the cost per token for a given model, prompt tokens, and completion tokens.

    Follows the same logic as Anthropic's cost-per-token calculation.
    """
    return generic_cost_per_token(
        model=model, usage=usage, custom_llm_provider="gemini"
    )
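A quick sketch (not part of the commit) of calling this helper directly; the token counts are illustrative.

    # hypothetical example
    from litellm.types.utils import Usage

    usage = Usage(prompt_tokens=1200, completion_tokens=300, total_tokens=1500)
    prompt_cost, completion_cost = cost_per_token(model="gemini-1.5-pro", usage=usage)
    print(f"prompt: ${prompt_cost:.6f}, completion: ${completion_cost:.6f}")
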
@@ -0,0 +1,173 @@
"""
Supports writing files to the Google AI Studio Files API.

For Vertex AI, see the vertex_ai/files/handler.py file.
"""

import time
from typing import List, Optional

import httpx

from litellm._logging import verbose_logger
from litellm.litellm_core_utils.prompt_templates.common_utils import extract_file_data
from litellm.llms.base_llm.files.transformation import (
    BaseFilesConfig,
    LiteLLMLoggingObj,
)
from litellm.types.llms.gemini import GeminiCreateFilesResponseObject
from litellm.types.llms.openai import (
    CreateFileRequest,
    OpenAICreateFileRequestOptionalParams,
    OpenAIFileObject,
)
from litellm.types.utils import LlmProviders

from ..common_utils import GeminiModelInfo


class GoogleAIStudioFilesHandler(GeminiModelInfo, BaseFilesConfig):
    def __init__(self):
        pass

    @property
    def custom_llm_provider(self) -> LlmProviders:
        return LlmProviders.GEMINI

    def get_complete_url(
        self,
        api_base: Optional[str],
        api_key: Optional[str],
        model: str,
        optional_params: dict,
        litellm_params: dict,
        stream: Optional[bool] = None,
    ) -> str:
        """
        OPTIONAL

        Get the complete url for the request.

        Some providers need `model` in `api_base`.
        """
        endpoint = "upload/v1beta/files"
        api_base = self.get_api_base(api_base)
        if not api_base:
            raise ValueError("api_base is required")

        if not api_key:
            raise ValueError("api_key is required")

        # e.g. https://generativelanguage.googleapis.com/upload/v1beta/files?key=<GEMINI_API_KEY>
        url = "{}/{}?key={}".format(api_base, endpoint, api_key)
        return url

    def get_supported_openai_params(
        self, model: str
    ) -> List[OpenAICreateFileRequestOptionalParams]:
        return []

    def map_openai_params(
        self,
        non_default_params: dict,
        optional_params: dict,
        model: str,
        drop_params: bool,
    ) -> dict:
        return optional_params

    def transform_create_file_request(
        self,
        model: str,
        create_file_data: CreateFileRequest,
        optional_params: dict,
        litellm_params: dict,
    ) -> dict:
        """
        Transform the OpenAI-style file creation request into Gemini's format.

        Returns:
            dict: Contains both request data and headers for the two-step upload
        """
        # Extract the file information
        file_data = create_file_data.get("file")
        if file_data is None:
            raise ValueError("File data is required")

        # Use the common utility function to extract file data
        extracted_data = extract_file_data(file_data)

        # Get file size
        file_size = len(extracted_data["content"])

        # Step 1: Initial resumable upload request
        headers = {
            "X-Goog-Upload-Protocol": "resumable",
            "X-Goog-Upload-Command": "start",
            "X-Goog-Upload-Header-Content-Length": str(file_size),
            "X-Goog-Upload-Header-Content-Type": extracted_data["content_type"],
            "Content-Type": "application/json",
        }
        headers.update(extracted_data["headers"])  # Add any custom headers

        # Initial metadata request body
        initial_data = {
            "file": {
                "display_name": extracted_data["filename"] or str(int(time.time()))
            }
        }

        # Step 2: Actual file upload data
        upload_headers = {
            "Content-Length": str(file_size),
            "X-Goog-Upload-Offset": "0",
            "X-Goog-Upload-Command": "upload, finalize",
        }

        return {
            "initial_request": {"headers": headers, "data": initial_data},
            "upload_request": {
                "headers": upload_headers,
                "data": extracted_data["content"],
            },
        }

    def transform_create_file_response(
        self,
        model: Optional[str],
        raw_response: httpx.Response,
        logging_obj: LiteLLMLoggingObj,
        litellm_params: dict,
    ) -> OpenAIFileObject:
        """
        Transform Gemini's file upload response into an OpenAI-style FileObject.
        """
        try:
            response_json = raw_response.json()

            response_object = GeminiCreateFilesResponseObject(
                **response_json.get("file", {})  # type: ignore
            )

            # Extract file information from the Gemini response
            return OpenAIFileObject(
                id=response_object["uri"],  # Gemini uses the URI as the identifier
                bytes=int(
                    response_object["sizeBytes"]
                ),  # Gemini returns sizeBytes as a string
                created_at=int(
                    time.mktime(
                        time.strptime(
                            response_object["createTime"].replace("Z", "+00:00"),
                            "%Y-%m-%dT%H:%M:%S.%f%z",
                        )
                    )
                ),
                filename=response_object["displayName"],
                object="file",
                purpose="user_data",  # Default to user_data as that's the main use case
                status="uploaded",
                status_details=None,
            )
        except Exception as e:
            verbose_logger.exception(f"Error parsing file upload response: {str(e)}")
            raise ValueError(f"Error parsing file upload response: {str(e)}")
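A hedged sketch (not part of the commit) of driving the two-step resumable upload this handler prepares. The header names follow Google's resumable-upload protocol; the file name and key are placeholders, and the `create_file_data` dict shape is assumed from `CreateFileRequest`.

    # hypothetical example - assumes a valid GEMINI_API_KEY and a local notes.txt
    import httpx

    handler = GoogleAIStudioFilesHandler()
    url = handler.get_complete_url(
        api_base=None, api_key="<GEMINI_API_KEY>", model="",
        optional_params={}, litellm_params={},
    )
    prepared = handler.transform_create_file_request(
        model="",
        create_file_data={"file": open("notes.txt", "rb"), "purpose": "user_data"},
        optional_params={}, litellm_params={},
    )

    with httpx.Client() as client:
        # Step 1: start the resumable session; Google returns the session URL
        # in the X-Goog-Upload-URL response header.
        start = client.post(
            url,
            headers=prepared["initial_request"]["headers"],
            json=prepared["initial_request"]["data"],
        )
        upload_url = start.headers["x-goog-upload-url"]
        # Step 2: send the bytes and finalize the upload.
        done = client.post(
            upload_url,
            headers=prepared["upload_request"]["headers"],
            content=prepared["upload_request"]["data"],
        )
        print(done.json()["file"]["uri"])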