structure saas with tools

This commit is contained in:
Davidson Gomes
2025-04-25 15:30:54 -03:00
commit 1aef473937
16434 changed files with 6584257 additions and 0 deletions

View File

@@ -0,0 +1,17 @@
model_list:
  - model_name: text-embedding-ada-002
    litellm_params:
      model: azure/azure-embedding-model
      api_base: "os.environ/AZURE_API_BASE"
      api_key: "os.environ/AZURE_API_KEY"
      api_version: "2023-07-01-preview"
    model_info:
      mode: embedding
      base_model: text-embedding-ada-002

litellm_settings:
  set_verbose: True

general_settings:
  background_health_checks: True # enable background health checks
  health_check_interval: 300 # frequency of background health checks, in seconds
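
With background health checks enabled, the proxy's /health endpoint serves the periodically refreshed results. A minimal client sketch, assuming the proxy runs locally on port 4000 with master key sk-1234 (both placeholders, not part of this config):

import requests

# Query the proxy's health endpoint; with background_health_checks enabled,
# results come from the cache refreshed every health_check_interval seconds.
resp = requests.get(
    "http://0.0.0.0:4000/health",                    # assumed local proxy address
    headers={"Authorization": "Bearer sk-1234"},     # placeholder master key
)
print(resp.json())  # e.g. healthy_endpoints / unhealthy_endpoints summary
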

View File

@@ -0,0 +1,30 @@
model_list:
  - model_name: text-davinci-003
    litellm_params:
      model: ollama/zephyr
  - model_name: gpt-4
    litellm_params:
      model: ollama/llama2
  - model_name: gpt-3.5-turbo
    litellm_params:
      model: ollama/llama2
      temperature: 0.1
      max_tokens: 20

# request to gpt-4, response from ollama/llama2
# curl --location 'http://0.0.0.0:8000/chat/completions' \
#   --header 'Content-Type: application/json' \
#   --data '{
#     "model": "gpt-4",
#     "messages": [
#       {
#         "role": "user",
#         "content": "what llm are you"
#       }
#     ]
#   }'
#
# {"id":"chatcmpl-27c85cf0-ab09-4bcf-8cb1-0ee950520743","choices":[{"finish_reason":"stop","index":0,"message":{"content":" Hello! I'm just an AI, I don't have personal experiences or emotions like humans do. However, I can help you with any questions or tasks you may have! Is there something specific you'd like to know or discuss?","role":"assistant","_logprobs":null}}],"created":1700094955.373751,"model":"ollama/llama2","object":"chat.completion","system_fingerprint":null,"usage":{"prompt_tokens":12,"completion_tokens":47,"total_tokens":59},"_response_ms":8028.017999999999}

View File

@@ -0,0 +1,21 @@
model_list:
  - model_name: gpt-4-team1
    litellm_params:
      model: azure/chatgpt-v-2
      api_base: https://openai-gpt-4-test-v-1.openai.azure.com/
      api_version: "2023-05-15"
      api_key: os.environ/AZURE_API_KEY
      tpm: 20_000
      timeout: 5 # 5 second timeout
      stream_timeout: 0.5 # 0.5 second timeout for streaming requests
      max_retries: 4
  - model_name: gpt-4-team2
    litellm_params:
      model: azure/gpt-4
      api_key: os.environ/AZURE_API_KEY
      api_base: https://openai-gpt-4-test-v-2.openai.azure.com/
      tpm: 100_000
      timeout: 5 # 5 second timeout
      stream_timeout: 0.5 # 0.5 second timeout for streaming requests
      max_retries: 4
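
A minimal client-side sketch of calling one of these aliases through the proxy (assumes the openai Python package and a proxy reachable at http://0.0.0.0:4000 with key sk-1234; those values are illustrative, not taken from this file):

from openai import OpenAI

# The proxy exposes an OpenAI-compatible API, so the stock client works;
# point base_url at the proxy and use a proxy key, not the Azure key.
client = OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")

response = client.chat.completions.create(
    model="gpt-4-team1",  # routed to azure/chatgpt-v-2 per the config above
    messages=[{"role": "user", "content": "hello"}],
)
print(response.choices[0].message.content)
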

View File

@@ -0,0 +1,265 @@
datasource client {
provider = "postgresql"
url = env("DATABASE_URL")
}
generator client {
provider = "prisma-client-py"
}
// Budget / Rate Limits for an org
model LiteLLM_BudgetTable {
budget_id String @id @default(uuid())
max_budget Float?
soft_budget Float?
max_parallel_requests Int?
tpm_limit BigInt?
rpm_limit BigInt?
model_max_budget Json?
temp_verification_token String? // bad param for testing
budget_duration String?
budget_reset_at DateTime?
created_at DateTime @default(now()) @map("created_at")
created_by String
updated_at DateTime @default(now()) @updatedAt @map("updated_at")
updated_by String
organization LiteLLM_OrganizationTable[] // multiple orgs can have the same budget
keys LiteLLM_VerificationToken[] // multiple keys can have the same budget
end_users LiteLLM_EndUserTable[] // multiple end-users can have the same budget
team_membership LiteLLM_TeamMembership[] // budgets of Users within a Team
}
// Models on proxy
model LiteLLM_ProxyModelTable {
model_id String @id @default(uuid())
model_name String
litellm_params Json
model_info Json?
created_at DateTime @default(now()) @map("created_at")
created_by String
updated_at DateTime @default(now()) @updatedAt @map("updated_at")
updated_by String
}
model LiteLLM_OrganizationTable {
organization_id String @id @default(uuid())
organization_alias String
budget_id String
metadata Json @default("{}")
models String[]
spend Float @default(0.0)
model_spend Json @default("{}")
created_at DateTime @default(now()) @map("created_at")
created_by String
updated_at DateTime @default(now()) @updatedAt @map("updated_at")
updated_by String
litellm_budget_table LiteLLM_BudgetTable? @relation(fields: [budget_id], references: [budget_id])
teams LiteLLM_TeamTable[]
users LiteLLM_UserTable[]
}
// Model info for teams, just has model aliases for now.
model LiteLLM_ModelTable {
id Int @id @default(autoincrement())
model_aliases Json? @map("aliases")
created_at DateTime @default(now()) @map("created_at")
created_by String
updated_at DateTime @default(now()) @updatedAt @map("updated_at")
updated_by String
team LiteLLM_TeamTable?
}
// Assign prod keys to groups, not individuals
model LiteLLM_TeamTable {
team_id String @id @default(uuid())
team_alias String?
organization_id String?
admins String[]
members String[]
members_with_roles Json @default("{}")
metadata Json @default("{}")
max_budget Float?
spend Float @default(0.0)
models String[]
max_parallel_requests Int?
tpm_limit BigInt?
rpm_limit BigInt?
budget_duration String?
budget_reset_at DateTime?
blocked Boolean @default(false)
created_at DateTime @default(now()) @map("created_at")
updated_at DateTime @default(now()) @updatedAt @map("updated_at")
model_spend Json @default("{}")
model_max_budget Json @default("{}")
model_id Int? @unique // id for LiteLLM_ModelTable -> stores team-level model aliases
litellm_organization_table LiteLLM_OrganizationTable? @relation(fields: [organization_id], references: [organization_id])
litellm_model_table LiteLLM_ModelTable? @relation(fields: [model_id], references: [id])
}
// Track spend, rate limit, budget Users
model LiteLLM_UserTable {
user_id String @id
user_alias String?
team_id String?
organization_id String?
password String?
teams String[] @default([])
user_role String?
max_budget Float?
spend Float @default(0.0)
user_email String?
models String[]
metadata Json @default("{}")
max_parallel_requests Int?
tpm_limit BigInt?
rpm_limit BigInt?
budget_duration String?
budget_reset_at DateTime?
allowed_cache_controls String[] @default([])
model_spend Json @default("{}")
model_max_budget Json @default("{}")
litellm_organization_table LiteLLM_OrganizationTable? @relation(fields: [organization_id], references: [organization_id])
invitations_created LiteLLM_InvitationLink[] @relation("CreatedBy")
invitations_updated LiteLLM_InvitationLink[] @relation("UpdatedBy")
invitations_user LiteLLM_InvitationLink[] @relation("UserId")
}
// Generate Tokens for Proxy
model LiteLLM_VerificationToken {
token String @id
key_name String?
key_alias String?
soft_budget_cooldown Boolean @default(false) // key-level state on if budget alerts need to be cooled down
spend Float @default(0.0)
expires DateTime?
models String[]
aliases Json @default("{}")
config Json @default("{}")
user_id String?
team_id String?
permissions Json @default("{}")
max_parallel_requests Int?
metadata Json @default("{}")
blocked Boolean?
tpm_limit BigInt?
rpm_limit BigInt?
max_budget Float?
budget_duration String?
budget_reset_at DateTime?
allowed_cache_controls String[] @default([])
model_spend Json @default("{}")
model_max_budget Json @default("{}")
budget_id String?
litellm_budget_table LiteLLM_BudgetTable? @relation(fields: [budget_id], references: [budget_id])
}
model LiteLLM_EndUserTable {
user_id String @id
alias String? // admin-facing alias
spend Float @default(0.0)
allowed_model_region String? // require all user requests to use models in this specific region
default_model String? // use along with 'allowed_model_region'. if no available model in region, default to this model.
budget_id String?
litellm_budget_table LiteLLM_BudgetTable? @relation(fields: [budget_id], references: [budget_id])
blocked Boolean @default(false)
}
// store proxy config.yaml
model LiteLLM_Config {
param_name String @id
param_value Json?
}
// View spend, model, api_key per request
model LiteLLM_SpendLogs {
request_id String @id
call_type String
api_key String @default("") // Hashed API Token. Not the actual Virtual Key. Equivalent to 'token' column in LiteLLM_VerificationToken
spend Float @default(0.0)
total_tokens Int @default(0)
prompt_tokens Int @default(0)
completion_tokens Int @default(0)
startTime DateTime // Assuming start_time is a DateTime field
endTime DateTime // Assuming end_time is a DateTime field
completionStartTime DateTime? // Assuming completionStartTime is a DateTime field
model String @default("")
model_id String? @default("") // the model id stored in proxy model db
model_group String? @default("") // public model_name / model_group
api_base String? @default("")
user String? @default("")
metadata Json? @default("{}")
cache_hit String? @default("")
cache_key String? @default("")
request_tags Json? @default("[]")
team_id String?
end_user String?
requester_ip_address String?
@@index([startTime])
@@index([end_user])
}
// View spend, model, api_key per request
model LiteLLM_ErrorLogs {
request_id String @id @default(uuid())
startTime DateTime // Assuming start_time is a DateTime field
endTime DateTime // Assuming end_time is a DateTime field
api_base String @default("")
model_group String @default("") // public model_name / model_group
litellm_model_name String @default("") // model passed to litellm
model_id String @default("") // ID of model in ProxyModelTable
request_kwargs Json @default("{}")
exception_type String @default("")
exception_string String @default("")
status_code String @default("")
}
// Beta - allow team members to request access to a model
model LiteLLM_UserNotifications {
request_id String @id
user_id String
models String[]
justification String
status String // approved, disapproved, pending
}
model LiteLLM_TeamMembership {
// Use this table to track the Internal User's Spend within a Team + Set Budgets, rpm limits for the user within the team
user_id String
team_id String
spend Float @default(0.0)
budget_id String?
litellm_budget_table LiteLLM_BudgetTable? @relation(fields: [budget_id], references: [budget_id])
@@id([user_id, team_id])
}
model LiteLLM_InvitationLink {
// use this table to track invite links sent by admin for people to join the proxy
id String @id @default(uuid())
user_id String
is_accepted Boolean @default(false)
accepted_at DateTime? // when link is claimed (user successfully onboards via link)
expires_at DateTime // till when is link valid
created_at DateTime // when did admin create the link
created_by String // who created the link
updated_at DateTime // when was invite status updated
updated_by String // who updated the status (admin/user who accepted invite)
// Relations
liteLLM_user_table_user LiteLLM_UserTable @relation("UserId", fields: [user_id], references: [user_id])
liteLLM_user_table_created LiteLLM_UserTable @relation("CreatedBy", fields: [created_by], references: [user_id])
liteLLM_user_table_updated LiteLLM_UserTable @relation("UpdatedBy", fields: [updated_by], references: [user_id])
}
model LiteLLM_AuditLog {
id String @id @default(uuid())
updated_at DateTime @default(now())
changed_by String @default("") // user or system that performed the action
changed_by_api_key String @default("") // api key hash that performed the action
action String // create, update, delete
table_name String // one of LitellmTableNames.TEAM_TABLE_NAME, LitellmTableNames.USER_TABLE_NAME, LitellmTableNames.PROXY_MODEL_TABLE_NAME,
object_id String // id of the object being audited. This can be the key id, team id, user id, model id
before_value Json? // value of the row before the change
updated_values Json? // value of the row after change
}

View File

@@ -0,0 +1,52 @@
import os

from fastapi import Request

from litellm.proxy._types import GenerateKeyRequest, UserAPIKeyAuth


async def user_api_key_auth(request: Request, api_key: str) -> UserAPIKeyAuth:
    try:
        modified_master_key = f"{os.getenv('LITELLM_MASTER_KEY')}-1234"
        if api_key == modified_master_key:
            return UserAPIKeyAuth(api_key=api_key)
        raise Exception("Invalid API key")
    except Exception:
        raise Exception("Authentication failed")


async def generate_key_fn(data: GenerateKeyRequest):
    """
    Asynchronously decides if a key should be generated or not based on the provided data.

    Args:
        data (GenerateKeyRequest): The data to be used for decision making.

    Returns:
        dict: {"decision": bool, "message": str (optional)} - whether a key should be generated.
    """
    # decide if a key should be generated or not
    data_json = data.json()  # type: ignore

    # Unpacking variables
    team_id = data_json.get("team_id")
    data_json.get("duration")
    data_json.get("models")
    data_json.get("aliases")
    data_json.get("config")
    data_json.get("spend")
    data_json.get("user_id")
    data_json.get("max_parallel_requests")
    data_json.get("metadata")
    data_json.get("tpm_limit")
    data_json.get("rpm_limit")

    if team_id is not None and len(team_id) > 0:
        return {
            "decision": True,
        }
    else:
        return {
            "decision": False,
            "message": "This violates LiteLLM Proxy Rules. No team id provided.",
        }
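
A quick sketch of exercising the custom auth hook above once it is wired into the proxy config. The endpoint, port, and presence of LITELLM_MASTER_KEY in the environment are assumptions; the hook only accepts the master key with a "-1234" suffix.

import os

import requests

# user_api_key_auth above only accepts f"{LITELLM_MASTER_KEY}-1234",
# so a plain master key would be rejected.
api_key = f"{os.environ['LITELLM_MASTER_KEY']}-1234"

resp = requests.post(
    "http://0.0.0.0:4000/chat/completions",  # assumed local proxy address
    headers={"Authorization": f"Bearer {api_key}"},
    json={
        "model": "gpt-3.5-turbo",
        "messages": [{"role": "user", "content": "hello"}],
    },
)
print(resp.status_code, resp.json())
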

View File

@@ -0,0 +1,14 @@
from fastapi import Request

from litellm.proxy._types import UserAPIKeyAuth


async def user_api_key_auth(request: Request, api_key: str) -> UserAPIKeyAuth:
    try:
        return UserAPIKeyAuth(
            api_key="best-api-key-ever",
            user_id="best-user-id-ever",
            team_id="best-team-id-ever",
        )
    except Exception:
        raise Exception("Authentication failed")

View File

@@ -0,0 +1,74 @@
import os
import sys
import traceback

# this file is to test litellm/proxy
sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path

import inspect

import litellm
from litellm.integrations.custom_logger import CustomLogger


# This file includes the custom callbacks for LiteLLM Proxy
# Once defined, these can be passed in proxy_config.yaml
def print_verbose(print_statement):
    if litellm.set_verbose:
        print(print_statement)  # noqa


class MyCustomHandler(CustomLogger):
    def __init__(self):
        blue_color_code = "\033[94m"
        reset_color_code = "\033[0m"
        print_verbose(f"{blue_color_code}Initialized LiteLLM custom logger")
        try:
            print_verbose("Logger Initialized with following methods:")
            methods = [
                method
                for method in dir(self)
                if inspect.ismethod(getattr(self, method))
            ]

            # Pretty print_verbose the methods
            for method in methods:
                print_verbose(f" - {method}")
            print_verbose(f"{reset_color_code}")
        except Exception:
            pass

    def log_pre_api_call(self, model, messages, kwargs):
        print_verbose("Pre-API Call")

    def log_post_api_call(self, kwargs, response_obj, start_time, end_time):
        print_verbose("Post-API Call")

    def log_stream_event(self, kwargs, response_obj, start_time, end_time):
        print_verbose("On Stream")

    def log_success_event(self, kwargs, response_obj, start_time, end_time):
        print_verbose("On Success!")

    async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
        print_verbose("On Async Success!")
        response_cost = litellm.completion_cost(completion_response=response_obj)
        assert response_cost > 0.0
        return

    async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
        try:
            print_verbose("On Async Failure !")
        except Exception as e:
            print_verbose(f"Exception: {e}")


proxy_handler_instance = MyCustomHandler()

# need to set litellm.callbacks = [proxy_handler_instance] # on the proxy
# litellm.success_callback = [async_on_success_logger]
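
For a quick local check outside the proxy, the handler can be attached to the litellm SDK directly. A sketch, assuming the file above is importable as custom_callbacks.py; mock_response keeps the call offline:

import litellm

from custom_callbacks import proxy_handler_instance  # assumed module name

litellm.set_verbose = True
litellm.callbacks = [proxy_handler_instance]

# mock_response short-circuits the provider call; the handler's logging
# hooks should still receive the mocked response.
litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "ping"}],
    mock_response="pong",
)
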

View File

@@ -0,0 +1,78 @@
from typing import Literal, Optional

import litellm
from litellm.integrations.custom_logger import CustomLogger
from litellm.proxy.proxy_server import DualCache, UserAPIKeyAuth


# This file includes the custom callbacks for LiteLLM Proxy
# Once defined, these can be passed in proxy_config.yaml
class MyCustomHandler(
    CustomLogger
):  # https://docs.litellm.ai/docs/observability/custom_callback#callback-class
    # Class variables or attributes
    def __init__(self):
        pass

    #### CALL HOOKS - proxy only ####

    async def async_pre_call_hook(
        self,
        user_api_key_dict: UserAPIKeyAuth,
        cache: DualCache,
        data: dict,
        call_type: Literal[
            "completion",
            "text_completion",
            "embeddings",
            "image_generation",
            "moderation",
            "audio_transcription",
            "pass_through_endpoint",
            "rerank",
        ],
    ):
        return data

    async def async_post_call_failure_hook(
        self,
        request_data: dict,
        original_exception: Exception,
        user_api_key_dict: UserAPIKeyAuth,
    ):
        pass

    async def async_post_call_success_hook(
        self,
        data: dict,
        user_api_key_dict: UserAPIKeyAuth,
        response,
    ):
        # print("in async_post_call_success_hook")
        pass

    async def async_moderation_hook(  # call made in parallel to llm api call
        self,
        data: dict,
        user_api_key_dict: UserAPIKeyAuth,
        call_type: Literal[
            "completion",
            "embeddings",
            "image_generation",
            "moderation",
            "audio_transcription",
            "responses",
        ],
    ):
        pass

    async def async_post_call_streaming_hook(
        self,
        user_api_key_dict: UserAPIKeyAuth,
        response: str,
    ):
        # print("in async_post_call_streaming_hook")
        pass


proxy_handler_instance = MyCustomHandler()

View File

@@ -0,0 +1,112 @@
from typing import Any, Dict, List, Literal, Optional, Union

import litellm
from litellm._logging import verbose_proxy_logger
from litellm.caching.caching import DualCache
from litellm.integrations.custom_guardrail import CustomGuardrail
from litellm.proxy._types import UserAPIKeyAuth
from litellm.proxy.guardrails.guardrail_helpers import should_proceed_based_on_metadata


class myCustomGuardrail(CustomGuardrail):
    def __init__(
        self,
        **kwargs,
    ):
        # store kwargs as optional_params
        self.optional_params = kwargs

        super().__init__(**kwargs)

    async def async_pre_call_hook(
        self,
        user_api_key_dict: UserAPIKeyAuth,
        cache: DualCache,
        data: dict,
        call_type: Literal[
            "completion",
            "text_completion",
            "embeddings",
            "image_generation",
            "moderation",
            "audio_transcription",
            "pass_through_endpoint",
            "rerank",
        ],
    ) -> Optional[Union[Exception, str, dict]]:
        """
        Runs before the LLM API call
        Runs on only Input
        Use this if you want to MODIFY the input
        """

        # In this guardrail, if a user inputs `litellm` we will mask it and then send it to the LLM
        _messages = data.get("messages")
        if _messages:
            for message in _messages:
                _content = message.get("content")
                if isinstance(_content, str):
                    if "litellm" in _content.lower():
                        _content = _content.replace("litellm", "********")
                        message["content"] = _content

        verbose_proxy_logger.debug(
            "async_pre_call_hook: Message after masking %s", _messages
        )

        return data

    async def async_moderation_hook(
        self,
        data: dict,
        user_api_key_dict: UserAPIKeyAuth,
        call_type: Literal[
            "completion",
            "embeddings",
            "image_generation",
            "moderation",
            "audio_transcription",
            "responses",
        ],
    ):
        """
        Runs in parallel to LLM API call
        Runs on only Input
        This can NOT modify the input, only used to reject or accept a call before going to LLM API
        """

        # this runs like async_pre_call_hook, but in parallel with the LLM API call
        # In this guardrail, if a user inputs `litellm` we reject the call.
        _messages = data.get("messages")
        if _messages:
            for message in _messages:
                _content = message.get("content")
                if isinstance(_content, str):
                    if "litellm" in _content.lower():
                        raise ValueError("Guardrail failed words - `litellm` detected")

    async def async_post_call_success_hook(
        self,
        data: dict,
        user_api_key_dict: UserAPIKeyAuth,
        response,
    ):
        """
        Runs on response from LLM API call

        It can be used to reject a response

        If a response contains the word "coffee" -> we will raise an exception
        """
        verbose_proxy_logger.debug("async_post_call_success_hook response: %s", response)
        if isinstance(response, litellm.ModelResponse):
            for choice in response.choices:
                if isinstance(choice, litellm.Choices):
                    verbose_proxy_logger.debug("async_post_call_success_hook choice: %s", choice)
                    if (
                        choice.message.content
                        and isinstance(choice.message.content, str)
                        and "coffee" in choice.message.content
                    ):
                        raise ValueError("Guardrail failed Coffee Detected")
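
A hedged sketch of invoking this guardrail per request, once it is registered in the proxy's guardrails config under a name such as "custom-pre-guard" (the name, port, and key below are assumptions):

from openai import OpenAI

client = OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")

# With the guardrail enabled, the pre-call hook should mask the word
# "litellm" before the request reaches the LLM.
response = client.chat.completions.create(
    model="gpt-4",
    messages=[{"role": "user", "content": "what is litellm?"}],
    extra_body={"guardrails": ["custom-pre-guard"]},  # assumed guardrail_name
)
print(response.choices[0].message.content)
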

View File

@@ -0,0 +1,26 @@
import time
from typing import Any, Optional

import litellm
from litellm import CustomLLM, ImageObject, ImageResponse, completion, get_llm_provider
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
from litellm.types.utils import ModelResponse


class MyCustomLLM(CustomLLM):
    def completion(self, *args, **kwargs) -> ModelResponse:
        return litellm.completion(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": "Hello world"}],
            mock_response="Hi!",
        )  # type: ignore

    async def acompletion(self, *args, **kwargs) -> litellm.ModelResponse:
        return litellm.completion(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": "Hello world"}],
            mock_response="Hi!",
        )  # type: ignore


my_custom_llm = MyCustomLLM()
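
For context, a sketch of registering this handler with the litellm SDK. The provider name "my-custom-llm" and the module name custom_handler are arbitrary assumptions; on the proxy the equivalent wiring goes through the custom provider map in the config.

import litellm

from custom_handler import my_custom_llm  # assumes the file above is custom_handler.py

# Map a provider prefix to the handler; requests to "my-custom-llm/*" use it.
litellm.custom_provider_map = [
    {"provider": "my-custom-llm", "custom_handler": my_custom_llm}
]

resp = litellm.completion(
    model="my-custom-llm/anything",
    messages=[{"role": "user", "content": "Hello world"}],
)
print(resp.choices[0].message.content)  # "Hi!" from the mocked response
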

View File

@@ -0,0 +1,17 @@
model_list:
  - model_name: fake-openai-endpoint
    litellm_params:
      model: openai/fake
      api_key: fake-key
      api_base: https://exampleopenaiendpoint-production.up.railway.app/
  - model_name: gpt-4
    litellm_params:
      model: openai/gpt-4
      api_key: fake-key
      api_base: https://exampleopenaiendpoint-production.up.railway.app/

litellm_settings:
  callbacks: ["gcs_bucket"]

general_settings:
  disable_prisma_schema_update: true

View File

@@ -0,0 +1,17 @@
model_list:
  - model_name: gpt-4
    litellm_params:
      model: openai/fake
      api_key: fake-key
      api_base: https://exampleopenaiendpoint-production.up.railway.app/
      tags: ["teamA"]
    model_info:
      id: "team-a-model"

litellm_settings:
  cache: true
  callbacks: ["prometheus"]

router_settings:
  enable_tag_filtering: True # 👈 Key Change
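
A minimal sketch of sending a tagged request that this routing rule will match; the key and port are placeholders, and the tag must appear in the deployment's `tags` list above:

from openai import OpenAI

client = OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")

# With enable_tag_filtering, only deployments whose tags include "teamA"
# are eligible to serve this request.
response = client.chat.completions.create(
    model="gpt-4",
    messages=[{"role": "user", "content": "hello"}],
    extra_body={"metadata": {"tags": ["teamA"]}},
)
print(response.choices[0].message.content)
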

View File

@@ -0,0 +1,7 @@
model_list:
  - model_name: gpt-3.5-turbo

litellm_settings:
  drop_params: True
  success_callback: ["langfuse"] # https://docs.litellm.ai/docs/observability/langfuse_integration

View File

@@ -0,0 +1,28 @@
litellm_settings:
  drop_params: True

# Model-specific settings
model_list: # deployments that share a model_name are load-balanced by the litellm router
  - model_name: gpt-3.5-turbo
    litellm_params:
      model: gpt-3.5-turbo
      api_key: sk-uj6F
      tpm: 20000 # [OPTIONAL] REPLACE with your openai tpm
      rpm: 3 # [OPTIONAL] REPLACE with your openai rpm
  - model_name: gpt-3.5-turbo
    litellm_params:
      model: gpt-3.5-turbo
      api_key: sk-Imn
      tpm: 20000 # [OPTIONAL] REPLACE with your openai tpm
      rpm: 3 # [OPTIONAL] REPLACE with your openai rpm
  - model_name: gpt-3.5-turbo
    litellm_params:
      model: openrouter/gpt-3.5-turbo
  - model_name: mistral-7b-instruct
    litellm_params:
      model: mistralai/mistral-7b-instruct

environment_variables:
  REDIS_HOST: localhost
  REDIS_PASSWORD:
  REDIS_PORT:
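
Because several deployments above share the model_name "gpt-3.5-turbo", the router load-balances between them. A client sketch (key and port are placeholders):

from openai import OpenAI

client = OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")

# Each call is routed to one of the gpt-3.5-turbo deployments, respecting
# the per-deployment tpm/rpm limits set in the config above.
for _ in range(3):
    r = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "hi"}],
    )
    print(r.model)
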

View File

@@ -0,0 +1,12 @@
model_list:
  - model_name: fake-openai-endpoint
    litellm_params:
      model: openai/my-fake-model
      api_key: my-fake-key
      api_base: https://exampleopenaiendpoint-production.up.railway.app/

litellm_settings:
  cache: True
  cache_params:
    type: redis
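
A rough sketch of checking the Redis cache from a client: send the same request twice and compare the responses (assumes a reachable Redis and a proxy at the placeholder address below; with caching on, the repeat call should typically return the stored response).

from openai import OpenAI

client = OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")

msg = [{"role": "user", "content": "what is the capital of France?"}]
first = client.chat.completions.create(model="fake-openai-endpoint", messages=msg)
second = client.chat.completions.create(model="fake-openai-endpoint", messages=msg)

# A cache hit should serve the stored response; matching ids/content is one signal.
print(first.id, second.id)
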

View File

@@ -0,0 +1,62 @@
model_list:
  - model_name: gpt-3.5-turbo-end-user-test
    litellm_params:
      model: gpt-3.5-turbo
      region_name: "eu"
    model_info:
      id: "1"
  - model_name: "*"
    litellm_params:
      model: openai/*
      api_key: os.environ/OPENAI_API_KEY
  # provider specific wildcard routing
  - model_name: "anthropic/*"
    litellm_params:
      model: "anthropic/*"
      api_key: os.environ/ANTHROPIC_API_KEY
  - model_name: "groq/*"
    litellm_params:
      model: "groq/*"
      api_key: os.environ/GROQ_API_KEY

litellm_settings:
  # set_verbose: True # Uncomment this if you want to see verbose logs; not recommended in production
  drop_params: True
  # max_budget: 100
  # budget_duration: 30d
  num_retries: 5
  request_timeout: 600
  telemetry: False
  context_window_fallbacks: [{"gpt-3.5-turbo": ["gpt-3.5-turbo-large"]}]
  default_team_settings:
    - team_id: team-1
      success_callback: ["langfuse"]
      failure_callback: ["langfuse"]
      langfuse_public_key: os.environ/LANGFUSE_PROJECT1_PUBLIC # Project 1
      langfuse_secret: os.environ/LANGFUSE_PROJECT1_SECRET # Project 1
    - team_id: team-2
      success_callback: ["langfuse"]
      failure_callback: ["langfuse"]
      langfuse_public_key: os.environ/LANGFUSE_PROJECT2_PUBLIC # Project 2
      langfuse_secret: os.environ/LANGFUSE_PROJECT2_SECRET # Project 2
      langfuse_host: https://us.cloud.langfuse.com

# For /fine_tuning/jobs endpoints
finetune_settings:
  - custom_llm_provider: azure
    api_base: os.environ/AZURE_API_BASE
    api_key: os.environ/AZURE_API_KEY
    api_version: "2024-05-01-preview"
  - custom_llm_provider: openai
    api_key: os.environ/OPENAI_API_KEY

# for /files endpoints
files_settings:
  - custom_llm_provider: azure
    api_base: os.environ/AZURE_API_BASE
    api_key: os.environ/AZURE_API_KEY
    api_version: "2024-05-01-preview"
  - custom_llm_provider: openai
    api_key: os.environ/OPENAI_API_KEY

general_settings:
  master_key: sk-1234 # [OPTIONAL] Use to enforce auth on proxy. See - https://docs.litellm.ai/docs/proxy/virtual_keys
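
With the provider-specific wildcard entries above, any model under a configured prefix is forwarded with that provider's key. A client sketch (base_url is an assumption; the key is the master_key from general_settings):

from openai import OpenAI

client = OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")

# "anthropic/*" wildcard: any anthropic model name is forwarded using the
# ANTHROPIC_API_KEY configured above.
response = client.chat.completions.create(
    model="anthropic/claude-3-5-sonnet-20241022",
    messages=[{"role": "user", "content": "hello"}],
)
print(response.choices[0].message.content)
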

View File

@@ -0,0 +1,7 @@
model_list:
  - model_name: gpt-3.5-turbo
    litellm_params:
      model: gpt-3.5-turbo

general_settings:
  otel: True # OpenTelemetry logger; logs OTEL data to your collector

View File

@@ -0,0 +1,83 @@
model_list:
  - model_name: fake-openai-endpoint
    litellm_params:
      model: openai/fake
      api_key: fake-key
      api_base: https://exampleopenaiendpoint-production.up.railway.app/
      tags: ["teamA"]
    model_info:
      id: "team-a-model"
  - model_name: fake-openai-endpoint
    litellm_params:
      model: openai/fake
      api_key: fake-key
      api_base: https://exampleopenaiendpoint-production.up.railway.app/
      tags: ["teamB"]
    model_info:
      id: "team-b-model"
  - model_name: rerank-english-v3.0
    litellm_params:
      model: cohere/rerank-english-v3.0
      api_key: os.environ/COHERE_API_KEY
  - model_name: fake-azure-endpoint
    litellm_params:
      model: openai/429
      api_key: fake-key
      api_base: https://exampleopenaiendpoint-production.up.railway.app
  - model_name: llava-hf
    litellm_params:
      model: openai/llava-hf/llava-v1.6-vicuna-7b-hf
      api_base: http://localhost:8000
      api_key: fake-key
    model_info:
      supports_vision: True
  - model_name: bedrock/*
    litellm_params:
      model: bedrock/*
      api_base: https://exampleopenaiendpoint-production.up.railway.app/
  - model_name: openai/*
    litellm_params:
      model: openai/*
      api_key: os.environ/OPENAI_API_KEY
      api_base: https://exampleopenaiendpoint-production.up.railway.app/

litellm_settings:
  cache: true
  callbacks: ["otel", "prometheus"]
  disable_end_user_cost_tracking_prometheus_only: True

guardrails:
  - guardrail_name: "aporia-pre-guard"
    litellm_params:
      guardrail: aporia # supported values: "aporia", "bedrock", "lakera"
      mode: "post_call"
      api_key: os.environ/APORIA_API_KEY_1
      api_base: os.environ/APORIA_API_BASE_1
  - guardrail_name: "aporia-post-guard"
    litellm_params:
      guardrail: aporia # supported values: "aporia", "bedrock", "lakera"
      mode: "post_call"
      api_key: os.environ/APORIA_API_KEY_2
      api_base: os.environ/APORIA_API_BASE_2
  - guardrail_name: "bedrock-pre-guard"
    litellm_params:
      guardrail: bedrock # supported values: "aporia", "bedrock", "lakera"
      mode: "during_call"
      guardrailIdentifier: ff6ujrregl1q
      guardrailVersion: "DRAFT"
  - guardrail_name: "custom-pre-guard"
    litellm_params:
      guardrail: custom_guardrail.myCustomGuardrail
      mode: "pre_call"
  - guardrail_name: "custom-during-guard"
    litellm_params:
      guardrail: custom_guardrail.myCustomGuardrail
      mode: "during_call"
  - guardrail_name: "custom-post-guard"
    litellm_params:
      guardrail: custom_guardrail.myCustomGuardrail
      mode: "post_call"

router_settings:
  enable_tag_filtering: True # 👈 Key Change

View File

@@ -0,0 +1,29 @@
model_list:
  - model_name: fake-openai-endpoint
    litellm_params:
      model: openai/fake
      api_key: fake-key
      api_base: https://exampleopenaiendpoint-production.up.railway.app/
  - model_name: claude-3-5-sonnet-20241022
    litellm_params:
      model: anthropic/claude-3-5-sonnet-20241022
      api_key: os.environ/ANTHROPIC_API_KEY
  - model_name: claude-special-alias
    litellm_params:
      model: anthropic/claude-3-haiku-20240307
      api_key: os.environ/ANTHROPIC_API_KEY
  - model_name: claude-3-5-sonnet-20241022
    litellm_params:
      model: anthropic/claude-3-5-sonnet-20241022
      api_key: os.environ/ANTHROPIC_API_KEY
  - model_name: claude-3-7-sonnet-20250219
    litellm_params:
      model: anthropic/claude-3-7-sonnet-20250219
      api_key: os.environ/ANTHROPIC_API_KEY
  - model_name: anthropic/*
    litellm_params:
      model: anthropic/*
      api_key: os.environ/ANTHROPIC_API_KEY

general_settings:
  master_key: sk-1234
  custom_auth: custom_auth_basic.user_api_key_auth

View File

@@ -0,0 +1,4 @@
model_list:
  - model_name: gpt-3.5-turbo
    litellm_params:
      model: gpt-3.5-turbo

View File

@@ -0,0 +1,15 @@
model_list:
  - model_name: fake-openai-endpoint
    litellm_params:
      model: openai/fake
      api_key: fake-key
      api_base: https://exampleopenaiendpoint-production.up.railway.app/

general_settings:
  use_redis_transaction_buffer: true

litellm_settings:
  cache: True
  cache_params:
    type: redis
    supported_call_types: []

View File

@@ -0,0 +1,10 @@
model_list:
  - model_name: fake-openai-endpoint
    litellm_params:
      model: openai/my-fake-model
      api_key: my-fake-key
      api_base: https://exampleopenaiendpoint-production.up.railway.app/

general_settings:
  store_model_in_db: true