structure SaaS with tools
@@ -0,0 +1,17 @@
model_list:
  - model_name: text-embedding-ada-002
    litellm_params:
      model: azure/azure-embedding-model
      api_base: "os.environ/AZURE_API_BASE"
      api_key: "os.environ/AZURE_API_KEY"
      api_version: "2023-07-01-preview"
    model_info:
      mode: embedding
      base_model: text-embedding-ada-002

litellm_settings:
  set_verbose: True

general_settings:
  background_health_checks: True # enable background health checks
  health_check_interval: 300 # frequency of background health checks

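With background health checks enabled, the proxy caches per-deployment health and serves it from its `/health` endpoint. A minimal sketch of reading it back, assuming the proxy runs on `http://0.0.0.0:8000` and `sk-1234` is its master key (neither value is set in this config):

```python
# Sketch: poll the LiteLLM proxy's /health endpoint.
# The URL and master key below are placeholders, not part of this config.
import requests

resp = requests.get(
    "http://0.0.0.0:8000/health",
    headers={"Authorization": "Bearer sk-1234"},
    timeout=10,
)
resp.raise_for_status()
print(resp.json())  # expected to list healthy / unhealthy endpoints
```
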
@@ -0,0 +1,30 @@
model_list:
  - model_name: text-davinci-003
    litellm_params:
      model: ollama/zephyr
  - model_name: gpt-4
    litellm_params:
      model: ollama/llama2
  - model_name: gpt-3.5-turbo
    litellm_params:
      model: ollama/llama2
      temperature: 0.1
      max_tokens: 20


# request to gpt-4, response from ollama/llama2
# curl --location 'http://0.0.0.0:8000/chat/completions' \
# --header 'Content-Type: application/json' \
# --data '{
#   "model": "gpt-4",
#   "messages": [
#     {
#       "role": "user",
#       "content": "what llm are you"
#     }
#   ]
# }'
#

# {"id":"chatcmpl-27c85cf0-ab09-4bcf-8cb1-0ee950520743","choices":[{"finish_reason":"stop","index":0,"message":{"content":" Hello! I'm just an AI, I don't have personal experiences or emotions like humans do. However, I can help you with any questions or tasks you may have! Is there something specific you'd like to know or discuss?","role":"assistant","_logprobs":null}}],"created":1700094955.373751,"model":"ollama/llama2","object":"chat.completion","system_fingerprint":null,"usage":{"prompt_tokens":12,"completion_tokens":47,"total_tokens":59},"_response_ms":8028.017999999999}

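The commented curl above can also be issued with the OpenAI Python SDK pointed at the proxy; a sketch (the API key is a placeholder, since this config sets no master key):

```python
# Sketch: same request as the curl example, via the OpenAI SDK.
from openai import OpenAI

client = OpenAI(base_url="http://0.0.0.0:8000", api_key="anything")

response = client.chat.completions.create(
    model="gpt-4",  # routed to ollama/llama2 by the config above
    messages=[{"role": "user", "content": "what llm are you"}],
)
print(response.choices[0].message.content)
```
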
@@ -0,0 +1,21 @@
model_list:
  - model_name: gpt-4-team1
    litellm_params:
      model: azure/chatgpt-v-2
      api_base: https://openai-gpt-4-test-v-1.openai.azure.com/
      api_version: "2023-05-15"
      api_key: os.environ/AZURE_API_KEY
      tpm: 20_000
      timeout: 5 # 5 second timeout
      stream_timeout: 0.5 # 0.5 second timeout for streaming requests
      max_retries: 4
  - model_name: gpt-4-team2
    litellm_params:
      model: azure/gpt-4
      api_key: os.environ/AZURE_API_KEY
      api_base: https://openai-gpt-4-test-v-2.openai.azure.com/
      tpm: 100_000
      timeout: 5 # 5 second timeout
      stream_timeout: 0.5 # 0.5 second timeout for streaming requests
      max_retries: 4

@@ -0,0 +1,265 @@
datasource client {
  provider = "postgresql"
  url      = env("DATABASE_URL")
}

generator client {
  provider = "prisma-client-py"
}

// Budget / Rate Limits for an org
model LiteLLM_BudgetTable {
  budget_id String @id @default(uuid())
  max_budget Float?
  soft_budget Float?
  max_parallel_requests Int?
  tpm_limit BigInt?
  rpm_limit BigInt?
  model_max_budget Json?
  temp_verification_token String? // bad param for testing
  budget_duration String?
  budget_reset_at DateTime?
  created_at DateTime @default(now()) @map("created_at")
  created_by String
  updated_at DateTime @default(now()) @updatedAt @map("updated_at")
  updated_by String
  organization LiteLLM_OrganizationTable[] // multiple orgs can have the same budget
  keys LiteLLM_VerificationToken[] // multiple keys can have the same budget
  end_users LiteLLM_EndUserTable[] // multiple end-users can have the same budget
  team_membership LiteLLM_TeamMembership[] // budgets of Users within a Team
}

// Models on proxy
model LiteLLM_ProxyModelTable {
  model_id String @id @default(uuid())
  model_name String
  litellm_params Json
  model_info Json?
  created_at DateTime @default(now()) @map("created_at")
  created_by String
  updated_at DateTime @default(now()) @updatedAt @map("updated_at")
  updated_by String
}

model LiteLLM_OrganizationTable {
  organization_id String @id @default(uuid())
  organization_alias String
  budget_id String
  metadata Json @default("{}")
  models String[]
  spend Float @default(0.0)
  model_spend Json @default("{}")
  created_at DateTime @default(now()) @map("created_at")
  created_by String
  updated_at DateTime @default(now()) @updatedAt @map("updated_at")
  updated_by String
  litellm_budget_table LiteLLM_BudgetTable? @relation(fields: [budget_id], references: [budget_id])
  teams LiteLLM_TeamTable[]
  users LiteLLM_UserTable[]
}

// Model info for teams, just has model aliases for now.
model LiteLLM_ModelTable {
  id Int @id @default(autoincrement())
  model_aliases Json? @map("aliases")
  created_at DateTime @default(now()) @map("created_at")
  created_by String
  updated_at DateTime @default(now()) @updatedAt @map("updated_at")
  updated_by String
  team LiteLLM_TeamTable?
}

// Assign prod keys to groups, not individuals
model LiteLLM_TeamTable {
  team_id String @id @default(uuid())
  team_alias String?
  organization_id String?
  admins String[]
  members String[]
  members_with_roles Json @default("{}")
  metadata Json @default("{}")
  max_budget Float?
  spend Float @default(0.0)
  models String[]
  max_parallel_requests Int?
  tpm_limit BigInt?
  rpm_limit BigInt?
  budget_duration String?
  budget_reset_at DateTime?
  blocked Boolean @default(false)
  created_at DateTime @default(now()) @map("created_at")
  updated_at DateTime @default(now()) @updatedAt @map("updated_at")
  model_spend Json @default("{}")
  model_max_budget Json @default("{}")
  model_id Int? @unique // id for LiteLLM_ModelTable -> stores team-level model aliases
  litellm_organization_table LiteLLM_OrganizationTable? @relation(fields: [organization_id], references: [organization_id])
  litellm_model_table LiteLLM_ModelTable? @relation(fields: [model_id], references: [id])
}

// Track spend, rate limit, budget Users
model LiteLLM_UserTable {
  user_id String @id
  user_alias String?
  team_id String?
  organization_id String?
  password String?
  teams String[] @default([])
  user_role String?
  max_budget Float?
  spend Float @default(0.0)
  user_email String?
  models String[]
  metadata Json @default("{}")
  max_parallel_requests Int?
  tpm_limit BigInt?
  rpm_limit BigInt?
  budget_duration String?
  budget_reset_at DateTime?
  allowed_cache_controls String[] @default([])
  model_spend Json @default("{}")
  model_max_budget Json @default("{}")
  litellm_organization_table LiteLLM_OrganizationTable? @relation(fields: [organization_id], references: [organization_id])
  invitations_created LiteLLM_InvitationLink[] @relation("CreatedBy")
  invitations_updated LiteLLM_InvitationLink[] @relation("UpdatedBy")
  invitations_user LiteLLM_InvitationLink[] @relation("UserId")
}

// Generate Tokens for Proxy
model LiteLLM_VerificationToken {
  token String @id
  key_name String?
  key_alias String?
  soft_budget_cooldown Boolean @default(false) // key-level state on if budget alerts need to be cooled down
  spend Float @default(0.0)
  expires DateTime?
  models String[]
  aliases Json @default("{}")
  config Json @default("{}")
  user_id String?
  team_id String?
  permissions Json @default("{}")
  max_parallel_requests Int?
  metadata Json @default("{}")
  blocked Boolean?
  tpm_limit BigInt?
  rpm_limit BigInt?
  max_budget Float?
  budget_duration String?
  budget_reset_at DateTime?
  allowed_cache_controls String[] @default([])
  model_spend Json @default("{}")
  model_max_budget Json @default("{}")
  budget_id String?
  litellm_budget_table LiteLLM_BudgetTable? @relation(fields: [budget_id], references: [budget_id])
}

model LiteLLM_EndUserTable {
  user_id String @id
  alias String? // admin-facing alias
  spend Float @default(0.0)
  allowed_model_region String? // require all user requests to use models in this specific region
  default_model String? // use along with 'allowed_model_region'. if no available model in region, default to this model.
  budget_id String?
  litellm_budget_table LiteLLM_BudgetTable? @relation(fields: [budget_id], references: [budget_id])
  blocked Boolean @default(false)
}

// store proxy config.yaml
model LiteLLM_Config {
  param_name String @id
  param_value Json?
}

// View spend, model, api_key per request
model LiteLLM_SpendLogs {
  request_id String @id
  call_type String
  api_key String @default("") // Hashed API Token. Not the actual Virtual Key. Equivalent to 'token' column in LiteLLM_VerificationToken
  spend Float @default(0.0)
  total_tokens Int @default(0)
  prompt_tokens Int @default(0)
  completion_tokens Int @default(0)
  startTime DateTime // Assuming start_time is a DateTime field
  endTime DateTime // Assuming end_time is a DateTime field
  completionStartTime DateTime? // Assuming completionStartTime is a DateTime field
  model String @default("")
  model_id String? @default("") // the model id stored in proxy model db
  model_group String? @default("") // public model_name / model_group
  api_base String? @default("")
  user String? @default("")
  metadata Json? @default("{}")
  cache_hit String? @default("")
  cache_key String? @default("")
  request_tags Json? @default("[]")
  team_id String?
  end_user String?
  requester_ip_address String?
  @@index([startTime])
  @@index([end_user])
}

// View spend, model, api_key per request
model LiteLLM_ErrorLogs {
  request_id String @id @default(uuid())
  startTime DateTime // Assuming start_time is a DateTime field
  endTime DateTime // Assuming end_time is a DateTime field
  api_base String @default("")
  model_group String @default("") // public model_name / model_group
  litellm_model_name String @default("") // model passed to litellm
  model_id String @default("") // ID of model in ProxyModelTable
  request_kwargs Json @default("{}")
  exception_type String @default("")
  exception_string String @default("")
  status_code String @default("")
}

// Beta - allow team members to request access to a model
model LiteLLM_UserNotifications {
  request_id String @id
  user_id String
  models String[]
  justification String
  status String // approved, disapproved, pending
}

model LiteLLM_TeamMembership {
  // Use this table to track the Internal User's Spend within a Team + Set Budgets, rpm limits for the user within the team
  user_id String
  team_id String
  spend Float @default(0.0)
  budget_id String?
  litellm_budget_table LiteLLM_BudgetTable? @relation(fields: [budget_id], references: [budget_id])
  @@id([user_id, team_id])
}

model LiteLLM_InvitationLink {
  // use this table to track invite links sent by admin for people to join the proxy
  id String @id @default(uuid())
  user_id String
  is_accepted Boolean @default(false)
  accepted_at DateTime? // when link is claimed (user successfully onboards via link)
  expires_at DateTime // till when is link valid
  created_at DateTime // when did admin create the link
  created_by String // who created the link
  updated_at DateTime // when was invite status updated
  updated_by String // who updated the status (admin/user who accepted invite)

  // Relations
  liteLLM_user_table_user LiteLLM_UserTable @relation("UserId", fields: [user_id], references: [user_id])
  liteLLM_user_table_created LiteLLM_UserTable @relation("CreatedBy", fields: [created_by], references: [user_id])
  liteLLM_user_table_updated LiteLLM_UserTable @relation("UpdatedBy", fields: [updated_by], references: [user_id])
}

model LiteLLM_AuditLog {
  id String @id @default(uuid())
  updated_at DateTime @default(now())
  changed_by String @default("") // user or system that performed the action
  changed_by_api_key String @default("") // api key hash that performed the action
  action String // create, update, delete
  table_name String // one of LitellmTableNames.TEAM_TABLE_NAME, LitellmTableNames.USER_TABLE_NAME, LitellmTableNames.PROXY_MODEL_TABLE_NAME
  object_id String // id of the object being audited. This can be the key id, team id, user id, model id
  before_value Json? // value of the row before the change
  updated_values Json? // value of the row after the change
}

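A sketch of querying this schema from Python with the generated `prisma-client-py` client; it assumes `prisma generate` has been run against this schema, `DATABASE_URL` is set, and the lowercased table accessor name follows the client's naming convention:

```python
# Sketch: connect with the generated prisma-client-py client and read budgets.
import asyncio

from prisma import Prisma


async def main() -> None:
    db = Prisma()
    await db.connect()
    # Accessor name assumed to be the lowercased model name (LiteLLM_BudgetTable).
    budgets = await db.litellm_budgettable.find_many()
    print(f"{len(budgets)} budget rows")
    await db.disconnect()


asyncio.run(main())
```
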
@@ -0,0 +1,52 @@
import os

from fastapi import Request

from litellm.proxy._types import GenerateKeyRequest, UserAPIKeyAuth


async def user_api_key_auth(request: Request, api_key: str) -> UserAPIKeyAuth:
    try:
        modified_master_key = f"{os.getenv('LITELLM_MASTER_KEY')}-1234"
        if api_key == modified_master_key:
            return UserAPIKeyAuth(api_key=api_key)
        raise Exception
    except Exception:
        raise Exception


async def generate_key_fn(data: GenerateKeyRequest):
    """
    Asynchronously decides if a key should be generated or not based on the provided data.

    Args:
        data (GenerateKeyRequest): The data to be used for decision making.

    Returns:
        dict: {"decision": True} if a key should be generated, otherwise
        {"decision": False, "message": ...} explaining the rejection.
    """
    # decide if a key should be generated or not
    data_json = data.json()  # type: ignore

    # Unpacking variables
    team_id = data_json.get("team_id")
    data_json.get("duration")
    data_json.get("models")
    data_json.get("aliases")
    data_json.get("config")
    data_json.get("spend")
    data_json.get("user_id")
    data_json.get("max_parallel_requests")
    data_json.get("metadata")
    data_json.get("tpm_limit")
    data_json.get("rpm_limit")

    if team_id is not None and len(team_id) > 0:
        return {
            "decision": True,
        }
    else:
        return {
            "decision": False,
            "message": "This violates LiteLLM Proxy Rules. No team id provided.",
        }

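Assuming this hook is wired up on the proxy (e.g. via `general_settings.custom_key_generate`), a sketch of a key-generation request that passes the team-id check; URL and master key are placeholders:

```python
# Sketch: call /key/generate with a non-empty team_id so generate_key_fn approves it.
import requests

resp = requests.post(
    "http://0.0.0.0:8000/key/generate",
    headers={"Authorization": "Bearer sk-1234"},  # placeholder master key
    json={"team_id": "team-1"},
    timeout=10,
)
print(resp.status_code, resp.json())
```
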
@@ -0,0 +1,14 @@
from fastapi import Request

from litellm.proxy._types import UserAPIKeyAuth


async def user_api_key_auth(request: Request, api_key: str) -> UserAPIKeyAuth:
    try:
        return UserAPIKeyAuth(
            api_key="best-api-key-ever",
            user_id="best-user-id-ever",
            team_id="best-team-id-ever",
        )
    except Exception:
        raise Exception

@@ -0,0 +1,74 @@
import os
import sys
import traceback

# this file is to test litellm/proxy

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path

import inspect

import litellm
from litellm.integrations.custom_logger import CustomLogger


# This file includes the custom callbacks for LiteLLM Proxy
# Once defined, these can be passed in proxy_config.yaml
def print_verbose(print_statement):
    if litellm.set_verbose:
        print(print_statement)  # noqa


class MyCustomHandler(CustomLogger):
    def __init__(self):
        blue_color_code = "\033[94m"
        reset_color_code = "\033[0m"
        print_verbose(f"{blue_color_code}Initialized LiteLLM custom logger")
        try:
            print_verbose("Logger Initialized with following methods:")
            methods = [
                method
                for method in dir(self)
                if inspect.ismethod(getattr(self, method))
            ]

            # Pretty print_verbose the methods
            for method in methods:
                print_verbose(f" - {method}")
            print_verbose(f"{reset_color_code}")
        except Exception:
            pass

    def log_pre_api_call(self, model, messages, kwargs):
        print_verbose("Pre-API Call")

    def log_post_api_call(self, kwargs, response_obj, start_time, end_time):
        print_verbose("Post-API Call")

    def log_stream_event(self, kwargs, response_obj, start_time, end_time):
        print_verbose("On Stream")

    def log_success_event(self, kwargs, response_obj, start_time, end_time):
        print_verbose("On Success!")

    async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
        print_verbose("On Async Success!")
        response_cost = litellm.completion_cost(completion_response=response_obj)
        assert response_cost > 0.0
        return

    async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
        try:
            print_verbose("On Async Failure !")
        except Exception as e:
            print_verbose(f"Exception: {e}")


proxy_handler_instance = MyCustomHandler()


# need to set litellm.callbacks = [customHandler] # on the proxy

# litellm.success_callback = [async_on_succes_logger]

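On the proxy this handler is referenced from the config (the docs use `litellm_settings: callbacks: custom_callbacks.proxy_handler_instance`). A sketch of exercising it directly with the SDK instead, assuming the file above is saved as `custom_callbacks.py`; the mock response avoids a real API call:

```python
# Sketch: attach the custom logger to the litellm SDK and fire its hooks.
import litellm
from custom_callbacks import proxy_handler_instance  # module name assumed

litellm.callbacks = [proxy_handler_instance]

response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "hello"}],
    mock_response="Hi!",  # no network call; logging callbacks still run
)
print(response.choices[0].message.content)
```
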
@@ -0,0 +1,78 @@
from typing import Literal, Optional

import litellm
from litellm.integrations.custom_logger import CustomLogger
from litellm.proxy.proxy_server import DualCache, UserAPIKeyAuth


# This file includes the custom callbacks for LiteLLM Proxy
# Once defined, these can be passed in proxy_config.yaml
class MyCustomHandler(
    CustomLogger
):  # https://docs.litellm.ai/docs/observability/custom_callback#callback-class
    # Class variables or attributes
    def __init__(self):
        pass

    #### CALL HOOKS - proxy only ####

    async def async_pre_call_hook(
        self,
        user_api_key_dict: UserAPIKeyAuth,
        cache: DualCache,
        data: dict,
        call_type: Literal[
            "completion",
            "text_completion",
            "embeddings",
            "image_generation",
            "moderation",
            "audio_transcription",
            "pass_through_endpoint",
            "rerank",
        ],
    ):
        return data

    async def async_post_call_failure_hook(
        self,
        request_data: dict,
        original_exception: Exception,
        user_api_key_dict: UserAPIKeyAuth,
    ):
        pass

    async def async_post_call_success_hook(
        self,
        data: dict,
        user_api_key_dict: UserAPIKeyAuth,
        response,
    ):
        # print("in async_post_call_success_hook")
        pass

    async def async_moderation_hook(  # call made in parallel to llm api call
        self,
        data: dict,
        user_api_key_dict: UserAPIKeyAuth,
        call_type: Literal[
            "completion",
            "embeddings",
            "image_generation",
            "moderation",
            "audio_transcription",
            "responses",
        ],
    ):
        pass

    async def async_post_call_streaming_hook(
        self,
        user_api_key_dict: UserAPIKeyAuth,
        response: str,
    ):
        # print("in async_post_call_streaming_hook")
        pass


proxy_handler_instance = MyCustomHandler()

@@ -0,0 +1,112 @@
from typing import Any, Dict, List, Literal, Optional, Union

import litellm
from litellm._logging import verbose_proxy_logger
from litellm.caching.caching import DualCache
from litellm.integrations.custom_guardrail import CustomGuardrail
from litellm.proxy._types import UserAPIKeyAuth
from litellm.proxy.guardrails.guardrail_helpers import should_proceed_based_on_metadata


class myCustomGuardrail(CustomGuardrail):
    def __init__(
        self,
        **kwargs,
    ):
        # store kwargs as optional_params
        self.optional_params = kwargs

        super().__init__(**kwargs)

    async def async_pre_call_hook(
        self,
        user_api_key_dict: UserAPIKeyAuth,
        cache: DualCache,
        data: dict,
        call_type: Literal[
            "completion",
            "text_completion",
            "embeddings",
            "image_generation",
            "moderation",
            "audio_transcription",
            "pass_through_endpoint",
            "rerank",
        ],
    ) -> Optional[Union[Exception, str, dict]]:
        """
        Runs before the LLM API call
        Runs on only Input
        Use this if you want to MODIFY the input
        """

        # In this guardrail, if a user inputs `litellm` we will mask it and then send it to the LLM
        _messages = data.get("messages")
        if _messages:
            for message in _messages:
                _content = message.get("content")
                if isinstance(_content, str):
                    if "litellm" in _content.lower():
                        _content = _content.replace("litellm", "********")
                        message["content"] = _content

        verbose_proxy_logger.debug(
            "async_pre_call_hook: Message after masking %s", _messages
        )

        return data

    async def async_moderation_hook(
        self,
        data: dict,
        user_api_key_dict: UserAPIKeyAuth,
        call_type: Literal[
            "completion",
            "embeddings",
            "image_generation",
            "moderation",
            "audio_transcription",
            "responses",
        ],
    ):
        """
        Runs in parallel to LLM API call
        Runs on only Input

        This can NOT modify the input, only used to reject or accept a call before going to LLM API
        """

        # this works like async_pre_call_hook, but runs in parallel to the LLM API call
        # In this guardrail, if a user inputs `litellm` we reject the call
        _messages = data.get("messages")
        if _messages:
            for message in _messages:
                _content = message.get("content")
                if isinstance(_content, str):
                    if "litellm" in _content.lower():
                        raise ValueError("Guardrail failed words - `litellm` detected")

    async def async_post_call_success_hook(
        self,
        data: dict,
        user_api_key_dict: UserAPIKeyAuth,
        response,
    ):
        """
        Runs on response from LLM API call

        It can be used to reject a response

        If a response contains the word "coffee" -> we will raise an exception
        """
        verbose_proxy_logger.debug("async_pre_call_hook response: %s", response)
        if isinstance(response, litellm.ModelResponse):
            for choice in response.choices:
                if isinstance(choice, litellm.Choices):
                    verbose_proxy_logger.debug("async_pre_call_hook choice: %s", choice)
                    if (
                        choice.message.content
                        and isinstance(choice.message.content, str)
                        and "coffee" in choice.message.content
                    ):
                        raise ValueError("Guardrail failed Coffee Detected")

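A sketch of triggering this guardrail through the proxy, assuming it is registered as `custom-pre-guard` (as in the guardrails config later in this commit) and selected per request via the `guardrails` parameter; URL, key, and model name are placeholders:

```python
# Sketch: per-request guardrail selection; the pre-call hook should mask "litellm".
import requests

resp = requests.post(
    "http://0.0.0.0:8000/chat/completions",
    headers={"Authorization": "Bearer sk-1234"},
    json={
        "model": "fake-openai-endpoint",
        "messages": [{"role": "user", "content": "tell me about litellm"}],
        "guardrails": ["custom-pre-guard"],
    },
    timeout=30,
)
print(resp.status_code, resp.json())
```
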
@@ -0,0 +1,26 @@
import time
from typing import Any, Optional

import litellm
from litellm import CustomLLM, ImageObject, ImageResponse, completion, get_llm_provider
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
from litellm.types.utils import ModelResponse


class MyCustomLLM(CustomLLM):
    def completion(self, *args, **kwargs) -> ModelResponse:
        return litellm.completion(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": "Hello world"}],
            mock_response="Hi!",
        )  # type: ignore

    async def acompletion(self, *args, **kwargs) -> litellm.ModelResponse:
        return litellm.completion(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": "Hello world"}],
            mock_response="Hi!",
        )  # type: ignore


my_custom_llm = MyCustomLLM()

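A sketch of registering this handler with the SDK via `litellm.custom_provider_map` (on the proxy the equivalent is a `custom_provider_map` entry under `litellm_settings`); the module name and the `my-custom-llm/*` model string are assumptions:

```python
# Sketch: route a "my-custom-llm/..." model through MyCustomLLM.
import litellm
from custom_handler import MyCustomLLM  # module name assumed

litellm.custom_provider_map = [
    {"provider": "my-custom-llm", "custom_handler": MyCustomLLM()}
]

response = litellm.completion(
    model="my-custom-llm/my-fake-model",  # provider prefix selects the handler
    messages=[{"role": "user", "content": "Hello world"}],
)
print(response.choices[0].message.content)  # "Hi!" from the mocked completion
```
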
@@ -0,0 +1,17 @@
model_list:
  - model_name: fake-openai-endpoint
    litellm_params:
      model: openai/fake
      api_key: fake-key
      api_base: https://exampleopenaiendpoint-production.up.railway.app/
  - model_name: gpt-4
    litellm_params:
      model: openai/gpt-4
      api_key: fake-key
      api_base: https://exampleopenaiendpoint-production.up.railway.app/

litellm_settings:
  callbacks: ["gcs_bucket"]

general_settings:
  disable_prisma_schema_update: true

@@ -0,0 +1,17 @@
model_list:
  - model_name: gpt-4
    litellm_params:
      model: openai/fake
      api_key: fake-key
      api_base: https://exampleopenaiendpoint-production.up.railway.app/
      tags: ["teamA"]
    model_info:
      id: "team-a-model"

litellm_settings:
  cache: true
  callbacks: ["prometheus"]

router_settings:
  enable_tag_filtering: True # 👈 Key Change

@@ -0,0 +1,7 @@
model_list:
  - model_name: gpt-3.5-turbo

litellm_settings:
  drop_params: True
  success_callback: ["langfuse"] # https://docs.litellm.ai/docs/observability/langfuse_integration

@@ -0,0 +1,28 @@
litellm_settings:
  drop_params: True

# Model-specific settings
model_list: # deployments that share a model_name are load-balanced by the litellm router
  - model_name: gpt-3.5-turbo # litellm routes requests across the gpt-3.5-turbo deployments below
    litellm_params:
      model: gpt-3.5-turbo
      api_key: sk-uj6F
      tpm: 20000 # [OPTIONAL] REPLACE with your openai tpm
      rpm: 3 # [OPTIONAL] REPLACE with your openai rpm
  - model_name: gpt-3.5-turbo
    litellm_params:
      model: gpt-3.5-turbo
      api_key: sk-Imn
      tpm: 20000 # [OPTIONAL] REPLACE with your openai tpm
      rpm: 3 # [OPTIONAL] REPLACE with your openai rpm
  - model_name: gpt-3.5-turbo
    litellm_params:
      model: openrouter/gpt-3.5-turbo
  - model_name: mistral-7b-instruct
    litellm_params:
      model: mistralai/mistral-7b-instruct

environment_variables:
  REDIS_HOST: localhost
  REDIS_PASSWORD:
  REDIS_PORT:

@@ -0,0 +1,12 @@
model_list:
  - model_name: fake-openai-endpoint
    litellm_params:
      model: openai/my-fake-model
      api_key: my-fake-key
      api_base: https://exampleopenaiendpoint-production.up.railway.app/

litellm_settings:
  cache: True
  cache_params:
    type: redis

@@ -0,0 +1,62 @@
model_list:
  - model_name: gpt-3.5-turbo-end-user-test
    litellm_params:
      model: gpt-3.5-turbo
      region_name: "eu"
    model_info:
      id: "1"
  - model_name: "*"
    litellm_params:
      model: openai/*
      api_key: os.environ/OPENAI_API_KEY
  # provider specific wildcard routing
  - model_name: "anthropic/*"
    litellm_params:
      model: "anthropic/*"
      api_key: os.environ/ANTHROPIC_API_KEY
  - model_name: "groq/*"
    litellm_params:
      model: "groq/*"
      api_key: os.environ/GROQ_API_KEY

litellm_settings:
  # set_verbose: True # Uncomment this if you want to see verbose logs; not recommended in production
  drop_params: True
  # max_budget: 100
  # budget_duration: 30d
  num_retries: 5
  request_timeout: 600
  telemetry: False
  context_window_fallbacks: [{"gpt-3.5-turbo": ["gpt-3.5-turbo-large"]}]
  default_team_settings:
    - team_id: team-1
      success_callback: ["langfuse"]
      failure_callback: ["langfuse"]
      langfuse_public_key: os.environ/LANGFUSE_PROJECT1_PUBLIC # Project 1
      langfuse_secret: os.environ/LANGFUSE_PROJECT1_SECRET # Project 1
    - team_id: team-2
      success_callback: ["langfuse"]
      failure_callback: ["langfuse"]
      langfuse_public_key: os.environ/LANGFUSE_PROJECT2_PUBLIC # Project 2
      langfuse_secret: os.environ/LANGFUSE_PROJECT2_SECRET # Project 2
      langfuse_host: https://us.cloud.langfuse.com

# For /fine_tuning/jobs endpoints
finetune_settings:
  - custom_llm_provider: azure
    api_base: os.environ/AZURE_API_BASE
    api_key: os.environ/AZURE_API_KEY
    api_version: "2024-05-01-preview"
  - custom_llm_provider: openai
    api_key: os.environ/OPENAI_API_KEY

# for /files endpoints
files_settings:
  - custom_llm_provider: azure
    api_base: os.environ/AZURE_API_BASE
    api_key: os.environ/AZURE_API_KEY
    api_version: "2024-05-01-preview"
  - custom_llm_provider: openai
    api_key: os.environ/OPENAI_API_KEY

general_settings:
  master_key: sk-1234 # [OPTIONAL] Use to enforce auth on proxy. See - https://docs.litellm.ai/docs/proxy/virtual_keys

@@ -0,0 +1,7 @@
model_list:
  - model_name: gpt-3.5-turbo
    litellm_params:
      model: gpt-3.5-turbo

general_settings:
  otel: True # OpenTelemetry Logger; this logs OTEL data to your collector

@@ -0,0 +1,83 @@
model_list:
  - model_name: fake-openai-endpoint
    litellm_params:
      model: openai/fake
      api_key: fake-key
      api_base: https://exampleopenaiendpoint-production.up.railway.app/
      tags: ["teamA"]
    model_info:
      id: "team-a-model"
  - model_name: fake-openai-endpoint
    litellm_params:
      model: openai/fake
      api_key: fake-key
      api_base: https://exampleopenaiendpoint-production.up.railway.app/
      tags: ["teamB"]
    model_info:
      id: "team-b-model"
  - model_name: rerank-english-v3.0
    litellm_params:
      model: cohere/rerank-english-v3.0
      api_key: os.environ/COHERE_API_KEY
  - model_name: fake-azure-endpoint
    litellm_params:
      model: openai/429
      api_key: fake-key
      api_base: https://exampleopenaiendpoint-production.up.railway.app
  - model_name: llava-hf
    litellm_params:
      model: openai/llava-hf/llava-v1.6-vicuna-7b-hf
      api_base: http://localhost:8000
      api_key: fake-key
    model_info:
      supports_vision: True
  - model_name: bedrock/*
    litellm_params:
      model: bedrock/*
      api_base: https://exampleopenaiendpoint-production.up.railway.app/
  - model_name: openai/*
    litellm_params:
      model: openai/*
      api_key: os.environ/OPENAI_API_KEY
      api_base: https://exampleopenaiendpoint-production.up.railway.app/

litellm_settings:
  cache: true
  callbacks: ["otel", "prometheus"]
  disable_end_user_cost_tracking_prometheus_only: True

guardrails:
  - guardrail_name: "aporia-pre-guard"
    litellm_params:
      guardrail: aporia # supported values: "aporia", "bedrock", "lakera"
      mode: "post_call"
      api_key: os.environ/APORIA_API_KEY_1
      api_base: os.environ/APORIA_API_BASE_1
  - guardrail_name: "aporia-post-guard"
    litellm_params:
      guardrail: aporia # supported values: "aporia", "bedrock", "lakera"
      mode: "post_call"
      api_key: os.environ/APORIA_API_KEY_2
      api_base: os.environ/APORIA_API_BASE_2
  - guardrail_name: "bedrock-pre-guard"
    litellm_params:
      guardrail: bedrock # supported values: "aporia", "bedrock", "lakera"
      mode: "during_call"
      guardrailIdentifier: ff6ujrregl1q
      guardrailVersion: "DRAFT"
  - guardrail_name: "custom-pre-guard"
    litellm_params:
      guardrail: custom_guardrail.myCustomGuardrail
      mode: "pre_call"
  - guardrail_name: "custom-during-guard"
    litellm_params:
      guardrail: custom_guardrail.myCustomGuardrail
      mode: "during_call"
  - guardrail_name: "custom-post-guard"
    litellm_params:
      guardrail: custom_guardrail.myCustomGuardrail
      mode: "post_call"

router_settings:
  enable_tag_filtering: True # 👈 Key Change

@@ -0,0 +1,29 @@
model_list:
  - model_name: fake-openai-endpoint
    litellm_params:
      model: openai/fake
      api_key: fake-key
      api_base: https://exampleopenaiendpoint-production.up.railway.app/
  - model_name: claude-3-5-sonnet-20241022
    litellm_params:
      model: anthropic/claude-3-5-sonnet-20241022
      api_key: os.environ/ANTHROPIC_API_KEY
  - model_name: claude-special-alias
    litellm_params:
      model: anthropic/claude-3-haiku-20240307
      api_key: os.environ/ANTHROPIC_API_KEY
  - model_name: claude-3-5-sonnet-20241022
    litellm_params:
      model: anthropic/claude-3-5-sonnet-20241022
      api_key: os.environ/ANTHROPIC_API_KEY
  - model_name: claude-3-7-sonnet-20250219
    litellm_params:
      model: anthropic/claude-3-7-sonnet-20250219
      api_key: os.environ/ANTHROPIC_API_KEY
  - model_name: anthropic/*
    litellm_params:
      model: anthropic/*
      api_key: os.environ/ANTHROPIC_API_KEY

general_settings:
  master_key: sk-1234
  custom_auth: custom_auth_basic.user_api_key_auth

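Since `custom_auth_basic.user_api_key_auth` returns a fixed identity for whatever key it receives, any bearer token should be accepted under this config; a sketch (placeholder URL):

```python
# Sketch: the custom auth hook accepts an arbitrary key and maps it to a fixed user/team.
import requests

resp = requests.post(
    "http://0.0.0.0:8000/chat/completions",
    headers={"Authorization": "Bearer any-key-at-all"},
    json={
        "model": "fake-openai-endpoint",
        "messages": [{"role": "user", "content": "ping"}],
    },
    timeout=30,
)
print(resp.status_code)
```
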
@@ -0,0 +1,4 @@
model_list:
  - model_name: gpt-3.5-turbo
    litellm_params:
      model: gpt-3.5-turbo

@@ -0,0 +1,15 @@
model_list:
  - model_name: fake-openai-endpoint
    litellm_params:
      model: openai/fake
      api_key: fake-key
      api_base: https://exampleopenaiendpoint-production.up.railway.app/

general_settings:
  use_redis_transaction_buffer: true

litellm_settings:
  cache: True
  cache_params:
    type: redis
    supported_call_types: []

@@ -0,0 +1,10 @@
model_list:
  - model_name: fake-openai-endpoint
    litellm_params:
      model: openai/my-fake-model
      api_key: my-fake-key
      api_base: https://exampleopenaiendpoint-production.up.railway.app/

general_settings:
  store_model_in_db: true

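With `store_model_in_db: true`, models added at runtime are persisted to the proxy's database; a sketch using the `/model/new` endpoint (URL, admin key, and the exact request body shape are assumptions based on the documented model-management API):

```python
# Sketch: add a model at runtime; it is stored in the DB because store_model_in_db is true.
import requests

resp = requests.post(
    "http://0.0.0.0:8000/model/new",
    headers={"Authorization": "Bearer sk-1234"},  # placeholder admin key
    json={
        "model_name": "my-new-model",
        "litellm_params": {
            "model": "openai/gpt-4o",
            "api_key": "os.environ/OPENAI_API_KEY",
        },
    },
    timeout=10,
)
print(resp.status_code, resp.json())
```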