structure SaaS with tools
@@ -0,0 +1,17 @@
model_list:
  - model_name: text-embedding-ada-002
    litellm_params:
      model: azure/azure-embedding-model
      api_base: "os.environ/AZURE_API_BASE"
      api_key: "os.environ/AZURE_API_KEY"
      api_version: "2023-07-01-preview"
    model_info:
      mode: embedding
      base_model: text-embedding-ada-002

litellm_settings:
  set_verbose: True

general_settings:
  background_health_checks: True # enable background health checks
  health_check_interval: 300 # frequency of background health checks

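With background health checks enabled, the proxy caches per-deployment health and serves it from its `/health` endpoint. A minimal sketch of reading it back, assuming the proxy runs on `http://0.0.0.0:8000` and `sk-1234` is its master key (neither value is set in this config):

```python
# Sketch: poll the LiteLLM proxy's /health endpoint.
# The URL and master key below are placeholders, not part of this config.
import requests

resp = requests.get(
    "http://0.0.0.0:8000/health",
    headers={"Authorization": "Bearer sk-1234"},
    timeout=10,
)
resp.raise_for_status()
print(resp.json())  # expected to list healthy / unhealthy endpoints
```
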
@@ -0,0 +1,30 @@
model_list:
  - model_name: text-davinci-003
    litellm_params:
      model: ollama/zephyr
  - model_name: gpt-4
    litellm_params:
      model: ollama/llama2
  - model_name: gpt-3.5-turbo
    litellm_params:
      model: ollama/llama2
      temperature: 0.1
      max_tokens: 20


# request to gpt-4, response from ollama/llama2
# curl --location 'http://0.0.0.0:8000/chat/completions' \
# --header 'Content-Type: application/json' \
# --data '{
#   "model": "gpt-4",
#   "messages": [
#     {
#       "role": "user",
#       "content": "what llm are you"
#     }
#   ]
# }'
#

# {"id":"chatcmpl-27c85cf0-ab09-4bcf-8cb1-0ee950520743","choices":[{"finish_reason":"stop","index":0,"message":{"content":" Hello! I'm just an AI, I don't have personal experiences or emotions like humans do. However, I can help you with any questions or tasks you may have! Is there something specific you'd like to know or discuss?","role":"assistant","_logprobs":null}}],"created":1700094955.373751,"model":"ollama/llama2","object":"chat.completion","system_fingerprint":null,"usage":{"prompt_tokens":12,"completion_tokens":47,"total_tokens":59},"_response_ms":8028.017999999999}

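The commented curl above can also be issued with the OpenAI Python SDK pointed at the proxy; a sketch (the API key is a placeholder, since this config sets no master key):

```python
# Sketch: same request as the curl example, via the OpenAI SDK.
from openai import OpenAI

client = OpenAI(base_url="http://0.0.0.0:8000", api_key="anything")

response = client.chat.completions.create(
    model="gpt-4",  # routed to ollama/llama2 by the config above
    messages=[{"role": "user", "content": "what llm are you"}],
)
print(response.choices[0].message.content)
```
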
@@ -0,0 +1,21 @@
model_list:
  - model_name: gpt-4-team1
    litellm_params:
      model: azure/chatgpt-v-2
      api_base: https://openai-gpt-4-test-v-1.openai.azure.com/
      api_version: "2023-05-15"
      api_key: os.environ/AZURE_API_KEY
      tpm: 20_000
      timeout: 5 # 5 second timeout
      stream_timeout: 0.5 # 0.5 second timeout for streaming requests
      max_retries: 4
  - model_name: gpt-4-team2
    litellm_params:
      model: azure/gpt-4
      api_key: os.environ/AZURE_API_KEY
      api_base: https://openai-gpt-4-test-v-2.openai.azure.com/
      tpm: 100_000
      timeout: 5 # 5 second timeout
      stream_timeout: 0.5 # 0.5 second timeout for streaming requests
      max_retries: 4

@@ -0,0 +1,265 @@
datasource client {
  provider = "postgresql"
  url      = env("DATABASE_URL")
}

generator client {
  provider = "prisma-client-py"
}

// Budget / Rate Limits for an org
model LiteLLM_BudgetTable {
  budget_id String @id @default(uuid())
  max_budget Float?
  soft_budget Float?
  max_parallel_requests Int?
  tpm_limit BigInt?
  rpm_limit BigInt?
  model_max_budget Json?
  temp_verification_token String? // bad param for testing
  budget_duration String?
  budget_reset_at DateTime?
  created_at DateTime @default(now()) @map("created_at")
  created_by String
  updated_at DateTime @default(now()) @updatedAt @map("updated_at")
  updated_by String
  organization LiteLLM_OrganizationTable[] // multiple orgs can have the same budget
  keys LiteLLM_VerificationToken[] // multiple keys can have the same budget
  end_users LiteLLM_EndUserTable[] // multiple end-users can have the same budget
  team_membership LiteLLM_TeamMembership[] // budgets of Users within a Team
}

// Models on proxy
model LiteLLM_ProxyModelTable {
  model_id String @id @default(uuid())
  model_name String
  litellm_params Json
  model_info Json?
  created_at DateTime @default(now()) @map("created_at")
  created_by String
  updated_at DateTime @default(now()) @updatedAt @map("updated_at")
  updated_by String
}

model LiteLLM_OrganizationTable {
  organization_id String @id @default(uuid())
  organization_alias String
  budget_id String
  metadata Json @default("{}")
  models String[]
  spend Float @default(0.0)
  model_spend Json @default("{}")
  created_at DateTime @default(now()) @map("created_at")
  created_by String
  updated_at DateTime @default(now()) @updatedAt @map("updated_at")
  updated_by String
  litellm_budget_table LiteLLM_BudgetTable? @relation(fields: [budget_id], references: [budget_id])
  teams LiteLLM_TeamTable[]
  users LiteLLM_UserTable[]
}

// Model info for teams, just has model aliases for now.
model LiteLLM_ModelTable {
  id Int @id @default(autoincrement())
  model_aliases Json? @map("aliases")
  created_at DateTime @default(now()) @map("created_at")
  created_by String
  updated_at DateTime @default(now()) @updatedAt @map("updated_at")
  updated_by String
  team LiteLLM_TeamTable?
}

// Assign prod keys to groups, not individuals
model LiteLLM_TeamTable {
  team_id String @id @default(uuid())
  team_alias String?
  organization_id String?
  admins String[]
  members String[]
  members_with_roles Json @default("{}")
  metadata Json @default("{}")
  max_budget Float?
  spend Float @default(0.0)
  models String[]
  max_parallel_requests Int?
  tpm_limit BigInt?
  rpm_limit BigInt?
  budget_duration String?
  budget_reset_at DateTime?
  blocked Boolean @default(false)
  created_at DateTime @default(now()) @map("created_at")
  updated_at DateTime @default(now()) @updatedAt @map("updated_at")
  model_spend Json @default("{}")
  model_max_budget Json @default("{}")
  model_id Int? @unique // id for LiteLLM_ModelTable -> stores team-level model aliases
  litellm_organization_table LiteLLM_OrganizationTable? @relation(fields: [organization_id], references: [organization_id])
  litellm_model_table LiteLLM_ModelTable? @relation(fields: [model_id], references: [id])
}

// Track spend, rate limit, budget Users
model LiteLLM_UserTable {
  user_id String @id
  user_alias String?
  team_id String?
  organization_id String?
  password String?
  teams String[] @default([])
  user_role String?
  max_budget Float?
  spend Float @default(0.0)
  user_email String?
  models String[]
  metadata Json @default("{}")
  max_parallel_requests Int?
  tpm_limit BigInt?
  rpm_limit BigInt?
  budget_duration String?
  budget_reset_at DateTime?
  allowed_cache_controls String[] @default([])
  model_spend Json @default("{}")
  model_max_budget Json @default("{}")
  litellm_organization_table LiteLLM_OrganizationTable? @relation(fields: [organization_id], references: [organization_id])
  invitations_created LiteLLM_InvitationLink[] @relation("CreatedBy")
  invitations_updated LiteLLM_InvitationLink[] @relation("UpdatedBy")
  invitations_user LiteLLM_InvitationLink[] @relation("UserId")
}

// Generate Tokens for Proxy
model LiteLLM_VerificationToken {
  token String @id
  key_name String?
  key_alias String?
  soft_budget_cooldown Boolean @default(false) // key-level state on if budget alerts need to be cooled down
  spend Float @default(0.0)
  expires DateTime?
  models String[]
  aliases Json @default("{}")
  config Json @default("{}")
  user_id String?
  team_id String?
  permissions Json @default("{}")
  max_parallel_requests Int?
  metadata Json @default("{}")
  blocked Boolean?
  tpm_limit BigInt?
  rpm_limit BigInt?
  max_budget Float?
  budget_duration String?
  budget_reset_at DateTime?
  allowed_cache_controls String[] @default([])
  model_spend Json @default("{}")
  model_max_budget Json @default("{}")
  budget_id String?
  litellm_budget_table LiteLLM_BudgetTable? @relation(fields: [budget_id], references: [budget_id])
}

model LiteLLM_EndUserTable {
  user_id String @id
  alias String? // admin-facing alias
  spend Float @default(0.0)
  allowed_model_region String? // require all user requests to use models in this specific region
  default_model String? // use along with 'allowed_model_region'. if no available model in region, default to this model.
  budget_id String?
  litellm_budget_table LiteLLM_BudgetTable? @relation(fields: [budget_id], references: [budget_id])
  blocked Boolean @default(false)
}

// store proxy config.yaml
model LiteLLM_Config {
  param_name String @id
  param_value Json?
}

// View spend, model, api_key per request
model LiteLLM_SpendLogs {
  request_id String @id
  call_type String
  api_key String @default("") // Hashed API Token. Not the actual Virtual Key. Equivalent to 'token' column in LiteLLM_VerificationToken
  spend Float @default(0.0)
  total_tokens Int @default(0)
  prompt_tokens Int @default(0)
  completion_tokens Int @default(0)
  startTime DateTime // Assuming start_time is a DateTime field
  endTime DateTime // Assuming end_time is a DateTime field
  completionStartTime DateTime? // Assuming completionStartTime is a DateTime field
  model String @default("")
  model_id String? @default("") // the model id stored in proxy model db
  model_group String? @default("") // public model_name / model_group
  api_base String? @default("")
  user String? @default("")
  metadata Json? @default("{}")
  cache_hit String? @default("")
  cache_key String? @default("")
  request_tags Json? @default("[]")
  team_id String?
  end_user String?
  requester_ip_address String?
  @@index([startTime])
  @@index([end_user])
}

// View spend, model, api_key per request
model LiteLLM_ErrorLogs {
  request_id String @id @default(uuid())
  startTime DateTime // Assuming start_time is a DateTime field
  endTime DateTime // Assuming end_time is a DateTime field
  api_base String @default("")
  model_group String @default("") // public model_name / model_group
  litellm_model_name String @default("") // model passed to litellm
  model_id String @default("") // ID of model in ProxyModelTable
  request_kwargs Json @default("{}")
  exception_type String @default("")
  exception_string String @default("")
  status_code String @default("")
}

// Beta - allow team members to request access to a model
model LiteLLM_UserNotifications {
  request_id String @id
  user_id String
  models String[]
  justification String
  status String // approved, disapproved, pending
}

model LiteLLM_TeamMembership {
  // Use this table to track the Internal User's Spend within a Team + Set Budgets, rpm limits for the user within the team
  user_id String
  team_id String
  spend Float @default(0.0)
  budget_id String?
  litellm_budget_table LiteLLM_BudgetTable? @relation(fields: [budget_id], references: [budget_id])
  @@id([user_id, team_id])
}

model LiteLLM_InvitationLink {
  // use this table to track invite links sent by admin for people to join the proxy
  id String @id @default(uuid())
  user_id String
  is_accepted Boolean @default(false)
  accepted_at DateTime? // when link is claimed (user successfully onboards via link)
  expires_at DateTime // till when is link valid
  created_at DateTime // when did admin create the link
  created_by String // who created the link
  updated_at DateTime // when was invite status updated
  updated_by String // who updated the status (admin/user who accepted invite)

  // Relations
  liteLLM_user_table_user LiteLLM_UserTable @relation("UserId", fields: [user_id], references: [user_id])
  liteLLM_user_table_created LiteLLM_UserTable @relation("CreatedBy", fields: [created_by], references: [user_id])
  liteLLM_user_table_updated LiteLLM_UserTable @relation("UpdatedBy", fields: [updated_by], references: [user_id])
}

model LiteLLM_AuditLog {
  id String @id @default(uuid())
  updated_at DateTime @default(now())
  changed_by String @default("") // user or system that performed the action
  changed_by_api_key String @default("") // api key hash that performed the action
  action String // create, update, delete
  table_name String // one of LitellmTableNames.TEAM_TABLE_NAME, LitellmTableNames.USER_TABLE_NAME, LitellmTableNames.PROXY_MODEL_TABLE_NAME
  object_id String // id of the object being audited. This can be the key id, team id, user id, model id
  before_value Json? // value of the row before the change
  updated_values Json? // value of the row after the change
}

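A sketch of querying this schema from Python with the generated `prisma-client-py` client; it assumes `prisma generate` has been run against this schema, `DATABASE_URL` is set, and the lowercased table accessor name follows the client's naming convention:

```python
# Sketch: connect with the generated prisma-client-py client and read budgets.
import asyncio

from prisma import Prisma


async def main() -> None:
    db = Prisma()
    await db.connect()
    # Accessor name assumed to be the lowercased model name (LiteLLM_BudgetTable).
    budgets = await db.litellm_budgettable.find_many()
    print(f"{len(budgets)} budget rows")
    await db.disconnect()


asyncio.run(main())
```
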
@@ -0,0 +1,52 @@
import os

from fastapi import Request

from litellm.proxy._types import GenerateKeyRequest, UserAPIKeyAuth


async def user_api_key_auth(request: Request, api_key: str) -> UserAPIKeyAuth:
    try:
        modified_master_key = f"{os.getenv('LITELLM_MASTER_KEY')}-1234"
        if api_key == modified_master_key:
            return UserAPIKeyAuth(api_key=api_key)
        raise Exception
    except Exception:
        raise Exception


async def generate_key_fn(data: GenerateKeyRequest):
    """
    Asynchronously decides if a key should be generated or not based on the provided data.

    Args:
        data (GenerateKeyRequest): The data to be used for decision making.

    Returns:
        dict: {"decision": True} if a key should be generated, otherwise
        {"decision": False, "message": ...} explaining the rejection.
    """
    # decide if a key should be generated or not
    data_json = data.json()  # type: ignore

    # Unpacking variables
    team_id = data_json.get("team_id")
    data_json.get("duration")
    data_json.get("models")
    data_json.get("aliases")
    data_json.get("config")
    data_json.get("spend")
    data_json.get("user_id")
    data_json.get("max_parallel_requests")
    data_json.get("metadata")
    data_json.get("tpm_limit")
    data_json.get("rpm_limit")

    if team_id is not None and len(team_id) > 0:
        return {
            "decision": True,
        }
    else:
        return {
            "decision": False,
            "message": "This violates LiteLLM Proxy Rules. No team id provided.",
        }

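Assuming this hook is wired up on the proxy (e.g. via `general_settings.custom_key_generate`), a sketch of a key-generation request that passes the team-id check; URL and master key are placeholders:

```python
# Sketch: call /key/generate with a non-empty team_id so generate_key_fn approves it.
import requests

resp = requests.post(
    "http://0.0.0.0:8000/key/generate",
    headers={"Authorization": "Bearer sk-1234"},  # placeholder master key
    json={"team_id": "team-1"},
    timeout=10,
)
print(resp.status_code, resp.json())
```
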
@@ -0,0 +1,14 @@
from fastapi import Request

from litellm.proxy._types import UserAPIKeyAuth


async def user_api_key_auth(request: Request, api_key: str) -> UserAPIKeyAuth:
    try:
        return UserAPIKeyAuth(
            api_key="best-api-key-ever",
            user_id="best-user-id-ever",
            team_id="best-team-id-ever",
        )
    except Exception:
        raise Exception

@@ -0,0 +1,74 @@
import os
import sys
import traceback

# this file is to test litellm/proxy

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path

import inspect

import litellm
from litellm.integrations.custom_logger import CustomLogger


# This file includes the custom callbacks for LiteLLM Proxy
# Once defined, these can be passed in proxy_config.yaml
def print_verbose(print_statement):
    if litellm.set_verbose:
        print(print_statement)  # noqa


class MyCustomHandler(CustomLogger):
    def __init__(self):
        blue_color_code = "\033[94m"
        reset_color_code = "\033[0m"
        print_verbose(f"{blue_color_code}Initialized LiteLLM custom logger")
        try:
            print_verbose("Logger Initialized with following methods:")
            methods = [
                method
                for method in dir(self)
                if inspect.ismethod(getattr(self, method))
            ]

            # Pretty print_verbose the methods
            for method in methods:
                print_verbose(f" - {method}")
            print_verbose(f"{reset_color_code}")
        except Exception:
            pass

    def log_pre_api_call(self, model, messages, kwargs):
        print_verbose("Pre-API Call")

    def log_post_api_call(self, kwargs, response_obj, start_time, end_time):
        print_verbose("Post-API Call")

    def log_stream_event(self, kwargs, response_obj, start_time, end_time):
        print_verbose("On Stream")

    def log_success_event(self, kwargs, response_obj, start_time, end_time):
        print_verbose("On Success!")

    async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
        print_verbose("On Async Success!")
        response_cost = litellm.completion_cost(completion_response=response_obj)
        assert response_cost > 0.0
        return

    async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
        try:
            print_verbose("On Async Failure !")
        except Exception as e:
            print_verbose(f"Exception: {e}")


proxy_handler_instance = MyCustomHandler()


# need to set litellm.callbacks = [customHandler] # on the proxy

# litellm.success_callback = [async_on_succes_logger]

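On the proxy this handler is referenced from the config (the docs use `litellm_settings: callbacks: custom_callbacks.proxy_handler_instance`). A sketch of exercising it directly with the SDK instead, assuming the file above is saved as `custom_callbacks.py`; the mock response avoids a real API call:

```python
# Sketch: attach the custom logger to the litellm SDK and fire its hooks.
import litellm
from custom_callbacks import proxy_handler_instance  # module name assumed

litellm.callbacks = [proxy_handler_instance]

response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "hello"}],
    mock_response="Hi!",  # no network call; logging callbacks still run
)
print(response.choices[0].message.content)
```
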
@@ -0,0 +1,78 @@
from typing import Literal, Optional

import litellm
from litellm.integrations.custom_logger import CustomLogger
from litellm.proxy.proxy_server import DualCache, UserAPIKeyAuth


# This file includes the custom callbacks for LiteLLM Proxy
# Once defined, these can be passed in proxy_config.yaml
class MyCustomHandler(
    CustomLogger
):  # https://docs.litellm.ai/docs/observability/custom_callback#callback-class
    # Class variables or attributes
    def __init__(self):
        pass

    #### CALL HOOKS - proxy only ####

    async def async_pre_call_hook(
        self,
        user_api_key_dict: UserAPIKeyAuth,
        cache: DualCache,
        data: dict,
        call_type: Literal[
            "completion",
            "text_completion",
            "embeddings",
            "image_generation",
            "moderation",
            "audio_transcription",
            "pass_through_endpoint",
            "rerank",
        ],
    ):
        return data

    async def async_post_call_failure_hook(
        self,
        request_data: dict,
        original_exception: Exception,
        user_api_key_dict: UserAPIKeyAuth,
    ):
        pass

    async def async_post_call_success_hook(
        self,
        data: dict,
        user_api_key_dict: UserAPIKeyAuth,
        response,
    ):
        # print("in async_post_call_success_hook")
        pass

    async def async_moderation_hook(  # call made in parallel to llm api call
        self,
        data: dict,
        user_api_key_dict: UserAPIKeyAuth,
        call_type: Literal[
            "completion",
            "embeddings",
            "image_generation",
            "moderation",
            "audio_transcription",
            "responses",
        ],
    ):
        pass

    async def async_post_call_streaming_hook(
        self,
        user_api_key_dict: UserAPIKeyAuth,
        response: str,
    ):
        # print("in async_post_call_streaming_hook")
        pass


proxy_handler_instance = MyCustomHandler()

@@ -0,0 +1,112 @@
from typing import Any, Dict, List, Literal, Optional, Union

import litellm
from litellm._logging import verbose_proxy_logger
from litellm.caching.caching import DualCache
from litellm.integrations.custom_guardrail import CustomGuardrail
from litellm.proxy._types import UserAPIKeyAuth
from litellm.proxy.guardrails.guardrail_helpers import should_proceed_based_on_metadata


class myCustomGuardrail(CustomGuardrail):
    def __init__(
        self,
        **kwargs,
    ):
        # store kwargs as optional_params
        self.optional_params = kwargs

        super().__init__(**kwargs)

    async def async_pre_call_hook(
        self,
        user_api_key_dict: UserAPIKeyAuth,
        cache: DualCache,
        data: dict,
        call_type: Literal[
            "completion",
            "text_completion",
            "embeddings",
            "image_generation",
            "moderation",
            "audio_transcription",
            "pass_through_endpoint",
            "rerank",
        ],
    ) -> Optional[Union[Exception, str, dict]]:
        """
        Runs before the LLM API call
        Runs on only Input
        Use this if you want to MODIFY the input
        """

        # In this guardrail, if a user inputs `litellm` we will mask it and then send it to the LLM
        _messages = data.get("messages")
        if _messages:
            for message in _messages:
                _content = message.get("content")
                if isinstance(_content, str):
                    if "litellm" in _content.lower():
                        _content = _content.replace("litellm", "********")
                        message["content"] = _content

        verbose_proxy_logger.debug(
            "async_pre_call_hook: Message after masking %s", _messages
        )

        return data

    async def async_moderation_hook(
        self,
        data: dict,
        user_api_key_dict: UserAPIKeyAuth,
        call_type: Literal[
            "completion",
            "embeddings",
            "image_generation",
            "moderation",
            "audio_transcription",
            "responses",
        ],
    ):
        """
        Runs in parallel to LLM API call
        Runs on only Input

        This can NOT modify the input, only used to reject or accept a call before going to LLM API
        """

        # this works like async_pre_call_hook, but runs in parallel to the LLM API call
        # In this guardrail, if a user inputs `litellm` we reject the call
        _messages = data.get("messages")
        if _messages:
            for message in _messages:
                _content = message.get("content")
                if isinstance(_content, str):
                    if "litellm" in _content.lower():
                        raise ValueError("Guardrail failed words - `litellm` detected")

    async def async_post_call_success_hook(
        self,
        data: dict,
        user_api_key_dict: UserAPIKeyAuth,
        response,
    ):
        """
        Runs on response from LLM API call

        It can be used to reject a response

        If a response contains the word "coffee" -> we will raise an exception
        """
        verbose_proxy_logger.debug("async_pre_call_hook response: %s", response)
        if isinstance(response, litellm.ModelResponse):
            for choice in response.choices:
                if isinstance(choice, litellm.Choices):
                    verbose_proxy_logger.debug("async_pre_call_hook choice: %s", choice)
                    if (
                        choice.message.content
                        and isinstance(choice.message.content, str)
                        and "coffee" in choice.message.content
                    ):
                        raise ValueError("Guardrail failed Coffee Detected")

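A sketch of triggering this guardrail through the proxy, assuming it is registered as `custom-pre-guard` (as in the guardrails config later in this commit) and selected per request via the `guardrails` parameter; URL, key, and model name are placeholders:

```python
# Sketch: per-request guardrail selection; the pre-call hook should mask "litellm".
import requests

resp = requests.post(
    "http://0.0.0.0:8000/chat/completions",
    headers={"Authorization": "Bearer sk-1234"},
    json={
        "model": "fake-openai-endpoint",
        "messages": [{"role": "user", "content": "tell me about litellm"}],
        "guardrails": ["custom-pre-guard"],
    },
    timeout=30,
)
print(resp.status_code, resp.json())
```
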
@@ -0,0 +1,26 @@
import time
from typing import Any, Optional

import litellm
from litellm import CustomLLM, ImageObject, ImageResponse, completion, get_llm_provider
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler
from litellm.types.utils import ModelResponse


class MyCustomLLM(CustomLLM):
    def completion(self, *args, **kwargs) -> ModelResponse:
        return litellm.completion(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": "Hello world"}],
            mock_response="Hi!",
        )  # type: ignore

    async def acompletion(self, *args, **kwargs) -> litellm.ModelResponse:
        return litellm.completion(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": "Hello world"}],
            mock_response="Hi!",
        )  # type: ignore


my_custom_llm = MyCustomLLM()

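A sketch of registering this handler with the SDK via `litellm.custom_provider_map` (on the proxy the equivalent is a `custom_provider_map` entry under `litellm_settings`); the module name and the `my-custom-llm/*` model string are assumptions:

```python
# Sketch: route a "my-custom-llm/..." model through MyCustomLLM.
import litellm
from custom_handler import MyCustomLLM  # module name assumed

litellm.custom_provider_map = [
    {"provider": "my-custom-llm", "custom_handler": MyCustomLLM()}
]

response = litellm.completion(
    model="my-custom-llm/my-fake-model",  # provider prefix selects the handler
    messages=[{"role": "user", "content": "Hello world"}],
)
print(response.choices[0].message.content)  # "Hi!" from the mocked completion
```
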
@@ -0,0 +1,17 @@
model_list:
  - model_name: fake-openai-endpoint
    litellm_params:
      model: openai/fake
      api_key: fake-key
      api_base: https://exampleopenaiendpoint-production.up.railway.app/
  - model_name: gpt-4
    litellm_params:
      model: openai/gpt-4
      api_key: fake-key
      api_base: https://exampleopenaiendpoint-production.up.railway.app/

litellm_settings:
  callbacks: ["gcs_bucket"]

general_settings:
  disable_prisma_schema_update: true

@@ -0,0 +1,17 @@
model_list:
  - model_name: gpt-4
    litellm_params:
      model: openai/fake
      api_key: fake-key
      api_base: https://exampleopenaiendpoint-production.up.railway.app/
      tags: ["teamA"]
    model_info:
      id: "team-a-model"

litellm_settings:
  cache: true
  callbacks: ["prometheus"]

router_settings:
  enable_tag_filtering: True # 👈 Key Change

@@ -0,0 +1,7 @@
model_list:
  - model_name: gpt-3.5-turbo

litellm_settings:
  drop_params: True
  success_callback: ["langfuse"] # https://docs.litellm.ai/docs/observability/langfuse_integration

@@ -0,0 +1,28 @@
litellm_settings:
  drop_params: True

# Model-specific settings
model_list: # deployments that share a model_name are load-balanced by the litellm router
  - model_name: gpt-3.5-turbo # litellm routes requests across the gpt-3.5-turbo deployments below
    litellm_params:
      model: gpt-3.5-turbo
      api_key: sk-uj6F
      tpm: 20000 # [OPTIONAL] REPLACE with your openai tpm
      rpm: 3 # [OPTIONAL] REPLACE with your openai rpm
  - model_name: gpt-3.5-turbo
    litellm_params:
      model: gpt-3.5-turbo
      api_key: sk-Imn
      tpm: 20000 # [OPTIONAL] REPLACE with your openai tpm
      rpm: 3 # [OPTIONAL] REPLACE with your openai rpm
  - model_name: gpt-3.5-turbo
    litellm_params:
      model: openrouter/gpt-3.5-turbo
  - model_name: mistral-7b-instruct
    litellm_params:
      model: mistralai/mistral-7b-instruct

environment_variables:
  REDIS_HOST: localhost
  REDIS_PASSWORD:
  REDIS_PORT:

@@ -0,0 +1,12 @@
model_list:
  - model_name: fake-openai-endpoint
    litellm_params:
      model: openai/my-fake-model
      api_key: my-fake-key
      api_base: https://exampleopenaiendpoint-production.up.railway.app/

litellm_settings:
  cache: True
  cache_params:
    type: redis

@@ -0,0 +1,62 @@
model_list:
  - model_name: gpt-3.5-turbo-end-user-test
    litellm_params:
      model: gpt-3.5-turbo
      region_name: "eu"
    model_info:
      id: "1"
  - model_name: "*"
    litellm_params:
      model: openai/*
      api_key: os.environ/OPENAI_API_KEY
  # provider specific wildcard routing
  - model_name: "anthropic/*"
    litellm_params:
      model: "anthropic/*"
      api_key: os.environ/ANTHROPIC_API_KEY
  - model_name: "groq/*"
    litellm_params:
      model: "groq/*"
      api_key: os.environ/GROQ_API_KEY

litellm_settings:
  # set_verbose: True # Uncomment this if you want to see verbose logs; not recommended in production
  drop_params: True
  # max_budget: 100
  # budget_duration: 30d
  num_retries: 5
  request_timeout: 600
  telemetry: False
  context_window_fallbacks: [{"gpt-3.5-turbo": ["gpt-3.5-turbo-large"]}]
  default_team_settings:
    - team_id: team-1
      success_callback: ["langfuse"]
      failure_callback: ["langfuse"]
      langfuse_public_key: os.environ/LANGFUSE_PROJECT1_PUBLIC # Project 1
      langfuse_secret: os.environ/LANGFUSE_PROJECT1_SECRET # Project 1
    - team_id: team-2
      success_callback: ["langfuse"]
      failure_callback: ["langfuse"]
      langfuse_public_key: os.environ/LANGFUSE_PROJECT2_PUBLIC # Project 2
      langfuse_secret: os.environ/LANGFUSE_PROJECT2_SECRET # Project 2
      langfuse_host: https://us.cloud.langfuse.com

# For /fine_tuning/jobs endpoints
finetune_settings:
  - custom_llm_provider: azure
    api_base: os.environ/AZURE_API_BASE
    api_key: os.environ/AZURE_API_KEY
    api_version: "2024-05-01-preview"
  - custom_llm_provider: openai
    api_key: os.environ/OPENAI_API_KEY

# for /files endpoints
files_settings:
  - custom_llm_provider: azure
    api_base: os.environ/AZURE_API_BASE
    api_key: os.environ/AZURE_API_KEY
    api_version: "2024-05-01-preview"
  - custom_llm_provider: openai
    api_key: os.environ/OPENAI_API_KEY

general_settings:
  master_key: sk-1234 # [OPTIONAL] Use to enforce auth on proxy. See - https://docs.litellm.ai/docs/proxy/virtual_keys

@@ -0,0 +1,7 @@
model_list:
  - model_name: gpt-3.5-turbo
    litellm_params:
      model: gpt-3.5-turbo

general_settings:
  otel: True # OpenTelemetry Logger; this logs OTEL data to your collector

@@ -0,0 +1,83 @@
model_list:
  - model_name: fake-openai-endpoint
    litellm_params:
      model: openai/fake
      api_key: fake-key
      api_base: https://exampleopenaiendpoint-production.up.railway.app/
      tags: ["teamA"]
    model_info:
      id: "team-a-model"
  - model_name: fake-openai-endpoint
    litellm_params:
      model: openai/fake
      api_key: fake-key
      api_base: https://exampleopenaiendpoint-production.up.railway.app/
      tags: ["teamB"]
    model_info:
      id: "team-b-model"
  - model_name: rerank-english-v3.0
    litellm_params:
      model: cohere/rerank-english-v3.0
      api_key: os.environ/COHERE_API_KEY
  - model_name: fake-azure-endpoint
    litellm_params:
      model: openai/429
      api_key: fake-key
      api_base: https://exampleopenaiendpoint-production.up.railway.app
  - model_name: llava-hf
    litellm_params:
      model: openai/llava-hf/llava-v1.6-vicuna-7b-hf
      api_base: http://localhost:8000
      api_key: fake-key
    model_info:
      supports_vision: True
  - model_name: bedrock/*
    litellm_params:
      model: bedrock/*
      api_base: https://exampleopenaiendpoint-production.up.railway.app/
  - model_name: openai/*
    litellm_params:
      model: openai/*
      api_key: os.environ/OPENAI_API_KEY
      api_base: https://exampleopenaiendpoint-production.up.railway.app/

litellm_settings:
  cache: true
  callbacks: ["otel", "prometheus"]
  disable_end_user_cost_tracking_prometheus_only: True

guardrails:
  - guardrail_name: "aporia-pre-guard"
    litellm_params:
      guardrail: aporia # supported values: "aporia", "bedrock", "lakera"
      mode: "post_call"
      api_key: os.environ/APORIA_API_KEY_1
      api_base: os.environ/APORIA_API_BASE_1
  - guardrail_name: "aporia-post-guard"
    litellm_params:
      guardrail: aporia # supported values: "aporia", "bedrock", "lakera"
      mode: "post_call"
      api_key: os.environ/APORIA_API_KEY_2
      api_base: os.environ/APORIA_API_BASE_2
  - guardrail_name: "bedrock-pre-guard"
    litellm_params:
      guardrail: bedrock # supported values: "aporia", "bedrock", "lakera"
      mode: "during_call"
      guardrailIdentifier: ff6ujrregl1q
      guardrailVersion: "DRAFT"
  - guardrail_name: "custom-pre-guard"
    litellm_params:
      guardrail: custom_guardrail.myCustomGuardrail
      mode: "pre_call"
  - guardrail_name: "custom-during-guard"
    litellm_params:
      guardrail: custom_guardrail.myCustomGuardrail
      mode: "during_call"
  - guardrail_name: "custom-post-guard"
    litellm_params:
      guardrail: custom_guardrail.myCustomGuardrail
      mode: "post_call"

router_settings:
  enable_tag_filtering: True # 👈 Key Change

@@ -0,0 +1,29 @@
model_list:
  - model_name: fake-openai-endpoint
    litellm_params:
      model: openai/fake
      api_key: fake-key
      api_base: https://exampleopenaiendpoint-production.up.railway.app/
  - model_name: claude-3-5-sonnet-20241022
    litellm_params:
      model: anthropic/claude-3-5-sonnet-20241022
      api_key: os.environ/ANTHROPIC_API_KEY
  - model_name: claude-special-alias
    litellm_params:
      model: anthropic/claude-3-haiku-20240307
      api_key: os.environ/ANTHROPIC_API_KEY
  - model_name: claude-3-5-sonnet-20241022
    litellm_params:
      model: anthropic/claude-3-5-sonnet-20241022
      api_key: os.environ/ANTHROPIC_API_KEY
  - model_name: claude-3-7-sonnet-20250219
    litellm_params:
      model: anthropic/claude-3-7-sonnet-20250219
      api_key: os.environ/ANTHROPIC_API_KEY
  - model_name: anthropic/*
    litellm_params:
      model: anthropic/*
      api_key: os.environ/ANTHROPIC_API_KEY

general_settings:
  master_key: sk-1234
  custom_auth: custom_auth_basic.user_api_key_auth

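Since `custom_auth_basic.user_api_key_auth` returns a fixed identity for whatever key it receives, any bearer token should be accepted under this config; a sketch (placeholder URL):

```python
# Sketch: the custom auth hook accepts an arbitrary key and maps it to a fixed user/team.
import requests

resp = requests.post(
    "http://0.0.0.0:8000/chat/completions",
    headers={"Authorization": "Bearer any-key-at-all"},
    json={
        "model": "fake-openai-endpoint",
        "messages": [{"role": "user", "content": "ping"}],
    },
    timeout=30,
)
print(resp.status_code)
```
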
@@ -0,0 +1,4 @@
model_list:
  - model_name: gpt-3.5-turbo
    litellm_params:
      model: gpt-3.5-turbo

@@ -0,0 +1,15 @@
model_list:
  - model_name: fake-openai-endpoint
    litellm_params:
      model: openai/fake
      api_key: fake-key
      api_base: https://exampleopenaiendpoint-production.up.railway.app/

general_settings:
  use_redis_transaction_buffer: true

litellm_settings:
  cache: True
  cache_params:
    type: redis
    supported_call_types: []

@@ -0,0 +1,10 @@
model_list:
  - model_name: fake-openai-endpoint
    litellm_params:
      model: openai/my-fake-model
      api_key: my-fake-key
      api_base: https://exampleopenaiendpoint-production.up.railway.app/

general_settings:
  store_model_in_db: true

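With `store_model_in_db: true`, models added at runtime are persisted to the proxy's database; a sketch using the `/model/new` endpoint (URL, admin key, and the exact request body shape are assumptions based on the documented model-management API):

```python
# Sketch: add a model at runtime; it is stored in the DB because store_model_in_db is true.
import requests

resp = requests.post(
    "http://0.0.0.0:8000/model/new",
    headers={"Authorization": "Bearer sk-1234"},  # placeholder admin key
    json={
        "model_name": "my-new-model",
        "litellm_params": {
            "model": "openai/gpt-4o",
            "api_key": "os.environ/OPENAI_API_KEY",
        },
    },
    timeout=10,
)
print(resp.status_code, resp.json())
```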