structure saas with tools

2025-04-25 15:30:54 -03:00
commit 1aef473937
16434 changed files with 6584257 additions and 0 deletions
--- a/.venv/lib/python3.10/site-packages/litellm/llms/nvidia_nim/chat.py
+++ b/.venv/lib/python3.10/site-packages/litellm/llms/nvidia_nim/chat.py
@@ -0,0 +1,134 @@
+"""
+Nvidia NIM endpoint: https://docs.api.nvidia.com/nim/reference/databricks-dbrx-instruct-infer 
+
+This is OpenAI compatible 
+
+This file only contains param mapping logic
+
+API calling is done using the OpenAI SDK with an api_base
+"""
+
+from typing import Optional, Union
+
+from litellm.llms.openai.chat.gpt_transformation import OpenAIGPTConfig
+
+
+class NvidiaNimConfig(OpenAIGPTConfig):
+    """
+    Reference: https://docs.api.nvidia.com/nim/reference/databricks-dbrx-instruct-infer
+
+    The class `NvidiaNimConfig` provides configuration for the Nvidia NIM's Chat Completions API interface. Below are the parameters:
+    """
+
+    temperature: Optional[int] = None
+    top_p: Optional[int] = None
+    frequency_penalty: Optional[int] = None
+    presence_penalty: Optional[int] = None
+    max_tokens: Optional[int] = None
+    stop: Optional[Union[str, list]] = None
+
+    def __init__(
+        self,
+        temperature: Optional[int] = None,
+        top_p: Optional[int] = None,
+        frequency_penalty: Optional[int] = None,
+        presence_penalty: Optional[int] = None,
+        max_tokens: Optional[int] = None,
+        stop: Optional[Union[str, list]] = None,
+    ) -> None:
+        locals_ = locals().copy()
+        for key, value in locals_.items():
+            if key != "self" and value is not None:
+                setattr(self.__class__, key, value)
+
+    @classmethod
+    def get_config(cls):
+        return super().get_config()
+
+    def get_supported_openai_params(self, model: str) -> list:
+        """
+        Get the supported OpenAI params for the given model
+
+
+        Updated on July 5th, 2024 - based on https://docs.api.nvidia.com/nim/reference
+        """
+        if model in [
+            "google/recurrentgemma-2b",
+            "google/gemma-2-27b-it",
+            "google/gemma-2-9b-it",
+            "gemma-2-9b-it",
+        ]:
+            return ["stream", "temperature", "top_p", "max_tokens", "stop", "seed"]
+        elif model == "nvidia/nemotron-4-340b-instruct":
+            return [
+                "stream",
+                "temperature",
+                "top_p",
+                "max_tokens",
+                "max_completion_tokens",
+            ]
+        elif model == "nvidia/nemotron-4-340b-reward":
+            return [
+                "stream",
+            ]
+        elif model in ["google/codegemma-1.1-7b"]:
+            # most params - but no 'seed' :(
+            return [
+                "stream",
+                "temperature",
+                "top_p",
+                "frequency_penalty",
+                "presence_penalty",
+                "max_tokens",
+                "max_completion_tokens",
+                "stop",
+            ]
+        else:
+            # DEFAULT Case - The vast majority of Nvidia NIM Models lie here
+            # "upstage/solar-10.7b-instruct",
+            # "snowflake/arctic",
+            # "seallms/seallm-7b-v2.5",
+            # "nvidia/llama3-chatqa-1.5-8b",
+            # "nvidia/llama3-chatqa-1.5-70b",
+            # "mistralai/mistral-large",
+            # "mistralai/mixtral-8x22b-instruct-v0.1",
+            # "mistralai/mixtral-8x7b-instruct-v0.1",
+            # "mistralai/mistral-7b-instruct-v0.3",
+            # "mistralai/mistral-7b-instruct-v0.2",
+            # "mistralai/codestral-22b-instruct-v0.1",
+            # "microsoft/phi-3-small-8k-instruct",
+            # "microsoft/phi-3-small-128k-instruct",
+            # "microsoft/phi-3-mini-4k-instruct",
+            # "microsoft/phi-3-mini-128k-instruct",
+            # "microsoft/phi-3-medium-4k-instruct",
+            # "microsoft/phi-3-medium-128k-instruct",
+            # "meta/llama3-70b-instruct",
+            # "meta/llama3-8b-instruct",
+            # "meta/llama2-70b",
+            # "meta/codellama-70b",
+            return [
+                "stream",
+                "temperature",
+                "top_p",
+                "frequency_penalty",
+                "presence_penalty",
+                "max_tokens",
+                "max_completion_tokens",
+                "stop",
+                "seed",
+            ]
+
+    def map_openai_params(
+        self,
+        non_default_params: dict,
+        optional_params: dict,
+        model: str,
+        drop_params: bool,
+    ) -> dict:
+        supported_openai_params = self.get_supported_openai_params(model=model)
+        for param, value in non_default_params.items():
+            if param == "max_completion_tokens":
+                optional_params["max_tokens"] = value
+            elif param in supported_openai_params:
+                optional_params[param] = value
+        return optional_params