structure saas with tools
This commit is contained in:
@@ -0,0 +1,134 @@
|
||||
"""
|
||||
Nvidia NIM endpoint: https://docs.api.nvidia.com/nim/reference/databricks-dbrx-instruct-infer
|
||||
|
||||
This is OpenAI compatible
|
||||
|
||||
This file only contains param mapping logic
|
||||
|
||||
API calling is done using the OpenAI SDK with an api_base
|
||||
"""
|
||||
|
||||
from typing import Optional, Union
|
||||
|
||||
from litellm.llms.openai.chat.gpt_transformation import OpenAIGPTConfig
|
||||
|
||||
|
||||
class NvidiaNimConfig(OpenAIGPTConfig):
|
||||
"""
|
||||
Reference: https://docs.api.nvidia.com/nim/reference/databricks-dbrx-instruct-infer
|
||||
|
||||
The class `NvidiaNimConfig` provides configuration for the Nvidia NIM's Chat Completions API interface. Below are the parameters:
|
||||
"""
|
||||
|
||||
temperature: Optional[int] = None
|
||||
top_p: Optional[int] = None
|
||||
frequency_penalty: Optional[int] = None
|
||||
presence_penalty: Optional[int] = None
|
||||
max_tokens: Optional[int] = None
|
||||
stop: Optional[Union[str, list]] = None
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
temperature: Optional[int] = None,
|
||||
top_p: Optional[int] = None,
|
||||
frequency_penalty: Optional[int] = None,
|
||||
presence_penalty: Optional[int] = None,
|
||||
max_tokens: Optional[int] = None,
|
||||
stop: Optional[Union[str, list]] = None,
|
||||
) -> None:
|
||||
locals_ = locals().copy()
|
||||
for key, value in locals_.items():
|
||||
if key != "self" and value is not None:
|
||||
setattr(self.__class__, key, value)
|
||||
|
||||
@classmethod
|
||||
def get_config(cls):
|
||||
return super().get_config()
|
||||
|
||||
def get_supported_openai_params(self, model: str) -> list:
|
||||
"""
|
||||
Get the supported OpenAI params for the given model
|
||||
|
||||
|
||||
Updated on July 5th, 2024 - based on https://docs.api.nvidia.com/nim/reference
|
||||
"""
|
||||
if model in [
|
||||
"google/recurrentgemma-2b",
|
||||
"google/gemma-2-27b-it",
|
||||
"google/gemma-2-9b-it",
|
||||
"gemma-2-9b-it",
|
||||
]:
|
||||
return ["stream", "temperature", "top_p", "max_tokens", "stop", "seed"]
|
||||
elif model == "nvidia/nemotron-4-340b-instruct":
|
||||
return [
|
||||
"stream",
|
||||
"temperature",
|
||||
"top_p",
|
||||
"max_tokens",
|
||||
"max_completion_tokens",
|
||||
]
|
||||
elif model == "nvidia/nemotron-4-340b-reward":
|
||||
return [
|
||||
"stream",
|
||||
]
|
||||
elif model in ["google/codegemma-1.1-7b"]:
|
||||
# most params - but no 'seed' :(
|
||||
return [
|
||||
"stream",
|
||||
"temperature",
|
||||
"top_p",
|
||||
"frequency_penalty",
|
||||
"presence_penalty",
|
||||
"max_tokens",
|
||||
"max_completion_tokens",
|
||||
"stop",
|
||||
]
|
||||
else:
|
||||
# DEFAULT Case - The vast majority of Nvidia NIM Models lie here
|
||||
# "upstage/solar-10.7b-instruct",
|
||||
# "snowflake/arctic",
|
||||
# "seallms/seallm-7b-v2.5",
|
||||
# "nvidia/llama3-chatqa-1.5-8b",
|
||||
# "nvidia/llama3-chatqa-1.5-70b",
|
||||
# "mistralai/mistral-large",
|
||||
# "mistralai/mixtral-8x22b-instruct-v0.1",
|
||||
# "mistralai/mixtral-8x7b-instruct-v0.1",
|
||||
# "mistralai/mistral-7b-instruct-v0.3",
|
||||
# "mistralai/mistral-7b-instruct-v0.2",
|
||||
# "mistralai/codestral-22b-instruct-v0.1",
|
||||
# "microsoft/phi-3-small-8k-instruct",
|
||||
# "microsoft/phi-3-small-128k-instruct",
|
||||
# "microsoft/phi-3-mini-4k-instruct",
|
||||
# "microsoft/phi-3-mini-128k-instruct",
|
||||
# "microsoft/phi-3-medium-4k-instruct",
|
||||
# "microsoft/phi-3-medium-128k-instruct",
|
||||
# "meta/llama3-70b-instruct",
|
||||
# "meta/llama3-8b-instruct",
|
||||
# "meta/llama2-70b",
|
||||
# "meta/codellama-70b",
|
||||
return [
|
||||
"stream",
|
||||
"temperature",
|
||||
"top_p",
|
||||
"frequency_penalty",
|
||||
"presence_penalty",
|
||||
"max_tokens",
|
||||
"max_completion_tokens",
|
||||
"stop",
|
||||
"seed",
|
||||
]
|
||||
|
||||
def map_openai_params(
|
||||
self,
|
||||
non_default_params: dict,
|
||||
optional_params: dict,
|
||||
model: str,
|
||||
drop_params: bool,
|
||||
) -> dict:
|
||||
supported_openai_params = self.get_supported_openai_params(model=model)
|
||||
for param, value in non_default_params.items():
|
||||
if param == "max_completion_tokens":
|
||||
optional_params["max_tokens"] = value
|
||||
elif param in supported_openai_params:
|
||||
optional_params[param] = value
|
||||
return optional_params
|
||||
Reference in New Issue
Block a user