structure saas with tools
@@ -0,0 +1,188 @@
# This file is auto-generated by `utils/generate_inference_types.py`.
# Do not modify it manually.
#
# ruff: noqa: F401

from .audio_classification import (
    AudioClassificationInput,
    AudioClassificationOutputElement,
    AudioClassificationOutputTransform,
    AudioClassificationParameters,
)
from .audio_to_audio import AudioToAudioInput, AudioToAudioOutputElement
from .automatic_speech_recognition import (
    AutomaticSpeechRecognitionEarlyStoppingEnum,
    AutomaticSpeechRecognitionGenerationParameters,
    AutomaticSpeechRecognitionInput,
    AutomaticSpeechRecognitionOutput,
    AutomaticSpeechRecognitionOutputChunk,
    AutomaticSpeechRecognitionParameters,
)
from .base import BaseInferenceType
from .chat_completion import (
    ChatCompletionInput,
    ChatCompletionInputFunctionDefinition,
    ChatCompletionInputFunctionName,
    ChatCompletionInputGrammarType,
    ChatCompletionInputGrammarTypeType,
    ChatCompletionInputMessage,
    ChatCompletionInputMessageChunk,
    ChatCompletionInputMessageChunkType,
    ChatCompletionInputStreamOptions,
    ChatCompletionInputTool,
    ChatCompletionInputToolCall,
    ChatCompletionInputToolChoiceClass,
    ChatCompletionInputToolChoiceEnum,
    ChatCompletionInputURL,
    ChatCompletionOutput,
    ChatCompletionOutputComplete,
    ChatCompletionOutputFunctionDefinition,
    ChatCompletionOutputLogprob,
    ChatCompletionOutputLogprobs,
    ChatCompletionOutputMessage,
    ChatCompletionOutputToolCall,
    ChatCompletionOutputTopLogprob,
    ChatCompletionOutputUsage,
    ChatCompletionStreamOutput,
    ChatCompletionStreamOutputChoice,
    ChatCompletionStreamOutputDelta,
    ChatCompletionStreamOutputDeltaToolCall,
    ChatCompletionStreamOutputFunction,
    ChatCompletionStreamOutputLogprob,
    ChatCompletionStreamOutputLogprobs,
    ChatCompletionStreamOutputTopLogprob,
    ChatCompletionStreamOutputUsage,
)
from .depth_estimation import DepthEstimationInput, DepthEstimationOutput
from .document_question_answering import (
    DocumentQuestionAnsweringInput,
    DocumentQuestionAnsweringInputData,
    DocumentQuestionAnsweringOutputElement,
    DocumentQuestionAnsweringParameters,
)
from .feature_extraction import FeatureExtractionInput, FeatureExtractionInputTruncationDirection
from .fill_mask import FillMaskInput, FillMaskOutputElement, FillMaskParameters
from .image_classification import (
    ImageClassificationInput,
    ImageClassificationOutputElement,
    ImageClassificationOutputTransform,
    ImageClassificationParameters,
)
from .image_segmentation import (
    ImageSegmentationInput,
    ImageSegmentationOutputElement,
    ImageSegmentationParameters,
    ImageSegmentationSubtask,
)
from .image_to_image import ImageToImageInput, ImageToImageOutput, ImageToImageParameters, ImageToImageTargetSize
from .image_to_text import (
    ImageToTextEarlyStoppingEnum,
    ImageToTextGenerationParameters,
    ImageToTextInput,
    ImageToTextOutput,
    ImageToTextParameters,
)
from .object_detection import (
    ObjectDetectionBoundingBox,
    ObjectDetectionInput,
    ObjectDetectionOutputElement,
    ObjectDetectionParameters,
)
from .question_answering import (
    QuestionAnsweringInput,
    QuestionAnsweringInputData,
    QuestionAnsweringOutputElement,
    QuestionAnsweringParameters,
)
from .sentence_similarity import SentenceSimilarityInput, SentenceSimilarityInputData
from .summarization import (
    SummarizationInput,
    SummarizationOutput,
    SummarizationParameters,
    SummarizationTruncationStrategy,
)
from .table_question_answering import (
    Padding,
    TableQuestionAnsweringInput,
    TableQuestionAnsweringInputData,
    TableQuestionAnsweringOutputElement,
    TableQuestionAnsweringParameters,
)
from .text2text_generation import (
    Text2TextGenerationInput,
    Text2TextGenerationOutput,
    Text2TextGenerationParameters,
    Text2TextGenerationTruncationStrategy,
)
from .text_classification import (
    TextClassificationInput,
    TextClassificationOutputElement,
    TextClassificationOutputTransform,
    TextClassificationParameters,
)
from .text_generation import (
    TextGenerationInput,
    TextGenerationInputGenerateParameters,
    TextGenerationInputGrammarType,
    TextGenerationOutput,
    TextGenerationOutputBestOfSequence,
    TextGenerationOutputDetails,
    TextGenerationOutputFinishReason,
    TextGenerationOutputPrefillToken,
    TextGenerationOutputToken,
    TextGenerationStreamOutput,
    TextGenerationStreamOutputStreamDetails,
    TextGenerationStreamOutputToken,
    TypeEnum,
)
from .text_to_audio import (
    TextToAudioEarlyStoppingEnum,
    TextToAudioGenerationParameters,
    TextToAudioInput,
    TextToAudioOutput,
    TextToAudioParameters,
)
from .text_to_image import TextToImageInput, TextToImageOutput, TextToImageParameters
from .text_to_speech import (
    TextToSpeechEarlyStoppingEnum,
    TextToSpeechGenerationParameters,
    TextToSpeechInput,
    TextToSpeechOutput,
    TextToSpeechParameters,
)
from .text_to_video import TextToVideoInput, TextToVideoOutput, TextToVideoParameters
from .token_classification import (
    TokenClassificationAggregationStrategy,
    TokenClassificationInput,
    TokenClassificationOutputElement,
    TokenClassificationParameters,
)
from .translation import TranslationInput, TranslationOutput, TranslationParameters, TranslationTruncationStrategy
from .video_classification import (
    VideoClassificationInput,
    VideoClassificationOutputElement,
    VideoClassificationOutputTransform,
    VideoClassificationParameters,
)
from .visual_question_answering import (
    VisualQuestionAnsweringInput,
    VisualQuestionAnsweringInputData,
    VisualQuestionAnsweringOutputElement,
    VisualQuestionAnsweringParameters,
)
from .zero_shot_classification import (
    ZeroShotClassificationInput,
    ZeroShotClassificationOutputElement,
    ZeroShotClassificationParameters,
)
from .zero_shot_image_classification import (
    ZeroShotImageClassificationInput,
    ZeroShotImageClassificationOutputElement,
    ZeroShotImageClassificationParameters,
)
from .zero_shot_object_detection import (
    ZeroShotObjectDetectionBoundingBox,
    ZeroShotObjectDetectionInput,
    ZeroShotObjectDetectionOutputElement,
    ZeroShotObjectDetectionParameters,
)
@@ -0,0 +1,43 @@
# Inference code generated from the JSON schema spec in @huggingface/tasks.
#
# See:
# - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
# - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
from typing import Literal, Optional

from .base import BaseInferenceType, dataclass_with_extra


AudioClassificationOutputTransform = Literal["sigmoid", "softmax", "none"]


@dataclass_with_extra
class AudioClassificationParameters(BaseInferenceType):
    """Additional inference parameters for Audio Classification"""

    function_to_apply: Optional["AudioClassificationOutputTransform"] = None
    """The function to apply to the model outputs in order to retrieve the scores."""
    top_k: Optional[int] = None
    """When specified, limits the output to the top K most probable classes."""


@dataclass_with_extra
class AudioClassificationInput(BaseInferenceType):
    """Inputs for Audio Classification inference"""

    inputs: str
    """The input audio data as a base64-encoded string. If no `parameters` are provided, you can
    also provide the audio data as a raw bytes payload.
    """
    parameters: Optional[AudioClassificationParameters] = None
    """Additional inference parameters for Audio Classification"""


@dataclass_with_extra
class AudioClassificationOutputElement(BaseInferenceType):
    """Outputs for Audio Classification inference"""

    label: str
    """The predicted class label."""
    score: float
    """The corresponding probability."""
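As a usage sketch (not part of the commit), the types above compose into a request like this; the audio file name is a placeholder and the import path is assumed:

import base64

# Hypothetical usage: limit output to the top 3 classes, with softmax
# applied to the scores. Assumes the types above are importable.
with open("sample.wav", "rb") as f:  # "sample.wav" is a placeholder file
    audio_b64 = base64.b64encode(f.read()).decode()
request = AudioClassificationInput(
    inputs=audio_b64,
    parameters=AudioClassificationParameters(function_to_apply="softmax", top_k=3),
)
# BaseInferenceType subclasses also behave as dicts, so the request
# serializes directly to JSON.
print(dict(request)["parameters"]["top_k"])  # 3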
@@ -0,0 +1,30 @@
# Inference code generated from the JSON schema spec in @huggingface/tasks.
#
# See:
# - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
# - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
from typing import Any

from .base import BaseInferenceType, dataclass_with_extra


@dataclass_with_extra
class AudioToAudioInput(BaseInferenceType):
    """Inputs for Audio to Audio inference"""

    inputs: Any
    """The input audio data"""


@dataclass_with_extra
class AudioToAudioOutputElement(BaseInferenceType):
    """Outputs of inference for the Audio To Audio task
    A generated audio file with its label.
    """

    blob: Any
    """The generated audio file."""
    content_type: str
    """The content type of the audio file."""
    label: str
    """The label of the audio file."""
@@ -0,0 +1,114 @@
# Inference code generated from the JSON schema spec in @huggingface/tasks.
#
# See:
# - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
# - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
from typing import List, Literal, Optional, Union

from .base import BaseInferenceType, dataclass_with_extra


AutomaticSpeechRecognitionEarlyStoppingEnum = Literal["never"]


@dataclass_with_extra
class AutomaticSpeechRecognitionGenerationParameters(BaseInferenceType):
    """Parametrization of the text generation process"""

    do_sample: Optional[bool] = None
    """Whether to use sampling instead of greedy decoding when generating new tokens."""
    early_stopping: Optional[Union[bool, "AutomaticSpeechRecognitionEarlyStoppingEnum"]] = None
    """Controls the stopping condition for beam-based methods."""
    epsilon_cutoff: Optional[float] = None
    """If set to float strictly between 0 and 1, only tokens with a conditional probability
    greater than epsilon_cutoff will be sampled. In the paper, suggested values range from
    3e-4 to 9e-4, depending on the size of the model. See [Truncation Sampling as Language
    Model Desmoothing](https://hf.co/papers/2210.15191) for more details.
    """
    eta_cutoff: Optional[float] = None
    """Eta sampling is a hybrid of locally typical sampling and epsilon sampling. If set to
    float strictly between 0 and 1, a token is only considered if it is greater than either
    eta_cutoff or sqrt(eta_cutoff) * exp(-entropy(softmax(next_token_logits))). The latter
    term is intuitively the expected next token probability, scaled by sqrt(eta_cutoff). In
    the paper, suggested values range from 3e-4 to 2e-3, depending on the size of the model.
    See [Truncation Sampling as Language Model Desmoothing](https://hf.co/papers/2210.15191)
    for more details.
    """
    max_length: Optional[int] = None
    """The maximum length (in tokens) of the generated text, including the input."""
    max_new_tokens: Optional[int] = None
    """The maximum number of tokens to generate. Takes precedence over max_length."""
    min_length: Optional[int] = None
    """The minimum length (in tokens) of the generated text, including the input."""
    min_new_tokens: Optional[int] = None
    """The minimum number of tokens to generate. Takes precedence over min_length."""
    num_beam_groups: Optional[int] = None
    """Number of groups to divide num_beams into in order to ensure diversity among different
    groups of beams. See [this paper](https://hf.co/papers/1610.02424) for more details.
    """
    num_beams: Optional[int] = None
    """Number of beams to use for beam search."""
    penalty_alpha: Optional[float] = None
    """The value balances the model confidence and the degeneration penalty in contrastive
    search decoding.
    """
    temperature: Optional[float] = None
    """The value used to modulate the next token probabilities."""
    top_k: Optional[int] = None
    """The number of highest probability vocabulary tokens to keep for top-k-filtering."""
    top_p: Optional[float] = None
    """If set to float < 1, only the smallest set of most probable tokens with probabilities
    that add up to top_p or higher are kept for generation.
    """
    typical_p: Optional[float] = None
    """Local typicality measures how similar the conditional probability of predicting a target
    token next is to the expected conditional probability of predicting a random token next,
    given the partial text already generated. If set to float < 1, the smallest set of the
    most locally typical tokens with probabilities that add up to typical_p or higher are
    kept for generation. See [this paper](https://hf.co/papers/2202.00666) for more details.
    """
    use_cache: Optional[bool] = None
    """Whether the model should use the past last key/values attentions to speed up decoding"""


@dataclass_with_extra
class AutomaticSpeechRecognitionParameters(BaseInferenceType):
    """Additional inference parameters for Automatic Speech Recognition"""

    return_timestamps: Optional[bool] = None
    """Whether to output corresponding timestamps with the generated text"""
    # Will be deprecated in the future when the renaming to `generation_parameters` is implemented in transformers
    generate_kwargs: Optional[AutomaticSpeechRecognitionGenerationParameters] = None
    """Parametrization of the text generation process"""


@dataclass_with_extra
class AutomaticSpeechRecognitionInput(BaseInferenceType):
    """Inputs for Automatic Speech Recognition inference"""

    inputs: str
    """The input audio data as a base64-encoded string. If no `parameters` are provided, you can
    also provide the audio data as a raw bytes payload.
    """
    parameters: Optional[AutomaticSpeechRecognitionParameters] = None
    """Additional inference parameters for Automatic Speech Recognition"""


@dataclass_with_extra
class AutomaticSpeechRecognitionOutputChunk(BaseInferenceType):
    text: str
    """A chunk of text identified by the model"""
    timestamp: List[float]
    """The start and end timestamps corresponding with the text"""


@dataclass_with_extra
class AutomaticSpeechRecognitionOutput(BaseInferenceType):
    """Outputs of inference for the Automatic Speech Recognition task"""

    text: str
    """The recognized text."""
    chunks: Optional[List[AutomaticSpeechRecognitionOutputChunk]] = None
    """When returnTimestamps is enabled, chunks contains a list of audio chunks identified by
    the model.
    """
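A hedged sketch (not part of the diff) of how the nested generation parameters and the permissive output parser fit together; all values are illustrative:

# Illustrative only: beam-search decoding parameters nested inside the
# task-level ASR parameters.
params = AutomaticSpeechRecognitionParameters(
    return_timestamps=True,
    generate_kwargs=AutomaticSpeechRecognitionGenerationParameters(
        num_beams=4,
        max_new_tokens=256,
    ),
)

# Parsing a made-up server response; `timestamp` holds [start, end] seconds.
output = AutomaticSpeechRecognitionOutput.parse_obj_as_instance(
    '{"text": "hello world", "chunks": [{"text": "hello world", "timestamp": [0.0, 1.2]}]}'
)
print(output.chunks[0].timestamp)  # [0.0, 1.2]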
@@ -0,0 +1,161 @@
# Copyright 2024 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains a base class for all inference types."""

import inspect
import json
from dataclasses import asdict, dataclass
from typing import Any, Dict, List, Type, TypeVar, Union, get_args


T = TypeVar("T", bound="BaseInferenceType")


def _repr_with_extra(self):
    fields = list(self.__dataclass_fields__.keys())
    other_fields = list(k for k in self.__dict__ if k not in fields)
    return f"{self.__class__.__name__}({', '.join(f'{k}={self.__dict__[k]!r}' for k in fields + other_fields)})"


def dataclass_with_extra(cls: Type[T]) -> Type[T]:
    """Decorator to add a custom __repr__ method to a dataclass, showing all fields, including extra ones.

    This decorator only works with dataclasses that inherit from `BaseInferenceType`.
    """
    cls = dataclass(cls)
    cls.__repr__ = _repr_with_extra  # type: ignore[method-assign]
    return cls


@dataclass
class BaseInferenceType(dict):
    """Base class for all inference types.

    Object is a dataclass and a dict for backward compatibility but plan is to remove the dict part in the future.

    Handle parsing from dict, list and json strings in a permissive way to ensure future-compatibility (e.g. all
    fields are made optional, and non-expected fields are added as dict attributes).
    """

    @classmethod
    def parse_obj_as_list(cls: Type[T], data: Union[bytes, str, List, Dict]) -> List[T]:
        """Alias to parse server response and return a list of instances.

        See `parse_obj` for more details.
        """
        output = cls.parse_obj(data)
        if not isinstance(output, list):
            raise ValueError(f"Invalid input data for {cls}. Expected a list, but got {type(output)}.")
        return output

    @classmethod
    def parse_obj_as_instance(cls: Type[T], data: Union[bytes, str, List, Dict]) -> T:
        """Alias to parse server response and return a single instance.

        See `parse_obj` for more details.
        """
        output = cls.parse_obj(data)
        if isinstance(output, list):
            raise ValueError(f"Invalid input data for {cls}. Expected a single instance, but got a list.")
        return output

    @classmethod
    def parse_obj(cls: Type[T], data: Union[bytes, str, List, Dict]) -> Union[List[T], T]:
        """Parse server response as a dataclass or list of dataclasses.

        To enable future-compatibility, we want to handle cases where the server returns more fields than
        expected. In such cases, we don't want to raise an error but still create the dataclass object.
        Remaining fields are added as dict attributes.
        """
        # Parse server response (from bytes)
        if isinstance(data, bytes):
            data = data.decode()
        if isinstance(data, str):
            data = json.loads(data)

        # If a list, parse each item individually
        if isinstance(data, List):
            return [cls.parse_obj(d) for d in data]  # type: ignore [misc]

        # At this point, we expect a dict
        if not isinstance(data, dict):
            raise ValueError(f"Invalid data type: {type(data)}")

        init_values = {}
        other_values = {}
        for key, value in data.items():
            key = normalize_key(key)
            if key in cls.__dataclass_fields__ and cls.__dataclass_fields__[key].init:
                if isinstance(value, dict) or isinstance(value, list):
                    field_type = cls.__dataclass_fields__[key].type

                    # if `field_type` is a `BaseInferenceType`, parse it
                    if inspect.isclass(field_type) and issubclass(field_type, BaseInferenceType):
                        value = field_type.parse_obj(value)

                    # otherwise, recursively parse nested dataclasses (if possible)
                    # `get_args` handles Union and Optional for us
                    else:
                        expected_types = get_args(field_type)
                        for expected_type in expected_types:
                            if getattr(expected_type, "_name", None) == "List":
                                # assume same type for all items in the list
                                expected_type = get_args(expected_type)[0]
                            if inspect.isclass(expected_type) and issubclass(expected_type, BaseInferenceType):
                                value = expected_type.parse_obj(value)
                                break
                init_values[key] = value
            else:
                other_values[key] = value

        # Make all missing fields default to None
        # => ensure that dataclass initialization will never fail even if the server does not return all fields.
        for key in cls.__dataclass_fields__:
            if key not in init_values:
                init_values[key] = None

        # Initialize dataclass with expected values
        item = cls(**init_values)

        # Add remaining fields as dict attributes
        item.update(other_values)

        # Add remaining fields as extra dataclass fields.
        # They won't be part of the dataclass fields but will be accessible as attributes.
        # Use @dataclass_with_extra to show them in __repr__.
        item.__dict__.update(other_values)
        return item

    def __post_init__(self):
        self.update(asdict(self))

    def __setitem__(self, __key: Any, __value: Any) -> None:
        # Hacky way to keep dataclass values in sync when dict is updated
        super().__setitem__(__key, __value)
        if __key in self.__dataclass_fields__ and getattr(self, __key, None) != __value:
            self.__setattr__(__key, __value)
        return

    def __setattr__(self, __name: str, __value: Any) -> None:
        # Hacky way to keep dict values in sync when dataclass is updated
        super().__setattr__(__name, __value)
        if self.get(__name) != __value:
            self[__name] = __value
        return


def normalize_key(key: str) -> str:
    # e.g "content-type" -> "content_type", "Accept" -> "accept"
    return key.replace("-", "_").replace(" ", "_").lower()
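A minimal sketch of the permissive parsing this base class provides, reusing the AudioClassificationOutputElement type from earlier in this diff; the "rank" field is deliberately not part of that schema:

# Unexpected fields don't raise; they are kept as extra attributes and
# mirrored on the dict interface.
elem = AudioClassificationOutputElement.parse_obj_as_instance(
    b'{"label": "dog", "score": 0.9, "rank": 1}'
)
print(elem.label)     # "dog" (declared dataclass field)
print(elem["score"])  # 0.9  (dict interface stays in sync with the dataclass)
print(elem.rank)      # 1    (unexpected field, kept as an extra attribute)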
@@ -0,0 +1,311 @@
# Inference code generated from the JSON schema spec in @huggingface/tasks.
#
# See:
# - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
# - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
from typing import Any, List, Literal, Optional, Union

from .base import BaseInferenceType, dataclass_with_extra


@dataclass_with_extra
class ChatCompletionInputURL(BaseInferenceType):
    url: str


ChatCompletionInputMessageChunkType = Literal["text", "image_url"]


@dataclass_with_extra
class ChatCompletionInputMessageChunk(BaseInferenceType):
    type: "ChatCompletionInputMessageChunkType"
    image_url: Optional[ChatCompletionInputURL] = None
    text: Optional[str] = None


@dataclass_with_extra
class ChatCompletionInputFunctionDefinition(BaseInferenceType):
    arguments: Any
    name: str
    description: Optional[str] = None


@dataclass_with_extra
class ChatCompletionInputToolCall(BaseInferenceType):
    function: ChatCompletionInputFunctionDefinition
    id: str
    type: str


@dataclass_with_extra
class ChatCompletionInputMessage(BaseInferenceType):
    role: str
    content: Optional[Union[List[ChatCompletionInputMessageChunk], str]] = None
    name: Optional[str] = None
    tool_calls: Optional[List[ChatCompletionInputToolCall]] = None


ChatCompletionInputGrammarTypeType = Literal["json", "regex"]


@dataclass_with_extra
class ChatCompletionInputGrammarType(BaseInferenceType):
    type: "ChatCompletionInputGrammarTypeType"
    value: Any
    """A string that represents a [JSON Schema](https://json-schema.org/).
    JSON Schema is a declarative language that allows to annotate JSON documents
    with types and descriptions.
    """


@dataclass_with_extra
class ChatCompletionInputStreamOptions(BaseInferenceType):
    include_usage: Optional[bool] = None
    """If set, an additional chunk will be streamed before the data: [DONE] message. The usage
    field on this chunk shows the token usage statistics for the entire request, and the
    choices field will always be an empty array. All other chunks will also include a usage
    field, but with a null value.
    """


@dataclass_with_extra
class ChatCompletionInputFunctionName(BaseInferenceType):
    name: str


@dataclass_with_extra
class ChatCompletionInputToolChoiceClass(BaseInferenceType):
    function: ChatCompletionInputFunctionName


ChatCompletionInputToolChoiceEnum = Literal["auto", "none", "required"]


@dataclass_with_extra
class ChatCompletionInputTool(BaseInferenceType):
    function: ChatCompletionInputFunctionDefinition
    type: str


@dataclass_with_extra
class ChatCompletionInput(BaseInferenceType):
    """Chat Completion Input.

    Auto-generated from TGI specs.
    For more details, check out
    https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-tgi-import.ts.
    """

    messages: List[ChatCompletionInputMessage]
    """A list of messages comprising the conversation so far."""
    frequency_penalty: Optional[float] = None
    """Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing
    frequency in the text so far, decreasing the model's likelihood to repeat the same line
    verbatim.
    """
    logit_bias: Optional[List[float]] = None
    """UNUSED
    Modify the likelihood of specified tokens appearing in the completion. Accepts a JSON
    object that maps tokens (specified by their token ID in the tokenizer) to an associated
    bias value from -100 to 100. Mathematically, the bias is added to the logits generated
    by the model prior to sampling. The exact effect will vary per model, but values between
    -1 and 1 should decrease or increase likelihood of selection; values like -100 or 100
    should result in a ban or exclusive selection of the relevant token.
    """
    logprobs: Optional[bool] = None
    """Whether to return log probabilities of the output tokens or not. If true, returns the
    log probabilities of each output token returned in the content of message.
    """
    max_tokens: Optional[int] = None
    """The maximum number of tokens that can be generated in the chat completion."""
    model: Optional[str] = None
    """[UNUSED] ID of the model to use. See the model endpoint compatibility table for details
    on which models work with the Chat API.
    """
    n: Optional[int] = None
    """UNUSED
    How many chat completion choices to generate for each input message. Note that you will
    be charged based on the number of generated tokens across all of the choices. Keep n as
    1 to minimize costs.
    """
    presence_penalty: Optional[float] = None
    """Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they
    appear in the text so far, increasing the model's likelihood to talk about new topics.
    """
    response_format: Optional[ChatCompletionInputGrammarType] = None
    seed: Optional[int] = None
    stop: Optional[List[str]] = None
    """Up to 4 sequences where the API will stop generating further tokens."""
    stream: Optional[bool] = None
    stream_options: Optional[ChatCompletionInputStreamOptions] = None
    temperature: Optional[float] = None
    """What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the
    output more random, while lower values like 0.2 will make it more focused and
    deterministic. We generally recommend altering this or `top_p` but not both.
    """
    tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None
    tool_prompt: Optional[str] = None
    """A prompt to be appended before the tools"""
    tools: Optional[List[ChatCompletionInputTool]] = None
    """A list of tools the model may call. Currently, only functions are supported as a tool.
    Use this to provide a list of functions the model may generate JSON inputs for.
    """
    top_logprobs: Optional[int] = None
    """An integer between 0 and 5 specifying the number of most likely tokens to return at each
    token position, each with an associated log probability. logprobs must be set to true if
    this parameter is used.
    """
    top_p: Optional[float] = None
    """An alternative to sampling with temperature, called nucleus sampling, where the model
    considers the results of the tokens with top_p probability mass. So 0.1 means only the
    tokens comprising the top 10% probability mass are considered.
    """


@dataclass_with_extra
class ChatCompletionOutputTopLogprob(BaseInferenceType):
    logprob: float
    token: str


@dataclass_with_extra
class ChatCompletionOutputLogprob(BaseInferenceType):
    logprob: float
    token: str
    top_logprobs: List[ChatCompletionOutputTopLogprob]


@dataclass_with_extra
class ChatCompletionOutputLogprobs(BaseInferenceType):
    content: List[ChatCompletionOutputLogprob]


@dataclass_with_extra
class ChatCompletionOutputFunctionDefinition(BaseInferenceType):
    arguments: Any
    name: str
    description: Optional[str] = None


@dataclass_with_extra
class ChatCompletionOutputToolCall(BaseInferenceType):
    function: ChatCompletionOutputFunctionDefinition
    id: str
    type: str


@dataclass_with_extra
class ChatCompletionOutputMessage(BaseInferenceType):
    role: str
    content: Optional[str] = None
    tool_call_id: Optional[str] = None
    tool_calls: Optional[List[ChatCompletionOutputToolCall]] = None


@dataclass_with_extra
class ChatCompletionOutputComplete(BaseInferenceType):
    finish_reason: str
    index: int
    message: ChatCompletionOutputMessage
    logprobs: Optional[ChatCompletionOutputLogprobs] = None


@dataclass_with_extra
class ChatCompletionOutputUsage(BaseInferenceType):
    completion_tokens: int
    prompt_tokens: int
    total_tokens: int


@dataclass_with_extra
class ChatCompletionOutput(BaseInferenceType):
    """Chat Completion Output.

    Auto-generated from TGI specs.
    For more details, check out
    https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-tgi-import.ts.
    """

    choices: List[ChatCompletionOutputComplete]
    created: int
    id: str
    model: str
    system_fingerprint: str
    usage: ChatCompletionOutputUsage


@dataclass_with_extra
class ChatCompletionStreamOutputFunction(BaseInferenceType):
    arguments: str
    name: Optional[str] = None


@dataclass_with_extra
class ChatCompletionStreamOutputDeltaToolCall(BaseInferenceType):
    function: ChatCompletionStreamOutputFunction
    id: str
    index: int
    type: str


@dataclass_with_extra
class ChatCompletionStreamOutputDelta(BaseInferenceType):
    role: str
    content: Optional[str] = None
    tool_call_id: Optional[str] = None
    tool_calls: Optional[List[ChatCompletionStreamOutputDeltaToolCall]] = None


@dataclass_with_extra
class ChatCompletionStreamOutputTopLogprob(BaseInferenceType):
    logprob: float
    token: str


@dataclass_with_extra
class ChatCompletionStreamOutputLogprob(BaseInferenceType):
    logprob: float
    token: str
    top_logprobs: List[ChatCompletionStreamOutputTopLogprob]


@dataclass_with_extra
class ChatCompletionStreamOutputLogprobs(BaseInferenceType):
    content: List[ChatCompletionStreamOutputLogprob]


@dataclass_with_extra
class ChatCompletionStreamOutputChoice(BaseInferenceType):
    delta: ChatCompletionStreamOutputDelta
    index: int
    finish_reason: Optional[str] = None
    logprobs: Optional[ChatCompletionStreamOutputLogprobs] = None


@dataclass_with_extra
class ChatCompletionStreamOutputUsage(BaseInferenceType):
    completion_tokens: int
    prompt_tokens: int
    total_tokens: int


@dataclass_with_extra
class ChatCompletionStreamOutput(BaseInferenceType):
    """Chat Completion Stream Output.

    Auto-generated from TGI specs.
    For more details, check out
    https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-tgi-import.ts.
    """

    choices: List[ChatCompletionStreamOutputChoice]
    created: int
    id: str
    model: str
    system_fingerprint: str
    usage: Optional[ChatCompletionStreamOutputUsage] = None
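A hypothetical request (not from the commit) showing how the chunked message types above support mixed text and image content; the URL is a placeholder:

# Illustrative streaming request mixing a text chunk and an image chunk
# in one user message.
request = ChatCompletionInput(
    messages=[
        ChatCompletionInputMessage(
            role="user",
            content=[
                ChatCompletionInputMessageChunk(type="text", text="Describe this image."),
                ChatCompletionInputMessageChunk(
                    type="image_url",
                    image_url=ChatCompletionInputURL(url="https://example.com/cat.png"),
                ),
            ],
        )
    ],
    max_tokens=128,
    stream=True,
    # Request a final usage chunk before the data: [DONE] message.
    stream_options=ChatCompletionInputStreamOptions(include_usage=True),
)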
@@ -0,0 +1,28 @@
# Inference code generated from the JSON schema spec in @huggingface/tasks.
#
# See:
# - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
# - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
from typing import Any, Dict, Optional

from .base import BaseInferenceType, dataclass_with_extra


@dataclass_with_extra
class DepthEstimationInput(BaseInferenceType):
    """Inputs for Depth Estimation inference"""

    inputs: Any
    """The input image data"""
    parameters: Optional[Dict[str, Any]] = None
    """Additional inference parameters for Depth Estimation"""


@dataclass_with_extra
class DepthEstimationOutput(BaseInferenceType):
    """Outputs of inference for the Depth Estimation task"""

    depth: Any
    """The predicted depth as an image"""
    predicted_depth: Any
    """The predicted depth as a tensor"""
@@ -0,0 +1,80 @@
# Inference code generated from the JSON schema spec in @huggingface/tasks.
#
# See:
# - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
# - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
from typing import Any, List, Optional, Union

from .base import BaseInferenceType, dataclass_with_extra


@dataclass_with_extra
class DocumentQuestionAnsweringInputData(BaseInferenceType):
    """One (document, question) pair to answer"""

    image: Any
    """The image on which the question is asked"""
    question: str
    """A question to ask of the document"""


@dataclass_with_extra
class DocumentQuestionAnsweringParameters(BaseInferenceType):
    """Additional inference parameters for Document Question Answering"""

    doc_stride: Optional[int] = None
    """If the words in the document are too long to fit with the question for the model, it will
    be split in several chunks with some overlap. This argument controls the size of that
    overlap.
    """
    handle_impossible_answer: Optional[bool] = None
    """Whether to accept impossible as an answer"""
    lang: Optional[str] = None
    """Language to use while running OCR. Defaults to English."""
    max_answer_len: Optional[int] = None
    """The maximum length of predicted answers (e.g., only answers with a shorter length are
    considered).
    """
    max_question_len: Optional[int] = None
    """The maximum length of the question after tokenization. It will be truncated if needed."""
    max_seq_len: Optional[int] = None
    """The maximum length of the total sentence (context + question) in tokens of each chunk
    passed to the model. The context will be split in several chunks (using doc_stride as
    overlap) if needed.
    """
    top_k: Optional[int] = None
    """The number of answers to return (will be chosen by order of likelihood). Can return
    fewer than top_k answers if there are not enough options available within the context.
    """
    word_boxes: Optional[List[Union[List[float], str]]] = None
    """A list of words and bounding boxes (normalized 0->1000). If provided, the inference will
    skip the OCR step and use the provided bounding boxes instead.
    """


@dataclass_with_extra
class DocumentQuestionAnsweringInput(BaseInferenceType):
    """Inputs for Document Question Answering inference"""

    inputs: DocumentQuestionAnsweringInputData
    """One (document, question) pair to answer"""
    parameters: Optional[DocumentQuestionAnsweringParameters] = None
    """Additional inference parameters for Document Question Answering"""


@dataclass_with_extra
class DocumentQuestionAnsweringOutputElement(BaseInferenceType):
    """Outputs of inference for the Document Question Answering task"""

    answer: str
    """The answer to the question."""
    end: int
    """The end word index of the answer (in the OCR'd version of the input or provided word
    boxes).
    """
    score: float
    """The probability associated with the answer."""
    start: int
    """The start word index of the answer (in the OCR'd version of the input or provided word
    boxes).
    """
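A hypothetical request built from the types above; `image` is typed Any, so the exact payload (base64 string, raw bytes, PIL image) depends on the client:

# Illustrative only; the image value is a placeholder.
request = DocumentQuestionAnsweringInput(
    inputs=DocumentQuestionAnsweringInputData(
        image="<base64-encoded document scan>",  # placeholder payload
        question="What is the invoice total?",
    ),
    parameters=DocumentQuestionAnsweringParameters(lang="eng", top_k=2),
)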
@@ -0,0 +1,36 @@
# Inference code generated from the JSON schema spec in @huggingface/tasks.
#
# See:
# - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
# - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
from typing import List, Literal, Optional, Union

from .base import BaseInferenceType, dataclass_with_extra


FeatureExtractionInputTruncationDirection = Literal["Left", "Right"]


@dataclass_with_extra
class FeatureExtractionInput(BaseInferenceType):
    """Feature Extraction Input.

    Auto-generated from TEI specs.
    For more details, check out
    https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-tei-import.ts.
    """

    inputs: Union[List[str], str]
    """The text or list of texts to embed."""
    normalize: Optional[bool] = None
    prompt_name: Optional[str] = None
    """The name of the prompt that should be used for encoding. If not set, no prompt will be
    applied. Must be a key in the `sentence-transformers` configuration `prompts` dictionary.
    For example, if `prompt_name` is "query" and the `prompts` dictionary is
    {"query": "query: ", ...}, then the sentence "What is the capital of France?" will be
    encoded as "query: What is the capital of France?" because the prompt text will be
    prepended before any text to encode.
    """
    truncate: Optional[bool] = None
    truncation_direction: Optional["FeatureExtractionInputTruncationDirection"] = None
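A minimal sketch of the prompt behavior described in the docstring above; it assumes the target model's sentence-transformers config defines prompts = {"query": "query: "}:

# The server would encode "query: What is the capital of France?".
request = FeatureExtractionInput(
    inputs="What is the capital of France?",
    prompt_name="query",
    normalize=True,
    truncate=True,
    truncation_direction="Right",
)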
@@ -0,0 +1,47 @@
# Inference code generated from the JSON schema spec in @huggingface/tasks.
#
# See:
# - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
# - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
from typing import Any, List, Optional

from .base import BaseInferenceType, dataclass_with_extra


@dataclass_with_extra
class FillMaskParameters(BaseInferenceType):
    """Additional inference parameters for Fill Mask"""

    targets: Optional[List[str]] = None
    """When passed, the model will limit the scores to the passed targets instead of looking up
    in the whole vocabulary. If the provided targets are not in the model vocab, they will be
    tokenized and the first resulting token will be used (with a warning, and that might be
    slower).
    """
    top_k: Optional[int] = None
    """When passed, overrides the number of predictions to return."""


@dataclass_with_extra
class FillMaskInput(BaseInferenceType):
    """Inputs for Fill Mask inference"""

    inputs: str
    """The text with masked tokens"""
    parameters: Optional[FillMaskParameters] = None
    """Additional inference parameters for Fill Mask"""


@dataclass_with_extra
class FillMaskOutputElement(BaseInferenceType):
    """Outputs of inference for the Fill Mask task"""

    score: float
    """The corresponding probability"""
    sequence: str
    """The corresponding input with the mask token prediction."""
    token: int
    """The predicted token id (to replace the masked one)."""
    token_str: Any
    fill_mask_output_token_str: Optional[str] = None
    """The predicted token (to replace the masked one)."""
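An illustrative request (not from the commit) restricting predictions to two candidate targets; note the mask token ("[MASK]" here) depends on the model's tokenizer:

request = FillMaskInput(
    inputs="Paris is the capital of [MASK].",
    parameters=FillMaskParameters(targets=["France", "Italy"], top_k=2),
)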
@@ -0,0 +1,43 @@
# Inference code generated from the JSON schema spec in @huggingface/tasks.
#
# See:
# - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
# - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
from typing import Literal, Optional

from .base import BaseInferenceType, dataclass_with_extra


ImageClassificationOutputTransform = Literal["sigmoid", "softmax", "none"]


@dataclass_with_extra
class ImageClassificationParameters(BaseInferenceType):
    """Additional inference parameters for Image Classification"""

    function_to_apply: Optional["ImageClassificationOutputTransform"] = None
    """The function to apply to the model outputs in order to retrieve the scores."""
    top_k: Optional[int] = None
    """When specified, limits the output to the top K most probable classes."""


@dataclass_with_extra
class ImageClassificationInput(BaseInferenceType):
    """Inputs for Image Classification inference"""

    inputs: str
    """The input image data as a base64-encoded string. If no `parameters` are provided, you can
    also provide the image data as a raw bytes payload.
    """
    parameters: Optional[ImageClassificationParameters] = None
    """Additional inference parameters for Image Classification"""


@dataclass_with_extra
class ImageClassificationOutputElement(BaseInferenceType):
    """Outputs of inference for the Image Classification task"""

    label: str
    """The predicted class label."""
    score: float
    """The corresponding probability."""
@@ -0,0 +1,51 @@
# Inference code generated from the JSON schema spec in @huggingface/tasks.
#
# See:
# - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
# - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
from typing import Literal, Optional

from .base import BaseInferenceType, dataclass_with_extra


ImageSegmentationSubtask = Literal["instance", "panoptic", "semantic"]


@dataclass_with_extra
class ImageSegmentationParameters(BaseInferenceType):
    """Additional inference parameters for Image Segmentation"""

    mask_threshold: Optional[float] = None
    """Threshold to use when turning the predicted masks into binary values."""
    overlap_mask_area_threshold: Optional[float] = None
    """Mask overlap threshold to eliminate small, disconnected segments."""
    subtask: Optional["ImageSegmentationSubtask"] = None
    """Segmentation task to be performed, depending on model capabilities."""
    threshold: Optional[float] = None
    """Probability threshold to filter out predicted masks."""


@dataclass_with_extra
class ImageSegmentationInput(BaseInferenceType):
    """Inputs for Image Segmentation inference"""

    inputs: str
    """The input image data as a base64-encoded string. If no `parameters` are provided, you can
    also provide the image data as a raw bytes payload.
    """
    parameters: Optional[ImageSegmentationParameters] = None
    """Additional inference parameters for Image Segmentation"""


@dataclass_with_extra
class ImageSegmentationOutputElement(BaseInferenceType):
    """Outputs of inference for the Image Segmentation task
    A predicted mask / segment
    """

    label: str
    """The label of the predicted segment."""
    mask: str
    """The corresponding mask as a black-and-white image (base64-encoded)."""
    score: Optional[float] = None
    """The score or confidence degree the model has."""
@@ -0,0 +1,56 @@
# Inference code generated from the JSON schema spec in @huggingface/tasks.
#
# See:
# - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
# - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
from typing import Any, Optional

from .base import BaseInferenceType, dataclass_with_extra


@dataclass_with_extra
class ImageToImageTargetSize(BaseInferenceType):
    """The size in pixels of the output image."""

    height: int
    width: int


@dataclass_with_extra
class ImageToImageParameters(BaseInferenceType):
    """Additional inference parameters for Image To Image"""

    guidance_scale: Optional[float] = None
    """For diffusion models. A higher guidance scale value encourages the model to generate
    images closely linked to the text prompt at the expense of lower image quality.
    """
    negative_prompt: Optional[str] = None
    """One prompt to guide what NOT to include in image generation."""
    num_inference_steps: Optional[int] = None
    """For diffusion models. The number of denoising steps. More denoising steps usually lead to
    a higher quality image at the expense of slower inference.
    """
    prompt: Optional[str] = None
    """The text prompt to guide the image generation."""
    target_size: Optional[ImageToImageTargetSize] = None
    """The size in pixels of the output image."""


@dataclass_with_extra
class ImageToImageInput(BaseInferenceType):
    """Inputs for Image To Image inference"""

    inputs: str
    """The input image data as a base64-encoded string. If no `parameters` are provided, you can
    also provide the image data as a raw bytes payload.
    """
    parameters: Optional[ImageToImageParameters] = None
    """Additional inference parameters for Image To Image"""


@dataclass_with_extra
class ImageToImageOutput(BaseInferenceType):
    """Outputs of inference for the Image To Image task"""

    image: Any
    """The output image returned as raw bytes in the payload."""
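An illustrative parameter set for a diffusion-backed image-to-image model; all values are example choices, not defaults:

# A stronger guidance_scale follows the prompt more closely, and
# target_size fixes the output resolution.
params = ImageToImageParameters(
    prompt="a watercolor rendition of the input photo",
    guidance_scale=7.5,
    num_inference_steps=30,
    target_size=ImageToImageTargetSize(height=512, width=512),
)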
@@ -0,0 +1,101 @@
# Inference code generated from the JSON schema spec in @huggingface/tasks.
#
# See:
# - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
# - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
from typing import Any, Literal, Optional, Union

from .base import BaseInferenceType, dataclass_with_extra


ImageToTextEarlyStoppingEnum = Literal["never"]


@dataclass_with_extra
class ImageToTextGenerationParameters(BaseInferenceType):
    """Parametrization of the text generation process"""

    do_sample: Optional[bool] = None
    """Whether to use sampling instead of greedy decoding when generating new tokens."""
    early_stopping: Optional[Union[bool, "ImageToTextEarlyStoppingEnum"]] = None
    """Controls the stopping condition for beam-based methods."""
    epsilon_cutoff: Optional[float] = None
    """If set to float strictly between 0 and 1, only tokens with a conditional probability
    greater than epsilon_cutoff will be sampled. In the paper, suggested values range from
    3e-4 to 9e-4, depending on the size of the model. See [Truncation Sampling as Language
    Model Desmoothing](https://hf.co/papers/2210.15191) for more details.
    """
    eta_cutoff: Optional[float] = None
    """Eta sampling is a hybrid of locally typical sampling and epsilon sampling. If set to
    float strictly between 0 and 1, a token is only considered if it is greater than either
    eta_cutoff or sqrt(eta_cutoff) * exp(-entropy(softmax(next_token_logits))). The latter
    term is intuitively the expected next token probability, scaled by sqrt(eta_cutoff). In
    the paper, suggested values range from 3e-4 to 2e-3, depending on the size of the model.
    See [Truncation Sampling as Language Model Desmoothing](https://hf.co/papers/2210.15191)
    for more details.
    """
    max_length: Optional[int] = None
    """The maximum length (in tokens) of the generated text, including the input."""
    max_new_tokens: Optional[int] = None
    """The maximum number of tokens to generate. Takes precedence over max_length."""
    min_length: Optional[int] = None
    """The minimum length (in tokens) of the generated text, including the input."""
    min_new_tokens: Optional[int] = None
    """The minimum number of tokens to generate. Takes precedence over min_length."""
    num_beam_groups: Optional[int] = None
    """Number of groups to divide num_beams into in order to ensure diversity among different
    groups of beams. See [this paper](https://hf.co/papers/1610.02424) for more details.
    """
    num_beams: Optional[int] = None
    """Number of beams to use for beam search."""
    penalty_alpha: Optional[float] = None
    """The value balances the model confidence and the degeneration penalty in contrastive
    search decoding.
    """
    temperature: Optional[float] = None
    """The value used to modulate the next token probabilities."""
    top_k: Optional[int] = None
    """The number of highest probability vocabulary tokens to keep for top-k-filtering."""
    top_p: Optional[float] = None
    """If set to float < 1, only the smallest set of most probable tokens with probabilities
    that add up to top_p or higher are kept for generation.
    """
    typical_p: Optional[float] = None
    """Local typicality measures how similar the conditional probability of predicting a target
    token next is to the expected conditional probability of predicting a random token next,
    given the partial text already generated. If set to float < 1, the smallest set of the
    most locally typical tokens with probabilities that add up to typical_p or higher are
    kept for generation. See [this paper](https://hf.co/papers/2202.00666) for more details.
    """
    use_cache: Optional[bool] = None
    """Whether the model should use the past last key/values attentions to speed up decoding"""


@dataclass_with_extra
class ImageToTextParameters(BaseInferenceType):
    """Additional inference parameters for Image To Text"""

    max_new_tokens: Optional[int] = None
    """The maximum number of tokens to generate."""
    # Will be deprecated in the future when the renaming to `generation_parameters` is implemented in transformers
    generate_kwargs: Optional[ImageToTextGenerationParameters] = None
    """Parametrization of the text generation process"""


@dataclass_with_extra
class ImageToTextInput(BaseInferenceType):
    """Inputs for Image To Text inference"""

    inputs: Any
    """The input image data"""
    parameters: Optional[ImageToTextParameters] = None
    """Additional inference parameters for Image To Text"""


@dataclass_with_extra
class ImageToTextOutput(BaseInferenceType):
    """Outputs of inference for the Image To Text task"""

    generated_text: Any
    image_to_text_output_generated_text: Optional[str] = None
    """The generated text."""
@@ -0,0 +1,58 @@
# Inference code generated from the JSON schema spec in @huggingface/tasks.
#
# See:
# - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
# - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
from typing import Optional

from .base import BaseInferenceType, dataclass_with_extra


@dataclass_with_extra
class ObjectDetectionParameters(BaseInferenceType):
    """Additional inference parameters for Object Detection"""

    threshold: Optional[float] = None
    """The probability necessary to make a prediction."""


@dataclass_with_extra
class ObjectDetectionInput(BaseInferenceType):
    """Inputs for Object Detection inference"""

    inputs: str
    """The input image data as a base64-encoded string. If no `parameters` are provided, you can
    also provide the image data as a raw bytes payload.
    """
    parameters: Optional[ObjectDetectionParameters] = None
    """Additional inference parameters for Object Detection"""


@dataclass_with_extra
class ObjectDetectionBoundingBox(BaseInferenceType):
    """The predicted bounding box. Coordinates are relative to the top left corner of the input
    image.
    """

    xmax: int
    """The x-coordinate of the bottom-right corner of the bounding box."""
    xmin: int
    """The x-coordinate of the top-left corner of the bounding box."""
    ymax: int
    """The y-coordinate of the bottom-right corner of the bounding box."""
    ymin: int
    """The y-coordinate of the top-left corner of the bounding box."""


@dataclass_with_extra
class ObjectDetectionOutputElement(BaseInferenceType):
    """Outputs of inference for the Object Detection task"""

    box: ObjectDetectionBoundingBox
    """The predicted bounding box. Coordinates are relative to the top left corner of the input
    image.
    """
    label: str
    """The predicted label for the bounding box."""
    score: float
    """The associated score / probability."""
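
# --- Usage sketch (illustrative, not part of the generated file) ---
# Converting the corner-style box above (xmin, ymin, xmax, ymax) into the
# (x, y, width, height) form many drawing APIs expect. The element is
# hand-built sample data, not a real model response.
element = ObjectDetectionOutputElement(
    box=ObjectDetectionBoundingBox(xmin=10, ymin=20, xmax=110, ymax=220),
    label="cat",
    score=0.97,
)
x, y = element.box.xmin, element.box.ymin
width = element.box.xmax - element.box.xmin
height = element.box.ymax - element.box.ymin
print(f"{element.label} ({element.score:.0%}) at ({x}, {y}), {width}x{height}")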
@@ -0,0 +1,74 @@
# Inference code generated from the JSON schema spec in @huggingface/tasks.
#
# See:
# - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
# - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
from typing import Optional

from .base import BaseInferenceType, dataclass_with_extra


@dataclass_with_extra
class QuestionAnsweringInputData(BaseInferenceType):
    """One (context, question) pair to answer"""

    context: str
    """The context to be used for answering the question"""
    question: str
    """The question to be answered"""


@dataclass_with_extra
class QuestionAnsweringParameters(BaseInferenceType):
    """Additional inference parameters for Question Answering"""

    align_to_words: Optional[bool] = None
    """Attempts to align the answer to real words. Improves quality on space-separated
    languages. Might hurt on non-space-separated languages (like Japanese or Chinese).
    """
    doc_stride: Optional[int] = None
    """If the context is too long to fit with the question for the model, it will be split in
    several chunks with some overlap. This argument controls the size of that overlap.
    """
    handle_impossible_answer: Optional[bool] = None
    """Whether to accept impossible as an answer."""
    max_answer_len: Optional[int] = None
    """The maximum length of predicted answers (e.g., only answers with a shorter length are
    considered).
    """
    max_question_len: Optional[int] = None
    """The maximum length of the question after tokenization. It will be truncated if needed."""
    max_seq_len: Optional[int] = None
    """The maximum length of the total sentence (context + question) in tokens of each chunk
    passed to the model. The context will be split in several chunks (using doc_stride as
    overlap) if needed.
    """
    top_k: Optional[int] = None
    """The number of answers to return (will be chosen by order of likelihood). Note that
    fewer than top_k answers are returned if there are not enough options available within
    the context.
    """


@dataclass_with_extra
class QuestionAnsweringInput(BaseInferenceType):
    """Inputs for Question Answering inference"""

    inputs: QuestionAnsweringInputData
    """One (context, question) pair to answer"""
    parameters: Optional[QuestionAnsweringParameters] = None
    """Additional inference parameters for Question Answering"""


@dataclass_with_extra
class QuestionAnsweringOutputElement(BaseInferenceType):
    """Outputs of inference for the Question Answering task"""

    answer: str
    """The answer to the question."""
    end: int
    """The character position in the input where the answer ends."""
    score: float
    """The probability associated with the answer."""
    start: int
    """The character position in the input where the answer begins."""
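
# --- Usage sketch (illustrative, not part of the generated file) ---
# Building a Question Answering request payload from the types above and
# serializing it. Assuming `dataclass_with_extra` yields standard dataclasses,
# `dataclasses.asdict` produces the JSON-ready dict.
import dataclasses

payload = QuestionAnsweringInput(
    inputs=QuestionAnsweringInputData(
        context="The Eiffel Tower was completed in 1889.",
        question="When was it completed?",
    ),
    parameters=QuestionAnsweringParameters(top_k=1, max_answer_len=32),
)
print(dataclasses.asdict(payload))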
@@ -0,0 +1,27 @@
# Inference code generated from the JSON schema spec in @huggingface/tasks.
#
# See:
# - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
# - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
from typing import Any, Dict, List, Optional

from .base import BaseInferenceType, dataclass_with_extra


@dataclass_with_extra
class SentenceSimilarityInputData(BaseInferenceType):
    sentences: List[str]
    """A list of strings which will be compared against the source_sentence."""
    source_sentence: str
    """The string that you wish to compare the other strings with. This can be a phrase,
    sentence, or longer passage, depending on the model being used.
    """


@dataclass_with_extra
class SentenceSimilarityInput(BaseInferenceType):
    """Inputs for Sentence similarity inference"""

    inputs: SentenceSimilarityInputData
    parameters: Optional[Dict[str, Any]] = None
    """Additional inference parameters for Sentence Similarity"""
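
# --- Usage sketch (illustrative, not part of the generated file) ---
# One source sentence plus the candidate sentences to rank against it; the
# service returns one similarity score per candidate, in the same order.
request = SentenceSimilarityInput(
    inputs=SentenceSimilarityInputData(
        source_sentence="A man is eating pasta.",
        sentences=[
            "A man is eating food.",
            "A man is riding a horse.",
        ],
    ),
)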
@@ -0,0 +1,41 @@
# Inference code generated from the JSON schema spec in @huggingface/tasks.
#
# See:
# - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
# - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
from typing import Any, Dict, Literal, Optional

from .base import BaseInferenceType, dataclass_with_extra


SummarizationTruncationStrategy = Literal["do_not_truncate", "longest_first", "only_first", "only_second"]


@dataclass_with_extra
class SummarizationParameters(BaseInferenceType):
    """Additional inference parameters for summarization."""

    clean_up_tokenization_spaces: Optional[bool] = None
    """Whether to clean up the potential extra spaces in the text output."""
    generate_parameters: Optional[Dict[str, Any]] = None
    """Additional parametrization of the text generation algorithm."""
    truncation: Optional["SummarizationTruncationStrategy"] = None
    """The truncation strategy to use."""


@dataclass_with_extra
class SummarizationInput(BaseInferenceType):
    """Inputs for Summarization inference"""

    inputs: str
    """The input text to summarize."""
    parameters: Optional[SummarizationParameters] = None
    """Additional inference parameters for summarization."""


@dataclass_with_extra
class SummarizationOutput(BaseInferenceType):
    """Outputs of inference for the Summarization task"""

    summary_text: str
    """The summarized text."""
@@ -0,0 +1,62 @@
# Inference code generated from the JSON schema spec in @huggingface/tasks.
#
# See:
# - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
# - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
from typing import Dict, List, Literal, Optional

from .base import BaseInferenceType, dataclass_with_extra


@dataclass_with_extra
class TableQuestionAnsweringInputData(BaseInferenceType):
    """One (table, question) pair to answer"""

    question: str
    """The question to be answered about the table"""
    table: Dict[str, List[str]]
    """The table to serve as context for the questions"""


Padding = Literal["do_not_pad", "longest", "max_length"]


@dataclass_with_extra
class TableQuestionAnsweringParameters(BaseInferenceType):
    """Additional inference parameters for Table Question Answering"""

    padding: Optional["Padding"] = None
    """Activates and controls padding."""
    sequential: Optional[bool] = None
    """Whether to do inference sequentially or as a batch. Batching is faster, but models like
    SQA require the inference to be done sequentially to extract relations within sequences,
    given their conversational nature.
    """
    truncation: Optional[bool] = None
    """Activates and controls truncation."""


@dataclass_with_extra
class TableQuestionAnsweringInput(BaseInferenceType):
    """Inputs for Table Question Answering inference"""

    inputs: TableQuestionAnsweringInputData
    """One (table, question) pair to answer"""
    parameters: Optional[TableQuestionAnsweringParameters] = None
    """Additional inference parameters for Table Question Answering"""


@dataclass_with_extra
class TableQuestionAnsweringOutputElement(BaseInferenceType):
    """Outputs of inference for the Table Question Answering task"""

    answer: str
    """The answer to the question given the table. If there is an aggregator, the answer will be
    preceded by `AGGREGATOR >`.
    """
    cells: List[str]
    """List of strings made up of the answer cell values."""
    coordinates: List[List[int]]
    """Coordinates of the cells of the answers."""
    aggregator: Optional[str] = None
    """If the model has an aggregator, this returns the aggregator."""
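
# --- Usage sketch (illustrative, not part of the generated file) ---
# The table is a column-oriented mapping: each key is a column header and
# each value holds that column's cells, all as strings of equal length.
request = TableQuestionAnsweringInput(
    inputs=TableQuestionAnsweringInputData(
        question="How many employees does Acme have?",
        table={
            "Company": ["Acme", "Globex"],
            "Employees": ["1200", "860"],
        },
    ),
    parameters=TableQuestionAnsweringParameters(padding="longest"),
)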
@@ -0,0 +1,42 @@
# Inference code generated from the JSON schema spec in @huggingface/tasks.
#
# See:
# - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
# - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
from typing import Any, Dict, Literal, Optional

from .base import BaseInferenceType, dataclass_with_extra


Text2TextGenerationTruncationStrategy = Literal["do_not_truncate", "longest_first", "only_first", "only_second"]


@dataclass_with_extra
class Text2TextGenerationParameters(BaseInferenceType):
    """Additional inference parameters for Text2text Generation"""

    clean_up_tokenization_spaces: Optional[bool] = None
    """Whether to clean up the potential extra spaces in the text output."""
    generate_parameters: Optional[Dict[str, Any]] = None
    """Additional parametrization of the text generation algorithm"""
    truncation: Optional["Text2TextGenerationTruncationStrategy"] = None
    """The truncation strategy to use"""


@dataclass_with_extra
class Text2TextGenerationInput(BaseInferenceType):
    """Inputs for Text2text Generation inference"""

    inputs: str
    """The input text data"""
    parameters: Optional[Text2TextGenerationParameters] = None
    """Additional inference parameters for Text2text Generation"""


@dataclass_with_extra
class Text2TextGenerationOutput(BaseInferenceType):
    """Outputs of inference for the Text2text Generation task"""

    generated_text: Any
    text2_text_generation_output_generated_text: Optional[str] = None
    """The generated text."""
@@ -0,0 +1,41 @@
# Inference code generated from the JSON schema spec in @huggingface/tasks.
#
# See:
# - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
# - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
from typing import Literal, Optional

from .base import BaseInferenceType, dataclass_with_extra


TextClassificationOutputTransform = Literal["sigmoid", "softmax", "none"]


@dataclass_with_extra
class TextClassificationParameters(BaseInferenceType):
    """Additional inference parameters for Text Classification"""

    function_to_apply: Optional["TextClassificationOutputTransform"] = None
    """The function to apply to the model outputs in order to retrieve the scores."""
    top_k: Optional[int] = None
    """When specified, limits the output to the top K most probable classes."""


@dataclass_with_extra
class TextClassificationInput(BaseInferenceType):
    """Inputs for Text Classification inference"""

    inputs: str
    """The text to classify"""
    parameters: Optional[TextClassificationParameters] = None
    """Additional inference parameters for Text Classification"""


@dataclass_with_extra
class TextClassificationOutputElement(BaseInferenceType):
    """Outputs of inference for the Text Classification task"""

    label: str
    """The predicted class label."""
    score: float
    """The corresponding probability."""
@@ -0,0 +1,168 @@
# Inference code generated from the JSON schema spec in @huggingface/tasks.
#
# See:
# - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
# - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
from typing import Any, List, Literal, Optional

from .base import BaseInferenceType, dataclass_with_extra


TypeEnum = Literal["json", "regex"]


@dataclass_with_extra
class TextGenerationInputGrammarType(BaseInferenceType):
    type: "TypeEnum"
    value: Any
    """A string that represents a [JSON Schema](https://json-schema.org/).
    JSON Schema is a declarative language that allows annotating JSON documents
    with types and descriptions.
    """


@dataclass_with_extra
class TextGenerationInputGenerateParameters(BaseInferenceType):
    adapter_id: Optional[str] = None
    """Lora adapter id"""
    best_of: Optional[int] = None
    """Generate best_of sequences and return the one with the highest token logprobs."""
    decoder_input_details: Optional[bool] = None
    """Whether to return decoder input token logprobs and ids."""
    details: Optional[bool] = None
    """Whether to return generation details."""
    do_sample: Optional[bool] = None
    """Activate logits sampling."""
    frequency_penalty: Optional[float] = None
    """The parameter for frequency penalty. 1.0 means no penalty.
    Penalize new tokens based on their existing frequency in the text so far,
    decreasing the model's likelihood to repeat the same line verbatim.
    """
    grammar: Optional[TextGenerationInputGrammarType] = None
    max_new_tokens: Optional[int] = None
    """Maximum number of tokens to generate."""
    repetition_penalty: Optional[float] = None
    """The parameter for repetition penalty. 1.0 means no penalty.
    See [this paper](https://arxiv.org/pdf/1909.05858.pdf) for more details.
    """
    return_full_text: Optional[bool] = None
    """Whether to prepend the prompt to the generated text"""
    seed: Optional[int] = None
    """Random sampling seed."""
    stop: Optional[List[str]] = None
    """Stop generating tokens if a member of `stop` is generated."""
    temperature: Optional[float] = None
    """The value used to modulate the logits distribution."""
    top_k: Optional[int] = None
    """The number of highest probability vocabulary tokens to keep for top-k-filtering."""
    top_n_tokens: Optional[int] = None
    """The number of highest probability vocabulary tokens to keep for top-n-filtering."""
    top_p: Optional[float] = None
    """Top-p value for nucleus sampling."""
    truncate: Optional[int] = None
    """Truncate input tokens to the given size."""
    typical_p: Optional[float] = None
    """Typical Decoding mass.
    See [Typical Decoding for Natural Language Generation](https://arxiv.org/abs/2202.00666)
    for more information.
    """
    watermark: Optional[bool] = None
    """Watermarking with [A Watermark for Large Language
    Models](https://arxiv.org/abs/2301.10226).
    """


@dataclass_with_extra
class TextGenerationInput(BaseInferenceType):
    """Text Generation Input.
    Auto-generated from TGI specs.
    For more details, check out
    https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-tgi-import.ts.
    """

    inputs: str
    parameters: Optional[TextGenerationInputGenerateParameters] = None
    stream: Optional[bool] = None


TextGenerationOutputFinishReason = Literal["length", "eos_token", "stop_sequence"]


@dataclass_with_extra
class TextGenerationOutputPrefillToken(BaseInferenceType):
    id: int
    logprob: float
    text: str


@dataclass_with_extra
class TextGenerationOutputToken(BaseInferenceType):
    id: int
    logprob: float
    special: bool
    text: str


@dataclass_with_extra
class TextGenerationOutputBestOfSequence(BaseInferenceType):
    finish_reason: "TextGenerationOutputFinishReason"
    generated_text: str
    generated_tokens: int
    prefill: List[TextGenerationOutputPrefillToken]
    tokens: List[TextGenerationOutputToken]
    seed: Optional[int] = None
    top_tokens: Optional[List[List[TextGenerationOutputToken]]] = None


@dataclass_with_extra
class TextGenerationOutputDetails(BaseInferenceType):
    finish_reason: "TextGenerationOutputFinishReason"
    generated_tokens: int
    prefill: List[TextGenerationOutputPrefillToken]
    tokens: List[TextGenerationOutputToken]
    best_of_sequences: Optional[List[TextGenerationOutputBestOfSequence]] = None
    seed: Optional[int] = None
    top_tokens: Optional[List[List[TextGenerationOutputToken]]] = None


@dataclass_with_extra
class TextGenerationOutput(BaseInferenceType):
    """Text Generation Output.
    Auto-generated from TGI specs.
    For more details, check out
    https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-tgi-import.ts.
    """

    generated_text: str
    details: Optional[TextGenerationOutputDetails] = None


@dataclass_with_extra
class TextGenerationStreamOutputStreamDetails(BaseInferenceType):
    finish_reason: "TextGenerationOutputFinishReason"
    generated_tokens: int
    input_length: int
    seed: Optional[int] = None


@dataclass_with_extra
class TextGenerationStreamOutputToken(BaseInferenceType):
    id: int
    logprob: float
    special: bool
    text: str


@dataclass_with_extra
class TextGenerationStreamOutput(BaseInferenceType):
    """Text Generation Stream Output.
    Auto-generated from TGI specs.
    For more details, check out
    https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-tgi-import.ts.
    """

    index: int
    token: TextGenerationStreamOutputToken
    details: Optional[TextGenerationStreamOutputStreamDetails] = None
    generated_text: Optional[str] = None
    top_tokens: Optional[List[TextGenerationStreamOutputToken]] = None
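
# --- Usage sketch (illustrative, not part of the generated file) ---
# Grammar-constrained generation: `type="json"` with a JSON Schema string as
# the value. Whether the constraint is enforced depends on the serving
# backend (these fields mirror TGI's generation options).
schema = '{"type": "object", "properties": {"city": {"type": "string"}}}'
request = TextGenerationInput(
    inputs="Extract the city: 'I flew to Paris last week.'",
    parameters=TextGenerationInputGenerateParameters(
        max_new_tokens=64,
        grammar=TextGenerationInputGrammarType(type="json", value=schema),
    ),
    stream=False,
)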
@@ -0,0 +1,100 @@
# Inference code generated from the JSON schema spec in @huggingface/tasks.
#
# See:
# - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
# - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
from typing import Any, Literal, Optional, Union

from .base import BaseInferenceType, dataclass_with_extra


TextToAudioEarlyStoppingEnum = Literal["never"]


@dataclass_with_extra
class TextToAudioGenerationParameters(BaseInferenceType):
    """Parametrization of the text generation process"""

    do_sample: Optional[bool] = None
    """Whether to use sampling instead of greedy decoding when generating new tokens."""
    early_stopping: Optional[Union[bool, "TextToAudioEarlyStoppingEnum"]] = None
    """Controls the stopping condition for beam-based methods."""
    epsilon_cutoff: Optional[float] = None
    """If set to float strictly between 0 and 1, only tokens with a conditional probability
    greater than epsilon_cutoff will be sampled. In the paper, suggested values range from
    3e-4 to 9e-4, depending on the size of the model. See [Truncation Sampling as Language
    Model Desmoothing](https://hf.co/papers/2210.15191) for more details.
    """
    eta_cutoff: Optional[float] = None
    """Eta sampling is a hybrid of locally typical sampling and epsilon sampling. If set to
    float strictly between 0 and 1, a token is only considered if it is greater than either
    eta_cutoff or sqrt(eta_cutoff) * exp(-entropy(softmax(next_token_logits))). The latter
    term is intuitively the expected next token probability, scaled by sqrt(eta_cutoff). In
    the paper, suggested values range from 3e-4 to 2e-3, depending on the size of the model.
    See [Truncation Sampling as Language Model Desmoothing](https://hf.co/papers/2210.15191)
    for more details.
    """
    max_length: Optional[int] = None
    """The maximum length (in tokens) of the generated text, including the input."""
    max_new_tokens: Optional[int] = None
    """The maximum number of tokens to generate. Takes precedence over max_length."""
    min_length: Optional[int] = None
    """The minimum length (in tokens) of the generated text, including the input."""
    min_new_tokens: Optional[int] = None
    """The minimum number of tokens to generate. Takes precedence over min_length."""
    num_beam_groups: Optional[int] = None
    """Number of groups to divide num_beams into in order to ensure diversity among different
    groups of beams. See [this paper](https://hf.co/papers/1610.02424) for more details.
    """
    num_beams: Optional[int] = None
    """Number of beams to use for beam search."""
    penalty_alpha: Optional[float] = None
    """The value balances the model confidence and the degeneration penalty in contrastive
    search decoding.
    """
    temperature: Optional[float] = None
    """The value used to modulate the next token probabilities."""
    top_k: Optional[int] = None
    """The number of highest probability vocabulary tokens to keep for top-k-filtering."""
    top_p: Optional[float] = None
    """If set to float < 1, only the smallest set of most probable tokens with probabilities
    that add up to top_p or higher are kept for generation.
    """
    typical_p: Optional[float] = None
    """Local typicality measures how similar the conditional probability of predicting a target
    token next is to the expected conditional probability of predicting a random token next,
    given the partial text already generated. If set to float < 1, the smallest set of the
    most locally typical tokens with probabilities that add up to typical_p or higher are
    kept for generation. See [this paper](https://hf.co/papers/2202.00666) for more details.
    """
    use_cache: Optional[bool] = None
    """Whether the model should use the past key/value attentions to speed up decoding."""


@dataclass_with_extra
class TextToAudioParameters(BaseInferenceType):
    """Additional inference parameters for Text To Audio"""

    # Will be deprecated in the future when the renaming to `generation_parameters` is implemented in transformers
    generate_kwargs: Optional[TextToAudioGenerationParameters] = None
    """Parametrization of the text generation process"""


@dataclass_with_extra
class TextToAudioInput(BaseInferenceType):
    """Inputs for Text To Audio inference"""

    inputs: str
    """The input text data"""
    parameters: Optional[TextToAudioParameters] = None
    """Additional inference parameters for Text To Audio"""


@dataclass_with_extra
class TextToAudioOutput(BaseInferenceType):
    """Outputs of inference for the Text To Audio task"""

    audio: Any
    """The generated audio waveform."""
    sampling_rate: float
    """The sampling rate of the generated audio waveform."""
@@ -0,0 +1,50 @@
# Inference code generated from the JSON schema spec in @huggingface/tasks.
#
# See:
# - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
# - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
from typing import Any, Optional

from .base import BaseInferenceType, dataclass_with_extra


@dataclass_with_extra
class TextToImageParameters(BaseInferenceType):
    """Additional inference parameters for Text To Image"""

    guidance_scale: Optional[float] = None
    """A higher guidance scale value encourages the model to generate images closely linked to
    the text prompt, but values too high may cause saturation and other artifacts.
    """
    height: Optional[int] = None
    """The height in pixels of the output image"""
    negative_prompt: Optional[str] = None
    """One prompt to guide what NOT to include in image generation."""
    num_inference_steps: Optional[int] = None
    """The number of denoising steps. More denoising steps usually lead to a higher quality
    image at the expense of slower inference.
    """
    scheduler: Optional[str] = None
    """Override the scheduler with a compatible one."""
    seed: Optional[int] = None
    """Seed for the random number generator."""
    width: Optional[int] = None
    """The width in pixels of the output image"""


@dataclass_with_extra
class TextToImageInput(BaseInferenceType):
    """Inputs for Text To Image inference"""

    inputs: str
    """The input text data (sometimes called "prompt")"""
    parameters: Optional[TextToImageParameters] = None
    """Additional inference parameters for Text To Image"""


@dataclass_with_extra
class TextToImageOutput(BaseInferenceType):
    """Outputs of inference for the Text To Image task"""

    image: Any
    """The generated image returned as raw bytes in the payload."""
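
# --- Usage sketch (illustrative, not part of the generated file) ---
# A typical request: a higher guidance_scale follows the prompt more closely,
# more num_inference_steps trades speed for quality, and seed makes the
# result reproducible on backends that honor it.
request = TextToImageInput(
    inputs="a watercolor painting of a lighthouse at dawn",
    parameters=TextToImageParameters(
        guidance_scale=7.5,
        num_inference_steps=30,
        width=768,
        height=512,
        seed=42,
    ),
)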
@@ -0,0 +1,100 @@
# Inference code generated from the JSON schema spec in @huggingface/tasks.
#
# See:
# - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
# - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
from typing import Any, Literal, Optional, Union

from .base import BaseInferenceType, dataclass_with_extra


TextToSpeechEarlyStoppingEnum = Literal["never"]


@dataclass_with_extra
class TextToSpeechGenerationParameters(BaseInferenceType):
    """Parametrization of the text generation process"""

    do_sample: Optional[bool] = None
    """Whether to use sampling instead of greedy decoding when generating new tokens."""
    early_stopping: Optional[Union[bool, "TextToSpeechEarlyStoppingEnum"]] = None
    """Controls the stopping condition for beam-based methods."""
    epsilon_cutoff: Optional[float] = None
    """If set to float strictly between 0 and 1, only tokens with a conditional probability
    greater than epsilon_cutoff will be sampled. In the paper, suggested values range from
    3e-4 to 9e-4, depending on the size of the model. See [Truncation Sampling as Language
    Model Desmoothing](https://hf.co/papers/2210.15191) for more details.
    """
    eta_cutoff: Optional[float] = None
    """Eta sampling is a hybrid of locally typical sampling and epsilon sampling. If set to
    float strictly between 0 and 1, a token is only considered if it is greater than either
    eta_cutoff or sqrt(eta_cutoff) * exp(-entropy(softmax(next_token_logits))). The latter
    term is intuitively the expected next token probability, scaled by sqrt(eta_cutoff). In
    the paper, suggested values range from 3e-4 to 2e-3, depending on the size of the model.
    See [Truncation Sampling as Language Model Desmoothing](https://hf.co/papers/2210.15191)
    for more details.
    """
    max_length: Optional[int] = None
    """The maximum length (in tokens) of the generated text, including the input."""
    max_new_tokens: Optional[int] = None
    """The maximum number of tokens to generate. Takes precedence over max_length."""
    min_length: Optional[int] = None
    """The minimum length (in tokens) of the generated text, including the input."""
    min_new_tokens: Optional[int] = None
    """The minimum number of tokens to generate. Takes precedence over min_length."""
    num_beam_groups: Optional[int] = None
    """Number of groups to divide num_beams into in order to ensure diversity among different
    groups of beams. See [this paper](https://hf.co/papers/1610.02424) for more details.
    """
    num_beams: Optional[int] = None
    """Number of beams to use for beam search."""
    penalty_alpha: Optional[float] = None
    """The value balances the model confidence and the degeneration penalty in contrastive
    search decoding.
    """
    temperature: Optional[float] = None
    """The value used to modulate the next token probabilities."""
    top_k: Optional[int] = None
    """The number of highest probability vocabulary tokens to keep for top-k-filtering."""
    top_p: Optional[float] = None
    """If set to float < 1, only the smallest set of most probable tokens with probabilities
    that add up to top_p or higher are kept for generation.
    """
    typical_p: Optional[float] = None
    """Local typicality measures how similar the conditional probability of predicting a target
    token next is to the expected conditional probability of predicting a random token next,
    given the partial text already generated. If set to float < 1, the smallest set of the
    most locally typical tokens with probabilities that add up to typical_p or higher are
    kept for generation. See [this paper](https://hf.co/papers/2202.00666) for more details.
    """
    use_cache: Optional[bool] = None
    """Whether the model should use the past key/value attentions to speed up decoding."""


@dataclass_with_extra
class TextToSpeechParameters(BaseInferenceType):
    """Additional inference parameters for Text To Speech"""

    # Will be deprecated in the future when the renaming to `generation_parameters` is implemented in transformers
    generate_kwargs: Optional[TextToSpeechGenerationParameters] = None
    """Parametrization of the text generation process"""


@dataclass_with_extra
class TextToSpeechInput(BaseInferenceType):
    """Inputs for Text To Speech inference"""

    inputs: str
    """The input text data"""
    parameters: Optional[TextToSpeechParameters] = None
    """Additional inference parameters for Text To Speech"""


@dataclass_with_extra
class TextToSpeechOutput(BaseInferenceType):
    """Outputs of inference for the Text To Speech task"""

    audio: Any
    """The generated audio"""
    sampling_rate: Optional[float] = None
    """The sampling rate of the generated audio waveform."""
@@ -0,0 +1,46 @@
# Inference code generated from the JSON schema spec in @huggingface/tasks.
#
# See:
# - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
# - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
from typing import Any, List, Optional

from .base import BaseInferenceType, dataclass_with_extra


@dataclass_with_extra
class TextToVideoParameters(BaseInferenceType):
    """Additional inference parameters for Text To Video"""

    guidance_scale: Optional[float] = None
    """A higher guidance scale value encourages the model to generate videos closely linked to
    the text prompt, but values too high may cause saturation and other artifacts.
    """
    negative_prompt: Optional[List[str]] = None
    """One or several prompts to guide what NOT to include in video generation."""
    num_frames: Optional[float] = None
    """The num_frames parameter determines how many video frames are generated."""
    num_inference_steps: Optional[int] = None
    """The number of denoising steps. More denoising steps usually lead to a higher quality
    video at the expense of slower inference.
    """
    seed: Optional[int] = None
    """Seed for the random number generator."""


@dataclass_with_extra
class TextToVideoInput(BaseInferenceType):
    """Inputs for Text To Video inference"""

    inputs: str
    """The input text data (sometimes called "prompt")"""
    parameters: Optional[TextToVideoParameters] = None
    """Additional inference parameters for Text To Video"""


@dataclass_with_extra
class TextToVideoOutput(BaseInferenceType):
    """Outputs of inference for the Text To Video task"""

    video: Any
    """The generated video returned as raw bytes in the payload."""
@@ -0,0 +1,51 @@
# Inference code generated from the JSON schema spec in @huggingface/tasks.
#
# See:
# - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
# - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
from typing import List, Literal, Optional

from .base import BaseInferenceType, dataclass_with_extra


TokenClassificationAggregationStrategy = Literal["none", "simple", "first", "average", "max"]


@dataclass_with_extra
class TokenClassificationParameters(BaseInferenceType):
    """Additional inference parameters for Token Classification"""

    aggregation_strategy: Optional["TokenClassificationAggregationStrategy"] = None
    """The strategy used to fuse tokens based on model predictions"""
    ignore_labels: Optional[List[str]] = None
    """A list of labels to ignore"""
    stride: Optional[int] = None
    """The number of overlapping tokens between chunks when splitting the input text."""


@dataclass_with_extra
class TokenClassificationInput(BaseInferenceType):
    """Inputs for Token Classification inference"""

    inputs: str
    """The input text data"""
    parameters: Optional[TokenClassificationParameters] = None
    """Additional inference parameters for Token Classification"""


@dataclass_with_extra
class TokenClassificationOutputElement(BaseInferenceType):
    """Outputs of inference for the Token Classification task"""

    end: int
    """The character position in the input where this group ends."""
    score: float
    """The associated score / probability"""
    start: int
    """The character position in the input where this group begins."""
    word: str
    """The corresponding text"""
    entity: Optional[str] = None
    """The predicted label for a single token"""
    entity_group: Optional[str] = None
    """The predicted label for a group of one or more tokens"""
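
# --- Usage sketch (illustrative, not part of the generated file) ---
# With an aggregation strategy other than "none", sub-tokens are fused into
# entity groups, so `entity_group` is set and `entity` stays None; start/end
# index into the original input string. Hand-built sample data:
element = TokenClassificationOutputElement(
    end=8,
    score=0.998,
    start=0,
    word="New York",
    entity_group="LOC",
)
text = "New York is large."
assert text[element.start:element.end] == element.word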
@@ -0,0 +1,49 @@
# Inference code generated from the JSON schema spec in @huggingface/tasks.
#
# See:
# - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
# - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
from typing import Any, Dict, Literal, Optional

from .base import BaseInferenceType, dataclass_with_extra


TranslationTruncationStrategy = Literal["do_not_truncate", "longest_first", "only_first", "only_second"]


@dataclass_with_extra
class TranslationParameters(BaseInferenceType):
    """Additional inference parameters for Translation"""

    clean_up_tokenization_spaces: Optional[bool] = None
    """Whether to clean up the potential extra spaces in the text output."""
    generate_parameters: Optional[Dict[str, Any]] = None
    """Additional parametrization of the text generation algorithm."""
    src_lang: Optional[str] = None
    """The source language of the text. Required for models that can translate from multiple
    languages.
    """
    tgt_lang: Optional[str] = None
    """Target language to translate to. Required for models that can translate to multiple
    languages.
    """
    truncation: Optional["TranslationTruncationStrategy"] = None
    """The truncation strategy to use."""


@dataclass_with_extra
class TranslationInput(BaseInferenceType):
    """Inputs for Translation inference"""

    inputs: str
    """The text to translate."""
    parameters: Optional[TranslationParameters] = None
    """Additional inference parameters for Translation"""


@dataclass_with_extra
class TranslationOutput(BaseInferenceType):
    """Outputs of inference for the Translation task"""

    translation_text: str
    """The translated text."""
@@ -0,0 +1,45 @@
# Inference code generated from the JSON schema spec in @huggingface/tasks.
#
# See:
# - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
# - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
from typing import Any, Literal, Optional

from .base import BaseInferenceType, dataclass_with_extra


VideoClassificationOutputTransform = Literal["sigmoid", "softmax", "none"]


@dataclass_with_extra
class VideoClassificationParameters(BaseInferenceType):
    """Additional inference parameters for Video Classification"""

    frame_sampling_rate: Optional[int] = None
    """The sampling rate used to select frames from the video."""
    function_to_apply: Optional["VideoClassificationOutputTransform"] = None
    """The function to apply to the model outputs in order to retrieve the scores."""
    num_frames: Optional[int] = None
    """The number of sampled frames to consider for classification."""
    top_k: Optional[int] = None
    """When specified, limits the output to the top K most probable classes."""


@dataclass_with_extra
class VideoClassificationInput(BaseInferenceType):
    """Inputs for Video Classification inference"""

    inputs: Any
    """The input video data"""
    parameters: Optional[VideoClassificationParameters] = None
    """Additional inference parameters for Video Classification"""


@dataclass_with_extra
class VideoClassificationOutputElement(BaseInferenceType):
    """Outputs of inference for the Video Classification task"""

    label: str
    """The predicted class label."""
    score: float
    """The corresponding probability."""
@@ -0,0 +1,49 @@
# Inference code generated from the JSON schema spec in @huggingface/tasks.
#
# See:
# - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
# - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
from typing import Any, Optional

from .base import BaseInferenceType, dataclass_with_extra


@dataclass_with_extra
class VisualQuestionAnsweringInputData(BaseInferenceType):
    """One (image, question) pair to answer"""

    image: Any
    """The image."""
    question: str
    """The question to answer based on the image."""


@dataclass_with_extra
class VisualQuestionAnsweringParameters(BaseInferenceType):
    """Additional inference parameters for Visual Question Answering"""

    top_k: Optional[int] = None
    """The number of answers to return (will be chosen by order of likelihood). Note that
    fewer than top_k answers are returned if there are not enough options available within
    the context.
    """


@dataclass_with_extra
class VisualQuestionAnsweringInput(BaseInferenceType):
    """Inputs for Visual Question Answering inference"""

    inputs: VisualQuestionAnsweringInputData
    """One (image, question) pair to answer"""
    parameters: Optional[VisualQuestionAnsweringParameters] = None
    """Additional inference parameters for Visual Question Answering"""


@dataclass_with_extra
class VisualQuestionAnsweringOutputElement(BaseInferenceType):
    """Outputs of inference for the Visual Question Answering task"""

    score: float
    """The associated score / probability"""
    answer: Optional[str] = None
    """The answer to the question"""
@@ -0,0 +1,45 @@
# Inference code generated from the JSON schema spec in @huggingface/tasks.
#
# See:
# - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
# - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
from typing import List, Optional

from .base import BaseInferenceType, dataclass_with_extra


@dataclass_with_extra
class ZeroShotClassificationParameters(BaseInferenceType):
    """Additional inference parameters for Zero Shot Classification"""

    candidate_labels: List[str]
    """The set of possible class labels to classify the text into."""
    hypothesis_template: Optional[str] = None
    """The sentence used in conjunction with `candidate_labels` to attempt the text
    classification by replacing the placeholder with the candidate labels.
    """
    multi_label: Optional[bool] = None
    """Whether multiple candidate labels can be true. If false, the scores are normalized such
    that the sum of the label likelihoods for each sequence is 1. If true, the labels are
    considered independent and probabilities are normalized for each candidate.
    """


@dataclass_with_extra
class ZeroShotClassificationInput(BaseInferenceType):
    """Inputs for Zero Shot Classification inference"""

    inputs: str
    """The text to classify"""
    parameters: ZeroShotClassificationParameters
    """Additional inference parameters for Zero Shot Classification"""


@dataclass_with_extra
class ZeroShotClassificationOutputElement(BaseInferenceType):
    """Outputs of inference for the Zero Shot Classification task"""

    label: str
    """The predicted class label."""
    score: float
    """The corresponding probability."""
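
# --- Usage sketch (illustrative, not part of the generated file) ---
# The hypothesis template's `{}` placeholder is filled with each candidate
# label in turn; transformers' default template is "This example is {}.".
request = ZeroShotClassificationInput(
    inputs="The new GPU doubles training throughput.",
    parameters=ZeroShotClassificationParameters(
        candidate_labels=["technology", "sports", "politics"],
        hypothesis_template="This text is about {}.",
        multi_label=False,
    ),
)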
@@ -0,0 +1,40 @@
# Inference code generated from the JSON schema spec in @huggingface/tasks.
#
# See:
# - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
# - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
from typing import List, Optional

from .base import BaseInferenceType, dataclass_with_extra


@dataclass_with_extra
class ZeroShotImageClassificationParameters(BaseInferenceType):
    """Additional inference parameters for Zero Shot Image Classification"""

    candidate_labels: List[str]
    """The candidate labels for this image"""
    hypothesis_template: Optional[str] = None
    """The sentence used in conjunction with `candidate_labels` to attempt the image
    classification by replacing the placeholder with the candidate labels.
    """


@dataclass_with_extra
class ZeroShotImageClassificationInput(BaseInferenceType):
    """Inputs for Zero Shot Image Classification inference"""

    inputs: str
    """The input image data to classify as a base64-encoded string."""
    parameters: ZeroShotImageClassificationParameters
    """Additional inference parameters for Zero Shot Image Classification"""


@dataclass_with_extra
class ZeroShotImageClassificationOutputElement(BaseInferenceType):
    """Outputs of inference for the Zero Shot Image Classification task"""

    label: str
    """The predicted class label."""
    score: float
    """The corresponding probability."""
@@ -0,0 +1,52 @@
# Inference code generated from the JSON schema spec in @huggingface/tasks.
#
# See:
# - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
# - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
from typing import List

from .base import BaseInferenceType, dataclass_with_extra


@dataclass_with_extra
class ZeroShotObjectDetectionParameters(BaseInferenceType):
    """Additional inference parameters for Zero Shot Object Detection"""

    candidate_labels: List[str]
    """The candidate labels for this image"""


@dataclass_with_extra
class ZeroShotObjectDetectionInput(BaseInferenceType):
    """Inputs for Zero Shot Object Detection inference"""

    inputs: str
    """The input image data as a base64-encoded string."""
    parameters: ZeroShotObjectDetectionParameters
    """Additional inference parameters for Zero Shot Object Detection"""


@dataclass_with_extra
class ZeroShotObjectDetectionBoundingBox(BaseInferenceType):
    """The predicted bounding box. Coordinates are relative to the top left corner of the input
    image.
    """

    xmax: int
    xmin: int
    ymax: int
    ymin: int


@dataclass_with_extra
class ZeroShotObjectDetectionOutputElement(BaseInferenceType):
    """Outputs of inference for the Zero Shot Object Detection task"""

    box: ZeroShotObjectDetectionBoundingBox
    """The predicted bounding box. Coordinates are relative to the top left corner of the input
    image.
    """
    label: str
    """A candidate label"""
    score: float
    """The associated score / probability"""