fix: allow custom torch_dtype in vlm models (#1735)

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
Michele Dolfi, 2025-06-10 03:52:15 -05:00 (committed by GitHub)
parent 49b10e7419
commit f7f31137f1
2 changed files with 3 additions and 1 deletion


@@ -1,5 +1,5 @@
 from enum import Enum
-from typing import Any, Dict, List, Literal
+from typing import Any, Dict, List, Literal, Optional, Union
 
 from pydantic import AnyUrl, BaseModel
 from typing_extensions import deprecated
@@ -42,6 +42,7 @@ class InlineVlmOptions(BaseVlmOptions):
     transformers_model_type: TransformersModelType = TransformersModelType.AUTOMODEL
     response_format: ResponseFormat
+    torch_dtype: Optional[str] = None
     supported_devices: List[AcceleratorDevice] = [
         AcceleratorDevice.CPU,
         AcceleratorDevice.CUDA,
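
The new torch_dtype field defaults to None, so existing option presets keep their current behavior; when set, the string is forwarded verbatim to the model loader. Below is a minimal sketch of how a caller might set it, assuming the docling options API: the import path, repo_id, prompt, and the other field values are illustrative assumptions, not part of this diff.

# Hedged sketch: configuring an inline VLM with an explicit dtype.
# repo_id and prompt are placeholders; only torch_dtype is the field added in this change.
from docling.datamodel.pipeline_options_vlm_model import (
    InferenceFramework,
    InlineVlmOptions,
    ResponseFormat,
)

vlm_options = InlineVlmOptions(
    repo_id="org/some-vlm-model",  # hypothetical model repository
    prompt="Convert this page to markdown.",  # hypothetical prompt
    inference_framework=InferenceFramework.TRANSFORMERS,
    response_format=ResponseFormat.MARKDOWN,
    torch_dtype="bfloat16",  # new optional field introduced by this change
)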


@@ -99,6 +99,7 @@ class HuggingFaceTransformersVlmModel(BasePageModel, HuggingFaceModelDownloadMixin):
         self.vlm_model = model_cls.from_pretrained(
             artifacts_path,
             device_map=self.device,
+            torch_dtype=self.vlm_options.torch_dtype,
             _attn_implementation=(
                 "flash_attention_2"
                 if self.device.startswith("cuda")
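
Downstream, transformers' from_pretrained accepts torch_dtype either as a torch.dtype or as a string such as "float16", "bfloat16", or "auto", and passing None keeps the default dtype handling, which is why forwarding self.vlm_options.torch_dtype directly is safe for configurations that never set the new field. A standalone sketch of the downstream call, with a placeholder model id:

# Illustration of the forwarded value only; not code from this repository.
from transformers import AutoModelForVision2Seq

model = AutoModelForVision2Seq.from_pretrained(
    "org/some-vlm-model",  # hypothetical model repository
    torch_dtype="bfloat16",  # string dtype names and "auto" are accepted; None keeps the default
)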