fix: add smoldocling in download utils (#1577)

add smoldocling in download utils

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
This commit is contained in:
Michele Dolfi 2025-05-12 10:48:07 +02:00 committed by GitHub
parent 844babb390
commit 127e38646f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 28 additions and 0 deletions

View File

@ -32,6 +32,8 @@ class _AvailableModels(str, Enum):
CODE_FORMULA = "code_formula"
PICTURE_CLASSIFIER = "picture_classifier"
SMOLVLM = "smolvlm"
SMOLDOCLING = "smoldocling"
SMOLDOCLING_MLX = "smoldocling_mlx"
GRANITE_VISION = "granite_vision"
EASYOCR = "easyocr"
@ -105,6 +107,8 @@ def download(
with_code_formula=_AvailableModels.CODE_FORMULA in to_download,
with_picture_classifier=_AvailableModels.PICTURE_CLASSIFIER in to_download,
with_smolvlm=_AvailableModels.SMOLVLM in to_download,
with_smoldocling=_AvailableModels.SMOLDOCLING in to_download,
with_smoldocling_mlx=_AvailableModels.SMOLDOCLING_MLX in to_download,
with_granite_vision=_AvailableModels.GRANITE_VISION in to_download,
with_easyocr=_AvailableModels.EASYOCR in to_download,
)

View File

@ -4,12 +4,15 @@ from typing import Optional
from docling.datamodel.pipeline_options import (
granite_picture_description,
smoldocling_vlm_conversion_options,
smoldocling_vlm_mlx_conversion_options,
smolvlm_picture_description,
)
from docling.datamodel.settings import settings
from docling.models.code_formula_model import CodeFormulaModel
from docling.models.document_picture_classifier import DocumentPictureClassifier
from docling.models.easyocr_model import EasyOcrModel
from docling.models.hf_vlm_model import HuggingFaceVlmModel
from docling.models.layout_model import LayoutModel
from docling.models.picture_description_vlm_model import PictureDescriptionVlmModel
from docling.models.table_structure_model import TableStructureModel
@ -27,6 +30,8 @@ def download_models(
with_code_formula: bool = True,
with_picture_classifier: bool = True,
with_smolvlm: bool = False,
with_smoldocling: bool = False,
with_smoldocling_mlx: bool = False,
with_granite_vision: bool = False,
with_easyocr: bool = True,
):
@ -77,6 +82,25 @@ def download_models(
progress=progress,
)
if with_smoldocling:
_log.info("Downloading SmolDocling model...")
HuggingFaceVlmModel.download_models(
repo_id=smoldocling_vlm_conversion_options.repo_id,
local_dir=output_dir / smoldocling_vlm_conversion_options.repo_cache_folder,
force=force,
progress=progress,
)
if with_smoldocling_mlx:
_log.info("Downloading SmolDocling MLX model...")
HuggingFaceVlmModel.download_models(
repo_id=smoldocling_vlm_mlx_conversion_options.repo_id,
local_dir=output_dir
/ smoldocling_vlm_mlx_conversion_options.repo_cache_folder,
force=force,
progress=progress,
)
if with_granite_vision:
_log.info("Downloading Granite Vision model...")
PictureDescriptionVlmModel.download_models(