diff --git a/docling/cli/models.py b/docling/cli/models.py index 3b62ad6..50038ad 100644 --- a/docling/cli/models.py +++ b/docling/cli/models.py @@ -32,9 +32,19 @@ class _AvailableModels(str, Enum): CODE_FORMULA = "code_formula" PICTURE_CLASSIFIER = "picture_classifier" SMOLVLM = "smolvlm" + GRANITE_VISION = "granite_vision" EASYOCR = "easyocr" +_default_models = [ + _AvailableModels.LAYOUT, + _AvailableModels.TABLEFORMER, + _AvailableModels.CODE_FORMULA, + _AvailableModels.PICTURE_CLASSIFIER, + _AvailableModels.EASYOCR, +] + + @app.command("download") def download( output_dir: Annotated[ @@ -73,7 +83,7 @@ def download( datefmt="[%X]", handlers=[RichHandler(show_level=False, show_time=False, markup=True)], ) - to_download = models or [m for m in _AvailableModels] + to_download = models or _default_models output_dir = download_models( output_dir=output_dir, force=force, @@ -83,6 +93,7 @@ def download( with_code_formula=_AvailableModels.CODE_FORMULA in to_download, with_picture_classifier=_AvailableModels.PICTURE_CLASSIFIER in to_download, with_smolvlm=_AvailableModels.SMOLVLM in to_download, + with_granite_vision=_AvailableModels.GRANITE_VISION in to_download, with_easyocr=_AvailableModels.EASYOCR in to_download, ) diff --git a/docling/models/picture_description_vlm_model.py b/docling/models/picture_description_vlm_model.py index 9fa4826..69d185b 100644 --- a/docling/models/picture_description_vlm_model.py +++ b/docling/models/picture_description_vlm_model.py @@ -41,9 +41,9 @@ class PictureDescriptionVlmModel(PictureDescriptionBaseModel): ) # Initialize processor and model - self.processor = AutoProcessor.from_pretrained(self.options.repo_id) + self.processor = AutoProcessor.from_pretrained(artifacts_path) self.model = AutoModelForVision2Seq.from_pretrained( - self.options.repo_id, + artifacts_path, torch_dtype=torch.bfloat16, _attn_implementation=( "flash_attention_2" if self.device.startswith("cuda") else "eager" diff --git a/docling/utils/model_downloader.py b/docling/utils/model_downloader.py index 7d22b77..694fe04 100644 --- a/docling/utils/model_downloader.py +++ b/docling/utils/model_downloader.py @@ -2,7 +2,10 @@ import logging from pathlib import Path from typing import Optional -from docling.datamodel.pipeline_options import smolvlm_picture_description +from docling.datamodel.pipeline_options import ( + granite_picture_description, + smolvlm_picture_description, +) from docling.datamodel.settings import settings from docling.models.code_formula_model import CodeFormulaModel from docling.models.document_picture_classifier import DocumentPictureClassifier @@ -23,7 +26,8 @@ def download_models( with_tableformer: bool = True, with_code_formula: bool = True, with_picture_classifier: bool = True, - with_smolvlm: bool = True, + with_smolvlm: bool = False, + with_granite_vision: bool = False, with_easyocr: bool = True, ): if output_dir is None: @@ -73,6 +77,15 @@ def download_models( progress=progress, ) + if with_granite_vision: + _log.info(f"Downloading Granite Vision model...") + PictureDescriptionVlmModel.download_models( + repo_id=granite_picture_description.repo_id, + local_dir=output_dir / granite_picture_description.repo_cache_folder, + force=force, + progress=progress, + ) + if with_easyocr: _log.info(f"Downloading easyocr models...") EasyOcrModel.download_models(