diff --git a/docling/models/picture_description_vlm_model.py b/docling/models/picture_description_vlm_model.py
index 230151d..63c0af8 100644
--- a/docling/models/picture_description_vlm_model.py
+++ b/docling/models/picture_description_vlm_model.py
@@ -1,3 +1,4 @@
+import threading
 from collections.abc import Iterable
 from pathlib import Path
 from typing import Optional, Type, Union
@@ -15,6 +16,9 @@ from docling.models.utils.hf_model_download import (
 )
 from docling.utils.accelerator_utils import decide_device
 
+# Global lock for model initialization to prevent threading issues
+_model_init_lock = threading.Lock()
+
 
 class PictureDescriptionVlmModel(
     PictureDescriptionBaseModel, HuggingFaceModelDownloadMixin
@@ -57,17 +61,18 @@ class PictureDescriptionVlmModel(
         )
 
         # Initialize processor and model
-        self.processor = AutoProcessor.from_pretrained(artifacts_path)
-        self.model = AutoModelForVision2Seq.from_pretrained(
-            artifacts_path,
-            torch_dtype=torch.bfloat16,
-            _attn_implementation=(
-                "flash_attention_2"
-                if self.device.startswith("cuda")
-                and accelerator_options.cuda_use_flash_attention2
-                else "eager"
-            ),
-        ).to(self.device)
+        with _model_init_lock:
+            self.processor = AutoProcessor.from_pretrained(artifacts_path)
+            self.model = AutoModelForVision2Seq.from_pretrained(
+                artifacts_path,
+                torch_dtype=torch.bfloat16,
+                _attn_implementation=(
+                    "flash_attention_2"
+                    if self.device.startswith("cuda")
+                    and accelerator_options.cuda_use_flash_attention2
+                    else "eager"
+                ),
+            ).to(self.device)
 
         self.provenance = f"{self.options.repo_id}"
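
For context, a minimal, self-contained sketch of the pattern the patch introduces: a module-level `threading.Lock` that serializes model construction when several threads build pipelines at once. `SlowLoader` and `Describer` below are hypothetical stand-ins for the Hugging Face loaders and for `PictureDescriptionVlmModel`; they are not part of either codebase.

```python
import threading
import time

# Module-level lock, analogous to _model_init_lock in the patch.
_init_lock = threading.Lock()


class SlowLoader:
    """Hypothetical stand-in for AutoProcessor/AutoModelForVision2Seq loading,
    assumed not to be safe to run from several threads at once."""

    def __init__(self, name: str) -> None:
        time.sleep(0.1)  # simulate an expensive, non-reentrant load
        self.name = name


class Describer:
    """Hypothetical stand-in for PictureDescriptionVlmModel."""

    def __init__(self, name: str) -> None:
        # Only one thread at a time may run the loading step.
        with _init_lock:
            self.model = SlowLoader(name)


def build(i: int) -> None:
    Describer(f"model-{i}")


if __name__ == "__main__":
    threads = [threading.Thread(target=build, args=(i,)) for i in range(4)]
    for t in threads:
        t.start()
    for t in threads:
        t.join()
    print("all instances initialized; loads were serialized by the lock")
```

As in the patch, only initialization is serialized; anything that happens after `__init__` still runs concurrently.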