diff --git a/docling/datamodel/pipeline_options.py b/docling/datamodel/pipeline_options.py index a24df89..1d35227 100644 --- a/docling/datamodel/pipeline_options.py +++ b/docling/datamodel/pipeline_options.py @@ -225,6 +225,7 @@ class PictureDescriptionApiOptions(PictureDescriptionBaseOptions): headers: Dict[str, str] = {} params: Dict[str, Any] = {} timeout: float = 20 + concurrency: int = 1 prompt: str = "Describe this image in a few sentences." provenance: str = "" @@ -295,6 +296,7 @@ class ApiVlmOptions(BaseVlmOptions): params: Dict[str, Any] = {} scale: float = 2.0 timeout: float = 60 + concurrency: int = 1 response_format: ResponseFormat diff --git a/docling/models/api_vlm_model.py b/docling/models/api_vlm_model.py index f7e82b5..60bc6fc 100644 --- a/docling/models/api_vlm_model.py +++ b/docling/models/api_vlm_model.py @@ -1,4 +1,5 @@ from collections.abc import Iterable +from concurrent.futures import ThreadPoolExecutor from docling.datamodel.base_models import Page, VlmPrediction from docling.datamodel.document import ConversionResult @@ -27,6 +28,7 @@ class ApiVlmModel(BasePageModel): ) self.timeout = self.vlm_options.timeout + self.concurrency = self.vlm_options.concurrency self.prompt_content = ( f"This is a page from a document.\n{self.vlm_options.prompt}" ) @@ -38,10 +40,10 @@ class ApiVlmModel(BasePageModel): def __call__( self, conv_res: ConversionResult, page_batch: Iterable[Page] ) -> Iterable[Page]: - for page in page_batch: + def _vlm_request(page): assert page._backend is not None if not page._backend.is_valid(): - yield page + return page else: with TimeRecorder(conv_res, "vlm"): assert page.size is not None @@ -63,4 +65,7 @@ class ApiVlmModel(BasePageModel): page.predictions.vlm_response = VlmPrediction(text=page_tags) - yield page + return page + + with ThreadPoolExecutor(max_workers=self.concurrency) as executor: + yield from executor.map(_vlm_request, page_batch) diff --git a/docling/models/picture_description_api_model.py b/docling/models/picture_description_api_model.py index 44bb5e2..eb331b2 100644 --- a/docling/models/picture_description_api_model.py +++ b/docling/models/picture_description_api_model.py @@ -1,4 +1,5 @@ from collections.abc import Iterable +from concurrent.futures import ThreadPoolExecutor from pathlib import Path from typing import Optional, Type, Union @@ -37,6 +38,7 @@ class PictureDescriptionApiModel(PictureDescriptionBaseModel): accelerator_options=accelerator_options, ) self.options: PictureDescriptionApiOptions + self.concurrency = self.options.concurrency if self.enabled: if not enable_remote_services: @@ -48,8 +50,8 @@ class PictureDescriptionApiModel(PictureDescriptionBaseModel): def _annotate_images(self, images: Iterable[Image.Image]) -> Iterable[str]: # Note: technically we could make a batch request here, # but not all APIs will allow for it. For example, vllm won't allow more than 1. - for image in images: - yield api_image_request( + def _api_request(image): + return api_image_request( image=image, prompt=self.options.prompt, url=self.options.url, @@ -57,3 +59,6 @@ class PictureDescriptionApiModel(PictureDescriptionBaseModel): headers=self.options.headers, **self.options.params, ) + + with ThreadPoolExecutor(max_workers=self.concurrency) as executor: + yield from executor.map(_api_request, images)