feat: Improve parallelization for remote services API calls (#1548)
* Provide the option to make remote services calls concurrent
* Use yield from correctly?
* not do amateur hour stuff

Signed-off-by: Vinay Damodaran <vrdn@hey.com>
Commit: 3a04f2a367
Parent: 9f8b479f17
Committed via GitHub
@@ -1,4 +1,5 @@
 from collections.abc import Iterable
+from concurrent.futures import ThreadPoolExecutor
 
 from docling.datamodel.base_models import Page, VlmPrediction
 from docling.datamodel.document import ConversionResult
@@ -27,6 +28,7 @@ class ApiVlmModel(BasePageModel):
         )
 
         self.timeout = self.vlm_options.timeout
+        self.concurrency = self.vlm_options.concurrency
         self.prompt_content = (
             f"This is a page from a document.\n{self.vlm_options.prompt}"
         )
@@ -38,10 +40,10 @@ class ApiVlmModel(BasePageModel):
     def __call__(
         self, conv_res: ConversionResult, page_batch: Iterable[Page]
     ) -> Iterable[Page]:
-        for page in page_batch:
+        def _vlm_request(page):
             assert page._backend is not None
             if not page._backend.is_valid():
-                yield page
+                return page
             else:
                 with TimeRecorder(conv_res, "vlm"):
                     assert page.size is not None
@@ -63,4 +65,7 @@ class ApiVlmModel(BasePageModel):
 
                     page.predictions.vlm_response = VlmPrediction(text=page_tags)
 
-            yield page
+            return page
+
+        with ThreadPoolExecutor(max_workers=self.concurrency) as executor:
+            yield from executor.map(_vlm_request, page_batch)
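The core of the change is the pattern in the last hunk: the per-page work moves out of a sequential `for` loop into a plain function that returns its page, and the pool's results are re-exposed lazily with `yield from`. Below is a minimal, self-contained sketch of that pattern under stated assumptions: `fetch_page` and `CONCURRENCY` are illustrative stand-ins, not part of the docling API.

from collections.abc import Iterable
from concurrent.futures import ThreadPoolExecutor

CONCURRENCY = 4  # stand-in for self.vlm_options.concurrency in the diff above


def fetch_page(page: str) -> str:
    # Stand-in for the blocking remote VLM request made per page.
    return page.upper()


def process(page_batch: Iterable[str]) -> Iterable[str]:
    # executor.map preserves submission order, so pages come back in the
    # same order they went in; `yield from` keeps the caller's iterator
    # interface intact while the pool runs the requests in parallel.
    with ThreadPoolExecutor(max_workers=CONCURRENCY) as executor:
        yield from executor.map(fetch_page, page_batch)


if __name__ == "__main__":
    for result in process(["page one", "page two"]):
        print(result)

One detail the intermediate commits ("Use yield from correctly?") appear to be working through: the worker function must `return` its page rather than `yield` it. A `yield` anywhere in its body would make it a generator function, so `executor.map` would hand back unstarted generator objects instead of processed pages.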