From f2e9c0784c842612641171754ce51362e298088d Mon Sep 17 00:00:00 2001 From: Krishnan Date: Tue, 20 May 2025 13:29:38 +0530 Subject: [PATCH] fix: incorrect force_backend_text behaviour for VLM DocTag pipelines (#1371) * Fix force_backend_text Signed-off-by: Krishnan Raghavan * empty commit to retrigger CI Signed-off-by: Panos Vagenas --------- Signed-off-by: Krishnan Raghavan Signed-off-by: Panos Vagenas Co-authored-by: Krishnan Raghavan Co-authored-by: Panos Vagenas --- docling/pipeline/vlm_pipeline.py | 36 +++++++++++++++----------------- 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/docling/pipeline/vlm_pipeline.py b/docling/pipeline/vlm_pipeline.py index 9a7b51e..6605c1c 100644 --- a/docling/pipeline/vlm_pipeline.py +++ b/docling/pipeline/vlm_pipeline.py @@ -136,25 +136,23 @@ class VlmPipeline(PaginatedPipeline): conv_res.document.load_from_doctags(doctags_doc) # If forced backend text, replace model predicted text with backend one - if page.size: - if self.force_backend_text: - scale = self.pipeline_options.images_scale - for element, _level in conv_res.document.iterate_items(): - if ( - not isinstance(element, TextItem) - or len(element.prov) == 0 - ): - continue - crop_bbox = ( - element.prov[0] - .bbox.scaled(scale=scale) - .to_top_left_origin( - page_height=page.size.height * scale - ) - ) - txt = self.extract_text_from_backend(page, crop_bbox) - element.text = txt - element.orig = txt + if self.force_backend_text: + scale = self.pipeline_options.images_scale + for element, _level in conv_res.document.iterate_items(): + if not isinstance(element, TextItem) or len(element.prov) == 0: + continue + page_ix = element.prov[0].page_no - 1 + page = conv_res.pages[page_ix] + if not page.size: + continue + crop_bbox = ( + element.prov[0] + .bbox.scaled(scale=scale) + .to_top_left_origin(page_height=page.size.height * scale) + ) + txt = self.extract_text_from_backend(page, crop_bbox) + element.text = txt + element.orig = txt elif ( self.pipeline_options.vlm_options.response_format == ResponseFormat.MARKDOWN