fix: incorrect force_backend_text behaviour for VLM DocTag pipelines (#1371)

* Fix force_backend_text

  Signed-off-by: Krishnan Raghavan <krishnanraghavan@Krishnans-MacBook-Air.local>

* empty commit to retrigger CI

  Signed-off-by: Panos Vagenas <pva@zurich.ibm.com>

---------

Signed-off-by: Krishnan Raghavan <krishnanraghavan@Krishnans-MacBook-Air.local>
Signed-off-by: Panos Vagenas <pva@zurich.ibm.com>
Co-authored-by: Krishnan Raghavan <krishnanraghavan@Krishnans-MacBook-Air.local>
Co-authored-by: Panos Vagenas <pva@zurich.ibm.com>
2025-05-20 13:29:38 +05:30
parent 98b5eeb844
commit f2e9c0784c
1 changed file with 17 additions and 19 deletions
@@ -136,21 +136,19 @@ class VlmPipeline(PaginatedPipeline):
                conv_res.document.load_from_doctags(doctags_doc)
                # If forced backend text, replace model predicted text with backend one
                if page.size:
                if self.force_backend_text:
                    scale = self.pipeline_options.images_scale
                    for element, _level in conv_res.document.iterate_items():
-                            if (
+                        if not isinstance(element, TextItem) or len(element.prov) == 0:
-                                not isinstance(element, TextItem)
+                            continue
-                                or len(element.prov) == 0
+                        page_ix = element.prov[0].page_no - 1
-                            ):
+                        page = conv_res.pages[page_ix]
                        if not page.size:
                            continue
                        crop_bbox = (
                            element.prov[0]
                            .bbox.scaled(scale=scale)
-                                .to_top_left_origin(
+                            .to_top_left_origin(page_height=page.size.height * scale)
                                    page_height=page.size.height * scale
                                )
                        )
                        txt = self.extract_text_from_backend(page, crop_bbox)
                        element.text = txt