fix: incorrect force_backend_text behaviour for VLM DocTag pipelines (#1371)

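* Fix force_backend_text: resolve each text element's page from its provenance (`prov[0].page_no`) instead of reusing a single `page` for every element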

Signed-off-by: Krishnan Raghavan <krishnanraghavan@Krishnans-MacBook-Air.local>

* empty commit to retrigger CI

Signed-off-by: Panos Vagenas <pva@zurich.ibm.com>

---------

Signed-off-by: Krishnan Raghavan <krishnanraghavan@Krishnans-MacBook-Air.local>
Signed-off-by: Panos Vagenas <pva@zurich.ibm.com>
Co-authored-by: Krishnan Raghavan <krishnanraghavan@Krishnans-MacBook-Air.local>
Co-authored-by: Panos Vagenas <pva@zurich.ibm.com>
This commit is contained in:
Krishnan 2025-05-20 13:29:38 +05:30 committed by GitHub
parent 98b5eeb844
commit f2e9c0784c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -136,25 +136,23 @@ class VlmPipeline(PaginatedPipeline):
conv_res.document.load_from_doctags(doctags_doc)
# If forced backend text, replace model predicted text with backend one
if page.size:
if self.force_backend_text:
scale = self.pipeline_options.images_scale
for element, _level in conv_res.document.iterate_items():
if (
not isinstance(element, TextItem)
or len(element.prov) == 0
):
continue
crop_bbox = (
element.prov[0]
.bbox.scaled(scale=scale)
.to_top_left_origin(
page_height=page.size.height * scale
)
)
txt = self.extract_text_from_backend(page, crop_bbox)
element.text = txt
element.orig = txt
if self.force_backend_text:
scale = self.pipeline_options.images_scale
for element, _level in conv_res.document.iterate_items():
if not isinstance(element, TextItem) or len(element.prov) == 0:
continue
page_ix = element.prov[0].page_no - 1
page = conv_res.pages[page_ix]
if not page.size:
continue
crop_bbox = (
element.prov[0]
.bbox.scaled(scale=scale)
.to_top_left_origin(page_height=page.size.height * scale)
)
txt = self.extract_text_from_backend(page, crop_bbox)
element.text = txt
element.orig = txt
elif (
self.pipeline_options.vlm_options.response_format
== ResponseFormat.MARKDOWN