fix: formula conversion with page_range param set (#1791)
When the page_range param is used for formula conversion, the system throws a "list index out of range" error. Tests are included to validate that the fix works. Signed-off-by: Masum <masumsofts@yahoo.com>
This commit is contained in:
parent
c2ef69718a
commit
dbab30e92c
@ -86,7 +86,7 @@ class BaseItemAndImageEnrichmentModel(
|
||||
coord_origin=bbox.coord_origin,
|
||||
)
|
||||
|
||||
page_ix = element_prov.page_no - 1
|
||||
page_ix = element_prov.page_no - conv_res.pages[0].page_no - 1
|
||||
cropped_image = conv_res.pages[page_ix].get_image(
|
||||
scale=self.images_scale, cropbox=expanded_bbox
|
||||
)
|
||||
|
@ -60,3 +60,25 @@ def test_code_and_formula_conversion():
|
||||
gt = "a ^ { 2 } + 8 = 1 2"
|
||||
predicted = formula_blocks[0].text
|
||||
assert predicted == gt, f"mismatch in text {predicted=}, {gt=}"
|
||||
|
||||
|
||||
def test_formula_conversion_with_page_range():
    """Regression test: formula conversion must work when ``page_range`` is set.

    Converts the sample PDF with ``page_range=(2, 2)`` and checks that exactly
    one formula item is produced and that its text matches the expected
    ground truth.
    """
    pdf_path = Path("tests/data/pdf/code_and_formula.pdf")
    converter = get_converter()

    print(f"converting {pdf_path} with page range")

    doc_result: ConversionResult = converter.convert(pdf_path, page_range=(2, 2))

    # Collect every text item that is labelled as a formula.
    formula_blocks = []
    for item in doc_result.document.texts:
        if not isinstance(item, TextItem):
            continue
        if item.label == DocItemLabel.FORMULA:
            formula_blocks.append(item)

    assert len(formula_blocks) == 1

    # The local names below appear verbatim in the failure message via the
    # ``{name=}`` f-string syntax, so they are kept as ``predicted`` and ``gt``.
    gt = "a ^ { 2 } + 8 = 1 2"
    predicted = formula_blocks[0].text
    assert predicted == gt, f"mismatch in text {predicted=}, {gt=}"
|
||||
|
Loading…
Reference in New Issue
Block a user