fix: Properly address page in pipeline _assemble_document when page_range is provided (#1334)
* Fixes #1333
  Signed-off-by: Joan Fabrégat <j@fabreg.at>
* fix for the (dumb) MyPy type checker
  Signed-off-by: Joan Fabrégat <j@fabreg.at>
---------
Signed-off-by: Joan Fabrégat <j@fabreg.at>
This commit is contained in:
parent
72ab8e1821
commit
6b696b504a
@ -2,7 +2,7 @@ import logging
|
|||||||
import sys
|
import sys
|
||||||
import warnings
|
import warnings
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Optional
|
from typing import Optional, cast
|
||||||
|
|
||||||
from docling_core.types.doc import DocItem, ImageRef, PictureItem, TableItem
|
from docling_core.types.doc import DocItem, ImageRef, PictureItem, TableItem
|
||||||
|
|
||||||
@ -226,7 +226,11 @@ class StandardPdfPipeline(PaginatedPipeline):
|
|||||||
and self.pipeline_options.generate_table_images
|
and self.pipeline_options.generate_table_images
|
||||||
):
|
):
|
||||||
page_ix = element.prov[0].page_no - 1
|
page_ix = element.prov[0].page_no - 1
|
||||||
page = conv_res.pages[page_ix]
|
page = next(
|
||||||
|
(p for p in conv_res.pages if p.page_no == page_ix),
|
||||||
|
cast("Page", None),
|
||||||
|
)
|
||||||
|
assert page is not None
|
||||||
assert page.size is not None
|
assert page.size is not None
|
||||||
assert page.image is not None
|
assert page.image is not None
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user