fix: Properly address page in pipeline _assemble_document when page_range is provided (#1334)

* Fixes #1333

Signed-off-by: Joan Fabrégat <j@fabreg.at>

* fix for the (dumb) MyPy type checker

Signed-off-by: Joan Fabrégat <j@fabreg.at>

---------

Signed-off-by: Joan Fabrégat <j@fabreg.at>
This commit is contained in:
Joan Fabrégat 2025-04-10 16:11:28 +02:00 committed by GitHub
parent 72ab8e1821
commit 6b696b504a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -2,7 +2,7 @@ import logging
import sys
import warnings
from pathlib import Path
from typing import Optional
from typing import Optional, cast
from docling_core.types.doc import DocItem, ImageRef, PictureItem, TableItem
@@ -226,7 +226,11 @@ class StandardPdfPipeline(PaginatedPipeline):
and self.pipeline_options.generate_table_images
):
page_ix = element.prov[0].page_no - 1
page = conv_res.pages[page_ix]
page = next(
(p for p in conv_res.pages if p.page_no == page_ix),
cast("Page", None),
)
assert page is not None
assert page.size is not None
assert page.image is not None