From 6b696b504a03ba49f05237d0e1b23fcced1a538a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Joan=20Fabr=C3=A9gat?=
Date: Thu, 10 Apr 2025 16:11:28 +0200
Subject: [PATCH] fix: Properly address page in pipeline _assemble_document when page_range is provided (#1334)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Fixes #1333

Signed-off-by: Joan Fabrégat

* fix for the (dumb) MyPy type checker

Signed-off-by: Joan Fabrégat

---------

Signed-off-by: Joan Fabrégat
---
 docling/pipeline/standard_pdf_pipeline.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/docling/pipeline/standard_pdf_pipeline.py b/docling/pipeline/standard_pdf_pipeline.py
index ecaa27c..ae2d918 100644
--- a/docling/pipeline/standard_pdf_pipeline.py
+++ b/docling/pipeline/standard_pdf_pipeline.py
@@ -2,7 +2,7 @@ import logging
 import sys
 import warnings
 from pathlib import Path
-from typing import Optional
+from typing import Optional, cast
 
 from docling_core.types.doc import DocItem, ImageRef, PictureItem, TableItem
 
@@ -226,7 +226,11 @@ class StandardPdfPipeline(PaginatedPipeline):
                                 and self.pipeline_options.generate_table_images
                             ):
                                 page_ix = element.prov[0].page_no - 1
-                                page = conv_res.pages[page_ix]
+                                page = next(
+                                    (p for p in conv_res.pages if p.page_no == page_ix),
+                                    cast("Page", None),
+                                )
+                                assert page is not None
                                 assert page.size is not None
                                 assert page.image is not None
 