From f5adfb9724aae1207f23e21d74033f331e6e1ffb Mon Sep 17 00:00:00 2001
From: Christoph Auer <60343111+cau-git@users.noreply.github.com>
Date: Wed, 19 Mar 2025 11:05:42 +0100
Subject: [PATCH] fix: Determine correct page size in DoclingParseV4Backend (#1196)

Signed-off-by: Christoph Auer
---
 docling/backend/docling_parse_v4_backend.py | 33 +++++++++++++--------
 1 file changed, 20 insertions(+), 13 deletions(-)

diff --git a/docling/backend/docling_parse_v4_backend.py b/docling/backend/docling_parse_v4_backend.py
index 9ec0aee..e1e7430 100644
--- a/docling/backend/docling_parse_v4_backend.py
+++ b/docling/backend/docling_parse_v4_backend.py
@@ -112,23 +112,30 @@ class DoclingParseV4PageBackend(PdfPageBackend):
             padbox.r = page_size.width - padbox.r
             padbox.t = page_size.height - padbox.t
 
-        image = (
-            self._ppage.render(
-                scale=scale * 1.5,
-                rotation=0,  # no additional rotation
-                crop=padbox.as_tuple(),
-            )
-            .to_pil()
-            .resize(size=(round(cropbox.width * scale), round(cropbox.height * scale)))
-        )  # We resize the image from 1.5x the given scale to make it sharper.
+        with pypdfium2_lock:
+            image = (
+                self._ppage.render(
+                    scale=scale * 1.5,
+                    rotation=0,  # no additional rotation
+                    crop=padbox.as_tuple(),
+                )
+                .to_pil()
+                .resize(
+                    size=(round(cropbox.width * scale), round(cropbox.height * scale))
+                )
+            )  # We resize the image from 1.5x the given scale to make it sharper.
 
         return image
 
     def get_size(self) -> Size:
-        return Size(
-            width=self._dpage.dimension.width,
-            height=self._dpage.dimension.height,
-        )
+        with pypdfium2_lock:
+            return Size(width=self._ppage.get_width(), height=self._ppage.get_height())
+
+        # TODO: Take width and height from docling-parse.
+        # return Size(
+        #     width=self._dpage.dimension.width,
+        #     height=self._dpage.dimension.height,
+        # )
 
     def unload(self):
         self._ppage = None