fix: Determine correct page size in DoclingParseV4Backend (#1196)

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
Christoph Auer 2025-03-19 11:05:42 +01:00 committed by GitHub
parent d5f7798763
commit f5adfb9724
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -112,6 +112,7 @@ class DoclingParseV4PageBackend(PdfPageBackend):
padbox.r = page_size.width - padbox.r padbox.r = page_size.width - padbox.r
padbox.t = page_size.height - padbox.t padbox.t = page_size.height - padbox.t
with pypdfium2_lock:
image = ( image = (
self._ppage.render( self._ppage.render(
scale=scale * 1.5, scale=scale * 1.5,
@@ -119,16 +120,22 @@ class DoclingParseV4PageBackend(PdfPageBackend):
crop=padbox.as_tuple(), crop=padbox.as_tuple(),
) )
.to_pil() .to_pil()
.resize(size=(round(cropbox.width * scale), round(cropbox.height * scale))) .resize(
size=(round(cropbox.width * scale), round(cropbox.height * scale))
)
) # We resize the image from 1.5x the given scale to make it sharper. ) # We resize the image from 1.5x the given scale to make it sharper.
return image return image
def get_size(self) -> Size: def get_size(self) -> Size:
return Size( with pypdfium2_lock:
width=self._dpage.dimension.width, return Size(width=self._ppage.get_width(), height=self._ppage.get_height())
height=self._dpage.dimension.height,
) # TODO: Take width and height from docling-parse.
# return Size(
# width=self._dpage.dimension.width,
# height=self._dpage.dimension.height,
# )
def unload(self): def unload(self):
self._ppage = None self._ppage = None