fix: Determine correct page size in DoclingParseV4Backend (#1196)

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
2025-03-19 11:05:42 +01:00
parent d5f7798763
commit f5adfb9724
1 changed files with 20 additions and 13 deletions
@@ -112,6 +112,7 @@ class DoclingParseV4PageBackend(PdfPageBackend):
            padbox.r = page_size.width - padbox.r
            padbox.t = page_size.height - padbox.t
        with pypdfium2_lock:
            image = (
                self._ppage.render(
                    scale=scale * 1.5,
@@ -119,16 +120,22 @@ class DoclingParseV4PageBackend(PdfPageBackend):
                    crop=padbox.as_tuple(),
                )
                .to_pil()
-            .resize(size=(round(cropbox.width * scale), round(cropbox.height * scale)))
+                .resize(
                    size=(round(cropbox.width * scale), round(cropbox.height * scale))
                )
            )  # We resize the image from 1.5x the given scale to make it sharper.
        return image
    def get_size(self) -> Size:
-        return Size(
+        with pypdfium2_lock:
-            width=self._dpage.dimension.width,
+            return Size(width=self._ppage.get_width(), height=self._ppage.get_height())
-            height=self._dpage.dimension.height,
+
-        )
+        # TODO: Take width and height from docling-parse.
        # return Size(
        #    width=self._dpage.dimension.width,
        #    height=self._dpage.dimension.height,
        # )
    def unload(self):
        self._ppage = None