fix: Improve exception raising when a page fails to parse (#46)
* Add safety checks for pages that fail to parse
  Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
* Bump to docling-parse 1.1.1
  Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
* Raise from page backend if page is not correctly parsed
  Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
---------
Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
parent
7e84533299
commit
8808463cec
@ -28,6 +28,10 @@ class DoclingParsePageBackend(PdfPageBackend):
|
|||||||
self.broken_page = "pages" not in parsed_page
|
self.broken_page = "pages" not in parsed_page
|
||||||
if not self.broken_page:
|
if not self.broken_page:
|
||||||
self._dpage = parsed_page["pages"][0]
|
self._dpage = parsed_page["pages"][0]
|
||||||
|
else:
|
||||||
|
raise RuntimeError(
|
||||||
|
f"Page {page_no} of document {document_hash} could not be parsed."
|
||||||
|
)
|
||||||
|
|
||||||
def get_text_in_rect(self, bbox: BoundingBox) -> str:
|
def get_text_in_rect(self, bbox: BoundingBox) -> str:
|
||||||
if self.broken_page:
|
if self.broken_page:
|
||||||
|
Loading…
Reference in New Issue
Block a user