Ensure all models work only on valid pages (#158)

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
Christoph Auer
2024-10-18 08:54:06 +02:00
committed by GitHub
parent 034a411057
commit a00c937e19
10 changed files with 413 additions and 376 deletions

View File

@@ -17,9 +17,13 @@ class PagePreprocessingModel(BasePageModel):
def __call__(self, page_batch: Iterable[Page]) -> Iterable[Page]:
    """Yield each page of ``page_batch``, preprocessing only the valid ones.

    A page whose backend reports ``is_valid()`` as true is passed through
    ``_populate_page_images`` and then ``_parse_page_cells`` before being
    yielded. A page whose backend is not valid is yielded untouched, so
    the output has exactly one item per input page, in input order.

    Args:
        page_batch: The pages to preprocess.

    Yields:
        Each input page exactly once, preprocessed if its backend is valid.

    Raises:
        AssertionError: If a page's ``_backend`` is ``None``.
    """
    for page in page_batch:
        # A backend must be attached before validity can be queried.
        # NOTE(review): presumably this assert also narrows an Optional
        # type for the type checker -- confirm against the Page model.
        assert page._backend is not None

        # Check validity *before* doing any work: image generation and
        # cell parsing must never run against an invalid backend.
        if page._backend.is_valid():
            page = self._populate_page_images(page)
            page = self._parse_page_cells(page)

        # Single exit point: every page is emitted once, valid or not.
        yield page
# Generate the page image and store it in the page object
def _populate_page_images(self, page: Page) -> Page: