From 3eca8b84859d0df5dc4ec8530b6a0d24de617e61 Mon Sep 17 00:00:00 2001 From: mara004 Date: Thu, 25 Jul 2024 08:54:57 +0200 Subject: [PATCH] refactor(pypdfium2): just forward input to PdfDocument directly (#17) PdfDocument() should accept strings, paths, bytes and byte streams. If not, please file a bug report. Signed-off-by: mara004 --- docling/backend/pypdfium2_backend.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/docling/backend/pypdfium2_backend.py b/docling/backend/pypdfium2_backend.py index 198ec51..f2272d5 100644 --- a/docling/backend/pypdfium2_backend.py +++ b/docling/backend/pypdfium2_backend.py @@ -201,13 +201,7 @@ class PyPdfiumPageBackend(PdfPageBackend): class PyPdfiumDocumentBackend(PdfDocumentBackend): def __init__(self, path_or_stream: Iterable[Union[BytesIO, Path]]): super().__init__(path_or_stream) - - if isinstance(path_or_stream, Path): - self._pdoc = pdfium.PdfDocument(path_or_stream) - elif isinstance(path_or_stream, BytesIO): - self._pdoc = pdfium.PdfDocument( - path_or_stream - ) # TODO Fix me, won't accept bytes. + self._pdoc = pdfium.PdfDocument(path_or_stream) def page_count(self) -> int: return len(self._pdoc)