fix: type of path_or_stream in PdfDocumentBackend (#28)

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
This commit is contained in:
Michele Dolfi 2024-08-07 17:20:44 +02:00 committed by GitHub
parent 9550db8e64
commit 794b20a50a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 5 additions and 4 deletions

View File

@ -35,7 +35,7 @@ class PdfPageBackend(ABC):
class PdfDocumentBackend(ABC):
@abstractmethod
def __init__(self, path_or_stream: Iterable[Union[BytesIO, Path]]):
def __init__(self, path_or_stream: Union[BytesIO, Path]):
pass
@abstractmethod

View File

@ -146,11 +146,12 @@ class DoclingParsePageBackend(PdfPageBackend):
class DoclingParseDocumentBackend(PdfDocumentBackend):
def __init__(self, path_or_stream: Iterable[Union[BytesIO, Path]]):
def __init__(self, path_or_stream: Union[BytesIO, Path]):
    """Open the PDF via ``pdfium.PdfDocument`` and parse its cells.

    Args:
        path_or_stream: The PDF to load. The signature admits ``BytesIO``
            for parity with the base class, but this backend passes
            ``str(path_or_stream)`` to ``parser.find_cells`` and therefore
            rejects byte streams at runtime.

    Raises:
        NotImplementedError: If ``path_or_stream`` is a ``BytesIO``.
    """
    super().__init__(path_or_stream)

    # Fail fast, before any document is opened, so the unsupported path
    # does not leave a half-initialised backend holding an open document.
    if isinstance(path_or_stream, BytesIO):
        # NotImplementedError is the exception class; the NotImplemented
        # constant is not callable and would raise a TypeError instead.
        raise NotImplementedError("This backend does not support byte streams yet.")

    self._pdoc = pdfium.PdfDocument(path_or_stream)

    # Parsing cells with docling_parser call
    print("PARSING WITH DOCLING PARSE")
    parser = pdf_parser()
    self._parser_doc = parser.find_cells(str(path_or_stream))

View File

@ -199,7 +199,7 @@ class PyPdfiumPageBackend(PdfPageBackend):
class PyPdfiumDocumentBackend(PdfDocumentBackend):
def __init__(self, path_or_stream: Iterable[Union[BytesIO, Path]]):
def __init__(self, path_or_stream: Union[BytesIO, Path]):
super().__init__(path_or_stream)
self._pdoc = pdfium.PdfDocument(path_or_stream)