fix: type of path_or_stream in PdfDocumentBackend (#28)
Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
This commit is contained in:
parent
9550db8e64
commit
794b20a50a
@ -35,7 +35,7 @@ class PdfPageBackend(ABC):
|
||||
|
||||
class PdfDocumentBackend(ABC):
|
||||
@abstractmethod
|
||||
def __init__(self, path_or_stream: Iterable[Union[BytesIO, Path]]):
|
||||
def __init__(self, path_or_stream: Union[BytesIO, Path]):
    """Declare the constructor contract for PDF document backends.

    Args:
        path_or_stream: A single document source, given either as an
            in-memory ``BytesIO`` or as a filesystem ``Path``.
    """
|
||||
|
||||
@abstractmethod
|
||||
|
@ -146,11 +146,12 @@ class DoclingParsePageBackend(PdfPageBackend):
|
||||
|
||||
|
||||
class DoclingParseDocumentBackend(PdfDocumentBackend):
|
||||
def __init__(self, path_or_stream: Iterable[Union[BytesIO, Path]]):
|
||||
def __init__(self, path_or_stream: Union[BytesIO, Path]):
    """Open the document with pdfium and extract its cells via ``pdf_parser``.

    Args:
        path_or_stream: The document source. Only ``Path`` input is
            handled here; a ``BytesIO`` is rejected.

    Raises:
        NotImplementedError: If ``path_or_stream`` is a ``BytesIO``.
    """
    super().__init__(path_or_stream)
    self._pdoc = pdfium.PdfDocument(path_or_stream)
    # Parsing cells with docling_parser call
    print("PARSING WITH DOCLING PARSE")
    if isinstance(path_or_stream, BytesIO):
        # NotImplementedError is the exception class; the bare
        # ``NotImplemented`` constant is a comparison sentinel and is not
        # callable, so raising it would surface as a TypeError instead.
        raise NotImplementedError("This backend does not support byte streams yet.")
    parser = pdf_parser()
    # find_cells is handed the path as a plain string.
    self._parser_doc = parser.find_cells(str(path_or_stream))
|
||||
|
||||
|
@ -199,7 +199,7 @@ class PyPdfiumPageBackend(PdfPageBackend):
|
||||
|
||||
|
||||
class PyPdfiumDocumentBackend(PdfDocumentBackend):
|
||||
def __init__(self, path_or_stream: Iterable[Union[BytesIO, Path]]):
|
||||
def __init__(self, path_or_stream: Union[BytesIO, Path]):
    """Build a ``pdfium.PdfDocument`` from the input and keep it on the instance.

    Args:
        path_or_stream: The document source, either an in-memory
            ``BytesIO`` or a filesystem ``Path``; it is forwarded unchanged
            to the base constructor and to ``pdfium.PdfDocument``.
    """
    super().__init__(path_or_stream)
    document = pdfium.PdfDocument(path_or_stream)
    self._pdoc = document
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user