diff --git a/docling/backend/abstract_backend.py b/docling/backend/abstract_backend.py
index be4c4a5..99b7981 100644
--- a/docling/backend/abstract_backend.py
+++ b/docling/backend/abstract_backend.py
@@ -35,7 +35,7 @@ class PdfPageBackend(ABC):
 
 class PdfDocumentBackend(ABC):
     @abstractmethod
-    def __init__(self, path_or_stream: Iterable[Union[BytesIO, Path]]):
+    def __init__(self, path_or_stream: Union[BytesIO, Path]):
         pass
 
     @abstractmethod
diff --git a/docling/backend/docling_parse_backend.py b/docling/backend/docling_parse_backend.py
index 1e4bc63..31a2582 100644
--- a/docling/backend/docling_parse_backend.py
+++ b/docling/backend/docling_parse_backend.py
@@ -146,11 +146,14 @@ class DoclingParsePageBackend(PdfPageBackend):
 
 
 class DoclingParseDocumentBackend(PdfDocumentBackend):
-    def __init__(self, path_or_stream: Iterable[Union[BytesIO, Path]]):
+    def __init__(self, path_or_stream: Union[BytesIO, Path]):
         super().__init__(path_or_stream)
         self._pdoc = pdfium.PdfDocument(path_or_stream)
         # Parsing cells with docling_parser call
-        print("PARSING WITH DOCLING PARSE")
+        # find_cells() below receives str(path_or_stream), which is only a
+        # usable location for a Path, so BytesIO input is rejected explicitly.
+        if isinstance(path_or_stream, BytesIO):
+            raise NotImplementedError("This backend does not support byte streams yet.")
         parser = pdf_parser()
         self._parser_doc = parser.find_cells(str(path_or_stream))
 
diff --git a/docling/backend/pypdfium2_backend.py b/docling/backend/pypdfium2_backend.py
index f2272d5..e5540f4 100644
--- a/docling/backend/pypdfium2_backend.py
+++ b/docling/backend/pypdfium2_backend.py
@@ -199,7 +199,7 @@ class PyPdfiumPageBackend(PdfPageBackend):
 
 
 class PyPdfiumDocumentBackend(PdfDocumentBackend):
-    def __init__(self, path_or_stream: Iterable[Union[BytesIO, Path]]):
+    def __init__(self, path_or_stream: Union[BytesIO, Path]):
         super().__init__(path_or_stream)
         self._pdoc = pdfium.PdfDocument(path_or_stream)
 