from abc import ABC, abstractmethod
from io import BytesIO
from pathlib import Path
from typing import TYPE_CHECKING, Set, Union

from docling_core.types.doc import DoclingDocument

if TYPE_CHECKING:
    # Imported for annotations only; the names are referenced as string
    # annotations below, so nothing is needed at runtime.
    from docling.datamodel.base_models import InputFormat
    from docling.datamodel.document import InputDocument


class AbstractDocumentBackend(ABC):
    """Abstract base class for document backends.

    Keeps a reference to the source the backend reads from (a path or an
    in-memory stream) together with the file, hash and format taken from the
    input document, and declares the interface every concrete backend must
    implement.
    """

    @abstractmethod
    def __init__(self, in_doc: "InputDocument", path_or_stream: Union[BytesIO, Path]):
        """Record the input document's metadata and its source.

        The method is abstract, so subclasses must define their own
        ``__init__``; they can call ``super().__init__(...)`` to get the
        attribute setup below.

        Args:
            in_doc: Input document whose ``file``, ``document_hash`` and
                ``format`` attributes are copied onto the backend.
            path_or_stream: Path or in-memory stream holding the document.
        """
        self.file = in_doc.file
        self.path_or_stream = path_or_stream
        self.document_hash = in_doc.document_hash
        self.input_format = in_doc.format

    @abstractmethod
    def is_valid(self) -> bool:
        """Return whether the backend is valid.

        The validity criteria are defined by each concrete backend.
        """
        pass

    @classmethod
    @abstractmethod
    def supports_pagination(cls) -> bool:
        """Return whether this backend class supports pagination."""
        pass

    def unload(self) -> None:
        """Release the document source held by the backend.

        An in-memory stream is closed, which discards its buffer; a ``Path``
        needs no cleanup. In both cases the reference is dropped afterwards,
        so ``path_or_stream`` is ``None`` once this method has run.
        """
        if isinstance(self.path_or_stream, BytesIO):
            self.path_or_stream.close()

        self.path_or_stream = None

    @classmethod
    @abstractmethod
    def supported_formats(cls) -> Set["InputFormat"]:
        """Return the set of input formats this backend class supports."""
        pass


class PaginatedDocumentBackend(AbstractDocumentBackend):
    """PaginatedDocumentBackend.

    A paginated document backend is a backend that can report the number of
    pages of its document through ``page_count``.
    """

    @abstractmethod
    def page_count(self) -> int:
        """Return the number of pages in the document."""
        pass


class DeclarativeDocumentBackend(AbstractDocumentBackend):
    """DeclarativeDocumentBackend.

    A declarative document backend is a backend that can transform its input
    straight to a DoclingDocument, without a recognition pipeline.
    """

    @abstractmethod
    def convert(self) -> DoclingDocument:
        """Convert the backend's input and return it as a DoclingDocument."""
        pass