# Docling/docling/backend/noop_backend.py

import logging
from io import BytesIO
from pathlib import Path
from typing import Set, Union

from docling.backend.abstract_backend import AbstractDocumentBackend
from docling.datamodel.base_models import InputFormat
from docling.datamodel.document import InputDocument

_log = logging.getLogger(__name__)


class NoOpBackend(AbstractDocumentBackend):
    """
    A no-op backend that only validates input existence.
    Used e.g. for audio files where actual processing is handled by the ASR pipeline.

    The backend performs no parsing. At construction time it records, in
    ``self.valid``, whether the input looks usable:

    * a ``BytesIO`` stream is valid when it holds at least one byte;
    * a ``Path`` is valid when it exists on disk (``Path.exists()`` is also
      true for directories; no further check is made here);
    * anything else, or any error raised while checking, is invalid.
    """

    def __init__(self, in_doc: "InputDocument", path_or_stream: Union[BytesIO, Path]):
        """
        Initialize the backend and validate the input once.

        Args:
            in_doc: The input document, forwarded unchanged to the base class.
            path_or_stream: An in-memory stream or a filesystem path. The
                validation below reads ``self.path_or_stream``, which is
                presumably set to this value by the base-class constructor.
        """
        super().__init__(in_doc, path_or_stream)

        _log.debug("NoOpBackend initialized for: %s", path_or_stream)

        # Start pessimistic: only a successful check below flips this to True.
        self.valid = False
        try:
            source = self.path_or_stream
            if isinstance(source, BytesIO):
                # Measure the stream through a zero-copy view instead of
                # getvalue(), which would duplicate the whole payload just to
                # take its length. The view is released immediately so the
                # stream can still be resized or closed afterwards.
                with source.getbuffer() as view:
                    size = view.nbytes
                self.valid = size > 0
                _log.debug("BytesIO stream length: %s", size)
            elif isinstance(source, Path):
                self.valid = source.exists()
                _log.debug("File exists: %s", self.valid)
        except Exception as e:
            # Deliberate best-effort: a failing check (e.g. a closed stream or
            # an OS error while probing the path) marks the input invalid
            # rather than aborting construction.
            _log.error("NoOpBackend validation failed: %s", e)
            self.valid = False

    def is_valid(self) -> bool:
        """Return the validity flag computed in ``__init__``."""
        return self.valid

    @classmethod
    def supports_pagination(cls) -> bool:
        """Return ``False``; this backend reports no pagination support."""
        return False

    @classmethod
    def supported_formats(cls) -> Set[InputFormat]:
        """Return a set containing every member of ``InputFormat``."""
        return set(InputFormat)