diff --git a/docling/cli/main.py b/docling/cli/main.py index b06354c..260d815 100644 --- a/docling/cli/main.py +++ b/docling/cli/main.py @@ -27,8 +27,10 @@ from docling.datamodel.base_models import ( from docling.datamodel.document import ConversionResult from docling.datamodel.pipeline_options import ( EasyOcrOptions, + OcrEngine, OcrMacOptions, OcrOptions, + PdfBackend, PdfPipelineOptions, RapidOcrOptions, TableFormerMode, @@ -68,22 +70,6 @@ def version_callback(value: bool): raise typer.Exit() -# Define an enum for the backend options -class PdfBackend(str, Enum): - PYPDFIUM2 = "pypdfium2" - DLPARSE_V1 = "dlparse_v1" - DLPARSE_V2 = "dlparse_v2" - - -# Define an enum for the ocr engines -class OcrEngine(str, Enum): - EASYOCR = "easyocr" - TESSERACT_CLI = "tesseract_cli" - TESSERACT = "tesseract" - OCRMAC = "ocrmac" - RAPIDOCR = "rapidocr" - - def export_documents( conv_results: Iterable[ConversionResult], output_dir: Path, diff --git a/docling/datamodel/base_models.py b/docling/datamodel/base_models.py index b71c0f9..dd6291a 100644 --- a/docling/datamodel/base_models.py +++ b/docling/datamodel/base_models.py @@ -19,12 +19,12 @@ if TYPE_CHECKING: class ConversionStatus(str, Enum): - PENDING = auto() - STARTED = auto() - FAILURE = auto() - SUCCESS = auto() - PARTIAL_SUCCESS = auto() - SKIPPED = auto() + PENDING = "pending" + STARTED = "started" + FAILURE = "failure" + SUCCESS = "success" + PARTIAL_SUCCESS = "partial_success" + SKIPPED = "skipped" class InputFormat(str, Enum): @@ -89,15 +89,15 @@ MimeTypeToFormat = { class DocInputType(str, Enum): - PATH = auto() - STREAM = auto() + PATH = "path" + STREAM = "stream" class DoclingComponentType(str, Enum): - DOCUMENT_BACKEND = auto() - MODEL = auto() - DOC_ASSEMBLER = auto() - USER_INPUT = auto() + DOCUMENT_BACKEND = "document_backend" + MODEL = "model" + DOC_ASSEMBLER = "doc_assembler" + USER_INPUT = "user_input" class ErrorItem(BaseModel): diff --git a/docling/datamodel/pipeline_options.py b/docling/datamodel/pipeline_options.py index 9be3ee8..235b5b7 100644 --- a/docling/datamodel/pipeline_options.py +++ b/docling/datamodel/pipeline_options.py @@ -126,6 +126,26 @@ class OcrMacOptions(OcrOptions): ) +# Define an enum for the backend options +class PdfBackend(str, Enum): + """Enum of valid PDF backends.""" + + PYPDFIUM2 = "pypdfium2" + DLPARSE_V1 = "dlparse_v1" + DLPARSE_V2 = "dlparse_v2" + + +# Define an enum for the ocr engines +class OcrEngine(str, Enum): + """Enum of valid OCR engines.""" + + EASYOCR = "easyocr" + TESSERACT_CLI = "tesseract_cli" + TESSERACT = "tesseract" + OCRMAC = "ocrmac" + RAPIDOCR = "rapidocr" + + class PipelineOptions(BaseModel): """Base pipeline options."""