fix: make enum serializable with human-readable value (#555)

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
This commit is contained in:
Michele Dolfi 2024-12-10 13:12:44 +01:00 committed by GitHub
parent eb30c4f763
commit a7df337654
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 34 additions and 28 deletions

View File

@ -27,8 +27,10 @@ from docling.datamodel.base_models import (
from docling.datamodel.document import ConversionResult
from docling.datamodel.pipeline_options import (
EasyOcrOptions,
OcrEngine,
OcrMacOptions,
OcrOptions,
PdfBackend,
PdfPipelineOptions,
RapidOcrOptions,
TableFormerMode,
@ -68,22 +70,6 @@ def version_callback(value: bool):
raise typer.Exit()
# Define an enum for the backend options
class PdfBackend(str, Enum):
PYPDFIUM2 = "pypdfium2"
DLPARSE_V1 = "dlparse_v1"
DLPARSE_V2 = "dlparse_v2"
# Define an enum for the ocr engines
class OcrEngine(str, Enum):
EASYOCR = "easyocr"
TESSERACT_CLI = "tesseract_cli"
TESSERACT = "tesseract"
OCRMAC = "ocrmac"
RAPIDOCR = "rapidocr"
def export_documents(
conv_results: Iterable[ConversionResult],
output_dir: Path,

View File

@ -19,12 +19,12 @@ if TYPE_CHECKING:
class ConversionStatus(str, Enum):
PENDING = auto()
STARTED = auto()
FAILURE = auto()
SUCCESS = auto()
PARTIAL_SUCCESS = auto()
SKIPPED = auto()
PENDING = "pending"
STARTED = "started"
FAILURE = "failure"
SUCCESS = "success"
PARTIAL_SUCCESS = "partial_success"
SKIPPED = "skipped"
class InputFormat(str, Enum):
@ -89,15 +89,15 @@ MimeTypeToFormat = {
class DocInputType(str, Enum):
PATH = auto()
STREAM = auto()
PATH = "path"
STREAM = "stream"
class DoclingComponentType(str, Enum):
DOCUMENT_BACKEND = auto()
MODEL = auto()
DOC_ASSEMBLER = auto()
USER_INPUT = auto()
DOCUMENT_BACKEND = "document_backend"
MODEL = "model"
DOC_ASSEMBLER = "doc_assembler"
USER_INPUT = "user_input"
class ErrorItem(BaseModel):

View File

@ -126,6 +126,26 @@ class OcrMacOptions(OcrOptions):
)
# Define an enum for the backend options
class PdfBackend(str, Enum):
    """Enum of valid PDF backends.

    Each member mixes in ``str``, so it compares equal to its lowercase
    string value and can be looked up from that value, e.g.
    ``PdfBackend("pypdfium2")``.
    """

    PYPDFIUM2 = "pypdfium2"
    DLPARSE_V1 = "dlparse_v1"
    DLPARSE_V2 = "dlparse_v2"
# Define an enum for the ocr engines
class OcrEngine(str, Enum):
    """Enum of valid OCR engines.

    Each member mixes in ``str``, so it compares equal to its lowercase
    string value and can be looked up from that value, e.g.
    ``OcrEngine("easyocr")``.
    """

    EASYOCR = "easyocr"
    TESSERACT_CLI = "tesseract_cli"
    TESSERACT = "tesseract"
    OCRMAC = "ocrmac"
    RAPIDOCR = "rapidocr"
class PipelineOptions(BaseModel):
"""Base pipeline options."""