feat: add factory for ocr engines via plugins (#1010)
* add factory for ocr engines Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * apply pre-commit after rebase Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * add picture description factory Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * fix enable option Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * switch to create methods Signed-off-by: Panos Vagenas <pva@zurich.ibm.com> * make `options` an explicit kwarg Signed-off-by: Panos Vagenas <pva@zurich.ibm.com> * keep old lock of docling-core Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * fix lock Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * add allow_external_plugins option Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * add factory return and ignore options type Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> --------- Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> Signed-off-by: Panos Vagenas <pva@zurich.ibm.com> Co-authored-by: Panos Vagenas <pva@zurich.ibm.com>
This commit is contained in:
@@ -9,6 +9,7 @@ import warnings
|
||||
from pathlib import Path
|
||||
from typing import Annotated, Dict, Iterable, List, Optional, Type
|
||||
|
||||
import rich.table
|
||||
import typer
|
||||
from docling_core.types.doc import ImageRefMode
|
||||
from docling_core.utils.file import resolve_source_to_path
|
||||
@@ -30,18 +31,14 @@ from docling.datamodel.pipeline_options import (
|
||||
AcceleratorDevice,
|
||||
AcceleratorOptions,
|
||||
EasyOcrOptions,
|
||||
OcrEngine,
|
||||
OcrMacOptions,
|
||||
OcrOptions,
|
||||
PdfBackend,
|
||||
PdfPipelineOptions,
|
||||
RapidOcrOptions,
|
||||
TableFormerMode,
|
||||
TesseractCliOcrOptions,
|
||||
TesseractOcrOptions,
|
||||
)
|
||||
from docling.datamodel.settings import settings
|
||||
from docling.document_converter import DocumentConverter, FormatOption, PdfFormatOption
|
||||
from docling.models.factories import get_ocr_factory
|
||||
|
||||
warnings.filterwarnings(action="ignore", category=UserWarning, module="pydantic|torch")
|
||||
warnings.filterwarnings(action="ignore", category=FutureWarning, module="easyocr")
|
||||
@@ -49,8 +46,11 @@ warnings.filterwarnings(action="ignore", category=FutureWarning, module="easyocr
|
||||
_log = logging.getLogger(__name__)
|
||||
from rich.console import Console
|
||||
|
||||
console = Console()
|
||||
err_console = Console(stderr=True)
|
||||
|
||||
ocr_factory_internal = get_ocr_factory(allow_external_plugins=False)
|
||||
ocr_engines_enum_internal = ocr_factory_internal.get_enum()
|
||||
|
||||
app = typer.Typer(
|
||||
name="Docling",
|
||||
@@ -78,6 +78,24 @@ def version_callback(value: bool):
|
||||
raise typer.Exit()
|
||||
|
||||
|
||||
def show_external_plugins_callback(value: bool):
|
||||
if value:
|
||||
ocr_factory_all = get_ocr_factory(allow_external_plugins=True)
|
||||
table = rich.table.Table(title="Available OCR engines")
|
||||
table.add_column("Name", justify="right")
|
||||
table.add_column("Plugin")
|
||||
table.add_column("Package")
|
||||
for meta in ocr_factory_all.registered_meta.values():
|
||||
if not meta.module.startswith("docling."):
|
||||
table.add_row(
|
||||
f"[bold]{meta.kind}[/bold]",
|
||||
meta.plugin_name,
|
||||
meta.module.split(".")[0],
|
||||
)
|
||||
rich.print(table)
|
||||
raise typer.Exit()
|
||||
|
||||
|
||||
def export_documents(
|
||||
conv_results: Iterable[ConversionResult],
|
||||
output_dir: Path,
|
||||
@@ -196,8 +214,16 @@ def convert(
|
||||
),
|
||||
] = False,
|
||||
ocr_engine: Annotated[
|
||||
OcrEngine, typer.Option(..., help="The OCR engine to use.")
|
||||
] = OcrEngine.EASYOCR,
|
||||
str,
|
||||
typer.Option(
|
||||
...,
|
||||
help=(
|
||||
f"The OCR engine to use. When --allow-external-plugins is *not* set, the available values are: "
|
||||
f"{', '.join((o.value for o in ocr_engines_enum_internal))}. "
|
||||
f"Use the option --show-external-plugins to see the options allowed with external plugins."
|
||||
),
|
||||
),
|
||||
] = EasyOcrOptions.kind,
|
||||
ocr_lang: Annotated[
|
||||
Optional[str],
|
||||
typer.Option(
|
||||
@@ -241,6 +267,21 @@ def convert(
|
||||
..., help="Must be enabled when using models connecting to remote services."
|
||||
),
|
||||
] = False,
|
||||
allow_external_plugins: Annotated[
|
||||
bool,
|
||||
typer.Option(
|
||||
..., help="Must be enabled for loading modules from third-party plugins."
|
||||
),
|
||||
] = False,
|
||||
show_external_plugins: Annotated[
|
||||
bool,
|
||||
typer.Option(
|
||||
...,
|
||||
help="List the third-party plugins which are available when the option --allow-external-plugins is set.",
|
||||
callback=show_external_plugins_callback,
|
||||
is_eager=True,
|
||||
),
|
||||
] = False,
|
||||
abort_on_error: Annotated[
|
||||
bool,
|
||||
typer.Option(
|
||||
@@ -368,18 +409,11 @@ def convert(
|
||||
export_txt = OutputFormat.TEXT in to_formats
|
||||
export_doctags = OutputFormat.DOCTAGS in to_formats
|
||||
|
||||
if ocr_engine == OcrEngine.EASYOCR:
|
||||
ocr_options: OcrOptions = EasyOcrOptions(force_full_page_ocr=force_ocr)
|
||||
elif ocr_engine == OcrEngine.TESSERACT_CLI:
|
||||
ocr_options = TesseractCliOcrOptions(force_full_page_ocr=force_ocr)
|
||||
elif ocr_engine == OcrEngine.TESSERACT:
|
||||
ocr_options = TesseractOcrOptions(force_full_page_ocr=force_ocr)
|
||||
elif ocr_engine == OcrEngine.OCRMAC:
|
||||
ocr_options = OcrMacOptions(force_full_page_ocr=force_ocr)
|
||||
elif ocr_engine == OcrEngine.RAPIDOCR:
|
||||
ocr_options = RapidOcrOptions(force_full_page_ocr=force_ocr)
|
||||
else:
|
||||
raise RuntimeError(f"Unexpected OCR engine type {ocr_engine}")
|
||||
ocr_factory = get_ocr_factory(allow_external_plugins=allow_external_plugins)
|
||||
ocr_options: OcrOptions = ocr_factory.create_options( # type: ignore
|
||||
kind=ocr_engine,
|
||||
force_full_page_ocr=force_ocr,
|
||||
)
|
||||
|
||||
ocr_lang_list = _split_list(ocr_lang)
|
||||
if ocr_lang_list is not None:
|
||||
@@ -387,6 +421,7 @@ def convert(
|
||||
|
||||
accelerator_options = AcceleratorOptions(num_threads=num_threads, device=device)
|
||||
pipeline_options = PdfPipelineOptions(
|
||||
allow_external_plugins=allow_external_plugins,
|
||||
enable_remote_services=enable_remote_services,
|
||||
accelerator_options=accelerator_options,
|
||||
do_ocr=ocr,
|
||||
|
||||
Reference in New Issue
Block a user