feat: add factory for ocr engines via plugins (#1010)

* add factory for ocr engines

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* apply pre-commit after rebase

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* add picture description factory

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* fix enable option

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* switch to create methods

Signed-off-by: Panos Vagenas <pva@zurich.ibm.com>

* make `options` an explicit kwarg

Signed-off-by: Panos Vagenas <pva@zurich.ibm.com>

* keep old lock of docling-core

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* fix lock

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* add allow_external_plugins option

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* add factory return and ignore options type

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

---------

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
Signed-off-by: Panos Vagenas <pva@zurich.ibm.com>
Co-authored-by: Panos Vagenas <pva@zurich.ibm.com>
This commit is contained in:
Michele Dolfi
2025-03-18 13:58:05 +01:00
committed by GitHub
parent 3960b199d6
commit 6eaae3cba0
21 changed files with 485 additions and 158 deletions

View File

@@ -2,7 +2,7 @@ import copy
import logging
from abc import abstractmethod
from pathlib import Path
from typing import Iterable, List
from typing import Iterable, List, Optional, Type
import numpy as np
from docling_core.types.doc import BoundingBox, CoordOrigin
@@ -13,15 +13,22 @@ from scipy.ndimage import binary_dilation, find_objects, label
from docling.datamodel.base_models import Page
from docling.datamodel.document import ConversionResult
from docling.datamodel.pipeline_options import OcrOptions
from docling.datamodel.pipeline_options import AcceleratorOptions, OcrOptions
from docling.datamodel.settings import settings
from docling.models.base_model import BasePageModel
from docling.models.base_model import BaseModelWithOptions, BasePageModel
_log = logging.getLogger(__name__)
class BaseOcrModel(BasePageModel):
def __init__(self, enabled: bool, options: OcrOptions):
class BaseOcrModel(BasePageModel, BaseModelWithOptions):
def __init__(
self,
*,
enabled: bool,
artifacts_path: Optional[Path],
options: OcrOptions,
accelerator_options: AcceleratorOptions,
):
self.enabled = enabled
self.options = options
@@ -186,3 +193,8 @@ class BaseOcrModel(BasePageModel):
self, conv_res: ConversionResult, page_batch: Iterable[Page]
) -> Iterable[Page]:
pass
@classmethod
@abstractmethod
def get_options_type(cls) -> Type[OcrOptions]:
pass