fix: enrichment models batch size and expose picture classifier (#878)

* expose picture classifier in CLI

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* use different batch size in each model

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* remove batch size from CLI

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* cleanup imports

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

---------

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
2025-02-05 11:46:01 +01:00 · 2025-02-05 11:46:01 +01:00 · 5ad6de0560
commit 5ad6de0560
parent 17448163e7
4 changed files with 13 additions and 1 deletion
--- a/docling/cli/main.py
+++ b/docling/cli/main.py
@@ -219,6 +219,13 @@ def convert(
        bool,
        typer.Option(..., help="Enable the formula enrichment model in the pipeline."),
    ] = False,
+    enrich_picture_classes: Annotated[
+        bool,
+        typer.Option(
+            ...,
+            help="Enable the picture classification enrichment model in the pipeline.",
+        ),
+    ] = False,
    artifacts_path: Annotated[
        Optional[Path],
        typer.Option(..., help="If provided, the location of the model artifacts."),
@@ -375,6 +382,7 @@ def convert(
            do_table_structure=True,
            do_code_enrichment=enrich_code,
            do_formula_enrichment=enrich_formula,
+            do_picture_classification=enrich_picture_classes,
            document_timeout=document_timeout,
        )
        pipeline_options.table_structure_options.do_cell_matching = (
--- a/docling/models/base_model.py
+++ b/docling/models/base_model.py
@@ -6,6 +6,7 @@ from typing_extensions import TypeVar

 from docling.datamodel.base_models import ItemAndImageEnrichmentElement, Page
 from docling.datamodel.document import ConversionResult
+from docling.datamodel.settings import settings


 class BasePageModel(ABC):
@@ -21,6 +22,8 @@ EnrichElementT = TypeVar("EnrichElementT", default=NodeItem)

 class GenericEnrichmentModel(ABC, Generic[EnrichElementT]):

+    elements_batch_size: int = settings.perf.elements_batch_size
+
    @abstractmethod
    def is_processable(self, doc: DoclingDocument, element: NodeItem) -> bool:
        pass
--- a/docling/models/code_formula_model.py
+++ b/docling/models/code_formula_model.py
@@ -61,6 +61,7 @@ class CodeFormulaModel(BaseItemAndImageEnrichmentModel):
        Processes the given batch of elements and enriches them with predictions.
    """

+    elements_batch_size = 5
    images_scale = 1.66  # = 120 dpi, aligned with training data resolution
    expansion_factor = 0.03

--- a/docling/pipeline/base_pipeline.py
+++ b/docling/pipeline/base_pipeline.py
@@ -79,7 +79,7 @@ class BasePipeline(ABC):
            for model in self.enrichment_pipe:
                for element_batch in chunkify(
                    _prepare_elements(conv_res, model),
-                    settings.perf.elements_batch_size,
+                    model.elements_batch_size,
                ):
                    for element in model(
                        doc=conv_res.document, element_batch=element_batch