refactor: allow the usage of backends in the enrich models and generalize the interface (#742)

* fix get image with cropbox

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* allow the usage of backends in the enrich models and generalize the interface

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* move logic in BaseTextImageEnrichmentModel

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* renaming

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

---------

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
This commit is contained in:
Michele Dolfi
2025-01-15 09:52:38 +01:00
committed by GitHub
parent f7e1cbf629
commit 57fc28d3d8
9 changed files with 208 additions and 38 deletions

View File

@@ -1,7 +1,7 @@
import logging
import sys
from pathlib import Path
from typing import Optional
from typing import Iterable, Optional
from docling_core.types.doc import DocItem, ImageRef, PictureItem, TableItem
@@ -17,6 +17,7 @@ from docling.datamodel.pipeline_options import (
TesseractCliOcrOptions,
TesseractOcrOptions,
)
from docling.models.base_model import BasePageModel
from docling.models.base_ocr_model import BaseOcrModel
from docling.models.ds_glm_model import GlmModel, GlmOptions
from docling.models.easyocr_model import EasyOcrModel
@@ -50,7 +51,7 @@ class StandardPdfPipeline(PaginatedPipeline):
else:
self.artifacts_path = Path(pipeline_options.artifacts_path)
keep_images = (
self.keep_images = (
self.pipeline_options.generate_page_images
or self.pipeline_options.generate_picture_images
or self.pipeline_options.generate_table_images
@@ -87,7 +88,7 @@ class StandardPdfPipeline(PaginatedPipeline):
accelerator_options=pipeline_options.accelerator_options,
),
# Page assemble
PageAssembleModel(options=PageAssembleOptions(keep_images=keep_images)),
PageAssembleModel(options=PageAssembleOptions()),
]
self.enrichment_pipe = [