feat: Introduce LayoutOptions to control layout postprocessing behaviour (#1870)

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
Christoph Auer
2025-07-04 15:36:13 +02:00
committed by GitHub
parent 598c9c53d4
commit ec6cf6f7e8
4 changed files with 24 additions and 4 deletions

View File

@@ -12,6 +12,7 @@ from PIL import Image
from docling.datamodel.accelerator_options import AcceleratorOptions
from docling.datamodel.base_models import BoundingBox, Cluster, LayoutPrediction, Page
from docling.datamodel.document import ConversionResult
from docling.datamodel.pipeline_options import LayoutOptions
from docling.datamodel.settings import settings
from docling.models.base_model import BasePageModel
from docling.models.utils.hf_model_download import download_hf_model
@@ -48,10 +49,15 @@ class LayoutModel(BasePageModel):
CONTAINER_LABELS = [DocItemLabel.FORM, DocItemLabel.KEY_VALUE_REGION]
def __init__(
self, artifacts_path: Optional[Path], accelerator_options: AcceleratorOptions
self,
artifacts_path: Optional[Path],
accelerator_options: AcceleratorOptions,
options: LayoutOptions,
):
from docling_ibm_models.layoutmodel.layout_predictor import LayoutPredictor
self.options = options
device = decide_device(accelerator_options.device)
if artifacts_path is None:
@@ -177,7 +183,7 @@ class LayoutModel(BasePageModel):
# Apply postprocessing
processed_clusters, processed_cells = LayoutPostprocessor(
page, clusters
page, clusters, self.options
).postprocess()
# Note: LayoutPostprocessor updates page.cells and page.parsed_page internally