feat: Introduce LayoutOptions to control layout postprocessing behaviour (#1870)

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
Christoph Auer 2025-07-04 15:36:13 +02:00 committed by GitHub
parent 598c9c53d4
commit ec6cf6f7e8
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 24 additions and 4 deletions

View File

@ -1,4 +1,5 @@
import logging import logging
from datetime import datetime
from enum import Enum from enum import Enum
from pathlib import Path from pathlib import Path
from typing import Any, ClassVar, Dict, List, Literal, Optional, Union from typing import Any, ClassVar, Dict, List, Literal, Optional, Union
@ -265,6 +266,12 @@ class VlmPipelineOptions(PaginatedPipelineOptions):
) )
class LayoutOptions(BaseModel):
    """Options for layout processing.

    Carries the switches that control layout postprocessing behaviour.
    """

    # When True (the default), the layout postprocessor creates clusters for
    # orphaned cells, i.e. cells it finds unassigned to any cluster.
    # When False, that orphan-cluster creation step is skipped.
    create_orphan_clusters: bool = True  # Whether to create clusters for orphaned cells
class AsrPipelineOptions(PipelineOptions): class AsrPipelineOptions(PipelineOptions):
asr_options: Union[InlineAsrOptions] = asr_model_specs.WHISPER_TINY asr_options: Union[InlineAsrOptions] = asr_model_specs.WHISPER_TINY
artifacts_path: Optional[Union[Path, str]] = None artifacts_path: Optional[Union[Path, str]] = None
@ -289,6 +296,7 @@ class PdfPipelineOptions(PaginatedPipelineOptions):
picture_description_options: PictureDescriptionBaseOptions = ( picture_description_options: PictureDescriptionBaseOptions = (
smolvlm_picture_description smolvlm_picture_description
) )
layout_options: LayoutOptions = LayoutOptions()
images_scale: float = 1.0 images_scale: float = 1.0
generate_page_images: bool = False generate_page_images: bool = False

View File

@ -12,6 +12,7 @@ from PIL import Image
from docling.datamodel.accelerator_options import AcceleratorOptions from docling.datamodel.accelerator_options import AcceleratorOptions
from docling.datamodel.base_models import BoundingBox, Cluster, LayoutPrediction, Page from docling.datamodel.base_models import BoundingBox, Cluster, LayoutPrediction, Page
from docling.datamodel.document import ConversionResult from docling.datamodel.document import ConversionResult
from docling.datamodel.pipeline_options import LayoutOptions
from docling.datamodel.settings import settings from docling.datamodel.settings import settings
from docling.models.base_model import BasePageModel from docling.models.base_model import BasePageModel
from docling.models.utils.hf_model_download import download_hf_model from docling.models.utils.hf_model_download import download_hf_model
@ -48,10 +49,15 @@ class LayoutModel(BasePageModel):
CONTAINER_LABELS = [DocItemLabel.FORM, DocItemLabel.KEY_VALUE_REGION] CONTAINER_LABELS = [DocItemLabel.FORM, DocItemLabel.KEY_VALUE_REGION]
def __init__( def __init__(
self, artifacts_path: Optional[Path], accelerator_options: AcceleratorOptions self,
artifacts_path: Optional[Path],
accelerator_options: AcceleratorOptions,
options: LayoutOptions,
): ):
from docling_ibm_models.layoutmodel.layout_predictor import LayoutPredictor from docling_ibm_models.layoutmodel.layout_predictor import LayoutPredictor
self.options = options
device = decide_device(accelerator_options.device) device = decide_device(accelerator_options.device)
if artifacts_path is None: if artifacts_path is None:
@ -177,7 +183,7 @@ class LayoutModel(BasePageModel):
# Apply postprocessing # Apply postprocessing
processed_clusters, processed_cells = LayoutPostprocessor( processed_clusters, processed_cells = LayoutPostprocessor(
page, clusters page, clusters, self.options
).postprocess() ).postprocess()
# Note: LayoutPostprocessor updates page.cells and page.parsed_page internally # Note: LayoutPostprocessor updates page.cells and page.parsed_page internally

View File

@ -80,6 +80,7 @@ class StandardPdfPipeline(PaginatedPipeline):
LayoutModel( LayoutModel(
artifacts_path=artifacts_path, artifacts_path=artifacts_path,
accelerator_options=pipeline_options.accelerator_options, accelerator_options=pipeline_options.accelerator_options,
options=pipeline_options.layout_options,
), ),
# Table structure model # Table structure model
TableStructureModel( TableStructureModel(

View File

@ -9,6 +9,7 @@ from docling_core.types.doc.page import TextCell
from rtree import index from rtree import index
from docling.datamodel.base_models import BoundingBox, Cluster, Page from docling.datamodel.base_models import BoundingBox, Cluster, Page
from docling.datamodel.pipeline_options import LayoutOptions
_log = logging.getLogger(__name__) _log = logging.getLogger(__name__)
@ -194,12 +195,16 @@ class LayoutPostprocessor:
DocItemLabel.TITLE: DocItemLabel.SECTION_HEADER, DocItemLabel.TITLE: DocItemLabel.SECTION_HEADER,
} }
def __init__(self, page: Page, clusters: List[Cluster]) -> None: def __init__(
self, page: Page, clusters: List[Cluster], options: LayoutOptions
) -> None:
"""Initialize processor with page and clusters.""" """Initialize processor with page and clusters."""
self.cells = page.cells self.cells = page.cells
self.page = page self.page = page
self.page_size = page.size self.page_size = page.size
self.all_clusters = clusters self.all_clusters = clusters
self.options = options
self.regular_clusters = [ self.regular_clusters = [
c for c in clusters if c.label not in self.SPECIAL_TYPES c for c in clusters if c.label not in self.SPECIAL_TYPES
] ]
@ -267,7 +272,7 @@ class LayoutPostprocessor:
# Handle orphaned cells # Handle orphaned cells
unassigned = self._find_unassigned_cells(clusters) unassigned = self._find_unassigned_cells(clusters)
if unassigned: if unassigned and self.options.create_orphan_clusters:
next_id = max((c.id for c in self.all_clusters), default=0) + 1 next_id = max((c.id for c in self.all_clusters), default=0) + 1
orphan_clusters = [] orphan_clusters = []
for i, cell in enumerate(unassigned): for i, cell in enumerate(unassigned):