feat: Introduce LayoutOptions to control layout postprocessing behaviour (#1870)
Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
parent
598c9c53d4
commit
ec6cf6f7e8
@ -1,4 +1,5 @@
|
|||||||
import logging
|
import logging
|
||||||
|
from datetime import datetime
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any, ClassVar, Dict, List, Literal, Optional, Union
|
from typing import Any, ClassVar, Dict, List, Literal, Optional, Union
|
||||||
@ -265,6 +266,12 @@ class VlmPipelineOptions(PaginatedPipelineOptions):
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class LayoutOptions(BaseModel):
    """Options for layout processing."""

    # Whether to create clusters for orphaned cells.
    # Defaults to True. The layout postprocessor only builds its extra
    # "orphan" clusters for unassigned cells when this flag is set; with
    # False that step is skipped.
    create_orphan_clusters: bool = True
|
||||||
|
|
||||||
|
|
||||||
class AsrPipelineOptions(PipelineOptions):
|
class AsrPipelineOptions(PipelineOptions):
|
||||||
asr_options: Union[InlineAsrOptions] = asr_model_specs.WHISPER_TINY
|
asr_options: Union[InlineAsrOptions] = asr_model_specs.WHISPER_TINY
|
||||||
artifacts_path: Optional[Union[Path, str]] = None
|
artifacts_path: Optional[Union[Path, str]] = None
|
||||||
@ -289,6 +296,7 @@ class PdfPipelineOptions(PaginatedPipelineOptions):
|
|||||||
picture_description_options: PictureDescriptionBaseOptions = (
|
picture_description_options: PictureDescriptionBaseOptions = (
|
||||||
smolvlm_picture_description
|
smolvlm_picture_description
|
||||||
)
|
)
|
||||||
|
layout_options: LayoutOptions = LayoutOptions()
|
||||||
|
|
||||||
images_scale: float = 1.0
|
images_scale: float = 1.0
|
||||||
generate_page_images: bool = False
|
generate_page_images: bool = False
|
||||||
|
@ -12,6 +12,7 @@ from PIL import Image
|
|||||||
from docling.datamodel.accelerator_options import AcceleratorOptions
|
from docling.datamodel.accelerator_options import AcceleratorOptions
|
||||||
from docling.datamodel.base_models import BoundingBox, Cluster, LayoutPrediction, Page
|
from docling.datamodel.base_models import BoundingBox, Cluster, LayoutPrediction, Page
|
||||||
from docling.datamodel.document import ConversionResult
|
from docling.datamodel.document import ConversionResult
|
||||||
|
from docling.datamodel.pipeline_options import LayoutOptions
|
||||||
from docling.datamodel.settings import settings
|
from docling.datamodel.settings import settings
|
||||||
from docling.models.base_model import BasePageModel
|
from docling.models.base_model import BasePageModel
|
||||||
from docling.models.utils.hf_model_download import download_hf_model
|
from docling.models.utils.hf_model_download import download_hf_model
|
||||||
@ -48,10 +49,15 @@ class LayoutModel(BasePageModel):
|
|||||||
CONTAINER_LABELS = [DocItemLabel.FORM, DocItemLabel.KEY_VALUE_REGION]
|
CONTAINER_LABELS = [DocItemLabel.FORM, DocItemLabel.KEY_VALUE_REGION]
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self, artifacts_path: Optional[Path], accelerator_options: AcceleratorOptions
|
self,
|
||||||
|
artifacts_path: Optional[Path],
|
||||||
|
accelerator_options: AcceleratorOptions,
|
||||||
|
options: LayoutOptions,
|
||||||
):
|
):
|
||||||
from docling_ibm_models.layoutmodel.layout_predictor import LayoutPredictor
|
from docling_ibm_models.layoutmodel.layout_predictor import LayoutPredictor
|
||||||
|
|
||||||
|
self.options = options
|
||||||
|
|
||||||
device = decide_device(accelerator_options.device)
|
device = decide_device(accelerator_options.device)
|
||||||
|
|
||||||
if artifacts_path is None:
|
if artifacts_path is None:
|
||||||
@ -177,7 +183,7 @@ class LayoutModel(BasePageModel):
|
|||||||
# Apply postprocessing
|
# Apply postprocessing
|
||||||
|
|
||||||
processed_clusters, processed_cells = LayoutPostprocessor(
|
processed_clusters, processed_cells = LayoutPostprocessor(
|
||||||
page, clusters
|
page, clusters, self.options
|
||||||
).postprocess()
|
).postprocess()
|
||||||
# Note: LayoutPostprocessor updates page.cells and page.parsed_page internally
|
# Note: LayoutPostprocessor updates page.cells and page.parsed_page internally
|
||||||
|
|
||||||
|
@ -80,6 +80,7 @@ class StandardPdfPipeline(PaginatedPipeline):
|
|||||||
LayoutModel(
|
LayoutModel(
|
||||||
artifacts_path=artifacts_path,
|
artifacts_path=artifacts_path,
|
||||||
accelerator_options=pipeline_options.accelerator_options,
|
accelerator_options=pipeline_options.accelerator_options,
|
||||||
|
options=pipeline_options.layout_options,
|
||||||
),
|
),
|
||||||
# Table structure model
|
# Table structure model
|
||||||
TableStructureModel(
|
TableStructureModel(
|
||||||
|
@ -9,6 +9,7 @@ from docling_core.types.doc.page import TextCell
|
|||||||
from rtree import index
|
from rtree import index
|
||||||
|
|
||||||
from docling.datamodel.base_models import BoundingBox, Cluster, Page
|
from docling.datamodel.base_models import BoundingBox, Cluster, Page
|
||||||
|
from docling.datamodel.pipeline_options import LayoutOptions
|
||||||
|
|
||||||
_log = logging.getLogger(__name__)
|
_log = logging.getLogger(__name__)
|
||||||
|
|
||||||
@ -194,12 +195,16 @@ class LayoutPostprocessor:
|
|||||||
DocItemLabel.TITLE: DocItemLabel.SECTION_HEADER,
|
DocItemLabel.TITLE: DocItemLabel.SECTION_HEADER,
|
||||||
}
|
}
|
||||||
|
|
||||||
def __init__(self, page: Page, clusters: List[Cluster]) -> None:
|
def __init__(
|
||||||
|
self, page: Page, clusters: List[Cluster], options: LayoutOptions
|
||||||
|
) -> None:
|
||||||
"""Initialize processor with page and clusters."""
|
"""Initialize processor with page and clusters."""
|
||||||
|
|
||||||
self.cells = page.cells
|
self.cells = page.cells
|
||||||
self.page = page
|
self.page = page
|
||||||
self.page_size = page.size
|
self.page_size = page.size
|
||||||
self.all_clusters = clusters
|
self.all_clusters = clusters
|
||||||
|
self.options = options
|
||||||
self.regular_clusters = [
|
self.regular_clusters = [
|
||||||
c for c in clusters if c.label not in self.SPECIAL_TYPES
|
c for c in clusters if c.label not in self.SPECIAL_TYPES
|
||||||
]
|
]
|
||||||
@ -267,7 +272,7 @@ class LayoutPostprocessor:
|
|||||||
|
|
||||||
# Handle orphaned cells
|
# Handle orphaned cells
|
||||||
unassigned = self._find_unassigned_cells(clusters)
|
unassigned = self._find_unassigned_cells(clusters)
|
||||||
if unassigned:
|
if unassigned and self.options.create_orphan_clusters:
|
||||||
next_id = max((c.id for c in self.all_clusters), default=0) + 1
|
next_id = max((c.id for c in self.all_clusters), default=0) + 1
|
||||||
orphan_clusters = []
|
orphan_clusters = []
|
||||||
for i, cell in enumerate(unassigned):
|
for i, cell in enumerate(unassigned):
|
||||||
|
Loading…
Reference in New Issue
Block a user