diff --git a/docling/backend/docling_parse_backend.py b/docling/backend/docling_parse_backend.py index 33e7792..2846981 100644 --- a/docling/backend/docling_parse_backend.py +++ b/docling/backend/docling_parse_backend.py @@ -60,7 +60,7 @@ class DoclingParsePageBackend(PdfPageBackend): coord_origin=CoordOrigin.BOTTOMLEFT, ).to_top_left_origin(page_height=page_size.height * scale) - overlap_frac = cell_bbox.intersection_area_with(bbox) / cell_bbox.area() + overlap_frac = cell_bbox.intersection_over_self(bbox) if overlap_frac > 0.5: if len(text_piece) > 0: diff --git a/docling/backend/docling_parse_v2_backend.py b/docling/backend/docling_parse_v2_backend.py index 6c12b66..2d83dc2 100644 --- a/docling/backend/docling_parse_v2_backend.py +++ b/docling/backend/docling_parse_v2_backend.py @@ -71,7 +71,7 @@ class DoclingParseV2PageBackend(PdfPageBackend): coord_origin=CoordOrigin.BOTTOMLEFT, ).to_top_left_origin(page_height=page_size.height * scale) - overlap_frac = cell_bbox.intersection_area_with(bbox) / cell_bbox.area() + overlap_frac = cell_bbox.intersection_over_self(bbox) if overlap_frac > 0.5: if len(text_piece) > 0: diff --git a/docling/backend/docling_parse_v4_backend.py b/docling/backend/docling_parse_v4_backend.py index 3e59f12..a95aceb 100644 --- a/docling/backend/docling_parse_v4_backend.py +++ b/docling/backend/docling_parse_v4_backend.py @@ -46,7 +46,7 @@ class DoclingParseV4PageBackend(PdfPageBackend): .scaled(scale) ) - overlap_frac = cell_bbox.intersection_area_with(bbox) / cell_bbox.area() + overlap_frac = cell_bbox.intersection_over_self(bbox) if overlap_frac > 0.5: if len(text_piece) > 0: diff --git a/docling/utils/layout_postprocessor.py b/docling/utils/layout_postprocessor.py index 4c25655..27e9ea2 100644 --- a/docling/utils/layout_postprocessor.py +++ b/docling/utils/layout_postprocessor.py @@ -90,17 +90,12 @@ class SpatialClusterIndex: containment_threshold: float, ) -> bool: """Check if two bboxes overlap sufficiently.""" - area1, area2 = bbox1.area(), bbox2.area() - if area1 <= 0 or area2 <= 0: + if bbox1.area() <= 0 or bbox2.area() <= 0: return False - overlap_area = bbox1.intersection_area_with(bbox2) - if overlap_area <= 0: - return False - - iou = overlap_area / (area1 + area2 - overlap_area) - containment1 = overlap_area / area1 - containment2 = overlap_area / area2 + iou = bbox1.intersection_over_union(bbox2) + containment1 = bbox1.intersection_over_self(bbox2) + containment2 = bbox2.intersection_over_self(bbox1) return ( iou > overlap_threshold @@ -321,11 +316,9 @@ class LayoutPostprocessor: for special in special_clusters: contained = [] for cluster in self.regular_clusters: - overlap = cluster.bbox.intersection_area_with(special.bbox) - if overlap > 0: - containment = overlap / cluster.bbox.area() - if containment > 0.8: - contained.append(cluster) + containment = cluster.bbox.intersection_over_self(special.bbox) + if containment > 0.8: + contained.append(cluster) if contained: # Sort contained clusters by minimum cell ID: @@ -379,9 +372,7 @@ class LayoutPostprocessor: for regular in self.regular_clusters: if regular.label == DocItemLabel.TABLE: # Calculate overlap - overlap = regular.bbox.intersection_area_with(wrapper.bbox) - wrapper_area = wrapper.bbox.area() - overlap_ratio = overlap / wrapper_area + overlap_ratio = wrapper.bbox.intersection_over_self(regular.bbox) conf_diff = wrapper.confidence - regular.confidence @@ -421,8 +412,7 @@ class LayoutPostprocessor: # Rule 2: CODE vs others if candidate.label == DocItemLabel.CODE: # Calculate how much of the other cluster is contained within the CODE cluster - overlap = other.bbox.intersection_area_with(candidate.bbox) - containment = overlap / other.bbox.area() + containment = other.bbox.intersection_over_self(candidate.bbox) if containment > 0.8: # other is 80% contained within CODE return True @@ -586,11 +576,9 @@ class LayoutPostprocessor: if cell.rect.to_bounding_box().area() <= 0: continue - overlap = cell.rect.to_bounding_box().intersection_area_with( + overlap_ratio = cell.rect.to_bounding_box().intersection_over_self( cluster.bbox ) - overlap_ratio = overlap / cell.rect.to_bounding_box().area() - if overlap_ratio > best_overlap: best_overlap = overlap_ratio best_cluster = cluster