fix: fix ZeroDivisionError for cell_bbox.area() (#1636)

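Fix the ZeroDivisionError raised when an overlap area is divided by cell_bbox.area() for a zero-area bounding box, by replacing the manual `intersection_area_with(...) / area()` divisions with `intersection_over_self(...)` (and `intersection_over_union(...)` for the IoU computation).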

Signed-off-by: Saidgurbuz <said.gurbuz@epfl.ch>
This commit is contained in:
Said Gürbüz 2025-05-22 13:43:33 +02:00 committed by GitHub
parent 45265bf8b1
commit c2f595d283
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 13 additions and 25 deletions

View File

@ -60,7 +60,7 @@ class DoclingParsePageBackend(PdfPageBackend):
coord_origin=CoordOrigin.BOTTOMLEFT, coord_origin=CoordOrigin.BOTTOMLEFT,
).to_top_left_origin(page_height=page_size.height * scale) ).to_top_left_origin(page_height=page_size.height * scale)
overlap_frac = cell_bbox.intersection_area_with(bbox) / cell_bbox.area() overlap_frac = cell_bbox.intersection_over_self(bbox)
if overlap_frac > 0.5: if overlap_frac > 0.5:
if len(text_piece) > 0: if len(text_piece) > 0:

View File

@ -71,7 +71,7 @@ class DoclingParseV2PageBackend(PdfPageBackend):
coord_origin=CoordOrigin.BOTTOMLEFT, coord_origin=CoordOrigin.BOTTOMLEFT,
).to_top_left_origin(page_height=page_size.height * scale) ).to_top_left_origin(page_height=page_size.height * scale)
overlap_frac = cell_bbox.intersection_area_with(bbox) / cell_bbox.area() overlap_frac = cell_bbox.intersection_over_self(bbox)
if overlap_frac > 0.5: if overlap_frac > 0.5:
if len(text_piece) > 0: if len(text_piece) > 0:

View File

@ -46,7 +46,7 @@ class DoclingParseV4PageBackend(PdfPageBackend):
.scaled(scale) .scaled(scale)
) )
overlap_frac = cell_bbox.intersection_area_with(bbox) / cell_bbox.area() overlap_frac = cell_bbox.intersection_over_self(bbox)
if overlap_frac > 0.5: if overlap_frac > 0.5:
if len(text_piece) > 0: if len(text_piece) > 0:

View File

@ -90,17 +90,12 @@ class SpatialClusterIndex:
containment_threshold: float, containment_threshold: float,
) -> bool: ) -> bool:
"""Check if two bboxes overlap sufficiently.""" """Check if two bboxes overlap sufficiently."""
area1, area2 = bbox1.area(), bbox2.area() if bbox1.area() <= 0 or bbox2.area() <= 0:
if area1 <= 0 or area2 <= 0:
return False return False
overlap_area = bbox1.intersection_area_with(bbox2) iou = bbox1.intersection_over_union(bbox2)
if overlap_area <= 0: containment1 = bbox1.intersection_over_self(bbox2)
return False containment2 = bbox2.intersection_over_self(bbox1)
iou = overlap_area / (area1 + area2 - overlap_area)
containment1 = overlap_area / area1
containment2 = overlap_area / area2
return ( return (
iou > overlap_threshold iou > overlap_threshold
@ -321,11 +316,9 @@ class LayoutPostprocessor:
for special in special_clusters: for special in special_clusters:
contained = [] contained = []
for cluster in self.regular_clusters: for cluster in self.regular_clusters:
overlap = cluster.bbox.intersection_area_with(special.bbox) containment = cluster.bbox.intersection_over_self(special.bbox)
if overlap > 0: if containment > 0.8:
containment = overlap / cluster.bbox.area() contained.append(cluster)
if containment > 0.8:
contained.append(cluster)
if contained: if contained:
# Sort contained clusters by minimum cell ID: # Sort contained clusters by minimum cell ID:
@ -379,9 +372,7 @@ class LayoutPostprocessor:
for regular in self.regular_clusters: for regular in self.regular_clusters:
if regular.label == DocItemLabel.TABLE: if regular.label == DocItemLabel.TABLE:
# Calculate overlap # Calculate overlap
overlap = regular.bbox.intersection_area_with(wrapper.bbox) overlap_ratio = wrapper.bbox.intersection_over_self(regular.bbox)
wrapper_area = wrapper.bbox.area()
overlap_ratio = overlap / wrapper_area
conf_diff = wrapper.confidence - regular.confidence conf_diff = wrapper.confidence - regular.confidence
@ -421,8 +412,7 @@ class LayoutPostprocessor:
# Rule 2: CODE vs others # Rule 2: CODE vs others
if candidate.label == DocItemLabel.CODE: if candidate.label == DocItemLabel.CODE:
# Calculate how much of the other cluster is contained within the CODE cluster # Calculate how much of the other cluster is contained within the CODE cluster
overlap = other.bbox.intersection_area_with(candidate.bbox) containment = other.bbox.intersection_over_self(candidate.bbox)
containment = overlap / other.bbox.area()
if containment > 0.8: # other is 80% contained within CODE if containment > 0.8: # other is 80% contained within CODE
return True return True
@ -586,11 +576,9 @@ class LayoutPostprocessor:
if cell.rect.to_bounding_box().area() <= 0: if cell.rect.to_bounding_box().area() <= 0:
continue continue
overlap = cell.rect.to_bounding_box().intersection_area_with( overlap_ratio = cell.rect.to_bounding_box().intersection_over_self(
cluster.bbox cluster.bbox
) )
overlap_ratio = overlap / cell.rect.to_bounding_box().area()
if overlap_ratio > best_overlap: if overlap_ratio > best_overlap:
best_overlap = overlap_ratio best_overlap = overlap_ratio
best_cluster = cluster best_cluster = cluster