fix: fix ZeroDivisionError for cell_bbox.area() (#1636)

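Fix the ZeroDivisionError raised when cell_bbox.area() is zero: replace the manual `intersection_area_with(...) / area()` divisions with the bbox helpers `intersection_over_self()` and `intersection_over_union()`.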

Signed-off-by: Saidgurbuz <said.gurbuz@epfl.ch>
This commit is contained in:
Said Gürbüz 2025-05-22 13:43:33 +02:00 committed by GitHub
parent 45265bf8b1
commit c2f595d283
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 13 additions and 25 deletions

View File

@@ -60,7 +60,7 @@ class DoclingParsePageBackend(PdfPageBackend):
coord_origin=CoordOrigin.BOTTOMLEFT,
).to_top_left_origin(page_height=page_size.height * scale)
overlap_frac = cell_bbox.intersection_area_with(bbox) / cell_bbox.area()
overlap_frac = cell_bbox.intersection_over_self(bbox)
if overlap_frac > 0.5:
if len(text_piece) > 0:

View File

@@ -71,7 +71,7 @@ class DoclingParseV2PageBackend(PdfPageBackend):
coord_origin=CoordOrigin.BOTTOMLEFT,
).to_top_left_origin(page_height=page_size.height * scale)
overlap_frac = cell_bbox.intersection_area_with(bbox) / cell_bbox.area()
overlap_frac = cell_bbox.intersection_over_self(bbox)
if overlap_frac > 0.5:
if len(text_piece) > 0:

View File

@@ -46,7 +46,7 @@ class DoclingParseV4PageBackend(PdfPageBackend):
.scaled(scale)
)
overlap_frac = cell_bbox.intersection_area_with(bbox) / cell_bbox.area()
overlap_frac = cell_bbox.intersection_over_self(bbox)
if overlap_frac > 0.5:
if len(text_piece) > 0:

View File

@@ -90,17 +90,12 @@ class SpatialClusterIndex:
containment_threshold: float,
) -> bool:
"""Check if two bboxes overlap sufficiently."""
area1, area2 = bbox1.area(), bbox2.area()
if area1 <= 0 or area2 <= 0:
if bbox1.area() <= 0 or bbox2.area() <= 0:
return False
overlap_area = bbox1.intersection_area_with(bbox2)
if overlap_area <= 0:
return False
iou = overlap_area / (area1 + area2 - overlap_area)
containment1 = overlap_area / area1
containment2 = overlap_area / area2
iou = bbox1.intersection_over_union(bbox2)
containment1 = bbox1.intersection_over_self(bbox2)
containment2 = bbox2.intersection_over_self(bbox1)
return (
iou > overlap_threshold
@@ -321,11 +316,9 @@ class LayoutPostprocessor:
for special in special_clusters:
contained = []
for cluster in self.regular_clusters:
overlap = cluster.bbox.intersection_area_with(special.bbox)
if overlap > 0:
containment = overlap / cluster.bbox.area()
if containment > 0.8:
contained.append(cluster)
containment = cluster.bbox.intersection_over_self(special.bbox)
if containment > 0.8:
contained.append(cluster)
if contained:
# Sort contained clusters by minimum cell ID:
@@ -379,9 +372,7 @@ class LayoutPostprocessor:
for regular in self.regular_clusters:
if regular.label == DocItemLabel.TABLE:
# Calculate overlap
overlap = regular.bbox.intersection_area_with(wrapper.bbox)
wrapper_area = wrapper.bbox.area()
overlap_ratio = overlap / wrapper_area
overlap_ratio = wrapper.bbox.intersection_over_self(regular.bbox)
conf_diff = wrapper.confidence - regular.confidence
@@ -421,8 +412,7 @@ class LayoutPostprocessor:
# Rule 2: CODE vs others
if candidate.label == DocItemLabel.CODE:
# Calculate how much of the other cluster is contained within the CODE cluster
overlap = other.bbox.intersection_area_with(candidate.bbox)
containment = overlap / other.bbox.area()
containment = other.bbox.intersection_over_self(candidate.bbox)
if containment > 0.8: # other is 80% contained within CODE
return True
@@ -586,11 +576,9 @@ class LayoutPostprocessor:
if cell.rect.to_bounding_box().area() <= 0:
continue
overlap = cell.rect.to_bounding_box().intersection_area_with(
overlap_ratio = cell.rect.to_bounding_box().intersection_over_self(
cluster.bbox
)
overlap_ratio = overlap / cell.rect.to_bounding_box().area()
if overlap_ratio > best_overlap:
best_overlap = overlap_ratio
best_cluster = cluster