fix: fix ZeroDivisionError for cell_bbox.area() (#1636)
fix ZeroDivisionError for cell_bbox.area()

Signed-off-by: Saidgurbuz <said.gurbuz@epfl.ch>
This commit is contained in:
parent
45265bf8b1
commit
c2f595d283
@ -60,7 +60,7 @@ class DoclingParsePageBackend(PdfPageBackend):
|
|||||||
coord_origin=CoordOrigin.BOTTOMLEFT,
|
coord_origin=CoordOrigin.BOTTOMLEFT,
|
||||||
).to_top_left_origin(page_height=page_size.height * scale)
|
).to_top_left_origin(page_height=page_size.height * scale)
|
||||||
|
|
||||||
overlap_frac = cell_bbox.intersection_area_with(bbox) / cell_bbox.area()
|
overlap_frac = cell_bbox.intersection_over_self(bbox)
|
||||||
|
|
||||||
if overlap_frac > 0.5:
|
if overlap_frac > 0.5:
|
||||||
if len(text_piece) > 0:
|
if len(text_piece) > 0:
|
||||||
|
@ -71,7 +71,7 @@ class DoclingParseV2PageBackend(PdfPageBackend):
|
|||||||
coord_origin=CoordOrigin.BOTTOMLEFT,
|
coord_origin=CoordOrigin.BOTTOMLEFT,
|
||||||
).to_top_left_origin(page_height=page_size.height * scale)
|
).to_top_left_origin(page_height=page_size.height * scale)
|
||||||
|
|
||||||
overlap_frac = cell_bbox.intersection_area_with(bbox) / cell_bbox.area()
|
overlap_frac = cell_bbox.intersection_over_self(bbox)
|
||||||
|
|
||||||
if overlap_frac > 0.5:
|
if overlap_frac > 0.5:
|
||||||
if len(text_piece) > 0:
|
if len(text_piece) > 0:
|
||||||
|
@ -46,7 +46,7 @@ class DoclingParseV4PageBackend(PdfPageBackend):
|
|||||||
.scaled(scale)
|
.scaled(scale)
|
||||||
)
|
)
|
||||||
|
|
||||||
overlap_frac = cell_bbox.intersection_area_with(bbox) / cell_bbox.area()
|
overlap_frac = cell_bbox.intersection_over_self(bbox)
|
||||||
|
|
||||||
if overlap_frac > 0.5:
|
if overlap_frac > 0.5:
|
||||||
if len(text_piece) > 0:
|
if len(text_piece) > 0:
|
||||||
|
@ -90,17 +90,12 @@ class SpatialClusterIndex:
|
|||||||
containment_threshold: float,
|
containment_threshold: float,
|
||||||
) -> bool:
|
) -> bool:
|
||||||
"""Check if two bboxes overlap sufficiently."""
|
"""Check if two bboxes overlap sufficiently."""
|
||||||
area1, area2 = bbox1.area(), bbox2.area()
|
if bbox1.area() <= 0 or bbox2.area() <= 0:
|
||||||
if area1 <= 0 or area2 <= 0:
|
|
||||||
return False
|
return False
|
||||||
|
|
||||||
overlap_area = bbox1.intersection_area_with(bbox2)
|
iou = bbox1.intersection_over_union(bbox2)
|
||||||
if overlap_area <= 0:
|
containment1 = bbox1.intersection_over_self(bbox2)
|
||||||
return False
|
containment2 = bbox2.intersection_over_self(bbox1)
|
||||||
|
|
||||||
iou = overlap_area / (area1 + area2 - overlap_area)
|
|
||||||
containment1 = overlap_area / area1
|
|
||||||
containment2 = overlap_area / area2
|
|
||||||
|
|
||||||
return (
|
return (
|
||||||
iou > overlap_threshold
|
iou > overlap_threshold
|
||||||
@ -321,11 +316,9 @@ class LayoutPostprocessor:
|
|||||||
for special in special_clusters:
|
for special in special_clusters:
|
||||||
contained = []
|
contained = []
|
||||||
for cluster in self.regular_clusters:
|
for cluster in self.regular_clusters:
|
||||||
overlap = cluster.bbox.intersection_area_with(special.bbox)
|
containment = cluster.bbox.intersection_over_self(special.bbox)
|
||||||
if overlap > 0:
|
if containment > 0.8:
|
||||||
containment = overlap / cluster.bbox.area()
|
contained.append(cluster)
|
||||||
if containment > 0.8:
|
|
||||||
contained.append(cluster)
|
|
||||||
|
|
||||||
if contained:
|
if contained:
|
||||||
# Sort contained clusters by minimum cell ID:
|
# Sort contained clusters by minimum cell ID:
|
||||||
@ -379,9 +372,7 @@ class LayoutPostprocessor:
|
|||||||
for regular in self.regular_clusters:
|
for regular in self.regular_clusters:
|
||||||
if regular.label == DocItemLabel.TABLE:
|
if regular.label == DocItemLabel.TABLE:
|
||||||
# Calculate overlap
|
# Calculate overlap
|
||||||
overlap = regular.bbox.intersection_area_with(wrapper.bbox)
|
overlap_ratio = wrapper.bbox.intersection_over_self(regular.bbox)
|
||||||
wrapper_area = wrapper.bbox.area()
|
|
||||||
overlap_ratio = overlap / wrapper_area
|
|
||||||
|
|
||||||
conf_diff = wrapper.confidence - regular.confidence
|
conf_diff = wrapper.confidence - regular.confidence
|
||||||
|
|
||||||
@ -421,8 +412,7 @@ class LayoutPostprocessor:
|
|||||||
# Rule 2: CODE vs others
|
# Rule 2: CODE vs others
|
||||||
if candidate.label == DocItemLabel.CODE:
|
if candidate.label == DocItemLabel.CODE:
|
||||||
# Calculate how much of the other cluster is contained within the CODE cluster
|
# Calculate how much of the other cluster is contained within the CODE cluster
|
||||||
overlap = other.bbox.intersection_area_with(candidate.bbox)
|
containment = other.bbox.intersection_over_self(candidate.bbox)
|
||||||
containment = overlap / other.bbox.area()
|
|
||||||
if containment > 0.8: # other is 80% contained within CODE
|
if containment > 0.8: # other is 80% contained within CODE
|
||||||
return True
|
return True
|
||||||
|
|
||||||
@ -586,11 +576,9 @@ class LayoutPostprocessor:
|
|||||||
if cell.rect.to_bounding_box().area() <= 0:
|
if cell.rect.to_bounding_box().area() <= 0:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
overlap = cell.rect.to_bounding_box().intersection_area_with(
|
overlap_ratio = cell.rect.to_bounding_box().intersection_over_self(
|
||||||
cluster.bbox
|
cluster.bbox
|
||||||
)
|
)
|
||||||
overlap_ratio = overlap / cell.rect.to_bounding_box().area()
|
|
||||||
|
|
||||||
if overlap_ratio > best_overlap:
|
if overlap_ratio > best_overlap:
|
||||||
best_overlap = overlap_ratio
|
best_overlap = overlap_ratio
|
||||||
best_cluster = cluster
|
best_cluster = cluster
|
||||||
|
Loading…
Reference in New Issue
Block a user