fix(pypdfium): resolve overlapping text when merging bounding boxes (#1549)

get merged_text from boundingbox instead of merging it to prevent overlaps

Signed-off-by: Pedro Ribeiro <pedro_ribeiro_93@hotmail.com>
This commit is contained in:
Pedro Ribeiro
2025-05-19 14:26:00 +01:00
committed by GitHub
parent 12a0e64892
commit 98b5eeb844
52 changed files with 52225 additions and 4690 deletions

View File

@@ -5951,7 +5951,7 @@
"b": 465.596681609368,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9393879771232605,
"confidence": 0.93938809633255,
"cells": [
{
"index": 77,
@@ -7406,7 +7406,7 @@
"b": 534.1167018462124,
"coord_origin": "TOPLEFT"
},
"confidence": 0.5769620537757874,
"confidence": 0.5769621729850769,
"cells": [
{
"index": 134,
@@ -8046,7 +8046,7 @@
"b": 650.6431884765625,
"coord_origin": "TOPLEFT"
},
"confidence": 0.6444889903068542,
"confidence": 0.6444893479347229,
"cells": [],
"children": []
}
@@ -10042,7 +10042,7 @@
"b": 465.596681609368,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9393879771232605,
"confidence": 0.93938809633255,
"cells": [
{
"index": 77,
@@ -11509,7 +11509,7 @@
"b": 534.1167018462124,
"coord_origin": "TOPLEFT"
},
"confidence": 0.5769620537757874,
"confidence": 0.5769621729850769,
"cells": [
{
"index": 134,
@@ -12155,7 +12155,7 @@
"b": 650.6431884765625,
"coord_origin": "TOPLEFT"
},
"confidence": 0.6444889903068542,
"confidence": 0.6444893479347229,
"cells": [],
"children": []
},
@@ -14148,7 +14148,7 @@
"b": 465.596681609368,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9393879771232605,
"confidence": 0.93938809633255,
"cells": [
{
"index": 77,
@@ -15615,7 +15615,7 @@
"b": 534.1167018462124,
"coord_origin": "TOPLEFT"
},
"confidence": 0.5769620537757874,
"confidence": 0.5769621729850769,
"cells": [
{
"index": 134,
@@ -16261,7 +16261,7 @@
"b": 650.6431884765625,
"coord_origin": "TOPLEFT"
},
"confidence": 0.6444889903068542,
"confidence": 0.6444893479347229,
"cells": [],
"children": []
},