fix(pypdfium): resolve overlapping text when merging bounding boxes (#1549)

get merged_text from boundingbox instead of merging it to prevent overlaps

Signed-off-by: Pedro Ribeiro <pedro_ribeiro_93@hotmail.com>
This commit is contained in:
Pedro Ribeiro
2025-05-19 14:26:00 +01:00
committed by GitHub
parent 12a0e64892
commit 98b5eeb844
52 changed files with 52225 additions and 4690 deletions

View File

@@ -14942,9 +14942,9 @@
"page_no": 2,
"bbox": {
"l": 148.45364379882812,
"t": 583.6257629394531,
"t": 583.6257476806641,
"r": 464.3608093261719,
"b": 366.1537780761719,
"b": 366.1538391113281,
"coord_origin": "BOTTOMLEFT"
},
"charspan": [
@@ -15221,9 +15221,9 @@
{
"page_no": 7,
"bbox": {
"l": 164.65028381347656,
"l": 164.6503143310547,
"t": 628.2029113769531,
"r": 449.5505676269531,
"r": 449.550537109375,
"b": 511.6590576171875,
"coord_origin": "BOTTOMLEFT"
},
@@ -15475,7 +15475,7 @@
{
"page_no": 8,
"bbox": {
"l": 140.70968627929688,
"l": 140.70960998535156,
"t": 283.9361572265625,
"r": 472.73382568359375,
"b": 198.32281494140625,
@@ -15804,10 +15804,10 @@
{
"page_no": 10,
"bbox": {
"l": 162.67430114746094,
"t": 347.37744140625,
"r": 451.70062255859375,
"b": 128.78643798828125,
"l": 162.67434692382812,
"t": 347.3774719238281,
"r": 451.70068359375,
"b": 128.786376953125,
"coord_origin": "BOTTOMLEFT"
},
"charspan": [
@@ -15875,9 +15875,9 @@
{
"page_no": 11,
"bbox": {
"l": 168.39285278320312,
"l": 168.3928985595703,
"t": 610.0334930419922,
"r": 447.35137939453125,
"r": 447.3513488769531,
"b": 157.99432373046875,
"coord_origin": "BOTTOMLEFT"
},
@@ -17702,7 +17702,7 @@
"page_no": 10,
"bbox": {
"l": 143.6376495361328,
"t": 635.6522979736328,
"t": 635.6522827148438,
"r": 470.8485412597656,
"b": 528.7375183105469,
"coord_origin": "BOTTOMLEFT"