fix(pypdfium): resolve overlapping text when merging bounding boxes (#1549)

get merged_text from boundingbox instead of merging it to prevent overlaps

Signed-off-by: Pedro Ribeiro <pedro_ribeiro_93@hotmail.com>
This commit is contained in:
Pedro Ribeiro
2025-05-19 14:26:00 +01:00
committed by GitHub
parent 12a0e64892
commit 98b5eeb844
52 changed files with 52225 additions and 4690 deletions

View File

@@ -2686,7 +2686,7 @@
"b": 102.78223000000003,
"coord_origin": "TOPLEFT"
},
"confidence": 0.8858679533004761,
"confidence": 0.8858681321144104,
"cells": [
{
"index": 1,
@@ -2816,7 +2816,7 @@
"b": 179.20818999999995,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9577404260635376,
"confidence": 0.957740306854248,
"cells": [
{
"index": 5,
@@ -13628,7 +13628,7 @@
"b": 102.78223000000003,
"coord_origin": "TOPLEFT"
},
"confidence": 0.8858679533004761,
"confidence": 0.8858681321144104,
"cells": [
{
"index": 1,
@@ -13770,7 +13770,7 @@
"b": 179.20818999999995,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9577404260635376,
"confidence": 0.957740306854248,
"cells": [
{
"index": 5,
@@ -20153,7 +20153,7 @@
"b": 179.20818999999995,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9577404260635376,
"confidence": 0.957740306854248,
"cells": [
{
"index": 5,
@@ -26486,7 +26486,7 @@
"b": 102.78223000000003,
"coord_origin": "TOPLEFT"
},
"confidence": 0.8858679533004761,
"confidence": 0.8858681321144104,
"cells": [
{
"index": 1,