fix(pypdfium): resolve overlapping text when merging bounding boxes (#1549)

Get merged_text from the bounding box instead of merging it, to prevent overlaps.

Signed-off-by: Pedro Ribeiro <pedro_ribeiro_93@hotmail.com>
This commit is contained in:
Pedro Ribeiro
2025-05-19 14:26:00 +01:00
committed by GitHub
parent 12a0e64892
commit 98b5eeb844
52 changed files with 52225 additions and 4690 deletions

View File

@@ -1171,7 +1171,7 @@
"b": 295.08200000000005,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9547483325004578,
"confidence": 0.9547481536865234,
"cells": [
{
"index": 17,
@@ -1311,7 +1311,7 @@
"b": 350.522,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9672117233276367,
"confidence": 0.9672118425369263,
"cells": [
{
"index": 22,
@@ -1971,7 +1971,7 @@
"b": 295.08200000000005,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9547483325004578,
"confidence": 0.9547481536865234,
"cells": [
{
"index": 17,
@@ -2117,7 +2117,7 @@
"b": 350.522,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9672117233276367,
"confidence": 0.9672118425369263,
"cells": [
{
"index": 22,
@@ -2770,7 +2770,7 @@
"b": 295.08200000000005,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9547483325004578,
"confidence": 0.9547481536865234,
"cells": [
{
"index": 17,
@@ -2916,7 +2916,7 @@
"b": 350.522,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9672117233276367,
"confidence": 0.9672118425369263,
"cells": [
{
"index": 22,