fix(pypdfium): resolve overlapping text when merging bounding boxes (#1549)

get merged_text from boundingbox instead of merging it to prevent overlaps

Signed-off-by: Pedro Ribeiro <pedro_ribeiro_93@hotmail.com>
This commit is contained in:
Pedro Ribeiro
2025-05-19 14:26:00 +01:00
committed by GitHub
parent 12a0e64892
commit 98b5eeb844
52 changed files with 52225 additions and 4690 deletions

View File

@@ -3989,7 +3989,7 @@
"prov": [
{
"bbox": [
33.09040069580078,
33.09052658081055,
89.5469970703125,
585.1502075195312,
498.9671630859375
@@ -4055,9 +4055,9 @@
"prov": [
{
"bbox": [
64.1669921875,
64.16704559326172,
103.87176513671875,
258.7742919921875,
258.77435302734375,
188.49365234375
],
"page": 3,
@@ -4099,7 +4099,7 @@
"prov": [
{
"bbox": [
145.4144744873047,
145.41448974609375,
156.616943359375,
252.08840942382812,
264.7552490234375
@@ -4121,10 +4121,10 @@
"prov": [
{
"bbox": [
32.075252532958984,
554.0420684814453,
32.075260162353516,
554.0421142578125,
239.620361328125,
721.4226226806641
721.4226608276367
],
"page": 5,
"span": [
@@ -4168,7 +4168,7 @@
135.97177124023438,
381.39068603515625,
545.4180908203125,
684.5892486572266
684.5892562866211
],
"page": 10,
"span": [
@@ -4187,10 +4187,10 @@
"prov": [
{
"bbox": [
135.64837646484375,
135.64834594726562,
197.24334716796875,
301.2367248535156,
407.8262939453125
301.23675537109375,
407.8263244628906
],
"page": 11,
"span": [
@@ -4209,10 +4209,10 @@
"prov": [
{
"bbox": [
63.801902770996094,
621.9678497314453,
547.11474609375,
696.6175842285156
63.80195617675781,
621.9679107666016,
547.1146850585938,
696.6176071166992
],
"page": 14,
"span": [
@@ -4231,7 +4231,7 @@
"prov": [
{
"bbox": [
63.985130310058594,
63.9850959777832,
145.8603515625,
530.0478515625,
364.09503173828125
@@ -4254,9 +4254,9 @@
{
"bbox": [
136.5016632080078,
314.4587707519531,
314.45880126953125,
545.4508666992188,
672.7508773803711
672.7509078979492
],
"page": 15,
"span": [
@@ -4343,10 +4343,10 @@
"prov": [
{
"bbox": [
136.1496124267578,
76.34844970703125,
547.5267944335938,
659.9669647216797
136.1495819091797,
76.3485107421875,
547.52685546875,
659.9669189453125
],
"page": 2,
"span": [
@@ -6705,9 +6705,9 @@
{
"bbox": [
135.52462768554688,
349.949462890625,
349.94940185546875,
545.8714599609375,
502.2747802734375
502.2746887207031
],
"page": 8,
"span": [
@@ -7164,7 +7164,7 @@
64.41139221191406,
70.39208984375,
547.3950805664062,
398.3863830566406
398.3863525390625
],
"page": 9,
"span": [
@@ -9130,7 +9130,7 @@
"prov": [
{
"bbox": [
63.55636978149414,
63.55635070800781,
495.77532958984375,
548.5687255859375,
687.7661285400391