feat: Integrate ListItemMarkerProcessor into document assembly (#1825)

* Integrate ListItemMarkerProcessor into document assembly

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

* Update to final version

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

* Update all test cases

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

* Upgrade deps

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

---------

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
Christoph Auer
2025-07-01 10:04:58 +02:00
committed by GitHub
parent bdfee4e2d0
commit 56a0e104f7
24 changed files with 739 additions and 1675 deletions
+60 -60
View File
@@ -321,7 +321,7 @@
"page": 1,
"span": [
0,
68
65
],
"__ref_s3_data": null
}
@@ -344,7 +344,7 @@
"page": 1,
"span": [
0,
38
35
],
"__ref_s3_data": null
}
@@ -543,7 +543,7 @@
"page": 2,
"span": [
0,
166
164
],
"__ref_s3_data": null
}
@@ -566,7 +566,7 @@
"page": 2,
"span": [
0,
181
179
],
"__ref_s3_data": null
}
@@ -589,7 +589,7 @@
"page": 2,
"span": [
0,
106
104
],
"__ref_s3_data": null
}
@@ -612,7 +612,7 @@
"page": 2,
"span": [
0,
131
129
],
"__ref_s3_data": null
}
@@ -2550,7 +2550,7 @@
"page": 8,
"span": [
0,
121
117
],
"__ref_s3_data": null
}
@@ -2596,7 +2596,7 @@
"page": 9,
"span": [
0,
165
161
],
"__ref_s3_data": null
}
@@ -2619,7 +2619,7 @@
"page": 9,
"span": [
0,
125
121
],
"__ref_s3_data": null
}
@@ -2642,7 +2642,7 @@
"page": 9,
"span": [
0,
216
212
],
"__ref_s3_data": null
}
@@ -2665,7 +2665,7 @@
"page": 9,
"span": [
0,
236
232
],
"__ref_s3_data": null
}
@@ -2688,7 +2688,7 @@
"page": 9,
"span": [
0,
194
190
],
"__ref_s3_data": null
}
@@ -2711,7 +2711,7 @@
"page": 9,
"span": [
0,
165
161
],
"__ref_s3_data": null
}
@@ -2734,7 +2734,7 @@
"page": 9,
"span": [
0,
273
269
],
"__ref_s3_data": null
}
@@ -2757,7 +2757,7 @@
"page": 9,
"span": [
0,
170
166
],
"__ref_s3_data": null
}
@@ -2780,7 +2780,7 @@
"page": 9,
"span": [
0,
226
221
],
"__ref_s3_data": null
}
@@ -2803,7 +2803,7 @@
"page": 9,
"span": [
0,
239
234
],
"__ref_s3_data": null
}
@@ -2826,7 +2826,7 @@
"page": 9,
"span": [
0,
240
235
],
"__ref_s3_data": null
}
@@ -2849,7 +2849,7 @@
"page": 9,
"span": [
0,
283
278
],
"__ref_s3_data": null
}
@@ -2872,7 +2872,7 @@
"page": 9,
"span": [
0,
142
137
],
"__ref_s3_data": null
}
@@ -2895,7 +2895,7 @@
"page": 9,
"span": [
0,
127
122
],
"__ref_s3_data": null
}
@@ -2918,7 +2918,7 @@
"page": 9,
"span": [
0,
287
282
],
"__ref_s3_data": null
}
@@ -2941,7 +2941,7 @@
"page": 9,
"span": [
0,
156
151
],
"__ref_s3_data": null
}
@@ -2964,7 +2964,7 @@
"page": 9,
"span": [
0,
407
402
],
"__ref_s3_data": null
}
@@ -2987,7 +2987,7 @@
"page": 9,
"span": [
0,
328
323
],
"__ref_s3_data": null
}
@@ -3010,7 +3010,7 @@
"page": 9,
"span": [
0,
229
224
],
"__ref_s3_data": null
}
@@ -3033,7 +3033,7 @@
"page": 9,
"span": [
0,
315
310
],
"__ref_s3_data": null
}
@@ -3056,7 +3056,7 @@
"page": 9,
"span": [
0,
592
587
],
"__ref_s3_data": null
}
@@ -3079,7 +3079,7 @@
"page": 9,
"span": [
0,
322
317
],
"__ref_s3_data": null
}
@@ -3102,7 +3102,7 @@
"page": 9,
"span": [
0,
224
219
],
"__ref_s3_data": null
}
@@ -3125,7 +3125,7 @@
"page": 9,
"span": [
0,
229
224
],
"__ref_s3_data": null
}
@@ -3171,7 +3171,7 @@
"page": 10,
"span": [
0,
302
297
],
"__ref_s3_data": null
}
@@ -3194,7 +3194,7 @@
"page": 10,
"span": [
0,
308
303
],
"__ref_s3_data": null
}
@@ -3217,7 +3217,7 @@
"page": 10,
"span": [
0,
183
178
],
"__ref_s3_data": null
}
@@ -3240,7 +3240,7 @@
"page": 10,
"span": [
0,
275
270
],
"__ref_s3_data": null
}
@@ -3263,7 +3263,7 @@
"page": 10,
"span": [
0,
251
246
],
"__ref_s3_data": null
}
@@ -3286,7 +3286,7 @@
"page": 10,
"span": [
0,
366
361
],
"__ref_s3_data": null
}
@@ -3309,7 +3309,7 @@
"page": 10,
"span": [
0,
221
216
],
"__ref_s3_data": null
}
@@ -3332,7 +3332,7 @@
"page": 10,
"span": [
0,
217
212
],
"__ref_s3_data": null
}
@@ -3355,7 +3355,7 @@
"page": 10,
"span": [
0,
190
185
],
"__ref_s3_data": null
}
@@ -3378,7 +3378,7 @@
"page": 10,
"span": [
0,
220
215
],
"__ref_s3_data": null
}
@@ -3401,7 +3401,7 @@
"page": 10,
"span": [
0,
280
275
],
"__ref_s3_data": null
}
@@ -3424,7 +3424,7 @@
"page": 10,
"span": [
0,
106
101
],
"__ref_s3_data": null
}
@@ -3470,7 +3470,7 @@
"page": 10,
"span": [
0,
221
216
],
"__ref_s3_data": null
}
@@ -3714,7 +3714,7 @@
"page": 11,
"span": [
0,
373
370
],
"__ref_s3_data": null
}
@@ -3737,7 +3737,7 @@
"page": 11,
"span": [
0,
573
570
],
"__ref_s3_data": null
}
@@ -3760,7 +3760,7 @@
"page": 11,
"span": [
0,
195
192
],
"__ref_s3_data": null
}
@@ -3783,7 +3783,7 @@
"page": 11,
"span": [
0,
218
215
],
"__ref_s3_data": null
}
@@ -3806,7 +3806,7 @@
"page": 11,
"span": [
0,
238
235
],
"__ref_s3_data": null
}
@@ -3903,7 +3903,7 @@
"page": 12,
"span": [
0,
61
59
],
"__ref_s3_data": null
}
@@ -3926,7 +3926,7 @@
"page": 12,
"span": [
0,
77
75
],
"__ref_s3_data": null
}
@@ -4018,7 +4018,7 @@
"page": 12,
"span": [
0,
173
170
],
"__ref_s3_data": null
}
@@ -4041,7 +4041,7 @@
"page": 12,
"span": [
0,
187
184
],
"__ref_s3_data": null
}
@@ -4064,7 +4064,7 @@
"page": 12,
"span": [
0,
97
94
],
"__ref_s3_data": null
}
@@ -4110,7 +4110,7 @@
"page": 12,
"span": [
0,
169
166
],
"__ref_s3_data": null
}
@@ -4179,7 +4179,7 @@
"page": 12,
"span": [
0,
110
107
],
"__ref_s3_data": null
}
@@ -4202,7 +4202,7 @@
"page": 12,
"span": [
0,
91
88
],
"__ref_s3_data": null
}
@@ -4225,7 +4225,7 @@
"page": 12,
"span": [
0,
471
468
],
"__ref_s3_data": null
}
@@ -4248,7 +4248,7 @@
"page": 12,
"span": [
0,
311
308
],
"__ref_s3_data": null
}
@@ -4271,7 +4271,7 @@
"page": 12,
"span": [
0,
503
500
],
"__ref_s3_data": null
}
+34 -34
View File
@@ -649,7 +649,7 @@
"page": 2,
"span": [
0,
149
145
],
"__ref_s3_data": null
}
@@ -672,7 +672,7 @@
"page": 2,
"span": [
0,
109
105
],
"__ref_s3_data": null
}
@@ -695,7 +695,7 @@
"page": 2,
"span": [
0,
180
176
],
"__ref_s3_data": null
}
@@ -718,7 +718,7 @@
"page": 2,
"span": [
0,
115
111
],
"__ref_s3_data": null
}
@@ -787,7 +787,7 @@
"page": 2,
"span": [
0,
280
276
],
"__ref_s3_data": null
}
@@ -1506,7 +1506,7 @@
"page": 5,
"span": [
0,
202
198
],
"__ref_s3_data": null
}
@@ -1529,7 +1529,7 @@
"page": 5,
"span": [
0,
208
204
],
"__ref_s3_data": null
}
@@ -1552,7 +1552,7 @@
"page": 5,
"span": [
0,
82
78
],
"__ref_s3_data": null
}
@@ -1575,7 +1575,7 @@
"page": 5,
"span": [
0,
70
66
],
"__ref_s3_data": null
}
@@ -1598,7 +1598,7 @@
"page": 5,
"span": [
0,
53
49
],
"__ref_s3_data": null
}
@@ -1621,7 +1621,7 @@
"page": 5,
"span": [
0,
160
156
],
"__ref_s3_data": null
}
@@ -2502,7 +2502,7 @@
"page": 8,
"span": [
0,
191
187
],
"__ref_s3_data": null
}
@@ -2525,7 +2525,7 @@
"page": 8,
"span": [
0,
279
275
],
"__ref_s3_data": null
}
@@ -2548,7 +2548,7 @@
"page": 8,
"span": [
0,
213
209
],
"__ref_s3_data": null
}
@@ -2571,7 +2571,7 @@
"page": 8,
"span": [
0,
251
247
],
"__ref_s3_data": null
}
@@ -2594,7 +2594,7 @@
"page": 8,
"span": [
0,
261
257
],
"__ref_s3_data": null
}
@@ -2617,7 +2617,7 @@
"page": 8,
"span": [
0,
235
231
],
"__ref_s3_data": null
}
@@ -2640,7 +2640,7 @@
"page": 8,
"span": [
0,
316
312
],
"__ref_s3_data": null
}
@@ -2663,7 +2663,7 @@
"page": 8,
"span": [
0,
172
168
],
"__ref_s3_data": null
}
@@ -2686,7 +2686,7 @@
"page": 8,
"span": [
0,
271
267
],
"__ref_s3_data": null
}
@@ -2709,7 +2709,7 @@
"page": 8,
"span": [
0,
149
144
],
"__ref_s3_data": null
}
@@ -2732,7 +2732,7 @@
"page": 8,
"span": [
0,
227
222
],
"__ref_s3_data": null
}
@@ -2755,7 +2755,7 @@
"page": 8,
"span": [
0,
192
187
],
"__ref_s3_data": null
}
@@ -2778,7 +2778,7 @@
"page": 8,
"span": [
0,
305
300
],
"__ref_s3_data": null
}
@@ -2875,7 +2875,7 @@
"page": 9,
"span": [
0,
153
148
],
"__ref_s3_data": null
}
@@ -2898,7 +2898,7 @@
"page": 9,
"span": [
0,
190
185
],
"__ref_s3_data": null
}
@@ -2921,7 +2921,7 @@
"page": 9,
"span": [
0,
132
127
],
"__ref_s3_data": null
}
@@ -2944,7 +2944,7 @@
"page": 9,
"span": [
0,
219
214
],
"__ref_s3_data": null
}
@@ -2967,7 +2967,7 @@
"page": 9,
"span": [
0,
100
95
],
"__ref_s3_data": null
}
@@ -2990,7 +2990,7 @@
"page": 9,
"span": [
0,
339
334
],
"__ref_s3_data": null
}
@@ -3013,7 +3013,7 @@
"page": 9,
"span": [
0,
336
331
],
"__ref_s3_data": null
}
@@ -3036,7 +3036,7 @@
"page": 9,
"span": [
0,
188
183
],
"__ref_s3_data": null
}
@@ -3059,7 +3059,7 @@
"page": 9,
"span": [
0,
290
285
],
"__ref_s3_data": null
}
@@ -3082,7 +3082,7 @@
"page": 9,
"span": [
0,
138
133
],
"__ref_s3_data": null
}
@@ -2705,7 +2705,7 @@
"b": 102.78223000000003,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9373533725738525,
"confidence": 0.9373534917831421,
"cells": [
{
"index": 0,
@@ -2745,7 +2745,7 @@
"b": 102.78223000000003,
"coord_origin": "TOPLEFT"
},
"confidence": 0.8858679533004761,
"confidence": 0.8858680725097656,
"cells": [
{
"index": 1,
@@ -13641,7 +13641,7 @@
"b": 102.78223000000003,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9373533725738525,
"confidence": 0.9373534917831421,
"cells": [
{
"index": 0,
@@ -13687,7 +13687,7 @@
"b": 102.78223000000003,
"coord_origin": "TOPLEFT"
},
"confidence": 0.8858679533004761,
"confidence": 0.8858680725097656,
"cells": [
{
"index": 1,
@@ -26499,7 +26499,7 @@
"b": 102.78223000000003,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9373533725738525,
"confidence": 0.9373534917831421,
"cells": [
{
"index": 0,
@@ -26545,7 +26545,7 @@
"b": 102.78223000000003,
"coord_origin": "TOPLEFT"
},
"confidence": 0.8858679533004761,
"confidence": 0.8858680725097656,
"cells": [
{
"index": 1,
+28 -28
View File
@@ -1144,7 +1144,7 @@
"page": 7,
"span": [
0,
108
105
],
"__ref_s3_data": null
}
@@ -1167,7 +1167,7 @@
"page": 7,
"span": [
0,
106
103
],
"__ref_s3_data": null
}
@@ -1236,7 +1236,7 @@
"page": 7,
"span": [
0,
78
75
],
"__ref_s3_data": null
}
@@ -1259,7 +1259,7 @@
"page": 7,
"span": [
0,
84
81
],
"__ref_s3_data": null
}
@@ -1282,7 +1282,7 @@
"page": 7,
"span": [
0,
144
141
],
"__ref_s3_data": null
}
@@ -1974,7 +1974,7 @@
"page": 12,
"span": [
0,
270
267
],
"__ref_s3_data": null
}
@@ -1997,7 +1997,7 @@
"page": 12,
"span": [
0,
301
298
],
"__ref_s3_data": null
}
@@ -2020,7 +2020,7 @@
"page": 12,
"span": [
0,
140
137
],
"__ref_s3_data": null
}
@@ -2043,7 +2043,7 @@
"page": 12,
"span": [
0,
204
201
],
"__ref_s3_data": null
}
@@ -2066,7 +2066,7 @@
"page": 13,
"span": [
0,
203
200
],
"__ref_s3_data": null
}
@@ -2089,7 +2089,7 @@
"page": 13,
"span": [
0,
264
261
],
"__ref_s3_data": null
}
@@ -2112,7 +2112,7 @@
"page": 13,
"span": [
0,
131
128
],
"__ref_s3_data": null
}
@@ -2135,7 +2135,7 @@
"page": 13,
"span": [
0,
345
342
],
"__ref_s3_data": null
}
@@ -2158,7 +2158,7 @@
"page": 13,
"span": [
0,
234
231
],
"__ref_s3_data": null
}
@@ -2181,7 +2181,7 @@
"page": 13,
"span": [
0,
413
409
],
"__ref_s3_data": null
}
@@ -2204,7 +2204,7 @@
"page": 13,
"span": [
0,
295
291
],
"__ref_s3_data": null
}
@@ -2227,7 +2227,7 @@
"page": 13,
"span": [
0,
281
277
],
"__ref_s3_data": null
}
@@ -2250,7 +2250,7 @@
"page": 13,
"span": [
0,
275
271
],
"__ref_s3_data": null
}
@@ -2273,7 +2273,7 @@
"page": 13,
"span": [
0,
241
237
],
"__ref_s3_data": null
}
@@ -2296,7 +2296,7 @@
"page": 13,
"span": [
0,
405
401
],
"__ref_s3_data": null
}
@@ -2319,7 +2319,7 @@
"page": 13,
"span": [
0,
96
92
],
"__ref_s3_data": null
}
@@ -2342,7 +2342,7 @@
"page": 13,
"span": [
0,
195
191
],
"__ref_s3_data": null
}
@@ -2365,7 +2365,7 @@
"page": 14,
"span": [
0,
223
219
],
"__ref_s3_data": null
}
@@ -2388,7 +2388,7 @@
"page": 14,
"span": [
0,
269
265
],
"__ref_s3_data": null
}
@@ -2411,7 +2411,7 @@
"page": 14,
"span": [
0,
147
143
],
"__ref_s3_data": null
}
@@ -2434,7 +2434,7 @@
"page": 14,
"span": [
0,
329
325
],
"__ref_s3_data": null
}
@@ -2457,7 +2457,7 @@
"page": 14,
"span": [
0,
259
255
],
"__ref_s3_data": null
}
@@ -2480,7 +2480,7 @@
"page": 14,
"span": [
0,
206
202
],
"__ref_s3_data": null
}
+26 -26
View File
@@ -233,7 +233,7 @@
"page": 1,
"span": [
0,
248
246
],
"__ref_s3_data": null
}
@@ -256,7 +256,7 @@
"page": 1,
"span": [
0,
205
203
],
"__ref_s3_data": null
}
@@ -348,7 +348,7 @@
"page": 1,
"span": [
0,
201
199
],
"__ref_s3_data": null
}
@@ -371,7 +371,7 @@
"page": 1,
"span": [
0,
214
212
],
"__ref_s3_data": null
}
@@ -463,7 +463,7 @@
"page": 2,
"span": [
0,
155
153
],
"__ref_s3_data": null
}
@@ -486,7 +486,7 @@
"page": 2,
"span": [
0,
135
133
],
"__ref_s3_data": null
}
@@ -509,7 +509,7 @@
"page": 2,
"span": [
0,
197
195
],
"__ref_s3_data": null
}
@@ -647,7 +647,7 @@
"page": 3,
"span": [
0,
365
363
],
"__ref_s3_data": null
}
@@ -670,7 +670,7 @@
"page": 3,
"span": [
0,
253
251
],
"__ref_s3_data": null
}
@@ -693,7 +693,7 @@
"page": 3,
"span": [
0,
300
298
],
"__ref_s3_data": null
}
@@ -762,7 +762,7 @@
"page": 3,
"span": [
0,
140
137
],
"__ref_s3_data": null
}
@@ -785,7 +785,7 @@
"page": 3,
"span": [
0,
116
113
],
"__ref_s3_data": null
}
@@ -808,7 +808,7 @@
"page": 3,
"span": [
0,
114
111
],
"__ref_s3_data": null
}
@@ -831,7 +831,7 @@
"page": 3,
"span": [
0,
142
139
],
"__ref_s3_data": null
}
@@ -854,7 +854,7 @@
"page": 3,
"span": [
0,
170
167
],
"__ref_s3_data": null
}
@@ -923,7 +923,7 @@
"page": 4,
"span": [
0,
222
220
],
"__ref_s3_data": null
}
@@ -946,7 +946,7 @@
"page": 4,
"span": [
0,
242
240
],
"__ref_s3_data": null
}
@@ -969,7 +969,7 @@
"page": 4,
"span": [
0,
226
224
],
"__ref_s3_data": null
}
@@ -1038,7 +1038,7 @@
"page": 4,
"span": [
0,
290
287
],
"__ref_s3_data": null
}
@@ -1061,7 +1061,7 @@
"page": 4,
"span": [
0,
278
275
],
"__ref_s3_data": null
}
@@ -1084,7 +1084,7 @@
"page": 4,
"span": [
0,
253
250
],
"__ref_s3_data": null
}
@@ -1107,7 +1107,7 @@
"page": 4,
"span": [
0,
215
212
],
"__ref_s3_data": null
}
@@ -1130,7 +1130,7 @@
"page": 4,
"span": [
0,
206
203
],
"__ref_s3_data": null
}
@@ -1199,7 +1199,7 @@
"page": 4,
"span": [
0,
155
153
],
"__ref_s3_data": null
}
@@ -1222,7 +1222,7 @@
"page": 4,
"span": [
0,
184
182
],
"__ref_s3_data": null
}
@@ -1245,7 +1245,7 @@
"page": 4,
"span": [
0,
158
156
],
"__ref_s3_data": null
}
+9 -9
View File
@@ -2908,7 +2908,7 @@
"page": 12,
"span": [
0,
57
54
],
"__ref_s3_data": null
}
@@ -2931,7 +2931,7 @@
"page": 12,
"span": [
0,
58
55
],
"__ref_s3_data": null
}
@@ -2954,7 +2954,7 @@
"page": 12,
"span": [
0,
127
124
],
"__ref_s3_data": null
}
@@ -3069,7 +3069,7 @@
"page": 13,
"span": [
0,
136
133
],
"__ref_s3_data": null
}
@@ -3253,7 +3253,7 @@
"page": 14,
"span": [
0,
65
62
],
"__ref_s3_data": null
}
@@ -3350,7 +3350,7 @@
"page": 14,
"span": [
0,
57
54
],
"__ref_s3_data": null
}
@@ -3534,7 +3534,7 @@
"page": 14,
"span": [
0,
231
228
],
"__ref_s3_data": null
}
@@ -3585,7 +3585,7 @@
"page": 15,
"span": [
0,
228
225
],
"__ref_s3_data": null
}
@@ -3608,7 +3608,7 @@
"page": 15,
"span": [
0,
232
229
],
"__ref_s3_data": null
}