feat: support xlsm files (#1520)
* code for xlsm support * updated support for xlsm * updated code for xlsm support * Update docling_parse_v4_backend.py Signed-off-by: ShiroYasha18 <85089952+ShiroYasha18@users.noreply.github.com> * Update docling_parse_v4_backend.py Signed-off-by: ShiroYasha18 <85089952+ShiroYasha18@users.noreply.github.com> * Update test_backend_msexcel_xlsm.py updated the tests/test_backend_msexcel_xlsm.py: have a function starting with test removed all print statements ** To add an explicit assert {test}=={pred} Signed-off-by: ShiroYasha18 <85089952+ShiroYasha18@users.noreply.github.com> * Update base_models.py Signed-off-by: ShiroYasha18 <85089952+ShiroYasha18@users.noreply.github.com> * Update test_backend_msexcel.py Signed-off-by: ShiroYasha18 <85089952+ShiroYasha18@users.noreply.github.com> * Update test_backend_msexcel_xlsm.py Signed-off-by: ShiroYasha18 <85089952+ShiroYasha18@users.noreply.github.com> * Update document_converter.py Signed-off-by: ShiroYasha18 <85089952+ShiroYasha18@users.noreply.github.com> * Delete tests/test_backend_msexcel_xlsm.py Signed-off-by: ShiroYasha18 <85089952+ShiroYasha18@users.noreply.github.com> * xlsm file Signed-off-by: ShiroYasha18 <85089952+ShiroYasha18@users.noreply.github.com> * run tests * ran tests * Fix tests, upgrade XSLM example to a valid file Signed-off-by: Christoph Auer <cau@zurich.ibm.com> --------- Signed-off-by: ShiroYasha18 <85089952+ShiroYasha18@users.noreply.github.com> Signed-off-by: Christoph Auer <cau@zurich.ibm.com> Co-authored-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
parent
6613b9e98b
commit
df140227c3
@ -70,7 +70,7 @@ FormatToExtensions: Dict[InputFormat, List[str]] = {
|
||||
InputFormat.IMAGE: ["jpg", "jpeg", "png", "tif", "tiff", "bmp", "webp"],
|
||||
InputFormat.ASCIIDOC: ["adoc", "asciidoc", "asc"],
|
||||
InputFormat.CSV: ["csv"],
|
||||
InputFormat.XLSX: ["xlsx"],
|
||||
InputFormat.XLSX: ["xlsx", "xlsm"],
|
||||
InputFormat.XML_USPTO: ["xml", "txt"],
|
||||
InputFormat.JSON_DOCLING: ["json"],
|
||||
}
|
||||
|
@ -213,10 +213,10 @@
|
||||
"prov": [
|
||||
{
|
||||
"bbox": [
|
||||
139.6674041748047,
|
||||
139.66741943359375,
|
||||
322.5054626464844,
|
||||
475.00927734375,
|
||||
454.4546203613281
|
||||
454.45458984375
|
||||
],
|
||||
"page": 1,
|
||||
"span": [
|
||||
|
@ -2646,7 +2646,7 @@
|
||||
"b": 102.78223000000003,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.9373533129692078,
|
||||
"confidence": 0.9373534917831421,
|
||||
"cells": [
|
||||
{
|
||||
"index": 0,
|
||||
@ -2686,7 +2686,7 @@
|
||||
"b": 102.78223000000003,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.8858679533004761,
|
||||
"confidence": 0.8858680725097656,
|
||||
"cells": [
|
||||
{
|
||||
"index": 1,
|
||||
@ -2726,7 +2726,7 @@
|
||||
"b": 152.90697999999998,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.9806435108184814,
|
||||
"confidence": 0.9806433916091919,
|
||||
"cells": [
|
||||
{
|
||||
"index": 2,
|
||||
@ -2881,7 +2881,7 @@
|
||||
"b": 255.42400999999995,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.9850425124168396,
|
||||
"confidence": 0.98504239320755,
|
||||
"cells": [
|
||||
{
|
||||
"index": 7,
|
||||
@ -3096,7 +3096,7 @@
|
||||
"b": 327.98218,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.9591907262802124,
|
||||
"confidence": 0.9591909050941467,
|
||||
"cells": [
|
||||
{
|
||||
"index": 15,
|
||||
@ -3280,8 +3280,8 @@
|
||||
"id": 0,
|
||||
"label": "table",
|
||||
"bbox": {
|
||||
"l": 139.6674041748047,
|
||||
"t": 337.5453796386719,
|
||||
"l": 139.66741943359375,
|
||||
"t": 337.54541015625,
|
||||
"r": 475.00927734375,
|
||||
"b": 469.4945373535156,
|
||||
"coord_origin": "TOPLEFT"
|
||||
@ -7787,7 +7787,7 @@
|
||||
"b": 518.17419,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.9589295387268066,
|
||||
"confidence": 0.9589294195175171,
|
||||
"cells": [
|
||||
{
|
||||
"index": 91,
|
||||
@ -7852,7 +7852,7 @@
|
||||
"b": 618.3,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.9849976301193237,
|
||||
"confidence": 0.9849975109100342,
|
||||
"cells": [
|
||||
{
|
||||
"index": 93,
|
||||
@ -8184,8 +8184,8 @@
|
||||
"id": 0,
|
||||
"label": "table",
|
||||
"bbox": {
|
||||
"l": 139.6674041748047,
|
||||
"t": 337.5453796386719,
|
||||
"l": 139.66741943359375,
|
||||
"t": 337.54541015625,
|
||||
"r": 475.00927734375,
|
||||
"b": 469.4945373535156,
|
||||
"coord_origin": "TOPLEFT"
|
||||
@ -13582,7 +13582,7 @@
|
||||
"b": 102.78223000000003,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.9373533129692078,
|
||||
"confidence": 0.9373534917831421,
|
||||
"cells": [
|
||||
{
|
||||
"index": 0,
|
||||
@ -13628,7 +13628,7 @@
|
||||
"b": 102.78223000000003,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.8858679533004761,
|
||||
"confidence": 0.8858680725097656,
|
||||
"cells": [
|
||||
{
|
||||
"index": 1,
|
||||
@ -13674,7 +13674,7 @@
|
||||
"b": 152.90697999999998,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.9806435108184814,
|
||||
"confidence": 0.9806433916091919,
|
||||
"cells": [
|
||||
{
|
||||
"index": 2,
|
||||
@ -13841,7 +13841,7 @@
|
||||
"b": 255.42400999999995,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.9850425124168396,
|
||||
"confidence": 0.98504239320755,
|
||||
"cells": [
|
||||
{
|
||||
"index": 7,
|
||||
@ -14062,7 +14062,7 @@
|
||||
"b": 327.98218,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.9591907262802124,
|
||||
"confidence": 0.9591909050941467,
|
||||
"cells": [
|
||||
{
|
||||
"index": 15,
|
||||
@ -14252,8 +14252,8 @@
|
||||
"id": 0,
|
||||
"label": "table",
|
||||
"bbox": {
|
||||
"l": 139.6674041748047,
|
||||
"t": 337.5453796386719,
|
||||
"l": 139.66741943359375,
|
||||
"t": 337.54541015625,
|
||||
"r": 475.00927734375,
|
||||
"b": 469.4945373535156,
|
||||
"coord_origin": "TOPLEFT"
|
||||
@ -19642,7 +19642,7 @@
|
||||
"b": 518.17419,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.9589295387268066,
|
||||
"confidence": 0.9589294195175171,
|
||||
"cells": [
|
||||
{
|
||||
"index": 91,
|
||||
@ -19713,7 +19713,7 @@
|
||||
"b": 618.3,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.9849976301193237,
|
||||
"confidence": 0.9849975109100342,
|
||||
"cells": [
|
||||
{
|
||||
"index": 93,
|
||||
@ -20057,7 +20057,7 @@
|
||||
"b": 152.90697999999998,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.9806435108184814,
|
||||
"confidence": 0.9806433916091919,
|
||||
"cells": [
|
||||
{
|
||||
"index": 2,
|
||||
@ -20224,7 +20224,7 @@
|
||||
"b": 255.42400999999995,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.9850425124168396,
|
||||
"confidence": 0.98504239320755,
|
||||
"cells": [
|
||||
{
|
||||
"index": 7,
|
||||
@ -20445,7 +20445,7 @@
|
||||
"b": 327.98218,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.9591907262802124,
|
||||
"confidence": 0.9591909050941467,
|
||||
"cells": [
|
||||
{
|
||||
"index": 15,
|
||||
@ -20635,8 +20635,8 @@
|
||||
"id": 0,
|
||||
"label": "table",
|
||||
"bbox": {
|
||||
"l": 139.6674041748047,
|
||||
"t": 337.5453796386719,
|
||||
"l": 139.66741943359375,
|
||||
"t": 337.54541015625,
|
||||
"r": 475.00927734375,
|
||||
"b": 469.4945373535156,
|
||||
"coord_origin": "TOPLEFT"
|
||||
@ -26025,7 +26025,7 @@
|
||||
"b": 518.17419,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.9589295387268066,
|
||||
"confidence": 0.9589294195175171,
|
||||
"cells": [
|
||||
{
|
||||
"index": 91,
|
||||
@ -26096,7 +26096,7 @@
|
||||
"b": 618.3,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.9849976301193237,
|
||||
"confidence": 0.9849975109100342,
|
||||
"cells": [
|
||||
{
|
||||
"index": 93,
|
||||
@ -26440,7 +26440,7 @@
|
||||
"b": 102.78223000000003,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.9373533129692078,
|
||||
"confidence": 0.9373534917831421,
|
||||
"cells": [
|
||||
{
|
||||
"index": 0,
|
||||
@ -26486,7 +26486,7 @@
|
||||
"b": 102.78223000000003,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.8858679533004761,
|
||||
"confidence": 0.8858680725097656,
|
||||
"cells": [
|
||||
{
|
||||
"index": 1,
|
||||
|
@ -336,8 +336,8 @@
|
||||
{
|
||||
"page_no": 1,
|
||||
"bbox": {
|
||||
"l": 139.6674041748047,
|
||||
"t": 454.4546203613281,
|
||||
"l": 139.66741943359375,
|
||||
"t": 454.45458984375,
|
||||
"r": 475.00927734375,
|
||||
"b": 322.5054626464844,
|
||||
"coord_origin": "BOTTOMLEFT"
|
||||
|
@ -2646,7 +2646,7 @@
|
||||
"b": 102.78223000000003,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.9373533129692078,
|
||||
"confidence": 0.9373534917831421,
|
||||
"cells": [
|
||||
{
|
||||
"index": 0,
|
||||
@ -2686,7 +2686,7 @@
|
||||
"b": 102.78223000000003,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.8858679533004761,
|
||||
"confidence": 0.8858680725097656,
|
||||
"cells": [
|
||||
{
|
||||
"index": 1,
|
||||
@ -2726,7 +2726,7 @@
|
||||
"b": 152.90697999999998,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.9806435108184814,
|
||||
"confidence": 0.9806433916091919,
|
||||
"cells": [
|
||||
{
|
||||
"index": 2,
|
||||
@ -2881,7 +2881,7 @@
|
||||
"b": 255.42400999999995,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.9850425124168396,
|
||||
"confidence": 0.98504239320755,
|
||||
"cells": [
|
||||
{
|
||||
"index": 7,
|
||||
@ -3096,7 +3096,7 @@
|
||||
"b": 327.98218,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.9591907262802124,
|
||||
"confidence": 0.9591909050941467,
|
||||
"cells": [
|
||||
{
|
||||
"index": 15,
|
||||
@ -3280,8 +3280,8 @@
|
||||
"id": 0,
|
||||
"label": "table",
|
||||
"bbox": {
|
||||
"l": 139.6674041748047,
|
||||
"t": 337.5453796386719,
|
||||
"l": 139.66741943359375,
|
||||
"t": 337.54541015625,
|
||||
"r": 475.00927734375,
|
||||
"b": 469.4945373535156,
|
||||
"coord_origin": "TOPLEFT"
|
||||
@ -7787,7 +7787,7 @@
|
||||
"b": 518.17419,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.9589295387268066,
|
||||
"confidence": 0.9589294195175171,
|
||||
"cells": [
|
||||
{
|
||||
"index": 91,
|
||||
@ -7852,7 +7852,7 @@
|
||||
"b": 618.3,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.9849976301193237,
|
||||
"confidence": 0.9849975109100342,
|
||||
"cells": [
|
||||
{
|
||||
"index": 93,
|
||||
@ -8184,8 +8184,8 @@
|
||||
"id": 0,
|
||||
"label": "table",
|
||||
"bbox": {
|
||||
"l": 139.6674041748047,
|
||||
"t": 337.5453796386719,
|
||||
"l": 139.66741943359375,
|
||||
"t": 337.54541015625,
|
||||
"r": 475.00927734375,
|
||||
"b": 469.4945373535156,
|
||||
"coord_origin": "TOPLEFT"
|
||||
@ -13582,7 +13582,7 @@
|
||||
"b": 102.78223000000003,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.9373533129692078,
|
||||
"confidence": 0.9373534917831421,
|
||||
"cells": [
|
||||
{
|
||||
"index": 0,
|
||||
@ -13628,7 +13628,7 @@
|
||||
"b": 102.78223000000003,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.8858679533004761,
|
||||
"confidence": 0.8858680725097656,
|
||||
"cells": [
|
||||
{
|
||||
"index": 1,
|
||||
@ -13674,7 +13674,7 @@
|
||||
"b": 152.90697999999998,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.9806435108184814,
|
||||
"confidence": 0.9806433916091919,
|
||||
"cells": [
|
||||
{
|
||||
"index": 2,
|
||||
@ -13841,7 +13841,7 @@
|
||||
"b": 255.42400999999995,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.9850425124168396,
|
||||
"confidence": 0.98504239320755,
|
||||
"cells": [
|
||||
{
|
||||
"index": 7,
|
||||
@ -14062,7 +14062,7 @@
|
||||
"b": 327.98218,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.9591907262802124,
|
||||
"confidence": 0.9591909050941467,
|
||||
"cells": [
|
||||
{
|
||||
"index": 15,
|
||||
@ -14252,8 +14252,8 @@
|
||||
"id": 0,
|
||||
"label": "table",
|
||||
"bbox": {
|
||||
"l": 139.6674041748047,
|
||||
"t": 337.5453796386719,
|
||||
"l": 139.66741943359375,
|
||||
"t": 337.54541015625,
|
||||
"r": 475.00927734375,
|
||||
"b": 469.4945373535156,
|
||||
"coord_origin": "TOPLEFT"
|
||||
@ -19642,7 +19642,7 @@
|
||||
"b": 518.17419,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.9589295387268066,
|
||||
"confidence": 0.9589294195175171,
|
||||
"cells": [
|
||||
{
|
||||
"index": 91,
|
||||
@ -19713,7 +19713,7 @@
|
||||
"b": 618.3,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.9849976301193237,
|
||||
"confidence": 0.9849975109100342,
|
||||
"cells": [
|
||||
{
|
||||
"index": 93,
|
||||
@ -20057,7 +20057,7 @@
|
||||
"b": 152.90697999999998,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.9806435108184814,
|
||||
"confidence": 0.9806433916091919,
|
||||
"cells": [
|
||||
{
|
||||
"index": 2,
|
||||
@ -20224,7 +20224,7 @@
|
||||
"b": 255.42400999999995,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.9850425124168396,
|
||||
"confidence": 0.98504239320755,
|
||||
"cells": [
|
||||
{
|
||||
"index": 7,
|
||||
@ -20445,7 +20445,7 @@
|
||||
"b": 327.98218,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.9591907262802124,
|
||||
"confidence": 0.9591909050941467,
|
||||
"cells": [
|
||||
{
|
||||
"index": 15,
|
||||
@ -20635,8 +20635,8 @@
|
||||
"id": 0,
|
||||
"label": "table",
|
||||
"bbox": {
|
||||
"l": 139.6674041748047,
|
||||
"t": 337.5453796386719,
|
||||
"l": 139.66741943359375,
|
||||
"t": 337.54541015625,
|
||||
"r": 475.00927734375,
|
||||
"b": 469.4945373535156,
|
||||
"coord_origin": "TOPLEFT"
|
||||
@ -26025,7 +26025,7 @@
|
||||
"b": 518.17419,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.9589295387268066,
|
||||
"confidence": 0.9589294195175171,
|
||||
"cells": [
|
||||
{
|
||||
"index": 91,
|
||||
@ -26096,7 +26096,7 @@
|
||||
"b": 618.3,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.9849976301193237,
|
||||
"confidence": 0.9849975109100342,
|
||||
"cells": [
|
||||
{
|
||||
"index": 93,
|
||||
@ -26440,7 +26440,7 @@
|
||||
"b": 102.78223000000003,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.9373533129692078,
|
||||
"confidence": 0.9373534917831421,
|
||||
"cells": [
|
||||
{
|
||||
"index": 0,
|
||||
@ -26486,7 +26486,7 @@
|
||||
"b": 102.78223000000003,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.8858679533004761,
|
||||
"confidence": 0.8858680725097656,
|
||||
"cells": [
|
||||
{
|
||||
"index": 1,
|
||||
|
8
tests/data/groundtruth/docling_v2/example_8.html.itxt
vendored
Normal file
8
tests/data/groundtruth/docling_v2/example_8.html.itxt
vendored
Normal file
@ -0,0 +1,8 @@
|
||||
item-0 at level 0: unspecified: group _root_
|
||||
item-1 at level 1: section: group header-1
|
||||
item-2 at level 2: section_header: Pivot table with with 1 row header
|
||||
item-3 at level 3: table with [6x4]
|
||||
item-4 at level 2: section_header: Pivot table with 2 row headers
|
||||
item-5 at level 3: table with [6x5]
|
||||
item-6 at level 2: section_header: Equivalent pivot table
|
||||
item-7 at level 3: table with [6x5]
|
2008
tests/data/groundtruth/docling_v2/example_8.html.json
vendored
Normal file
2008
tests/data/groundtruth/docling_v2/example_8.html.json
vendored
Normal file
File diff suppressed because it is too large
Load Diff
29
tests/data/groundtruth/docling_v2/example_8.html.md
vendored
Normal file
29
tests/data/groundtruth/docling_v2/example_8.html.md
vendored
Normal file
@ -0,0 +1,29 @@
|
||||
## Pivot table with with 1 row header
|
||||
|
||||
| Year | Month | Revenue | Cost |
|
||||
|--------|----------|-----------|--------|
|
||||
| 2025 | January | $134 | $162 |
|
||||
| 2025 | February | $150 | $155 |
|
||||
| 2025 | March | $160 | $143 |
|
||||
| 2025 | April | $210 | $150 |
|
||||
| 2025 | May | $280 | $120 |
|
||||
|
||||
## Pivot table with 2 row headers
|
||||
|
||||
| Year | Quarter | Month | Revenue | Cost |
|
||||
|--------|-----------|----------|-----------|--------|
|
||||
| 2025 | Q1 | January | $134 | $162 |
|
||||
| 2025 | Q1 | February | $150 | $155 |
|
||||
| 2025 | Q1 | March | $160 | $143 |
|
||||
| 2025 | Q2 | April | $210 | $150 |
|
||||
| 2025 | Q2 | May | $280 | $120 |
|
||||
|
||||
## Equivalent pivot table
|
||||
|
||||
| Year | Quarter | Month | Revenue | Cost |
|
||||
|--------|-----------|----------|-----------|--------|
|
||||
| 2025 | Q1 | January | $134 | $162 |
|
||||
| 2025 | Q1 | February | $150 | $155 |
|
||||
| 2025 | Q1 | March | $160 | $143 |
|
||||
| 2025 | Q2 | April | $210 | $150 |
|
||||
| 2025 | Q2 | May | $280 | $120 |
|
3
tests/data/groundtruth/docling_v2/sample_sales_data.xlsm.itxt
vendored
Normal file
3
tests/data/groundtruth/docling_v2/sample_sales_data.xlsm.itxt
vendored
Normal file
@ -0,0 +1,3 @@
|
||||
item-0 at level 0: unspecified: group _root_
|
||||
item-1 at level 1: section: group sheet: SalesData
|
||||
item-2 at level 2: table with [21x4]
|
2153
tests/data/groundtruth/docling_v2/sample_sales_data.xlsm.json
vendored
Normal file
2153
tests/data/groundtruth/docling_v2/sample_sales_data.xlsm.json
vendored
Normal file
File diff suppressed because it is too large
Load Diff
22
tests/data/groundtruth/docling_v2/sample_sales_data.xlsm.md
vendored
Normal file
22
tests/data/groundtruth/docling_v2/sample_sales_data.xlsm.md
vendored
Normal file
@ -0,0 +1,22 @@
|
||||
| Product | Date | Quantity | Revenue |
|
||||
|-----------|---------------------|------------|-----------|
|
||||
| Widget A | 2024-01-01 00:00:00 | 5 | 5000 |
|
||||
| Widget B | 2024-01-02 00:00:00 | 10 | 12000 |
|
||||
| Widget C | 2024-01-03 00:00:00 | 3 | 3000 |
|
||||
| Widget D | 2024-01-04 00:00:00 | 8 | 8000 |
|
||||
| Widget A | 2024-01-05 00:00:00 | 7 | 7000 |
|
||||
| Widget B | 2024-01-06 00:00:00 | 6 | 6000 |
|
||||
| Widget C | 2024-01-07 00:00:00 | 12 | 15000 |
|
||||
| Widget D | 2024-01-08 00:00:00 | 9 | 9000 |
|
||||
| Widget A | 2024-01-09 00:00:00 | 4 | 4000 |
|
||||
| Widget B | 2024-01-10 00:00:00 | 11 | 11000 |
|
||||
| Widget C | 2024-01-11 00:00:00 | 5 | 5000 |
|
||||
| Widget D | 2024-01-12 00:00:00 | 8 | 8500 |
|
||||
| Widget A | 2024-01-13 00:00:00 | 6 | 6200 |
|
||||
| Widget B | 2024-01-14 00:00:00 | 7 | 7100 |
|
||||
| Widget C | 2024-01-15 00:00:00 | 10 | 10500 |
|
||||
| Widget D | 2024-01-16 00:00:00 | 3 | 3200 |
|
||||
| Widget A | 2024-01-17 00:00:00 | 9 | 9400 |
|
||||
| Widget B | 2024-01-18 00:00:00 | 12 | 12500 |
|
||||
| Widget C | 2024-01-19 00:00:00 | 6 | 6100 |
|
||||
| Widget D | 2024-01-20 00:00:00 | 8 | 8900 |
|
127
tests/data/groundtruth/docling_v2/textbox.docx.itxt
vendored
127
tests/data/groundtruth/docling_v2/textbox.docx.itxt
vendored
@ -5,92 +5,89 @@ item-0 at level 0: unspecified: group _root_
|
||||
item-4 at level 1: section: group textbox
|
||||
item-5 at level 2: paragraph: Student falls ill
|
||||
item-6 at level 2: paragraph:
|
||||
item-7 at level 2: paragraph:
|
||||
item-8 at level 2: list: group list
|
||||
item-9 at level 3: list_item: Suggested Reportable Symptoms:
|
||||
item-7 at level 2: list: group list
|
||||
item-8 at level 3: list_item: Suggested Reportable Symptoms:
|
||||
* ... sh
|
||||
* Blisters
|
||||
* Headache
|
||||
* Sore throat
|
||||
item-10 at level 1: list_item:
|
||||
item-9 at level 1: list_item:
|
||||
item-10 at level 1: paragraph:
|
||||
item-11 at level 1: paragraph:
|
||||
item-12 at level 1: paragraph:
|
||||
item-13 at level 1: section: group textbox
|
||||
item-14 at level 2: paragraph: If a caregiver suspects that wit ... the same suggested reportable symptoms
|
||||
item-12 at level 1: section: group textbox
|
||||
item-13 at level 2: paragraph: If a caregiver suspects that wit ... the same suggested reportable symptoms
|
||||
item-14 at level 1: paragraph:
|
||||
item-15 at level 1: paragraph:
|
||||
item-16 at level 1: paragraph:
|
||||
item-17 at level 1: paragraph:
|
||||
item-18 at level 1: paragraph:
|
||||
item-19 at level 1: section: group textbox
|
||||
item-20 at level 2: paragraph: Yes
|
||||
item-18 at level 1: section: group textbox
|
||||
item-19 at level 2: paragraph: Yes
|
||||
item-20 at level 1: paragraph:
|
||||
item-21 at level 1: paragraph:
|
||||
item-22 at level 1: paragraph:
|
||||
item-23 at level 1: section: group textbox
|
||||
item-24 at level 2: list: group list
|
||||
item-25 at level 3: list_item: A report must be submitted withi ... saster Prevention Information Network.
|
||||
item-26 at level 3: list_item: A report must also be submitted ... d Infectious Disease Reporting System.
|
||||
item-27 at level 2: paragraph:
|
||||
item-28 at level 2: paragraph:
|
||||
item-29 at level 1: list: group list
|
||||
item-30 at level 2: list_item:
|
||||
item-22 at level 1: section: group textbox
|
||||
item-23 at level 2: list: group list
|
||||
item-24 at level 3: list_item: A report must be submitted withi ... saster Prevention Information Network.
|
||||
item-25 at level 3: list_item: A report must also be submitted ... d Infectious Disease Reporting System.
|
||||
item-26 at level 2: paragraph:
|
||||
item-27 at level 1: list: group list
|
||||
item-28 at level 2: list_item:
|
||||
item-29 at level 1: paragraph:
|
||||
item-30 at level 1: paragraph:
|
||||
item-31 at level 1: paragraph:
|
||||
item-32 at level 1: paragraph:
|
||||
item-33 at level 1: paragraph:
|
||||
item-34 at level 1: paragraph:
|
||||
item-35 at level 1: paragraph:
|
||||
item-36 at level 1: section: group textbox
|
||||
item-37 at level 2: paragraph: Health Bureau:
|
||||
item-38 at level 2: paragraph: Upon receiving a report from the ... rt to the Centers for Disease Control.
|
||||
item-39 at level 2: list: group list
|
||||
item-40 at level 3: list_item: If necessary, provide health edu ... vidual to undergo specimen collection.
|
||||
item-41 at level 3: list_item: Implement appropriate epidemic p ... the Communicable Disease Control Act.
|
||||
item-42 at level 2: paragraph:
|
||||
item-43 at level 2: paragraph:
|
||||
item-44 at level 1: list: group list
|
||||
item-45 at level 2: list_item:
|
||||
item-46 at level 1: paragraph:
|
||||
item-47 at level 1: section: group textbox
|
||||
item-48 at level 2: paragraph: Department of Education:
|
||||
item-34 at level 1: section: group textbox
|
||||
item-35 at level 2: paragraph: Health Bureau:
|
||||
item-36 at level 2: paragraph: Upon receiving a report from the ... rt to the Centers for Disease Control.
|
||||
item-37 at level 2: list: group list
|
||||
item-38 at level 3: list_item: If necessary, provide health edu ... vidual to undergo specimen collection.
|
||||
item-39 at level 3: list_item: Implement appropriate epidemic p ... the Communicable Disease Control Act.
|
||||
item-40 at level 2: paragraph:
|
||||
item-41 at level 1: list: group list
|
||||
item-42 at level 2: list_item:
|
||||
item-43 at level 1: paragraph:
|
||||
item-44 at level 1: section: group textbox
|
||||
item-45 at level 2: paragraph: Department of Education:
|
||||
Collabo ... vention measures at all school levels.
|
||||
item-46 at level 1: paragraph:
|
||||
item-47 at level 1: paragraph:
|
||||
item-48 at level 1: paragraph:
|
||||
item-49 at level 1: paragraph:
|
||||
item-50 at level 1: paragraph:
|
||||
item-51 at level 1: paragraph:
|
||||
item-52 at level 1: paragraph:
|
||||
item-53 at level 1: paragraph:
|
||||
item-54 at level 1: paragraph:
|
||||
item-55 at level 1: paragraph:
|
||||
item-56 at level 1: section: group textbox
|
||||
item-57 at level 2: inline: group group
|
||||
item-58 at level 3: paragraph: The Health Bureau will handle
|
||||
item-59 at level 3: paragraph: reporting and specimen collection
|
||||
item-60 at level 3: paragraph: .
|
||||
item-61 at level 2: paragraph:
|
||||
item-62 at level 2: paragraph:
|
||||
item-63 at level 1: paragraph:
|
||||
item-64 at level 1: paragraph:
|
||||
item-53 at level 1: section: group textbox
|
||||
item-54 at level 2: inline: group group
|
||||
item-55 at level 3: paragraph: The Health Bureau will handle
|
||||
item-56 at level 3: paragraph: reporting and specimen collection
|
||||
item-57 at level 3: paragraph: .
|
||||
item-58 at level 2: paragraph:
|
||||
item-59 at level 1: paragraph:
|
||||
item-60 at level 1: paragraph:
|
||||
item-61 at level 1: paragraph:
|
||||
item-62 at level 1: section: group textbox
|
||||
item-63 at level 2: paragraph: Whether the epidemic has eased.
|
||||
item-64 at level 2: paragraph:
|
||||
item-65 at level 1: paragraph:
|
||||
item-66 at level 1: section: group textbox
|
||||
item-67 at level 2: paragraph: Whether the epidemic has eased.
|
||||
item-68 at level 2: paragraph:
|
||||
item-69 at level 2: paragraph:
|
||||
item-67 at level 2: paragraph: Whether the test results are pos ... legally designated infectious disease.
|
||||
item-68 at level 2: paragraph: No
|
||||
item-69 at level 1: paragraph:
|
||||
item-70 at level 1: paragraph:
|
||||
item-71 at level 1: section: group textbox
|
||||
item-72 at level 2: paragraph: Whether the test results are pos ... legally designated infectious disease.
|
||||
item-73 at level 2: paragraph: No
|
||||
item-74 at level 1: paragraph:
|
||||
item-75 at level 1: paragraph:
|
||||
item-76 at level 1: section: group textbox
|
||||
item-72 at level 2: paragraph: Yes
|
||||
item-73 at level 1: paragraph:
|
||||
item-74 at level 1: section: group textbox
|
||||
item-75 at level 2: paragraph: Yes
|
||||
item-76 at level 1: paragraph:
|
||||
item-77 at level 1: paragraph:
|
||||
item-78 at level 1: section: group textbox
|
||||
item-79 at level 1: paragraph:
|
||||
item-80 at level 1: paragraph:
|
||||
item-81 at level 1: section: group textbox
|
||||
item-82 at level 2: paragraph: Case closed.
|
||||
item-83 at level 2: paragraph:
|
||||
item-84 at level 2: paragraph:
|
||||
item-85 at level 2: paragraph: The Health Bureau will carry out ... ters for Disease Control if necessary.
|
||||
item-79 at level 2: paragraph: Case closed.
|
||||
item-80 at level 2: paragraph:
|
||||
item-81 at level 2: paragraph: The Health Bureau will carry out ... ters for Disease Control if necessary.
|
||||
item-82 at level 1: paragraph:
|
||||
item-83 at level 1: section: group textbox
|
||||
item-84 at level 2: paragraph: No
|
||||
item-85 at level 1: paragraph:
|
||||
item-86 at level 1: paragraph:
|
||||
item-87 at level 1: section: group textbox
|
||||
item-88 at level 1: paragraph:
|
||||
item-89 at level 1: paragraph:
|
||||
item-90 at level 1: paragraph:
|
||||
item-87 at level 1: paragraph:
|
800
tests/data/groundtruth/docling_v2/textbox.docx.json
vendored
800
tests/data/groundtruth/docling_v2/textbox.docx.json
vendored
File diff suppressed because it is too large
Load Diff
@ -40,6 +40,12 @@ The Health Bureau will handle **reporting and specimen collection** .
|
||||
|
||||
No
|
||||
|
||||
Yes
|
||||
|
||||
Yes
|
||||
|
||||
**Case closed.**
|
||||
|
||||
The Health Bureau will carry out subsequent related epidemic prevention measures and follow-up, and will request assistance from the Centers for Disease Control if necessary.
|
||||
The Health Bureau will carry out subsequent related epidemic prevention measures and follow-up, and will request assistance from the Centers for Disease Control if necessary.
|
||||
|
||||
No
|
@ -1,2 +1,2 @@
|
||||
<doctag><text><loc_60><loc_46><loc_424><loc_91>Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package</text>
|
||||
<doctag><text><loc_59><loc_46><loc_424><loc_90>Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package</text>
|
||||
</doctag>
|
@ -42,10 +42,10 @@
|
||||
{
|
||||
"page_no": 1,
|
||||
"bbox": {
|
||||
"l": 238.19302423176944,
|
||||
"l": 234.08627147881114,
|
||||
"t": 2570.0959833241664,
|
||||
"r": 1696.0985546594009,
|
||||
"b": 2315.204273887442,
|
||||
"r": 1696.0985042090742,
|
||||
"b": 2319.1220927976665,
|
||||
"coord_origin": "BOTTOMLEFT"
|
||||
},
|
||||
"charspan": [
|
||||
|
@ -40,14 +40,14 @@
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 238.19302423176944,
|
||||
"r_y0": 415.36904822716525,
|
||||
"r_x1": 1696.0985546594009,
|
||||
"r_y1": 415.36904822716525,
|
||||
"r_x2": 1696.0985546594009,
|
||||
"r_y2": 345.20535775097477,
|
||||
"r_x3": 238.19302423176944,
|
||||
"r_y3": 345.20535775097477,
|
||||
"r_x0": 234.08627147881114,
|
||||
"r_y0": 419.5788697734327,
|
||||
"r_x1": 1696.0985042090742,
|
||||
"r_y1": 419.5788697734327,
|
||||
"r_x2": 1696.0985042090742,
|
||||
"r_y2": 349.4151792972422,
|
||||
"r_x3": 234.08627147881114,
|
||||
"r_y3": 349.4151792972422,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "JSON and Markdown in an easy self contained",
|
||||
@ -65,14 +65,14 @@
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 245.43122061153045,
|
||||
"r_y0": 513.795726112558,
|
||||
"r_x1": 514.3223724413002,
|
||||
"r_y1": 513.795726112558,
|
||||
"r_x2": 514.3223724413002,
|
||||
"r_y2": 436.0574704074058,
|
||||
"r_x3": 245.43122061153045,
|
||||
"r_y3": 436.0574704074058,
|
||||
"r_x0": 242.29979922858777,
|
||||
"r_y0": 509.8779072023336,
|
||||
"r_x1": 513.3470125989277,
|
||||
"r_y1": 509.8779072023336,
|
||||
"r_x2": 513.3470125989277,
|
||||
"r_y2": 439.9752910477536,
|
||||
"r_x3": 242.29979922858777,
|
||||
"r_y3": 439.9752910477536,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "package",
|
||||
@ -90,13 +90,13 @@
|
||||
"id": 0,
|
||||
"label": "text",
|
||||
"bbox": {
|
||||
"l": 238.19302423176944,
|
||||
"l": 234.08627147881114,
|
||||
"t": 258.9040166758338,
|
||||
"r": 1696.0985546594009,
|
||||
"b": 513.795726112558,
|
||||
"r": 1696.0985042090742,
|
||||
"b": 509.8779072023336,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.9721010327339172,
|
||||
"confidence": 0.9721011519432068,
|
||||
"cells": [
|
||||
{
|
||||
"index": 0,
|
||||
@ -132,14 +132,14 @@
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 238.19302423176944,
|
||||
"r_y0": 415.36904822716525,
|
||||
"r_x1": 1696.0985546594009,
|
||||
"r_y1": 415.36904822716525,
|
||||
"r_x2": 1696.0985546594009,
|
||||
"r_y2": 345.20535775097477,
|
||||
"r_x3": 238.19302423176944,
|
||||
"r_y3": 345.20535775097477,
|
||||
"r_x0": 234.08627147881114,
|
||||
"r_y0": 419.5788697734327,
|
||||
"r_x1": 1696.0985042090742,
|
||||
"r_y1": 419.5788697734327,
|
||||
"r_x2": 1696.0985042090742,
|
||||
"r_y2": 349.4151792972422,
|
||||
"r_x3": 234.08627147881114,
|
||||
"r_y3": 349.4151792972422,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "JSON and Markdown in an easy self contained",
|
||||
@ -157,14 +157,14 @@
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 245.43122061153045,
|
||||
"r_y0": 513.795726112558,
|
||||
"r_x1": 514.3223724413002,
|
||||
"r_y1": 513.795726112558,
|
||||
"r_x2": 514.3223724413002,
|
||||
"r_y2": 436.0574704074058,
|
||||
"r_x3": 245.43122061153045,
|
||||
"r_y3": 436.0574704074058,
|
||||
"r_x0": 242.29979922858777,
|
||||
"r_y0": 509.8779072023336,
|
||||
"r_x1": 513.3470125989277,
|
||||
"r_y1": 509.8779072023336,
|
||||
"r_x2": 513.3470125989277,
|
||||
"r_y2": 439.9752910477536,
|
||||
"r_x3": 242.29979922858777,
|
||||
"r_y3": 439.9752910477536,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "package",
|
||||
@ -195,13 +195,13 @@
|
||||
"id": 0,
|
||||
"label": "text",
|
||||
"bbox": {
|
||||
"l": 238.19302423176944,
|
||||
"l": 234.08627147881114,
|
||||
"t": 258.9040166758338,
|
||||
"r": 1696.0985546594009,
|
||||
"b": 513.795726112558,
|
||||
"r": 1696.0985042090742,
|
||||
"b": 509.8779072023336,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.9721010327339172,
|
||||
"confidence": 0.9721011519432068,
|
||||
"cells": [
|
||||
{
|
||||
"index": 0,
|
||||
@ -237,14 +237,14 @@
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 238.19302423176944,
|
||||
"r_y0": 415.36904822716525,
|
||||
"r_x1": 1696.0985546594009,
|
||||
"r_y1": 415.36904822716525,
|
||||
"r_x2": 1696.0985546594009,
|
||||
"r_y2": 345.20535775097477,
|
||||
"r_x3": 238.19302423176944,
|
||||
"r_y3": 345.20535775097477,
|
||||
"r_x0": 234.08627147881114,
|
||||
"r_y0": 419.5788697734327,
|
||||
"r_x1": 1696.0985042090742,
|
||||
"r_y1": 419.5788697734327,
|
||||
"r_x2": 1696.0985042090742,
|
||||
"r_y2": 349.4151792972422,
|
||||
"r_x3": 234.08627147881114,
|
||||
"r_y3": 349.4151792972422,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "JSON and Markdown in an easy self contained",
|
||||
@ -262,14 +262,14 @@
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 245.43122061153045,
|
||||
"r_y0": 513.795726112558,
|
||||
"r_x1": 514.3223724413002,
|
||||
"r_y1": 513.795726112558,
|
||||
"r_x2": 514.3223724413002,
|
||||
"r_y2": 436.0574704074058,
|
||||
"r_x3": 245.43122061153045,
|
||||
"r_y3": 436.0574704074058,
|
||||
"r_x0": 242.29979922858777,
|
||||
"r_y0": 509.8779072023336,
|
||||
"r_x1": 513.3470125989277,
|
||||
"r_y1": 509.8779072023336,
|
||||
"r_x2": 513.3470125989277,
|
||||
"r_y2": 439.9752910477536,
|
||||
"r_x3": 242.29979922858777,
|
||||
"r_y3": 439.9752910477536,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "package",
|
||||
@ -293,13 +293,13 @@
|
||||
"id": 0,
|
||||
"label": "text",
|
||||
"bbox": {
|
||||
"l": 238.19302423176944,
|
||||
"l": 234.08627147881114,
|
||||
"t": 258.9040166758338,
|
||||
"r": 1696.0985546594009,
|
||||
"b": 513.795726112558,
|
||||
"r": 1696.0985042090742,
|
||||
"b": 509.8779072023336,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"confidence": 0.9721010327339172,
|
||||
"confidence": 0.9721011519432068,
|
||||
"cells": [
|
||||
{
|
||||
"index": 0,
|
||||
@ -335,14 +335,14 @@
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 238.19302423176944,
|
||||
"r_y0": 415.36904822716525,
|
||||
"r_x1": 1696.0985546594009,
|
||||
"r_y1": 415.36904822716525,
|
||||
"r_x2": 1696.0985546594009,
|
||||
"r_y2": 345.20535775097477,
|
||||
"r_x3": 238.19302423176944,
|
||||
"r_y3": 345.20535775097477,
|
||||
"r_x0": 234.08627147881114,
|
||||
"r_y0": 419.5788697734327,
|
||||
"r_x1": 1696.0985042090742,
|
||||
"r_y1": 419.5788697734327,
|
||||
"r_x2": 1696.0985042090742,
|
||||
"r_y2": 349.4151792972422,
|
||||
"r_x3": 234.08627147881114,
|
||||
"r_y3": 349.4151792972422,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "JSON and Markdown in an easy self contained",
|
||||
@ -360,14 +360,14 @@
|
||||
"a": 255
|
||||
},
|
||||
"rect": {
|
||||
"r_x0": 245.43122061153045,
|
||||
"r_y0": 513.795726112558,
|
||||
"r_x1": 514.3223724413002,
|
||||
"r_y1": 513.795726112558,
|
||||
"r_x2": 514.3223724413002,
|
||||
"r_y2": 436.0574704074058,
|
||||
"r_x3": 245.43122061153045,
|
||||
"r_y3": 436.0574704074058,
|
||||
"r_x0": 242.29979922858777,
|
||||
"r_y0": 509.8779072023336,
|
||||
"r_x1": 513.3470125989277,
|
||||
"r_y1": 509.8779072023336,
|
||||
"r_x2": 513.3470125989277,
|
||||
"r_y2": 439.9752910477536,
|
||||
"r_x3": 242.29979922858777,
|
||||
"r_y3": 439.9752910477536,
|
||||
"coord_origin": "TOPLEFT"
|
||||
},
|
||||
"text": "package",
|
||||
|
BIN
tests/data/xlsx/sample_sales_data.xlsm
vendored
Normal file
BIN
tests/data/xlsx/sample_sales_data.xlsm
vendored
Normal file
Binary file not shown.
@ -16,13 +16,13 @@ _log = logging.getLogger(__name__)
|
||||
GENERATE = GEN_TEST_DATA
|
||||
|
||||
|
||||
def get_xlsx_paths():
|
||||
def get_excel_paths():
|
||||
# Define the directory you want to search
|
||||
directory = Path("./tests/data/xlsx/")
|
||||
|
||||
# List all PDF files in the directory and its subdirectories
|
||||
pdf_files = sorted(directory.rglob("*.xlsx"))
|
||||
return pdf_files
|
||||
# List all Excel files in the directory and its subdirectories
|
||||
excel_files = sorted(directory.rglob("*.xlsx")) + sorted(directory.rglob("*.xlsm"))
|
||||
return excel_files
|
||||
|
||||
|
||||
def get_converter():
|
||||
@ -35,17 +35,17 @@ def get_converter():
|
||||
def documents() -> list[tuple[Path, DoclingDocument]]:
|
||||
documents: list[dict[Path, DoclingDocument]] = []
|
||||
|
||||
xlsx_paths = get_xlsx_paths()
|
||||
excel_paths = get_excel_paths()
|
||||
converter = get_converter()
|
||||
|
||||
for xlsx_path in xlsx_paths:
|
||||
_log.debug(f"converting {xlsx_path}")
|
||||
for excel_path in excel_paths:
|
||||
_log.debug(f"converting {excel_path}")
|
||||
|
||||
gt_path = (
|
||||
xlsx_path.parent.parent / "groundtruth" / "docling_v2" / xlsx_path.name
|
||||
excel_path.parent.parent / "groundtruth" / "docling_v2" / excel_path.name
|
||||
)
|
||||
|
||||
conv_result: ConversionResult = converter.convert(xlsx_path)
|
||||
conv_result: ConversionResult = converter.convert(excel_path)
|
||||
|
||||
doc: DoclingDocument = conv_result.document
|
||||
|
||||
@ -55,7 +55,7 @@ def documents() -> list[tuple[Path, DoclingDocument]]:
|
||||
return documents
|
||||
|
||||
|
||||
def test_e2e_xlsx_conversions(documents) -> None:
|
||||
def test_e2e_excel_conversions(documents) -> None:
|
||||
for gt_path, doc in documents:
|
||||
pred_md: str = doc.export_to_markdown()
|
||||
assert verify_export(pred_md, str(gt_path) + ".md"), "export to md"
|
||||
@ -79,7 +79,7 @@ def test_pages(documents) -> None:
|
||||
documents: The paths and converted documents.
|
||||
"""
|
||||
# number of pages from the backend method
|
||||
path = next(item for item in get_xlsx_paths() if item.stem == "test-01")
|
||||
path = next(item for item in get_excel_paths() if item.stem == "test-01")
|
||||
in_doc = InputDocument(
|
||||
path_or_stream=path,
|
||||
format=InputFormat.XLSX,
|
||||
|
Loading…
Reference in New Issue
Block a user