feat: leverage new list modeling, capture default markers (#1856)

* chore: update docling-core & regenerate test data

Signed-off-by: Panos Vagenas <pva@zurich.ibm.com>

* update backends to leverage new list modeling

Signed-off-by: Panos Vagenas <pva@zurich.ibm.com>

* repin docling-core

Signed-off-by: Panos Vagenas <pva@zurich.ibm.com>

* ensure availability of latest docling-core API

Signed-off-by: Panos Vagenas <pva@zurich.ibm.com>

---------

Signed-off-by: Panos Vagenas <pva@zurich.ibm.com>
This commit is contained in:
Panos Vagenas
2025-06-27 16:37:15 +02:00
committed by GitHub
parent e79e4f0ab6
commit 0533da1923
90 changed files with 2252 additions and 2240 deletions

View File

@@ -1,6 +1,6 @@
{
"schema_name": "DoclingDocument",
"version": "1.4.0",
"version": "1.5.0",
"name": "2206.01062",
"origin": {
"mimetype": "application/pdf",
@@ -10866,7 +10866,7 @@
"formatting": null,
"hyperlink": null,
"enumerated": false,
"marker": "-"
"marker": ""
},
{
"self_ref": "#/texts/356",
@@ -10897,7 +10897,7 @@
"formatting": null,
"hyperlink": null,
"enumerated": false,
"marker": "-"
"marker": ""
},
{
"self_ref": "#/texts/357",
@@ -10928,7 +10928,7 @@
"formatting": null,
"hyperlink": null,
"enumerated": false,
"marker": "-"
"marker": ""
},
{
"self_ref": "#/texts/358",
@@ -10959,7 +10959,7 @@
"formatting": null,
"hyperlink": null,
"enumerated": false,
"marker": "-"
"marker": ""
},
{
"self_ref": "#/texts/359",
@@ -11048,7 +11048,7 @@
"formatting": null,
"hyperlink": null,
"enumerated": false,
"marker": "-"
"marker": ""
},
{
"self_ref": "#/texts/362",
@@ -12430,7 +12430,7 @@
"formatting": null,
"hyperlink": null,
"enumerated": false,
"marker": "-"
"marker": ""
},
{
"self_ref": "#/texts/409",
@@ -12461,7 +12461,7 @@
"formatting": null,
"hyperlink": null,
"enumerated": false,
"marker": "-"
"marker": ""
},
{
"self_ref": "#/texts/410",
@@ -12492,7 +12492,7 @@
"formatting": null,
"hyperlink": null,
"enumerated": false,
"marker": "-"
"marker": ""
},
{
"self_ref": "#/texts/411",
@@ -12523,7 +12523,7 @@
"formatting": null,
"hyperlink": null,
"enumerated": false,
"marker": "-"
"marker": ""
},
{
"self_ref": "#/texts/412",
@@ -12554,7 +12554,7 @@
"formatting": null,
"hyperlink": null,
"enumerated": false,
"marker": "-"
"marker": ""
},
{
"self_ref": "#/texts/413",
@@ -12585,7 +12585,7 @@
"formatting": null,
"hyperlink": null,
"enumerated": false,
"marker": "-"
"marker": ""
},
{
"self_ref": "#/texts/414",
@@ -14713,7 +14713,7 @@
"formatting": null,
"hyperlink": null,
"enumerated": false,
"marker": "-"
"marker": ""
},
{
"self_ref": "#/texts/487",
@@ -14744,7 +14744,7 @@
"formatting": null,
"hyperlink": null,
"enumerated": false,
"marker": "-"
"marker": ""
},
{
"self_ref": "#/texts/488",
@@ -14775,7 +14775,7 @@
"formatting": null,
"hyperlink": null,
"enumerated": false,
"marker": "-"
"marker": ""
},
{
"self_ref": "#/texts/489",
@@ -14806,7 +14806,7 @@
"formatting": null,
"hyperlink": null,
"enumerated": false,
"marker": "-"
"marker": ""
},
{
"self_ref": "#/texts/490",
@@ -14837,7 +14837,7 @@
"formatting": null,
"hyperlink": null,
"enumerated": false,
"marker": "-"
"marker": ""
},
{
"self_ref": "#/texts/491",
@@ -14868,7 +14868,7 @@
"formatting": null,
"hyperlink": null,
"enumerated": false,
"marker": "-"
"marker": ""
},
{
"self_ref": "#/texts/492",
@@ -14899,7 +14899,7 @@
"formatting": null,
"hyperlink": null,
"enumerated": false,
"marker": "-"
"marker": ""
},
{
"self_ref": "#/texts/493",
@@ -14930,7 +14930,7 @@
"formatting": null,
"hyperlink": null,
"enumerated": false,
"marker": "-"
"marker": ""
},
{
"self_ref": "#/texts/494",
@@ -14961,7 +14961,7 @@
"formatting": null,
"hyperlink": null,
"enumerated": false,
"marker": "-"
"marker": ""
},
{
"self_ref": "#/texts/495",
@@ -14992,7 +14992,7 @@
"formatting": null,
"hyperlink": null,
"enumerated": false,
"marker": "-"
"marker": ""
},
{
"self_ref": "#/texts/496",
@@ -15023,7 +15023,7 @@
"formatting": null,
"hyperlink": null,
"enumerated": false,
"marker": "-"
"marker": ""
},
{
"self_ref": "#/texts/497",
@@ -15054,7 +15054,7 @@
"formatting": null,
"hyperlink": null,
"enumerated": false,
"marker": "-"
"marker": ""
},
{
"self_ref": "#/texts/498",
@@ -15085,7 +15085,7 @@
"formatting": null,
"hyperlink": null,
"enumerated": false,
"marker": "-"
"marker": ""
},
{
"self_ref": "#/texts/499",
@@ -15580,7 +15580,7 @@
"formatting": null,
"hyperlink": null,
"enumerated": false,
"marker": "-"
"marker": ""
},
{
"self_ref": "#/texts/516",
@@ -15611,7 +15611,7 @@
"formatting": null,
"hyperlink": null,
"enumerated": false,
"marker": "-"
"marker": ""
},
{
"self_ref": "#/texts/517",
@@ -15642,7 +15642,7 @@
"formatting": null,
"hyperlink": null,
"enumerated": false,
"marker": "-"
"marker": ""
},
{
"self_ref": "#/texts/518",
@@ -15673,7 +15673,7 @@
"formatting": null,
"hyperlink": null,
"enumerated": false,
"marker": "-"
"marker": ""
},
{
"self_ref": "#/texts/519",
@@ -15704,7 +15704,7 @@
"formatting": null,
"hyperlink": null,
"enumerated": false,
"marker": "-"
"marker": ""
},
{
"self_ref": "#/texts/520",
@@ -15735,7 +15735,7 @@
"formatting": null,
"hyperlink": null,
"enumerated": false,
"marker": "-"
"marker": ""
},
{
"self_ref": "#/texts/521",
@@ -15766,7 +15766,7 @@
"formatting": null,
"hyperlink": null,
"enumerated": false,
"marker": "-"
"marker": ""
},
{
"self_ref": "#/texts/522",
@@ -15797,7 +15797,7 @@
"formatting": null,
"hyperlink": null,
"enumerated": false,
"marker": "-"
"marker": ""
},
{
"self_ref": "#/texts/523",
@@ -15828,7 +15828,7 @@
"formatting": null,
"hyperlink": null,
"enumerated": false,
"marker": "-"
"marker": ""
},
{
"self_ref": "#/texts/524",
@@ -15859,7 +15859,7 @@
"formatting": null,
"hyperlink": null,
"enumerated": false,
"marker": "-"
"marker": ""
}
],
"pictures": [