Docling/tests/data/webp/groundtruth/docling_v2/webp-test.json
Panos Vagenas 7c5614a37a
fix(markdown): fix single-formatted headings & list items (#1820)
* fix(markdown): fix formatting & inline edge cases (show behavior before change)

Signed-off-by: Panos Vagenas <pva@zurich.ibm.com>

* add change and updated test data

Signed-off-by: Panos Vagenas <pva@zurich.ibm.com>

* update lock

Signed-off-by: Panos Vagenas <pva@zurich.ibm.com>

* improve test case

Signed-off-by: Panos Vagenas <pva@zurich.ibm.com>

---------

Signed-off-by: Panos Vagenas <pva@zurich.ibm.com>
2025-06-25 13:05:06 +02:00

77 lines
1.6 KiB
JSON
Vendored

{
"schema_name": "DoclingDocument",
"version": "1.4.0",
"name": "webp-test",
"origin": {
"mimetype": "application/pdf",
"binary_hash": 16115062463007057787,
"filename": "webp-test.webp",
"uri": null
},
"furniture": {
"self_ref": "#/furniture",
"parent": null,
"children": [],
"content_layer": "furniture",
"name": "_root_",
"label": "unspecified"
},
"body": {
"self_ref": "#/body",
"parent": null,
"children": [
{
"cref": "#/texts/0"
}
],
"content_layer": "body",
"name": "_root_",
"label": "unspecified"
},
"groups": [],
"texts": [
{
"self_ref": "#/texts/0",
"parent": {
"cref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "text",
"prov": [
{
"page_no": 1,
"bbox": {
"l": 234.08627147881114,
"t": 2570.0959833241664,
"r": 1696.0985042090742,
"b": 2319.1220927976665,
"coord_origin": "BOTTOMLEFT"
},
"charspan": [
0,
94
]
}
],
"orig": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package",
"text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package",
"formatting": null,
"hyperlink": null
}
],
"pictures": [],
"tables": [],
"key_value_items": [],
"form_items": [],
"pages": {
"1": {
"size": {
"width": 2000.0,
"height": 2829.0
},
"image": null,
"page_no": 1
}
}
}