Docling/tests/data/groundtruth/docling_v2/equations.docx.json
Panos Vagenas 7c5614a37a
fix(markdown): fix single-formatted headings & list items (#1820)
* fix(markdown): fix formatting & inline edge cases (show behavior before change)

Signed-off-by: Panos Vagenas <pva@zurich.ibm.com>

* add change and updated test data

Signed-off-by: Panos Vagenas <pva@zurich.ibm.com>

* update lock

Signed-off-by: Panos Vagenas <pva@zurich.ibm.com>

* improve test case

Signed-off-by: Panos Vagenas <pva@zurich.ibm.com>

---------

Signed-off-by: Panos Vagenas <pva@zurich.ibm.com>
2025-06-25 13:05:06 +02:00

665 lines
16 KiB
JSON
Vendored

{
"schema_name": "DoclingDocument",
"version": "1.4.0",
"name": "equations",
"origin": {
"mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
"binary_hash": 11121138535595486899,
"filename": "equations.docx"
},
"furniture": {
"self_ref": "#/furniture",
"children": [],
"content_layer": "furniture",
"name": "_root_",
"label": "unspecified"
},
"body": {
"self_ref": "#/body",
"children": [
{
"$ref": "#/groups/0"
},
{
"$ref": "#/texts/3"
},
{
"$ref": "#/texts/4"
},
{
"$ref": "#/texts/5"
},
{
"$ref": "#/texts/6"
},
{
"$ref": "#/texts/7"
},
{
"$ref": "#/texts/8"
},
{
"$ref": "#/texts/9"
},
{
"$ref": "#/texts/10"
},
{
"$ref": "#/texts/11"
},
{
"$ref": "#/texts/12"
},
{
"$ref": "#/groups/1"
},
{
"$ref": "#/texts/16"
},
{
"$ref": "#/texts/17"
},
{
"$ref": "#/texts/18"
},
{
"$ref": "#/texts/19"
},
{
"$ref": "#/texts/20"
},
{
"$ref": "#/texts/21"
},
{
"$ref": "#/texts/22"
},
{
"$ref": "#/texts/23"
},
{
"$ref": "#/texts/24"
},
{
"$ref": "#/texts/25"
},
{
"$ref": "#/texts/26"
},
{
"$ref": "#/texts/27"
},
{
"$ref": "#/groups/2"
},
{
"$ref": "#/texts/31"
},
{
"$ref": "#/texts/32"
},
{
"$ref": "#/texts/33"
},
{
"$ref": "#/texts/34"
},
{
"$ref": "#/texts/35"
}
],
"content_layer": "body",
"name": "_root_",
"label": "unspecified"
},
"groups": [
{
"self_ref": "#/groups/0",
"parent": {
"$ref": "#/body"
},
"children": [
{
"$ref": "#/texts/0"
},
{
"$ref": "#/texts/1"
},
{
"$ref": "#/texts/2"
}
],
"content_layer": "body",
"name": "group",
"label": "inline"
},
{
"self_ref": "#/groups/1",
"parent": {
"$ref": "#/body"
},
"children": [
{
"$ref": "#/texts/13"
},
{
"$ref": "#/texts/14"
},
{
"$ref": "#/texts/15"
}
],
"content_layer": "body",
"name": "group",
"label": "inline"
},
{
"self_ref": "#/groups/2",
"parent": {
"$ref": "#/body"
},
"children": [
{
"$ref": "#/texts/28"
},
{
"$ref": "#/texts/29"
},
{
"$ref": "#/texts/30"
}
],
"content_layer": "body",
"name": "group",
"label": "inline"
}
],
"texts": [
{
"self_ref": "#/texts/0",
"parent": {
"$ref": "#/groups/0"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "This is a word document and this is an inline equation: ",
"text": "This is a word document and this is an inline equation: "
},
{
"self_ref": "#/texts/1",
"parent": {
"$ref": "#/groups/0"
},
"children": [],
"content_layer": "body",
"label": "formula",
"prov": [],
"orig": "A= \\pi r^{2}",
"text": "A= \\pi r^{2}"
},
{
"self_ref": "#/texts/2",
"parent": {
"$ref": "#/groups/0"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": ". If instead, I want an equation by line, I can do this:",
"text": ". If instead, I want an equation by line, I can do this:"
},
{
"self_ref": "#/texts/3",
"parent": {
"$ref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "",
"text": ""
},
{
"self_ref": "#/texts/4",
"parent": {
"$ref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "formula",
"prov": [],
"orig": "a^{2}+b^{2}=c^{2} \\text{ \\texttimes } 23",
"text": "a^{2}+b^{2}=c^{2} \\text{ \\texttimes } 23"
},
{
"self_ref": "#/texts/5",
"parent": {
"$ref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "And that is an equation by itself. Cheers!",
"text": "And that is an equation by itself. Cheers!",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false,
"script": "baseline"
}
},
{
"self_ref": "#/texts/6",
"parent": {
"$ref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "",
"text": ""
},
{
"self_ref": "#/texts/7",
"parent": {
"$ref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "This is another equation:",
"text": "This is another equation:",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false,
"script": "baseline"
}
},
{
"self_ref": "#/texts/8",
"parent": {
"$ref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "formula",
"prov": [],
"orig": "f\\left(x\\right)=a_{0}+\\sum_{n=1}^{ \\infty }\\left(a_{n}\\cos(\\frac{n \\pi x}{L})+b_{n}\\sin(\\frac{n \\pi x}{L})\\right)",
"text": "f\\left(x\\right)=a_{0}+\\sum_{n=1}^{ \\infty }\\left(a_{n}\\cos(\\frac{n \\pi x}{L})+b_{n}\\sin(\\frac{n \\pi x}{L})\\right)"
},
{
"self_ref": "#/texts/9",
"parent": {
"$ref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "",
"text": ""
},
{
"self_ref": "#/texts/10",
"parent": {
"$ref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text.",
"text": "This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text.",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false,
"script": "baseline"
}
},
{
"self_ref": "#/texts/11",
"parent": {
"$ref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "",
"text": ""
},
{
"self_ref": "#/texts/12",
"parent": {
"$ref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "",
"text": ""
},
{
"self_ref": "#/texts/13",
"parent": {
"$ref": "#/groups/1"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "This is a word document and this is an inline equation: ",
"text": "This is a word document and this is an inline equation: "
},
{
"self_ref": "#/texts/14",
"parent": {
"$ref": "#/groups/1"
},
"children": [],
"content_layer": "body",
"label": "formula",
"prov": [],
"orig": "A= \\pi r^{2}",
"text": "A= \\pi r^{2}"
},
{
"self_ref": "#/texts/15",
"parent": {
"$ref": "#/groups/1"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": ". If instead, I want an equation by line, I can do this:",
"text": ". If instead, I want an equation by line, I can do this:"
},
{
"self_ref": "#/texts/16",
"parent": {
"$ref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "",
"text": ""
},
{
"self_ref": "#/texts/17",
"parent": {
"$ref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "formula",
"prov": [],
"orig": "\\left(x+a\\right)^{n}=\\sum_{k=0}^{n}\\left(\\genfrac{}{}{0pt}{}{n}{k}\\right)x^{k}a^{n-k}",
"text": "\\left(x+a\\right)^{n}=\\sum_{k=0}^{n}\\left(\\genfrac{}{}{0pt}{}{n}{k}\\right)x^{k}a^{n-k}"
},
{
"self_ref": "#/texts/18",
"parent": {
"$ref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "",
"text": ""
},
{
"self_ref": "#/texts/19",
"parent": {
"$ref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "And that is an equation by itself. Cheers!",
"text": "And that is an equation by itself. Cheers!",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false,
"script": "baseline"
}
},
{
"self_ref": "#/texts/20",
"parent": {
"$ref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "",
"text": ""
},
{
"self_ref": "#/texts/21",
"parent": {
"$ref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "This is another equation:",
"text": "This is another equation:",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false,
"script": "baseline"
}
},
{
"self_ref": "#/texts/22",
"parent": {
"$ref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "",
"text": ""
},
{
"self_ref": "#/texts/23",
"parent": {
"$ref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "formula",
"prov": [],
"orig": "\\left(1+x\\right)^{n}=1+\\frac{nx}{1!}+\\frac{n\\left(n-1\\right)x^{2}}{2!}+ \\text{ \\textellipsis }",
"text": "\\left(1+x\\right)^{n}=1+\\frac{nx}{1!}+\\frac{n\\left(n-1\\right)x^{2}}{2!}+ \\text{ \\textellipsis }"
},
{
"self_ref": "#/texts/24",
"parent": {
"$ref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "",
"text": ""
},
{
"self_ref": "#/texts/25",
"parent": {
"$ref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text.",
"text": "This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text.",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false,
"script": "baseline"
}
},
{
"self_ref": "#/texts/26",
"parent": {
"$ref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "",
"text": ""
},
{
"self_ref": "#/texts/27",
"parent": {
"$ref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "",
"text": ""
},
{
"self_ref": "#/texts/28",
"parent": {
"$ref": "#/groups/2"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "This is a word document and this is an inline equation: ",
"text": "This is a word document and this is an inline equation: "
},
{
"self_ref": "#/texts/29",
"parent": {
"$ref": "#/groups/2"
},
"children": [],
"content_layer": "body",
"label": "formula",
"prov": [],
"orig": "A= \\pi r^{2}",
"text": "A= \\pi r^{2}"
},
{
"self_ref": "#/texts/30",
"parent": {
"$ref": "#/groups/2"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": ". If instead, I want an equation by line, I can do this:",
"text": ". If instead, I want an equation by line, I can do this:"
},
{
"self_ref": "#/texts/31",
"parent": {
"$ref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "",
"text": ""
},
{
"self_ref": "#/texts/32",
"parent": {
"$ref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "formula",
"prov": [],
"orig": "e^{x}=1+\\frac{x}{1!}+\\frac{x^{2}}{2!}+\\frac{x^{3}}{3!}+ \\text{ \\textellipsis } , - \\infty < x < \\infty",
"text": "e^{x}=1+\\frac{x}{1!}+\\frac{x^{2}}{2!}+\\frac{x^{3}}{3!}+ \\text{ \\textellipsis } , - \\infty < x < \\infty"
},
{
"self_ref": "#/texts/33",
"parent": {
"$ref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "",
"text": ""
},
{
"self_ref": "#/texts/34",
"parent": {
"$ref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "And that is an equation by itself. Cheers!",
"text": "And that is an equation by itself. Cheers!",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false,
"script": "baseline"
}
},
{
"self_ref": "#/texts/35",
"parent": {
"$ref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "",
"text": ""
}
],
"pictures": [],
"tables": [],
"key_value_items": [],
"form_items": [],
"pages": {}
}