
* fix(markdown): fix formatting & inline edge cases (show behavior before change) Signed-off-by: Panos Vagenas <pva@zurich.ibm.com> * add change and updated test data Signed-off-by: Panos Vagenas <pva@zurich.ibm.com> * update lock Signed-off-by: Panos Vagenas <pva@zurich.ibm.com> * improve test case Signed-off-by: Panos Vagenas <pva@zurich.ibm.com> --------- Signed-off-by: Panos Vagenas <pva@zurich.ibm.com>
665 lines
16 KiB
JSON
Vendored
665 lines
16 KiB
JSON
Vendored
{
|
|
"schema_name": "DoclingDocument",
|
|
"version": "1.4.0",
|
|
"name": "equations",
|
|
"origin": {
|
|
"mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
|
"binary_hash": 11121138535595486899,
|
|
"filename": "equations.docx"
|
|
},
|
|
"furniture": {
|
|
"self_ref": "#/furniture",
|
|
"children": [],
|
|
"content_layer": "furniture",
|
|
"name": "_root_",
|
|
"label": "unspecified"
|
|
},
|
|
"body": {
|
|
"self_ref": "#/body",
|
|
"children": [
|
|
{
|
|
"$ref": "#/groups/0"
|
|
},
|
|
{
|
|
"$ref": "#/texts/3"
|
|
},
|
|
{
|
|
"$ref": "#/texts/4"
|
|
},
|
|
{
|
|
"$ref": "#/texts/5"
|
|
},
|
|
{
|
|
"$ref": "#/texts/6"
|
|
},
|
|
{
|
|
"$ref": "#/texts/7"
|
|
},
|
|
{
|
|
"$ref": "#/texts/8"
|
|
},
|
|
{
|
|
"$ref": "#/texts/9"
|
|
},
|
|
{
|
|
"$ref": "#/texts/10"
|
|
},
|
|
{
|
|
"$ref": "#/texts/11"
|
|
},
|
|
{
|
|
"$ref": "#/texts/12"
|
|
},
|
|
{
|
|
"$ref": "#/groups/1"
|
|
},
|
|
{
|
|
"$ref": "#/texts/16"
|
|
},
|
|
{
|
|
"$ref": "#/texts/17"
|
|
},
|
|
{
|
|
"$ref": "#/texts/18"
|
|
},
|
|
{
|
|
"$ref": "#/texts/19"
|
|
},
|
|
{
|
|
"$ref": "#/texts/20"
|
|
},
|
|
{
|
|
"$ref": "#/texts/21"
|
|
},
|
|
{
|
|
"$ref": "#/texts/22"
|
|
},
|
|
{
|
|
"$ref": "#/texts/23"
|
|
},
|
|
{
|
|
"$ref": "#/texts/24"
|
|
},
|
|
{
|
|
"$ref": "#/texts/25"
|
|
},
|
|
{
|
|
"$ref": "#/texts/26"
|
|
},
|
|
{
|
|
"$ref": "#/texts/27"
|
|
},
|
|
{
|
|
"$ref": "#/groups/2"
|
|
},
|
|
{
|
|
"$ref": "#/texts/31"
|
|
},
|
|
{
|
|
"$ref": "#/texts/32"
|
|
},
|
|
{
|
|
"$ref": "#/texts/33"
|
|
},
|
|
{
|
|
"$ref": "#/texts/34"
|
|
},
|
|
{
|
|
"$ref": "#/texts/35"
|
|
}
|
|
],
|
|
"content_layer": "body",
|
|
"name": "_root_",
|
|
"label": "unspecified"
|
|
},
|
|
"groups": [
|
|
{
|
|
"self_ref": "#/groups/0",
|
|
"parent": {
|
|
"$ref": "#/body"
|
|
},
|
|
"children": [
|
|
{
|
|
"$ref": "#/texts/0"
|
|
},
|
|
{
|
|
"$ref": "#/texts/1"
|
|
},
|
|
{
|
|
"$ref": "#/texts/2"
|
|
}
|
|
],
|
|
"content_layer": "body",
|
|
"name": "group",
|
|
"label": "inline"
|
|
},
|
|
{
|
|
"self_ref": "#/groups/1",
|
|
"parent": {
|
|
"$ref": "#/body"
|
|
},
|
|
"children": [
|
|
{
|
|
"$ref": "#/texts/13"
|
|
},
|
|
{
|
|
"$ref": "#/texts/14"
|
|
},
|
|
{
|
|
"$ref": "#/texts/15"
|
|
}
|
|
],
|
|
"content_layer": "body",
|
|
"name": "group",
|
|
"label": "inline"
|
|
},
|
|
{
|
|
"self_ref": "#/groups/2",
|
|
"parent": {
|
|
"$ref": "#/body"
|
|
},
|
|
"children": [
|
|
{
|
|
"$ref": "#/texts/28"
|
|
},
|
|
{
|
|
"$ref": "#/texts/29"
|
|
},
|
|
{
|
|
"$ref": "#/texts/30"
|
|
}
|
|
],
|
|
"content_layer": "body",
|
|
"name": "group",
|
|
"label": "inline"
|
|
}
|
|
],
|
|
"texts": [
|
|
{
|
|
"self_ref": "#/texts/0",
|
|
"parent": {
|
|
"$ref": "#/groups/0"
|
|
},
|
|
"children": [],
|
|
"content_layer": "body",
|
|
"label": "paragraph",
|
|
"prov": [],
|
|
"orig": "This is a word document and this is an inline equation: ",
|
|
"text": "This is a word document and this is an inline equation: "
|
|
},
|
|
{
|
|
"self_ref": "#/texts/1",
|
|
"parent": {
|
|
"$ref": "#/groups/0"
|
|
},
|
|
"children": [],
|
|
"content_layer": "body",
|
|
"label": "formula",
|
|
"prov": [],
|
|
"orig": "A= \\pi r^{2}",
|
|
"text": "A= \\pi r^{2}"
|
|
},
|
|
{
|
|
"self_ref": "#/texts/2",
|
|
"parent": {
|
|
"$ref": "#/groups/0"
|
|
},
|
|
"children": [],
|
|
"content_layer": "body",
|
|
"label": "paragraph",
|
|
"prov": [],
|
|
"orig": ". If instead, I want an equation by line, I can do this:",
|
|
"text": ". If instead, I want an equation by line, I can do this:"
|
|
},
|
|
{
|
|
"self_ref": "#/texts/3",
|
|
"parent": {
|
|
"$ref": "#/body"
|
|
},
|
|
"children": [],
|
|
"content_layer": "body",
|
|
"label": "paragraph",
|
|
"prov": [],
|
|
"orig": "",
|
|
"text": ""
|
|
},
|
|
{
|
|
"self_ref": "#/texts/4",
|
|
"parent": {
|
|
"$ref": "#/body"
|
|
},
|
|
"children": [],
|
|
"content_layer": "body",
|
|
"label": "formula",
|
|
"prov": [],
|
|
"orig": "a^{2}+b^{2}=c^{2} \\text{ \\texttimes } 23",
|
|
"text": "a^{2}+b^{2}=c^{2} \\text{ \\texttimes } 23"
|
|
},
|
|
{
|
|
"self_ref": "#/texts/5",
|
|
"parent": {
|
|
"$ref": "#/body"
|
|
},
|
|
"children": [],
|
|
"content_layer": "body",
|
|
"label": "paragraph",
|
|
"prov": [],
|
|
"orig": "And that is an equation by itself. Cheers!",
|
|
"text": "And that is an equation by itself. Cheers!",
|
|
"formatting": {
|
|
"bold": false,
|
|
"italic": false,
|
|
"underline": false,
|
|
"strikethrough": false,
|
|
"script": "baseline"
|
|
}
|
|
},
|
|
{
|
|
"self_ref": "#/texts/6",
|
|
"parent": {
|
|
"$ref": "#/body"
|
|
},
|
|
"children": [],
|
|
"content_layer": "body",
|
|
"label": "paragraph",
|
|
"prov": [],
|
|
"orig": "",
|
|
"text": ""
|
|
},
|
|
{
|
|
"self_ref": "#/texts/7",
|
|
"parent": {
|
|
"$ref": "#/body"
|
|
},
|
|
"children": [],
|
|
"content_layer": "body",
|
|
"label": "paragraph",
|
|
"prov": [],
|
|
"orig": "This is another equation:",
|
|
"text": "This is another equation:",
|
|
"formatting": {
|
|
"bold": false,
|
|
"italic": false,
|
|
"underline": false,
|
|
"strikethrough": false,
|
|
"script": "baseline"
|
|
}
|
|
},
|
|
{
|
|
"self_ref": "#/texts/8",
|
|
"parent": {
|
|
"$ref": "#/body"
|
|
},
|
|
"children": [],
|
|
"content_layer": "body",
|
|
"label": "formula",
|
|
"prov": [],
|
|
"orig": "f\\left(x\\right)=a_{0}+\\sum_{n=1}^{ \\infty }\\left(a_{n}\\cos(\\frac{n \\pi x}{L})+b_{n}\\sin(\\frac{n \\pi x}{L})\\right)",
|
|
"text": "f\\left(x\\right)=a_{0}+\\sum_{n=1}^{ \\infty }\\left(a_{n}\\cos(\\frac{n \\pi x}{L})+b_{n}\\sin(\\frac{n \\pi x}{L})\\right)"
|
|
},
|
|
{
|
|
"self_ref": "#/texts/9",
|
|
"parent": {
|
|
"$ref": "#/body"
|
|
},
|
|
"children": [],
|
|
"content_layer": "body",
|
|
"label": "paragraph",
|
|
"prov": [],
|
|
"orig": "",
|
|
"text": ""
|
|
},
|
|
{
|
|
"self_ref": "#/texts/10",
|
|
"parent": {
|
|
"$ref": "#/body"
|
|
},
|
|
"children": [],
|
|
"content_layer": "body",
|
|
"label": "paragraph",
|
|
"prov": [],
|
|
"orig": "This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text.",
|
|
"text": "This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text.",
|
|
"formatting": {
|
|
"bold": false,
|
|
"italic": false,
|
|
"underline": false,
|
|
"strikethrough": false,
|
|
"script": "baseline"
|
|
}
|
|
},
|
|
{
|
|
"self_ref": "#/texts/11",
|
|
"parent": {
|
|
"$ref": "#/body"
|
|
},
|
|
"children": [],
|
|
"content_layer": "body",
|
|
"label": "paragraph",
|
|
"prov": [],
|
|
"orig": "",
|
|
"text": ""
|
|
},
|
|
{
|
|
"self_ref": "#/texts/12",
|
|
"parent": {
|
|
"$ref": "#/body"
|
|
},
|
|
"children": [],
|
|
"content_layer": "body",
|
|
"label": "paragraph",
|
|
"prov": [],
|
|
"orig": "",
|
|
"text": ""
|
|
},
|
|
{
|
|
"self_ref": "#/texts/13",
|
|
"parent": {
|
|
"$ref": "#/groups/1"
|
|
},
|
|
"children": [],
|
|
"content_layer": "body",
|
|
"label": "paragraph",
|
|
"prov": [],
|
|
"orig": "This is a word document and this is an inline equation: ",
|
|
"text": "This is a word document and this is an inline equation: "
|
|
},
|
|
{
|
|
"self_ref": "#/texts/14",
|
|
"parent": {
|
|
"$ref": "#/groups/1"
|
|
},
|
|
"children": [],
|
|
"content_layer": "body",
|
|
"label": "formula",
|
|
"prov": [],
|
|
"orig": "A= \\pi r^{2}",
|
|
"text": "A= \\pi r^{2}"
|
|
},
|
|
{
|
|
"self_ref": "#/texts/15",
|
|
"parent": {
|
|
"$ref": "#/groups/1"
|
|
},
|
|
"children": [],
|
|
"content_layer": "body",
|
|
"label": "paragraph",
|
|
"prov": [],
|
|
"orig": ". If instead, I want an equation by line, I can do this:",
|
|
"text": ". If instead, I want an equation by line, I can do this:"
|
|
},
|
|
{
|
|
"self_ref": "#/texts/16",
|
|
"parent": {
|
|
"$ref": "#/body"
|
|
},
|
|
"children": [],
|
|
"content_layer": "body",
|
|
"label": "paragraph",
|
|
"prov": [],
|
|
"orig": "",
|
|
"text": ""
|
|
},
|
|
{
|
|
"self_ref": "#/texts/17",
|
|
"parent": {
|
|
"$ref": "#/body"
|
|
},
|
|
"children": [],
|
|
"content_layer": "body",
|
|
"label": "formula",
|
|
"prov": [],
|
|
"orig": "\\left(x+a\\right)^{n}=\\sum_{k=0}^{n}\\left(\\genfrac{}{}{0pt}{}{n}{k}\\right)x^{k}a^{n-k}",
|
|
"text": "\\left(x+a\\right)^{n}=\\sum_{k=0}^{n}\\left(\\genfrac{}{}{0pt}{}{n}{k}\\right)x^{k}a^{n-k}"
|
|
},
|
|
{
|
|
"self_ref": "#/texts/18",
|
|
"parent": {
|
|
"$ref": "#/body"
|
|
},
|
|
"children": [],
|
|
"content_layer": "body",
|
|
"label": "paragraph",
|
|
"prov": [],
|
|
"orig": "",
|
|
"text": ""
|
|
},
|
|
{
|
|
"self_ref": "#/texts/19",
|
|
"parent": {
|
|
"$ref": "#/body"
|
|
},
|
|
"children": [],
|
|
"content_layer": "body",
|
|
"label": "paragraph",
|
|
"prov": [],
|
|
"orig": "And that is an equation by itself. Cheers!",
|
|
"text": "And that is an equation by itself. Cheers!",
|
|
"formatting": {
|
|
"bold": false,
|
|
"italic": false,
|
|
"underline": false,
|
|
"strikethrough": false,
|
|
"script": "baseline"
|
|
}
|
|
},
|
|
{
|
|
"self_ref": "#/texts/20",
|
|
"parent": {
|
|
"$ref": "#/body"
|
|
},
|
|
"children": [],
|
|
"content_layer": "body",
|
|
"label": "paragraph",
|
|
"prov": [],
|
|
"orig": "",
|
|
"text": ""
|
|
},
|
|
{
|
|
"self_ref": "#/texts/21",
|
|
"parent": {
|
|
"$ref": "#/body"
|
|
},
|
|
"children": [],
|
|
"content_layer": "body",
|
|
"label": "paragraph",
|
|
"prov": [],
|
|
"orig": "This is another equation:",
|
|
"text": "This is another equation:",
|
|
"formatting": {
|
|
"bold": false,
|
|
"italic": false,
|
|
"underline": false,
|
|
"strikethrough": false,
|
|
"script": "baseline"
|
|
}
|
|
},
|
|
{
|
|
"self_ref": "#/texts/22",
|
|
"parent": {
|
|
"$ref": "#/body"
|
|
},
|
|
"children": [],
|
|
"content_layer": "body",
|
|
"label": "paragraph",
|
|
"prov": [],
|
|
"orig": "",
|
|
"text": ""
|
|
},
|
|
{
|
|
"self_ref": "#/texts/23",
|
|
"parent": {
|
|
"$ref": "#/body"
|
|
},
|
|
"children": [],
|
|
"content_layer": "body",
|
|
"label": "formula",
|
|
"prov": [],
|
|
"orig": "\\left(1+x\\right)^{n}=1+\\frac{nx}{1!}+\\frac{n\\left(n-1\\right)x^{2}}{2!}+ \\text{ \\textellipsis }",
|
|
"text": "\\left(1+x\\right)^{n}=1+\\frac{nx}{1!}+\\frac{n\\left(n-1\\right)x^{2}}{2!}+ \\text{ \\textellipsis }"
|
|
},
|
|
{
|
|
"self_ref": "#/texts/24",
|
|
"parent": {
|
|
"$ref": "#/body"
|
|
},
|
|
"children": [],
|
|
"content_layer": "body",
|
|
"label": "paragraph",
|
|
"prov": [],
|
|
"orig": "",
|
|
"text": ""
|
|
},
|
|
{
|
|
"self_ref": "#/texts/25",
|
|
"parent": {
|
|
"$ref": "#/body"
|
|
},
|
|
"children": [],
|
|
"content_layer": "body",
|
|
"label": "paragraph",
|
|
"prov": [],
|
|
"orig": "This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text.",
|
|
"text": "This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text.",
|
|
"formatting": {
|
|
"bold": false,
|
|
"italic": false,
|
|
"underline": false,
|
|
"strikethrough": false,
|
|
"script": "baseline"
|
|
}
|
|
},
|
|
{
|
|
"self_ref": "#/texts/26",
|
|
"parent": {
|
|
"$ref": "#/body"
|
|
},
|
|
"children": [],
|
|
"content_layer": "body",
|
|
"label": "paragraph",
|
|
"prov": [],
|
|
"orig": "",
|
|
"text": ""
|
|
},
|
|
{
|
|
"self_ref": "#/texts/27",
|
|
"parent": {
|
|
"$ref": "#/body"
|
|
},
|
|
"children": [],
|
|
"content_layer": "body",
|
|
"label": "paragraph",
|
|
"prov": [],
|
|
"orig": "",
|
|
"text": ""
|
|
},
|
|
{
|
|
"self_ref": "#/texts/28",
|
|
"parent": {
|
|
"$ref": "#/groups/2"
|
|
},
|
|
"children": [],
|
|
"content_layer": "body",
|
|
"label": "paragraph",
|
|
"prov": [],
|
|
"orig": "This is a word document and this is an inline equation: ",
|
|
"text": "This is a word document and this is an inline equation: "
|
|
},
|
|
{
|
|
"self_ref": "#/texts/29",
|
|
"parent": {
|
|
"$ref": "#/groups/2"
|
|
},
|
|
"children": [],
|
|
"content_layer": "body",
|
|
"label": "formula",
|
|
"prov": [],
|
|
"orig": "A= \\pi r^{2}",
|
|
"text": "A= \\pi r^{2}"
|
|
},
|
|
{
|
|
"self_ref": "#/texts/30",
|
|
"parent": {
|
|
"$ref": "#/groups/2"
|
|
},
|
|
"children": [],
|
|
"content_layer": "body",
|
|
"label": "paragraph",
|
|
"prov": [],
|
|
"orig": ". If instead, I want an equation by line, I can do this:",
|
|
"text": ". If instead, I want an equation by line, I can do this:"
|
|
},
|
|
{
|
|
"self_ref": "#/texts/31",
|
|
"parent": {
|
|
"$ref": "#/body"
|
|
},
|
|
"children": [],
|
|
"content_layer": "body",
|
|
"label": "paragraph",
|
|
"prov": [],
|
|
"orig": "",
|
|
"text": ""
|
|
},
|
|
{
|
|
"self_ref": "#/texts/32",
|
|
"parent": {
|
|
"$ref": "#/body"
|
|
},
|
|
"children": [],
|
|
"content_layer": "body",
|
|
"label": "formula",
|
|
"prov": [],
|
|
"orig": "e^{x}=1+\\frac{x}{1!}+\\frac{x^{2}}{2!}+\\frac{x^{3}}{3!}+ \\text{ \\textellipsis } , - \\infty < x < \\infty",
|
|
"text": "e^{x}=1+\\frac{x}{1!}+\\frac{x^{2}}{2!}+\\frac{x^{3}}{3!}+ \\text{ \\textellipsis } , - \\infty < x < \\infty"
|
|
},
|
|
{
|
|
"self_ref": "#/texts/33",
|
|
"parent": {
|
|
"$ref": "#/body"
|
|
},
|
|
"children": [],
|
|
"content_layer": "body",
|
|
"label": "paragraph",
|
|
"prov": [],
|
|
"orig": "",
|
|
"text": ""
|
|
},
|
|
{
|
|
"self_ref": "#/texts/34",
|
|
"parent": {
|
|
"$ref": "#/body"
|
|
},
|
|
"children": [],
|
|
"content_layer": "body",
|
|
"label": "paragraph",
|
|
"prov": [],
|
|
"orig": "And that is an equation by itself. Cheers!",
|
|
"text": "And that is an equation by itself. Cheers!",
|
|
"formatting": {
|
|
"bold": false,
|
|
"italic": false,
|
|
"underline": false,
|
|
"strikethrough": false,
|
|
"script": "baseline"
|
|
}
|
|
},
|
|
{
|
|
"self_ref": "#/texts/35",
|
|
"parent": {
|
|
"$ref": "#/body"
|
|
},
|
|
"children": [],
|
|
"content_layer": "body",
|
|
"label": "paragraph",
|
|
"prov": [],
|
|
"orig": "",
|
|
"text": ""
|
|
}
|
|
],
|
|
"pictures": [],
|
|
"tables": [],
|
|
"key_value_items": [],
|
|
"form_items": [],
|
|
"pages": {}
|
|
} |