diff --git a/docling/datamodel/base_models.py b/docling/datamodel/base_models.py index 0269e13..2cd2515 100644 --- a/docling/datamodel/base_models.py +++ b/docling/datamodel/base_models.py @@ -70,7 +70,7 @@ FormatToExtensions: Dict[InputFormat, List[str]] = { InputFormat.IMAGE: ["jpg", "jpeg", "png", "tif", "tiff", "bmp", "webp"], InputFormat.ASCIIDOC: ["adoc", "asciidoc", "asc"], InputFormat.CSV: ["csv"], - InputFormat.XLSX: ["xlsx"], + InputFormat.XLSX: ["xlsx", "xlsm"], InputFormat.XML_USPTO: ["xml", "txt"], InputFormat.JSON_DOCLING: ["json"], } diff --git a/tests/data/groundtruth/docling_v1/2305.03393v1-pg9.json b/tests/data/groundtruth/docling_v1/2305.03393v1-pg9.json index e938e2d..dd51e39 100644 --- a/tests/data/groundtruth/docling_v1/2305.03393v1-pg9.json +++ b/tests/data/groundtruth/docling_v1/2305.03393v1-pg9.json @@ -213,10 +213,10 @@ "prov": [ { "bbox": [ - 139.6674041748047, + 139.66741943359375, 322.5054626464844, 475.00927734375, - 454.4546203613281 + 454.45458984375 ], "page": 1, "span": [ diff --git a/tests/data/groundtruth/docling_v1/2305.03393v1-pg9.pages.json b/tests/data/groundtruth/docling_v1/2305.03393v1-pg9.pages.json index 3bca0d5..5db555b 100644 --- a/tests/data/groundtruth/docling_v1/2305.03393v1-pg9.pages.json +++ b/tests/data/groundtruth/docling_v1/2305.03393v1-pg9.pages.json @@ -2646,7 +2646,7 @@ "b": 102.78223000000003, "coord_origin": "TOPLEFT" }, - "confidence": 0.9373533129692078, + "confidence": 0.9373534917831421, "cells": [ { "index": 0, @@ -2686,7 +2686,7 @@ "b": 102.78223000000003, "coord_origin": "TOPLEFT" }, - "confidence": 0.8858679533004761, + "confidence": 0.8858680725097656, "cells": [ { "index": 1, @@ -2726,7 +2726,7 @@ "b": 152.90697999999998, "coord_origin": "TOPLEFT" }, - "confidence": 0.9806435108184814, + "confidence": 0.9806433916091919, "cells": [ { "index": 2, @@ -2881,7 +2881,7 @@ "b": 255.42400999999995, "coord_origin": "TOPLEFT" }, - "confidence": 0.9850425124168396, + "confidence": 0.98504239320755, "cells": [ { "index": 7, @@ -3096,7 +3096,7 @@ "b": 327.98218, "coord_origin": "TOPLEFT" }, - "confidence": 0.9591907262802124, + "confidence": 0.9591909050941467, "cells": [ { "index": 15, @@ -3280,8 +3280,8 @@ "id": 0, "label": "table", "bbox": { - "l": 139.6674041748047, - "t": 337.5453796386719, + "l": 139.66741943359375, + "t": 337.54541015625, "r": 475.00927734375, "b": 469.4945373535156, "coord_origin": "TOPLEFT" @@ -7787,7 +7787,7 @@ "b": 518.17419, "coord_origin": "TOPLEFT" }, - "confidence": 0.9589295387268066, + "confidence": 0.9589294195175171, "cells": [ { "index": 91, @@ -7852,7 +7852,7 @@ "b": 618.3, "coord_origin": "TOPLEFT" }, - "confidence": 0.9849976301193237, + "confidence": 0.9849975109100342, "cells": [ { "index": 93, @@ -8184,8 +8184,8 @@ "id": 0, "label": "table", "bbox": { - "l": 139.6674041748047, - "t": 337.5453796386719, + "l": 139.66741943359375, + "t": 337.54541015625, "r": 475.00927734375, "b": 469.4945373535156, "coord_origin": "TOPLEFT" @@ -13582,7 +13582,7 @@ "b": 102.78223000000003, "coord_origin": "TOPLEFT" }, - "confidence": 0.9373533129692078, + "confidence": 0.9373534917831421, "cells": [ { "index": 0, @@ -13628,7 +13628,7 @@ "b": 102.78223000000003, "coord_origin": "TOPLEFT" }, - "confidence": 0.8858679533004761, + "confidence": 0.8858680725097656, "cells": [ { "index": 1, @@ -13674,7 +13674,7 @@ "b": 152.90697999999998, "coord_origin": "TOPLEFT" }, - "confidence": 0.9806435108184814, + "confidence": 0.9806433916091919, "cells": [ { "index": 2, @@ -13841,7 +13841,7 @@ "b": 255.42400999999995, "coord_origin": "TOPLEFT" }, - "confidence": 0.9850425124168396, + "confidence": 0.98504239320755, "cells": [ { "index": 7, @@ -14062,7 +14062,7 @@ "b": 327.98218, "coord_origin": "TOPLEFT" }, - "confidence": 0.9591907262802124, + "confidence": 0.9591909050941467, "cells": [ { "index": 15, @@ -14252,8 +14252,8 @@ "id": 0, "label": "table", "bbox": { - "l": 139.6674041748047, - "t": 337.5453796386719, + "l": 139.66741943359375, + "t": 337.54541015625, "r": 475.00927734375, "b": 469.4945373535156, "coord_origin": "TOPLEFT" @@ -19642,7 +19642,7 @@ "b": 518.17419, "coord_origin": "TOPLEFT" }, - "confidence": 0.9589295387268066, + "confidence": 0.9589294195175171, "cells": [ { "index": 91, @@ -19713,7 +19713,7 @@ "b": 618.3, "coord_origin": "TOPLEFT" }, - "confidence": 0.9849976301193237, + "confidence": 0.9849975109100342, "cells": [ { "index": 93, @@ -20057,7 +20057,7 @@ "b": 152.90697999999998, "coord_origin": "TOPLEFT" }, - "confidence": 0.9806435108184814, + "confidence": 0.9806433916091919, "cells": [ { "index": 2, @@ -20224,7 +20224,7 @@ "b": 255.42400999999995, "coord_origin": "TOPLEFT" }, - "confidence": 0.9850425124168396, + "confidence": 0.98504239320755, "cells": [ { "index": 7, @@ -20445,7 +20445,7 @@ "b": 327.98218, "coord_origin": "TOPLEFT" }, - "confidence": 0.9591907262802124, + "confidence": 0.9591909050941467, "cells": [ { "index": 15, @@ -20635,8 +20635,8 @@ "id": 0, "label": "table", "bbox": { - "l": 139.6674041748047, - "t": 337.5453796386719, + "l": 139.66741943359375, + "t": 337.54541015625, "r": 475.00927734375, "b": 469.4945373535156, "coord_origin": "TOPLEFT" @@ -26025,7 +26025,7 @@ "b": 518.17419, "coord_origin": "TOPLEFT" }, - "confidence": 0.9589295387268066, + "confidence": 0.9589294195175171, "cells": [ { "index": 91, @@ -26096,7 +26096,7 @@ "b": 618.3, "coord_origin": "TOPLEFT" }, - "confidence": 0.9849976301193237, + "confidence": 0.9849975109100342, "cells": [ { "index": 93, @@ -26440,7 +26440,7 @@ "b": 102.78223000000003, "coord_origin": "TOPLEFT" }, - "confidence": 0.9373533129692078, + "confidence": 0.9373534917831421, "cells": [ { "index": 0, @@ -26486,7 +26486,7 @@ "b": 102.78223000000003, "coord_origin": "TOPLEFT" }, - "confidence": 0.8858679533004761, + "confidence": 0.8858680725097656, "cells": [ { "index": 1, diff --git a/tests/data/groundtruth/docling_v2/2305.03393v1-pg9.json b/tests/data/groundtruth/docling_v2/2305.03393v1-pg9.json index c057009..f281a44 100644 --- a/tests/data/groundtruth/docling_v2/2305.03393v1-pg9.json +++ b/tests/data/groundtruth/docling_v2/2305.03393v1-pg9.json @@ -336,8 +336,8 @@ { "page_no": 1, "bbox": { - "l": 139.6674041748047, - "t": 454.4546203613281, + "l": 139.66741943359375, + "t": 454.45458984375, "r": 475.00927734375, "b": 322.5054626464844, "coord_origin": "BOTTOMLEFT" diff --git a/tests/data/groundtruth/docling_v2/2305.03393v1-pg9.pages.json b/tests/data/groundtruth/docling_v2/2305.03393v1-pg9.pages.json index 3bca0d5..5db555b 100644 --- a/tests/data/groundtruth/docling_v2/2305.03393v1-pg9.pages.json +++ b/tests/data/groundtruth/docling_v2/2305.03393v1-pg9.pages.json @@ -2646,7 +2646,7 @@ "b": 102.78223000000003, "coord_origin": "TOPLEFT" }, - "confidence": 0.9373533129692078, + "confidence": 0.9373534917831421, "cells": [ { "index": 0, @@ -2686,7 +2686,7 @@ "b": 102.78223000000003, "coord_origin": "TOPLEFT" }, - "confidence": 0.8858679533004761, + "confidence": 0.8858680725097656, "cells": [ { "index": 1, @@ -2726,7 +2726,7 @@ "b": 152.90697999999998, "coord_origin": "TOPLEFT" }, - "confidence": 0.9806435108184814, + "confidence": 0.9806433916091919, "cells": [ { "index": 2, @@ -2881,7 +2881,7 @@ "b": 255.42400999999995, "coord_origin": "TOPLEFT" }, - "confidence": 0.9850425124168396, + "confidence": 0.98504239320755, "cells": [ { "index": 7, @@ -3096,7 +3096,7 @@ "b": 327.98218, "coord_origin": "TOPLEFT" }, - "confidence": 0.9591907262802124, + "confidence": 0.9591909050941467, "cells": [ { "index": 15, @@ -3280,8 +3280,8 @@ "id": 0, "label": "table", "bbox": { - "l": 139.6674041748047, - "t": 337.5453796386719, + "l": 139.66741943359375, + "t": 337.54541015625, "r": 475.00927734375, "b": 469.4945373535156, "coord_origin": "TOPLEFT" @@ -7787,7 +7787,7 @@ "b": 518.17419, "coord_origin": "TOPLEFT" }, - "confidence": 0.9589295387268066, + "confidence": 0.9589294195175171, "cells": [ { "index": 91, @@ -7852,7 +7852,7 @@ "b": 618.3, "coord_origin": "TOPLEFT" }, - "confidence": 0.9849976301193237, + "confidence": 0.9849975109100342, "cells": [ { "index": 93, @@ -8184,8 +8184,8 @@ "id": 0, "label": "table", "bbox": { - "l": 139.6674041748047, - "t": 337.5453796386719, + "l": 139.66741943359375, + "t": 337.54541015625, "r": 475.00927734375, "b": 469.4945373535156, "coord_origin": "TOPLEFT" @@ -13582,7 +13582,7 @@ "b": 102.78223000000003, "coord_origin": "TOPLEFT" }, - "confidence": 0.9373533129692078, + "confidence": 0.9373534917831421, "cells": [ { "index": 0, @@ -13628,7 +13628,7 @@ "b": 102.78223000000003, "coord_origin": "TOPLEFT" }, - "confidence": 0.8858679533004761, + "confidence": 0.8858680725097656, "cells": [ { "index": 1, @@ -13674,7 +13674,7 @@ "b": 152.90697999999998, "coord_origin": "TOPLEFT" }, - "confidence": 0.9806435108184814, + "confidence": 0.9806433916091919, "cells": [ { "index": 2, @@ -13841,7 +13841,7 @@ "b": 255.42400999999995, "coord_origin": "TOPLEFT" }, - "confidence": 0.9850425124168396, + "confidence": 0.98504239320755, "cells": [ { "index": 7, @@ -14062,7 +14062,7 @@ "b": 327.98218, "coord_origin": "TOPLEFT" }, - "confidence": 0.9591907262802124, + "confidence": 0.9591909050941467, "cells": [ { "index": 15, @@ -14252,8 +14252,8 @@ "id": 0, "label": "table", "bbox": { - "l": 139.6674041748047, - "t": 337.5453796386719, + "l": 139.66741943359375, + "t": 337.54541015625, "r": 475.00927734375, "b": 469.4945373535156, "coord_origin": "TOPLEFT" @@ -19642,7 +19642,7 @@ "b": 518.17419, "coord_origin": "TOPLEFT" }, - "confidence": 0.9589295387268066, + "confidence": 0.9589294195175171, "cells": [ { "index": 91, @@ -19713,7 +19713,7 @@ "b": 618.3, "coord_origin": "TOPLEFT" }, - "confidence": 0.9849976301193237, + "confidence": 0.9849975109100342, "cells": [ { "index": 93, @@ -20057,7 +20057,7 @@ "b": 152.90697999999998, "coord_origin": "TOPLEFT" }, - "confidence": 0.9806435108184814, + "confidence": 0.9806433916091919, "cells": [ { "index": 2, @@ -20224,7 +20224,7 @@ "b": 255.42400999999995, "coord_origin": "TOPLEFT" }, - "confidence": 0.9850425124168396, + "confidence": 0.98504239320755, "cells": [ { "index": 7, @@ -20445,7 +20445,7 @@ "b": 327.98218, "coord_origin": "TOPLEFT" }, - "confidence": 0.9591907262802124, + "confidence": 0.9591909050941467, "cells": [ { "index": 15, @@ -20635,8 +20635,8 @@ "id": 0, "label": "table", "bbox": { - "l": 139.6674041748047, - "t": 337.5453796386719, + "l": 139.66741943359375, + "t": 337.54541015625, "r": 475.00927734375, "b": 469.4945373535156, "coord_origin": "TOPLEFT" @@ -26025,7 +26025,7 @@ "b": 518.17419, "coord_origin": "TOPLEFT" }, - "confidence": 0.9589295387268066, + "confidence": 0.9589294195175171, "cells": [ { "index": 91, @@ -26096,7 +26096,7 @@ "b": 618.3, "coord_origin": "TOPLEFT" }, - "confidence": 0.9849976301193237, + "confidence": 0.9849975109100342, "cells": [ { "index": 93, @@ -26440,7 +26440,7 @@ "b": 102.78223000000003, "coord_origin": "TOPLEFT" }, - "confidence": 0.9373533129692078, + "confidence": 0.9373534917831421, "cells": [ { "index": 0, @@ -26486,7 +26486,7 @@ "b": 102.78223000000003, "coord_origin": "TOPLEFT" }, - "confidence": 0.8858679533004761, + "confidence": 0.8858680725097656, "cells": [ { "index": 1, diff --git a/tests/data/groundtruth/docling_v2/example_8.html.itxt b/tests/data/groundtruth/docling_v2/example_8.html.itxt new file mode 100644 index 0000000..505408e --- /dev/null +++ b/tests/data/groundtruth/docling_v2/example_8.html.itxt @@ -0,0 +1,8 @@ +item-0 at level 0: unspecified: group _root_ + item-1 at level 1: section: group header-1 + item-2 at level 2: section_header: Pivot table with with 1 row header + item-3 at level 3: table with [6x4] + item-4 at level 2: section_header: Pivot table with 2 row headers + item-5 at level 3: table with [6x5] + item-6 at level 2: section_header: Equivalent pivot table + item-7 at level 3: table with [6x5] \ No newline at end of file diff --git a/tests/data/groundtruth/docling_v2/example_8.html.json b/tests/data/groundtruth/docling_v2/example_8.html.json new file mode 100644 index 0000000..e77d5cf --- /dev/null +++ b/tests/data/groundtruth/docling_v2/example_8.html.json @@ -0,0 +1,2008 @@ +{ + "schema_name": "DoclingDocument", + "version": "1.3.0", + "name": "example_8", + "origin": { + "mimetype": "text/html", + "binary_hash": 12799593797322619937, + "filename": "example_8.html" + }, + "furniture": { + "self_ref": "#/furniture", + "children": [], + "content_layer": "furniture", + "name": "_root_", + "label": "unspecified" + }, + "body": { + "self_ref": "#/body", + "children": [ + { + "$ref": "#/groups/0" + } + ], + "content_layer": "body", + "name": "_root_", + "label": "unspecified" + }, + "groups": [ + { + "self_ref": "#/groups/0", + "parent": { + "$ref": "#/body" + }, + "children": [ + { + "$ref": "#/texts/0" + }, + { + "$ref": "#/texts/1" + }, + { + "$ref": "#/texts/2" + } + ], + "content_layer": "body", + "name": "header-1", + "label": "section" + } + ], + "texts": [ + { + "self_ref": "#/texts/0", + "parent": { + "$ref": "#/groups/0" + }, + "children": [ + { + "$ref": "#/tables/0" + } + ], + "content_layer": "body", + "label": "section_header", + "prov": [], + "orig": "Pivot table with with 1 row header", + "text": "Pivot table with with 1 row header", + "level": 1 + }, + { + "self_ref": "#/texts/1", + "parent": { + "$ref": "#/groups/0" + }, + "children": [ + { + "$ref": "#/tables/1" + } + ], + "content_layer": "body", + "label": "section_header", + "prov": [], + "orig": "Pivot table with 2 row headers", + "text": "Pivot table with 2 row headers", + "level": 1 + }, + { + "self_ref": "#/texts/2", + "parent": { + "$ref": "#/groups/0" + }, + "children": [ + { + "$ref": "#/tables/2" + } + ], + "content_layer": "body", + "label": "section_header", + "prov": [], + "orig": "Equivalent pivot table", + "text": "Equivalent pivot table", + "level": 1 + } + ], + "pictures": [], + "tables": [ + { + "self_ref": "#/tables/0", + "parent": { + "$ref": "#/texts/0" + }, + "children": [], + "content_layer": "body", + "label": "table", + "prov": [], + "captions": [], + "references": [], + "footnotes": [], + "data": { + "table_cells": [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Year", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Month", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "Revenue", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "Cost", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 5, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 6, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "2025", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "January", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "$134", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "$162", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "February", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "$150", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "$155", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "March", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "$160", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "$143", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "April", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "$210", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "$150", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "May", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "$280", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "$120", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + "num_rows": 6, + "num_cols": 4, + "grid": [ + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Year", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Month", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "Revenue", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "Cost", + "column_header": true, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 5, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 6, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "2025", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "January", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "$134", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "$162", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 5, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 6, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "2025", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "February", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "$150", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "$155", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 5, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 6, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "2025", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "March", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "$160", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "$143", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 5, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 6, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "2025", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "April", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "$210", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "$150", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 5, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 6, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "2025", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "May", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "$280", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "$120", + "column_header": false, + "row_header": false, + "row_section": false + } + ] + ] + } + }, + { + "self_ref": "#/tables/1", + "parent": { + "$ref": "#/texts/1" + }, + "children": [], + "content_layer": "body", + "label": "table", + "prov": [], + "captions": [], + "references": [], + "footnotes": [], + "data": { + "table_cells": [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Year", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Quarter", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "Month", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "Revenue", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "Cost", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 6, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 7, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "2025", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 3, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 4, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Q1", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "January", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "$134", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "$162", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "February", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "$150", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "$155", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "March", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "$160", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "$143", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 2, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 6, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Q2", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "April", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "$210", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "$150", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "May", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "$280", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "$120", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + "num_rows": 6, + "num_cols": 5, + "grid": [ + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Year", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Quarter", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "Month", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "Revenue", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "Cost", + "column_header": true, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 6, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 7, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "2025", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 3, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 4, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Q1", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "January", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "$134", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "$162", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 6, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 7, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "2025", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 3, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 4, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Q1", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "February", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "$150", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "$155", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 6, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 7, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "2025", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 3, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 4, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Q1", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "March", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "$160", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "$143", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 6, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 7, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "2025", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 2, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 6, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Q2", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "April", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "$210", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "$150", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 6, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 7, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "2025", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 2, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 6, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Q2", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "May", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "$280", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "$120", + "column_header": false, + "row_header": false, + "row_section": false + } + ] + ] + } + }, + { + "self_ref": "#/tables/2", + "parent": { + "$ref": "#/texts/2" + }, + "children": [], + "content_layer": "body", + "label": "table", + "prov": [], + "captions": [], + "references": [], + "footnotes": [], + "data": { + "table_cells": [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Year", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Quarter", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "Month", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "Revenue", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "Cost", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 7, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 8, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "2025", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 3, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 4, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Q1", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "January", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "$134", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "$162", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "February", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "$150", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "$155", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "March", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "$160", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "$143", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 2, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 6, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Q2", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "April", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "$210", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "$150", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "May", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "$280", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "$120", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + "num_rows": 6, + "num_cols": 5, + "grid": [ + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Year", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Quarter", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "Month", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "Revenue", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "Cost", + "column_header": true, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 7, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 8, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "2025", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 3, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 4, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Q1", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "January", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "$134", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "$162", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 7, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 8, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "2025", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 3, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 4, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Q1", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "February", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "$150", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "$155", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 7, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 8, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "2025", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 3, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 4, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Q1", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "March", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "$160", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "$143", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 7, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 8, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "2025", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 2, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 6, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Q2", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "April", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "$210", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "$150", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 7, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 8, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "2025", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 2, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 6, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Q2", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "May", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "$280", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "$120", + "column_header": false, + "row_header": false, + "row_section": false + } + ] + ] + } + } + ], + "key_value_items": [], + "form_items": [], + "pages": {} +} \ No newline at end of file diff --git a/tests/data/groundtruth/docling_v2/example_8.html.md b/tests/data/groundtruth/docling_v2/example_8.html.md new file mode 100644 index 0000000..462a810 --- /dev/null +++ b/tests/data/groundtruth/docling_v2/example_8.html.md @@ -0,0 +1,29 @@ +## Pivot table with with 1 row header + +| Year | Month | Revenue | Cost | +|--------|----------|-----------|--------| +| 2025 | January | $134 | $162 | +| 2025 | February | $150 | $155 | +| 2025 | March | $160 | $143 | +| 2025 | April | $210 | $150 | +| 2025 | May | $280 | $120 | + +## Pivot table with 2 row headers + +| Year | Quarter | Month | Revenue | Cost | +|--------|-----------|----------|-----------|--------| +| 2025 | Q1 | January | $134 | $162 | +| 2025 | Q1 | February | $150 | $155 | +| 2025 | Q1 | March | $160 | $143 | +| 2025 | Q2 | April | $210 | $150 | +| 2025 | Q2 | May | $280 | $120 | + +## Equivalent pivot table + +| Year | Quarter | Month | Revenue | Cost | +|--------|-----------|----------|-----------|--------| +| 2025 | Q1 | January | $134 | $162 | +| 2025 | Q1 | February | $150 | $155 | +| 2025 | Q1 | March | $160 | $143 | +| 2025 | Q2 | April | $210 | $150 | +| 2025 | Q2 | May | $280 | $120 | \ No newline at end of file diff --git a/tests/data/groundtruth/docling_v2/sample_sales_data.xlsm.itxt b/tests/data/groundtruth/docling_v2/sample_sales_data.xlsm.itxt new file mode 100644 index 0000000..f7965d2 --- /dev/null +++ b/tests/data/groundtruth/docling_v2/sample_sales_data.xlsm.itxt @@ -0,0 +1,3 @@ +item-0 at level 0: unspecified: group _root_ + item-1 at level 1: section: group sheet: SalesData + item-2 at level 2: table with [21x4] \ No newline at end of file diff --git a/tests/data/groundtruth/docling_v2/sample_sales_data.xlsm.json b/tests/data/groundtruth/docling_v2/sample_sales_data.xlsm.json new file mode 100644 index 0000000..04f8198 --- /dev/null +++ b/tests/data/groundtruth/docling_v2/sample_sales_data.xlsm.json @@ -0,0 +1,2153 @@ +{ + "schema_name": "DoclingDocument", + "version": "1.3.0", + "name": "sample_sales_data", + "origin": { + "mimetype": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", + "binary_hash": 14806485565397602516, + "filename": "sample_sales_data.xlsm" + }, + "furniture": { + "self_ref": "#/furniture", + "children": [], + "content_layer": "furniture", + "name": "_root_", + "label": "unspecified" + }, + "body": { + "self_ref": "#/body", + "children": [ + { + "$ref": "#/groups/0" + } + ], + "content_layer": "body", + "name": "_root_", + "label": "unspecified" + }, + "groups": [ + { + "self_ref": "#/groups/0", + "parent": { + "$ref": "#/body" + }, + "children": [ + { + "$ref": "#/tables/0" + } + ], + "content_layer": "body", + "name": "sheet: SalesData", + "label": "section" + } + ], + "texts": [], + "pictures": [], + "tables": [ + { + "self_ref": "#/tables/0", + "parent": { + "$ref": "#/groups/0" + }, + "children": [], + "content_layer": "body", + "label": "table", + "prov": [ + { + "page_no": 1, + "bbox": { + "l": 0.0, + "t": 0.0, + "r": 4.0, + "b": 21.0, + "coord_origin": "TOPLEFT" + }, + "charspan": [ + 0, + 0 + ] + } + ], + "captions": [], + "references": [], + "footnotes": [], + "data": { + "table_cells": [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Product", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Date", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "Quantity", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "Revenue", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Widget A", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2024-01-01 00:00:00", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "5", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "5000", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Widget B", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2024-01-02 00:00:00", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "10", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "12000", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Widget C", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2024-01-03 00:00:00", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "3", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "3000", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Widget D", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2024-01-04 00:00:00", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "8", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "8000", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Widget A", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2024-01-05 00:00:00", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "7", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "7000", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Widget B", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2024-01-06 00:00:00", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "6", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "6000", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 7, + "end_row_offset_idx": 8, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Widget C", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 7, + "end_row_offset_idx": 8, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2024-01-07 00:00:00", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 7, + "end_row_offset_idx": 8, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "12", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 7, + "end_row_offset_idx": 8, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "15000", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 8, + "end_row_offset_idx": 9, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Widget D", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 8, + "end_row_offset_idx": 9, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2024-01-08 00:00:00", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 8, + "end_row_offset_idx": 9, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "9", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 8, + "end_row_offset_idx": 9, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "9000", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 9, + "end_row_offset_idx": 10, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Widget A", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 9, + "end_row_offset_idx": 10, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2024-01-09 00:00:00", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 9, + "end_row_offset_idx": 10, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "4", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 9, + "end_row_offset_idx": 10, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "4000", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 10, + "end_row_offset_idx": 11, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Widget B", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 10, + "end_row_offset_idx": 11, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2024-01-10 00:00:00", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 10, + "end_row_offset_idx": 11, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "11", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 10, + "end_row_offset_idx": 11, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "11000", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 11, + "end_row_offset_idx": 12, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Widget C", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 11, + "end_row_offset_idx": 12, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2024-01-11 00:00:00", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 11, + "end_row_offset_idx": 12, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "5", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 11, + "end_row_offset_idx": 12, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "5000", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 12, + "end_row_offset_idx": 13, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Widget D", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 12, + "end_row_offset_idx": 13, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2024-01-12 00:00:00", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 12, + "end_row_offset_idx": 13, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "8", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 12, + "end_row_offset_idx": 13, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "8500", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 13, + "end_row_offset_idx": 14, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Widget A", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 13, + "end_row_offset_idx": 14, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2024-01-13 00:00:00", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 13, + "end_row_offset_idx": 14, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "6", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 13, + "end_row_offset_idx": 14, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "6200", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 14, + "end_row_offset_idx": 15, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Widget B", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 14, + "end_row_offset_idx": 15, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2024-01-14 00:00:00", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 14, + "end_row_offset_idx": 15, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "7", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 14, + "end_row_offset_idx": 15, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "7100", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 15, + "end_row_offset_idx": 16, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Widget C", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 15, + "end_row_offset_idx": 16, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2024-01-15 00:00:00", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 15, + "end_row_offset_idx": 16, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "10", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 15, + "end_row_offset_idx": 16, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "10500", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 16, + "end_row_offset_idx": 17, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Widget D", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 16, + "end_row_offset_idx": 17, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2024-01-16 00:00:00", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 16, + "end_row_offset_idx": 17, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "3", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 16, + "end_row_offset_idx": 17, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "3200", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 17, + "end_row_offset_idx": 18, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Widget A", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 17, + "end_row_offset_idx": 18, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2024-01-17 00:00:00", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 17, + "end_row_offset_idx": 18, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "9", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 17, + "end_row_offset_idx": 18, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "9400", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 18, + "end_row_offset_idx": 19, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Widget B", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 18, + "end_row_offset_idx": 19, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2024-01-18 00:00:00", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 18, + "end_row_offset_idx": 19, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "12", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 18, + "end_row_offset_idx": 19, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "12500", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 19, + "end_row_offset_idx": 20, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Widget C", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 19, + "end_row_offset_idx": 20, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2024-01-19 00:00:00", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 19, + "end_row_offset_idx": 20, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "6", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 19, + "end_row_offset_idx": 20, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "6100", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 20, + "end_row_offset_idx": 21, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Widget D", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 20, + "end_row_offset_idx": 21, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2024-01-20 00:00:00", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 20, + "end_row_offset_idx": 21, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "8", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 20, + "end_row_offset_idx": 21, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "8900", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + "num_rows": 21, + "num_cols": 4, + "grid": [ + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Product", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Date", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "Quantity", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "Revenue", + "column_header": true, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Widget A", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2024-01-01 00:00:00", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "5", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "5000", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Widget B", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2024-01-02 00:00:00", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "10", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "12000", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Widget C", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2024-01-03 00:00:00", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "3", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "3000", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Widget D", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2024-01-04 00:00:00", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "8", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "8000", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Widget A", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2024-01-05 00:00:00", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "7", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "7000", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Widget B", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2024-01-06 00:00:00", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "6", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "6000", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 7, + "end_row_offset_idx": 8, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Widget C", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 7, + "end_row_offset_idx": 8, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2024-01-07 00:00:00", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 7, + "end_row_offset_idx": 8, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "12", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 7, + "end_row_offset_idx": 8, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "15000", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 8, + "end_row_offset_idx": 9, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Widget D", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 8, + "end_row_offset_idx": 9, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2024-01-08 00:00:00", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 8, + "end_row_offset_idx": 9, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "9", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 8, + "end_row_offset_idx": 9, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "9000", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 9, + "end_row_offset_idx": 10, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Widget A", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 9, + "end_row_offset_idx": 10, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2024-01-09 00:00:00", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 9, + "end_row_offset_idx": 10, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "4", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 9, + "end_row_offset_idx": 10, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "4000", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 10, + "end_row_offset_idx": 11, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Widget B", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 10, + "end_row_offset_idx": 11, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2024-01-10 00:00:00", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 10, + "end_row_offset_idx": 11, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "11", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 10, + "end_row_offset_idx": 11, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "11000", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 11, + "end_row_offset_idx": 12, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Widget C", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 11, + "end_row_offset_idx": 12, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2024-01-11 00:00:00", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 11, + "end_row_offset_idx": 12, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "5", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 11, + "end_row_offset_idx": 12, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "5000", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 12, + "end_row_offset_idx": 13, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Widget D", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 12, + "end_row_offset_idx": 13, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2024-01-12 00:00:00", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 12, + "end_row_offset_idx": 13, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "8", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 12, + "end_row_offset_idx": 13, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "8500", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 13, + "end_row_offset_idx": 14, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Widget A", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 13, + "end_row_offset_idx": 14, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2024-01-13 00:00:00", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 13, + "end_row_offset_idx": 14, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "6", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 13, + "end_row_offset_idx": 14, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "6200", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 14, + "end_row_offset_idx": 15, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Widget B", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 14, + "end_row_offset_idx": 15, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2024-01-14 00:00:00", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 14, + "end_row_offset_idx": 15, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "7", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 14, + "end_row_offset_idx": 15, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "7100", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 15, + "end_row_offset_idx": 16, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Widget C", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 15, + "end_row_offset_idx": 16, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2024-01-15 00:00:00", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 15, + "end_row_offset_idx": 16, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "10", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 15, + "end_row_offset_idx": 16, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "10500", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 16, + "end_row_offset_idx": 17, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Widget D", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 16, + "end_row_offset_idx": 17, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2024-01-16 00:00:00", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 16, + "end_row_offset_idx": 17, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "3", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 16, + "end_row_offset_idx": 17, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "3200", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 17, + "end_row_offset_idx": 18, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Widget A", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 17, + "end_row_offset_idx": 18, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2024-01-17 00:00:00", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 17, + "end_row_offset_idx": 18, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "9", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 17, + "end_row_offset_idx": 18, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "9400", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 18, + "end_row_offset_idx": 19, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Widget B", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 18, + "end_row_offset_idx": 19, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2024-01-18 00:00:00", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 18, + "end_row_offset_idx": 19, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "12", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 18, + "end_row_offset_idx": 19, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "12500", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 19, + "end_row_offset_idx": 20, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Widget C", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 19, + "end_row_offset_idx": 20, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2024-01-19 00:00:00", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 19, + "end_row_offset_idx": 20, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "6", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 19, + "end_row_offset_idx": 20, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "6100", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 20, + "end_row_offset_idx": 21, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Widget D", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 20, + "end_row_offset_idx": 21, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "2024-01-20 00:00:00", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 20, + "end_row_offset_idx": 21, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "8", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 20, + "end_row_offset_idx": 21, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "8900", + "column_header": false, + "row_header": false, + "row_section": false + } + ] + ] + } + } + ], + "key_value_items": [], + "form_items": [], + "pages": { + "1": { + "size": { + "width": 4.0, + "height": 21.0 + }, + "page_no": 1 + } + } +} \ No newline at end of file diff --git a/tests/data/groundtruth/docling_v2/sample_sales_data.xlsm.md b/tests/data/groundtruth/docling_v2/sample_sales_data.xlsm.md new file mode 100644 index 0000000..55e52de --- /dev/null +++ b/tests/data/groundtruth/docling_v2/sample_sales_data.xlsm.md @@ -0,0 +1,22 @@ +| Product | Date | Quantity | Revenue | +|-----------|---------------------|------------|-----------| +| Widget A | 2024-01-01 00:00:00 | 5 | 5000 | +| Widget B | 2024-01-02 00:00:00 | 10 | 12000 | +| Widget C | 2024-01-03 00:00:00 | 3 | 3000 | +| Widget D | 2024-01-04 00:00:00 | 8 | 8000 | +| Widget A | 2024-01-05 00:00:00 | 7 | 7000 | +| Widget B | 2024-01-06 00:00:00 | 6 | 6000 | +| Widget C | 2024-01-07 00:00:00 | 12 | 15000 | +| Widget D | 2024-01-08 00:00:00 | 9 | 9000 | +| Widget A | 2024-01-09 00:00:00 | 4 | 4000 | +| Widget B | 2024-01-10 00:00:00 | 11 | 11000 | +| Widget C | 2024-01-11 00:00:00 | 5 | 5000 | +| Widget D | 2024-01-12 00:00:00 | 8 | 8500 | +| Widget A | 2024-01-13 00:00:00 | 6 | 6200 | +| Widget B | 2024-01-14 00:00:00 | 7 | 7100 | +| Widget C | 2024-01-15 00:00:00 | 10 | 10500 | +| Widget D | 2024-01-16 00:00:00 | 3 | 3200 | +| Widget A | 2024-01-17 00:00:00 | 9 | 9400 | +| Widget B | 2024-01-18 00:00:00 | 12 | 12500 | +| Widget C | 2024-01-19 00:00:00 | 6 | 6100 | +| Widget D | 2024-01-20 00:00:00 | 8 | 8900 | \ No newline at end of file diff --git a/tests/data/groundtruth/docling_v2/textbox.docx.itxt b/tests/data/groundtruth/docling_v2/textbox.docx.itxt index e17e2be..406de95 100644 --- a/tests/data/groundtruth/docling_v2/textbox.docx.itxt +++ b/tests/data/groundtruth/docling_v2/textbox.docx.itxt @@ -5,92 +5,89 @@ item-0 at level 0: unspecified: group _root_ item-4 at level 1: section: group textbox item-5 at level 2: paragraph: Student falls ill item-6 at level 2: paragraph: - item-7 at level 2: paragraph: - item-8 at level 2: list: group list - item-9 at level 3: list_item: Suggested Reportable Symptoms: + item-7 at level 2: list: group list + item-8 at level 3: list_item: Suggested Reportable Symptoms: * ... sh * Blisters * Headache * Sore throat - item-10 at level 1: list_item: + item-9 at level 1: list_item: + item-10 at level 1: paragraph: item-11 at level 1: paragraph: - item-12 at level 1: paragraph: - item-13 at level 1: section: group textbox - item-14 at level 2: paragraph: If a caregiver suspects that wit ... the same suggested reportable symptoms + item-12 at level 1: section: group textbox + item-13 at level 2: paragraph: If a caregiver suspects that wit ... the same suggested reportable symptoms + item-14 at level 1: paragraph: item-15 at level 1: paragraph: item-16 at level 1: paragraph: item-17 at level 1: paragraph: - item-18 at level 1: paragraph: - item-19 at level 1: section: group textbox - item-20 at level 2: paragraph: Yes + item-18 at level 1: section: group textbox + item-19 at level 2: paragraph: Yes + item-20 at level 1: paragraph: item-21 at level 1: paragraph: - item-22 at level 1: paragraph: - item-23 at level 1: section: group textbox - item-24 at level 2: list: group list - item-25 at level 3: list_item: A report must be submitted withi ... saster Prevention Information Network. - item-26 at level 3: list_item: A report must also be submitted ... d Infectious Disease Reporting System. - item-27 at level 2: paragraph: - item-28 at level 2: paragraph: - item-29 at level 1: list: group list - item-30 at level 2: list_item: + item-22 at level 1: section: group textbox + item-23 at level 2: list: group list + item-24 at level 3: list_item: A report must be submitted withi ... saster Prevention Information Network. + item-25 at level 3: list_item: A report must also be submitted ... d Infectious Disease Reporting System. + item-26 at level 2: paragraph: + item-27 at level 1: list: group list + item-28 at level 2: list_item: + item-29 at level 1: paragraph: + item-30 at level 1: paragraph: item-31 at level 1: paragraph: item-32 at level 1: paragraph: item-33 at level 1: paragraph: - item-34 at level 1: paragraph: - item-35 at level 1: paragraph: - item-36 at level 1: section: group textbox - item-37 at level 2: paragraph: Health Bureau: - item-38 at level 2: paragraph: Upon receiving a report from the ... rt to the Centers for Disease Control. - item-39 at level 2: list: group list - item-40 at level 3: list_item: If necessary, provide health edu ... vidual to undergo specimen collection. - item-41 at level 3: list_item: Implement appropriate epidemic p ... the Communicable Disease Control Act. - item-42 at level 2: paragraph: - item-43 at level 2: paragraph: - item-44 at level 1: list: group list - item-45 at level 2: list_item: - item-46 at level 1: paragraph: - item-47 at level 1: section: group textbox - item-48 at level 2: paragraph: Department of Education: + item-34 at level 1: section: group textbox + item-35 at level 2: paragraph: Health Bureau: + item-36 at level 2: paragraph: Upon receiving a report from the ... rt to the Centers for Disease Control. + item-37 at level 2: list: group list + item-38 at level 3: list_item: If necessary, provide health edu ... vidual to undergo specimen collection. + item-39 at level 3: list_item: Implement appropriate epidemic p ... the Communicable Disease Control Act. + item-40 at level 2: paragraph: + item-41 at level 1: list: group list + item-42 at level 2: list_item: + item-43 at level 1: paragraph: + item-44 at level 1: section: group textbox + item-45 at level 2: paragraph: Department of Education: Collabo ... vention measures at all school levels. + item-46 at level 1: paragraph: + item-47 at level 1: paragraph: + item-48 at level 1: paragraph: item-49 at level 1: paragraph: item-50 at level 1: paragraph: item-51 at level 1: paragraph: item-52 at level 1: paragraph: - item-53 at level 1: paragraph: - item-54 at level 1: paragraph: - item-55 at level 1: paragraph: - item-56 at level 1: section: group textbox - item-57 at level 2: inline: group group - item-58 at level 3: paragraph: The Health Bureau will handle - item-59 at level 3: paragraph: reporting and specimen collection - item-60 at level 3: paragraph: . - item-61 at level 2: paragraph: - item-62 at level 2: paragraph: - item-63 at level 1: paragraph: - item-64 at level 1: paragraph: + item-53 at level 1: section: group textbox + item-54 at level 2: inline: group group + item-55 at level 3: paragraph: The Health Bureau will handle + item-56 at level 3: paragraph: reporting and specimen collection + item-57 at level 3: paragraph: . + item-58 at level 2: paragraph: + item-59 at level 1: paragraph: + item-60 at level 1: paragraph: + item-61 at level 1: paragraph: + item-62 at level 1: section: group textbox + item-63 at level 2: paragraph: Whether the epidemic has eased. + item-64 at level 2: paragraph: item-65 at level 1: paragraph: item-66 at level 1: section: group textbox - item-67 at level 2: paragraph: Whether the epidemic has eased. - item-68 at level 2: paragraph: - item-69 at level 2: paragraph: + item-67 at level 2: paragraph: Whether the test results are pos ... legally designated infectious disease. + item-68 at level 2: paragraph: No + item-69 at level 1: paragraph: item-70 at level 1: paragraph: item-71 at level 1: section: group textbox - item-72 at level 2: paragraph: Whether the test results are pos ... legally designated infectious disease. - item-73 at level 2: paragraph: No - item-74 at level 1: paragraph: - item-75 at level 1: paragraph: - item-76 at level 1: section: group textbox + item-72 at level 2: paragraph: Yes + item-73 at level 1: paragraph: + item-74 at level 1: section: group textbox + item-75 at level 2: paragraph: Yes + item-76 at level 1: paragraph: item-77 at level 1: paragraph: item-78 at level 1: section: group textbox - item-79 at level 1: paragraph: - item-80 at level 1: paragraph: - item-81 at level 1: section: group textbox - item-82 at level 2: paragraph: Case closed. - item-83 at level 2: paragraph: - item-84 at level 2: paragraph: - item-85 at level 2: paragraph: The Health Bureau will carry out ... ters for Disease Control if necessary. + item-79 at level 2: paragraph: Case closed. + item-80 at level 2: paragraph: + item-81 at level 2: paragraph: The Health Bureau will carry out ... ters for Disease Control if necessary. + item-82 at level 1: paragraph: + item-83 at level 1: section: group textbox + item-84 at level 2: paragraph: No + item-85 at level 1: paragraph: item-86 at level 1: paragraph: - item-87 at level 1: section: group textbox - item-88 at level 1: paragraph: - item-89 at level 1: paragraph: - item-90 at level 1: paragraph: \ No newline at end of file + item-87 at level 1: paragraph: \ No newline at end of file diff --git a/tests/data/groundtruth/docling_v2/textbox.docx.json b/tests/data/groundtruth/docling_v2/textbox.docx.json index 743fb57..840e937 100644 --- a/tests/data/groundtruth/docling_v2/textbox.docx.json +++ b/tests/data/groundtruth/docling_v2/textbox.docx.json @@ -29,6 +29,9 @@ { "$ref": "#/groups/0" }, + { + "$ref": "#/texts/6" + }, { "$ref": "#/texts/7" }, @@ -36,10 +39,10 @@ "$ref": "#/texts/8" }, { - "$ref": "#/texts/9" + "$ref": "#/groups/2" }, { - "$ref": "#/groups/2" + "$ref": "#/texts/10" }, { "$ref": "#/texts/11" @@ -50,17 +53,14 @@ { "$ref": "#/texts/13" }, - { - "$ref": "#/texts/14" - }, { "$ref": "#/groups/3" }, { - "$ref": "#/texts/16" + "$ref": "#/texts/15" }, { - "$ref": "#/texts/17" + "$ref": "#/texts/16" }, { "$ref": "#/groups/4" @@ -68,6 +68,12 @@ { "$ref": "#/groups/6" }, + { + "$ref": "#/texts/21" + }, + { + "$ref": "#/texts/22" + }, { "$ref": "#/texts/23" }, @@ -77,12 +83,6 @@ { "$ref": "#/texts/25" }, - { - "$ref": "#/texts/26" - }, - { - "$ref": "#/texts/27" - }, { "$ref": "#/groups/7" }, @@ -90,11 +90,20 @@ "$ref": "#/groups/9" }, { - "$ref": "#/texts/35" + "$ref": "#/texts/32" }, { "$ref": "#/groups/10" }, + { + "$ref": "#/texts/34" + }, + { + "$ref": "#/texts/35" + }, + { + "$ref": "#/texts/36" + }, { "$ref": "#/texts/37" }, @@ -107,74 +116,65 @@ { "$ref": "#/texts/40" }, - { - "$ref": "#/texts/41" - }, - { - "$ref": "#/texts/42" - }, - { - "$ref": "#/texts/43" - }, { "$ref": "#/groups/11" }, { - "$ref": "#/texts/49" + "$ref": "#/texts/45" }, { - "$ref": "#/texts/50" + "$ref": "#/texts/46" }, { - "$ref": "#/texts/51" + "$ref": "#/texts/47" }, { "$ref": "#/groups/13" }, { - "$ref": "#/texts/55" + "$ref": "#/texts/50" }, { "$ref": "#/groups/14" }, + { + "$ref": "#/texts/53" + }, + { + "$ref": "#/texts/54" + }, + { + "$ref": "#/groups/15" + }, + { + "$ref": "#/texts/56" + }, + { + "$ref": "#/groups/16" + }, { "$ref": "#/texts/58" }, { "$ref": "#/texts/59" }, - { - "$ref": "#/groups/15" - }, - { - "$ref": "#/texts/60" - }, - { - "$ref": "#/groups/16" - }, - { - "$ref": "#/texts/61" - }, - { - "$ref": "#/texts/62" - }, { "$ref": "#/groups/17" }, { - "$ref": "#/texts/67" + "$ref": "#/texts/63" }, { "$ref": "#/groups/18" }, { - "$ref": "#/texts/68" + "$ref": "#/texts/65" }, { - "$ref": "#/texts/69" + "$ref": "#/texts/66" }, { - "$ref": "#/texts/70" + "$ref": "#/texts/67" } ], "content_layer": "body", @@ -194,9 +194,6 @@ { "$ref": "#/texts/4" }, - { - "$ref": "#/texts/5" - }, { "$ref": "#/groups/1" } @@ -212,7 +209,7 @@ }, "children": [ { - "$ref": "#/texts/6" + "$ref": "#/texts/5" } ], "content_layer": "body", @@ -226,7 +223,7 @@ }, "children": [ { - "$ref": "#/texts/10" + "$ref": "#/texts/9" } ], "content_layer": "body", @@ -240,7 +237,7 @@ }, "children": [ { - "$ref": "#/texts/15" + "$ref": "#/texts/14" } ], "content_layer": "body", @@ -257,10 +254,7 @@ "$ref": "#/groups/5" }, { - "$ref": "#/texts/20" - }, - { - "$ref": "#/texts/21" + "$ref": "#/texts/19" } ], "content_layer": "body", @@ -274,10 +268,10 @@ }, "children": [ { - "$ref": "#/texts/18" + "$ref": "#/texts/17" }, { - "$ref": "#/texts/19" + "$ref": "#/texts/18" } ], "content_layer": "body", @@ -291,7 +285,7 @@ }, "children": [ { - "$ref": "#/texts/22" + "$ref": "#/texts/20" } ], "content_layer": "body", @@ -305,19 +299,16 @@ }, "children": [ { - "$ref": "#/texts/28" + "$ref": "#/texts/26" }, { - "$ref": "#/texts/29" + "$ref": "#/texts/27" }, { "$ref": "#/groups/8" }, { - "$ref": "#/texts/32" - }, - { - "$ref": "#/texts/33" + "$ref": "#/texts/30" } ], "content_layer": "body", @@ -331,10 +322,10 @@ }, "children": [ { - "$ref": "#/texts/30" + "$ref": "#/texts/28" }, { - "$ref": "#/texts/31" + "$ref": "#/texts/29" } ], "content_layer": "body", @@ -348,7 +339,7 @@ }, "children": [ { - "$ref": "#/texts/34" + "$ref": "#/texts/31" } ], "content_layer": "body", @@ -362,7 +353,7 @@ }, "children": [ { - "$ref": "#/texts/36" + "$ref": "#/texts/33" } ], "content_layer": "body", @@ -379,10 +370,7 @@ "$ref": "#/groups/12" }, { - "$ref": "#/texts/47" - }, - { - "$ref": "#/texts/48" + "$ref": "#/texts/44" } ], "content_layer": "body", @@ -396,13 +384,13 @@ }, "children": [ { - "$ref": "#/texts/44" + "$ref": "#/texts/41" }, { - "$ref": "#/texts/45" + "$ref": "#/texts/42" }, { - "$ref": "#/texts/46" + "$ref": "#/texts/43" } ], "content_layer": "body", @@ -416,13 +404,10 @@ }, "children": [ { - "$ref": "#/texts/52" + "$ref": "#/texts/48" }, { - "$ref": "#/texts/53" - }, - { - "$ref": "#/texts/54" + "$ref": "#/texts/49" } ], "content_layer": "body", @@ -436,10 +421,10 @@ }, "children": [ { - "$ref": "#/texts/56" + "$ref": "#/texts/51" }, { - "$ref": "#/texts/57" + "$ref": "#/texts/52" } ], "content_layer": "body", @@ -451,7 +436,11 @@ "parent": { "$ref": "#/body" }, - "children": [], + "children": [ + { + "$ref": "#/texts/55" + } + ], "content_layer": "body", "name": "textbox", "label": "section" @@ -461,7 +450,11 @@ "parent": { "$ref": "#/body" }, - "children": [], + "children": [ + { + "$ref": "#/texts/57" + } + ], "content_layer": "body", "name": "textbox", "label": "section" @@ -473,16 +466,13 @@ }, "children": [ { - "$ref": "#/texts/63" + "$ref": "#/texts/60" }, { - "$ref": "#/texts/64" + "$ref": "#/texts/61" }, { - "$ref": "#/texts/65" - }, - { - "$ref": "#/texts/66" + "$ref": "#/texts/62" } ], "content_layer": "body", @@ -494,7 +484,11 @@ "parent": { "$ref": "#/body" }, - "children": [], + "children": [ + { + "$ref": "#/texts/64" + } + ], "content_layer": "body", "name": "textbox", "label": "section" @@ -581,18 +575,6 @@ }, { "self_ref": "#/texts/5", - "parent": { - "$ref": "#/groups/0" - }, - "children": [], - "content_layer": "body", - "label": "paragraph", - "prov": [], - "orig": "", - "text": "" - }, - { - "self_ref": "#/texts/6", "parent": { "$ref": "#/groups/1" }, @@ -612,7 +594,7 @@ "marker": "-" }, { - "self_ref": "#/texts/7", + "self_ref": "#/texts/6", "parent": { "$ref": "#/body" }, @@ -625,6 +607,18 @@ "enumerated": false, "marker": "-" }, + { + "self_ref": "#/texts/7", + "parent": { + "$ref": "#/body" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [], + "orig": "", + "text": "" + }, { "self_ref": "#/texts/8", "parent": { @@ -639,18 +633,6 @@ }, { "self_ref": "#/texts/9", - "parent": { - "$ref": "#/body" - }, - "children": [], - "content_layer": "body", - "label": "paragraph", - "prov": [], - "orig": "", - "text": "" - }, - { - "self_ref": "#/texts/10", "parent": { "$ref": "#/groups/2" }, @@ -667,6 +649,18 @@ "strikethrough": false } }, + { + "self_ref": "#/texts/10", + "parent": { + "$ref": "#/body" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [], + "orig": "", + "text": "" + }, { "self_ref": "#/texts/11", "parent": { @@ -705,18 +699,6 @@ }, { "self_ref": "#/texts/14", - "parent": { - "$ref": "#/body" - }, - "children": [], - "content_layer": "body", - "label": "paragraph", - "prov": [], - "orig": "", - "text": "" - }, - { - "self_ref": "#/texts/15", "parent": { "$ref": "#/groups/3" }, @@ -733,6 +715,18 @@ "strikethrough": false } }, + { + "self_ref": "#/texts/15", + "parent": { + "$ref": "#/body" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [], + "orig": "", + "text": "" + }, { "self_ref": "#/texts/16", "parent": { @@ -747,18 +741,6 @@ }, { "self_ref": "#/texts/17", - "parent": { - "$ref": "#/body" - }, - "children": [], - "content_layer": "body", - "label": "paragraph", - "prov": [], - "orig": "", - "text": "" - }, - { - "self_ref": "#/texts/18", "parent": { "$ref": "#/groups/5" }, @@ -778,7 +760,7 @@ "marker": "-" }, { - "self_ref": "#/texts/19", + "self_ref": "#/texts/18", "parent": { "$ref": "#/groups/5" }, @@ -797,32 +779,20 @@ "enumerated": false, "marker": "-" }, + { + "self_ref": "#/texts/19", + "parent": { + "$ref": "#/groups/4" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [], + "orig": "", + "text": "" + }, { "self_ref": "#/texts/20", - "parent": { - "$ref": "#/groups/4" - }, - "children": [], - "content_layer": "body", - "label": "paragraph", - "prov": [], - "orig": "", - "text": "" - }, - { - "self_ref": "#/texts/21", - "parent": { - "$ref": "#/groups/4" - }, - "children": [], - "content_layer": "body", - "label": "paragraph", - "prov": [], - "orig": "", - "text": "" - }, - { - "self_ref": "#/texts/22", "parent": { "$ref": "#/groups/6" }, @@ -835,6 +805,30 @@ "enumerated": false, "marker": "-" }, + { + "self_ref": "#/texts/21", + "parent": { + "$ref": "#/body" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [], + "orig": "", + "text": "" + }, + { + "self_ref": "#/texts/22", + "parent": { + "$ref": "#/body" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [], + "orig": "", + "text": "" + }, { "self_ref": "#/texts/23", "parent": { @@ -873,30 +867,6 @@ }, { "self_ref": "#/texts/26", - "parent": { - "$ref": "#/body" - }, - "children": [], - "content_layer": "body", - "label": "paragraph", - "prov": [], - "orig": "", - "text": "" - }, - { - "self_ref": "#/texts/27", - "parent": { - "$ref": "#/body" - }, - "children": [], - "content_layer": "body", - "label": "paragraph", - "prov": [], - "orig": "", - "text": "" - }, - { - "self_ref": "#/texts/28", "parent": { "$ref": "#/groups/7" }, @@ -914,7 +884,7 @@ } }, { - "self_ref": "#/texts/29", + "self_ref": "#/texts/27", "parent": { "$ref": "#/groups/7" }, @@ -932,7 +902,7 @@ } }, { - "self_ref": "#/texts/30", + "self_ref": "#/texts/28", "parent": { "$ref": "#/groups/8" }, @@ -952,7 +922,7 @@ "marker": "-" }, { - "self_ref": "#/texts/31", + "self_ref": "#/texts/29", "parent": { "$ref": "#/groups/8" }, @@ -972,7 +942,7 @@ "marker": "-" }, { - "self_ref": "#/texts/32", + "self_ref": "#/texts/30", "parent": { "$ref": "#/groups/7" }, @@ -984,19 +954,7 @@ "text": "" }, { - "self_ref": "#/texts/33", - "parent": { - "$ref": "#/groups/7" - }, - "children": [], - "content_layer": "body", - "label": "paragraph", - "prov": [], - "orig": "", - "text": "" - }, - { - "self_ref": "#/texts/34", + "self_ref": "#/texts/31", "parent": { "$ref": "#/groups/9" }, @@ -1009,6 +967,48 @@ "enumerated": false, "marker": "-" }, + { + "self_ref": "#/texts/32", + "parent": { + "$ref": "#/body" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [], + "orig": "", + "text": "" + }, + { + "self_ref": "#/texts/33", + "parent": { + "$ref": "#/groups/10" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [], + "orig": "Department of Education:\nCollaborate with the Health Bureau in conducting epidemiological investigations and assist Health Bureau personnel in implementing necessary epidemic prevention measures at all school levels.", + "text": "Department of Education:\nCollaborate with the Health Bureau in conducting epidemiological investigations and assist Health Bureau personnel in implementing necessary epidemic prevention measures at all school levels.", + "formatting": { + "bold": false, + "italic": false, + "underline": false, + "strikethrough": false + } + }, + { + "self_ref": "#/texts/34", + "parent": { + "$ref": "#/body" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [], + "orig": "", + "text": "" + }, { "self_ref": "#/texts/35", "parent": { @@ -1024,20 +1024,14 @@ { "self_ref": "#/texts/36", "parent": { - "$ref": "#/groups/10" + "$ref": "#/body" }, "children": [], "content_layer": "body", "label": "paragraph", "prov": [], - "orig": "Department of Education:\nCollaborate with the Health Bureau in conducting epidemiological investigations and assist Health Bureau personnel in implementing necessary epidemic prevention measures at all school levels.", - "text": "Department of Education:\nCollaborate with the Health Bureau in conducting epidemiological investigations and assist Health Bureau personnel in implementing necessary epidemic prevention measures at all school levels.", - "formatting": { - "bold": false, - "italic": false, - "underline": false, - "strikethrough": false - } + "orig": "", + "text": "" }, { "self_ref": "#/texts/37", @@ -1089,42 +1083,6 @@ }, { "self_ref": "#/texts/41", - "parent": { - "$ref": "#/body" - }, - "children": [], - "content_layer": "body", - "label": "paragraph", - "prov": [], - "orig": "", - "text": "" - }, - { - "self_ref": "#/texts/42", - "parent": { - "$ref": "#/body" - }, - "children": [], - "content_layer": "body", - "label": "paragraph", - "prov": [], - "orig": "", - "text": "" - }, - { - "self_ref": "#/texts/43", - "parent": { - "$ref": "#/body" - }, - "children": [], - "content_layer": "body", - "label": "paragraph", - "prov": [], - "orig": "", - "text": "" - }, - { - "self_ref": "#/texts/44", "parent": { "$ref": "#/groups/12" }, @@ -1142,7 +1100,7 @@ } }, { - "self_ref": "#/texts/45", + "self_ref": "#/texts/42", "parent": { "$ref": "#/groups/12" }, @@ -1160,7 +1118,7 @@ } }, { - "self_ref": "#/texts/46", + "self_ref": "#/texts/43", "parent": { "$ref": "#/groups/12" }, @@ -1178,7 +1136,7 @@ } }, { - "self_ref": "#/texts/47", + "self_ref": "#/texts/44", "parent": { "$ref": "#/groups/11" }, @@ -1189,22 +1147,64 @@ "orig": "", "text": "" }, + { + "self_ref": "#/texts/45", + "parent": { + "$ref": "#/body" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [], + "orig": "", + "text": "" + }, + { + "self_ref": "#/texts/46", + "parent": { + "$ref": "#/body" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [], + "orig": "", + "text": "" + }, + { + "self_ref": "#/texts/47", + "parent": { + "$ref": "#/body" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [], + "orig": "", + "text": "" + }, { "self_ref": "#/texts/48", "parent": { - "$ref": "#/groups/11" + "$ref": "#/groups/13" }, "children": [], "content_layer": "body", "label": "paragraph", "prov": [], - "orig": "", - "text": "" + "orig": "Whether the epidemic has eased.", + "text": "Whether the epidemic has eased.", + "formatting": { + "bold": true, + "italic": false, + "underline": false, + "strikethrough": false + } }, { "self_ref": "#/texts/49", "parent": { - "$ref": "#/body" + "$ref": "#/groups/13" }, "children": [], "content_layer": "body", @@ -1227,72 +1227,6 @@ }, { "self_ref": "#/texts/51", - "parent": { - "$ref": "#/body" - }, - "children": [], - "content_layer": "body", - "label": "paragraph", - "prov": [], - "orig": "", - "text": "" - }, - { - "self_ref": "#/texts/52", - "parent": { - "$ref": "#/groups/13" - }, - "children": [], - "content_layer": "body", - "label": "paragraph", - "prov": [], - "orig": "Whether the epidemic has eased.", - "text": "Whether the epidemic has eased.", - "formatting": { - "bold": true, - "italic": false, - "underline": false, - "strikethrough": false - } - }, - { - "self_ref": "#/texts/53", - "parent": { - "$ref": "#/groups/13" - }, - "children": [], - "content_layer": "body", - "label": "paragraph", - "prov": [], - "orig": "", - "text": "" - }, - { - "self_ref": "#/texts/54", - "parent": { - "$ref": "#/groups/13" - }, - "children": [], - "content_layer": "body", - "label": "paragraph", - "prov": [], - "orig": "", - "text": "" - }, - { - "self_ref": "#/texts/55", - "parent": { - "$ref": "#/body" - }, - "children": [], - "content_layer": "body", - "label": "paragraph", - "prov": [], - "orig": "", - "text": "" - }, - { - "self_ref": "#/texts/56", "parent": { "$ref": "#/groups/14" }, @@ -1310,7 +1244,7 @@ } }, { - "self_ref": "#/texts/57", + "self_ref": "#/texts/52", "parent": { "$ref": "#/groups/14" }, @@ -1327,6 +1261,78 @@ "strikethrough": false } }, + { + "self_ref": "#/texts/53", + "parent": { + "$ref": "#/body" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [], + "orig": "", + "text": "" + }, + { + "self_ref": "#/texts/54", + "parent": { + "$ref": "#/body" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [], + "orig": "", + "text": "" + }, + { + "self_ref": "#/texts/55", + "parent": { + "$ref": "#/groups/15" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [], + "orig": "Yes", + "text": "Yes", + "formatting": { + "bold": false, + "italic": false, + "underline": false, + "strikethrough": false + } + }, + { + "self_ref": "#/texts/56", + "parent": { + "$ref": "#/body" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [], + "orig": "", + "text": "" + }, + { + "self_ref": "#/texts/57", + "parent": { + "$ref": "#/groups/16" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [], + "orig": "Yes", + "text": "Yes", + "formatting": { + "bold": false, + "italic": false, + "underline": false, + "strikethrough": false + } + }, { "self_ref": "#/texts/58", "parent": { @@ -1353,42 +1359,6 @@ }, { "self_ref": "#/texts/60", - "parent": { - "$ref": "#/body" - }, - "children": [], - "content_layer": "body", - "label": "paragraph", - "prov": [], - "orig": "", - "text": "" - }, - { - "self_ref": "#/texts/61", - "parent": { - "$ref": "#/body" - }, - "children": [], - "content_layer": "body", - "label": "paragraph", - "prov": [], - "orig": "", - "text": "" - }, - { - "self_ref": "#/texts/62", - "parent": { - "$ref": "#/body" - }, - "children": [], - "content_layer": "body", - "label": "paragraph", - "prov": [], - "orig": "", - "text": "" - }, - { - "self_ref": "#/texts/63", "parent": { "$ref": "#/groups/17" }, @@ -1406,7 +1376,7 @@ } }, { - "self_ref": "#/texts/64", + "self_ref": "#/texts/61", "parent": { "$ref": "#/groups/17" }, @@ -1418,19 +1388,7 @@ "text": "" }, { - "self_ref": "#/texts/65", - "parent": { - "$ref": "#/groups/17" - }, - "children": [], - "content_layer": "body", - "label": "paragraph", - "prov": [], - "orig": "", - "text": "" - }, - { - "self_ref": "#/texts/66", + "self_ref": "#/texts/62", "parent": { "$ref": "#/groups/17" }, @@ -1447,6 +1405,60 @@ "strikethrough": false } }, + { + "self_ref": "#/texts/63", + "parent": { + "$ref": "#/body" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [], + "orig": "", + "text": "" + }, + { + "self_ref": "#/texts/64", + "parent": { + "$ref": "#/groups/18" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [], + "orig": "No", + "text": "No", + "formatting": { + "bold": false, + "italic": false, + "underline": false, + "strikethrough": false + } + }, + { + "self_ref": "#/texts/65", + "parent": { + "$ref": "#/body" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [], + "orig": "", + "text": "" + }, + { + "self_ref": "#/texts/66", + "parent": { + "$ref": "#/body" + }, + "children": [], + "content_layer": "body", + "label": "paragraph", + "prov": [], + "orig": "", + "text": "" + }, { "self_ref": "#/texts/67", "parent": { @@ -1458,42 +1470,6 @@ "prov": [], "orig": "", "text": "" - }, - { - "self_ref": "#/texts/68", - "parent": { - "$ref": "#/body" - }, - "children": [], - "content_layer": "body", - "label": "paragraph", - "prov": [], - "orig": "", - "text": "" - }, - { - "self_ref": "#/texts/69", - "parent": { - "$ref": "#/body" - }, - "children": [], - "content_layer": "body", - "label": "paragraph", - "prov": [], - "orig": "", - "text": "" - }, - { - "self_ref": "#/texts/70", - "parent": { - "$ref": "#/body" - }, - "children": [], - "content_layer": "body", - "label": "paragraph", - "prov": [], - "orig": "", - "text": "" } ], "pictures": [], diff --git a/tests/data/groundtruth/docling_v2/textbox.docx.md b/tests/data/groundtruth/docling_v2/textbox.docx.md index 9458bd0..293c4d8 100644 --- a/tests/data/groundtruth/docling_v2/textbox.docx.md +++ b/tests/data/groundtruth/docling_v2/textbox.docx.md @@ -40,6 +40,12 @@ The Health Bureau will handle **reporting and specimen collection** . No +Yes + +Yes + **Case closed.** -The Health Bureau will carry out subsequent related epidemic prevention measures and follow-up, and will request assistance from the Centers for Disease Control if necessary. \ No newline at end of file +The Health Bureau will carry out subsequent related epidemic prevention measures and follow-up, and will request assistance from the Centers for Disease Control if necessary. + +No \ No newline at end of file diff --git a/tests/data/webp/groundtruth/docling_v2/webp-test.doctags.txt b/tests/data/webp/groundtruth/docling_v2/webp-test.doctags.txt index 76fe886..5682a13 100644 --- a/tests/data/webp/groundtruth/docling_v2/webp-test.doctags.txt +++ b/tests/data/webp/groundtruth/docling_v2/webp-test.doctags.txt @@ -1,2 +1,2 @@ -Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package +Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package \ No newline at end of file diff --git a/tests/data/webp/groundtruth/docling_v2/webp-test.json b/tests/data/webp/groundtruth/docling_v2/webp-test.json index 94c9bda..bf14a5c 100644 --- a/tests/data/webp/groundtruth/docling_v2/webp-test.json +++ b/tests/data/webp/groundtruth/docling_v2/webp-test.json @@ -42,10 +42,10 @@ { "page_no": 1, "bbox": { - "l": 238.19302423176944, + "l": 234.08627147881114, "t": 2570.0959833241664, - "r": 1696.0985546594009, - "b": 2315.204273887442, + "r": 1696.0985042090742, + "b": 2319.1220927976665, "coord_origin": "BOTTOMLEFT" }, "charspan": [ diff --git a/tests/data/webp/groundtruth/docling_v2/webp-test.pages.json b/tests/data/webp/groundtruth/docling_v2/webp-test.pages.json index 67ad465..732403c 100644 --- a/tests/data/webp/groundtruth/docling_v2/webp-test.pages.json +++ b/tests/data/webp/groundtruth/docling_v2/webp-test.pages.json @@ -40,14 +40,14 @@ "a": 255 }, "rect": { - "r_x0": 238.19302423176944, - "r_y0": 415.36904822716525, - "r_x1": 1696.0985546594009, - "r_y1": 415.36904822716525, - "r_x2": 1696.0985546594009, - "r_y2": 345.20535775097477, - "r_x3": 238.19302423176944, - "r_y3": 345.20535775097477, + "r_x0": 234.08627147881114, + "r_y0": 419.5788697734327, + "r_x1": 1696.0985042090742, + "r_y1": 419.5788697734327, + "r_x2": 1696.0985042090742, + "r_y2": 349.4151792972422, + "r_x3": 234.08627147881114, + "r_y3": 349.4151792972422, "coord_origin": "TOPLEFT" }, "text": "JSON and Markdown in an easy self contained", @@ -65,14 +65,14 @@ "a": 255 }, "rect": { - "r_x0": 245.43122061153045, - "r_y0": 513.795726112558, - "r_x1": 514.3223724413002, - "r_y1": 513.795726112558, - "r_x2": 514.3223724413002, - "r_y2": 436.0574704074058, - "r_x3": 245.43122061153045, - "r_y3": 436.0574704074058, + "r_x0": 242.29979922858777, + "r_y0": 509.8779072023336, + "r_x1": 513.3470125989277, + "r_y1": 509.8779072023336, + "r_x2": 513.3470125989277, + "r_y2": 439.9752910477536, + "r_x3": 242.29979922858777, + "r_y3": 439.9752910477536, "coord_origin": "TOPLEFT" }, "text": "package", @@ -90,13 +90,13 @@ "id": 0, "label": "text", "bbox": { - "l": 238.19302423176944, + "l": 234.08627147881114, "t": 258.9040166758338, - "r": 1696.0985546594009, - "b": 513.795726112558, + "r": 1696.0985042090742, + "b": 509.8779072023336, "coord_origin": "TOPLEFT" }, - "confidence": 0.9721010327339172, + "confidence": 0.9721011519432068, "cells": [ { "index": 0, @@ -132,14 +132,14 @@ "a": 255 }, "rect": { - "r_x0": 238.19302423176944, - "r_y0": 415.36904822716525, - "r_x1": 1696.0985546594009, - "r_y1": 415.36904822716525, - "r_x2": 1696.0985546594009, - "r_y2": 345.20535775097477, - "r_x3": 238.19302423176944, - "r_y3": 345.20535775097477, + "r_x0": 234.08627147881114, + "r_y0": 419.5788697734327, + "r_x1": 1696.0985042090742, + "r_y1": 419.5788697734327, + "r_x2": 1696.0985042090742, + "r_y2": 349.4151792972422, + "r_x3": 234.08627147881114, + "r_y3": 349.4151792972422, "coord_origin": "TOPLEFT" }, "text": "JSON and Markdown in an easy self contained", @@ -157,14 +157,14 @@ "a": 255 }, "rect": { - "r_x0": 245.43122061153045, - "r_y0": 513.795726112558, - "r_x1": 514.3223724413002, - "r_y1": 513.795726112558, - "r_x2": 514.3223724413002, - "r_y2": 436.0574704074058, - "r_x3": 245.43122061153045, - "r_y3": 436.0574704074058, + "r_x0": 242.29979922858777, + "r_y0": 509.8779072023336, + "r_x1": 513.3470125989277, + "r_y1": 509.8779072023336, + "r_x2": 513.3470125989277, + "r_y2": 439.9752910477536, + "r_x3": 242.29979922858777, + "r_y3": 439.9752910477536, "coord_origin": "TOPLEFT" }, "text": "package", @@ -195,13 +195,13 @@ "id": 0, "label": "text", "bbox": { - "l": 238.19302423176944, + "l": 234.08627147881114, "t": 258.9040166758338, - "r": 1696.0985546594009, - "b": 513.795726112558, + "r": 1696.0985042090742, + "b": 509.8779072023336, "coord_origin": "TOPLEFT" }, - "confidence": 0.9721010327339172, + "confidence": 0.9721011519432068, "cells": [ { "index": 0, @@ -237,14 +237,14 @@ "a": 255 }, "rect": { - "r_x0": 238.19302423176944, - "r_y0": 415.36904822716525, - "r_x1": 1696.0985546594009, - "r_y1": 415.36904822716525, - "r_x2": 1696.0985546594009, - "r_y2": 345.20535775097477, - "r_x3": 238.19302423176944, - "r_y3": 345.20535775097477, + "r_x0": 234.08627147881114, + "r_y0": 419.5788697734327, + "r_x1": 1696.0985042090742, + "r_y1": 419.5788697734327, + "r_x2": 1696.0985042090742, + "r_y2": 349.4151792972422, + "r_x3": 234.08627147881114, + "r_y3": 349.4151792972422, "coord_origin": "TOPLEFT" }, "text": "JSON and Markdown in an easy self contained", @@ -262,14 +262,14 @@ "a": 255 }, "rect": { - "r_x0": 245.43122061153045, - "r_y0": 513.795726112558, - "r_x1": 514.3223724413002, - "r_y1": 513.795726112558, - "r_x2": 514.3223724413002, - "r_y2": 436.0574704074058, - "r_x3": 245.43122061153045, - "r_y3": 436.0574704074058, + "r_x0": 242.29979922858777, + "r_y0": 509.8779072023336, + "r_x1": 513.3470125989277, + "r_y1": 509.8779072023336, + "r_x2": 513.3470125989277, + "r_y2": 439.9752910477536, + "r_x3": 242.29979922858777, + "r_y3": 439.9752910477536, "coord_origin": "TOPLEFT" }, "text": "package", @@ -293,13 +293,13 @@ "id": 0, "label": "text", "bbox": { - "l": 238.19302423176944, + "l": 234.08627147881114, "t": 258.9040166758338, - "r": 1696.0985546594009, - "b": 513.795726112558, + "r": 1696.0985042090742, + "b": 509.8779072023336, "coord_origin": "TOPLEFT" }, - "confidence": 0.9721010327339172, + "confidence": 0.9721011519432068, "cells": [ { "index": 0, @@ -335,14 +335,14 @@ "a": 255 }, "rect": { - "r_x0": 238.19302423176944, - "r_y0": 415.36904822716525, - "r_x1": 1696.0985546594009, - "r_y1": 415.36904822716525, - "r_x2": 1696.0985546594009, - "r_y2": 345.20535775097477, - "r_x3": 238.19302423176944, - "r_y3": 345.20535775097477, + "r_x0": 234.08627147881114, + "r_y0": 419.5788697734327, + "r_x1": 1696.0985042090742, + "r_y1": 419.5788697734327, + "r_x2": 1696.0985042090742, + "r_y2": 349.4151792972422, + "r_x3": 234.08627147881114, + "r_y3": 349.4151792972422, "coord_origin": "TOPLEFT" }, "text": "JSON and Markdown in an easy self contained", @@ -360,14 +360,14 @@ "a": 255 }, "rect": { - "r_x0": 245.43122061153045, - "r_y0": 513.795726112558, - "r_x1": 514.3223724413002, - "r_y1": 513.795726112558, - "r_x2": 514.3223724413002, - "r_y2": 436.0574704074058, - "r_x3": 245.43122061153045, - "r_y3": 436.0574704074058, + "r_x0": 242.29979922858777, + "r_y0": 509.8779072023336, + "r_x1": 513.3470125989277, + "r_y1": 509.8779072023336, + "r_x2": 513.3470125989277, + "r_y2": 439.9752910477536, + "r_x3": 242.29979922858777, + "r_y3": 439.9752910477536, "coord_origin": "TOPLEFT" }, "text": "package", diff --git a/tests/data/xlsx/sample_sales_data.xlsm b/tests/data/xlsx/sample_sales_data.xlsm new file mode 100644 index 0000000..0f3832a Binary files /dev/null and b/tests/data/xlsx/sample_sales_data.xlsm differ diff --git a/tests/test_backend_msexcel.py b/tests/test_backend_msexcel.py index 65f636e..6dd7342 100644 --- a/tests/test_backend_msexcel.py +++ b/tests/test_backend_msexcel.py @@ -16,13 +16,13 @@ _log = logging.getLogger(__name__) GENERATE = GEN_TEST_DATA -def get_xlsx_paths(): +def get_excel_paths(): # Define the directory you want to search directory = Path("./tests/data/xlsx/") - # List all PDF files in the directory and its subdirectories - pdf_files = sorted(directory.rglob("*.xlsx")) - return pdf_files + # List all Excel files in the directory and its subdirectories + excel_files = sorted(directory.rglob("*.xlsx")) + sorted(directory.rglob("*.xlsm")) + return excel_files def get_converter(): @@ -35,17 +35,17 @@ def get_converter(): def documents() -> list[tuple[Path, DoclingDocument]]: documents: list[dict[Path, DoclingDocument]] = [] - xlsx_paths = get_xlsx_paths() + excel_paths = get_excel_paths() converter = get_converter() - for xlsx_path in xlsx_paths: - _log.debug(f"converting {xlsx_path}") + for excel_path in excel_paths: + _log.debug(f"converting {excel_path}") gt_path = ( - xlsx_path.parent.parent / "groundtruth" / "docling_v2" / xlsx_path.name + excel_path.parent.parent / "groundtruth" / "docling_v2" / excel_path.name ) - conv_result: ConversionResult = converter.convert(xlsx_path) + conv_result: ConversionResult = converter.convert(excel_path) doc: DoclingDocument = conv_result.document @@ -55,7 +55,7 @@ def documents() -> list[tuple[Path, DoclingDocument]]: return documents -def test_e2e_xlsx_conversions(documents) -> None: +def test_e2e_excel_conversions(documents) -> None: for gt_path, doc in documents: pred_md: str = doc.export_to_markdown() assert verify_export(pred_md, str(gt_path) + ".md"), "export to md" @@ -79,7 +79,7 @@ def test_pages(documents) -> None: documents: The paths and converted documents. """ # number of pages from the backend method - path = next(item for item in get_xlsx_paths() if item.stem == "test-01") + path = next(item for item in get_excel_paths() if item.stem == "test-01") in_doc = InputDocument( path_or_stream=path, format=InputFormat.XLSX,