fix: use first table row as col headers (#1156)

Signed-off-by: Panos Vagenas <pva@zurich.ibm.com>
This commit is contained in:
Panos Vagenas 2025-03-13 15:34:18 +01:00 committed by GitHub
parent 6eb718f849
commit 0945973b79
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
33 changed files with 286 additions and 310 deletions

View File

@ -380,7 +380,7 @@ class AsciiDocBackend(DeclarativeDocumentBackend):
end_row_offset_idx=row_idx + row_span,
start_col_offset_idx=col_idx,
end_col_offset_idx=col_idx + col_span,
col_header=False,
column_header=row_idx == 0,
row_header=False,
)
data.table_cells.append(cell)

View File

@ -111,7 +111,7 @@ class CsvDocumentBackend(DeclarativeDocumentBackend):
end_row_offset_idx=row_idx + 1,
start_col_offset_idx=col_idx,
end_col_offset_idx=col_idx + 1,
col_header=row_idx == 0, # First row as header
column_header=row_idx == 0, # First row as header
row_header=False,
)
table_data.table_cells.append(cell)

View File

@ -457,7 +457,7 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
end_row_offset_idx=row_idx + row_span,
start_col_offset_idx=col_idx,
end_col_offset_idx=col_idx + col_span,
col_header=col_header,
column_header=col_header,
row_header=((not col_header) and html_cell.name == "th"),
)
data.table_cells.append(table_cell)

View File

@ -136,7 +136,7 @@ class MarkdownDocumentBackend(DeclarativeDocumentBackend):
end_row_offset_idx=trow_ind + row_span,
start_col_offset_idx=tcol_ind,
end_col_offset_idx=tcol_ind + col_span,
col_header=False,
column_header=trow_ind == 0,
row_header=False,
)
tcells.append(icell)

View File

@ -164,7 +164,7 @@ class MsExcelDocumentBackend(DeclarativeDocumentBackend):
end_row_offset_idx=excel_cell.row + excel_cell.row_span,
start_col_offset_idx=excel_cell.col,
end_col_offset_idx=excel_cell.col + excel_cell.col_span,
col_header=False,
column_header=excel_cell.row == 0,
row_header=False,
)
table_data.table_cells.append(cell)
@ -173,7 +173,7 @@ class MsExcelDocumentBackend(DeclarativeDocumentBackend):
return doc
def _find_data_tables(self, sheet: Worksheet):
def _find_data_tables(self, sheet: Worksheet) -> List[ExcelTable]:
"""
Find all compact rectangular data tables in a sheet.
"""
@ -340,47 +340,4 @@ class MsExcelDocumentBackend(DeclarativeDocumentBackend):
except:
_log.error("could not extract the image from excel sheets")
"""
for idx, chart in enumerate(sheet._charts): # type: ignore
try:
chart_path = f"chart_{idx + 1}.png"
_log.info(
f"Chart found, but dynamic rendering is required for: {chart_path}"
)
_log.info(f"Chart {idx + 1}:")
# Chart type
# _log.info(f"Type: {type(chart).__name__}")
print(f"Type: {type(chart).__name__}")
# Extract series data
for series_idx, series in enumerate(chart.series):
#_log.info(f"Series {series_idx + 1}:")
print(f"Series {series_idx + 1} type: {type(series).__name__}")
#print(f"x-values: {series.xVal}")
#print(f"y-values: {series.yVal}")
print(f"xval type: {type(series.xVal).__name__}")
xvals = []
for _ in series.xVal.numLit.pt:
print(f"xval type: {type(_).__name__}")
if hasattr(_, 'v'):
xvals.append(_.v)
print(f"x-values: {xvals}")
yvals = []
for _ in series.yVal:
if hasattr(_, 'v'):
yvals.append(_.v)
print(f"y-values: {yvals}")
except Exception as exc:
print(exc)
continue
"""
return doc

View File

@ -346,7 +346,7 @@ class MsPowerpointDocumentBackend(DeclarativeDocumentBackend, PaginatedDocumentB
end_row_offset_idx=row_idx + row_span,
start_col_offset_idx=col_idx,
end_col_offset_idx=col_idx + col_span,
col_header=False,
column_header=row_idx == 0,
row_header=False,
)
if len(cell.text.strip()) > 0:

View File

@ -601,7 +601,7 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
end_row_offset_idx=row.grid_cols_before + spanned_idx,
start_col_offset_idx=col_idx,
end_col_offset_idx=col_idx + cell.grid_span,
col_header=False,
column_header=row.grid_cols_before + row_idx == 0,
row_header=False,
)
data.table_cells.append(table_cell)

View File

@ -51,7 +51,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "1",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -63,7 +63,7 @@
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "2",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -75,7 +75,7 @@
"start_col_offset_idx": 2,
"end_col_offset_idx": 3,
"text": "3",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -87,7 +87,7 @@
"start_col_offset_idx": 3,
"end_col_offset_idx": 4,
"text": "4",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -296,7 +296,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "1",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -308,7 +308,7 @@
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "2",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -320,7 +320,7 @@
"start_col_offset_idx": 2,
"end_col_offset_idx": 3,
"text": "3",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -332,7 +332,7 @@
"start_col_offset_idx": 3,
"end_col_offset_idx": 4,
"text": "4",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
}

View File

@ -51,7 +51,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "Index",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -63,7 +63,7 @@
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "Customer Id",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -75,7 +75,7 @@
"start_col_offset_idx": 2,
"end_col_offset_idx": 3,
"text": "First Name",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -87,7 +87,7 @@
"start_col_offset_idx": 3,
"end_col_offset_idx": 4,
"text": "Last Name",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -99,7 +99,7 @@
"start_col_offset_idx": 4,
"end_col_offset_idx": 5,
"text": "Company",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -111,7 +111,7 @@
"start_col_offset_idx": 5,
"end_col_offset_idx": 6,
"text": "City",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -123,7 +123,7 @@
"start_col_offset_idx": 6,
"end_col_offset_idx": 7,
"text": "Country",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -135,7 +135,7 @@
"start_col_offset_idx": 7,
"end_col_offset_idx": 8,
"text": "Phone 1",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -147,7 +147,7 @@
"start_col_offset_idx": 8,
"end_col_offset_idx": 9,
"text": "Phone 2",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -159,7 +159,7 @@
"start_col_offset_idx": 9,
"end_col_offset_idx": 10,
"text": "Email",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -171,7 +171,7 @@
"start_col_offset_idx": 10,
"end_col_offset_idx": 11,
"text": "Subscription Date",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -183,7 +183,7 @@
"start_col_offset_idx": 11,
"end_col_offset_idx": 12,
"text": "Website",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -920,7 +920,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "Index",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -932,7 +932,7 @@
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "Customer Id",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -944,7 +944,7 @@
"start_col_offset_idx": 2,
"end_col_offset_idx": 3,
"text": "First Name",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -956,7 +956,7 @@
"start_col_offset_idx": 3,
"end_col_offset_idx": 4,
"text": "Last Name",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -968,7 +968,7 @@
"start_col_offset_idx": 4,
"end_col_offset_idx": 5,
"text": "Company",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -980,7 +980,7 @@
"start_col_offset_idx": 5,
"end_col_offset_idx": 6,
"text": "City",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -992,7 +992,7 @@
"start_col_offset_idx": 6,
"end_col_offset_idx": 7,
"text": "Country",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -1004,7 +1004,7 @@
"start_col_offset_idx": 7,
"end_col_offset_idx": 8,
"text": "Phone 1",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -1016,7 +1016,7 @@
"start_col_offset_idx": 8,
"end_col_offset_idx": 9,
"text": "Phone 2",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -1028,7 +1028,7 @@
"start_col_offset_idx": 9,
"end_col_offset_idx": 10,
"text": "Email",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -1040,7 +1040,7 @@
"start_col_offset_idx": 10,
"end_col_offset_idx": 11,
"text": "Subscription Date",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -1052,7 +1052,7 @@
"start_col_offset_idx": 11,
"end_col_offset_idx": 12,
"text": "Website",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
}

View File

@ -51,7 +51,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "1",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -63,7 +63,7 @@
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "2",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -75,7 +75,7 @@
"start_col_offset_idx": 2,
"end_col_offset_idx": 3,
"text": "3",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -284,7 +284,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "1",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -296,7 +296,7 @@
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "2",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -308,7 +308,7 @@
"start_col_offset_idx": 2,
"end_col_offset_idx": 3,
"text": "3",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},

View File

@ -51,7 +51,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "Index",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -63,7 +63,7 @@
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "Customer Id",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -75,7 +75,7 @@
"start_col_offset_idx": 2,
"end_col_offset_idx": 3,
"text": "First Name",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -87,7 +87,7 @@
"start_col_offset_idx": 3,
"end_col_offset_idx": 4,
"text": "Last Name",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -99,7 +99,7 @@
"start_col_offset_idx": 4,
"end_col_offset_idx": 5,
"text": "Company",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -111,7 +111,7 @@
"start_col_offset_idx": 5,
"end_col_offset_idx": 6,
"text": "City",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -123,7 +123,7 @@
"start_col_offset_idx": 6,
"end_col_offset_idx": 7,
"text": "Country",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -135,7 +135,7 @@
"start_col_offset_idx": 7,
"end_col_offset_idx": 8,
"text": "Phone 1",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -147,7 +147,7 @@
"start_col_offset_idx": 8,
"end_col_offset_idx": 9,
"text": "Phone 2",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -159,7 +159,7 @@
"start_col_offset_idx": 9,
"end_col_offset_idx": 10,
"text": "Email",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -171,7 +171,7 @@
"start_col_offset_idx": 10,
"end_col_offset_idx": 11,
"text": "Subscription Date",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -183,7 +183,7 @@
"start_col_offset_idx": 11,
"end_col_offset_idx": 12,
"text": "Website",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -920,7 +920,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "Index",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -932,7 +932,7 @@
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "Customer Id",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -944,7 +944,7 @@
"start_col_offset_idx": 2,
"end_col_offset_idx": 3,
"text": "First Name",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -956,7 +956,7 @@
"start_col_offset_idx": 3,
"end_col_offset_idx": 4,
"text": "Last Name",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -968,7 +968,7 @@
"start_col_offset_idx": 4,
"end_col_offset_idx": 5,
"text": "Company",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -980,7 +980,7 @@
"start_col_offset_idx": 5,
"end_col_offset_idx": 6,
"text": "City",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -992,7 +992,7 @@
"start_col_offset_idx": 6,
"end_col_offset_idx": 7,
"text": "Country",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -1004,7 +1004,7 @@
"start_col_offset_idx": 7,
"end_col_offset_idx": 8,
"text": "Phone 1",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -1016,7 +1016,7 @@
"start_col_offset_idx": 8,
"end_col_offset_idx": 9,
"text": "Phone 2",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -1028,7 +1028,7 @@
"start_col_offset_idx": 9,
"end_col_offset_idx": 10,
"text": "Email",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -1040,7 +1040,7 @@
"start_col_offset_idx": 10,
"end_col_offset_idx": 11,
"text": "Subscription Date",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -1052,7 +1052,7 @@
"start_col_offset_idx": 11,
"end_col_offset_idx": 12,
"text": "Website",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
}

View File

@ -51,7 +51,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "Index",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -63,7 +63,7 @@
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "Customer Id",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -75,7 +75,7 @@
"start_col_offset_idx": 2,
"end_col_offset_idx": 3,
"text": "First Name",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -87,7 +87,7 @@
"start_col_offset_idx": 3,
"end_col_offset_idx": 4,
"text": "Last Name",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -99,7 +99,7 @@
"start_col_offset_idx": 4,
"end_col_offset_idx": 5,
"text": "Company",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -111,7 +111,7 @@
"start_col_offset_idx": 5,
"end_col_offset_idx": 6,
"text": "City",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -123,7 +123,7 @@
"start_col_offset_idx": 6,
"end_col_offset_idx": 7,
"text": "Country",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -135,7 +135,7 @@
"start_col_offset_idx": 7,
"end_col_offset_idx": 8,
"text": "Phone 1",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -147,7 +147,7 @@
"start_col_offset_idx": 8,
"end_col_offset_idx": 9,
"text": "Phone 2",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -159,7 +159,7 @@
"start_col_offset_idx": 9,
"end_col_offset_idx": 10,
"text": "Email",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -171,7 +171,7 @@
"start_col_offset_idx": 10,
"end_col_offset_idx": 11,
"text": "Subscription Date",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -183,7 +183,7 @@
"start_col_offset_idx": 11,
"end_col_offset_idx": 12,
"text": "Website",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -920,7 +920,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "Index",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -932,7 +932,7 @@
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "Customer Id",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -944,7 +944,7 @@
"start_col_offset_idx": 2,
"end_col_offset_idx": 3,
"text": "First Name",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -956,7 +956,7 @@
"start_col_offset_idx": 3,
"end_col_offset_idx": 4,
"text": "Last Name",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -968,7 +968,7 @@
"start_col_offset_idx": 4,
"end_col_offset_idx": 5,
"text": "Company",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -980,7 +980,7 @@
"start_col_offset_idx": 5,
"end_col_offset_idx": 6,
"text": "City",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -992,7 +992,7 @@
"start_col_offset_idx": 6,
"end_col_offset_idx": 7,
"text": "Country",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -1004,7 +1004,7 @@
"start_col_offset_idx": 7,
"end_col_offset_idx": 8,
"text": "Phone 1",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -1016,7 +1016,7 @@
"start_col_offset_idx": 8,
"end_col_offset_idx": 9,
"text": "Phone 2",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -1028,7 +1028,7 @@
"start_col_offset_idx": 9,
"end_col_offset_idx": 10,
"text": "Email",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -1040,7 +1040,7 @@
"start_col_offset_idx": 10,
"end_col_offset_idx": 11,
"text": "Subscription Date",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -1052,7 +1052,7 @@
"start_col_offset_idx": 11,
"end_col_offset_idx": 12,
"text": "Website",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
}

View File

@ -51,7 +51,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "Index",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -63,7 +63,7 @@
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "Customer Id",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -75,7 +75,7 @@
"start_col_offset_idx": 2,
"end_col_offset_idx": 3,
"text": "First Name",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -87,7 +87,7 @@
"start_col_offset_idx": 3,
"end_col_offset_idx": 4,
"text": "Last Name",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -99,7 +99,7 @@
"start_col_offset_idx": 4,
"end_col_offset_idx": 5,
"text": "Company",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -111,7 +111,7 @@
"start_col_offset_idx": 5,
"end_col_offset_idx": 6,
"text": "City",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -123,7 +123,7 @@
"start_col_offset_idx": 6,
"end_col_offset_idx": 7,
"text": "Country",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -135,7 +135,7 @@
"start_col_offset_idx": 7,
"end_col_offset_idx": 8,
"text": "Phone 1",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -147,7 +147,7 @@
"start_col_offset_idx": 8,
"end_col_offset_idx": 9,
"text": "Phone 2",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -159,7 +159,7 @@
"start_col_offset_idx": 9,
"end_col_offset_idx": 10,
"text": "Email",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -171,7 +171,7 @@
"start_col_offset_idx": 10,
"end_col_offset_idx": 11,
"text": "Subscription Date",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -183,7 +183,7 @@
"start_col_offset_idx": 11,
"end_col_offset_idx": 12,
"text": "Website",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -920,7 +920,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "Index",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -932,7 +932,7 @@
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "Customer Id",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -944,7 +944,7 @@
"start_col_offset_idx": 2,
"end_col_offset_idx": 3,
"text": "First Name",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -956,7 +956,7 @@
"start_col_offset_idx": 3,
"end_col_offset_idx": 4,
"text": "Last Name",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -968,7 +968,7 @@
"start_col_offset_idx": 4,
"end_col_offset_idx": 5,
"text": "Company",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -980,7 +980,7 @@
"start_col_offset_idx": 5,
"end_col_offset_idx": 6,
"text": "City",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -992,7 +992,7 @@
"start_col_offset_idx": 6,
"end_col_offset_idx": 7,
"text": "Country",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -1004,7 +1004,7 @@
"start_col_offset_idx": 7,
"end_col_offset_idx": 8,
"text": "Phone 1",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -1016,7 +1016,7 @@
"start_col_offset_idx": 8,
"end_col_offset_idx": 9,
"text": "Phone 2",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -1028,7 +1028,7 @@
"start_col_offset_idx": 9,
"end_col_offset_idx": 10,
"text": "Email",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -1040,7 +1040,7 @@
"start_col_offset_idx": 10,
"end_col_offset_idx": 11,
"text": "Subscription Date",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -1052,7 +1052,7 @@
"start_col_offset_idx": 11,
"end_col_offset_idx": 12,
"text": "Website",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
}

View File

@ -51,7 +51,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "1",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -63,7 +63,7 @@
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "2",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -75,7 +75,7 @@
"start_col_offset_idx": 2,
"end_col_offset_idx": 3,
"text": "3",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -87,7 +87,7 @@
"start_col_offset_idx": 3,
"end_col_offset_idx": 4,
"text": "4",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -284,7 +284,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "1",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -296,7 +296,7 @@
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "2",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -308,7 +308,7 @@
"start_col_offset_idx": 2,
"end_col_offset_idx": 3,
"text": "3",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -320,7 +320,7 @@
"start_col_offset_idx": 3,
"end_col_offset_idx": 4,
"text": "4",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
}

View File

@ -51,7 +51,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "1",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -63,7 +63,7 @@
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "2",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -75,7 +75,7 @@
"start_col_offset_idx": 2,
"end_col_offset_idx": 3,
"text": "3",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -87,7 +87,7 @@
"start_col_offset_idx": 3,
"end_col_offset_idx": 4,
"text": "4",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -308,7 +308,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "1",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -320,7 +320,7 @@
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "2",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -332,7 +332,7 @@
"start_col_offset_idx": 2,
"end_col_offset_idx": 3,
"text": "3",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -344,7 +344,7 @@
"start_col_offset_idx": 3,
"end_col_offset_idx": 4,
"text": "4",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},

View File

@ -344,7 +344,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "Header 1",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -356,7 +356,7 @@
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "Header 2",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -368,7 +368,7 @@
"start_col_offset_idx": 2,
"end_col_offset_idx": 3,
"text": "Header 3",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -493,7 +493,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "Header 1",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -505,7 +505,7 @@
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "Header 2",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -517,7 +517,7 @@
"start_col_offset_idx": 2,
"end_col_offset_idx": 3,
"text": "Header 3",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
}

View File

@ -68,7 +68,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "Header 1",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -80,7 +80,7 @@
"start_col_offset_idx": 1,
"end_col_offset_idx": 3,
"text": "Header 2 & 3 (colspan)",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -181,7 +181,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "Header 1",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -193,7 +193,7 @@
"start_col_offset_idx": 1,
"end_col_offset_idx": 3,
"text": "Header 2 & 3 (colspan)",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -205,7 +205,7 @@
"start_col_offset_idx": 1,
"end_col_offset_idx": 3,
"text": "Header 2 & 3 (colspan)",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
}

View File

@ -68,7 +68,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "Header 1",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -80,7 +80,7 @@
"start_col_offset_idx": 1,
"end_col_offset_idx": 3,
"text": "Header 2 & 3 (colspan)",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -181,7 +181,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "Header 1",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -193,7 +193,7 @@
"start_col_offset_idx": 1,
"end_col_offset_idx": 3,
"text": "Header 2 & 3 (colspan)",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -205,7 +205,7 @@
"start_col_offset_idx": 1,
"end_col_offset_idx": 3,
"text": "Header 2 & 3 (colspan)",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
}

View File

@ -163,5 +163,6 @@
"pictures": [],
"tables": [],
"key_value_items": [],
"form_items": [],
"pages": {}
}

View File

@ -960,7 +960,7 @@
"start_col_offset_idx": 1,
"end_col_offset_idx": 4,
"text": "Class1",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -972,7 +972,7 @@
"start_col_offset_idx": 4,
"end_col_offset_idx": 7,
"text": "Class2",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -1385,7 +1385,7 @@
"start_col_offset_idx": 1,
"end_col_offset_idx": 4,
"text": "Class1",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -1397,7 +1397,7 @@
"start_col_offset_idx": 1,
"end_col_offset_idx": 4,
"text": "Class1",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -1409,7 +1409,7 @@
"start_col_offset_idx": 1,
"end_col_offset_idx": 4,
"text": "Class1",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -1421,7 +1421,7 @@
"start_col_offset_idx": 4,
"end_col_offset_idx": 7,
"text": "Class2",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -1433,7 +1433,7 @@
"start_col_offset_idx": 4,
"end_col_offset_idx": 7,
"text": "Class2",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -1445,7 +1445,7 @@
"start_col_offset_idx": 4,
"end_col_offset_idx": 7,
"text": "Class2",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
}
@ -2143,6 +2143,7 @@
}
],
"key_value_items": [],
"form_items": [],
"pages": {
"1": {
"size": {

File diff suppressed because one or more lines are too long

View File

@ -176,7 +176,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "Tab1",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -188,7 +188,7 @@
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "Tab2",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -200,7 +200,7 @@
"start_col_offset_idx": 2,
"end_col_offset_idx": 3,
"text": "Tab3",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -289,7 +289,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "Tab1",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -301,7 +301,7 @@
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "Tab2",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -313,7 +313,7 @@
"start_col_offset_idx": 2,
"end_col_offset_idx": 3,
"text": "Tab3",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
}
@ -399,5 +399,6 @@
}
],
"key_value_items": [],
"form_items": [],
"pages": {}
}

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -748,5 +748,6 @@
"pictures": [],
"tables": [],
"key_value_items": [],
"form_items": [],
"pages": {}
}

View File

@ -802,5 +802,6 @@
"pictures": [],
"tables": [],
"key_value_items": [],
"form_items": [],
"pages": {}
}

View File

@ -979,5 +979,6 @@
"pictures": [],
"tables": [],
"key_value_items": [],
"form_items": [],
"pages": {}
}

View File

@ -7914,7 +7914,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 2,
"text": "Duck\n",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -7950,7 +7950,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 2,
"text": "Scientific classification \n",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -8130,7 +8130,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 2,
"text": "Subfamilies\n",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -8159,7 +8159,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 2,
"text": "Duck\n",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -8171,7 +8171,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 2,
"text": "Duck\n",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
}
@ -8237,7 +8237,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 2,
"text": "Scientific classification \n",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -8249,7 +8249,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 2,
"text": "Scientific classification \n",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
}
@ -8445,7 +8445,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 2,
"text": "Subfamilies\n",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -8457,7 +8457,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 2,
"text": "Subfamilies\n",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
}
@ -8513,7 +8513,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 2,
"text": "Authority control databases ",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -8578,7 +8578,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 2,
"text": "Authority control databases ",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -8590,7 +8590,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 2,
"text": "Authority control databases ",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
}

File diff suppressed because one or more lines are too long

View File

@ -71,19 +71,19 @@
</head>
<h2>Test with tables</h2>
<p>A uniform table</p>
<table><tbody><tr><td>Header 0.0</td><td>Header 0.1</td><td>Header 0.2</td></tr><tr><td>Cell 1.0</td><td>Cell 1.1</td><td>Cell 1.2</td></tr><tr><td>Cell 2.0</td><td>Cell 2.1</td><td>Cell 2.2</td></tr></tbody></table>
<table><tbody><tr><th>Header 0.0</th><th>Header 0.1</th><th>Header 0.2</th></tr><tr><td>Cell 1.0</td><td>Cell 1.1</td><td>Cell 1.2</td></tr><tr><td>Cell 2.0</td><td>Cell 2.1</td><td>Cell 2.2</td></tr></tbody></table>
<p></p>
<p>A non-uniform table with horizontal spans</p>
<table><tbody><tr><td>Header 0.0</td><td>Header 0.1</td><td>Header 0.2</td></tr><tr><td>Cell 1.0</td><td colspan="2">Merged Cell 1.1 1.2</td></tr><tr><td>Cell 2.0</td><td colspan="2">Merged Cell 2.1 2.2</td></tr></tbody></table>
<table><tbody><tr><th>Header 0.0</th><th>Header 0.1</th><th>Header 0.2</th></tr><tr><td>Cell 1.0</td><td colspan="2">Merged Cell 1.1 1.2</td></tr><tr><td>Cell 2.0</td><td colspan="2">Merged Cell 2.1 2.2</td></tr></tbody></table>
<p></p>
<p>A non-uniform table with horizontal spans in inner columns</p>
<table><tbody><tr><td>Header 0.0</td><td>Header 0.1</td><td>Header 0.2</td><td>Header 0.3</td></tr><tr><td>Cell 1.0</td><td colspan="2">Merged Cell 1.1 1.2</td><td>Cell 1.3</td></tr><tr><td>Cell 2.0</td><td colspan="2">Merged Cell 2.1 2.2</td><td>Cell 2.3</td></tr></tbody></table>
<table><tbody><tr><th>Header 0.0</th><th>Header 0.1</th><th>Header 0.2</th><th>Header 0.3</th></tr><tr><td>Cell 1.0</td><td colspan="2">Merged Cell 1.1 1.2</td><td>Cell 1.3</td></tr><tr><td>Cell 2.0</td><td colspan="2">Merged Cell 2.1 2.2</td><td>Cell 2.3</td></tr></tbody></table>
<p></p>
<p>A non-uniform table with vertical spans</p>
<table><tbody><tr><td>Header 0.0</td><td>Header 0.1</td><td>Header 0.2</td></tr><tr><td>Cell 1.0</td><td rowspan="2">Merged Cell 1.1 2.1</td><td>Cell 1.2</td></tr><tr><td>Cell 2.0</td><td>Cell 2.2</td></tr><tr><td>Cell 3.0</td><td rowspan="2">Merged Cell 3.1 4.1</td><td>Cell 3.2</td></tr><tr><td>Cell 4.0</td><td>Cell 4.2</td></tr></tbody></table>
<table><tbody><tr><th>Header 0.0</th><th>Header 0.1</th><th>Header 0.2</th></tr><tr><td>Cell 1.0</td><td rowspan="2">Merged Cell 1.1 2.1</td><td>Cell 1.2</td></tr><tr><td>Cell 2.0</td><td>Cell 2.2</td></tr><tr><td>Cell 3.0</td><td rowspan="2">Merged Cell 3.1 4.1</td><td>Cell 3.2</td></tr><tr><td>Cell 4.0</td><td>Cell 4.2</td></tr></tbody></table>
<p></p>
<p>A non-uniform table with all kinds of spans and empty cells</p>
<table><tbody><tr><td>Header 0.0</td><td>Header 0.1</td><td>Header 0.2</td><td></td><td></td></tr><tr><td>Cell 1.0</td><td rowspan="2">Merged Cell 1.1 2.1</td><td>Cell 1.2</td><td></td><td></td></tr><tr><td>Cell 2.0</td><td>Cell 2.2</td><td></td><td></td></tr><tr><td>Cell 3.0</td><td rowspan="2">Merged Cell 3.1 4.1</td><td>Cell 3.2</td><td rowspan="3"></td><td></td></tr><tr><td>Cell 4.0</td><td>Cell 4.2</td><td rowspan="2">Merged Cell 4.4 5.4</td></tr><tr><td></td><td></td><td></td></tr><tr><td></td><td></td><td></td><td></td><td></td></tr><tr><td colspan="5"></td></tr><tr><td></td><td></td><td></td><td></td><td>Cell 8.4</td></tr></tbody></table>
<table><tbody><tr><th>Header 0.0</th><th>Header 0.1</th><th>Header 0.2</th><th></th><th></th></tr><tr><td>Cell 1.0</td><td rowspan="2">Merged Cell 1.1 2.1</td><td>Cell 1.2</td><td></td><td></td></tr><tr><td>Cell 2.0</td><td>Cell 2.2</td><td></td><td></td></tr><tr><td>Cell 3.0</td><td rowspan="2">Merged Cell 3.1 4.1</td><td>Cell 3.2</td><td rowspan="3"></td><td></td></tr><tr><td>Cell 4.0</td><td>Cell 4.2</td><td rowspan="2">Merged Cell 4.4 5.4</td></tr><tr><td></td><td></td><td></td></tr><tr><td></td><td></td><td></td><td></td><td></td></tr><tr><td colspan="5"></td></tr><tr><td></td><td></td><td></td><td></td><td>Cell 8.4</td></tr></tbody></table>
<p></p>
<p></p>
</html>

View File

@ -261,7 +261,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "Header 0.0",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -273,7 +273,7 @@
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "Header 0.1",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -285,7 +285,7 @@
"start_col_offset_idx": 2,
"end_col_offset_idx": 3,
"text": "Header 0.2",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -374,7 +374,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "Header 0.0",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -386,7 +386,7 @@
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "Header 0.1",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -398,7 +398,7 @@
"start_col_offset_idx": 2,
"end_col_offset_idx": 3,
"text": "Header 0.2",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
}
@ -504,7 +504,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "Header 0.0",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -516,7 +516,7 @@
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "Header 0.1",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -528,7 +528,7 @@
"start_col_offset_idx": 2,
"end_col_offset_idx": 3,
"text": "Header 0.2",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -593,7 +593,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "Header 0.0",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -605,7 +605,7 @@
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "Header 0.1",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -617,7 +617,7 @@
"start_col_offset_idx": 2,
"end_col_offset_idx": 3,
"text": "Header 0.2",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
}
@ -723,7 +723,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "Header 0.0",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -735,7 +735,7 @@
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "Header 0.1",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -747,7 +747,7 @@
"start_col_offset_idx": 2,
"end_col_offset_idx": 3,
"text": "Header 0.2",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -759,7 +759,7 @@
"start_col_offset_idx": 3,
"end_col_offset_idx": 4,
"text": "Header 0.3",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -848,7 +848,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "Header 0.0",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -860,7 +860,7 @@
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "Header 0.1",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -872,7 +872,7 @@
"start_col_offset_idx": 2,
"end_col_offset_idx": 3,
"text": "Header 0.2",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -884,7 +884,7 @@
"start_col_offset_idx": 3,
"end_col_offset_idx": 4,
"text": "Header 0.3",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
}
@ -1014,7 +1014,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "Header 0.0",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -1026,7 +1026,7 @@
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "Header 0.1",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -1038,7 +1038,7 @@
"start_col_offset_idx": 2,
"end_col_offset_idx": 3,
"text": "Header 0.2",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -1175,7 +1175,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "Header 0.0",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -1187,7 +1187,7 @@
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "Header 0.1",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -1199,7 +1199,7 @@
"start_col_offset_idx": 2,
"end_col_offset_idx": 3,
"text": "Header 0.2",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
}
@ -1381,7 +1381,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "Header 0.0",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -1393,7 +1393,7 @@
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "Header 0.1",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -1405,7 +1405,7 @@
"start_col_offset_idx": 2,
"end_col_offset_idx": 3,
"text": "Header 0.2",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -1417,7 +1417,7 @@
"start_col_offset_idx": 3,
"end_col_offset_idx": 4,
"text": "",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -1429,7 +1429,7 @@
"start_col_offset_idx": 4,
"end_col_offset_idx": 5,
"text": "",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -1818,7 +1818,7 @@
"start_col_offset_idx": 0,
"end_col_offset_idx": 1,
"text": "Header 0.0",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -1830,7 +1830,7 @@
"start_col_offset_idx": 1,
"end_col_offset_idx": 2,
"text": "Header 0.1",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -1842,7 +1842,7 @@
"start_col_offset_idx": 2,
"end_col_offset_idx": 3,
"text": "Header 0.2",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -1854,7 +1854,7 @@
"start_col_offset_idx": 3,
"end_col_offset_idx": 4,
"text": "",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
},
@ -1866,7 +1866,7 @@
"start_col_offset_idx": 4,
"end_col_offset_idx": 5,
"text": "",
"column_header": false,
"column_header": true,
"row_header": false,
"row_section": false
}
@ -2372,5 +2372,6 @@
}
],
"key_value_items": [],
"form_items": [],
"pages": {}
}

View File

@ -58,7 +58,11 @@ def test_e2e_valid_csv_conversions():
pred_itxt, str(gt_path) + ".itxt"
), "export to indented-text"
assert verify_document(doc, str(gt_path) + ".json"), "export to json"
assert verify_document(
pred_doc=doc,
gtfile=str(gt_path) + ".json",
generate=GENERATE,
), "export to json"
def test_e2e_invalid_csv_conversions():

View File

@ -90,4 +90,8 @@ def test_e2e_docx_conversions():
if docx_path.name == "word_tables.docx":
pred_html: str = doc.export_to_html()
assert verify_export(pred_html, str(gt_path) + ".html"), "export to html"
assert verify_export(
pred_text=pred_html,
gtfile=str(gt_path) + ".html",
generate=GENERATE,
), "export to html"