perf(msexcel): _find_table_bounds use iter_rows/iter_cols instead of Worksheet.cell (#1875)

* perf(msexcel): _find_table_bounds use iter_rows/iter_cols instead of sheet.cell

* DCO Remediation Commit for Qiefan Jiang <jiangqiefan@bytedance.com>

I, Qiefan Jiang <jiangqiefan@bytedance.com>, hereby add my Signed-off-by to this commit: 274102a8d4db5d2da8c7ca603e1eb039c1e07967

Signed-off-by: Qiefan Jiang <jiangqiefan@bytedance.com>

* fix lint

* DCO Remediation Commit for Qiefan Jiang <jiangqiefan@bytedance.com>

I, Qiefan Jiang <jiangqiefan@bytedance.com>, hereby add my Signed-off-by to this commit: b6b5b090a99ba7ba23c1facf0317f7e9f95039e5

Signed-off-by: Qiefan Jiang <jiangqiefan@bytedance.com>

---------

Signed-off-by: Qiefan Jiang <jiangqiefan@bytedance.com>
This commit is contained in:
Qiefan Jiang 2025-07-03 19:12:06 +08:00 committed by GitHub
parent 3089cf2d26
commit 13865c06f5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -337,10 +337,17 @@ class MsExcelDocumentBackend(DeclarativeDocumentBackend, PaginatedDocumentBacken
# Collect the data within the bounds
data = []
visited_cells: set[tuple[int, int]] = set()
for ri in range(start_row, max_row + 1):
for rj in range(start_col, max_col + 1):
cell = sheet.cell(row=ri + 1, column=rj + 1) # 1-based indexing
for ri, row in enumerate(
sheet.iter_rows(
min_row=start_row + 1, # start_row is 0-based but iter_rows is 1-based
max_row=max_row + 1,
min_col=start_col + 1,
max_col=max_col + 1,
values_only=False,
),
start_row,
):
for rj, cell in enumerate(row, start_col):
# Check if the cell belongs to a merged range
row_span = 1
col_span = 1
@@ -397,10 +404,16 @@ class MsExcelDocumentBackend(DeclarativeDocumentBackend, PaginatedDocumentBacken
"""
max_row: int = start_row
while max_row < sheet.max_row - 1:
# Get the cell value or check if it is part of a merged cell
cell = sheet.cell(row=max_row + 2, column=start_col + 1)
for ri, (cell,) in enumerate(
sheet.iter_rows(
min_row=start_row + 2,
max_row=sheet.max_row,
min_col=start_col + 1,
max_col=start_col + 1,
values_only=False,
),
start_row + 1,
):
# Check if the cell is part of a merged range
merged_range = next(
(mr for mr in sheet.merged_cells.ranges if cell.coordinate in mr),
@@ -414,7 +427,7 @@ class MsExcelDocumentBackend(DeclarativeDocumentBackend, PaginatedDocumentBacken
if merged_range:
max_row = max(max_row, merged_range.max_row - 1)
else:
max_row += 1
max_row = ri
return max_row
@@ -433,10 +446,16 @@ class MsExcelDocumentBackend(DeclarativeDocumentBackend, PaginatedDocumentBacken
"""
max_col: int = start_col
while max_col < sheet.max_column - 1:
# Get the cell value or check if it is part of a merged cell
cell = sheet.cell(row=start_row + 1, column=max_col + 2)
for rj, (cell,) in enumerate(
sheet.iter_cols(
min_row=start_row + 1,
max_row=start_row + 1,
min_col=start_col + 2,
max_col=sheet.max_column,
values_only=False,
),
start_col + 1,
):
# Check if the cell is part of a merged range
merged_range = next(
(mr for mr in sheet.merged_cells.ranges if cell.coordinate in mr),
@@ -450,7 +469,7 @@ class MsExcelDocumentBackend(DeclarativeDocumentBackend, PaginatedDocumentBacken
if merged_range:
max_col = max(max_col, merged_range.max_col - 1)
else:
max_col += 1
max_col = rj
return max_col