feat: Add option to define page range (#852)

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
Christoph Auer
2025-01-31 15:23:00 +01:00
committed by GitHub
parent d727b04ad0
commit 70d68b6164
6 changed files with 82 additions and 4 deletions

View File

@@ -141,7 +141,9 @@ class PaginatedPipeline(BasePipeline): # TODO this is a bad name.
with TimeRecorder(conv_res, "doc_build", scope=ProfilingScope.DOCUMENT):
for i in range(0, conv_res.input.page_count):
conv_res.pages.append(Page(page_no=i))
start_page, end_page = conv_res.input.limits.page_range
if (start_page - 1) <= i <= (end_page - 1):
conv_res.pages.append(Page(page_no=i))
try:
# Iterate batches of pages (page_batch_size) in the doc