fix: Introduce Image format options in CLI. Silence the tqdm downloading messages. (#544)
* fix: main: Introduce format options for Image with the same pdf pipeline_options.
  Add RapidOcrOptions to the Union of ocr_options for PdfPipelineOptions.

  Signed-off-by: Nikos Livathinos <nli@zurich.ibm.com>

* fix: Silence the tqdm messages during the downloading of model files.

  Signed-off-by: Nikos Livathinos <nli@zurich.ibm.com>

* fix: Code styling.

  Signed-off-by: Nikos Livathinos <nli@zurich.ibm.com>

* fix: Use the HF API to disable the tqdm progress bars.

  Signed-off-by: Nikos Livathinos <nli@zurich.ibm.com>

---------

Signed-off-by: Nikos Livathinos <nli@zurich.ibm.com>
This commit is contained in:
@@ -372,11 +372,13 @@ def convert(
|
||||
else:
|
||||
raise RuntimeError(f"Unexpected PDF backend type {pdf_backend}")
|
||||
|
||||
pdf_format_option = PdfFormatOption(
|
||||
pipeline_options=pipeline_options,
|
||||
backend=backend, # pdf_backend
|
||||
)
|
||||
format_options: Dict[InputFormat, FormatOption] = {
|
||||
InputFormat.PDF: PdfFormatOption(
|
||||
pipeline_options=pipeline_options,
|
||||
backend=backend, # pdf_backend
|
||||
)
|
||||
InputFormat.PDF: pdf_format_option,
|
||||
InputFormat.IMAGE: pdf_format_option,
|
||||
}
|
||||
doc_converter = DocumentConverter(
|
||||
allowed_formats=from_formats,
|
||||
|
||||
Reference in New Issue
Block a user