From 97f214efddcf66f0734a95c17c08936f6111d113 Mon Sep 17 00:00:00 2001 From: Michele Dolfi <97102151+dolfim-ibm@users.noreply.github.com> Date: Sun, 10 Nov 2024 14:26:17 +0100 Subject: [PATCH] fix: allow mps usage for easyocr (#286) * fix: allow mps usage for easyocr Signed-off-by: Michele Dolfi * add example for cpu-only Signed-off-by: Michele Dolfi * comment out example Signed-off-by: Michele Dolfi --------- Signed-off-by: Michele Dolfi --- docling/models/easyocr_model.py | 5 +---- docs/examples/custom_convert.py | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/docling/models/easyocr_model.py b/docling/models/easyocr_model.py index 6c72be5..1b8e914 100644 --- a/docling/models/easyocr_model.py +++ b/docling/models/easyocr_model.py @@ -31,12 +31,9 @@ class EasyOcrModel(BaseOcrModel): "Alternatively, Docling has support for other OCR engines. See the documentation." ) - use_gpu = ( - False if torch.backends.mps.is_available() else self.options.use_gpu - ) self.reader = easyocr.Reader( lang_list=self.options.lang, - gpu=use_gpu, + gpu=self.options.use_gpu, model_storage_directory=self.options.model_storage_directory, download_enabled=self.options.download_enabled, ) diff --git a/docs/examples/custom_convert.py b/docs/examples/custom_convert.py index 204ae59..7631848 100644 --- a/docs/examples/custom_convert.py +++ b/docs/examples/custom_convert.py @@ -80,6 +80,20 @@ def main(): } ) + # Docling Parse with EasyOCR (CPU only) + # ---------------------- + # pipeline_options = PdfPipelineOptions() + # pipeline_options.do_ocr = True + # pipeline_options.ocr_options.use_gpu = False # <-- set this. + # pipeline_options.do_table_structure = True + # pipeline_options.table_structure_options.do_cell_matching = True + + # doc_converter = DocumentConverter( + # format_options={ + # InputFormat.PDF: PdfFormatOption(pipeline_options=pipeline_options) + # } + # ) + # Docling Parse with Tesseract # ---------------------- # pipeline_options = PdfPipelineOptions()