fix: allow mps usage for easyocr (#286)

* fix: allow mps usage for easyocr

  Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* add example for cpu-only

  Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* comment out example

  Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

---------

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
2024-11-10 14:26:17 +01:00
parent be8aa17291
commit 97f214efdd
2 changed files with 15 additions and 4 deletions
@@ -31,12 +31,9 @@ class EasyOcrModel(BaseOcrModel):
                    "Alternatively, Docling has support for other OCR engines. See the documentation."
                )

-            use_gpu = (
-                False if torch.backends.mps.is_available() else self.options.use_gpu
-            )
            self.reader = easyocr.Reader(
                lang_list=self.options.lang,
-                gpu=use_gpu,
+                gpu=self.options.use_gpu,
                model_storage_directory=self.options.model_storage_directory,
                download_enabled=self.options.download_enabled,
            )
@@ -80,6 +80,20 @@ def main():
        }
    )

+    # Docling Parse with EasyOCR (CPU only)
+    # ----------------------
+    # pipeline_options = PdfPipelineOptions()
+    # pipeline_options.do_ocr = True
+    # pipeline_options.ocr_options.use_gpu = False  # <-- set this.
+    # pipeline_options.do_table_structure = True
+    # pipeline_options.table_structure_options.do_cell_matching = True
+
+    # doc_converter = DocumentConverter(
+    #     format_options={
+    #         InputFormat.PDF: PdfFormatOption(pipeline_options=pipeline_options)
+    #     }
+    # )
+
    # Docling Parse with Tesseract
    # ----------------------
    # pipeline_options = PdfPipelineOptions()