fix: allow mps usage for easyocr (#286)

* fix: allow mps usage for easyocr

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* add example for cpu-only

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* comment out example

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

---------

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
This commit is contained in:
Michele Dolfi 2024-11-10 14:26:17 +01:00 committed by GitHub
parent be8aa17291
commit 97f214efdd
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 15 additions and 4 deletions

View File

@@ -31,12 +31,9 @@ class EasyOcrModel(BaseOcrModel):
 "Alternatively, Docling has support for other OCR engines. See the documentation."
 )
-use_gpu = (
-False if torch.backends.mps.is_available() else self.options.use_gpu
-)
 self.reader = easyocr.Reader(
 lang_list=self.options.lang,
-gpu=use_gpu,
+gpu=self.options.use_gpu,
 model_storage_directory=self.options.model_storage_directory,
 download_enabled=self.options.download_enabled,
 )

View File

@@ -80,6 +80,20 @@ def main():
 }
 )
+# Docling Parse with EasyOCR (CPU only)
+# ----------------------
+# pipeline_options = PdfPipelineOptions()
+# pipeline_options.do_ocr = True
+# pipeline_options.ocr_options.use_gpu = False # <-- set this.
+# pipeline_options.do_table_structure = True
+# pipeline_options.table_structure_options.do_cell_matching = True
+# doc_converter = DocumentConverter(
+# format_options={
+# InputFormat.PDF: PdfFormatOption(pipeline_options=pipeline_options)
+# }
+# )
 # Docling Parse with Tesseract
 # ----------------------
 # pipeline_options = PdfPipelineOptions()