From 53327552e83ced079ae50d8067ba7a8ce80cd9ad Mon Sep 17 00:00:00 2001 From: Yorick Terweijden Date: Mon, 27 Jan 2025 14:38:15 +0200 Subject: [PATCH] feat(ocr): expose `rec_keys_path` in RapidOcrOptions to support custom dictionaries (#786) * Expose `rec_keys_path` in RapidOcrOptions to support custom dictionaries - Added `rec_keys_path` to `RapidOcrOptions` to align with RapidOCR's capability to use custom character dictionaries. - Passed `rec_keys_path` to `RapidOcrModel` initialization, ensuring the recognition model can load the correct dictionary (e.g., for Latin characters). Signed-off-by: Yorick Terweijden * style(rapidocr-options): fix alignment of `rec_keys_path` comment Adjusted the alignment of the comment for `rec_keys_path` to maintain consistent formatting. No functional changes were made. Signed-off-by: Yorick Terweijden --------- Signed-off-by: Yorick Terweijden --- docling/datamodel/pipeline_options.py | 1 + docling/models/rapid_ocr_model.py | 1 + 2 files changed, 2 insertions(+) diff --git a/docling/datamodel/pipeline_options.py b/docling/datamodel/pipeline_options.py index 00ab7b4..14ca75b 100644 --- a/docling/datamodel/pipeline_options.py +++ b/docling/datamodel/pipeline_options.py @@ -119,6 +119,7 @@ class RapidOcrOptions(OcrOptions): det_model_path: Optional[str] = None # same default as rapidocr cls_model_path: Optional[str] = None # same default as rapidocr rec_model_path: Optional[str] = None # same default as rapidocr + rec_keys_path: Optional[str] = None # same default as rapidocr model_config = ConfigDict( extra="forbid", diff --git a/docling/models/rapid_ocr_model.py b/docling/models/rapid_ocr_model.py index 5882ffc..fa3fbed 100644 --- a/docling/models/rapid_ocr_model.py +++ b/docling/models/rapid_ocr_model.py @@ -59,6 +59,7 @@ class RapidOcrModel(BaseOcrModel): det_model_path=self.options.det_model_path, cls_model_path=self.options.cls_model_path, rec_model_path=self.options.rec_model_path, + rec_keys_path=self.options.rec_keys_path, ) def __call__(