From c47ae700ece2ea4efee17f82e4667c1ce9a0ed2a Mon Sep 17 00:00:00 2001 From: Nikos Livathinos <100353117+nikos-livathinos@users.noreply.github.com> Date: Tue, 11 Feb 2025 12:27:12 +0100 Subject: [PATCH] fix: Fix the initialization of the TesseractOcrModel (#935) Signed-off-by: Nikos Livathinos --- docling/models/tesseract_ocr_model.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docling/models/tesseract_ocr_model.py b/docling/models/tesseract_ocr_model.py index 5b70155..c41806f 100644 --- a/docling/models/tesseract_ocr_model.py +++ b/docling/models/tesseract_ocr_model.py @@ -22,6 +22,7 @@ class TesseractOcrModel(BaseOcrModel): self.scale = 3 # multiplier for 72 dpi == 216 dpi. self.reader = None self.osd_reader = None + self.script_readers: dict[str, tesserocr.PyTessBaseAPI] = {} if self.enabled: install_errmsg = ( @@ -57,8 +58,6 @@ class TesseractOcrModel(BaseOcrModel): _log.debug("Initializing TesserOCR: %s", tesseract_version) lang = "+".join(self.options.lang) - self.script_readers: dict[str, tesserocr.PyTessBaseAPI] = {} - if any([l.startswith("script/") for l in self._tesserocr_languages]): self.script_prefix = "script/" else: