From ae39a9411a09b2165ac745af358dea644f868e26 Mon Sep 17 00:00:00 2001 From: Nikos Livathinos <100353117+nikos-livathinos@users.noreply.github.com> Date: Mon, 30 Jun 2025 10:55:56 +0200 Subject: [PATCH] fix: Ensure that TesseractOcrModel does not crash in case OSD is not installed (#1866) fix: Ensure that TesseractOcrModel does not crash if tesseract OSD is not installed Signed-off-by: Nikos Livathinos --- docling/models/tesseract_ocr_model.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/docling/models/tesseract_ocr_model.py b/docling/models/tesseract_ocr_model.py index 0d52087..ed6306b 100644 --- a/docling/models/tesseract_ocr_model.py +++ b/docling/models/tesseract_ocr_model.py @@ -144,7 +144,10 @@ class TesseractOcrModel(BaseOcrModel): local_reader = self.reader self.osd_reader.SetImage(high_res_image) + + doc_orientation = 0 osd = self.osd_reader.DetectOrientationScript() + # No text, or Orientation and Script detection failure if osd is None: _log.error( @@ -158,11 +161,14 @@ class TesseractOcrModel(BaseOcrModel): # to OCR in the hope OCR will succeed while OSD failed if self._is_auto: continue - doc_orientation = parse_tesseract_orientation(osd["orient_deg"]) - if doc_orientation != 0: - high_res_image = high_res_image.rotate( - -doc_orientation, expand=True + else: + doc_orientation = parse_tesseract_orientation( + osd["orient_deg"] ) + if doc_orientation != 0: + high_res_image = high_res_image.rotate( + -doc_orientation, expand=True + ) if self._is_auto: script = osd["script_name"] script = map_tesseract_script(script)