From 9f8b479f17bbfaf79c3c897980ad15742ec86568 Mon Sep 17 00:00:00 2001
From: jimkarag02 <112767673+jimkarag02@users.noreply.github.com>
Date: Wed, 14 May 2025 16:05:52 +0300
Subject: [PATCH] fix(ocr): orig field in TesseractOcrCliModel as str (#1553)

fix: ensure orig and text are both strings in TesseractOcrCliModel

Signed-off-by: Dimitris Karagatslis
---
 docling/models/tesseract_ocr_cli_model.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docling/models/tesseract_ocr_cli_model.py b/docling/models/tesseract_ocr_cli_model.py
index 91b4555..33ca0c1 100644
--- a/docling/models/tesseract_ocr_cli_model.py
+++ b/docling/models/tesseract_ocr_cli_model.py
@@ -249,7 +249,7 @@ class TesseractOcrCliModel(BaseOcrModel):
                             cell = TextCell(
                                 index=ix,
                                 text=str(text),
-                                orig=text,
+                                orig=str(text),
                                 from_ocr=True,
                                 confidence=conf / 100.0,
                                 rect=BoundingRectangle.from_bounding_box(