diff --git a/docling/models/tesseract_ocr_cli_model.py b/docling/models/tesseract_ocr_cli_model.py index ababe67..16e1629 100644 --- a/docling/models/tesseract_ocr_cli_model.py +++ b/docling/models/tesseract_ocr_cli_model.py @@ -1,3 +1,4 @@ +import csv import io import logging import os @@ -96,7 +97,7 @@ class TesseractOcrCliModel(BaseOcrModel): # _log.info(decoded_data) # Read the TSV file generated by Tesseract - df = pd.read_csv(io.StringIO(decoded_data), sep="\t") + df = pd.read_csv(io.StringIO(decoded_data), quoting=csv.QUOTE_NONE, sep="\t") # Display the dataframe (optional) # _log.info("df: ", df.head())