fix: ParserError EOF inside string (#470) (#472)

Signed-off-by: guglie <gdguglie@gmail.com>
This commit is contained in:
guglie 2024-12-03 11:21:18 +01:00 committed by GitHub
parent 5ba3807f31
commit c90c41c391
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -1,3 +1,4 @@
+import csv
import io
import logging
import os
@@ -96,7 +97,7 @@ class TesseractOcrCliModel(BaseOcrModel):
# _log.info(decoded_data)
# Read the TSV file generated by Tesseract
-df = pd.read_csv(io.StringIO(decoded_data), sep="\t")
+df = pd.read_csv(io.StringIO(decoded_data), quoting=csv.QUOTE_NONE, sep="\t")
# Display the dataframe (optional)
# _log.info("df: ", df.head())