fix: Runtime error when the pandas Series contains non-string values (#1024)

Signed-off-by: fan <fansluck@qq.com>
This commit is contained in:
fanszoro 2025-02-20 22:41:41 +08:00 committed by GitHub
parent dfcc30dddb
commit 6796f0a132
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -114,7 +114,9 @@ class TesseractOcrCliModel(BaseOcrModel):
# _log.info("df: ", df.head())
# Filter rows that contain actual text (ignore header or empty rows)
-df_filtered = df[df["text"].notnull() & (df["text"].str.strip() != "")]
+df_filtered = df[
+df["text"].notnull() & (df["text"].apply(str).str.strip() != "")
+]
return df_filtered