fix: Runtime error when the Pandas Series is not of string type (#1024)
Signed-off-by: fan <fansluck@qq.com>
This commit is contained in:
parent
dfcc30dddb
commit
6796f0a132
@@ -114,7 +114,9 @@ class TesseractOcrCliModel(BaseOcrModel):
|
||||
# _log.info("df: ", df.head())
|
||||
|
||||
# Filter rows that contain actual text (ignore header or empty rows)
|
||||
- df_filtered = df[df["text"].notnull() & (df["text"].str.strip() != "")]
|
||||
+ df_filtered = df[
|
||||
+ df["text"].notnull() & (df["text"].apply(str).str.strip() != "")
|
||||
+ ]
|
||||
|
||||
return df_filtered
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user