fix: Runtime error when a Pandas Series does not always contain string values (#1024)
Signed-off-by: fan <fansluck@qq.com>
This commit is contained in:
parent
dfcc30dddb
commit
6796f0a132
@@ -114,7 +114,9 @@ class TesseractOcrCliModel(BaseOcrModel):
|
|||||||
# _log.info("df: ", df.head())
|
# _log.info("df: ", df.head())
|
||||||
|
|
||||||
# Filter rows that contain actual text (ignore header or empty rows)
|
# Filter rows that contain actual text (ignore header or empty rows)
|
||||||
df_filtered = df[df["text"].notnull() & (df["text"].str.strip() != "")]
|
df_filtered = df[
|
||||||
|
df["text"].notnull() & (df["text"].apply(str).str.strip() != "")
|
||||||
|
]
|
||||||
|
|
||||||
return df_filtered
|
return df_filtered
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user