fix: auto-recognize .xlsx, .docx and .pptx files (#1340)
* bug: auto-recognize .xlsx files

  Signed-off-by: Tim Kellogg <timothy.kellogg@gmail.com>

* apply styling

  Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* apply to other MS Office zip formats

  Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

---------

Signed-off-by: Tim Kellogg <timothy.kellogg@gmail.com>
Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
Co-authored-by: Michele Dolfi <dol@zurich.ibm.com>
This commit is contained in:
parent
b295da4bfe
commit
0de70e7991
@ -283,6 +283,13 @@ class _DocumentConversionInput(BaseModel):
|
||||
if mime is None: # must guess from
|
||||
with obj.open("rb") as f:
|
||||
content = f.read(1024) # Read first 1KB
|
||||
if mime is not None and mime.lower() == "application/zip":
|
||||
if obj.suffixes[-1].lower() == ".xlsx":
|
||||
mime = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
|
||||
elif obj.suffixes[-1].lower() == ".docx":
|
||||
mime = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
|
||||
elif obj.suffixes[-1].lower() == ".pptx":
|
||||
mime = "application/vnd.openxmlformats-officedocument.presentationml.presentation"
|
||||
|
||||
elif isinstance(obj, DocumentStream):
|
||||
content = obj.stream.read(8192)
|
||||
|
Loading…
Reference in New Issue
Block a user