fix: Fix issue with detecting docx files and files with uppercase extensions (#1609)

Fix detection of files with uppercase extensions.

Signed-off-by: MoheyElDin Badr <moheyeldin.badr@gmail.com>
This commit is contained in:
MoheyElDin Badr 2025-05-20 20:42:37 +03:00 committed by GitHub
parent 0e00a263fa
commit f4d9d4111b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -302,7 +302,7 @@ class _DocumentConversionInput(BaseModel):
if ("." in obj.name and not obj.name.startswith("."))
else ""
)
mime = _DocumentConversionInput._mime_from_extension(ext)
mime = _DocumentConversionInput._mime_from_extension(ext.lower())
if mime is not None and mime.lower() == "application/zip":
objname = obj.name.lower()
if objname.endswith(".xlsx"):
@ -376,6 +376,13 @@ class _DocumentConversionInput(BaseModel):
mime = FormatToMimeType[InputFormat.JSON_DOCLING][0]
elif ext in FormatToExtensions[InputFormat.PDF]:
mime = FormatToMimeType[InputFormat.PDF][0]
elif ext in FormatToExtensions[InputFormat.DOCX]:
mime = FormatToMimeType[InputFormat.DOCX][0]
elif ext in FormatToExtensions[InputFormat.PPTX]:
mime = FormatToMimeType[InputFormat.PPTX][0]
elif ext in FormatToExtensions[InputFormat.XLSX]:
mime = FormatToMimeType[InputFormat.XLSX][0]
return mime
@staticmethod