fix(mspowerpoint): handle invalid images in PowerPoint slides (#650)
- Add error handling for images that cannot be loaded by Pillow
- Improve resilience when encountering corrupted or unsupported image formats
- Maintain processing of other slide elements even if an image fails to load

Signed-off-by: Tendo33 <sjf1998112@gmail.com>
This commit is contained in:
parent
0ee849e8bc
commit
d49650c54f
@ -16,7 +16,7 @@ from docling_core.types.doc import (
|
|||||||
TableCell,
|
TableCell,
|
||||||
TableData,
|
TableData,
|
||||||
)
|
)
|
||||||
from PIL import Image
|
from PIL import Image, UnidentifiedImageError
|
||||||
from pptx import Presentation
|
from pptx import Presentation
|
||||||
from pptx.enum.shapes import MSO_SHAPE_TYPE, PP_PLACEHOLDER
|
from pptx.enum.shapes import MSO_SHAPE_TYPE, PP_PLACEHOLDER
|
||||||
|
|
||||||
@ -120,6 +120,7 @@ class MsPowerpointDocumentBackend(DeclarativeDocumentBackend, PaginatedDocumentB
|
|||||||
bullet_type = "None"
|
bullet_type = "None"
|
||||||
list_text = ""
|
list_text = ""
|
||||||
list_label = GroupLabel.LIST
|
list_label = GroupLabel.LIST
|
||||||
|
doc_label = DocItemLabel.LIST_ITEM
|
||||||
prov = self.generate_prov(shape, slide_ind, shape.text.strip())
|
prov = self.generate_prov(shape, slide_ind, shape.text.strip())
|
||||||
|
|
||||||
# Identify if shape contains lists
|
# Identify if shape contains lists
|
||||||
@ -276,6 +277,7 @@ class MsPowerpointDocumentBackend(DeclarativeDocumentBackend, PaginatedDocumentB
|
|||||||
im_dpi, _ = image.dpi
|
im_dpi, _ = image.dpi
|
||||||
|
|
||||||
# Open it with PIL
|
# Open it with PIL
|
||||||
|
try:
|
||||||
pil_image = Image.open(BytesIO(image_bytes))
|
pil_image = Image.open(BytesIO(image_bytes))
|
||||||
|
|
||||||
# shape has picture
|
# shape has picture
|
||||||
@ -286,6 +288,8 @@ class MsPowerpointDocumentBackend(DeclarativeDocumentBackend, PaginatedDocumentB
|
|||||||
caption=None,
|
caption=None,
|
||||||
prov=prov,
|
prov=prov,
|
||||||
)
|
)
|
||||||
|
except (UnidentifiedImageError, OSError) as e:
|
||||||
|
_log.warning(f"Warning: image cannot be loaded by Pillow: {e}")
|
||||||
return
|
return
|
||||||
|
|
||||||
def handle_tables(self, shape, parent_slide, slide_ind, doc):
|
def handle_tables(self, shape, parent_slide, slide_ind, doc):
|
||||||
|
Loading…
Reference in New Issue
Block a user