diff --git a/docling/backend/mspowerpoint_backend.py b/docling/backend/mspowerpoint_backend.py index f595e4b..995969d 100644 --- a/docling/backend/mspowerpoint_backend.py +++ b/docling/backend/mspowerpoint_backend.py @@ -16,7 +16,7 @@ from docling_core.types.doc import ( TableCell, TableData, ) -from PIL import Image +from PIL import Image, UnidentifiedImageError from pptx import Presentation from pptx.enum.shapes import MSO_SHAPE_TYPE, PP_PLACEHOLDER @@ -120,6 +120,7 @@ class MsPowerpointDocumentBackend(DeclarativeDocumentBackend, PaginatedDocumentB bullet_type = "None" list_text = "" list_label = GroupLabel.LIST + doc_label = DocItemLabel.LIST_ITEM prov = self.generate_prov(shape, slide_ind, shape.text.strip()) # Identify if shape contains lists @@ -276,16 +277,19 @@ class MsPowerpointDocumentBackend(DeclarativeDocumentBackend, PaginatedDocumentB im_dpi, _ = image.dpi # Open it with PIL - pil_image = Image.open(BytesIO(image_bytes)) + try: + pil_image = Image.open(BytesIO(image_bytes)) - # shape has picture - prov = self.generate_prov(shape, slide_ind, "") - doc.add_picture( - parent=parent_slide, - image=ImageRef.from_pil(image=pil_image, dpi=im_dpi), - caption=None, - prov=prov, - ) + # shape has picture + prov = self.generate_prov(shape, slide_ind, "") + doc.add_picture( + parent=parent_slide, + image=ImageRef.from_pil(image=pil_image, dpi=im_dpi), + caption=None, + prov=prov, + ) + except (UnidentifiedImageError, OSError) as e: + _log.warning(f"Warning: image cannot be loaded by Pillow: {e}") return def handle_tables(self, shape, parent_slide, slide_ind, doc):