fix: Added handling of grouped elements in pptx backend (#307)

* Added handling of grouped elements in pptx backend

  Signed-off-by: Maksym Lysak <mly@zurich.ibm.com>

* updated log.warn to warning

  Signed-off-by: Maksym Lysak <mly@zurich.ibm.com>

---------

Signed-off-by: Maksym Lysak <mly@zurich.ibm.com>
Co-authored-by: Maksym Lysak <mly@zurich.ibm.com>
2024-11-11 16:38:21 +01:00
parent 53bf2d1790
commit 81c8243a8b
1 changed file with 17 additions and 22 deletions
@@ -358,41 +358,36 @@ class MsPowerpointDocumentBackend(DeclarativeDocumentBackend, PaginatedDocumentB
            size = Size(width=slide_width, height=slide_height)
            parent_page = doc.add_page(page_no=slide_ind + 1, size=size)
            # parent_page = doc.add_page(page_no=slide_ind, size=size, hash=hash)
            # Loop through each shape in the slide
            for shape in slide.shapes:
            def handle_shapes(shape, parent_slide, slide_ind, doc):
                handle_groups(shape, parent_slide, slide_ind, doc)
                if shape.has_table:
                    # Handle Tables
                    self.handle_tables(shape, parent_slide, slide_ind, doc)
                if shape.shape_type == MSO_SHAPE_TYPE.PICTURE:
-                    # Handle Tables
+                    # Handle Pictures
                    self.handle_pictures(shape, parent_slide, slide_ind, doc)
                # If shape doesn't have any text, move on to the next shape
                if not hasattr(shape, "text"):
-                    continue
+                    return
                if shape.text is None:
-                    continue
+                    return
                if len(shape.text.strip()) == 0:
-                    continue
+                    return
                if not shape.has_text_frame:
-                    _log.warn("Warning: shape has text but not text_frame")
+                    _log.warning("Warning: shape has text but not text_frame")
-                    continue
+                    return
                # if shape.is_placeholder:
                # Handle Titles (Headers) and Subtitles
                # Check if the shape is a placeholder (titles are placeholders)
                # self.handle_title(shape, parent_slide, slide_ind, doc)
                # self.handle_text_elements(shape, parent_slide, slide_ind, doc)
                # else:
                # Handle other text elements, including lists (bullet lists, numbered lists)
                self.handle_text_elements(shape, parent_slide, slide_ind, doc)
                return
-                # figures...
+            def handle_groups(shape, parent_slide, slide_ind, doc):
-                # doc.add_figure(data=BaseFigureData(), parent=self.parents[self.level], caption=None)
+                if shape.shape_type == MSO_SHAPE_TYPE.GROUP:
                    for groupedshape in shape.shapes:
                        handle_shapes(groupedshape, parent_slide, slide_ind, doc)
            # Loop through each shape in the slide
            for shape in slide.shapes:
                handle_shapes(shape, parent_slide, slide_ind, doc)
        return doc