From 415b877984fd89884e97b4740bc553e800055e0e Mon Sep 17 00:00:00 2001 From: Cesar Berrospi Ramis <75900930+ceberam@users.noreply.github.com> Date: Fri, 11 Apr 2025 13:04:00 +0200 Subject: [PATCH] fix(docx): declare image_data variable when handling pictures (#1359) Signed-off-by: Cesar Berrospi Ramis <75900930+ceberam@users.noreply.github.com> --- docling/backend/msword_backend.py | 32 +++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/docling/backend/msword_backend.py b/docling/backend/msword_backend.py index 5530bba..5915c0a 100644 --- a/docling/backend/msword_backend.py +++ b/docling/backend/msword_backend.py @@ -850,7 +850,8 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend): def _handle_pictures( self, docx_obj: DocxDocument, drawing_blip: Any, doc: DoclingDocument ) -> None: - def get_docx_image(drawing_blip): + def get_docx_image(drawing_blip: Any) -> Optional[bytes]: + image_data: Optional[bytes] = None rId = drawing_blip[0].get( "{http://schemas.openxmlformats.org/officeDocument/2006/relationships}embed" ) @@ -862,19 +863,26 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend): level = self._get_level() # Open the BytesIO object with PIL to create an Image - try: - image_data = get_docx_image(drawing_blip) - image_bytes = BytesIO(image_data) - pil_image = Image.open(image_bytes) - doc.add_picture( - parent=self.parents[level - 1], - image=ImageRef.from_pil(image=pil_image, dpi=72), - caption=None, - ) - except (UnidentifiedImageError, OSError) as e: - _log.warning("Warning: image cannot be loaded by Pillow") + image_data: Optional[bytes] = get_docx_image(drawing_blip) + if image_data is None: + _log.warning("Warning: image cannot be found") doc.add_picture( parent=self.parents[level - 1], caption=None, ) + else: + try: + image_bytes = BytesIO(image_data) + pil_image = Image.open(image_bytes) + doc.add_picture( + parent=self.parents[level - 1], + image=ImageRef.from_pil(image=pil_image, dpi=72), + caption=None, + ) + except (UnidentifiedImageError, OSError) as e: + _log.warning("Warning: image cannot be loaded by Pillow") + doc.add_picture( + parent=self.parents[level - 1], + caption=None, + ) return