fix: Fix for the crash when encountering WMF images in pptx and docx (#837)
* Fix for the crash when encountering WMF images in pptx and docx backends on non-Windows platforms Signed-off-by: Maksym Lysak <mly@zurich.ibm.com> * Updated faq Signed-off-by: Maksym Lysak <mly@zurich.ibm.com> --------- Signed-off-by: Maksym Lysak <mly@zurich.ibm.com> Co-authored-by: Maksym Lysak <mly@zurich.ibm.com>
This commit is contained in:
parent
d01a2e73ee
commit
fea0a99a95
@ -271,13 +271,12 @@ class MsPowerpointDocumentBackend(DeclarativeDocumentBackend, PaginatedDocumentB
|
|||||||
return
|
return
|
||||||
|
|
||||||
def handle_pictures(self, shape, parent_slide, slide_ind, doc):
|
def handle_pictures(self, shape, parent_slide, slide_ind, doc):
|
||||||
# Get the image bytes
|
|
||||||
image = shape.image
|
|
||||||
image_bytes = image.blob
|
|
||||||
im_dpi, _ = image.dpi
|
|
||||||
|
|
||||||
# Open it with PIL
|
# Open it with PIL
|
||||||
try:
|
try:
|
||||||
|
# Get the image bytes
|
||||||
|
image = shape.image
|
||||||
|
image_bytes = image.blob
|
||||||
|
im_dpi, _ = image.dpi
|
||||||
pil_image = Image.open(BytesIO(image_bytes))
|
pil_image = Image.open(BytesIO(image_bytes))
|
||||||
|
|
||||||
# shape has picture
|
# shape has picture
|
||||||
|
@ -520,11 +520,11 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
image_data = image_part.blob # Get the binary image data
|
image_data = image_part.blob # Get the binary image data
|
||||||
return image_data
|
return image_data
|
||||||
|
|
||||||
image_data = get_docx_image(element, drawing_blip)
|
|
||||||
image_bytes = BytesIO(image_data)
|
|
||||||
level = self.get_level()
|
level = self.get_level()
|
||||||
# Open the BytesIO object with PIL to create an Image
|
# Open the BytesIO object with PIL to create an Image
|
||||||
try:
|
try:
|
||||||
|
image_data = get_docx_image(element, drawing_blip)
|
||||||
|
image_bytes = BytesIO(image_data)
|
||||||
pil_image = Image.open(image_bytes)
|
pil_image = Image.open(image_bytes)
|
||||||
doc.add_picture(
|
doc.add_picture(
|
||||||
parent=self.parents[level - 1],
|
parent=self.parents[level - 1],
|
||||||
|
@ -151,3 +151,11 @@ This is a collection of FAQ collected from the user questions on <https://github
|
|||||||
pipeline_options = PdfPipelineOptions()
|
pipeline_options = PdfPipelineOptions()
|
||||||
pipeline_options.ocr_options.lang = ["fr", "de", "es", "en"] # example of languages for EasyOCR
|
pipeline_options.ocr_options.lang = ["fr", "de", "es", "en"] # example of languages for EasyOCR
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
|
??? question "Some images are missing from MS Word and Powerpoint"
|
||||||
|
|
||||||
|
### Some images are missing from MS Word and Powerpoint
|
||||||
|
|
||||||
|
The image processing library used by Docling can handle embedded WMF images only on the Windows platform.
|
||||||
|
If you are on another operating system, these images will be ignored.
|
||||||
|
Loading…
Reference in New Issue
Block a user