docs: add export with embedded images (#175)

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
This commit is contained in:
Michele Dolfi 2024-10-24 20:19:41 +02:00 committed by GitHub
parent 8208c93e3a
commit 8d356aa247
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -2,7 +2,7 @@ import logging
import time import time
from pathlib import Path from pathlib import Path
from docling_core.types.doc import PictureItem, TableItem from docling_core.types.doc import ImageRefMode, PictureItem, TableItem
from docling.datamodel.base_models import FigureElement, InputFormat, Table from docling.datamodel.base_models import FigureElement, InputFormat, Table
from docling.datamodel.pipeline_options import PdfPipelineOptions from docling.datamodel.pipeline_options import PdfPipelineOptions
@@ -71,6 +71,12 @@ def main():
with element_image_filename.open("wb") as fp: with element_image_filename.open("wb") as fp:
element.image.pil_image.save(fp, "PNG") element.image.pil_image.save(fp, "PNG")
# Save markdown with embedded pictures
content_md = conv_res.document.export_to_markdown(image_mode=ImageRefMode.EMBEDDED)
md_filename = output_dir / f"{doc_filename}-with-images.md"
with md_filename.open("w") as fp:
fp.write(content_md)
end_time = time.time() - start_time end_time = time.time() - start_time
_log.info(f"Document converted and figures exported in {end_time:.2f} seconds.") _log.info(f"Document converted and figures exported in {end_time:.2f} seconds.")