docs: revamp picture description example (#1015)

* docs: revamp picture description example Signed-off-by: Panos Vagenas <pva@zurich.ibm.com> * Improvements for visualization example (#1017) * fix colab install, use granite and improve viz of description Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * switch docs to notebook Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * show results with all models Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * show other vlm Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> --------- Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> --------- Signed-off-by: Panos Vagenas <pva@zurich.ibm.com> Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> Co-authored-by: Michele Dolfi <dol@zurich.ibm.com>
2025-02-19 11:28:54 +01:00
parent 7450050ace
commit 27c04007bc
3 changed files with 344 additions and 49 deletions
@@ -1,48 +0,0 @@
-import logging
-from pathlib import Path
-
-from docling_core.types.doc import PictureItem
-
-from docling.datamodel.base_models import InputFormat
-from docling.datamodel.pipeline_options import (
-    PdfPipelineOptions,
-    granite_picture_description,
-    smolvlm_picture_description,
-)
-from docling.document_converter import DocumentConverter, PdfFormatOption
-
-
-def main():
-    logging.basicConfig(level=logging.INFO)
-
-    input_doc_path = Path("./tests/data/pdf/2206.01062.pdf")
-
-    pipeline_options = PdfPipelineOptions()
-    pipeline_options.do_picture_description = True
-    pipeline_options.picture_description_options = smolvlm_picture_description
-    # pipeline_options.picture_description_options = granite_picture_description
-
-    pipeline_options.picture_description_options.prompt = (
-        "Describe the image in three sentences. Be consise and accurate."
-    )
-
-    doc_converter = DocumentConverter(
-        format_options={
-            InputFormat.PDF: PdfFormatOption(
-                pipeline_options=pipeline_options,
-            )
-        }
-    )
-    result = doc_converter.convert(input_doc_path)
-
-    for element, _level in result.document.iterate_items():
-        if isinstance(element, PictureItem):
-            print(
-                f"Picture {element.self_ref}\n"
-                f"Caption: {element.caption_text(doc=result.document)}\n"
-                f"Annotations: {element.annotations}"
-            )
-
-
-if __name__ == "__main__":
-    main()
@@ -75,7 +75,7 @@ nav:
      - "Figure enrichment": examples/develop_picture_enrichment.py
      - "Table export": examples/export_tables.py
      - "Multimodal export": examples/export_multimodal.py
-      - "Annotate picture with local vlm": examples/pictures_description.py
+      - "Annotate picture with local vlm": examples/pictures_description.ipynb
      - "Annotate picture with remote vlm": examples/pictures_description_api.py
      - "Force full page OCR": examples/full_page_ocr.py
      - "Automatic OCR language detection with tesseract": examples/tesseract_lang_detection.py