From 64ac043786efdece0c61827051a5b41dddf6c5d7 Mon Sep 17 00:00:00 2001
From: Michele Dolfi <97102151+dolfim-ibm@users.noreply.github.com>
Date: Thu, 19 Jun 2025 04:10:40 -0500
Subject: [PATCH] docs: support running examples from root or subfolder (#1816)

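Resolve the example input paths relative to each script's own location
(`Path(__file__).parent / "../../tests/data"`) instead of the current working
directory, so the examples can be run from the repository root or from a
subfolder.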

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
---
 docs/examples/batch_convert.py                 | 11 ++++++-----
 docs/examples/custom_convert.py                |  3 ++-
 docs/examples/develop_formula_understanding.py |  3 ++-
 docs/examples/develop_picture_enrichment.py    |  3 ++-
 docs/examples/export_figures.py                |  3 ++-
 docs/examples/export_multimodal.py             |  3 ++-
 docs/examples/export_tables.py                 |  3 ++-
 docs/examples/full_page_ocr.py                 |  5 +++--
 docs/examples/pictures_description_api.py      |  3 ++-
 docs/examples/run_with_accelerator.py          |  5 +++--
 docs/examples/tesseract_lang_detection.py      |  5 +++--
 docs/examples/translate.py                     |  3 ++-
 docs/examples/vlm_pipeline_api_model.py        |  4 ++--
 13 files changed, 33 insertions(+), 21 deletions(-)

diff --git a/docs/examples/batch_convert.py b/docs/examples/batch_convert.py
index 25eb2ba..6a4da7d 100644
--- a/docs/examples/batch_convert.py
+++ b/docs/examples/batch_convert.py
@@ -121,14 +121,15 @@ def export_documents(
 def main():
     logging.basicConfig(level=logging.INFO)
 
+    data_folder = Path(__file__).parent / "../../tests/data"
     input_doc_paths = [
-        Path("./tests/data/pdf/2206.01062.pdf"),
-        Path("./tests/data/pdf/2203.01017v2.pdf"),
-        Path("./tests/data/pdf/2305.03393v1.pdf"),
-        Path("./tests/data/pdf/redp5110_sampled.pdf"),
+        data_folder / "pdf/2206.01062.pdf",
+        data_folder / "pdf/2203.01017v2.pdf",
+        data_folder / "pdf/2305.03393v1.pdf",
+        data_folder / "pdf/redp5110_sampled.pdf",
     ]
 
-    # buf = BytesIO(Path("./test/data/2206.01062.pdf").open("rb").read())
+    # buf = BytesIO((data_folder / "pdf/2206.01062.pdf").open("rb").read())
     # docs = [DocumentStream(name="my_doc.pdf", stream=buf)]
     # input = DocumentConversionInput.from_streams(docs)
 
diff --git a/docs/examples/custom_convert.py b/docs/examples/custom_convert.py
index 12dfacd..6a90075 100644
--- a/docs/examples/custom_convert.py
+++ b/docs/examples/custom_convert.py
@@ -16,7 +16,8 @@ _log = logging.getLogger(__name__)
 def main():
     logging.basicConfig(level=logging.INFO)
 
-    input_doc_path = Path("./tests/data/pdf/2206.01062.pdf")
+    data_folder = Path(__file__).parent / "../../tests/data"
+    input_doc_path = data_folder / "pdf/2206.01062.pdf"
 
     ###########################################################################
 
diff --git a/docs/examples/develop_formula_understanding.py b/docs/examples/develop_formula_understanding.py
index beb1575..8b90613 100644
--- a/docs/examples/develop_formula_understanding.py
+++ b/docs/examples/develop_formula_understanding.py
@@ -71,7 +71,8 @@ class ExampleFormulaUnderstandingPipeline(StandardPdfPipeline):
 def main():
     logging.basicConfig(level=logging.INFO)
 
-    input_doc_path = Path("./tests/data/pdf/2203.01017v2.pdf")
+    data_folder = Path(__file__).parent / "../../tests/data"
+    input_doc_path = data_folder / "pdf/2203.01017v2.pdf"
 
     pipeline_options = ExampleFormulaUnderstandingPipelineOptions()
     pipeline_options.do_formula_understanding = True
diff --git a/docs/examples/develop_picture_enrichment.py b/docs/examples/develop_picture_enrichment.py
index 9e3d306..f027898 100644
--- a/docs/examples/develop_picture_enrichment.py
+++ b/docs/examples/develop_picture_enrichment.py
@@ -76,7 +76,8 @@ class ExamplePictureClassifierPipeline(StandardPdfPipeline):
 def main():
     logging.basicConfig(level=logging.INFO)
 
-    input_doc_path = Path("./tests/data/pdf/2206.01062.pdf")
+    data_folder = Path(__file__).parent / "../../tests/data"
+    input_doc_path = data_folder / "pdf/2206.01062.pdf"
 
     pipeline_options = ExamplePictureClassifierPipelineOptions()
     pipeline_options.images_scale = 2.0
diff --git a/docs/examples/export_figures.py b/docs/examples/export_figures.py
index 8ed14a7..ab9d1e9 100644
--- a/docs/examples/export_figures.py
+++ b/docs/examples/export_figures.py
@@ -16,7 +16,8 @@ IMAGE_RESOLUTION_SCALE = 2.0
 def main():
     logging.basicConfig(level=logging.INFO)
 
-    input_doc_path = Path("./tests/data/pdf/2206.01062.pdf")
+    data_folder = Path(__file__).parent / "../../tests/data"
+    input_doc_path = data_folder / "pdf/2206.01062.pdf"
     output_dir = Path("scratch")
 
     # Important: For operating with page images, we must keep them, otherwise the DocumentConverter
diff --git a/docs/examples/export_multimodal.py b/docs/examples/export_multimodal.py
index bef74bf..addbe37 100644
--- a/docs/examples/export_multimodal.py
+++ b/docs/examples/export_multimodal.py
@@ -19,7 +19,8 @@ IMAGE_RESOLUTION_SCALE = 2.0
 def main():
     logging.basicConfig(level=logging.INFO)
 
-    input_doc_path = Path("./tests/data/pdf/2206.01062.pdf")
+    data_folder = Path(__file__).parent / "../../tests/data"
+    input_doc_path = data_folder / "pdf/2206.01062.pdf"
     output_dir = Path("scratch")
 
     # Important: For operating with page images, we must keep them, otherwise the DocumentConverter
diff --git a/docs/examples/export_tables.py b/docs/examples/export_tables.py
index 9a911d8..4d6c2b4 100644
--- a/docs/examples/export_tables.py
+++ b/docs/examples/export_tables.py
@@ -12,7 +12,8 @@ _log = logging.getLogger(__name__)
 def main():
     logging.basicConfig(level=logging.INFO)
 
-    input_doc_path = Path("./tests/data/pdf/2206.01062.pdf")
+    data_folder = Path(__file__).parent / "../../tests/data"
+    input_doc_path = data_folder / "pdf/2206.01062.pdf"
     output_dir = Path("scratch")
 
     doc_converter = DocumentConverter()
diff --git a/docs/examples/full_page_ocr.py b/docs/examples/full_page_ocr.py
index 5525e87..7ff5f82 100644
--- a/docs/examples/full_page_ocr.py
+++ b/docs/examples/full_page_ocr.py
@@ -9,7 +9,8 @@ from docling.document_converter import DocumentConverter, PdfFormatOption
 
 
 def main():
-    input_doc = Path("./tests/data/pdf/2206.01062.pdf")
+    data_folder = Path(__file__).parent / "../../tests/data"
+    input_doc_path = data_folder / "pdf/2206.01062.pdf"
 
     pipeline_options = PdfPipelineOptions()
     pipeline_options.do_ocr = True
@@ -32,7 +33,7 @@ def main():
         }
     )
 
-    doc = converter.convert(input_doc).document
+    doc = converter.convert(input_doc_path).document
     md = doc.export_to_markdown()
     print(md)
 
diff --git a/docs/examples/pictures_description_api.py b/docs/examples/pictures_description_api.py
index 938d0a5..44ffc22 100644
--- a/docs/examples/pictures_description_api.py
+++ b/docs/examples/pictures_description_api.py
@@ -96,7 +96,8 @@ def watsonx_vlm_options():
 def main():
     logging.basicConfig(level=logging.INFO)
 
-    input_doc_path = Path("./tests/data/pdf/2206.01062.pdf")
+    data_folder = Path(__file__).parent / "../../tests/data"
+    input_doc_path = data_folder / "pdf/2206.01062.pdf"
 
     pipeline_options = PdfPipelineOptions(
         enable_remote_services=True  # <-- this is required!
diff --git a/docs/examples/run_with_accelerator.py b/docs/examples/run_with_accelerator.py
index 6b3ddc6..31d467d 100644
--- a/docs/examples/run_with_accelerator.py
+++ b/docs/examples/run_with_accelerator.py
@@ -10,7 +10,8 @@ from docling.document_converter import DocumentConverter, PdfFormatOption
 
 
 def main():
-    input_doc = Path("./tests/data/pdf/2206.01062.pdf")
+    data_folder = Path(__file__).parent / "../../tests/data"
+    input_doc_path = data_folder / "pdf/2206.01062.pdf"
 
     # Explicitly set the accelerator
     # accelerator_options = AcceleratorOptions(
@@ -47,7 +48,7 @@ def main():
     settings.debug.profile_pipeline_timings = True
 
     # Convert the document
-    conversion_result = converter.convert(input_doc)
+    conversion_result = converter.convert(input_doc_path)
     doc = conversion_result.document
 
     # List with total time per document
diff --git a/docs/examples/tesseract_lang_detection.py b/docs/examples/tesseract_lang_detection.py
index 37859b9..bb11708 100644
--- a/docs/examples/tesseract_lang_detection.py
+++ b/docs/examples/tesseract_lang_detection.py
@@ -9,7 +9,8 @@ from docling.document_converter import DocumentConverter, PdfFormatOption
 
 
 def main():
-    input_doc = Path("./tests/data/pdf/2206.01062.pdf")
+    data_folder = Path(__file__).parent / "../../tests/data"
+    input_doc_path = data_folder / "pdf/2206.01062.pdf"
 
     # Set lang=["auto"] with a tesseract OCR engine: TesseractOcrOptions, TesseractCliOcrOptions
     # ocr_options = TesseractOcrOptions(lang=["auto"])
@@ -27,7 +28,7 @@ def main():
         }
     )
 
-    doc = converter.convert(input_doc).document
+    doc = converter.convert(input_doc_path).document
     md = doc.export_to_markdown()
     print(md)
 
diff --git a/docs/examples/translate.py b/docs/examples/translate.py
index f2711a2..4698168 100644
--- a/docs/examples/translate.py
+++ b/docs/examples/translate.py
@@ -30,7 +30,8 @@ def translate(text: str, src: str = "en", dest: str = "de"):
 def main():
     logging.basicConfig(level=logging.INFO)
 
-    input_doc_path = Path("./tests/data/pdf/2206.01062.pdf")
+    data_folder = Path(__file__).parent / "../../tests/data"
+    input_doc_path = data_folder / "pdf/2206.01062.pdf"
     output_dir = Path("scratch")
 
     # Important: For operating with page images, we must keep them, otherwise the DocumentConverter
diff --git a/docs/examples/vlm_pipeline_api_model.py b/docs/examples/vlm_pipeline_api_model.py
index 20ca259..679f7bd 100644
--- a/docs/examples/vlm_pipeline_api_model.py
+++ b/docs/examples/vlm_pipeline_api_model.py
@@ -95,8 +95,8 @@ def watsonx_vlm_options(model: str, prompt: str):
 def main():
     logging.basicConfig(level=logging.INFO)
 
-    # input_doc_path = Path("./tests/data/pdf/2206.01062.pdf")
-    input_doc_path = Path("./tests/data/pdf/2305.03393v1-pg9.pdf")
+    data_folder = Path(__file__).parent / "../../tests/data"
+    input_doc_path = data_folder / "pdf/2305.03393v1-pg9.pdf"
 
     pipeline_options = VlmPipelineOptions(
         enable_remote_services=True  # <-- this is required!