feat: Introduce support for GPU Accelerators (#593)

* Upgraded Layout Postprocessing, sending old code back to ERZ Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Implement hierarchical cluster layout processing Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Pass nested cluster processing through full pipeline Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Pass nested clusters through GLM as payload Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Move to_docling_document from ds-glm to this repo Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Clean up imports again Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * feat(Accelerator): Introduce options to control the num_threads and device from API, envvars, CLI. - Introduce the AcceleratorOptions, AcceleratorDevice and use them to set the device where the models run. - Introduce the accelerator_utils with function to decide the device and resolve the AUTO setting. - Refactor how the docling-ibm-models are called to match the new init signature of models. - Translate the accelerator options to the specific inputs for third-party models. - Extend the docling CLI with parameters to set the num_threads and device. - Add new unit tests. - Write a new example of how to use the accelerator options. * fix: Improve the pydantic objects in the pipeline_options and imports. Signed-off-by: Nikos Livathinos <nli@zurich.ibm.com> * fix: TableStructureModel: Refactor the artifacts path to use the new structure for fast/accurate model Signed-off-by: Nikos Livathinos <nli@zurich.ibm.com> * Updated test ground-truth Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Updated test ground-truth (again), bugfix for empty layout Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * fix: Do proper check to set the device in EasyOCR, RapidOCR. 
Signed-off-by: Nikos Livathinos <nli@zurich.ibm.com> * Rollback changes from main Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Update test gt Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Remove unused debug settings Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Review fixes Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Nail the accelerator defaults for MPS Signed-off-by: Christoph Auer <cau@zurich.ibm.com> --------- Signed-off-by: Christoph Auer <cau@zurich.ibm.com> Signed-off-by: Nikos Livathinos <nli@zurich.ibm.com> Co-authored-by: Christoph Auer <cau@zurich.ibm.com> Co-authored-by: Christoph Auer <60343111+cau-git@users.noreply.github.com>
2024-12-13 17:45:22 +01:00 · 2024-12-13 17:45:22 +01:00 · 19fad9261c
commit 19fad9261c
parent 365a1e7b98
38 changed files with 384 additions and 93 deletions
--- a/docling/cli/main.py
+++ b/docling/cli/main.py
@ -26,6 +26,8 @@ from docling.datamodel.base_models import (
 )
 from docling.datamodel.document import ConversionResult
 from docling.datamodel.pipeline_options import (
+    AcceleratorDevice,
+    AcceleratorOptions,
    EasyOcrOptions,
    OcrEngine,
    OcrMacOptions,
@ -257,6 +259,10 @@ def convert(
            help="The timeout for processing each document, in seconds.",
        ),
    ] = None,
+    num_threads: Annotated[int, typer.Option(..., help="Number of threads")] = 4,
+    device: Annotated[
+        AcceleratorDevice, typer.Option(..., help="Accelerator device")
+    ] = AcceleratorDevice.AUTO,
 ):
    if verbose == 0:
        logging.basicConfig(level=logging.WARNING)
@ -336,7 +342,9 @@ def convert(
        if ocr_lang_list is not None:
            ocr_options.lang = ocr_lang_list

+        accelerator_options = AcceleratorOptions(num_threads=num_threads, device=device)
        pipeline_options = PdfPipelineOptions(
+            accelerator_options=accelerator_options,
            do_ocr=ocr,
            ocr_options=ocr_options,
            do_table_structure=True,
--- a/docling/datamodel/pipeline_options.py
+++ b/docling/datamodel/pipeline_options.py
@ -1,8 +1,66 @@
+import logging
+import os
+import warnings
 from enum import Enum
 from pathlib import Path
-from typing import List, Literal, Optional, Union
+from typing import Annotated, Any, Dict, List, Literal, Optional, Tuple, Type, Union

-from pydantic import BaseModel, ConfigDict, Field
+from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator
+from pydantic_settings import (
+    BaseSettings,
+    PydanticBaseSettingsSource,
+    SettingsConfigDict,
+)
+from typing_extensions import deprecated
+
+_log = logging.getLogger(__name__)
+
+
+class AcceleratorDevice(str, Enum):
+    """Device choices for model inference; each member's value is a plain string."""
+
+    AUTO = "auto"  # let the library pick the best available device
+    CPU = "cpu"
+    CUDA = "cuda"
+    MPS = "mps"
+
+
+class AcceleratorOptions(BaseSettings):
+    model_config = SettingsConfigDict(
+        env_prefix="DOCLING_", env_nested_delimiter="_", populate_by_name=True
+    )
+
+    num_threads: int = 4
+    device: AcceleratorDevice = AcceleratorDevice.AUTO
+
+    @model_validator(mode="before")
+    @classmethod
+    def check_alternative_envvars(cls, data: Any) -> Any:
+        r"""
+        Set num_threads from the "alternative" envvar OMP_NUM_THREADS.
+        It is used only if it parses as an int, num_threads was not passed in, and DOCLING_NUM_THREADS is unset.
+
+        Notice: The standard pydantic settings mechanism with parameter "aliases" does not provide
+        the same functionality. In case the alias envvar is set and the user tries to override the
+        parameter in settings initialization, Pydantic treats the parameter provided in __init__()
+        as an extra input instead of simply overwriting the envvar value for that parameter.
+        """
+        if isinstance(data, dict):
+            input_num_threads = data.get("num_threads")
+
+            # Fall back to the alternative envvar only when num_threads was not given explicitly
+            if input_num_threads is None:
+                docling_num_threads = os.getenv("DOCLING_NUM_THREADS")
+                omp_num_threads = os.getenv("OMP_NUM_THREADS")
+                if docling_num_threads is None and omp_num_threads is not None:
+                    try:
+                        data["num_threads"] = int(omp_num_threads)
+                    except ValueError:
+                        _log.error(
+                            "Ignoring misformatted envvar OMP_NUM_THREADS '%s'",
+                            omp_num_threads,
+                        )
+        return data


 class TableFormerMode(str, Enum):
@ -78,9 +136,11 @@ class EasyOcrOptions(OcrOptions):

    kind: Literal["easyocr"] = "easyocr"
    lang: List[str] = ["fr", "de", "es", "en"]
-    use_gpu: bool = True  # same default as easyocr.Reader
+
+    use_gpu: Optional[bool] = None
+
    model_storage_directory: Optional[str] = None
-    download_enabled: bool = True  # same default as easyocr.Reader
+    download_enabled: bool = True

    model_config = ConfigDict(
        extra="forbid",
@ -153,6 +213,7 @@ class PipelineOptions(BaseModel):
        True  # This default will be set to False on a future version of docling
    )
    document_timeout: Optional[float] = None
+    accelerator_options: AcceleratorOptions = AcceleratorOptions()


 class PdfPipelineOptions(PipelineOptions):
--- a/docling/models/easyocr_model.py
+++ b/docling/models/easyocr_model.py
@ -1,4 +1,5 @@
 import logging
+import warnings
 from typing import Iterable

 import numpy
@ -7,16 +8,26 @@ from docling_core.types.doc import BoundingBox, CoordOrigin

 from docling.datamodel.base_models import Cell, OcrCell, Page
 from docling.datamodel.document import ConversionResult
-from docling.datamodel.pipeline_options import EasyOcrOptions
+from docling.datamodel.pipeline_options import (
+    AcceleratorDevice,
+    AcceleratorOptions,
+    EasyOcrOptions,
+)
 from docling.datamodel.settings import settings
 from docling.models.base_ocr_model import BaseOcrModel
+from docling.utils.accelerator_utils import decide_device
 from docling.utils.profiling import TimeRecorder

 _log = logging.getLogger(__name__)


 class EasyOcrModel(BaseOcrModel):
-    def __init__(self, enabled: bool, options: EasyOcrOptions):
+    def __init__(
+        self,
+        enabled: bool,
+        options: EasyOcrOptions,
+        accelerator_options: AcceleratorOptions,
+    ):
        super().__init__(enabled=enabled, options=options)
        self.options: EasyOcrOptions

@ -31,11 +42,32 @@ class EasyOcrModel(BaseOcrModel):
                    "Alternatively, Docling has support for other OCR engines. See the documentation."
                )

+            if self.options.use_gpu is None:
+                device = decide_device(accelerator_options.device)
+                # Enable easyocr GPU if running on CUDA, MPS
+                use_gpu = any(
+                    [
+                        device.startswith(x)
+                        for x in [
+                            AcceleratorDevice.CUDA.value,
+                            AcceleratorDevice.MPS.value,
+                        ]
+                    ]
+                )
+            else:
+                warnings.warn(
+                    "Deprecated field. Better to set the `accelerator_options.device` in `pipeline_options`. "
+                    "When `use_gpu and accelerator_options.device == AcceleratorDevice.CUDA` the GPU is used "
+                    "to run EasyOCR. Otherwise, EasyOCR runs in CPU."
+                )
+                use_gpu = self.options.use_gpu
+
            self.reader = easyocr.Reader(
                lang_list=self.options.lang,
-                gpu=self.options.use_gpu,
+                gpu=use_gpu,
                model_storage_directory=self.options.model_storage_directory,
                download_enabled=self.options.download_enabled,
+                verbose=False,
            )

    def __call__(
--- a/docling/models/layout_model.py
+++ b/docling/models/layout_model.py
@ -9,6 +9,7 @@ from docling_core.types.doc import CoordOrigin, DocItemLabel
 from docling_ibm_models.layoutmodel.layout_predictor import LayoutPredictor
 from PIL import ImageDraw

+import docling.utils.layout_utils as lu
 from docling.datamodel.base_models import (
    BoundingBox,
    Cell,
@ -17,9 +18,10 @@ from docling.datamodel.base_models import (
    Page,
 )
 from docling.datamodel.document import ConversionResult
+from docling.datamodel.pipeline_options import AcceleratorDevice, AcceleratorOptions
 from docling.datamodel.settings import settings
 from docling.models.base_model import BasePageModel
-from docling.utils import layout_utils as lu
+from docling.utils.accelerator_utils import decide_device
 from docling.utils.profiling import TimeRecorder

 _log = logging.getLogger(__name__)
@ -46,8 +48,16 @@ class LayoutModel(BasePageModel):
    FIGURE_LABEL = DocItemLabel.PICTURE
    FORMULA_LABEL = DocItemLabel.FORMULA

-    def __init__(self, artifacts_path: Path):
-        self.layout_predictor = LayoutPredictor(artifacts_path)  # TODO temporary
+    def __init__(self, artifacts_path: Path, accelerator_options: AcceleratorOptions):
+        device = decide_device(accelerator_options.device)
+
+        self.layout_predictor = LayoutPredictor(
+            artifact_path=str(artifacts_path),
+            device=device,
+            num_threads=accelerator_options.num_threads,
+            base_threshold=0.6,
+            blacklist_classes={"Form", "Key-Value Region"},
+        )

    def postprocess(self, clusters_in: List[Cluster], cells: List[Cell], page_height):
        MIN_INTERSECTION = 0.2
--- a/docling/models/rapid_ocr_model.py
+++ b/docling/models/rapid_ocr_model.py
@ -6,16 +6,26 @@ from docling_core.types.doc import BoundingBox, CoordOrigin

 from docling.datamodel.base_models import OcrCell, Page
 from docling.datamodel.document import ConversionResult
-from docling.datamodel.pipeline_options import RapidOcrOptions
+from docling.datamodel.pipeline_options import (
+    AcceleratorDevice,
+    AcceleratorOptions,
+    RapidOcrOptions,
+)
 from docling.datamodel.settings import settings
 from docling.models.base_ocr_model import BaseOcrModel
+from docling.utils.accelerator_utils import decide_device
 from docling.utils.profiling import TimeRecorder

 _log = logging.getLogger(__name__)


 class RapidOcrModel(BaseOcrModel):
-    def __init__(self, enabled: bool, options: RapidOcrOptions):
+    def __init__(
+        self,
+        enabled: bool,
+        options: RapidOcrOptions,
+        accelerator_options: AcceleratorOptions,
+    ):
        super().__init__(enabled=enabled, options=options)
        self.options: RapidOcrOptions

@ -30,52 +40,21 @@ class RapidOcrModel(BaseOcrModel):
                    "Alternatively, Docling has support for other OCR engines. See the documentation."
                )

-            # This configuration option will be revamped while introducing device settings for all models.
-            # For the moment we will default to auto and let onnx-runtime pick the best.
-            cls_use_cuda = True
-            rec_use_cuda = True
-            det_use_cuda = True
-            det_use_dml = True
-            cls_use_dml = True
-            rec_use_dml = True
-
-            # # Same as Defaults in RapidOCR
-            # cls_use_cuda = False
-            # rec_use_cuda = False
-            # det_use_cuda = False
-            # det_use_dml = False
-            # cls_use_dml = False
-            # rec_use_dml = False
-
-            # # If we set everything to true onnx-runtime would automatically choose the fastest accelerator
-            # if self.options.device == self.options.Device.AUTO:
-            #     cls_use_cuda = True
-            #     rec_use_cuda = True
-            #     det_use_cuda = True
-            #     det_use_dml = True
-            #     cls_use_dml = True
-            #     rec_use_dml = True
-
-            # # If we set use_cuda to true onnx would use the cuda device available in runtime if no cuda device is available it would run on CPU.
-            # elif self.options.device == self.options.Device.CUDA:
-            #     cls_use_cuda = True
-            #     rec_use_cuda = True
-            #     det_use_cuda = True
-
-            # # If we set use_dml to true onnx would use the dml device available in runtime if no dml device is available it would work on CPU.
-            # elif self.options.device == self.options.Device.DIRECTML:
-            #     det_use_dml = True
-            #     cls_use_dml = True
-            #     rec_use_dml = True
+            # Decide the accelerator devices
+            device = decide_device(accelerator_options.device)
+            use_cuda = str(AcceleratorDevice.CUDA.value).lower() in device
+            use_dml = accelerator_options.device == AcceleratorDevice.AUTO
+            intra_op_num_threads = accelerator_options.num_threads

            self.reader = RapidOCR(
                text_score=self.options.text_score,
-                cls_use_cuda=cls_use_cuda,
-                rec_use_cuda=rec_use_cuda,
-                det_use_cuda=det_use_cuda,
-                det_use_dml=det_use_dml,
-                cls_use_dml=cls_use_dml,
-                rec_use_dml=rec_use_dml,
+                cls_use_cuda=use_cuda,
+                rec_use_cuda=use_cuda,
+                det_use_cuda=use_cuda,
+                det_use_dml=use_dml,
+                cls_use_dml=use_dml,
+                rec_use_dml=use_dml,
+                intra_op_num_threads=intra_op_num_threads,
                print_verbose=self.options.print_verbose,
                det_model_path=self.options.det_model_path,
                cls_model_path=self.options.cls_model_path,
--- a/docling/models/table_structure_model.py
+++ b/docling/models/table_structure_model.py
@ -9,15 +9,25 @@ from PIL import ImageDraw

 from docling.datamodel.base_models import Page, Table, TableStructurePrediction
 from docling.datamodel.document import ConversionResult
-from docling.datamodel.pipeline_options import TableFormerMode, TableStructureOptions
+from docling.datamodel.pipeline_options import (
+    AcceleratorDevice,
+    AcceleratorOptions,
+    TableFormerMode,
+    TableStructureOptions,
+)
 from docling.datamodel.settings import settings
 from docling.models.base_model import BasePageModel
+from docling.utils.accelerator_utils import decide_device
 from docling.utils.profiling import TimeRecorder


 class TableStructureModel(BasePageModel):
    def __init__(
-        self, enabled: bool, artifacts_path: Path, options: TableStructureOptions
+        self,
+        enabled: bool,
+        artifacts_path: Path,
+        options: TableStructureOptions,
+        accelerator_options: AcceleratorOptions,
    ):
        self.options = options
        self.do_cell_matching = self.options.do_cell_matching
@ -26,16 +36,26 @@ class TableStructureModel(BasePageModel):
        self.enabled = enabled
        if self.enabled:
            if self.mode == TableFormerMode.ACCURATE:
-                artifacts_path = artifacts_path / "fat"
+                artifacts_path = artifacts_path / "accurate"
+            else:
+                artifacts_path = artifacts_path / "fast"

            # Third Party
            import docling_ibm_models.tableformer.common as c

+            device = decide_device(accelerator_options.device)
+
+            # Disable MPS here, until we know why it makes things slower.
+            if device == AcceleratorDevice.MPS.value:
+                device = AcceleratorDevice.CPU.value
+
            self.tm_config = c.read_config(f"{artifacts_path}/tm_config.json")
            self.tm_config["model"]["save_dir"] = artifacts_path
            self.tm_model_type = self.tm_config["model"]["type"]

-            self.tf_predictor = TFPredictor(self.tm_config)
+            self.tf_predictor = TFPredictor(
+                self.tm_config, device, accelerator_options.num_threads
+            )
            self.scale = 2.0  # Scale up table input images to 144 dpi

    def draw_table_and_cells(
--- a/docling/pipeline/standard_pdf_pipeline.py
+++ b/docling/pipeline/standard_pdf_pipeline.py
@ -38,7 +38,7 @@ _log = logging.getLogger(__name__)


 class StandardPdfPipeline(PaginatedPipeline):
-    _layout_model_path = "model_artifacts/layout/beehive_v0.0.5_pt"
+    _layout_model_path = "model_artifacts/layout"
    _table_model_path = "model_artifacts/tableformer"

    def __init__(self, pipeline_options: PdfPipelineOptions):
@ -75,7 +75,8 @@ class StandardPdfPipeline(PaginatedPipeline):
            # Layout model
            LayoutModel(
                artifacts_path=self.artifacts_path
-                / StandardPdfPipeline._layout_model_path
+                / StandardPdfPipeline._layout_model_path,
+                accelerator_options=pipeline_options.accelerator_options,
            ),
            # Table structure model
            TableStructureModel(
@ -83,6 +84,7 @@ class StandardPdfPipeline(PaginatedPipeline):
                artifacts_path=self.artifacts_path
                / StandardPdfPipeline._table_model_path,
                options=pipeline_options.table_structure_options,
+                accelerator_options=pipeline_options.accelerator_options,
            ),
            # Page assemble
            PageAssembleModel(options=PageAssembleOptions(keep_images=keep_images)),
@ -104,7 +106,7 @@ class StandardPdfPipeline(PaginatedPipeline):
            repo_id="ds4sd/docling-models",
            force_download=force,
            local_dir=local_dir,
-            revision="v2.0.1",
+            revision="v2.1.0",
        )

        return Path(download_path)
@ -114,6 +116,7 @@ class StandardPdfPipeline(PaginatedPipeline):
            return EasyOcrModel(
                enabled=self.pipeline_options.do_ocr,
                options=self.pipeline_options.ocr_options,
+                accelerator_options=self.pipeline_options.accelerator_options,
            )
        elif isinstance(self.pipeline_options.ocr_options, TesseractCliOcrOptions):
            return TesseractOcrCliModel(
@ -129,6 +132,7 @@ class StandardPdfPipeline(PaginatedPipeline):
            return RapidOcrModel(
                enabled=self.pipeline_options.do_ocr,
                options=self.pipeline_options.ocr_options,
+                accelerator_options=self.pipeline_options.accelerator_options,
            )
        elif isinstance(self.pipeline_options.ocr_options, OcrMacOptions):
            if "darwin" != sys.platform:
--- a/docling/utils/accelerator_utils.py
+++ b/docling/utils/accelerator_utils.py
@ -0,0 +1,42 @@
+import logging
+
+import torch
+
+from docling.datamodel.pipeline_options import AcceleratorDevice
+
+_log = logging.getLogger(__name__)
+
+
+def decide_device(accelerator_device: AcceleratorDevice) -> str:
+    r"""
+    Resolve the requested accelerator to a concrete device string ("cpu", "cuda:0" or "mps").
+    Rules:
+    1. AUTO: Prefer CUDA, then MPS, then CPU, depending on what torch reports as available.
+    2. User-defined: Use the device if it is available, otherwise warn and fall back to CPU.
+    """
+    cuda_index = 0  # the CUDA device index is fixed to the first device
+    device = "cpu"  # default result when nothing else is selected below
+
+    has_cuda = torch.backends.cuda.is_built() and torch.cuda.is_available()
+    has_mps = torch.backends.mps.is_built() and torch.backends.mps.is_available()
+
+    if accelerator_device == AcceleratorDevice.AUTO:
+        if has_cuda:
+            device = f"cuda:{cuda_index}"
+        elif has_mps:
+            device = "mps"
+
+    else:
+        if accelerator_device == AcceleratorDevice.CUDA:
+            if has_cuda:
+                device = f"cuda:{cuda_index}"
+            else:
+                _log.warning("CUDA is not available in the system. Fall back to 'CPU'")
+        elif accelerator_device == AcceleratorDevice.MPS:
+            if has_mps:
+                device = "mps"
+            else:
+                _log.warning("MPS is not available in the system. Fall back to 'CPU'")
+
+    _log.info("Accelerator device: '%s'", device)
+    return device
--- a/docs/examples/custom_convert.py
+++ b/docs/examples/custom_convert.py
@ -74,6 +74,10 @@ def main():
    pipeline_options.do_ocr = True
    pipeline_options.do_table_structure = True
    pipeline_options.table_structure_options.do_cell_matching = True
+    pipeline_options.ocr_options.lang = ["es"]
+    pipeline_options.accelerator_options = AcceleratorOptions(
+        num_threads=4, device=Device.AUTO
+    )

    doc_converter = DocumentConverter(
        format_options={
--- a/docs/examples/run_with_accelerator.py
+++ b/docs/examples/run_with_accelerator.py
@ -0,0 +1,63 @@
+from pathlib import Path
+
+from docling.backend.docling_parse_backend import DoclingParseDocumentBackend
+from docling.datamodel.base_models import InputFormat
+from docling.datamodel.pipeline_options import (
+    AcceleratorDevice,
+    AcceleratorOptions,
+    PdfPipelineOptions,
+    TesseractCliOcrOptions,
+    TesseractOcrOptions,
+)
+from docling.datamodel.settings import settings
+from docling.document_converter import DocumentConverter, PdfFormatOption
+
+
+def main():
+    input_doc = Path("./tests/data/2206.01062.pdf")
+
+    # Explicitly set the accelerator; swap in one of the commented alternatives to change device
+    # accelerator_options = AcceleratorOptions(
+    #     num_threads=8, device=AcceleratorDevice.AUTO
+    # )
+    accelerator_options = AcceleratorOptions(
+        num_threads=8, device=AcceleratorDevice.CPU
+    )
+    # accelerator_options = AcceleratorOptions(
+    #     num_threads=8, device=AcceleratorDevice.MPS
+    # )
+    # accelerator_options = AcceleratorOptions(
+    #     num_threads=8, device=AcceleratorDevice.CUDA
+    # )
+
+    pipeline_options = PdfPipelineOptions()
+    pipeline_options.accelerator_options = accelerator_options
+    pipeline_options.do_ocr = True
+    pipeline_options.do_table_structure = True
+    pipeline_options.table_structure_options.do_cell_matching = True
+
+    converter = DocumentConverter(
+        format_options={
+            InputFormat.PDF: PdfFormatOption(
+                pipeline_options=pipeline_options,
+            )
+        }
+    )
+
+    # Enable the profiling to measure the time spent (read back from the timings below)
+    settings.debug.profile_pipeline_timings = True
+
+    # Convert the document
+    conversion_result = converter.convert(input_doc)
+    doc = conversion_result.document
+
+    # Recorded times of the "pipeline_total" timing entry for this conversion
+    doc_conversion_secs = conversion_result.timings["pipeline_total"].times
+
+    md = doc.export_to_markdown()
+    print(md)
+    print(f"Conversion secs: {doc_conversion_secs}")
+
+if __name__ == "__main__":
+    main()
--- a/mkdocs.yml
+++ b/mkdocs.yml
@ -75,6 +75,7 @@ nav:
      - "Table export": examples/export_tables.py
      - "Multimodal export": examples/export_multimodal.py
      - "Force full page OCR": examples/full_page_ocr.py
+      - "Accelerator options": examples/run_with_accelerator.py
    - Chunking:
      - "Hybrid chunking": examples/hybrid_chunking.ipynb
    - RAG / QA:
--- a/poetry.lock
+++ b/poetry.lock
@ -914,13 +914,13 @@ chunking = ["semchunk (>=2.2.0,<3.0.0)", "transformers (>=4.34.0,<5.0.0)"]

 [[package]]
 name = "docling-ibm-models"
-version = "2.0.7"
+version = "3.1.0"
 description = "This package contains the AI models used by the Docling PDF conversion package"
 optional = false
 python-versions = "<4.0,>=3.9"
 files = [
-    {file = "docling_ibm_models-2.0.7-py3-none-any.whl", hash = "sha256:bf362add22e9c526ac56c04bce412d7bb1c331b44a73204abba0b1d90a500c78"},
-    {file = "docling_ibm_models-2.0.7.tar.gz", hash = "sha256:e1372c4f2517d522125fb02a820558f01914926f532bcd0534f1028a25d63667"},
+    {file = "docling_ibm_models-3.1.0-py3-none-any.whl", hash = "sha256:a381a45dff16fdb2246b99c15a2e3d6ba880c573d48a1d6477d3ffb36bab807f"},
+    {file = "docling_ibm_models-3.1.0.tar.gz", hash = "sha256:65d734ffa490edc4e2301d296b6e893afa536c63b7daae7bbda781bd15b3431e"},
 ]

 [package.dependencies]
@ -929,9 +929,11 @@ jsonlines = ">=3.1.0,<4.0.0"
 numpy = ">=1.24.4,<3.0.0"
 opencv-python-headless = ">=4.6.0.66,<5.0.0.0"
 Pillow = ">=10.0.0,<11.0.0"
+safetensors = {version = ">=0.4.3,<1", extras = ["torch"]}
 torch = ">=2.2.2,<3.0.0"
 torchvision = ">=0,<1"
 tqdm = ">=4.64.0,<5.0.0"
+transformers = ">=4.42.0,<5.0.0"

 [[package]]
 name = "docling-parse"
@ -5978,6 +5980,10 @@ files = [
    {file = "safetensors-0.4.5.tar.gz", hash = "sha256:d73de19682deabb02524b3d5d1f8b3aaba94c72f1bbfc7911b9b9d5d391c0310"},
 ]

+[package.dependencies]
+numpy = {version = ">=1.21.6", optional = true, markers = "extra == \"numpy\""}
+torch = {version = ">=1.10", optional = true, markers = "extra == \"torch\""}
+
 [package.extras]
 all = ["safetensors[jax]", "safetensors[numpy]", "safetensors[paddlepaddle]", "safetensors[pinned-tf]", "safetensors[quality]", "safetensors[testing]", "safetensors[torch]"]
 dev = ["safetensors[all]"]
@ -7602,4 +7608,4 @@ tesserocr = ["tesserocr"]
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.9"
-content-hash = "3e66a54bd0433581e4909003124e2b79b42bdd1fb90d17c037f3294aeff56aa9"
+content-hash = "5271637a86ae221be362a288546c9fee3e3e25e5b323c997464c032c284716bd"
--- a/pyproject.toml
+++ b/pyproject.toml
@ -27,8 +27,9 @@ packages = [{include = "docling"}]
 python = "^3.9"
 docling-core = { version = "^2.9.0", extras = ["chunking"] }
 pydantic = "^2.0.0"
-docling-ibm-models = "^2.0.6"
+docling-ibm-models = "^3.1.0"
 deepsearch-glm = "^1.0.0"
+docling-parse = "^3.0.0"
 filetype = "^1.2.0"
 pypdfium2 = "^4.30.0"
 pydantic-settings = "^2.3.0"
@ -36,7 +37,6 @@ huggingface_hub = ">=0.23,<1"
 requests = "^2.32.3"
 easyocr = "^1.7"
 tesserocr = { version = "^2.7.1", optional = true }
-docling-parse = "^3.0.0"
 certifi = ">=2024.7.4"
 rtree = "^1.3.0"
 scipy = "^1.6.0"
--- a/tests/data/groundtruth/docling_v1/2203.01017v2.json
+++ b/tests/data/groundtruth/docling_v1/2203.01017v2.json
--- a/tests/data/groundtruth/docling_v1/2203.01017v2.pages.json
+++ b/tests/data/groundtruth/docling_v1/2203.01017v2.pages.json
--- a/tests/data/groundtruth/docling_v1/2206.01062.json
+++ b/tests/data/groundtruth/docling_v1/2206.01062.json
--- a/tests/data/groundtruth/docling_v1/2206.01062.pages.json
+++ b/tests/data/groundtruth/docling_v1/2206.01062.pages.json
--- a/tests/data/groundtruth/docling_v1/2305.03393v1-pg9.json
+++ b/tests/data/groundtruth/docling_v1/2305.03393v1-pg9.json
--- a/tests/data/groundtruth/docling_v1/2305.03393v1-pg9.pages.json
+++ b/tests/data/groundtruth/docling_v1/2305.03393v1-pg9.pages.json
--- a/tests/data/groundtruth/docling_v1/2305.03393v1.json
+++ b/tests/data/groundtruth/docling_v1/2305.03393v1.json
--- a/tests/data/groundtruth/docling_v1/2305.03393v1.pages.json
+++ b/tests/data/groundtruth/docling_v1/2305.03393v1.pages.json
--- a/tests/data/groundtruth/docling_v1/redp5110_sampled.json
+++ b/tests/data/groundtruth/docling_v1/redp5110_sampled.json
--- a/tests/data/groundtruth/docling_v1/redp5110_sampled.pages.json
+++ b/tests/data/groundtruth/docling_v1/redp5110_sampled.pages.json
--- a/tests/data/groundtruth/docling_v2/2203.01017v2.json
+++ b/tests/data/groundtruth/docling_v2/2203.01017v2.json
--- a/tests/data/groundtruth/docling_v2/2203.01017v2.pages.json
+++ b/tests/data/groundtruth/docling_v2/2203.01017v2.pages.json
--- a/tests/data/groundtruth/docling_v2/2206.01062.json
+++ b/tests/data/groundtruth/docling_v2/2206.01062.json
--- a/tests/data/groundtruth/docling_v2/2206.01062.pages.json
+++ b/tests/data/groundtruth/docling_v2/2206.01062.pages.json
--- a/tests/data/groundtruth/docling_v2/2305.03393v1-pg9.json
+++ b/tests/data/groundtruth/docling_v2/2305.03393v1-pg9.json
--- a/tests/data/groundtruth/docling_v2/2305.03393v1-pg9.pages.json
+++ b/tests/data/groundtruth/docling_v2/2305.03393v1-pg9.pages.json
--- a/tests/data/groundtruth/docling_v2/2305.03393v1.json
+++ b/tests/data/groundtruth/docling_v2/2305.03393v1.json
--- a/tests/data/groundtruth/docling_v2/2305.03393v1.pages.json
+++ b/tests/data/groundtruth/docling_v2/2305.03393v1.pages.json
--- a/tests/data/groundtruth/docling_v2/redp5110_sampled.json
+++ b/tests/data/groundtruth/docling_v2/redp5110_sampled.json
--- a/tests/data/groundtruth/docling_v2/redp5110_sampled.pages.json
+++ b/tests/data/groundtruth/docling_v2/redp5110_sampled.pages.json
--- a/tests/data_scanned/groundtruth/docling_v1/ocr_test.json
+++ b/tests/data_scanned/groundtruth/docling_v1/ocr_test.json
@ -1 +1 @@
-{"_name": "", "type": "pdf-document", "description": {"title": null, "abstract": null, "authors": null, "affiliations": null, "subjects": null, "keywords": null, "publication_date": null, "languages": null, "license": null, "publishers": null, "url_refs": null, "references": null, "publication": null, "reference_count": null, "citation_count": null, "citation_date": null, "advanced": null, "analytics": null, "logs": [], "collection": null, "acquisition": null}, "file-info": {"filename": "ocr_test.pdf", "filename-prov": null, "document-hash": "73f23122e9edbdb0a115b448e03c8064a0ea8bdc21d02917ce220cf032454f31", "#-pages": 1, "collection-name": null, "description": null, "page-hashes": [{"hash": "8c5c5b766c1bdb92242142ca37260089b02380f9c57729703350f646cdf4771e", "model": "default", "page": 1}]}, "main-text": [{"prov": [{"bbox": [70.90211486816406, 689.2166748046875, 504.87200927734375, 765.0995483398438], "page": 1, "span": [0, 94], "__ref_s3_data": null}], "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package", "type": "paragraph", "name": "Text", "font": null}], "figures": [], "tables": [], "bitmaps": null, "equations": [], "footnotes": [], "page-dimensions": [{"height": 841.9216918945312, "page": 1, "width": 595.201171875}], "page-footers": [], "page-headers": [], "_s3_data": null, "identifiers": null}
+{"_name": "", "type": "pdf-document", "description": {"title": null, "abstract": null, "authors": null, "affiliations": null, "subjects": null, "keywords": null, "publication_date": null, "languages": null, "license": null, "publishers": null, "url_refs": null, "references": null, "publication": null, "reference_count": null, "citation_count": null, "citation_date": null, "advanced": null, "analytics": null, "logs": [], "collection": null, "acquisition": null}, "file-info": {"filename": "ocr_test.pdf", "filename-prov": null, "document-hash": "80f38f5b87a84870681556176a9622186fd200dd32c5557be9e0c0af05b8bc61", "#-pages": 1, "collection-name": null, "description": null, "page-hashes": [{"hash": "14d896dc8bcb7ee7c08c0347eb6be8dcb92a3782501992f1ea14d2e58077d4e3", "model": "default", "page": 1}]}, "main-text": [{"prov": [{"bbox": [69.6796646118164, 689.012451171875, 504.87200927734375, 765.0995483398438], "page": 1, "span": [0, 94], "__ref_s3_data": null}], "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package", "type": "paragraph", "payload": null, "name": "Text", "font": null}], "figures": [], "tables": [], "bitmaps": null, "equations": [], "footnotes": [], "page-dimensions": [{"height": 841.9216918945312, "page": 1, "width": 595.201171875}], "page-footers": [], "page-headers": [], "_s3_data": null, "identifiers": null}
--- a/tests/data_scanned/groundtruth/docling_v1/ocr_test.pages.json
+++ b/tests/data_scanned/groundtruth/docling_v1/ocr_test.pages.json
@ -1 +1 @@
-[{"page_no": 0, "size": {"width": 595.201171875, "height": 841.9216918945312}, "cells": [{"id": 0, "text": "Docling bundles PDF document conversion to", "bbox": {"l": 73.34702132031646, "t": 76.99999977896755, "r": 503.64955224479564, "b": 97.99999977896755, "coord_origin": "TOPLEFT"}}, {"id": 1, "text": "JSON and Markdown in an easy self contained", "bbox": {"l": 70.90211866351085, "t": 102.66666671251767, "r": 504.8720079864275, "b": 124.83139551297336, "coord_origin": "TOPLEFT"}}, {"id": 2, "text": "package", "bbox": {"l": 73.10852522817731, "t": 130.0013615789096, "r": 153.04479435252625, "b": 152.70503335218427, "coord_origin": "TOPLEFT"}}], "predictions": {"layout": {"clusters": [{"id": 0, "label": "text", "bbox": {"l": 70.90211866351085, "t": 76.82212829589844, "r": 504.8720079864275, "b": 152.70503335218427, "coord_origin": "TOPLEFT"}, "confidence": 0.9715733528137207, "cells": [{"id": 0, "text": "Docling bundles PDF document conversion to", "bbox": {"l": 73.34702132031646, "t": 76.99999977896755, "r": 503.64955224479564, "b": 97.99999977896755, "coord_origin": "TOPLEFT"}}, {"id": 1, "text": "JSON and Markdown in an easy self contained", "bbox": {"l": 70.90211866351085, "t": 102.66666671251767, "r": 504.8720079864275, "b": 124.83139551297336, "coord_origin": "TOPLEFT"}}, {"id": 2, "text": "package", "bbox": {"l": 73.10852522817731, "t": 130.0013615789096, "r": 153.04479435252625, "b": 152.70503335218427, "coord_origin": "TOPLEFT"}}]}]}, "tablestructure": {"table_map": {}}, "figures_classification": null, "equations_prediction": null}, "assembled": {"elements": [{"label": "text", "id": 0, "page_no": 0, "cluster": {"id": 0, "label": "text", "bbox": {"l": 70.90211866351085, "t": 76.82212829589844, "r": 504.8720079864275, "b": 152.70503335218427, "coord_origin": "TOPLEFT"}, "confidence": 0.9715733528137207, "cells": [{"id": 0, "text": "Docling bundles PDF document conversion to", "bbox": {"l": 73.34702132031646, "t": 76.99999977896755, "r": 503.64955224479564, 
"b": 97.99999977896755, "coord_origin": "TOPLEFT"}}, {"id": 1, "text": "JSON and Markdown in an easy self contained", "bbox": {"l": 70.90211866351085, "t": 102.66666671251767, "r": 504.8720079864275, "b": 124.83139551297336, "coord_origin": "TOPLEFT"}}, {"id": 2, "text": "package", "bbox": {"l": 73.10852522817731, "t": 130.0013615789096, "r": 153.04479435252625, "b": 152.70503335218427, "coord_origin": "TOPLEFT"}}]}, "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package"}], "body": [{"label": "text", "id": 0, "page_no": 0, "cluster": {"id": 0, "label": "text", "bbox": {"l": 70.90211866351085, "t": 76.82212829589844, "r": 504.8720079864275, "b": 152.70503335218427, "coord_origin": "TOPLEFT"}, "confidence": 0.9715733528137207, "cells": [{"id": 0, "text": "Docling bundles PDF document conversion to", "bbox": {"l": 73.34702132031646, "t": 76.99999977896755, "r": 503.64955224479564, "b": 97.99999977896755, "coord_origin": "TOPLEFT"}}, {"id": 1, "text": "JSON and Markdown in an easy self contained", "bbox": {"l": 70.90211866351085, "t": 102.66666671251767, "r": 504.8720079864275, "b": 124.83139551297336, "coord_origin": "TOPLEFT"}}, {"id": 2, "text": "package", "bbox": {"l": 73.10852522817731, "t": 130.0013615789096, "r": 153.04479435252625, "b": 152.70503335218427, "coord_origin": "TOPLEFT"}}]}, "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package"}], "headers": []}}]
+[{"page_no": 0, "size": {"width": 595.201171875, "height": 841.9216918945312}, "cells": [{"id": 0, "text": "Docling bundles PDF document conversion to", "bbox": {"l": 73.34702132031646, "t": 76.99999977896755, "r": 503.64955224479564, "b": 97.99999977896755, "coord_origin": "TOPLEFT"}}, {"id": 1, "text": "JSON and Markdown in an easy self contained", "bbox": {"l": 69.6796630536824, "t": 104.00000011573798, "r": 504.8720051760782, "b": 124.83139494707746, "coord_origin": "TOPLEFT"}}, {"id": 2, "text": "package", "bbox": {"l": 71.84193505100733, "t": 129.79712523204603, "r": 153.088934155825, "b": 152.90926970226087, "coord_origin": "TOPLEFT"}}], "predictions": {"layout": {"clusters": [{"id": 0, "label": "text", "bbox": {"l": 69.6796630536824, "t": 76.82213592529297, "r": 504.8720051760782, "b": 152.90926970226087, "coord_origin": "TOPLEFT"}, "confidence": 0.9715732336044312, "cells": [{"id": 0, "text": "Docling bundles PDF document conversion to", "bbox": {"l": 73.34702132031646, "t": 76.99999977896755, "r": 503.64955224479564, "b": 97.99999977896755, "coord_origin": "TOPLEFT"}}, {"id": 1, "text": "JSON and Markdown in an easy self contained", "bbox": {"l": 69.6796630536824, "t": 104.00000011573798, "r": 504.8720051760782, "b": 124.83139494707746, "coord_origin": "TOPLEFT"}}, {"id": 2, "text": "package", "bbox": {"l": 71.84193505100733, "t": 129.79712523204603, "r": 153.088934155825, "b": 152.90926970226087, "coord_origin": "TOPLEFT"}}]}]}, "tablestructure": {"table_map": {}}, "figures_classification": null, "equations_prediction": null}, "assembled": {"elements": [{"label": "text", "id": 0, "page_no": 0, "cluster": {"id": 0, "label": "text", "bbox": {"l": 69.6796630536824, "t": 76.82213592529297, "r": 504.8720051760782, "b": 152.90926970226087, "coord_origin": "TOPLEFT"}, "confidence": 0.9715732336044312, "cells": [{"id": 0, "text": "Docling bundles PDF document conversion to", "bbox": {"l": 73.34702132031646, "t": 76.99999977896755, "r": 503.64955224479564, "b": 
97.99999977896755, "coord_origin": "TOPLEFT"}}, {"id": 1, "text": "JSON and Markdown in an easy self contained", "bbox": {"l": 69.6796630536824, "t": 104.00000011573798, "r": 504.8720051760782, "b": 124.83139494707746, "coord_origin": "TOPLEFT"}}, {"id": 2, "text": "package", "bbox": {"l": 71.84193505100733, "t": 129.79712523204603, "r": 153.088934155825, "b": 152.90926970226087, "coord_origin": "TOPLEFT"}}]}, "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package"}], "body": [{"label": "text", "id": 0, "page_no": 0, "cluster": {"id": 0, "label": "text", "bbox": {"l": 69.6796630536824, "t": 76.82213592529297, "r": 504.8720051760782, "b": 152.90926970226087, "coord_origin": "TOPLEFT"}, "confidence": 0.9715732336044312, "cells": [{"id": 0, "text": "Docling bundles PDF document conversion to", "bbox": {"l": 73.34702132031646, "t": 76.99999977896755, "r": 503.64955224479564, "b": 97.99999977896755, "coord_origin": "TOPLEFT"}}, {"id": 1, "text": "JSON and Markdown in an easy self contained", "bbox": {"l": 69.6796630536824, "t": 104.00000011573798, "r": 504.8720051760782, "b": 124.83139494707746, "coord_origin": "TOPLEFT"}}, {"id": 2, "text": "package", "bbox": {"l": 71.84193505100733, "t": 129.79712523204603, "r": 153.088934155825, "b": 152.90926970226087, "coord_origin": "TOPLEFT"}}]}, "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package"}], "headers": []}}]
--- a/tests/data_scanned/groundtruth/docling_v2/ocr_test.json
+++ b/tests/data_scanned/groundtruth/docling_v2/ocr_test.json
@ -1 +1 @@
-{"schema_name": "DoclingDocument", "version": "1.0.0", "name": "ocr_test", "origin": {"mimetype": "application/pdf", "binary_hash": 14853448746796404529, "filename": "ocr_test.pdf", "uri": null}, "furniture": {"self_ref": "#/furniture", "parent": null, "children": [], "name": "_root_", "label": "unspecified"}, "body": {"self_ref": "#/body", "parent": null, "children": [{"cref": "#/texts/0"}], "name": "_root_", "label": "unspecified"}, "groups": [], "texts": [{"self_ref": "#/texts/0", "parent": {"cref": "#/body"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 70.90211486816406, "t": 765.0995483398438, "r": 504.87200927734375, "b": 689.2166748046875, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 94]}], "orig": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package", "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package"}], "pictures": [], "tables": [], "key_value_items": [], "pages": {"1": {"size": {"width": 595.201171875, "height": 841.9216918945312}, "image": null, "page_no": 1}}}
+{"schema_name": "DoclingDocument", "version": "1.0.0", "name": "ocr_test", "origin": {"mimetype": "application/pdf", "binary_hash": 14853448746796404529, "filename": "ocr_test.pdf", "uri": null}, "furniture": {"self_ref": "#/furniture", "parent": null, "children": [], "name": "_root_", "label": "unspecified"}, "body": {"self_ref": "#/body", "parent": null, "children": [{"cref": "#/texts/0"}], "name": "_root_", "label": "unspecified"}, "groups": [], "texts": [{"self_ref": "#/texts/0", "parent": {"cref": "#/body"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 69.6796646118164, "t": 765.0995483398438, "r": 504.87200927734375, "b": 689.012451171875, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 94]}], "orig": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package", "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package"}], "pictures": [], "tables": [], "key_value_items": [], "pages": {"1": {"size": {"width": 595.201171875, "height": 841.9216918945312}, "image": null, "page_no": 1}}}
--- a/tests/data_scanned/groundtruth/docling_v2/ocr_test.pages.json
+++ b/tests/data_scanned/groundtruth/docling_v2/ocr_test.pages.json
@ -1 +1 @@
-[{"page_no": 0, "size": {"width": 595.201171875, "height": 841.9216918945312}, "cells": [{"id": 0, "text": "Docling bundles PDF document conversion to", "bbox": {"l": 73.34702132031646, "t": 76.99999977896755, "r": 503.64955224479564, "b": 97.99999977896755, "coord_origin": "TOPLEFT"}}, {"id": 1, "text": "JSON and Markdown in an easy self contained", "bbox": {"l": 70.90211866351085, "t": 102.66666671251767, "r": 504.8720079864275, "b": 124.83139551297336, "coord_origin": "TOPLEFT"}}, {"id": 2, "text": "package", "bbox": {"l": 73.10852522817731, "t": 130.0013615789096, "r": 153.04479435252625, "b": 152.70503335218427, "coord_origin": "TOPLEFT"}}], "predictions": {"layout": {"clusters": [{"id": 0, "label": "text", "bbox": {"l": 70.90211866351085, "t": 76.82212829589844, "r": 504.8720079864275, "b": 152.70503335218427, "coord_origin": "TOPLEFT"}, "confidence": 0.9715733528137207, "cells": [{"id": 0, "text": "Docling bundles PDF document conversion to", "bbox": {"l": 73.34702132031646, "t": 76.99999977896755, "r": 503.64955224479564, "b": 97.99999977896755, "coord_origin": "TOPLEFT"}}, {"id": 1, "text": "JSON and Markdown in an easy self contained", "bbox": {"l": 70.90211866351085, "t": 102.66666671251767, "r": 504.8720079864275, "b": 124.83139551297336, "coord_origin": "TOPLEFT"}}, {"id": 2, "text": "package", "bbox": {"l": 73.10852522817731, "t": 130.0013615789096, "r": 153.04479435252625, "b": 152.70503335218427, "coord_origin": "TOPLEFT"}}]}]}, "tablestructure": {"table_map": {}}, "figures_classification": null, "equations_prediction": null}, "assembled": {"elements": [{"label": "text", "id": 0, "page_no": 0, "cluster": {"id": 0, "label": "text", "bbox": {"l": 70.90211866351085, "t": 76.82212829589844, "r": 504.8720079864275, "b": 152.70503335218427, "coord_origin": "TOPLEFT"}, "confidence": 0.9715733528137207, "cells": [{"id": 0, "text": "Docling bundles PDF document conversion to", "bbox": {"l": 73.34702132031646, "t": 76.99999977896755, "r": 503.64955224479564, 
"b": 97.99999977896755, "coord_origin": "TOPLEFT"}}, {"id": 1, "text": "JSON and Markdown in an easy self contained", "bbox": {"l": 70.90211866351085, "t": 102.66666671251767, "r": 504.8720079864275, "b": 124.83139551297336, "coord_origin": "TOPLEFT"}}, {"id": 2, "text": "package", "bbox": {"l": 73.10852522817731, "t": 130.0013615789096, "r": 153.04479435252625, "b": 152.70503335218427, "coord_origin": "TOPLEFT"}}]}, "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package"}], "body": [{"label": "text", "id": 0, "page_no": 0, "cluster": {"id": 0, "label": "text", "bbox": {"l": 70.90211866351085, "t": 76.82212829589844, "r": 504.8720079864275, "b": 152.70503335218427, "coord_origin": "TOPLEFT"}, "confidence": 0.9715733528137207, "cells": [{"id": 0, "text": "Docling bundles PDF document conversion to", "bbox": {"l": 73.34702132031646, "t": 76.99999977896755, "r": 503.64955224479564, "b": 97.99999977896755, "coord_origin": "TOPLEFT"}}, {"id": 1, "text": "JSON and Markdown in an easy self contained", "bbox": {"l": 70.90211866351085, "t": 102.66666671251767, "r": 504.8720079864275, "b": 124.83139551297336, "coord_origin": "TOPLEFT"}}, {"id": 2, "text": "package", "bbox": {"l": 73.10852522817731, "t": 130.0013615789096, "r": 153.04479435252625, "b": 152.70503335218427, "coord_origin": "TOPLEFT"}}]}, "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package"}], "headers": []}}]
+[{"page_no": 0, "size": {"width": 595.201171875, "height": 841.9216918945312}, "cells": [{"id": 0, "text": "Docling bundles PDF document conversion to", "bbox": {"l": 73.34702132031646, "t": 76.99999977896755, "r": 503.64955224479564, "b": 97.99999977896755, "coord_origin": "TOPLEFT"}}, {"id": 1, "text": "JSON and Markdown in an easy self contained", "bbox": {"l": 69.6796630536824, "t": 104.00000011573798, "r": 504.8720051760782, "b": 124.83139494707746, "coord_origin": "TOPLEFT"}}, {"id": 2, "text": "package", "bbox": {"l": 71.84193505100733, "t": 129.79712523204603, "r": 153.088934155825, "b": 152.90926970226087, "coord_origin": "TOPLEFT"}}], "predictions": {"layout": {"clusters": [{"id": 0, "label": "text", "bbox": {"l": 69.6796630536824, "t": 76.82213592529297, "r": 504.8720051760782, "b": 152.90926970226087, "coord_origin": "TOPLEFT"}, "confidence": 0.9715732336044312, "cells": [{"id": 0, "text": "Docling bundles PDF document conversion to", "bbox": {"l": 73.34702132031646, "t": 76.99999977896755, "r": 503.64955224479564, "b": 97.99999977896755, "coord_origin": "TOPLEFT"}}, {"id": 1, "text": "JSON and Markdown in an easy self contained", "bbox": {"l": 69.6796630536824, "t": 104.00000011573798, "r": 504.8720051760782, "b": 124.83139494707746, "coord_origin": "TOPLEFT"}}, {"id": 2, "text": "package", "bbox": {"l": 71.84193505100733, "t": 129.79712523204603, "r": 153.088934155825, "b": 152.90926970226087, "coord_origin": "TOPLEFT"}}]}]}, "tablestructure": {"table_map": {}}, "figures_classification": null, "equations_prediction": null}, "assembled": {"elements": [{"label": "text", "id": 0, "page_no": 0, "cluster": {"id": 0, "label": "text", "bbox": {"l": 69.6796630536824, "t": 76.82213592529297, "r": 504.8720051760782, "b": 152.90926970226087, "coord_origin": "TOPLEFT"}, "confidence": 0.9715732336044312, "cells": [{"id": 0, "text": "Docling bundles PDF document conversion to", "bbox": {"l": 73.34702132031646, "t": 76.99999977896755, "r": 503.64955224479564, "b": 
97.99999977896755, "coord_origin": "TOPLEFT"}}, {"id": 1, "text": "JSON and Markdown in an easy self contained", "bbox": {"l": 69.6796630536824, "t": 104.00000011573798, "r": 504.8720051760782, "b": 124.83139494707746, "coord_origin": "TOPLEFT"}}, {"id": 2, "text": "package", "bbox": {"l": 71.84193505100733, "t": 129.79712523204603, "r": 153.088934155825, "b": 152.90926970226087, "coord_origin": "TOPLEFT"}}]}, "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package"}], "body": [{"label": "text", "id": 0, "page_no": 0, "cluster": {"id": 0, "label": "text", "bbox": {"l": 69.6796630536824, "t": 76.82213592529297, "r": 504.8720051760782, "b": 152.90926970226087, "coord_origin": "TOPLEFT"}, "confidence": 0.9715732336044312, "cells": [{"id": 0, "text": "Docling bundles PDF document conversion to", "bbox": {"l": 73.34702132031646, "t": 76.99999977896755, "r": 503.64955224479564, "b": 97.99999977896755, "coord_origin": "TOPLEFT"}}, {"id": 1, "text": "JSON and Markdown in an easy self contained", "bbox": {"l": 69.6796630536824, "t": 104.00000011573798, "r": 504.8720051760782, "b": 124.83139494707746, "coord_origin": "TOPLEFT"}}, {"id": 2, "text": "package", "bbox": {"l": 71.84193505100733, "t": 129.79712523204603, "r": 153.088934155825, "b": 152.90926970226087, "coord_origin": "TOPLEFT"}}]}, "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package"}], "headers": []}}]
--- a/tests/test_options.py
+++ b/tests/test_options.py
@ -1,3 +1,4 @@
+import os
 from pathlib import Path

 import pytest
@ -5,7 +6,12 @@ import pytest
 from docling.backend.docling_parse_backend import DoclingParseDocumentBackend
 from docling.datamodel.base_models import ConversionStatus, InputFormat
 from docling.datamodel.document import ConversionResult
-from docling.datamodel.pipeline_options import PdfPipelineOptions, TableFormerMode
+from docling.datamodel.pipeline_options import (
+    AcceleratorDevice,
+    AcceleratorOptions,
+    PdfPipelineOptions,
+    TableFormerMode,
+)
 from docling.document_converter import DocumentConverter, PdfFormatOption


@ -35,6 +41,61 @@ def get_converters_with_table_options():
            yield converter


+def test_accelerator_options():
+    """Check AcceleratorOptions defaults, explicit arguments and envvar overrides.
+
+    All environment edits happen on a patched copy of ``os.environ`` that is
+    restored on exit, so no setting leaks into the tests that run afterwards.
+    """
+    from unittest import mock
+
+    envvars = ("OMP_NUM_THREADS", "DOCLING_NUM_THREADS", "DOCLING_DEVICE")
+    with mock.patch.dict(os.environ):
+        # Start from a clean slate so exported values cannot skew the defaults
+        for name in envvars:
+            os.environ.pop(name, None)
+
+        # Check the default options
+        ao = AcceleratorOptions()
+        assert ao.num_threads == 4, "Wrong default num_threads"
+        assert ao.device == AcceleratorDevice.AUTO, "Wrong default device"
+
+        # Explicit arguments are stored as given
+        ao2 = AcceleratorOptions(num_threads=2, device=AcceleratorDevice.MPS)
+        ao3 = AcceleratorOptions(num_threads=3, device=AcceleratorDevice.CUDA)
+        assert (ao2.num_threads, ao2.device) == (2, AcceleratorDevice.MPS)
+        assert (ao3.num_threads, ao3.device) == (3, AcceleratorDevice.CUDA)
+
+        # The alternative envvar alone sets num_threads; device keeps its default
+        os.environ["OMP_NUM_THREADS"] = "1"
+        ao = AcceleratorOptions()
+        assert (ao.num_threads, ao.device) == (1, AcceleratorDevice.AUTO)
+        os.environ["DOCLING_DEVICE"] = "cpu"
+        ao = AcceleratorOptions()
+        assert (ao.num_threads, ao.device) == (1, AcceleratorDevice.CPU)
+
+        # Explicit arguments win over envvars
+        ao4 = AcceleratorOptions(num_threads=5, device=AcceleratorDevice.MPS)
+        assert (ao4.num_threads, ao4.device) == (5, AcceleratorDevice.MPS)
+
+        # The regular envvar takes precedence over OMP_NUM_THREADS (still "1")
+        os.environ["DOCLING_NUM_THREADS"] = "2"
+        ao5 = AcceleratorOptions()
+        assert (ao5.num_threads, ao5.device) == (2, AcceleratorDevice.CPU)
+
+        # An invalid device must raise; the concrete exception type is not pinned
+        os.environ["DOCLING_DEVICE"] = "wrong"
+        with pytest.raises(Exception):
+            AcceleratorOptions()
+
+        # A malformed alternative envvar is ignored and the defaults apply
+        del os.environ["DOCLING_NUM_THREADS"]
+        del os.environ["DOCLING_DEVICE"]
+        os.environ["OMP_NUM_THREADS"] = "wrong"
+        ao6 = AcceleratorOptions()
+        assert (ao6.num_threads, ao6.device) == (4, AcceleratorDevice.AUTO)
+
+
 def test_e2e_conversions(test_doc_path):
    for converter in get_converters_with_table_options():
        print(f"converting {test_doc_path}")