Docling/docling/datamodel/settings.py

import sys
from pathlib import Path
from typing import Annotated, Tuple

from pydantic import BaseModel, PlainValidator
from pydantic_settings import BaseSettings, SettingsConfigDict


def _validate_page_range(v: Tuple[int, int]) -> Tuple[int, int]:
    if v[0] < 1 or v[1] < v[0]:
        raise ValueError(
            "Invalid page range: start must be ≥ 1 and end must be ≥ start."
        )
    return v


PageRange = Annotated[Tuple[int, int], PlainValidator(_validate_page_range)]

DEFAULT_PAGE_RANGE: PageRange = (1, sys.maxsize)


class DocumentLimits(BaseModel):
    max_num_pages: int = sys.maxsize
    max_file_size: int = sys.maxsize
    page_range: PageRange = DEFAULT_PAGE_RANGE


class BatchConcurrencySettings(BaseModel):
    doc_batch_size: int = 2
    doc_batch_concurrency: int = 2
    page_batch_size: int = 4
    page_batch_concurrency: int = 2
    elements_batch_size: int = 16

    # doc_batch_size: int = 1
    # doc_batch_concurrency: int = 1
    # page_batch_size: int = 1
    # page_batch_concurrency: int = 1

    # model_concurrency: int = 2

    # To force models into single core: export OMP_NUM_THREADS=1


class DebugSettings(BaseModel):
    visualize_cells: bool = False
    visualize_ocr: bool = False
    visualize_layout: bool = False
    visualize_raw_layout: bool = False
    visualize_tables: bool = False

    profile_pipeline_timings: bool = False

    # Path used to output debug information.
    debug_output_path: str = str(Path.cwd() / "debug")


class AppSettings(BaseSettings):
    model_config = SettingsConfigDict(env_prefix="DOCLING_", env_nested_delimiter="_")

    perf: BatchConcurrencySettings
    debug: DebugSettings

    cache_dir: Path = Path.home() / ".cache" / "docling"


settings = AppSettings(perf=BatchConcurrencySettings(), debug=DebugSettings())