71 lines
1.9 KiB
Python
71 lines
1.9 KiB
Python
import sys
|
|
from pathlib import Path
|
|
from typing import Annotated, Optional, Tuple
|
|
|
|
from pydantic import BaseModel, PlainValidator
|
|
from pydantic_settings import BaseSettings, SettingsConfigDict
|
|
|
|
|
|
def _validate_page_range(v: Tuple[int, int]) -> Tuple[int, int]:
|
|
if v[0] < 1 or v[1] < v[0]:
|
|
raise ValueError(
|
|
"Invalid page range: start must be ≥ 1 and end must be ≥ start."
|
|
)
|
|
return v
|
|
|
|
|
|
# A (start, end) pair of page numbers. Validation is delegated entirely to
# `_validate_page_range` via `PlainValidator`, which rejects start < 1 and
# end < start.
PageRange = Annotated[Tuple[int, int], PlainValidator(_validate_page_range)]
|
|
|
|
# Default range: starts at page 1 and uses sys.maxsize as the end, i.e. no
# practical upper bound.
DEFAULT_PAGE_RANGE: PageRange = (1, sys.maxsize)
|
|
|
|
|
|
# Limits applied to a document. Every default is the largest value available
# (sys.maxsize, or a page range ending at sys.maxsize), so nothing is
# restricted unless a caller overrides a field.
class DocumentLimits(BaseModel):
    # Upper bound on the page count.
    max_num_pages: int = sys.maxsize

    # Upper bound on the file size (presumably in bytes; confirm with callers).
    max_file_size: int = sys.maxsize

    # (start, end) pages to consider; start must be >= 1 and end >= start.
    page_range: PageRange = DEFAULT_PAGE_RANGE
|
|
|
|
|
|
# Batch-size and concurrency knobs at document, page and element level.
class BatchConcurrencySettings(BaseModel):
    # Document level.
    doc_batch_size: int = 2
    doc_batch_concurrency: int = 2

    # Page level.
    page_batch_size: int = 4
    page_batch_concurrency: int = 2

    # Element level.
    elements_batch_size: int = 16

    # Alternative values noted by the original author (not active):
    #   doc_batch_size = 1, doc_batch_concurrency = 1,
    #   page_batch_size = 1, page_batch_concurrency = 1
    # A `model_concurrency: int = 2` field was also sketched but is not defined.
    #
    # To force models into single core: export OMP_NUM_THREADS=1
|
|
|
|
|
|
# Debugging switches; every flag is off by default.
class DebugSettings(BaseModel):
    # Visualization toggles, one per kind of output.
    visualize_cells: bool = False
    visualize_ocr: bool = False
    visualize_layout: bool = False
    visualize_raw_layout: bool = False
    visualize_tables: bool = False

    # Toggle for pipeline timing profiling.
    profile_pipeline_timings: bool = False

    # Path used to output debug information. The default is computed once,
    # when this class body runs, from the working directory at that moment.
    debug_output_path: str = str(Path.cwd().joinpath("debug"))
|
|
|
|
|
|
# Top-level application settings, loadable from the environment.
class AppSettings(BaseSettings):
    # Variables are read with the "DOCLING_" prefix. Nested fields use "_" as
    # delimiter, split at most once, so the remainder of the name (which may
    # itself contain underscores) is taken as the nested field name, e.g.
    # DOCLING_PERF_PAGE_BATCH_SIZE -> perf.page_batch_size.
    model_config = SettingsConfigDict(
        env_prefix="DOCLING_",
        env_nested_delimiter="_",
        env_nested_max_split=1,
    )

    # Nested setting groups.
    perf: BatchConcurrencySettings = BatchConcurrencySettings()
    debug: DebugSettings = DebugSettings()

    # Cache location; the default is resolved from the user's home directory
    # when this class body runs.
    cache_dir: Path = Path.home().joinpath(".cache", "docling")

    # Optional artifacts location; unset by default.
    artifacts_path: Optional[Path] = None
|
|
|
|
|
|
# Module-level settings instance, created when this module is imported.
# Values come from the field defaults unless overridden through
# DOCLING_-prefixed environment variables.
settings = AppSettings()
|