diff --git a/docling/datamodel/settings.py b/docling/datamodel/settings.py index 439ffe7..fee871a 100644 --- a/docling/datamodel/settings.py +++ b/docling/datamodel/settings.py @@ -1,6 +1,6 @@ import sys from pathlib import Path -from typing import Annotated, Tuple +from typing import Annotated, Optional, Tuple from pydantic import BaseModel, PlainValidator from pydantic_settings import BaseSettings, SettingsConfigDict @@ -62,6 +62,7 @@ class AppSettings(BaseSettings): debug: DebugSettings cache_dir: Path = Path.home() / ".cache" / "docling" + artifacts_path: Optional[Path] = None settings = AppSettings(perf=BatchConcurrencySettings(), debug=DebugSettings()) diff --git a/docling/pipeline/standard_pdf_pipeline.py b/docling/pipeline/standard_pdf_pipeline.py index 13e435f..1c71bf7 100644 --- a/docling/pipeline/standard_pdf_pipeline.py +++ b/docling/pipeline/standard_pdf_pipeline.py @@ -61,6 +61,14 @@ class StandardPdfPipeline(PaginatedPipeline): artifacts_path: Optional[Path] = None if pipeline_options.artifacts_path is not None: artifacts_path = Path(pipeline_options.artifacts_path).expanduser() + elif settings.artifacts_path is not None: + artifacts_path = Path(settings.artifacts_path).expanduser() + + if artifacts_path is not None and not artifacts_path.is_dir(): + raise RuntimeError( + f"The value of {artifacts_path=} is not valid. " + "When defined, it must point to a folder containing all models required by the pipeline." + ) self.keep_images = ( self.pipeline_options.generate_page_images