Docling/docling/datamodel/settings.py
Christoph Auer 2a2c65bf4f
feat: Add pipeline timings and toggle visualization, establish debug settings (#183)
* Add settings to turn visualization on or off

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

* Add profiling code to all models

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

* Refactor and fix profiling codes

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

* Visualization codes output PNG to debug dir

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

* Fixes for time logging

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

* Optimize imports

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

* Update lockfile

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

* Add start_timestamps to ProfilingItem

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

---------

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
2024-10-30 15:04:19 +01:00

48 lines
1.1 KiB
Python

import sys
from pathlib import Path
from pydantic import BaseModel
from pydantic_settings import BaseSettings
class DocumentLimits(BaseModel):
    """Upper bounds on the documents that are accepted.

    Both limits default to ``sys.maxsize``, i.e. effectively unbounded.
    """

    # Largest page count allowed (presumably per document -- confirm at the
    # call sites, which are not visible in this module).
    max_num_pages: int = sys.maxsize

    # Largest file size allowed (presumably in bytes -- confirm at the call
    # sites, which are not visible in this module).
    max_file_size: int = sys.maxsize
class BatchConcurrencySettings(BaseModel):
    """Batch sizes and concurrency levels, grouped by processing granularity.

    NOTE(review): how each value is consumed is not visible in this module;
    the grouping below follows the field names only -- confirm at the call
    sites before relying on it.
    """

    # Document level.
    doc_batch_size: int = 2
    doc_batch_concurrency: int = 2

    # Page level.
    page_batch_size: int = 4
    page_batch_concurrency: int = 2

    # Element level.
    elements_batch_size: int = 16

    # Alternative values kept for reference only (not active):
    #   doc_batch_size: int = 1
    #   doc_batch_concurrency: int = 1
    #   page_batch_size: int = 1
    #   page_batch_concurrency: int = 1
    #
    # Disabled field kept for reference only (not active):
    #   model_concurrency: int = 2
    #
    # To force models into single core: export OMP_NUM_THREADS=1
class DebugSettings(BaseModel):
    """Switches for debug visualizations and pipeline profiling.

    Every flag defaults to ``False``, so no debug behavior is enabled unless
    it is explicitly turned on.
    """

    # Per-stage visualization toggles.
    visualize_cells: bool = False
    visualize_ocr: bool = False
    visualize_layout: bool = False
    visualize_tables: bool = False

    # Toggle for pipeline timing profiling.
    profile_pipeline_timings: bool = False

    # Path used to output debug information.
    # The default is computed once, when this class body is executed, from
    # the working directory at that moment ("<cwd>/debug").
    debug_output_path: str = str(Path.cwd().joinpath("debug"))
class AppSettings(BaseSettings):
    """Top-level application settings, composed of two nested models.

    Neither field declares a default here, so a value for each has to be
    provided when an instance is created.
    """

    # Batch size / concurrency configuration.
    perf: BatchConcurrencySettings

    # Debug visualization and profiling configuration.
    debug: DebugSettings
# Shared module-level settings instance. AppSettings declares no defaults for
# its fields, so both sub-models are constructed explicitly with their own
# default values.
settings = AppSettings(
    perf=BatchConcurrencySettings(),
    debug=DebugSettings(),
)