
* Add settings to turn visualization on or off
  Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
* Add profiling code to all models
  Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
* Refactor and fix profiling codes
  Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
* Visualization codes output PNG to debug dir
  Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
* Fixes for time logging
  Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
* Optimize imports
  Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
* Update lockfile
  Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
* Add start_timestamps to ProfilingItem
  Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
---------
Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
48 lines
1.1 KiB
Python
48 lines
1.1 KiB
Python
import sys
|
|
from pathlib import Path
|
|
|
|
from pydantic import BaseModel
|
|
from pydantic_settings import BaseSettings
|
|
|
|
|
|
class DocumentLimits(BaseModel):
    # Upper bounds on a document's page count and file size.
    # Both default to sys.maxsize, i.e. no practical limit unless overridden.
    # NOTE(review): the unit of max_file_size is not stated here — presumably
    # bytes; confirm against the code that enforces it.
    max_num_pages: int = sys.maxsize
    max_file_size: int = sys.maxsize
|
|
|
|
|
|
class BatchConcurrencySettings(BaseModel):
    # Batch-size and concurrency values for the document, page and element
    # levels. How each value is consumed is decided by the code reading them.

    # Document level.
    doc_batch_size: int = 2
    doc_batch_concurrency: int = 2

    # Page level.
    page_batch_size: int = 4
    page_batch_concurrency: int = 2

    # Element level.
    elements_batch_size: int = 16

    # Alternative values kept for reference (inactive):
    #   doc_batch_size: int = 1
    #   doc_batch_concurrency: int = 1
    #   page_batch_size: int = 1
    #   page_batch_concurrency: int = 1
    #
    #   model_concurrency: int = 2

    # To force models into single core: export OMP_NUM_THREADS=1
|
|
|
|
|
|
class DebugSettings(BaseModel):
    # Debugging switches; every flag is off by default.

    # Per-stage visualization toggles.
    visualize_cells: bool = False
    visualize_ocr: bool = False
    visualize_layout: bool = False
    visualize_tables: bool = False

    # Toggle for pipeline timing profiling.
    profile_pipeline_timings: bool = False

    # Path used to output debug information. The default is computed once,
    # when this class body is executed: "<current working directory>/debug".
    debug_output_path: str = str(Path.cwd().joinpath("debug"))
|
|
|
|
|
|
class AppSettings(BaseSettings):
    # Top-level settings object grouping the two settings sections.
    # Neither field declares a default, so both must be supplied.

    # Batch-size / concurrency section.
    perf: BatchConcurrencySettings

    # Debug / visualization / profiling section.
    debug: DebugSettings
|
|
|
|
|
|
# Module-level settings instance, built from each section's default values.
_default_perf = BatchConcurrencySettings()
_default_debug = DebugSettings()
settings = AppSettings(perf=_default_perf, debug=_default_debug)
|