Docling/docling/utils/profiling.py
Christoph Auer 2a2c65bf4f
feat: Add pipeline timings and toggle visualization, establish debug settings (#183)
* Add settings to turn visualization on or off

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

* Add profiling code to all models

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

* Refactor and fix profiling codes

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

* Visualization codes output PNG to debug dir

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

* Fixes for time logging

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

* Optimize imports

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

* Update lockfile

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

* Add start_timestamps to ProfilingItem

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

---------

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
2024-10-30 15:04:19 +01:00

63 lines
1.7 KiB
Python

import time
from datetime import datetime, timezone
from enum import Enum
from typing import TYPE_CHECKING, List

import numpy as np
from pydantic import BaseModel

from docling.datamodel.settings import settings

if TYPE_CHECKING:
    from docling.datamodel.document import ConversionResult
class ProfilingScope(str, Enum):
    """Label describing the granularity of a profiling measurement.

    The enum mixes in ``str``, so every member is itself a string and
    compares equal to its plain value (``ProfilingScope.PAGE == "page"``).
    """

    # Default scope used by TimeRecorder when none is given.
    PAGE = "page"
    DOCUMENT = "document"
class ProfilingItem(BaseModel):
    """Collected timing samples for a single profiling key.

    Instances are created and filled by ``TimeRecorder``; the methods below
    are thin wrappers that hand ``times`` to the matching numpy reduction.
    """

    # Granularity the samples were taken at.
    scope: ProfilingScope
    # Number of finished measurements (incremented when a TimeRecorder exits).
    count: int = 0
    # Elapsed durations, computed as differences of time.monotonic() readings.
    times: List[float] = []
    # Naive UTC datetimes, one appended each time a TimeRecorder is entered.
    start_timestamps: List[datetime] = []

    def avg(self) -> float:
        """Return ``np.average`` of the recorded durations."""
        samples = self.times
        return np.average(samples)  # type: ignore

    def std(self) -> float:
        """Return ``np.std`` (standard deviation) of the recorded durations."""
        samples = self.times
        return np.std(samples)  # type: ignore

    def mean(self) -> float:
        """Return ``np.mean`` of the recorded durations."""
        samples = self.times
        return np.mean(samples)  # type: ignore

    def percentile(self, perc: float) -> float:
        """Return the ``perc``-th percentile of the recorded durations.

        Args:
            perc: Percentile to compute, passed unchanged to
                ``np.percentile`` as its ``q`` argument.
        """
        samples = self.times
        return np.percentile(samples, perc)  # type: ignore
class TimeRecorder:
    """Context manager that records how long a ``with`` block takes.

    When ``settings.debug.profile_pipeline_timings`` is truthy at
    construction time, entering the block appends a start timestamp to
    ``conv_res.timings[key]`` and leaving it appends the elapsed monotonic
    time and increments the sample count. When the setting is falsy the
    recorder does nothing.

    The setting is read exactly once, in ``__init__``, so ``__enter__`` and
    ``__exit__`` always agree with each other and with the state prepared by
    the constructor, even if the global setting is changed while the
    recorder is alive.
    """

    def __init__(
        self,
        conv_res: "ConversionResult",
        key: str,
        scope: ProfilingScope = ProfilingScope.PAGE,
    ):
        """Prepare a recorder for ``key`` on ``conv_res``.

        Args:
            conv_res: Object whose ``timings`` mapping receives the samples.
            key: Name under which the samples are stored in ``timings``.
            scope: Scope assigned to the ``ProfilingItem`` if one has to be
                created for ``key``; an existing item is left untouched.
        """
        # Snapshot the switch: a recorder either profiles its whole block or
        # none of it, regardless of later changes to the global settings.
        self._enabled = bool(settings.debug.profile_pipeline_timings)
        self.conv_res = conv_res
        self.key = key
        if self._enabled and key not in conv_res.timings:
            conv_res.timings[key] = ProfilingItem(scope=scope)

    def __enter__(self):
        """Start the measurement (if enabled) and return the recorder."""
        if self._enabled:
            self.start = time.monotonic()
            # Same naive-UTC value the deprecated datetime.utcnow() produced,
            # so previously stored timestamps stay comparable.
            started_at = datetime.now(timezone.utc).replace(tzinfo=None)
            self.conv_res.timings[self.key].start_timestamps.append(started_at)
        return self

    def __exit__(self, *args):
        """Store the elapsed time (if enabled).

        Returns ``None``, so an exception raised inside the ``with`` block
        propagates; the sample is still recorded in that case.
        """
        if self._enabled:
            elapsed = time.monotonic() - self.start
            item = self.conv_res.timings[self.key]
            item.times.append(elapsed)
            item.count += 1