
* Add settings to turn visualization on or off Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Add profiling code to all models Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Refactor and fix profiling code Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Visualization code outputs PNG to debug dir Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Fixes for time logging Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Optimize imports Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Update lockfile Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Add start_timestamps to ProfilingItem Signed-off-by: Christoph Auer <cau@zurich.ibm.com> --------- Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
63 lines
1.7 KiB
Python
63 lines
1.7 KiB
Python
import time
from datetime import datetime, timezone
from enum import Enum
from typing import TYPE_CHECKING, List

import numpy as np
from pydantic import BaseModel

from docling.datamodel.settings import settings

if TYPE_CHECKING:
    from docling.datamodel.document import ConversionResult
|
|
|
|
|
|
class ProfilingScope(str, Enum):
    """Label describing what a profiling measurement is attached to.

    The enum mixes in ``str``, so every member compares equal to its plain
    string value (``ProfilingScope.PAGE == "page"``).
    """

    # Presumably a measurement taken once per page -- confirm with callers.
    PAGE = "page"

    # Presumably a measurement taken once per document -- confirm with callers.
    DOCUMENT = "document"
|
|
|
|
|
|
class ProfilingItem(BaseModel):
    """Accumulated timing samples for one profiling key.

    ``TimeRecorder`` appends one entry to ``times`` and ``start_timestamps``
    and increments ``count`` for every completed ``with`` block. The helper
    methods summarise ``times`` using NumPy.
    """

    # Scope this item was created with.
    scope: ProfilingScope
    # Number of recorded samples.
    count: int = 0
    # Elapsed durations, one per sample (differences of time.monotonic()).
    times: List[float] = []
    # Timestamp captured at the start of each sample.
    start_timestamps: List[datetime] = []

    def avg(self) -> float:
        """Return ``np.average`` of the recorded durations."""
        samples = self.times
        return np.average(samples)  # type: ignore

    def std(self) -> float:
        """Return ``np.std`` (standard deviation) of the recorded durations."""
        samples = self.times
        return np.std(samples)  # type: ignore

    def mean(self) -> float:
        """Return ``np.mean`` of the recorded durations."""
        samples = self.times
        return np.mean(samples)  # type: ignore

    def percentile(self, perc: float) -> float:
        """Return the ``perc``-th percentile of the recorded durations.

        Args:
            perc: Percentile to compute, forwarded unchanged to
                ``np.percentile``.
        """
        samples = self.times
        return np.percentile(samples, perc)  # type: ignore
|
|
|
|
|
|
class TimeRecorder:
    """Context manager that times its ``with`` body into ``conv_res.timings``.

    Every pass through the ``with`` block appends one elapsed duration
    (seconds, measured with ``time.monotonic``) and one naive UTC start
    timestamp to ``conv_res.timings[key]`` and increments that item's
    ``count``. If ``settings.debug.profile_pipeline_timings`` is falsy when
    the recorder is constructed, the recorder does nothing.
    """

    def __init__(
        self,
        conv_res: "ConversionResult",
        key: str,
        scope: ProfilingScope = ProfilingScope.PAGE,
    ):
        """Prepare a recorder for ``key`` on ``conv_res``.

        Args:
            conv_res: Conversion result whose ``timings`` mapping receives
                the samples.
            key: Name under which the samples are stored in
                ``conv_res.timings``.
            scope: Scope given to the ``ProfilingItem`` created when ``key``
                is not yet present; ignored if the key already exists.
        """
        # Read the flag exactly once. Re-reading it in __enter__/__exit__
        # would let a mid-flight toggle leave the recorder half-initialised
        # (missing ``start`` or a missing timings entry).
        self._enabled = bool(settings.debug.profile_pipeline_timings)

        # Always define the attributes so the object is never partially built.
        self.conv_res = conv_res
        self.key = key
        self.start = 0.0

        if self._enabled and key not in conv_res.timings:
            conv_res.timings[key] = ProfilingItem(scope=scope)

    def __enter__(self):
        """Start the clock and log the start timestamp; return ``self``."""
        if self._enabled:
            self.start = time.monotonic()
            # Same naive UTC value that datetime.utcnow() produced, without
            # calling the API deprecated since Python 3.12.
            started_at = datetime.now(timezone.utc).replace(tzinfo=None)
            self.conv_res.timings[self.key].start_timestamps.append(started_at)
        return self

    def __exit__(self, *args):
        """Store the elapsed time; exceptions from the body still propagate."""
        if self._enabled:
            elapsed = time.monotonic() - self.start
            item = self.conv_res.timings[self.key]
            item.times.append(elapsed)
            item.count += 1
|