feat: Add pipeline timings and toggle visualization, establish debug settings (#183)

* Add settings to turn visualization on or off

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

* Add profiling code to all models

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

* Refactor and fix profiling code

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

* Visualization code outputs PNG to debug dir

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

* Fixes for time logging

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

* Optimize imports

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

* Update lockfile

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

* Add start_timestamps to ProfilingItem

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

---------

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
Christoph Auer
2024-10-30 15:04:19 +01:00
committed by GitHub
parent 94a5290789
commit 2a2c65bf4f
23 changed files with 998 additions and 771 deletions

View File

@@ -1,6 +1,7 @@
import copy
import logging
from abc import abstractmethod
from pathlib import Path
from typing import Iterable, List
import numpy as np
@@ -10,12 +11,15 @@ from rtree import index
from scipy.ndimage import find_objects, label
from docling.datamodel.base_models import OcrCell, Page
from docling.datamodel.document import ConversionResult
from docling.datamodel.pipeline_options import OcrOptions
from docling.datamodel.settings import settings
from docling.models.base_model import BasePageModel
_log = logging.getLogger(__name__)
class BaseOcrModel:
class BaseOcrModel(BasePageModel):
def __init__(self, enabled: bool, options: OcrOptions):
self.enabled = enabled
self.options = options
@@ -113,7 +117,7 @@ class BaseOcrModel:
]
return filtered_ocr_cells
def draw_ocr_rects_and_cells(self, page, ocr_rects):
def draw_ocr_rects_and_cells(self, conv_res, page, ocr_rects, show: bool = False):
image = copy.deepcopy(page.image)
draw = ImageDraw.Draw(image, "RGBA")
@@ -130,8 +134,21 @@ class BaseOcrModel:
if isinstance(tc, OcrCell):
color = "magenta"
draw.rectangle([(x0, y0), (x1, y1)], outline=color)
image.show()
if show:
image.show()
else:
out_path: Path = (
Path(settings.debug.debug_output_path)
/ f"debug_{conv_res.input.file.stem}"
)
out_path.mkdir(parents=True, exist_ok=True)
out_file = out_path / f"ocr_page_{page.page_no:05}.png"
image.save(str(out_file), format="png")
@abstractmethod
def __call__(self, page_batch: Iterable[Page]) -> Iterable[Page]:
def __call__(
self, conv_res: ConversionResult, page_batch: Iterable[Page]
) -> Iterable[Page]:
pass