feat: Add pipeline timings and toggle visualization, establish debug settings (#183)
* Add settings to turn visualization on or off Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Add profiling code to all models Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Refactor and fix profiling codes Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Visualization codes output PNG to debug dir Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Fixes for time logging Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Optimize imports Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Update lockfile Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Add start_timestamps to ProfilingItem Signed-off-by: Christoph Auer <cau@zurich.ibm.com> --------- Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
@@ -1,10 +1,14 @@
|
||||
from pathlib import Path
|
||||
from typing import Iterable, Optional
|
||||
|
||||
from PIL import ImageDraw
|
||||
from pydantic import BaseModel
|
||||
|
||||
from docling.datamodel.base_models import Page
|
||||
from docling.datamodel.document import ConversionResult
|
||||
from docling.datamodel.settings import settings
|
||||
from docling.models.base_model import BasePageModel
|
||||
from docling.utils.profiling import TimeRecorder
|
||||
|
||||
|
||||
class PagePreprocessingOptions(BaseModel):
|
||||
@@ -15,14 +19,17 @@ class PagePreprocessingModel(BasePageModel):
|
||||
def __init__(self, options: PagePreprocessingOptions):
|
||||
self.options = options
|
||||
|
||||
def __call__(self, page_batch: Iterable[Page]) -> Iterable[Page]:
|
||||
def __call__(
|
||||
self, conv_res: ConversionResult, page_batch: Iterable[Page]
|
||||
) -> Iterable[Page]:
|
||||
for page in page_batch:
|
||||
assert page._backend is not None
|
||||
if not page._backend.is_valid():
|
||||
yield page
|
||||
else:
|
||||
page = self._populate_page_images(page)
|
||||
page = self._parse_page_cells(page)
|
||||
with TimeRecorder(conv_res, "page_parse"):
|
||||
page = self._populate_page_images(page)
|
||||
page = self._parse_page_cells(conv_res, page)
|
||||
yield page
|
||||
|
||||
# Generate the page image and store it in the page object
|
||||
@@ -43,19 +50,30 @@ class PagePreprocessingModel(BasePageModel):
|
||||
return page
|
||||
|
||||
# Extract and populate the page cells and store it in the page object
|
||||
def _parse_page_cells(self, page: Page) -> Page:
|
||||
def _parse_page_cells(self, conv_res: ConversionResult, page: Page) -> Page:
|
||||
assert page._backend is not None
|
||||
|
||||
page.cells = list(page._backend.get_text_cells())
|
||||
|
||||
# DEBUG code:
|
||||
def draw_text_boxes(image, cells):
|
||||
def draw_text_boxes(image, cells, show: bool = False):
|
||||
draw = ImageDraw.Draw(image)
|
||||
for c in cells:
|
||||
x0, y0, x1, y1 = c.bbox.as_tuple()
|
||||
draw.rectangle([(x0, y0), (x1, y1)], outline="red")
|
||||
image.show()
|
||||
if show:
|
||||
image.show()
|
||||
else:
|
||||
out_path: Path = (
|
||||
Path(settings.debug.debug_output_path)
|
||||
/ f"debug_{conv_res.input.file.stem}"
|
||||
)
|
||||
out_path.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# draw_text_boxes(page.get_image(scale=1.0), cells)
|
||||
out_file = out_path / f"cells_page_{page.page_no:05}.png"
|
||||
image.save(str(out_file), format="png")
|
||||
|
||||
if settings.debug.visualize_cells:
|
||||
draw_text_boxes(page.get_image(scale=1.0), page.cells)
|
||||
|
||||
return page
|
||||
|
||||
Reference in New Issue
Block a user