feat: Add pipeline timings and toggle visualization, establish debug settings (#183)
* Add settings to turn visualization on or off Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Add profiling code to all models Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Refactor and fix profiling codes Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Visualization codes output PNG to debug dir Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Fixes for time logging Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Optimize imports Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Update lockfile Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Add start_timestamps to ProfilingItem Signed-off-by: Christoph Auer <cau@zurich.ibm.com> --------- Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
@@ -5,9 +5,10 @@ from docling.backend.abstract_backend import (
|
||||
DeclarativeDocumentBackend,
|
||||
)
|
||||
from docling.datamodel.base_models import ConversionStatus
|
||||
from docling.datamodel.document import ConversionResult, InputDocument
|
||||
from docling.datamodel.document import ConversionResult
|
||||
from docling.datamodel.pipeline_options import PipelineOptions
|
||||
from docling.pipeline.base_pipeline import BasePipeline
|
||||
from docling.utils.profiling import ProfilingScope, TimeRecorder
|
||||
|
||||
_log = logging.getLogger(__name__)
|
||||
|
||||
@@ -22,13 +23,11 @@ class SimplePipeline(BasePipeline):
|
||||
def __init__(self, pipeline_options: PipelineOptions):
|
||||
super().__init__(pipeline_options)
|
||||
|
||||
def _build_document(
|
||||
self, in_doc: InputDocument, conv_res: ConversionResult
|
||||
) -> ConversionResult:
|
||||
def _build_document(self, conv_res: ConversionResult) -> ConversionResult:
|
||||
|
||||
if not isinstance(in_doc._backend, DeclarativeDocumentBackend):
|
||||
if not isinstance(conv_res.input._backend, DeclarativeDocumentBackend):
|
||||
raise RuntimeError(
|
||||
f"The selected backend {type(in_doc._backend).__name__} for {in_doc.file} is not a declarative backend. "
|
||||
f"The selected backend {type(conv_res.input._backend).__name__} for {conv_res.input.file} is not a declarative backend. "
|
||||
f"Can not convert this with simple pipeline. "
|
||||
f"Please check your format configuration on DocumentConverter."
|
||||
)
|
||||
@@ -38,13 +37,11 @@ class SimplePipeline(BasePipeline):
|
||||
# Instead of running a page-level pipeline to build up the document structure,
|
||||
# the backend is expected to be of type DeclarativeDocumentBackend, which can output
|
||||
# a DoclingDocument straight.
|
||||
|
||||
conv_res.document = in_doc._backend.convert()
|
||||
with TimeRecorder(conv_res, "doc_build", scope=ProfilingScope.DOCUMENT):
|
||||
conv_res.document = conv_res.input._backend.convert()
|
||||
return conv_res
|
||||
|
||||
def _determine_status(
|
||||
self, in_doc: InputDocument, conv_res: ConversionResult
|
||||
) -> ConversionStatus:
|
||||
def _determine_status(self, conv_res: ConversionResult) -> ConversionStatus:
|
||||
# This is called only if the previous steps didn't raise.
|
||||
# Since we don't have anything else to evaluate, we can
|
||||
# safely return SUCCESS.
|
||||
|
||||
Reference in New Issue
Block a user