feat: Introduce support for GPU Accelerators (#593)
* Upgraded Layout Postprocessing, sending old code back to ERZ Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Implement hierarchical cluster layout processing Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Pass nested cluster processing through full pipeline Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Pass nested clusters through GLM as payload Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Move to_docling_document from ds-glm to this repo Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Clean up imports again Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * feat(Accelerator): Introduce options to control the num_threads and device from API, envvars, CLI. - Introduce the AcceleratorOptions, AcceleratorDevice and use them to set the device where the models run. - Introduce the accelerator_utils with a function to decide the device and resolve the AUTO setting. - Refactor how the docling-ibm-models are called to match the new init signature of models. - Translate the accelerator options to the specific inputs for third-party models. - Extend the docling CLI with parameters to set the num_threads and device. - Add new unit tests. - Write a new example of how to use the accelerator options. * fix: Improve the pydantic objects in the pipeline_options and imports. Signed-off-by: Nikos Livathinos <nli@zurich.ibm.com> * fix: TableStructureModel: Refactor the artifacts path to use the new structure for fast/accurate model Signed-off-by: Nikos Livathinos <nli@zurich.ibm.com> * Updated test ground-truth Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Updated test ground-truth (again), bugfix for empty layout Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * fix: Do proper check to set the device in EasyOCR, RapidOCR. 
Signed-off-by: Nikos Livathinos <nli@zurich.ibm.com> * Rollback changes from main Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Update test gt Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Remove unused debug settings Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Review fixes Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Nail the accelerator defaults for MPS Signed-off-by: Christoph Auer <cau@zurich.ibm.com> --------- Signed-off-by: Christoph Auer <cau@zurich.ibm.com> Signed-off-by: Nikos Livathinos <nli@zurich.ibm.com> Co-authored-by: Christoph Auer <cau@zurich.ibm.com> Co-authored-by: Christoph Auer <60343111+cau-git@users.noreply.github.com>
This commit is contained in:
@@ -6,16 +6,26 @@ from docling_core.types.doc import BoundingBox, CoordOrigin
|
||||
|
||||
from docling.datamodel.base_models import OcrCell, Page
|
||||
from docling.datamodel.document import ConversionResult
|
||||
from docling.datamodel.pipeline_options import RapidOcrOptions
|
||||
from docling.datamodel.pipeline_options import (
|
||||
AcceleratorDevice,
|
||||
AcceleratorOptions,
|
||||
RapidOcrOptions,
|
||||
)
|
||||
from docling.datamodel.settings import settings
|
||||
from docling.models.base_ocr_model import BaseOcrModel
|
||||
from docling.utils.accelerator_utils import decide_device
|
||||
from docling.utils.profiling import TimeRecorder
|
||||
|
||||
_log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class RapidOcrModel(BaseOcrModel):
|
||||
def __init__(self, enabled: bool, options: RapidOcrOptions):
|
||||
def __init__(
|
||||
self,
|
||||
enabled: bool,
|
||||
options: RapidOcrOptions,
|
||||
accelerator_options: AcceleratorOptions,
|
||||
):
|
||||
super().__init__(enabled=enabled, options=options)
|
||||
self.options: RapidOcrOptions
|
||||
|
||||
@@ -30,52 +40,21 @@ class RapidOcrModel(BaseOcrModel):
|
||||
"Alternatively, Docling has support for other OCR engines. See the documentation."
|
||||
)
|
||||
|
||||
# This configuration option will be revamped while introducing device settings for all models.
|
||||
# For the moment we will default to auto and let onnx-runtime pick the best.
|
||||
cls_use_cuda = True
|
||||
rec_use_cuda = True
|
||||
det_use_cuda = True
|
||||
det_use_dml = True
|
||||
cls_use_dml = True
|
||||
rec_use_dml = True
|
||||
|
||||
# # Same as Defaults in RapidOCR
|
||||
# cls_use_cuda = False
|
||||
# rec_use_cuda = False
|
||||
# det_use_cuda = False
|
||||
# det_use_dml = False
|
||||
# cls_use_dml = False
|
||||
# rec_use_dml = False
|
||||
|
||||
# # If we set everything to true onnx-runtime would automatically choose the fastest accelerator
|
||||
# if self.options.device == self.options.Device.AUTO:
|
||||
# cls_use_cuda = True
|
||||
# rec_use_cuda = True
|
||||
# det_use_cuda = True
|
||||
# det_use_dml = True
|
||||
# cls_use_dml = True
|
||||
# rec_use_dml = True
|
||||
|
||||
# # If we set use_cuda to true onnx would use the cuda device available in runtime if no cuda device is available it would run on CPU.
|
||||
# elif self.options.device == self.options.Device.CUDA:
|
||||
# cls_use_cuda = True
|
||||
# rec_use_cuda = True
|
||||
# det_use_cuda = True
|
||||
|
||||
# # If we set use_dml to true onnx would use the dml device available in runtime if no dml device is available it would work on CPU.
|
||||
# elif self.options.device == self.options.Device.DIRECTML:
|
||||
# det_use_dml = True
|
||||
# cls_use_dml = True
|
||||
# rec_use_dml = True
|
||||
# Decide the accelerator devices
|
||||
device = decide_device(accelerator_options.device)
|
||||
use_cuda = str(AcceleratorDevice.CUDA.value).lower() in device
|
||||
use_dml = accelerator_options.device == AcceleratorDevice.AUTO
|
||||
intra_op_num_threads = accelerator_options.num_threads
|
||||
|
||||
self.reader = RapidOCR(
|
||||
text_score=self.options.text_score,
|
||||
cls_use_cuda=cls_use_cuda,
|
||||
rec_use_cuda=rec_use_cuda,
|
||||
det_use_cuda=det_use_cuda,
|
||||
det_use_dml=det_use_dml,
|
||||
cls_use_dml=cls_use_dml,
|
||||
rec_use_dml=rec_use_dml,
|
||||
cls_use_cuda=use_cuda,
|
||||
rec_use_cuda=use_cuda,
|
||||
det_use_cuda=use_cuda,
|
||||
det_use_dml=use_dml,
|
||||
cls_use_dml=use_dml,
|
||||
rec_use_dml=use_dml,
|
||||
intra_op_num_threads=intra_op_num_threads,
|
||||
print_verbose=self.options.print_verbose,
|
||||
det_model_path=self.options.det_model_path,
|
||||
cls_model_path=self.options.cls_model_path,
|
||||
|
||||
Reference in New Issue
Block a user