perf: Move expensive imports closer to usage (#1863)
* Move expensive imports closer to usage Signed-off-by: William Easton <bill.easton@elastic.co> * DCO Remediation Commit for William Easton <bill.easton@elastic.co> I, William Easton <bill.easton@elastic.co>, hereby add my Signed-off-by to this commit: 8a7412ce5bb131a01bb6403067aeb948c9093b0b Signed-off-by: William Easton <bill.easton@elastic.co> * formatting fixes Signed-off-by: William Easton <bill.easton@elastic.co> * DCO Remediation Commit for William Easton <bill.easton@elastic.co> I, William Easton <bill.easton@elastic.co>, hereby add my Signed-off-by to this commit: 8a7412ce5bb131a01bb6403067aeb948c9093b0b I, William Easton <bill.easton@elastic.co>, hereby add my Signed-off-by to this commit: 963e34325071db5e844841f10c27b396a054a0a1 Signed-off-by: William Easton <bill.easton@elastic.co> * Fix baseocrmodel test issue Signed-off-by: William Easton <bill.easton@elastic.co> --------- Signed-off-by: William Easton <bill.easton@elastic.co>
This commit is contained in:
@@ -3,14 +3,13 @@ import logging
|
||||
from abc import abstractmethod
|
||||
from collections.abc import Iterable
|
||||
from pathlib import Path
|
||||
from typing import List, Optional, Type
|
||||
from typing import TYPE_CHECKING, List, Optional, Type
|
||||
|
||||
import numpy as np
|
||||
from docling_core.types.doc import BoundingBox, CoordOrigin
|
||||
from docling_core.types.doc.page import TextCell
|
||||
from PIL import Image, ImageDraw
|
||||
from rtree import index
|
||||
from scipy.ndimage import binary_dilation, find_objects, label
|
||||
|
||||
from docling.datamodel.accelerator_options import AcceleratorOptions
|
||||
from docling.datamodel.base_models import Page
|
||||
@@ -31,11 +30,16 @@ class BaseOcrModel(BasePageModel, BaseModelWithOptions):
|
||||
options: OcrOptions,
|
||||
accelerator_options: AcceleratorOptions,
|
||||
):
|
||||
# Make sure any delay/error from import occurs on ocr model init and not first use
|
||||
from scipy.ndimage import binary_dilation, find_objects, label
|
||||
|
||||
self.enabled = enabled
|
||||
self.options = options
|
||||
|
||||
# Computes the optimum amount and coordinates of rectangles to OCR on a given page
|
||||
def get_ocr_rects(self, page: Page) -> List[BoundingBox]:
|
||||
from scipy.ndimage import binary_dilation, find_objects, label
|
||||
|
||||
BITMAP_COVERAGE_TRESHOLD = 0.75
|
||||
assert page.size is not None
|
||||
|
||||
|
||||
Reference in New Issue
Block a user