fix: Guard against attribute errors in TesseractOcrModel __del__ (#1494)
This moves the initialization of the `reader` and `script_readers` attributes to before the attempt to import tesserocr, so that these attributes always exist when the garbage collection method `__del__` later accesses them. This requires changing the type annotation of the `script_readers` dict values to `Any`, because their actual strong type is a tesserocr type that cannot be referenced before the import. This prevents an attribute error from being raised during garbage collection in cases where the TesseractOcrModel instance did not initialize properly, such as when its initializer raises an `ImportError`.

Signed-off-by: Ben Browning <bbrownin@redhat.com>
This commit is contained in:
parent
cc453961a9
commit
4ab7e9ddfb
@@ -1,7 +1,7 @@
|
||||
import logging
|
||||
from collections.abc import Iterable
|
||||
from pathlib import Path
|
||||
from typing import Optional, Type
|
||||
from typing import Any, Optional, Type
|
||||
|
||||
from docling_core.types.doc import BoundingBox, CoordOrigin
|
||||
from docling_core.types.doc.page import BoundingRectangle, TextCell
|
||||
@@ -38,6 +38,8 @@ class TesseractOcrModel(BaseOcrModel):
|
||||
self.options: TesseractOcrOptions
|
||||
|
||||
self.scale = 3 # multiplier for 72 dpi == 216 dpi.
|
||||
self.reader = None
|
||||
self.script_readers: dict[str, Any] = {}
|
||||
|
||||
if self.enabled:
|
||||
install_errmsg = (
|
||||
@@ -84,9 +86,7 @@ class TesseractOcrModel(BaseOcrModel):
|
||||
"oem": tesserocr.OEM.DEFAULT,
|
||||
}
|
||||
|
||||
self.reader = None
|
||||
self.osd_reader = None
|
||||
self.script_readers: dict[str, tesserocr.PyTessBaseAPI] = {}
|
||||
|
||||
if self.options.path is not None:
|
||||
tesserocr_kwargs["path"] = self.options.path
|
||||
|
Loading…
Reference in New Issue
Block a user