ci: add coverage and ruff (#1383)
* add coverage calculation and push

  Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* new codecov version and usage of token

  Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* enable ruff formatter instead of black and isort

  Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* apply ruff lint fixes

  Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* apply ruff unsafe fixes

  Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* add removed imports

  Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* runs 1 on linter issues

  Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* finalize linter fixes

  Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* Update pyproject.toml

  Co-authored-by: Cesar Berrospi Ramis <75900930+ceberam@users.noreply.github.com>
  Signed-off-by: Michele Dolfi <97102151+dolfim-ibm@users.noreply.github.com>

---------

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
Signed-off-by: Michele Dolfi <97102151+dolfim-ibm@users.noreply.github.com>
Co-authored-by: Cesar Berrospi Ramis <75900930+ceberam@users.noreply.github.com>
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
import logging
|
||||
from collections.abc import Iterable
|
||||
from pathlib import Path
|
||||
from typing import Iterable, Optional, Type
|
||||
from typing import Optional, Type
|
||||
|
||||
from docling_core.types.doc import BoundingBox, CoordOrigin
|
||||
from docling_core.types.doc.page import BoundingRectangle, TextCell
|
||||
@@ -37,9 +38,6 @@ class TesseractOcrModel(BaseOcrModel):
|
||||
self.options: TesseractOcrOptions
|
||||
|
||||
self.scale = 3 # multiplier for 72 dpi == 216 dpi.
|
||||
self.reader = None
|
||||
self.osd_reader = None
|
||||
self.script_readers: dict[str, tesserocr.PyTessBaseAPI] = {}
|
||||
|
||||
if self.enabled:
|
||||
install_errmsg = (
|
||||
@@ -64,7 +62,7 @@ class TesseractOcrModel(BaseOcrModel):
|
||||
raise ImportError(install_errmsg)
|
||||
try:
|
||||
tesseract_version = tesserocr.tesseract_version()
|
||||
except:
|
||||
except Exception:
|
||||
raise ImportError(install_errmsg)
|
||||
|
||||
_, self._tesserocr_languages = tesserocr.get_languages()
|
||||
@@ -75,7 +73,7 @@ class TesseractOcrModel(BaseOcrModel):
|
||||
_log.debug("Initializing TesserOCR: %s", tesseract_version)
|
||||
lang = "+".join(self.options.lang)
|
||||
|
||||
if any([l.startswith("script/") for l in self._tesserocr_languages]):
|
||||
if any(lang.startswith("script/") for lang in self._tesserocr_languages):
|
||||
self.script_prefix = "script/"
|
||||
else:
|
||||
self.script_prefix = ""
|
||||
@@ -86,6 +84,10 @@ class TesseractOcrModel(BaseOcrModel):
|
||||
"oem": tesserocr.OEM.DEFAULT,
|
||||
}
|
||||
|
||||
self.reader = None
|
||||
self.osd_reader = None
|
||||
self.script_readers: dict[str, tesserocr.PyTessBaseAPI] = {}
|
||||
|
||||
if self.options.path is not None:
|
||||
tesserocr_kwargs["path"] = self.options.path
|
||||
|
||||
|
||||
Reference in New Issue
Block a user