fix: PermissionError when using tesseract_ocr_cli_model (#496)

Signed-off-by: Gaspard Petit <gaspardpetit@gmail.com>
This commit is contained in:
Gaspard Petit 2024-12-03 04:22:03 -05:00 committed by GitHub
parent 33cff98d36
commit d3f84b2457
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -1,5 +1,6 @@
import io
import logging
import os
import tempfile
from subprocess import DEVNULL, PIPE, Popen
from typing import Iterable, Optional, Tuple
@ -130,14 +131,17 @@ class TesseractOcrCliModel(BaseOcrModel):
high_res_image = page._backend.get_page_image(
scale=self.scale, cropbox=ocr_rect
)
with tempfile.NamedTemporaryFile(
suffix=".png", mode="w"
) as image_file:
fname = image_file.name
high_res_image.save(fname)
try:
with tempfile.NamedTemporaryFile(
suffix=".png", mode="w+b", delete=False
) as image_file:
fname = image_file.name
high_res_image.save(image_file)
df = self._run_tesseract(fname)
finally:
if os.path.exists(fname):
os.remove(fname)
# _log.info(df)