fix: PermissionError when using tesseract_ocr_cli_model (#496)

Signed-off-by: Gaspard Petit <gaspardpetit@gmail.com>
This commit is contained in:
Gaspard Petit 2024-12-03 04:22:03 -05:00 committed by GitHub
parent 33cff98d36
commit d3f84b2457
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -1,5 +1,6 @@
import io import io
import logging import logging
import os
import tempfile import tempfile
from subprocess import DEVNULL, PIPE, Popen from subprocess import DEVNULL, PIPE, Popen
from typing import Iterable, Optional, Tuple from typing import Iterable, Optional, Tuple
@ -130,14 +131,17 @@ class TesseractOcrCliModel(BaseOcrModel):
high_res_image = page._backend.get_page_image( high_res_image = page._backend.get_page_image(
scale=self.scale, cropbox=ocr_rect scale=self.scale, cropbox=ocr_rect
) )
try:
with tempfile.NamedTemporaryFile( with tempfile.NamedTemporaryFile(
suffix=".png", mode="w" suffix=".png", mode="w+b", delete=False
) as image_file: ) as image_file:
fname = image_file.name fname = image_file.name
high_res_image.save(fname) high_res_image.save(image_file)
df = self._run_tesseract(fname) df = self._run_tesseract(fname)
finally:
if os.path.exists(fname):
os.remove(fname)
# _log.info(df) # _log.info(df)