[REF] attachment_indexation_ocr: refactor test class for inheritance

This commit is contained in:
len 2023-09-07 14:08:18 +02:00
parent 6196f307ee
commit ecadc83a9d
3 changed files with 63 additions and 53 deletions

View File

@ -1,3 +1,4 @@
# © 2016 Therp BV <http://therp.nl>
# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html).
from . import common
from . import test_document_ocr

View File

@ -0,0 +1,45 @@
# Copyright 2023 len-foss/Financial Way
# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html).
import subprocess
from io import BytesIO
from PIL import Image, ImageDraw, ImageFont
from odoo.tests.common import TransactionCase
from ..models.ir_attachment import _MARKER_PHRASE
class TestOcrCase(TransactionCase):
    """Shared fixture for the OCR indexation tests.

    ``setUpClass`` renders a small PNG containing ``result_string`` and
    publishes it, along with an ``ir.attachment`` model carrying the
    ``ocr_force`` context key, as class attributes for subclasses to reuse.
    """

    @classmethod
    def _get_some_system_font(cls):
        """Return the file path of a font that is available on the system.

        The output of ``fc-list`` is scanned line by line; the first line
        mentioning both "otf" and "roman" (case-insensitively) wins, and the
        text before its first colon is returned.

        Raises:
            RuntimeError: if no line of the ``fc-list`` output qualifies.
        """
        listing = subprocess.check_output(["fc-list"])
        for raw_entry in listing.splitlines():
            entry = raw_entry.decode("utf-8")
            lowered = entry.lower()
            if "otf" not in lowered or "roman" not in lowered:
                continue
            return entry.split(":")[0]
        raise RuntimeError("No suitable font found!")

    @classmethod
    def _get_image_data(cls, frmt):
        """Draw ``cls.result_string`` on a 200x30 RGB image.

        Args:
            frmt: image format name handed to ``Image.save``.

        Returns:
            The serialized image as bytes.
        """
        canvas = Image.new("RGB", (200, 30))
        typeface = ImageFont.truetype(cls.font_path, 24)
        ImageDraw.Draw(canvas).text((3, 3), cls.result_string, font=typeface)
        buffer = BytesIO()
        canvas.save(buffer, frmt)
        return buffer.getvalue()

    @classmethod
    def setUpClass(cls):
        """Prepare the font, sample image and model shared by all tests."""
        super().setUpClass()
        cls.ir_config_parameter_key = "ocr.synchronous"
        cls.marker = _MARKER_PHRASE
        # font_path and result_string must exist before the image is
        # rendered: _get_image_data reads both from the class.
        cls.font_path = cls._get_some_system_font()
        cls.result_string = "Hello world"
        cls.data_png = cls._get_image_data("png")
        cls.attachment_ocr = cls.env["ir.attachment"].with_context(ocr_force=True)

View File

@ -1,73 +1,37 @@
# © 2016 Therp BV <http://therp.nl>
# Copyright 2023 len-foss/Financial Way
# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html).
import base64
import subprocess
from io import BytesIO
from PIL import Image, ImageDraw, ImageFont
from odoo.tests.common import TransactionCase
from ..models.ir_attachment import _MARKER_PHRASE
from .common import TestOcrCase
def _get_some_system_font():
    """Return the file path of a font that is available on the system.

    Scans the output of ``fc-list`` line by line and returns the text before
    the first colon of the first line that mentions both "otf" and "roman"
    (case-insensitively).

    Raises:
        RuntimeError: if no line of the ``fc-list`` output qualifies.
    """
    listing = subprocess.check_output(["fc-list"])
    for raw_entry in listing.splitlines():
        entry = raw_entry.decode("utf-8")
        lowered = entry.lower()
        if "otf" not in lowered or "roman" not in lowered:
            continue
        return entry.split(":")[0]
    raise RuntimeError("No suitable font found!")
# Resolved once, when this module is imported; _get_some_system_font raises
# RuntimeError if fc-list reports no matching font.
font_path = _get_some_system_font()
# Name of the ir.config_parameter that the tests set to "True" or "False".
ir_config_parameter_key = "ocr.synchronous"
# Text drawn into the generated images and expected back from indexation.
result_string = "Hello world"
def _get_image_data(frmt="png"):
    """Draw the module-level ``result_string`` on a 200x30 RGB image.

    Args:
        frmt: image format name handed to ``Image.save``.

    Returns:
        The serialized image as bytes.
    """
    canvas = Image.new("RGB", (200, 30))
    typeface = ImageFont.truetype(font_path, 24)
    ImageDraw.Draw(canvas).text((3, 3), result_string, font=typeface)
    buffer = BytesIO()
    canvas.save(buffer, frmt)
    return buffer.getvalue()
class TestOcr(TestOcrCase):
    """OCR indexation tests built on the shared ``TestOcrCase`` fixture.

    Direct ``_index`` calls go through ``self.attachment_ocr`` (the
    ``ir.attachment`` model with ``ocr_force=True`` in its context), and the
    sample image, expected text and marker phrase are read from the fixture's
    class attributes.
    """

    def test_document_ocr_png(self):
        """The text drawn into the fixture PNG is recognized."""
        result = self.attachment_ocr._index(self.data_png, "image/png")
        self.assertEqual(result.strip(), self.result_string)

    def test_document_ocr_ppm(self):
        """It works on images that don't have a specific mimetype"""
        bin_data = self._get_image_data("ppm")
        result = self.attachment_ocr._index(bin_data, "application/octet-stream")
        self.assertEqual(result.strip(), self.result_string)

    def test_document_ocr_pdf(self):
        """The text is recognized when the image is saved as a PDF."""
        bin_data = self._get_image_data("pdf")
        result = self.attachment_ocr._index(bin_data, "application/pdf")
        self.assertEqual(result.strip(), self.result_string)

    def test_document_ocr_cron(self):
        """A new attachment holds the marker until ``_ocr_cron`` has run."""
        vals = {"name": "testattachment", "datas": base64.b64encode(self.data_png)}
        # Created through the plain model (no ocr_force context): the index
        # content is expected to be the marker phrase at this point.
        attachment = self.env["ir.attachment"].create(vals)
        self.assertEqual(attachment.index_content, self.marker)
        attachment._ocr_cron()
        self.assertEqual(attachment.index_content.strip(), self.result_string)

    def test_document_ocr_lang(self):
        """We can pass an ocr_lang context key to help text detection"""
        bin_data = self._get_image_data("pdf")
        with_lang = self.attachment_ocr.with_context(ocr_lang="eng")
        result = with_lang._index(bin_data, "application/pdf")
        self.assertEqual(result.strip(), self.result_string)