mirror of
https://github.com/OCA/knowledge.git
synced 2025-07-27 19:08:42 -06:00
[REF] attachment_indexation_ocr: refactor test class for inheritance
This commit is contained in:
parent
6196f307ee
commit
ecadc83a9d
@ -1,3 +1,4 @@
|
||||
# © 2016 Therp BV <http://therp.nl>
|
||||
# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html).
|
||||
from . import common
|
||||
from . import test_document_ocr
|
||||
|
45
attachment_indexation_ocr/tests/common.py
Normal file
45
attachment_indexation_ocr/tests/common.py
Normal file
@ -0,0 +1,45 @@
|
||||
# Copyright 2023 len-foss/Financial Way
|
||||
# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html).
|
||||
|
||||
import subprocess
|
||||
from io import BytesIO
|
||||
|
||||
from PIL import Image, ImageDraw, ImageFont
|
||||
|
||||
from odoo.tests.common import TransactionCase
|
||||
|
||||
from ..models.ir_attachment import _MARKER_PHRASE
|
||||
|
||||
|
||||
class TestOcrCase(TransactionCase):
    """Shared fixtures for the OCR indexation tests.

    ``setUpClass`` stores on the class: the font used for rendering
    (``font_path``), the ``"ocr.synchronous"`` key
    (``ir_config_parameter_key``), the text drawn into the test images
    (``result_string``), a PNG rendering of that text (``data_png``), the
    marker phrase imported from the attachment model (``marker``) and an
    ``ir.attachment`` model whose context carries ``ocr_force=True``
    (``attachment_ocr``).
    """

    @classmethod
    def setUpClass(cls):
        """Build the class-level fixtures once for all test methods."""
        super().setUpClass()

        # Plain constants have no dependencies, so they are set first.
        cls.marker = _MARKER_PHRASE
        cls.result_string = "Hello world"
        cls.ir_config_parameter_key = "ocr.synchronous"

        # Rendering the image needs both the font and the text in place.
        cls.font_path = cls._get_some_system_font()
        cls.data_png = cls._get_image_data("png")

        cls.attachment_ocr = cls.env["ir.attachment"].with_context(ocr_force=True)

    @classmethod
    def _get_some_system_font(cls):
        """Pick a font from the ``fc-list`` output.

        Returns the text before the first ``:`` of the first ``fc-list``
        line that contains both "otf" and "roman" (case-insensitive).

        :raises RuntimeError: when no line matches.
        """
        listing = subprocess.check_output(["fc-list"])
        for raw in listing.splitlines():
            # Decode line by line so only the lines actually inspected
            # have to be valid UTF-8.
            entry = raw.decode("utf-8")
            lowered = entry.lower()
            if "otf" not in lowered or "roman" not in lowered:
                continue
            return entry.partition(":")[0]
        raise RuntimeError("No suitable font found!")

    @classmethod
    def _get_image_data(cls, frmt):
        """Draw ``cls.result_string`` on a 200x30 RGB image and encode it.

        :param frmt: format name handed to ``Image.save``; the tests use
            "png", "ppm" and "pdf".
        :return: the encoded image as bytes.
        """
        canvas = Image.new("RGB", (200, 30))
        typeface = ImageFont.truetype(cls.font_path, 24)
        ImageDraw.Draw(canvas).text((3, 3), cls.result_string, font=typeface)
        buffer = BytesIO()
        canvas.save(buffer, frmt)
        return buffer.getvalue()
|
@ -1,73 +1,37 @@
|
||||
# © 2016 Therp BV <http://therp.nl>
|
||||
# Copyright 2023 len-foss/Financial Way
|
||||
# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html).
|
||||
import base64
|
||||
import subprocess
|
||||
from io import BytesIO
|
||||
|
||||
from PIL import Image, ImageDraw, ImageFont
|
||||
|
||||
from odoo.tests.common import TransactionCase
|
||||
|
||||
from ..models.ir_attachment import _MARKER_PHRASE
|
||||
from .common import TestOcrCase
|
||||
|
||||
|
||||
def _get_some_system_font():
    """Pick a font from the ``fc-list`` output.

    Returns the text before the first ``:`` of the first ``fc-list`` line
    that contains both "otf" and "roman" (case-insensitive).

    :raises RuntimeError: when no line matches.
    """
    listing = subprocess.check_output(["fc-list"])
    # Lazy decoding: a line is only decoded when it is about to be checked.
    decoded_lines = (raw.decode("utf-8") for raw in listing.splitlines())
    for entry in decoded_lines:
        lowered = entry.lower()
        if "otf" in lowered and "roman" in lowered:
            return entry.partition(":")[0]
    raise RuntimeError("No suitable font found!")
|
||||
|
||||
|
||||
# Text drawn into the generated images and compared with the indexing result.
result_string = "Hello world"
# Key the tests pass to ir.config_parameter's set_param().
ir_config_parameter_key = "ocr.synchronous"
# Font handed to ImageFont.truetype(); looked up once at import time.
font_path = _get_some_system_font()
|
||||
|
||||
|
||||
def _get_image_data(frmt="png"):
    """Draw ``result_string`` on a 200x30 RGB image and encode it.

    :param frmt: format name handed to ``Image.save`` (defaults to "png").
    :return: the encoded image as bytes.
    """
    canvas = Image.new("RGB", (200, 30))
    typeface = ImageFont.truetype(font_path, 24)
    ImageDraw.Draw(canvas).text((3, 3), result_string, font=typeface)
    buffer = BytesIO()
    canvas.save(buffer, frmt)
    return buffer.getvalue()
|
||||
|
||||
|
||||
class TestDocumentOcr(TransactionCase):
|
||||
class TestOcr(TestOcrCase):
|
||||
def test_document_ocr_png(self):
|
||||
self.env["ir.config_parameter"].set_param(ir_config_parameter_key, "True")
|
||||
bin_data = _get_image_data("png")
|
||||
result = self.env["ir.attachment"]._index(bin_data, "image/png")
|
||||
self.assertEqual(result.strip(), result_string)
|
||||
result = self.attachment_ocr._index(self.data_png, "image/png")
|
||||
self.assertEqual(result.strip(), self.result_string)
|
||||
|
||||
def test_document_ocr_ppm(self):
|
||||
"""It works on images that don't have a specific mimetype"""
|
||||
self.env["ir.config_parameter"].set_param(ir_config_parameter_key, "True")
|
||||
bin_data = _get_image_data("ppm")
|
||||
result = self.env["ir.attachment"]._index(bin_data, "application/octet-stream")
|
||||
self.assertEqual(result.strip(), result_string)
|
||||
bin_data = self._get_image_data("ppm")
|
||||
result = self.attachment_ocr._index(bin_data, "application/octet-stream")
|
||||
self.assertEqual(result.strip(), self.result_string)
|
||||
|
||||
def test_document_ocr_pdf(self):
|
||||
self.env["ir.config_parameter"].set_param(ir_config_parameter_key, "True")
|
||||
bin_data = _get_image_data("pdf")
|
||||
result = self.env["ir.attachment"]._index(bin_data, "application/pdf")
|
||||
self.assertEqual(result.strip(), result_string)
|
||||
bin_data = self._get_image_data("pdf")
|
||||
result = self.attachment_ocr._index(bin_data, "application/pdf")
|
||||
self.assertEqual(result.strip(), self.result_string)
|
||||
|
||||
def test_document_ocr_cron(self):
|
||||
self.env["ir.config_parameter"].set_param(ir_config_parameter_key, "False")
|
||||
bin_data = _get_image_data("png")
|
||||
vals = {"name": "testattachment", "datas": base64.b64encode(bin_data)}
|
||||
vals = {"name": "testattachment", "datas": base64.b64encode(self.data_png)}
|
||||
attachment = self.env["ir.attachment"].create(vals)
|
||||
self.assertEqual(attachment.index_content, _MARKER_PHRASE)
|
||||
self.assertEqual(attachment.index_content, self.marker)
|
||||
attachment._ocr_cron()
|
||||
self.assertEqual(attachment.index_content.strip(), result_string)
|
||||
self.assertEqual(attachment.index_content.strip(), self.result_string)
|
||||
|
||||
def test_document_ocr_lang(self):
|
||||
"""We can pass an ocr_lang context key to help text detection"""
|
||||
self.env["ir.config_parameter"].set_param(ir_config_parameter_key, "True")
|
||||
bin_data = _get_image_data("pdf")
|
||||
with_lang = self.env["ir.attachment"].with_context(ocr_lang="eng")
|
||||
bin_data = self._get_image_data("pdf")
|
||||
with_lang = self.attachment_ocr.with_context(ocr_lang="eng")
|
||||
result = with_lang._index(bin_data, "application/pdf")
|
||||
self.assertEqual(result.strip(), result_string)
|
||||
self.assertEqual(result.strip(), self.result_string)
|
||||
|
Loading…
Reference in New Issue
Block a user