mirror of
https://github.com/OCA/knowledge.git
synced 2025-07-28 03:16:29 -06:00
[REF] attachment_indexation_ocr: refactor test class for inheritance
This commit is contained in:
parent
6196f307ee
commit
ecadc83a9d
@ -1,3 +1,4 @@
|
|||||||
# © 2016 Therp BV <http://therp.nl>
|
# © 2016 Therp BV <http://therp.nl>
|
||||||
# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html).
|
# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html).
|
||||||
|
from . import common
|
||||||
from . import test_document_ocr
|
from . import test_document_ocr
|
||||||
|
45
attachment_indexation_ocr/tests/common.py
Normal file
45
attachment_indexation_ocr/tests/common.py
Normal file
@ -0,0 +1,45 @@
|
|||||||
|
# Copyright 2023 len-foss/Financial Way
|
||||||
|
# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html).
|
||||||
|
|
||||||
|
import subprocess
|
||||||
|
from io import BytesIO
|
||||||
|
|
||||||
|
from PIL import Image, ImageDraw, ImageFont
|
||||||
|
|
||||||
|
from odoo.tests.common import TransactionCase
|
||||||
|
|
||||||
|
from ..models.ir_attachment import _MARKER_PHRASE
|
||||||
|
|
||||||
|
|
||||||
|
class TestOcrCase(TransactionCase):
|
||||||
|
@classmethod
|
||||||
|
def _get_some_system_font(cls):
|
||||||
|
"""Get a font that is available on the system"""
|
||||||
|
output = subprocess.check_output(["fc-list"])
|
||||||
|
for line in output.splitlines():
|
||||||
|
line = line.decode("utf-8")
|
||||||
|
if "otf" in line.lower() and "roman" in line.lower():
|
||||||
|
return line.split(":")[0]
|
||||||
|
raise RuntimeError("No suitable font found!")
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def _get_image_data(cls, frmt):
|
||||||
|
test_image = Image.new("RGB", (200, 30))
|
||||||
|
draw = ImageDraw.Draw(test_image)
|
||||||
|
font = ImageFont.truetype(cls.font_path, 24)
|
||||||
|
draw.text((3, 3), cls.result_string, font=font)
|
||||||
|
data = BytesIO()
|
||||||
|
test_image.save(data, frmt)
|
||||||
|
return data.getvalue()
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def setUpClass(cls):
|
||||||
|
super(TestOcrCase, cls).setUpClass()
|
||||||
|
|
||||||
|
cls.font_path = cls._get_some_system_font()
|
||||||
|
cls.ir_config_parameter_key = "ocr.synchronous"
|
||||||
|
cls.result_string = "Hello world"
|
||||||
|
cls.data_png = cls._get_image_data("png")
|
||||||
|
cls.marker = _MARKER_PHRASE
|
||||||
|
|
||||||
|
cls.attachment_ocr = cls.env["ir.attachment"].with_context(ocr_force=True)
|
@ -1,73 +1,37 @@
|
|||||||
# © 2016 Therp BV <http://therp.nl>
|
# © 2016 Therp BV <http://therp.nl>
|
||||||
|
# Copyright 2023 len-foss/Financial Way
|
||||||
# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html).
|
# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html).
|
||||||
import base64
|
import base64
|
||||||
import subprocess
|
|
||||||
from io import BytesIO
|
|
||||||
|
|
||||||
from PIL import Image, ImageDraw, ImageFont
|
from .common import TestOcrCase
|
||||||
|
|
||||||
from odoo.tests.common import TransactionCase
|
|
||||||
|
|
||||||
from ..models.ir_attachment import _MARKER_PHRASE
|
|
||||||
|
|
||||||
|
|
||||||
def _get_some_system_font():
|
class TestOcr(TestOcrCase):
|
||||||
"""Get a font that is available on the system"""
|
|
||||||
output = subprocess.check_output(["fc-list"])
|
|
||||||
for line in output.splitlines():
|
|
||||||
line = line.decode("utf-8")
|
|
||||||
if "otf" in line.lower() and "roman" in line.lower():
|
|
||||||
return line.split(":")[0]
|
|
||||||
raise RuntimeError("No suitable font found!")
|
|
||||||
|
|
||||||
|
|
||||||
font_path = _get_some_system_font()
|
|
||||||
ir_config_parameter_key = "ocr.synchronous"
|
|
||||||
result_string = "Hello world"
|
|
||||||
|
|
||||||
|
|
||||||
def _get_image_data(frmt="png"):
|
|
||||||
test_image = Image.new("RGB", (200, 30))
|
|
||||||
draw = ImageDraw.Draw(test_image)
|
|
||||||
draw.text((3, 3), result_string, font=ImageFont.truetype(font_path, 24))
|
|
||||||
data = BytesIO()
|
|
||||||
test_image.save(data, frmt)
|
|
||||||
return data.getvalue()
|
|
||||||
|
|
||||||
|
|
||||||
class TestDocumentOcr(TransactionCase):
|
|
||||||
def test_document_ocr_png(self):
|
def test_document_ocr_png(self):
|
||||||
self.env["ir.config_parameter"].set_param(ir_config_parameter_key, "True")
|
result = self.attachment_ocr._index(self.data_png, "image/png")
|
||||||
bin_data = _get_image_data("png")
|
self.assertEqual(result.strip(), self.result_string)
|
||||||
result = self.env["ir.attachment"]._index(bin_data, "image/png")
|
|
||||||
self.assertEqual(result.strip(), result_string)
|
|
||||||
|
|
||||||
def test_document_ocr_ppm(self):
|
def test_document_ocr_ppm(self):
|
||||||
"""It works on images that don't have a specific mimetype"""
|
"""It works on images that don't have a specific mimetype"""
|
||||||
self.env["ir.config_parameter"].set_param(ir_config_parameter_key, "True")
|
bin_data = self._get_image_data("ppm")
|
||||||
bin_data = _get_image_data("ppm")
|
result = self.attachment_ocr._index(bin_data, "application/octet-stream")
|
||||||
result = self.env["ir.attachment"]._index(bin_data, "application/octet-stream")
|
self.assertEqual(result.strip(), self.result_string)
|
||||||
self.assertEqual(result.strip(), result_string)
|
|
||||||
|
|
||||||
def test_document_ocr_pdf(self):
|
def test_document_ocr_pdf(self):
|
||||||
self.env["ir.config_parameter"].set_param(ir_config_parameter_key, "True")
|
bin_data = self._get_image_data("pdf")
|
||||||
bin_data = _get_image_data("pdf")
|
result = self.attachment_ocr._index(bin_data, "application/pdf")
|
||||||
result = self.env["ir.attachment"]._index(bin_data, "application/pdf")
|
self.assertEqual(result.strip(), self.result_string)
|
||||||
self.assertEqual(result.strip(), result_string)
|
|
||||||
|
|
||||||
def test_document_ocr_cron(self):
|
def test_document_ocr_cron(self):
|
||||||
self.env["ir.config_parameter"].set_param(ir_config_parameter_key, "False")
|
vals = {"name": "testattachment", "datas": base64.b64encode(self.data_png)}
|
||||||
bin_data = _get_image_data("png")
|
|
||||||
vals = {"name": "testattachment", "datas": base64.b64encode(bin_data)}
|
|
||||||
attachment = self.env["ir.attachment"].create(vals)
|
attachment = self.env["ir.attachment"].create(vals)
|
||||||
self.assertEqual(attachment.index_content, _MARKER_PHRASE)
|
self.assertEqual(attachment.index_content, self.marker)
|
||||||
attachment._ocr_cron()
|
attachment._ocr_cron()
|
||||||
self.assertEqual(attachment.index_content.strip(), result_string)
|
self.assertEqual(attachment.index_content.strip(), self.result_string)
|
||||||
|
|
||||||
def test_document_ocr_lang(self):
|
def test_document_ocr_lang(self):
|
||||||
"""We can pass an ocr_lang context key to help text detection"""
|
"""We can pass an ocr_lang context key to help text detection"""
|
||||||
self.env["ir.config_parameter"].set_param(ir_config_parameter_key, "True")
|
bin_data = self._get_image_data("pdf")
|
||||||
bin_data = _get_image_data("pdf")
|
with_lang = self.attachment_ocr.with_context(ocr_lang="eng")
|
||||||
with_lang = self.env["ir.attachment"].with_context(ocr_lang="eng")
|
|
||||||
result = with_lang._index(bin_data, "application/pdf")
|
result = with_lang._index(bin_data, "application/pdf")
|
||||||
self.assertEqual(result.strip(), result_string)
|
self.assertEqual(result.strip(), self.result_string)
|
||||||
|
Loading…
Reference in New Issue
Block a user