mirror of
https://github.com/OCA/knowledge.git
synced 2025-07-26 18:38:41 -06:00
Remove migrated document_ocr module
This commit is contained in:
parent
f80491a589
commit
3b3c60b348
@ -1,101 +0,0 @@
|
|||||||
.. image:: https://img.shields.io/badge/licence-AGPL--3-blue.svg
|
|
||||||
:target: http://www.gnu.org/licenses/agpl-3.0-standalone.html
|
|
||||||
:alt: License: AGPL-3
|
|
||||||
|
|
||||||
=================
|
|
||||||
OCR for documents
|
|
||||||
=================
|
|
||||||
|
|
||||||
This module was written to make uploaded documents, for example scans, searchable by running OCR on them.
|
|
||||||
|
|
||||||
It supports all image formats `Pillow supports <http://pillow.readthedocs.io/en/3.2.x/handbook/image-file-formats.html>`_ for reading and PDFs.
|
|
||||||
|
|
||||||
Installation
|
|
||||||
============
|
|
||||||
|
|
||||||
To install this module, you need to:
|
|
||||||
|
|
||||||
#. install tesseract and the language(s) your documents use
|
|
||||||
#. if you want to support OCR on PDFs, install imagemagick
|
|
||||||
#. install the module itself
|
|
||||||
|
|
||||||
On a Debian or Ubuntu system you would typically run::
|
|
||||||
|
|
||||||
$ sudo apt-get install tesseract-ocr imagemagick
|
|
||||||
|
|
||||||
|
|
||||||
Configuration
|
|
||||||
=============
|
|
||||||
|
|
||||||
To configure this module, go to:
|
|
||||||
|
|
||||||
#. Settings/Technical/Parameters/System parameters and review the parameters with names document_ocr.*
|
|
||||||
|
|
||||||
Usage
|
|
||||||
=====
|
|
||||||
|
|
||||||
By default, character recognition is done asynchronously by a cronjob at night.
|
|
||||||
This is because the recognition process takes a while and you don't want to make your users wait for the indexation to finish.
|
|
||||||
The interval to run the cronjob can be adjusted to your needs in the ``Scheduled Actions`` menu, under ``Settings``.
|
|
||||||
In case you want to force the OCR to be done immediately, set configuration parameter ``document_ocr.synchronous`` to value ``True``.
|
|
||||||
|
|
||||||
|
|
||||||
By default, the recognition language is set to English.
|
|
||||||
In case you want to use a different default, set configuration parameter ``document_ocr.language`` to the respective language code, e.g. ``por`` for Portuguese.
|
|
||||||
|
|
||||||
|
|
||||||
For PDFs, the pages are first converted to images and OCR is then run on those images. OCR is only attempted on a PDF when the regular text extraction yields fewer than two lines of text.
|
|
||||||
|
|
||||||
|
|
||||||
System parameters used:
|
|
||||||
#. ``document_ocr.synchronous``: bool
|
|
||||||
#. ``document_ocr.language``: string
|
|
||||||
#. ``document_ocr.dpi``: integer
|
|
||||||
#. ``document_ocr.quality``: integer
|
|
||||||
|
|
||||||
|
|
||||||
.. image:: https://odoo-community.org/website/image/ir.attachment/5784_f2813bd/datas
|
|
||||||
:alt: Try me on Runbot
|
|
||||||
:target: https://runbot.odoo-community.org/runbot/118/10.0
|
|
||||||
|
|
||||||
Bug Tracker
|
|
||||||
===========
|
|
||||||
|
|
||||||
Bugs are tracked on `GitHub Issues <https://github.com/OCA/knowledge/issues>`_.
|
|
||||||
In case of trouble, please check there if your issue has already been reported.
|
|
||||||
If you spotted it first, help us smash it by providing detailed and welcome feedback.
|
|
||||||
|
|
||||||
Credits
|
|
||||||
=======
|
|
||||||
|
|
||||||
The actual work
|
|
||||||
---------------
|
|
||||||
|
|
||||||
* `tesseract <https://github.com/tesseract-ocr>`_
|
|
||||||
|
|
||||||
Images
|
|
||||||
------
|
|
||||||
|
|
||||||
* Odoo Community Association: `Icon <https://github.com/OCA/maintainer-tools/blob/master/template/module/static/description/icon.svg>`_.
|
|
||||||
|
|
||||||
Contributors
|
|
||||||
------------
|
|
||||||
|
|
||||||
* Holger Brunn <hbrunn@therp.nl>
|
|
||||||
|
|
||||||
Do not contact contributors directly about help with questions or problems concerning this addon, but use the `community mailing list <mailto:community@mail.odoo.com>`_ or the `appropriate specialized mailinglist <https://odoo-community.org/groups>`_ for help, and the bug tracker linked in `Bug Tracker`_ above for technical issues.
|
|
||||||
|
|
||||||
Maintainer
|
|
||||||
----------
|
|
||||||
|
|
||||||
.. image:: https://odoo-community.org/logo.png
|
|
||||||
:alt: Odoo Community Association
|
|
||||||
:target: https://odoo-community.org
|
|
||||||
|
|
||||||
This module is maintained by the OCA.
|
|
||||||
|
|
||||||
OCA, or the Odoo Community Association, is a nonprofit organization whose
|
|
||||||
mission is to support the collaborative development of Odoo features and
|
|
||||||
promote its widespread use.
|
|
||||||
|
|
||||||
To contribute to this module, please visit https://odoo-community.org.
|
|
@ -1,5 +0,0 @@
|
|||||||
# -*- coding: utf-8 -*-
|
|
||||||
# © 2016 Therp BV <http://therp.nl>
|
|
||||||
# © 2017 ThinkOpen Solutions <https://tkobr.com>
|
|
||||||
# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html).
|
|
||||||
from . import models
|
|
@ -1,27 +0,0 @@
|
|||||||
# -*- coding: utf-8 -*-
|
|
||||||
# © 2016 Therp BV <http://therp.nl>
|
|
||||||
# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html).
|
|
||||||
{
    "name": "OCR for Documents",
    "version": "10.0.1.0.0",
    "author": "Therp BV, Odoo Community Association (OCA), "
              "ThinkOpen Solutions Brasil",
    "license": "AGPL-3",
    "category": "Knowledge Management",
    "summary": "Run character recognition on uploaded files",
    "depends": ["document"],
    # Loaded in this order: scheduled action, default system parameters,
    # then the attachment view extensions.
    "data": [
        "data/ir_cron.xml",
        "data/ir_config_parameter.xml",
        "views/ir_attachment_view.xml",
    ],
    # Executables the module shells out to: ``tesseract`` performs the
    # recognition, ``convert`` turns PDFs into images beforehand.
    "external_dependencies": {
        "bin": ["tesseract", "convert"],
    },
}
|
|
@ -1,21 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
<!-- Default values of the document_ocr.* system parameters.
     noupdate="1": values edited by an administrator are not overwritten
     when the module is updated. -->
<odoo>
    <data noupdate="1">
        <!-- 'True' runs OCR immediately; anything else defers it to the
             scheduled action. -->
        <record id="param_synchronous" model="ir.config_parameter">
            <field name="key">document_ocr.synchronous</field>
            <field name="value">False</field>
        </record>
        <!-- Density handed to the PDF to image conversion. -->
        <record id="param_dpi" model="ir.config_parameter">
            <field name="key">document_ocr.dpi</field>
            <field name="value">300</field>
        </record>
        <!-- Quality handed to the PDF to image conversion. -->
        <record id="param_quality" model="ir.config_parameter">
            <field name="key">document_ocr.quality</field>
            <field name="value">100</field>
        </record>
        <!-- Default tesseract language code for new attachments. -->
        <record id="param_language" model="ir.config_parameter">
            <field name="key">document_ocr.language</field>
            <field name="value">eng</field>
        </record>
    </data>
</odoo>
|
|
@ -1,13 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
<!-- Scheduled action that calls ir.attachment._ocr_cron once a day,
     without a limit on the number of runs (numbercall = -1).
     noupdate="1": an interval adjusted by an administrator is kept when
     the module is updated. -->
<odoo>
    <data noupdate="1">
        <record id="cron" model="ir.cron">
            <field name="name">Run OCR on uploaded documents</field>
            <field name="interval_type">days</field>
            <field name="interval_number">1</field>
            <field name="model">ir.attachment</field>
            <field name="function">_ocr_cron</field>
            <field name="numbercall">-1</field>
        </record>
    </data>
</odoo>
|
|
@ -1,5 +0,0 @@
|
|||||||
# -*- coding: utf-8 -*-
|
|
||||||
# © 2016 Therp BV <http://therp.nl>
|
|
||||||
# © 2017 ThinkOpen Solutions <https://tkobr.com>
|
|
||||||
# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html).
|
|
||||||
from . import ir_attachment
|
|
@ -1,261 +0,0 @@
|
|||||||
# -*- coding: utf-8 -*-
|
|
||||||
# © 2016 Therp BV <http://therp.nl>
|
|
||||||
# © 2017 ThinkOpen Solutions <https://tkobr.com>
|
|
||||||
# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html).
|
|
||||||
|
|
||||||
import io
|
|
||||||
import logging
|
|
||||||
import subprocess
|
|
||||||
from StringIO import StringIO
|
|
||||||
|
|
||||||
import pyPdf
|
|
||||||
from odoo import api, fields, models, _
|
|
||||||
from odoo.exceptions import UserError
|
|
||||||
|
|
||||||
_logger = logging.getLogger(__name__)
|
|
||||||
_MARKER_PHRASE = '[[waiting for OCR]]'
|
|
||||||
# Selection values for the attachment ``language`` field: pairs of
# (language code handed to tesseract via ``-l``, human readable label).
OCR_LANGUAGE = [
    ('afr', 'Afrikaans'),
    ('amh', 'Amharic'),
    ('ara', 'Arabic'),
    ('asm', 'Assamese'),
    ('aze', 'Azerbaijani'),
    ('aze_cyrl', 'Azerbaijani - Cyrillic'),
    ('bel', 'Belarusian'),
    ('ben', 'Bengali'),
    ('bod', 'Tibetan'),
    ('bos', 'Bosnian'),
    ('bul', 'Bulgarian'),
    ('cat', 'Catalan; Valencian'),
    ('ceb', 'Cebuano'),
    ('ces', 'Czech'),
    ('chi_sim', 'Chinese - Simplified'),
    ('chi_tra', 'Chinese - Traditional'),
    ('chr', 'Cherokee'),
    ('cym', 'Welsh'),
    ('dan', 'Danish'),
    ('dan_frak', 'Danish - Fraktur'),
    ('deu', 'German'),
    ('deu_frak', 'German - Fraktur'),
    ('dzo', 'Dzongkha'),
    ('ell', 'Greek, Modern (1453-)'),
    ('eng', 'English'),
    ('enm', 'English, Middle (1100-1500)'),
    ('epo', 'Esperanto'),
    ('equ', 'Math / equation detection module'),
    ('est', 'Estonian'),
    ('eus', 'Basque'),
    ('fas', 'Persian'),
    ('fin', 'Finnish'),
    ('fra', 'French'),
    ('frk', 'Frankish'),
    ('frm', 'French, Middle (ca.1400-1600)'),
    ('gle', 'Irish'),
    ('glg', 'Galician'),
    ('grc', 'Greek, Ancient (to 1453)'),
    ('guj', 'Gujarati'),
    ('hat', 'Haitian; Haitian Creole'),
    ('heb', 'Hebrew'),
    ('hin', 'Hindi'),
    ('hrv', 'Croatian'),
    ('hun', 'Hungarian'),
    ('iku', 'Inuktitut'),
    ('ind', 'Indonesian'),
    ('isl', 'Icelandic'),
    ('ita', 'Italian'),
    ('ita_old', 'Italian - Old'),
    ('jav', 'Javanese'),
    ('jpn', 'Japanese'),
    ('kan', 'Kannada'),
    ('kat', 'Georgian'),
    ('kat_old', 'Georgian - Old'),
    ('kaz', 'Kazakh'),
    ('khm', 'Central Khmer'),
    ('kir', 'Kirghiz; Kyrgyz'),
    ('kor', 'Korean'),
    ('kur', 'Kurdish'),
    ('lao', 'Lao'),
    ('lat', 'Latin'),
    ('lav', 'Latvian'),
    ('lit', 'Lithuanian'),
    ('mal', 'Malayalam'),
    ('mar', 'Marathi'),
    ('mkd', 'Macedonian'),
    ('mlt', 'Maltese'),
    ('msa', 'Malay'),
    ('mya', 'Burmese'),
    ('nep', 'Nepali'),
    ('nld', 'Dutch; Flemish'),
    ('nor', 'Norwegian'),
    ('ori', 'Oriya'),
    ('osd', 'Orientation and script detection module'),
    ('pan', 'Panjabi; Punjabi'),
    ('pol', 'Polish'),
    ('por', 'Portuguese'),
    ('pus', 'Pushto; Pashto'),
    ('ron', 'Romanian; Moldavian; Moldovan'),
    ('rus', 'Russian'),
    ('san', 'Sanskrit'),
    ('sin', 'Sinhala; Sinhalese'),
    ('slk', 'Slovak'),
    ('slk_frak', 'Slovak - Fraktur'),
    ('slv', 'Slovenian'),
    ('spa', 'Spanish; Castilian'),
    ('spa_old', 'Spanish; Castilian - Old'),
    ('sqi', 'Albanian'),
    ('srp', 'Serbian'),
    ('srp_latn', 'Serbian - Latin'),
    ('swa', 'Swahili'),
    ('swe', 'Swedish'),
    ('syr', 'Syriac'),
    ('tam', 'Tamil'),
    ('tel', 'Telugu'),
    ('tgk', 'Tajik'),
    ('tgl', 'Tagalog'),
    ('tha', 'Thai'),
    ('tir', 'Tigrinya'),
    ('tur', 'Turkish'),
    ('uig', 'Uighur; Uyghur'),
    ('ukr', 'Ukrainian'),
    ('urd', 'Urdu'),
    ('uzb', 'Uzbek'),
    ('uzb_cyrl', 'Uzbek - Cyrillic'),
    ('vie', 'Vietnamese'),
    ('yid', 'Yiddish'),
]
|
|
||||||
|
|
||||||
|
|
||||||
class IrAttachment(models.Model):
    """Index attachments through OCR when no text can be extracted.

    Content for which the inherited indexer returns nothing (or just
    ``'image'``) is either passed through tesseract right away or flagged
    with ``_MARKER_PHRASE`` so that ``_ocr_cron`` processes it later.
    """
    _inherit = 'ir.attachment'

    # Tesseract language code used for the recognition; new records take
    # the ``document_ocr.language`` system parameter ('eng' when unset).
    language = fields.Selection(
        OCR_LANGUAGE, 'Language',
        default=lambda self: self.env['ir.config_parameter'].get_param(
            'document_ocr.language', 'eng'))
    # We need to redefine index_content field to be able to update it
    # on the onchange_language()
    index_content = fields.Text('Indexed Content',
                                readonly=False,
                                prefetch=False)
    index_content_rel = fields.Text(related='index_content',
                                    string='Indexed Content Rel')

    @api.onchange('language')
    def onchange_language(self):
        """Re-index the attachment when its OCR language is changed.

        :returns: onchange dictionary carrying the new ``index_content``
            when the attachment has binary content, an empty ``value``
            dictionary otherwise.
        :raises UserError: when the selected language is not reported by
            ``tesseract --list-langs``.
        """
        if not self.language:
            # Nothing selected: there is no language to validate or use.
            return {'value': {}}
        process = subprocess.Popen(['tesseract', '--list-langs'],
                                   stdout=subprocess.PIPE,
                                   stderr=subprocess.PIPE)
        stdout, stderr = process.communicate()
        # Depending on the tesseract release the list is printed on stdout
        # or on stderr, so both streams are searched.
        installed = {
            line.strip() for line in (stdout + '\n' + stderr).splitlines()
        }
        if self.language not in installed:
            # Interpolate after the translation lookup, otherwise the
            # already formatted message never matches a translation.
            raise UserError(_(
                "Language not installed."
                " Please ask your system administrator to"
                " install tesseract '%s' language.") % self.language)
        if self.store_fname:
            bin_data = self._file_read(self.store_fname)
        else:
            bin_data = self.db_datas
        if not bin_data:
            return {'value': {}}
        index_content = self._index(
            bin_data.decode('base64'), self.datas_fname, self.mimetype)
        return {'value': {'index_content': index_content}}

    @api.model
    def _ocr_is_synchronous(self):
        """Tell whether OCR has to run now instead of being deferred.

        :returns: True when the ``document_ocr.synchronous`` parameter is
            ``'True'`` or the context carries a truthy
            ``document_ocr_force`` key.
        """
        synchronous = self.env['ir.config_parameter'].get_param(
            'document_ocr.synchronous', 'False')
        if synchronous == 'True':
            return True
        return bool(self.env.context.get('document_ocr_force'))

    @api.model
    def _index(self, bin_data, datas_fname, mimetype):
        """Fall back to OCR when the inherited indexer finds no text.

        :param bin_data: raw (decoded) file content.
        :param datas_fname: file name, passed on to the inherited indexer.
        :param mimetype: mime type, passed on to the inherited indexer.
        :returns: the inherited index content, the OCR output, or
            ``_MARKER_PHRASE`` when recognition is left to the cron.
        """
        content = super(IrAttachment, self)._index(
            bin_data, datas_fname, mimetype)
        if content and content != 'image':
            return content
        if self._ocr_is_synchronous():
            return self._index_ocr(bin_data)
        return _MARKER_PHRASE

    def _index_ocr(self, bin_data):
        """Run tesseract on image data and return the recognized text.

        :param bin_data: raw image content, piped to tesseract's stdin.
        :returns: whatever tesseract wrote to stdout.
        """
        # This is reached from the @api.model ``_index``, so ``self`` is
        # not guaranteed to be exactly one record: read from the first
        # record, if any, and fall back to the configured default language
        # so that the command line never receives a non-string value.
        record = self[:1]
        language = record.language or self.env[
            'ir.config_parameter'].get_param('document_ocr.language', 'eng')
        _logger.info('OCR IMAGE "%s"...', record.datas_fname)
        process = subprocess.Popen(
            ['tesseract', 'stdin', 'stdout', '-l', language],
            stdin=subprocess.PIPE, stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
        stdout, stderr = process.communicate(bin_data)
        if stderr:
            _logger.error('Error during OCR: %s', stderr)
        return stdout

    def _ocr_convert_to_image(self, bin_data):
        """Convert PDF data to one PNG image using ``convert``.

        :param bin_data: raw PDF content, piped to the converter's stdin.
        :returns: the image data the converter wrote to stdout.
        """
        get_param = self.env['ir.config_parameter'].get_param
        # NOTE(review): these fallbacks only apply when the parameter
        # records are missing; the dpi fallback (500) differs from the
        # value shipped in data/ir_config_parameter.xml (300) - confirm.
        dpi = int(get_param('document_ocr.dpi', '500'))
        quality = int(get_param('document_ocr.quality', '100'))
        process = subprocess.Popen(
            ['convert', '-density', str(dpi),
             '-quality', str(quality),
             '-', '-append', 'png32:-'],
            stdin=subprocess.PIPE, stdout=subprocess.PIPE,
            stderr=subprocess.PIPE)
        stdout, stderr = process.communicate(bin_data)
        if stderr:
            _logger.error('Error converting PDF to image: %s', stderr)
        return stdout

    def _ocr_convert_pdf_page_to_image(self, pdf, pagenum):
        """Convert a single page of an opened PDF to image data.

        :param pdf: ``pyPdf.PdfFileReader`` holding the document.
        :param pagenum: zero based index of the page to convert.
        :returns: image data as returned by ``_ocr_convert_to_image``.
        """
        # Write the page into a PDF of its own so that the converter only
        # gets to see this page.
        single_page = pyPdf.PdfFileWriter()
        single_page.addPage(pdf.getPage(pagenum))
        pdf_bytes = io.BytesIO()
        single_page.write(pdf_bytes)
        return self._ocr_convert_to_image(pdf_bytes.getvalue())

    def _index_pdf(self, bin_data):
        """Index a PDF, using OCR when it holds (nearly) no text.

        :param bin_data: raw PDF content.
        :returns: ``_MARKER_PHRASE`` when recognition is deferred to the
            cron, otherwise the inherited index content, extended with
            the OCR output when that content has fewer than two lines.
        """
        if not self._ocr_is_synchronous():
            return _MARKER_PHRASE
        buf = super(IrAttachment, self)._index_pdf(bin_data)
        if len(buf.split('\n')) >= 2 or not bin_data.startswith('%PDF-'):
            return buf
        # If we got less than 2 lines,
        # run OCR anyway and append to existent text
        try:
            pdf = pyPdf.PdfFileReader(StringIO(bin_data))
            page_count = pdf.getNumPages()
            if page_count > 1:
                datas_fname = self[:1].datas_fname
                for pagenum in range(page_count):
                    _logger.info('OCR PDF "%s" page %d/%d...',
                                 datas_fname, pagenum + 1, page_count)
                    pdf_image = self._ocr_convert_pdf_page_to_image(
                        pdf, pagenum)
                    index_content = self._index_ocr(pdf_image)
                    buf = u'%s\n-- %d --\n%s' % (
                        buf, pagenum + 1, index_content.decode('utf8'))
            else:
                pdf_image = self._ocr_convert_to_image(bin_data)
                index_content = self._index_ocr(pdf_image)
                buf = u'%s\n%s' % (buf, index_content.decode('utf8'))
        except Exception as e:
            # Best effort on purpose: a PDF that cannot be processed keeps
            # whatever has been indexed so far. Log with traceback so the
            # failure can be diagnosed.
            _logger.exception('Error converting PDF to image: %s', e)
        return buf

    @api.model
    def _ocr_cron(self):
        """Run the deferred OCR on every attachment still waiting for it.

        Attachments whose ``index_content`` equals ``_MARKER_PHRASE`` are
        indexed again with ``document_ocr_force`` set in the context and
        the result is written back; attachments without data are skipped.
        """
        pending = self.with_context(document_ocr_force=True).search(
            [('index_content', '=', _MARKER_PHRASE)])
        for this in pending:
            if not this.datas:
                continue
            index_content = this._index(
                this.datas.decode('base64'), this.datas_fname, this.mimetype)
            this.write({
                'index_content': index_content,
            })
|
|
Binary file not shown.
Before Width: | Height: | Size: 9.2 KiB |
@ -1,5 +0,0 @@
|
|||||||
# -*- coding: utf-8 -*-
|
|
||||||
# © 2016 Therp BV <http://therp.nl>
|
|
||||||
# © 2017 ThinkOpen Solutions <https://tkobr.com>
|
|
||||||
# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html).
|
|
||||||
from . import test_document_ocr
|
|
@ -1,62 +0,0 @@
|
|||||||
# -*- coding: utf-8 -*-
|
|
||||||
# © 2016 Therp BV <http://therp.nl>
|
|
||||||
# © 2017 ThinkOpen Solutions <https://tkobr.com>
|
|
||||||
# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html).
|
|
||||||
from StringIO import StringIO
|
|
||||||
|
|
||||||
from PIL import Image, ImageDraw, ImageFont
|
|
||||||
from PIL import PdfImagePlugin, PalmImagePlugin # noqa # pylint: disable=unused-import
|
|
||||||
from odoo.tests.common import TransactionCase
|
|
||||||
|
|
||||||
from ..models.ir_attachment import _MARKER_PHRASE
|
|
||||||
|
|
||||||
|
|
||||||
class TestDocumentOcr(TransactionCase):
    """Check OCR indexing of images and PDFs, the cron and bad input."""

    def _set_synchronous(self, value):
        """Store ``value`` in the ``document_ocr.synchronous`` parameter."""
        self.env['ir.config_parameter'].set_param(
            'document_ocr.synchronous', value)

    def _new_attachment(self, datas_fname, **extra_values):
        """Create the test attachment with the given file name."""
        values = dict(
            extra_values, name='testattachment', datas_fname=datas_fname)
        return self.env['ir.attachment'].create(values)

    @staticmethod
    def _dump(image, file_format, **save_options):
        """Return ``image`` serialized in ``file_format`` as a string."""
        stream = StringIO()
        image.save(stream, file_format, **save_options)
        return stream.getvalue()

    def test_document_ocr(self):
        self._set_synchronous('True')
        font = ImageFont.truetype(
            '/usr/share/fonts/truetype/inconsolata/Inconsolata.otf', 24)
        picture = Image.new('RGB', (200, 30))
        ImageDraw.Draw(picture).text((3, 3), "Hello world", font=font)

        # test a plain image
        png_data = self._dump(picture, 'png')
        attachment = self._new_attachment('test_png.pdf')
        recognized = attachment._index(png_data, 'test.png', None)
        self.assertEqual(recognized.strip(), 'Hello world')

        # should also work for pdfs
        pdf_data = self._dump(picture, 'pdf', resolution=300)
        attachment = self._new_attachment('test_pdf.pdf')
        recognized = attachment._index(pdf_data, 'test.pdf', None)
        self.assertEqual(recognized.strip(), 'Hello world')

        # check cron
        self._set_synchronous('False')
        attachment = self._new_attachment(
            'test_cron.pdf', datas=pdf_data.encode('base64'))
        self.assertEqual(attachment.index_content, _MARKER_PHRASE)
        attachment._ocr_cron()
        self.assertEqual(attachment.index_content.strip(), 'Hello world')

        # and for an unreadable image, we expect an empty string
        self._set_synchronous('True')
        palm_data = self._dump(Image.new('1', (200, 30)), 'palm')
        attachment = self._new_attachment('test_err.palm')
        recognized = attachment._index(palm_data, 'test.palm', None)
        self.assertEqual(recognized, '')
|
|
@ -1,43 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="utf-8"?>
<odoo>

    <!-- Attachment -->
    <record id="view_attachment_form" model="ir.ui.view">
        <field name="model">ir.attachment</field>
        <field name="inherit_id" ref="base.view_attachment_form"/>
        <field name="arch" type="xml">
            <!-- Hide the last inner group of the inherited form... -->
            <xpath expr="(//sheet/group/group)[last()]" position="attributes">
                <attribute name="invisible">1</attribute>
            </xpath>
            <!-- ...and put a read-only view of the indexed content,
                 restricted to base.group_no_one, in front of it. -->
            <xpath expr="(//sheet/group/group)[last()]" position="before">
                <group groups="base.group_no_one" string="Indexed Content" colspan="4">
                    <field name="index_content_rel" readonly="1" nolabel="1"/>
                </group>
            </xpath>
            <!-- store_fname is read by onchange_language, so it has to be
                 part of the form even though it is not shown. -->
            <field name="mimetype" position="after">
                <field name="store_fname" invisible="1"/>
                <field name="language"/>
            </field>
        </field>
    </record>

    <!-- Show the OCR language in the list view. -->
    <record id="view_attachment_tree" model="ir.ui.view">
        <field name="model">ir.attachment</field>
        <field name="inherit_id" ref="base.view_attachment_tree"/>
        <field name="arch" type="xml">
            <field name="type" position="after">
                <field name="language"/>
            </field>
        </field>
    </record>

    <!-- Search on the OCR language and allow grouping by it. -->
    <record id="view_attachment_search" model="ir.ui.view">
        <field name="model">ir.attachment</field>
        <field name="inherit_id" ref="base.view_attachment_search"/>
        <field name="arch" type="xml">
            <field name="name" position="after">
                <field name="language"/>
            </field>
            <filter name="owner" position="after">
                <filter string="Language" domain="[]" context="{'group_by':'language'}" groups="base.group_no_one"/>
            </filter>
        </field>
    </record>

</odoo>
|
|
Loading…
Reference in New Issue
Block a user