Fix Flake8 errors

Add requirements.txt for tesseract
This commit is contained in:
Carlos Almeida 2017-06-02 10:03:34 +01:00
parent a58c40621c
commit caf5856266
4 changed files with 16 additions and 9 deletions

View File

@ -4,7 +4,9 @@
{ {
"name": "OCR for Documents", "name": "OCR for Documents",
"version": "10.0.1.0.0", "version": "10.0.1.0.0",
"author": "Therp BV, Odoo Community Association (OCA), ThinkOpen Solutions Brasil", "author": "Therp BV,"
" Odoo Community Association (OCA),"
" ThinkOpen Solutions Brasil",
"license": "AGPL-3", "license": "AGPL-3",
"category": "Knowledge Management", "category": "Knowledge Management",
"summary": "Run character recognition on uploaded files", "summary": "Run character recognition on uploaded files",

View File

@ -9,7 +9,7 @@ import subprocess
from StringIO import StringIO from StringIO import StringIO
import pyPdf import pyPdf
from odoo import api, fields, models from odoo import api, fields, models, _
from odoo.exceptions import UserError from odoo.exceptions import UserError
_logger = logging.getLogger(__name__) _logger = logging.getLogger(__name__)
@ -126,11 +126,14 @@ OCR_LANGUAGE = [('afr', 'Afrikaans'),
class IrAttachment(models.Model): class IrAttachment(models.Model):
_inherit = 'ir.attachment' _inherit = 'ir.attachment'
language = fields.Selection(OCR_LANGUAGE, 'Language') language = fields.Selection(OCR_LANGUAGE, _('Language'))
# We need to redefine index_content field to be able to update it # We need to redefine index_content field to be able to update it
# on the onchange_language() # on the onchange_language()
index_content = fields.Text('Indexed Content', readonly=False, prefetch=False) index_content = fields.Text(_('Indexed Content'),
index_content_rel = fields.Text(related='index_content', string='Indexed Content Rel') readonly=False,
prefetch=False)
index_content_rel = fields.Text(related='index_content',
string=_('Indexed Content Rel'))
@api.onchange('language') @api.onchange('language')
def onchange_language(self): def onchange_language(self):
@ -139,11 +142,11 @@ class IrAttachment(models.Model):
stderr=subprocess.PIPE) stderr=subprocess.PIPE)
stdout, stderr = process.communicate() stdout, stderr = process.communicate()
if self.language not in stderr.split('\n'): if self.language not in stderr.split('\n'):
raise UserError( raise UserError(_(
"Language not installed." "Language not installed."
" Please ask your system administrator to" " Please ask your system administrator to"
" install tesseract '%s' language." % " install tesseract '%s' language." %
self.language) self.language))
if self.store_fname: if self.store_fname:
bin_data = self._file_read(self.store_fname) bin_data = self._file_read(self.store_fname)
else: else:
@ -216,7 +219,8 @@ class IrAttachment(models.Model):
if synchr: if synchr:
buf = super(IrAttachment, self)._index_pdf(bin_data) buf = super(IrAttachment, self)._index_pdf(bin_data)
if len(buf.split('\n')) < 2 and bin_data.startswith('%PDF-'): if len(buf.split('\n')) < 2 and bin_data.startswith('%PDF-'):
# If we got less than 2 lines, run OCR and append to existent text # If we got less than 2 lines,
# run OCR anyway and append to existent text
try: try:
f = StringIO(bin_data) f = StringIO(bin_data)
pdf = pyPdf.PdfFileReader(f) pdf = pyPdf.PdfFileReader(f)

View File

@ -5,7 +5,7 @@
from StringIO import StringIO from StringIO import StringIO
from PIL import Image, ImageDraw, ImageFont from PIL import Image, ImageDraw, ImageFont
from odoo.addons.document_ocr.models.ir_attachment import _MARKER_PHRASE from models.ir_attachment import _MARKER_PHRASE
from odoo.tests.common import TransactionCase from odoo.tests.common import TransactionCase

1
requirements.txt Normal file
View File

@ -0,0 +1 @@
tesseract