mirror of
https://github.com/OCA/knowledge.git
synced 2025-07-26 10:28:40 -06:00
Fixes Flake8 errors
Add requirements.txt for tesseract
This commit is contained in:
parent
a58c40621c
commit
caf5856266
@ -4,7 +4,9 @@
|
||||
{
|
||||
"name": "OCR for Documents",
|
||||
"version": "10.0.1.0.0",
|
||||
"author": "Therp BV, Odoo Community Association (OCA), ThinkOpen Solutions Brasil",
|
||||
"author": "Therp BV,"
|
||||
" Odoo Community Association (OCA),"
|
||||
" ThinkOpen Solutions Brasil",
|
||||
"license": "AGPL-3",
|
||||
"category": "Knowledge Management",
|
||||
"summary": "Run character recognition on uploaded files",
|
||||
|
@ -9,7 +9,7 @@ import subprocess
|
||||
from StringIO import StringIO
|
||||
|
||||
import pyPdf
|
||||
from odoo import api, fields, models
|
||||
from odoo import api, fields, models, _
|
||||
from odoo.exceptions import UserError
|
||||
|
||||
_logger = logging.getLogger(__name__)
|
||||
@ -126,11 +126,14 @@ OCR_LANGUAGE = [('afr', 'Afrikaans'),
|
||||
class IrAttachment(models.Model):
|
||||
_inherit = 'ir.attachment'
|
||||
|
||||
language = fields.Selection(OCR_LANGUAGE, 'Language')
|
||||
language = fields.Selection(OCR_LANGUAGE, _('Language'))
|
||||
# We need to redefine index_content field to be able to update it
|
||||
# on the onchange_language()
|
||||
index_content = fields.Text('Indexed Content', readonly=False, prefetch=False)
|
||||
index_content_rel = fields.Text(related='index_content', string='Indexed Content Rel')
|
||||
index_content = fields.Text(_('Indexed Content'),
|
||||
readonly=False,
|
||||
prefetch=False)
|
||||
index_content_rel = fields.Text(related='index_content',
|
||||
string=_('Indexed Content Rel'))
|
||||
|
||||
@api.onchange('language')
|
||||
def onchange_language(self):
|
||||
@ -139,11 +142,11 @@ class IrAttachment(models.Model):
|
||||
stderr=subprocess.PIPE)
|
||||
stdout, stderr = process.communicate()
|
||||
if self.language not in stderr.split('\n'):
|
||||
raise UserError(
|
||||
raise UserError(_(
|
||||
"Language not installed."
|
||||
" Please ask your system administrator to"
|
||||
" install tesseract '%s' language." %
|
||||
self.language)
|
||||
self.language))
|
||||
if self.store_fname:
|
||||
bin_data = self._file_read(self.store_fname)
|
||||
else:
|
||||
@ -216,7 +219,8 @@ class IrAttachment(models.Model):
|
||||
if synchr:
|
||||
buf = super(IrAttachment, self)._index_pdf(bin_data)
|
||||
if len(buf.split('\n')) < 2 and bin_data.startswith('%PDF-'):
|
||||
# If we got less than 2 lines, run OCR and append to existent text
|
||||
# If we got less than 2 lines,
|
||||
# run OCR anyway and append to existent text
|
||||
try:
|
||||
f = StringIO(bin_data)
|
||||
pdf = pyPdf.PdfFileReader(f)
|
||||
|
@ -5,7 +5,7 @@
|
||||
from StringIO import StringIO
|
||||
|
||||
from PIL import Image, ImageDraw, ImageFont
|
||||
from odoo.addons.document_ocr.models.ir_attachment import _MARKER_PHRASE
|
||||
from models.ir_attachment import _MARKER_PHRASE
|
||||
from odoo.tests.common import TransactionCase
|
||||
|
||||
|
||||
|
1
requirements.txt
Normal file
1
requirements.txt
Normal file
@ -0,0 +1 @@
|
||||
tesseract
|
Loading…
Reference in New Issue
Block a user