mirror of
https://github.com/OCA/knowledge.git
synced 2025-07-26 18:38:41 -06:00
Fixes Flake8 errors
Add requirements.txt for tesseract
This commit is contained in:
parent
a58c40621c
commit
caf5856266
@ -4,7 +4,9 @@
|
|||||||
{
|
{
|
||||||
"name": "OCR for Documents",
|
"name": "OCR for Documents",
|
||||||
"version": "10.0.1.0.0",
|
"version": "10.0.1.0.0",
|
||||||
"author": "Therp BV, Odoo Community Association (OCA), ThinkOpen Solutions Brasil",
|
"author": "Therp BV,"
|
||||||
|
" Odoo Community Association (OCA),"
|
||||||
|
" ThinkOpen Solutions Brasil",
|
||||||
"license": "AGPL-3",
|
"license": "AGPL-3",
|
||||||
"category": "Knowledge Management",
|
"category": "Knowledge Management",
|
||||||
"summary": "Run character recognition on uploaded files",
|
"summary": "Run character recognition on uploaded files",
|
||||||
|
@ -9,7 +9,7 @@ import subprocess
|
|||||||
from StringIO import StringIO
|
from StringIO import StringIO
|
||||||
|
|
||||||
import pyPdf
|
import pyPdf
|
||||||
from odoo import api, fields, models
|
from odoo import api, fields, models, _
|
||||||
from odoo.exceptions import UserError
|
from odoo.exceptions import UserError
|
||||||
|
|
||||||
_logger = logging.getLogger(__name__)
|
_logger = logging.getLogger(__name__)
|
||||||
@ -126,11 +126,14 @@ OCR_LANGUAGE = [('afr', 'Afrikaans'),
|
|||||||
class IrAttachment(models.Model):
|
class IrAttachment(models.Model):
|
||||||
_inherit = 'ir.attachment'
|
_inherit = 'ir.attachment'
|
||||||
|
|
||||||
language = fields.Selection(OCR_LANGUAGE, 'Language')
|
language = fields.Selection(OCR_LANGUAGE, _('Language'))
|
||||||
# We need to redefine index_content field to be able to update it
|
# We need to redefine index_content field to be able to update it
|
||||||
# on the onchange_language()
|
# on the onchange_language()
|
||||||
index_content = fields.Text('Indexed Content', readonly=False, prefetch=False)
|
index_content = fields.Text(_('Indexed Content'),
|
||||||
index_content_rel = fields.Text(related='index_content', string='Indexed Content Rel')
|
readonly=False,
|
||||||
|
prefetch=False)
|
||||||
|
index_content_rel = fields.Text(related='index_content',
|
||||||
|
string=_('Indexed Content Rel'))
|
||||||
|
|
||||||
@api.onchange('language')
|
@api.onchange('language')
|
||||||
def onchange_language(self):
|
def onchange_language(self):
|
||||||
@ -139,11 +142,11 @@ class IrAttachment(models.Model):
|
|||||||
stderr=subprocess.PIPE)
|
stderr=subprocess.PIPE)
|
||||||
stdout, stderr = process.communicate()
|
stdout, stderr = process.communicate()
|
||||||
if self.language not in stderr.split('\n'):
|
if self.language not in stderr.split('\n'):
|
||||||
raise UserError(
|
raise UserError(_(
|
||||||
"Language not installed."
|
"Language not installed."
|
||||||
" Please ask your system administrator to"
|
" Please ask your system administrator to"
|
||||||
" install tesseract '%s' language." %
|
" install tesseract '%s' language." %
|
||||||
self.language)
|
self.language))
|
||||||
if self.store_fname:
|
if self.store_fname:
|
||||||
bin_data = self._file_read(self.store_fname)
|
bin_data = self._file_read(self.store_fname)
|
||||||
else:
|
else:
|
||||||
@ -216,7 +219,8 @@ class IrAttachment(models.Model):
|
|||||||
if synchr:
|
if synchr:
|
||||||
buf = super(IrAttachment, self)._index_pdf(bin_data)
|
buf = super(IrAttachment, self)._index_pdf(bin_data)
|
||||||
if len(buf.split('\n')) < 2 and bin_data.startswith('%PDF-'):
|
if len(buf.split('\n')) < 2 and bin_data.startswith('%PDF-'):
|
||||||
# If we got less than 2 lines, run OCR and append to existent text
|
# If we got less than 2 lines,
|
||||||
|
# run OCR anyway and append to existent text
|
||||||
try:
|
try:
|
||||||
f = StringIO(bin_data)
|
f = StringIO(bin_data)
|
||||||
pdf = pyPdf.PdfFileReader(f)
|
pdf = pyPdf.PdfFileReader(f)
|
||||||
|
@ -5,7 +5,7 @@
|
|||||||
from StringIO import StringIO
|
from StringIO import StringIO
|
||||||
|
|
||||||
from PIL import Image, ImageDraw, ImageFont
|
from PIL import Image, ImageDraw, ImageFont
|
||||||
from odoo.addons.document_ocr.models.ir_attachment import _MARKER_PHRASE
|
from models.ir_attachment import _MARKER_PHRASE
|
||||||
from odoo.tests.common import TransactionCase
|
from odoo.tests.common import TransactionCase
|
||||||
|
|
||||||
|
|
||||||
|
1
requirements.txt
Normal file
1
requirements.txt
Normal file
@ -0,0 +1 @@
|
|||||||
|
tesseract
|
Loading…
Reference in New Issue
Block a user