diff --git a/document_ocr/models/ir_attachment.py b/document_ocr/models/ir_attachment.py index efbd9b18..18102d52 100644 --- a/document_ocr/models/ir_attachment.py +++ b/document_ocr/models/ir_attachment.py @@ -177,6 +177,7 @@ class IrAttachment(models.Model): return content def _index_ocr(self, bin_data): + _logger.info('OCR IMAGE "%s"...', self.datas_fname) process = subprocess.Popen( ['tesseract', 'stdin', 'stdout', '-l', self.language], stdin=subprocess.PIPE, stdout=subprocess.PIPE, @@ -237,7 +238,6 @@ class IrAttachment(models.Model): buf = u'%s\n-- %d --\n%s' % ( buf, pagenum + 1, index_content.decode('utf8')) else: - _logger.info('OCR PDF "%s"...', self.datas_fname) pdf_image = convert_bin_to_image(self, bin_data) index_content = self._index_ocr(pdf_image) buf = u'%s\n%s' % (buf, index_content.decode('utf8')) diff --git a/document_ocr/tests/test_document_ocr.py b/document_ocr/tests/test_document_ocr.py index 3d4cf69f..1d1a5490 100644 --- a/document_ocr/tests/test_document_ocr.py +++ b/document_ocr/tests/test_document_ocr.py @@ -23,13 +23,17 @@ class TestDocumentOcr(TransactionCase): data = StringIO() test_image.save(data, 'png') attachment = self.env['ir.attachment'].create({ - 'name': 'testattachment'}) + 'name': 'testattachment', + 'datas_fname': 'test_png.pdf'}) result = attachment._index( data.getvalue(), 'test.png', None) self.assertEqual(result.strip(), 'Hello world') # should also work for pdfs data = StringIO() test_image.save(data, 'pdf', resolution=300) + attachment = self.env['ir.attachment'].create({ + 'name': 'testattachment', + 'datas_fname': 'test_pdf.pdf'}) result = attachment._index( data.getvalue(), 'test.pdf', None) self.assertEqual(result.strip(), 'Hello world') @@ -38,6 +42,7 @@ class TestDocumentOcr(TransactionCase): 'document_ocr.synchronous', 'False') attachment = self.env['ir.attachment'].create({ 'name': 'testattachment', + 'datas_fname': 'test_cron.pdf', 'datas': data.getvalue().encode('base64'), }) self.assertEqual(attachment.index_content, _MARKER_PHRASE) @@ -49,6 +54,9 @@ class TestDocumentOcr(TransactionCase): data = StringIO() test_image = Image.new('1', (200, 30)) test_image.save(data, 'palm') + attachment = self.env['ir.attachment'].create({ + 'name': 'testattachment', + 'datas_fname': 'test_err.palm'}) result = attachment._index( data.getvalue(), 'test.palm', None) self.assertEqual(result, '')