From e5b00ee0d2c62b7b49ba9adb127f64602d34dfb3 Mon Sep 17 00:00:00 2001 From: Holger Brunn Date: Tue, 20 Jun 2017 15:06:14 +0200 Subject: [PATCH 1/3] [ADD] cap the amount of documents to ocr per cronjob run --- document_ocr/data/ir_cron.xml | 1 + document_ocr/models/ir_attachment.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/document_ocr/data/ir_cron.xml b/document_ocr/data/ir_cron.xml index f69d151a..1ea8cd20 100644 --- a/document_ocr/data/ir_cron.xml +++ b/document_ocr/data/ir_cron.xml @@ -8,6 +8,7 @@ ir.attachment _ocr_cron -1 + (100,) diff --git a/document_ocr/models/ir_attachment.py b/document_ocr/models/ir_attachment.py index ec161712..7a69a718 100644 --- a/document_ocr/models/ir_attachment.py +++ b/document_ocr/models/ir_attachment.py @@ -70,10 +70,10 @@ class IrAttachment(models.Model): return StringIO(stdout) @api.model - def _ocr_cron(self): + def _ocr_cron(self, limit=0): for this in self.with_context(document_ocr_force=True).search([ ('index_content', '=', _MARKER_PHRASE), - ]): + ], limit=limit): if not this.datas: continue file_type, index_content = this._index( From 60fb31e29f067c9c9fa4e5236e633ecdd868a5b5 Mon Sep 17 00:00:00 2001 From: Holger Brunn Date: Tue, 20 Jun 2017 15:09:28 +0200 Subject: [PATCH 2/3] [FIX] ignore files with unknown mimetype --- document_ocr/models/ir_attachment.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/document_ocr/models/ir_attachment.py b/document_ocr/models/ir_attachment.py index 7a69a718..1dfb7231 100644 --- a/document_ocr/models/ir_attachment.py +++ b/document_ocr/models/ir_attachment.py @@ -18,7 +18,7 @@ class IrAttachment(models.Model): def _index(self, data, datas_fname, file_type): mimetype, content = super(IrAttachment, self)._index( data, datas_fname, file_type) - if not content or content == 'image': + if mimetype and (not content or content == 'image'): has_synchr_param = self.env['ir.config_parameter'].get_param( 'document_ocr.synchronous', 'False') == 'True' has_force_flag = self.env.context.get('document_ocr_force') From f19573a7a08cfdb0cfc7cfe57be33c79dfb41a63 Mon Sep 17 00:00:00 2001 From: Holger Brunn Date: Sun, 4 Feb 2018 08:02:54 +0100 Subject: [PATCH 3/3] [FIX] use png as for pillow interchange --- document_ocr/models/ir_attachment.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/document_ocr/models/ir_attachment.py b/document_ocr/models/ir_attachment.py index 1dfb7231..a167cc9e 100644 --- a/document_ocr/models/ir_attachment.py +++ b/document_ocr/models/ir_attachment.py @@ -42,7 +42,7 @@ class IrAttachment(models.Model): else: image_data = StringIO() try: - Image.open(StringIO(data)).save(image_data, 'tiff', + Image.open(StringIO(data)).save(image_data, 'png', dpi=(dpi, dpi)) except IOError: _logger.exception('Failed to OCR image')