From 1125d299af5d8efcf6286e02b28f162e9d042779 Mon Sep 17 00:00:00 2001 From: len Date: Thu, 15 Feb 2024 11:19:29 +0100 Subject: [PATCH] [ADD] attachment_indexation_textract --- attachment_indexation_textract/README.rst | 5 +++ attachment_indexation_textract/__init__.py | 4 +++ .../__manifest__.py | 20 +++++++++++ .../models/__init__.py | 4 +++ .../models/ir_attachment.py | 36 +++++++++++++++++++ .../addons/attachment_indexation_textract | 1 + setup/attachment_indexation_textract/setup.py | 6 ++++ 7 files changed, 76 insertions(+) create mode 100644 attachment_indexation_textract/README.rst create mode 100644 attachment_indexation_textract/__init__.py create mode 100644 attachment_indexation_textract/__manifest__.py create mode 100644 attachment_indexation_textract/models/__init__.py create mode 100644 attachment_indexation_textract/models/ir_attachment.py create mode 120000 setup/attachment_indexation_textract/odoo/addons/attachment_indexation_textract create mode 100644 setup/attachment_indexation_textract/setup.py diff --git a/attachment_indexation_textract/README.rst b/attachment_indexation_textract/README.rst new file mode 100644 index 00000000..97eccaab --- /dev/null +++ b/attachment_indexation_textract/README.rst @@ -0,0 +1,5 @@ +====================================================== +Attachments List and Document Indexation with Textract +====================================================== + +Module to index documents with Textract; install optional dependencies to support more file formats. Supported formats include .xls, .doc with antiword, etc. diff --git a/attachment_indexation_textract/__init__.py b/attachment_indexation_textract/__init__.py new file mode 100644 index 00000000..ada0d667 --- /dev/null +++ b/attachment_indexation_textract/__init__.py @@ -0,0 +1,4 @@ +# Copyright 2023 len-foss/Financial Way +# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html). + +from . 
import models diff --git a/attachment_indexation_textract/__manifest__.py b/attachment_indexation_textract/__manifest__.py new file mode 100644 index 00000000..b3fdfb74 --- /dev/null +++ b/attachment_indexation_textract/__manifest__.py @@ -0,0 +1,20 @@ +# Copyright 2023 len-foss/Financial Way +# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html). +{ + "name": "Attachments Indexation with Textract", + "category": "Hidden/Tools", + "version": "16.0.1.0.0", + "summary": "Attachments List and Document Indexation with PyMuPDF", + "author": "len-foss/FinancialWay,Odoo Community Association (OCA)", + "website": "https://github.com/OCA/knowledge", + "license": "AGPL-3", + "depends": ["attachment_indexation"], + "auto_install": True, + "installable": True, + "data": [], + "assets": {}, + "external_dependencies": { + "python": ["textract"], + "bin": ["antiword"], + }, +} diff --git a/attachment_indexation_textract/models/__init__.py b/attachment_indexation_textract/models/__init__.py new file mode 100644 index 00000000..f407ef53 --- /dev/null +++ b/attachment_indexation_textract/models/__init__.py @@ -0,0 +1,4 @@ +# Copyright 2023 len-foss/Financial Way +# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html). + +from . import ir_attachment diff --git a/attachment_indexation_textract/models/ir_attachment.py b/attachment_indexation_textract/models/ir_attachment.py new file mode 100644 index 00000000..74055488 --- /dev/null +++ b/attachment_indexation_textract/models/ir_attachment.py @@ -0,0 +1,36 @@ +# Copyright 2023 len-foss/Financial Way +# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html). 
import logging
import mimetypes
import tempfile

import textract

from odoo import models

_logger = logging.getLogger(__name__)


class IrAttachment(models.Model):
    """Extend ``ir.attachment`` so that textract is tried when indexing."""

    _inherit = "ir.attachment"

    def _index(self, bin_data, mimetype, checksum=None):
        """Return the index content of an attachment, trying textract first.

        Every mimetype except ``application/pdf`` is first handed to
        :meth:`_index_textract`. When textract is skipped or yields nothing,
        the result of the parent implementation is returned instead.

        :param bin_data: raw content of the attachment
        :param mimetype: mimetype of the attachment
        :param checksum: forwarded unchanged to the parent implementation
        :return: the textract output when it is truthy, otherwise whatever
            ``super()._index`` returns
        """
        # Start from an empty result so the PDF branch, which never calls
        # textract, still reaches the parent implementation instead of
        # failing on an unassigned local.
        buf = ""
        if mimetype != "application/pdf":  # mupdf is better
            buf = self.with_context(mimetype=mimetype)._index_textract(bin_data)
        return buf or super()._index(bin_data, mimetype, checksum=checksum)

    def _index_textract(self, bin_data):
        """Extract the text of ``bin_data`` with textract, best effort.

        The mimetype is read from the ``mimetype`` context key and mapped to
        a file extension, which is used as the suffix of the temporary file
        handed to textract.

        :param bin_data: raw content of the attachment
        :return: the value returned by ``textract.process``, or ``""`` when
            there is nothing to index or the extraction failed
        """
        buf = ""
        mimetype = self.env.context.get("mimetype")
        if not bin_data or not mimetype:
            # Nothing to extract, or no mimetype to derive a suffix from.
            return buf
        try:
            extension = mimetypes.guess_extension(mimetype)
            with tempfile.NamedTemporaryFile(suffix=extension or "") as tmp_file:
                tmp_file.write(bin_data)
                # textract reads the file by path: make sure the buffered
                # bytes are on disk, and call it while the file still exists.
                tmp_file.flush()
                buf = textract.process(tmp_file.name)
        except Exception:
            # Indexing is best effort: a failure here must never block the
            # creation or update of the attachment, so log it and move on.
            _logger.info(
                "Textract could not index content with mimetype %s",
                mimetype,
                exc_info=True,
            )
        return buf