mirror of
https://github.com/OCA/knowledge.git
synced 2025-07-27 10:58:41 -06:00
[ADD] attachment_indexation_textract
This commit is contained in:
parent
8d1125aa76
commit
1125d299af
5
attachment_indexation_textract/README.rst
Normal file
5
attachment_indexation_textract/README.rst
Normal file
@ -0,0 +1,5 @@
|
||||
======================================================
|
||||
Attachments List and Document Indexation with Textract
|
||||
======================================================
|
||||
|
||||
This module indexes the content of attachments with Textract. Install Textract's optional dependencies to support more file formats; supported formats include .xls and .doc (the latter through antiword), among others.
|
4
attachment_indexation_textract/__init__.py
Normal file
4
attachment_indexation_textract/__init__.py
Normal file
@ -0,0 +1,4 @@
|
||||
# Copyright 2023 len-foss/Financial Way
|
||||
# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html).
|
||||
|
||||
from . import models
|
20
attachment_indexation_textract/__manifest__.py
Normal file
20
attachment_indexation_textract/__manifest__.py
Normal file
@ -0,0 +1,20 @@
|
||||
# Copyright 2023 len-foss/Financial Way
|
||||
# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html).
|
||||
{
    "name": "Attachments Indexation with Textract",
    "category": "Hidden/Tools",
    "version": "16.0.1.0.0",
    # The summary must name Textract: that is the library this addon declares
    # under "external_dependencies" and imports in models/ir_attachment.py.
    "summary": "Attachments List and Document Indexation with Textract",
    "author": "len-foss/FinancialWay,Odoo Community Association (OCA)",
    "website": "https://github.com/OCA/knowledge",
    "license": "AGPL-3",
    "depends": ["attachment_indexation"],
    "auto_install": True,
    "installable": True,
    "data": [],
    "assets": {},
    # Both the Python package and the antiword binary are declared as
    # requirements of this addon.
    "external_dependencies": {
        "python": ["textract"],
        "bin": ["antiword"],
    },
}
|
4
attachment_indexation_textract/models/__init__.py
Normal file
4
attachment_indexation_textract/models/__init__.py
Normal file
@ -0,0 +1,4 @@
|
||||
# Copyright 2023 len-foss/Financial Way
|
||||
# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html).
|
||||
|
||||
from . import ir_attachment
|
36
attachment_indexation_textract/models/ir_attachment.py
Normal file
36
attachment_indexation_textract/models/ir_attachment.py
Normal file
@ -0,0 +1,36 @@
|
||||
# Copyright 2023 len-foss/Financial Way
|
||||
# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html).
|
||||
|
||||
import logging
|
||||
import mimetypes
|
||||
import tempfile
|
||||
|
||||
import textract
|
||||
|
||||
from odoo import models
|
||||
|
||||
_logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class IrAttachment(models.Model):
    """Extension of ``ir.attachment`` that indexes non-PDF content with textract."""

    _inherit = "ir.attachment"

    def _index(self, bin_data, mimetype, checksum=None):
        """Index documents with textract if available.

        PDF files are not sent to textract; they go straight to the parent
        implementation. For every other mimetype textract is tried first and
        the parent implementation is used when textract yields nothing.

        :param bin_data: raw content of the attachment
        :param mimetype: mimetype of the attachment
        :param checksum: forwarded unchanged to the parent implementation
        :return: the textract output when non-empty, otherwise whatever the
            parent ``_index`` returns
        """
        # Always bind ``buf`` so the PDF path falls through to ``super()``
        # instead of failing on an unassigned local.
        buf = ""
        if mimetype != "application/pdf":  # mupdf is better
            buf = self.with_context(mimetype=mimetype)._index_textract(bin_data)
        return buf or super()._index(bin_data, mimetype, checksum=checksum)

    def _index_textract(self, bin_data):
        """Extract the text of ``bin_data`` with textract (best effort).

        The mimetype is read from the ``mimetype`` context key and turned into
        a file extension used as suffix of a temporary file, which is then
        handed to ``textract.process`` by path.

        :param bin_data: raw content to write to the temporary file
        :return: the value returned by ``textract.process``, or ``""`` when no
            mimetype is available or the extraction fails
        """
        # NOTE(review): textract documents ``process`` as returning an encoded
        # byte string; confirm callers are fine with bytes here.
        buf = ""
        mimetype = self.env.context.get("mimetype")
        if not mimetype:
            # Without a mimetype there is no extension to derive.
            return buf
        try:
            extension = mimetypes.guess_extension(mimetype)
            with tempfile.NamedTemporaryFile(suffix=extension or "") as tmp_file:
                tmp_file.write(bin_data)
                # textract reopens the file by name: push buffered bytes to
                # disk first, and read it while the file still exists.
                tmp_file.flush()
                buf = textract.process(tmp_file.name)
        except Exception:
            # Indexation is best effort: log the failure and fall back to the
            # empty result so the caller can use the parent implementation.
            _logger.info(
                "Textract could not index attachment content (mimetype %s)",
                mimetype,
                exc_info=True,
            )
        return buf
|
@ -0,0 +1 @@
|
||||
../../../../attachment_indexation_textract
|
6
setup/attachment_indexation_textract/setup.py
Normal file
6
setup/attachment_indexation_textract/setup.py
Normal file
@ -0,0 +1,6 @@
|
||||
import setuptools
|
||||
|
||||
# Packaging entry point: requests setuptools-odoo at setup time and flags the
# distribution as an Odoo addon through the ``odoo_addon`` keyword.
setuptools.setup(odoo_addon=True, setup_requires=['setuptools-odoo'])
|
Loading…
Reference in New Issue
Block a user