mirror of
https://github.com/OCA/knowledge.git
synced 2025-07-28 03:16:29 -06:00
[ADD] attachment_indexation_mupdf
This commit is contained in:
parent
f31245a448
commit
7f394be9c5
5
attachment_indexation_mupdf/README.rst
Normal file
5
attachment_indexation_mupdf/README.rst
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
=====================================================
|
||||||
|
Attachments List and Document Indexation with PyMuPDF
|
||||||
|
=====================================================
|
||||||
|
|
||||||
|
Module to index PDF documents using PyMuPDF, a state-of-the-art library.
|
4
attachment_indexation_mupdf/__init__.py
Normal file
4
attachment_indexation_mupdf/__init__.py
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
# Copyright 2023 len-foss/Financial Way
|
||||||
|
# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html).
|
||||||
|
|
||||||
|
from . import models
|
17
attachment_indexation_mupdf/__manifest__.py
Normal file
17
attachment_indexation_mupdf/__manifest__.py
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
# Copyright 2023 len-foss/Financial Way
|
||||||
|
# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html).
|
||||||
|
# Addon manifest: identity first, then dependencies, then install behaviour.
{
    "name": "Attachments List and Document Indexation with PyMuPDF",
    "summary": "Attachments List and Document Indexation with PyMuPDF",
    "version": "16.0.0.0.0",
    "category": "Hidden/Tools",
    "author": "len-foss/FinancialWay,Odoo Community Association (OCA)",
    "website": "https://github.com/OCA/knowledge",
    "license": "AGPL-3",
    # Addon dependency plus the Python package providing the PDF reader.
    "depends": ["attachment_indexation"],
    "external_dependencies": {"python": ["PyMuPDF"]},
    # No data files or web assets are shipped by this addon.
    "data": [],
    "assets": {},
    "installable": True,
    "auto_install": True,
}
|
4
attachment_indexation_mupdf/models/__init__.py
Normal file
4
attachment_indexation_mupdf/models/__init__.py
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
# Copyright 2023 len-foss/Financial Way
|
||||||
|
# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html).
|
||||||
|
|
||||||
|
from . import ir_attachment
|
36
attachment_indexation_mupdf/models/ir_attachment.py
Normal file
36
attachment_indexation_mupdf/models/ir_attachment.py
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
# Copyright 2023 len-foss/Financial Way
|
||||||
|
# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html).
|
||||||
|
|
||||||
|
import io
|
||||||
|
import logging
|
||||||
|
|
||||||
|
from odoo import models
|
||||||
|
|
||||||
|
_logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# PyMuPDF (imported as ``fitz``) is optional. When it cannot be imported,
# ``fitz`` is set to None so the rest of the module can detect its absence.
try:
    import fitz
except ImportError:
    fitz = None
    # The trailing space matters: adjacent string literals are concatenated
    # verbatim, so without it the message reads "unavailablebecause".
    _logger.warning(
        "Attachment indexation of PDF documents is unavailable "
        "because PyMuPDF cannot be loaded."
    )
|
||||||
|
|
||||||
|
|
||||||
|
class IrAttachment(models.Model):
    """Extend ``ir.attachment`` to extract PDF text with PyMuPDF."""

    _inherit = "ir.attachment"

    def _index_pdf(self, bin_data):
        """Index PDF documents with MuPDF if available.

        :param bin_data: raw content of the PDF document (bytes-like)
        :return: text of the pages that could be read, concatenated in page
            order; an empty string when nothing could be extracted
        """
        if fitz is None:
            # PyMuPDF could not be imported: use the inherited implementation.
            return super()._index_pdf(bin_data)
        chunks = []
        try:
            # The context manager closes the document even if a page fails.
            with fitz.open(stream=io.BytesIO(bin_data), filetype="pdf") as doc:
                # Explicit loop (not a comprehension) so that text gathered
                # before a failing page is still returned.
                for page in doc:
                    chunks.append(page.get_text())
        except Exception:
            # Indexing is best effort: never block the caller on a bad PDF,
            # but keep a trace of the failure for troubleshooting.
            _logger.debug("PyMuPDF could not index the PDF content", exc_info=True)
        return "".join(chunks)
|
2
attachment_indexation_mupdf/tests/__init__.py
Normal file
2
attachment_indexation_mupdf/tests/__init__.py
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html).
|
||||||
|
from . import test_indexation
|
BIN
attachment_indexation_mupdf/tests/files/test_content.pdf
Normal file
BIN
attachment_indexation_mupdf/tests/files/test_content.pdf
Normal file
Binary file not shown.
27
attachment_indexation_mupdf/tests/test_indexation.py
Normal file
27
attachment_indexation_mupdf/tests/test_indexation.py
Normal file
@ -0,0 +1,27 @@
|
|||||||
|
# Copyright 2023 len-foss/Financial Way
|
||||||
|
# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html).
|
||||||
|
|
||||||
|
import os
|
||||||
|
from unittest import skipIf
|
||||||
|
|
||||||
|
from odoo.tests.common import TransactionCase, tagged
|
||||||
|
|
||||||
|
# Folder containing this test module; fixtures are located relative to it.
directory = os.path.split(__file__)[0]

# PyMuPDF is optional: ``fitz`` stays None when the import fails, which the
# test below uses as its skip condition.
try:
    import fitz
except ImportError:
    fitz = None
|
||||||
|
|
||||||
|
|
||||||
|
@tagged("post_install", "-at_install")
class TestCaseIndexation(TransactionCase):
    """Check the text extracted from a PDF attachment when PyMuPDF is present."""

    @skipIf(fitz is None, "PyMuPDF is not installed")
    def test_attachment_pdf_indexation(self):
        """Index the bundled PDF fixture and compare the extracted text."""
        pdf_path = os.path.join(directory, "files", "test_content.pdf")
        with open(pdf_path, "rb") as file:
            pdf = file.read()
        text = self.env["ir.attachment"]._index(pdf, "application/pdf")
        # note that the whitespace character is not the same as with pdfminer
        self.assertEqual(
            text, "TestContent!!\n", "the index content should be correct"
        )
|
@ -0,0 +1 @@
|
|||||||
|
../../../../attachment_indexation_mupdf
|
6
setup/attachment_indexation_mupdf/setup.py
Normal file
6
setup/attachment_indexation_mupdf/setup.py
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
import setuptools
|
||||||
|
|
||||||
|
# Build configuration: requires setuptools-odoo at setup time and flags the
# package as an Odoo addon.
setuptools.setup(
    odoo_addon=True,
    setup_requires=["setuptools-odoo"],
)
|
Loading…
Reference in New Issue
Block a user