mirror of
https://github.com/OCA/knowledge.git
synced 2025-07-27 19:08:42 -06:00
[ADD] attachment_indexation_mupdf
This commit is contained in:
parent
f31245a448
commit
7f394be9c5
5
attachment_indexation_mupdf/README.rst
Normal file
5
attachment_indexation_mupdf/README.rst
Normal file
@ -0,0 +1,5 @@
|
||||
=====================================================
|
||||
Attachments List and Document Indexation with PyMuPDF
|
||||
=====================================================
|
||||
|
||||
Module to index PDF documents using a state-of-the-art library (PyMuPDF).
|
4
attachment_indexation_mupdf/__init__.py
Normal file
4
attachment_indexation_mupdf/__init__.py
Normal file
@ -0,0 +1,4 @@
|
||||
# Copyright 2023 len-foss/Financial Way
|
||||
# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html).
|
||||
|
||||
from . import models
|
17
attachment_indexation_mupdf/__manifest__.py
Normal file
17
attachment_indexation_mupdf/__manifest__.py
Normal file
@ -0,0 +1,17 @@
|
||||
# Copyright 2023 len-foss/Financial Way
|
||||
# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html).
|
||||
{
    # Module title; the "summary" key below carries the same text.
    "name": "Attachments List and Document Indexation with PyMuPDF",
    "category": "Hidden/Tools",
    "version": "16.0.0.0.0",
    "summary": "Attachments List and Document Indexation with PyMuPDF",
    "author": "len-foss/FinancialWay,Odoo Community Association (OCA)",
    "website": "https://github.com/OCA/knowledge",
    "license": "AGPL-3",
    # models/ir_attachment.py overrides `_index_pdf` on `ir.attachment` and
    # calls `super()._index_pdf`; presumably that parent implementation is
    # provided by this dependency -- TODO confirm.
    "depends": ["attachment_indexation"],
    # NOTE(review): presumably makes the addon install itself once its
    # dependencies are installed -- confirm this is intended.
    "auto_install": True,
    "installable": True,
    # No data files or assets are declared by this addon.
    "data": [],
    "assets": {},
    # PyMuPDF is the library that models/ir_attachment.py imports as `fitz`.
    "external_dependencies": {"python": ["PyMuPDF"]},
}
|
4
attachment_indexation_mupdf/models/__init__.py
Normal file
4
attachment_indexation_mupdf/models/__init__.py
Normal file
@ -0,0 +1,4 @@
|
||||
# Copyright 2023 len-foss/Financial Way
|
||||
# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html).
|
||||
|
||||
from . import ir_attachment
|
36
attachment_indexation_mupdf/models/ir_attachment.py
Normal file
36
attachment_indexation_mupdf/models/ir_attachment.py
Normal file
@ -0,0 +1,36 @@
|
||||
# Copyright 2023 len-foss/Financial Way
|
||||
# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html).
|
||||
|
||||
import io
|
||||
import logging
|
||||
|
||||
from odoo import models
|
||||
|
||||
_logger = logging.getLogger(__name__)

# PyMuPDF is optional and is imported under the module name ``fitz``.
# When it cannot be imported, ``fitz`` is bound to ``None`` so the rest of the
# module can test for its availability instead of failing at import time.
try:
    import fitz
except ImportError:
    fitz = None
    # The two literals are concatenated by the parser, so the first one must
    # end with a space to keep "unavailable" and "because" apart.
    _logger.warning(
        "Attachment indexation of PDF documents is unavailable "
        "because PyMuPDF cannot be loaded."
    )
|
||||
|
||||
|
||||
class IrAttachment(models.Model):
    """Extend ``ir.attachment`` so PDF text is extracted with PyMuPDF."""

    _inherit = "ir.attachment"

    def _index_pdf(self, bin_data):
        """Index PDF documents with MuPDF if available.

        When PyMuPDF (``fitz``) could not be imported, the call is delegated
        to the parent implementation.

        :param bin_data: raw binary content of the attachment
        :return: the concatenated text of all pages; the text gathered so far
            (possibly an empty string) when extraction fails
        """
        if fitz is None:
            return super()._index_pdf(bin_data)
        page_texts = []
        try:
            doc = fitz.open(stream=io.BytesIO(bin_data), filetype="pdf")
            try:
                # Append page by page so that text already extracted is kept
                # if a later page raises.
                for page in doc:
                    page_texts.append(page.get_text())
            finally:
                # Release the document even when a page fails to render.
                doc.close()
        except Exception:
            # Indexation is best effort: never let a broken or unsupported
            # payload block the attachment, but leave a trace for debugging.
            _logger.debug(
                "PyMuPDF could not extract text from the attachment content",
                exc_info=True,
            )
        return "".join(page_texts)
|
2
attachment_indexation_mupdf/tests/__init__.py
Normal file
2
attachment_indexation_mupdf/tests/__init__.py
Normal file
@ -0,0 +1,2 @@
|
||||
# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html).
|
||||
from . import test_indexation
|
BIN
attachment_indexation_mupdf/tests/files/test_content.pdf
Normal file
BIN
attachment_indexation_mupdf/tests/files/test_content.pdf
Normal file
Binary file not shown.
27
attachment_indexation_mupdf/tests/test_indexation.py
Normal file
27
attachment_indexation_mupdf/tests/test_indexation.py
Normal file
@ -0,0 +1,27 @@
|
||||
# Copyright 2023 len-foss/Financial Way
|
||||
# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html).
|
||||
|
||||
import os
|
||||
from unittest import skipIf
|
||||
|
||||
from odoo.tests.common import TransactionCase, tagged
|
||||
|
||||
# Directory containing this test module; the test below joins it with
# "files/test_content.pdf" to locate its PDF fixture.
directory = os.path.dirname(__file__)

# ``fitz`` is bound to ``None`` when the import fails, which lets the test
# below be skipped instead of erroring out.
try:
    import fitz
except ImportError:
    fitz = None
|
||||
|
||||
|
||||
@tagged("post_install", "-at_install")
class TestCaseIndexation(TransactionCase):
    """Check the text extracted from the bundled PDF fixture."""

    @skipIf(fitz is None, "PyMuPDF is not installed")
    def test_attachment_pdf_indexation(self):
        """Index ``files/test_content.pdf`` and compare the extracted text."""
        pdf_path = os.path.join(directory, "files", "test_content.pdf")
        with open(pdf_path, "rb") as file:
            pdf = file.read()
        text = self.env["ir.attachment"]._index(pdf, "application/pdf")
        # note that the whitespace character is not the same as with pdfminer
        self.assertEqual(
            text, "TestContent!!\n", "the index content should be correct"
        )
|
@ -0,0 +1 @@
|
||||
../../../../attachment_indexation_mupdf
|
6
setup/attachment_indexation_mupdf/setup.py
Normal file
6
setup/attachment_indexation_mupdf/setup.py
Normal file
@ -0,0 +1,6 @@
|
||||
import setuptools
|
||||
|
||||
# No package metadata is passed explicitly: only the ``odoo_addon`` flag and
# the ``setuptools-odoo`` setup requirement are given.
# NOTE(review): presumably ``setuptools-odoo`` derives the metadata from the
# addon itself -- confirm against its documentation.
setuptools.setup(
    setup_requires=['setuptools-odoo'],
    odoo_addon=True,
)
|
Loading…
Reference in New Issue
Block a user