mirror of
https://github.com/OCA/knowledge.git
synced 2025-07-13 15:34:49 -06:00
8.0 document rtf index (#110)
* [ENH] Add module document_rtf_index. * [FIX] Improvements after review. - Only load indexer when module is installed; - Protect non standard imports; - Standardized README.rst.
This commit is contained in:
parent
da0c01345b
commit
bae6af55d3
1
.gitattributes
vendored
Normal file
1
.gitattributes
vendored
Normal file
@ -0,0 +1 @@
|
||||
*.rtf -diff
|
@ -35,6 +35,7 @@ install:
|
||||
- git clone --depth=1 https://github.com/OCA/maintainer-quality-tools.git ${HOME}/maintainer-quality-tools
|
||||
- export PATH=${HOME}/maintainer-quality-tools/travis:${PATH}
|
||||
- pip install --upgrade paramiko
|
||||
- pip install --upgrade pyth
|
||||
- travis_install_nightly
|
||||
|
||||
script:
|
||||
|
58
document_rtf_index/README.rst
Normal file
58
document_rtf_index/README.rst
Normal file
@ -0,0 +1,58 @@
|
||||
.. image:: https://img.shields.io/badge/licence-AGPL--3-blue.svg
|
||||
:target: http://www.gnu.org/licenses/agpl-3.0-standalone.html
|
||||
:alt: License: AGPL-3
|
||||
|
||||
Index rtf documents
|
||||
===================
|
||||
|
||||
Indexing rtf documents can take a long time, especially when they contain
|
||||
images. This module will convert rtf documents to only the text contents and
|
||||
index that text.
|
||||
|
||||
Usage
|
||||
=====
|
||||
|
||||
Just installing the module will register the rtf indexer. No further user
|
||||
action or configuration required.
|
||||
|
||||
|
||||
.. image:: https://odoo-community.org/website/image/ir.attachment/5784_f2813bd/datas
|
||||
:alt: Try me on Runbot
|
||||
:target: https://runbot.odoo-community.org/runbot/knowledge/8.0
|
||||
|
||||
.. repo_id is available in https://github.com/OCA/maintainer-tools/blob/master/tools/repos_with_ids.txt
|
||||
.. branch is "8.0" for example
|
||||
|
||||
Bug Tracker
|
||||
===========
|
||||
|
||||
Bugs are tracked on
|
||||
`GitHub Issues <https://github.com/OCA/knowledge/issues>`_.
|
||||
In case of trouble, please check there if your issue has already been
|
||||
reported. If you spotted it first, help us smash it by providing
|
||||
detailed and welcome feedback.
|
||||
|
||||
Credits
|
||||
=======
|
||||
|
||||
Contributors
|
||||
------------
|
||||
|
||||
* Ronald Portier <ronald@therp.nl>
|
||||
* Icon courtesy of https://www.picol.org (refresh.svg) and
|
||||
https://github.com/odoo/odoo/blob/8.0/addons/knowledge/static/description/icon.png
|
||||
|
||||
Maintainer
|
||||
----------
|
||||
|
||||
.. image:: https://odoo-community.org/logo.png
|
||||
:alt: Odoo Community Association
|
||||
:target: https://odoo-community.org
|
||||
|
||||
This module is maintained by the OCA.
|
||||
|
||||
OCA, or the Odoo Community Association, is a nonprofit organization whose
|
||||
mission is to support the collaborative development of Odoo features and
|
||||
promote its widespread use.
|
||||
|
||||
To contribute to this module, please visit https://odoo-community.org.
|
4
document_rtf_index/__init__.py
Normal file
4
document_rtf_index/__init__.py
Normal file
@ -0,0 +1,4 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# © 2016 Therp BV <http://therp.nl>
|
||||
# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html).
|
||||
from . import models
|
23
document_rtf_index/__openerp__.py
Normal file
23
document_rtf_index/__openerp__.py
Normal file
@ -0,0 +1,23 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# © 2016 Therp BV <http://therp.nl>
|
||||
# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html).
|
||||
{
    # Module manifest: metadata describing the rtf indexing addon.
    "name": "Index rtf documents",
    "version": "8.0.1.0.0",
    "author": "Therp BV, Odoo Community Association (OCA)",
    "license": "AGPL-3",
    "category": "Knowledge Management",
    "summary": "Index rtf documents",
    # Addons that must be installed before this one.
    "depends": [
        'document',
    ],
    # No data files are loaded by this module.
    "data": [],
    "auto_install": False,
    "installable": True,
    "application": False,
    # Non-standard python packages required by this module.
    "external_dependencies": {
        'python': [
            'pyth',
        ],
    },
}
|
4
document_rtf_index/models/__init__.py
Normal file
4
document_rtf_index/models/__init__.py
Normal file
@ -0,0 +1,4 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# © 2016 Therp BV <http://therp.nl>
|
||||
# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html).
|
||||
from . import ir_attachment
|
74
document_rtf_index/models/ir_attachment.py
Normal file
74
document_rtf_index/models/ir_attachment.py
Normal file
@ -0,0 +1,74 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# © 2016 Therp BV <http://therp.nl>
|
||||
# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html).
|
||||
import logging
|
||||
|
||||
from openerp import api, models
|
||||
|
||||
|
||||
_logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class IrAttachment(models.Model):
    """Extend ir.attachment to register an indexer for rtf documents."""

    _inherit = 'ir.attachment'

    @api.noguess
    def _register_hook(self, cr):
        """Only register our indexer if module is installed.

        The indexer is registered on the content index of the ``document``
        addon. When the ``pyth`` library cannot be imported, a warning is
        logged and no indexer is registered.

        :param cr: database cursor, passed on to the parent implementation.
        :return: whatever the parent ``_register_hook`` returns.
        """
        # Keep the hook chain intact for other modules extending this model.
        res = super(IrAttachment, self)._register_hook(cr)
        import StringIO
        try:
            from pyth import document
            from pyth.plugins.rtf15.reader import Rtf15Reader
            from pyth.plugins.plaintext.writer import PlaintextWriter
        except ImportError:
            _logger.warning("pyth not found, RTF indexing disabled.")
            return res
        from openerp.addons.document.content_index import indexer, cntIndex
        from openerp.addons.document.std_index import _to_unicode

        def improved_paragraph(self, paragraph, prefix=""):
            """Override method to insert image ignoring code."""
            content = []
            for text in paragraph.content:
                # Begin patch =========\
                if isinstance(text, document.Image):
                    continue
                # End patch ===========/
                content.append(u"".join(text.content))
            content = u"".join(content).encode("utf-8")
            for line in content.split("\n"):
                self.target.write(" " * self.indent)
                self.target.write(prefix)
                self.target.write(line)
                self.target.write("\n")
                # Only the first line carries the real prefix; following
                # lines get a blank replacement.
                if prefix:
                    prefix = " "

        # Replace the paragraph writer of pyth with the image skipping one.
        PlaintextWriter.paragraph = improved_paragraph

        class RtfDoc(indexer):
            """Index Rich Text Format (RTF) files."""

            def _getMimeTypes(self):
                return [
                    'application/rtf',
                    'application/x-rtf',
                    'text/rtf',
                    'text/richtext',
                ]

            def _getExtensions(self):
                return [
                    '.rtf',
                ]

            def _doIndexContent(self, content):
                """Just get text contents of rtf file."""
                s = StringIO.StringIO(content)
                try:
                    # r will be pyth.document.Document
                    r = Rtf15Reader.read(s)
                finally:
                    # Release the buffer even when the reader fails.
                    s.close()
                w = PlaintextWriter.write(r)  # w will be cStringIO.StringO
                result = _to_unicode(w.getvalue())
                return result

        cntIndex.register(RtfDoc())
        return res
|
BIN
document_rtf_index/static/description/icon.png
Normal file
BIN
document_rtf_index/static/description/icon.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 12 KiB |
15022
document_rtf_index/test_files/test_with_cat_image.rtf
Normal file
15022
document_rtf_index/test_files/test_with_cat_image.rtf
Normal file
File diff suppressed because it is too large
Load Diff
4
document_rtf_index/tests/__init__.py
Normal file
4
document_rtf_index/tests/__init__.py
Normal file
@ -0,0 +1,4 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# © 2016 Therp BV <http://therp.nl>
|
||||
# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html).
|
||||
from . import test_rtf_index
|
30
document_rtf_index/tests/test_rtf_index.py
Normal file
30
document_rtf_index/tests/test_rtf_index.py
Normal file
@ -0,0 +1,30 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# © 2016 Therp BV <http://therp.nl>
|
||||
# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html).
|
||||
from openerp.tests.common import TransactionCase
|
||||
from openerp.modules.module import get_module_resource
|
||||
|
||||
|
||||
class TestIndexRtf(TransactionCase):
    """Tests for the indexing of rtf attachments."""

    def test_index_rtf(self):
        """Test if the indexer indexes just the text in rtf documents."""
        attachment_model = self.env['ir.attachment']
        # Force loading of indexer (normally _register_hooks runs after tests)
        attachment_model._register_hook(self.env.cr)
        # we do this to avoid error messages about word files in demo data
        attachment_model.search([]).unlink()
        # Now take rather large rtf test file, with only few actual words:
        rtf_path = get_module_resource(
            'document_rtf_index',
            'test_files',
            'test_with_cat_image.rtf'
        )
        # Read through a context manager so the file handle is always closed.
        with open(rtf_path, 'rb') as rtf_handle:
            rtf_file = rtf_handle.read().encode('base64')
        att1 = attachment_model.create({
            'name': 'test_with_cat_image.rtf',
            'datas_fname': 'test_with_cat_image.rtf',
            'datas': rtf_file,
        })
        self.assertEqual(att1.file_type, 'application/rtf')
        self.assertEqual(att1.index_content[:16], 'Hello rtf world!')
|
Loading…
Reference in New Issue
Block a user