mirror of
https://github.com/OCA/knowledge.git
synced 2025-07-26 18:38:41 -06:00
[ENH] Add module document_rtf_index.
This commit is contained in:
parent
da0c01345b
commit
2f7bf15718
37
document_rtf_index/README.rst
Normal file
37
document_rtf_index/README.rst
Normal file
@ -0,0 +1,37 @@
|
|||||||
|
Index rtf documents
|
||||||
|
===================
|
||||||
|
|
||||||
|
Indexing rtf documents can take a long time, especially when they contain
|
||||||
|
images. This module will reduce rtf documents to just their text contents and
|
||||||
|
index that text.
|
||||||
|
|
||||||
|
Usage
|
||||||
|
=====
|
||||||
|
|
||||||
|
Just installing the module will register the rtf indexer. No further user
|
||||||
|
action or configuration is required.
|
||||||
|
|
||||||
|
Credits
|
||||||
|
=======
|
||||||
|
|
||||||
|
Contributors
|
||||||
|
------------
|
||||||
|
|
||||||
|
* Ronald Portier <ronald@therp.nl>
|
||||||
|
* Icon courtesy of http://www.picol.org (refresh.svg) and
|
||||||
|
https://github.com/odoo/odoo/blob/8.0/addons/knowledge/static/description/icon.png
|
||||||
|
|
||||||
|
Maintainer
|
||||||
|
----------
|
||||||
|
|
||||||
|
.. image:: http://odoo-community.org/logo.png
|
||||||
|
:alt: Odoo Community Association
|
||||||
|
:target: http://odoo-community.org
|
||||||
|
|
||||||
|
This module is maintained by the OCA.
|
||||||
|
|
||||||
|
OCA, or the Odoo Community Association, is a nonprofit organization whose
|
||||||
|
mission is to support the collaborative development of Odoo features and
|
||||||
|
promote its widespread use.
|
||||||
|
|
||||||
|
To contribute to this module, please visit http://odoo-community.org.
|
4
document_rtf_index/__init__.py
Normal file
4
document_rtf_index/__init__.py
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
# © 2016 Therp BV <http://therp.nl>
|
||||||
|
# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html).
|
||||||
|
from . import std_index
|
23
document_rtf_index/__openerp__.py
Normal file
23
document_rtf_index/__openerp__.py
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
# © 2016 Therp BV <http://therp.nl>
|
||||||
|
# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html).
|
||||||
|
{
    # Module identification
    "name": "Index rtf documents",
    "version": "8.0.1.0.0",
    "author": "Therp BV, Odoo Community Association (OCA)",
    "license": "AGPL-3",
    "category": "Knowledge Management",
    "summary": "Index rtf documents",
    # Other addons this module builds on
    "depends": ["document"],
    # No data files are loaded by this module
    "data": [],
    "auto_install": False,
    "installable": True,
    "application": False,
    # Python packages that must be importable for this module
    "external_dependencies": {"python": ["pyth"]},
}
|
BIN
document_rtf_index/static/description/icon.png
Normal file
BIN
document_rtf_index/static/description/icon.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 12 KiB |
61
document_rtf_index/std_index.py
Normal file
61
document_rtf_index/std_index.py
Normal file
@ -0,0 +1,61 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
# © 2016 Therp BV <http://therp.nl>
|
||||||
|
# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html).
|
||||||
|
import StringIO
|
||||||
|
|
||||||
|
from pyth import document
|
||||||
|
from pyth.plugins.rtf15.reader import Rtf15Reader
|
||||||
|
from pyth.plugins.plaintext.writer import PlaintextWriter
|
||||||
|
|
||||||
|
from openerp.addons.document.content_index import indexer, cntIndex
|
||||||
|
from openerp.addons.document.std_index import _to_unicode
|
||||||
|
|
||||||
|
|
||||||
|
def improved_paragraph(self, paragraph, prefix=""):
    """Write one paragraph as plain text, leaving out embedded images.

    Override of the plaintext writer's paragraph method; the only intended
    difference is that image elements are ignored instead of being joined
    into the text.

    :param self: writer instance; ``self.target`` must offer ``write`` and
        ``self.indent`` is the number of indent units put before each line.
    :param paragraph: object whose ``content`` is an iterable of elements,
        each with a ``content`` iterable of unicode strings.
    :param prefix: string written before the first line; when it is not
        empty, the following lines get a single space instead.
    """
    # isinstance rather than an exact class comparison, so that subclasses
    # of document.Image are skipped as well.
    fragments = [
        u"".join(text.content)
        for text in paragraph.content
        if not isinstance(text, document.Image)
    ]
    content = u"".join(fragments).encode("utf-8")

    for line in content.split("\n"):
        self.target.write(" " * self.indent)
        self.target.write(prefix)
        self.target.write(line)
        self.target.write("\n")
        # Only the first line carries the real prefix; continuation lines
        # are padded instead.
        if prefix:
            prefix = " "
|
||||||
|
|
||||||
|
|
||||||
|
# Monkey-patch at import time: replace the paragraph method on the
# PlaintextWriter class itself, so every PlaintextWriter in this process
# uses the image-skipping version defined above.
PlaintextWriter.paragraph = improved_paragraph
|
||||||
|
|
||||||
|
|
||||||
|
class RtfDoc(indexer):
    """Index Rich Text Format (RTF) files."""

    def _getMimeTypes(self):
        """Return the mime types this indexer declares for rtf content."""
        return [
            'application/rtf',
            'application/x-rtf',
            'text/rtf',
            'text/richtext',
        ]

    def _getExtensions(self):
        """Return the file extensions this indexer declares."""
        return [
            '.rtf',
        ]

    def _doIndexContent(self, content):
        """Just get text contents of rtf file.

        :param content: raw rtf data; it is wrapped in a StringIO buffer and
            handed to the rtf reader.
        :return: the plain text written by PlaintextWriter, passed through
            ``_to_unicode``.
        """
        source = StringIO.StringIO(content)
        try:
            # rtf_document will be pyth.document.Document
            rtf_document = Rtf15Reader.read(source)
        finally:
            # Release the buffer even when the reader fails on bad input.
            source.close()
        # plain_text will be cStringIO.StringO
        plain_text = PlaintextWriter.write(rtf_document)
        return _to_unicode(plain_text.getvalue())
|
||||||
|
|
||||||
|
# Register one RtfDoc instance with the content index when this module is
# imported; no further configuration is involved here.
cntIndex.register(RtfDoc())
|
15022
document_rtf_index/test_files/test_with_cat_image.rtf
Normal file
15022
document_rtf_index/test_files/test_with_cat_image.rtf
Normal file
File diff suppressed because it is too large
Load Diff
4
document_rtf_index/tests/__init__.py
Normal file
4
document_rtf_index/tests/__init__.py
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
# © 2016 Therp BV <http://therp.nl>
|
||||||
|
# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html).
|
||||||
|
from . import test_rtf_index
|
28
document_rtf_index/tests/test_rtf_index.py
Normal file
28
document_rtf_index/tests/test_rtf_index.py
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
# © 2016 Therp BV <http://therp.nl>
|
||||||
|
# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html).
|
||||||
|
import base64
|
||||||
|
from openerp.tests.common import TransactionCase
|
||||||
|
from openerp.modules.module import get_module_resource
|
||||||
|
|
||||||
|
|
||||||
|
class TestIndexRtf(TransactionCase):
    """Check indexing of an rtf attachment that contains an image."""

    def test_index_rtf(self):
        """Test if the indexer indexes just the text in rtf documents."""
        # we do this to avoid error messages about word files in demo data
        self.env['ir.attachment'].search([]).unlink()
        # Now take rather large rtf test file, with only few actual words:
        rtf_path = get_module_resource(
            'document_rtf_index',
            'test_files',
            'test_with_cat_image.rtf'
        )
        # The context manager closes the file handle deterministically, and
        # base64.b64encode replaces the Python 2-only 'base64' str codec.
        with open(rtf_path, 'rb') as rtf_handle:
            rtf_file = base64.b64encode(rtf_handle.read())
        att1 = self.env['ir.attachment'].create({
            'name': 'test_with_cat_image.rtf',
            'datas_fname': 'test_with_cat_image.rtf',
            'datas': rtf_file,
        })
        self.assertEqual(att1.file_type, 'application/rtf')
        # Only the leading words are compared against the extracted text.
        self.assertEqual(att1.index_content[:16], 'Hello rtf world!')
|
Loading…
Reference in New Issue
Block a user