mirror of
https://github.com/OCA/knowledge.git
synced 2025-07-26 02:18:40 -06:00
[ENH] Add module document_rtf_index.
This commit is contained in:
parent
da0c01345b
commit
2f7bf15718
37
document_rtf_index/README.rst
Normal file
37
document_rtf_index/README.rst
Normal file
@ -0,0 +1,37 @@
|
||||
Index rtf documents
|
||||
===================
|
||||
|
||||
Indexing rtf documents can take a long time, especially when they contain
|
||||
images. This module will reduce rtf documents to just their text content and
|
||||
index that text.
|
||||
|
||||
Usage
|
||||
=====
|
||||
|
||||
Just installing the module will register the rtf indexer. No further user
|
||||
action or configuration is required.
|
||||
|
||||
Credits
|
||||
=======
|
||||
|
||||
Contributors
|
||||
------------
|
||||
|
||||
* Ronald Portier <ronald@therp.nl>
|
||||
* Icon courtesy of http://www.picol.org (refresh.svg) and
|
||||
https://github.com/odoo/odoo/blob/8.0/addons/knowledge/static/description/icon.png
|
||||
|
||||
Maintainer
|
||||
----------
|
||||
|
||||
.. image:: http://odoo-community.org/logo.png
|
||||
:alt: Odoo Community Association
|
||||
:target: http://odoo-community.org
|
||||
|
||||
This module is maintained by the OCA.
|
||||
|
||||
OCA, or the Odoo Community Association, is a nonprofit organization whose
|
||||
mission is to support the collaborative development of Odoo features and
|
||||
promote its widespread use.
|
||||
|
||||
To contribute to this module, please visit http://odoo-community.org.
|
4
document_rtf_index/__init__.py
Normal file
4
document_rtf_index/__init__.py
Normal file
@ -0,0 +1,4 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# © 2016 Therp BV <http://therp.nl>
|
||||
# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html).
|
||||
from . import std_index
|
23
document_rtf_index/__openerp__.py
Normal file
23
document_rtf_index/__openerp__.py
Normal file
@ -0,0 +1,23 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# © 2016 Therp BV <http://therp.nl>
|
||||
# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html).
|
||||
{
    "name": "Index rtf documents",
    "version": "8.0.1.0.0",
    "author": "Therp BV, Odoo Community Association (OCA)",
    "license": "AGPL-3",
    "category": "Knowledge Management",
    "summary": "Index rtf documents",
    # The indexer base class and registry come from the document addon.
    "depends": ["document"],
    "data": [],
    "auto_install": False,
    "installable": True,
    "application": False,
    # pyth supplies the rtf reader and the plaintext writer.
    "external_dependencies": {
        "python": ["pyth"],
    },
}
|
BIN
document_rtf_index/static/description/icon.png
Normal file
BIN
document_rtf_index/static/description/icon.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 12 KiB |
61
document_rtf_index/std_index.py
Normal file
61
document_rtf_index/std_index.py
Normal file
@ -0,0 +1,61 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# © 2016 Therp BV <http://therp.nl>
|
||||
# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html).
|
||||
import StringIO
|
||||
|
||||
from pyth import document
|
||||
from pyth.plugins.rtf15.reader import Rtf15Reader
|
||||
from pyth.plugins.plaintext.writer import PlaintextWriter
|
||||
|
||||
from openerp.addons.document.content_index import indexer, cntIndex
|
||||
from openerp.addons.document.std_index import _to_unicode
|
||||
|
||||
|
||||
def improved_paragraph(self, paragraph, prefix=""):
    """Override method to insert image ignoring code.

    Joins the text of every non-image element of the paragraph, encodes
    the result as utf-8 and writes it line by line to ``self.target``.

    :param paragraph: object whose ``content`` is iterated; each element
        exposes its own ``content`` that is joined into one string.
    :param prefix: string written in front of the first line; when it is
        non-empty, following lines get a single space instead.
    """
    pieces = [
        u"".join(text.content)
        for text in paragraph.content
        # Patch: images are left out of the output. isinstance also
        # catches subclasses of Image, unlike an exact class comparison.
        if not isinstance(text, document.Image)
    ]
    content = u"".join(pieces).encode("utf-8")

    for line in content.split("\n"):
        self.target.write(" " * self.indent)
        self.target.write(prefix)
        self.target.write(line)
        self.target.write("\n")
        if prefix:
            # Only the first line carries the real prefix.
            prefix = " "
|
||||
|
||||
|
||||
# Class-wide monkey patch, applied when this module is imported: replace
# PlaintextWriter's paragraph method with the image-skipping version.
PlaintextWriter.paragraph = improved_paragraph
|
||||
|
||||
|
||||
class RtfDoc(indexer):
    """Index Rich Text Format (RTF) files."""

    def _getMimeTypes(self):
        """Return the mime types this indexer declares for rtf files."""
        return [
            'application/rtf',
            'application/x-rtf',
            'text/rtf',
            'text/richtext',
        ]

    def _getExtensions(self):
        """Return the file extensions this indexer declares."""
        return [
            '.rtf',
        ]

    def _doIndexContent(self, content):
        """Just get text contents of rtf file.

        :param content: rtf data, wrapped in a StringIO buffer for the
            pyth reader.
        :return: the plain text produced by pyth, passed through
            ``_to_unicode``.
        """
        stream = StringIO.StringIO(content)
        try:
            # rtf_document will be pyth.document.Document
            rtf_document = Rtf15Reader.read(stream)
        finally:
            # Close the buffer even when the reader raises on bad input.
            stream.close()
        # plain_text will be cStringIO.StringO
        plain_text = PlaintextWriter.write(rtf_document)
        return _to_unicode(plain_text.getvalue())
|
||||
|
||||
# Register one RtfDoc instance with the document addon's content index
# when this module is imported.
cntIndex.register(RtfDoc())
|
15022
document_rtf_index/test_files/test_with_cat_image.rtf
Normal file
15022
document_rtf_index/test_files/test_with_cat_image.rtf
Normal file
File diff suppressed because it is too large
Load Diff
4
document_rtf_index/tests/__init__.py
Normal file
4
document_rtf_index/tests/__init__.py
Normal file
@ -0,0 +1,4 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# © 2016 Therp BV <http://therp.nl>
|
||||
# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html).
|
||||
from . import test_rtf_index
|
28
document_rtf_index/tests/test_rtf_index.py
Normal file
28
document_rtf_index/tests/test_rtf_index.py
Normal file
@ -0,0 +1,28 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# © 2016 Therp BV <http://therp.nl>
|
||||
# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html).
|
||||
import base64
|
||||
from openerp.tests.common import TransactionCase
|
||||
from openerp.modules.module import get_module_resource
|
||||
|
||||
|
||||
class TestIndexRtf(TransactionCase):
    """Check the rtf indexer against an rtf file that embeds an image."""

    def test_index_rtf(self):
        """Test if the indexer indexes just the text in rtf documents."""
        attachment_model = self.env['ir.attachment']
        # we do this to avoid error messages about word files in demo data
        attachment_model.search([]).unlink()
        # Now take a rather large rtf test file with only a few actual
        # words:
        rtf_path = get_module_resource(
            'document_rtf_index',
            'test_files',
            'test_with_cat_image.rtf'
        )
        # Close the file deterministically and use the base64 module
        # rather than the Python 2 only 'base64' string codec.
        with open(rtf_path, 'rb') as rtf_handle:
            rtf_file = base64.b64encode(rtf_handle.read())
        att1 = attachment_model.create({
            'name': 'test_with_cat_image.rtf',
            'datas_fname': 'test_with_cat_image.rtf',
            'datas': rtf_file,
        })
        self.assertEqual(att1.file_type, 'application/rtf')
        self.assertEqual(att1.index_content[:16], 'Hello rtf world!')
|
Loading…
Reference in New Issue
Block a user