From 3b3c60b348b44c390d048984c64e1fa444553df2 Mon Sep 17 00:00:00 2001 From: Carlos Almeida Date: Tue, 6 Jun 2017 15:55:09 +0100 Subject: [PATCH] Remove migrated document_ocr module --- document_ocr/README.rst | 101 --------- document_ocr/__init__.py | 5 - document_ocr/__manifest__.py | 27 --- document_ocr/data/ir_config_parameter.xml | 21 -- document_ocr/data/ir_cron.xml | 13 -- document_ocr/models/__init__.py | 5 - document_ocr/models/ir_attachment.py | 261 ---------------------- document_ocr/static/description/icon.png | Bin 9455 -> 0 bytes document_ocr/tests/__init__.py | 5 - document_ocr/tests/test_document_ocr.py | 62 ----- document_ocr/views/ir_attachment_view.xml | 43 ---- 11 files changed, 543 deletions(-) delete mode 100644 document_ocr/README.rst delete mode 100644 document_ocr/__init__.py delete mode 100644 document_ocr/__manifest__.py delete mode 100644 document_ocr/data/ir_config_parameter.xml delete mode 100644 document_ocr/data/ir_cron.xml delete mode 100644 document_ocr/models/__init__.py delete mode 100644 document_ocr/models/ir_attachment.py delete mode 100644 document_ocr/static/description/icon.png delete mode 100644 document_ocr/tests/__init__.py delete mode 100644 document_ocr/tests/test_document_ocr.py delete mode 100644 document_ocr/views/ir_attachment_view.xml diff --git a/document_ocr/README.rst b/document_ocr/README.rst deleted file mode 100644 index c500f0b1..00000000 --- a/document_ocr/README.rst +++ /dev/null @@ -1,101 +0,0 @@ -.. image:: https://img.shields.io/badge/licence-AGPL--3-blue.svg - :target: http://www.gnu.org/licenses/agpl-3.0-standalone.html - :alt: License: AGPL-3 - -================= -OCR for documents -================= - -This module was written to make uploaded documents, for example scans, searchable by running OCR on them. - -It supports all image formats `Pillow supports `_ for reading and PDFs. - -Installation -============ - -To install this module, you need to: - -#. install tesseract and the language(s) your documents use -#. if you want to support OCR on PDFs, install imagemagick -#. install the module itself - -On an Debian or Ubuntu system you would typically run:: - - $ sudo apt-get install tesseract-ocr imagemagick - - -Configuration -============= - -To configure this module, go to: - -#. Settings/Technical/Parameters/System parameters and review the parameters with names document_ocr.* - -Usage -===== - -By default, character recognition is done asynchronously by a cronjob at night. -This is because the recognition process takes a while and you don't want to make your users wait for the indexation to finish. -The interval to run the cronjob can be adjusted to your needs in the ``Scheduled Actions`` menu, under ` `Settings``. -In case you want to force the OCR to be done immediately, set configuration parameter ``document_ocr.synchronous`` to value ``True``. - - -By default, recognition language is set to english. -In case you want to use a different default, set configuration parameter ``document_ocr.language`` to value respective value ex:``por``, for Portuguese. - - -In PDF case, OCR will run after it will be converted to an image. But OCR will be applied to all PDFs. - - -System parameters used: -#``document_ocr.synchronous``: bool -#``document_ocr.language``: string -#``document_ocr.dpi``: integer -#``document_ocr.quality``: integer - - -.. image:: https://odoo-community.org/website/image/ir.attachment/5784_f2813bd/datas - :alt: Try me on Runbot - :target: https://runbot.odoo-community.org/runbot/118/10.0 - -Bug Tracker -=========== - -Bugs are tracked on `GitHub Issues `_. -In case of trouble, please check there if your issue has already been reported. -If you spotted it first, help us smashing it by providing a detailed and welcomed feedback. - -Credits -======= - -The actual work ---------------- - -* `tesseract `_ - -Images ------- - -* Odoo Community Association: `Icon `_. - -Contributors ------------- - -* Holger Brunn - -Do not contact contributors directly about help with questions or problems concerning this addon, but use the `community mailing list `_ or the `appropriate specialized mailinglist `_ for help, and the bug tracker linked in `Bug Tracker`_ above for technical issues. - -Maintainer ----------- - -.. image:: https://odoo-community.org/logo.png - :alt: Odoo Community Association - :target: https://odoo-community.org - -This module is maintained by the OCA. - -OCA, or the Odoo Community Association, is a nonprofit organization whose -mission is to support the collaborative development of Odoo features and -promote its widespread use. - -To contribute to this module, please visit https://odoo-community.org. diff --git a/document_ocr/__init__.py b/document_ocr/__init__.py deleted file mode 100644 index 472456b6..00000000 --- a/document_ocr/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# -*- coding: utf-8 -*- -# © 2016 Therp BV -# © 2017 ThinkOpen Solutions -# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html). -from . import models diff --git a/document_ocr/__manifest__.py b/document_ocr/__manifest__.py deleted file mode 100644 index ad012794..00000000 --- a/document_ocr/__manifest__.py +++ /dev/null @@ -1,27 +0,0 @@ -# -*- coding: utf-8 -*- -# © 2016 Therp BV -# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html). -{ - "name": "OCR for Documents", - "version": "10.0.1.0.0", - "author": "Therp BV," - " Odoo Community Association (OCA)," - " ThinkOpen Solutions Brasil", - "license": "AGPL-3", - "category": "Knowledge Management", - "summary": "Run character recognition on uploaded files", - "depends": [ - 'document', - ], - "data": [ - "data/ir_cron.xml", - "data/ir_config_parameter.xml", - "views/ir_attachment_view.xml", - ], - "external_dependencies": { - 'bin': [ - 'tesseract', - 'convert', - ], - }, -} diff --git a/document_ocr/data/ir_config_parameter.xml b/document_ocr/data/ir_config_parameter.xml deleted file mode 100644 index 721a0740..00000000 --- a/document_ocr/data/ir_config_parameter.xml +++ /dev/null @@ -1,21 +0,0 @@ - - - - - document_ocr.synchronous - False - - - document_ocr.dpi - 300 - - - document_ocr.quality - 100 - - - document_ocr.language - eng - - - diff --git a/document_ocr/data/ir_cron.xml b/document_ocr/data/ir_cron.xml deleted file mode 100644 index f69d151a..00000000 --- a/document_ocr/data/ir_cron.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - Run OCR on uploaded documents - days - 1 - ir.attachment - _ocr_cron - -1 - - - diff --git a/document_ocr/models/__init__.py b/document_ocr/models/__init__.py deleted file mode 100644 index 051b3ddf..00000000 --- a/document_ocr/models/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# -*- coding: utf-8 -*- -# © 2016 Therp BV -# © 2017 ThinkOpen Solutions -# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html). -from . import ir_attachment diff --git a/document_ocr/models/ir_attachment.py b/document_ocr/models/ir_attachment.py deleted file mode 100644 index 18102d52..00000000 --- a/document_ocr/models/ir_attachment.py +++ /dev/null @@ -1,261 +0,0 @@ -# -*- coding: utf-8 -*- -# © 2016 Therp BV -# © 2017 ThinkOpen Solutions -# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html). - -import io -import logging -import subprocess -from StringIO import StringIO - -import pyPdf -from odoo import api, fields, models, _ -from odoo.exceptions import UserError - -_logger = logging.getLogger(__name__) -_MARKER_PHRASE = '[[waiting for OCR]]' -OCR_LANGUAGE = [('afr', 'Afrikaans'), - ('amh', 'Amharic'), - ('ara', 'Arabic'), - ('asm', 'Assamese'), - ('aze', 'Azerbaijani'), - ('aze_cyrl', 'Azerbaijani - Cyrilic'), - ('bel', 'Belarusian'), - ('ben', 'Bengali'), - ('bod', 'Tibetan'), - ('bos', 'Bosnian'), - ('bul', 'Bulgarian'), - ('cat', 'Catalan; Valencian'), - ('ceb', 'Cebuano'), - ('ces', 'Czech'), - ('chi_sim', 'Chinese - Simplified'), - ('chi_tra', 'Chinese - Traditional'), - ('chr', 'Cherokee'), - ('cym', 'Welsh'), - ('dan', 'Danish'), - ('dan_frak', 'Danish - Fraktur'), - ('deu', 'German'), - ('deu_frak', 'German - Fraktur'), - ('dzo', 'Dzongkha'), - ('ell', 'Greek, Modern (1453-)'), - ('eng', 'English'), - ('enm', 'English, Middle (1100-1500)'), - ('epo', 'Esperanto'), - ('equ', 'Math / equation detection module'), - ('est', 'Estonian'), - ('eus', 'Basque'), - ('fas', 'Persian'), - ('fin', 'Finnish'), - ('fra', 'French'), - ('frk', 'Frankish'), - ('frm', 'French, Middle (ca.1400-1600)'), - ('gle', 'Irish'), - ('glg', 'Galician'), - ('grc', 'Greek, Ancient (to 1453)'), - ('guj', 'Gujarati'), - ('hat', 'Haitian; Haitian Creole'), - ('heb', 'Hebrew'), - ('hin', 'Hindi'), - ('hrv', 'Croatian'), - ('hun', 'Hungarian'), - ('iku', 'Inuktitut'), - ('ind', 'Indonesian'), - ('isl', 'Icelandic'), - ('ita', 'Italian'), - ('ita_old', 'Italian - Old'), - ('jav', 'Javanese'), - ('jpn', 'Japanese'), - ('kan', 'Kannada'), - ('kat', 'Georgian'), - ('kat_old', 'Georgian - Old'), - ('kaz', 'Kazakh'), - ('khm', 'Central Khmer'), - ('kir', 'Kirghiz; Kyrgyz'), - ('kor', 'Korean'), - ('kur', 'Kurdish'), - ('lao', 'Lao'), - ('lat', 'Latin'), - ('lav', 'Latvian'), - ('lit', 'Lithuanian'), - ('mal', 'Malayalam'), - ('mar', 'Marathi'), - ('mkd', 'Macedonian'), - ('mlt', 'Maltese'), - ('msa', 'Malay'), - ('mya', 'Burmese'), - ('nep', 'Nepali'), - ('nld', 'Dutch; Flemish'), - ('nor', 'Norwegian'), - ('ori', 'Oriya'), - ('osd', 'Orientation and script detection module'), - ('pan', 'Panjabi; Punjabi'), - ('pol', 'Polish'), - ('por', 'Portuguese'), - ('pus', 'Pushto; Pashto'), - ('ron', 'Romanian; Moldavian; Moldovan'), - ('rus', 'Russian'), - ('san', 'Sanskrit'), - ('sin', 'Sinhala; Sinhalese'), - ('slk', 'Slovak'), - ('slk_frak', 'Slovak - Fraktur'), - ('slv', 'Slovenian'), - ('spa', 'Spanish; Castilian'), - ('spa_old', 'Spanish; Castilian - Old'), - ('sqi', 'Albanian'), - ('srp', 'Serbian'), - ('srp_latn', 'Serbian - Latin'), - ('swa', 'Swahili'), - ('swe', 'Swedish'), - ('syr', 'Syriac'), - ('tam', 'Tamil'), - ('tel', 'Telugu'), - ('tgk', 'Tajik'), - ('tgl', 'Tagalog'), - ('tha', 'Thai'), - ('tir', 'Tigrinya'), - ('tur', 'Turkish'), - ('uig', 'Uighur; Uyghur'), - ('ukr', 'Ukrainian'), - ('urd', 'Urdu'), - ('uzb', 'Uzbek'), - ('uzb_cyrl', 'Uzbek - Cyrilic'), - ('vie', 'Vietnamese'), - ('yid', 'Yiddish'), ] - - -class IrAttachment(models.Model): - _inherit = 'ir.attachment' - - language = fields.Selection(OCR_LANGUAGE, 'Language', - default=lambda self: - self.env['ir.config_parameter'].get_param( - 'document_ocr.language', 'eng')) - # We need to redefine index_content field to be able to update it - # on the onchange_language() - index_content = fields.Text('Indexed Content', - readonly=False, - prefetch=False) - index_content_rel = fields.Text(related='index_content', - string='Indexed Content Rel') - - @api.onchange('language') - def onchange_language(self): - process = subprocess.Popen(['tesseract', '--list-langs'], - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) - stdout, stderr = process.communicate() - if self.language not in stderr.split('\n'): - raise UserError(_( - "Language not installed." - " Please ask your system administrator to" - " install tesseract '%s' language." % - self.language)) - if self.store_fname: - bin_data = self._file_read(self.store_fname) - else: - bin_data = self.db_datas - if bin_data: - index_content = self._index( - bin_data.decode('base64'), self.datas_fname, self.mimetype) - return {'value': { - 'index_content': index_content}} - return {'value': {}} - - @api.model - def _index(self, bin_data, datas_fname, mimetype): - content = super(IrAttachment, self)._index( - bin_data, datas_fname, mimetype) - if not content or content == 'image': - has_synchr_param = self.env['ir.config_parameter'].get_param( - 'document_ocr.synchronous', 'False') == 'True' - has_force_flag = self.env.context.get('document_ocr_force') - synchr = has_synchr_param or has_force_flag - if synchr: - content = self._index_ocr(bin_data) - else: - content = _MARKER_PHRASE - return content - - def _index_ocr(self, bin_data): - _logger.info('OCR IMAGE "%s"...', self.datas_fname) - process = subprocess.Popen( - ['tesseract', 'stdin', 'stdout', '-l', self.language], - stdin=subprocess.PIPE, stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - ) - stdout, stderr = process.communicate(bin_data) - if stderr: - _logger.error('Error during OCR: %s', stderr) - return stdout - - def _index_pdf(self, bin_data): - - def convert_bin_to_image(self, bin_data): - dpi = int(self.env['ir.config_parameter'].get_param( - 'document_ocr.dpi', '500')) - quality = int(self.env['ir.config_parameter'].get_param( - 'document_ocr.quality', '100')) - process = subprocess.Popen( - ['convert', '-density', str(dpi), - '-quality', str(quality), - '-', '-append', 'png32:-'], - stdin=subprocess.PIPE, stdout=subprocess.PIPE, - stderr=subprocess.PIPE) - stdout, stderr = process.communicate(bin_data) - if stderr: - _logger.error('Error converting PDF to image: %s', stderr) - return stdout - - def _convert_pdf_page_to_image(self, pdf, pagenum): - dst_pdf = pyPdf.PdfFileWriter() - dst_pdf.addPage(pdf.getPage(pagenum)) - pdf_bytes = io.BytesIO() - dst_pdf.write(pdf_bytes) - pdf_bytes.seek(0) - return convert_bin_to_image(self, pdf_bytes.read()) - - has_synchr_param = self.env['ir.config_parameter'].get_param( - 'document_ocr.synchronous', 'False') == 'True' - has_force_flag = self.env.context.get('document_ocr_force') - synchr = has_synchr_param or has_force_flag - if synchr: - buf = super(IrAttachment, self)._index_pdf(bin_data) - if len(buf.split('\n')) < 2 and bin_data.startswith('%PDF-'): - # If we got less than 2 lines, - # run OCR anyway and append to existent text - try: - f = StringIO(bin_data) - pdf = pyPdf.PdfFileReader(f) - if pdf.getNumPages() > 1: - for pagenum in range(0, pdf.getNumPages()): - _logger.info('OCR PDF "%s" page %d/%d...', - self.datas_fname, - pagenum + 1, - pdf.getNumPages()) - pdf_image = _convert_pdf_page_to_image(self, pdf, - pagenum) - index_content = self._index_ocr(pdf_image) - buf = u'%s\n-- %d --\n%s' % ( - buf, pagenum + 1, index_content.decode('utf8')) - else: - pdf_image = convert_bin_to_image(self, bin_data) - index_content = self._index_ocr(pdf_image) - buf = u'%s\n%s' % (buf, index_content.decode('utf8')) - except Exception as e: - _logger.error('Error converting PDF to image: %s', e) - pass - else: - buf = _MARKER_PHRASE - return buf - - @api.model - def _ocr_cron(self): - for this in self.with_context(document_ocr_force=True).search( - [('index_content', '=', _MARKER_PHRASE)]): - if not this.datas: - continue - index_content = this._index( - this.datas.decode('base64'), this.datas_fname, this.mimetype) - this.write({ - 'index_content': index_content, - }) diff --git a/document_ocr/static/description/icon.png b/document_ocr/static/description/icon.png deleted file mode 100644 index 3a0328b516c4980e8e44cdb63fd945757ddd132d..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 9455 zcmW++2RxMjAAjx~&dlBk9S+%}OXg)AGE&Cb*&}d0jUxM@u(PQx^-s)697TX`ehR4?GS^qbkof1cslKgkU)h65qZ9Oc=ml_0temigYLJfnz{IDzUf>bGs4N!v3=Z3jMq&A#7%rM5eQ#dc?k~! zVpnB`o+K7|Al`Q_U;eD$B zfJtP*jH`siUq~{KE)`jP2|#TUEFGRryE2`i0**z#*^6~AI|YzIWy$Cu#CSLW3q=GA z6`?GZymC;dCPk~rBS%eCb`5OLr;RUZ;D`}um=H)BfVIq%7VhiMr)_#G0N#zrNH|__ zc+blN2UAB0=617@>_u;MPHN;P;N#YoE=)R#i$k_`UAA>WWCcEVMh~L_ zj--gtp&|K1#58Yz*AHCTMziU1Jzt_jG0I@qAOHsk$2}yTmVkBp_eHuY$A9)>P6o~I z%aQ?!(GqeQ-Y+b0I(m9pwgi(IIZZzsbMv+9w{PFtd_<_(LA~0H(xz{=FhLB@(1&qHA5EJw1>>=%q2f&^X>IQ{!GJ4e9U z&KlB)z(84HmNgm2hg2C0>WM{E(DdPr+EeU_N@57;PC2&DmGFW_9kP&%?X4}+xWi)( z;)z%wI5>D4a*5XwD)P--sPkoY(a~WBw;E~AW`Yue4kFa^LM3X`8x|}ZUeMnqr}>kH zG%WWW>3ml$Yez?i%)2pbKPI7?5o?hydokgQyZsNEr{a|mLdt;X2TX(#B1j35xPnPW z*bMSSOauW>o;*=kO8ojw91VX!qoOQb)zHJ!odWB}d+*K?#sY_jqPdg{Sm2HdYzdEx zOGVPhVRTGPtv0o}RfVP;Nd(|CB)I;*t&QO8h zFfekr30S!-LHmV_Su-W+rEwYXJ^;6&3|L$mMC8*bQptyOo9;>Qb9Q9`ySe3%V$A*9 zeKEe+b0{#KWGp$F+tga)0RtI)nhMa-K@JS}2krK~n8vJ=Ngm?R!9G<~RyuU0d?nz# z-5EK$o(!F?hmX*2Yt6+coY`6jGbb7tF#6nHA zuKk=GGJ;ZwON1iAfG$E#Y7MnZVmrY|j0eVI(DN_MNFJmyZ|;w4tf@=CCDZ#5N_0K= z$;R~bbk?}TpfDjfB&aiQ$VA}s?P}xPERJG{kxk5~R`iRS(SK5d+Xs9swCozZISbnS zk!)I0>t=A<-^z(cmSFz3=jZ23u13X><0b)P)^1T_))Kr`e!-pb#q&J*Q`p+B6la%C zuVl&0duN<;uOsB3%T9Fp8t{ED108<+W(nOZd?gDnfNBC3>M8WE61$So|P zVvqH0SNtDTcsUdzaMDpT=Ty0pDHHNL@Z0w$Y`XO z2M-_r1S+GaH%pz#Uy0*w$Vdl=X=rQXEzO}d6J^R6zjM1u&c9vYLvLp?W7w(?np9x1 zE_0JSAJCPB%i7p*Wvg)pn5T`8k3-uR?*NT|J`eS#_#54p>!p(mLDvmc-3o0mX*mp_ zN*AeS<>#^-{S%W<*mz^!X$w_2dHWpcJ6^j64qFBft-o}o_Vx80o0>}Du;>kLts;$8 zC`7q$QI(dKYG`Wa8#wl@V4jVWBRGQ@1dr-hstpQL)Tl+aqVpGpbSfN>5i&QMXfiZ> zaA?T1VGe?rpQ@;+pkrVdd{klI&jVS@I5_iz!=UMpTsa~mBga?1r}aRBm1WS;TT*s0f0lY=JBl66Upy)-k4J}lh=P^8(SXk~0xW=T9v*B|gzIhN z>qsO7dFd~mgxAy4V?&)=5ieYq?zi?ZEoj)&2o)RLy=@hbCRcfT5jigwtQGE{L*8<@Yd{zg;CsL5mvzfDY}P-wos_6PfprFVaeqNE%h zKZhLtcQld;ZD+>=nqN~>GvROfueSzJD&BE*}XfU|H&(FssBqY=hPCt`d zH?@s2>I(|;fcW&YM6#V#!kUIP8$Nkdh0A(bEVj``-AAyYgwY~jB zT|I7Bf@%;7aL7Wf4dZ%VqF$eiaC38OV6oy3Z#TER2G+fOCd9Iaoy6aLYbPTN{XRPz z;U!V|vBf%H!}52L2gH_+j;`bTcQRXB+y9onc^wLm5wi3-Be}U>k_u>2Eg$=k!(l@I zcCg+flakT2Nej3i0yn+g+}%NYb?ta;R?(g5SnwsQ49U8Wng8d|{B+lyRcEDvR3+`O{zfmrmvFrL6acVP%yG98X zo&+VBg@px@i)%o?dG(`T;n*$S5*rnyiR#=wW}}GsAcfyQpE|>a{=$Hjg=-*_K;UtD z#z-)AXwSRY?OPefw^iI+ z)AXz#PfEjlwTes|_{sB?4(O@fg0AJ^g8gP}ex9Ucf*@_^J(s_5jJV}c)s$`Myn|Kd z$6>}#q^n{4vN@+Os$m7KV+`}c%4)4pv@06af4-x5#wj!KKb%caK{A&Y#Rfs z-po?Dcb1({W=6FKIUirH&(yg=*6aLCekcKwyfK^JN5{wcA3nhO(o}SK#!CINhI`-I z1)6&n7O&ZmyFMuNwvEic#IiOAwNkR=u5it{B9n2sAJV5pNhar=j5`*N!Na;c7g!l$ z3aYBqUkqqTJ=Re-;)s!EOeij=7SQZ3Hq}ZRds%IM*PtM$wV z@;rlc*NRK7i3y5BETSKuumEN`Xu_8GP1Ri=OKQ$@I^ko8>H6)4rjiG5{VBM>B|%`&&s^)jS|-_95&yc=GqjNo{zFkw%%HHhS~e=s zD#sfS+-?*t|J!+ozP6KvtOl!R)@@-z24}`9{QaVLD^9VCSR2b`b!KC#o;Ki<+wXB6 zx3&O0LOWcg4&rv4QG0)4yb}7BFSEg~=IR5#ZRj8kg}dS7_V&^%#Do==#`u zpy6{ox?jWuR(;pg+f@mT>#HGWHAJRRDDDv~@(IDw&R>9643kK#HN`!1vBJHnC+RM&yIh8{gG2q zA%e*U3|N0XSRa~oX-3EAneep)@{h2vvd3Xvy$7og(sayr@95+e6~Xvi1tUqnIxoIH zVWo*OwYElb#uyW{Imam6f2rGbjR!Y3`#gPqkv57dB6K^wRGxc9B(t|aYDGS=m$&S!NmCtrMMaUg(c zc2qC=2Z`EEFMW-me5B)24AqF*bV5Dr-M5ig(l-WPS%CgaPzs6p_gnCIvTJ=Y<6!gT zVt@AfYCzjjsMEGi=rDQHo0yc;HqoRNnNFeWZgcm?f;cp(6CNylj36DoL(?TS7eU#+ z7&mfr#y))+CJOXQKUMZ7QIdS9@#-}7y2K1{8)cCt0~-X0O!O?Qx#E4Og+;A2SjalQ zs7r?qn0H044=sDN$SRG$arw~n=+T_DNdSrarmu)V6@|?1-ZB#hRn`uilTGPJ@fqEy zGt(f0B+^JDP&f=r{#Y_wi#AVDf-y!RIXU^0jXsFpf>=Ji*TeqSY!H~AMbJdCGLhC) zn7Rx+sXw6uYj;WRYrLd^5IZq@6JI1C^YkgnedZEYy<&4(z%Q$5yv#Boo{AH8n$a zhb4Y3PWdr269&?V%uI$xMcUrMzl=;w<_nm*qr=c3Rl@i5wWB;e-`t7D&c-mcQl7x! zZWB`UGcw=Y2=}~wzrfLx=uet<;m3~=8I~ZRuzvMQUQdr+yTV|ATf1Uuomr__nDf=X zZ3WYJtHp_ri(}SQAPjv+Y+0=fH4krOP@S&=zZ-t1jW1o@}z;xk8 z(Nz1co&El^HK^NrhVHa-_;&88vTU>_J33=%{if;BEY*J#1n59=07jrGQ#IP>@u#3A z;!q+E1Rj3ZJ+!4bq9F8PXJ@yMgZL;>&gYA0%_Kbi8?S=XGM~dnQZQ!yBSgcZhY96H zrWnU;k)qy`rX&&xlDyA%(a1Hhi5CWkmg(`Gb%m(HKi-7Z!LKGRP_B8@`7&hdDy5n= z`OIxqxiVfX@OX1p(mQu>0Ai*v_cTMiw4qRt3~NBvr9oBy0)r>w3p~V0SCm=An6@3n)>@z!|o-$HvDK z|3D2ZMJkLE5loMKl6R^ez@Zz%S$&mbeoqH5`Bb){Ei21q&VP)hWS2tjShfFtGE+$z zzCR$P#uktu+#!w)cX!lWN1XU%K-r=s{|j?)Akf@q#3b#{6cZCuJ~gCxuMXRmI$nGtnH+-h z+GEi!*X=AP<|fG`1>MBdTb?28JYc=fGvAi2I<$B(rs$;eoJCyR6_bc~p!XR@O-+sD z=eH`-ye})I5ic1eL~TDmtfJ|8`0VJ*Yr=hNCd)G1p2MMz4C3^Mj?7;!w|Ly%JqmuW zlIEW^Ft%z?*|fpXda>Jr^1noFZEwFgVV%|*XhH@acv8rdGxeEX{M$(vG{Zw+x(ei@ zmfXb22}8-?Fi`vo-YVrTH*C?a8%M=Hv9MqVH7H^J$KsD?>!SFZ;ZsvnHr_gn=7acz z#W?0eCdVhVMWN12VV^$>WlQ?f;P^{(&pYTops|btm6aj>_Uz+hqpGwB)vWp0Cf5y< zft8-je~nn?W11plq}N)4A{l8I7$!ks_x$PXW-2XaRFswX_BnF{R#6YIwMhAgd5F9X zGmwdadS6(a^fjHtXg8=l?Rc0Sm%hk6E9!5cLVloEy4eh(=FwgP`)~I^5~pBEWo+F6 zSf2ncyMurJN91#cJTy_u8Y}@%!bq1RkGC~-bV@SXRd4F{R-*V`bS+6;W5vZ(&+I<9$;-V|eNfLa5n-6% z2(}&uGRF;p92eS*sE*oR$@pexaqr*meB)VhmIg@h{uzkk$9~qh#cHhw#>O%)b@+(| z^IQgqzuj~Sk(J;swEM-3TrJAPCq9k^^^`q{IItKBRXYe}e0Tdr=Huf7da3$l4PdpwWDop%^}n;dD#K4s#DYA8SHZ z&1!riV4W4R7R#C))JH1~axJ)RYnM$$lIR%6fIVA@zV{XVyx}C+a-Dt8Y9M)^KU0+H zR4IUb2CJ{Hg>CuaXtD50jB(_Tcx=Z$^WYu2u5kubqmwp%drJ6 z?Fo40g!Qd<-l=TQxqHEOuPX0;^z7iX?Ke^a%XT<13TA^5`4Xcw6D@Ur&VT&CUe0d} z1GjOVF1^L@>O)l@?bD~$wzgf(nxX1OGD8fEV?TdJcZc2KoUe|oP1#=$$7ee|xbY)A zDZq+cuTpc(fFdj^=!;{k03C69lMQ(|>uhRfRu%+!k&YOi-3|1QKB z z?n?eq1XP>p-IM$Z^C;2L3itnbJZAip*Zo0aw2bs8@(s^~*8T9go!%dHcAz2lM;`yp zD=7&xjFV$S&5uDaiScyD?B-i1ze`+CoRtz`Wn+Zl&#s4&}MO{@N!ufrzjG$B79)Y2d3tBk&)TxUTw@QS0TEL_?njX|@vq?Uz(nBFK5Pq7*xj#u*R&i|?7+6# z+|r_n#SW&LXhtheZdah{ZVoqwyT{D>MC3nkFF#N)xLi{p7J1jXlmVeb;cP5?e(=f# zuT7fvjSbjS781v?7{)-X3*?>tq?)Yd)~|1{BDS(pqC zC}~H#WXlkUW*H5CDOo<)#x7%RY)A;ShGhI5s*#cRDA8YgqG(HeKDx+#(ZQ?386dv! zlXCO)w91~Vw4AmOcATuV653fa9R$fyK8ul%rG z-wfS zihugoZyr38Im?Zuh6@RcF~t1anQu7>#lPpb#}4cOA!EM11`%f*07RqOVkmX{p~KJ9 z^zP;K#|)$`^Rb{rnHGH{~>1(fawV0*Z#)}M`m8-?ZJV<+e}s9wE# z)l&az?w^5{)`S(%MRzxdNqrs1n*-=jS^_jqE*5XDrA0+VE`5^*p3CuM<&dZEeCjoz zR;uu_H9ZPZV|fQq`Cyw4nscrVwi!fE6ciMmX$!_hN7uF;jjKG)d2@aC4ropY)8etW=xJvni)8eHi`H$%#zn^WJ5NLc-rqk|u&&4Z6fD_m&JfSI1Bvb?b<*n&sfl0^t z=HnmRl`XrFvMKB%9}>PaA`m-fK6a0(8=qPkWS5bb4=v?XcWi&hRY?O5HdulRi4?fN zlsJ*N-0Qw+Yic@s0(2uy%F@ib;GjXt01Fmx5XbRo6+n|pP(&nodMoap^z{~q ziEeaUT@Mxe3vJSfI6?uLND(CNr=#^W<1b}jzW58bIfyWTDle$mmS(|x-0|2UlX+9k zQ^EX7Nw}?EzVoBfT(-LT|=9N@^hcn-_p&sqG z&*oVs2JSU+N4ZD`FhCAWaS;>|wH2G*Id|?pa#@>tyxX`+4HyIArWDvVrX)2WAOQff z0qyHu&-S@i^MS-+j--!pr4fPBj~_8({~e1bfcl0wI1kaoN>mJL6KUPQm5N7lB(ui1 zE-o%kq)&djzWJ}ob<-GfDlkB;F31j-VHKvQUGQ3sp`CwyGJk_i!y^sD0fqC@$9|jO zOqN!r!8-p==F@ZVP=U$qSpY(gQ0)59P1&t@y?5rvg<}E+GB}26NYPp4f2YFQrQtot5mn3wu_qprZ=>Ig-$ zbW26Ws~IgY>}^5w`vTB(G`PTZaDiGBo5o(tp)qli|NeV( z@H_=R8V39rt5J5YB2Ky?4eJJ#b`_iBe2ot~6%7mLt5t8Vwi^Jy7|jWXqa3amOIoRb zOr}WVFP--DsS`1WpN%~)t3R!arKF^Q$e12KEqU36AWwnCBICpH4XCsfnyrHr>$I$4 z!DpKX$OKLWarN7nv@!uIA+~RNO)l$$w}p(;b>mx8pwYvu;dD_unryX_NhT8*Tj>BTrTTL&!?O+%Rv;b?B??gSzdp?6Uug9{ zd@V08Z$BdI?fpoCS$)t4mg4rT8Q_I}h`0d-vYZ^|dOB*Q^S|xqTV*vIg?@fVFSmMpaw0qtTRbx} z({Pg?#{2`sc9)M5N$*N|4;^t$+QP?#mov zGVC@I*lBVrOU-%2y!7%)fAKjpEFsgQc4{amtiHb95KQEwvf<(3T<9-Zm$xIew#P22 zc2Ix|App^>v6(3L_MCU0d3W##AB0M~3D00EWoKZqsJYT(#@w$Y_H7G22M~ApVFTRHMI_3be)Lkn#0F*V8Pq zc}`Cjy$bE;FJ6H7p=0y#R>`}-m4(0F>%@P|?7fx{=R^uFdISRnZ2W_xQhD{YuR3t< z{6yxu=4~JkeA;|(J6_nv#>Nvs&FuLA&PW^he@t(UwFFE8)|a!R{`E`K`i^ZnyE4$k z;(749Ix|oi$c3QbEJ3b~D_kQsPz~fIUKym($a_7dJ?o+40*OLl^{=&oq$<#Q(yyrp z{J-FAniyAw9tPbe&IhQ|a`DqFTVQGQ&Gq3!C2==4x{6EJwiPZ8zub-iXoUtkJiG{} zPaR&}_fn8_z~(=;5lD-aPWD3z8PZS@AaUiomF!G8I}Mf>e~0g#BelA-5#`cj;O5>N Xviia!U7SGha1wx#SCgwmn*{w2TRX*I diff --git a/document_ocr/tests/__init__.py b/document_ocr/tests/__init__.py deleted file mode 100644 index 7efb2857..00000000 --- a/document_ocr/tests/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# -*- coding: utf-8 -*- -# © 2016 Therp BV -# © 2017 ThinkOpen Solutions -# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html). -from . import test_document_ocr diff --git a/document_ocr/tests/test_document_ocr.py b/document_ocr/tests/test_document_ocr.py deleted file mode 100644 index 1d1a5490..00000000 --- a/document_ocr/tests/test_document_ocr.py +++ /dev/null @@ -1,62 +0,0 @@ -# -*- coding: utf-8 -*- -# © 2016 Therp BV -# © 2017 ThinkOpen Solutions -# License AGPL-3.0 or later (http://www.gnu.org/licenses/agpl.html). -from StringIO import StringIO - -from PIL import Image, ImageDraw, ImageFont -from PIL import PdfImagePlugin, PalmImagePlugin # noqa # pylint: disable=unused-import -from odoo.tests.common import TransactionCase - -from ..models.ir_attachment import _MARKER_PHRASE - - -class TestDocumentOcr(TransactionCase): - def test_document_ocr(self): - self.env['ir.config_parameter'].set_param( - 'document_ocr.synchronous', 'True') - test_image = Image.new('RGB', (200, 30)) - draw = ImageDraw.Draw(test_image) - draw.text((3, 3), "Hello world", font=ImageFont.truetype( - '/usr/share/fonts/truetype/inconsolata/Inconsolata.otf', 24)) - # test a plain image - data = StringIO() - test_image.save(data, 'png') - attachment = self.env['ir.attachment'].create({ - 'name': 'testattachment', - 'datas_fname': 'test_png.pdf'}) - result = attachment._index( - data.getvalue(), 'test.png', None) - self.assertEqual(result.strip(), 'Hello world') - # should also work for pdfs - data = StringIO() - test_image.save(data, 'pdf', resolution=300) - attachment = self.env['ir.attachment'].create({ - 'name': 'testattachment', - 'datas_fname': 'test_pdf.pdf'}) - result = attachment._index( - data.getvalue(), 'test.pdf', None) - self.assertEqual(result.strip(), 'Hello world') - # check cron - self.env['ir.config_parameter'].set_param( - 'document_ocr.synchronous', 'False') - attachment = self.env['ir.attachment'].create({ - 'name': 'testattachment', - 'datas_fname': 'test_cron.pdf', - 'datas': data.getvalue().encode('base64'), - }) - self.assertEqual(attachment.index_content, _MARKER_PHRASE) - attachment._ocr_cron() - self.assertEqual(attachment.index_content.strip(), 'Hello world') - # and for an unreadable image, we expect an empty string - self.env['ir.config_parameter'].set_param( - 'document_ocr.synchronous', 'True') - data = StringIO() - test_image = Image.new('1', (200, 30)) - test_image.save(data, 'palm') - attachment = self.env['ir.attachment'].create({ - 'name': 'testattachment', - 'datas_fname': 'test_err.palm'}) - result = attachment._index( - data.getvalue(), 'test.palm', None) - self.assertEqual(result, '') diff --git a/document_ocr/views/ir_attachment_view.xml b/document_ocr/views/ir_attachment_view.xml deleted file mode 100644 index ed171d61..00000000 --- a/document_ocr/views/ir_attachment_view.xml +++ /dev/null @@ -1,43 +0,0 @@ - - - - - ir.attachment - - - - 1 - - - - - - - - - - - - - - ir.attachment - - - - - - - - - ir.attachment - - - - - - - - - - -