diff --git a/document_page/__init__.py b/document_page/__init__.py
index a5aa017f..d11fab84 100644
--- a/document_page/__init__.py
+++ b/document_page/__init__.py
@@ -18,8 +18,5 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
[... lost in extraction: the remainder of this hunk, followed by the deletion of a bundled copy of the Universal Feed Parser (feedparser.py) — _BaseHTMLProcessor with its start-tag/end-tag/charref/entity/comment/PI handlers, _LooseFeedParser and its decodeEntities(), _RelativeURIResolver, _HTMLSanitizer with its element/attribute allowlists, _sanitizeHTML() and the optional Tidy post-pass, _FeedURLHandler, _open_resource() (conditional GET via If-None-Match / If-Modified-Since, RFC 1123 date formatting, gzip/deflate negotiation), registerDateHandler() and the ISO 8601 date-parsing templates ...]
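(For context, not part of the patch: the deleted _open_resource() implemented HTTP conditional fetching. A minimal sketch of that idea on the modern standard library — the function name and defaults are illustrative, not from this code:)

    import urllib.request
    import urllib.error

    def fetch_conditional(url, etag=None, modified=None, agent='wiki/1.0'):
        # Send the validators saved from the previous fetch; the server
        # answers 304 Not Modified when neither has changed.
        req = urllib.request.Request(url, headers={'User-Agent': agent})
        if etag:
            req.add_header('If-None-Match', etag)
        if modified:
            # RFC 1123 date; must use English day/month names per RFC 2616
            req.add_header('If-Modified-Since', modified)
        try:
            with urllib.request.urlopen(req) as resp:
                return resp.read(), resp.headers.get('ETag'), resp.headers.get('Last-Modified')
        except urllib.error.HTTPError as err:
            if err.code == 304:  # unchanged: the caller keeps its cached copy
                return None, etag, modified
            raise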
[... the diff header and opening lines of the deleted wiki-parser module are likewise lost; its parse() method began with two replace() calls whose tag-literal first arguments were stripped during extraction ...]
- def parse(self, text, id):
-
- text = wikimarkup.to_unicode(text)
- text = self.strip(text)
-
- text = super(WikiParser, self).parse(text)
- text = self.addImage(text, id)
- text = self.attachDoc(text, id)
- text = self.recordLink(text)
- text = self.viewRecordLink(text)
- text = self.addInternalLinks(text)
- #TODO : already implemented but we will implement it later after releasing the 5.0
- #text = self.addRss(text, id)
- return text
-
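(Usage note — a hypothetical call site, not code from this patch; the wiki module drove this pipeline roughly as:)

    parser = WikiParser()
    html = parser.parse(page_text, page_id)  # page_text/page_id are illustrative names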
- def viewRecordLink(self, text):
- def record(path):
- record = path.group().replace('view:','').split("|")
- model = record[0]
- text = record[1].replace('\r','').strip()
- label = "View Record"
- if len(record) > 2:
- label = record[2]
- proxy = rpc.RPCProxy(model)
- ids = proxy.name_search(text, [], 'ilike', {})
- if len(ids):
- id = ids[0][0]
- else:
- try:
- id = int(text)
- except:
- id = 0
- return "[[/openerp/form/view?model=%s&id=%d | %s]]" % (model, id, label)
-
- bits = _view.sub(record, text)
- return bits
-
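(A self-contained sketch of the viewRecordLink() rewrite above, with the rpc name_search() lookup replaced by a hypothetical resolver so it runs standalone; the _view pattern shown here is an assumed shape — the patch defines the real one elsewhere:)

    import re

    _view = re.compile(r'view:[\w.]+\|[^\n\]]+')  # assumed shape of the removed pattern

    def resolve_id(model, name):
        # Hypothetical stand-in for proxy.name_search(); maps a display name to an id.
        demo = {('res.partner', 'Agrolait'): 3}
        return demo.get((model, name.strip()), 0)

    def view_record_link(text):
        def record(match):
            parts = match.group().replace('view:', '').split('|')
            model, name = parts[0], parts[1]
            label = parts[2] if len(parts) > 2 else 'View Record'
            return '[[/openerp/form/view?model=%s&id=%d | %s]]' % (
                model, resolve_id(model, name), label)
        return _view.sub(record, text)

    print(view_record_link('see view:res.partner|Agrolait'))
    # -> see [[/openerp/form/view?model=res.partner&id=3 | View Record]]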
- def addRss(self, text, id):
- def addrss(path):
- rssurl = path.group().replace('rss:','')
- import rss.feedparser as feedparser
- data = feedparser.parse(rssurl)
[... lost in extraction: the rest of addRss() — it builds a block of markup ("values = ...") from the parsed feed — followed by the deletion of the bundled wikimarkup library: the tail of BaseParser.doBlockLevels() (paragraph/pre/list section state), the Parser class with parse(), checkTOC() and doTableStuff(), and the table/row/cell state machinery that rebuilds table, tr, td and caption markup, including the special case that suppresses an empty table ...]