attachment:moin-2.py of RadomirDopieralski/NewMoinMoinParser

Attachment 'moin-2.py'

   1 # -*- coding: iso-8859-1 -*-
   2 
   3 import re
   4 from MoinMoin import config
   5 from MoinMoin import wikiutil
   6 from emitter import DocEmitter
   7 from document import DocNode
   8 
# Module-level dependency list; it is empty for this parser.
# NOTE(review): presumably consulted by MoinMoin's page caching to decide
# when cached output must be invalidated -- confirm against the MoinMoin API.
Dependencies = []
  10 
  11 class Parser:
  12     """
  13     The class to glue the DocParser and DocEmitter with the
  14     MoinMoin current API.
  15     """
  16     # Enable caching
  17     caching = 1
  18     Dependencies = []
  19 
  20     def __init__(self, raw, request, **kw):
  21         """Create a minimal Parser object with required attributes."""
  22         self.request = request
  23         self.form = request.form
  24         self.raw = raw
  25         
  26     def format(self, formatter):
  27         """Create and call the true parser and emitter."""
  28         document = DocParser(self.raw, self.request).parse()
  29         result = DocEmitter(document, formatter, self.request).emit()
  30         self.request.write(result)
  31 
  32 class DocParser:
  33     """
  34     Parse the raw text and create a document object
  35     that can be converted into output using DocEmitter.
  36     """
  37 
  38     # Table of character sequences to replace with entities:
  39 
  40     # The parsing rules
  41 
  42     wiki_word = r'[A-Z]\w+[A-Z]\w+'
  43 
  44     # For the inline elements
  45     smiley_rule = r'''(?P<smiley>
  46         (?<=[\n\s]) %s (?=[\s),;.?!\n])
  47     )''' % u'|'.join([re.escape(t) for t in config.smileys.keys()])
  48     wikiword_rule = r'''(?P<wikiword>
  49         (\/|(\.\.\/)+|^|(?<=[^!])) \b%s\b (\/%s)*
  50     )'''%(wiki_word, wiki_word)
  51     interwiki_rule = r'''\b(?P<interwiki>
  52         \w+:[^/][\w\/]+
  53     )\b'''
  54     interwiki2_rule = r'\[(?P<interwiki2>\w+:[^/]\S+)(\s+(?P<interwiki2_text>.+?))?\s*\]'
  55     attach_rule = r'\b(attachment|inline|drawing):(?P<attach>\S+?)\b'
  56     attach2_rule = r'\[(attachment|inline|drawing):(?P<attach2>\S+?)\s*(?P<attach2_text>.+?)\s*\]'
  57     anchor_rule = r'\[(?P<anchor>[#]\S+?)\s+(?P<anchor_text>.*?)\s*\]'
  58     url_rule = r'(?P<url>\[(?P<url_addr>(http|https|ftp|nntp|news|mailto|telnet|file|irc):[^\s\]]*)\s*(?P<url_text>.*?)\s*\])'
  59     link_rule = r'(?P<link>\["(?P<link_addr>.+?)"\s*(?P<link_text>.+?)?\s*\])'
  60     macro_rule = r'\[\[(?P<macro>.+?)(\((?P<macro_arg>.*?)\))?\]\]'
  61 
  62     char_rule = r'(?P<char>.)'
  63     code_rule = r'(?P<code>{{{(?P<code_text>.*?)}}})'
  64     code2_rule = r'(?P<code2>`(?P<code2_text>.*?)`)'
  65     emph_rule = r'''(?P<emph>'')'''
  66     strong_rule = r"(?P<strong>''')"
  67     
  68     # For the block elements
  69     rule_rule = r'(?P<rule>^----+$)'
  70     line_rule = r'(?P<line>^\s*$)'
  71     head_rule = r'(?P<head>^(?P<head_head>=+)\s*(?P<head_text>[^*].*?)\s*(?P=head_head)\s*$)'
  72     text_rule = r'(?P<text>.+)'
  73     ulist_rule = r'(?P<ulist>^(?P<ulist_head>\s+[\*])\s+(?P<ulist_text>.*?)$)'
  74     olist_rule = r'(?P<olist>^(?P<olist_head>\s+[0-9a-zA-Z#][\.\)])\s+(?P<olist_text>.*?)$)'
  75     dlist_rule = r'(?P<dlist>^\s+(?P<dlist_term>.+?)\s*::\s*)'
  76     table_rule = r'^\s*(?P<table>(\|\|.+?)+)\|\|\s*$'
  77     pre_rule = r'(?P<pre>^\s*{{{\s*(\n+\s*)?(?P<pre_text>([\#]!(?P<pre_kind>\S*).*$)?(.|\n)+?)(\n)?}}}\s*$)'
  78 
  79     def __init__(self, raw, request):
  80         self.request = request
  81         self._ = request.getText
  82         self.raw = raw
  83         self.root = DocNode("document", None)
  84         self.cur = self.root        # The most recent document node
  85         self.text = None            # The node to add inline characters to
  86         
  87         self.block_rules = '|'.join([
  88                 self.line_rule,
  89                 self.head_rule,
  90                 self.rule_rule,
  91                 self.pre_rule,
  92                 self.ulist_rule,
  93                 self.dlist_rule,
  94                 self.olist_rule,
  95                 self.table_rule,
  96                 self.text_rule,
  97         ])
  98         self.block_re = re.compile(self.block_rules, re.X|re.U|re.M)
  99         self.abbr_rule = self.get_abbr_rule()
 100         self.inline_rules = '|'.join([
 101                 self.link_rule,
 102                 self.anchor_rule,
 103                 self.url_rule,
 104                 self.macro_rule,
 105                 self.attach_rule,
 106                 self.attach2_rule,
 107                 self.wikiword_rule,
 108                 self.interwiki_rule,
 109                 self.interwiki2_rule,
 110                 self.code_rule,
 111                 self.code2_rule,
 112                 self.strong_rule,
 113                 self.emph_rule,
 114                 self.abbr_rule,
 115                 self.smiley_rule,
 116                 self.char_rule,
 117         ])
 118         self.inline_re = re.compile(self.inline_rules, re.X|re.U)
 119 
 120     def get_abbr_rule(self):
 121         abbr_dict_page = 'AbbreviationDict'
 122         if self.request.dicts.has_dict(abbr_dict_page):
 123             self.abbr_dict = self.request.dicts.dict(abbr_dict_page)
 124         else:
 125             self.abbr_dict = {}
 126         return r'''(^|<|(?<=[\s()'`"\[\]&-]))(?P<abbr>%s)(>|$|(?=[\s,.!?()'`":;\[\]&-]))'''%'|'.join([re.escape(k.strip()) for k in self.abbr_dict.keys()+['XXX']])
 127 
 128     # copied from wiki.py
 129     def _getTableAttrs(self, attrdef):
 130         # skip "|" and initial "<"
 131         while attrdef and attrdef[0] == "|":
 132             attrdef = attrdef[1:]
 133         if not attrdef or attrdef[0] != "<":
 134             return {}, ''
 135         attrdef = attrdef[1:]
 136 
 137         # extension for special table markup
 138         def table_extension(key, parser, attrs, wiki_parser=self):
 139             _ = wiki_parser._
 140             msg = ''
 141             if key[0] in "0123456789":
 142                 token = parser.get_token()
 143                 if token != '%':
 144                     wanted = '%'
 145                     msg = _('Expected "%(wanted)s" after "%(key)s", got "%(token)s"') % {
 146                         'wanted': wanted, 'key': key, 'token': token}
 147                 else:
 148                     try:
 149                         dummy = int(key)
 150                     except ValueError:
 151                         msg = _('Expected an integer "%(key)s" before "%(token)s"') % {
 152                             'key': key, 'token': token}
 153                     else:
 154                         attrs['width'] = '"%s%%"' % key
 155             elif key == '-':
 156                 arg = parser.get_token()
 157                 try:
 158                     dummy = int(arg)
 159                 except ValueError:
 160                     msg = _('Expected an integer "%(arg)s" after "%(key)s"') % {
 161                         'arg': arg, 'key': key}
 162                 else:
 163                     attrs['colspan'] = '"%s"' % arg
 164             elif key == '|':
 165                 arg = parser.get_token()
 166                 try:
 167                     dummy = int(arg)
 168                 except ValueError:
 169                     msg = _('Expected an integer "%(arg)s" after "%(key)s"') % {
 170                         'arg': arg, 'key': key}
 171                 else:
 172                     attrs['rowspan'] = '"%s"' % arg
 173             elif key == '(':
 174                 attrs['align'] = '"left"'
 175             elif key == ':':
 176                 attrs['align'] = '"center"'
 177             elif key == ')':
 178                 attrs['align'] = '"right"'
 179             elif key == '^':
 180                 attrs['valign'] = '"top"'
 181             elif key == 'v':
 182                 attrs['valign'] = '"bottom"'
 183             elif key == '#':
 184                 arg = parser.get_token()
 185                 try:
 186                     if len(arg) != 6: raise ValueError
 187                     dummy = int(arg, 16)
 188                 except ValueError:
 189                     msg = _('Expected a color value "%(arg)s" after "%(key)s"') % {
 190                         'arg': arg, 'key': key}
 191                 else:
 192                     attrs['bgcolor'] = '"#%s"' % arg
 193             elif key == '=':
 194                 arg = parser.get_token()
 195                 this_key = attrdef.split('=')[0]
 196                 attrs[this_key] = arg
 197             else:
 198                 msg = ""
 199             #print "key: %s\nattrs: %s" % (key, str(attrs))
 200             return msg
 201 
 202         # scan attributes
 203         attr, msg = wikiutil.parseAttributes(self.request, attrdef, '>', table_extension)
 204         if msg: msg = '<strong class="highlight">%s</strong>' % msg
 205         #print attr
 206         return attr, msg
 207     def _upto(self, node, kinds):
 208         """
 209         Look up the tree to the first occurence 
 210         of one of the listed kinds of nodes or root.
 211         Start at the node node.
 212         """
 213         if not node:
 214             return None
 215         while node.parent is not None and not node.kind in kinds:
 216             node = node.parent
 217         return node
 218 
 219     # The _*_repl methods called for matches in regexps
 220 
 221     def _macro_repl(self, groups):
 222         node = DocNode('macro', self.cur, groups.get('macro'))
 223         node.args = groups.get('macro_arg', None)
 224         self.text = None
 225     _macro_arg_repl = _macro_repl
 226 
 227     def _wikiword_repl(self, groups):
 228         page = groups.get('wikiword', '')
 229         node = DocNode('page_link', self.cur)
 230         node.content = page
 231         DocNode('text', node, page)
 232         self.text = None
 233         
 234     def _interwiki_repl(self, groups):
 235         page = groups.get('interwiki', 'self:') or groups.get('interwiki2', 'self:')
 236         text = groups.get('interwiki2_text')
 237         node = DocNode('interwiki_link', self.cur)
 238         node.content = page
 239         DocNode('text', node, text or page.split(':')[1])
 240         self.text = None
 241     _interwiki2_repl = _interwiki_repl
 242     _interwiki2_text_repl = _interwiki_repl
 243 
 244 
 245     def _anchor_repl(self, groups):
 246         addr = groups.get('anchor')
 247         text = groups.get('anchor_text', '') or addr
 248         node = DocNode('anchor_link', self.cur, addr)
 249         DocNode('text', node, text)
 250         self.text = None
 251     _anchor_text_repl = _anchor_repl
 252 
 253     def _url_repl(self, groups):
 254         addr = groups.get('url_addr', ':')
 255         proto = addr.split(':')[0]
 256         text = groups.get('url_text', '') or addr
 257         node = DocNode('external_link', self.cur)
 258         node.content = addr
 259         node.proto = proto
 260         DocNode('text', node, text)
 261         self.text = None
 262     _url_text_repl = _url_repl
 263     _url_addr_repl = _url_repl
 264 
 265     def _attach_repl(self, groups):
 266         addr = groups.get('attach') or groups.get('attach2') or ':'
 267         text = groups.get('attach2_text', ':')
 268         scheme, name = addr.split(':')
 269         if scheme=='inline':
 270             scheme='inlined_attachment'
 271         node = DocNode(scheme, self.cur, name)
 272         DocNode('text', node, text or addr)
 273         self.text = None
 274     _attach2_repl = _attach_repl
 275     _attach2_text_repl = _attach_repl
 276         
 277     def _link_repl(self, groups):
 278         """Handle all kinds of links, including macros."""
 279         addr = groups.get('link_addr', '')
 280         text = (groups.get('link_text', '') or '').strip()
 281         node = DocNode('page_link', self.cur, addr)
 282         DocNode('text', node, text or node.content)
 283         self.text = None
 284     _link_addr_repl = _link_repl
 285     _link_text_repl = _link_repl
 286 
 287     def _rule_repl(self, groups):
 288         self.cur = self._upto(self.cur, ('document','section','blockquote'))
 289         DocNode('rule', self.cur)
 290 
 291     def _table_repl(self, groups):
 292         row = groups.get('table', '||')
 293         attrs, attrerr = self._getTableAttrs(row)
 294         self.cur = self._upto(self.cur, ('table', 'document', 'section', 'blockquote'))
 295         if self.cur.kind!='table':
 296             self.cur = DocNode('table', self.cur)
 297             self.cur.attrs = attrs
 298         tb = self.cur
 299         tr = DocNode('table_row', tb)
 300         tr.attrs = attrs
 301         for cell in row.split('||')[1:]:
 302             if cell and cell[0]=='<':
 303                 attrs, attrerr = self._getTableAttrs(cell)
 304                 parts = cell[1:].split('>')
 305                 args = parts[0]
 306                 cell = '>'.join(parts[1:])
 307             else:
 308                 args = None
 309                 attrs = {}
 310                 attrerr =''
 311             self.cur = DocNode('table_cell', tr)
 312             self.cur.attrs = attrs
 313             self.text = None
 314             self.parse_inline(cell)
 315         self.cur = tb
 316         self.text = None
 317 
 318     def _dlist_repl(self, groups):
 319         self.cur = self._upto(self.cur, ('definition_list', 'document', 'section', 'blockquote'))
 320         if self.cur.kind!='definition_list':
 321             self.cur = DocNode('definition_list', self.cur)
 322         dl = self.cur
 323         self.cur = DocNode('term', dl)
 324         self.text = None
 325         self.parse_inline(groups.get('dlist_term', u''))
 326         self.cur = DocNode('definition', dl)
 327         self.text = None
 328     _dlist_term_repl = _dlist_repl
 329         
 330     def _ulist_repl(self, groups):
 331         bullet = groups.get('ulist_head', '')
 332         # Find a list with the same bullet up the tree
 333         lst = self.cur
 334         while ( lst and 
 335                 not (lst.kind == 'bullet_list' and 
 336                 lst.bullet==bullet) and 
 337                 not lst.kind in ('document','section','blockquote')
 338             ):
 339             lst = lst.parent
 340         if lst and lst.kind=='bullet_list':
 341             self.cur = lst
 342         else:
 343             # Create a new level of list
 344             self.cur = self._upto(self.cur, ('list_item', 'document', 'section', 'blockquote'))
 345             self.cur = DocNode('bullet_list', self.cur)
 346             self.cur.bullet = bullet
 347         self.cur = DocNode('list_item', self.cur)
 348         self.parse_inline(groups.get('ulist_text', ''))
 349         self.text = None
 350     _ulist_text_repl=_ulist_repl
 351     _ulist_head_repl=_ulist_repl
 352 
 353     def _olist_repl(self, groups):
 354         bullet = groups.get('olist_head', '')
 355         # Normalize the list number
 356         bullet = re.sub(r'[0-9]', '0', bullet)
 357         bullet = re.sub(r'[a-z]', 'a', bullet)
 358         bullet = re.sub(r'[A-Z]', 'A', bullet)
 359         # Find a list with the same bullet up the tree
 360         lst = self.cur
 361         while lst and not (lst.kind == 'number_list' and lst.bullet==bullet) and not lst.kind in ('document','section','blockquote'):
 362             lst = lst.parent
 363         if lst and lst.kind=='number_list':
 364             self.cur = lst
 365         else:
 366             # Create a new level of list
 367             self.cur = self._upto(self.cur, ('list_item', 'document', 'section', 'blockquote'))
 368             self.cur = DocNode('number_list', self.cur)
 369             self.cur.bullet = bullet
 370         self.cur = DocNode('list_item', self.cur)
 371         self.parse_inline(groups.get('olist_text', ''))
 372         self.text = None
 373     _olist_text_repl=_olist_repl
 374     _olist_head_repl=_olist_repl
 375 
 376     def _head_repl(self, groups):
 377         self.cur = self._upto(self.cur, ('document','section', 'blockquote'))
 378         node = DocNode('header', self.cur, groups.get('head_text', '').strip())
 379         node.level = len(groups.get('head_head', ' '))
 380     _head_head_repl = _head_repl
 381     _head_text_repl = _head_repl
 382    
 383     def _text_repl(self, groups):
 384         # No text allowed in those nodes
 385         if self.cur.kind in ('number_list', 'bullet_list', 'definition_list', 'table', 'table_row'):
 386             self.cur = self._upto(self.cur, ('document','section', 'blockquote'))
 387         # Those nodes can have text, but only in paragraphs
 388         if self.cur.kind in ('document','section','blockquote'):
 389             self.cur = DocNode('paragraph', self.cur)
 390         self.parse_inline(groups.get('text', '')+' ')
 391         self.text = None
 392 
 393     def _pre_repl(self, groups):
 394         self.cur = self._upto(self.cur, ('document','section','blockquote'))
 395         kind = groups.get('pre_kind', None)
 396         node = DocNode('preformatted', self.cur, groups.get('pre_text', u''))
 397         node.sect = kind or ''
 398         self.text = None
 399     _pre_text_repl = _pre_repl
 400     _pre_head_repl = _pre_repl
 401     _pre_kind_repl = _pre_repl
 402     
 403     def _line_repl(self, groups):
 404         self.cur = self._upto(self.cur, ('document','section','blockquote'))
 405 
 406     def _code_repl(self, groups):
 407         text = groups.get('code_text', u'') or groups.get('code2_text', u'')
 408         DocNode('code', self.cur, text)
 409         self.text = None
 410     _code_text_repl = _code_repl
 411     _code2_text_repl = _code_repl
 412     _code2_repl = _code_repl
 413 
 414     def _emph_repl(self, groups):
 415         last = self._upto(self.cur, ('emphasis','document'))
 416         if last.kind=='emphasis':
 417             self.cur = last.parent or self.root
 418         else:
 419             self.cur = DocNode('emphasis', self.cur)
 420         self.text = None
 421     _emph_open_repl = _emph_repl
 422     _emph_close_repl = _emph_repl
 423   
 424     def _strong_repl(self, groups):
 425         last = self._upto(self.cur, ('strong','document'))
 426         if last.kind=='strong':
 427             self.cur = last.parent or self.root
 428         else:
 429             self.cur = DocNode('strong', self.cur)
 430         self.text = None
 431     _strong_open_repl = _strong_repl
 432     _strong_close_repl = _strong_repl
 433   
 434     def _smiley_repl(self, groups):
 435         word = groups.get('smiley', '')
 436         DocNode('smiley', self.cur, word)
 437         
 438     def _abbr_repl(self, groups):
 439         abbr = groups.get('abbr', '')
 440         node = DocNode('abbr', self.cur, abbr)
 441         node.title = self.abbr_dict.get(abbr, '???')
 442         self.text = None
 443   
 444     def _char_repl(self, groups):
 445         if self.text is None:
 446             self.text = DocNode('text', self.cur, u'')
 447         self.text.content += groups.get('char', u'')
 448  
 449     def _replace(self, match):
 450         """Invoke appropriate _*_repl method. Called for every matched group."""
 451         groups = match.groupdict()
 452         for name,text in groups.iteritems():
 453             if text is not None:
 454                 replace = getattr(self, '_%s_repl'%name)
 455                 replace(groups)
 456                 return
 457 
 458     def parse_inline(self, raw):
 459         """Recognize inline elements inside blocks."""
 460         re.sub(self.inline_re, self._replace, raw)
 461     
 462     def parse_block(self, raw):
 463         """Recognize block elements."""
 464         re.sub(self.block_re, self._replace, raw)
 465     
 466     def parse(self):
 467         self.parse_block(self.raw)
 468         return self.root
Attached Files

To refer to attachments on a page, use attachment:filename, as shown below in the list of files. Do NOT use the URL of the [get] link, since this is subject to change and can break easily.
You are not allowed to attach a file to this page.
MoinMoin: attachment:moin-2.py of RadomirDopieralski/NewMoinMoinParser

Attachment 'moin-2.py'

Attached Files