Attachment 'simple.py'
Download 1 # -*- coding: iso-8859-1 -*-
2
3 import re
4 from MoinMoin import config, wikiutil
5
6 from document import DocNode
7 from emitter import DocEmitter
8
# Module-level dependency list, left empty here.
# NOTE(review): presumably consulted by MoinMoin's caching machinery -- confirm.
9 Dependencies = []
10
class Parser:
    """
    The class to glue the DocParser and DocEmitter with the
    MoinMoin current API.

    It only stores what it is given; the actual work is delegated to
    DocParser (text -> document tree) and DocEmitter (tree -> output)
    when format() is called.
    """
    # Enable caching
    caching = 1
    Dependencies = []

    def __init__(self, raw, request, **kw):
        """Create a minimal Parser object with required attributes.

        raw     -- the raw text to be parsed.
        request -- the request object; it must provide a ``form``
                   attribute, and format() later calls its ``write``
                   method.
        kw      -- extra keyword arguments are accepted and ignored.
        """
        self.request = request
        self.form = request.form
        self.raw = raw

    def format(self, formatter):
        """Create and call the true parser and emitter.

        The raw text is parsed into a document tree, the tree is turned
        into output by DocEmitter using ``formatter``, and the result is
        passed to ``self.request.write``.  Nothing is returned.
        """
        document = DocParser(self.raw, self.request).parse()
        result = DocEmitter(document, formatter, self.request).emit()
        self.request.write(result)
31
class DocParser:
    """
    Parse the raw text and create a document object
    that can be converted into output using DocEmitter.

    Parsing is regex-driven: the text is first split into block
    elements by ``block_re``; block handlers then feed their text to
    ``inline_re``.  For every match, _replace() dispatches to the
    ``_<group>_repl`` method named after a matched group, and that
    method extends the DocNode tree rooted at ``self.root``.
    """

    # Table of character sequences to replace with entities:
    typo_tab = {
        ' -- ': u' \u2013 ',
        ' --- ': u' \u2014 ',
        '...': u'\u2026',
        '(c)': u'\u00A9',
        '(C)': u'\u00A9',
        '(R)': u'\u00AE',
        '-8<-': u'\u2702',
        '[o ': u'\u260e ',
        '--> ': u'\u261e ',
        '8=X ': u'\u2620 ',
        'd~': u'\u266a',
        '~o': u'\u00B0',
        '[ ]': u'\u2610',
        '[v]': u'\u2611',
        '[x]': u'\u2612',
    }

    # The parsing rules

    # For the inline elements.  Each entry is the bare pattern; it is
    # wrapped in a named group by _get_inline_rule() / the *_rule
    # attributes below.  All patterns are compiled with re.X, so literal
    # whitespace must be escaped (re.escape takes care of that).
    inline_tab = {
        'typo': r'%s' % '|'.join([re.escape(t) for t in typo_tab.keys()]),
        # The alternation is wrapped in (?:...) so that the lookbehind
        # and the lookahead apply to every smiley, not only to the first
        # and the last alternative respectively.
        'smiley': r'(?<=[\n\s])(?:%s)(?=[\s),;.?!\n])' % '|'.join([re.escape(t) for t in config.smileys.keys()]),
        'link': r'\[\[(?P<link_target>.+?)\s*(\|\s*(?P<link_text>.+?)\s*)?]]',
        'char': r'.',
        'code': r'(?P<code_head>`+)(?P<code_text>.*?)(?P=code_head)',
        'quote': r"(?P<quote_open>(?<=\s),,|^,,)|(?P<quote_close>''(?=['\s.,;!?\)\]\}]|$))",
        'emph': r'(?P<emph_open>(?<=\s)\*|^\*)|(?P<emph_close>\*(?=[\s.,;!?\)\]\}]|$))',
        'strong': r'(?P<strong_open>(?<=\s)\*\*|^\*\*)|(?P<strong_close>\*\*(?=[\s.,;!?\)\]\}]|$))',
        'strongemph': r'(?P<strongemph_open>(?<=\s)\*\*\*|^\*\*\*)|(?P<strongemph_close>\*\*\*(?=[\n\s.,;!?\)\]\}]|$))',
    }

    # Named-group forms of the inline rules, derived from inline_tab so
    # that the two can never drift apart.
    typo_rule = r'(?P<typo>%s)' % inline_tab['typo']
    smiley_rule = r'(?P<smiley>%s)' % inline_tab['smiley']
    link_rule = r'(?P<link>%s)' % inline_tab['link']
    char_rule = r'(?P<char>%s)' % inline_tab['char']
    code_rule = r'(?P<code>%s)' % inline_tab['code']
    quote_rule = r'(?P<quote>%s)' % inline_tab['quote']
    emph_rule = r'(?P<emph>%s)' % inline_tab['emph']
    strong_rule = r'(?P<strong>%s)' % inline_tab['strong']
    strongemph_rule = r'(?P<strongemph>%s)' % inline_tab['strongemph']

    # For the block elements
    rule_rule = r'(?P<rule>^----+$)'
    line_rule = r'(?P<line>^\s*$)'
    head_rule = r'(?P<head>^(?P<head_head>\*+)\s+(?P<head_text>[^*].*?)\s+(?P=head_head)\s*$)'
    text_rule = r'(?P<text>.+)'
    list_rule = r'(?P<list>^(?P<list_head>\s+[\*\+-])\s+(?P<list_text>.*?)$)'
    blockquote_rule = r'(?P<blockquote>^(?P<blockquote_head>\"\"+)\s*$(?P<blockquote_text>(.|\n)+?)^(?P=blockquote_head)\s*$)'
    pre_rule = r'(?P<pre>^(?P<pre_head>``+)\s*$(\n)?(?P<pre_text>(^[\#]!(?P<pre_kind>.*?)(\s+.*)?$)?(.|\n)+?)(\n)?^(?P=pre_head)\s*$)'
    # Multi-line on purpose: the pattern is compiled with re.X, which
    # ignores the line breaks and indentation inside it.
    sect_rule = r'''(?P<sect>^(?P<sect_head>\*\*+)\s*$
        (\n^[\#]class\s*(?P<sect_kind>.*)\s*$)?
        (\n^[\#]style\s*(?P<sect_style>.*)\s*$)?
        (?P<sect_text>(.|\n)+?)^(?P=sect_head)\s*$)'''

    # For the link targets:
    extern_rule = r'(?P<extern_addr>(?P<extern_proto>http|https|ftp|nntp|news|mailto|telnet|file|irc):.*)'
    attach_rule = r'(?P<attach_scheme>attachment|inline|drawing|image|figure):(?P<attach_addr>.*)'
    inter_rule = r'(?P<inter_wiki>[A-Z][a-zA-Z]+):(?P<inter_page>.*)'
    #u'|'.join(wikimacro.getNames(config))
    macro_rule = r'(?P<macro_name>%s)\((-|(?P<macro_param>.*))\)' % r'\w+'
    page_rule = r'(?P<page_name>.*)'

    def __init__(self, raw, request):
        """Set up the empty document tree and compile all the regexps.

        raw     -- the raw text to parse.
        request -- the request object; ``request.dicts`` is queried for
                   the 'AbbreviationDict' page to build the
                   abbreviation rule.
        """
        self.request = request
        self.raw = raw
        self.root = DocNode('document', None)
        self.cur = self.root  # The most recent document node
        self.text = None  # The node to add inline characters to
        abbr_dict_page = 'AbbreviationDict'
        if self.request.dicts.has_dict(abbr_dict_page):
            self.abbr_dict = self.request.dicts.dict(abbr_dict_page)
        else:
            self.abbr_dict = {}
        # The 'XXX' is there because an empty rule would always match.
        # list(...) keeps the concatenation working when keys() is a
        # view rather than a list.
        abbr_names = list(self.abbr_dict.keys()) + ['XXX']
        self.abbr_rule = r'''
            (^|<|(?<=[\s()'`"\[\]&-]))
            (?P<abbr>%s)
            (>|$|(?=[\s,.!?()'`":;\[\]&-]))
        ''' % '|'.join([re.escape(k.strip()) for k in abbr_names])
        # The order of the alternatives matters: the first rule that
        # matches at a position wins, so the catch-all rules go last.
        self.block_rules = '|'.join([
            self.line_rule,
            self.sect_rule,
            self.head_rule,
            self.rule_rule,
            self.pre_rule,
            self.blockquote_rule,
            self.list_rule,
            self.text_rule,
        ])
        self.block_re = re.compile(self.block_rules, re.X|re.U|re.M)
        self.addr_rules = r'|'.join([
            self.macro_rule,
            self.extern_rule,
            self.attach_rule,
            self.inter_rule,
            self.page_rule,
        ])
        self.addr_re = re.compile(self.addr_rules, re.X|re.U)
        self.inline_rules = r'|'.join([
            self._get_inline_rule('link'),
            self._get_inline_rule('code'),
            self._get_inline_rule('strongemph'),
            self._get_inline_rule('strong'),
            self._get_inline_rule('emph'),
            self._get_inline_rule('quote'),
            self.abbr_rule,
            self._get_inline_rule('smiley'),
            self._get_inline_rule('typo'),
            self._get_inline_rule('char'),
        ])
        self.inline_re = re.compile(self.inline_rules, re.X|re.U)

    def _get_inline_rule(self, rule):
        """Return the inline_tab pattern for rule, wrapped in a group named rule."""
        return r'(?P<%s>%s)' % (rule, self.inline_tab.get(rule, ''))

    def _get_block_rule(self, rule):
        """Return the block_tab pattern for rule, wrapped in a group named rule."""
        # NOTE(review): no block_tab attribute is defined in this class,
        # so calling this raises AttributeError unless one is supplied
        # elsewhere.  Nothing in this class calls it.
        return r'(?P<%s>%s)' % (rule, self.block_tab.get(rule, ''))

    def _upto(self, node, kinds):
        """
        Look up the tree to the first occurence
        of one of the listed kinds of nodes or root.
        Start at the node node.
        """
        while node.parent is not None and node.kind not in kinds:
            node = node.parent
        return node

    # The _*_repl methods called for matches in regexps.
    # Each receives the groupdict of the match; the aliases below every
    # method make the dispatch in _replace() independent of which of the
    # rule's groups it happens to see first.

    def _link_repl(self, groups):
        """Handle all kinds of links, including macros."""
        target = groups.get('link_target', '')
        text = (groups.get('link_text', '') or '').strip()
        m = self.addr_re.match(target)
        if m:
            if m.group('page_name'):
                node = DocNode('page_link', self.cur)
                node.content = m.group('page_name')
                DocNode('text', node, text or node.content)
            elif m.group('extern_addr'):
                node = DocNode('external_link', self.cur)
                node.content = m.group('extern_addr')
                node.proto = m.group('extern_proto')
                DocNode('text', node, text or node.content)
            elif m.group('inter_wiki'):
                node = DocNode('interwiki_link', self.cur)
                node.content = '%s:%s' % (m.group('inter_wiki'), m.group('inter_page'))
                DocNode('text', node, text or m.group('inter_page'))
            elif m.group('attach_scheme'):
                scheme = m.group('attach_scheme')
                if scheme == 'inline':
                    scheme = 'inlined_attachment'
                node = DocNode(scheme, self.cur, m.group('attach_addr'))
                DocNode('text', node, text or node.content)
            elif m.group('macro_name'):
                node = DocNode('macro', self.cur, m.group('macro_name'))
                node.args = m.group('macro_param')
            else:
                node = DocNode('bad_link', self.cur)
                node.content = target
                DocNode('text', node, text or target)
        self.text = None
    _link_target_repl = _link_repl
    _link_text_repl = _link_repl

    def _rule_repl(self, groups):
        """Add a horizontal rule to the enclosing block container."""
        self.cur = self._upto(self.cur, ('document', 'section', 'blockquote'))
        DocNode('rule', self.cur)

    def _list_repl(self, groups):
        """Add a list item, reusing an open list with the same bullet if any."""
        bullet = groups.get('list_head', '')
        # Find a list with the same bullet up the tree
        lst = self.cur
        while (lst and not (lst.kind == 'bullet_list' and lst.bullet == bullet)
               and lst.kind not in ('document', 'section', 'blockquote')):
            lst = lst.parent
        if lst and lst.kind == 'bullet_list':
            self.cur = lst
        else:
            # Create a new level of list
            self.cur = self._upto(self.cur, ('list_item', 'document', 'section', 'blockquote'))
            self.cur = DocNode('bullet_list', self.cur)
            self.cur.bullet = bullet
        self.cur = DocNode('list_item', self.cur)
        self.parse_inline(groups.get('list_text', ''))
        self.text = None
    _list_text_repl = _list_repl
    _list_head_repl = _list_repl

    def _head_repl(self, groups):
        """Add a header; its level is the number of leading asterisks."""
        self.cur = self._upto(self.cur, ('document', 'section', 'blockquote'))
        node = DocNode('header', self.cur, groups.get('head_text', '').strip())
        node.level = len(groups.get('head_head', ' '))
    _head_head_repl = _head_repl
    _head_text_repl = _head_repl

    def _text_repl(self, groups):
        """Add a line of text, opening a paragraph first if none is open."""
        if self.cur.kind in ('document', 'section', 'blockquote'):
            self.cur = DocNode('paragraph', self.cur)
        # The trailing space separates this line from the next one.
        self.parse_inline(groups.get('text', '') + ' ')
        self.text = None

    def _sect_repl(self, groups):
        """Add a section and parse its content as blocks inside it."""
        kind = groups.get('sect_kind', None)
        node = DocNode('section', self.cur)
        node.sect = kind or ''
        node.style = groups.get('sect_style', None) or ''
        savecur = self.cur
        self.cur = node
        self.text = None
        self.parse_block(groups.get('sect_text', u''))
        self.cur = savecur
        self.text = None
    _sect_text_repl = _sect_repl
    _sect_head_repl = _sect_repl
    _sect_kind_repl = _sect_repl
    _sect_style_repl = _sect_repl

    def _pre_repl(self, groups):
        """Add a preformatted block to the enclosing block container."""
        self.cur = self._upto(self.cur, ('document', 'section', 'blockquote'))
        kind = groups.get('pre_kind', None)
        node = DocNode('preformatted', self.cur, groups.get('pre_text', u''))
        node.sect = kind or ''
        self.text = None
    _pre_text_repl = _pre_repl
    _pre_head_repl = _pre_repl
    _pre_kind_repl = _pre_repl

    def _blockquote_repl(self, groups):
        """Add a blockquote and parse its content as blocks inside it."""
        savecur = self._upto(self.cur, ('document', 'section', 'blockquote'))
        # Attach the blockquote to the enclosing block container (not to
        # a still-open paragraph or list item), consistently with the
        # other block-level handlers and with the node that parsing
        # returns to afterwards.
        self.cur = DocNode('blockquote', savecur)
        text = groups.get('blockquote_text', u'')
        self.text = None
        self.parse_block(text)
        self.cur = savecur
        self.text = None
    _blockquote_text_repl = _blockquote_repl
    _blockquote_head_repl = _blockquote_repl

    def _line_repl(self, groups):
        """Handle an empty line: close whatever paragraph or list is open."""
        self.cur = self._upto(self.cur, ('document', 'section', 'blockquote'))

    def _code_repl(self, groups):
        """Add an inline code node with the stripped text between backticks."""
        DocNode('code', self.cur, groups.get('code_text', u'').strip())
        self.text = None
    _code_text_repl = _code_repl
    _code_head_repl = _code_repl

    def _emph_repl(self, groups):
        """Open an emphasis node, or close the one currently open."""
        if groups.get('emph_open', None) is not None and self.cur.kind != 'emphasis':
            self.cur = DocNode('emphasis', self.cur)
            self.text = None
        elif self.cur.kind == 'emphasis':
            self.cur = self._upto(self.cur, ('emphasis',)).parent
            self.text = None
    _emph_open_repl = _emph_repl
    _emph_close_repl = _emph_repl

    def _quote_repl(self, groups):
        """Open a quote node, or close the nearest open one.

        A closing mark with no open quote is kept as literal text.
        """
        if groups.get('quote_open', None) is not None:
            self.cur = DocNode('quote', self.cur)
            self.text = None
        else:
            self.cur = self._upto(self.cur, ('quote', 'section', 'paragraph', 'list_item'))
            if self.cur.kind == 'quote':
                self.cur = self.cur.parent or self.root
                self.text = None
            else:
                if self.text is None:
                    self.text = DocNode('text', self.cur, u'')
                self.text.content += groups.get('quote', '')
    _quote_open_repl = _quote_repl
    _quote_close_repl = _quote_repl

    def _strong_repl(self, groups):
        """Open a strong node, or close the one currently open."""
        if groups.get('strong_open', None) is not None and self.cur.kind not in ('strong', 'emphasis'):
            self.cur = DocNode('strong', self.cur)
            self.text = None
        elif self.cur.kind == 'strong':
            self.cur = self._upto(self.cur, ('strong',)).parent
            self.text = None
    _strong_open_repl = _strong_repl
    _strong_close_repl = _strong_repl

    def _strongemph_repl(self, groups):
        """Open strong+emphasis together, or close whichever of them is open."""
        if groups.get('strongemph_open', None) is not None and self.cur.kind not in ('strong', 'emphasis'):
            self.cur = DocNode('strong', self.cur)
            self.cur = DocNode('emphasis', self.cur)
            self.text = None
        else:
            if self.cur.kind == 'emphasis':
                self.cur = self._upto(self.cur, ('emphasis',)).parent
            if self.cur.kind == 'strong':
                self.cur = self._upto(self.cur, ('strong',)).parent
            self.text = None
    _strongemph_open_repl = _strongemph_repl
    _strongemph_close_repl = _strongemph_repl

    def _smiley_repl(self, groups):
        """Add a smiley node holding the matched smiley text."""
        word = groups.get('smiley', '')
        DocNode('smiley', self.cur, word)

    def _typo_repl(self, groups):
        """Append the typo_tab replacement for the matched sequence to the text."""
        if self.text is None:
            self.text = DocNode('text', self.cur, u'')
        typo = groups.get('typo', u'')
        char = self.typo_tab.get(typo, typo)
        self.text.content += char

    def _abbr_repl(self, groups):
        """Add an abbreviation node titled with its abbr_dict expansion."""
        abbr = groups.get('abbr', '')
        node = DocNode('abbr', self.cur, abbr)
        node.title = self.abbr_dict.get(abbr, '???')
        self.text = None

    def _char_repl(self, groups):
        """Append a single ordinary character to the current text node."""
        if self.text is None:
            self.text = DocNode('text', self.cur, u'')
        self.text.content += groups.get('char', u'')

    def _replace(self, match):
        """Invoke appropriate _*_repl method. Called for every match.

        Only one handler runs per match: the one named after the first
        group found with a value.  All groups of a rule are aliased to
        the same handler, so the iteration order does not matter.
        Returns None, which re.sub treats as an empty replacement; the
        substituted string is never used.
        """
        groups = match.groupdict()
        for name, text in groups.items():
            if text is not None:
                replace = getattr(self, '_%s_repl' % name)
                replace(groups)
                return

    def parse_inline(self, raw):
        """Recognize inline elements inside blocks."""
        # sub() is used only to drive _replace over every match.
        self.inline_re.sub(self._replace, raw)

    def parse_block(self, raw):
        """Recognize block elements."""
        # sub() is used only to drive _replace over every match.
        self.block_re.sub(self._replace, raw)

    def parse(self):
        """Parse the whole raw text and return the root document node."""
        self.parse_block(self.raw)
        return self.root
Attached Files
To refer to attachments on a page, use attachment:filename, as shown below in the list of files. Do NOT use the URL of the [get] link, since this is subject to change and can break easily. You are not allowed to attach a file to this page.