Attachment 'media.py'
Download 1 # -*- coding: iso-8859-1 -*-
2
3 # media.py --- Plugin parser for MoinMoin parsing MediaWiki documents
4
5 # $Id: media.py,v 1.16 2006/04/20 19:35:38 stefan Exp $
6
7 # Copyright 2005 Stefan Merten <smerten@oekonux.de>
8
9 # This program is free software; you can redistribute it and/or modify
10 # it under the terms of the GNU General Public License as published by
11 # the Free Software Foundation; either version 2 of the License, or
12 # (at your option) any later version.
13
14 # This program is distributed in the hope that it will be useful,
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 # GNU General Public License for more details.
18
19 # You should have received a copy of the GNU General Public License
20 # along with this program; if not, write to the Free Software
21 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22
23 ###############################################################################
24 ###############################################################################
25
26 import re
27
28 from MoinMoin.parser import wiki
29
30 ###############################################################################
31 ###############################################################################
32
33 # Because the syntaxes are rather similar most stuff is inherited from the
34 # standard wiki parser. Some functions are copied and modified.
35 class Parser(wiki.Parser):
36
37 # the big, fat, ugly one ;)
38 formatting_rules = ur"""(?:(?P<emph_ibb>'''''(?=[^']+'''))
39 (?P<emph_ibi>'''''(?=[^']+''))
40 (?P<emph_ib_or_bi>'{5}(?=[^']))
41 (?P<emph>'{2,3})
42 (?P<b></?b>)
43 (?P<i></?i>)
44 (?P<u></?u>)
45 (?P<sup><sup>.*?</sup>)
46 (?P<sub><sub>.*?</sub>)
47 (?P<tt><tt>.*?</tt>)
48 (?P<pre></?(pre|nowiki)>)
49 (?P<small></?small>)
50 (?P<br><br\s*/?>)
51 (?P<comment><!--.*?-->)
52 (?P<rule>-{4,})
53 (?P<macro>\[\[(%%(macronames)s)\(.*?\)\]\]))
54 (?P<media_anchor>\[\[#\w+\]\])
55 (?P<ml>^(\*+#+[*#]*|#+\*+[*#]*))
56 (?P<ol>^#+)
57 (?P<dl>^;+[^:]+:)
58 (?P<li>^\*+)
59 (?P<ind>^:+)
60 (?P<media_heading>^\s*(?P<hmarker>=+).*(?P=hmarker)\s*$)
61 (?P<media_bracket>\[\[.*?\]\])
62 (?P<url_bracket>\[((%(url)s)\:|#|\:)[^\s\]]+(\s[^\]]+)?\])
63 (?P<url>%(url_rule)s)
64 (?P<email>[-\w._+]+\@[\w-]+(\.[\w-]+)+)
65 (?P<media_entity>&\w+;)
66 (?P<ent>[<>&])""" % {
67 'url': wiki.Parser.url_pattern,
68 'url_rule': wiki.Parser.url_rule,
69 }
70 #(?P<processor>(\{\{\{(#!.*|\s*$)))
71
72 # Map languages to page name and label of discussion pages
73 lang2nm_spc = { 'en': ( u'Talk', u'Discussion', u'User', u'Image',
74 u'Media', ),
75 'de': ( u'Diskussion', u'Diskussion', u'Benutzer', u'Bild',
76 u'Media', ),
77 # Fallback for unknown languages
78 '': ( u'Talk', u'Discussion', u'User', u'Image',
79 u'Media', ),
80 }
81
82 ###########################################################################
83
84 def _b_repl(self, match):
85 """Handle <b>."""
86 # This is not really correct because it mixes with ''' notation
87 return self._emph_repl("'''")
88
89 ###########################################################################
90
91 def _i_repl(self, match):
92 """Handle <i>."""
93 # This is not really correct because it mixes with '' notation
94 return self._emph_repl("''")
95
96 ###########################################################################
97
98 def _media_bracket_repl(self, word):
99 """Handle double bracket links."""
100 content = word[2:-2]
101 if content:
102 words = content.split("|", 1)
103 wikiname = words[0]
104 if len(words) > 1:
105 text = words[1]
106 else:
107 text = None
108
109 # Remap link as in MediaWiki
110 wikiname = re.sub("\s+", "_", wikiname.strip())
111 wikiname = wikiname[0].upper() + wikiname[1:]
112
113 # Handle special namespaces
114 if re.search("(?i)_" + self.talk_name + ":", wikiname):
115 wikiname = re.sub("(?i)_" + self.talk_name + ":", ":",
116 wikiname) + self.talk_page
117 elif re.search("(?i)^" + self.talk_name + ":", wikiname):
118 wikiname = re.sub("(?i)^" + self.talk_name + ":", "",
119 wikiname) + self.talk_page
120
121 wikiname = re.sub("(?i)^" + self.user_name + ":", "", wikiname)
122
123 att_re = "(?i)^(" + self.media_name + "|" + self.image_name + "):"
124 if re.search(att_re, wikiname):
125 words[0] = re.sub(att_re, "attachment:", wikiname)
126 return self.attachment(words)
127
128 # Map remaining namespaces to top level pages
129 wikiname = re.sub("^(\w+):", r"\1/", wikiname)
130
131 return self._word_repl(wikiname, text)
132 else:
133 return word
134
135 ###########################################################################
136
137 def _media_heading_repl(self, word):
138 """Handle headings."""
139 # The easiest thing to do is to convert the markup
140 asMoin = word.strip()
141 asMoin = re.sub("^=(=+)", r"\1 ", asMoin)
142 asMoin = re.sub("(=+)=$", r" \1", asMoin)
143 return self._heading_repl(asMoin)
144
145 ###########################################################################
146
147 def _list(self, result, style=None):
148 """Handle all lists."""
149 self._close_item(result)
150 self.in_li = 1
151 css_class = ''
152 if self.line_was_empty and not self.first_list_item:
153 css_class = 'gap'
154 result.append(self.formatter.listitem(1, css_class=css_class,
155 style=style))
156 return ''.join(result)
157
158 ###########################################################################
159
160 def _li_repl(self, match):
161 """Handle bullet lists."""
162 result = [ self._indent_to(len(match), "ul", None, None), ]
163 return self._list(result)
164
165 ###########################################################################
166
167 def _ol_repl(self, match):
168 """Handle numbered lists."""
169 result = [ self._indent_to(len(match), "ol", "1", None), ]
170 return self._list(result)
171
172 ###########################################################################
173
174 def _ml_repl(self, match):
175 """Handle mixed lists."""
176 if match[-1] == "*":
177 return self._li_repl(match)
178 else:
179 return self._ol_repl(match)
180
181 ###########################################################################
182
183 def _ind_repl(self, match):
184 """Handle indented blocks."""
185 result = [ self._indent_to(len(match), "ul", None, None), ]
186 return self._list(result, style="list-style-type:none")
187
188 ###########################################################################
189
190 def _dl_repl(self, match):
191 """Handle definition lists."""
192 prefix = re.search("^;+", match).group(0)
193 term = match[len(prefix):-1].strip()
194 result = [ self._indent_to(len(prefix), "dl", None, None), ]
195 self._close_item(result)
196 self.in_dd = 1
197 result.extend([
198 self.formatter.definition_term(1),
199 self.formatter.text(term),
200 self.formatter.definition_term(0),
201 self.formatter.definition_desc(1),
202 ])
203 return ''.join(result)
204
205 ###########################################################################
206
207 def _sup_repl(self, word):
208 """Handle superscript."""
209 return self.formatter.sup(1) + \
210 self.formatter.text(word[5:-6]) + \
211 self.formatter.sup(0)
212
213 ###########################################################################
214
215 def _sub_repl(self, word):
216 """Handle subscript."""
217 return self.formatter.sub(1) + \
218 self.formatter.text(word[5:-6]) + \
219 self.formatter.sub(0)
220
221 ###########################################################################
222
223 def _small_repl(self, word):
224 """Handle small."""
225 self.is_small = not self.is_small
226 return self.formatter.small(self.is_small)
227
228 ###########################################################################
229
230 def _tt_repl(self, word):
231 """Handle inline code."""
232 return self.formatter.code(1) + \
233 self.formatter.text(word[4:-5]) + \
234 self.formatter.code(0)
235
236 ###########################################################################
237
238 def _pre_repl(self, word):
239 """Handle code displays."""
240 if word[1] != '/' and not self.in_pre:
241 self.in_pre = 3
242 return self._closeP() + self.formatter.preformatted(self.in_pre)
243 elif word[1] == '/' and self.in_pre:
244 self.in_pre = 0
245 self.inhibit_p = 0
246 return self.formatter.preformatted(self.in_pre)
247 return word
248
249 ###########################################################################
250
251 def _media_entity_repl(self, word):
252 """Handle inlined entity."""
253 return self.formatter.rawHTML(word)
254
255 ###########################################################################
256
257 def _br_repl(self, word):
258 """Handle inlined entity."""
259 return self.formatter.linebreak(0)
260
261 ###########################################################################
262
263 def _media_anchor_repl(self, word):
264 """Handle inlined entity."""
265 return self.formatter.anchordef(word[3:-2])
266
267 ###########################################################################
268
269 # This is copied from the super class and modified
270 def format(self, formatter):
271 """ For each line, scan through looking for magic
272 strings, outputting verbatim any intervening text.
273 """
274 self.formatter = formatter
275 self.hilite_re = self.formatter.page.hilite_re
276 ( self.talk_name, self.talk_label,
277 self.user_name, self.image_name,
278 self.media_name, ) = self.lang2nm_spc.get(getattr(self.cfg,
279 'default_lang',
280 ""),
281 self.lang2nm_spc[''])
282 self.talk_page = "/" + self.talk_name
283
284 # prepare regex patterns
285 rules = self.formatting_rules.replace('\n', '|')
286 if getattr(self.cfg, 'allow_numeric_entities', False):
287 rules = ur'(?P<ent_numeric>&#\d{1,5};)|' + rules
288
289 self.request.clock.start('compile_huge_and_ugly')
290 scan_re = re.compile(rules, re.UNICODE)
291 eol_re = re.compile(r'\r?\n', re.UNICODE)
292 indent_re = re.compile("^[*#;:]", re.UNICODE)
293 self.request.clock.stop('compile_huge_and_ugly')
294
295 # get text and replace TABs
296 rawtext = self.raw.expandtabs()
297
298 # go through the lines
299 self.lineno = 0
300 self.lines = eol_re.split(rawtext)
301 self.line_is_empty = 0
302
303 # write out discussion link at the very top unless this is discussion
304 # already
305 page_name = self.request.page.page_name
306 if not page_name.endswith(self.talk_page):
307 # not a discussion page already
308 self.request.write(self._word_repl(self.talk_page, self.talk_label))
309
310 # Main loop
311 for line in self.lines:
312 self.lineno = self.lineno + 1
313 self.table_rowstart = 1
314 self.line_was_empty = self.line_is_empty
315 self.line_is_empty = 0
316 self.first_list_item = 0
317 self.inhibit_p = 0
318
319 if self.in_pre:
320 # TODO: move this into function
321 # still looking for processing instructions
322 # TODO: use strings for pre state, not numbers
323 if self.in_pre == 1:
324 self.processor = None
325 self.processor_is_parser = 0
326 processor_name = ''
327 if (line.strip()[:2] == "#!"):
328 processor_name = line.strip()[2:].split()[0]
329 self.processor = wikiutil.importPlugin(
330 self.request.cfg, "processor", processor_name, "process")
331
332 # now look for a parser with that name
333 if self.processor is None:
334 self.processor = wikiutil.importPlugin(
335 self.request.cfg, "parser", processor_name, "Parser")
336 if self.processor:
337 self.processor_is_parser = 1
338 if self.processor:
339 self.in_pre = 2
340 self.colorize_lines = [line]
341 self.processor_name = processor_name
342 continue
343 else:
344 self.request.write(self._closeP() +
345 self.formatter.preformatted(1))
346 self.in_pre = 3
347 if self.in_pre == 2:
348 # processing mode
349 endpos = line.find("}}}")
350 if endpos == -1:
351 self.colorize_lines.append(line)
352 continue
353 if line[:endpos]:
354 self.colorize_lines.append(line[:endpos])
355
356 # Close p before calling processor
357 # TODO: do we really need this?
358 self.request.write(self._closeP())
359 res = self.formatter.processor(self.processor_name,
360 self.colorize_lines,
361 self.processor_is_parser)
362 self.request.write(res)
363 del self.colorize_lines
364 self.in_pre = 0
365 self.processor = None
366
367 # send rest of line through regex machinery
368 line = line[endpos+3:]
369 else:
370 # we don't have \n as whitespace any more
371 # This is the space between lines we join to one paragraph
372 line = line + ' '
373
374 # Paragraph break on empty lines
375 if not line.strip():
376 if self.in_table:
377 self.request.write(self.formatter.table(0))
378 self.in_table = 0
379 # CHANGE: removed check for not self.list_types
380 # p should close on every empty line
381 if (self.formatter.in_p):
382 self.request.write(self.formatter.paragraph(0))
383 self.line_is_empty = 1
384 continue
385
386 # Reset indent level if needed
387 if not indent_re.match(line):
388 self.request.write(self._indent_to(0, "ul", None, None))
389
390 # Scan line, format and write
391 formatted_line = self.scan(scan_re, line)
392 self.request.write(formatted_line)
393
394 if self.in_pre:
395 self.request.write(self.formatter.linebreak())
396
397 # Close code displays, paragraphs, tables and open lists
398 self.request.write(self._undent())
399 if self.in_pre: self.request.write(self.formatter.preformatted(0))
400 if self.formatter.in_p: self.request.write(self.formatter.paragraph(0))
401 if self.in_table: self.request.write(self.formatter.table(0))
402
403 ###############################################################################
404 ###############################################################################
405
406 podDocumentation = """
407
408 =head1 NAME
409
410 media.py - A MediaWiki parser plugin for MoinMoin
411
412 =head1 SYNOPSIS
413
414 #format media
415
416 ==MediaWiki level 1 header==
417 * Bullet without indentation
418 ** On level two - as MediaWiki likes
419
420 =head1 DESCRIPTION
421
422 B<media.py> is a parser plugin for MoinMoin. It can parse a subset of MediaWiki
423 syntax and use it for MoinMoin.
424
425 =head2 Supported features
426
427 Currently a substantial subset of MediaWiki is supported. These constructs are
428 recognized:
429
430 MediaWiki syntax Meaning Notes
431
432 ''x'' Italics
433 '''x''' Bold
434 '''''x''''' VeryStrong
435 <sup>x</sup> Superscript text
436 <sub>x</sub> Subscript text
437 <small>x</small> Small text
438 <u>x</u> Underline
439 <tt>x</tt> Preformatted embedded text
440 <i>x</i> Italics
441 <b>x</b> Bold
442 &x; HTML entities
443 <br> Breaking paragraphs
444
445 ---- Divider
446
447 http://x External Link
448 [http://x] External Link [5]
449 [http://x y] External Link with text
450
451 [[x]] Link [1]
452 [[x|y]] Link with text [1]
453 [[#Anchor]] Setting an anchor in a page
454
455 ==x== Header level 1
456 ===x=== Header level 2
457 ...
458 * x Bullet list level 1
459 ** x Bullet list level 2
460 ...
461 # x Numbered list level 1
462 ## x Numbered list level 2
463 ...
464 *#* Mixed lists
465 : x Indented paragraph level 1
466 :: x Indented paragraph level 2
467 ...
468 ;x:y Definition level 1
469 ;;x:y Definition level 2 [2]
470 ...
471 <pre>x</pre> Preformatted uninterpreted text
472 <nowiki>x</nowiki> Uninterpreted text [3]
473
474 <!-- x --> Comments [4]
475
476 =over 4
477
478 =item [1]
479
480 These restrictions currently apply:
481
482 =over 4
483
484 =item *
485
486 Stuff in parentheses is not hidden from display.
487
488 =item *
489
490 Namespaces are not hidden. Instead they are transformed to top level MoinMoin
491 pages (the most natural mapping of namespaces).
492
493 =item *
494
495 Interwiki and language linking as done in MediaWiki is not really supported well.
496
497 =item *
498
499 Special namespace C<Special:> is not supported. Such links map to a macro in
500 MoinMoin.
501
502 =item *
503
504 Dates. Done by a macro in MoinMoin.
505
506 =back
507
508 These adaptions are made:
509
510 =over 4
511
512 =item *
513
514 The C<User:> namespace is dropped silently to map the MediaWiki logic into
515 MoinMoin logic.
516
517 =item *
518
519 Special namespaces C<Image:> and C<Media:> used to embed pictures or to attach
520 arbitrary data are mapped to the C<attachment:> for MoinMoin so they are
521 effectively mapped to the attachment feature of MoinMoin.
522
523 =item *
524
525 The C<Talk:> namespace is mapped to a sub page C</Talk>.
526
527 =item *
528
529 Namespace operation is currently supported for English and German.
530
531 =back
532
533 =item [2]
534
535 This is probably an extension to MediaWiki original syntax.
536
537 =item [3]
538
539 <nowiki> is handled the same as <pre>. I.e. the whitespace structure is kept.
540
541 =item [4]
542
543 Comments are only recognized if they appear on a single physical line.
544
545 =item [5]
546
547 The missing link text is not replaced by an automatically generated number.
548
549 =back
550
551 Each non-discussion page gets an automatic link to its discussion page.
552
553 =head2 Unsupported features
554
555 These things are not yet supported:
556
557 MediaWiki syntax Meaning
558
559 <center>x</center> Centered text
560 ISBN x Link to a book by its ISBN
561 RFC x Link to an RFC
562 <strike>x</strike> Stroke text
563 <math>x</math> TeX markup
564 x Preformatted text line
565
566 Moreover these features are not yet supported:
567
568 =over 4
569
570 =item * Tables
571
572 =item * HTML
573
574 =item * MediaWiki templates
575
576 =back
577
578 If you are really missing a feature please check
579
580 http://en.wiki.oekonux.org/Oekonux/Project/Wiki/MediaWikiFAQ/FeatureRequests
581
582 Feel free to add your feature request there.
583
584 =head2 Unsupportable features
585
586 The following things are beyond a parser because they are replaced while
587 processing an edit or before actually displaying a page. While editing the
588 MoinMoin counterparts must be used:
589
590 MediaWiki syntax MoinMoin syntax Meaning
591
592 ~~~ @USERNAME@ Insert user name
593 ~~~~ @SIG@ Insert user name and current time
594 #REDIRECT [[x]] #REDIRECT x Page redirection
595
596 =head2 Additional features
597
598 The following things are inherited from MoinMoin and can be used:
599
600 MoinMoin syntax Meaning Notes
601
602 [[macro(arguments)]] Macros present in MoinMoin [1]
603 someone@example.com Mail addresses
604
605 =over 4
606
607 =item [1]
608
609 You must use parentheses even if there are no arguments. Otherwise macro syntax
610 would conflict with page names.
611
612 =back
613
614 =head1 INSTALLATION
615
616 See
617
618 http://moinmoin.wikiwikiweb.de/ParserMarket#head-17c33967bbb4345a453627b944bad1f1bc4b2791
619
620 =head1 AUTHOR
621
622 Stefan Merten <smerten@oekonux.de>
623
624 =head1 LICENSE
625
626 This program is licensed under the terms of the GPL. See
627
628 http://www.gnu.org/licenses/gpl.txt
629
630 =head1 AVAILABILITY
631
632 See
633
634 http://www.merten-home.de/FreeSoftware/media4moin/
635
636 """
Attached Files
To refer to attachments on a page, use attachment:filename, as shown below in the list of files. Do NOT use the URL of the [get] link, since this is subject to change and can break easily. You are not allowed to attach a file to this page.