Attachment 'wikiutil.py'

Download

   1 # -*- coding: iso-8859-1 -*-
   2 """
   3     MoinMoin - Wiki Utility Functions
   4 
   5     @copyright: 2000-2004 Juergen Hermann <jh@web.de>,
   6                 2004 by Florian Festi,
   7                 2006 by Mikko Virkkil,
   8                 2005-2008 MoinMoin:ThomasWaldmann,
   9                 2007 MoinMoin:ReimarBauer
  10     @license: GNU GPL, see COPYING for details.
  11 """
  12 
  13 import cgi
  14 import codecs
  15 import os
  16 import re
  17 import time
  18 import urllib
  19 
  20 from MoinMoin import log
  21 logging = log.getLogger(__name__)
  22 
  23 from MoinMoin import config
  24 from MoinMoin.util import pysupport, lock
  25 from MoinMoin.support.python_compatibility import rsplit
  26 from inspect import getargspec, isfunction, isclass, ismethod
  27 
  28 from MoinMoin import web # needed so that next line works:
  29 import werkzeug
  30 
  31 # Exceptions
  32 class InvalidFileNameError(Exception):
  33     """ Called when we find an invalid file name """
  34     pass
  35 
# constants for page names
# a relative page name starting with PARENT_PREFIX goes up one level per prefix
# (see AbsPageName / RelPageName)
PARENT_PREFIX = "../"
PARENT_PREFIX_LEN = len(PARENT_PREFIX)
# a relative page name starting with CHILD_PREFIX is a subpage of the context page
CHILD_PREFIX = "/"
CHILD_PREFIX_LEN = len(CHILD_PREFIX)
  41 
  42 #############################################################################
  43 ### Getting data from user/Sending data to user
  44 #############################################################################
  45 
  46 def decodeUnknownInput(text):
  47     """ Decode unknown input, like text attachments
  48 
  49     First we try utf-8 because it has special format, and it will decode
  50     only utf-8 files. Then we try config.charset, then iso-8859-1 using
  51     'replace'. We will never raise an exception, but may return junk
  52     data.
  53 
  54     WARNING: Use this function only for data that you view, not for data
  55     that you save in the wiki.
  56 
  57     @param text: the text to decode, string
  58     @rtype: unicode
  59     @return: decoded text (maybe wrong)
  60     """
  61     # Shortcut for unicode input
  62     if isinstance(text, unicode):
  63         return text
  64 
  65     try:
  66         return unicode(text, 'utf-8')
  67     except UnicodeError:
  68         if config.charset not in ['utf-8', 'iso-8859-1']:
  69             try:
  70                 return unicode(text, config.charset)
  71             except UnicodeError:
  72                 pass
  73         return unicode(text, 'iso-8859-1', 'replace')
  74 
  75 
  76 def decodeUserInput(s, charsets=[config.charset]):
  77     """
  78     Decodes input from the user.
  79 
  80     @param s: the string to unquote
  81     @param charsets: list of charsets to assume the string is in
  82     @rtype: unicode
  83     @return: the unquoted string as unicode
  84     """
  85     for charset in charsets:
  86         try:
  87             return s.decode(charset)
  88         except UnicodeError:
  89             pass
  90     raise UnicodeError('The string %r cannot be decoded.' % s)
  91 
  92 
  93 def url_quote(s, safe='/', want_unicode=None):
  94     """ see werkzeug.url_quote, we use a different safe param default value """
  95     try:
  96         assert want_unicode is None
  97     except AssertionError:
  98         log.exception("call with deprecated want_unicode param, please fix caller")
  99     return werkzeug.url_quote(s, charset=config.charset, safe=safe)
 100 
 101 def url_quote_plus(s, safe='/', want_unicode=None):
 102     """ see werkzeug.url_quote_plus, we use a different safe param default value """
 103     try:
 104         assert want_unicode is None
 105     except AssertionError:
 106         log.exception("call with deprecated want_unicode param, please fix caller")
 107     return werkzeug.url_quote_plus(s, charset=config.charset, safe=safe)
 108 
 109 def url_unquote(s, want_unicode=None):
 110     """ see werkzeug.url_unquote """
 111     try:
 112         assert want_unicode is None
 113     except AssertionError:
 114         log.exception("call with deprecated want_unicode param, please fix caller")
 115     if isinstance(s, unicode):
 116         s = s.encode(config.charset)
 117     return werkzeug.url_unquote(s, charset=config.charset, errors='fallback:iso-8859-1')
 118 
 119 
 120 def parseQueryString(qstr, want_unicode=None):
 121     """ see werkzeug.url_decode
 122 
 123         Please note: this returns a MultiDict, you might need to use dict() on
 124                      the result if your code expects a "normal" dict.
 125     """
 126     try:
 127         assert want_unicode is None
 128     except AssertionError:
 129         log.exception("call with deprecated want_unicode param, please fix caller")
 130     return werkzeug.url_decode(qstr, charset=config.charset, errors='fallback:iso-8859-1',
 131                                decode_keys=False, include_empty=False)
 132 
 133 def makeQueryString(qstr=None, want_unicode=None, **kw):
 134     """ Make a querystring from arguments.
 135 
 136     kw arguments overide values in qstr.
 137 
 138     If a string is passed in, it's returned verbatim and keyword parameters are ignored.
 139 
 140     See also: werkzeug.url_encode
 141 
 142     @param qstr: dict to format as query string, using either ascii or unicode
 143     @param kw: same as dict when using keywords, using ascii or unicode
 144     @rtype: string
 145     @return: query string ready to use in a url
 146     """
 147     try:
 148         assert want_unicode is None
 149     except AssertionError:
 150         log.exception("call with deprecated want_unicode param, please fix caller")
 151     if qstr is None:
 152         qstr = {}
 153     elif isinstance(qstr, (str, unicode)):
 154         return qstr
 155     if isinstance(qstr, dict):
 156         qstr.update(kw)
 157         return werkzeug.url_encode(qstr, charset=config.charset, encode_keys=True)
 158     else:
 159         raise ValueError("Unsupported argument type, should be dict.")
 160 
 161 
 162 def quoteWikinameURL(pagename, charset=config.charset):
 163     """ Return a url encoding of filename in plain ascii
 164 
 165     Use urllib.quote to quote any character that is not always safe.
 166 
 167     @param pagename: the original pagename (unicode)
 168     @param charset: url text encoding, 'utf-8' recommended. Other charset
 169                     might not be able to encode the page name and raise
 170                     UnicodeError. (default config.charset ('utf-8')).
 171     @rtype: string
 172     @return: the quoted filename, all unsafe characters encoded
 173     """
 174     # XXX please note that urllib.quote and werkzeug.url_quote have
 175     # XXX different defaults for safe=...
 176     return werkzeug.url_quote(pagename, charset=charset, safe='/')
 177 
 178 
# module level alias for werkzeug's escape function (used by make_breakable)
escape = werkzeug.escape
 180 
 181 
 182 def clean_input(text, max_len=201):
 183     """ Clean input:
 184         replace CR, LF, TAB by whitespace
 185         delete control chars
 186 
 187         @param text: unicode text to clean (if we get str, we decode)
 188         @rtype: unicode
 189         @return: cleaned text
 190     """
 191     # we only have input fields with max 200 chars, but spammers send us more
 192     length = len(text)
 193     if length == 0 or length > max_len:
 194         return u''
 195     else:
 196         if isinstance(text, str):
 197             # the translate() below can ONLY process unicode, thus, if we get
 198             # str, we try to decode it using the usual coding:
 199             text = text.decode(config.charset)
 200         return text.translate(config.clean_input_translation_map)
 201 
 202 
 203 def make_breakable(text, maxlen):
 204     """ make a text breakable by inserting soft hyphens into nonbreakable parts and escaping special chars
 205     """
 206     soft_hyphen = "&shy;"
 207     text = text.split(" ")
 208     newtext = []
 209     for word in text:
 210         if len(word) > maxlen:
 211             part = []
 212             while word:
 213                 part.append(escape(word[:maxlen]))
 214                 word = word[maxlen:]
 215             newtext.append(soft_hyphen.join(part))
 216         else:
 217             word = escape(word)
 218             newtext.append(word)
 219     return " ".join(newtext)
 220 
 221 ########################################################################
 222 ### Storage
 223 ########################################################################
 224 
# Precompiled patterns for file name [un]quoting
# UNSAFE matches a run of characters other than a-z, A-Z, 0-9 and _
# (quoteWikinameFS replaces such a run by the hex values of its bytes in parentheses)
UNSAFE = re.compile(r'[^a-zA-Z0-9_]+')
# QUOTED matches a run of hex digits in parentheses, the hex digits are group 1
QUOTED = re.compile(r'\(([a-fA-F0-9]+)\)')
 228 
 229 
 230 def quoteWikinameFS(wikiname, charset=config.charset):
 231     """ Return file system representation of a Unicode WikiName.
 232 
 233     Warning: will raise UnicodeError if wikiname can not be encoded using
 234     charset. The default value of config.charset, 'utf-8' can encode any
 235     character.
 236 
 237     @param wikiname: Unicode string possibly containing non-ascii characters
 238     @param charset: charset to encode string
 239     @rtype: string
 240     @return: quoted name, safe for any file system
 241     """
 242     filename = wikiname.encode(charset)
 243 
 244     quoted = []
 245     location = 0
 246     for needle in UNSAFE.finditer(filename):
 247         # append leading safe stuff
 248         quoted.append(filename[location:needle.start()])
 249         location = needle.end()
 250         # Quote and append unsafe stuff
 251         quoted.append('(')
 252         for character in needle.group():
 253             quoted.append('%02x' % ord(character))
 254         quoted.append(')')
 255 
 256     # append rest of string
 257     quoted.append(filename[location:])
 258     return ''.join(quoted)
 259 
 260 
 261 def unquoteWikiname(filename, charsets=[config.charset]):
 262     """ Return Unicode WikiName from quoted file name.
 263 
 264     We raise an InvalidFileNameError if we find an invalid name, so the
 265     wiki could alarm the admin or suggest the user to rename a page.
 266     Invalid file names should never happen in normal use, but are rather
 267     cheap to find.
 268 
 269     This function should be used only to unquote file names, not page
 270     names we receive from the user. These are handled in request by
 271     urllib.unquote, decodePagename and normalizePagename.
 272 
 273     Todo: search clients of unquoteWikiname and check for exceptions.
 274 
 275     @param filename: string using charset and possibly quoted parts
 276     @param charsets: list of charsets used by string
 277     @rtype: Unicode String
 278     @return: WikiName
 279     """
 280     ### Temporary fix start ###
 281     # From some places we get called with Unicode strings
 282     if isinstance(filename, type(u'')):
 283         filename = filename.encode(config.charset)
 284     ### Temporary fix end ###
 285 
 286     parts = []
 287     start = 0
 288     for needle in QUOTED.finditer(filename):
 289         # append leading unquoted stuff
 290         parts.append(filename[start:needle.start()])
 291         start = needle.end()
 292         # Append quoted stuff
 293         group = needle.group(1)
 294         # Filter invalid filenames
 295         if (len(group) % 2 != 0):
 296             raise InvalidFileNameError(filename)
 297         try:
 298             for i in range(0, len(group), 2):
 299                 byte = group[i:i+2]
 300                 character = chr(int(byte, 16))
 301                 parts.append(character)
 302         except ValueError:
 303             # byte not in hex, e.g 'xy'
 304             raise InvalidFileNameError(filename)
 305 
 306     # append rest of string
 307     if start == 0:
 308         wikiname = filename
 309     else:
 310         parts.append(filename[start:len(filename)])
 311         wikiname = ''.join(parts)
 312 
 313     # FIXME: This looks wrong, because at this stage "()" can be both errors
 314     # like open "(" without close ")", or unquoted valid characters in the file name.
 315     # Filter invalid filenames. Any left (xx) must be invalid
 316     #if '(' in wikiname or ')' in wikiname:
 317     #    raise InvalidFileNameError(filename)
 318 
 319     wikiname = decodeUserInput(wikiname, charsets)
 320     return wikiname
 321 
 322 # time scaling
 323 def timestamp2version(ts):
 324     """ Convert UNIX timestamp (may be float or int) to our version
 325         (long) int.
 326         We don't want to use floats, so we just scale by 1e6 to get
 327         an integer in usecs.
 328     """
 329     return long(ts*1000000L) # has to be long for py 2.2.x
 330 
 331 def version2timestamp(v):
 332     """ Convert version number to UNIX timestamp (float).
 333         This must ONLY be used for display purposes.
 334     """
 335     return v / 1000000.0
 336 
 337 
# This is the list of meta attribute names to be treated as integers
# (MetaDict converts their values using int() when reading the meta file).
# IMPORTANT: do not use any meta attribute names with "-" (or any other chars
# invalid in python attribute names), use e.g. _ instead.
INTEGER_METAS = ['current', 'revision', # for page storage (moin 2.0)
                 'data_format_revision', # for data_dir format spec (use by mig scripts)
                ]
 344 
 345 class MetaDict(dict):
 346     """ store meta informations as a dict.
 347     """
 348     def __init__(self, metafilename, cache_directory):
 349         """ create a MetaDict from metafilename """
 350         dict.__init__(self)
 351         self.metafilename = metafilename
 352         self.dirty = False
 353         lock_dir = os.path.join(cache_directory, '__metalock__')
 354         self.rlock = lock.ReadLock(lock_dir, 60.0)
 355         self.wlock = lock.WriteLock(lock_dir, 60.0)
 356 
 357         if not self.rlock.acquire(3.0):
 358             raise EnvironmentError("Could not lock in MetaDict")
 359         try:
 360             self._get_meta()
 361         finally:
 362             self.rlock.release()
 363 
 364     def _get_meta(self):
 365         """ get the meta dict from an arbitrary filename.
 366             does not keep state, does uncached, direct disk access.
 367             @param metafilename: the name of the file to read
 368             @return: dict with all values or {} if empty or error
 369         """
 370 
 371         try:
 372             metafile = codecs.open(self.metafilename, "r", "utf-8")
 373             meta = metafile.read() # this is much faster than the file's line-by-line iterator
 374             metafile.close()
 375         except IOError:
 376             meta = u''
 377         for line in meta.splitlines():
 378             key, value = line.split(':', 1)
 379             value = value.strip()
 380             if key in INTEGER_METAS:
 381                 value = int(value)
 382             dict.__setitem__(self, key, value)
 383 
 384     def _put_meta(self):
 385         """ put the meta dict into an arbitrary filename.
 386             does not keep or modify state, does uncached, direct disk access.
 387             @param metafilename: the name of the file to write
 388             @param metadata: dict of the data to write to the file
 389         """
 390         meta = []
 391         for key, value in self.items():
 392             if key in INTEGER_METAS:
 393                 value = str(value)
 394             meta.append("%s: %s" % (key, value))
 395         meta = '\r\n'.join(meta)
 396 
 397         metafile = codecs.open(self.metafilename, "w", "utf-8")
 398         metafile.write(meta)
 399         metafile.close()
 400         self.dirty = False
 401 
 402     def sync(self, mtime_usecs=None):
 403         """ No-Op except for that parameter """
 404         if not mtime_usecs is None:
 405             self.__setitem__('mtime', str(mtime_usecs))
 406         # otherwise no-op
 407 
 408     def __getitem__(self, key):
 409         """ We don't care for cache coherency here. """
 410         return dict.__getitem__(self, key)
 411 
 412     def __setitem__(self, key, value):
 413         """ Sets a dictionary entry. """
 414         if not self.wlock.acquire(5.0):
 415             raise EnvironmentError("Could not lock in MetaDict")
 416         try:
 417             self._get_meta() # refresh cache
 418             try:
 419                 oldvalue = dict.__getitem__(self, key)
 420             except KeyError:
 421                 oldvalue = None
 422             if value != oldvalue:
 423                 dict.__setitem__(self, key, value)
 424                 self._put_meta() # sync cache
 425         finally:
 426             self.wlock.release()
 427 
 428 
 429 # Quoting of wiki names, file names, etc. (in the wiki markup) -----------------------------------
 430 
# DEPRECATED, only needed for the 1.5 > 1.6 migration conversion.
# Don't ever change this value.
QUOTE_CHARS = u'"'
 433 
 434 
 435 #############################################################################
 436 ### InterWiki
 437 #############################################################################
# name of the wiki page whose raw body load_wikimap() reads additionally to
# the intermap files
INTERWIKI_PAGE = "InterWikiMap"
 439 
 440 def generate_file_list(request):
 441     """ generates a list of all files. for internal use. """
 442 
 443     # order is important here, the local intermap file takes
 444     # precedence over the shared one, and is thus read AFTER
 445     # the shared one
 446     intermap_files = request.cfg.shared_intermap
 447     if not isinstance(intermap_files, list):
 448         intermap_files = [intermap_files]
 449     else:
 450         intermap_files = intermap_files[:]
 451     intermap_files.append(os.path.join(request.cfg.data_dir, "intermap.txt"))
 452     request.cfg.shared_intermap_files = [filename for filename in intermap_files
 453                                          if filename and os.path.isfile(filename)]
 454 
 455 
 456 def get_max_mtime(file_list, page):
 457     """ Returns the highest modification time of the files in file_list and the
 458     page page. """
 459     timestamps = [os.stat(filename).st_mtime for filename in file_list]
 460     if page.exists():
 461         # exists() is cached and thus cheaper than mtime_usecs()
 462         timestamps.append(version2timestamp(page.mtime_usecs()))
 463     if timestamps:
 464         return max(timestamps)
 465     else:
 466         return 0 # no files / pages there
 467 
 468 def load_wikimap(request):
 469     """ load interwiki map (once, and only on demand) """
 470     from MoinMoin.Page import Page
 471 
 472     now = int(time.time())
 473     if getattr(request.cfg, "shared_intermap_files", None) is None:
 474         generate_file_list(request)
 475 
 476     try:
 477         _interwiki_list = request.cfg.cache.interwiki_list
 478         old_mtime = request.cfg.cache.interwiki_mtime
 479         if request.cfg.cache.interwiki_ts + (1*60) < now: # 1 minutes caching time
 480             max_mtime = get_max_mtime(request.cfg.shared_intermap_files, Page(request, INTERWIKI_PAGE))
 481             if max_mtime > old_mtime:
 482                 raise AttributeError # refresh cache
 483             else:
 484                 request.cfg.cache.interwiki_ts = now
 485     except AttributeError:
 486         _interwiki_list = {}
 487         lines = []
 488 
 489         for filename in request.cfg.shared_intermap_files:
 490             f = codecs.open(filename, "r", config.charset)
 491             lines.extend(f.readlines())
 492             f.close()
 493 
 494         # add the contents of the InterWikiMap page
 495         lines += Page(request, INTERWIKI_PAGE).get_raw_body().splitlines()
 496 
 497         for line in lines:
 498             if not line or line[0] == '#':
 499                 continue
 500             try:
 501                 line = "%s %s/InterWiki" % (line, request.script_root)
 502                 wikitag, urlprefix, dummy = line.split(None, 2)
 503             except ValueError:
 504                 pass
 505             else:
 506                 _interwiki_list[wikitag] = urlprefix
 507 
 508         del lines
 509 
 510         # add own wiki as "Self" and by its configured name
 511         _interwiki_list['Self'] = request.script_root + '/'
 512         if request.cfg.interwikiname:
 513             _interwiki_list[request.cfg.interwikiname] = request.script_root + '/'
 514 
 515         # save for later
 516         request.cfg.cache.interwiki_list = _interwiki_list
 517         request.cfg.cache.interwiki_ts = now
 518         request.cfg.cache.interwiki_mtime = get_max_mtime(request.cfg.shared_intermap_files, Page(request, INTERWIKI_PAGE))
 519 
 520     return _interwiki_list
 521 
 522 def split_wiki(wikiurl):
 523     """
 524     Split a wiki url.
 525 
 526     *** DEPRECATED FUNCTION FOR OLD 1.5 SYNTAX - ONLY STILL HERE FOR THE 1.5 -> 1.6 MIGRATION ***
 527     Use split_interwiki(), see below.
 528 
 529     @param wikiurl: the url to split
 530     @rtype: tuple
 531     @return: (tag, tail)
 532     """
 533     # !!! use a regex here!
 534     try:
 535         wikitag, tail = wikiurl.split(":", 1)
 536     except ValueError:
 537         try:
 538             wikitag, tail = wikiurl.split("/", 1)
 539         except ValueError:
 540             wikitag, tail = 'Self', wikiurl
 541     return wikitag, tail
 542 
 543 def split_interwiki(wikiurl):
 544     """ Split a interwiki name, into wikiname and pagename, e.g:
 545 
 546     'MoinMoin:FrontPage' -> "MoinMoin", "FrontPage"
 547     'FrontPage' -> "Self", "FrontPage"
 548     'MoinMoin:Page with blanks' -> "MoinMoin", "Page with blanks"
 549     'MoinMoin:' -> "MoinMoin", ""
 550 
 551     can also be used for:
 552 
 553     'attachment:filename with blanks.txt' -> "attachment", "filename with blanks.txt"
 554 
 555     @param wikiurl: the url to split
 556     @rtype: tuple
 557     @return: (wikiname, pagename)
 558     """
 559     try:
 560         wikiname, pagename = wikiurl.split(":", 1)
 561     except ValueError:
 562         wikiname, pagename = 'Self', wikiurl
 563     return wikiname, pagename
 564 
 565 def resolve_wiki(request, wikiurl):
 566     """
 567     Resolve an interwiki link.
 568 
 569     *** DEPRECATED FUNCTION FOR OLD 1.5 SYNTAX - ONLY STILL HERE FOR THE 1.5 -> 1.6 MIGRATION ***
 570     Use resolve_interwiki(), see below.
 571 
 572     @param request: the request object
 573     @param wikiurl: the InterWiki:PageName link
 574     @rtype: tuple
 575     @return: (wikitag, wikiurl, wikitail, err)
 576     """
 577     _interwiki_list = load_wikimap(request)
 578     # split wiki url
 579     wikiname, pagename = split_wiki(wikiurl)
 580 
 581     # return resolved url
 582     if wikiname in _interwiki_list:
 583         return (wikiname, _interwiki_list[wikiname], pagename, False)
 584     else:
 585         return (wikiname, request.script_root, "/InterWiki", True)
 586 
 587 def resolve_interwiki(request, wikiname, pagename):
 588     """ Resolve an interwiki reference (wikiname:pagename).
 589 
 590     @param request: the request object
 591     @param wikiname: interwiki wiki name
 592     @param pagename: interwiki page name
 593     @rtype: tuple
 594     @return: (wikitag, wikiurl, wikitail, err)
 595     """
 596     _interwiki_list = load_wikimap(request)
 597     if wikiname in _interwiki_list:
 598         return (wikiname, _interwiki_list[wikiname], pagename, False)
 599     else:
 600         return (wikiname, request.script_root, "/InterWiki", True)
 601 
 602 def join_wiki(wikiurl, wikitail):
 603     """
 604     Add a (url_quoted) page name to an interwiki url.
 605 
 606     Note: We can't know what kind of URL quoting a remote wiki expects.
 607           We just use a utf-8 encoded string with standard URL quoting.
 608 
 609     @param wikiurl: wiki url, maybe including a $PAGE placeholder
 610     @param wikitail: page name
 611     @rtype: string
 612     @return: generated URL of the page in the other wiki
 613     """
 614     wikitail = url_quote(wikitail)
 615     if '$PAGE' in wikiurl:
 616         return wikiurl.replace('$PAGE', wikitail)
 617     else:
 618         return wikiurl + wikitail
 619 
 620 
 621 #############################################################################
 622 ### Page types (based on page names)
 623 #############################################################################
 624 
 625 def isSystemPage(request, pagename):
 626     """ Is this a system page?
 627 
 628     @param request: the request object
 629     @param pagename: the page name
 630     @rtype: bool
 631     @return: true if page is a system page
 632     """
 633     from MoinMoin import i18n
 634     return pagename in i18n.system_pages or isTemplatePage(request, pagename)
 635 
 636 
 637 def isTemplatePage(request, pagename):
 638     """ Is this a template page?
 639 
 640     @param pagename: the page name
 641     @rtype: bool
 642     @return: true if page is a template page
 643     """
 644     return request.cfg.cache.page_template_regexact.search(pagename) is not None
 645 
 646 
 647 def isGroupPage(pagename, cfg):
 648     """ Is this a name of group page?
 649 
 650     @param pagename: the page name
 651     @rtype: bool
 652     @return: true if page is a form page
 653     """
 654     return cfg.cache.page_group_regexact.search(pagename) is not None
 655 
 656 
 657 def filterCategoryPages(request, pagelist):
 658     """ Return category pages in pagelist
 659 
 660     WARNING: DO NOT USE THIS TO FILTER THE FULL PAGE LIST! Use
 661     getPageList with a filter function.
 662 
 663     If you pass a list with a single pagename, either that is returned
 664     or an empty list, thus you can use this function like a `isCategoryPage`
 665     one.
 666 
 667     @param pagelist: a list of pages
 668     @rtype: list
 669     @return: only the category pages of pagelist
 670     """
 671     func = request.cfg.cache.page_category_regexact.search
 672     return [pn for pn in pagelist if func(pn)]
 673 
 674 
 675 def getLocalizedPage(request, pagename): # was: getSysPage
 676     """ Get a system page according to user settings and available translations.
 677 
 678     We include some special treatment for the case that <pagename> is the
 679     currently rendered page, as this is the case for some pages used very
 680     often, like FrontPage, RecentChanges etc. - in that case we reuse the
 681     already existing page object instead creating a new one.
 682 
 683     @param request: the request object
 684     @param pagename: the name of the page
 685     @rtype: Page object
 686     @return: the page object of that system page, using a translated page,
 687              if it exists
 688     """
 689     from MoinMoin.Page import Page
 690     i18n_name = request.getText(pagename)
 691     pageobj = None
 692     if i18n_name != pagename:
 693         if request.page and i18n_name == request.page.page_name:
 694             # do not create new object for current page
 695             i18n_page = request.page
 696             if i18n_page.exists():
 697                 pageobj = i18n_page
 698         else:
 699             i18n_page = Page(request, i18n_name)
 700             if i18n_page.exists():
 701                 pageobj = i18n_page
 702 
 703     # if we failed getting a translated version of <pagename>,
 704     # we fall back to english
 705     if not pageobj:
 706         if request.page and pagename == request.page.page_name:
 707             # do not create new object for current page
 708             pageobj = request.page
 709         else:
 710             pageobj = Page(request, pagename)
 711     return pageobj
 712 
 713 
 714 def getFrontPage(request):
 715     """ Convenience function to get localized front page
 716 
 717     @param request: current request
 718     @rtype: Page object
 719     @return localized page_front_page, if there is a translation
 720     """
 721     return getLocalizedPage(request, request.cfg.page_front_page)
 722 
 723 
 724 def getHomePage(request, username=None):
 725     """
 726     Get a user's homepage, or return None for anon users and
 727     those who have not created a homepage.
 728 
 729     DEPRECATED - try to use getInterwikiHomePage (see below)
 730 
 731     @param request: the request object
 732     @param username: the user's name
 733     @rtype: Page
 734     @return: user's homepage object - or None
 735     """
 736     from MoinMoin.Page import Page
 737     # default to current user
 738     if username is None and request.user.valid:
 739         username = request.user.name
 740 
 741     # known user?
 742     if username:
 743         # Return home page
 744         page = Page(request, username)
 745         if page.exists():
 746             return page
 747 
 748     return None
 749 
 750 
 751 def getInterwikiHomePage(request, username=None):
 752     """
 753     Get a user's homepage.
 754 
 755     cfg.user_homewiki influences behaviour of this:
 756     'Self' does mean we store user homepage in THIS wiki.
 757     When set to our own interwikiname, it behaves like with 'Self'.
 758 
 759     'SomeOtherWiki' means we store user homepages in another wiki.
 760 
 761     @param request: the request object
 762     @param username: the user's name
 763     @rtype: tuple (or None for anon users)
 764     @return: (wikiname, pagename)
 765     """
 766     # default to current user
 767     if username is None and request.user.valid:
 768         username = request.user.name
 769     if not username:
 770         return None # anon user
 771 
 772     homewiki = request.cfg.user_homewiki
 773     if homewiki == request.cfg.interwikiname:
 774         homewiki = u'Self'
 775 
 776     return homewiki, username
 777 
 778 
 779 def AbsPageName(context, pagename):
 780     """
 781     Return the absolute pagename for a (possibly) relative pagename.
 782 
 783     @param context: name of the page where "pagename" appears on
 784     @param pagename: the (possibly relative) page name
 785     @rtype: string
 786     @return: the absolute page name
 787     """
 788     if pagename.startswith(PARENT_PREFIX):
 789         while context and pagename.startswith(PARENT_PREFIX):
 790             context = '/'.join(context.split('/')[:-1])
 791             pagename = pagename[PARENT_PREFIX_LEN:]
 792         pagename = '/'.join(filter(None, [context, pagename, ]))
 793     elif pagename.startswith(CHILD_PREFIX):
 794         if context:
 795             pagename = context + '/' + pagename[CHILD_PREFIX_LEN:]
 796         else:
 797             pagename = pagename[CHILD_PREFIX_LEN:]
 798     return pagename
 799 
 800 def RelPageName(context, pagename):
 801     """
 802     Return the relative pagename for some context.
 803 
 804     @param context: name of the page where "pagename" appears on
 805     @param pagename: the absolute page name
 806     @rtype: string
 807     @return: the relative page name
 808     """
 809     if context == '':
 810         # special case, context is some "virtual root" page with name == ''
 811         # every page is a subpage of this virtual root
 812         return CHILD_PREFIX + pagename
 813     elif pagename.startswith(context + CHILD_PREFIX):
 814         # simple child
 815         return pagename[len(context):]
 816     else:
 817         # some kind of sister/aunt
 818         context_frags = context.split('/')   # A, B, C, D, E
 819         pagename_frags = pagename.split('/') # A, B, C, F
 820         # first throw away common parents:
 821         common = 0
 822         for cf, pf in zip(context_frags, pagename_frags):
 823             if cf == pf:
 824                 common += 1
 825             else:
 826                 break
 827         context_frags = context_frags[common:] # D, E
 828         pagename_frags = pagename_frags[common:] # F
 829         go_up = len(context_frags)
 830         return PARENT_PREFIX * go_up + '/'.join(pagename_frags)
 831 
 832 
 833 def pagelinkmarkup(pagename, text=None):
 834     """ return markup that can be used as link to page <pagename> """
 835     from MoinMoin.parser.text_moin_wiki import Parser
 836     if re.match(Parser.word_rule + "$", pagename, re.U|re.X) and \
 837             (text is None or text == pagename):
 838         return pagename
 839     else:
 840         if text is None or text == pagename:
 841             text = ''
 842         else:
 843             text = '|%s' % text
 844         return u'[[%s%s]]' % (pagename, text)
 845 
 846 #############################################################################
 847 ### mimetype support
 848 #############################################################################
 849 import mimetypes
 850 
# Additional filename extension -> mimetype entries (keys include the
# leading dot). The entries are handed to mimetypes.add_type() further below.
MIMETYPES_MORE = {
 # OpenOffice 2.x & other open document stuff
 '.odt': 'application/vnd.oasis.opendocument.text',
 '.ods': 'application/vnd.oasis.opendocument.spreadsheet',
 '.odp': 'application/vnd.oasis.opendocument.presentation',
 '.odg': 'application/vnd.oasis.opendocument.graphics',
 '.odc': 'application/vnd.oasis.opendocument.chart',
 '.odf': 'application/vnd.oasis.opendocument.formula',
 '.odb': 'application/vnd.oasis.opendocument.database',
 '.odi': 'application/vnd.oasis.opendocument.image',
 '.odm': 'application/vnd.oasis.opendocument.text-master',
 '.ott': 'application/vnd.oasis.opendocument.text-template',
 '.ots': 'application/vnd.oasis.opendocument.spreadsheet-template',
 '.otp': 'application/vnd.oasis.opendocument.presentation-template',
 '.otg': 'application/vnd.oasis.opendocument.graphics-template',
 # some systems (like Mac OS X) don't have some of these:
 '.patch': 'text/x-diff',
 '.diff': 'text/x-diff',
 '.py': 'text/x-python',
 '.cfg': 'text/plain',
 '.conf': 'text/plain',
 '.irc': 'text/plain',
 '.md5': 'text/plain',
 '.csv': 'text/csv',
 '.flv': 'video/x-flv',
 '.wmv': 'video/x-ms-wmv',
 '.swf': 'application/x-shockwave-flash',
 # wiki markup formats
 '.moin': 'text/moin-wiki',
 '.creole': 'text/creole',
}
 881 
 882 # add all mimetype patterns of pygments
 883 import pygments.lexers
 884 
 885 for name, short, patterns, mime in pygments.lexers.get_all_lexers():
 886     for pattern in patterns:
 887         if pattern.startswith('*.') and mime:
 888             MIMETYPES_MORE[pattern[1:]] = mime[0]
 889 
 890 [mimetypes.add_type(mimetype, ext, True) for ext, mimetype in MIMETYPES_MORE.items()]
 891 
 892 MIMETYPES_sanitize_mapping = {
 893     # this stuff is text, but got application/* for unknown reasons
 894     ('application', 'docbook+xml'): ('text', 'docbook'),
 895     ('application', 'x-latex'): ('text', 'latex'),
 896     ('application', 'x-tex'): ('text', 'tex'),
 897     ('application', 'javascript'): ('text', 'javascript'),
 898 }
 899 
 900 MIMETYPES_spoil_mapping = {} # inverse mapping of above
 901 for _key, _value in MIMETYPES_sanitize_mapping.items():
 902     MIMETYPES_spoil_mapping[_value] = _key
 903 
 904 
 905 class MimeType(object):
 906     """ represents a mimetype like text/plain """
 907 
 908     def __init__(self, mimestr=None, filename=None):
 909         self.major = self.minor = None # sanitized mime type and subtype
 910         self.params = {} # parameters like "charset" or others
 911         self.charset = None # this stays None until we know for sure!
 912         self.raw_mimestr = mimestr
 913 
 914         if mimestr:
 915             self.parse_mimetype(mimestr)
 916         elif filename:
 917             self.parse_filename(filename)
 918 
 919     def parse_filename(self, filename):
 920         mtype, encoding = mimetypes.guess_type(filename)
 921         if mtype is None:
 922             mtype = 'application/octet-stream'
 923         self.parse_mimetype(mtype)
 924 
 925     def parse_mimetype(self, mimestr):
 926         """ take a string like used in content-type and parse it into components,
 927             alternatively it also can process some abbreviated string like "wiki"
 928         """
 929         parameters = mimestr.split(";")
 930         parameters = [p.strip() for p in parameters]
 931         mimetype, parameters = parameters[0], parameters[1:]
 932         mimetype = mimetype.split('/')
 933         if len(mimetype) >= 2:
 934             major, minor = mimetype[:2] # we just ignore more than 2 parts
 935         else:
 936             major, minor = self.parse_format(mimetype[0])
 937         self.major = major.lower()
 938         self.minor = minor.lower()
 939         for param in parameters:
 940             key, value = param.split('=')
 941             if value[0] == '"' and value[-1] == '"': # remove quotes
 942                 value = value[1:-1]
 943             self.params[key.lower()] = value
 944         if 'charset' in self.params:
 945             self.charset = self.params['charset'].lower()
 946         self.sanitize()
 947 
 948     def parse_format(self, format):
 949         """ maps from what we currently use on-page in a #format xxx processing
 950             instruction to a sanitized mimetype major, minor tuple.
 951             can also be user later for easier entry by the user, so he can just
 952             type "wiki" instead of "text/moin-wiki".
 953         """
 954         format = format.lower()
 955         if format in config.parser_text_mimetype:
 956             mimetype = 'text', format
 957         else:
 958             mapping = {
 959                 'wiki': ('text', 'moin-wiki'),
 960                 'irc': ('text', 'irssi'),
 961             }
 962             try:
 963                 mimetype = mapping[format]
 964             except KeyError:
 965                 mimetype = 'text', 'x-%s' % format
 966         return mimetype
 967 
 968     def sanitize(self):
 969         """ convert to some representation that makes sense - this is not necessarily
 970             conformant to /etc/mime.types or IANA listing, but if something is
 971             readable text, we will return some text/* mimetype, not application/*,
 972             because we need text/plain as fallback and not application/octet-stream.
 973         """
 974         self.major, self.minor = MIMETYPES_sanitize_mapping.get((self.major, self.minor), (self.major, self.minor))
 975 
 976     def spoil(self):
 977         """ this returns something conformant to /etc/mime.type or IANA as a string,
 978             kind of inverse operation of sanitize(), but doesn't change self
 979         """
 980         major, minor = MIMETYPES_spoil_mapping.get((self.major, self.minor), (self.major, self.minor))
 981         return self.content_type(major, minor)
 982 
 983     def content_type(self, major=None, minor=None, charset=None, params=None):
 984         """ return a string suitable for Content-Type header
 985         """
 986         major = major or self.major
 987         minor = minor or self.minor
 988         params = params or self.params or {}
 989         if major == 'text':
 990             charset = charset or self.charset or params.get('charset', config.charset)
 991             params['charset'] = charset
 992         mimestr = "%s/%s" % (major, minor)
 993         params = ['%s="%s"' % (key.lower(), value) for key, value in params.items()]
 994         params.insert(0, mimestr)
 995         return "; ".join(params)
 996 
 997     def mime_type(self):
 998         """ return a string major/minor only, no params """
 999         return "%s/%s" % (self.major, self.minor)
1000 
1001     def module_name(self):
1002         """ convert this mimetype to a string useable as python module name,
1003             we yield the exact module name first and then proceed to shorter
1004             module names (useful for falling back to them, if the more special
1005             module is not found) - e.g. first "text_python", next "text".
1006             Finally, we yield "application_octet_stream" as the most general
1007             mimetype we have.
1008             Hint: the fallback handler module for text/* should be implemented
1009                   in module "text" (not "text_plain")
1010         """
1011         mimetype = self.mime_type()
1012         modname = mimetype.replace("/", "_").replace("-", "_").replace(".", "_")
1013         fragments = modname.split('_')
1014         for length in range(len(fragments), 1, -1):
1015             yield "_".join(fragments[:length])
1016         yield self.raw_mimestr
1017         yield fragments[0]
1018         yield "application_octet_stream"
1019 
1020 
1021 #############################################################################
1022 ### Plugins
1023 #############################################################################
1024 
class PluginError(Exception):
    """ Common base class of all plugin related errors """
    pass
1027 
class PluginMissingError(PluginError):
    """ Raised when a requested plugin can not be found """
    pass
1030 
class PluginAttributeError(PluginError):
    """ Raised when a plugin does not contain the requested attribute """
    pass
1033 
1034 
def importPlugin(cfg, kind, name, function="execute"):
    """ Import a plugin, preferring wiki plugins over builtin ones

    Looks up attribute <function> in plugin module <name> of kind <kind>.
    The wiki's own plugins are tried first (importWikiPlugin); only if the
    plugin is missing there, the builtin one is used (importBuiltinPlugin).
    Use one of these two functions directly if you want a specific plugin.

    kind may be one of 'action', 'formatter', 'macro', 'parser' or any other
    directory that exist in MoinMoin or data/plugin.

    If <function> is None, the whole module object is returned.
    PluginMissingError is raised if the plugin <name> can not be found,
    PluginAttributeError if it has no <function> attribute.

    @param cfg: wiki config instance
    @param kind: what kind of module we want to import
    @param name: the name of the module
    @param function: the function name
    @rtype: any object
    @return: "function" of module "name" of kind "kind"
    """
    try:
        plugin = importWikiPlugin(cfg, kind, name, function)
    except PluginMissingError:
        # not provided by the wiki, fall back to what moin ships
        plugin = importBuiltinPlugin(kind, name, function)
    return plugin
1062 
1063 
def importWikiPlugin(cfg, kind, name, function="execute"):
    """ Import a plugin from the wiki's own plugin packages

    Raises PluginMissingError if wikiPlugins() knows no plugin <name> of
    the given kind. See importPlugin docstring for the parameters.
    """
    package = wikiPlugins(kind, cfg).get(name)
    if package is None:
        raise PluginMissingError()
    return importNameFromPlugin('%s.%s' % (package, name), function)
1075 
1076 
def importBuiltinPlugin(kind, name, function="execute"):
    """ Import a builtin plugin from the MoinMoin package

    Raises PluginMissingError if <name> is not listed by builtinPlugins()
    for this kind. See importPlugin docstring for the parameters.
    """
    if name in builtinPlugins(kind):
        return importNameFromPlugin('MoinMoin.%s.%s' % (kind, name), function)
    raise PluginMissingError()
1086 
1087 
def importNameFromPlugin(moduleName, name):
    """ Import module <moduleName> and return its attribute <name>.

        If name is None, the module object itself is returned.
        Raises PluginAttributeError if the module has no attribute <name>.
    """
    if name is None:
        # With an empty fromlist, __import__ hands back the toplevel package
        # of a dotted name (see __import__ docs!), so walk down to the
        # module that was actually asked for.
        module = __import__(moduleName, globals(), {}, [])
        for part in moduleName.split('.')[1:]:
            module = getattr(module, part)
        return module

    # With a non-empty fromlist, __import__ returns module <moduleName> itself.
    module = __import__(moduleName, globals(), {}, [name])
    try:
        return getattr(module, name)
    except AttributeError:
        raise PluginAttributeError
1111 
1112 
def builtinPlugins(kind):
    """ Gets the list of builtin plugin modules of package MoinMoin.<kind>

    This is whatever pysupport.importName yields for the name "modules" of
    that package.

    @param kind: what kind of modules we look for
    @rtype: list
    @return: module names
    """
    return pysupport.importName("MoinMoin." + kind, "modules")
1122 
1123 
def wikiPlugins(kind, cfg):
    """
    Gets a dict mapping the name of every wiki plugin of @kind to the name
    of the package that contains it.

    The result is stored in cfg._site_plugin_lists and reused on later
    calls. If several entries of cfg._plugin_modules provide a plugin of
    the same name, the first one wins.

    @param kind: what kind of modules we look for
    @param cfg: wiki config instance
    @rtype: dict
    @return: plugin name to containing module name mapping
    """
    cache = cfg._site_plugin_lists
    # short-cut if we've loaded the dict already
    # (or already failed to load it)
    if kind in cache:
        return cache[kind]

    found = {}
    for modname in cfg._plugin_modules:
        try:
            package = pysupport.importName(modname, kind)
            packagepath = os.path.dirname(package.__file__)
            for plugin in pysupport.getPluginModules(packagepath):
                # keep an entry made by an earlier plugin package
                found.setdefault(plugin, '%s.%s' % (modname, kind))
        except AttributeError:
            # this plugin package gives us nothing usable for this kind
            pass
    cache[kind] = found
    return found
1152 
1153 
def getPlugins(kind, cfg):
    """ Gets the names of all plugins of kind, builtin ones first

    @param kind: what kind of modules we look for
    @param cfg: wiki config instance
    @rtype: list
    @return: module names
    """
    # Work on a copy, so the list of builtin plugins is not modified.
    names = builtinPlugins(kind)[:]

    # Append the wiki plugins that do not merely override a builtin one.
    names.extend([plugin for plugin in wikiPlugins(kind, cfg)
                  if plugin not in names])
    return names
1170 
1171 
def searchAndImportPlugin(cfg, type, name, what=None):
    """ Find and import the best matching plugin for mimetype/format <name>

    The candidate module names produced by MimeType(name).module_name()
    are tried in order and the first plugin that exists is used.

    @param cfg: wiki config instance
    @param type: plugin kind, e.g. "parser" or "formatter"
    @param name: mimetype (or format name) to find a plugin for
    @param what: name of the attribute to import from the plugin module;
                 if None, "Parser" resp. "Formatter" is used depending on
                 type (other types need an explicit value)
    @return: attribute <what> of the first candidate plugin found
    @raise PluginMissingError: if none of the candidates exists
    """
    if what is None:
        what = {"parser": "Parser",
                "formatter": "Formatter",
        }[type]
    for candidate in MimeType(name).module_name():
        try:
            return importPlugin(cfg, type, candidate, what)
        except PluginMissingError:
            continue
    raise PluginMissingError("Plugin not found! (%r %r %r)" % (type, name, what))
1189 
1190 
1191 #############################################################################
1192 ### Parsers
1193 #############################################################################
1194 
def getParserForExtension(cfg, extension):
    """
    Look up the Parser class that handles files with the given extension.

    The extension has to be given the way os.path.splitext returns it
    (i.e. with the dot). If no parser claims the extension, the parser
    that declared extensions '*' is returned, or None if there is no such
    parser. The extension -> parser dict is built on first use and cached
    in the config object.

    @param cfg: the Config instance for the wiki in question
    @param extension: the filename extension including the dot
    @rtype: class, None
    @returns: the parser class or None
    """
    if not hasattr(cfg.cache, 'EXT_TO_PARSER'):
        by_extension = {}
        fallback = None
        names = getPlugins('parser', cfg)
        # force the 'highlight' parser to be the first entry in the list
        # this makes it possible to overwrite some mapping entries later, so that
        # moin will use some "better" parser for some filename extensions
        names.remove('highlight')
        names.insert(0, 'highlight')
        for pname in names:
            try:
                Parser = importPlugin(cfg, 'parser', pname, 'Parser')
            except PluginMissingError:
                continue
            if not hasattr(Parser, 'extensions'):
                continue
            exts = Parser.extensions
            if isinstance(exts, list):
                for ext in exts:
                    by_extension[ext] = Parser
            elif str(exts) == '*':
                # this parser is willing to handle any extension
                fallback = Parser
        cfg.cache.EXT_TO_PARSER = by_extension
        cfg.cache.EXT_TO_PARSER_DEFAULT = fallback

    return cfg.cache.EXT_TO_PARSER.get(extension, cfg.cache.EXT_TO_PARSER_DEFAULT)
1232 
1233 
1234 #############################################################################
1235 ### Parameter parsing
1236 #############################################################################
1237 
class BracketError(Exception):
    """ Base class of the errors raised for unbalanced brackets """
1240 
class BracketUnexpectedCloseError(BracketError):
    """ A closing bracket was found where none (or another one) was expected

    The offending bracket is available as attribute 'bracket'.
    """
    def __init__(self, bracket):
        message = "Unexpected closing bracket %s" % bracket
        BracketError.__init__(self, message)
        self.bracket = bracket
1245 
class BracketMissingCloseError(BracketError):
    """ The input ended while a bracket was still open

    The closing bracket that is missing is available as attribute 'bracket'.
    """
    def __init__(self, bracket):
        message = "Missing closing bracket %s" % bracket
        BracketError.__init__(self, message)
        self.bracket = bracket
1250 
class ParserPrefix:
    """
    Trivial container-class holding a single character for
    the possible prefixes for parse_quoted_separated_ext
    and implementing rich equal comparison.

    Two instances are equal if they hold the same prefix; an instance is
    never equal to an object of another type.
    """
    def __init__(self, prefix):
        self.prefix = prefix

    def __eq__(self, other):
        return isinstance(other, ParserPrefix) and other.prefix == self.prefix

    def __ne__(self, other):
        # Python 2 does not derive != from __eq__; without this method two
        # equal prefixes would still compare as "not equal".
        return not self.__eq__(other)

    def __repr__(self):
        return '<ParserPrefix(%s)>' % self.prefix.encode('utf-8')
1265 
def parse_quoted_separated_ext(args, separator=None, name_value_separator=None,
                               brackets=None, seplimit=0, multikey=False,
                               prefixes=None, quotes='"'):
    """
    Parses the given string according to the other parameters.

    Items can be quoted with any character from the quotes parameter
    and each quote can be escaped by doubling it, the separator and
    name_value_separator can both be quoted, when name_value_separator
    is set then the name can also be quoted.

    Values that are not given are returned as None, while the
    empty string as a value can be achieved by quoting it.

    If a name or value does not start with a quote, then the quote
    looses its special meaning for that name or value, unless it
    starts with one of the given prefixes (the parameter is unicode
    containing all allowed prefixes.) The prefixes will be returned
    as ParserPrefix() instances in the first element of the tuple
    for that particular argument.

    If multiple separators follow each other, this is treated as
    having None arguments inbetween, that is also true for when
    space is used as separators (when separator is None), filter
    them out afterwards.

    The function can also do bracketing, i.e. parse expressions
    that contain things like
        "(a (a b))" to [['(', 'a', ['(', 'a', 'b']]],
    in this case, as in this example, the returned list will
    contain sub-lists and the brackets parameter must be a list
    of opening and closing brackets, e.g.
        brackets = ['()', '<>']
    Each sub-list's first item is the opening bracket used for
    grouping.
    Nesting will be observed between the different types of
    brackets given. If bracketing doesn't match, a BracketError
    instance is raised with a 'bracket' property indicating the
    type of missing or unexpected bracket, the instance will be
    either of the class BracketMissingCloseError or of the class
    BracketUnexpectedCloseError.

    If multikey is True (along with setting name_value_separator),
    then the returned tuples for (key, value) pairs can also have
    multiple keys, e.g.
        "a=b=c" -> ('a', 'b', 'c')
    Without multikey, only the first name_value_separator of an argument
    separates, any further one becomes part of the value.

    @param args: arguments to parse, must be unicode
    @param separator: the argument separator, defaults to None, meaning any
        space separates arguments
    @param name_value_separator: single character separating name and
        value, defaults to None; name=value keywords not parsed if it
        evaluates to False
    @param brackets: a list of two-character strings giving
        opening and closing brackets
    @param seplimit: limits the number of parsed arguments
    @param multikey: multiple keys allowed for a single value
    @param prefixes: unicode containing all allowed prefix characters
    @param quotes: string containing all characters usable for quoting
    @rtype: list
    @returns: list of unicode strings and tuples containing
        unicode strings, or lists containing the same for
        bracketing support
    @raise TypeError: if args is not unicode
    """
    idx = 0
    assert name_value_separator is None or name_value_separator != separator
    assert name_value_separator is None or len(name_value_separator) == 1
    if not isinstance(args, unicode):
        raise TypeError('args must be unicode')
    end = len(args)
    result = []         # result list
    cur = [None]        # current item
    quoted = None       # we're inside quotes, indicates quote character used
    skipquote = 0       # next quote is a quoted quote
    noquote = False     # no quotes expected because word didn't start with one
    seplimit_reached = False # number of separators exhausted
    separator_count = 0 # number of separators encountered
    SPACE = [' ', '\t', ]
    # nextitemsep: characters in front of which trailing space gets dropped
    if separator is None:
        nextitemsep = SPACE[:]
        separators = SPACE
    else:
        nextitemsep = [separator]
        separators = [separator]
    if name_value_separator:
        nextitemsep.append(name_value_separator)

    # bracketing support
    opening = []
    closing = []
    bracketstack = []   # (expected closing bracket, enclosing result list)
    matchingbracket = {}
    if brackets:
        for o, c in brackets:
            assert not o in opening
            opening.append(o)
            assert not c in closing
            closing.append(c)
            matchingbracket[o] = c

    def additem(result, cur, separator_count, nextitemsep):
        # Flush the current item into result (in place) and hand back the
        # fresh parser state for the next item.
        if len(cur) == 1:
            result.extend(cur)
        elif cur:
            result.append(tuple(cur))
        cur = [None]
        noquote = False
        separator_count += 1
        seplimit_reached = False
        if seplimit and separator_count >= seplimit:
            seplimit_reached = True
            nextitemsep = [n for n in nextitemsep if n in separators]

        return cur, noquote, separator_count, seplimit_reached, nextitemsep

    while idx < end:
        char = args[idx]
        lookahead = None
        if idx + 1 < end:
            lookahead = args[idx+1]
        if skipquote:
            skipquote -= 1
        if not separator is None and not quoted and char in SPACE:
            spaces = ''
            # accumulate all space
            while char in SPACE and idx < end - 1:
                spaces += char
                idx += 1
                char = args[idx]
            # remove space if args end with it
            if char in SPACE and idx == end - 1:
                break
            # remove space at end of argument
            # (idx is not advanced, the separator itself is handled next round)
            if char in nextitemsep:
                continue
            idx -= 1
            if len(cur) and cur[-1]:
                cur[-1] = cur[-1] + spaces
        elif not quoted and char == name_value_separator:
            if multikey or len(cur) == 1:
                cur.append(None)
            else:
                # a value was already started and only one key is allowed:
                # the separator character is part of the value
                if cur[-1] is None:
                    cur[-1] = ''
                cur[-1] += name_value_separator
            noquote = False
        elif not quoted and not seplimit_reached and char in separators:
            (cur, noquote, separator_count, seplimit_reached,
             nextitemsep) = additem(result, cur, separator_count, nextitemsep)
        elif not quoted and not noquote and char in quotes:
            if len(cur) and cur[-1] is None:
                del cur[-1]
            cur.append(u'')
            quoted = char
        elif char == quoted and not skipquote:
            if lookahead == quoted:
                skipquote = 2 # will be decremented right away
            else:
                quoted = None
        elif not quoted and char in opening:
            while len(cur) and cur[-1] is None:
                del cur[-1]
            (cur, noquote, separator_count, seplimit_reached,
             nextitemsep) = additem(result, cur, separator_count, nextitemsep)
            # collect into a new sub-list until the matching bracket closes it
            bracketstack.append((matchingbracket[char], result))
            result = [char]
        elif not quoted and char in closing:
            while len(cur) and cur[-1] is None:
                del cur[-1]
            (cur, noquote, separator_count, seplimit_reached,
             nextitemsep) = additem(result, cur, separator_count, nextitemsep)
            cur = []
            if not bracketstack:
                raise BracketUnexpectedCloseError(char)
            expected, oldresult = bracketstack[-1]
            if not expected == char:
                raise BracketUnexpectedCloseError(char)
            del bracketstack[-1]
            oldresult.append(result)
            result = oldresult
        elif not quoted and prefixes and char in prefixes and cur == [None]:
            cur = [ParserPrefix(char)]
            cur.append(None)
        else:
            if len(cur):
                if cur[-1] is None:
                    cur[-1] = char
                else:
                    cur[-1] += char
            else:
                cur.append(char)
            noquote = True

        idx += 1

    if bracketstack:
        raise BracketMissingCloseError(bracketstack[-1][0])

    if quoted:
        # quote was never closed: put the quote character back in front
        if len(cur):
            if cur[-1] is None:
                cur[-1] = quoted
            else:
                cur[-1] = quoted + cur[-1]
        else:
            cur.append(quoted)

    additem(result, cur, separator_count, nextitemsep)

    return result
1478 
def parse_quoted_separated(args, separator=',', name_value=True, seplimit=0):
    """ Split args into positional and keyword arguments

    This is a wrapper around parse_quoted_separated_ext (without bracket,
    prefix and multikey support) that sorts the parsed items:
    with name_value being true, '=' separates name and value and a tuple
    (positional, keywords, trailing) is returned, where trailing collects
    the non-keyword items found after the first keyword item.
    Otherwise just the list of items is returned.

    @param args: arguments to parse, unicode
    @param separator: the argument separator
    @param name_value: whether to parse name=value keywords
    @param seplimit: limits the number of parsed arguments
    @rtype: tuple (list, dict, list) or list
    """
    if name_value:
        name_value_separator = '='
        keywords = {}
        trailing = []
    else:
        name_value_separator = None

    leading = []
    target = leading # list that currently receives the non-keyword items
    for item in parse_quoted_separated_ext(args, separator=separator,
                                           name_value_separator=name_value_separator,
                                           seplimit=seplimit):
        if not isinstance(item, tuple):
            target.append(item)
            continue
        key, value = item
        if key is None:
            key = u''
        keywords[key] = value
        # from the first keyword on, plain items go to the trailing list
        target = trailing

    if name_value:
        return leading, keywords, trailing
    return leading
1505 
def get_bool(request, arg, name=None, default=None):
    """
    Convert a unicode string, e.g. a value returned from
    parse_quoted_separated or a macro parameter, to a boolean.
    Accepted (case-insensitively) are 'true'/'false', 'yes'/'no' and
    '1'/'0'; None yields the default value.

    @param request: A request instance
    @param arg: The argument, may be None or a unicode string
    @param name: Name of the argument, for error messages
    @param default: default value if arg is None
    @rtype: boolean or None
    @returns: the boolean value of the string according to above rules
              (or default value)
    @raise TypeError: if arg is neither None nor unicode
    @raise ValueError: if arg is not one of the accepted strings
    """
    _ = request.getText
    assert default is None or isinstance(default, bool)
    if arg is None:
        return default
    if not isinstance(arg, unicode):
        raise TypeError('Argument must be None or unicode')

    arg = arg.lower()
    if arg in [u'1', u'true', u'yes']:
        return True
    if arg in [u'0', u'false', u'no']:
        return False

    if name:
        message = _('Argument "%s" must be a boolean value, not "%s"') % (
            name, arg)
    else:
        message = _('Argument must be a boolean value, not "%s"') % arg
    raise ValueError(message)
1540 
1541 
def get_int(request, arg, name=None, default=None):
    """
    Convert a unicode string, e.g. a value returned from
    parse_quoted_separated or a macro parameter, holding the decimal
    representation of a number to an integer.
    None is a valid input and yields the default value.

    @param request: A request instance
    @param arg: The argument, may be None or a unicode string
    @param name: Name of the argument, for error messages
    @param default: default value if arg is None
    @rtype: int or None
    @returns: the integer value of the string (or default value)
    @raise TypeError: if arg is neither None nor unicode
    @raise ValueError: if arg can not be converted
    """
    _ = request.getText
    assert default is None or isinstance(default, (int, long))
    if arg is None:
        return default
    if not isinstance(arg, unicode):
        raise TypeError('Argument must be None or unicode')
    try:
        return int(arg)
    except ValueError:
        if name:
            message = _('Argument "%s" must be an integer value, not "%s"') % (
                name, arg)
        else:
            message = _('Argument must be an integer value, not "%s"') % arg
        raise ValueError(message)
1572 
1573 
def get_float(request, arg, name=None, default=None):
    """
    Convert a unicode string, e.g. a value returned from
    parse_quoted_separated or a macro parameter, to a float.
    None is a valid input and yields the default value.

    @param request: A request instance
    @param arg: The argument, may be None or a unicode string
    @param name: Name of the argument, for error messages
    @param default: default return value if arg is None
    @rtype: float or None
    @returns: the float value of the string (or default value)
    @raise TypeError: if arg is neither None nor unicode
    @raise ValueError: if arg can not be converted
    """
    _ = request.getText
    assert default is None or isinstance(default, (int, long, float))
    if arg is None:
        return default
    if not isinstance(arg, unicode):
        raise TypeError('Argument must be None or unicode')
    try:
        return float(arg)
    except ValueError:
        if name:
            message = _('Argument "%s" must be a floating point value, not "%s"') % (
                name, arg)
        else:
            message = _('Argument must be a floating point value, not "%s"') % arg
        raise ValueError(message)
1603 
1604 
def get_complex(request, arg, name=None, default=None):
    """
    For use with values returned from parse_quoted_separated or given
    as macro parameters, return a complex from a unicode string.
    None is a valid input and yields the default value.

    Both 'i' and 'j' (in either case) are accepted as the imaginary unit.

    @param request: A request instance, its getText translates the
                    error messages
    @param arg: The argument, may be None or a unicode string
    @param name: Name of the argument, for error messages
    @param default: default return value if arg is None; must be None or
                    a number (int, long, float or complex)
    @rtype: complex or None
    @returns: the complex value of the string (or default value)
    @raises TypeError: arg is neither None nor a unicode string
    @raises ValueError: arg can not be converted to a complex number
    """
    _ = request.getText
    assert default is None or isinstance(default, (int, long, float, complex))
    if arg is None:
        return default
    elif not isinstance(arg, unicode):
        raise TypeError('Argument must be None or unicode')
    try:
        # allow writing 'i' instead of 'j'; convert a copy and leave arg
        # alone, so the error messages below quote what the user wrote
        return complex(arg.replace('i', 'j').replace('I', 'j'))
    except ValueError:
        if name:
            raise ValueError(
                _('Argument "%s" must be a complex value, not "%s"') % (
                    name, arg))
        else:
            raise ValueError(
                _('Argument must be a complex value, not "%s"') % arg)
1636 
1637 
def get_unicode(request, arg, name=None, default=None):
    """
    Pass a unicode macro argument through, after checking its type.

    Meant for values returned from parse_quoted_separated or given as
    macro parameters. None is valid input and yields the default value.

    @param request: A request instance (not used here)
    @param arg: The argument, may be None or a unicode string
    @param name: Name of the argument (not used here)
    @param default: value to return if arg is None; must be None or
                    a unicode string
    @rtype: unicode or None
    @returns: the unicode string (or default value)
    @raises TypeError: arg is neither None nor a unicode string
    """
    assert default is None or isinstance(default, unicode)
    if arg is not None:
        if isinstance(arg, unicode):
            return arg
        raise TypeError('Argument must be None or unicode')
    return default
1658 
1659 
def get_choice(request, arg, name=None, choices=[None], default_none=False):
    """
    Check that a macro argument is one of the permitted choices.

    Meant for values returned from parse_quoted_separated or given as
    macro parameters. None is valid input and yields the first of the
    choices, unless default_none is set.

    @param request: A request instance, its getText translates the
                    error messages
    @param arg: The argument, may be None or a unicode string
    @param name: Name of the argument, for error messages
    @param choices: the possible choices (tuple or list)
    @param default_none: If False (default), get_choice returns first available
                         choice if arg is None. If True, get_choice returns
                         None if arg is None. This is useful if some arg value
                         is required (no default choice).
    @rtype: unicode or None
    @returns: the unicode string (or default value)
    @raises TypeError: arg is neither None nor a unicode string
    @raises ValueError: arg is not among the choices
    """
    assert isinstance(choices, (tuple, list))
    if arg is None:
        if default_none:
            return None
        return choices[0]
    if not isinstance(arg, unicode):
        raise TypeError('Argument must be None or unicode')
    if arg in choices:
        return arg

    # not a valid choice: build the error message listing what is allowed
    _ = request.getText
    if name:
        template = _('Argument "%s" must be one of "%s", not "%s"')
        leading = (name, )
    else:
        template = _('Argument must be one of "%s", not "%s"')
        leading = ()
    allowed = '", "'.join([repr(choice) for choice in choices])
    raise ValueError(template % (leading + (allowed, arg)))
1699 
1700 
class IEFArgument:
    """
    Common base of the custom argument parsers understood by
    invoke_extension_function.

    Subclasses have to implement parse_argument and get_default;
    the versions here only raise NotImplementedError.
    """
    def __init__(self):
        """
        Nothing to set up in the base class.
        """

    def parse_argument(self, s):
        """
        Convert the argument given in s (a string) into the value
        that is handed to the extension function.
        """
        raise NotImplementedError

    def get_default(self):
        """
        Give the value to use when the argument was not supplied.
        """
        raise NotImplementedError
1721 
1722 
class UnitArgument(IEFArgument):
    """
    Argument class for invoke_extension_function that requires a value
    to carry one of the specified units.

    If no units are specified, only "mm" is permitted.

    Use, for example, "UnitArgument('7mm', float, ['%', 'mm'])".

    Parsed values are (value, unit) tuples.

    If the defaultunit parameter is given, any argument that
    can be converted into the given argtype is assumed to have
    the default unit. NOTE: This doesn't work with a choice
    (tuple or list) argtype.
    """
    def __init__(self, default, argtype, units=['mm'], defaultunit=None):
        """
        Set up a UnitArgument.

        @param default: default value including its unit (e.g. '7mm'),
                        or None for no default
        @param argtype: callable converting the text before the unit
        @param units: the permitted units
        @param defaultunit: unit assumed for values given without unit;
                            must be one of units (or None)
        """
        IEFArgument.__init__(self)
        # longest units first, so that a longer unit is preferred over a
        # shorter one that is its suffix; the sort is stable, units of
        # equal length keep the order in which they were given
        self._units = list(units)
        self._units.sort(key=len, reverse=True)
        self._type = argtype
        self._defaultunit = defaultunit
        assert defaultunit is None or defaultunit in units
        if default is None:
            self._default = None
        else:
            self._default = self.parse_argument(default)

    def parse_argument(self, s):
        """
        Split s into value and unit.

        @param s: the argument string, e.g. '7mm'
        @returns: tuple (converted value, unit)
        @raises ValueError: s ends with none of the permitted units and
                            can not be used with the default unit either
        """
        for unit in self._units:
            if not s.endswith(unit):
                continue
            number = s[:len(s) - len(unit)]
            return (self._type(number), unit)
        if self._defaultunit is not None:
            try:
                return (self._type(s), self._defaultunit)
            except ValueError:
                # not convertible as a whole either, report it below
                pass
        ## XXX: how can we translate this?
        raise ValueError("Invalid unit in value %s (allowed units: %s)" % (
            s, ', '.join(self._units)))

    def get_default(self):
        """
        Return the parsed default, a (value, unit) tuple, or None.
        """
        return self._default
1769 
1770 
class required_arg:
    """
    Marker for a mandatory argument: wrap the wanted type in this class
    and use it as default value of a parameter of a function passed to
    invoke_extension_function(), which then checks generically that the
    argument was given.
    """
    def __init__(self, argtype):
        """
        Initialise a required_arg
        @param argtype: the type the argument should have: one of bool,
                        int, long, float, complex, unicode, or an
                        IEFArgument instance, or a tuple/list of choices
        @raises TypeError: argtype is none of these
        """
        is_plain_type = argtype in (bool, int, long, float, complex, unicode)
        if not is_plain_type and not isinstance(argtype,
                                                (IEFArgument, tuple, list)):
            raise TypeError("argtype must be a valid type")
        self.argtype = argtype
1786 
1787 
def invoke_extension_function(request, function, args, fixed_args=[]):
    """
    Parses arguments for an extension call and calls the extension
    function with the arguments.

    If the macro function has a default value that is a bool,
    int, long, float or unicode object, then the given value
    is converted to the type of that default value before passing
    it to the macro function. That way, macros need not call the
    wikiutil.get_* functions for any arguments that have a default.

    Two parameter names of the called function are special:
    a parameter named _kwargs receives a dict of all keyword arguments
    that match no parameter name, a parameter named _trailing_args
    receives a list of the surplus positional arguments. Without these
    parameters, such arguments raise a ValueError.

    @param request: the request object
    @param function: the function to invoke (function, method or class)
    @param args: unicode string with arguments (or evaluating to False)
    @param fixed_args: fixed arguments to pass as the first arguments
    @returns: the return value from the function called
    @raises TypeError: function is neither a function, method nor class
    @raises ValueError: the arguments do not fit the function (too many,
                        unknown name, missing required argument, value
                        not convertible to the expected type)
    """

    def _convert_arg(request, value, default, name=None):
        """
        Using the get_* functions, convert argument to the type of the default
        if that is any of bool, int, long, float or unicode; if the default
        is the type itself then convert to that type (keeps None) or if the
        default is a list require one of the list items.

        In other cases return the value itself.
        """
        # if extending this, extend required_arg as well!
        if isinstance(default, bool):
            return get_bool(request, value, name, default)
        elif isinstance(default, (int, long)):
            return get_int(request, value, name, default)
        elif isinstance(default, float):
            return get_float(request, value, name, default)
        elif isinstance(default, complex):
            return get_complex(request, value, name, default)
        elif isinstance(default, unicode):
            return get_unicode(request, value, name, default)
        elif isinstance(default, (tuple, list)):
            return get_choice(request, value, name, default)
        elif default is bool:
            return get_bool(request, value, name)
        elif default is int or default is long:
            return get_int(request, value, name)
        elif default is float:
            return get_float(request, value, name)
        elif default is complex:
            return get_complex(request, value, name)
        elif isinstance(default, IEFArgument):
            # defaults handled later
            if value is None:
                return None
            return default.parse_argument(value)
        elif isinstance(default, required_arg):
            if isinstance(default.argtype, (tuple, list)):
                # treat choice specially and return None if no choice
                # is given in the value
                return get_choice(request, value, name, list(default.argtype),
                       default_none=True)
            else:
                return _convert_arg(request, value, default.argtype, name)
        return value

    assert isinstance(fixed_args, (list, tuple))

    _ = request.getText

    kwargs = {}
    kwargs_to_pass = {}
    trailing_args = []

    if args:
        assert isinstance(args, unicode)

        positional, keyword, trailing = parse_quoted_separated(args)

        for kw in keyword:
            try:
                kwargs[str(kw)] = keyword[kw]
            except UnicodeEncodeError:
                # a non-ascii name can not be a parameter name,
                # so it can only go into _kwargs
                kwargs_to_pass[kw] = keyword[kw]

        trailing_args.extend(trailing)

    else:
        positional = []

    if isfunction(function) or ismethod(function):
        argnames, varargs, varkw, defaultlist = getargspec(function)
    elif isclass(function):
        (argnames, varargs,
         varkw, defaultlist) = getargspec(function.__init__.im_func)
    else:
        raise TypeError('function must be a function, method or class')

    # self is implicit!
    if ismethod(function) or isclass(function):
        argnames = argnames[1:]

    fixed_argc = len(fixed_args)
    argnames = argnames[fixed_argc:]
    argc = len(argnames)
    if not defaultlist:
        defaultlist = []

    # if the fixed parameters have defaults too...
    # getargspec's defaults belong to the LAST len(defaultlist) parameters,
    # so every default beyond argc belongs to a fixed parameter. Drop
    # exactly those (not fixed_argc of them: the leading fixed parameters
    # may have no default at all).
    surplus = len(defaultlist) - argc
    if surplus > 0:
        defaultlist = defaultlist[surplus:]
    defstart = argc - len(defaultlist)

    defaults = {}
    # reverse to be able to pop() things off
    positional.reverse()
    allow_kwargs = False
    allow_trailing = False
    # convert all arguments to keyword arguments,
    # fill all arguments that weren't given with None
    for idx in range(argc):
        argname = argnames[idx]
        if argname == '_kwargs':
            allow_kwargs = True
            continue
        if argname == '_trailing_args':
            allow_trailing = True
            continue
        if positional:
            kwargs[argname] = positional.pop()
        if not argname in kwargs:
            kwargs[argname] = None
        if idx >= defstart:
            defaults[argname] = defaultlist[idx - defstart]

    if positional:
        if not allow_trailing:
            raise ValueError(_('Too many arguments'))
        trailing_args.extend(positional)

    if trailing_args:
        if not allow_trailing:
            raise ValueError(_('Cannot have arguments without name following'
                               ' named arguments'))
        kwargs['_trailing_args'] = trailing_args

    # type-convert all keyword arguments to the type
    # that the default value indicates
    # (iterate over a copy of the keys, entries get deleted in the loop)
    for argname in kwargs.keys()[:]:
        if argname in defaults:
            # the value of 'argname' from kwargs will be put into the
            # macro's 'argname' argument, so convert that giving the
            # name to the converter so the user is told which argument
            # went wrong (if it does)
            kwargs[argname] = _convert_arg(request, kwargs[argname],
                                           defaults[argname], argname)
            if kwargs[argname] is None:
                if isinstance(defaults[argname], required_arg):
                    raise ValueError(_('Argument "%s" is required') % argname)
                if isinstance(defaults[argname], IEFArgument):
                    kwargs[argname] = defaults[argname].get_default()

        if not argname in argnames:
            # move argname into _kwargs parameter
            kwargs_to_pass[argname] = kwargs[argname]
            del kwargs[argname]

    if kwargs_to_pass:
        kwargs['_kwargs'] = kwargs_to_pass
        if not allow_kwargs:
            raise ValueError(_(u'No argument named "%s"') % (
                kwargs_to_pass.keys()[0]))

    return function(*fixed_args, **kwargs)
1959 
1960 
1961 def parseAttributes(request, attrstring, endtoken=None, extension=None):
1962     """
1963     Parse a list of attributes and return a dict plus a possible
1964     error message.
1965     If extension is passed, it has to be a callable that returns
1966     a tuple (found_flag, msg). found_flag is whether it did find and process
1967     something, msg is '' when all was OK or any other string to return an error
1968     message.
1969 
1970     @param request: the request object
1971     @param attrstring: string containing the attributes to be parsed
1972     @param endtoken: token terminating parsing
1973     @param extension: extension function -
1974                       gets called with the current token, the parser and the dict
1975     @rtype: dict, msg
1976     @return: a dict plus a possible error message
1977     """
1978     import shlex, StringIO
1979 
1980     _ = request.getText
1981 
1982     parser = shlex.shlex(StringIO.StringIO(attrstring))
1983     parser.commenters = ''
1984     msg = None
1985     attrs = {}
1986 
1987     while not msg:
1988         try:
1989             key = parser.get_token()
1990         except ValueError, err:
1991             msg = str(err)
1992             break
1993         if not key:
1994             break
1995         if endtoken and key == endtoken:
1996             break
1997 
1998         # call extension function with the current token, the parser, and the dict
1999         if extension:
2000             found_flag, msg = extension(key, parser, attrs)
2001             #logging.debug("%r = extension(%r, parser, %r)" % (msg, key, attrs))
2002             if found_flag:
2003                 continue
2004             elif msg:
2005                 break
2006             #else (we found nothing, but also didn't have an error msg) we just continue below:
2007 
2008         try:
2009             eq = parser.get_token()
2010         except ValueError, err:
2011             msg = str(err)
2012             break
2013         if eq != "=":
2014             msg = _('Expected "=" to follow "%(token)s"') % {'token': key}
2015             break
2016 
2017         try:
2018             val = parser.get_token()
2019         except ValueError, err:
2020             msg = str(err)
2021             break
2022         if not val:
2023             msg = _('Expected a value for key "%(token)s"') % {'token': key}
2024             break
2025 
2026         key = escape(key) # make sure nobody cheats
2027 
2028         # safely escape and quote value
2029         if val[0] in ["'", '"']:
2030             val = escape(val)
2031         else:
2032             val = '"%s"' % escape(val, 1)
2033 
2034         attrs[key.lower()] = val
2035 
2036     return attrs, msg or ''
2037 
2038 
class ParameterParser:
    """ MoinMoin macro parameter parser

        Parses a given parameter string, separates the individual parameters
        and detects their type.

        Possible parameter types are:

        Name      | short  | example
        ----------------------------
         Integer  | i      | -374
         Float    | f      | 234.234 23.345E-23
         String   | s      | 'Stri\'ng'
         Boolean  | b      | 0 1 True false
         Name     |        | case_sensitive | converted to string

        So say you want to parse three things, name, age and if the
        person is male or not:

        The pattern will be: %(name)s%(age)i%(male)b

        As a result, the returned dict will put the first value into
        name, second into age etc. If some argument is missing, it will
        get None as its value. This also means that all the identifiers
        in the pattern will exist in the dict, they will just have the
        value None if they were not specified by the caller.

        So if we call it with the parameters as follows:
            ("John Smith", 18)
        this will result in the following dict:
            {"name": "John Smith", "age": 18, "male": None}

        Another way of calling would be:
            ("John Smith", male=True)
        this will result in the following dict:
            {"name": "John Smith", "age": None, "male": True}
    """

    def __init__(self, pattern):
        """
        Build the regex matching a single parameter and analyse pattern.

        @param pattern: the parameter pattern, e.g. "%(name)s%(age)i"
        @raises ValueError: an unnamed parameter follows a named one
        """
        # parameter_re = "([^\"',]*(\"[^\"]*\"|'[^']*')?[^\"',]*)[,)]"
        name = "(?P<%s>[a-zA-Z_][a-zA-Z0-9_]*)"
        int_re = r"(?P<int>-?\d+)"
        bool_re = r"(?P<bool>(([10])|([Tt]rue)|([Ff]alse)))"
        float_re = r"(?P<float>-?\d+\.\d+([eE][+-]?\d+)?)"
        string_re = (r"(?P<string>('([^']|(\'))*?')|" +
                                r'("([^"]|(\"))*?"))')
        name_re = name % "name"
        name_param_re = name % "name_param"

        # one parameter: optional "name =", the value, then "," or the end.
        # float is tried before int, int before bool (so 0 and 1 are seen
        # as integers first and converted by _check_type where needed)
        param_re = r"\s*(\s*%s\s*=\s*)?(%s|%s|%s|%s|%s)\s*(,|$)" % (
                   name_re, float_re, int_re, bool_re, string_re, name_param_re)
        self.param_re = re.compile(param_re, re.U)
        self._parse_pattern(pattern)

    def _parse_pattern(self, pattern):
        """
        Fill param_list (type codes per position), param_dict (name to
        position) and optional (positions of '|' marks) from pattern.
        """
        param_re = r"(%(?P<name>\(.*?\))?(?P<type>[ibfs]{1,3}))|\|"
        i = 0
        # TODO: Optionals aren't checked.
        self.optional = []
        named = False
        self.param_list = []
        self.param_dict = {}

        for match in re.finditer(param_re, pattern):
            if match.group() == "|":
                self.optional.append(i)
                continue
            self.param_list.append(match.group('type'))
            if match.group('name'):
                named = True
                # strip the parentheses around the name
                self.param_dict[match.group('name')[1:-1]] = i
            elif named:
                raise ValueError("Named parameter expected")
            i += 1

    def __str__(self):
        return "%s, %s, optional:%s" % (self.param_list, self.param_dict,
                                        self.optional)

    def parse_parameters(self, params):
        """
        Parse the parameter string according to the pattern.

        @param params: the parameter string, e.g. u'"John Smith", 18'
        @rtype: tuple
        @return: (fixed_count, parameter_dict); the dict has the parameter
                 names as keys (None as value if not given) and in addition
                 the positions 0 .. fixed_count-1 as keys for the leading
                 parameters that have no name in the pattern
        @raises ValueError: the parameters are malformed or do not fit
                            the pattern (unknown name, name given twice,
                            wrong type, too many parameters)
        """
        # Default list/dict entries to None
        parameter_list = [None] * len(self.param_list)
        parameter_dict = dict.fromkeys(self.param_dict)
        check_list = [0] * len(self.param_list)

        i = 0
        start = 0
        fixed_count = 0
        named = False

        while start < len(params):
            match = re.match(self.param_re, params[start:])
            if not match:
                raise ValueError("malformed parameters")
            start += match.end()
            if match.group("int"):
                pvalue = int(match.group("int"))
                ptype = 'i'
            elif match.group("bool"):
                pvalue = match.group("bool") in ("1", "True", "true")
                ptype = 'b'
            elif match.group("float"):
                pvalue = float(match.group("float"))
                ptype = 'f'
            elif match.group("string"):
                # strip the quotes
                pvalue = match.group("string")[1:-1]
                ptype = 's'
            elif match.group("name_param"):
                pvalue = match.group("name_param")
                ptype = 'n'
            else:
                raise ValueError("Parameter parser code does not fit param_re regex")

            name = match.group("name")
            if name:
                if name not in self.param_dict:
                    # TODO we should think on inheritance of parameters
                    raise ValueError("unknown parameter name '%s'" % name)
                nr = self.param_dict[name]
                if check_list[nr]:
                    raise ValueError("parameter '%s' specified twice" % name)
                else:
                    check_list[nr] = 1
                pvalue = self._check_type(pvalue, ptype, self.param_list[nr])
                parameter_dict[name] = pvalue
                parameter_list[nr] = pvalue
                named = True
            elif named:
                raise ValueError("only named parameters allowed after first named parameter")
            else:
                nr = i
                if nr >= len(self.param_list):
                    # more positional parameters than the pattern has:
                    # report it like any other parse problem instead of
                    # failing with an IndexError below
                    raise ValueError("too many parameters")
                if nr not in self.param_dict.values():
                    fixed_count = nr + 1
                parameter_list[nr] = self._check_type(pvalue, ptype, self.param_list[nr])

            # Let's populate and map our dictionary to what's been found
            for name in self.param_dict:
                tmp = self.param_dict[name]
                parameter_dict[name] = parameter_list[tmp]

            i += 1

        for i in range(fixed_count):
            parameter_dict[i] = parameter_list[i]

        return fixed_count, parameter_dict

    def _check_type(self, pvalue, ptype, format):
        """
        Convert pvalue of the detected type ptype ('i', 'b', 'f', 's' or
        'n' for a bare name) to one of the types permitted by format.

        @param pvalue: the parsed value
        @param ptype: type code of the parsed value
        @param format: the permitted type codes from the pattern
        @return: the (maybe converted) value
        @raises ValueError: no conversion to a permitted type is possible
        """
        if ptype == 'n' and 's' in format: # n as s
            return pvalue

        if ptype in format:
            return pvalue # x -> x

        if ptype == 'i':
            if 'f' in format:
                return float(pvalue) # i -> f
            elif 'b' in format:
                return pvalue != 0 # i -> b
        elif ptype == 's':
            if 'b' in format:
                if pvalue.lower() == 'false':
                    return False # s-> b
                elif pvalue.lower() == 'true':
                    return True # s-> b
                else:
                    raise ValueError('%r does not match format %r' % (pvalue, format))

        if 's' in format: # * -> s
            return str(pvalue)

        raise ValueError('%r does not match format %r' % (pvalue, format))
2211 
2212 
2213 #############################################################################
2214 ### Misc
2215 #############################################################################
def normalize_pagename(name, cfg):
    """ Normalize page name

    Keeps page names free of invisible characters and of funny
    whitespace that might confuse the users or abuse the wiki, or
    just does not make sense.

    Names of group pages are restricted even more, so they can be used
    inside acl lines.

    @param name: page name, unicode
    @param cfg: wiki configuration, handed to isGroupPage
    @rtype: unicode
    @return: decoded and sanitized page name
    """
    # Strip invalid characters
    cleaned = config.page_invalid_chars_regex.sub(u'', name)

    # Normalize every component of the page path on its own
    components = []
    for part in cleaned.split(u'/'):
        # Ignore empty or whitespace only components
        if not part or part.isspace():
            continue

        # Group pages: strip everything that is neither alphanumeric
        # nor white space
        if isGroupPage(part, cfg):
            part = u''.join([char for char in part
                             if char.isalnum() or char.isspace()])

        # Normalize white space: split() without arguments splits at any
        # run of (unicode) white space, so the words end up separated by
        # exactly one blank
        components.append(u' '.join(part.split()))

    # Assemble components into full pagename
    return u'/'.join(components)
2256 
def taintfilename(basename):
    """
    Make a name that is supposed to be a plain file name secure, by
    replacing everything that could be (mis)used as a path component.

    The parent directory name (os.pardir) and the characters
    : / \\ < > are replaced by an underscore each.

    @param basename: (possibly unsafe) filename
    @rtype: string
    @return: (safer) filename
    """
    unsafe_parts = (os.pardir, ':', '/', '\\', '<', '>')
    safe_name = basename
    for part in unsafe_parts:
        safe_name = safe_name.replace(part, '_')
    return safe_name
2270 
2271 
def drawing2fname(drawing):
    """
    Return the file name for a drawing name: names without one of the
    known drawing extensions get '.tdraw' appended.

    Note: every call (re)assigns config.drawing_extensions.

    @param drawing: name of the drawing, with or without extension
    @rtype: string
    @return: file name of the drawing
    """
    config.drawing_extensions = ['.tdraw', '.adraw',
                                 '.svg',
                                 '.png', '.jpg', '.jpeg', '.gif',
                                ]
    ext = os.path.splitext(drawing)[1]
    # note: do not just check for empty extension or stuff like drawing:foo.bar
    # will fail, instead of being expanded to foo.bar.tdraw
    if ext in config.drawing_extensions:
        return drawing
    # for backwards compatibility, twikidraw is the default:
    return drawing + '.tdraw'
2284 
2285 
def mapURL(request, url):
    """
    Rewrite a URL as configured by 'cfg.url_mappings': if the URL starts
    with one of the configured prefixes, that prefix is exchanged for
    the value configured for it.

    @param request: the request object (its cfg.url_mappings is used)
    @param url: a URL
    @rtype: string
    @return: mapped URL (the unchanged URL if no prefix matches)
    """
    mappings = request.cfg.url_mappings
    if not mappings:
        # nothing configured, nothing to map
        return url

    for prefix in mappings:
        if url.startswith(prefix):
            # substitute prefix with replacement value
            rest = url[len(prefix):]
            return mappings[prefix] + rest

    # no prefix matched
    return url
2304 
2305 
def getUnicodeIndexGroup(name):
    """
    Return a group letter for `name`, which must be a unicode string.
    Currently supported: Hangul Syllables (U+AC00 - U+D7AF)

    For a name starting with a Hangul syllable, the group is the first
    syllable of the run of 588 consecutive code points the syllable is
    in. For any other name it is the uppercased first character.

    @param name: a string
    @rtype: string
    @return: group letter or None (for an empty name)
    """
    if not name:
        # no first character to group by
        return None
    c = name[0]
    if u'\uAC00' <= c <= u'\uD7AF': # Hangul Syllables
        # round down to the start of the run of 588 (explicit floor division)
        return unichr(0xac00 + ((ord(c) - 0xac00) // 588) * 588)
    else:
        return c.upper() # we put lower and upper case words into the same index group
2320 
2321 
2322 def isStrictWikiname(name, word_re=re.compile(ur"^(?:[%(u)s][%(l)s]+){2,}$" % {'u': config.chars_upper, 'l': config.chars_lower})):
2323     """
2324     Check whether this is NOT an extended name.
2325 
2326     @param name: the wikiname in question
2327     @rtype: bool
2328     @return: true if name matches the word_re
2329     """
2330     return word_re.match(name)
2331 
2332 
def is_URL(arg, schemas=config.url_schemas):
    """ Return True if arg is a URL (with a schema given in the schemas list).

        Note: generic URLs have hardly any mandatory parts, basically only
        the ':' between schema and rest is required. Schema and rest could
        be anything, but as only the schemas given in the schemas list
        (config.url_schemas by default) are accepted here, it is a bit
        less ambiguous.

        @param arg: the string to check
        @param schemas: the accepted URL schemas
        @rtype: bool
        @return: True if arg starts with one of the schemas and a ':'
    """
    if ':' in arg:
        for schema in schemas:
            prefix = schema + ':'
            if arg.startswith(prefix):
                return True
    return False
2347 
2348 
def isPicture(url):
    """
    Tell whether a url looks like the url of a picture, judged by the
    extension after its last dot.

    @param url: the url in question
    @rtype: bool
    @return: true if url points to a picture, i.e. its lowercased
             extension is in config.browser_supported_images
    """
    last_dot = url.rfind(".")
    if last_dot < 1:
        # no dot at all, or nothing in front of the only dot
        return False
    extension = url[last_dot + 1:].lower()
    return extension in config.browser_supported_images
2359 
2360 
def link_tag(request, params, text=None, formatter=None, on=None, **kw):
    """ Create a link.

    TODO: cleanup css_class

    @param request: the request object
    @param params: parameter string appended to the URL after the scriptname/
    @param text: text / inner part of the <a>...</a> link - does NOT get
                 escaped, so you can give HTML here and it will be used verbatim
                 (defaults to params)
    @param formatter: the formatter object to use
                      (defaults to request.html_formatter)
    @param on: opening/closing tag only; None (default) gives the
               complete link
    @keyword css_class: css class of the link
    @keyword attrs: additional attrs (HTMLified string) (removed in 1.5.3)
    @rtype: string
    @return: formatted link tag
    """
    if formatter is None:
        formatter = request.html_formatter
    # css_class has its own parameter in formatter.url, one time is enough
    css_class = kw.pop('css_class', None)
    anchor_id = kw.get('id', None)
    anchor_name = kw.get('name', None)
    if text is None:
        text = params # default

    if formatter:
        url = "%s/%s" % (request.script_root, params)
        # formatter.url will escape the url part
        if on is None:
            return (formatter.url(1, url, css_class, **kw) +
                    formatter.rawHTML(text) +
                    formatter.url(0))
        return formatter.url(on, url, css_class, **kw)

    # no formatter at all - this shouldn't be used any more:
    if on is not None and not on:
        tag = '</a>'
    else:
        attr_parts = []
        if css_class:
            attr_parts.append(' class="%s"' % css_class)
        if anchor_id:
            attr_parts.append(' id="%s"' % anchor_id)
        if anchor_name:
            attr_parts.append(' name="%s"' % anchor_name)
        tag = '<a%s href="%s/%s">' % (''.join(attr_parts),
                                      request.script_root, params)
        if not on:
            tag = "%s%s</a>" % (tag, text)
    logging.warning("wikiutil.link_tag called without formatter and without request.html_formatter. tag=%r" % (tag, ))
    return tag
2412 
def containsConflictMarker(text):
    """ Tell whether text contains the start of an edit conflict marker.

    @param text: the text to search
    @rtype: bool
    @return: true if the marker occurs anywhere in text
    """
    marker = "/!\\ '''Edit conflict"
    return marker in text
2416 
def pagediff(request, pagename1, rev1, pagename2, rev2, **kw):
    """
    Diff the lines of one page revision against another page revision.

    Both pages are loaded at the requested revision and their lines are
    handed to MoinMoin.util.diff_text.diff, together with any keyword
    arguments given.

    @param request: the request object
    @param pagename1: name of first page
    @param rev1: revision of first page
    @param pagename2: name of second page
    @param rev2: revision of second page
    @keyword ignorews: if 1: ignore pure-whitespace changes.
    @rtype: list
    @return: lines of diff output
    """
    from MoinMoin.Page import Page
    from MoinMoin.util import diff_text

    first = Page(request, pagename1, rev=rev1)
    old_lines = first.getlines()
    second = Page(request, pagename2, rev=rev2)
    new_lines = second.getlines()
    return diff_text.diff(old_lines, new_lines, **kw)
2436 
def anchor_name_from_text(text):
    '''
    Derive an anchor name from arbitrary text.

    The text is encoded as utf-7 and url-quoted (keeping ':'), then '%' is
    turned into '.' and '+' into '_'. If the result does not start with a
    letter, an 'A' is put in front of it.
    The intention is to produce valid HTML IDs matching:
    [A-Za-z][A-Za-z0-9:_.-]*
    Note: this transformation has a special feature: when you feed it with a
          valid ID/name, it will return it without modification (identity
          transformation).

    @param text: the text to make an anchor name from
    @rtype: string
    @return: the anchor name
    '''
    encoded = text.encode('utf-7')
    anchor = urllib.quote_plus(encoded, safe=':')
    for unwanted, substitute in (('%', '.'), ('+', '_')):
        anchor = anchor.replace(unwanted, substitute)
    if anchor[:1].isalpha():
        return anchor
    # empty result or non-letter first character
    return 'A%s' % anchor
2450 
def split_anchor(pagename):
    """
    Split a pagename that (optionally) has an anchor into the real pagename
    and the anchor part. If there is no anchor, it returns an empty string
    for the anchor.

    The split happens at the LAST # in pagename. The result is always a
    tuple, no matter whether an anchor was found or not.

    Note: if pagename contains a # (as part of the pagename, not as anchor),
          you can use a trick to make it work nevertheless: just append a
          # at the end:
          "C##" returns ("C#", "")
          "Problem #1#" returns ("Problem #1", "")

    TODO: We shouldn't deal with composite pagename#anchor strings, but keep
          it separate.
          Current approach: [[pagename#anchor|label|attr=val,&qarg=qval]]
          Future approach:  [[pagename|label|attr=val,&qarg=qval,#anchor]]
          The future approach will avoid problems when there is a # in the
          pagename part (and no anchor). Also, we need to append #anchor
          at the END of the generated URL (AFTER the query string).

    @param pagename: page name, optionally followed by #anchor
    @rtype: tuple
    @return: (pagename, anchor)
    """
    parts = rsplit(pagename, '#', 1)
    if len(parts) == 2:
        # rsplit gives us a list - convert it, so both branches return the
        # same type (a tuple, as shown in the examples above)
        return tuple(parts)
    return pagename, ""
2476 
2477 ########################################################################
2478 ### Tickets - usually used in forms to make sure that form submissions
2479 ### are in response to a form the same user got from moin for the same
2480 ### action and same page.
2481 ########################################################################
2482 
def createTicket(request, tm=None, action=None, pagename=None):
    """ Create a ticket using a configured secret

        The ticket has the form "<tm>.<hexdigest>". The digest is a HMAC, keyed
        with request.cfg.secrets['wikiutil/tickets'], over the concatenation
        of tm, page name, action, session id and user id.

        @param request: the request object
        @param tm: timestamp part of the ticket, a hex string formatted like
                   the one generated here (optional, uses current time if not
                   given). It is used as is, it does not get converted.
        @param action: action name (optional, uses current action if not given)
                       Note: if you create a ticket for a form that calls another
                             action than the current one, you MUST specify the
                             action you call when posting the form.
        @param pagename: page name (optional, uses current page name if not given)
                       Note: if you create a ticket for a form that posts to another
                             page than the current one, you MUST specify the
                             page name you use when posting the form.
        @rtype: string
        @return: the ticket
    """

    from MoinMoin.support.python_compatibility import hmac_new
    if tm is None:
        # for age-check of ticket
        # (%x needs an integer, time.time() gives us a float)
        tm = "%010x" % int(time.time())

    # make the ticket very specific:
    if pagename is None:
        try:
            pagename = request.page.page_name
        except Exception:
            # best effort: there might be no (usable) current page. We do not
            # use a bare except here, so SystemExit and KeyboardInterrupt
            # are not swallowed.
            pagename = ''

    if action is None:
        action = request.action

    if request.session:
        # either a user is logged in or we have a anon session -
        # if session times out, ticket will get invalid
        sid = request.session.sid
    else:
        sid = ''

    if request.user.valid:
        uid = request.user.id
    else:
        uid = ''

    # the HMAC works on byte strings, so encode all unicode values
    hmac_data = []
    for value in [tm, pagename, action, sid, uid, ]:
        if isinstance(value, unicode):
            value = value.encode('utf-8')
        hmac_data.append(value)

    mac = hmac_new(request.cfg.secrets['wikiutil/tickets'],
                   ''.join(hmac_data))
    return "%s.%s" % (tm, mac.hexdigest())
2533 
2534 
def _tickets_equal(ticket1, ticket2):
    """ Compare two ticket strings without stopping at the first difference.

        A plain == returns as soon as a character differs, so the time it
        takes can tell an attacker how much of a guessed ticket was correct.
    """
    if len(ticket1) != len(ticket2):
        return False
    difference = 0
    for char1, char2 in zip(ticket1, ticket2):
        difference |= ord(char1) ^ ord(char2)
    return difference == 0


def checkTicket(request, ticket):
    """ Check validity of a previously created ticket

        A ticket is valid if its timestamp part is a hex number, it is not
        older than 10 hours and it is equal to the ticket createTicket
        computes for this request with the same timestamp part.

        @param request: the request object
        @param ticket: the ticket to check (string, as submitted by the client)
        @rtype: bool
        @return: True if the ticket is valid, False otherwise (also for an
                 empty, malformed or non-string ticket)
    """
    try:
        timestamp_str = ticket.split('.')[0]
        timestamp = int(timestamp_str, 16)
    except (AttributeError, TypeError, ValueError):
        # invalid or empty ticket (or not a string at all, e.g. None)
        logging.debug("checkTicket: invalid or empty ticket %r" % ticket)
        return False
    now = time.time()
    if timestamp < now - 10 * 3600:
        # we don't accept tickets older than 10h
        logging.debug("checkTicket: too old ticket, timestamp %r" % timestamp)
        return False
    # Note: if the session timed out, that will also invalidate the ticket,
    #       if the ticket was created within a session.
    ourticket = createTicket(request, timestamp_str)
    valid = _tickets_equal(ticket, ourticket)
    logging.debug("checkTicket: returning %r, got %r, expected %r" % (valid, ticket, ourticket))
    return valid
2554 
2555 
def renderText(request, Parser, text):
    """ executes raw wiki markup with all page elements

        The output of the request is redirected into a buffer while the
        markup is formatted with request.formatter; afterwards the redirection
        is undone - also if parsing or formatting fails.

        @param request: the request object
        @param Parser: parser class, gets instantiated as Parser(text, request)
        @param text: the markup to render
        @return: everything that was written to the request while formatting
    """
    import StringIO
    out = StringIO.StringIO()
    request.redirect(out)
    try:
        wikiizer = Parser(text, request)
        wikiizer.format(request.formatter, inhibit_p=True)
        result = out.getvalue()
    finally:
        # always switch back, otherwise an exception in the parser would
        # leave the request writing into our buffer
        request.redirect()
    return result
2567 
def get_processing_instructions(body):
    """ Extract the processing instructions / acl / etc. at the beginning of a page's body.

        Hint: if you have a Page object p, you already have the result of this function in
              p.meta and (even better) parsed/processed stuff in p.pi.

        Every leading line starting with # is consumed, until a line is found
        that does not start with # or that consists of a single # only. Such a
        single # line ends the parsing and stays in the rest of the body.
        Lines starting with ## are comments, they get the verb '#'.

        @param body: the page body (string)
        @rtype: tuple
        @return: a list of (pi, restofline) tuples (pi is lowercased,
                 restofline is stripped) and a string with the rest of the body
    """
    pi = []
    while body.startswith('#'):
        parts = body.split('\n', 1) # extract first line
        line = parts[0]

        # end parsing on empty (invalid) PI - body is not touched, so we
        # do not add a line end that was not there
        if line == "#":
            break

        if len(parts) == 2:
            body = parts[1]
        else:
            # that was the last line and it had no line end
            body = ''

        if line[1] == '#':# two hash marks are a comment
            comment = line[2:]
            if not comment.startswith(' '):
                # we don't require a blank after the ##, so we put one there
                comment = ' ' + comment
                line = '##%s' % comment

        verb, args = (line[1:] + ' ').split(' ', 1) # split at the first blank
        pi.append((verb.lower(), args.strip()))

    return pi, body
2600 
2601 
class Version(tuple):
    """
    A version like 1.2.3-4.5alpha6, kept as the 4-tuple
    (major, minor, release, additional):
    1: major version (digits only)
    2: minor version (digits only)
    3: (maintenance) release version (digits only)
    4.5alpha6: optional additional version specification (str)

    There are 2 ways to create a Version instance, either from components:
        Version(1,2,3,'4.5alpha6')
    or from the composite version string:
        Version(version="1.2.3-4.5alpha6").

    As this is a tuple subclass, comparing (also against plain tuples) is
    just what the tuple base class does.
    """
    VERSION_RE = re.compile(
        r"""(?P<major>\d+)
            \.
            (?P<minor>\d+)
            \.
            (?P<release>\d+)
            (-
             (?P<additional>.+)
            )?""",
            re.VERBOSE)

    @classmethod
    def parse_version(cls, version):
        """ Split a composite version string into its components.

            @param version: version string, like "1.2.3" or "1.2.3-4.5alpha6"
            @rtype: tuple
            @return: (major, minor, release, additional) - 3 ints and a str
                     (empty if there is no additional part)
            @raise ValueError: if version does not start with
                               major.minor.release
        """
        found = cls.VERSION_RE.match(version)
        if found is None:
            raise ValueError("Unexpected version string format: %r" % version)
        numbers = [int(found.group(part)) for part in ('major', 'minor', 'release')]
        extra = found.group('additional') or ''
        return numbers[0], numbers[1], numbers[2], str(extra)

    def __new__(cls, major=0, minor=0, release=0, additional='', version=None):
        # a given (non-empty) version string wins over the components
        if not version:
            return tuple.__new__(cls, (major, minor, release, additional))
        major, minor, release, additional = cls.parse_version(version)
        return tuple.__new__(cls, (major, minor, release, additional))

    # read-only access to the version components by name
    @property
    def major(self):
        return self[0]

    @property
    def minor(self):
        return self[1]

    @property
    def release(self):
        return self[2]

    @property
    def additional(self):
        return self[3]

    def __str__(self):
        dotted = "%d.%d.%d" % (self.major, self.minor, self.release)
        if not self.additional:
            return dotted
        return dotted + "-%s" % self.additional

Attached Files

To refer to attachments on a page, use attachment:filename, as shown below in the list of files. Do NOT use the URL of the [get] link, since this is subject to change and can break easily.
  • [get | view] (2010-08-19 16:16:12, 73.3 KB) [[attachment:Page.py]]
  • [get | view] (2010-08-19 19:29:46, 13.9 KB) [[attachment:RecentChanges.py]]
  • [get | view] (2010-08-22 20:41:59, 10.7 KB) [[attachment:recentchanges.diff]]
  • [get | view] (2010-08-19 16:16:54, 91.8 KB) [[attachment:wikiutil.py]]
 All files | Selected Files: delete move to page copy to page

You are not allowed to attach a file to this page.