Attachment 'Bibliography.py'

Download

   1 # -*- coding: iso-8859-1 -*-
   2 """MoinMoin - Bibliography
   3 
   4 A macro to insert a bibliography automatically generated from embedded
   5 bibtex markup, showing the entries cited with the Cite macro on the
   6 current page. It can optionally include another page which may be in
   7 bibtex format or contain further embedded bibtex markup.
   8 
   9 Syntax:
  10     [[Bibliography]]
  11     [[Bibliography(bib_page_name[, abstracts=abstracts, label=label,
  12         chronological=chronological, style=style])]]
  13 
  14 Parameters:
  15     bib_page_name -- page to include which contains additional embedded bibtex
  16                      entries
  17 
  18 Keyword Parameters:
  19     abstracts     -- show abstracts (values: on, true, yes, off, false, no;
  20                      default: off)
  21     label         -- show labels if the style is alpha, named, plain,
  22                      or unsort (values: on, true, yes, off, false, no;
  23                      default: on)
  24     chronological -- sort chronologically first by year then by author
  25                      instead of first by author and then by year, can
  26                      also be reversed in order to sort from last to
  27                      earliest entry instead of from earliest to last
  28                      entry (values: on, true, yes, off, false, no,
  29                      reverse, reversed; default: off)
  30     style         -- determine a style to use (values: empty, plain,
  31                      alpha, named, unsort, unsortlist; default: named)
  32 
  33 Configuration Options:
  34     bibtex_bib2html_cmd -- path and command name of bib2xhtml, needs to
  35                            be set if bib2xhtml is not in PATH
  36     bibtex_bst_path     -- path to bib2xhtml bst-style files, needs to
  37                            be set if these are not found by bibtex
  38                            automatically
  39 
  40 Notes:
  41 This macro is intended to be used with the Cite macro and bibtex
  42 parser. It depends on bibtex and bib2xhtml, see
  43 http://en.wikipedia.org/wiki/Bibtex and
  44 http://www.spinellis.gr/sw/textproc/bib2xhtml/ for more information.
  45 
  46 $Id: Bibliography.py,v 1.1.1.1 2007-06-07 20:36:15 gber Exp $
  47 Copyright 2007 by Guido Berhoerster <guido+moinmoin@berhoerster.name>
  48 Licensed under the GNU GPL, see COPYING for details.
  49 
  50 $Log: Bibliography.py,v $
  51 Revision 1.1.1.1  2007-06-07 20:36:15  gber
  52 initial import into CVS
  53 
  54 
  55 """
  56 
  57 import os
  58 import tempfile
  59 import subprocess
  60 import codecs
  61 import re
  62 from MoinMoin import wikimacro, wikiutil
  63 from MoinMoin.Page import Page
  64 
# Module-level dependency list, left empty by this macro.
# NOTE(review): presumably read by MoinMoin's macro/caching machinery to
# decide when rendered output may be reused -- confirm against the MoinMoin
# macro API, since this macro's output depends on page content.
Dependencies = []
  66 
  67 # subprocess.check_call and CalledProcessError are only available in
  68 # Python 2.5, so recreate them here
  69 class CalledProcessError(Exception):
  70     """ This exception is raised when a process run by check_call()
  71     returns a non-zero exit status.  The exit status will be stored in
  72     the returncode attribute. """
  73     def __init__(self, returncode, cmd):
  74         self.returncode = returncode
  75         self.cmd = cmd
  76     def __str__(self):
  77         return "Command '%s' returned non-zero exit status %d" \
  78                 % (self.cmd, self.returncode)
  79 
  80 
  81 class BibtexRenderer:
  82     """Abstracted bibtex renderer
  83 
  84     Arguments:
  85         bibtex  -- string containing bibtex markup to be rendered
  86         request -- request object
  87 
  88     Keyword arguments:
  89         citations     -- list with keys of bibtex entries to be
  90                          rendered (default: empty)
  91         abstracts     -- boolean, show abstracts or not
  92                          (default: False)
  93         chronological -- boolean or the string "reversed",
  94                          chronological or reversed chronological
  95                          sorting (default: False)
  96         style         -- string "empty", "plain", "alpha", "named",
  97                          "unsort", "unsortlist", determining the style
  98                          to use (default: None)
  99 
 100     Notes:
 101     Raises OSError if bib2xhtml is not found or CalledProcessError if
 102     bib2xhtml returns wit a non-zero exit status.
 103 
 104     """
 105 
 106     def __init__(self, bibtex, request, citations=[], abstracts=False,\
 107             label=False, chronological=False, style=None):
 108         cfg = request.cfg
 109         self.bib2html_cmd = "bib2xhtml"
 110         self.bst_path = None
 111 
 112         # retrieve configuration
 113         try:
 114              self.bib2html_cmd = cfg.bibtex_bib2html
 115         except AttributeError:
 116              pass
 117         try:
 118              self.bst_path = cfg.bibtex_bst_path
 119         except AttributeError:
 120              pass
 121 
 122         # the original bibtex implementation is not 8-bit clean, replace
 123         # non-ASCII characters with "?"
 124         self.bibtex = bibtex.encode("ascii", "replace")
 125 
 126         self.args = [self.bib2html_cmd, "-u", "-dMoinMoin"]
 127 
 128         if citations:
 129             cit_list = []
 130             cit_list.append(u"<!-- BEGIN CITATIONS MoinMoin -->")
 131             cit_list.append(u"<!--")
 132             cit_list.extend([ur"\citation{%s}" % c for c in citations])
 133             cit_list.append(u"-->")
 134             cit_list.append(u"<!-- END CITATIONS MoinMoin -->")
 135             self.citations = u"\n".join(cit_list)
 136             # also encode as ASCII
 137             self.citations = self.citations.encode("ascii", "replace")
 138             self.args.append("-i")
 139         else:
 140             self.citations = None
 141 
 142         if abstracts:
 143             self.args.append("-a")
 144         if label:
 145             self.args.append("-k")
 146         if chronological and chronological == "reversed":
 147             self.args.extend(["-c", "-r"])
 148         elif chronological:
 149             self.args.append("-c")
 150         if style in ("empty", "plain", "alpha", "named", "unsort", "unsortlist"):
 151             self.args.extend(["-s", style])
 152 
 153     def render(self):
 154         """Render the bibtex markup (if requested, only cited entries)
 155         and return HTML output in a string.
 156         """
 157         output = []
 158         # create temporary files for Bibtex input, HTML output, and logging
 159         bibfd, bibfile = tempfile.mkstemp(".bib")
 160         xhtmlfd, xhtmlfile = tempfile.mkstemp(".xhtml")
 161         #logfd, logfile = tempfile.mkstemp(".log")
 162         self.args.extend([bibfile, xhtmlfile])
 163 
 164         # write Bibtex input to temporary file
 165         f = open(bibfile, "w")
 166         f.write(self.bibtex)
 167         f.close()
 168 
 169         if self.citations:
 170             # write citations to temporary output file
 171             f = open(xhtmlfile, "w")
 172             f.write(self.citations)
 173             f.close()
 174 
 175         # execute bib2xhtml converter subprocess on forementionened
 176         # temporary files, bib2xhtml creates its temporary files in the
 177         # current working directory, so set it to a reasonable location, set
 178         # the set the BSTINPUTS environment variable if required in order to
 179         # help Bibtex finds the needed .bst files
 180         if self.bst_path:
 181             bstinputs = {"BSTINPUTS": self.bst_path}
 182         else:
 183             bstinputs = None
 184         try:
 185             retcode = subprocess.call(self.args,
 186                 env=bstinputs, cwd=tempfile.gettempdir(),
 187                 stdout=open(os.devnull, "w"), stderr=open(os.devnull, "w"))
 188             #retcode = subprocess.call(self.args,
 189             #    env=bstinputs, cwd=tempfile.gettempdir(),
 190             #    stdout=open(os.devnull, "w"), stderr=open(logfile, "w"))
 191             if retcode:
 192                 raise CalledProcessError(retcode, "".join(self.args))
 193         except OSError, error:
 194             # bib2xhtml not found or not executable
 195             os.remove(bibfile)
 196             os.remove(xhtmlfile)
 197             raise
 198         except CalledProcessError, error:
 199             # non-zero exit status
 200             os.remove(bibfile)
 201             os.remove(xhtmlfile)
 202             #os.remove(logfile)
 203             raise
 204 
 205         os.remove(bibfile)
 206         #os.remove(logfile)
 207 
 208         name_pattern = re.compile('<a name="(?P<anchor>[^"]*)">', re.UNICODE)
 209         href_pattern = re.compile('<a href="#(?P<anchor>[^"]*)">', re.UNICODE)
 210         inside_dl = False
 211 
 212         # read the output (encoded as utf-8) back in
 213         f = codecs.open(xhtmlfile, "r", encoding="utf-8")
 214 
 215         for line in f.readlines():
 216             if line.startswith(u'<!-- Generated by: '):
 217                 # throw away comments at the beginning...
 218                 inside_dl = True
 219                 continue
 220             elif line == u'<!-- END BIBLIOGRAPHY MoinMoin -->\n':
 221                 # ...and the end
 222                 break
 223             if inside_dl:
 224                 # use a ref:-prefix for anchor links in order to avoid
 225                 # interference with other anchors and replace the name- with
 226                 # the id-attribute (it would be more appropriate to fix this in
 227                 # the bst-file)
 228                 line = name_pattern.sub(ur'<a id="ref:\g<anchor>">', line)
 229                 line = href_pattern.sub(ur'<a href="#ref:\g<anchor>">', line)
 230                 output.append(line)
 231 
 232         f.close()
 233         os.remove(xhtmlfile)
 234 
 235         output = "".join(output)
 236         return output
 237 
 238 
 239 class Bibliography:
 240     """The bibliography for the current page."""
 241     def __init__(self, macro, args):
 242         self.macro = macro
 243         self.request = self.macro.request
 244         self._ = self.request.getText
 245         self.this_page_name = self.macro.formatter.page.page_name
 246         self.abstracts = False
 247         self.label = True
 248         self.chronological = False
 249         self.style = "named"
 250         self.bib_page_name = None
 251 
 252         if args and args != "":
 253             # get the page name first
 254             # better user args.partition(",") available in Python 2.5
 255             if "," in args:
 256                 bib_page_name, bib_args = args.split(",", 1)
 257             else:
 258                 bib_page_name = args.strip()
 259                 bib_args = ""
 260             self.bib_page_name = wikiutil.AbsPageName(self.request,
 261                                     self.this_page_name, bib_page_name.strip())
 262 
 263             # get the rest of the (named) parameters
 264             for arg in bib_args.split(","):
 265                 if "=" in arg:
 266                     key, value = arg.lower().split("=", 1)
 267                     key = key.strip()
 268                     value = value.strip()
 269                     if key == "abstracts" and value not in ("on", "true",
 270                                                                         "yes"):
 271                         self.abstracts = True
 272                     if key == "label" and value not in ("on", "true", "yes"):
 273                         self.label = False
 274                     if key == "chronological" and value in ("on", "true",
 275                                                                         "yes"):
 276                         self.chronological = True
 277                     elif key == "chronological" and value in ("reverse",
 278                                                                    "reversed"):
 279                         self.chronological = "reversed"
 280                     if key == "style" and value in ("empty", "plain", "alpha",
 281                                               "named", "unsort", "unsortlist"):
 282                         self.style = value
 283 
 284         # linebreaks
 285         self.eol_re = re.compile(r'\r?\n', re.UNICODE)
 286         # processing instructions and comments, matches format of format PI
 287         self.re_pi_comment = re.compile(r'^\#(((\#|refresh|redirect|deprecated|pragma|form|acl|language)|(format\s*(?P<format>.*?)))(\s|$))', re.UNICODE|re.IGNORECASE)
 288         # end of a formatted section
 289         self.re_formatted_end = re.compile(r'(.*?)\}\}\}', re.UNICODE)
 290         # comments (not inside PI)
 291         self.re_comment = re.compile(r'^\#\#', re.UNICODE)
 292         # formatted section, matches formatter
 293         self.re_formatted = re.compile(
 294             r'\{\{\{(?!.*\}\}\})((\#!\s*(?P<format>.*?)(\s|$))|(.*$))',
 295             re.UNICODE)
 296         # Cite macro, matches key
 297         self.re_cite = re.compile(
 298             r'\[\[Cite\((?P<key>.+?)((,.*?){1,2})?\)\]\]', re.UNICODE)
 299         # other macros, excluding Cite
 300         # FIXME what if [[Cite([[Cite(bla)]])]]?
 301         macronames = [m for m in wikimacro.getNames(macro.request.cfg) \
 302             if m != 'Cite']
 303         # stuff to filter out (mostly because they cannot contain macros)
 304         self.re_filter = re.compile(r'''(
 305 (\^.*?\^)|          # sup
 306 ({\{\{.*?\}\}\})|   # tt
 307 (^\s+.*?::\s)|      # dl
 308 (^\s*(?P<hmarker>=+)\s.*\s(?P=hmarker) $)| # heading
 309 (\[".*?"\])|        # wikiname_bracket
 310 (`.*?`)|            # tt_bt
 311 (\[\[(%(macronames)s)(?:\(.*?\))?\]\]) # macro
 312 )''' % {"macronames": u"|".join(macronames)}, re.UNICODE|re.VERBOSE)
 313 
 314     def run(self):
 315         _ = self._
 316         output = []
 317         # parse current page first
 318         cit_list, bib_list = self.parse(self.macro.parser.raw, True)
 319 
 320         # parse a given page as well
 321         if self.bib_page_name and self.bib_page_name != self.this_page_name:
 322             if self.request.user.may.read(self.bib_page_name):
 323                 bib_page = Page(self.request, self.bib_page_name)
 324             else:
 325                 output.append(self.macro.formatter.div(1, css_class="error"))
 326                 output.append(self.macro.formatter.escapedText(
 327                     _('Error: Page "%(page_name)s" may not be read and cannot be included by Bibliography')
 328                     % {"page_name": self.bib_page_name}))
 329                 output.append(self.macro.formatter.div(0))
 330                 return "".join(output)
 331             if bib_page.exists():
 332                 add_cit_list, add_bib_list = self.parse(
 333                     bib_page.get_raw_body(), False)
 334                 bib_list.extend(add_bib_list)
 335             else:
 336                 output.append(self.macro.formatter.div(1, css_class="error"))
 337                 output.append(self.macro.formatter.escapedText(
 338                     _('Error: Page "%(page_name)s" does not exist and cannot be included by Bibliography')
 339                     % {"page_name": self.bib_page_name}))
 340                 output.append(self.macro.formatter.div(0))
 341                 return "".join(output)
 342 
 343         bibtex = "\n".join(bib_list)
 344 
 345         # try to render as HTML
 346         bib_renderer = BibtexRenderer(bibtex, self.request,
 347                 citations=cit_list, abstracts=self.abstracts, label=self.label,
 348                 chronological=self.chronological, style=self.style)
 349         try:
 350             bib_output = bib_renderer.render()
 351         except OSError, error:
 352             output.append(self.macro.formatter.div(1, css_class="error"))
 353             output.append(self.macro.formatter.escapedText(
 354                 _('Error: "%(bib2html_cmd)s" could not be found or executed by Bibliography')
 355                 % {"bib2html_cmd": bib_renderer.bib2html_cmd}))
 356             output.append(self.macro.formatter.div(0))
 357             return "".join(output)
 358         except CalledProcessError, error:
 359             output.append(self.macro.formatter.div(1, css_class="error"))
 360             output.append(self.macro.formatter.escapedText(
 361                 _('Error: "%(bib2html_cmd)s" returned a non-zero exit status while being executed by Biblography')
 362                 % {"bib2html_cmd": bib_renderer.bib2html_cmd}))
 363             output.append(self.macro.formatter.div(0))
 364             return "".join(output)
 365 
 366         # write through the rawHTML formatter if possible, otherwise write
 367         # just the Bibtex markup as preformatted text
 368         output.append(self.macro.formatter.div(1, css_class="bibliography"))
 369         output.append(self.macro.formatter.heading(1,1))
 370         output.append(self.macro.formatter.escapedText(_("Bibliography")))
 371         output.append(self.macro.formatter.heading(0,1))
 372 
 373         try:
 374             output.append(self.macro.formatter.rawHTML(bib_output))
 375         except:
 376             output.append(self.macro.formatter.preformatted(1))
 377             output.append(self.macro.formatter.text(bibtex.expandtabs()))
 378             output.append(self.macro.formatter.preformatted(0))
 379 
 380         output = "".join(output)
 381         return output
 382 
 383     def parse(self, raw_page, find_cite_macro):
 384         """Parse a page."""
 385 
 386         lines = self.eol_re.split(raw_page.expandtabs())
 387         in_pi_comment = True # inside processing instructions or comments
 388         in_pre = None # inside a preformatted block
 389         page_format = "wiki"
 390         bib_list = [] # contains found embedded bibtex markup
 391         cit_list = [] # contains found citations if requested
 392 
 393         for line in lines:
 394             if in_pi_comment:
 395                 # inside processing instructions or comments at the beginning
 396                 # of the page
 397                 m = self.re_pi_comment.match(line)
 398                 if m and m.group("format") is not None:
 399                     # set page format, the last one applies to the whole page
 400                     page_format = m.group("format")
 401                 elif m:
 402                     # other processing instruction or comment
 403                     continue
 404                 else:
 405                     # not a processing instruction or comment
 406                     in_pi_comment = False
 407             elif in_pre is not None: # formatter might be empty!
 408                 # inside preformatted block
 409                 m = self.re_formatted_end.search(line)
 410                 if in_pre == "bibtex":
 411                     # format of preformatted block is bibtex
 412                     if m:
 413                         # preformatted block in bibtex format ends here
 414                         bib_list.append(m.group(1))
 415                         in_pre = None
 416                         line = line[m.end():]
 417                     else:
 418                         # preformatted block in bibtex format continues
 419                         bib_list.append(line)
 420                         continue
 421                 elif in_pre == "wiki":
 422                     # preformatted block is in wiki format
 423                     if m:
 424                         # preformatted block ends here
 425                         in_pre = None
 426                         # parse inside for citations, delete inside part
 427                         part_line = line[:m.end()-3]
 428                         if find_cite_macro:
 429                             for m in self.re_cite.finditer(line):
 430                                 # cite macro found
 431                                 if in_pre != None and \
 432                                         m.group("key") != "":
 433                                     cit_list.append(m.group("key"))
 434                         # continue parsing outside of this block, there might be
 435                         # further citations
 436                         line = line[m.end():]
 437                 else:
 438                     # preformatted block is not in wiki or bibtex format
 439                     if m:
 440                         # preformatted block ends here
 441                         in_pre = None
 442                         # continue parsing outside of this block, there might be
 443                         # citations
 444                         line = line[m.end():]
 445                     else:
 446                         # preformatted block continues
 447                         continue
 448 
 449             if page_format == "bibtex":
 450                 # page is in bibtex format
 451                 bib_list.append(line)
 452                 continue
 453             elif page_format != "wiki":
 454                 # page is in an unknown, unsupported format
 455                 break
 456 
 457             if self.re_comment.match(line):
 458                 continue
 459 
 460             m = self.re_formatted.search(line)
 461             if m:
 462                 if m.group("format") is None:
 463                     # if there is no "#!" after "{{{" format will be None,
 464                     # thus default to ""
 465                     in_pre = ""
 466                 else:
 467                     # set the formatter to the found value
 468                     in_pre = m.group("format")
 469                 continue
 470 
 471             if find_cite_macro:
 472                 # filter commands which do not allow (cite) macros inside
 473                 line = self.re_filter.sub("", line)
 474                 for m in self.re_cite.finditer(line):
 475                     # cite macro found
 476                     if not in_pre == "" and m.group("key") != "" and not \
 477                         m.group("key").count("}"):
 478                         # citation must not be empty and not contain a "}"
 479                         cit_list.append(m.group("key"))
 480 
 481         return cit_list, bib_list
 482 
 483 
 484 def execute(macro, args):
 485     bib = Bibliography(macro, args)
 486     return bib.run()

Attached Files

To refer to attachments on a page, use attachment:filename, as shown below in the list of files. Do NOT use the URL of the [get] link, since this is subject to change and can break easily.
  • [get | view] (2007-06-07 21:38:59, 19.9 KB) [[attachment:Bibliography.py]]
  • [get | view] (2008-05-20 16:12:49, 16.0 KB) [[attachment:Bibliography1.6.py]]
  • [get | view] (2007-06-07 21:40:16, 44.9 KB) [[attachment:bibliography-sample.png]]
 All files | Selected Files: delete move to page copy to page

You are not allowed to attach a file to this page.