Attachment 'Bibliography.py'
Download 1 # -*- coding: iso-8859-1 -*-
2 """MoinMoin - Bibliography
3
4 A macro to insert a bibliography automatically generated from embedded
5 bibtex markup, showing the entries cited with the Cite macro on the
6 current page. It can optionally include another page which may be in
7 bibtex format or contain further embedded bibtex markup.
8
9 Syntax:
10 [[Bibliography]]
11 [[Bibliography(bib_page_name[, abstracts=abstracts, label=label,
12 chronological=chronological, style=style])]]
13
14 Parameters:
15 bib_page_name -- page to include which contains additional embedded bibtex
16 entries
17
18 Keyword Parameters:
19 abstracts -- show abstracts (values: on,true,yes,off,false,no;
20 default off)
21 label -- show labels if the style is alpha, named, plain,
22 or unsort (values: on, true, yes, off, false, no;
23 default: on)
24 chronological -- sort chronologically first by year then by author
25 instead of first by author and then by year, can
26 also be reversed in order to sort from last to
27 earliest entry instead of from earliest to last
28 entry (values: on, true, yes, off, false, no,
29 reverse, reversed; default: off)
30 style -- determine a style to use (values: empty, plain,
31 alpha, named, unsort, unsortlist; default: named)
32
33 Configuration Options:
34 bibtex_bib2html_cmd -- path and command name of bib2xhtml, needs to
35 be set if bib2xhtml is not in PATH
36 bibtex_bst_path -- path to bib2xhtml bst-style files, needs to
37 be set if these are not found by bibtex
38 automatically
39
40 Notes:
41 This macro is intended to be used with the Cite macro and bibtex
42 parser. It depends on bibtex and bib2xhtml, see
43 http://en.wikipedia.org/wiki/Bibtex and
44 http://www.spinellis.gr/sw/textproc/bib2xhtml/ for more information.
45
46 $Id: Bibliography.py,v 1.1.1.1 2007-06-07 20:36:15 gber Exp $
47 Copyright 2007 by Guido Berhoerster <guido+moinmoin@berhoerster.name>
48 Licensed under the GNU GPL, see COPYING for details.
49
50 $Log: Bibliography.py,v $
51 Revision 1.1.1.1 2007-06-07 20:36:15 gber
52 initial import into CVS
53
54
55 """
56
57 import os
58 import tempfile
59 import subprocess
60 import codecs
61 import re
62 from MoinMoin import wikimacro, wikiutil
63 from MoinMoin.Page import Page
64
65 Dependencies = []
66
67 # subprocess.check_call and CalledProcessError are only available in
68 # Python 2.5, so recreate them here
class CalledProcessError(Exception):
    """Signal that an external command finished with a non-zero status.

    Local stand-in for subprocess.CalledProcessError, which does not exist
    in the Python versions this macro supports.

    Attributes:
    returncode -- exit status reported by the command
    cmd        -- the command that was run

    """

    def __init__(self, returncode, cmd):
        self.cmd = cmd
        self.returncode = returncode

    def __str__(self):
        # Keep the two values in an explicit tuple so that a tuple-valued
        # cmd is formatted as a single "%s" argument.
        details = (self.cmd, self.returncode)
        return "Command '%s' returned non-zero exit status %d" % details
79
80
class BibtexRenderer:
    """Render bibtex markup to XHTML by running the external bib2xhtml tool.

    Arguments:
    bibtex        -- string containing bibtex markup to be rendered
    request       -- request object; request.cfg is consulted for the
                     optional settings bibtex_bib2html_cmd (command to run)
                     and bibtex_bst_path (value for BSTINPUTS)

    Keyword arguments:
    citations     -- list with keys of bibtex entries to be
                     rendered (default: none, i.e. render all entries)
    abstracts     -- boolean, show abstracts or not
                     (default: False)
    label         -- boolean, show labels or not (default: False)
    chronological -- boolean or the string "reversed",
                     chronological or reversed chronological
                     sorting (default: False)
    style         -- string "empty", "plain", "alpha", "named",
                     "unsort", "unsortlist", determining the style
                     to use (default: None)

    Notes:
    render() raises OSError if bib2xhtml is not found or
    CalledProcessError if bib2xhtml returns with a non-zero exit status.

    """

    _STYLES = ("empty", "plain", "alpha", "named", "unsort", "unsortlist")
    # anchors emitted by bib2xhtml, rewritten by _filter_output()
    _NAME_RE = re.compile('<a name="(?P<anchor>[^"]*)">', re.UNICODE)
    _HREF_RE = re.compile('<a href="#(?P<anchor>[^"]*)">', re.UNICODE)

    def __init__(self, bibtex, request, citations=None, abstracts=False,
                 label=False, chronological=False, style=None):
        cfg = request.cfg

        # retrieve configuration; the documented option name is
        # bibtex_bib2html_cmd, the historical spelling bibtex_bib2html is
        # still honoured so that existing configurations keep working
        cmd = getattr(cfg, "bibtex_bib2html_cmd", None)
        if cmd is None:
            cmd = getattr(cfg, "bibtex_bib2html", "bib2xhtml")
        self.bib2html_cmd = cmd
        self.bst_path = getattr(cfg, "bibtex_bst_path", None)

        # the original bibtex implementation is not 8-bit clean, replace
        # non-ASCII characters with "?"
        self.bibtex = bibtex.encode("ascii", "replace")

        self.args = [self.bib2html_cmd, "-u", "-dMoinMoin"]

        if citations:
            cit_list = [u"<!-- BEGIN CITATIONS MoinMoin -->", u"<!--"]
            cit_list.extend([u"\\citation{%s}" % c for c in citations])
            cit_list.append(u"-->")
            cit_list.append(u"<!-- END CITATIONS MoinMoin -->")
            # also encode as ASCII
            self.citations = u"\n".join(cit_list).encode("ascii", "replace")
            self.args.append("-i")
        else:
            self.citations = None

        if abstracts:
            self.args.append("-a")
        if label:
            self.args.append("-k")
        if chronological == "reversed":
            self.args.extend(["-c", "-r"])
        elif chronological:
            self.args.append("-c")
        if style in self._STYLES:
            self.args.extend(["-s", style])

    def _write_file(self, path, data):
        """Write the encoded string data to path, always closing the file."""
        f = open(path, "wb")
        try:
            f.write(data)
        finally:
            f.close()

    def _filter_output(self, lines):
        """Return the bibliography part of the bib2xhtml output as a string.

        Everything up to and including the "Generated by" comment and
        everything from the "END BIBLIOGRAPHY" comment on is dropped.
        """
        output = []
        inside = False
        for line in lines:
            if line.startswith(u'<!-- Generated by: '):
                # throw away comments at the beginning...
                inside = True
                continue
            elif line == u'<!-- END BIBLIOGRAPHY MoinMoin -->\n':
                # ...and the end
                break
            if inside:
                # use a ref:-prefix for anchor links in order to avoid
                # interference with other anchors and replace the name- with
                # the id-attribute (it would be more appropriate to fix this
                # in the bst-file)
                line = self._NAME_RE.sub(u'<a id="ref:\\g<anchor>">', line)
                line = self._HREF_RE.sub(u'<a href="#ref:\\g<anchor>">', line)
                output.append(line)
        return "".join(output)

    def render(self):
        """Render the bibtex markup (if requested, only cited entries)
        and return HTML output in a string.

        Raises OSError if the converter cannot be executed and
        CalledProcessError if it exits with a non-zero status. The
        temporary files are removed in every case.
        """
        tmpfiles = []
        try:
            # create temporary files for Bibtex input and HTML output; the
            # descriptors returned by mkstemp are closed right away, the
            # files are reopened by name below
            for suffix in (".bib", ".xhtml"):
                fd, name = tempfile.mkstemp(suffix)
                tmpfiles.append(name)
                os.close(fd)
            bibfile, xhtmlfile = tmpfiles

            self._write_file(bibfile, self.bibtex)
            if self.citations:
                # bib2xhtml reads the citations from the output file
                self._write_file(xhtmlfile, self.citations)

            # build the command line locally so that calling render() again
            # does not accumulate stale file names in self.args
            args = self.args + [bibfile, xhtmlfile]

            # set BSTINPUTS if required in order to help Bibtex find the
            # needed .bst files; extend the inherited environment instead of
            # replacing it, otherwise PATH and friends would be lost
            if self.bst_path:
                env = os.environ.copy()
                env["BSTINPUTS"] = self.bst_path
            else:
                env = None

            # bib2xhtml creates its temporary files in the current working
            # directory, so set it to a reasonable location
            devnull = open(os.devnull, "w")
            try:
                retcode = subprocess.call(args, env=env,
                                          cwd=tempfile.gettempdir(),
                                          stdout=devnull, stderr=devnull)
            finally:
                devnull.close()
            if retcode:
                raise CalledProcessError(retcode, " ".join(args))

            # read the output (encoded as utf-8) back in
            f = codecs.open(xhtmlfile, "r", encoding="utf-8")
            try:
                return self._filter_output(f.readlines())
            finally:
                f.close()
        finally:
            for name in tmpfiles:
                try:
                    os.remove(name)
                except OSError:
                    # best-effort cleanup, do not mask the real error
                    pass
237
238
class Bibliography:
    """The bibliography for the current page.

    Arguments:
    macro -- macro object the Bibliography macro was invoked with
    args  -- raw macro argument string: an optional page name followed by
             comma separated key=value options (abstracts, label,
             chronological, style), or None/empty

    """

    _TRUE_VALUES = ("on", "true", "yes")
    _REVERSE_VALUES = ("reverse", "reversed")
    _STYLES = ("empty", "plain", "alpha", "named", "unsort", "unsortlist")

    # linebreaks
    eol_re = re.compile(r'\r?\n', re.UNICODE)
    # processing instructions and comments, matches format of format PI
    re_pi_comment = re.compile(r'^\#(((\#|refresh|redirect|deprecated|pragma|form|acl|language)|(format\s*(?P<format>.*?)))(\s|$))', re.UNICODE|re.IGNORECASE)
    # end of a formatted section
    re_formatted_end = re.compile(r'(.*?)\}\}\}', re.UNICODE)
    # comments (not inside PI)
    re_comment = re.compile(r'^\#\#', re.UNICODE)
    # formatted section, matches formatter
    re_formatted = re.compile(
        r'\{\{\{(?!.*\}\}\})((\#!\s*(?P<format>.*?)(\s|$))|(.*$))',
        re.UNICODE)
    # Cite macro, matches key
    re_cite = re.compile(
        r'\[\[Cite\((?P<key>.+?)((,.*?){1,2})?\)\]\]', re.UNICODE)

    def __init__(self, macro, args):
        self.macro = macro
        self.request = self.macro.request
        self._ = self.request.getText
        self.this_page_name = self.macro.formatter.page.page_name
        self.abstracts = False
        self.label = True
        self.chronological = False
        self.style = "named"
        self.bib_page_name = None

        if args:
            # get the page name first
            if "," in args:
                bib_page_name, bib_args = args.split(",", 1)
            else:
                bib_page_name = args
                bib_args = ""
            self.bib_page_name = wikiutil.AbsPageName(self.request,
                    self.this_page_name, bib_page_name.strip())

            # get the rest of the (named) parameters
            self._parse_options(bib_args)

        # other macros, excluding Cite
        # FIXME what if [[Cite([[Cite(bla)]])]]?
        macronames = [m for m in wikimacro.getNames(macro.request.cfg)
                      if m != 'Cite']
        # stuff to filter out (mostly because they cannot contain macros)
        self.re_filter = re.compile(r'''(
            (\^.*?\^)|                                  # sup
            ({\{\{.*?\}\}\})|                           # tt
            (^\s+.*?::\s)|                              # dl
            (^\s*(?P<hmarker>=+)\s.*\s(?P=hmarker) $)|  # heading
            (\[".*?"\])|                                # wikiname_bracket
            (`.*?`)|                                    # tt_bt
            (\[\[(%(macronames)s)(?:\(.*?\))?\]\])      # macro
            )''' % {"macronames": u"|".join(macronames)}, re.UNICODE|re.VERBOSE)

    def _parse_options(self, bib_args):
        """Apply the comma separated key=value options in bib_args.

        Keys and values are matched case-insensitively. Unknown keys and
        items without "=" are ignored. If a key is given more than once
        the last occurrence wins.
        """
        for arg in bib_args.split(","):
            if "=" not in arg:
                continue
            key, value = arg.lower().split("=", 1)
            key = key.strip()
            value = value.strip()
            if key == "abstracts":
                # abstracts are shown only when explicitly switched on
                self.abstracts = value in self._TRUE_VALUES
            elif key == "label":
                # any value other than on/true/yes switches labels off
                self.label = value in self._TRUE_VALUES
            elif key == "chronological":
                if value in self._TRUE_VALUES:
                    self.chronological = True
                elif value in self._REVERSE_VALUES:
                    self.chronological = "reversed"
                else:
                    self.chronological = False
            elif key == "style" and value in self._STYLES:
                self.style = value

    def _error(self, message):
        """Return message wrapped in an error div as a formatted string."""
        fmt = self.macro.formatter
        return "".join([fmt.div(1, css_class="error"),
                        fmt.escapedText(message),
                        fmt.div(0)])

    def _find_citations(self, text):
        """Return the keys of all usable Cite macros found in text."""
        keys = []
        # filter commands which do not allow (cite) macros inside
        text = self.re_filter.sub("", text)
        for match in self.re_cite.finditer(text):
            key = match.group("key")
            # citation must not be empty and not contain a "}"
            if key != "" and "}" not in key:
                keys.append(key)
        return keys

    def run(self):
        """Build the bibliography and return it as a formatted string.

        On failure (included page unreadable or missing, converter not
        executable or exiting with an error) an error box is returned
        instead.
        """
        _ = self._
        fmt = self.macro.formatter

        # parse current page first
        cit_list, bib_list = self.parse(self.macro.parser.raw, True)

        # parse a given page as well
        if self.bib_page_name and self.bib_page_name != self.this_page_name:
            if not self.request.user.may.read(self.bib_page_name):
                return self._error(
                    _('Error: Page "%(page_name)s" may not be read and cannot be included by Bibliography')
                    % {"page_name": self.bib_page_name})
            bib_page = Page(self.request, self.bib_page_name)
            if not bib_page.exists():
                return self._error(
                    _('Error: Page "%(page_name)s" does not exist and cannot be included by Bibliography')
                    % {"page_name": self.bib_page_name})
            # only the bibtex entries of the included page are of interest
            bib_list.extend(self.parse(bib_page.get_raw_body(), False)[1])

        bibtex = "\n".join(bib_list)

        # try to render as HTML
        bib_renderer = BibtexRenderer(bibtex, self.request,
                citations=cit_list, abstracts=self.abstracts,
                label=self.label, chronological=self.chronological,
                style=self.style)
        try:
            bib_output = bib_renderer.render()
        except OSError:
            return self._error(
                _('Error: "%(bib2html_cmd)s" could not be found or executed by Bibliography')
                % {"bib2html_cmd": bib_renderer.bib2html_cmd})
        except CalledProcessError:
            return self._error(
                _('Error: "%(bib2html_cmd)s" returned a non-zero exit status while being executed by Biblography')
                % {"bib2html_cmd": bib_renderer.bib2html_cmd})

        # write through the rawHTML formatter if possible, otherwise write
        # just the Bibtex markup as preformatted text
        output = [fmt.div(1, css_class="bibliography"),
                  fmt.heading(1, 1),
                  fmt.escapedText(_("Bibliography")),
                  fmt.heading(0, 1)]
        try:
            output.append(fmt.rawHTML(bib_output))
        except Exception:
            output.append(fmt.preformatted(1))
            output.append(fmt.text(bibtex.expandtabs()))
            output.append(fmt.preformatted(0))
        # close the div opened above
        output.append(fmt.div(0))

        return "".join(output)

    def parse(self, raw_page, find_cite_macro):
        """Parse a page.

        Arguments:
        raw_page        -- raw page text
        find_cite_macro -- if true, also collect the keys of Cite macros

        Returns a tuple (cit_list, bib_list): the citation keys found (in
        page order, empty unless find_cite_macro is true) and the lines of
        embedded bibtex markup.
        """
        lines = self.eol_re.split(raw_page.expandtabs())
        in_pi_comment = True # inside processing instructions or comments
        in_pre = None        # format of the preformatted block we are in
        page_format = "wiki"
        bib_list = []        # contains found embedded bibtex markup
        cit_list = []        # contains found citations if requested

        for line in lines:
            if in_pi_comment:
                # inside processing instructions or comments at the
                # beginning of the page
                m = self.re_pi_comment.match(line)
                if m and m.group("format") is not None:
                    # set page format, the last one applies to the whole
                    # page
                    page_format = m.group("format")
                elif m:
                    # other processing instruction or comment
                    continue
                else:
                    # not a processing instruction or comment
                    in_pi_comment = False
            elif in_pre is not None: # formatter might be empty!
                # inside preformatted block
                end = self.re_formatted_end.search(line)
                if in_pre == "bibtex":
                    if not end:
                        # preformatted block in bibtex format continues
                        bib_list.append(line)
                        continue
                    # preformatted block in bibtex format ends here
                    bib_list.append(end.group(1))
                    in_pre = None
                    line = line[end.end():]
                elif in_pre == "wiki":
                    # a continuing wiki block falls through and is parsed
                    # like ordinary wiki text below
                    if end:
                        # preformatted block ends here, collect the
                        # citations of the part inside the block...
                        in_pre = None
                        if find_cite_macro:
                            cit_list.extend(
                                self._find_citations(end.group(1)))
                        # ...and continue parsing outside of this block,
                        # there might be further citations
                        line = line[end.end():]
                else:
                    # preformatted block is not in wiki or bibtex format
                    if not end:
                        # preformatted block continues
                        continue
                    # preformatted block ends here, continue parsing
                    # outside of this block, there might be citations
                    in_pre = None
                    line = line[end.end():]

            if page_format == "bibtex":
                # page is in bibtex format
                bib_list.append(line)
                continue
            elif page_format != "wiki":
                # page is in an unknown, unsupported format
                break

            if self.re_comment.match(line):
                continue

            m = self.re_formatted.search(line)
            if m:
                # if there is no "#!" after "{{{" format will be None,
                # thus default to ""
                in_pre = m.group("format")
                if in_pre is None:
                    in_pre = ""
                continue

            if find_cite_macro:
                cit_list.extend(self._find_citations(line))

        return cit_list, bib_list
482
483
def execute(macro, args):
    """Macro entry point: build the bibliography for macro's page.

    Creates a Bibliography from the macro object and its raw argument
    string and returns the result of its run() method.
    """
    return Bibliography(macro, args).run()
Attached Files
To refer to attachments on a page, use attachment:filename, as shown below in the list of files. Do NOT use the URL of the [get] link, since this is subject to change and can break easily. You are not allowed to attach a file to this page.