# -*- coding: iso-8859-1 -*-
"""MoinMoin - Bibliography

A macro to insert a bibliography automatically generated from embedded
bibtex markup, showing the entries cited with the Cite macro on the
current page. It can optionally include another page which may be in
bibtex format or contain further embedded bibtex markup.

Syntax:
    [[Bibliography]]
    [[Bibliography(bib_page_name[, abstracts=abstracts, label=label,
        chronological=chronological, style=style])]]

Parameters:
    bib_page_name -- page to include which contains additional embedded bibtex
                     entries

Keyword Parameters:
    abstracts     -- show abstracts (values: on,true,yes,off,false,no;
                     default off)
    label         -- show labels if the style is alpha, named, plain,
                     or unsort (values: on, true, yes, off, false, no;
                     default: on)
    chronological -- sort chronologically first by year then by author
                     instead of first by author and then by year, can
                     also be reversed in order to sort from last to
                     earliest entry instead of from earliest to last
                     entry (values: on, true, yes, off, false, no,
                     reverse, reversed; default: off)
    style         -- determine a style to use (values: empty, plain,
                     alpha, named, unsort, unsortlist; default: named)

Configuration Options:
    bibtex_bib2html_cmd -- path and command name of bib2xhtml, needs to
                           be set if bib2xhtml is not in PATH
    bibtex_bst_path     -- path to bib2xhtml bst-style files, needs to
                           be set if these are not found by bibtex
                           automatically

Notes:
This macro is intended to be used with the Cite macro and bibtex
parser. It depends on bibtex and bib2xhtml, see
http://en.wikipedia.org/wiki/Bibtex and
http://www.spinellis.gr/sw/textproc/bib2xhtml/ for more information.

$Id: Bibliography.py,v 1.1.1.1 2007-06-07 20:36:15 gber Exp $
Copyright 2007 by Guido Berhoerster <guido+moinmoin@berhoerster.name>
Licensed under the GNU GPL, see COPYING for details.

$Log: Bibliography.py,v $
Revision 1.1.1.1  2007-06-07 20:36:15  gber
initial import into CVS


"""

import os
import tempfile
import subprocess
import codecs
import re
from MoinMoin import wikimacro, wikiutil
from MoinMoin.Page import Page

Dependencies = []

# subprocess.check_call and CalledProcessError are only available from
# Python 2.5 on, so the exception is recreated here for older versions
class CalledProcessError(Exception):
    """Signal that an external command finished with a non-zero exit
    status.

    Attributes:
        returncode -- the exit status reported for the command
        cmd        -- the command that was run, as given by the raiser
    """

    def __init__(self, returncode, cmd):
        self.cmd = cmd
        self.returncode = returncode

    def __str__(self):
        # human-readable summary naming the command and its exit status
        details = (self.cmd, self.returncode)
        return "Command '%s' returned non-zero exit status %d" % details


class BibtexRenderer:
    """Abstracted bibtex renderer

    Arguments:
        bibtex  -- string containing bibtex markup to be rendered
        request -- request object; request.cfg is consulted for the
                   optional settings bibtex_bib2html_cmd (the older
                   spelling bibtex_bib2html is still honoured) and
                   bibtex_bst_path

    Keyword arguments:
        citations     -- list with keys of bibtex entries to be
                         rendered (default: None, i.e. no restriction)
        abstracts     -- boolean, show abstracts or not
                         (default: False)
        label         -- boolean, pass the "-k" option to bib2xhtml
                         (default: False)
        chronological -- boolean or the string "reversed",
                         chronological or reversed chronological
                         sorting (default: False)
        style         -- string "empty", "plain", "alpha", "named",
                         "unsort", "unsortlist", determining the style
                         to use (default: None)

    Notes:
    render() raises OSError if bib2xhtml is not found or
    CalledProcessError if bib2xhtml returns with a non-zero exit status.

    """

    # values accepted for the style argument (passed on with "-s")
    _STYLES = ("empty", "plain", "alpha", "named", "unsort", "unsortlist")

    # anchors emitted by bib2xhtml which get rewritten in the output
    _name_pattern = re.compile('<a name="(?P<anchor>[^"]*)">', re.UNICODE)
    _href_pattern = re.compile('<a href="#(?P<anchor>[^"]*)">', re.UNICODE)

    def __init__(self, bibtex, request, citations=None, abstracts=False,
            label=False, chronological=False, style=None):
        cfg = request.cfg

        # retrieve configuration; "bibtex_bib2html_cmd" is the documented
        # option name, "bibtex_bib2html" is the name this code used to
        # read and is kept as a fallback
        self.bib2html_cmd = getattr(cfg, "bibtex_bib2html_cmd",
                getattr(cfg, "bibtex_bib2html", "bib2xhtml"))
        self.bst_path = getattr(cfg, "bibtex_bst_path", None)

        # the original bibtex implementation is not 8-bit clean, replace
        # non-ASCII characters with "?"
        self.bibtex = bibtex.encode("ascii", "replace")

        self.args = [self.bib2html_cmd, "-u", "-dMoinMoin"]

        if citations:
            cit_list = [u"<!-- BEGIN CITATIONS MoinMoin -->", u"<!--"]
            cit_list.extend([u"\\citation{%s}" % c for c in citations])
            cit_list.append(u"-->")
            cit_list.append(u"<!-- END CITATIONS MoinMoin -->")
            # also encode as ASCII
            self.citations = u"\n".join(cit_list).encode("ascii", "replace")
            self.args.append("-i")
        else:
            self.citations = None

        if abstracts:
            self.args.append("-a")
        if label:
            self.args.append("-k")
        if chronological == "reversed":
            self.args.extend(["-c", "-r"])
        elif chronological:
            self.args.append("-c")
        if style in self._STYLES:
            self.args.extend(["-s", style])

    def render(self):
        """Render the bibtex markup (if requested, only cited entries)
        and return HTML output in a string.

        Raises OSError if bib2xhtml cannot be executed and
        CalledProcessError if it exits with a non-zero status. The
        temporary files are removed in either case.
        """
        tmpfiles = []
        try:
            # temporary files for Bibtex input and HTML output; the
            # output file is pre-seeded with the citations, if any
            bibfile = self._write_tempfile(".bib", self.bibtex, tmpfiles)
            xhtmlfile = self._write_tempfile(".xhtml", self.citations,
                    tmpfiles)
            self._run_bib2xhtml(bibfile, xhtmlfile)
            return self._read_output(xhtmlfile)
        finally:
            for name in tmpfiles:
                try:
                    os.remove(name)
                except OSError:
                    # best effort: a failed cleanup must not mask the
                    # result or the error that is already propagating
                    pass

    def _write_tempfile(self, suffix, content, created):
        """Create a temporary file, register its name in the list
        created, write content (if any) to it and return its name."""
        fd, name = tempfile.mkstemp(suffix)
        created.append(name)
        # reuse the descriptor returned by mkstemp so it gets closed
        f = os.fdopen(fd, "w")
        try:
            if content:
                f.write(content)
        finally:
            f.close()
        return name

    def _run_bib2xhtml(self, bibfile, xhtmlfile):
        """Execute bib2xhtml on the given input and output files."""
        # build the command line locally so that self.args is left
        # untouched and render() can be called more than once
        args = self.args + [bibfile, xhtmlfile]

        # set the BSTINPUTS environment variable if required in order to
        # help Bibtex find the needed .bst files; the rest of the
        # environment (PATH etc.) is inherited
        if self.bst_path:
            env = os.environ.copy()
            env["BSTINPUTS"] = self.bst_path
        else:
            env = None

        # bib2xhtml creates its temporary files in the current working
        # directory, so set it to a reasonable location
        devnull = open(os.devnull, "w")
        try:
            retcode = subprocess.call(args, env=env,
                    cwd=tempfile.gettempdir(), stdout=devnull,
                    stderr=devnull)
        finally:
            devnull.close()
        if retcode:
            raise CalledProcessError(retcode, " ".join(args))

    def _read_output(self, xhtmlfile):
        """Read the generated file and return the part between the
        "Generated by" comment and the end marker as one string."""
        output = []
        inside_dl = False

        # read the output (encoded as utf-8) back in
        f = codecs.open(xhtmlfile, "r", encoding="utf-8")
        try:
            for line in f.readlines():
                if line.startswith(u'<!-- Generated by: '):
                    # throw away comments at the beginning...
                    inside_dl = True
                    continue
                elif line == u'<!-- END BIBLIOGRAPHY MoinMoin -->\n':
                    # ...and the end
                    break
                if inside_dl:
                    # use a ref:-prefix for anchor links in order to avoid
                    # interference with other anchors and replace the
                    # name- with the id-attribute (it would be more
                    # appropriate to fix this in the bst-file)
                    line = self._name_pattern.sub(
                            u'<a id="ref:\\g<anchor>">', line)
                    line = self._href_pattern.sub(
                            u'<a href="#ref:\\g<anchor>">', line)
                    output.append(line)
        finally:
            f.close()

        return "".join(output)


class Bibliography:
    """The bibliography for the current page.

    Collects the keys cited with the Cite macro and the embedded bibtex
    markup from the current page (and the bibtex markup of an optional
    additional page) and renders them through BibtexRenderer.

    Arguments:
        macro -- macro object; its request, formatter and parser
                 attributes are used
        args  -- raw macro argument string: a page name, optionally
                 followed by comma-separated key=value options
                 (abstracts, label, chronological, style), or None
    """

    # recognised option values
    _TRUE_VALUES = ("on", "true", "yes")
    _FALSE_VALUES = ("off", "false", "no")
    _REVERSED_VALUES = ("reverse", "reversed")
    _STYLES = ("empty", "plain", "alpha", "named", "unsort", "unsortlist")

    # linebreaks
    eol_re = re.compile(r'\r?\n', re.UNICODE)
    # processing instructions and comments, matches format of format PI
    re_pi_comment = re.compile(r'^\#(((\#|refresh|redirect|deprecated|pragma|form|acl|language)|(format\s*(?P<format>.*?)))(\s|$))', re.UNICODE|re.IGNORECASE)
    # end of a formatted section
    re_formatted_end = re.compile(r'(.*?)\}\}\}', re.UNICODE)
    # comments (not inside PI)
    re_comment = re.compile(r'^\#\#', re.UNICODE)
    # formatted section, matches formatter
    re_formatted = re.compile(
        r'\{\{\{(?!.*\}\}\})((\#!\s*(?P<format>.*?)(\s|$))|(.*$))',
        re.UNICODE)
    # Cite macro, matches key
    re_cite = re.compile(
        r'\[\[Cite\((?P<key>.+?)((,.*?){1,2})?\)\]\]', re.UNICODE)

    def __init__(self, macro, args):
        self.macro = macro
        self.request = self.macro.request
        self._ = self.request.getText
        self.this_page_name = self.macro.formatter.page.page_name
        self.abstracts = False
        self.label = True
        self.chronological = False
        self.style = "named"
        self.bib_page_name = None

        if args:
            # get the page name first
            # better use args.partition(",") available in Python 2.5
            if "," in args:
                bib_page_name, bib_args = args.split(",", 1)
            else:
                bib_page_name, bib_args = args, ""
            self.bib_page_name = wikiutil.AbsPageName(self.request,
                                    self.this_page_name, bib_page_name.strip())

            # get the rest of the (named) parameters
            self._parse_options(bib_args)

        # other macros, excluding Cite
        # FIXME what if [[Cite([[Cite(bla)]])]]?
        macronames = [re.escape(m)
                      for m in wikimacro.getNames(macro.request.cfg)
                      if m != 'Cite']
        # stuff to filter out (mostly because they cannot contain macros)
        self.re_filter = re.compile(r'''(
(\^.*?\^)|          # sup
({\{\{.*?\}\}\})|   # tt
(^\s+.*?::\s)|      # dl
(^\s*(?P<hmarker>=+)\s.*\s(?P=hmarker) $)| # heading
(\[".*?"\])|        # wikiname_bracket
(`.*?`)|            # tt_bt
(\[\[(%(macronames)s)(?:\(.*?\))?\]\]) # macro
)''' % {"macronames": u"|".join(macronames)}, re.UNICODE|re.VERBOSE)

    def _parse_options(self, bib_args):
        """Apply the comma-separated key=value options in bib_args.

        Keys and values are compared case-insensitively. Unknown keys
        and unrecognised values are ignored, i.e. the current setting
        is kept.
        """
        for arg in bib_args.split(","):
            if "=" not in arg:
                continue
            key, value = arg.lower().split("=", 1)
            key = key.strip()
            value = value.strip()
            if key == "abstracts":
                if value in self._TRUE_VALUES:
                    self.abstracts = True
                elif value in self._FALSE_VALUES:
                    self.abstracts = False
            elif key == "label":
                if value in self._TRUE_VALUES:
                    self.label = True
                elif value in self._FALSE_VALUES:
                    self.label = False
            elif key == "chronological":
                if value in self._TRUE_VALUES:
                    self.chronological = True
                elif value in self._REVERSED_VALUES:
                    self.chronological = "reversed"
                elif value in self._FALSE_VALUES:
                    self.chronological = False
            elif key == "style" and value in self._STYLES:
                self.style = value

    def _error(self, message):
        """Return message wrapped in a div with the CSS class "error"."""
        formatter = self.macro.formatter
        return "".join([formatter.div(1, css_class="error"),
                        formatter.escapedText(message),
                        formatter.div(0)])

    def run(self):
        """Build the bibliography and return the formatted output.

        An error message is returned instead if the additional page
        cannot be read or does not exist, or if bib2xhtml cannot be run
        or fails.
        """
        _ = self._
        # parse current page first
        cit_list, bib_list = self.parse(self.macro.parser.raw, True)

        # parse a given page as well
        if self.bib_page_name and self.bib_page_name != self.this_page_name:
            if not self.request.user.may.read(self.bib_page_name):
                return self._error(
                    _('Error: Page "%(page_name)s" may not be read and cannot be included by Bibliography')
                    % {"page_name": self.bib_page_name})
            bib_page = Page(self.request, self.bib_page_name)
            if not bib_page.exists():
                return self._error(
                    _('Error: Page "%(page_name)s" does not exist and cannot be included by Bibliography')
                    % {"page_name": self.bib_page_name})
            # only the bibtex markup of the additional page is used,
            # citations are taken from the current page alone
            bib_list.extend(self.parse(bib_page.get_raw_body(), False)[1])

        bibtex = "\n".join(bib_list)

        # try to render as HTML
        bib_renderer = BibtexRenderer(bibtex, self.request,
                citations=cit_list, abstracts=self.abstracts, label=self.label,
                chronological=self.chronological, style=self.style)
        try:
            bib_output = bib_renderer.render()
        except OSError:
            return self._error(
                _('Error: "%(bib2html_cmd)s" could not be found or executed by Bibliography')
                % {"bib2html_cmd": bib_renderer.bib2html_cmd})
        except CalledProcessError:
            # NOTE: the message text (including its spelling) is the
            # lookup key passed to getText and is therefore kept as is
            return self._error(
                _('Error: "%(bib2html_cmd)s" returned a non-zero exit status while being executed by Biblography')
                % {"bib2html_cmd": bib_renderer.bib2html_cmd})

        # write through the rawHTML formatter if possible, otherwise write
        # just the Bibtex markup as preformatted text
        formatter = self.macro.formatter
        output = []
        output.append(formatter.div(1, css_class="bibliography"))
        output.append(formatter.heading(1,1))
        output.append(formatter.escapedText(_("Bibliography")))
        output.append(formatter.heading(0,1))

        try:
            output.append(formatter.rawHTML(bib_output))
        except Exception:
            # deliberate best effort: any formatter failure falls back
            # to showing the plain bibtex markup
            output.append(formatter.preformatted(1))
            output.append(formatter.text(bibtex.expandtabs()))
            output.append(formatter.preformatted(0))

        return "".join(output)

    def _collect_citations(self, text, cit_list):
        """Append the key of every Cite macro found in text to cit_list."""
        for match in self.re_cite.finditer(text):
            key = match.group("key")
            # citation must not be empty and not contain a "}"
            if key != "" and not key.count("}"):
                cit_list.append(key)

    def parse(self, raw_page, find_cite_macro):
        """Parse a page.

        Arguments:
            raw_page        -- raw text of the page
            find_cite_macro -- if true, also collect the keys of Cite
                               macros

        Returns a tuple (cit_list, bib_list): the cited keys in order of
        appearance and the lines of bibtex markup found in "bibtex"
        preformatted blocks, or all content lines if the page itself is
        in bibtex format. Parsing stops at the first content line of a
        page whose format is neither "wiki" nor "bibtex".
        """

        lines = self.eol_re.split(raw_page.expandtabs())
        in_pi_comment = True # inside processing instructions or comments
        in_pre = None # inside a preformatted block
        page_format = "wiki"
        bib_list = [] # contains found embedded bibtex markup
        cit_list = [] # contains found citations if requested

        for line in lines:
            if in_pi_comment:
                # inside processing instructions or comments at the beginning
                # of the page
                m = self.re_pi_comment.match(line)
                if m:
                    if m.group("format") is not None:
                        # set page format, the last one applies to the
                        # whole page
                        page_format = m.group("format")
                    # the instruction itself is never page content
                    continue
                # not a processing instruction or comment
                in_pi_comment = False
            elif in_pre is not None: # formatter might be empty!
                # inside preformatted block
                end = self.re_formatted_end.search(line)
                if in_pre == "bibtex":
                    # format of preformatted block is bibtex
                    if not end:
                        # preformatted block in bibtex format continues
                        bib_list.append(line)
                        continue
                    # preformatted block in bibtex format ends here
                    bib_list.append(end.group(1))
                    in_pre = None
                    line = line[end.end():]
                elif in_pre == "wiki":
                    # preformatted block is in wiki format; a line which
                    # does not end the block is parsed like normal text
                    if end:
                        # preformatted block ends here
                        in_pre = None
                        if find_cite_macro:
                            # collect citations from the part inside
                            # the block (everything before the "}}}")
                            self._collect_citations(
                                    line[:end.end() - 3], cit_list)
                        # continue parsing outside of this block, there
                        # might be further citations
                        line = line[end.end():]
                else:
                    # preformatted block is not in wiki or bibtex format
                    if not end:
                        # preformatted block continues
                        continue
                    # preformatted block ends here, continue parsing
                    # outside of this block, there might be citations
                    in_pre = None
                    line = line[end.end():]

            if page_format == "bibtex":
                # page is in bibtex format
                bib_list.append(line)
                continue
            elif page_format != "wiki":
                # page is in an unknown, unsupported format
                break

            if self.re_comment.match(line):
                continue

            m = self.re_formatted.search(line)
            if m:
                if m.group("format") is None:
                    # if there is no "#!" after "{{{" format will be None,
                    # thus default to ""
                    in_pre = ""
                else:
                    # set the formatter to the found value
                    in_pre = m.group("format")
                continue

            if find_cite_macro:
                # filter commands which do not allow (cite) macros inside
                line = self.re_filter.sub("", line)
                self._collect_citations(line, cit_list)

        return cit_list, bib_list


def execute(macro, args):
    """Create a Bibliography for the given macro object and argument
    string and return the output of its run() method."""
    return Bibliography(macro, args).run()

