# -*- coding: iso-8859-1 -*-
r"""
= pdf action =
[[TableOfContents]]

== Purpose ==

This action is used to get a pdf copy of the page.

It does an on-the-fly, server-side, html to pdf conversion using
''htmldoc'', and sends an `application/pdf` content to the web
browser.

== Calling sequence ==

{{{
http://my.server/MyWiki/MyPage?action=pdf

http://my.server/MyWiki/MyPage?action=pdf&style=NAME

http://my.server/MyWiki/MyPage?action=pdf&rev=NUM

http://my.server/MyWiki/MyPage?action=pdf&pageaction=ACTION

http://my.server/MyWiki/MyPage?action=pdf&help=1}}}

 style:::
   `page` or `book`.
   These styles can be redefined or extended in the wiki config (see
   below).  Asking for an unknown style will display an error and list
   available styles.

 rev:::
   Specifies a page version number.

 pageaction:::
  Specifies an action for rendering the page. By default and if
  omitted, the `print` action will be used.

 help:::
   Displays this help.


== Procedure ==

Usage:
 * Append `&action=pdf` to any URL
 * or insert the macro `[[pdf]]` (coming soon!) in a page, which will
 insert a small control panel

Hints on the `book` style:

 * Have at least one heading, else ''htmldoc'' will return an error.
 
 * ''htmldoc'' will ignore any content before the first heading.

 * ''htmldoc'' automatically generates its own table of contents, which
 looks nicer (it has page numbers). To avoid double TOCs, place the
 `[[TableOfContents]]` macro at the start of your page, before any
 heading. This way, the output of the macro will be ignored by
 ''htmldoc''.

Suggestions:
 * Modify your themes to include a pdf link/icon besides print.
 * Modify the `AttachFile` standard action to add a pdf link.


== Installation ==

 1. install ''htmldoc'' on the wiki server (on a Debian box:
 `apt-get install htmldoc`)

 1. copy the [http://moinmoin.wikiwikiweb.de/ActionMarket?action=AttachFile&do=get&target=pdf.py pdf.py] 
 action to the `data/plugin/action` directory of your wiki


== Optional: customizing styles ==

To install new styles, or modify the default ones, edit
`wikiconfig.py` and add:
{{{
    htmldoc_styles = {
        "page":
            "--verbose --no-localfiles --no-compression --jpeg " \
            "--header t.D --footer ./. --size a4 --left 0.5in " \
            "--webpage",
        "book":
            "--verbose --no-localfiles --no-compression --jpeg " \
            "--header t.D --footer ./. --size a4 --left 0.5in",
        } }}}

By this means, you can change the page format from `a4` to `letter`,
or add new styles to the dictionary.

For a complete list of available ''htmldoc'' options, see
http://www.easysw.com/htmldoc/docfiles/8-cmdref.html


== Modification History ==
{{{
@copyright: 2006  by Pascal Bauermeister
@license: GNU GPL, see COPYING for details.

2006-05-24 v1.0.0 PascalBauermeister
 Initial revision

2006-05-26 v1.0.1 PascalBauermeister
 * Set env var HTMLDOC_NOCGI to solve CGI issue

2006-05-26 v1.0.2 PascalBauermeister
 * Relative image URLs turned absolute was bogus. It is less bogus now.

}}}
"""

import os, mimetypes, time, zipfile
from MoinMoin import config, user, util, wikiutil, packages
from MoinMoin.Page import Page
from MoinMoin.util import MoinMoinNoFooter, filesys

# Name of this action: the last dotted component of this module's name.
action_name = __name__.split('.')[-1]
# Name of the style used when the request does not ask for one.
def_style = 'page'

def error_msg (pagename, request, msg):
    """Send the page named `pagename`, handing `msg` over as its message.

    @param pagename: name of the page to send
    @param request:  the current request
    @param msg:      text passed to send_page as the `msg` argument
    """
    page = Page (request, pagename)
    page.send_page (request, msg=msg)

        
def format (src_text, request, formatter):
    """Render text in wiki source format and return the generated output.

    While the wiki parser runs, the request output is redirected into a
    string buffer and the formatter is attached to a throw-away page.
    Both changes are undone afterwards, even if parsing raises.

    @param src_text:  the text to render, in wiki source format
    @param request:   the current request; its output is redirected
                      for the duration of the call
    @param formatter: formatter handed to the wiki parser
    @return: everything that was written while parsing, as a string
    """
    import StringIO
    from MoinMoin.parser import wiki

    str_out = StringIO.StringIO ()      # create str to collect output
    request.redirect (str_out)          # divert output to that string
    try:
        # parse with the formatter pointing at a dummy page
        formatter.setPage (Page (request, "$$$"))
        wiki.Parser (src_text, request).format (formatter)
    finally:
        # always restore the output and the formatter's page, so that a
        # failing parse does not leave the shared formatter on "$$$"
        request.redirect ()
        formatter.setPage (request.page)
    return str_out.getvalue ()          # return what was generated


def get_style (request, name):
    """Return the htmldoc command-line options for the style `name`.

    Built-in, default styles are defined here. These styles can be
    extended or replaced by declaring a dictionary similar to `styles`
    below, but named htmldoc_styles, in the wikiconfig.py file.

    If `name` is not a known style, an error page listing the available
    styles is sent and None is returned.

    @param request: the current request (gives access to the wiki config)
    @param name:    name of the wanted style; may come straight from the
                    URL, so it is escaped before being displayed
    @return: string of htmldoc options, or None for an unknown style
    """
    _ = request.getText

    styles = {
        "page":
        "--verbose --no-localfiles --no-compression --jpeg " \
        "--header t.D --footer ./. --size a4 --left 0.5in " \
        "--webpage",
        "book":
        "--verbose --no-localfiles --no-compression --jpeg " \
        "--header t.D --footer ./. --size a4 --left 0.5in",
        }

    # styles from the config, if any, extend or override the defaults
    cfg_styles = getattr (request.cfg, 'htmldoc_styles', None)
    if cfg_styles:
        styles.update (cfg_styles)

    try: # get wanted style
        return styles [name]
    except KeyError:
        pass

    # unknown style: tell the user what is available
    names = list (styles.keys ())
    names.sort ()
    msg = _("Unknown style: ") + escape (name) + "<br>" + \
          _("Possible styles: ") + ', '.join (names) + "<br>" + \
          _("Default style: ") + def_style
    error_msg (request.page.page_name, request, msg)
    return None

            
def escape (str):
    """Return `str` with the characters &, < and > replaced by their
    HTML entities (&amp;, &lt; and &gt;).
    """
    # '&' is handled first so that the ampersands introduced by the
    # other two entities are not escaped a second time
    escaped = str
    for raw, entity in (('&', '&amp;'), ('<', '&lt;'), ('>', '&gt;')):
        escaped = escaped.replace (raw, entity)
    return escaped


def execute (pagename, request):
    """ Main dispatcher for the action.

    Reads the optional request arguments `rev`, `pageaction`, `style`,
    `help` and `debug`, renders the page to html, converts it to pdf
    and sends the result to the browser.

    @param pagename: name of the page to convert
    @param request:  the current request
    """
    form = request.form

    def first_value (key, default=None):
        # form values are lists; use the first one when the key is given
        if key in form:
            return form [key] [0]
        return default

    revision = first_value ('rev')
    pageaction = first_value ('pageaction')
    style = first_value ('style', def_style)

    if 'help' in form:
        # display this module's documentation, rendered as wiki markup
        help_html = format (__doc__, request, request.formatter)
        error_msg (pagename, request, help_html)
        return

    debug = 0
    if 'debug' in form:
        debug = 1

    # validate the style before doing the expensive page rendering;
    # get_style returns None after reporting an unknown style
    flags = get_style (request, style)
    if flags is None:
        return

    # make pdf
    html = get_html (request, revision, pageaction, debug)
    data = html2pdf (request, html, flags, debug)
    if not data:
        # nothing to send (html2pdf reports a failed conversion itself)
        return

    # send it
    filename = pagename.replace ('/', '-') + ".pdf"
    send_pdf (filename, request, data, debug)
    return


def send_pdf (filename, request, data, debug):
    """Send `data` to the client as an inline document named `filename`.

    The content type is application/pdf, or text/plain when `debug` is
    set. The function never returns normally: once the data has been
    written it raises MoinMoinNoFooter.

    @param filename: file name announced in the Content-Disposition header
    @param request:  the current request; headers and data are written to it
    @param data:     the content to send
    @param debug:    if true, announce the content as plain text
    """
    import shutil, StringIO

    # choose the mimetype
    if debug:
        content_type = "text/plain"
    else:
        content_type = "application/pdf"

    # build and send the header
    headers = []
    headers.append ("Content-Type: %s" % content_type)
    headers.append ("Content-Length: %d" % len (data))
    # TODO: fix the encoding here, plain 8 bit is not allowed
    # according to the RFCs There is no solution that is
    # compatible to IE except stripping non-ascii chars
    headers.append ("Content-Disposition: inline; filename=\"%s\"" %
                    filename.encode (config.charset))
    request.http_headers (headers)

    # send the data, copied to the request in chunks of 8192
    source = StringIO.StringIO (data)
    shutil.copyfileobj (source, request, 8192)

    raise MoinMoinNoFooter


# regex to match the source url of <img> tags (case-insensitive):
#   group 1: the start of the tag, from "<img" up to and including the
#            space in front of "src="
#   the single "." after "src=" consumes one character (presumably the
#            opening quote) and is not captured
#   group 2: from the leading "/" of the url up to and including the
#            next ">"
# Only urls starting with "/" are matched.
import re
RX_IMG = re.compile (r'(<img.*? )src=.(/.*?>)', re.I)


def get_html (request, revision, pageaction, debug):
    """
    get the html body of this page

    A copy of the request is re-run with another action while
    sys.stdout is diverted; the captured output is then stripped of its
    leading header lines and the image URLs starting with '/' are made
    absolute.

    @param request:    the request for the page
    @param revision:   wanted revision (None for head)
    @param pageaction: action for page (None for print)
    @param debug:      not used by this function
    @return: the html text; output that does not start like an html
             document is returned wrapped in <pre>...</pre>
    """
    import copy, sys, StringIO

    # Get HTML for the page:
    #   make a copy of the request, run it and intercept the output
    orig_form = {}
    orig_form.update (request.form)
    old_stdout = sys.stdout
    out = StringIO.StringIO ()
    try:
        #   set form (asking for this very action falls back to print):
        if pageaction == action_name: pageaction = None
        if pageaction: request.form ["action"] = [pageaction]
        else:          request.form ["action"] = [u"print"]
        if revision:   request.form ["rev"] = [revision]
        #   divert stdout, re-run the request
        sys.stdout = out
        copy.copy (request).run ()
        html = out.getvalue ()
    finally:
        #   restore things, even if the nested run raised
        sys.stdout = old_stdout
        request.form = orig_form
        out.close ()

    # Do some post-processing on the html

    #   crlf / cr -> lf
    html = html.replace ('\r\n', '\n')
    html = html.replace ('\r', '\n')

    #   remove headers: drop everything before the first blank line
    #   (the emptiness test avoids an IndexError if there is none)
    lines = html.split ('\n')
    while lines and lines [0].strip (): del lines [0]
    html = '\n'.join (lines)

    #   look for non-html content (TODO: better done by examining
    #   headers)
    start = html.strip () [0:20] .lower ()
    if not (start.startswith ("<html") or start.startswith ("<!doctype html")):
        return "<pre>%s</pre>" % html

    #   make images URLs absolute:
    #   find URL in <img ... src="URL">, and replace URL by the same
    #   prefixed by the URL base. The base is inserted at the position
    #   of each match, so that one URL being the tail of another one
    #   cannot corrupt the latter.
    base = request.getQualifiedURL()

    def add_base (match):
        tag = match.group (0)
        url_pos = match.start (2) - match.start ()  # offset of URL in tag
        return tag [:url_pos] + base + tag [url_pos:]

    html = RX_IMG.sub (add_base, html)
    #   this is not really proof... but I'm too lazy to do an HTML
    #   parser for this !

    return html

    
def html2pdf (request, html, flags, debug):
    """Convert `html` to pdf by piping it through the htmldoc program.

    @param request: the current request, used to report a failure
    @param html:    the html text, written to htmldoc's standard input
    @param flags:   string of additional htmldoc command-line options
    @param debug:   if true, return `html` itself instead of the pdf
    @return: the pdf data (or `html` in debug mode); None if htmldoc
             produced no output, in which case an error page showing
             the command and its error output has been sent
    """
    import subprocess

    cmd = "HTMLDOC_NOCGI=1 htmldoc -t pdf %s -" % flags
    proc = subprocess.Popen (cmd, shell=True,
                             stdin=subprocess.PIPE,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE)
    # communicate() feeds stdin while draining stdout and stderr, so
    # neither side can block forever on a full pipe
    pdf, errors = proc.communicate (html)

    # a failure is detected by checking the produced pdf length, as in
    # the original implementation
    # NOTE(review): proc.returncode is available now and could be
    # checked as well -- confirm htmldoc's exit status conventions first
    if not pdf:
        msg = "Command: <pre>" + escape (cmd) + "</pre>returned:<pre>"+ \
              escape (errors).replace ('\n','<br>') + "</pre>"
        error_msg (request.page.page_name, request, msg)
        # the error page has been sent: there is nothing left to send
        return None

    if debug: return html
    return pdf