Attachment 'pdf.py'
Download 1 # -*- coding: iso-8859-1 -*-
2 r"""
3 = pdf action =
4 [[TableOfContents]]
5
6 == Purpose ==
7
8 This action is used to get a pdf copy of the page.
9
10 It does an on-the-fly, server-side, html to pdf conversion using
11 ''htmldoc'', and sends an `application/pdf` content to the web
12 browser.
13
14 == Calling sequence ==
15
16 {{{
17 http://my.server/MyWiki/MyPage?action=pdf
18
19 http://my.server/MyWiki/MyPage?action=pdf&style=NAME
20
21 http://my.server/MyWiki/MyPage?action=pdf&rev=NUM
22
23 http://my.server/MyWiki/MyPage?action=pdf&pageaction=ACTION
24
25 http://my.server/MyWiki/MyPage?action=pdf&help=1}}}
26
27 style:::
28 `page` or `book`.
29 These styles can be redefined or extended in the wiki config (see
30 below). Asking for an unknown style will display an error and list
31 available styles.
32
33 rev:::
34 Specifies a page version number.
35
36 pageaction:::
37 Specifies an action for rendering the page. By default and if
38 omitted, the `print` action will be used.
39
40 help:::
41 Displays this help.
42
43
44 == Procedure ==
45
46 Usage:
47 * Append `&action=pdf` to any URL
48 * or insert the macro `[[pdf]]` (coming soon!) in a page, which will
49 insert a small control panel
50
51 Hints on the `book` style:
52
53 * Have at least one heading, else ''htmldoc'' will return an error.
54
55 * ''htmldoc'' will ignore any content before the first heading.
56
57 * ''htmldoc'' automatically generates a table of contents which is
58 looking nicer (has page numbers). To avoid double TOCs, place the
59 `[[TableOfContents]]` macro at start of your page, before any
60 heading. This way, the output of the macro will be ignored by
61 ''htmldoc''.
62
63 Suggestions:
64 * Modify your themes to include a pdf link/icon besides print.
65 * Modify the `AttachFile` standard action to add a pdf link.
66
67
68 == Installation ==
69
70 1. install ''htmldoc'' on the wiki server (on a Debian box:
71 `apt-get install htmldoc`)
72
73 1. copy the [http://moinmoin.wikiwikiweb.de/ActionMarket?action=AttachFile&do=get&target=pdf.py pdf.py]
74 action to the `data/plugin/action` directory of your wiki
75
76
77 == Optional: customizing styles ==
78
79 To install new styles, or modify the default ones, edit
80 `wikiconfig.py` and add:
81 {{{
82 htmldoc_styles = {
83 "page":
84 "--verbose --no-localfiles --no-compression --jpeg " \
85 "--header t.D --footer ./. --size a4 --left 0.5in " \
86 "--webpage",
87 "book":
88 "--verbose --no-localfiles --no-compression --jpeg " \
89 "--header t.D --footer ./. --size a4 --left 0.5in",
90 } }}}
91
92 By this means, you can change the page format from `a4` to `letter`,
93 or add new styles to the dictionary.
94
95 For a complete list of available ''htmldoc'' options, see
96 http://www.easysw.com/htmldoc/docfiles/8-cmdref.html
97
98
99 == Modification History ==
100 {{{
101 @copyright: 2006 by Pascal Bauermeister
102 @license: GNU GPL, see COPYING for details.
103
104 2006-05-24 v1.0.0 PascalBauermeister
105 Initial revision
106
107 2006-05-26 v1.0.1 PascalBauermeister
108 * Set env var HTMLDOC_NOCGI to solve CGI issue
109
110 2006-05-26 v1.0.2 PascalBauermeister
111 * Relative image URLs turned absolute was bogus. It is less bogus now.
112
113 }}}
114 """
115
116 import os, mimetypes, time, zipfile
117 from MoinMoin import config, user, util, wikiutil, packages
118 from MoinMoin.Page import Page
119 from MoinMoin.util import MoinMoinNoFooter, filesys
120
# Name of this action: the last component of the module's dotted name.
action_name = __name__.rsplit('.', 1)[-1]

# Style used when the request does not ask for a particular one.
def_style = 'page'
123
def error_msg(pagename, request, msg):
    """Send the page `pagename`, handing `msg` to it as its message.

    @param pagename: name of the page to send
    @param request: the current request
    @param msg: message passed on to Page.send_page
    """
    page = Page(request, pagename)
    page.send_page(request, msg=msg)
126
127
def format(src_text, request, formatter):
    """Run the wiki parser over `src_text` and return what it produced.

    The request output is redirected into an in-memory buffer while the
    parser runs; the redirection is undone afterwards, whether or not
    parsing succeeded.

    @param src_text: text in wiki source format
    @param request: the current request
    @param formatter: formatter handed to the parser
    @return: everything collected in the buffer, as a string
    """
    import StringIO
    from MoinMoin.parser import wiki

    collected = StringIO.StringIO()
    request.redirect(collected)
    # the formatter works on a placeholder page while parsing
    formatter.setPage(Page(request, "$$$"))
    try:
        parser = wiki.Parser(src_text, request)
        parser.format(formatter)
    finally:
        # give the request its output and the formatter its page back
        request.redirect()
        formatter.setPage(request.page)
    return collected.getvalue()
143
144
def get_style(request, name):
    """Return the htmldoc command-line flags of the style `name`.

    The built-in default styles are defined here. They can be extended
    or replaced by declaring a dictionary of the same shape, named
    htmldoc_styles, in the wikiconfig.py file.

    @param request: the current request
    @param name: name of the wanted style (taken from the query string)
    @return: the flags string of the style, or None if the style is
             unknown; in that case an error page listing the available
             styles has already been sent
    """
    _ = request.getText

    styles = {
        "page":
            "--verbose --no-localfiles --no-compression --jpeg "
            "--header t.D --footer ./. --size a4 --left 0.5in "
            "--webpage",
        "book":
            "--verbose --no-localfiles --no-compression --jpeg "
            "--header t.D --footer ./. --size a4 --left 0.5in",
    }

    # styles from the config override or extend the built-in ones
    try:
        styles.update(request.cfg.htmldoc_styles)
    except AttributeError:
        pass  # no htmldoc_styles in the config: keep the defaults

    if name in styles:
        return styles[name]

    # `name` is user input and the message contains HTML markup, so it
    # must be escaped before being embedded in it.
    msg = _("Unknown style: ") + wikiutil.escape(name) + "<br>" + \
          _("Possible styles: ") + ', '.join(sorted(styles)) + "<br>" + \
          _("Default style: ") + def_style
    error_msg(request.page.page_name, request, msg)
    return None
179
180
def escape(str):
    """Return `str` with the characters &, < and > replaced by their
    HTML entities, so that it can be embedded in HTML text.

    The ampersand is replaced first; otherwise the ampersands of the
    entities produced for < and > would be escaped a second time.

    The parameter name shadows the builtin `str`; it is kept unchanged
    for compatibility with existing callers.
    """
    return str.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
183
184
def execute(pagename, request):
    """ Main dispatcher for the action.

    Reads the optional request arguments `rev`, `pageaction`, `style`,
    `help` and `debug`, then either shows the help text or converts the
    page to pdf and sends it.

    @param pagename: name of the page to convert
    @param request: the current request
    """
    form = request.form

    if 'help' in form:
        help_text = format(__doc__, request, request.formatter)
        error_msg(pagename, request, help_text)
        return

    # each form value is a list; only the first item is used
    revision = form.get('rev', [None])[0]
    pageaction = form.get('pageaction', [None])[0]
    style = form.get('style', [def_style])[0]
    debug = int('debug' in form)

    # Validate the style before rendering the page: get_html re-runs the
    # whole request, which is wasted work if the style is unknown.
    # get_style returns None (after sending an error page) for an unknown
    # style; an empty flags string is a valid style.
    flags = get_style(request, style)
    if flags is None:
        return

    # make pdf
    html = get_html(request, revision, pageaction, debug)
    data = html2pdf(request, html, flags, debug)
    if not data:
        return

    # send it
    filename = pagename.replace('/', '-') + ".pdf"
    send_pdf(filename, request, data, debug)
222
223
def send_pdf(filename, request, data, debug):
    """Send `data` to the client, then raise MoinMoinNoFooter.

    @param filename: file name announced in the Content-Disposition header
    @param request: the current request; headers and data are written to it
    @param data: the content to send
    @param debug: if true, the content is declared as text/plain instead
                  of application/pdf
    @raise MoinMoinNoFooter: always, once the data has been written
    """
    import shutil, StringIO

    # pick the mimetype
    if debug:
        content_type = "text/plain"
    else:
        content_type = "application/pdf"

    # send header
    headers = [
        "Content-Type: %s" % content_type,
        "Content-Length: %d" % len(data),
        # TODO: fix the encoding here, plain 8 bit is not allowed
        # according to the RFCs There is no solution that is
        # compatible to IE except stripping non-ascii chars
        "Content-Disposition: inline; filename=\"%s\"" %
        filename.encode(config.charset),
    ]
    request.http_headers(headers)

    # send data, copied to the request in chunks of 8192
    shutil.copyfileobj(StringIO.StringIO(data), request, 8192)

    raise MoinMoinNoFooter
247
248
# Case-insensitive pattern for <img> tags whose src value starts with "/":
#   group 1: the tag text from "<img" up to and including the blank
#            right before "src="
#   group 2: the src value from its leading "/" through the next ">"
# The single character following "src=" (normally the opening quote) is
# matched but belongs to neither group.
import re
RX_IMG = re.compile(r'(<img.*? )src=.(/.*?>)', re.IGNORECASE)
252
253
def get_html(request, revision, pageaction, debug):
    """
    Get the html of this page, by re-running a copy of the request and
    capturing what it writes to stdout.

    @param request: the request for the page
    @param revision: wanted revision (None for head)
    @param pageaction: action for page (None for print)
    @param debug: not used by this function
    @return: the html, with the <img> URLs starting with "/" prefixed by
             the qualified URL of the request; if the output does not
             look like html, it is returned wrapped in <pre>...</pre>
    """
    import copy, sys, StringIO

    # asking for this very action as page action falls back to the
    # default (presumably to avoid recursing into this action)
    if pageaction == action_name:
        pageaction = None

    # Get HTML for the page:
    # run a copy of the request with a modified form and intercept the
    # output. stdout and the form are restored even if the run fails.
    orig_form = {}
    orig_form.update(request.form)
    old_stdout = sys.stdout
    out = StringIO.StringIO()
    try:
        if pageaction:
            request.form["action"] = [pageaction]
        else:
            request.form["action"] = [u"print"]
        if revision:
            request.form["rev"] = [revision]
        sys.stdout = out
        copy.copy(request).run()
        html = out.getvalue()
    finally:
        sys.stdout = old_stdout
        request.form = orig_form
        out.close()

    # Do some post-processing on the html

    # normalize line ends: crlf and cr -> lf
    html = html.replace('\r\n', '\n').replace('\r', '\n')

    # remove headers: skip everything before the first blank line (if
    # there is no blank line at all, nothing is left)
    lines = html.split('\n')
    body_start = 0
    while body_start < len(lines) and lines[body_start].strip():
        body_start += 1
    html = '\n'.join(lines[body_start:])

    # look for non-html content (TODO: better done by examining
    # headers)
    start = html.strip()[0:20].lower()
    if not (start.startswith("<html") or start.startswith("<!doctype html")):
        return "<pre>%s</pre>" % html

    # make images URLs absolute: insert the URL base right in front of
    # the URL of each <img ... src="/URL"> match. Working on match
    # positions (instead of replacing the matched text everywhere) keeps
    # URLs that merely contain another matched URL intact.
    base = request.getQualifiedURL()
    pieces = []
    pos = 0
    for match in RX_IMG.finditer(html):
        url_start = match.start(2)
        pieces.append(html[pos:url_start])
        pieces.append(base)
        pos = url_start
    pieces.append(html[pos:])
    # this is not really proof... a real HTML parser would be needed

    return ''.join(pieces)
317
318
def html2pdf(request, html, flags, debug):
    """Convert `html` to pdf by piping it through the htmldoc command.

    @param request: the current request (used to send the error page)
    @param html: the html document to convert
    @param flags: htmldoc command-line options (execute passes the
                  result of get_style)
    @param debug: if true, `html` is returned instead of the pdf
    @return: the pdf data (or `html` when debugging), or None if htmldoc
             produced no output; in that case an error page showing the
             command and its error output has already been sent
    """
    import subprocess

    # The command line goes through the shell, which also sets the
    # HTMLDOC_NOCGI variable for htmldoc. `flags` is inserted as is.
    cmd = "HTMLDOC_NOCGI=1 htmldoc -t pdf %s -" % flags
    proc = subprocess.Popen(cmd, shell=True,
                            stdin=subprocess.PIPE,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE)
    # communicate() feeds stdin and drains stdout and stderr together;
    # writing and reading the pipes one after the other can block
    # forever once one of the pipe buffers is full.
    pdf, errors = proc.communicate(html)

    # success is still judged by the length of the produced pdf, not by
    # the exit status of the command
    if not pdf:
        msg = "Command: <pre>" + escape(cmd) + "</pre>returned:<pre>" + \
              escape(errors).replace('\n', '<br>') + "</pre>"
        error_msg(request.page.page_name, request, msg)
        # the error page is the response: return nothing, so that the
        # caller does not send a second one (also when debugging)
        return None

    if debug:
        return html
    return pdf
Attached Files
To refer to attachments on a page, use attachment:filename, as shown below in the list of files. Do NOT use the URL of the [get] link, since this is subject to change and can break easily. You are not allowed to attach a file to this page.