Attachment 'FormatConverter-1.0.py'
Download 1 # -*- coding: iso-8859-1 -*-
2 """
3 MoinMoin - convert various input formats into output formats
4
5 <<FormatConverter(ATTACHMENT_OR_URL, INPUT_FILETYPE, OUTPUT_FILETYPE, WIDTH=0, HEIGHT=0, EXTRAS="")>>
6
7 Features:
8 * fetches source file from urls or page attachments
9 * converts the input file via a local conversion tool (dia, inkscape, ...) to the output format
10 * checks timestamp of content for updating cached image
11 * uses cache for input and output file
12
13 Requires the external conversion program:
14 pdf -> svg: pdf2svg
15 pdf -> png: pstoimg
16 dia -> (svg|png): dia
17 dot -> (svg|png): dot
18 eps -> (svg|png): inkscape
19 xcf -> (svg|png): xcftools
20
21 Most of the code is based on the pdf2img macro created by Reimar Bauer.
22
23 @copyright: 2011 MoinMoin:ReimarBauer
24 @copyright: 2014 MoinMoin:LarsKruse
25 @license: GNU GPL v3 or later
26
27 ----
28
29 Changelog:
30
31 v1.0 - 2014/09/22
32 * initial release
33 * the following conversions are supported:
34 * pdf|dia|dot|eps -> svg|png
35 * xcf -> png
36
37 ----
38
39 Usage:
40
41 <<FormatConverter(diagram.dia, dia, svg)>>
42
43 <<FormatConverter(graphviz.dot, dot, png, width=300)>>
44
45 <<FormatConverter(book.pdf, pdf, svg, extras=page:1)>>
46
47 <<FormatConverter(http://example.org/article.pdf, pdf, png, height=1024)>>
48
49 """
50
51 from MoinMoin import log
52 logging = log.getLogger(__name__)
53
54 import os
55 import urllib2
56 import httplib
57 import subprocess
58 from urlparse import urlparse
59
60 from MoinMoin import caching, config
61 from MoinMoin.util.SubProcess import Popen
62 from MoinMoin.action import AttachFile, cache
63
# arena and scope that are passed to caching.CacheEntry for all cache items
CACHE_ARENA = 'sendcache'
CACHE_SCOPE = 'wiki'

# List all input and output document types here.
# Every type that is used in a key of get_conversion_map() needs an entry,
# because macro_FormatConverter looks up the content type of both the source
# and the target type in this map.
CONTENT_TYPE_MAP = {
    "svg": 'image/svg+xml',
    "png": 'image/png',
    "dia": 'application/dia',
    "dot": 'application/graphviz',
    "eps": 'application/postscript',
    "ps": 'application/postscript',
    "pdf": 'application/pdf',
    "xcf": 'image/x-xcf',
}
77
78
79 # map pairs of (input, output) to conversion functions
80 # we use a function instead of a dict for lazy evaluation
def get_conversion_map():
    """
    Return the table of supported conversions.

    The keys are (input type, output type) tuples, the values are the
    converter functions handling that pair. The table is built on every call
    instead of at import time, so the converter names are only resolved when
    the function is called.
    """
    conversions = {
        ("dot", "svg"): dot_converter,
        ("dot", "png"): dot_converter,
        ("dia", "svg"): dia_converter,
        ("dia", "png"): dia_converter,
        ("pdf", "png"): pdf2png_converter,
        ("pdf", "svg"): pdf2svg_converter,
        # alternative implementation: xcf2png_converter_imagemagick
        ("xcf", "png"): xcf2png_converter_xcftools,
        ("eps", "svg"): ps_converter,
        ("eps", "png"): ps_converter,
        ("ps", "svg"): ps_converter,
        ("ps", "png"): ps_converter,
    }
    return conversions
96
97
class SourceNotFound(KeyError):
    """The source (attachment or URL) of a conversion could not be found."""


class AttachmentSourceNotFound(SourceNotFound):
    """The source was given as an attachment, but this attachment does not exist."""


class ConversionFailed(RuntimeError):
    """A conversion did not succeed or the output format is not supported."""
101
102
def exec_cmd_tokens(_, args):
    """
    Run an external program and collect its output.

    @param _: translation function used for the error message
    @param args: the program name followed by its arguments
    @return: tuple (stdout, stderr, returncode) of the finished process;
             if the program could not be started: ("", error message, -1)
    """
    try:
        process = Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        captured_out, captured_err = process.communicate()
    except OSError:
        return "", _("Failed to run '%s'") % args[0], -1
    return captured_out, captured_err, process.returncode
110
111
def url_exists(url):
    """
    Check whether a URL can be opened and delivers at least one byte.

    @param url: the URL to be probed
    @return: True if the URL could be opened and its content is not empty,
             False otherwise (including failures to open or read the URL)
    """
    try:
        item = urllib2.urlopen(url)
        try:
            # the size is passed positionally: not every response object
            # accepts it as a keyword argument (e.g. plain file objects)
            content = item.read(1)
        finally:
            # release the handle even if reading failed
            item.close()
        return len(content) > 0
    except (IOError, urllib2.HTTPError, ValueError):
        return False
120
121
def last_modified(request=None, pagename=None, attachment=None, url=""):
    """
    Determine the modification timestamp of an attachment or of a remote URL.

    @param request: a moinmoin request instance (only used for attachments)
    @param pagename: name of the page owning the attachment (only used for attachments)
    @param attachment: file name of the attachment (only used for attachments)
    @param url: remote URL; if it is empty, the attachment is inspected instead
    @return: for attachments: the modification time of the file formatted via
             request.user.getFormattedDateTime;
             for URLs: the value of the 'last-modified' response header
             (None if the server did not send this header)
    """
    if not url:
        attachment_path = os.path.join(AttachFile.getAttachDir(request, pagename), attachment).encode(config.charset)
        filestat = os.stat(attachment_path)
        return request.user.getFormattedDateTime(filestat.st_mtime)

    parse_result = urlparse(url)
    # https URLs need an encrypted connection
    if parse_result.scheme == "https":
        connection_class = httplib.HTTPSConnection
    else:
        connection_class = httplib.HTTPConnection
    # an empty path is not a valid request target and the query string is
    # part of the resource's address
    target = parse_result.path or "/"
    if parse_result.query:
        target += "?" + parse_result.query
    conn = connection_class(parse_result.netloc)
    try:
        # only the response headers are evaluated - the body is never read
        conn.request("GET", target)
        response = conn.getresponse()
        return response.getheader('last-modified')
    finally:
        conn.close()
133
134
def get_cache_item_filename(request, cache_key):
    """
    Return the name of the file behind the data cache entry of a cache key.

    @param request: a moinmoin request instance
    @param cache_key: the key of the cache item
    """
    cache_entry = get_cache_data(request, cache_key)
    return cache_entry._fname
140
141
def get_cache_data(request, cache_key):
    """
    Create the cache entry object for the data part ("<cache_key>.data") of a cache item.

    @param request: a moinmoin request instance
    @param cache_key: the key of the cache item
    @return: a caching.CacheEntry (created without locking)
    """
    entry_name = "%s.data" % cache_key
    return caching.CacheEntry(request, CACHE_ARENA, entry_name, CACHE_SCOPE, do_locking=False)
147
148
def fetch_source_item(request, url, content_type, timestamp):
    """
    Fetch the source item and store it as a cache file.

    @param request: a moinmoin request instance
    @param url: the URL to be downloaded
    @param content_type: content type to be stored along with the cached data
    @param timestamp: modification timestamp of the URL (part of the cache key);
                      may be None or empty if it is unknown
    @return: the file name of the cache item or None if the download failed
    """
    # a missing timestamp (None) must not break the calculation of the key
    cache_key = cache.key(request, itemname="multi_convert", content=(url + (timestamp or "")))
    try:
        item = urllib2.urlopen(url)
    except (IOError, urllib2.HTTPError, ValueError) as err:
        logging.info("Failed to fetch the conversion source: %s" % url)
        logging.debug("%s: %s" % (url, err))
        return None
    try:
        cache.put(request, cache_key, item.read(), content_type=content_type)
    finally:
        # release the handle even if reading or caching failed
        item.close()
    return get_cache_item_filename(request, cache_key)
164
165
166 def _parse_extra_options(text):
167 result = {}
168 for token in text.split():
169 if ":" in token:
170 key, value = token.split(":", 1)
171 else:
172 key, value = token, True
173 result[key] = value
174 return result
175
176
def is_attachment(source):
    """
    Tell attachments from URLs: every source without a "://" is treated as an attachment.
    """
    if "://" in source:
        return False
    return True
179
180
def _get_source_and_cache(macro, source, source_content_type, cache_key_suffix):
    """
    Generate the cache key to be used for storing data and put the source data
    (specified as an attachment or remote URL) into the cache.

    @param macro: the original macro object containing the current request
    @param source: use the specified attachment or URL as a source
    @param source_content_type: content type of the source data (e.g. "application/pdf")
    @param cache_key_suffix: a unique unicode string to be used for this specific conversion
        all relevant options need to be included in this string (e.g. output format, size, ...)
    @return: tuple (local file name of the source data, cache key of the conversion target)
    @raise AttachmentSourceNotFound: the source is an attachment that does not exist
    @raise SourceNotFound: the source is a URL that could not be retrieved
    """
    request = macro.request
    pagename = request.page.page_name

    # determine source URL and cache key
    if is_attachment(source):
        page_name, filename = AttachFile.absoluteName(source, pagename)
        if not AttachFile.exists(request, page_name, filename):
            raise AttachmentSourceNotFound("attachment: %s does not exists" % source)
        identifier = last_modified(request, page_name, filename)
        source_url = AttachFile.getAttachUrl(page_name, filename, request)
        source_file = AttachFile.getFilename(request, page_name, filename).encode(config.charset)
    else:
        if not url_exists(source):
            raise SourceNotFound("url: %s does not exists" % source)
        # the server may not announce a modification time: use an empty
        # identifier in this case instead of passing None around
        identifier = last_modified(url=source) or ""
        source_url = source
        # store the source data in a local download cache
        source_file = fetch_source_item(request, source, source_content_type, identifier)
        if source_file is None:
            # the download failed: there is no input file for the converter
            raise SourceNotFound("url: %s could not be retrieved" % source)
    logging.debug("%s: %s" % (source_url, identifier))
    target_cache_key = cache.key(request, itemname=pagename, content="%s.%s.%s.%s" % (macro.name, source_url, identifier, cache_key_suffix))

    return source_file, target_cache_key
214
215
def _do_conversion(request, source_file, target_cache_key, conversion_func,
        target_content_type, conversion_details):
    """
    Run the given conversion function and store the result in the cache.

    Nothing is done if the cache already contains an item for the target key.

    @param request: a moinmoin request instance
    @param source_file: local name of the input file
    @param target_cache_key: the cache key to be used for storing the conversion result
    @param conversion_func: a conversion function that expects four parameters:
        request.getText, input_filename, output_filename, details
        It returns a tuple (output message, error message, returncode).
    @param target_content_type: content type of the target (e.g. "image/png")
    @param conversion_details: dictionary of conversion options that is handed
        over to the conversion function
    @raise ConversionFailed: the conversion function returned a non-zero returncode
    """
    _ = request.getText
    if cache.exists(request, target_cache_key):
        # nothing to be done
        return
    target_file = get_cache_item_filename(request, target_cache_key)
    output_msg, error_msg, returncode = conversion_func(_, source_file, target_file, conversion_details)
    if returncode != 0:
        raise ConversionFailed(_("Conversion failed: %s") % error_msg)
    # explicitly transfer the file to the cache (just to be sure that moin's cache is updated)
    target_handle = open(target_file, 'rb')
    try:
        cache.put(request, target_cache_key, target_handle.read(),
                content_type=target_content_type)
    finally:
        # do not leak the file handle if reading or caching fails
        target_handle.close()
240
241
def _get_conversion_result(request, width, height, cache_key, attachment_name=None):
    """
    Return the formatted image markup for a cached conversion result.

    @param request: a moinmoin request instance
    @param width: width of the embedded image (omitted if it is zero or empty)
    @param height: height of the embedded image (omitted if it is zero or empty)
    @param cache_key: the key of the cache item that should be displayed
    @param attachment_name: if given, the image is wrapped in a link to this attachment
    @return: the formatted output or an empty string if the cache item does not exist
    """
    if not cache.exists(request, cache_key):
        # failed to convert input
        return ""
    formatter = request.formatter
    image_args = {}
    if width:
        image_args["width"] = width
    if height:
        image_args["height"] = height
    pieces = []
    if attachment_name:
        pieces.append(formatter.attachment_link(1, url=attachment_name))
    pieces.append(formatter.image(src=cache.url(request, cache_key), alt="", **image_args))
    if attachment_name:
        pieces.append(formatter.attachment_link(0))
    return "".join(pieces)
269
270
def dia_converter(_, source_file, target_file, details):
    """
    Convert a dia diagram to png or svg by running 'dia'.

    @param _: translation function used for error messages
    @param source_file: name of the input file
    @param target_file: name of the output file
    @param details: conversion options; "target_type" is required,
        "width" and "height" are optional and only used for png output
    @return: tuple (stdout, stderr, returncode) as returned by exec_cmd_tokens
    @raise ConversionFailed: the target type is neither "png" nor "svg"
    """
    target_type = details["target_type"]
    args = ["dia", "--nosplash", "--export", target_file]
    if target_type == "png":
        args += ["--filter", "png"]
        # target size is only available for png output format
        width = details.get("width", 0)
        height = details.get("height", 0)
        if width or height:
            args += ["--size", "%sx%s" % (width or "", height or "")]
    elif target_type == "svg":
        args += ["--filter", "svg"]
    else:
        # translate the message template first and fill in the value afterwards
        raise ConversionFailed(_("Unsupported output format for dia converter: %s") % target_type)
    args += [source_file]
    return exec_cmd_tokens(_, args)
287
288
def dot_converter(_, source_file, target_file, details):
    """
    Convert a graphviz file to png or svg by running 'dot'.

    @param _: translation function used for error messages
    @param source_file: name of the input file
    @param target_file: name of the output file
    @param details: conversion options; "target_type" is required
    @return: tuple (stdout, stderr, returncode) as returned by exec_cmd_tokens
    @raise ConversionFailed: the target type is neither "png" nor "svg"
    """
    target_type = details["target_type"]
    args = ["dot"]
    if target_type == "png":
        # TODO: implement width/height
        args += ["-Tpng"]
    elif target_type == "svg":
        args += ["-Tsvg"]
    else:
        # translate the message template first and fill in the value afterwards
        raise ConversionFailed(_("Unsupported output format for dot converter: %s") % target_type)
    args += ["-o%s" % target_file, source_file]
    return exec_cmd_tokens(_, args)
301
302
def pdf2png_converter(_, source_file, target_file, details):
    """
    Convert a pdf file to png by running 'pstoimg'.

    @param _: translation function used for error messages
    @param source_file: name of the input file
    @param target_file: name of the output file
    @param details: conversion options (currently unused)
    @return: tuple (stdout, stderr, returncode) as returned by exec_cmd_tokens
    """
    # TODO: implement width/height
    # TODO: handle "page"
    # all command line tokens need to be strings (this includes the density)
    return exec_cmd_tokens(_, ["pstoimg", "-quiet", "-crop", "tblr", "-density", "200", "-type", "png", source_file, "-out", target_file])
307
308
def pdf2svg_converter(_, source_file, target_file, details):
    """
    Convert a single page of a pdf file to svg by running 'pdf2svg'.

    The page is taken from the "page" item of details (default: 1).
    """
    # the page can be a number or a page specification (e.g. 'iii')
    page_spec = str(details.get("page", 1))
    command = ["pdf2svg", source_file, target_file, page_spec]
    return exec_cmd_tokens(_, command)
313
314
def xcf2png_converter_xcftools(_, source_file, target_file, details):
    """
    Convert an xcf file to png by running 'xcf2png' (details are not used).
    """
    command = ["xcf2png", "-o", target_file, source_file]
    return exec_cmd_tokens(_, command)
317
318
def xcf2png_converter_imagemagick(_, source_file, target_file, details):
    """
    Convert an xcf file to png by running 'convert' (details are not used).
    """
    command = ["convert", source_file]
    command += ["-alpha", "on", "-background", "none", "-layers", "merge"]
    command.append("png:%s" % target_file)
    return exec_cmd_tokens(_, command)
321
322
def ps_converter(_, source_file, target_file, details):
    """
    Convert a postscript file to png or svg by running 'inkscape'.

    @param _: translation function used for error messages
    @param source_file: name of the input file
    @param target_file: name of the output file
    @param details: conversion options; "target_type" is required,
        "width" and "height" are optional and only used for png output
    @return: tuple (stdout, stderr, returncode) as returned by exec_cmd_tokens
    @raise ConversionFailed: the target type is neither "png" nor "svg"
    """
    target_type = details["target_type"]
    args = ["inkscape", "--without-gui"]
    if target_type == "png":
        args += ["--export-png", target_file]
        # all command line tokens need to be strings: the sizes may be numbers
        width = details.get("width", 0)
        if width:
            args += ["--export-width", "%s" % width]
        height = details.get("height", 0)
        if height:
            args += ["--export-height", "%s" % height]
    elif target_type == "svg":
        args += ["--export-plain-svg", target_file]
    else:
        # translate the message template first and fill in the value afterwards
        raise ConversionFailed(_("Unsupported output format for postscript converter: %s") % target_type)
    args += [source_file]
    return exec_cmd_tokens(_, args)
340
341
def macro_FormatConverter(macro, source, source_type, target_type, width=0, height=0, extras=""):
    """
    Convert input data from URLs or attachments to image files using MoinMoin's cache and render from there.

    Retrieve the source data, run the conversion function and return the formatted output.

    @param macro: the original macro object containing the current request
    @param source: use the specified attachment or URL as a source
    @param source_type: file type of the source data (e.g. "pdf")
    @param target_type: file type of the target (e.g. "png")
    @param width: the width of the embedded image (0: unspecified)
    @param height: the height of the embedded image (0: unspecified)
    @param extras: a space-separated list of 'key:value' pairs - e.g. used for specifying a page number
    @return: the formatted output: the embedded image, an upload link for a
        missing attachment or an error message
    """
    request = macro.request
    formatter = request.formatter
    _ = request.getText
    source_type = source_type.lower()
    target_type = target_type.lower()
    # NOTE(review): empty macro arguments presumably arrive as None (the
    # original code already guarded 'extras' this way) - normalize the sizes
    # as well, since they are formatted as numbers below
    width = width or 0
    height = height or 0
    if not extras:
        extras = ""
    try:
        conversion_func = get_conversion_map()[(source_type, target_type)]
    except KeyError:
        message = _("The requested conversion is not supported: <%s> to <%s>. Use any of the following instead: %s") % \
                (source_type, target_type, get_conversion_map().keys())
        # The message contains user input and literal angle brackets: emit it
        # as text (formatter.text is expected to escape markup - confirm).
        return formatter.text(message)
    # be tolerant if a convertible type has no content type entry
    source_content_type = CONTENT_TYPE_MAP.get(source_type, "application/octet-stream")
    target_content_type = CONTENT_TYPE_MAP.get(target_type, "application/octet-stream")
    # The types are part of the cache key: otherwise different conversions of
    # the same source (e.g. to svg and to png) would share one cache item.
    cache_key_suffix = " ".join(["source_type=%s" % source_type, "target_type=%s" % target_type,
            "width=%d" % width, "height=%d" % height, extras])
    try:
        source_file, target_cache_key = _get_source_and_cache(macro, source, source_content_type, cache_key_suffix)
    except AttachmentSourceNotFound:
        # display an "upload missing attachment" link
        result = formatter.attachment_link(1, url=source)
        result += formatter.text(_("Upload source file"))
        result += formatter.attachment_link(0)
        return result
    except SourceNotFound as err_msg:
        return formatter.text(_("Could not find source (%s) for conversion: %s") % (source, err_msg))
    # parse 'extras' string
    extra_tokens = _parse_extra_options(extras)
    # run conversion
    conversion_details = {
        "width": width,
        "height": height,
        "source_type": source_type,
        "target_type": target_type,
    }
    conversion_details.update(extra_tokens)
    try:
        _do_conversion(request, source_file, target_cache_key,
                conversion_func, target_content_type, conversion_details)
    except ConversionFailed as err:
        # show the reason within the page instead of failing with a traceback
        if err.args:
            return formatter.text(err.args[0])
        return formatter.text("")
    # return formatted result string
    if is_attachment(source):
        attachment_name = source
    else:
        attachment_name = None
    return _get_conversion_result(request, width, height, target_cache_key, attachment_name=attachment_name)
Attached Files
To refer to attachments on a page, use attachment:filename, as shown below in the list of files. Do NOT use the URL of the [get] link, since this is subject to change and can break easily. You are not allowed to attach a file to this page.