Attachment 'FormatConverter-1.1.py'
Download 1 # -*- coding: iso-8859-1 -*-
2 """
3 MoinMoin - convert various input formats into output formats
4
5 <<FormatConverter(ATTACHMENT_OR_URL, INPUT_FILETYPE, OUTPUT_FILETYPE, WIDTH=0, HEIGHT=0, EXTRAS="")>>
6
7 Features:
8 * fetches source file from urls or page attachments
9 * converts the input file via a local conversion tool (dia, inkscape, ...) to the output format
10 * checks timestamp of content for updating cached image
11 * uses cache for input and output file
12
13 Requires the external conversion program:
14 pdf -> svg: pdf2svg
15 pdf -> png: pstoimg
16 dia -> (svg|png): dia
17 dot -> (svg|png): dot
18 eps|ps -> (svg|png): inkscape
19 xcf -> png: xcftools (xcf2png)
20
21 Most of the code is based on the pdf2img macro created by Reimar Bauer.
22
23 @copyright: 2011 MoinMoin:ReimarBauer
24 @copyright: 2014 MoinMoin:LarsKruse
25 @license: GNU GPL v3 or later
26
27 ----
28
29 Changelog:
30
31 v1.0 - 2014/09/22
32 * initial release
33 * the following conversions are supported:
34 * pdf|dia|dot|eps -> svg|png
35 * xcf -> png
36 v1.1 - 2018/10/29
37 * fix minor incompatibility with moinmoin v1.9.9
38
39 ----
40
41 Usage:
42
43 <<FormatConverter(diagram.dia, dia, svg)>>
44
45 <<FormatConverter(graphviz.dot, dot, png, width=300)>>
46
47 <<FormatConverter(book.pdf, pdf, svg, extras=page:1)>>
48
49 <<FormatConverter(http://example.org/article.pdf, pdf, png, height=1024)>>
50
51 """
52
53 from MoinMoin import log
54 logging = log.getLogger(__name__)
55
56 import os
57 import urllib2
58 import httplib
59 import subprocess
60 from urlparse import urlparse
61
62 from MoinMoin import caching, config
63 from MoinMoin.action import AttachFile, cache
64
# arena and scope handed to caching.CacheEntry for all cache items of this macro
CACHE_ARENA = 'sendcache'
CACHE_SCOPE = 'wiki'

# list all input and output document types here
# (every type named in get_conversion_map() needs an entry, since the macro
# looks up both the source and the target type in this map)
CONTENT_TYPE_MAP = {
    "svg": 'image/svg+xml',
    "png": 'image/png',
    "dia": 'application/dia',
    "dot": 'application/graphviz',
    "eps": 'application/postscript',
    "ps": 'application/postscript',
    "pdf": 'application/pdf',
    "xcf": 'image/x-xcf',
}
78
79
# Lookup table from (input type, output type) to the converter to be used.
# This is a function (not a module-level dict), because the converter
# functions are defined further down in this file.
def get_conversion_map():
    """
    Return a dict mapping (source_type, target_type) tuples to conversion functions.
    """
    supported_conversions = (
        (("dot", "svg"), dot_converter),
        (("dot", "png"), dot_converter),
        (("dia", "svg"), dia_converter),
        (("dia", "png"), dia_converter),
        (("pdf", "png"), pdf2png_converter),
        (("pdf", "svg"), pdf2svg_converter),
        # alternative for xcf: xcf2png_converter_imagemagick
        (("xcf", "png"), xcf2png_converter_xcftools),
        (("eps", "svg"), ps_converter),
        (("eps", "png"), ps_converter),
        (("ps", "svg"), ps_converter),
        (("ps", "png"), ps_converter),
    )
    return dict(supported_conversions)
97
98
class SourceNotFound(KeyError):
    """The source of a conversion (attachment or URL) could not be found."""


class AttachmentSourceNotFound(SourceNotFound):
    """The source was specified as an attachment and this attachment does not exist."""


class ConversionFailed(RuntimeError):
    """The conversion could not be run or the conversion tool reported an error."""
102
103
def exec_cmd_tokens(_, args):
    """
    Run an external program and collect its output.

    @param _: translation function (e.g. request.getText), used for the error message
    @param args: the command line as a list of tokens (program name first);
                 tokens that are not strings (e.g. integer sizes) are converted
                 via str(), because subprocess rejects them with a TypeError
    @return: tuple (stdout, stderr, returncode) of the finished process or
             ("", <error message>, -1) if the program could not be started
    """
    # byte strings and text strings are passed through untouched
    string_types = (bytes, type(u""))
    tokens = [arg if isinstance(arg, string_types) else str(arg) for arg in args]
    try:
        proc = subprocess.Popen(tokens, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        stdout, stderr = proc.communicate()
    except OSError:
        return "", _("Failed to run '%s'") % args[0], -1
    return stdout, stderr, proc.returncode
111
112
def url_exists(url):
    """
    Check whether the given URL can be opened and delivers at least one byte.

    @param url: the URL to be probed
    @return: True if at least one byte could be read; False if the response was
             empty or opening/reading failed with IOError, HTTPError or ValueError
    """
    try:
        item = urllib2.urlopen(url)
        try:
            # positional argument: the read() of some response objects does not
            # accept 'size' as a keyword
            content = item.read(1)
        finally:
            # release the connection even if reading failed
            item.close()
    except (IOError, urllib2.HTTPError, ValueError):
        return False
    return len(content) > 0
121
122
def last_modified(request=None, pagename=None, attachment=None, url=""):
    """
    Return a modification identifier for an attachment or for a remote URL.

    @param request: a moinmoin request instance (only used for attachments)
    @param pagename: name of the page carrying the attachment (only used for attachments)
    @param attachment: file name of the attachment (only used for attachments)
    @param url: if non-empty, the remote URL is queried instead of an attachment
    @return: for attachments: the file's mtime formatted via
             request.user.getFormattedDateTime;
             for URLs: the value of the 'last-modified' response header
             (None if the server did not send this header)
    """
    if not url:
        attachment_path = os.path.join(AttachFile.getAttachDir(request, pagename), attachment).encode(config.charset)
        filestat = os.stat(attachment_path)
        return request.user.getFormattedDateTime(filestat.st_mtime)

    parse_result = urlparse(url)
    # talk TLS to https hosts instead of sending plain HTTP to them
    if parse_result.scheme == "https":
        conn = httplib.HTTPSConnection(parse_result.netloc)
    else:
        conn = httplib.HTTPConnection(parse_result.netloc)
    # an empty path is not a valid request target; the query string belongs to the resource
    target = parse_result.path or "/"
    if parse_result.query:
        target += "?" + parse_result.query
    try:
        conn.request("GET", target)
        response = conn.getresponse()
        return response.getheader('last-modified')
    finally:
        # only the header is needed - do not keep the connection open
        conn.close()
134
135
def get_cache_item_filename(request, cache_key):
    """
    Return the file name (the '_fname' attribute) of the cache entry belonging to a cache key.

    @param request: a moinmoin request instance
    @param cache_key: the key of the cache item
    """
    cache_entry = get_cache_data(request, cache_key)
    return cache_entry._fname
141
142
def get_cache_data(request, cache_key):
    """
    Create the cache entry object for the data part ("<cache_key>.data") of a cache item.

    @param request: a moinmoin request instance
    @param cache_key: the key of the cache item
    @return: a caching.CacheEntry (arena CACHE_ARENA, scope CACHE_SCOPE, locking disabled)
    """
    entry_name = "%s.data" % cache_key
    return caching.CacheEntry(request, CACHE_ARENA, entry_name, CACHE_SCOPE, do_locking=False)
148
149
def fetch_source_item(request, url, content_type, timestamp):
    """
    Download the source item and store it in the cache.

    @param request: a moinmoin request instance
    @param url: the URL of the source data
    @param content_type: content type to be stored along with the cached data
    @param timestamp: modification identifier of the source (part of the cache key);
                      may be None if the server did not report one
    @return: file name of the cache item or None if the download failed
    """
    # a missing timestamp (None) must not break the key calculation
    cache_key = cache.key(request, itemname="multi_convert", content=(url + (timestamp or "")))
    try:
        item = urllib2.urlopen(url)
    except (IOError, urllib2.HTTPError, ValueError) as err:
        logging.info(url)
        logging.debug("%s: %s" % (url, err))
        return None
    try:
        cache.put(request, cache_key, item.read(), content_type=content_type)
    finally:
        # release the connection even if reading or caching failed
        item.close()
    return get_cache_item_filename(request, cache_key)
165
166
167 def _parse_extra_options(text):
168 result = {}
169 for token in text.split():
170 if ":" in token:
171 key, value = token.split(":", 1)
172 else:
173 key, value = token, True
174 result[key] = value
175 return result
176
177
def is_attachment(source):
    """
    Tell whether a source specification refers to an attachment.

    Everything containing "://" is treated as a URL, everything else as an attachment.
    """
    return "://" not in source
180
181
def _get_source_and_cache(macro, source, source_content_type, cache_key_suffix):
    """
    Locate the source data and calculate the cache key of the conversion result.

    Attachments are used in place; remote URLs are downloaded into the cache first.

    @param macro: the original macro object containing the current request
    @param source: use the specified attachment or URL as a source
    @param source_content_type: content type of the source data (e.g. "application/pdf")
    @param cache_key_suffix: a unique unicode string to be used for this specific conversion
        all relevant options need to be included in this string (e.g. output format, size, ...)
    @return: tuple (local file name of the source, cache key for the conversion result)
    @raise AttachmentSourceNotFound: the attachment does not exist
    @raise SourceNotFound: the URL could not be read
    """
    request = macro.request
    current_page = request.page.page_name

    if not is_attachment(source):
        # remote source: verify it, then keep a local copy in the download cache
        if not url_exists(source):
            raise SourceNotFound("url: %s does not exists" % source)
        identifier = last_modified(url=source)
        source_url = source
        # NOTE(review): fetch_source_item returns None if the download fails;
        # this value is handed on unchecked
        source_file = fetch_source_item(request, source, source_content_type, identifier)
    else:
        # attachment: resolve the page it belongs to and use the file directly
        page_name, filename = AttachFile.absoluteName(source, current_page)
        if not AttachFile.exists(request, page_name, filename):
            raise AttachmentSourceNotFound("attachment: %s does not exists" % source)
        identifier = last_modified(request, page_name, filename)
        source_url = AttachFile.getAttachUrl(page_name, filename, request)
        source_file = AttachFile.getFilename(request, page_name, filename).encode(config.charset)

    logging.debug("%s: %s" % (source_url, identifier))
    # the key changes whenever the macro, the source, its modification identifier or the options change
    key_content = "%s.%s.%s.%s" % (macro.name, source_url, identifier, cache_key_suffix)
    target_cache_key = cache.key(request, itemname=current_page, content=key_content)
    return source_file, target_cache_key
215
216
def _do_conversion(request, source_file, target_cache_key, conversion_func,
                   target_content_type, conversion_details):
    """
    Run the given conversion function and store the result in the cache.

    Nothing is done if the cache already contains an item for the target key.

    @param request: a moinmoin request instance
    @param source_file: local name of the input file
    @param target_cache_key: the cache key for storing the conversion result
    @param conversion_func: a conversion function that expects four parameters:
        request.getText, input_filename, output_filename, details;
        it returns a tuple (stdout, stderr, returncode)
    @param target_content_type: content type of the target (e.g. "image/png")
    @param conversion_details: dict of options handed to the conversion function
    @raise ConversionFailed: the conversion function returned a non-zero returncode
    """
    _ = request.getText
    if cache.exists(request, target_cache_key):
        # nothing to be done
        return
    # the converter writes its output directly to the file of the cache item
    target_file = get_cache_item_filename(request, target_cache_key)
    output_msg, error_msg, returncode = conversion_func(_, source_file, target_file, conversion_details)
    if returncode != 0:
        raise ConversionFailed(_("Conversion failed: %s") % error_msg)
    # explicitly transfer the file to the cache (just to be sure that moin's cache is updated)
    # the 'with' block closes the file even if reading or caching fails
    with open(target_file, 'rb') as target_handle:
        cache.put(request, target_cache_key, target_handle.read(),
                  content_type=target_content_type)
241
242
def _get_conversion_result(request, width, height, cache_key, attachment_name=None):
    """
    Render the cached conversion result as an image.

    @param request: a moinmoin request instance
    @param width: width attribute of the image (omitted if zero/empty)
    @param height: height attribute of the image (omitted if zero/empty)
    @param cache_key: the key to be used for retrieving the cache content that should be displayed
    @param attachment_name: if given, the image is wrapped in a link to this attachment
    @return: the formatted output or an empty string if the cache item does not exist
    """
    if not cache.exists(request, cache_key):
        # failed to convert input
        return ""

    formatter = request.formatter
    size_attributes = {}
    if width:
        size_attributes["width"] = width
    if height:
        size_attributes["height"] = height

    pieces = []
    if attachment_name:
        pieces.append(formatter.attachment_link(1, url=attachment_name))
    pieces.append(formatter.image(src=cache.url(request, cache_key), alt="", **size_attributes))
    if attachment_name:
        pieces.append(formatter.attachment_link(0))
    return "".join(pieces)
270
271
def dia_converter(_, source_file, target_file, details):
    """
    Convert a dia diagram to png or svg by running 'dia'.

    @param _: translation function (e.g. request.getText)
    @param source_file: local name of the input file
    @param target_file: name of the output file to be written
    @param details: dict of conversion options; "target_type" is required,
        "width" and "height" are optional (only used for png)
    @return: tuple (stdout, stderr, returncode) as returned by exec_cmd_tokens
    @raise ConversionFailed: the target type is neither "png" nor "svg"
    """
    target_type = details["target_type"]
    args = ["dia", "--nosplash", "--export", target_file]
    if target_type == "png":
        args += ["--filter", "png"]
        # target size is only available for png output format
        width = details.get("width", 0)
        height = details.get("height", 0)
        if width or height:
            # a missing dimension is left empty (e.g. "300x" or "x200")
            args += ["--size", "%sx%s" % (width or "", height or "")]
    elif target_type == "svg":
        args += ["--filter", "svg"]
    else:
        # translate the message template first, insert the value afterwards
        raise ConversionFailed(_("Unsupported output format for dia converter: %s") % target_type)
    args += [source_file]
    return exec_cmd_tokens(_, args)
288
289
def dot_converter(_, source_file, target_file, details):
    """
    Convert a graphviz file to png or svg by running 'dot'.

    @param _: translation function (e.g. request.getText)
    @param source_file: local name of the input file
    @param target_file: name of the output file to be written
    @param details: dict of conversion options; "target_type" is required
    @return: tuple (stdout, stderr, returncode) as returned by exec_cmd_tokens
    @raise ConversionFailed: the target type is neither "png" nor "svg"
    """
    target_type = details["target_type"]
    args = ["dot"]
    if target_type == "png":
        # TODO: implement width/height
        args += ["-Tpng"]
    elif target_type == "svg":
        args += ["-Tsvg"]
    else:
        # translate the message template first, insert the value afterwards
        raise ConversionFailed(_("Unsupported output format for dot converter: %s") % target_type)
    args += ["-o%s" % target_file, source_file]
    return exec_cmd_tokens(_, args)
302
303
def pdf2png_converter(_, source_file, target_file, details):
    """
    Convert a pdf file to png by running 'pstoimg'.

    @param _: translation function (e.g. request.getText)
    @param source_file: local name of the input file
    @param target_file: name of the output file to be written
    @param details: dict of conversion options (currently unused)
    @return: tuple (stdout, stderr, returncode) as returned by exec_cmd_tokens
    """
    # TODO: implement width/height
    # TODO: handle "page"
    # every token must be a string: subprocess rejects an integer density
    args = ["pstoimg", "-quiet", "-crop", "tblr", "-density", "200", "-type", "png",
            source_file, "-out", target_file]
    return exec_cmd_tokens(_, args)
308
309
def pdf2svg_converter(_, source_file, target_file, details):
    """
    Convert a single page of a pdf file to svg by running 'pdf2svg'.

    @param _: translation function (e.g. request.getText)
    @param source_file: local name of the input file
    @param target_file: name of the output file to be written
    @param details: dict of conversion options; "page" selects the page (default: 1)
    @return: tuple (stdout, stderr, returncode) as returned by exec_cmd_tokens
    """
    # the page may be a number or a page specification (e.g. 'iii')
    page_spec = str(details.get("page", 1))
    command = ["pdf2svg", source_file, target_file, page_spec]
    return exec_cmd_tokens(_, command)
314
315
def xcf2png_converter_xcftools(_, source_file, target_file, details):
    """
    Convert an xcf image to png by running 'xcf2png'.

    @param _: translation function (e.g. request.getText)
    @param source_file: local name of the input file
    @param target_file: name of the output file to be written
    @param details: dict of conversion options (unused)
    @return: tuple (stdout, stderr, returncode) as returned by exec_cmd_tokens
    """
    command = ["xcf2png", "-o", target_file, source_file]
    return exec_cmd_tokens(_, command)
318
319
def xcf2png_converter_imagemagick(_, source_file, target_file, details):
    """
    Convert an xcf image to png by running 'convert' (all layers merged, alpha channel enabled).

    @param _: translation function (e.g. request.getText)
    @param source_file: local name of the input file
    @param target_file: name of the output file to be written
    @param details: dict of conversion options (unused)
    @return: tuple (stdout, stderr, returncode) as returned by exec_cmd_tokens
    """
    png_target = "png:%s" % target_file
    command = ["convert", source_file]
    command += ["-alpha", "on"]
    command += ["-background", "none"]
    command += ["-layers", "merge"]
    command += [png_target]
    return exec_cmd_tokens(_, command)
322
323
def ps_converter(_, source_file, target_file, details):
    """
    Convert a postscript file to png or svg by running 'inkscape'.

    @param _: translation function (e.g. request.getText)
    @param source_file: local name of the input file
    @param target_file: name of the output file to be written
    @param details: dict of conversion options; "target_type" is required,
        "width" and "height" are optional (only used for png)
    @return: tuple (stdout, stderr, returncode) as returned by exec_cmd_tokens
    @raise ConversionFailed: the target type is neither "png" nor "svg"
    """
    target_type = details["target_type"]
    args = ["inkscape", "--without-gui"]
    if target_type == "png":
        args += ["--export-png", target_file]
        # the sizes arrive as numbers, but subprocess only accepts string tokens
        width = details.get("width", 0)
        if width:
            args += ["--export-width", str(width)]
        height = details.get("height", 0)
        if height:
            args += ["--export-height", str(height)]
    elif target_type == "svg":
        args += ["--export-plain-svg", target_file]
    else:
        # translate the message template first, insert the value afterwards
        raise ConversionFailed(_("Unsupported output format for postscript converter: %s") % target_type)
    args += [source_file]
    return exec_cmd_tokens(_, args)
341
342
def macro_FormatConverter(macro, source, source_type, target_type, width=0, height=0, extras=""):
    """
    converts input data from urls or attachments to image files using MoinMoins cache and renders from there

    Retrieve the source data, run the conversion function and return the formatted output.

    @param macro: the original macro object containing the current request
    @param source: use the specified attachment or URL as a source
    @param source_type: file type of the source data (e.g. "pdf")
    @param target_type: file type of the target (e.g. "png")
    @param width: the width of the embedded image (0: not specified)
    @param height: the height of the embedded image (0: not specified)
    @param extras: a space-separated list of 'key:value' pairs - e.g. used for specifying a page number
    @return: the formatted image (linked to the attachment, if the source is an attachment),
        an "upload" link for a missing attachment or an error message
    """
    _ = macro.request.getText
    source_type = source_type.lower()
    target_type = target_type.lower()
    if not extras:
        extras = ""
    try:
        conversion_func = get_conversion_map()[(source_type, target_type)]
    except KeyError:
        return _("The requested conversion is not supported: <%s> to <%s>. Use any of the following instead: %s") % \
                (source_type, target_type, get_conversion_map().keys())
    source_content_type = CONTENT_TYPE_MAP[source_type]
    target_content_type = CONTENT_TYPE_MAP[target_type]
    # every option influencing the output must be part of the cache key - this
    # includes the formats: otherwise the svg and the png rendering of the
    # same source would share a single cache item
    cache_key_suffix = " ".join(["source_type=%s" % source_type, "target_type=%s" % target_type,
                                 "width=%d" % width, "height=%d" % height, extras])
    try:
        source_file, target_cache_key = _get_source_and_cache(macro, source, source_content_type, cache_key_suffix)
    except AttachmentSourceNotFound:
        # display an "upload missing attachment" link
        result = macro.request.formatter.attachment_link(1, url=source)
        result += macro.request.formatter.text(_("Upload source file"))
        result += macro.request.formatter.attachment_link(0)
        return result
    except SourceNotFound as err_msg:
        return _("Could not find source (%s) for conversion: %s") % (source, err_msg)
    # parse 'extras' string
    extra_tokens = _parse_extra_options(extras)
    # run conversion
    conversion_details = {
        "width": width,
        "height": height,
        "source_type": source_type,
        "target_type": target_type,
    }
    conversion_details.update(extra_tokens)
    try:
        _do_conversion(macro.request, source_file, target_cache_key,
                       conversion_func, target_content_type, conversion_details)
    except ConversionFailed as err:
        # show the (already translated) failure message instead of letting the
        # exception escape from the macro; the message may contain output of
        # the external tool, thus it is emitted via the formatter
        return macro.request.formatter.text(err.args[0])
    # return formatted result string
    if is_attachment(source):
        attachment_name = source
    else:
        attachment_name = None
    return _get_conversion_result(macro.request, width, height, target_cache_key, attachment_name=attachment_name)
Attached Files
To refer to attachments on a page, use attachment:filename, as shown below in the list of files. Do NOT use the URL of the [get] link, since this is subject to change and can break easily.You are not allowed to attach a file to this page.