Attachment 'FormatConverter-1.0.py'

Download

   1 # -*- coding: iso-8859-1 -*-
   2 """
   3     MoinMoin - convert various input formats into output formats
   4 
   5     <<FormatConverter(ATTACHMENT_OR_URL, INPUT_FILETYPE, OUTPUT_FILETYPE, WIDTH=0, HEIGHT=0, EXTRAS="")>>
   6 
   7     Features:
   8     * fetches source file from urls or page attachments
   9     * converts the input file via a local conversion tool (dia, inkscape, ...) to the output format
  10     * checks timestamp of content for updating cached image
  11     * uses cache for input and output file
  12 
  13     Requires the external conversion program:
  14         pdf -> svg: pdf2svg
  15         pdf -> png: pstoimg
  16         dia -> (svg|png): dia
  17         dot -> (svg|png): dot
  18         eps -> (svg|png): inkscape
  19         xcf -> (svg|png): xcftools
  20 
  21     Most of the code is based on the pdf2img macro created by Reimar Bauer.
  22 
  23     @copyright: 2011 MoinMoin:ReimarBauer
  24     @copyright: 2014 MoinMoin:LarsKruse
  25     @license: GNU GPL v3 or later
  26 
  27     ----
  28 
  29     Changelog:
  30 
  31     v1.0 - 2014/09/22
  32       * initial release
  33       * the following conversions are supported:
  34        * pdf|dia|dot|eps -> svg|png
  35        * xcf -> png
  36 
  37     ----
  38 
  39     Usage:
  40 
  41         <<FormatConverter(diagram.dia, dia, svg)>>
  42 
  43         <<FormatConverter(graphviz.dot, dot, png, width=300)>>
  44 
  45         <<FormatConverter(book.pdf, pdf, svg, extras=page:1)>>
  46 
  47         <<FormatConverter(http://example.org/article.pdf, pdf, png, height=1024)>>
  48 
  49 """
  50 
  51 from MoinMoin import log
  52 logging = log.getLogger(__name__)
  53 
  54 import os
  55 import urllib2
  56 import httplib
  57 import subprocess
  58 from urlparse import urlparse
  59 
  60 from MoinMoin import caching, config
  61 from MoinMoin.util.SubProcess import Popen
  62 from MoinMoin.action import AttachFile, cache
  63 
  64 CACHE_ARENA = 'sendcache'
  65 CACHE_SCOPE = 'wiki'
  66 
  67 # list all input and output document types here
  68 CONTENT_TYPE_MAP = {
  69     "svg": 'image/svg+xml',
  70     "png": 'image/png',
  71     "dia": 'application/dia',
  72     "dot": 'application/graphviz',
  73     "eps": 'application/postscript',
  74     "pdf": 'application/pdf',
  75     "xcf": 'application/x-pdf',
  76 }
  77 
  78 
  79 # map pairs of (input, output) to conversion functions
  80 # we use a function instead of a dict for lazy evaluation
  81 def get_conversion_map():
  82     return {
  83         ("dot", "svg"): dot_converter,
  84         ("dot", "png"): dot_converter,
  85         ("dia", "svg"): dia_converter,
  86         ("dia", "png"): dia_converter,
  87         ("pdf", "png"): pdf2png_converter,
  88         ("pdf", "svg"): pdf2svg_converter,
  89         #("xcf", "png"): xcf2png_converter_imagemagick,
  90         ("xcf", "png"): xcf2png_converter_xcftools,
  91         ("eps", "svg"): ps_converter,
  92         ("eps", "png"): ps_converter,
  93         ("ps", "svg"): ps_converter,
  94         ("ps", "png"): ps_converter,
  95     }
  96 
  97 
  98 class SourceNotFound(KeyError): pass
  99 class AttachmentSourceNotFound(SourceNotFound): pass
 100 class ConversionFailed(RuntimeError): pass
 101 
 102 
 103 def exec_cmd_tokens(_, args):
 104     try:
 105         proc = Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
 106         stdout, stderr = proc.communicate()
 107         return stdout, stderr, proc.returncode
 108     except OSError:
 109         return "", _("Failed to run '%s'") % args[0], -1
 110 
 111 
 112 def url_exists(url):
 113     try:
 114         item = urllib2.urlopen(url)
 115         content = item.read(size=1)
 116         item.close()
 117         return len(content) > 0
 118     except (IOError, urllib2.HTTPError, ValueError):
 119         return False
 120 
 121 
 122 def last_modified(request=None, pagename=None, attachment=None, url=""):
 123     if not url:
 124         pdf_file = os.path.join(AttachFile.getAttachDir(request, pagename), attachment).encode(config.charset)
 125         filestat = os.stat(pdf_file)
 126         return request.user.getFormattedDateTime(filestat.st_mtime)
 127 
 128     parse_result = urlparse(url)
 129     conn = httplib.HTTPConnection(parse_result.netloc)
 130     conn.request("GET", parse_result.path)
 131     response = conn.getresponse()
 132     return response.getheader('last-modified')
 133 
 134 
 135 def get_cache_item_filename(request, cache_key):
 136     """
 137     prepares the cache file and returns its file name
 138     """
 139     return get_cache_data(request, cache_key)._fname
 140 
 141 
 142 def get_cache_data(request, cache_key):
 143     """
 144     get the file location of a cache item
 145     """
 146     return caching.CacheEntry(request, CACHE_ARENA, "%s.data" % cache_key, CACHE_SCOPE, do_locking=False)
 147 
 148 
 149 def fetch_source_item(request, url, content_type, timestamp):
 150     """
 151     fetches the source item and stores it as cache file
 152     """
 153     cache_key = cache.key(request, itemname="multi_convert", content=(url + timestamp))
 154     try:
 155         item = urllib2.urlopen(url)
 156     except (IOError, urllib2.HTTPError, ValueError), err:
 157         logging.info(url)
 158         logging.debug("%s: %s" % (url, err))
 159         return None
 160     else:
 161         cache.put(request, cache_key, item.read(), content_type=content_type)
 162         item.close()
 163         return get_cache_item_filename(request, cache_key)
 164 
 165 
 166 def _parse_extra_options(text):
 167     result = {}
 168     for token in text.split():
 169         if ":" in token:
 170             key, value = token.split(":", 1)
 171         else:
 172             key, value = token, True
 173         result[key] = value
 174     return result
 175 
 176 
 177 def is_attachment(source):
 178     return not "://" in source
 179 
 180 
 181 def _get_source_and_cache(macro, source, source_content_type, cache_key_suffix):
 182     """
 183     Generate the cache key to be used for storing data and put the source data
 184     (specified as an attachment or remote URL) into the cache.
 185 
 186     @param macro: the original macro object containing the current request
 187     @param source: use the specified attachment or URL as a source
 188     @param source_content_type: content type of the source data (e.g. "application/pdf")
 189     @param cache_key_suffix: a unique unicode string to be used for this specific conversion
 190            all relevant options need to be included in this string (e.g. output format, size, ...)
 191     """
 192     request = macro.request
 193     pagename = request.page.page_name
 194 
 195     # determine source URL and cache key
 196     if is_attachment(source):
 197         page_name, filename = AttachFile.absoluteName(source, pagename)
 198         if not AttachFile.exists(request, page_name, filename):
 199             raise AttachmentSourceNotFound("attachment: %s does not exists" % source)
 200         identifier = last_modified(request, page_name, filename)
 201         source_url = AttachFile.getAttachUrl(page_name, filename, request)
 202         source_file = AttachFile.getFilename(request, page_name, filename).encode(config.charset)
 203     else:
 204         if not url_exists(source):
 205             raise SourceNotFound("url: %s does not exists" % source)
 206         identifier = last_modified(url=source)
 207         source_url = source
 208         # store the source data in a local download cache
 209         source_file = fetch_source_item(request, source, source_content_type, identifier)
 210     logging.debug("%s: %s" % (source_url, identifier))
 211     target_cache_key = cache.key(request, itemname=pagename, content="%s.%s.%s.%s" % (macro.name, source_url, identifier, cache_key_suffix))
 212 
 213     return source_file, target_cache_key
 214 
 215 
 216 def _do_conversion(request, source_file, target_cache_key, conversion_func,
 217                    target_content_type, conversion_details):
 218     """
 219     Run the given conversion function and store the result in the cache.
 220 
 221     @param request: a moinmoin request instance
 222     @source_file: local name of the input file
 223     @cache_key: the key to be used for retrieving the cache content that should be displayed
 224     @conversion_func: a conversion function that expects four parameters: request.getText, input_filename, output_filename, details
 225     @param target_content_type: content type of the target (e.g. "image/png")
 226     """
 227     _ = request.getText
 228     if cache.exists(request, target_cache_key):
 229         # nothing to be done
 230         return
 231     target_file = get_cache_item_filename(request, target_cache_key)
 232     output_msg, error_msg, returncode = conversion_func(_, source_file, target_file, conversion_details)
 233     if returncode != 0:
 234         raise ConversionFailed(_("Conversion failed: %s") % error_msg)
 235     # explicitly transfer the file to the cache (just to be sure that moin's cache is updated)
 236     target_handle = open(target_file, 'rb')
 237     cache.put(request, target_cache_key, target_handle.read(),
 238               content_type=target_content_type)
 239     target_handle.close()
 240 
 241 
 242 def _get_conversion_result(request, width, height, cache_key, attachment_name=None):
 243     """
 244     Return a formatted representation of a cache item via a selected style.
 245 
 246     @param request: a moinmoin request instance
 247     @param target_content_type: content type of the target (e.g. "image/png")
 248     @width: the width of the embedded conversion visualization (only for 'do_embed == True')
 249     @height: the height of the embedded conversion visualization (only for 'do_embed == True')
 250     @cache_key: the key to be used for retrieving the cache content that should be displayed
 251     """
 252     if cache.exists(request, cache_key):
 253         formatter = request.formatter
 254         result = ""
 255         args = {}
 256         if width:
 257             args["width"] = width
 258         if height:
 259             args["height"] = height
 260         if attachment_name:
 261             result += formatter.attachment_link(1, url=attachment_name)
 262         result += formatter.image(src=cache.url(request, cache_key), alt="", **args)
 263         if attachment_name:
 264             result += formatter.attachment_link(0)
 265         return result
 266     else:
 267         # failed to convert input
 268         return ""
 269 
 270 
 271 def dia_converter(_, source_file, target_file, details):
 272     target_type = details["target_type"]
 273     args = ["dia", "--nosplash", "--export", target_file]
 274     if target_type == "png":
 275         args += ["--filter", "png"]
 276         # target size is only available for png output format
 277         width = details.get("width", 0)
 278         height = details.get("height", 0)
 279         if width or height:
 280             args += ["--size", "%sx%s" % (width or "", height or "")]
 281     elif target_type == "svg":
 282         args += ["--filter", "svg"]
 283     else:
 284         raise ConversionFailed(_("Unsupported output format for dia converter: %s" % target_type))
 285     args += [source_file]
 286     return exec_cmd_tokens(_, args)
 287 
 288 
 289 def dot_converter(_, source_file, target_file, details):
 290     target_type = details["target_type"]
 291     args = ["dot"]
 292     if target_type == "png":
 293         # TODO: implement width/height
 294         args += ["-Tpng"]
 295     elif target_type == "svg":
 296         args += ["-Tsvg"]
 297     else:
 298         raise ConversionFailed(_("Unsupported output format for dot converter: %s" % target_type))
 299     args += ["-o%s" % target_file, source_file]
 300     return exec_cmd_tokens(_, args)
 301 
 302 
 303 def pdf2png_converter(_, source_file, target_file, details):
 304     # TODO: implement width/height
 305     # TODO: handle "page"
 306     return exec_cmd_tokens(_, ["pstoimg", "-quiet", "-crop", "tblr", "-density", 200, "-type", "png", source_file, "-out", target_file])
 307 
 308 
 309 def pdf2svg_converter(_, source_file, target_file, details):
 310     # 'pageno' can be a number or a page specification (e.g. 'iii')
 311     page = details.get("page", 1)
 312     return exec_cmd_tokens(_, ["pdf2svg", source_file, target_file, str(page)])
 313 
 314 
 315 def xcf2png_converter_xcftools(_, source_file, target_file, details):
 316     return exec_cmd_tokens(_, ["xcf2png", "-o", target_file, source_file])
 317 
 318 
 319 def xcf2png_converter_imagemagick(_, source_file, target_file, details):
 320     return exec_cmd_tokens(_, ["convert", source_file, "-alpha", "on", "-background", "none", "-layers", "merge", "png:%s" % target_file])
 321 
 322 
 323 def ps_converter(_, source_file, target_file, details):
 324     target_type = details["target_type"]
 325     args = ["inkscape", "--without-gui"]
 326     if target_type == "png":
 327         args += ["--export-png", target_file]
 328         width = details.get("width", 0)
 329         if width:
 330             args += ["--export-width", width]
 331         height = details.get("height", 0)
 332         if height:
 333             args += ["--export-height", height]
 334     elif target_type == "svg":
 335         args += ["--export-plain-svg", target_file]
 336     else:
 337         raise ConversionFailed(_("Unsupported output format for postscript converter: %s" % target_type))
 338     args += [source_file]
 339     return exec_cmd_tokens(_, args)
 340 
 341 
 342 def macro_FormatConverter(macro, source, source_type, target_type, width=0, height=0, extras=""):
 343     """
 344     converts input data from urls or attachments to image files using MoinMoins cache and renders from there
 345 
 346     Retrieve the source data, run the conversion function and return the formatted output.
 347 
 348     @param macro: the original macro object containing the current request
 349     @param source: use the specified attachment or URL as a source
 350     @param source_type: content type of the source data (e.g. "pdf")
 351     @param target_type: content type of the target (e.g. "png")
 352     @width: the width of the embedded conversion visualization (only for 'do_embed == True')
 353     @height: the height of the embedded conversion visualization (only for 'do_embed == True')
 354     @extras: a space-separated list of 'key:value' pairs - e.g. used for specifying a page number
 355     """
 356     _ = macro.request.getText
 357     source_type = source_type.lower()
 358     target_type = target_type.lower()
 359     if not extras:
 360         extras = ""
 361     try:
 362         conversion_func = get_conversion_map()[(source_type, target_type)]
 363     except KeyError:
 364         return _("The requested conversion is not supported: <%s> to <%s>. Use any of the following instead: %s") % \
 365                 (source_type, target_type, get_conversion_map().keys())
 366     source_content_type = CONTENT_TYPE_MAP[source_type]
 367     target_content_type = CONTENT_TYPE_MAP[target_type]
 368     cache_key_suffix = " ".join(["width=%d" % width, "height=%d" % height, extras])
 369     try:
 370         source_file, target_cache_key = _get_source_and_cache(macro, source, source_content_type, cache_key_suffix)
 371     except AttachmentSourceNotFound:
 372         # display an "upload missing attachment" link
 373         result = macro.request.formatter.attachment_link(1, url=source)
 374         result += macro.request.formatter.text(_("Upload source file"))
 375         result += macro.request.formatter.attachment_link(0)
 376         return result
 377     except SourceNotFound, err_msg:
 378         return _("Could not find source (%s) for conversion: %s") % (source, err_msg)
 379     # parse 'extras' string
 380     extra_tokens = _parse_extra_options(extras)
 381     # run conversion
 382     conversion_details = {
 383         "width": width,
 384         "height": height,
 385         "source_type": source_type,
 386         "target_type": target_type,
 387     }
 388     conversion_details.update(extra_tokens)
 389     _do_conversion(macro.request, source_file, target_cache_key,
 390                    conversion_func, target_content_type, conversion_details)
 391     # return formatted result string
 392     if is_attachment(source):
 393         attachment_name = source
 394     else:
 395         attachment_name = None
 396     return _get_conversion_result(macro.request, width, height, target_cache_key, attachment_name=attachment_name)

Attached Files

To refer to attachments on a page, use attachment:filename, as shown below in the list of files. Do NOT use the URL of the [get] link, since this is subject to change and can break easily.
  • [get | view] (2014-09-22 22:57:50, 14.5 KB) [[attachment:FormatConverter-1.0.py]]
  • [get | view] (2018-11-02 02:20:20, 14.6 KB) [[attachment:FormatConverter-1.1.py]]
 All files | Selected Files: delete move to page copy to page

You are not allowed to attach a file to this page.