Attachment 'FormatConverter-1.1.py'

Download

   1 # -*- coding: iso-8859-1 -*-
   2 """
   3     MoinMoin - convert various input formats into output formats
   4 
   5     <<FormatConverter(ATTACHMENT_OR_URL, INPUT_FILETYPE, OUTPUT_FILETYPE, WIDTH=0, HEIGHT=0, EXTRAS="")>>
   6 
   7     Features:
   8     * fetches source file from urls or page attachments
   9     * converts the input file via a local conversion tool (dia, inkscape, ...) to the output format
  10     * checks timestamp of content for updating cached image
  11     * uses cache for input and output file
  12 
  13     Requires the external conversion program:
  14         pdf -> svg: pdf2svg
  15         pdf -> png: pstoimg
  16         dia -> (svg|png): dia
  17         dot -> (svg|png): dot
  18         eps -> (svg|png): inkscape
  19         xcf -> (svg|png): xcftools
  20 
  21     Most of the code is based on the pdf2img macro created by Reimar Bauer.
  22 
  23     @copyright: 2011 MoinMoin:ReimarBauer
  24     @copyright: 2014 MoinMoin:LarsKruse
  25     @license: GNU GPL v3 or later
  26 
  27     ----
  28 
  29     Changelog:
  30 
  31     v1.0 - 2014/09/22
  32       * initial release
  33       * the following conversions are supported:
  34        * pdf|dia|dot|eps -> svg|png
  35        * xcf -> png
  36     v1.1 - 2018/10/29
  37       * fix minor incompatibility with moinmoin v1.9.9
  38 
  39     ----
  40 
  41     Usage:
  42 
  43         <<FormatConverter(diagram.dia, dia, svg)>>
  44 
  45         <<FormatConverter(graphviz.dot, dot, png, width=300)>>
  46 
  47         <<FormatConverter(book.pdf, pdf, svg, extras=page:1)>>
  48 
  49         <<FormatConverter(http://example.org/article.pdf, pdf, png, height=1024)>>
  50 
  51 """
  52 
  53 from MoinMoin import log
  54 logging = log.getLogger(__name__)
  55 
  56 import os
  57 import urllib2
  58 import httplib
  59 import subprocess
  60 from urlparse import urlparse
  61 
  62 from MoinMoin import caching, config
  63 from MoinMoin.action import AttachFile, cache
  64 
  65 CACHE_ARENA = 'sendcache'
  66 CACHE_SCOPE = 'wiki'
  67 
  68 # list all input and output document types here
  69 CONTENT_TYPE_MAP = {
  70     "svg": 'image/svg+xml',
  71     "png": 'image/png',
  72     "dia": 'application/dia',
  73     "dot": 'application/graphviz',
  74     "eps": 'application/postscript',
  75     "pdf": 'application/pdf',
  76     "xcf": 'application/x-pdf',
  77 }
  78 
  79 
  80 # map pairs of (input, output) to conversion functions
  81 # we use a function instead of a dict for lazy evaluation
  82 def get_conversion_map():
  83     return {
  84         ("dot", "svg"): dot_converter,
  85         ("dot", "png"): dot_converter,
  86         ("dia", "svg"): dia_converter,
  87         ("dia", "png"): dia_converter,
  88         ("pdf", "png"): pdf2png_converter,
  89         ("pdf", "svg"): pdf2svg_converter,
  90         #("xcf", "png"): xcf2png_converter_imagemagick,
  91         ("xcf", "png"): xcf2png_converter_xcftools,
  92         ("eps", "svg"): ps_converter,
  93         ("eps", "png"): ps_converter,
  94         ("ps", "svg"): ps_converter,
  95         ("ps", "png"): ps_converter,
  96     }
  97 
  98 
  99 class SourceNotFound(KeyError): pass
 100 class AttachmentSourceNotFound(SourceNotFound): pass
 101 class ConversionFailed(RuntimeError): pass
 102 
 103 
 104 def exec_cmd_tokens(_, args):
 105     try:
 106         proc = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
 107         stdout, stderr = proc.communicate()
 108         return stdout, stderr, proc.returncode
 109     except OSError:
 110         return "", _("Failed to run '%s'") % args[0], -1
 111 
 112 
 113 def url_exists(url):
 114     try:
 115         item = urllib2.urlopen(url)
 116         content = item.read(size=1)
 117         item.close()
 118         return len(content) > 0
 119     except (IOError, urllib2.HTTPError, ValueError):
 120         return False
 121 
 122 
 123 def last_modified(request=None, pagename=None, attachment=None, url=""):
 124     if not url:
 125         pdf_file = os.path.join(AttachFile.getAttachDir(request, pagename), attachment).encode(config.charset)
 126         filestat = os.stat(pdf_file)
 127         return request.user.getFormattedDateTime(filestat.st_mtime)
 128 
 129     parse_result = urlparse(url)
 130     conn = httplib.HTTPConnection(parse_result.netloc)
 131     conn.request("GET", parse_result.path)
 132     response = conn.getresponse()
 133     return response.getheader('last-modified')
 134 
 135 
 136 def get_cache_item_filename(request, cache_key):
 137     """
 138     prepares the cache file and returns its file name
 139     """
 140     return get_cache_data(request, cache_key)._fname
 141 
 142 
 143 def get_cache_data(request, cache_key):
 144     """
 145     get the file location of a cache item
 146     """
 147     return caching.CacheEntry(request, CACHE_ARENA, "%s.data" % cache_key, CACHE_SCOPE, do_locking=False)
 148 
 149 
 150 def fetch_source_item(request, url, content_type, timestamp):
 151     """
 152     fetches the source item and stores it as cache file
 153     """
 154     cache_key = cache.key(request, itemname="multi_convert", content=(url + timestamp))
 155     try:
 156         item = urllib2.urlopen(url)
 157     except (IOError, urllib2.HTTPError, ValueError), err:
 158         logging.info(url)
 159         logging.debug("%s: %s" % (url, err))
 160         return None
 161     else:
 162         cache.put(request, cache_key, item.read(), content_type=content_type)
 163         item.close()
 164         return get_cache_item_filename(request, cache_key)
 165 
 166 
 167 def _parse_extra_options(text):
 168     result = {}
 169     for token in text.split():
 170         if ":" in token:
 171             key, value = token.split(":", 1)
 172         else:
 173             key, value = token, True
 174         result[key] = value
 175     return result
 176 
 177 
 178 def is_attachment(source):
 179     return not "://" in source
 180 
 181 
 182 def _get_source_and_cache(macro, source, source_content_type, cache_key_suffix):
 183     """
 184     Generate the cache key to be used for storing data and put the source data
 185     (specified as an attachment or remote URL) into the cache.
 186 
 187     @param macro: the original macro object containing the current request
 188     @param source: use the specified attachment or URL as a source
 189     @param source_content_type: content type of the source data (e.g. "application/pdf")
 190     @param cache_key_suffix: a unique unicode string to be used for this specific conversion
 191            all relevant options need to be included in this string (e.g. output format, size, ...)
 192     """
 193     request = macro.request
 194     pagename = request.page.page_name
 195 
 196     # determine source URL and cache key
 197     if is_attachment(source):
 198         page_name, filename = AttachFile.absoluteName(source, pagename)
 199         if not AttachFile.exists(request, page_name, filename):
 200             raise AttachmentSourceNotFound("attachment: %s does not exists" % source)
 201         identifier = last_modified(request, page_name, filename)
 202         source_url = AttachFile.getAttachUrl(page_name, filename, request)
 203         source_file = AttachFile.getFilename(request, page_name, filename).encode(config.charset)
 204     else:
 205         if not url_exists(source):
 206             raise SourceNotFound("url: %s does not exists" % source)
 207         identifier = last_modified(url=source)
 208         source_url = source
 209         # store the source data in a local download cache
 210         source_file = fetch_source_item(request, source, source_content_type, identifier)
 211     logging.debug("%s: %s" % (source_url, identifier))
 212     target_cache_key = cache.key(request, itemname=pagename, content="%s.%s.%s.%s" % (macro.name, source_url, identifier, cache_key_suffix))
 213 
 214     return source_file, target_cache_key
 215 
 216 
 217 def _do_conversion(request, source_file, target_cache_key, conversion_func,
 218                    target_content_type, conversion_details):
 219     """
 220     Run the given conversion function and store the result in the cache.
 221 
 222     @param request: a moinmoin request instance
 223     @source_file: local name of the input file
 224     @cache_key: the key to be used for retrieving the cache content that should be displayed
 225     @conversion_func: a conversion function that expects four parameters: request.getText, input_filename, output_filename, details
 226     @param target_content_type: content type of the target (e.g. "image/png")
 227     """
 228     _ = request.getText
 229     if cache.exists(request, target_cache_key):
 230         # nothing to be done
 231         return
 232     target_file = get_cache_item_filename(request, target_cache_key)
 233     output_msg, error_msg, returncode = conversion_func(_, source_file, target_file, conversion_details)
 234     if returncode != 0:
 235         raise ConversionFailed(_("Conversion failed: %s") % error_msg)
 236     # explicitly transfer the file to the cache (just to be sure that moin's cache is updated)
 237     target_handle = open(target_file, 'rb')
 238     cache.put(request, target_cache_key, target_handle.read(),
 239               content_type=target_content_type)
 240     target_handle.close()
 241 
 242 
 243 def _get_conversion_result(request, width, height, cache_key, attachment_name=None):
 244     """
 245     Return a formatted representation of a cache item via a selected style.
 246 
 247     @param request: a moinmoin request instance
 248     @param target_content_type: content type of the target (e.g. "image/png")
 249     @width: the width of the embedded conversion visualization (only for 'do_embed == True')
 250     @height: the height of the embedded conversion visualization (only for 'do_embed == True')
 251     @cache_key: the key to be used for retrieving the cache content that should be displayed
 252     """
 253     if cache.exists(request, cache_key):
 254         formatter = request.formatter
 255         result = ""
 256         args = {}
 257         if width:
 258             args["width"] = width
 259         if height:
 260             args["height"] = height
 261         if attachment_name:
 262             result += formatter.attachment_link(1, url=attachment_name)
 263         result += formatter.image(src=cache.url(request, cache_key), alt="", **args)
 264         if attachment_name:
 265             result += formatter.attachment_link(0)
 266         return result
 267     else:
 268         # failed to convert input
 269         return ""
 270 
 271 
 272 def dia_converter(_, source_file, target_file, details):
 273     target_type = details["target_type"]
 274     args = ["dia", "--nosplash", "--export", target_file]
 275     if target_type == "png":
 276         args += ["--filter", "png"]
 277         # target size is only available for png output format
 278         width = details.get("width", 0)
 279         height = details.get("height", 0)
 280         if width or height:
 281             args += ["--size", "%sx%s" % (width or "", height or "")]
 282     elif target_type == "svg":
 283         args += ["--filter", "svg"]
 284     else:
 285         raise ConversionFailed(_("Unsupported output format for dia converter: %s" % target_type))
 286     args += [source_file]
 287     return exec_cmd_tokens(_, args)
 288 
 289 
 290 def dot_converter(_, source_file, target_file, details):
 291     target_type = details["target_type"]
 292     args = ["dot"]
 293     if target_type == "png":
 294         # TODO: implement width/height
 295         args += ["-Tpng"]
 296     elif target_type == "svg":
 297         args += ["-Tsvg"]
 298     else:
 299         raise ConversionFailed(_("Unsupported output format for dot converter: %s" % target_type))
 300     args += ["-o%s" % target_file, source_file]
 301     return exec_cmd_tokens(_, args)
 302 
 303 
 304 def pdf2png_converter(_, source_file, target_file, details):
 305     # TODO: implement width/height
 306     # TODO: handle "page"
 307     return exec_cmd_tokens(_, ["pstoimg", "-quiet", "-crop", "tblr", "-density", 200, "-type", "png", source_file, "-out", target_file])
 308 
 309 
 310 def pdf2svg_converter(_, source_file, target_file, details):
 311     # 'pageno' can be a number or a page specification (e.g. 'iii')
 312     page = details.get("page", 1)
 313     return exec_cmd_tokens(_, ["pdf2svg", source_file, target_file, str(page)])
 314 
 315 
 316 def xcf2png_converter_xcftools(_, source_file, target_file, details):
 317     return exec_cmd_tokens(_, ["xcf2png", "-o", target_file, source_file])
 318 
 319 
 320 def xcf2png_converter_imagemagick(_, source_file, target_file, details):
 321     return exec_cmd_tokens(_, ["convert", source_file, "-alpha", "on", "-background", "none", "-layers", "merge", "png:%s" % target_file])
 322 
 323 
 324 def ps_converter(_, source_file, target_file, details):
 325     target_type = details["target_type"]
 326     args = ["inkscape", "--without-gui"]
 327     if target_type == "png":
 328         args += ["--export-png", target_file]
 329         width = details.get("width", 0)
 330         if width:
 331             args += ["--export-width", width]
 332         height = details.get("height", 0)
 333         if height:
 334             args += ["--export-height", height]
 335     elif target_type == "svg":
 336         args += ["--export-plain-svg", target_file]
 337     else:
 338         raise ConversionFailed(_("Unsupported output format for postscript converter: %s" % target_type))
 339     args += [source_file]
 340     return exec_cmd_tokens(_, args)
 341 
 342 
 343 def macro_FormatConverter(macro, source, source_type, target_type, width=0, height=0, extras=""):
 344     """
 345     converts input data from urls or attachments to image files using MoinMoins cache and renders from there
 346 
 347     Retrieve the source data, run the conversion function and return the formatted output.
 348 
 349     @param macro: the original macro object containing the current request
 350     @param source: use the specified attachment or URL as a source
 351     @param source_type: content type of the source data (e.g. "pdf")
 352     @param target_type: content type of the target (e.g. "png")
 353     @width: the width of the embedded conversion visualization (only for 'do_embed == True')
 354     @height: the height of the embedded conversion visualization (only for 'do_embed == True')
 355     @extras: a space-separated list of 'key:value' pairs - e.g. used for specifying a page number
 356     """
 357     _ = macro.request.getText
 358     source_type = source_type.lower()
 359     target_type = target_type.lower()
 360     if not extras:
 361         extras = ""
 362     try:
 363         conversion_func = get_conversion_map()[(source_type, target_type)]
 364     except KeyError:
 365         return _("The requested conversion is not supported: <%s> to <%s>. Use any of the following instead: %s") % \
 366                 (source_type, target_type, get_conversion_map().keys())
 367     source_content_type = CONTENT_TYPE_MAP[source_type]
 368     target_content_type = CONTENT_TYPE_MAP[target_type]
 369     cache_key_suffix = " ".join(["width=%d" % width, "height=%d" % height, extras])
 370     try:
 371         source_file, target_cache_key = _get_source_and_cache(macro, source, source_content_type, cache_key_suffix)
 372     except AttachmentSourceNotFound:
 373         # display an "upload missing attachment" link
 374         result = macro.request.formatter.attachment_link(1, url=source)
 375         result += macro.request.formatter.text(_("Upload source file"))
 376         result += macro.request.formatter.attachment_link(0)
 377         return result
 378     except SourceNotFound, err_msg:
 379         return _("Could not find source (%s) for conversion: %s") % (source, err_msg)
 380     # parse 'extras' string
 381     extra_tokens = _parse_extra_options(extras)
 382     # run conversion
 383     conversion_details = {
 384         "width": width,
 385         "height": height,
 386         "source_type": source_type,
 387         "target_type": target_type,
 388     }
 389     conversion_details.update(extra_tokens)
 390     _do_conversion(macro.request, source_file, target_cache_key,
 391                    conversion_func, target_content_type, conversion_details)
 392     # return formatted result string
 393     if is_attachment(source):
 394         attachment_name = source
 395     else:
 396         attachment_name = None
 397     return _get_conversion_result(macro.request, width, height, target_cache_key, attachment_name=attachment_name)

Attached Files

To refer to attachments on a page, use attachment:filename, as shown below in the list of files. Do NOT use the URL of the [get] link, since this is subject to change and can break easily.
  • [get | view] (2014-09-22 22:57:50, 14.5 KB) [[attachment:FormatConverter-1.0.py]]
  • [get | view] (2018-11-02 02:20:20, 14.6 KB) [[attachment:FormatConverter-1.1.py]]
 All files | Selected Files: delete move to page copy to page

You are not allowed to attach a file to this page.