attachment:html2wiki.py of HtmlConverter/Typo3-2Moin

Attachment 'html2wiki.py'

   1 #!/usr/bin/env python
   2 # -*- coding: iso-8859-1 -*-
   3 """
   4     Coconuts example for creating wikipages from html pages
   5 
   6     :copyright: 2010-2013 by ReimarBauer
   7     :license: GNU GPL, see COPYING for details.
   8     
   9     2018-03-04 Rudolf Reuter, modified for Typo3 source
  10 """
  11 
  12 import re  # for regex compile
  13 
  14 from Coconuts import log
  15 log.load_config('coconutslogging.conf')
  16 logging = log.getLogger(__name__)
  17 
  18 import os
  19 import sys
  20 
  21 import urlparse
  22 from settings import Config
  23 from Coconuts.caching.Caching import Caching
  24 #print('\n'.join(sys.path))  # RR for debug
  25 from Coconuts.utils import transform_encoding
  26 from Coconuts.convertor import Convert
  27 from Coconuts.xmlrpc import XmlRpc, SendContent
  28 from Coconuts.parser import Parser, RegexError, TidyLibError
  29 from xmlrpclib import expat
  30 
  31 
  32 def _pagename(encoding, prefix, title, hash_digest):
  33     return title
  34 
  35 
  36 class html2wiki(SendContent.SendContent):
  37     """
  38     Coconuts html2wiki
  39 
  40     example which writes html pages from given urls
  41     as wiki markup to a wiki.
  42 
  43     :copyright: 2010 by ReimarBauer
  44     :license: GNU GPL, see COPYING for details.
  45     """
  46     SendContent._pagename = _pagename
  47 
  48     def run(self):
  49         """
  50         creates the wiki pages
  51         """
  52         s_urls = self.urls
  53         self.cfg.wikipageprefix = ""
  54         for link in s_urls:
  55             logging.debug(link)
  56             urlp = urlparse.urlparse(link)
  57             netloc = urlp.netloc
  58             pagename = link.split(netloc)[1].lstrip('/').split('.htm')[0]
  59             names = pagename.split('/')
  60             names = [name.title() for name in names]
  61             pagename = '/'.join(names)
  62             
  63             # adopt wiki page name prefix
  64             pos = pagename.find("=")                    # RR
  65             pagename = pagename[pos+1:]                 # RR, extract real pagename
  66             pagename = "RudisFlugis" + pagename         # RR, add prefix
  67             #pagename = "RudisFlugisModell" + pagename  # RR, add prefix
  68             
  69             html_page = Caching(self.cfg, link)
  70             text = html_page.read()
  71             parser = Parser(self.cfg, text)
  72             moinmoin = Convert(self.cfg.expatlog_dir)
  73             try:
  74                 # if that is too much broken we don't need to convert at all
  75                 html_fragment = parser.html_fragment
  76             except (RegexError, TidyLibError), err:
  77                 logging.debug("%s %s" % (link, err))
  78                 continue
  79             #print(html_fragment)  # RR for program analysis
  80             try:
  81                 successful, wiki_text = moinmoin.convert_in_moin_markup(parser.title, html_fragment)
  82             except (expat.ExpatError, TidyLibError), err:
  83                 logging.debug("%s %s" % (link, err))
  84                 continue
  85             print(pagename)  # RR for debug
  86             
  87             # Add moin wiki page header to body text
  88             t1 = '#format wiki\n'
  89             t2 = '#language  de\n'
  90             t3 = '||<tablestyle="float: right; margin: 0px;"style="padding: 0.5em; '
  91             t4 = 'border: 0px none; font-size: 100%;"><<TableOfContents>> ||\n'
  92             wiki_text = t1 + t2 + t3 + t4 + wiki_text + '\n\n'
  93             #print(parser.image_urls)  # RR for program analysis
  94             
  95             # get picture file names list
  96             lstPics = parser.image_urls 
  97             print(lstPics)  # RR for debug
  98 
  99             # add picture attachment link list to wiki text
 100             t1 = '||<tablestyle="float: right;">[[attachment:'
 101             t2 = '|{{attachment:'
 102             t3 = '|attachment:'
 103             t4 = '|width="320"}}]] ||\n'
 104             for i in range(len(lstPics)):  # RR build attachment refs
 105                 pic = lstPics[i]
 106                 wiki_text = wiki_text + t1 + pic + t2 + pic + t3 + pic + t4
 107                 
 108             # add wiki footer to wiki text
 109             t1 = '=== Kontakt Email ===\n'
 110             t2 = 'Geben sie bitte ihre Email Adresse ein, wenn sie eine Antwort erwarten\n\n'
 111             t3 = "/!\ '''Die eingegebene Email Adresse wird nicht veroeffentlicht, oder weitergegeben.'''\n"
 112             t4 = '<<AddComment>>\n\n'
 113             t5 = '=== Liste der Seiten in dieser Kategorie ===\n'
 114             t6 = '<<FullSearch(category:CategoryRudisFlugis)>>\n\n'
 115             t7 = '----\n'
 116             t8 = 'Gehe zurueck zu CategoryRudisFlugis oder StartSeite\n'
 117             wiki_text = wiki_text + t1 + t2 + t3 + t4 + t5 + t6 + t7 + t8
 118             
 119             # send wiki text to moin wiki via XMLRPC (Remote Procedure Call)
 120             self.xmlrpc.send_page(pagename, wiki_text)  
 121             #print(parser.image_urls)  # RR for program analysis
 122             #print(wiki_text)          # RR for program analysis
 123             
 124             # prepare list of picture URL's
 125             for i in range(len(lstPics)):  # build URL
 126                 lstPics[i] = "http://192.168.17.72/cms/uploads/pics/" + lstPics[i]
 127             #print(lstPics)  # RR for debug
 128             
 129             # Send pictures to moin wiki as an attachment
 130             self.images_send(lstPics, parser.encoding, pagename, "")
 131 
 132 if __name__ == '__main__':
 133     logging = log.getLogger(__file__)
 134     logging.info('start')
 135     cfg = Config()
 136     xmlrpc_init = XmlRpc(cfg.wikiurl, cfg.username, cfg.password)
 137     cp2wiki = html2wiki(cfg, xmlrpc_init)
 138     cp2wiki.run()
 139     logging.info('done')
Attached Files

To refer to attachments on a page, use attachment:filename, as shown below in the list of files. Do NOT use the URL of the [get] link, since this is subject to change and can break easily.
You are not allowed to attach a file to this page.
MoinMoin: attachment:html2wiki.py of HtmlConverter/Typo3-2Moin

Attachment 'html2wiki.py'

Attached Files