Attachment 'html2wiki.py'
Download 1 #!/usr/bin/env python
2 # -*- coding: iso-8859-1 -*-
3 """
4 Coconuts example for creating wikipages from html pages
5
6 :copyright: 2010-2013 by ReimarBauer
7 :license: GNU GPL, see COPYING for details.
8
9 2018-03-04 Rudolf Reuter, modified for Typo3 source
10 """
11
12 import re # for regex compile
13
14 from Coconuts import log
15 log.load_config('coconutslogging.conf')
16 logging = log.getLogger(__name__)
17
18 import os
19 import sys
20
21 import urlparse
22 from settings import Config
23 from Coconuts.caching.Caching import Caching
24 #print('\n'.join(sys.path)) # RR for debug
25 from Coconuts.utils import transform_encoding
26 from Coconuts.convertor import Convert
27 from Coconuts.xmlrpc import XmlRpc, SendContent
28 from Coconuts.parser import Parser, RegexError, TidyLibError
29 from xmlrpclib import expat
30
31
32 def _pagename(encoding, prefix, title, hash_digest):
33 return title
34
35
class html2wiki(SendContent.SendContent):
    """
    Coconuts html2wiki

    Example which reads html pages from the given urls, converts them to
    moin wiki markup, wraps them in a fixed header/footer and sends the
    result (plus the pictures referenced by the page) to a wiki.

    :copyright: 2010 by ReimarBauer
    :license: GNU GPL, see COPYING for details.
    """
    # Install the module-level _pagename function as the `_pagename`
    # attribute of the imported SendContent object.
    # NOTE(review): presumably this overrides the default page naming so
    # that pages are named by their title only - confirm in Coconuts.xmlrpc.
    SendContent._pagename = _pagename

    # Prefix put in front of every generated wiki page name.
    PAGENAME_PREFIX = "RudisFlugis"

    # Location the picture file names are appended to when building the
    # download URLs handed to images_send().
    PICTURE_BASE_URL = "http://192.168.17.72/cms/uploads/pics/"

    # Moin processing instructions and a right-floating table of contents,
    # placed in front of the converted body text.
    PAGE_HEADER = (
        '#format wiki\n'
        '#language de\n'
        '||<tablestyle="float: right; margin: 0px;"style="padding: 0.5em; '
        'border: 0px none; font-size: 100%;"><<TableOfContents>> ||\n'
    )

    # Contact form, category listing and back links, appended to every page.
    # The backslash is escaped explicitly; the emitted text is unchanged.
    PAGE_FOOTER = (
        '=== Kontakt Email ===\n'
        'Geben sie bitte ihre Email Adresse ein, wenn sie eine Antwort erwarten\n\n'
        "/!\\ '''Die eingegebene Email Adresse wird nicht veroeffentlicht, oder weitergegeben.'''\n"
        '<<AddComment>>\n\n'
        '=== Liste der Seiten in dieser Kategorie ===\n'
        '<<FullSearch(category:CategoryRudisFlugis)>>\n\n'
        '----\n'
        'Gehe zurueck zu CategoryRudisFlugis oder StartSeite\n'
    )

    def _wiki_pagename(self, link):
        """
        Derive the wiki page name from a source url.

        The part of the url after the host is taken, cut at the first
        '.htm', every '/'-separated component is title-cased, everything
        up to and including the first '=' is dropped (so only the value of
        an ``...?id=name`` style url remains) and PAGENAME_PREFIX is put
        in front.

        :param link: url of the html page
        :returns: wiki page name
        """
        netloc = urlparse.urlparse(link).netloc
        if netloc:
            # Split on '//host' exactly once: a plain split on the host
            # string picks the wrong piece when that string also occurs in
            # the scheme or again later in the url.
            remainder = link.split('//' + netloc, 1)[-1]
        else:
            # No host part: splitting on an empty separator would raise
            # ValueError, so use the whole link as the path.
            remainder = link
        pagename = remainder.lstrip('/').split('.htm')[0]
        pagename = '/'.join(part.title() for part in pagename.split('/'))
        # find() returns -1 when there is no '=', which keeps the whole name.
        pagename = pagename[pagename.find("=") + 1:]
        return self.PAGENAME_PREFIX + pagename

    @staticmethod
    def _attachment_rows(picture_names):
        """
        Build one right-floating attachment table row per picture.

        :param picture_names: iterable of picture file names
        :returns: the concatenated wiki markup ('' for no pictures)
        """
        return ''.join(
            ''.join((
                '||<tablestyle="float: right;">[[attachment:', name,
                '|{{attachment:', name,
                '|attachment:', name,
                '|width="320"}}]] ||\n',
            ))
            for name in picture_names
        )

    def run(self):
        """
        creates the wiki pages

        For every url in ``self.urls`` the page is read, converted to moin
        markup and sent to the wiki, followed by its pictures as
        attachments. Pages that cannot be parsed or converted are logged
        at debug level and skipped.
        """
        self.cfg.wikipageprefix = ""
        for link in self.urls:
            logging.debug(link)
            pagename = self._wiki_pagename(link)

            html_page = Caching(self.cfg, link)
            text = html_page.read()
            parser = Parser(self.cfg, text)
            moinmoin = Convert(self.cfg.expatlog_dir)
            try:
                # if that is too much broken we don't need to convert at all
                html_fragment = parser.html_fragment
            except (RegexError, TidyLibError) as err:
                logging.debug("%s %s" % (link, err))
                continue
            try:
                # NOTE(review): `successful` is not evaluated; the page is
                # sent whenever no exception is raised.
                successful, wiki_text = moinmoin.convert_in_moin_markup(
                    parser.title, html_fragment)
            except (expat.ExpatError, TidyLibError) as err:
                logging.debug("%s %s" % (link, err))
                continue
            print(pagename)  # RR for debug

            # picture file names referenced by the page
            picture_names = parser.image_urls
            print(picture_names)  # RR for debug

            # header + body + attachment links + footer
            wiki_text = (self.PAGE_HEADER + wiki_text + '\n\n'
                         + self._attachment_rows(picture_names)
                         + self.PAGE_FOOTER)

            # send wiki text to moin wiki via XMLRPC (Remote Procedure Call)
            self.xmlrpc.send_page(pagename, wiki_text)

            # Build the picture urls in a new list so the list owned by
            # the parser is left untouched.
            picture_urls = [self.PICTURE_BASE_URL + name
                            for name in picture_names]

            # Send pictures to moin wiki as an attachment
            self.images_send(picture_urls, parser.encoding, pagename, "")
131
if __name__ == '__main__':
    # Script entry point: rebind the module-level logger to one named
    # after this file, then convert all configured pages.
    logging = log.getLogger(__file__)
    logging.info('start')

    # Settings supply the wiki url and the credentials for the XML-RPC client.
    settings = Config()
    wiki_rpc = XmlRpc(settings.wikiurl, settings.username, settings.password)

    html2wiki(settings, wiki_rpc).run()
    logging.info('done')
Attached Files
To refer to attachments on a page, use attachment:filename, as shown below in the list of files. Do NOT use the URL of the [get] link, since this is subject to change and can break easily. You are not allowed to attach a file to this page.