Attachment 'tikihtml2moinmoin.py'
Download 1 #!/usr/local/bin/python
2
3 """
4 converts tiki html to MoinMoin markup
5 author: Daniela Nicklas <dani@miracle-solutions.de>
6 """
7 import sys
8 from HTML2MoinMoin import HTML2MoinMoin
9 import string
10 import os
11 import codecs
12 import StringIO
13 import re
14 sys.path.append('Path to MoinMoin-Libs')
15 from MoinMoin.wikiutil import isStrictWikiname
16
17 # global variables
# Directory that is listed for the saved Tiki HTML pages to convert.
sourcedir = "pages"

# Directory that receives the converted MoinMoin text files.
targetdir = "text"

# Pages whose file name starts with a key get the mapped category name
# appended (unless the page text already contains it).
page2category = {}
page2category['NameInTiki'] = 'CategoryNewName'
page2category['AnothernameInTiki'] = 'CategoryAnotherOrSameNewName'
24
25 # Output Ignore
class devnull:
    """Write sink that silently discards whatever is written to it."""

    def write(self, data):
        """Accept ``data`` and drop it; always returns None."""
        return None
29
30 # HTML Parser
class TikiHTML2MoinMoin(HTML2MoinMoin):
    """Translate the HTML of a rendered Tiki page into MoinMoin markup.

    On top of the HTML2MoinMoin base class this parser:

    * discards all output from the opening ``<html>`` tag until the first
      ``</table>`` has been closed,
    * rewrites links to Tiki pages and Tiki categories as ``[wiki:Name label]``,
    * replaces Tiki edit links by a ``- FixMe/EditLink -`` marker and drops
      the text of ``wikicache`` links,
    * turns ``<div class="titlebar">`` into a level-3 heading,
    * stops emitting output once a ``<p class="editdate">`` is seen.

    NOTE(review): ``self.write``, ``self.output`` and ``self.preformatted``
    are not defined here; they are presumably provided by HTML2MoinMoin.
    """

    # Markup written when a tag opens, keyed by tag name.
    start_tags = {
        "a": "[",
        "b": "'''",
        "em": "''",
        "tt": "{{{",
        "pre": "\n{{{",
        "p": "\n\n",
        "br": "\n\n",
        "h1": "\n\n= ",
        "h2": "\n\n== ",
        "h3": "\n\n=== ",
        "h4": "\n\n==== ",
        "h5": "\n\n===== ",
        "hr": "\n----\n",
        "title": "",
        "table": "\n",
        "tr": "",
        "td": "||"
    }

    # Markup written when a tag closes, keyed by tag name.
    end_tags = {
        "a": ']',
        "b": "'''",
        "em": "''",
        "tt": "}}}",
        "pre": "}}}\n",
        "p": "",
        "h1": " =\n\n",
        "h2": " ==\n\n",
        "h3": " ===\n\n",
        "h4": " ====\n\n",
        "h5": " =====\n\n",
        "table": "\n",
        "title": "",
        "tr": "||\n",
        "dt": ":: "
    }

    def __init__(self):
        """Set up parser state, the Tiki URL markers and the rename tables."""
        HTML2MoinMoin.__init__(self)
        self.title = 0
        self.heading = 0        # non-zero while inside a heading
        self.head = 0
        self.linebreaks = 1
        self.tablecount = 0     # number of </table> tags seen so far
        self.devnull = devnull()
        self.outputbackup = self.output  # real output while ignoring
        self.div_mode = []      # one closing string per open <div>
        self.a_mode = []        # one entry per open <a>, see do_a_start
        # URL fragments that identify the different kinds of Tiki links.
        self.tikipageurl = 'tiki-index.php?page='
        self.tikiediturl = 'tiki-editpage.php?page='
        self.tikicategoryurl = 'tiki-browse_categories.php?parentId='
        # Tiki category id -> wiki name used as the link target.
        self.tikicategory = {
            1: 'NameInTiki',
            4: 'AnotherNameIn'
        }
        # Tiki page name -> MoinMoin page name.
        self.linkreplacements = {
            'HomePage': 'StartSeite',
            'UserPageYourPage': 'YourName'
        }

    def set_ignore(self):
        """Send all following output to the discarding sink."""
        if self.output != self.devnull:
            # Remember the real output only once so that repeated calls do
            # not overwrite the backup with the sink itself.
            self.outputbackup = self.output
        self.output = self.devnull

    def unset_ignore(self):
        """Restore the output that was active before set_ignore()."""
        self.output = self.outputbackup

    def do_html_start(self, attrs, tag):
        """Start ignoring output at the top of the document."""
        self.set_ignore()

    def do_table_end(self, tag):
        """Count closed tables; output is re-enabled after the first one."""
        self.tablecount = self.tablecount + 1
        if self.tablecount == 1:
            self.unset_ignore()

    def do_h1_start(self, attrs, tag):
        """Open a heading and remember that we are inside one."""
        self.heading = 1
        self.write(self.start_tags[tag])

    def do_h1_end(self, tag):
        """Close the heading opened by do_h1_start()."""
        self.heading = 0
        self.write(self.end_tags[tag])

    def do_a_start(self, attrs, tag):
        """Open a link and push exactly one entry onto ``self.a_mode``.

        The pushed entry tells do_a_end() what to do:
        ``'heading'`` (link inside a heading, no markup is written),
        ``'cache'`` / ``'fixme'`` (output is ignored until the link closes),
        the closing markup for a link that was written, or ``''`` for an
        anchor without ``href``.
        """
        if self.heading:
            self.a_mode.append('heading')
            return

        href = ''
        at_class = ''
        for att in attrs:
            if att[0] == 'href':
                href = att[1]
            if att[0] == 'class':
                at_class = att[1]

        opener = self.start_tags[tag]
        closer = self.end_tags[tag]

        if at_class == 'wikicache':
            self.set_ignore()
            self.a_mode.append('cache')
        elif href.find(self.tikipageurl) != -1:
            # Drop any further query parameters.  split() keeps the whole
            # string when there is no '&'; slicing with find() would cut
            # off the last character in that case.
            page = href.split('&', 1)[0].replace(self.tikipageurl, '')
            # Single lookup: a replacement is never replaced again.
            page = self.linkreplacements.get(page, page)
            self.write(opener + 'wiki:' + page + ' ')
            self.a_mode.append(closer)
        elif href.find(self.tikiediturl) != -1:
            self.write('- FixMe/EditLink -')
            self.set_ignore()
            self.a_mode.append('fixme')
        elif href.find(self.tikicategoryurl) != -1:
            ident = href.replace(self.tikicategoryurl, '').split('&', 1)[0]
            try:
                number = int(ident)
            except ValueError:
                # An unparsable id is reported as a mismatch instead of
                # aborting the whole conversion.
                number = None
            target = self.tikicategory.get(number, 'CategoryMissmatch')
            self.write(opener + 'wiki:' + target + ' ')
            self.a_mode.append(closer)
            if number not in self.tikicategory:
                if number is None:
                    print("CategoryMissmatch: %s" % ident)
                else:
                    print("CategoryMissmatch: %s" % number)
        elif href != '':
            self.write(opener)
            self.write(href + " ")
            self.a_mode.append(closer)
        else:
            # Anchor without href (e.g. <a name="...">): push a placeholder
            # so that the matching </a> pops its own entry.
            self.a_mode.append('')

    def do_a_end(self, tag):
        """Close a link according to the entry pushed by do_a_start()."""
        if not self.a_mode:
            # Stray </a> without a matching start tag.
            return
        mode = self.a_mode.pop()
        if mode == 'fixme' or mode == 'cache':
            self.unset_ignore()
        elif mode and mode != 'heading' and not self.heading:
            # Only real closing markup is written, never a sentinel.
            self.write(mode)

    def do_div_start(self, attrs, tag):
        """Open a div; ``class="titlebar"`` starts a level-3 heading.

        Exactly one entry is pushed onto ``self.div_mode`` per div,
        independent of how many attributes the tag carries.
        """
        is_titlebar = False
        for att in attrs:
            if att == ('class', 'titlebar'):
                is_titlebar = True
                break

        if is_titlebar:
            self.heading = 1
            self.write(self.start_tags["h3"])
            self.div_mode.append(self.end_tags["h3"])
        else:
            self.div_mode.append("")

    def do_div_end(self, tag):
        """Close a div; a title-bar div also closes its heading."""
        closer = ""
        if self.div_mode:
            closer = self.div_mode.pop()
        if closer:
            self.heading = 0
            self.write(closer)

    def do_p_start(self, attrs, tag):
        """Start ignoring output at a ``<p class="editdate">``."""
        for att in attrs:
            if att == ('class', 'editdate'):
                self.set_ignore()

    def handle_data(self, data):
        """Write text data; newlines become spaces outside preformatted text."""
        data = data.replace("\r", "")
        if self.preformatted:
            self.write(data)
        else:
            self.write(data.replace("\n", " "))
192
193
194
195 # Main flow
196
def main():
    """Convert every file in ``sourcedir`` to MoinMoin text in ``targetdir``.

    Each source file is read as UTF-8, re-encoded to iso-8859-1 and fed to
    TikiHTML2MoinMoin.  The result is cleaned up and written to a file named
    like the source file without its last five characters.  Progress is
    printed to stdout, one line per file.

    Returns '' when ``sourcedir`` is not accessible, otherwise None.
    """
    # look for source directory
    if not os.access(sourcedir, os.F_OK):
        print("%s is not accessable" % sourcedir)
        return ''
    sourcelist = os.listdir(sourcedir)

    # create target directory (if necessary)
    if not os.access(targetdir, os.F_OK):
        os.mkdir(targetdir)

    # Matches the "[wiki:Target label]" links produced by the parser.
    # Compiled once because it is the same for every file.
    wikiname_re = re.compile(r'\[wiki\:((?P<wikilink>.*?) (?P<label>.*?))\]')

    for sourcefile in sourcelist:
        # Assumes a five character extension such as ".html".
        targetname = sourcefile[:-5]
        sys.stdout.write(sourcefile)

        # first pass: transform html
        sf = codecs.open(os.path.join(sourcedir, sourcefile), 'r', 'UTF-8')
        try:
            htmldata = sf.read()
        finally:
            sf.close()
        htmldata = htmldata.encode('iso-8859-1', 'replace')
        sys.stdout.write('  .')

        # parse it and collect the output in memory
        p = TikiHTML2MoinMoin()
        buf = StringIO.StringIO()
        p.output = buf
        p.feed(htmldata)
        p.close()
        sys.stdout.write(' .')

        # second pass: delete white spaces
        wikidata = buf.getvalue().replace('\n\n\n', '\n')

        tf = open(os.path.join(targetdir, targetname), 'w')
        try:
            # third pass: purify wiki-links and write to file
            lastend = 0
            for match in wikiname_re.finditer(wikidata):
                tf.write(wikidata[lastend:match.start()])
                wikilink = match.group('wikilink')
                label = match.group('label')
                if wikilink != label:
                    # Label differs from the target: keep the link as is.
                    tf.write(match.group())
                elif isStrictWikiname(wikilink):
                    tf.write(wikilink)
                else:
                    tf.write('["%s"]' % wikilink)
                lastend = match.end()
            tf.write(wikidata[lastend:])

            # put pages in categories
            for key, value in page2category.items():
                if sourcefile.startswith(key) and wikidata.find(value) == -1:
                    tf.write('\n' + value)
        finally:
            # The original referenced tf.close without calling it, so the
            # file was never explicitly closed.
            tf.close()

        sys.stdout.write(' . ->' + targetname + '\n')
263
# Run the conversion only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Attached Files
To refer to attachments on a page, use attachment:filename, as shown below in the list of files. Do NOT use the URL of the [get] link, since this is subject to change and can break easily. You are not allowed to attach a file to this page.