sakana2moin.py
Abstract
Author: DanSandler
Usage: sakana2moin.py <sakana-files-dir> <moin-files-dir>
Convert pages from the Sakana wiki syntax to MoinMoin markup. Requires shell access to the Moin installation, because it actually creates a pile of files which need to be moved into <moin>/data/text.
For Sakana pages with comments, the output MoinMoin page will have the comments appended to its body, separated by horizontal rules and each signed "--SomeName at TIME". Special (user) pages have the (user) prefix stripped from their names.
Limitations/Bugs
Doesn't deal well with UTF-8 pagenames -- you end up with _XX_XX_XX.
- You can use or copy i18n.recode to recode from the original name to utf8
The {mailto} macro is not detected, so it is never converted to MoinMoin's [[MailTo]] macro.
Totally gives up on fancy (meta) and (topic) pages.
Should use MoinMoin's date/time macros in comment conversion.
Has a bunch of crazy macro stuff in it specific to our Sakana and Moin installations; it won't hurt you if you use it on your stock Sakana installation, but chances are you don't have the {change} or {quote} macros...
About Sakana
Sakana (魚 -- Japanese for "fish") is a Python wikisystem written (as a "hello world", no less!) by Brian Swetland. It has some really interesting features, including:
- a powerful comment system that automatically creates new pages for comments and includes them inline in the master page
- ownership of pages
- ACL and user "classes" (editor, read-only, etc.)
Of course, Brian's not maintaining it anymore, and the installation is pretty fragile (as seems to be the case with all wikis that run their own HTTPd).
Implementation
1 #!/usr/bin/env python
2 # sakana2moin.py
3 # by dan sandler <dan.sandler /at/ gmail DOTCOM>
4 # converts sakana wikipages into moin wikipage files
5 # usage: sakana2moin.py <srcdir> <destdir>
6 # version: 0.1
7
8 import sys, os, re, time
9
10 from MoinMoin import wikiutil
11
class Translator:
    """Stream translator from Sakana wiki markup to MoinMoin markup.

    Reads lines from an input file object (translate) and writes the
    converted markup to the output file object given at construction.
    State carried across lines:
      _inlist       -- nesting depth of {list} blocks
      _indent       -- current indent prefix written at each line start
      _newline      -- flag: the next output() call starts a fresh line
      _preformatted -- nesting depth of {code}/{text}/{tt} blocks
    """
    # One MoinMoin indent step; outdent() strips exactly this much.
    indentstring = ' '
    indentlength = len(indentstring)

    def __init__(self, OUT):
        # OUT: writable file object receiving the MoinMoin markup.
        self._out = OUT
        self._inlist = 0
        self._indent = ""
        self._newline = 1
        self._preformatted = 0

    def indent(self):
        # Push one list/quote level: grow the indent prefix by one step.
        self._indent = self._indent + Translator.indentstring

    def outdent(self):
        # Pop one list/quote level: drop one indent step from the prefix.
        self._indent = self._indent[Translator.indentlength:]

    def output(self, s):
        """Write s, emitting the current indent prefix at line starts."""
        if self._newline:
            self._out.write(self._indent)
            self._newline = 0
        if re.search('\n', s):
            # s contains a newline, so the next write begins a new line.
            self._newline = 1
        self._out.write(s)

    def translate(self, IN):
        """Translate the whole Sakana stream IN into MoinMoin markup.

        IN: readable file object containing Sakana markup.  Output goes
        to the file object passed to __init__.
        """
        self._in = IN
        while 1:
            line = self._in.readline()
            if line == '': break
            if self._preformatted:
                # Inside {code}/{text}: keep leading whitespace, only
                # trim trailing newlines.
                line = re.sub(r'\n+$', '', line)
            else:
                line = line.strip()

            # full-line macros
            m = re.search(r'{list}', line)
            if m:
                self._inlist += 1
                self.indent() ; continue

            m = re.search(r'{/list}', line)
            if m:
                self._inlist -= 1
                self.outdent() ; continue

            # full-line macros with possible internal text

            postfix = '\n'
            # '@' prefix: Sakana H1 -> MoinMoin "= ... ="
            m = re.search(r'^@\s*(.*)$', line)
            if m:
                line = m.group(1)
                postfix = ' =\n'
                self.output("= ")

            # '-' prefix: list bullet inside {list}, heading otherwise.
            m = re.search(r'^-\s*(.*)$', line)
            if m:
                line = m.group(1)
                if self._inlist:
                    self.output("* ")
                else:
                    # I feel like sakana's - operator is more like a H3 than a
                    # H2 ... is that just me?
                    postfix = " ===\n"
                    self.output("=== ")

            # inline formatters
            # Split on {tag} macros; the capturing group keeps the
            # delimiters in the result so tags and text interleave.
            chunked = re.split(r'({[^}]*})', line)
            for chunk in chunked:
                if len(chunk) > 0 \
                   and chunk[0] == '{' and chunk[-1] == '}':
                    tag = chunk[1:-1]

                    # preformatted blocks: {code}/{text} -> {{{ ... }}}
                    if tag in ('code','text'):
                        self.output("{{{\n")
                        self._preformatted += 1
                        continue

                    if tag in ('/code','/text'):
                        self.output("}}}\n")
                        self._preformatted -= 1
                        continue

                    # {part} / {quote|PageName}: indented block, with an
                    # attribution line for quotes.
                    m = re.search(r'^(part|quote)(.*)$', tag)
                    if m:
                        if m.group(1) == "quote" and m.group(2).startswith("|"):
                            pagename = m.group(2)[1:]
                            if pagename.startswith("(user)"):
                                pagename = pagename[len("(user)"):]
                            self.output("""''Quote from ["%s"]:''"""
                                        % pagename)
                        self.indent()
                        self.output("\n") #start the indent
                        continue

                    if tag in ('/part','/quote'):
                        self.outdent()
                        continue

                    # bold / italic toggles (same marker opens and closes)
                    if tag == 'b' or tag == 'strong' \
                       or tag == '/b' or tag == '/strong':
                        self.output("'''")
                        continue

                    if tag == 'i' or tag == 'em' \
                       or tag == '/i' or tag == '/em':
                        self.output("''")
                        continue

                    # {tt}: monospace; also suppresses strip() on lines
                    # while open, via the _preformatted counter.
                    if tag == 'tt' or tag == '/tt':
                        self.output("`")
                        if tag == 'tt':
                            self._preformatted += 1
                        else: self._preformatted -= 1
                        continue

                    if tag == 'hr':
                        self.output("----")
                        continue

                    if tag == 'br':
                        self.output("[[BR]]")
                        continue

                    # {verb|raw html} -> MoinMoin HTML macro
                    m = re.search(r'^verb\|(.*)$', tag)
                    if m:
                        self.output("[[HTML(%s)]]" % m.group(1))
                        continue

                    # site-specific macros: {eroom|..}, {change|..}, {bug|..}
                    m = re.search(r'^(eroom|change|bug)\|(.*)$', tag)
                    if m:
                        if m.group(1) == "eroom":
                            func = "ERoom"
                        elif m.group(1) == "change":
                            func = "Change"
                        elif m.group(1) == "bug":
                            func="Bug"

                        # optional "|display name" after the target
                        m2 = re.search(r'^(.*)\|(.*)', m.group(2))
                        if m2:
                            page = m2.group(1)
                            name = ',' + m2.group(2)
                        else:
                            page = m.group(2)
                            name = ''
                        self.output("[[%s(%s%s)]]" % (func, page, name))
                        continue

                    # {page|Target|Label} -> MoinMoin [:Target:Label]
                    m = re.search(r'^page\|(.*)$', tag)
                    if m:
                        m2 = re.search(r'^(.*)\|(.*)', m.group(1))
                        if m2:
                            page = m2.group(1)
                            name = m2.group(2)
                        else:
                            page = m.group(1)
                            name = ''
                        self.output("[:%s:%s]" % (page, name))
                        continue

                    # {link|url|Label} -> external link [url Label]
                    m = re.search(r'^link\|(.*)$', tag)
                    if m:
                        m2 = re.search(r'^(.*)\|(.*)', m.group(1))
                        if m2:
                            page = m2.group(1)
                            name = ' ' + m2.group(2)
                        else:
                            page = m.group(1)
                            name = ''
                        self.output("[%s%s]" % (page, name))
                        continue

                    # anything else: keep it visible in the output
                    self.output('`[Unknown macro: %s]`' % chunk)

                else:
                    # blocks of text
                    # must substitute [OtherPage]
                    pagelink_chunks = re.split(r'(\[[^\]]*\])', chunk)
                    for subchunk in pagelink_chunks:
                        if len(subchunk) > 2 \
                           and subchunk[0] == '[' and subchunk[-1] == ']':
                            # OK, special pages!
                            page = subchunk[1:-1]
                            m = re.match(r'\(user\)(.*)$', page)
                            if m: page = m.group(1) #users are regular pages
                            self.output('["%s"]'% page)
                        else:
                            # finally, text with no tags or ANYTHING.
                            self.output(re.sub(r'\\', '', subchunk))

            self.output(postfix)
def dehexify(s):
    """Decode a Sakana filename: consecutive 2-digit hex pairs -> chars."""
    return ''.join(chr(int(s[pos:pos + 2], 16))
                   for pos in range(0, len(s), 2))
205
def hexify(name):
    """Encode a page name as Sakana's on-disk form: each character's
    ordinal as unpadded lowercase hex, concatenated."""
    return ''.join('%x' % ord(ch) for ch in name)
211
class Snip:
    """One Sakana page ("snip"): metadata, raw markup, and its comments.

    A snip named N is stored in srcdir as two files: hexify(N)+':meta'
    (a Python-literal dict with at least a "name" key, optionally
    "notes", "created_by", "created_at") and hexify(N)+':data' (the
    Sakana markup itself).
    """
    def __init__(self, srcdir, name):
        self.srcdir = srcdir
        self.name = name
        filepath = os.path.join(srcdir, hexify(name))
        # SECURITY: eval() of the :meta file executes arbitrary Python;
        # only run this converter on a Sakana data dir you trust.
        metafile = open(filepath + ':meta')
        try:
            self.info = eval(metafile.read())
        finally:
            metafile.close()
        if not self.name == self.info["name"]:
            # was `raise "names not consistent..."` -- string exceptions
            # are a TypeError in modern Python; raise a real exception.
            raise ValueError("names not consistent: '%s', '%s'"
                             % (self.name, self.info['name']))
        self.datafile = filepath + ':data'

    def appendToMoinStream(self, outfile):
        """Translate this snip's markup onto outfile, then append its
        comment snips (recursively) under a "== Comments ==" header."""
        t = Translator(outfile)
        text = open(self.datafile)
        try:
            t.translate(text)
        finally:
            text.close()

        # now, append any notes; a snip without a "notes" key simply
        # has no comment section (hence the KeyError swallow).
        try:
            emitted_hdr = 0
            for note in self.info["notes"]:
                noteSnip = Snip(self.srcdir, note)
                print(" note: %s <%s>" % (self.name, self.datafile))
                if not emitted_hdr:
                    outfile.write("== Comments ==\n")
                    emitted_hdr = 1
                else:
                    outfile.write("----\n")
                noteSnip.appendToMoinStream(outfile)
                # attribution line: strip "(user)" from the author name
                outfile.write("""'' -- ["%s"] at %s''\n""" % (
                    noteSnip.info["created_by"][len("(user)"):],
                    time.strftime("%X %x",
                                  time.localtime(noteSnip.info['created_at']))
                ))
        except KeyError: pass

    def toMoin(self, destdir):
        """Write this snip as a MoinMoin page file under destdir."""
        moin_name = self.name
        if moin_name.startswith("(user)"):
            # user pages become ordinary pages in MoinMoin
            moin_name = moin_name[len("(user)"):]

        # MoinMoin has its own bizarre ideas about how to encode file names
        moin_name = wikiutil.quoteFilename(moin_name)

        print("page: %s <%s>" % (self.name, self.datafile))

        out = open(os.path.join(destdir, moin_name), 'w')
        try:
            self.appendToMoinStream(out)
        finally:
            out.close()
259
if __name__ == '__main__':
    if len(sys.argv) < 3:
        print("usage: sakana2moin.py <srcdir> <destdir>")
        sys.exit(1)

    srcdir = sys.argv[1]
    destdir = sys.argv[2]

    # Each page is a pair of files, <hexname>:meta and <hexname>:data;
    # drive the conversion off the :data files.
    srcfiles = os.listdir(srcdir)
    for fn in srcfiles:
        if not fn.endswith(":data"):
            continue

        pagename = dehexify(fn[:-len(":data")])

        if pagename.startswith("(note)"):
            # comment snips are emitted along with their parent page
            continue
        elif pagename.startswith("(meta)") \
                or pagename.startswith("(topic)"):
            print("warning: skipping '%s' (can't handle meta/topic snips)" % pagename)
            continue

        Snip(srcdir, pagename).toMoin(destdir)
292
293 # vim: ft=python ts=4 sts=4 sw=4 expandtab: