Attachment 'phpwiki2moinmoin135.py'
Download 1 #! /usr/bin/env python
2
3 """
4 Copyright (C) 2004 The Anarcat <anarcat@anarcat.ath.cx>
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19
20 See also http://www.fsf.org
21
22 ------------------------------------------------------------------------
23 PhpWikiMoinMoinConverter
24
25 = Usage =
26
27 Call this script directly from the wiki directory. You must have write
28 access to ./data and subdirectories to create pages. You will need to
29 change the CONFIGURATION DIRECTIVES, below, to have this script
30 connect properly to the MySQL database.
31
32 = Special considerations =
33
34 This script will happily destroy existing wikis if it feels like it,
35 so backups are of course advised before performing a phptomy. Note
36 that this script attempts to use saveText() to create the new pages,
37 and will not save over existing pages (see editExistingPages, below),
38 so it should still be pretty safe to run. But, again, backup or die.
39
40 = Limitations =
41
42 This script is also crucially incomplete, and lacks several phpwiki
43 features. Some are due to inherent limitations of MoinMoin, others are
44 due to the inherent ambiguity of the phpwiki syntax. In particular,
45 links within headers are translated but will not be parsed by
46 MoinMoin. Also, bold, italic, definition lists, <code> tags and newstyle
47 tables are not converted. See MoinMoin:PhpWikiMoinMoinConverter for
48 details and updates.
49
50 = Dependencies =
51
52 This script needs MySQL access, so the python-mysqldb (debian) package
53 (or equivalent) should be installed.
54 """
55
# CONFIGURATION DIRECTIVES
#
# These are the directives that allow this script to access the MySQL
# database containing the phpwiki.  They are handed unchanged to
# MySQLdb.connect() in the main part of the script (which then rebinds
# the name ``db`` to the open connection).
host = "localhost"
db = "wiki"
user = "wiki"
passwd = "wiki"
#
# This is a part of an SQL request (part of the WHERE, actually) that
# allows you to select which pages get moved over.  The value is
# concatenated directly in front of the other WHERE conditions, so a
# non-empty value has to end with "AND" (see the example).
#
# do only those pages:
pagename = ""
# Example:
#pagename = "pagename='ParseTest' AND"
#
# the path to the moinmoin wiki, leave empty if you know what you're
# doing (when non-empty it is appended to sys.path before MoinMoin is
# imported)
wikipath = '/home/moin/share/mammothwiki'

# by default, we do not edit existing wiki pages, to avoid conflict
#
# this will override this behavior and allow the editing of those
# pages. normally, a new revision should be created, so this is pretty
# safe.
editExistingPages = 0

#
# END OF CONFIGURATION DIRECTIVES
#
# XXX: the interactive way will be something like this:
#
# print "I will need the host/database/username/password of the phpwiki"
# print "Host:",
# host = raw_input()
# print "Database:",
# db = raw_input()
# print "Username:",
# user = raw_input()
# print "Password:",
# passwd = getpass()
# print "Pagename:"
# pagename = raw_input()
#
# But right now this script DOES NOT COMPLETELY WORK, so it often has
# to be run and rerun.
103
104 import MySQLdb
105 #import sqlite
106 import re
107 import os
108 import sys
109
# If you haven't installed MoinMoin as root,
# insert the path where the MoinMoin directory is here
sys.path.append('/home/moin/lib/python2.4/site-packages')

# Also make the wiki directory itself importable; skipped when wikipath
# is left empty in the configuration directives.
# NOTE(review): presumably needed so MoinMoin can find the wiki's
# configuration module -- confirm against the MoinMoin setup.
if wikipath:
    sys.path.append(wikipath)
116
117 from MoinMoin.PageEditor import PageEditor
118 from MoinMoin.request import RequestCLI
119
120 # the block parser deals with the whole text to be converted
121 #
122 # it will call the line parser for each line in the text
123 #
124 # markup is just passed along to lineparser
# Opening <verbatim>/<pre> tag that is not escaped with "~".
# Groups: text before the tag, tag name, text after the tag (leading
# blanks dropped).  The first group is lazy so that the FIRST tag on the
# line is the one that opens the block.
_PRE_OPEN = re.compile(r'^(.*?)(?<!~)<(verbatim|pre)>[ \t\r\f\v]*(.*)$')

# Closing tags, keyed by the name of the tag that opened the block.
# Groups: text before the tag (trailing blanks dropped), text after it.
_PRE_CLOSE = {
    'verbatim': re.compile(r'^(.*?)[ \t\r\f\v]*(?<!~)</verbatim>(.*)$'),
    'pre': re.compile(r'^(.*?)[ \t\r\f\v]*(?<!~)</pre>(.*)$'),
}


def blockparser(text, markup):
    """Convert a whole phpwiki page to MoinMoin markup.

    The text is split on newlines.  Everything outside <verbatim>/<pre>
    blocks is handed, piece by piece, to lineparser(piece, markup);
    everything inside such a block is copied untouched between "{{{"
    and "}}}" lines.

    A tag may open and close on the same line (e.g. "<pre>x</pre>") and a
    line may contain several blocks; each line is therefore consumed in a
    loop until nothing is left of it.

    XXX: <pre> and <verbatim> are treated the same here, but they are
    different constructs in phpwiki: <verbatim> is the direct equivalent
    of Moin's {{{ }}}, <pre> additionally allows linking.

    text   -- the page content (one string, lines separated by "\\n")
    markup -- passed along unchanged to lineparser
    Returns the converted page as one string.
    """
    result = []
    pre = None  # name of the currently open tag, or None outside a block
    for line in text.split("\n"):
        pending = line
        while True:
            if pre:
                # pre mode: no parsing, just look for the closing tag
                match = _PRE_CLOSE[pre].search(pending)
                if not match:
                    result.append(pending)
                    break
                before, pending = match.groups()
                # don't add the groups as lines if they are empty
                if before != '':
                    result.append(before)
                result.append('}}}')
                pre = None
            else:
                match = _PRE_OPEN.search(pending)
                if not match:
                    # "common case": normal line parsing
                    result.append(lineparser(pending, markup))
                    break
                # found a starting tag: remember it, parse what's
                # before but not what's after
                before, pre, pending = match.groups()
                if before != '':
                    result.append(lineparser(before, markup))
                result.append('{{{')
            if pending == '':
                # nothing left on this line after the tag
                break

    return "\n".join(result)
174
175 # the line parser deals with text as handed to it by blockparser
176 #
177 # the blockparser should send the text line per line to the line
178 # parser
179 #
180 # markup is 1 (old style) or 2 (new style)
_M = re.MULTILINE

# Each rule is (compiled pattern, replacement template); rules inside a
# group are applied in order, and the groups themselves are applied in
# the order used by lineparser().  The order matters.

# !!!, !! and ! headlines
_HEADLINE_RULES = (
    (re.compile(r'^!!!(.*)$', _M), r'= \1 ='),
    (re.compile(r'^!!(.*)$', _M), r'== \1 =='),
    (re.compile(r'^!(.*)$', _M), r'=== \1 ==='),
)

_LINK_RULES = (
    # pictures
    (re.compile(r'^\s*\[(http:.*(png|jpg|gif))\]', _M), r'\n\1'),
    # the links like [http://]
    (re.compile(r'(?<!~)\[(http|https|ftp|nntp|news|mailto|telnet|file)(://.*?)\]', _M),
     r'\1\2'),
    # the [links like this]
    (re.compile(r'(?<!~|#)\[([^]\|]*)\]', _M), r'["\1"]'),
    # links like [foo | http://...]
    (re.compile(r'(?<!~|#)\[([^]#]*?)\s*\|\s*([^]#\s]+?://[^]\s]+?)\]', _M),
     r'[\2 \1]'),
    # [fooo | bar]
    (re.compile(r'(?<!~|#)\[([^]]*?)\s*\|\s*([^]\s]+?)\]', _M), r'[:\2:\1]'),
)

# XXX: the following constructs are broken. I don't know how
# to express that in Moin
# [OtherPage#foo] [named|OtherPage#foo]

# anchors
# #[foo] => [[Anchor(foo)]]foo
# #[|foo] => [[Anchor(foo)]]
# #[howdy|foo] => [[Anchor(foo)]]howdy
_ANCHOR_RULES = (
    (re.compile(r'#\[([^]|]*)\]', _M), r'[[Anchor(\1)]]\1'),
    (re.compile(r'#\[\|+([^]\|]*)\]', _M), r'[[Anchor(\1)]]'),
    (re.compile(r'#\[([^]\|]*)\|+([^]\|]*)\]', _M), r'[[Anchor(\2)]]\1'),
)

# indented text (old style markup only; "this might work")
_INDENT_RULES = (
    (re.compile(r'^ (.*)$'), r'{{{\n\1\n}}}'),
)

_LIST_RULES = (
    # bullet lists (regexp taken from phpwiki/lib/BlockParser.php:1.37)
    (re.compile(r'''^\ {0,4}
        (?: \+
          | -(?!-)
          | [o](?=\ )
          | [*] (?!(?=\S)[^*]*(?<=\S)[*](?:\s|[-)}>"'\/:.,;!?_*=]) )
        )\ *(?=\S)''', re.MULTILINE | re.VERBOSE), r' * '),
    # numbered lists; anchored at the start of the line so that a "# "
    # in the middle of a sentence is left alone
    (re.compile(r'^ {0,4}(?:\# (?!\[.*\])) *(?=\S)', _M), r' 1. '),
)

# bold (old style markup only); emphasis is the same in both wikis
_OLD_BOLD_RULES = (
    (re.compile(r'__(\w*)', _M), r"'''\1"),
    (re.compile(r'(\w*)__', _M), r"\1'''"),
)

_PLUGIN_RULES = (
    # calendar plugin
    (re.compile(r'<\?plugin Calendar month=(\d*) year=(\d*)\?>', _M),
     r'[[MonthCalendar(,\2,\1)]]'),
    (re.compile(r'<\?plugin Calendar\?>', _M), r'[[MonthCalendar]]'),
    # BackLinks
    (re.compile(r'<\?plugin\s+BackLinks.*?\?>', _M), r"[[FullSearch()]]"),
    # FullSearch
    (re.compile(r'<\?plugin\s+FullTextSearch.*?(s=.*?)?\?>', _M),
     r'[[FullSearch()]]'),
    # RecentChanges
    (re.compile(r'<\?plugin\s+RecentChanges.*?\?>', _M), r'[[RecentChanges]]'),
)

# tables (old style)
_TABLE_RULES = (
    (re.compile(r'^(\|.*)$', _M), r'\1|'),
    (re.compile(r'\|', _M), r'||'),
    (re.compile(r'\|\|<', _M), r'||<(>'),
    (re.compile(r'\|\|>', _M), r'||<)>'),
)

# moinmoin tables are on one line (new style markup only)
_TABLE_JOIN_RULES = (
    (re.compile(r'\|\|\s*\n', _M), r'||'),
)

_CLEANUP_RULES = (
    # mailto
    (re.compile(r'mailto:', _M), r''),
    # line breaks
    (re.compile(r'(?<!~)%%%', _M), r'[[BR]]'),
    # unescape
    # This must stay the last filter or things will break real bad
    (re.compile(r'~(~?)', _M), r'\1'),
)


def _markup_style(markup):
    """Return markup as an int; anything unusable counts as 2 (new style)."""
    try:
        return int(markup)
    except (TypeError, ValueError):
        return 2


def _apply_rules(rules, text):
    """Run every (pattern, replacement) pair of rules over text, in order."""
    for pattern, replacement in rules:
        text = pattern.sub(replacement, text)
    return text


def lineparser(text, markup):
    """Convert one piece of phpwiki text (normally one line) to MoinMoin.

    The blockparser should send the text line per line to this function.

    text   -- the phpwiki text to convert
    markup -- 1 (old style) or 2 (new style); given either as an int or
              as a numeric string, since the value found in the phpwiki
              version data is a string.  Unusable values are treated as
              2.
    Returns the converted text.

    Old style pages additionally get indented text turned into {{{ }}}
    blocks and __bold__ turned into '''bold'''.

    XXX: new style emphasis (_italic_, *bold*, =tt=) is not converted
    yet; the rules to port are in getStartRegexp() in
    phpwiki/lib/InlineParser.php.
    """
    markup = _markup_style(markup)
    old_style = (markup == 1)

    text = _apply_rules(_HEADLINE_RULES, text)
    text = _apply_rules(_LINK_RULES, text)
    text = _apply_rules(_ANCHOR_RULES, text)

    if old_style:
        text = _apply_rules(_INDENT_RULES, text)

    text = _apply_rules(_LIST_RULES, text)

    if old_style:
        text = _apply_rules(_OLD_BOLD_RULES, text)

    text = _apply_rules(_PLUGIN_RULES, text)
    text = _apply_rules(_TABLE_RULES, text)

    if markup == 2:
        text = _apply_rules(_TABLE_JOIN_RULES, text)

    # must stay last: this group contains the unescape filter
    text = _apply_rules(_CLEANUP_RULES, text)

    return text
332
333 # "secure" password prompting
def getpass():
    """Read one line from standard input with terminal echo switched off.

    Echo is disabled with "stty -echo" and always re-enabled with
    "stty echo", whether the read succeeds or is interrupted
    (KeyboardInterrupt, EOFError, ...); any exception is passed on to
    the caller.

    Returns the line that was typed, as returned by raw_input().
    """
    os.system("stty -echo")
    try:
        return raw_input()
    finally:
        # never leave the user's terminal without echo
        os.system("stty echo")
345
346 # main loop.
347
348 # connect to the database and fetch phpwiki pages
349 # To use SQLite instead of Mysql, comment out the
350 # MySQL connect line below and use this one instead
351
352 #db = sqlite.connect(
353 # '/var/lib/phpwiki/phpwiki_pagedb.db'
354 # )
355
356 db = MySQLdb.connect(
357 host=host,
358 db=db,
359 user=user,
360 passwd=passwd
361 )
362
363
364 cursor = db.cursor();
365
366 stmt="SELECT pagename,content,versiondata FROM page,recent,version WHERE " + pagename + \
367 " page.id=version.id AND version.id=recent.id AND version=latestversion" + \
368 " ORDER BY pagename"
369 cursor.execute(stmt)
370
371 # loop over the matching phpwiki pages
372 result = cursor.fetchall()
373 for pagename,content,versiondata in result:
374 utf8pagename = unicode(pagename, 'latin-1')
375 request = RequestCLI(utf8pagename)
376 page = PageEditor(request, utf8pagename)
377 print pagename,
378 # overwriting pages if selecting only some
379 if not editExistingPages and page.exists():
380 print " already exists, skipping"
381 continue
382
383 # find out in the serialized field what markup type (old
384 # style?) this page is in
385 match = re.compile('s:6:"markup";s:[0-9]*:"([0-9]*)";').search(versiondata)
386 if match != None:
387 # markup is 1 (old style) or 2 (new style)
388 markup = match.group(1)
389 else:
390 # default to "new" markup style
391 markup = 2
392
393
394 # show some progress
395 #
396 # (ternary operator in python: http://c2.com/cgi/wiki?PythonIdioms)
397 print (markup == 2 and [""] or ["(old style)"])[0],
398
399 # do the actual parsing of this page and save it
400 text=blockparser(content,markup)
401 try:
402 dummy, revision, exists = page.get_rev()
403 err = page.saveText(unicode(text,'latin-1'), revision)
404 # the exceptions thrown by saveText are errors or messages, so
405 # just print both
406 except Exception, msg:
407 print msg,
408 else:
409 if err:
410 print err,
411
412 # the EOL
413 print
414
415 db.close
Attached Files
To refer to attachments on a page, use attachment:filename, as shown below in the list of files. Do NOT use the URL of the [get] link, since this is subject to change and can break easily. You are not allowed to attach a file to this page.