Attachment 'mw2moin.php.txt'
Download 1 #!/usr/bin/php
2 <?php
3 /*
4 copyright <original author>
5 TJ Fontaine <tjfontaine@gmail.com>
6
7 This script allows the exporting of pages from MediaWiki to MoinMoin.
8
9 Usage:
10 choose the options you want and fill in the appropriate
11 variables. Make sure you at the very least edit the database
12 settings.
13
14 MM_USER_ID requires at least one user be registered in
15 moinmoin, you can find this number in wiki/data/user/
16
17 Once everything is setup run the script, then copy from
18 $output_dir/* wiki/data/pages/ and
19 mv wiki/data/pages/edit-log wiki/data/
20
21 Your MediaWiki pages and history should now be available
22 to you, check wiki/Main_Page
23
24 By default the script exports namespaces 0-3. It has been
25 my experience that namespace 0 are normal editable pages,
26 1 is the normal pages Talk sections, 2 are the user pages,
27 and 3 are the user page Talk sections. When filling in
28 $IMPORT_PAGES if description is set it will export those
29 pages to the description:
30
31 (example)
32 $IMPORT_PAGES['users-talk']['namespace'] = 3;
33 $IMPORT_PAGES['users-talk']['description'] = "Talk";
34
35 will cause all pages in that namespace to be exported to
36 User/Talk where as
37
38 $IMPORT_PAGES['users-talk']['namespace'] = 3;
39 $IMPORT_PAGES['users-talk']['description'] = "TalkAgain";
40
41 will cause all pages in that namespace to be exported to
42 User/TalkAgain.
43
44 Features:
45 * Import Current Pages
46 * Import By Namespace
47 * Import Talk Pages (as Page/Talk)
48 * Import Revision History
49 * Import Images
50 * Add "#format $parser" to header
51 * Or make minimal changes to Wiki syntax
52
53 Known Issues:
54 * Changing the syntax on large sites will eat up memory
55 that part of the code needs to be overhauled
56 * Thumbnails aren't handled at all
57
58 TODO:
59 * Migrate Users
60 * Map Users in revision history
61 * Overhaul change_syntax
62 * Image thumbnails
63
64 ChangeLog:
65 * 2006-01-12 TJ Fontaine <tjfontaine@gmail.com>
66 - Removed nasty not_pages array
67 - Import based on namespace
68 - Import Talk Pages
69 - Import images (uses find)
70 - Import Revision History
71 - Add Proper Revision Timestamp
72 - Add Conditional Revision Import
73
74 * Version 0.3
75 */
########################
## MediaWiki Options ##
########################

# true: copy images referenced by the exported pages into MoinMoin attachments
$MIGRATE_IMAGES = false;
# full path to the MediaWiki images directory (only used when migrating images)
$MW_IMAGE_PATH = "";
# true: export old revisions as well; false: export only the current revision
$IMPORT_HISTORY = false;

########################
## MoinMoin Options ##
########################

# MoinMoin user id recorded as the author of every imported edit
$MM_USER_ID = "";
# true: prepend "#format $MW_PARSER" to every page and leave the markup alone.
# false: run the minimal syntax conversion instead; that code path is memory
# hungry, so leaving this set to true is recommended.
$ADD_MW_PARSER = true;
# name of the MediaWiki parser installed under plugin/parser
$MW_PARSER = "media";

########################
## DB Settings ##
########################

$MW_TABLE_PREFIX = "";  # prefix the MediaWiki tables were installed with
$host = "";             # MediaWiki database server
$usr = "";              # MediaWiki database username
$passwd = "";           # MediaWiki database password
$db = "mediawiki";      # MediaWiki database name

########################
## Pages To Import ##
########################

# One entry per namespace to export. A non-empty description exports the
# pages of that namespace as sub-pages named <Page>/<description>.
$IMPORT_PAGES = array(
    'regular' => array(
        'namespace' => 0,
        'description' => "",
    ),
    'regular-talk' => array(
        'namespace' => 1,
        'description' => "Talk",
    ),
    'users' => array(
        'namespace' => 2,
        'description' => "",
    ),
    'users-talk' => array(
        'namespace' => 3,
        'description' => "Talk",
    ),
);

########################
## Output Directory ##
########################

# directory the exported pages are written to
$output_dir = "mediawiki_pages";
124
125 /*
126 DO NOT EDIT BELOW THIS LINE
127 unless you think you know what you're doing
128 -----------------------------------------------------
129 */
130
# Connect to the MediaWiki database.
# NOTE(review): the mysql_* extension was removed in PHP 7, so this script
# needs PHP 5 (or a port to mysqli/PDO) to run.
$link = mysql_pconnect($host, $usr, $passwd) or die(mysql_error());
mysql_select_db($db) or die("Could not select database");

# Link delimiters used by the syntax conversion helpers.
$WIKI_LINK_START = "[";
$WIKI_LINK_END = "]";
$EXTERNAL_LINK_START = "[";
$EXTERNAL_LINK_END = "]";
$EXTERNAL_LINK_DIVIDER = " ";

# Always start from an empty output directory.
if (file_exists($output_dir)) {
    rmdirr($output_dir);
}
mkdir($output_dir);

chdir("./$output_dir") or die();

# Filled by append_edit_log() while the pages are exported.
$EDIT_LOG = array();

foreach ($IMPORT_PAGES as $pagetype) {
    migrate_current_pages($pagetype['namespace'], $pagetype['description']);
}

print "sorting Edit Log ...";
asort($EDIT_LOG);
print "Done\n";

# Write the combined edit log in one go; every entry already ends with "\n".
$edit_log = fopen("edit-log", "w");
fputs($edit_log, implode('', $EDIT_LOG));
fclose($edit_log);

chdir("..");
###End of Main
165 ###End of Main
166
# Export every page of one MediaWiki namespace.
#
# $page_namespace   - numeric MediaWiki namespace to export
# $page_description - optional sub-page name; passed through to
#                     migrate_page_row() for every exported row
#
# Each row of the `cur` table is exported as a page. When IMPORT_HISTORY is
# set, the matching rows of the `old` table are exported first (oldest first)
# as revisions 0..n-1 and the `cur` row becomes the final revision n, so the
# latest text is never lost. Dies with the MySQL error if a query fails.
function migrate_current_pages($page_namespace, $page_description = "")
{
    $prefix = $GLOBALS['MW_TABLE_PREFIX'];
    # the namespace is numeric; cast it so it is safe to embed in the SQL
    $namespace = (int) $page_namespace;

    $curr_sql = "SELECT cur_title as ptitle, " .
        "cur_text as text, cur_timestamp as timestamp " .
        "FROM `".$prefix."cur` " .
        "WHERE cur_user_text not like \"MediaWiki default\" " .
        "AND cur_namespace = '$namespace' " .
        ";";

    $query = mysql_query($curr_sql) or die(mysql_error());

    while ($row = mysql_fetch_object($query)) {
        $rev_counter = 0;

        if ($GLOBALS['IMPORT_HISTORY']) {
            # Titles may contain quotes (e.g. "Bob's_page"), so escape them
            # before embedding them in the query.
            $rev_sql = "SELECT old_title as ptitle, ".
                " old_text as text, old_timestamp as timestamp" .
                " FROM `".$prefix."old` " .
                " WHERE old_namespace = '$namespace' " .
                " AND old_title = '".mysql_real_escape_string($row->ptitle)."' " .
                " ORDER BY old_timestamp " .
                ";";

            $rev_query = mysql_query($rev_sql) or die(mysql_error());

            while ($rev_row = mysql_fetch_object($rev_query)) {
                migrate_page_row($rev_row, $page_description,
                    sprintf("%008s", $rev_counter));
                $rev_counter++;
            }

            mysql_free_result($rev_query);
        }

        # The `cur` row holds the latest text: it is the only revision when
        # history is off, and the last one when history is on.
        migrate_page_row($row, $page_description,
            sprintf("%008s", $rev_counter));
    }

    mysql_free_result($query);
}
219
# Export one database row (objects with ptitle, text and timestamp fields)
# as revision $revision of a page. A non-empty $desc exports the row as the
# sub-page "<title>/<desc>"; "(2f)" is the quoted form of "/" used in the
# page directory name.
function migrate_page_row($row, $desc, $revision)
{
    $page_name = clean_title($row->ptitle);

    if (strlen($desc) > 0) {
        $page_name = $page_name."(2f)".$desc;
    }

    create_page($page_name, $row->text, $row->timestamp, $revision);
}
231
# Write one revision of a page below the current working directory.
#
# Creates <page_title>/ (if needed), appends to the page's edit-log, points
# <page_title>/current at $page_revision, optionally copies referenced images
# and writes the text to <page_title>/revisions/<page_revision>.
# The working directory is restored before returning; the script dies if a
# directory cannot be created or entered.
function create_page($page_title, $page_text, $page_timestamp, $page_revision)
{
    print 'create page '.$page_title.' revision '.$page_revision."\n";

    if (!is_dir($page_title)) {
        mkdir($page_title) or die($page_title);
    }
    chdir($page_title) or die($page_title);

    append_edit_log($page_title, $page_timestamp, $page_revision);

    # "current" names the revision this page currently shows
    $current = fopen("current", "w");
    fputs($current, $page_revision);
    fclose($current);

    if ($GLOBALS['MIGRATE_IMAGES']) {
        migrate_images($page_text);
    }

    if (!is_dir("revisions")) {
        mkdir("revisions") or die("revisions");
    }
    chdir("revisions") or die("revisions");

    $out = fopen($page_revision, "w");

    # work line by line so trailing whitespace can be stripped
    $lines = explode("\n", $page_text);

    if ($GLOBALS['ADD_MW_PARSER']) {
        # keep the MediaWiki markup and let the MoinMoin parser plugin handle it
        $mw_parser = $GLOBALS['MW_PARSER'];
        fputs($out, "#format $mw_parser \n");
    } else {
        $lines = change_syntax($lines);
    }

    foreach ($lines as $line) {
        fputs($out, rtrim($line) . "\n");
    }

    unset($lines);
    fclose($out);

    chdir("..") or die(system('pwd')); # leave revisions/
    chdir("..") or die(system('pwd')); # leave the page directory
}
281
# Record one edit in the page-local "edit-log" file (in the current working
# directory) and in the global $EDIT_LOG list.
#
# $page_title - quoted page name as used for the page directory
# $timestamp  - MediaWiki timestamp string; when empty the current time is used
# $revision   - revision id; revision 0 is logged as SAVENEW, others as SAVE
#
# Entries are appended to $EDIT_LOG instead of being keyed by timestamp:
# several edits can share the same second, and keying by timestamp silently
# dropped all but the last of them. Every entry starts with its 16 character
# timestamp, so sorting the list by value still orders it chronologically.
function append_edit_log($page_title, $timestamp, $revision)
{
    $file = fopen('edit-log', 'a+')
        or die("could not open edit-log for $page_title\n");

    $action = ($revision == 0) ? 'SAVENEW' : 'SAVE';

    if (strlen($timestamp))
        $tstamp = getStamp($timestamp);
    else
        $tstamp = uts();

    $el_string = "$tstamp\t$revision\t$action\t$page_title\t" .
        "127.0.0.1\tlocalhost\t".$GLOBALS['MM_USER_ID']."\n";

    fputs($file, $el_string);
    fclose($file);

    $GLOBALS['EDIT_LOG'][] = $el_string;
}
305
# Current time as a string of epoch seconds followed by six digits of
# microseconds. microtime() yields "<fraction> <seconds>", e.g.
# "0.12345600 1136073600".
function uts(){
    list($fraction, $seconds) = explode(" ", microtime());
    $fraction_parts = explode(".", $fraction);
    $micro_digits = substr($fraction_parts[1], 0, 6);
    return $seconds . $micro_digits;
}
311
# Convert a MediaWiki timestamp ("YYYYMMDDHHMMSS") into the 16 digit
# microsecond timestamp written to the edit log.
#
# Field offsets: year 0-3, month 4-5, day 6-7, hour 8-9, minute 10-11,
# second 12-13. MediaWiki stores its timestamps in UTC, so the conversion
# uses gmmktime() and does not depend on the server's time zone.
function getStamp($t)
{
    $year  = (int) substr($t, 0, 4);
    $month = (int) substr($t, 4, 2);
    $day   = (int) substr($t, 6, 2);
    $hour  = (int) substr($t, 8, 2);
    $min   = (int) substr($t, 10, 2);
    $sec   = (int) substr($t, 12, 2);

    $seconds = gmmktime($hour, $min, $sec, $month, $day, $year);

    # right-pad with zeros: whole seconds -> microseconds, fixed width 16
    return str_pad((string) $seconds, 16, '0');
}
323
# Copy every image referenced as [[Image:name]] or [[Image:name|options]] in
# $page_text from the MediaWiki image tree into ./attachments/.
#
# The image is located with `find` below MW_IMAGE_PATH. The match is
# non-greedy so several images on one line are handled separately, and both
# the search path and the file name are shell-escaped because the name comes
# straight from page text. Images that cannot be found are skipped; a failing
# `find` or copy aborts the script.
function migrate_images($page_text)
{
    $mw_path = $GLOBALS['MW_IMAGE_PATH'];
    $image_matches = array();
    $image_pat = "/\[\[Image:(.*?)\]\]/";

    if (!preg_match_all($image_pat, $page_text, $image_matches))
        return;

    if (!strlen($mw_path))
        die("MW_IMAGE_PATH must be set to migrate images\n");

    if (!is_dir("attachments"))
        mkdir("attachments");

    foreach ($image_matches[1] as $image_ref)
    {
        # everything after the first "|" is display options, not the name
        $ref_parts = explode('|', $image_ref);
        $image_file_name = trim($ref_parts[0]);

        if (!strlen($image_file_name))
            continue;
        if (file_exists('attachments/'.$image_file_name))
            continue;

        $find_string = "find ".escapeshellarg($mw_path).
            " -type f -name ".escapeshellarg($image_file_name);

        # exec() returns the last line of output without echoing it
        $find_output = array();
        $ret = 0;
        $image_file_path = exec($find_string, $find_output, $ret);
        if ($ret) die(implode("\n", $find_output));

        if (is_string($image_file_path) && strlen($image_file_path))
        {
            if (!copy($image_file_path, "./attachments/$image_file_name"))
                die("failed to copy $image_file_name\n");
            print " added attachment: $image_file_name \n";
        }
    }
}
354
# Turn a MediaWiki page title into the quoted name used for the page
# directory.
#
# Spaces become underscores, then every run of bytes outside [a-zA-Z0-9_] is
# replaced by its lowercase hex codes wrapped in parentheses, e.g.
# "A/B" -> "A(2f)B" and "C++" -> "C(2b2b)".
#
# Uses preg_match() rather than ereg(), which no longer exists in PHP 7+.
# NOTE(review): utf8_encode() assumes the titles are stored as ISO-8859-1;
# confirm against the database encoding, since UTF-8 titles would be encoded
# twice.
function clean_title ($page_title)
{
    $page_title = utf8_encode(str_replace(" ", "_", $page_title));
    $quoted = '';
    $in_parenthesis = false;
    $length = strlen($page_title);

    for ($i = 0; $i < $length; $i++)
    {
        $curchar = $page_title[$i];

        if (preg_match('/[^a-zA-Z0-9_]/', $curchar))
        {
            # open a quoted run on the first unsafe byte
            if (!$in_parenthesis)
            {
                $quoted .= '(';
                $in_parenthesis = true;
            }
            # one byte -> two lowercase hex digits
            $quoted .= bin2hex($curchar);
        }
        else
        {
            # a safe character ends any open quoted run
            if ($in_parenthesis)
            {
                $quoted .= ')';
                $in_parenthesis = false;
            }
            $quoted .= $curchar;
        }
    }

    if ($in_parenthesis)
        $quoted .= ')';

    return $quoted;
}
391
# Apply the minimal MediaWiki -> MoinMoin markup conversion to a page.
#
# $textString is the page as a list of lines; the converted list is returned.
# Per line, in this order: bullet lists, numbered lists, headings, wiki links
# and finally <br> / <br/> tags (which must run after the wiki links because
# their replacement itself contains "[[" and "]]").
function change_syntax ($textString) {
    $line_count = count($textString);

    for ($idx = 0; $idx < $line_count; $idx++) {
        $line = $textString[$idx];

        # bullet list item
        if (substr($line, 0, 1) == '*') {
            $line = bullets($line);
        }

        # numbered list item
        if (preg_match("/^#/", $line)) {
            $line = numberedList($line);
        }

        # heading
        if (preg_match("/^==.+==/", $line)) {
            $line = heading($line);
        }

        # wiki link
        if (preg_match("/\[\[.+\]\]/", $line)) {
            $line = wikiLinks($line);
        }

        # MediaWiki line break, <br> or <br/> in any case
        if (preg_match("/\<br\/{0,1}\>/i", $line)) {
            $line = preg_replace("/\\<br\/{0,1}\>/i", "[[BR]]", $line);
        }

        $textString[$idx] = $line;
    }

    return $textString;
}
434
435
436
437 #custom plugin
438 #function fileShare($string) {
439 # $fileshare = substr($string, strpos($string, "\\\\"));
440 # $fileshare = preg_replace("/<\/fileshare>/","",$fileshare);
441 # $string = "[file:" .$fileshare ."]";
442 # return $string;
443 #}
444
# Convert a MediaWiki heading line to MoinMoin syntax.
#
# MoinMoin uses one "=" less per level, so "== Title ==" becomes "= Title ="
# and "=== Title ===" becomes "== Title ==". Only the leading and trailing
# runs of "=" are removed, so a heading whose text itself contains "="
# (e.g. "== a = b ==") keeps its full text.
function heading($string){
    # number of leading '=' characters is the MediaWiki heading level
    $headingLevel = strspn($string, '=');

    # drop the leading markers, then trailing whitespace and trailing markers
    $theHeading = substr($string, $headingLevel);
    $theHeading = rtrim(rtrim($theHeading), '=');
    $theHeading = trim($theHeading);

    $theSyntax = "";
    if ($headingLevel > 1) {
        $theSyntax = str_repeat("=", $headingLevel - 1);
    }

    return $theSyntax ." $theHeading " .$theSyntax;
}
476
477
# Convert a MediaWiki bullet line to MoinMoin's indented form: all but one of
# the leading "*" are removed and the line is indented by one space per
# nesting level, e.g. "** item" -> "  * item".
function bullets ($string) {
    $depth = 0;

    while ($depth < strlen($string)) {
        $depth++;
        # stop once the marker is down to a single "*"
        if (substr($string, 1, 1) != "*") {
            break;
        }
        $string = substr($string, 1);
    }

    return str_repeat(" ", $depth) . $string;
}
493
# Convert a MediaWiki numbered list line to MoinMoin syntax.
#
# The run of leading "#" gives the nesting depth; it is replaced by one space
# of indentation per level followed by "1.", e.g. "# a" -> " 1. a" and
# "## a" -> "  1. a". Lines that do not start with "#" are returned
# unchanged.
function numberedList ($string) {
    $depth = strspn($string, '#');

    if ($depth == 0) {
        return $string;
    }

    return str_repeat(" ", $depth) . "1." . substr($string, $depth);
}
503
504
# Rewrite MediaWiki wiki links in one line of text.
#
# "[[Target]]"       becomes  START "Target" END
# "[[Target|Label]]" becomes  START :Target: Label END
# where START/END are the globals $WIKI_LINK_START / $WIKI_LINK_END.
#
# The line is scanned left to right exactly once and every "[[" is paired
# with the next "]]" after it, so converted text is never scanned again and
# unbalanced input such as "]] text [[open" is left untouched instead of
# looping forever.
function wikiLinks ($string) {
    global $WIKI_LINK_START;
    global $WIKI_LINK_END;

    $result = "";
    $offset = 0;

    while (true) {
        $open = strpos($string, "[[", $offset);
        if ($open === false) {
            break;
        }
        $close = strpos($string, "]]", $open + 2);
        if ($close === false) {
            break;
        }

        # text before the link is copied unchanged
        $result .= substr($string, $offset, $open - $open + $open - $offset);

        # isolate the link body between the brackets
        $link = substr($string, $open + 2, $close - $open - 2);
        $dividerPosition = strpos($link, "|");

        if ($dividerPosition === false || $dividerPosition === 0) {
            # no label (a leading "|" gives no target, so it is treated as
            # a plain link as well)
            $result .= $WIKI_LINK_START ."\"". $link ."\"" .$WIKI_LINK_END;
        }
        else {
            $wikilink = substr($link, 0, $dividerPosition);
            # label without surrounding whitespace
            $label = trim(substr($link, $dividerPosition + 1));

            $result .= $WIKI_LINK_START .":" .$wikilink .": " .$label .$WIKI_LINK_END;
        }

        $offset = $close + 2;
    }

    # remainder after the last converted link
    return $result . substr($string, $offset);
}
535
536
# Convert the label divider of external links.
#
# External link syntax is the same in both wikis except for the divider
# between URL and label: every "|" (together with one directly following
# space, if present) is replaced by $EXTERNAL_LINK_DIVIDER. A single space is
# used when that global is not set.
#
# The pipe is escaped in the pattern; unescaped it is regex alternation and
# matches the empty string at every position.
function externalLinks($string){
    global $EXTERNAL_LINK_DIVIDER;

    $divider = isset($EXTERNAL_LINK_DIVIDER) ? $EXTERNAL_LINK_DIVIDER : " ";

    return str_replace(array("| ", "|"), $divider, $string);
}
554
# Recursively delete a directory and everything in it.
#
# Entries are listed with scandir() so hidden files (names starting with ".")
# are removed as well; a "$dir/*" glob skips them and the final rmdir() then
# fails on the non-empty directory. Symbolic links are unlinked rather than
# followed, so nothing outside $dir is deleted.
function rmdirr($dir) {
    $entries = scandir($dir);

    if ($entries !== false) {
        foreach ($entries as $entry) {
            if ($entry === '.' || $entry === '..') {
                continue;
            }

            $path = $dir."/".$entry;

            if (is_dir($path) && !is_link($path)) {
                rmdirr($path);
            }
            else {
                unlink($path);
            }
        }
    }

    rmdir($dir);
}
563
564 ?>
Attached Files
To refer to attachments on a page, use attachment:filename, as shown below in the list of files. Do NOT use the URL of the [get] link, since this is subject to change and can break easily.You are not allowed to attach a file to this page.