Attachment 'mw_11_2_moin.php.txt'
#!/usr/bin/php
<?php
/*
copyright <original author>
TJ Fontaine <tjfontaine@gmail.com>

This script exports pages from MediaWiki to MoinMoin.

Usage:
    Choose the options you want and fill in the appropriate
    variables. At the very least, edit the database settings.

    MM_USER_ID requires that at least one user be registered in
    MoinMoin; you can find this id in wiki/data/user/

    Once everything is set up, run the script, then copy
    $output_dir/* into wiki/data/pages/ and move
    wiki/data/pages/edit-log into wiki/data/
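
    For example, assuming the MoinMoin instance lives in
    /path/to/wiki (a placeholder path, adjust to your install):

        cp -a mediawiki_pages/* /path/to/wiki/data/pages/
        mv /path/to/wiki/data/pages/edit-log /path/to/wiki/data/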

    Your MediaWiki pages and history should now be available;
    check wiki/Main_Page

    By default the script exports namespaces 0-3. In my experience
    namespace 0 holds the normal editable pages, 1 their Talk pages,
    2 the user pages, and 3 the user Talk pages. When filling in
    $IMPORT_PAGES, if description is set, the pages in that namespace
    are exported as subpages named after the description:

    (example)
    $IMPORT_PAGES['users-talk']['namespace'] = 3;
    $IMPORT_PAGES['users-talk']['description'] = "Talk";

    will cause all pages in that namespace to be exported to
    User/Talk, whereas

    $IMPORT_PAGES['users-talk']['namespace'] = 3;
    $IMPORT_PAGES['users-talk']['description'] = "TalkAgain";

    will cause all pages in that namespace to be exported to
    User/TalkAgain.

Features:
    * Import current pages
    * Import by namespace
    * Import Talk pages (as Page/Talk)
    * Import revision history
    * Import images
    * Add "#format $parser" to the page header
    * Or make minimal changes to the wiki syntax

Known Issues:
    * Changing the syntax on large sites will eat up memory;
      that part of the code needs to be overhauled
    * Thumbnails aren't handled at all

TODO:
    * Migrate users
    * Map users in revision history
    * Overhaul change_syntax
    * Image thumbnails

ChangeLog:
    * 2006-01-12 TJ Fontaine <tjfontaine@gmail.com>
        - Removed nasty not_pages array
        - Import based on namespace
        - Import Talk pages
        - Import images (uses find)
        - Import revision history
        - Add proper revision timestamps
        - Add conditional revision import

    * Version 0.3

    * 2007-11-07 David Huggins-Daines <dhuggins@cs.cmu.edu>
        - Updated for MediaWiki 1.11
        - Updated for newer MoinMoin (needs %08d revisions)
*/
########################
## MediaWiki Options  ##
########################

$MIGRATE_IMAGES = false; #set to true if you want to migrate images to moinmoin
$MW_IMAGE_PATH = "";     #full path to the mediawiki images directory
$IMPORT_HISTORY = true;  #set to false if you only want the current revision

########################
##  MoinMoin Options  ##
########################

$MM_USER_ID = "";        #moinmoin user id to attribute the imported edits to
$ADD_MW_PARSER = true;   #set to true to add "#format $MW_PARSER" to the beginning
                         #of every page; if false the script does a minimal syntax
                         #conversion instead. That conversion code needs reworking
                         #and eats too much memory, so leave this set to true.
$MW_PARSER = "media";    #name of the mediawiki parser in MoinMoin's plugin/parser

########################
##    DB Settings     ##
########################

$MW_TABLE_PREFIX = "";   #table prefix the mediawiki database was installed with
$host = "";              #mediawiki database server
$usr = "";               #mediawiki database username
$passwd = "";            #mediawiki database password
$db = "";                #mediawiki database name

########################
##  Pages To Import   ##
########################

$IMPORT_PAGES['regular']['namespace'] = 0;
$IMPORT_PAGES['regular']['description'] = "";
$IMPORT_PAGES['regular-talk']['namespace'] = 1;
$IMPORT_PAGES['regular-talk']['description'] = "Talk";
$IMPORT_PAGES['users']['namespace'] = 2;
$IMPORT_PAGES['users']['description'] = "";
$IMPORT_PAGES['users-talk']['namespace'] = 3;
$IMPORT_PAGES['users-talk']['description'] = "Talk";

########################
##  Output Directory  ##
########################

$output_dir = "mediawiki_pages"; #where the script will write the exported pages

/*
DO NOT EDIT BELOW THIS LINE
unless you think you know what you're doing
-----------------------------------------------------
*/
134
135 $link = mysql_pconnect($host,$usr,$passwd) or die(mysql_error());
136 mysql_select_db($db) or die("Could not select database");
137
138 $WIKI_LINK_START = "[";
139 $WIKI_LINK_END = "]";
140 $EXTERNAL_LINK_START = "[";
141 $EXTERNAL_LINK_END = "]";
142 $EXTERNAL_LINK_DIVIDER = " ";
143
144 if(file_exists($output_dir)){
145 rmdirr($output_dir);
146 mkdir($output_dir);
147 }
148 else{
149 mkdir($output_dir);
150 }
151
152 chdir("./$output_dir") or die;
153
154 $EDIT_LOG = array();
155
156 foreach($IMPORT_PAGES as $pagetype)
157 migrate_current_pages($pagetype['namespace'], $pagetype['description']);
158
159 print "sorting Edit Log ...";
160 asort($EDIT_LOG);
161 print "Done\n";
162
163 $edit_log = fopen("edit-log", "w");
164 foreach($EDIT_LOG as $entry)
165 fputs($edit_log, $entry);
166 fclose($edit_log);
167
168 chdir("..");
169 ###End of Main
170
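#select the pages in the given namespace from the MediaWiki database
#(and, when $IMPORT_HISTORY is set, every revision of each page) and
#hand each result row to migrate_page_row()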
function migrate_current_pages($page_namespace, $page_description = "")
{
    $page_table = $GLOBALS['MW_TABLE_PREFIX']."page";
    $text_table = $GLOBALS['MW_TABLE_PREFIX']."text";
    $revision_table = $GLOBALS['MW_TABLE_PREFIX']."revision";

    $curr_sql = "SELECT `$page_table`.page_title as ptitle, " .
        "`$page_table`.page_latest as revision, ".
        "`$page_table`.page_id as id, ".
        "`$text_table`.old_text as text, ".
        "`$page_table`.page_touched as timestamp " .
        "FROM `$page_table`, `$text_table`, `$revision_table` ".
        "WHERE `$revision_table`.rev_page = `$page_table`.page_id ".
        "AND `$revision_table`.rev_id = `$page_table`.page_latest ".
        "AND `$text_table`.old_id = `$revision_table`.rev_text_id ".
        "AND `$text_table`.old_text NOT LIKE \"MediaWiki default\" " .
        "AND page_namespace = '$page_namespace' " .
        ";";

    $query = mysql_query($curr_sql) or die(mysql_error());

    while ($row = mysql_fetch_object($query)) {
        if ($GLOBALS['IMPORT_HISTORY']) {
            #use the revision's own timestamp (not page_touched) so each
            #history entry keeps its correct date, and order by rev_id so
            #the last revision written becomes "current"
            $rev_sql = "SELECT `$page_table`.page_title as ptitle, " .
                "`$revision_table`.rev_id as revision, ".
                "`$text_table`.old_text as text, ".
                "`$revision_table`.rev_timestamp as timestamp " .
                "FROM `$page_table`, `$text_table`, `$revision_table` ".
                "WHERE `$page_table`.page_id = $row->id ".
                "AND `$revision_table`.rev_page = `$page_table`.page_id ".
                "AND `$text_table`.old_id = `$revision_table`.rev_text_id ".
                "AND `$text_table`.old_text NOT LIKE \"MediaWiki default\" " .
                "AND page_namespace = '$page_namespace' " .
                "ORDER BY `$revision_table`.rev_id ".
                ";";

            $rev_query = mysql_query($rev_sql) or
                die(mysql_error());

            while ($rev_row = mysql_fetch_object($rev_query))
            {
                migrate_page_row($rev_row, $page_description);
            }
        }
        else {
            migrate_page_row($row, $page_description);
        }
    }
    mysql_free_result($query);
}

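#convert one result row into a MoinMoin page; "(2f)" is MoinMoin's
#quoted form of "/", so a non-empty $desc turns Title into the
#subpage Title/$desc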
function migrate_page_row($row, $desc)
{
    $timestamp = $row->timestamp;
    $title = clean_title($row->ptitle);
    $text = $row->text;
    $revision = sprintf("%08d", $row->revision);

    if (strlen($desc))
        create_page($title."(2f)".$desc, $text, $timestamp, $revision);
    else
        create_page($title, $text, $timestamp, $revision);
}

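#write one revision of a page into MoinMoin's on-disk layout:
#<PageName>/current, <PageName>/edit-log and <PageName>/revisions/<rev>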
function create_page($page_title, $page_text, $page_timestamp, $page_revision)
{
    print 'create page '.$page_title.' revision '.$page_revision."\n";

    @mkdir($page_title);
    chdir($page_title) or die($page_title);

    append_edit_log($page_title, $page_timestamp, $page_revision);

    $file = fopen("current", "w");
    fputs($file, $page_revision);

    fclose($file);

    if ($GLOBALS['MIGRATE_IMAGES'])
        migrate_images($page_text);

    @mkdir("revisions");
    chdir("revisions") or die("revisions");

    $file = fopen($page_revision, "w");

    #break the page text up into lines
    $file_text = explode("\n", $page_text);

    if ($GLOBALS['ADD_MW_PARSER'])
    {
        $mw_parser = $GLOBALS['MW_PARSER'];
        fputs($file, "#format $mw_parser \n");
    }
    else
        $file_text = change_syntax($file_text);

    $b = 0;

    while ($b < count($file_text)) {
        fputs($file, rtrim($file_text[$b]) . "\n");
        $b++;
    }

    unset($file_text);
    fclose($file);
    chdir("..") or die(system('pwd')); #leave revisions
    chdir("..") or die(system('pwd')); #leave the page directory
}

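#append a line to the per-page edit-log and record it in the global
#$EDIT_LOG array (keyed by timestamp), from which the wiki-wide
#edit-log is built at the end of the run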
function append_edit_log($page_title, $timestamp, $revision)
{
    $file = fopen('edit-log', 'a+');

    if ($revision == 0)
        $action = 'SAVENEW';
    else
        $action = 'SAVE';

    if (strlen($timestamp))
        $tstamp = getStamp($timestamp);
    else
        $tstamp = uts();

    $el_string = "$tstamp\t$revision\t$action\t$page_title\t" .
        "127.0.0.1\tlocalhost\t".$GLOBALS['MM_USER_ID']."\n";

    fputs($file, $el_string);

    $GLOBALS['EDIT_LOG'][$tstamp] = $el_string;

    fclose($file);
}

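#current time as a 16-digit microsecond timestamp (MoinMoin edit-log format)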
function uts(){
    $Asec = explode(" ", microtime());
    $Amicro = explode(".", $Asec[0]);
    return ($Asec[1].substr($Amicro[1], 0, 6));
}

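#convert a MediaWiki YYYYMMDDHHMMSS timestamp into MoinMoin's
#16-digit microsecond edit-log format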
function getStamp($t)
{
    $year = substr($t, 0, 4);
    $month = substr($t, 4, 2);
    $day = substr($t, 6, 2);
    $hour = substr($t, 8, 2);
    $min = substr($t, 10, 2);
    $sec = substr($t, 12, 2);
    $micro = mktime($hour, $min, $sec, $month, $day, $year);
    #right-pad the epoch seconds with zeros to 16 digits, i.e. the
    #microsecond timestamps MoinMoin expects
    return sprintf("%-016s", $micro);
}

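#locate every [[Image:...]] referenced by the page under $MW_IMAGE_PATH
#(using find) and copy it into the page's attachments/ directory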
function migrate_images($page_text)
{
    $mw_path = $GLOBALS['MW_IMAGE_PATH'];
    $image_matches = array();
    $image_pat = "/\[\[Image:(.*)\]\]/";
    if (preg_match_all($image_pat, $page_text, $image_matches))
    {
        @mkdir("attachments");

        for ($z = 0; $z < count($image_matches[1]); $z++)
        {
            $image_file_name = strtok($image_matches[1][$z], '|');
            if (!file_exists('attachments/'.$image_file_name))
            {
                $find_string = "find $mw_path -type f -name \"".
                    "$image_file_name\"";

                $image_file_path = system($find_string, $ret);
                if ($ret) die($image_file_path);
                if (strlen($image_file_path))
                {
                    if (!copy($image_file_path, "./attachments/$image_file_name"))
                        die("failed to copy $image_file_name\n");
                    print " added attachment: $image_file_name \n";
                }
            }
        }
    }
}

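#quote a MediaWiki title into a MoinMoin page directory name: spaces
#become underscores and runs of other non-alphanumeric characters are
#hex-encoded inside parentheses, e.g. "Foo/Bar" becomes "Foo(2f)Bar"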
function clean_title($page_title)
{
    $page_title = utf8_encode(str_replace(" ", "_", $page_title));
    $quoted = array();
    $in_parenthesis = false;
    for ($i = 0; $i < strlen($page_title); $i++)
    {
        $curchar = substr($page_title, $i, 1);
        if (ereg('[^a-zA-Z0-9_]', $curchar))
        {
            if (!$in_parenthesis)
            {
                $quoted[] = '(';
                $in_parenthesis = true;
            }
            $quoted[] = str_pad(dechex(ord($curchar)),
                2, '0', STR_PAD_LEFT);
        }
        else
        {
            if ($in_parenthesis)
            {
                $quoted[] = ')';
                $in_parenthesis = false;
            }
            $quoted[] = $curchar;
        }
    }

    if ($in_parenthesis)
        $quoted[] = ')';

    $page_title = implode('', $quoted);
    unset($quoted);
    return $page_title;
}

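#minimal MediaWiki -> MoinMoin markup conversion, applied line by line
#(only used when $ADD_MW_PARSER is false)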
function change_syntax($textString) {

    for ($a = 0; $a < count($textString); $a++) {
        #print "str(before mod) = $textString[$a] \n";

        #custom plugin
        #if(preg_match("/\<fileshare\>.+\<\/fileshare\>/",$textString[$a])){
        #    $textString[$a] = fileShare($textString[$a]);
        #}

        #bulleted lists
        if (substr($textString[$a], 0, 1) == '*') {
            $textString[$a] = bullets($textString[$a]);
        }

        #numbered lists
        if (preg_match("/^#/", $textString[$a])) {
            $textString[$a] = numberedList($textString[$a]);
        }

        #headings
        if (preg_match("/^==.+==/", $textString[$a])) {
            $textString[$a] = heading($textString[$a]);
        }

        #wiki links
        if (preg_match("/\[\[.+\]\]/", $textString[$a])) {
            $textString[$a] = wikiLinks($textString[$a]);
        }

        #mediawiki new lines: <br>, <br/> or <BR>
        #must be handled after wiki links
        if (preg_match("/\<br\/{0,1}\>/i", $textString[$a])) {
            $textString[$a] = preg_replace("/\<br\/{0,1}\>/i", "[[BR]]", $textString[$a]);
            #print "result = $textString[$a]\n";
        }
    }

    return $textString;
}



#custom plugin
#function fileShare($string) {
#    $fileshare = substr($string, strpos($string, "\\\\"));
#    $fileshare = preg_replace("/<\/fileshare>/","",$fileshare);
#    $string = "[file:" .$fileshare ."]";
#    return $string;
#}

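#convert a MediaWiki heading ("== Title ==") to a MoinMoin heading
#("= Title ="), which uses one less '=' per level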
function heading($string){
    $theHeading = $string;
    $headingLevel = 0;

    #strip the leading '=' chars, counting the heading level
    while ($headingLevel < strlen($theHeading)) {
        if (substr($theHeading, 0, 1) == '=') {
            $theHeading = substr($theHeading, 1);
        }
        else {
            #no more ='s in front of the text
            break;
        }
        $headingLevel++;
    }

    #the leading '=' chars are now removed,
    #now strip the trailing '=' chars
    $theHeading = substr($theHeading, 0, strpos($theHeading, '='));

    $theSyntax = "";
    #note: moinmoin uses one less '=' per heading level,
    #so mediawiki "===" is the same as moinmoin "=="
    for ($i = 1; $i < $headingLevel; $i++) {
        $theSyntax .= "=";
    }

    $string = $theSyntax ." $theHeading " .$theSyntax;

    return $string;
}


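#convert a MediaWiki bullet ("*", "**", ...) to MoinMoin's
#space-indented " *" form, one leading space per nesting level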
function bullets($string) {
    $a = 0;
    while ($a < strlen($string)) {
        $a++;
        if (substr($string, 1, 1) == "*")
            $string = substr($string, 1);
        else
            break;
    }
    while ($a > 0) {
        $string = " " . $string;
        $a--;
    }
    return $string;
}

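#convert a MediaWiki numbered-list line ("#", "##") to MoinMoin's " 1." form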
function numberedList($string) {
    #check the nested case first, otherwise "/^#/" would always match
    if (preg_match("/^##/", $string)) {
        $string = preg_replace("/^##/", "   1.", $string);
    }
    elseif (preg_match("/^#/", $string)) {
        $string = preg_replace("/^#/", " 1.", $string);
    }
    return $string;
}


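#convert [[Page]] and [[Page|label]] links to MoinMoin's ["Page"]
#and [:Page: label] forms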
function wikiLinks($string) {
    global $WIKI_LINK_START;
    global $WIKI_LINK_END;

    while (strpos($string, "[[") !== false && strpos($string, "]]") !== false) {
        #isolate the link
        $link = substr($string, strpos($string, "[[") + 2);
        $link = substr($link, 0, strpos($link, "]]"));

        if (strpos($link, "|") === false) {
            #no label, just quote the page name
            $link = $WIKI_LINK_START ."\"". $link ."\"" .$WIKI_LINK_END;
        }
        else {
            $dividerPosition = strpos($link, "|");

            $wikilink = substr($link, 0, $dividerPosition);
            $label = substr($link, $dividerPosition + 1);

            #remove whitespace from beginning and end
            $label = trim($label);

            $link = $WIKI_LINK_START .":" .$wikilink .": " .$label .$WIKI_LINK_END;
        }

        $string = substr($string, 0, strpos($string, "[[")) . $link . substr($string, strpos($string, "]]") + 2);
    }

    return $string;
}


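#replace the "|" label divider in an external link with MoinMoin's
#space divider; note this helper is not currently called by change_syntax()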
function externalLinks($string){
    global $EXTERNAL_LINK_START;
    global $EXTERNAL_LINK_END;
    global $EXTERNAL_LINK_DIVIDER;

    #external link syntax is the same except for the label divider

    if (preg_match("/\| /", $string)) {
        $string = preg_replace("/\| /", " ", $string);
    }
    elseif (preg_match("/\|/", $string)) {
        $string = preg_replace("/\|/", " ", $string);
    }

    return $string;
}

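#recursively delete a directory and its contents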
function rmdirr($dir) {
    if ($objs = glob($dir."/*")) {
        foreach ($objs as $obj) {
            is_dir($obj) ? rmdirr($obj) : unlink($obj);
        }
    }
    rmdir($dir);
}

?>