attachment:mw2moin.php.txt of MediaWikiConverter

Attachment 'mw2moin.php.txt'

   1 #!/usr/bin/php
   2 <?php
   3 /*
   4 	copyright <original author>
   5 	TJ Fontaine <tjfontaine@gmail.com>
   6 
   7 	This script allows the exporting of pages from MediaWiki to MoinMoin.
   8 
   9 	Usage:
   10 		choose the options you want and fill in the appropriate
  11 		variables. Make sure you at the very least edit the database
  12 		settings. 
  13 		
  14 		MM_USER_ID requires at least one user be registered in
  15 		moinmoin, you can find this number in wiki/data/user/
  16 
   17 		Once everything is set up, run the script, then copy from
  18 		$output_dir/* wiki/data/pages/ and 
  19 		mv wiki/data/pages/edit-log wiki/data/
  20 
  21 		Your MediaWiki pages and history should now be available
  22 		to you, check wiki/Main_Page
  23 
  24 		By default the script exports namespaces 0-3. It has been
  25 		my experience that namespace 0 are normal editable pages,
  26 		1 is the normal pages Talk sections, 2 are the user pages,
  27 		and 3 are the user page Talk sections. When filling in
  28 		$IMPORT_PAGES if description is set it will export those
  29 		pages to the description:
  30 		
  31 		(example)
  32 			$IMPORT_PAGES['users-talk']['namespace'] = 3;
  33 			$IMPORT_PAGES['users-talk']['description'] = "Talk";
  34 
  35 		will cause all pages in that namespace to be exported to 
   36 		User/Talk, whereas
  37 
  38 			$IMPORT_PAGES['users-talk']['namespace'] = 3;
  39 			$IMPORT_PAGES['users-talk']['description'] = "TalkAgain";
  40 
  41 		will cause all pages in that namespace to be exported to
  42 		User/TalkAgain.
  43 
  44 	Features:
  45 		* Import Current Pages
  46 		* Import By Namespace
  47 		* Import Talk Pages (as Page/Talk)
  48 		* Import Revision History
  49 		* Import Images
  50 		* Add "#format $parser" to header
  51 		* Or make minimal changes to Wiki syntax
  52 
  53 	Known Issues:
   54 		* Changing the syntax on large sites will eat up memory;
   55 		  that part of the code needs to be overhauled
  56 		* Thumbnails aren't handled at all
  57 		
  58 	TODO:
  59 		* Migrate Users
  60 		* Map Users in revision history
  61 		* Overhaul change_syntax
  62 		* Image thumbnails
  63 
  64 	ChangeLog:
  65 		* 2006-01-12 TJ Fontaine <tjfontaine@gmail.com>
  66 		  - Removed nasty not_pages array
  67 		  - Import based on namespace
  68 		  - Import Talk Pages
  69 		  - Import images (uses find)
  70 		  - Import Revision History
  71 		  - Add Proper Revision Timestamp
  72 		  - Add Conditional Revision Import
  73 
  74 		* Version 0.3
  75 */
  76 ########################
  77 ##  MediaWiki Options ##
  78 ########################
  79 
  80 $MIGRATE_IMAGES = false;#set to true if you want to migrate images to moinmoin
  81 $MW_IMAGE_PATH = "";	#full path to mediawiki images
  82 $IMPORT_HISTORY = false;#set to false if you only want the current revision
  83 
  84 ########################
  85 ##  MoinMoin Options  ##
  86 ########################
  87 
  88 $MM_USER_ID = "";	#moinmoin userid to identify the importer by
  89 $ADD_MW_PARSER = true;	#set to true to add #format $MW_PARSER to the begining 
  90 			#of every page if false script does minimal conversion 
  91 			#before hand the old code needs reworked, eats too much
  92 			#memory leave this to true
  93 $MW_PARSER = "media";   #name of mediawiki parser in plugin/parser
  94 
  95 ########################
  96 ##  DB Settings       ##
  97 ########################
  98 
  99 $MW_TABLE_PREFIX = "";	#mediawiki database was installed with tables prefixed
 100 $host = "";		#mediawiki database server
 101 $usr = "";		#mediawiki database username
 102 $passwd = "";		#mediawiki database password
 103 $db = "mediawiki";		#mediawiki database name
 104 
 105 ########################
 106 ##  Pages To Import   ##
 107 ########################
 108 
 109 $IMPORT_PAGES['regular']['namespace'] = 0;
 110 $IMPORT_PAGES['regular']['description'] = "";
 111 $IMPORT_PAGES['regular-talk']['namespace'] = 1;
 112 $IMPORT_PAGES['regular-talk']['description'] = "Talk";
 113 $IMPORT_PAGES['users']['namespace'] = 2;
 114 $IMPORT_PAGES['users']['description'] = "";
 115 $IMPORT_PAGES['users-talk']['namespace'] = 3;
 116 $IMPORT_PAGES['users-talk']['description'] = "Talk";
 117 
 118 ########################
 119 ##  Output Directory  ##
 120 ########################
 121 
 122 $output_dir = "mediawiki_pages"; #where the script will output the exported
 123 				 #pages
 124 
 125 /*
 126 	DO NOT EDIT BELOW THIS LINE
 127 	unless you think you know what you're doing
 128 -----------------------------------------------------
 129 */
 130 
 131 $link = mysql_pconnect($host,$usr,$passwd) or die(mysql_error());
 132 mysql_select_db($db) or die("Could not select database");
 133 
 134 $WIKI_LINK_START = "[";
 135 $WIKI_LINK_END = "]";
 136 $EXTERNAL_LINK_START = "[";
 137 $EXTERNAL_LINK_END = "]";
 138 $EXTERNAL_LINK_DIVIDER = " ";
 139 
 140 if(file_exists($output_dir)){
 141    rmdirr($output_dir);
 142    mkdir($output_dir);
 143 }
 144 else{
 145    mkdir($output_dir);
 146 }
 147 
 148 chdir("./$output_dir") or die;
 149 
 150 $EDIT_LOG = array();
 151 
 152 foreach($IMPORT_PAGES as $pagetype)
 153 	migrate_current_pages($pagetype['namespace'], $pagetype['description']);
 154 
 155 print "sorting Edit Log ...";
 156 asort($EDIT_LOG);
 157 print "Done\n";
 158 
 159 $edit_log = fopen("edit-log", "w");
 160 foreach($EDIT_LOG as $entry)
 161 	fputs($edit_log, $entry);
 162 fclose($edit_log);
 163 
 164 chdir("..");
 165 ###End of Main
 166 
 167 function migrate_current_pages($page_namespace, $page_description = "")
 168 {
 169 	$curr_sql = "SELECT cur_title as ptitle, " .
 170 		"cur_text as text, cur_timestamp as timestamp " .
 171 		"FROM `".$GLOBALS['MW_TABLE_PREFIX']."cur` " .
 172 		"WHERE cur_user_text not like \"MediaWiki default\" " .
 173 		"AND cur_namespace = '$page_namespace' " .
 174 		";";
 175 	
 176 	$query = mysql_query($curr_sql) or die(mysql_error());
 177 
 178 	$rev_sql = "";
 179 	$rev_query = null;
 180 	$rev_counter = 0;
 181 
 182 	$revision = "";
 183 	
 184 	while ($row = mysql_fetch_object($query)) {
 185 		$rev_sql = "SELECT old_title as ptitle, ".
 186 			" old_text as text, old_timestamp as timestamp" .
 187 			" FROM `".$GLOBALS['MW_TABLE_PREFIX']."old` " .
 188 			" WHERE old_namespace = '$page_namespace' " .
 189 			" AND old_title = '".$row->ptitle."' " .
 190 			" ORDER BY old_timestamp " .
 191 			";";
 192 
 193 		$rev_counter = 0;
 194 		$revision = sprintf("%008s",  $rev_counter);			
 195 		
 196 		if($GLOBALS['IMPORT_HISTORY'])
 197 		{
 198 			$rev_query = mysql_query($rev_sql) or
 199 				die(mysql_error());
 200 				
 201 			while ($rev_row = mysql_fetch_object($rev_query))
 202 			{
 203 				migrate_page_row($rev_row, $page_description, 
 204 					$revision);
 205 					
 206 				$rev_counter++;
 207 				$revision = sprintf("%008s",  $rev_counter);
 208 			}
 209 
 210 			mysql_free_result($rev_query);
 211 		}
 212 		else
 213 		{
 214 			migrate_page_row($row, $page_description, $revision);
 215 		}
 216 	}
 217 	mysql_free_result($query);
 218 }
 219 
 220 function migrate_page_row($row, $desc, $revision)
 221 {
 222 	$timestamp = $row->timestamp;
 223 	$title = clean_title($row->ptitle);
 224 	$text = $row->text;
 225 
 226 	if(strlen($desc))
 227 		create_page($title."(2f)".$desc, $text, $timestamp, $revision);
 228 	else
 229 		create_page($title, $text, $timestamp, $revision);
 230 }
 231 
 232 function create_page($page_title, $page_text, $page_timestamp, $page_revision)
 233 {
 234 	print 'create page '.$page_title.' revision '.$page_revision."\n";
 235 	
 236 	if(!is_dir($page_title))
 237 		mkdir($page_title) or die($page_title);
 238 
 239 	chdir($page_title) or die($page_title);
 240 	
 241 	append_edit_log($page_title, $page_timestamp, $page_revision);
 242 	
 243 	$file = fopen("current", "w");
 244 	fputs($file, $page_revision);
 245 	
 246 	fclose($file);
 247 	
 248 	if($GLOBALS['MIGRATE_IMAGES'])
 249 		migrate_images($page_text);
 250 	
 251 	if(!is_dir("revisions"))
 252 		mkdir("revisions") or die("revisions");	
 253 
 254 	chdir("revisions") or die("revisions");
 255 	
 256 	$file = fopen($page_revision, "w");
 257 	
 258 	#break up one string into lines
 259 	$file_text = explode("\n", $page_text);
 260 	
 261 	if($GLOBALS['ADD_MW_PARSER'])
 262 	{
 263 		$mw_parser = $GLOBALS['MW_PARSER'];
 264 		fputs($file, "#format $mw_parser \n");
 265 	}
 266 	else
 267 		$file_text = change_syntax($file_text);
 268 		
 269 	$b = 0;
 270 	
 271 	while ($b < count($file_text)) {
 272 		fputs($file, rtrim($file_text[$b]) . "\n");
 273 		$b++;
 274 	}
 275 	
 276 	unset($file_text);
 277 	fclose($file);
 278 	chdir("..") or die(system('pwd')); #revision
 279 	chdir("..") or die(system('pwd')); #page name
 280 }
 281 
 282 function append_edit_log($page_title, $timestamp, $revision)
 283 {
 284 	$file = fopen('edit-log', 'a+');
 285 
 286 	if($revision == 0)
 287 		$action = 'SAVENEW';
 288 	else
 289 		$action = 'SAVE';
 290 	
 291 	if(strlen($timestamp))
 292 		$tstamp = getStamp($timestamp);
 293 	else
 294 		$tstamp = uts();
 295 	
 296 	$el_string = "$tstamp\t$revision\t$action\t$page_title\t" .
 297 		"127.0.0.1\tlocalhost\t".$GLOBALS['MM_USER_ID']."\n";
 298 	
 299 	fputs($file, $el_string);
 300 
 301 	$GLOBALS['EDIT_LOG'][$tstamp] = $el_string;
 302 
 303 	fclose($file);
 304 }
 305 
 306 function uts(){
 307 	$Asec = explode(" ", microtime());
 308 	$Amicro = explode(".", $Asec[0]);
 309 	return ($Asec[1].substr($Amicro[1], 0, 6));
 310 }
 311 
 312 function getStamp($t)
 313 {
 314 	$year = substr($t, 0, 4);
 315 	$month = substr($t, 4, 2);
 316 	$day = substr($t, 6, 2);
 317 	$hour = substr($t, 8, 2);
 318 	$min = substr($t, 11, 2);
 319 	$sec = substr($t, 13, 2);
 320 	$micro = mktime($hour, $min, $sec, $month, $day, $year);
 321 	return sprintf("%-016s", $micro);
 322 }
 323 
 324 function migrate_images($page_text)
 325 {
 326 	$mw_path = $GLOBALS['MW_IMAGE_PATH'];
 327 	$image_matches = array();
 328 	$image_pat = "/\[\[Image:(.*)\]\]/";
 329 	if(preg_match_all($image_pat, $page_text, $image_matches))
 330 	{
 331 		if(!is_dir("attachments"))
 332 			mkdir("attachments");
 333 	
 334 		for($z = 0; $z < count($image_matches[1]); $z++)
 335 		{
 336 			$image_file_name = strtok($image_matches[1][$z], '|');
 337 			if(!file_exists('attachments/'.$image_file_name))
 338 			{
 339 				$find_string = "find $mw_path -type f -name \"".
 340 					"$image_file_name\"";
 341 				
 342 				$image_file_path = system($find_string, $ret);
 343 				if($ret) die($image_file_path);
 344 				if(strlen($image_file_path))
 345 				{
 346 					if(!copy($image_file_path, "./attachments/$image_file_name"))
 347 						die("failed to copy $image_file_name\n");
 348 					print " added attachment: $image_file_name \n";
 349 				}
 350 			}
 351 		}
 352 	}
 353 }
 354 
 355 function clean_title ($page_title)
 356 {
 357 	$page_title = utf8_encode(str_replace(" ", "_", $page_title));
 358 	$quoted = array();
 359 	$in_parenthesis = false;
 360 	for ($i = 0; $i < strlen($page_title); $i++)
 361 	{
 362 		$curchar = substr ($page_title, $i, 1);
 363 		if (ereg('[^a-zA-Z0-9_]', $curchar))
 364 		{
 365 			if (!$in_parenthesis)
 366 			{
 367 				$quoted[] = '(';
 368 				$in_parenthesis = true;
 369 			}
 370 			$quoted[] = str_pad(dechex(ord($curchar)),
 371 				2, '0', STR_PAD_LEFT);
 372 		} 
 373 		else 
 374 		{
 375 			if ($in_parenthesis) 
 376 			{
 377 				$quoted[] = ')';
 378 				$in_parenthesis = false;
 379 			}
 380 			$quoted[] = $curchar;
 381 		}
 382 	}
 383 	
 384 	if ($in_parenthesis)
 385 		$quoted[] = ')';
 386 	
 387 	$page_title = implode('', $quoted);
 388 	unset($quoted);
 389 	return $page_title;
 390 }
 391 
 392 function change_syntax ($textString) {
 393 	#$a = 0;
 394    
 395    for($a = 0; $a < count($textString); $a++){
 396       #print "str(before mod) = $textString[$a] \n";
 397       
 398       #custom plugin
 399       #if(preg_match("/\<fileshare\>.+\<\/fileshare\>/",$textString[$a])){
 400       #   $textString[$a] = fileShare($textString[$a]); 
 401       #}
 402       
 403       #strpos : Returns the numeric position of the first occurrence of needle in the haystack string. Unlike the strrpos(), this function can take a full string as the needle parameter and the entire string will be used.
 404       #substr() returns the portion of string  specified by the start and length parameters.
 405       #string substr ( string string, int start [, int length] )
 406       if(substr($textString[$a], 0, 1) == '*'){
 407          $textString[$a] = bullets($textString[$a]);
 408       }
 409       
 410       if(preg_match("/^#/",$textString[$a])){ 
 411          $textString[$a] = numberedList( $textString[$a]); 
 412       }
 413       
 414       #headings
 415       if(preg_match("/^==.+==/",$textString[$a])){ 
 416          $textString[$a] = heading( $textString[$a]); 
 417       }
 418       
 419       #wikilink
 420       if(preg_match("/\[\[.+\]\]/",$textString[$a])){
 421 			$textString[$a] = wikiLinks($textString[$a]);
 422       }
 423 
 424       #media wiki new line <br\> or <BR>
 425       #must be after wiki links 
 426       if (preg_match("/\<br\/{0,1}\>/i", $textString[$a])) { 
 427          $textString[$a] = preg_replace("/\\<br\/{0,1}\>/i", "[[BR]]",$textString[$a]);
 428          #print "result = $textString[$a]\n";
 429       }
 430    }
 431 
 432 	return $textString;
 433 }
 434 
 435 
 436 
 437 #custom plugin
 438 #function fileShare($string) {
 439 #   $fileshare = substr($string, strpos($string, "\\\\"));
 440 #   $fileshare = preg_replace("/<\/fileshare>/","",$fileshare);
 441 #   $string = "[file:" .$fileshare ."]";
 442 #   return $string;
 443 #}
 444 
 445 function heading($string){
 446    $theHeading = $string;
 447    $headingLevel = 0;
 448    
 449    #strip the left side '=' chars
 450    while($headingLevel < strlen($theHeading)){
 451       if(substr($theHeading, 0, 1) == '='){
 452          $theHeading = substr($theHeading, 1);
 453       }
 454       else{ 
 455          #no more ='s in front of text
 456          break; 
 457       }
 458       $headingLevel++;
 459    }
 460    
 461    #the left side '=' chars are now removed
 462    #now strip right side '=' chars
 463    $theHeading = substr($theHeading, 0, strpos($theHeading, '='));
 464    
 465    $theSyntax = "";
 466    #note moinmoin uses 1 less = for heading levels
 467    #so mediawiki "===" is the same as moinmoin "=="
 468    for($i = 1; $i < $headingLevel; $i++){
 469       $theSyntax .= "=";  
 470    }
 471       
 472    $string = $theSyntax ." $theHeading " .$theSyntax;
 473 
 474    return $string;
 475 }
 476 
 477 
 478 function bullets ($string) {
 479 	$a = 0;
 480 	while ($a < strlen($string)) {
 481 		$a++;
 482 		if (substr($string, 1, 1) == "*")
 483 			$string = substr($string, 1);
 484 		else
 485 			break;
 486 	}
 487 	while ($a > 0) {
 488 		$string = " " . $string;
 489 		$a--;
 490    }
 491 	return $string;
 492 }
 493 
 494 function numberedList ($string) {
 495 	if(preg_match("/^#/",$string)){
 496       $string = preg_replace("/^#/", " 1.", $string);
 497    }
 498    elseif(preg_match("/^##/",$string)){
 499       $string = preg_replace("/^##/", "  1.", $string);
 500    }
 501 	return $string;
 502 }
 503 
 504 
 505 function wikiLinks ($string) {
 506    global $WIKI_LINK_START;
 507    global $WIKI_LINK_END;
 508    
 509 	while (strpos($string, "[[") !== false && strpos($string, "]]") !== false) {
 510       #isolate link
 511 		$link = substr($string, strpos($string, "[[") + 2);
 512 		$link = substr($link, 0, strpos($link, "]]") + 0);
 513       
 514       if (strpos($link, "|") == false){
 515          #add new link syntax
 516          $link = $WIKI_LINK_START ."\"". $link ."\"" .$WIKI_LINK_END;
 517       }
 518       else{
 519          $dividerPosition = strpos($link, "|");
 520          
 521          $wikilink = substr($link, 0, $dividerPosition);
 522          $label = substr($link, $dividerPosition + 1, strlen($link) - $dividerPosition);
 523          
 524          #remove whitespace from beginning and end
 525          $label = trim($label);
 526          
 527          $link = $WIKI_LINK_START .":" .$wikilink .": "  .$label .$WIKI_LINK_END;
 528       }
 529 
 530 		$string = substr($string, 0, strpos($string, "[[") - 0) . $link .substr($string, strpos($string, "]]") + 2);
 531    }
 532    
 533 	return $string;
 534 }
 535 
 536 
 537 function externalLinks($string){
 538    global $EXTERNAL_LINK_START;
 539    global $EXTERNAL_LINK_END;
 540    global $EXTERNAL_LINK_DIVIDER;
 541    
 542    #external link syntax is the same except for the label divider
 543    
 544    if(preg_match("/| /")){
 545       $string = preg_replace("/| /", " ", $string);
 546    }
 547    elseif(preg_match("/|/")){
 548       $string = preg_replace("/|/", " ", $string);
 549    }
 550    
 551 	return $string;
 552    
 553 }
 554 
 555 function rmdirr($dir) {
 556    if($objs = glob($dir."/*")){
 557        foreach($objs as $obj) {
 558            is_dir($obj)? rmdirr($obj) : unlink($obj);
 559        }
 560    }
 561    rmdir($dir);
 562 }
 563 
 564 ?>
Attached Files

To refer to attachments on a page, use attachment:filename, as shown below in the list of files. Do NOT use the URL of the [get] link, since this is subject to change and can break easily.
You are not allowed to attach a file to this page.
MoinMoin: attachment:mw2moin.php.txt of MediaWikiConverter

Attachment 'mw2moin.php.txt'

Attached Files