Attachment 'mw0_11_2moin.php.txt'

Download

   1 #!/usr/bin/php
   2 <?php
   3 /*
   4 	copyright <original author>
   5 	TJ Fontaine <tjfontaine@gmail.com>
   6 
   7 	This script allows the exporting of pages from MediaWiki to MoinMoin.
   8 
   9 	Usage:
  10 		choose the options you want and fill in the appropriate
  11 		variables. Make sure you at the very least edit the database
  12 		settings. 
  13 		
  14 		MM_USER_ID requires at least one user be registered in
  15 		moinmoin, you can find this number in wiki/data/user/
  16 
  17 		Once everything is setup run the script, then copy from
  18 		$output_dir/* wiki/data/pages/ and 
  19 		mv wiki/data/pages/edit-log wiki/data/
  20 
  21 		Your MediaWiki pages and history should now be available
  22 		to you, check wiki/Main_Page
  23 
  24 		By default the script exports namespaces 0-3. It has been
  25 		my experience that namespace 0 are normal editable pages,
  26 		1 is the normal pages Talk sections, 2 are the user pages,
  27 		and 3 are the user page Talk sections. When filling in
  28 		$IMPORT_PAGES, if description is set, every page in that
  29 		namespace is exported as a subpage named after the description:
  30 		
  31 		(example)
  32 			$IMPORT_PAGES['users-talk']['namespace'] = 3;
  33 			$IMPORT_PAGES['users-talk']['description'] = "Talk";
  34 
  35 		will cause all pages in that namespace to be exported to 
  36 		User/Talk where as
  37 
  38 			$IMPORT_PAGES['users-talk']['namespace'] = 3;
  39 			$IMPORT_PAGES['users-talk']['description'] = "TalkAgain";
  40 
  41 		will cause all pages in that namespace to be exported to
  42 		User/TalkAgain.
  43 
  44 	Features:
  45 		* Import Current Pages
  46 		* Import By Namespace
  47 		* Import Talk Pages (as Page/Talk)
  48 		* Import Revision History
  49 		* Import Images
  50 		* Add "#format $parser" to header
  51 		* Or make minimal changes to Wiki syntax
  52 
  53 	Known Issues:
  54 		* Changing the syntax on large sites will eat up memory;
  55 		  that part of the code needs to be overhauled
  56 		* Thumbnails aren't handled at all
  57 		
  58 	TODO:
  59 		* Migrate Users
  60 		* Map Users in revision history
  61 		* Overhaul change_syntax
  62 		* Image thumbnails
  63 
  64 	ChangeLog:
  65 		* 2006-01-12 TJ Fontaine <tjfontaine@gmail.com>
  66 		  - Removed nasty not_pages array
  67 		  - Import based on namespace
  68 		  - Import Talk Pages
  69 		  - Import images (uses find)
  70 		  - Import Revision History
  71 		  - Add Proper Revision Timestamp
  72 		  - Add Conditional Revision Import
  73 
  74 		* Version 0.3
  75 
  76 		* 2007-11-07 David Huggins-Daines <dhuggins@cs.cmu.edu>
  77 		  - Updated for Mediawiki 0.11
  78 		  - Removed history for the time being (will put it back soon)
  79 */
  80 ########################
  81 ##  MediaWiki Options ##
  82 ########################
  83 
  84 $MIGRATE_IMAGES = false;#set to true if you want to migrate images to moinmoin
  85 $MW_IMAGE_PATH = "";	#full path to mediawiki images
  86 
  87 ########################
  88 ##  MoinMoin Options  ##
  89 ########################
  90 
  91 $MM_USER_ID = "";	#moinmoin userid to identify the importer by
  92 $ADD_MW_PARSER = true;	#set to true to add #format $MW_PARSER to the begining 
  93 			#of every page if false script does minimal conversion 
  94 			#before hand the old code needs reworked, eats too much
  95 			#memory leave this to true
  96 $MW_PARSER = "media";   #name of mediawiki parser in plugin/parser
  97 
  98 ########################
  99 ##  DB Settings       ##
 100 ########################
 101 
 102 $MW_TABLE_PREFIX = "";	#mediawiki database was installed with tables prefixed
 103 $host = "";		#mediawiki database server
 104 $usr = "";		#mediawiki database username
 105 $passwd = "";		#mediawiki database password
 106 $db = "";		#mediawiki database name
 107 
 108 ########################
 109 ##  Pages To Import   ##
 110 ########################
 111 
 112 $IMPORT_PAGES['regular']['namespace'] = 0;
 113 $IMPORT_PAGES['regular']['description'] = "";
 114 $IMPORT_PAGES['regular-talk']['namespace'] = 1;
 115 $IMPORT_PAGES['regular-talk']['description'] = "Talk";
 116 $IMPORT_PAGES['users']['namespace'] = 2;
 117 $IMPORT_PAGES['users']['description'] = "";
 118 $IMPORT_PAGES['users-talk']['namespace'] = 3;
 119 $IMPORT_PAGES['users-talk']['description'] = "Talk";
 120 
 121 ########################
 122 ##  Output Directory  ##
 123 ########################
 124 
 125 $output_dir = "mediawiki_pages"; #where the script will output the exported
 126 				 #pages
 127 
 128 /*
 129 	DO NOT EDIT BELOW THIS LINE
 130 	unless you think you know what you're doing
 131 -----------------------------------------------------
 132 */
 133 
 134 $link = mysql_pconnect($host,$usr,$passwd) or die(mysql_error());
 135 mysql_select_db($db) or die("Could not select database");
 136 
 137 $WIKI_LINK_START = "[";
 138 $WIKI_LINK_END = "]";
 139 $EXTERNAL_LINK_START = "[";
 140 $EXTERNAL_LINK_END = "]";
 141 $EXTERNAL_LINK_DIVIDER = " ";
 142 
 143 if(file_exists($output_dir)){
 144    rmdirr($output_dir);
 145    mkdir($output_dir);
 146 }
 147 else{
 148    mkdir($output_dir);
 149 }
 150 
 151 chdir("./$output_dir") or die;
 152 
 153 $EDIT_LOG = array();
 154 
 155 foreach($IMPORT_PAGES as $pagetype)
 156 	migrate_current_pages($pagetype['namespace'], $pagetype['description']);
 157 
 158 print "sorting Edit Log ...";
 159 asort($EDIT_LOG);
 160 print "Done\n";
 161 
 162 $edit_log = fopen("edit-log", "w");
 163 foreach($EDIT_LOG as $entry)
 164 	fputs($edit_log, $entry);
 165 fclose($edit_log);
 166 
 167 chdir("..");
 168 ###End of Main
 169 
 170 function migrate_current_pages($page_namespace, $page_description = "")
 171 {
 172 	$page_table = $GLOBALS['MW_TABLE_PREFIX']."page";
 173 	$text_table = $GLOBALS['MW_TABLE_PREFIX']."text";
 174 	$revision_table = $GLOBALS['MW_TABLE_PREFIX']."revision";
 175 
 176 	$curr_sql = "SELECT `$page_table`.page_title as ptitle, " .
 177 		"`$page_table`.page_latest as revision, ".
 178 		"`$text_table`.old_text as text, `$page_table`.page_touched as timestamp " .
 179 		"FROM `$page_table`, `$text_table`, `$revision_table` ".
 180 		"WHERE `$revision_table`.rev_id = `$page_table`.page_latest ".
 181 		"AND `$text_table`.old_id = `$revision_table`.rev_text_id ".
 182 		"AND `$text_table`.old_text NOT LIKE \"MediaWiki default\" " .
 183 		"AND page_namespace = '$page_namespace' " .
 184 		";";
 185 
 186 	$query = mysql_query($curr_sql) or die(mysql_error());
 187 
 188 	while ($row = mysql_fetch_object($query)) {
 189 		migrate_page_row($row, $page_description);
 190 	}
 191 	mysql_free_result($query);
 192 }
 193 
 194 function migrate_page_row($row, $desc)
 195 {
 196 	$timestamp = $row->timestamp;
 197 	$title = clean_title($row->ptitle);
 198 	$text = $row->text;
 199 	$revision = $row->revision;
 200 
 201 	if(strlen($desc))
 202 		create_page($title."(2f)".$desc, $text, $timestamp, $revision);
 203 	else
 204 		create_page($title, $text, $timestamp, $revision);
 205 }
 206 
 207 function create_page($page_title, $page_text, $page_timestamp, $page_revision)
 208 {
 209 	print 'create page '.$page_title.' revision '.$page_revision."\n";
 210 	
 211 	if(!is_dir($page_title))
 212 		mkdir($page_title) or die($page_title);
 213 
 214 	chdir($page_title) or die($page_title);
 215 	
 216 	append_edit_log($page_title, $page_timestamp, $page_revision);
 217 	
 218 	$file = fopen("current", "w");
 219 	fputs($file, $page_revision);
 220 	
 221 	fclose($file);
 222 	
 223 	if($GLOBALS['MIGRATE_IMAGES'])
 224 		migrate_images($page_text);
 225 	
 226 	if(!is_dir("revisions"))
 227 		mkdir("revisions") or die("revisions");	
 228 
 229 	chdir("revisions") or die("revisions");
 230 	
 231 	$file = fopen($page_revision, "w");
 232 	
 233 	#break up one string into lines
 234 	$file_text = explode("\n", $page_text);
 235 	
 236 	if($GLOBALS['ADD_MW_PARSER'])
 237 	{
 238 		$mw_parser = $GLOBALS['MW_PARSER'];
 239 		fputs($file, "#format $mw_parser \n");
 240 	}
 241 	else
 242 		$file_text = change_syntax($file_text);
 243 		
 244 	$b = 0;
 245 	
 246 	while ($b < count($file_text)) {
 247 		fputs($file, rtrim($file_text[$b]) . "\n");
 248 		$b++;
 249 	}
 250 	
 251 	unset($file_text);
 252 	fclose($file);
 253 	chdir("..") or die(system('pwd')); #revision
 254 	chdir("..") or die(system('pwd')); #page name
 255 }
 256 
 257 function append_edit_log($page_title, $timestamp, $revision)
 258 {
 259 	$file = fopen('edit-log', 'a+');
 260 
 261 	if($revision == 0)
 262 		$action = 'SAVENEW';
 263 	else
 264 		$action = 'SAVE';
 265 	
 266 	if(strlen($timestamp))
 267 		$tstamp = getStamp($timestamp);
 268 	else
 269 		$tstamp = uts();
 270 	
 271 	$el_string = "$tstamp\t$revision\t$action\t$page_title\t" .
 272 		"127.0.0.1\tlocalhost\t".$GLOBALS['MM_USER_ID']."\n";
 273 	
 274 	fputs($file, $el_string);
 275 
 276 	$GLOBALS['EDIT_LOG'][$tstamp] = $el_string;
 277 
 278 	fclose($file);
 279 }
 280 
 281 function uts(){
 282 	$Asec = explode(" ", microtime());
 283 	$Amicro = explode(".", $Asec[0]);
 284 	return ($Asec[1].substr($Amicro[1], 0, 6));
 285 }
 286 
 287 function getStamp($t)
 288 {
 289 	$year = substr($t, 0, 4);
 290 	$month = substr($t, 4, 2);
 291 	$day = substr($t, 6, 2);
 292 	$hour = substr($t, 8, 2);
 293 	$min = substr($t, 11, 2);
 294 	$sec = substr($t, 13, 2);
 295 	$micro = mktime($hour, $min, $sec, $month, $day, $year);
 296 	return sprintf("%-016s", $micro);
 297 }
 298 
 299 function migrate_images($page_text)
 300 {
 301 	$mw_path = $GLOBALS['MW_IMAGE_PATH'];
 302 	$image_matches = array();
 303 	$image_pat = "/\[\[Image:(.*)\]\]/";
 304 	if(preg_match_all($image_pat, $page_text, $image_matches))
 305 	{
 306 		if(!is_dir("attachments"))
 307 			mkdir("attachments");
 308 	
 309 		for($z = 0; $z < count($image_matches[1]); $z++)
 310 		{
 311 			$image_file_name = strtok($image_matches[1][$z], '|');
 312 			if(!file_exists('attachments/'.$image_file_name))
 313 			{
 314 				$find_string = "find $mw_path -type f -name \"".
 315 					"$image_file_name\"";
 316 				
 317 				$image_file_path = system($find_string, $ret);
 318 				if($ret) die($image_file_path);
 319 				if(strlen($image_file_path))
 320 				{
 321 					if(!copy($image_file_path, "./attachments/$image_file_name"))
 322 						die("failed to copy $image_file_name\n");
 323 					print " added attachment: $image_file_name \n";
 324 				}
 325 			}
 326 		}
 327 	}
 328 }
 329 
 330 function clean_title ($page_title)
 331 {
 332 	$page_title = utf8_encode(str_replace(" ", "_", $page_title));
 333 	$quoted = array();
 334 	$in_parenthesis = false;
 335 	for ($i = 0; $i < strlen($page_title); $i++)
 336 	{
 337 		$curchar = substr ($page_title, $i, 1);
 338 		if (ereg('[^a-zA-Z0-9_]', $curchar))
 339 		{
 340 			if (!$in_parenthesis)
 341 			{
 342 				$quoted[] = '(';
 343 				$in_parenthesis = true;
 344 			}
 345 			$quoted[] = str_pad(dechex(ord($curchar)),
 346 				2, '0', STR_PAD_LEFT);
 347 		} 
 348 		else 
 349 		{
 350 			if ($in_parenthesis) 
 351 			{
 352 				$quoted[] = ')';
 353 				$in_parenthesis = false;
 354 			}
 355 			$quoted[] = $curchar;
 356 		}
 357 	}
 358 	
 359 	if ($in_parenthesis)
 360 		$quoted[] = ')';
 361 	
 362 	$page_title = implode('', $quoted);
 363 	unset($quoted);
 364 	return $page_title;
 365 }
 366 
 367 function change_syntax ($textString) {
 368 	#$a = 0;
 369    
 370    for($a = 0; $a < count($textString); $a++){
 371       #print "str(before mod) = $textString[$a] \n";
 372       
 373       #custom plugin
 374       #if(preg_match("/\<fileshare\>.+\<\/fileshare\>/",$textString[$a])){
 375       #   $textString[$a] = fileShare($textString[$a]); 
 376       #}
 377       
 378       #strpos : Returns the numeric position of the first occurrence of needle in the haystack string. Unlike the strrpos(), this function can take a full string as the needle parameter and the entire string will be used.
 379       #substr() returns the portion of string  specified by the start and length parameters.
 380       #string substr ( string string, int start [, int length] )
 381       if(substr($textString[$a], 0, 1) == '*'){
 382          $textString[$a] = bullets($textString[$a]);
 383       }
 384       
 385       if(preg_match("/^#/",$textString[$a])){ 
 386          $textString[$a] = numberedList( $textString[$a]); 
 387       }
 388       
 389       #headings
 390       if(preg_match("/^==.+==/",$textString[$a])){ 
 391          $textString[$a] = heading( $textString[$a]); 
 392       }
 393       
 394       #wikilink
 395       if(preg_match("/\[\[.+\]\]/",$textString[$a])){
 396 			$textString[$a] = wikiLinks($textString[$a]);
 397       }
 398 
 399       #media wiki new line <br\> or <BR>
 400       #must be after wiki links 
 401       if (preg_match("/\<br\/{0,1}\>/i", $textString[$a])) { 
 402          $textString[$a] = preg_replace("/\\<br\/{0,1}\>/i", "[[BR]]",$textString[$a]);
 403          #print "result = $textString[$a]\n";
 404       }
 405    }
 406 
 407 	return $textString;
 408 }
 409 
 410 
 411 
 412 #custom plugin
 413 #function fileShare($string) {
 414 #   $fileshare = substr($string, strpos($string, "\\\\"));
 415 #   $fileshare = preg_replace("/<\/fileshare>/","",$fileshare);
 416 #   $string = "[file:" .$fileshare ."]";
 417 #   return $string;
 418 #}
 419 
 420 function heading($string){
 421    $theHeading = $string;
 422    $headingLevel = 0;
 423    
 424    #strip the left side '=' chars
 425    while($headingLevel < strlen($theHeading)){
 426       if(substr($theHeading, 0, 1) == '='){
 427          $theHeading = substr($theHeading, 1);
 428       }
 429       else{ 
 430          #no more ='s in front of text
 431          break; 
 432       }
 433       $headingLevel++;
 434    }
 435    
 436    #the left side '=' chars are now removed
 437    #now strip right side '=' chars
 438    $theHeading = substr($theHeading, 0, strpos($theHeading, '='));
 439    
 440    $theSyntax = "";
 441    #note moinmoin uses 1 less = for heading levels
 442    #so mediawiki "===" is the same as moinmoin "=="
 443    for($i = 1; $i < $headingLevel; $i++){
 444       $theSyntax .= "=";  
 445    }
 446       
 447    $string = $theSyntax ." $theHeading " .$theSyntax;
 448 
 449    return $string;
 450 }
 451 
 452 
 453 function bullets ($string) {
 454 	$a = 0;
 455 	while ($a < strlen($string)) {
 456 		$a++;
 457 		if (substr($string, 1, 1) == "*")
 458 			$string = substr($string, 1);
 459 		else
 460 			break;
 461 	}
 462 	while ($a > 0) {
 463 		$string = " " . $string;
 464 		$a--;
 465    }
 466 	return $string;
 467 }
 468 
 469 function numberedList ($string) {
 470 	if(preg_match("/^#/",$string)){
 471       $string = preg_replace("/^#/", " 1.", $string);
 472    }
 473    elseif(preg_match("/^##/",$string)){
 474       $string = preg_replace("/^##/", "  1.", $string);
 475    }
 476 	return $string;
 477 }
 478 
 479 
 480 function wikiLinks ($string) {
 481    global $WIKI_LINK_START;
 482    global $WIKI_LINK_END;
 483    
 484 	while (strpos($string, "[[") !== false && strpos($string, "]]") !== false) {
 485       #isolate link
 486 		$link = substr($string, strpos($string, "[[") + 2);
 487 		$link = substr($link, 0, strpos($link, "]]") + 0);
 488       
 489       if (strpos($link, "|") == false){
 490          #add new link syntax
 491          $link = $WIKI_LINK_START ."\"". $link ."\"" .$WIKI_LINK_END;
 492       }
 493       else{
 494          $dividerPosition = strpos($link, "|");
 495          
 496          $wikilink = substr($link, 0, $dividerPosition);
 497          $label = substr($link, $dividerPosition + 1, strlen($link) - $dividerPosition);
 498          
 499          #remove whitespace from beginning and end
 500          $label = trim($label);
 501          
 502          $link = $WIKI_LINK_START .":" .$wikilink .": "  .$label .$WIKI_LINK_END;
 503       }
 504 
 505 		$string = substr($string, 0, strpos($string, "[[") - 0) . $link .substr($string, strpos($string, "]]") + 2);
 506    }
 507    
 508 	return $string;
 509 }
 510 
 511 
 512 function externalLinks($string){
 513    global $EXTERNAL_LINK_START;
 514    global $EXTERNAL_LINK_END;
 515    global $EXTERNAL_LINK_DIVIDER;
 516    
 517    #external link syntax is the same except for the label divider
 518    
 519    if(preg_match("/| /")){
 520       $string = preg_replace("/| /", " ", $string);
 521    }
 522    elseif(preg_match("/|/")){
 523       $string = preg_replace("/|/", " ", $string);
 524    }
 525    
 526 	return $string;
 527    
 528 }
 529 
 530 function rmdirr($dir) {
 531    if($objs = glob($dir."/*")){
 532        foreach($objs as $obj) {
 533            is_dir($obj)? rmdirr($obj) : unlink($obj);
 534        }
 535    }
 536    rmdir($dir);
 537 }
 538 
 539 ?>

Attached Files

To refer to attachments on a page, use attachment:filename, as shown below in the list of files. Do NOT use the URL of the [get] link, since this is subject to change and can break easily.
  • [get | view] (2005-11-18 00:01:54, 17.8 KB) [[attachment:mediawiki2moin-v0.3.php.txt]]
  • [get | view] (2005-02-21 18:59:32, 4.6 KB) [[attachment:mediawiki2moin.php.gz]]
  • [get | view] (2007-11-07 16:57:07, 13.8 KB) [[attachment:mw0_11_2moin.php.txt]]
  • [get | view] (2007-11-07 17:15:36, 14.7 KB) [[attachment:mw112moin.php.txt]]
  • [get | view] (2006-01-12 21:02:29, 14.1 KB) [[attachment:mw2moin.php.txt]]
  • [get | view] (2007-11-07 17:27:00, 14.8 KB) [[attachment:mw_11_2_moin.php.txt]]
 All files | Selected Files: delete move to page copy to page

You are not allowed to attach a file to this page.