#!/usr/bin/php
<?php
/*
 * TJ Fontaine
 *
 * This script allows the exporting of pages from MediaWiki to MoinMoin.
 *
 * Usage: choose the options you want and fill in the appropriate variables.
 * Make sure that you, at the very least, edit the database settings.
 *
 * MM_USER_ID requires that at least one user be registered in MoinMoin; you
 * can find this number in wiki/data/user/
 *
 * Once everything is set up, run the script, then copy everything from
 * $output_dir/ to wiki/data/pages/ and
 *     mv wiki/data/pages/edit-log wiki/data/
 *
 * Your MediaWiki pages and history should now be available to you; check
 * wiki/Main_Page
 *
 * By default the script exports namespaces 0-3. It has been my experience
 * that namespace 0 holds the normal editable pages, 1 holds the normal pages'
 * Talk sections, 2 holds the user pages, and 3 holds the user pages' Talk
 * sections.
 *
 * When filling in $IMPORT_PAGES, if description is set the pages are exported
 * as sub-pages named after the description. For example,
 *     $IMPORT_PAGES['users-talk']['namespace'] = 3;
 *     $IMPORT_PAGES['users-talk']['description'] = "Talk";
 * will cause all pages in that namespace to be exported to User/Talk, whereas
 *     $IMPORT_PAGES['users-talk']['namespace'] = 3;
 *     $IMPORT_PAGES['users-talk']['description'] = "TalkAgain";
 * will cause all pages in that namespace to be exported to User/TalkAgain.
 *
/*
 * Features:
 *   - Import Current Pages
 *   - Import By Namespace
 *   - Import Talk Pages (as Page/Talk)
 *   - Import Revision History
 *   - Import Images
 *   - Add "#format $parser" to header
 *   - Or make minimal changes to Wiki syntax
 *
 * Known Issues:
 *   - Changing the syntax on large sites will eat up memory; that part of
 *     the code needs to be overhauled
 *   - Thumbnails aren't handled at all
 *
 * TODO:
 *   - Migrate Users
 *   - Map Users in revision history
 *   - Overhaul change_syntax
 *   - Image thumbnails
 *
 * ChangeLog:
 *   - 2006-01-12 TJ Fontaine
 *       - Removed nasty not_pages array
 *       - Import based on namespace
 *       - Import Talk Pages
 *       - Import images (uses find)
 *       - Import Revision History
 *       - Add Proper Revision Timestamp
 *       - Add Conditional Revision Import
 *   - Version 0.3
 *   - 2007-11-07 David Huggins-Daines
 *       - Updated for Mediawiki 0.11
 */

# NOTE: every setting below must stay on its own line. A "#" comment runs to
# the end of the physical line, so joining lines silently disables whatever
# statements follow the first comment.

########################
## MediaWiki Options  ##
########################

$MIGRATE_IMAGES = false; # set to true if you want to migrate images to moinmoin
$MW_IMAGE_PATH = "";     # full path to mediawiki images
$IMPORT_HISTORY = true;  # set to false if you only want the current revision

########################
## MoinMoin Options   ##
########################

$MM_USER_ID = "";        # moinmoin userid to identify the importer by

$ADD_MW_PARSER = true;   # set to true to add "#format $MW_PARSER" to the
                         # beginning of every page; if false the script does a
                         # minimal syntax conversion beforehand. That old code
                         # needs reworking and eats too much memory, so leave
                         # this set to true.

$MW_PARSER = "media";    # name of mediawiki parser in plugin/parser

########################
## DB Settings        ##
########################

$MW_TABLE_PREFIX = "";   # table prefix the mediawiki database was installed with
$host = "";              # mediawiki database server
$usr = "";               # mediawiki database username
$passwd = "";            # mediawiki database password
$db = "";                # mediawiki database name

########################
## Pages To Import    ##
########################

# Each entry maps a label to a MediaWiki namespace number and an optional
# description; a non-empty description exports the pages as Page/<description>.
$IMPORT_PAGES = array();

$IMPORT_PAGES['regular']['namespace'] = 0;
$IMPORT_PAGES['regular']['description'] = "";

$IMPORT_PAGES['regular-talk']['namespace'] = 1;
$IMPORT_PAGES['regular-talk']['description'] = "Talk";

$IMPORT_PAGES['users']['namespace'] = 2;
$IMPORT_PAGES['users']['description'] = "";

$IMPORT_PAGES['users-talk']['namespace'] = 3;
$IMPORT_PAGES['users-talk']['description'] = "Talk";

########################
## Output Directory   ##
########################

$output_dir = "mediawiki_pages"; # where the script will output the exported pages

/*
DO NOT EDIT BELOW THIS LINE
unless you think you know what you're doing
-----------------------------------------------------
*/

# The old mysql_* functions were removed in PHP 7.0, so the mysqli procedural
# API is used instead. Reporting is switched off so that failures return false
# (as the "or die" handling below expects) on every PHP version.
mysqli_report(MYSQLI_REPORT_OFF);
$link = mysqli_connect($host, $usr, $passwd) or die(mysqli_connect_error());
mysqli_select_db($link, $db) or die("Could not select database");

# MoinMoin link delimiters used by the minimal syntax conversion.
$WIKI_LINK_START = "[";
$WIKI_LINK_END = "]";
$EXTERNAL_LINK_START = "[";
$EXTERNAL_LINK_END = "]";
$EXTERNAL_LINK_DIVIDER = " ";

# Always start from an empty output directory.
if (file_exists($output_dir)) {
    rmdirr($output_dir);
}
mkdir($output_dir) or die("Could not create $output_dir\n");
chdir("./$output_dir") or die("Could not enter $output_dir\n");

# Every edit-log line written by append_edit_log() is also collected here so
# that one global, chronologically sorted edit-log can be written at the end.
$EDIT_LOG = array();

foreach ($IMPORT_PAGES as $pagetype) {
    migrate_current_pages($pagetype['namespace'], $pagetype['description']);
}

print "sorting Edit Log ...";
# Each entry starts with a fixed-width 16-digit timestamp, so a plain string
# sort orders the log chronologically.
sort($EDIT_LOG, SORT_STRING);
print "Done\n";

$edit_log = fopen("edit-log", "w") or die("Could not write edit-log\n");
foreach ($EDIT_LOG as $entry) {
    fputs($edit_log, $entry);
}
fclose($edit_log);

chdir("..");
mysqli_close($link);

### End of Main

/**
 * Export every page of one MediaWiki namespace.
 *
 * With $IMPORT_HISTORY enabled every stored revision of each page is exported
 * (oldest first, so the page's "current" marker ends up on the newest one);
 * otherwise only the latest revision is exported.
 *
 * @param int    $page_namespace   MediaWiki namespace number to export.
 * @param string $page_description Optional sub-page name; when non-empty the
 *                                 pages are exported as Page/<description>.
 */
function migrate_current_pages($page_namespace, $page_description = "")
{
    $link = $GLOBALS['link'];
    $page_table = $GLOBALS['MW_TABLE_PREFIX'] . "page";
    $text_table = $GLOBALS['MW_TABLE_PREFIX'] . "text";
    $revision_table = $GLOBALS['MW_TABLE_PREFIX'] . "revision";

    # Only ever interpolate an integer into the SQL.
    $namespace = (int) $page_namespace;

    # rev_timestamp is the time the revision was saved. page_touched (used
    # previously) is a per-page value, so every revision of a page got the
    # same timestamp.
    $curr_sql = "SELECT `$page_table`.page_title AS ptitle, "
        . "`$page_table`.page_latest AS revision, "
        . "`$page_table`.page_id AS id, "
        . "`$text_table`.old_text AS text, "
        . "`$revision_table`.rev_timestamp AS timestamp "
        . "FROM `$page_table`, `$text_table`, `$revision_table` "
        . "WHERE `$revision_table`.rev_page = `$page_table`.page_id "
        . "AND `$revision_table`.rev_id = `$page_table`.page_latest "
        . "AND `$text_table`.old_id = `$revision_table`.rev_text_id "
        . "AND `$text_table`.old_text NOT LIKE 'MediaWiki default' "
        . "AND `$page_table`.page_namespace = $namespace";

    $query = mysqli_query($link, $curr_sql) or die(mysqli_error($link));

    while ($row = mysqli_fetch_object($query)) {
        if (!$GLOBALS['IMPORT_HISTORY']) {
            migrate_page_row($row, $page_description);
            continue;
        }

        $page_id = (int) $row->id;
        $rev_sql = "SELECT `$page_table`.page_title AS ptitle, "
            . "`$revision_table`.rev_id AS revision, "
            . "`$text_table`.old_text AS text, "
            . "`$revision_table`.rev_timestamp AS timestamp "
            . "FROM `$page_table`, `$text_table`, `$revision_table` "
            . "WHERE `$page_table`.page_id = $page_id "
            . "AND `$revision_table`.rev_page = `$page_table`.page_id "
            . "AND `$text_table`.old_id = `$revision_table`.rev_text_id "
            . "AND `$text_table`.old_text NOT LIKE 'MediaWiki default' "
            . "AND `$page_table`.page_namespace = $namespace "
            # create_page() overwrites "current" on every call, so the newest
            # revision has to be processed last.
            . "ORDER BY `$revision_table`.rev_id ASC";

        $rev_query = mysqli_query($link, $rev_sql) or die(mysqli_error($link));
        while ($rev_row = mysqli_fetch_object($rev_query)) {
            migrate_page_row($rev_row, $page_description);
        }
        mysqli_free_result($rev_query);
    }

    mysqli_free_result($query);
}

/**
 * Export a single result row (one revision of one page).
 *
 * @param object $row  Row with ptitle, text, timestamp and revision fields.
 * @param string $desc Sub-page name; when non-empty the page is written as
 *                     Title/desc ("(2f)" is the quoted form of "/").
 */
function migrate_page_row($row, $desc)
{
    $title = clean_title($row->ptitle);
    if (strlen($desc)) {
        $title .= "(2f)" . $desc;
    }
    create_page($title, $row->text, $row->timestamp, $row->revision);
}

/**
 * Write one page revision below the current working directory.
 *
 * Creates <page_title>/, records the revision in <page_title>/current and
 * <page_title>/edit-log, optionally copies referenced images, and writes the
 * text to <page_title>/revisions/<page_revision>. The working directory is
 * restored before returning.
 *
 * @param string $page_title     Already quoted page directory name.
 * @param string $page_text      Wiki text of the revision.
 * @param string $page_timestamp MediaWiki timestamp (YYYYMMDDHHMMSS) or "".
 * @param string $page_revision  Revision identifier, used as the file name.
 */
function create_page($page_title, $page_text, $page_timestamp, $page_revision)
{
    print 'create page ' . $page_title . ' revision ' . $page_revision . "\n";

    # The directory already exists when earlier revisions were exported.
    if (!is_dir($page_title)) {
        mkdir($page_title);
    }
    chdir($page_title) or die($page_title);

    append_edit_log($page_title, $page_timestamp, $page_revision);

    $file = fopen("current", "w") or die("Could not write current for $page_title\n");
    fputs($file, $page_revision);
    fclose($file);

    if ($GLOBALS['MIGRATE_IMAGES']) {
        migrate_images($page_text);
    }

    if (!is_dir("revisions")) {
        mkdir("revisions");
    }
    chdir("revisions") or die("revisions");

    $file = fopen($page_revision, "w")
        or die("Could not write revision $page_revision of $page_title\n");

    # break up one string into lines
    $file_text = explode("\n", $page_text);

    if ($GLOBALS['ADD_MW_PARSER']) {
        $mw_parser = $GLOBALS['MW_PARSER'];
        fputs($file, "#format $mw_parser \n");
    } else {
        $file_text = change_syntax($file_text);
    }

    foreach ($file_text as $line) {
        fputs($file, rtrim($line) . "\n");
    }
    unset($file_text);
    fclose($file);

    chdir("..") or die(getcwd()); # leave revisions/
    chdir("..") or die(getcwd()); # leave the page directory
}

/**
 * Append one entry to the page's own edit-log (in the current directory) and
 * remember it for the global edit-log written by the main script.
 *
 * @param string $page_title Quoted page name.
 * @param string $timestamp  MediaWiki timestamp, or "" to use the current time.
 * @param string $revision   Revision identifier; 0 is logged as SAVENEW.
 */
function append_edit_log($page_title, $timestamp, $revision)
{
    $action = ((int) $revision === 0) ? 'SAVENEW' : 'SAVE';
    $tstamp = strlen($timestamp) ? getStamp($timestamp) : uts();

    $el_string = "$tstamp\t$revision\t$action\t$page_title\t"
        . "\tlocalhost\t" . $GLOBALS['MM_USER_ID'] . "\n";

    $file = fopen('edit-log', 'a+') or die("Could not write edit-log for $page_title\n");
    fputs($file, $el_string);
    fclose($file);

    # Collected as a list: keying by timestamp (as before) silently dropped
    # every entry that shared a timestamp with another one.
    $GLOBALS['EDIT_LOG'][] = $el_string;
}

/**
 * Current time as a 16-digit string: Unix seconds followed by microseconds.
 *
 * @return string
 */
function uts()
{
    # microtime() returns "0.uuuuuu00 seconds"
    list($fraction, $seconds) = explode(" ", microtime());
    return $seconds . substr($fraction, 2, 6);
}

/**
 * Convert a MediaWiki timestamp into the 16-digit microsecond timestamp used
 * in the edit-log.
 *
 * @param string $t Timestamp in the form YYYYMMDDHHMMSS; MediaWiki stores
 *                  these in UTC.
 * @return string Unix time in microseconds, right-padded with zeros to 16
 *                digits; the current time if $t is not a 14-digit timestamp.
 */
function getStamp($t)
{
    if (!preg_match('/^\d{14}$/', $t)) {
        return uts();
    }

    $year = (int) substr($t, 0, 4);
    $month = (int) substr($t, 4, 2);
    $day = (int) substr($t, 6, 2);
    $hour = (int) substr($t, 8, 2);
    # Minutes and seconds sit at offsets 10 and 12; the former offsets 11 and
    # 13 mixed the two fields and truncated the seconds to one digit.
    $min = (int) substr($t, 10, 2);
    $sec = (int) substr($t, 12, 2);

    # gmmktime: the stored value is UTC, not server-local time.
    $seconds = gmmktime($hour, $min, $sec, $month, $day, $year);

    # Seconds -> microseconds by appending zeros (same as "%-016s").
    return str_pad((string) $seconds, 16, '0', STR_PAD_RIGHT);
}

/**
 * Copy every image referenced as [[Image:name|...]] in the text into the
 * attachments/ directory of the page (the current working directory).
 *
 * Images are located with find(1) below $MW_IMAGE_PATH; images that cannot be
 * found are skipped.
 *
 * @param string $page_text Wiki text to scan.
 */
function migrate_images($page_text)
{
    $mw_path = $GLOBALS['MW_IMAGE_PATH'];

    # Stop at the first "|" or "]" so several images on one line are matched
    # separately (the former greedy ".*" merged them into one bogus name).
    if (!preg_match_all('/\[\[Image:([^\]|\n]+)/', $page_text, $image_matches)) {
        return;
    }
    if ($mw_path === "") {
        die("MW_IMAGE_PATH must be set when MIGRATE_IMAGES is enabled\n");
    }
    if (!is_dir("attachments")) {
        mkdir("attachments");
    }

    foreach ($image_matches[1] as $raw_name) {
        $image_file_name = trim($raw_name);
        # The name comes from wiki text: never let it leave attachments/.
        if ($image_file_name === "" || strpos($image_file_name, '/') !== false) {
            continue;
        }
        if (file_exists('attachments/' . $image_file_name)) {
            continue;
        }

        # Arguments are shell-escaped, and exec() is used instead of system()
        # so that find's output is not echoed into the script's own output.
        $find_string = 'find ' . escapeshellarg($mw_path)
            . ' -type f -name ' . escapeshellarg($image_file_name);
        $found = array();
        exec($find_string, $found, $ret);
        if ($ret) {
            die("find failed while looking for $image_file_name\n");
        }

        # Like system(), use the last line of output.
        $image_file_path = count($found) ? $found[count($found) - 1] : "";
        if (strlen($image_file_path)) {
            if (!copy($image_file_path, "./attachments/$image_file_name")) {
                die("failed to copy $image_file_name\n");
            }
            print " added attachment: $image_file_name \n";
        }
    }
}

/**
 * Turn a MediaWiki page title into a MoinMoin page directory name.
 *
 * Spaces become underscores and every run of bytes outside [a-zA-Z0-9_] is
 * replaced by its lowercase hex form in parentheses, e.g. "A/B" -> "A(2f)B".
 *
 * @param string $page_title Title as read from the database.
 * @return string Quoted name.
 */
function clean_title($page_title)
{
    $page_title = str_replace(" ", "_", $page_title);

    # Only convert when the title is not valid UTF-8 already; converting
    # unconditionally double-encoded every non-ASCII UTF-8 title.
    if (!preg_match('//u', $page_title)) {
        $page_title = utf8_encode($page_title);
    }

    # preg_* replaces ereg(), which was removed in PHP 7.0.
    return preg_replace_callback('/[^a-zA-Z0-9_]+/', 'quote_title_bytes', $page_title);
}

/**
 * preg_replace_callback() helper for clean_title(): "(hex)" of the match.
 *
 * @param array $match Regex match; element 0 is the run of bytes to quote.
 * @return string
 */
function quote_title_bytes($match)
{
    return '(' . bin2hex($match[0]) . ')';
}

/**
 * Minimal MediaWiki -> MoinMoin syntax conversion, applied line by line.
 * Only used when $ADD_MW_PARSER is false.
 *
 * @param array $textString Lines of wiki text.
 * @return array Converted lines, same keys.
 */
function change_syntax($textString)
{
    foreach ($textString as $a => $line) {
        #custom plugin
        #if(preg_match("/\.+\<\/fileshare\>/",$line)){
        #    $line = fileShare($line);
        #}

        if (substr($line, 0, 1) == '*') {
            $line = bullets($line);
        }

        if (preg_match("/^#/", $line)) {
            $line = numberedList($line);
        }

        #headings
        if (preg_match("/^==.+==/", $line)) {
            $line = heading($line);
        }

        #wikilink
        if (preg_match("/\[\[.+\]\]/", $line)) {
            $line = wikiLinks($line);
        }

        # MediaWiki line break: <br>, <BR> or <br />
        # must be after wiki links, because the replacement contains "[["
        $line = preg_replace('/<br\s*\/?>/i', "[[BR]]", $line);

        $textString[$a] = $line;
    }

    return $textString;
}

#custom plugin
#function fileShare($string) {
#    $fileshare = substr($string, strpos($string, "\\\\"));
#    $fileshare = preg_replace("/<\/fileshare>/","",$fileshare);
#    $string = "[file:" .$fileshare ."]";
#    return $string;
#}

/**
 * Convert a MediaWiki heading line to MoinMoin syntax.
 *
 * MoinMoin uses one "=" less per level, so "==Title==" becomes "= Title =".
 *
 * @param string $string Line starting with a run of "=".
 * @return string Converted heading.
 */
function heading($string)
{
    $headingLevel = strspn($string, '=');
    $rest = substr($string, $headingLevel);

    if (preg_match('/^(.*?)=+\s*$/', $rest, $match)) {
        # Closing markers at the end of the line: "=" inside the title is kept.
        $theHeading = $match[1];
    } else {
        # Text follows the closing markers: cut at the first "=", as before.
        $cut = strpos($rest, '=');
        $theHeading = ($cut === false) ? $rest : substr($rest, 0, $cut);
    }

    $theSyntax = str_repeat('=', max($headingLevel - 1, 0));

    # trim() avoids doubled spaces for "== Title ==".
    return $theSyntax . ' ' . trim($theHeading) . ' ' . $theSyntax;
}

/**
 * Convert a MediaWiki bullet line to MoinMoin syntax: N leading "*" become
 * N spaces of indentation followed by a single "*".
 *
 * @param string $string Line starting with "*".
 * @return string Converted line.
 */
function bullets($string)
{
    $depth = strspn($string, '*');
    if ($depth === 0) {
        return $string;
    }
    return str_repeat(' ', $depth) . '*' . substr($string, $depth);
}

/**
 * Convert a MediaWiki numbered-list line to MoinMoin syntax: N leading "#"
 * become N spaces of indentation followed by "1.".
 *
 * The former "##" branch could never run, so nested items were emitted as
 * " 1.#...".
 *
 * @param string $string Line starting with "#".
 * @return string Converted line.
 */
function numberedList($string)
{
    $depth = strspn($string, '#');
    if ($depth === 0) {
        return $string;
    }
    return str_repeat(' ', $depth) . '1. ' . ltrim(substr($string, $depth));
}

/**
 * Convert every [[Target]] / [[Target|label]] link on a line into
 * ["Target"] / [:Target: label].
 *
 * The line is scanned left to right exactly once, so unbalanced input such
 * as a "]]" in front of the first "[[" can no longer loop forever; an
 * unterminated "[[" is left untouched.
 *
 * @param string $string One line of wiki text.
 * @return string Converted line.
 */
function wikiLinks($string)
{
    global $WIKI_LINK_START;
    global $WIKI_LINK_END;

    $result = '';
    $offset = 0;

    while (($open = strpos($string, "[[", $offset)) !== false) {
        $close = strpos($string, "]]", $open + 2);
        if ($close === false) {
            break;
        }

        $link = substr($string, $open + 2, $close - $open - 2);
        $dividerPosition = strpos($link, "|");

        # Strict comparison: a "|" at position 0 is still a divider.
        if ($dividerPosition === false) {
            $converted = $WIKI_LINK_START . "\"" . $link . "\"" . $WIKI_LINK_END;
        } else {
            $wikilink = substr($link, 0, $dividerPosition);
            $label = trim(substr($link, $dividerPosition + 1));
            $converted = $WIKI_LINK_START . ":" . $wikilink . ": " . $label . $WIKI_LINK_END;
        }

        $result .= substr($string, $offset, $open - $offset) . $converted;
        $offset = $close + 2;
    }

    return $result . substr($string, $offset);
}

/**
 * Convert external link syntax: it is the same in both wikis except for the
 * label divider, so "| " and "|" are replaced by $EXTERNAL_LINK_DIVIDER.
 *
 * The former version called preg_match() without a subject and used the
 * unescaped pattern "|", which matches the empty string everywhere.
 *
 * @param string $string One line of wiki text.
 * @return string Converted line.
 */
function externalLinks($string)
{
    global $EXTERNAL_LINK_DIVIDER;

    # "| " first, so a divider followed by a space does not yield two spaces.
    return str_replace(array("| ", "|"), $EXTERNAL_LINK_DIVIDER, $string);
}

/**
 * Recursively delete a directory.
 *
 * Hidden entries are removed too (glob("*") skipped them, which made the
 * final rmdir() fail), and symbolic links are unlinked rather than followed.
 *
 * @param string $dir Path to remove.
 */
function rmdirr($dir)
{
    if (is_link($dir) || !is_dir($dir)) {
        unlink($dir);
        return;
    }

    $entries = scandir($dir);
    if ($entries !== false) {
        foreach ($entries as $entry) {
            if ($entry === '.' || $entry === '..') {
                continue;
            }
            rmdirr($dir . "/" . $entry);
        }
    }

    rmdir($dir);
}