attachment:vqwiki_to_moin_v2.php of VeryQuickWikiConverter

Attachment 'vqwiki_to_moin_v2.php'

   1 #!/usr/local/bin/php
   2 
   3 # Copy data from vqWiki to MoinMoin wiki
   4 #  Jeff Olson <jeff@olsonzoo.com> - October 20, 2005   
   5 #  Based on "mediawiki2moin.php" from http://moinmoin.wikiwikiweb.de/MediaWikiConverter
   6 #  
   7 #  Version 1: 10/20/2005
   8 #  Version 2: 1/20/2006
   9 #
  10 #  - Copies all current and historical versions
  11 #  - Copies attachments
  12 #  - Tries to convert all wiki markup
  13 #
  14 
  15 <?php
  16 
  17 # Set these variables.  
  18 # - $inputDir is location of vqWiki data files
  19 # - $outputDir is location where MoinMoin pages directory is located...
  20 #   WARNING! Any existing pages in $outputDir will be deleted if they exist in $inputDir
  21 # - $ip is IP address to use in edit-log 
  22 # - $address is DNS name of server to use in edit log
  23 
  24 $inputDir = '/home/wiki/wiki';
  25 $outputDir = '/codeswiki/data';
  26 $ip = 0.0.0.0;
  27 $address = something.example.com;
  28 
  29 echo "*****\n\nReading Existing Files\n*****\n";
  30 
  31 # This might run a while, so set time limit high (1 hour?)
  32 set_time_limit(3600);
  33 
  34 # Read input data from vqWiki
  35 $a = 0;
  36 if ($handle = opendir($inputDir)) 
  37 {
  38 	echo "Directory handle: $handle\n";
  39 	echo "Files:\n";
  40 
  41 	/* This is the correct way to loop over the directory. */
  42 	while (false !== ($file = readdir($handle))) 
  43 	{
  44 		#$fullPath = $inputDir . "/" . $file;
  45 		#$fp = fopen($fullPath, "r");
  46 		
  47 		if (fnmatch("*.txt", $file) 
  48 			#&& fnmatch("S*.txt", $file)  # uncomment if testing on specific files
  49 		   ) 
  50 		{
  51 			#echo "$file\n";
  52 			$title[$a] = preg_replace("/\.txt/", "", $file);
  53 			$a++;
  54 		}
  55 	}
  56 
  57 	closedir($handle);
  58 }
  59 
  60 # Get all revisions 
  61 $versionsDir = "$inputDir/versions";
  62 chdir($versionsDir);
  63 for ($i = 0; $i < count($title); $i++)
  64 {
  65 	echo "$title[$i]............................";
  66 	$revisionTitle[$i] = glob("$title[$i].txt.*");
  67 	#print_r($revisionTitle[$i]);
  68 	echo count($revisionTitle[$i]) . " versions found\n";
  69 
  70 		$fullPath = $inputDir . "/" . $file;
  71 		$fp = fopen($fullPath, "r");
  72 
  73 }
  74 
  75 
  76 # Go to output directory for MoinMoin wiki
  77 echo "\n\n*****\nCreating New Files\n*****\n";
  78 chdir($outputDir) or die;
  79 chdir("pages") or die;
  80 
  81 $count = count($title);
  82 for ($i = 0; $i < $count; $i++) 
  83 {
  84 	echo "\n$i: $title[$i]\n";
  85 	
  86 	# Fix title
  87 	$title[$i] = fix_title($title[$i]);
  88 	echo "\tfixed: $title[$i]\n";
  89 	
  90 	# Delete existing directory for specific page
  91 	#echo "deleting any existing folder with name $title[$i]\n";
  92 	system("rm -rf \"$title[$i]\"");
  93 
  94 	# Remake directory for specific page
  95 	#echo "trying to make $title[$i]\n";
  96 	mkdir($title[$i]) or die;
  97 	
  98 	# Change to specific page directory
  99 	#echo "trying to change to $title[$i]\n";
 100 	chdir($title[$i]) or die;
 101 	
 102 	#echo "current dir: " . getcwd() . "\n";
 103 	
 104 	# Write out current revision to "current" file
 105 	#  - pad to 8 digits
 106 	$numberOfRevisions = count($revisionTitle[$i]);
 107 	$lastRevisionNumber = sprintf("%08d", $numberOfRevisions);
 108 	$file = fopen("current", "w");
 109 	fputs($file, $lastRevisionNumber);
 110 	fclose($file);
 111 
 112 	# Create an "edit-log" file and keep it open to write out our info to it
 113 	$editLogFile = fopen("edit-log", "w");
 114 
 115 	# Create "revisions" directory & change to it
 116 	mkdir("revisions") or die;
 117 	chdir("revisions") or die;
 118 	#echo "current dir: " . getcwd() . "\n";
 119 	
 120 	echo "\tVersions: ";
 121 	
 122 	# Go through each version of the page
 123 	for ($j = 0; $j < count($revisionTitle[$i]); $j++)
 124 	{
 125 		echo ($j + 1) . "...";
 126 		#echo "\tFilename = " . $revisionTitle[$i][$j] . "\n";
 127 		
 128 		# get file contents
 129 		$fullPath = $versionsDir . "/" . $revisionTitle[$i][$j];
 130 		$fp = fopen($fullPath, "r");
 131 		$file_text = "";
 132 		if (filesize($fullPath) > 0)
 133 		{
 134 			$file_text = fread($fp, filesize($fullPath));
 135 		}
 136 		$file_text = explode("\n", $file_text);
 137 		
 138 		# Copy text from old to new, fixing syntax as we go 
 139 		# - also pass in title for copying attachments and input & output directory to copy them
 140 		$file_text = change_syntax($file_text, $title[$i], $inputDir, $outputDir);
 141 		
 142 		# open file for writing -- each file is a eight-digit zero-padded number, starting with 1 (so use $j+1)
 143 		$revisionNumber = sprintf("%08d", $j+1);
 144 		$file = fopen($revisionNumber, "w");
 145 		
 146 		# Create output file
 147 		$k = 0;
 148 		while ($k < count($file_text)) 
 149 		{
 150 			fputs($file, rtrim($file_text[$k]) . "\n");
 151 			$k++;
 152 		}
 153 		unset($file_text);
 154 		
 155 		# Close file
 156 		fclose($file);
 157 		
 158 		# get modification time from filename (format is filename.yyyy.mm.dd.hh.MM.ss)
 159 		preg_match("/(\d{4})\.(\d{2})\.(\d{2})\.(\d{2})\.(\d{2})\.(\d{2})/", $revisionTitle[$i][$j], $modTimeString);
 160 #		echo $modTimeString[0] . "\n";
 161 
 162 		$year = $modTimeString[1];
 163 		$month = $modTimeString[2];
 164 		$day = $modTimeString[3];
 165 		$hour = $modTimeString[4];
 166 		$minute = $modTimeString[5];
 167 		$second = $modTimeString[6];
 168 		
 169 		$modTime = mktime($hour, $minute, $second, $month, $day, $year);
 170 #		echo $modTime . "\n\n";
 171 		
 172 		# touch file (named "revisionNumber") to correct modification time
 173 		touch($revisionNumber, $modTime);
 174 		
 175 		# Write entry in edit-log file
 176 		$time = $modTime . "000000";
 177 		$j == 0 ? $label = "SAVENEW" : $label = "SAVE";
 178 		$entry = "$time\t$revisionNumber\t$label\t$title[$i]\t$ip\t$address\n";
 179 		fputs($editLogFile, $entry);
 180 
 181 	}
 182 	echo "\n";
 183 	
 184 	# Close edit-log file
 185 	fclose($editLogFile);
 186 	
 187 	# Back up to specific page directory
 188 	chdir("..") or die;
 189 	#echo "current dir: " . getcwd() . "\n";
 190 
 191 	# chown & chmod to set correct permissions (this assumes we're running script as root)
 192 	system("chown -R apache:apache .");
 193 	system("chmod -R g+w .");
 194 	system("chmod -R o-rx .");
 195 	
 196 	# Back up to "pages" directory 
 197 	chdir("..") or die;
 198 }
 199 
 200 # Back up to Moin directory
 201 chdir("..") or die;
 202 
 203 function fix_title($title)
 204 {
 205 	$title = utf8_encode(str_replace(" ", "_", $title));
 206 	$title = utf8_encode(str_replace("+", "_", $title));
 207 	return $title;
 208 }
 209 
 210 function change_syntax ($array, $pageTitle, $inputDir, $outputDir) 
 211 {
 212 	# initialize
 213 	$in_preformatted_text = 0;
 214 	$in_multiple_line_code = 0;	
 215 	$in_multiple_line_java_code = 0;
 216 	$in_multiple_line_html_code = 0;
 217 
 218 	# patterns
 219 	$java_start_tag_pattern = "/\[<java>\]/";
 220 	$java_end_tag_pattern = "/\[<\/java>\]/";
 221 	$html_start_tag_pattern = "/\[<html>\]/";
 222 	$html_end_tag_pattern = "/\[<\/html>\]/";
 223 	
 224 	for ($a = 0; $a < count($array); $a++) 
 225 	{
 226 		# assign row as a reference to current array item
 227 		$row =& $array[$a];
 228 		
 229 		# Handle multiple-line preformatted text
 230 		if ($in_preformatted_text) 
 231 		{
 232 			# found the end?
 233 			if (preg_match("/^\s*$/", $row)) 
 234 			{
 235 				$row = "}}}";
 236 				$in_preformatted_text = 0;
 237 			}
 238 			else 
 239 			{
 240 				# do nothing - skip all other substitutions
 241 				continue;		
 242 			}
 243 		}
 244 
 245 		# Handle multiple-line code
 246 		elseif ($in_multiple_line_code)
 247 		{
 248 			# found the end?
 249 			if (preg_match("/}}}/", $row)) 
 250 			{
 251 				$in_multiple_line_code = 0;
 252 			}
 253 			else 
 254 			{
 255 				# do nothing - skip all other substitutions
 256 				continue;		
 257 			}
 258 		}
 259 
 260 		# Handle multiple-line java code
 261 		elseif ($in_multiple_line_java_code)
 262 		{
 263 			# found the end?
 264 			if (preg_match($java_end_tag_pattern, $row)) 
 265 			{
 266 				$row  = preg_replace($java_end_tag_pattern, "}}}", $row);
 267 				$in_multiple_line_java_code = 0;
 268 			}
 269 			else 
 270 			{
 271 				# do nothing - skip all other substitutions
 272 				continue;		
 273 			}			
 274 		}
 275 
 276 		# Handle multiple-line html code
 277 		elseif ($in_multiple_line_html_code)
 278 		{
 279 			# found the end?
 280 			if (preg_match($html_end_tag_pattern, $row)) 
 281 			{
 282 				$row  = preg_replace($html_end_tag_pattern, "}}}", $row);
 283 				$in_multiple_line_html_code = 0;
 284 			}
 285 			else 
 286 			{
 287 				# do nothing - skip all other substitutions
 288 				continue;		
 289 			}			
 290 		}
 291 
 292 		# Not in multiple-line preformatted text or multiple-line code block
 293 		else 
 294 		{
 295 			# Preformatted text - @@@@ on line by self, but ending on another line where it's all blank
 296 			if (preg_match("/^@@@@\s*$/", $row)) 
 297 			{
 298 				$row  = preg_replace("/^\s*@@@@\s*$/", "{{{", $row);	
 299 				$in_preformatted_text = 1;	
 300 
 301 				# Don't do any more processing on this line
 302 				continue;
 303 			}
 304 			
 305 			# Code - {{{ xxx }}} - may be on same or different lines
 306 			if (preg_match("/{{{/", $row)) 
 307 			{
 308 				# if we don't find the closing braces, we are in a multiple-line code situation
 309 				if (!preg_match("/}}}/", $row)) 
 310 				{
 311 					$in_multiple_line_code = 1;	
 312 				}
 313 
 314 				# Don't do any more processing on this line
 315 				continue;
 316 			}
 317 			
 318 			# Java Code - [<java>] xxx [</java>] - may be on same or different lines
 319 			if (preg_match($java_start_tag_pattern, $row)) 
 320 			{
 321 				$row  = preg_replace($java_start_tag_pattern, "{{{#!java", $row);
 322 
 323 				# if we don't find the closing tag, we are in a multiple-line java code situation
 324 				if (!preg_match($java_end_tag_pattern, $row)) 
 325 				{
 326 					$in_multiple_line_java_code = 1;	
 327 				}
 328 				# otherwise, replace end tag
 329 				else
 330 				{
 331 					$row  = preg_replace($java_end_tag_pattern, "}}}", $row);
 332 				}
 333 
 334 				# also add line break after open tag
 335 				$row  = preg_replace("/{{{#!java/", "{{{#!java\n",  $row);
 336 
 337 				# Don't do any more processing on this line
 338 				continue;
 339 			}
 340 			
 341 			# HTML Code - [<html>] xxx [</html>] - may be on same or different lines
 342 			if (preg_match($html_start_tag_pattern, $row)) 
 343 			{
 344 				#echo "in html\n";
 345 				$row  = preg_replace($html_start_tag_pattern, "{{{#!html", $row);
 346 
 347 				# if we don't find the closing tag, we are in a multiple-line html code situation
 348 				if (!preg_match($html_end_tag_pattern, $row)) 
 349 				{
 350 					$in_multiple_line_html_code = 1;	
 351 				}
 352 				# otherwise, replace end tag
 353 				else
 354 				{
 355 					$row  = preg_replace($html_end_tag_pattern, "}}}", $row);
 356 
 357 				}
 358 				# also add line break after open tag
 359 				$row  = preg_replace("/{{{#!html/", "{{{#!html\n",  $row);
 360 
 361 				#echo "$row\n";
 362 
 363 				# Don't do any more processing on this line
 364 				continue;
 365 			}
 366 
 367 			# Tables
 368 			$row  = preg_replace("/####/", "", $row );                  # don't need these
 369 			$row  = preg_replace("/^([^#]+)##/", "||$1||", $row, 1);    # add 1st column start marker
 370 			$row  = preg_replace("/##/", "||", $row );                  # all other markers
 371 			
 372 			# Backtick links: `link` => ["link"] - must come before 'No formatting code'
 373 			$row  = preg_replace("/`([^`]+)`/", "[\"$1\"]", $row);     	
 374 			
 375 			# C2 links
 376 			$row  = preg_replace("/c2:/", "wiki:Wiki:", $row);     	
 377 			
 378 			# No formatting code (__) - must come before underline conversion step
 379 			$row  = preg_replace("/__([^_]+)__/", "`$1`", $row);    
 380 			
 381 			# Underline: ===text=== => __text__ (must come before headings)
 382 			$row  = preg_replace("/===([^=]+)===/", "__$1__", $row);    # underline
 383 			
 384 			# Headings
 385 			$row  = preg_replace("/!!!([^!]+)!!!/", "= $1 =", $row);    # heading level 1
 386 			$row  = preg_replace("/!!([^!]+)!!/", "== $1 ==", $row);    # heading level 2
 387 			$row  = preg_replace("/!([^!]+)!/", "=== $1 ===", $row);    # heading level 3
 388 			
 389 			# Bulleted Lists: (there may be a better way to do this)
 390 			$row  = preg_replace("/^\t\*/", " * ", $row);     			# bullet indented 1
 391 			$row  = preg_replace("/^\t\t\*/", "   * ", $row);     			# bullet indented 2
 392 			$row  = preg_replace("/^\t\t\t\*/", "     * ", $row);     			# bullet indented 3
 393 	
 394 			# Numbered Lists: # => 1.  (note: there may be a better way to do this)
 395 			$row  = preg_replace("/^\t\#/", " 1. ", $row);     			# item indented 1
 396 			$row  = preg_replace("/^\t\t\#/", "   1. ", $row);     			# item indented 2
 397 			$row  = preg_replace("/^\t\t\t\#/", "     1. ", $row);     			# item indented 3
 398 			
 399 			# Line breaks inside lists
 400 #			echo "$row\n";
 401 			if (preg_match("/^(\s*)(1\.|\*)(.*)@@/", $row, $matches)) 
 402 			{
 403 				$leadingSpaces = $matches[1];
 404 				#echo "spaces: ->$leadingSpaces<- \n";
 405 				#echo "before: $row\n";
 406 				$row  = preg_replace("/@@/", "\n$leadingSpaces", $row);  # add two spaces
 407 				#echo "after : $row\n";
 408 			}
 409 			
 410 			# Other line breaks - appearing anywhere else
 411 			$row  = preg_replace("/@@/", " [[BR]] ", $row);
 412 			
 413 			# Attachments: attach: -> attachment:   - Also copy attachments to new wiki
 414 			# does not handle attachments in this format:  attach:"File name with spaces" -- fix those manually
 415 			
 416 			$attachmentPattern = "/attach:([\w.-]+)/"; # this is not a complete filename regex, but works for me!!!
 417 			
 418 			if (preg_match($attachmentPattern, $row, $attachmentMatches))
 419 			{
 420 				# Fix syntax
 421 				$row  = preg_replace($attachmentPattern, "attachment:$1", $row);
 422 				
 423 				# Copy file attachments: note this assumes there is only one attachment per line!!
 424 				$attachmentFilename = $attachmentMatches[1];
 425 				
 426 				$existingLocation = "$inputDir/upload/jsp/$attachmentFilename";
 427 				#echo "existing location: $existingLocation\n";
 428 				
 429 				$newDirectory = "$outputDir/pages/$pageTitle/attachments"; 
 430 				$newLocation = "$newDirectory/$attachmentFilename";
 431 				#echo "new location: $newLocation\n";
 432 
 433 				if (!is_dir($newDirectory))
 434 				{
 435 					#echo "making new attachments directory: $newDirectory\n";
 436 					mkdir($newDirectory);
 437 				}
 438 				#echo "current dir: " . getcwd() . "\n";
 439 				echo "\tattachment...$attachmentFilename\n";
 440 				copy($existingLocation, $newLocation);
 441 			}
 442 			
 443 	
 444 			# Horizontal rules - no conversion necessary
 445 			
 446 			# Bold/italic - no conversion necessary
 447 
 448 
 449 			# Handle line break issue
 450 			# Look at next line
 451 			if ($a+1 < count($array)) # only proceed if there are more lines
 452 			{  
 453 				
 454 				$nextRow = $array[$a+1];
 455 				$emptyRowPattern = "/^\s*$/";
 456 				
 457 				# figure out if we should add a line break - only if all of these conditions are met
 458 				if (!preg_match($emptyRowPattern, $row)               # current row is not empty
 459 					&& !preg_match("/----/", $row)               	  # current row does not have horizontal rule
 460 					&& !preg_match("/=+[^=]+=+/", $row)               # current row is not a heading
 461 					&& !preg_match($emptyRowPattern, $nextRow)        # next row is not empty
 462 					&& !preg_match("/^\t+[\*\#]/", $nextRow)          # next row doesn't start with bullet or numbered item
 463 					&& !preg_match("/##/", $nextRow)           		  # next row doesn't contain table markup
 464 				   )    
 465 				{
 466 					# only if all above conditions are met do we add a break
 467 					$row .= " [[BR]]"; # include space before to prevent "Java:[[BR]] making an Interwiki link, among other things
 468 				}
 469 			}
 470 		}		
 471 
 472 	}
 473 	return $array;
 474 }
 475 
 476 
 477 # Code to fix titles that I did not need
 478 /*	$quoted = array();
 479 	$in_parenthesis = false;
 480 	for ($i = 0; $i < strlen($title[$a]); $i++) 
 481 	{
 482 		$curchar = substr ($title[$a], $i, 1);
 483 		if (ereg('[^a-zA-Z0-9_]', $curchar)) 
 484 		{
 485 			if (!$in_parenthesis) 
 486 			{
 487 				$quoted[] = '(';
 488 				$in_parenthesis = true;
 489 			}
 490 			$quoted[] = str_pad(dechex(ord($curchar)), 2, '0', STR_PAD_LEFT);
 491 		} 
 492 		else 
 493 		{
 494 			if ($in_parenthesis) 
 495 			{
 496 				$quoted[] = ')';
 497 				$in_parenthesis = false;
 498 			}
 499 			$quoted[] = $curchar;
 500 		}
 501 	}
 502 	if ($in_parenthesis)
 503 	{
 504 		$quoted[] = ')';
 505 	}
 506 	$title[$a] = implode('', $quoted);
 507 	unset($quoted);
 508 */
 509 ?>
Attached Files

To refer to attachments on a page, use attachment:filename, as shown below in the list of files. Do NOT use the URL of the [get] link, since this is subject to change and can break easily.
You are not allowed to attach a file to this page.
MoinMoin: attachment:vqwiki_to_moin_v2.php of VeryQuickWikiConverter

Attachment 'vqwiki_to_moin_v2.php'

Attached Files