Attachment 'vqwiki_to_moin.php'

Download

   1 #!/usr/local/bin/php
   2 
   3 # Copy data from vqWiki to MoinMoin wiki
   4 #  Jeff Olson <jeff@olsonzoo.com> - October 20, 2005   
   5 #  Based on "mediawiki2moin.php" from http://moinmoin.wikiwikiweb.de/MediaWikiConverter
   6 
   7 <?php
   8 
   9 # Set these variables.  
  10 # - $inputDir is location of vqWiki data files
  11 # - $outputDir is location where MoinMoin pages directory is located...
  12 #   WARNING! Any existing pages in $outputDir will be deleted if they exist in $inputDir
  13 
  14 $inputDir = '/home/wiki/wiki';
  15 $outputDir = '/codeswiki/data';
  16 
  17 echo "*****\n\nReading Existing Files\n*****\n";
  18 
  19 # Read input data from vqWiki
  20 $a = 0;
  21 if ($handle = opendir($inputDir)) 
  22 {
  23 	echo "Directory handle: $handle\n";
  24 	echo "Files:\n";
  25 
  26 	/* This is the correct way to loop over the directory. */
  27 	while (false !== ($file = readdir($handle))) 
  28 	{
  29 		$fullPath = $inputDir . "/" . $file;
  30 		$fp = fopen($fullPath, "r");
  31 		if (fnmatch("*.txt", $file) 
  32 			#&& fnmatch("Data+Migrations.txt", $file)  # uncomment if testing on specific files
  33 		   ) 
  34 		{
  35 			echo "$file\n";
  36 			$title[$a] = preg_replace("/\.txt/", "", $file);
  37 			#echo "\t$a -> $title[$a]\n";
  38 			if (filesize($fullPath) > 0)
  39 			{
  40 				$text[$a] = fread($fp, filesize($fullPath));
  41 			}
  42 			else 
  43 			{
  44 				$text[$a] = "";
  45 			}
  46 			#echo $text[$a] . "\n\n";
  47 			$a++;
  48 		}
  49 	}
  50 
  51 	closedir($handle);
  52 }
  53 
  54 # Get historical versions - still needs some work
  55 #$versionsDir = "$inputDir/versions";
  56 #chdir($versionsDir);
  57 #for ($i = 0; $i < count($title); $i++)
  58 #{
  59 #	$historical[$i] = glob("$title[$i].txt.*");
  60 #	print_r($historical[$i]);
  61 #	echo "\t" . count($historical[$i]) . " versions found\n";
  62 #}
  63 
  64 
  65 # Go to output directory for MoinMoin wiki
  66 echo "\n\n*****\nCreating New Files\n*****\n";
  67 chdir($outputDir) or die;
  68 chdir("pages") or die;
  69 
  70 $count = count($title);
  71 for ($a = 0; $a < $count; $a++) 
  72 {
  73 	echo "$a: $title[$a]\n";
  74 	
  75 	# Fix title
  76 	$title[$a] = fix_title($title[$a]);
  77 	echo "\tfixed: $title[$a]\n";
  78 	
  79 	# Parse historical versions for date & time, ip address
  80 	# TODO
  81 	
  82 	# Delete existing folder for page
  83 	#echo "deleting any existing folder with name $title[$a]\n";
  84 	system("rm -rf \"$title[$a]\"");
  85 
  86 	# Remake folder for page
  87 	#echo "trying to make $title[$a]\n";
  88 	mkdir($title[$a]) or die;
  89 	
  90 	#echo "trying to change to $title[$a]\n";
  91 	chdir($title[$a]) or die;
  92 	
  93 	#echo "current dir: " . getcwd() . "\n";
  94 	
  95 	# Write out needed files & directories
  96 	$file = fopen("current", "w");
  97 	fputs($file, "00000001");
  98 	fclose($file);
  99 	mkdir("revisions") or die;
 100 	chdir("revisions") or die;
 101 	#echo "current dir: " . getcwd() . "\n";
 102 
 103 	$file = fopen("00000001", "w");
 104 	$file_text = explode("\n", $text[$a]);
 105 	
 106 	
 107 	# Copy text from old to new, fixing syntax as we go 
 108 	# - also pass in title for copying attachments and input & output directory to copy them
 109 	$file_text = change_syntax($file_text, $title[$a], $inputDir, $outputDir);
 110 	
 111 	# Create output file
 112 	$b = 0;
 113 	while ($b < count($file_text)) 
 114 	{
 115 		fputs($file, rtrim($file_text[$b]) . "\n");
 116 		$b++;
 117 	}
 118 	unset($file_text);
 119 	fclose($file);
 120 	
 121 	chdir("..") or die;
 122 	#echo "current dir: " . getcwd() . "\n";
 123 
 124 	# chown & chmod to set correct permissions (this assumes we're running script as root)
 125 	system("chown -R apache:apache .");
 126 	system("chmod -R g+w .");
 127 	system("chmod -R o-rx .");
 128 
 129 	chdir("..") or die;
 130 }
 131 chdir("..") or die;
 132 
 133 function fix_title($title)
 134 {
 135 	$title = utf8_encode(str_replace(" ", "_", $title));
 136 	$title = utf8_encode(str_replace("+", "_", $title));
 137 	return $title;
 138 }
 139 
 140 function change_syntax ($array, $pageTitle, $inputDir, $outputDir) 
 141 {
 142 	# initialize
 143 	$in_preformatted_text = 0;
 144 	$in_multiple_line_code = 0;	
 145 	$in_multiple_line_java_code = 0;
 146 	$in_multiple_line_html_code = 0;
 147 
 148 	# patterns
 149 	$java_start_tag_pattern = "/\[<java>\]/";
 150 	$java_end_tag_pattern = "/\[<\/java>\]/";
 151 	$html_start_tag_pattern = "/\[<html>\]/";
 152 	$html_end_tag_pattern = "/\[<\/html>\]/";
 153 	
 154 	for ($a = 0; $a < count($array); $a++) 
 155 	{
 156 		# assign row as a reference to current array item
 157 		$row =& $array[$a];
 158 		
 159 		# Handle multiple-line preformatted text
 160 		if ($in_preformatted_text) 
 161 		{
 162 			# found the end?
 163 			if (preg_match("/^\s*$/", $row)) 
 164 			{
 165 				$row = "}}}";
 166 				$in_preformatted_text = 0;
 167 			}
 168 			else 
 169 			{
 170 				# do nothing - skip all other substitutions
 171 				continue;		
 172 			}
 173 		}
 174 
 175 		# Handle multiple-line code
 176 		elseif ($in_multiple_line_code)
 177 		{
 178 			# found the end?
 179 			if (preg_match("/}}}/", $row)) 
 180 			{
 181 				$in_multiple_line_code = 0;
 182 			}
 183 			else 
 184 			{
 185 				# do nothing - skip all other substitutions
 186 				continue;		
 187 			}
 188 		}
 189 
 190 		# Handle multiple-line java code
 191 		elseif ($in_multiple_line_java_code)
 192 		{
 193 			# found the end?
 194 			if (preg_match($java_end_tag_pattern, $row)) 
 195 			{
 196 				$row  = preg_replace($java_end_tag_pattern, "}}}", $row);
 197 				$in_multiple_line_java_code = 0;
 198 			}
 199 			else 
 200 			{
 201 				# do nothing - skip all other substitutions
 202 				continue;		
 203 			}			
 204 		}
 205 
 206 		# Handle multiple-line html code
 207 		elseif ($in_multiple_line_html_code)
 208 		{
 209 			# found the end?
 210 			if (preg_match($html_end_tag_pattern, $row)) 
 211 			{
 212 				$row  = preg_replace($html_end_tag_pattern, "}}}", $row);
 213 				$in_multiple_line_html_code = 0;
 214 			}
 215 			else 
 216 			{
 217 				# do nothing - skip all other substitutions
 218 				continue;		
 219 			}			
 220 		}
 221 
 222 		# Not in multiple-line preformatted text or multiple-line code block
 223 		else 
 224 		{
 225 			# Preformatted text - @@@@ on line by self, but ending on another line where it's all blank
 226 			if (preg_match("/^@@@@\s*$/", $row)) 
 227 			{
 228 				$row  = preg_replace("/^\s*@@@@\s*$/", "{{{", $row);	
 229 				$in_preformatted_text = 1;	
 230 
 231 				# Don't do any more processing on this line
 232 				continue;
 233 			}
 234 			
 235 			# Code - {{{ xxx }}} - may be on same or different lines
 236 			if (preg_match("/{{{/", $row)) 
 237 			{
 238 				# if we don't find the closing braces, we are in a multiple-line code situation
 239 				if (!preg_match("/}}}/", $row)) 
 240 				{
 241 					$in_multiple_line_code = 1;	
 242 				}
 243 
 244 				# Don't do any more processing on this line
 245 				continue;
 246 			}
 247 			
 248 			# Java Code - [<java>] xxx [</java>] - may be on same or different lines
 249 			if (preg_match($java_start_tag_pattern, $row)) 
 250 			{
 251 				$row  = preg_replace($java_start_tag_pattern, "{{{#!java", $row);
 252 
 253 				# if we don't find the closing tag, we are in a multiple-line java code situation
 254 				if (!preg_match($java_end_tag_pattern, $row)) 
 255 				{
 256 					$in_multiple_line_java_code = 1;	
 257 				}
 258 				# otherwise, replace end tag
 259 				else
 260 				{
 261 					$row  = preg_replace($java_end_tag_pattern, "}}}", $row);
 262 				}
 263 
 264 				# also add line break after open tag
 265 				$row  = preg_replace("/{{{#!java/", "{{{#!java\n",  $row);
 266 
 267 				# Don't do any more processing on this line
 268 				continue;
 269 			}
 270 			
 271 			# HTML Code - [<html>] xxx [</html>] - may be on same or different lines
 272 			if (preg_match($html_start_tag_pattern, $row)) 
 273 			{
 274 				#echo "in html\n";
 275 				$row  = preg_replace($html_start_tag_pattern, "{{{#!html", $row);
 276 
 277 				# if we don't find the closing tag, we are in a multiple-line html code situation
 278 				if (!preg_match($html_end_tag_pattern, $row)) 
 279 				{
 280 					$in_multiple_line_html_code = 1;	
 281 				}
 282 				# otherwise, replace end tag
 283 				else
 284 				{
 285 					$row  = preg_replace($html_end_tag_pattern, "}}}", $row);
 286 
 287 				}
 288 				# also add line break after open tag
 289 				$row  = preg_replace("/{{{#!html/", "{{{#!html\n",  $row);
 290 
 291 				#echo "$row\n";
 292 
 293 				# Don't do any more processing on this line
 294 				continue;
 295 			}
 296 
 297 			# Tables
 298 			$row  = preg_replace("/####/", "", $row );                  # don't need these
 299 			$row  = preg_replace("/^([^#]+)##/", "||$1||", $row, 1);    # add 1st column start marker
 300 			$row  = preg_replace("/##/", "||", $row );                  # all other markers
 301 			
 302 			# Backtick links: `link` => ["link"] - must come before 'No formatting code'
 303 			$row  = preg_replace("/`([^`]+)`/", "[\"$1\"]", $row);     	
 304 			
 305 			# C2 links
 306 			$row  = preg_replace("/c2:/", "wiki:Wiki:", $row);     	
 307 			
 308 			# No formatting code (__) - must come before underline conversion step
 309 			$row  = preg_replace("/__([^_]+)__/", "`$1`", $row);    
 310 			
 311 			# Underline: ===text=== => __text__ (must come before headings)
 312 			$row  = preg_replace("/===([^=]+)===/", "__$1__", $row);    # underline
 313 			
 314 			# Headings
 315 			$row  = preg_replace("/!!!([^!]+)!!!/", "= $1 =", $row);    # heading level 1
 316 			$row  = preg_replace("/!!([^!]+)!!/", "== $1 ==", $row);    # heading level 2
 317 			$row  = preg_replace("/!([^!]+)!/", "=== $1 ===", $row);    # heading level 3
 318 			
 319 			# Bulleted Lists: (there may be a better way to do this)
 320 			$row  = preg_replace("/^\t\*/", " * ", $row);     			# bullet indented 1
 321 			$row  = preg_replace("/^\t\t\*/", "   * ", $row);     			# bullet indented 2
 322 			$row  = preg_replace("/^\t\t\t\*/", "     * ", $row);     			# bullet indented 3
 323 	
 324 			# Numbered Lists: # => 1.  (note: there may be a better way to do this)
 325 			$row  = preg_replace("/^\t\#/", " 1. ", $row);     			# item indented 1
 326 			$row  = preg_replace("/^\t\t\#/", "   1. ", $row);     			# item indented 2
 327 			$row  = preg_replace("/^\t\t\t\#/", "     1. ", $row);     			# item indented 3
 328 			
 329 			# Line breaks inside lists
 330 #			echo "$row\n";
 331 			if (preg_match("/^(\s*)(1\.|\*)(.*)@@/", $row, $matches)) 
 332 			{
 333 				$leadingSpaces = $matches[1];
 334 				#echo "spaces: ->$leadingSpaces<- \n";
 335 				#echo "before: $row\n";
 336 				$row  = preg_replace("/@@/", "\n$leadingSpaces", $row);  # add two spaces
 337 				#echo "after : $row\n";
 338 			}
 339 			
 340 			# Other line breaks - appearing anywhere else
 341 			$row  = preg_replace("/@@/", " [[BR]] ", $row);
 342 			
 343 			# Attachments: attach: -> attachment:   - Also copy attachments to new wiki
 344 			# does not handle attachments in this format:  attach:"File name with spaces" -- fix those manually
 345 			
 346 			$attachmentPattern = "/attach:([\w.-]+)/"; # this is not a complete filename regex, but works for me!!!
 347 			
 348 			if (preg_match($attachmentPattern, $row, $attachmentMatches))
 349 			{
 350 				# Fix syntax
 351 				$row  = preg_replace($attachmentPattern, "attachment:$1", $row);
 352 				
 353 				# Copy file attachments: note this assumes there is only one attachment per line!!
 354 				$attachmentFilename = $attachmentMatches[1];
 355 				
 356 				$existingLocation = "$inputDir/upload/jsp/$attachmentFilename";
 357 				#echo "existing location: $existingLocation\n";
 358 				
 359 				$newDirectory = "$outputDir/pages/$pageTitle/attachments"; 
 360 				$newLocation = "$newDirectory/$attachmentFilename";
 361 				#echo "new location: $newLocation\n";
 362 
 363 				if (!is_dir($newDirectory))
 364 				{
 365 					#echo "making new attachments directory: $newDirectory\n";
 366 					mkdir($newDirectory);
 367 				}
 368 				#echo "current dir: " . getcwd() . "\n";
 369 				echo "\tattachment...$attachmentFilename\n";
 370 				copy($existingLocation, $newLocation);
 371 			}
 372 			
 373 	
 374 			# Horizontal rules - no conversion necessary
 375 			
 376 			# Bold/italic - no conversion necessary
 377 
 378 
 379 			# Handle line break issue
 380 			# Look at next line
 381 			if ($a+1 < count($array)) # only proceed if there are more lines
 382 			{  
 383 				
 384 				$nextRow = $array[$a+1];
 385 				$emptyRowPattern = "/^\s*$/";
 386 				
 387 				# figure out if we should add a line break - only if all of these conditions are met
 388 				if (!preg_match($emptyRowPattern, $row)               # current row is not empty
 389 					&& !preg_match("/----/", $row)               	  # current row does not have horizontal rule
 390 					&& !preg_match("/=+[^=]+=+/", $row)               # current row is not a heading
 391 					&& !preg_match($emptyRowPattern, $nextRow)        # next row is not empty
 392 					&& !preg_match("/^\t+[\*\#]/", $nextRow)          # next row doesn't start with bullet or numbered item
 393 					&& !preg_match("/##/", $nextRow)           		  # next row doesn't contain table markup
 394 				   )    
 395 				{
 396 					# only if all above conditions are met do we add a break
 397 					$row .= " [[BR]]"; # include space before to prevent "Java:[[BR]] making an Interwiki link, among other things
 398 				}
 399 			}
 400 		}		
 401 
 402 	}
 403 	return $array;
 404 }
 405 
 406 
 407 # Code to fix titles that I did not need
 408 /*	$quoted = array();
 409 	$in_parenthesis = false;
 410 	for ($i = 0; $i < strlen($title[$a]); $i++) 
 411 	{
 412 		$curchar = substr ($title[$a], $i, 1);
 413 		if (ereg('[^a-zA-Z0-9_]', $curchar)) 
 414 		{
 415 			if (!$in_parenthesis) 
 416 			{
 417 				$quoted[] = '(';
 418 				$in_parenthesis = true;
 419 			}
 420 			$quoted[] = str_pad(dechex(ord($curchar)), 2, '0', STR_PAD_LEFT);
 421 		} 
 422 		else 
 423 		{
 424 			if ($in_parenthesis) 
 425 			{
 426 				$quoted[] = ')';
 427 				$in_parenthesis = false;
 428 			}
 429 			$quoted[] = $curchar;
 430 		}
 431 	}
 432 	if ($in_parenthesis)
 433 	{
 434 		$quoted[] = ')';
 435 	}
 436 	$title[$a] = implode('', $quoted);
 437 	unset($quoted);
 438 */
 439 ?>

Attached Files

To refer to attachments on a page, use attachment:filename, as shown below in the list of files. Do NOT use the URL of the [get] link, since this is subject to change and can break easily.
  • [get | view] (2005-10-25 20:26:36, 12.5 KB) [[attachment:vqwiki_to_moin.php]]
  • [get | view] (2006-01-26 22:15:55, 14.7 KB) [[attachment:vqwiki_to_moin_v2.php]]
 All files | Selected Files: delete move to page copy to page

You are not allowed to attach a file to this page.