Attachment 'vqwiki_to_moin_v2.php'
Download 1 #!/usr/local/bin/php
2
3 # Copy data from vqWiki to MoinMoin wiki
4 # Jeff Olson <jeff@olsonzoo.com> - October 20, 2005
5 # Based on "mediawiki2moin.php" from http://moinmoin.wikiwikiweb.de/MediaWikiConverter
6 #
7 # Version 1: 10/20/2005
8 # Version 2: 1/20/2006
9 #
10 # - Copies all current and historical versions
11 # - Copies attachments
12 # - Tries to convert all wiki markup
13 #
14
15 <?php
16
17 # Set these variables.
18 # - $inputDir is location of vqWiki data files
19 # - $outputDir is location where MoinMoin pages directory is located...
20 # WARNING! Any existing pages in $outputDir will be deleted if they exist in $inputDir
21 # - $ip is IP address to use in edit-log
22 # - $address is DNS name of server to use in edit log
23
# Conversion settings - edit these before running the script.
# $inputDir  : vqWiki data directory (page files plus "versions" and "upload/jsp")
# $outputDir : MoinMoin data directory (the one that contains "pages")
$inputDir = '/home/wiki/wiki';
$outputDir = '/codeswiki/data';
# $ip and $address are written verbatim into every edit-log entry, so they
# must be quoted strings; unquoted, 0.0.0.0 is a parse error.
$ip = '0.0.0.0';
$address = 'something.example.com';
28
echo "*****\n\nReading Existing Files\n*****\n";

# This might run a while, so set time limit high (1 hour?)
set_time_limit(3600);

# Collect the page titles: every "*.txt" file directly inside $inputDir is
# one vqWiki page, and its title is the file name without the extension.
# $title is always initialised so later count($title) calls are safe even
# when the directory contains no pages.
$title = [];

$handle = opendir($inputDir);
if ($handle === false)
{
    # Nothing can be converted without the input directory; stop here
    # rather than continuing with an undefined page list.
    die("Unable to open input directory: $inputDir\n");
}

echo "Directory handle: $handle\n";
echo "Files:\n";

/* This is the correct way to loop over the directory. */
while (false !== ($file = readdir($handle)))
{
    if (fnmatch("*.txt", $file)
        #&& fnmatch("S*.txt", $file) # uncomment if testing on specific files
    )
    {
        #echo "$file\n";
        # Strip only the trailing extension; an unanchored pattern would
        # also remove ".txt" from the middle of a page name.
        $title[] = preg_replace("/\.txt$/", "", $file);
    }
}

closedir($handle);
59
# Get all revisions
# vqWiki keeps one file per saved version in "<inputDir>/versions".
# $revisionTitle[$i] receives the list of version file names found for
# page $title[$i] (an empty list when there are none).
$versionsDir = "$inputDir/versions";
if (!chdir($versionsDir))
{
    die("Unable to change to versions directory: $versionsDir\n");
}

for ($i = 0; $i < count($title); $i++)
{
    echo "$title[$i]............................";

    # glob() can return false on error; normalise that to an empty list
    # so count() below and in later code always receives an array.
    $found = glob("$title[$i].txt.*");
    $revisionTitle[$i] = ($found === false) ? [] : $found;
    #print_r($revisionTitle[$i]);
    echo count($revisionTitle[$i]) . " versions found\n";

    # No file is opened here: the revision files are read one at a time
    # when they are converted, so opening anything in this loop would
    # only leak a file handle per page.
}
74
75
# Go to output directory for MoinMoin wiki
echo "\n\n*****\nCreating New Files\n*****\n";
chdir($outputDir) or die;
chdir("pages") or die;

# For every page: recreate its directory, write the "current" pointer,
# convert each stored revision into revisions/NNNNNNNN, and record one
# edit-log line per revision.
$count = count($title);
for ($i = 0; $i < $count; $i++)
{
    echo "\n$i: $title[$i]\n";

    # Fix title
    $title[$i] = fix_title($title[$i]);
    echo "\tfixed: $title[$i]\n";

    # Delete existing directory for specific page.
    # The name is wrapped in single quotes (with embedded single quotes
    # escaped) so the shell treats it as one literal word, and "--" keeps
    # a title starting with "-" from being read as an option.
    $quotedPageDir = "'" . str_replace("'", "'\\''", $title[$i]) . "'";
    system("rm -rf -- $quotedPageDir");

    # Remake directory for specific page
    mkdir($title[$i]) or die;

    # Change to specific page directory
    chdir($title[$i]) or die;

    # Write out current revision to "current" file
    # - pad to 8 digits
    $numberOfRevisions = count($revisionTitle[$i]);
    $lastRevisionNumber = sprintf("%08d", $numberOfRevisions);
    $file = fopen("current", "w");
    if ($file === false)
    {
        die("Unable to create 'current' file for page $title[$i]\n");
    }
    fputs($file, $lastRevisionNumber);
    fclose($file);

    # Create an "edit-log" file and keep it open to write out our info to it
    $editLogFile = fopen("edit-log", "w");
    if ($editLogFile === false)
    {
        die("Unable to create 'edit-log' file for page $title[$i]\n");
    }

    # Create "revisions" directory & change to it
    mkdir("revisions") or die;
    chdir("revisions") or die;

    echo "\tVersions: ";

    # Go through each version of the page
    for ($j = 0; $j < $numberOfRevisions; $j++)
    {
        echo ($j + 1) . "...";

        # Get file contents. file_get_contents() opens and closes the file
        # itself, so no handle stays open per revision.
        $fullPath = $versionsDir . "/" . $revisionTitle[$i][$j];
        $file_text = file_get_contents($fullPath);
        if ($file_text === false)
        {
            # Keep going with an empty revision so one unreadable file
            # does not abort the whole migration.
            echo "(unreadable: $fullPath)";
            $file_text = "";
        }
        $file_text = explode("\n", $file_text);

        # Copy text from old to new, fixing syntax as we go
        # - also pass in title for copying attachments and input & output directory to copy them
        $file_text = change_syntax($file_text, $title[$i], $inputDir, $outputDir);

        # Each revision file is an eight-digit zero-padded number, starting with 1 (so use $j+1)
        $revisionNumber = sprintf("%08d", $j + 1);
        $file = fopen($revisionNumber, "w");
        if ($file === false)
        {
            die("Unable to create revision file $revisionNumber for page $title[$i]\n");
        }

        # Create output file, one line at a time with trailing whitespace removed
        foreach ($file_text as $line)
        {
            fputs($file, rtrim($line) . "\n");
        }
        unset($file_text);

        # Close file
        fclose($file);

        # Get modification time from filename (format is filename.yyyy.mm.dd.hh.MM.ss)
        if (preg_match("/(\d{4})\.(\d{2})\.(\d{2})\.(\d{2})\.(\d{2})\.(\d{2})/", $revisionTitle[$i][$j], $modTimeString))
        {
            $modTime = mktime(
                (int) $modTimeString[4],   # hour
                (int) $modTimeString[5],   # minute
                (int) $modTimeString[6],   # second
                (int) $modTimeString[2],   # month
                (int) $modTimeString[3],   # day
                (int) $modTimeString[1]    # year
            );
        }
        else
        {
            # File name carries no timestamp: fall back to the source
            # file's own modification time, or "now" if that is unavailable.
            $modTime = filemtime($fullPath);
            if ($modTime === false)
            {
                $modTime = time();
            }
        }

        # touch file (named "revisionNumber") to correct modification time
        touch($revisionNumber, $modTime);

        # Write entry in edit-log file.
        # The timestamp is the Unix time with six zeros appended.
        $time = $modTime . "000000";
        $label = ($j == 0) ? "SAVENEW" : "SAVE";
        $entry = "$time\t$revisionNumber\t$label\t$title[$i]\t$ip\t$address\n";
        fputs($editLogFile, $entry);
    }
    echo "\n";

    # Close edit-log file
    fclose($editLogFile);

    # Back up to specific page directory
    chdir("..") or die;

    # chown & chmod to set correct permissions (this assumes we're running script as root)
    system("chown -R apache:apache .");
    system("chmod -R g+w .");
    system("chmod -R o-rx .");

    # Back up to "pages" directory
    chdir("..") or die;
}

# Back up to Moin directory
chdir("..") or die;
202
/**
 * Turn a vqWiki page name into the directory name used for the MoinMoin page.
 *
 * Spaces and "+" characters become underscores, and the result is converted
 * to UTF-8 exactly once. utf8_encode() treats its input as ISO-8859-1, so
 * running it a second time on already-converted text would mangle every
 * non-ASCII character.
 *
 * @param string $title page name as read from the vqWiki file name
 * @return string UTF-8 page name with " " and "+" replaced by "_"
 */
function fix_title($title)
{
    $title = str_replace(array(" ", "+"), "_", $title);
    return utf8_encode($title);
}
209
/**
 * Convert the lines of one vqWiki page revision to MoinMoin markup.
 *
 * Lines are processed in order and rewritten in place. Four flags track
 * whether the current line is inside a multi-line block (preformatted text,
 * {{{ }}} code, [<java>] code or [<html>] code); lines inside such a block
 * are passed through untouched until the block's end marker is seen.
 * All other lines go through the table, link, heading, list, line-break and
 * attachment rewrites, in that fixed order (several rules depend on running
 * before or after another one, as noted inline).
 *
 * Side effects: every "attach:NAME" reference found on a line is copied from
 * "$inputDir/upload/jsp/NAME" to "$outputDir/pages/$pageTitle/attachments/NAME"
 * (the attachments directory is created on demand), and a progress line is
 * echoed for each attachment.
 *
 * @param array  $array     lines of the page, without trailing newlines
 * @param string $pageTitle MoinMoin page directory name (used for the attachment target)
 * @param string $inputDir  vqWiki data directory
 * @param string $outputDir MoinMoin data directory
 * @return array the converted lines (same number of elements as the input)
 */
function change_syntax ($array, $pageTitle, $inputDir, $outputDir)
{
    # initialize block-state flags
    $in_preformatted_text = 0;
    $in_multiple_line_code = 0;
    $in_multiple_line_java_code = 0;
    $in_multiple_line_html_code = 0;

    # patterns
    $java_start_tag_pattern = "/\[<java>\]/";
    $java_end_tag_pattern = "/\[<\/java>\]/";
    $html_start_tag_pattern = "/\[<html>\]/";
    $html_end_tag_pattern = "/\[<\/html>\]/";

    for ($a = 0; $a < count($array); $a++)
    {
        # assign row as a reference to current array item, so every
        # assignment to $row below edits the array in place
        $row =& $array[$a];

        # Handle multiple-line preformatted text
        if ($in_preformatted_text)
        {
            # found the end? (a blank line closes the block)
            if (preg_match("/^\s*$/", $row))
            {
                $row = "}}}";
                $in_preformatted_text = 0;
            }
            else
            {
                # do nothing - skip all other substitutions
                continue;
            }
        }

        # Handle multiple-line code
        elseif ($in_multiple_line_code)
        {
            # found the end?
            if (preg_match("/}}}/", $row))
            {
                $in_multiple_line_code = 0;
            }
            else
            {
                # do nothing - skip all other substitutions
                continue;
            }
        }

        # Handle multiple-line java code
        elseif ($in_multiple_line_java_code)
        {
            # found the end?
            if (preg_match($java_end_tag_pattern, $row))
            {
                $row = preg_replace($java_end_tag_pattern, "}}}", $row);
                $in_multiple_line_java_code = 0;
            }
            else
            {
                # do nothing - skip all other substitutions
                continue;
            }
        }

        # Handle multiple-line html code
        elseif ($in_multiple_line_html_code)
        {
            # found the end?
            if (preg_match($html_end_tag_pattern, $row))
            {
                $row = preg_replace($html_end_tag_pattern, "}}}", $row);
                $in_multiple_line_html_code = 0;
            }
            else
            {
                # do nothing - skip all other substitutions
                continue;
            }
        }

        # Not in multiple-line preformatted text or multiple-line code block
        else
        {
            # Preformatted text - @@@@ on line by self, but ending on another line where it's all blank
            if (preg_match("/^@@@@\s*$/", $row))
            {
                $row = preg_replace("/^\s*@@@@\s*$/", "{{{", $row);
                $in_preformatted_text = 1;

                # Don't do any more processing on this line
                continue;
            }

            # Code - {{{ xxx }}} - may be on same or different lines
            if (preg_match("/{{{/", $row))
            {
                # if we don't find the closing braces, we are in a multiple-line code situation
                if (!preg_match("/}}}/", $row))
                {
                    $in_multiple_line_code = 1;
                }

                # Don't do any more processing on this line
                continue;
            }

            # Java Code - [<java>] xxx [</java>] - may be on same or different lines
            if (preg_match($java_start_tag_pattern, $row))
            {
                $row = preg_replace($java_start_tag_pattern, "{{{#!java", $row);

                # if we don't find the closing tag, we are in a multiple-line java code situation
                if (!preg_match($java_end_tag_pattern, $row))
                {
                    $in_multiple_line_java_code = 1;
                }
                # otherwise, replace end tag
                else
                {
                    $row = preg_replace($java_end_tag_pattern, "}}}", $row);
                }

                # also add line break after open tag
                $row = preg_replace("/{{{#!java/", "{{{#!java\n", $row);

                # Don't do any more processing on this line
                continue;
            }

            # HTML Code - [<html>] xxx [</html>] - may be on same or different lines
            if (preg_match($html_start_tag_pattern, $row))
            {
                $row = preg_replace($html_start_tag_pattern, "{{{#!html", $row);

                # if we don't find the closing tag, we are in a multiple-line html code situation
                if (!preg_match($html_end_tag_pattern, $row))
                {
                    $in_multiple_line_html_code = 1;
                }
                # otherwise, replace end tag
                else
                {
                    $row = preg_replace($html_end_tag_pattern, "}}}", $row);
                }

                # also add line break after open tag
                $row = preg_replace("/{{{#!html/", "{{{#!html\n", $row);

                # Don't do any more processing on this line
                continue;
            }

            # Tables
            $row = preg_replace("/####/", "", $row ); # don't need these
            $row = preg_replace("/^([^#]+)##/", "||$1||", $row, 1); # add 1st column start marker
            $row = preg_replace("/##/", "||", $row ); # all other markers

            # Backtick links: `link` => ["link"] - must come before 'No formatting code'
            $row = preg_replace("/`([^`]+)`/", "[\"$1\"]", $row);

            # C2 links
            $row = preg_replace("/c2:/", "wiki:Wiki:", $row);

            # No formatting code (__) - must come before underline conversion step
            $row = preg_replace("/__([^_]+)__/", "`$1`", $row);

            # Underline: ===text=== => __text__ (must come before headings)
            $row = preg_replace("/===([^=]+)===/", "__$1__", $row); # underline

            # Headings
            $row = preg_replace("/!!!([^!]+)!!!/", "= $1 =", $row); # heading level 1
            $row = preg_replace("/!!([^!]+)!!/", "== $1 ==", $row); # heading level 2
            $row = preg_replace("/!([^!]+)!/", "=== $1 ===", $row); # heading level 3

            # Bulleted Lists: (there may be a better way to do this)
            # NOTE(review): all three levels emit the same " * " text here, so
            # nesting depth is not carried over - possibly deeper indentation
            # was intended for levels 2 and 3; confirm against the wiki output.
            $row = preg_replace("/^\t\*/", " * ", $row); # bullet indented 1
            $row = preg_replace("/^\t\t\*/", " * ", $row); # bullet indented 2
            $row = preg_replace("/^\t\t\t\*/", " * ", $row); # bullet indented 3

            # Numbered Lists: # => 1. (note: there may be a better way to do this)
            # NOTE(review): same remark as for bullets - every level emits " 1. ".
            $row = preg_replace("/^\t\#/", " 1. ", $row); # item indented 1
            $row = preg_replace("/^\t\t\#/", " 1. ", $row); # item indented 2
            $row = preg_replace("/^\t\t\t\#/", " 1. ", $row); # item indented 3

            # Line breaks inside lists: a "@@" on a list line becomes a real
            # newline followed by the list item's own leading whitespace
            if (preg_match("/^(\s*)(1\.|\*)(.*)@@/", $row, $matches))
            {
                $leadingSpaces = $matches[1];
                $row = preg_replace("/@@/", "\n$leadingSpaces", $row);
            }

            # Other line breaks - appearing anywhere else
            $row = preg_replace("/@@/", " [[BR]] ", $row);

            # Attachments: attach: -> attachment: - Also copy attachments to new wiki
            # does not handle attachments in this format: attach:"File name with spaces" -- fix those manually

            $attachmentPattern = "/attach:([\w.-]+)/"; # this is not a complete filename regex, but works for me!!!

            if (preg_match_all($attachmentPattern, $row, $attachmentMatches))
            {
                # Fix syntax (rewrites every reference on the line)
                $row = preg_replace($attachmentPattern, "attachment:$1", $row);

                $newDirectory = "$outputDir/pages/$pageTitle/attachments";

                # Copy every distinct file referenced on this line, not just
                # the first one, so the rewritten links all resolve.
                foreach (array_unique($attachmentMatches[1]) as $attachmentFilename)
                {
                    $existingLocation = "$inputDir/upload/jsp/$attachmentFilename";
                    $newLocation = "$newDirectory/$attachmentFilename";

                    # Text that merely looks like "attach:xyz" may not name a
                    # real upload; report it and move on instead of failing in copy().
                    if (!is_file($existingLocation))
                    {
                        echo "\tattachment...$attachmentFilename (source file not found - skipped)\n";
                        continue;
                    }

                    if (!is_dir($newDirectory) && !mkdir($newDirectory))
                    {
                        echo "\tattachment...$attachmentFilename (cannot create $newDirectory - skipped)\n";
                        continue;
                    }

                    echo "\tattachment...$attachmentFilename\n";
                    copy($existingLocation, $newLocation);
                }
            }


            # Horizontal rules - no conversion necessary

            # Bold/italic - no conversion necessary


            # Handle line break issue: a plain text line directly followed by
            # another plain text line gets an explicit [[BR]] appended.
            # Look at next line
            if ($a+1 < count($array)) # only proceed if there are more lines
            {

                $nextRow = $array[$a+1];
                $emptyRowPattern = "/^\s*$/";

                # figure out if we should add a line break - only if all of these conditions are met
                if (!preg_match($emptyRowPattern, $row) # current row is not empty
                    && !preg_match("/----/", $row) # current row does not have horizontal rule
                    && !preg_match("/=+[^=]+=+/", $row) # current row is not a heading
                    && !preg_match($emptyRowPattern, $nextRow) # next row is not empty
                    && !preg_match("/^\t+[\*\#]/", $nextRow) # next row doesn't start with bullet or numbered item
                    && !preg_match("/##/", $nextRow) # next row doesn't contain table markup
                )
                {
                    # only if all above conditions are met do we add a break
                    $row .= " [[BR]]"; # include space before to prevent "Java:[[BR]] making an Interwiki link, among other things
                }
            }
        }

    }
    return $array;
}
475
476
477 # Code to fix titles that I did not need
478 /* $quoted = array();
479 $in_parenthesis = false;
480 for ($i = 0; $i < strlen($title[$a]); $i++)
481 {
482 $curchar = substr ($title[$a], $i, 1);
483 if (ereg('[^a-zA-Z0-9_]', $curchar))
484 {
485 if (!$in_parenthesis)
486 {
487 $quoted[] = '(';
488 $in_parenthesis = true;
489 }
490 $quoted[] = str_pad(dechex(ord($curchar)), 2, '0', STR_PAD_LEFT);
491 }
492 else
493 {
494 if ($in_parenthesis)
495 {
496 $quoted[] = ')';
497 $in_parenthesis = false;
498 }
499 $quoted[] = $curchar;
500 }
501 }
502 if ($in_parenthesis)
503 {
504 $quoted[] = ')';
505 }
506 $title[$a] = implode('', $quoted);
507 unset($quoted);
508 */
509 ?>
Attached Files
To refer to attachments on a page, use attachment:filename, as shown below in the list of files. Do NOT use the URL of the [get] link, since this is subject to change and can break easily. You are not allowed to attach a file to this page.