Attachment 'vqwiki_to_moin.php'
Download 1 #!/usr/local/bin/php
2
3 # Copy data from vqWiki to MoinMoin wiki
4 # Jeff Olson <jeff@olsonzoo.com> - October 20, 2005
5 # Based on "mediawiki2moin.php" from http://moinmoin.wikiwikiweb.de/MediaWikiConverter
6
7 <?php
8
# Set these variables.
# - $inputDir is the location of the vqWiki data files
# - $outputDir is the location where the MoinMoin "pages" directory is located
# WARNING! Any existing page in $outputDir is deleted if a page of the same name exists in $inputDir

$inputDir = '/home/wiki/wiki';
$outputDir = '/codeswiki/data';

echo "*****\n\nReading Existing Files\n*****\n";

# Read input data from vqWiki.
# - $title[n] is the page name (the file name without its trailing ".txt")
# - $text[n] is the raw page body
# Both arrays are initialised up front so that later code can count() them
# even when the directory is empty or cannot be opened.
$title = array();
$text = array();
$a = 0;
if ($handle = opendir($inputDir))
{
    echo "Directory handle: $handle\n";
    echo "Files:\n";

    while (false !== ($file = readdir($handle)))
    {
        $fullPath = $inputDir . "/" . $file;

        # Only regular *.txt files are pages; this also skips ".", ".." and
        # sub-directories such as the versions/upload folders.
        # To test on specific files, add e.g.
        #   || !fnmatch("Data+Migrations.txt", $file)
        # to the condition below.
        if (!fnmatch("*.txt", $file) || !is_file($fullPath))
        {
            continue;
        }

        echo "$file\n";

        # file_get_contents() opens, reads and closes in one step, so no
        # file handle is left open per directory entry.
        $contents = file_get_contents($fullPath);
        if ($contents === false)
        {
            # Skip rather than migrate an empty page over an existing one.
            echo "\tWARNING: could not read $fullPath - skipped\n";
            continue;
        }

        # Strip only the extension at the end of the name.
        $title[$a] = preg_replace("/\.txt$/", "", $file);
        $text[$a] = $contents;
        $a++;
    }

    closedir($handle);
}
else
{
    echo "WARNING: could not open input directory $inputDir\n";
}
53
54 # Get historical versions - still needs some work
55 #$versionsDir = "$inputDir/versions";
56 #chdir($versionsDir);
57 #for ($i = 0; $i < count($title); $i++)
58 #{
59 # $historical[$i] = glob("$title[$i].txt.*");
60 # print_r($historical[$i]);
61 # echo "\t" . count($historical[$i]) . " versions found\n";
62 #}
63
64
# Go to output directory for MoinMoin wiki
echo "\n\n*****\nCreating New Files\n*****\n";
chdir($outputDir) or die("Cannot change to output directory $outputDir\n");
chdir("pages") or die("Cannot change to $outputDir/pages\n");

# For every page read from vqWiki, (re)create <title>/current and
# <title>/revisions/00000001 below the MoinMoin pages directory.
$count = isset($title) ? count($title) : 0;
for ($a = 0; $a < $count; $a++)
{
    echo "$a: $title[$a]\n";

    # Fix title
    $title[$a] = fix_title($title[$a]);
    echo "\tfixed: $title[$a]\n";

    # A title that is empty or names the current/parent directory cannot be
    # a page folder; skip it instead of aborting the whole migration.
    if ($title[$a] === "" || $title[$a] === "." || $title[$a] === "..")
    {
        echo "\tWARNING: unusable page name - skipped\n";
        continue;
    }

    # Parse historical versions for date & time, ip address
    # TODO

    # Delete existing folder for page.
    # The title comes from a file name, so it is single-quoted for the shell
    # by hand (each ' becomes '\''); escapeshellarg() is avoided because it
    # can drop non-ASCII bytes depending on the locale, which would make rm
    # act on a different name. "--" stops a leading "-" being read as an option.
    $quotedTitle = "'" . str_replace("'", "'\\''", $title[$a]) . "'";
    system("rm -rf -- " . $quotedTitle);

    # Remake folder for page
    mkdir($title[$a]) or die("Cannot create page folder $title[$a]\n");
    chdir($title[$a]) or die("Cannot change to page folder $title[$a]\n");

    # Write out needed files & directories.
    # "current" holds the name of the revision file written below.
    $file = fopen("current", "w") or die("Cannot write $title[$a]/current\n");
    fputs($file, "00000001");
    fclose($file);
    mkdir("revisions") or die("Cannot create $title[$a]/revisions\n");
    chdir("revisions") or die("Cannot change to $title[$a]/revisions\n");

    $file = fopen("00000001", "w") or die("Cannot write $title[$a]/revisions/00000001\n");
    $file_text = explode("\n", $text[$a]);

    # Copy text from old to new, fixing syntax as we go
    # - also pass in title for copying attachments and input & output directory to copy them
    $file_text = change_syntax($file_text, $title[$a], $inputDir, $outputDir);

    # Create output file: one line per row, trailing whitespace removed
    foreach ($file_text as $line)
    {
        fputs($file, rtrim($line) . "\n");
    }
    unset($file_text);
    fclose($file);

    # back to the page folder
    chdir("..") or die("Cannot leave $title[$a]/revisions\n");

    # chown & chmod to set correct permissions (this assumes we're running script as root)
    system("chown -R apache:apache .");
    system("chmod -R g+w .");
    system("chmod -R o-rx .");

    # back to the pages directory
    chdir("..") or die("Cannot leave page folder $title[$a]\n");
}
chdir("..") or die("Cannot leave the pages directory\n");
132
# Turn a vqWiki page name into a MoinMoin page folder name.
#
# Spaces and "+" both become "_", and the result is converted from
# ISO-8859-1 to UTF-8 exactly once (encoding twice, as before, turned every
# non-ASCII character into two garbage characters).
#
# $title - page name as taken from the vqWiki file name (ISO-8859-1 bytes,
#          which is what utf8_encode() always assumed)
# returns  the UTF-8 encoded folder name
function fix_title($title)
{
    $title = str_replace(array(" ", "+"), "_", $title);

    # utf8_encode() is deprecated as of PHP 8.2; prefer mbstring when present
    # and fall back to utf8_encode() on installations without it.
    if (function_exists('mb_convert_encoding'))
    {
        return mb_convert_encoding($title, 'UTF-8', 'ISO-8859-1');
    }
    return utf8_encode($title);
}
139
# Convert the lines of one vqWiki page to MoinMoin markup.
#
# $array     - the page text, one array element per line
# $pageTitle - the (already fixed) page name; attachments are copied to
#              $outputDir/pages/$pageTitle/attachments
# $inputDir  - vqWiki data directory; attachments are read from
#              $inputDir/upload/jsp
# $outputDir - MoinMoin data directory
# returns      the converted lines (same number of elements; an element may
#              now contain an embedded "\n")
#
# Lines inside a multi-line block (preformatted text, {{{ }}} code,
# [<java>] or [<html>] code) are passed through without markup conversion.
function change_syntax ($array, $pageTitle, $inputDir, $outputDir)
{
    # Tag-delimited code blocks: MoinMoin parser name => (start tag pattern, end tag pattern).
    # Checked in this order, so [<java>] wins when a line holds both start tags.
    $tagBlocks = array(
        "java" => array("/\[<java>\]/", "/\[<\/java>\]/"),
        "html" => array("/\[<html>\]/", "/\[<\/html>\]/"),
    );

    # The multi-line block we are currently inside:
    # "" (none), "preformatted", "code", or a key of $tagBlocks.
    # Only one block can be open at a time, so a single state is enough.
    $inside = "";

    $rowCount = count($array);
    for ($a = 0; $a < $rowCount; $a++)
    {
        # assign row as a reference to current array item
        $row =& $array[$a];

        # Inside preformatted text: a blank line closes it, anything else is left alone
        if ($inside === "preformatted")
        {
            if (preg_match("/^\s*$/", $row))
            {
                $row = "}}}";
                $inside = "";
            }
            continue;
        }

        # Inside {{{ ... }}} code: already MoinMoin syntax, just watch for the end
        if ($inside === "code")
        {
            if (strpos($row, "}}}") !== false)
            {
                $inside = "";
            }
            continue;
        }

        # Inside [<java>] / [<html>] code: translate the end tag when it shows up
        if ($inside !== "")
        {
            $endPattern = $tagBlocks[$inside][1];
            if (preg_match($endPattern, $row))
            {
                $row = preg_replace($endPattern, "}}}", $row);
                $inside = "";
            }
            continue;
        }

        # Preformatted text - @@@@ on a line by itself, ended later by a blank line
        if (preg_match("/^@@@@\s*$/", $row))
        {
            $row = "{{{";
            $inside = "preformatted";
            continue;
        }

        # Code - {{{ xxx }}} - may be on same or different lines
        if (strpos($row, "{{{") !== false)
        {
            # no closing braces on this line means a multiple-line code block
            if (strpos($row, "}}}") === false)
            {
                $inside = "code";
            }
            continue;
        }

        # Java / HTML code - [<java>] xxx [</java>] - may be on same or different lines
        foreach ($tagBlocks as $parser => $patterns)
        {
            if (!preg_match($patterns[0], $row))
            {
                continue;
            }

            # start tag becomes the parser line, followed by a line break
            $row = preg_replace($patterns[0], "{{{#!" . $parser . "\n", $row);

            if (preg_match($patterns[1], $row))
            {
                $row = preg_replace($patterns[1], "}}}", $row);
            }
            else
            {
                # no closing tag here: the block continues on following lines
                $inside = $parser;
            }

            # Don't do any more processing on this line
            continue 2;
        }

        # Ordinary line: convert markup, copy attachments, then decide on a forced break
        $row = change_syntax_inline_markup($row);
        $row = change_syntax_attachments($row, $pageTitle, $inputDir, $outputDir);

        if ($a + 1 < $rowCount && change_syntax_needs_break($row, $array[$a + 1]))
        {
            # include space before to prevent "Java:[[BR]]" making an Interwiki link, among other things
            $row .= " [[BR]]";
        }
    }
    unset($row); # drop the reference so later writes cannot hit the last element

    return $array;
}

# Convert the inline vqWiki markup of a single ordinary line (tables, links,
# no-format text, underline, headings, lists, @@ line breaks) and return the result.
# The order of the steps matters; see the notes on the individual steps.
function change_syntax_inline_markup($row)
{
    # Tables
    $row = str_replace("####", "", $row);                    # don't need these
    $row = preg_replace("/^([^#]+)##/", "||$1||", $row, 1);  # add 1st column start marker
    $row = str_replace("##", "||", $row);                    # all other markers

    # Backtick links: `link` => ["link"] - must come before 'No formatting code'
    $row = preg_replace("/`([^`]+)`/", "[\"$1\"]", $row);

    # C2 links
    $row = str_replace("c2:", "wiki:Wiki:", $row);

    # No formatting code (__) - must come before underline conversion step
    $row = preg_replace("/__([^_]+)__/", "`$1`", $row);

    # Underline: ===text=== => __text__ (must come before headings)
    $row = preg_replace("/===([^=]+)===/", "__$1__", $row);

    # Headings
    $row = preg_replace("/!!!([^!]+)!!!/", "= $1 =", $row);  # heading level 1
    $row = preg_replace("/!!([^!]+)!!/", "== $1 ==", $row);  # heading level 2
    $row = preg_replace("/!([^!]+)!/", "=== $1 ===", $row);  # heading level 3

    # Lists: leading tabs followed by * (bullet) or # (numbered item).
    # Each tab becomes one space of indentation, so deeper items are indented
    # further than their parents and the nesting survives, at any depth.
    if (preg_match("/^(\t+)([*#])/", $row, $listMatch))
    {
        $depth = strlen($listMatch[1]);
        $marker = ($listMatch[2] === "*") ? "* " : "1. ";
        $row = str_repeat(" ", $depth) . $marker . substr($row, $depth + 1);
    }

    # Line breaks inside lists: start a new line carrying the item's leading whitespace
    if (preg_match("/^(\s*)(1\.|\*)(.*)@@/", $row, $matches))
    {
        $row = str_replace("@@", "\n" . $matches[1], $row);
    }

    # Other line breaks - appearing anywhere else
    $row = str_replace("@@", " [[BR]] ", $row);

    # Horizontal rules and bold/italic need no conversion

    return $row;
}

# Rewrite attach:name links to attachment:name and copy every referenced file
# from $inputDir/upload/jsp to $outputDir/pages/$pageTitle/attachments.
# Returns the rewritten line. Copy problems are reported but do not stop the run.
# Does not handle attachments in this format: attach:"File name with spaces" -- fix those manually
function change_syntax_attachments($row, $pageTitle, $inputDir, $outputDir)
{
    $attachmentPattern = "/attach:([\w.-]+)/"; # not a complete filename regex

    if (!preg_match_all($attachmentPattern, $row, $attachmentMatches))
    {
        return $row;
    }

    # Fix syntax (all links on the line)
    $row = preg_replace($attachmentPattern, "attachment:$1", $row);

    $newDirectory = "$outputDir/pages/$pageTitle/attachments";
    if (!is_dir($newDirectory) && !mkdir($newDirectory))
    {
        echo "\tWARNING: could not create $newDirectory\n";
    }

    # Copy every attachment named on the line, not just the first one
    foreach ($attachmentMatches[1] as $attachmentFilename)
    {
        $existingLocation = "$inputDir/upload/jsp/$attachmentFilename";
        $newLocation = "$newDirectory/$attachmentFilename";

        echo "\tattachment...$attachmentFilename\n";
        if (!is_file($existingLocation) || !copy($existingLocation, $newLocation))
        {
            echo "\tWARNING: could not copy $existingLocation\n";
        }
    }

    return $row;
}

# Decide whether a forced line break must be appended to $row.
# $row is the converted current line, $nextRow the not yet converted next line.
# Returns true only when every condition below holds.
function change_syntax_needs_break($row, $nextRow)
{
    $emptyRowPattern = "/^\s*$/";

    return !preg_match($emptyRowPattern, $row)       # current row is not empty
        && !preg_match("/----/", $row)               # current row does not have horizontal rule
        && !preg_match("/=+[^=]+=+/", $row)          # current row is not a heading
        && !preg_match($emptyRowPattern, $nextRow)   # next row is not empty
        && !preg_match("/^\t+[\*\#]/", $nextRow)     # next row doesn't start with bullet or numbered item
        && !preg_match("/##/", $nextRow);            # next row doesn't contain table markup
}
405
406
407 # Code to fix titles that I did not need
408 /* $quoted = array();
409 $in_parenthesis = false;
410 for ($i = 0; $i < strlen($title[$a]); $i++)
411 {
412 $curchar = substr ($title[$a], $i, 1);
413 if (ereg('[^a-zA-Z0-9_]', $curchar))
414 {
415 if (!$in_parenthesis)
416 {
417 $quoted[] = '(';
418 $in_parenthesis = true;
419 }
420 $quoted[] = str_pad(dechex(ord($curchar)), 2, '0', STR_PAD_LEFT);
421 }
422 else
423 {
424 if ($in_parenthesis)
425 {
426 $quoted[] = ')';
427 $in_parenthesis = false;
428 }
429 $quoted[] = $curchar;
430 }
431 }
432 if ($in_parenthesis)
433 {
434 $quoted[] = ')';
435 }
436 $title[$a] = implode('', $quoted);
437 unset($quoted);
438 */
439 ?>
Attached Files
To refer to attachments on a page, use attachment:filename, as shown below in the list of files. Do NOT use the URL of the [get] link, since this is subject to change and can break easily.You are not allowed to attach a file to this page.