Attachment 'jsp2moin.pl'
Download 1 #!/usr/bin/perl
2
3 use File::Basename;
4 use File::stat;
5 use File::Copy;
6 use Time::localtime;
7
8 # 1) check with the others whether they can live with the current result
9 # 2) distribute some wiki admin capabilities amongst the possible volunteers (I am)
10 # 3) do a final conversion to capture the latest state
11 # 4) I will make the old Wiki read-only and stop the wikidiffmail script
12 # 5) Rewrite
13
14 # Manual tasks
15 # Put script at MoinMoin wiki, under UtilityTools, JSPConversionTool
16 # Create a homepage (based upon our old 'Main' page)
17 # Edit PoweredByCocoon page to add || to end of lines
18 # Move directives, e.g. [something] out of heading lines
19 # Fix orphaned pages [[OrphanedPages]]
20
21
# File-scoped state shared by every sub in this script.
# Option values, filled in by handleParameters():
my ($src, $dest, $history, $quiet, $exclusionFile, $baseFolder, $attachSrc);
my ($attachDest, $appendAttachments, $regexp, $regexpInv, $prefix, $rewriteMap, $appendRewriteMap);
# Working data built up while converting:
my (%editlog, $filecount, $authorcount, %exclusions, @commands, $longestPattern, %pageAttachments);
my ($rewriteMapHandle);
# Values of the -na and -p options; declared here so they are not stray
# package globals.
my ($attachments, $packageFile);

# The conversion pipeline, in execution order.
setCommands();
handleParameters();
init();
loadExclusions();
clean();
processAttachments();
processPages();
copyBasePages();
writeEditLog();
packageFiles();
complete();
38
# Reset the run counters and, when a rewrite map file was requested
# (-rw or -arw), open it on the shared bareword handle rewriteMapHandle
# that appendRewriteMap() prints to and complete() closes.
# Dies if the rewrite map cannot be opened, since every later write to it
# would otherwise be lost silently.
sub init {
    print "Starting...\n" unless $quiet;
    $filecount   = 0;
    $authorcount = 0;
    print "REWRITE MAP is $rewriteMap\n" unless $quiet;

    if ($rewriteMap ne "") {
        # -arw extends an existing map, -rw starts a fresh one.
        my $mode = $appendRewriteMap ? '>>' : '>';
        open(rewriteMapHandle, $mode, $rewriteMap)
            or die "Cannot open rewrite map '$rewriteMap': $!\n";
    }
}
50
# Apply every option's DEFAULT, then walk @ARGV and run the ACTION of the
# first entry in @commands whose PATTERN matches the argument exactly.
# ACTION subs that take a value consume it from @ARGV themselves.
# Also records the widest PATTERN in $longestPattern, which help() uses to
# align its output.
sub handleParameters {
    $longestPattern = 0 unless defined $longestPattern;
    foreach my $cmd (@commands) {
        $cmd->{DEFAULT}->();
        my $width = length($cmd->{PATTERN});
        $longestPattern = $width if $width > $longestPattern;
    }

    while (@ARGV) {
        my $arg   = shift(@ARGV);
        my $found = 0;
        foreach my $cmd (@commands) {
            my $pattern = $cmd->{PATTERN};
            # PATTERN is an alternation such as "-s|--source"; it has to be
            # grouped so that ^ and $ anchor both alternatives.
            next unless $arg =~ /^(?:$pattern)$/;
            $cmd->{ACTION}->();
            $found = 1;
            last;
        }
        warn "Ignoring unknown option '$arg'\n" unless $found;
    }
}
73
# Build the command-line option table in @commands.
# Each entry is a hash with:
#   PATTERN - alternation of the short and long option spellings
#   SYNTAX  - short form shown by usage()
#   DESC    - description shown by help()
#   DEFAULT - sub run for every option before the arguments are parsed
#   ACTION  - sub run when the option is given (may consume a value from @ARGV)
sub setCommands {
    # One row per option: pattern, syntax, description, default, action.
    my @rows = (
        ["-nh|--no-history", "-nh",
            "Do not process history - just pages",
            sub {$history=1}, sub {$history=0}],
        ["-q|--quiet", "-q",
            "Don't display anything, just do it",
            sub {$quiet=0}, sub {$quiet=1}],
        ["-s|--source", "-s <source folder>",
            "Specify the folder containing the JSPWiki pages",
            sub {$src = "jsp/pages"}, sub {$src=shift(@ARGV)}],
        ["-d|--destination", "-d <destination folder>",
            "Specify the folder to contain the MoinMoin pages",
            sub {$dest = "moin/wikidata"}, sub {$dest=shift(@ARGV)}],
        ["-x|--exclude", "-x <exclusions file>",
            "Specify a file listing JSPWiki pages to ignore",
            sub {$exclusionFile = dirname($0)."/jsp-exclude.txt"}, sub {$exclusionFile = shift(@ARGV)}],
        ["-na|--no-attachments", "-na",
            "Do not process attachments",
            sub {$attachments = 1}, sub {$attachments = 0}],
        ["-as|--attachment-source", "-as <attachment source folder>",
            "Specify the folder containing JSPWiki attachments",
            sub {$attachSrc = "jsp/attachments"}, sub {$attachSrc=shift(@ARGV)}],
        ["-ad|--attachment-destination", "-ad <attachment destination folder>",
            "Specify the folder to contain MoinMoin attachments",
            sub {$attachDest = "pages"}, sub {$attachDest=shift(@ARGV)}],
        ["-aa|--append-attachment-links", "-aa",
            "Appends links to all attachments to end of each page",
            sub {$appendAttachments=0}, sub {$appendAttachments=1}],
        ["-b|--base-folder", "-b <base folder>",
            "Specify the folder containing the default MoinMoin pages",
            sub {$baseFolder = "foo/data/text"}, sub {$baseFolder=shift(@ARGV)}],
        ["-p|--package", "-p <package file>",
            "Specify the tgz package name for content uploading",
            sub {$packageFile = "cocoon-wiki.tgz"}, sub {$packageFile = shift(@ARGV)}],
        ["-rp|--remove-prefix", "-rp <prefix>",
            "Specify a string to be removed from the beginning of a page name",
            sub {$prefix = ""}, sub {$prefix = shift(@ARGV)}],
        ["-rw|--create-rewrite-map", "-rw <rewrite map file>",
            "Build an Apache mod_rewrite rewrite map file for redirecting old JSP wiki to new Moin wiki",
            sub {$rewriteMap = ""}, sub {$rewriteMap = shift(@ARGV)}],
        ["-arw|--append-rewrite-map", "-arw <rewrite map file>",
            "Extend an existing Apache mod_rewrite rewrite map file for redirecting old JSP wiki to new Moin wiki",
            sub {$appendRewriteMap = 0}, sub {$rewriteMap = shift(@ARGV); $appendRewriteMap = 1}],
        ["-r|--page-regexp", "-r <regular expression>",
            "Specify a regular expression that which must match a page for that page to be converted",
            sub {$regexp = ".*"}, sub {$regexp = shift(@ARGV)}],
        ["-R|--inverse-page-regexp", "-R <regular expression>",
            "Specify a regular expression that which must *not* match a page for that page to be converted",
            sub {$regexpInv = ""}, sub {$regexpInv = shift(@ARGV)}],
        ["-h|--help", "-h",
            "Display this help",
            sub {}, sub {&help}],
    );

    @commands = map {
        my ($pattern, $syntax, $desc, $default, $action) = @{$_};
        +{
            PATTERN => $pattern,
            ACTION  => $action,
            DEFAULT => $default,
            DESC    => $desc,
            SYNTAX  => $syntax,
        };
    } @rows;
}
# Print a one-line synopsis: the script name followed by the SYNTAX string
# of every option in @commands, each followed by a space.
sub usage {
    my $synopsis = join("", map { "$_->{SYNTAX} " } @commands);
    print "jsp2moin.pl " . $synopsis . "\n";
}
187
# Print the synopsis followed by one line per option (its PATTERN, padded
# to a common width using $longestPattern, then its DESC), and exit.
sub help {
    usage();
    print "\n";
    for my $option (@commands) {
        my $pattern = $option->{PATTERN};
        my $padding = " " x ($longestPattern + 1 - length($pattern));
        print $pattern, $padding, "$option->{DESC}\n";
    }
    exit;
}
197
# Load the exclusions file into %exclusions.
# Blank lines and lines starting with '#' are skipped.  A line of the form
# "Old->New" records New as the value for Old (getOutputFilename() uses it
# as the replacement name); any other line marks that page with the value 1,
# which processPages() treats as "skip this page".
# A missing or unreadable file is not an error: no exclusions are loaded.
sub loadExclusions {
    print "Loading exclusions from $exclusionFile...\n" unless $quiet;

    my $fh;
    unless (open($fh, '<', $exclusionFile)) {
        print "No exclusions loaded ($exclusionFile: $!)\n" unless $quiet;
        return;
    }
    while (my $line = <$fh>) {
        # Strip the line ending, including the \r of CRLF files, so it does
        # not end up inside a page name.
        $line =~ s/\r?\n$//;
        next if $line =~ /^\s*#/ || $line =~ /^\s*$/;
        if ($line =~ /^(.*)->(.*)$/) {
            $exclusions{$1} = $2;
        } else {
            $exclusions{$line} = 1;
        }
    }
    close $fh;
}
212
# Remove the output folders left by a previous run and recreate them empty
# under $dest: text, backup and the attachment folder ($attachDest).
sub clean {
    print "Cleaning...\n" unless $quiet;

    # "pages" is always removed; $attachDest is removed too so that the
    # folder recreated below really starts out empty when -ad names
    # something else.
    my %seen;
    foreach my $folder ("text", "backup", "pages", $attachDest) {
        # Never let an empty or dot-only name turn this into "rm -rf $dest".
        next if !defined $folder || $folder =~ m{^\.{0,2}$};
        next if $seen{$folder}++;
        my $path = "$dest/$folder";
        # List form: the path is passed as one argument, not parsed by a shell.
        system("rm", "-rf", $path) if -e $path;
    }

    mkdir "$dest/text", 0777;
    mkdir "$dest/backup", 0777;
    mkdir "$dest/$attachDest", 0777;
}
222
# Copy the newest version of every attachment found under $attachSrc into
# "$dest/$attachDest/<output page name>/attachments/", and record the
# attachment names per page in %pageAttachments for the link conversion.
# Source files are expected at <page>-att/<name>-dir/<version>.<ext>.
# Does nothing when attachments are disabled (-na).
sub processAttachments {
    return unless $attachments;
    print "Processing attachments...\n" unless $quiet;

    # $latest{page}{attachment} = [version, extension] of the highest
    # version seen.  Version and extension are kept together so the path
    # rebuilt below always names a file that exists.
    my %latest;
    foreach my $path (glob("$attachSrc/*-att/*-dir/*")) {
        next if $path =~ /attachment.properties$/;
        next if $path =~ m#/\.{1,2}$#;
        my ($page, $attach, $no, $ext) =
            ($path =~ m#\Q$attachSrc\E/(.*)-att/(.*)-dir/(\d+)\.(.*)$#);
        next unless defined $page;    # not a versioned attachment file

        # Pages listed for exclusion are not converted, so their
        # attachments are not copied either.
        next if defined $exclusions{$page} && $exclusions{$page} eq "1";

        my $best = $latest{$page}{$attach};
        $latest{$page}{$attach} = [$no, $ext] if !$best || $no > $best->[0];
    }

    foreach my $page (sort keys %latest) {
        my $outfile = getOutputFilename($page);
        my $pageDir = "$dest/$attachDest/$outfile";
        mkdir $pageDir, 0777;
        mkdir "$pageDir/attachments", 0777;

        foreach my $attach (sort keys %{$latest{$page}}) {
            my ($no, $ext) = @{$latest{$page}{$attach}};
            my $infile  = "$attachSrc/$page-att/$attach-dir/$no.$ext";
            my $outpath = "$pageDir/attachments/$attach";
            push @{$pageAttachments{$page}}, $attach;
            copy($infile, $outpath)
                or warn "Could not copy $infile to $outpath: $!\n";
            print "\@" unless $quiet;
        }
    }
    print "\n" unless $quiet;
}
262
# Convert every page found in $src.
# For each page that is neither excluded nor filtered out by -r / -R:
#   * when history is enabled, convert each old version from
#     "$src/OLD/<page>/<n>.txt" into "$dest/backup/<name>.<mtime>" and add
#     an edit-log entry for it;
#   * record the page in the rewrite map when one was requested;
#   * convert the current text into "$dest/text/<name>" and add its
#     edit-log entry.
sub processPages {
    print "Processing...\n" unless $quiet;

    foreach my $file (getPages()) {
        my $outfile = getOutputFilename($file);
        next if $exclusions{$file}==1;
        next if (($regexp ne "" && $file!~/$regexp/) || ($regexpInv ne "" && $file=~/$regexpInv/));
        print $file unless $quiet;

        my %authors   = getAuthors($file);
        my $first     = 1;    # the first entry logged for a page is SAVENEW
        my $maxAuthor = 0;    # highest history version seen

        if ($history) {
            # Oldest first, so the SAVENEW entry goes to the earliest
            # version rather than to whichever file readdir returned first.
            my @versions = sort { $a <=> $b } getHistory("$src/OLD/$file");
            foreach my $version (@versions) {
                my $versionFile = "$src/OLD/$file/$version.txt";
                my $content = getContent($versionFile);
                $content = processPage($content, $file);
                $content = appendAttachments($content, $file) if $appendAttachments;
                my $timestamp = getDate($versionFile);
                addToEditLog($outfile,
                             $authors{$version},
                             $timestamp,
                             $authors{$version},
                             $first);
                writeContent("$dest/backup/$outfile.$timestamp", $content);
                $first = 0;
                $maxAuthor = $version if $version > $maxAuthor;
                print "." unless $quiet;
                $filecount++;
            }
        }
        appendRewriteMap($file, $outfile) if ($rewriteMap ne "");

        $authorcount += scalar(keys %authors);

        my $content = getContent("$src/$file.txt");
        # $file is passed so attachment links on the current page are
        # resolved the same way as in the history versions.
        $content = processPage($content, $file);
        $content = appendAttachments($content, $file) if $appendAttachments;
        my $timestamp = getDate("$src/$file.txt");
        writeContent("$dest/text/$outfile", $content);
        # The current text is one version past the newest history entry.
        addToEditLog($outfile,
                     $authors{$maxAuthor+1},
                     $timestamp,
                     $authors{$maxAuthor+1},
                     $first);
        print "*\n" unless $quiet;
        $filecount++;
    }
}
315
# Map a JSPWiki page name to the file name used in the output.
# A replacement name from %exclusions is applied first; then "_", ".", "-",
# "+" and "%2F" are rewritten to their "_xx" forms.  The -rp prefix is
# removed from the start of the name unless the name is exactly the prefix.
sub getOutputFilename {
    my $file = shift;
    $file = $exclusions{$file} if defined $exclusions{$file};

    my $havePrefix = defined $prefix && $prefix ne "";
    my $stripped   = 0;
    # The prefix is a literal string, so it is quoted, and it is matched
    # against the raw name: after escaping, a prefix such as "Wiki." would
    # no longer line up with "Wiki_2e...".
    if ($havePrefix && $file ne $prefix) {
        $stripped = 1 if $file =~ s/^\Q$prefix\E//;
    }

    $file=~s/_/_25/g;
    $file=~s/\./_2e/g;
    $file=~s/\-/_2d/g;
    $file=~s/\+/_2b/g;
    $file=~s/%2F/_2f/g;

    # Fallback for a prefix that was supplied in its escaped form.
    if ($havePrefix && !$stripped && $file ne $prefix) {
        $file =~ s/^\Q$prefix\E//;
    }
    return $file;
}
327
# Write one "<old page name> <new page name>" line to the rewrite map that
# init() opened on rewriteMapHandle.
sub appendRewriteMap {
    my ($infile, $outfile) = @_;
    my $entry = join(" ", $infile, $outfile) . "\n";
    print rewriteMapHandle $entry;
}
334
# Copy the stock pages from $baseFolder into "$dest/text", without
# overwriting a page the conversion already produced.  Directories
# (including "." and "..") and the CVS folder are skipped.
sub copyBasePages {
    print "Copying base pages...\n" unless $quiet;

    my $dh;
    unless (opendir($dh, $baseFolder)) {
        warn "Cannot read base folder '$baseFolder': $!\n";
        return;
    }
    my @files = readdir($dh);
    closedir $dh;

    foreach my $file (@files) {
        next if $file eq "CVS";
        # Check the source as well as the destination: a sub-folder that
        # does not exist in the destination yet is still not a page.
        next if -d "$baseFolder/$file" || -d "$dest/text/$file";

        if (-e "$dest/text/$file") {
            print "$file exists already\n";
            next;
        }

        copy("$baseFolder/$file", "$dest/text/$file")
            or warn "Could not copy base page $file: $!\n";
    }
}
352
# Write "$dest/editlog": one line per %editlog entry, ordered by numeric
# key.  The fields are page, author, timestamp, host and version; they are
# tab-separated, with three tabs between host and version.
sub writeEditLog {
    print "Writing Edit log...\n" unless $quiet;

    my $log;
    unless (open($log, '>', "$dest/editlog")) {
        warn "Cannot write $dest/editlog: $!\n";
        return;
    }
    foreach my $stamp (sort { $a <=> $b } keys %editlog) {
        my $entry = $editlog{$stamp};
        print $log $entry->{page},
                   "\t",
                   $entry->{author},
                   "\t",
                   $entry->{timestamp},
                   "\t",
                   $entry->{host},
                   "\t\t\t",
                   $entry->{version},
                   "\n";
    }
    close($log) or warn "Error closing $dest/editlog: $!\n";
}
372
373
# Final step of the run: close the rewrite map if one was in use and
# report completion.
sub complete {
    if ($rewriteMap ne "") {
        close rewriteMapHandle;
    }
    print "Done.\n" unless $quiet;
}
378
# Read "$src/OLD/<page>/page.properties" and return a hash mapping version
# number to author, taken from lines of the form "<n>.author=<value>".
# Dots in the author value are replaced by colons.
# Returns an empty hash when the properties file cannot be opened.
sub getAuthors {
    my $file = shift;
    my %authors;

    open(my $props, '<', "$src/OLD/$file/page.properties") or return %authors;
    while (my $line = <$props>) {
        $line =~ s/\r?\n$//;
        # Ignore comments and any other property: only author lines may
        # create an entry, otherwise a bogus empty key is stored.
        next unless $line =~ /^(\d+)\.author=(.*)$/;
        my ($version, $author) = ($1, $2);
        $author =~ s/\./:/g;
        $authors{$version} = $author;
    }
    close $props;
    return %authors;
}
392
# Record one edit in %editlog, keyed by timestamp.
# Arguments: page name, author, timestamp, host, and a flag that is true
# for the first entry of a page (logged as SAVENEW, later ones as SAVE).
sub addToEditLog {
    my ($page, $author, $timestamp, $host, $first) = @_;

    # Keys must be unique: move forward until a free timestamp is found.
    while (defined $editlog{$timestamp}) {
        $timestamp++;
    }

    return $editlog{$timestamp} = {
        page      => $page,
        author    => $author,
        timestamp => $timestamp,
        host      => $host,
        version   => ($first ? "SAVENEW" : "SAVE"),
    };
}
410
# Return every entry of the given directory as readdir reports it
# (including "." and ".."), or an empty list if it cannot be opened.
sub getDir {
    my $dir = shift;
    my @files;
    # A lexical handle, so nested or repeated calls cannot disturb one
    # another through a shared global directory handle.
    if (opendir(my $dh, $dir)) {
        @files = readdir($dh);
        closedir($dh);
    }
    return @files;
}
418
# Return the names, without the ".txt" extension, of every *.txt entry in
# the source folder $src.
sub getPages {
    return map { /^(.*)\.txt$/ ? $1 : () } getDir($src);
}
426
# Return the names, without the ".txt" extension, of every *.txt entry in
# the given history folder.
sub getHistory {
    my $page = shift;
    my @versions;
    for my $entry (getDir("$page")) {
        next unless $entry =~ /^(.*)\.txt$/;
        push @versions, $1;
    }
    return @versions;
}
435
# Return the modification time of the given file, using the object
# interface of File::stat.
sub getDate {
    my ($path) = @_;
    my $info = stat($path);
    return $info->mtime;
}
440
# Return the whole content of a file as one string, or "" (with a warning)
# when the file cannot be read.
sub getContent {
    my $file = shift;

    # Three-argument open: the name is taken literally, so a page name with
    # leading or trailing spaces, or mode characters such as ">" or "|",
    # is still read as that file.
    my $fh;
    unless (open($fh, '<', $file)) {
        warn "Cannot read $file: $!\n";
        return "";
    }
    local $/;    # slurp mode
    my $content = <$fh>;
    close $fh;
    return defined $content ? $content : "";
}
449
# Write $content to $file, replacing any existing file.
# Returns true on success; on failure a warning is issued and false is
# returned.
sub writeContent {
    my $file = shift;
    my $content = shift;

    # Three-argument open: the file name is taken literally.
    my $fh;
    unless (open($fh, '>', $file)) {
        warn "Cannot write $file: $!\n";
        return 0;
    }
    print $fh $content;
    # Buffered write errors only show up when the handle is closed.
    unless (close($fh)) {
        warn "Error writing $file: $!\n";
        return 0;
    }
    return 1;
}
457
458
# Convert JSPWiki [..] links in a piece of text to MoinMoin syntax and
# escape CamelCase words in the text between the links.
# Arguments: the text, and the name of the page it belongs to (used to
# recognise links to that page's attachments).
# Conversions, in order of precedence:
#   [[text]               -> [text]            (escaped bracket)
#   [1]                   -> [#1]
#   [#1]                  -> [[Anchor(1)]]
#   link to an attachment -> whatever getAttachmentLink() returns
#   [Label | http://..]   -> [http://.. Label]
#   [Label | Page Name]   -> [:PageName:Label]
#   [http://..]           -> [http://..]
#   [WikiName]            -> WikiName
#   [Other]               -> [:Other]
# Both http: and https: URLs are treated as external links.
sub processLinks {
    my $text = shift;
    my $file = shift;
    $text = "" unless defined $text;
    my $new = "";

    while ($text =~ /\[(.*?)\]/) {
        # Take everything needed from this match before running any other
        # regular expression.
        my $link   = $1;
        my $before = substr($text, 0, $-[0]);
        my $after  = substr($text, $+[0]);

        $new .= escapeCamelCase($before);
        $text = $after;

        if ($link =~ /^\[/) {                    # Quoted square brackets
            $link = "$link]";
        } elsif ($link =~ /^(\d+)$/) {
            $link = "[#$1]";
        } elsif ($link =~ /^#(\d+)$/) {
            $link = "[[Anchor($1)]]";
        } elsif (isAttachmentLink($link, $file)) {
            $link = getAttachmentLink($link, $file);
        } elsif ($link =~ /\|/) {
            if ($link =~ /^(.*)\s*\|\s*(https?:.*)$/) {         # URL with a label
                my ($label, $url) = ($1, $2);
                $label =~ s/\s+$//;
                $link = "[$url $label]";
            } elsif ($link =~ /^([^\|]+)\s*\|\s*(.*)/) {        # [SN | Steven Noels] -> [:StevenNoels:SN]
                my ($label, $target) = ($1, $2);
                $label =~ s/\s+$//;
                $target = filterLink($target);
                $target =~ s/\s//g;
                $target =~ s/\./_2e/g;
                $link = "[:$target:$label]";
            }
        } elsif ($link =~ /\s*(https?:[^\s]+)\s*/) {            # URL without a label
            $link = "[" . filterLink($1) . "]";
        } else {
            $link =~ s/\s//g;
            if ($link =~ /^([A-Z][a-z0-9]+){2,}$/) {            # [WikiPage] -> WikiPage
                $link = filterLink($link);
            } else {                                            # [Upayavira] -> [:Upayavira]
                $link = "[:" . filterLink($link) . "]";
            }
        }
        $new .= $link;
    }
    $new .= escapeCamelCase($text);
    return $new;
}
501
# Remove the -rp prefix from the start of a link target.  The target is
# returned unchanged when no prefix is set or when it is exactly the prefix.
sub filterLink {
    my $link = shift;
    # The prefix is a plain string, so it is quoted: characters such as "."
    # must not act as regular-expression wildcards.
    if (defined $prefix && $prefix ne "" && $link ne $prefix) {
        $link =~ s/^\Q$prefix\E//;
    }
    return $link;
}
507
# Put a "!" in front of every CamelCase word (two or more runs of an
# upper-case letter followed by lower-case letters or digits) and return
# the resulting text.
sub escapeCamelCase {
    (my $escaped = shift) =~ s/(([A-Z][a-z0-9]+){2,})/!$1/g;
    return $escaped;
}
513
# True when getAttachmentLink() would rewrite this link, that is, when its
# result differs from the link passed in.
sub isAttachmentLink {
    my ($link, $file) = @_;
    my $resolved = getAttachmentLink($link, $file);
    return $resolved ne $link;
}
519
# Return "attachment:<name> " when the link (or, for "Label | target"
# links, its target) contains the name of one of the page's attachments as
# recorded in %pageAttachments.  In every other case the link is returned
# exactly as it was passed in, which is what isAttachmentLink() relies on.
# http: and https: links are never treated as attachments.
sub getAttachmentLink {
    my $link = shift;
    my $file = shift;

    my $target = $link;
    if ($link =~ /\|/) {
        if ($link =~ /^(.*)\s*\|\s*(https?:.*)$/) {       # URL with a label
            return $link;
        } elsif ($link =~ /^([^\|]+)\s*\|\s*(.*)/) {      # label | target
            # Only the target is compared; the link itself is left alone so
            # that a non-attachment link comes back unchanged.
            $target = $2;
        }
    } elsif ($link =~ /\s*(https?:[^\s]+)\s*/) {          # URL without a label
        return $link;
    }

    foreach my $attachment (@{ $pageAttachments{$file} || [] }) {
        # Attachment names are file names; match them literally.
        if ($target =~ /\s*\Q$attachment\E\s*/) {
            return "attachment:$attachment ";
        }
    }
    return $link;
}
540
# Append an "Attachment:" line for every attachment recorded for the page
# in %pageAttachments, preceded by two line breaks, and return the result.
# Content of pages without attachments is returned unchanged.  An "@" is
# printed for each attachment as a progress mark.
sub appendAttachments {
    my ($content, $file) = @_;
    my @names = @{ $pageAttachments{$file} || [] };
    return $content unless @names;

    $content .= "[[BR]]\n[[BR]]\n";
    for my $name (@names) {
        print "@";
        $content .= "'''Attachment:''' attachment:$name [[BR]]\n";
    }
    return $content;
}
552
# Convert one page of JSPWiki markup.
# {{inline code}} is first widened to {{{inline code}}}.  The text is then
# split at {{{ ... }}} blocks: the blocks are copied through untouched and
# everything between them is converted by processLines().
sub processPage {
    my ($page, $file) = @_;

    $page=~s#([^\{])\{\{([^{}]*?)\}\}(?!\})#$1\{\{\{$2\}\}\}#sg;

    my $converted = "";
    my $cursor    = 0;    # offset just past the previous code block
    while ($page =~ /\{\{\{.*?\}\}\}/sg) {
        my ($start, $end) = ($-[0], $+[0]);
        my $plain = substr($page, $cursor, $start - $cursor);
        my $code  = substr($page, $start, $end - $start);
        $converted .= processLines($plain, $file) . $code;
        $cursor = $end;
    }
    my $tail = substr($page, $cursor);
    $converted .= processLines($tail, $file);
    return $converted;
}
572
# Show a piece of content in the "less" pager, under a description that is
# underlined with dashes.
sub less {
    my ($description, $content) = @_;
    my $underline = "-" x length($description);
    open(my $pager, "|less");
    print $pager "$description\n", $underline, "\n\n";
    print $pager $content;
    close($pager);
}
581
# Convert the JSPWiki markup of a piece of text that lies outside code
# blocks and return the result.
# Arguments: the text, and the page name (passed on to processLinks()).
# Headings are converted before links are processed: processLinks() puts a
# "!" in front of CamelCase words, and a line starting with such a word
# would otherwise be mistaken for a JSPWiki "!" heading.
sub processLines {
    my ($text, $file) = @_;
    $text = "" unless defined $text;

    my $m = chr(13);
    $text =~ s#$m##g;                                  # Drop carriage returns

    $text =~ s#^[\t ]*\!\!\!(.*)$#= $1 =#gm;            # Largest Heading
    $text =~ s#^[\t ]*\!\!(.*)$#== $1 ==#gm;            # Middle Heading
    $text =~ s#^[\t ]*\!(.*)$#=== $1 ===#gm;            # Smallest Heading
    $text =~ s#^(=+)\s\d+\.#$1 #gm;                     # Remove Numbers from headings; Moin adds them itself

    $text = processLinks($text, $file);

    $text =~ s#__#'''#gm;                               # Bold
    # NOTE(review): the nested-list rules below all produce the same
    # single-space indent, so the nesting depth is lost - confirm the
    # intended indentation.
    $text =~ s#^\*\*\*# *#gm;                           # Nested Bulletted lists
    $text =~ s#^\*\*\*# *#gm;                           # Nested Bulletted lists
    $text =~ s#^\*\*# *#gm;                             # Nested Bulletted lists
    $text =~ s#^\*# * #gm;                              # Bulletted lists
    $text =~ s#^\## 1.#gm;                              # Numbered lists
    $text =~ s#\\\\#[[BR]]#gm;                          # Line Breaks
    $text =~ s#\|\|(.*?)(?=\|\|)#\|'''$1'''#gm;         # Table Headers
    $text =~ s#\|#\|\|#gm;                              # Table Entries
    # ''Italics'' (not supported in JSPWiki)
    # ^superscript^ (not supported in JSPWiki)
    # ,,subscript,, (not supported in JSPWiki)
    return $text;
}
604
# Zip the converted output (pages, text, backup and editlog under $dest)
# into moin-wiki-site.zip and move the archive to the directory the script
# was started from.
sub packageFiles {
    print "Making archive...\n" unless $quiet;

    # pwd prints a trailing newline; without chomp the chdir back and the
    # target path of the move would both name a non-existent directory.
    chomp(my $cwd = `pwd`);

    unless (chdir $dest) {
        warn "Cannot enter $dest: $!\n";
        return;
    }
    system("zip", "-rq", "moin-wiki-site.zip", "pages", "text", "backup", "editlog");
    chdir($cwd) or die "Cannot return to $cwd: $!\n";

    # move() falls back to copy-and-delete when a plain rename is not
    # possible, e.g. across file systems.
    move("$dest/moin-wiki-site.zip", "$cwd/moin-wiki-site.zip")
        or warn "Could not move moin-wiki-site.zip to $cwd: $!\n";
}
Attached Files
To refer to attachments on a page, use attachment:filename, as shown below in the list of files. Do NOT use the URL of the [get] link, since this is subject to change and can break easily. You are not allowed to attach a file to this page.