#!/usr/bin/perl

use File::Basename;
use File::stat;
use File::Copy;
use Time::localtime;

# 1) check with the others whether they can live with the current result
# 2) distribute some wiki admin capabilities amongst the possible volunteers (I am)
# 3) do a final conversion to capture the latest state
# 4) I will make the old Wiki read-only and stop the wikidiffmail script
# 5) Rewrite 

# Manual tasks
# Put script at MoinMoin wiki, under UtilityTools, JSPConversionTool
# Create a homepage (based upon our old 'Main' page)
# Edit PoweredByCocoon page to add || to end of lines
# Move directives, e.g. [something] out of heading lines
# Fix orphaned pages [[OrphanedPages]]


# File-wide state. The script has no "use strict", and every sub below reads
# and writes these lexicals directly instead of taking them as arguments.
#
# Option values; their defaults are assigned by the DEFAULT closures built in
# setCommands and run from handleParameters.
my ($src, $dest, $history, $quiet, $exclusionFile, $baseFolder, $attachSrc);
my ($attachDest, $appendAttachments, $regexp, $regexpInv, $prefix, $rewriteMap, $appendRewriteMap);
# Working data: edit-log entries keyed by timestamp, counters, the
# exclusion/rename table, the option table, the help column width and the
# attachment names found for each page.
my (%editlog, $filecount, $authorcount, %exclusions, @commands, $longestPattern, %pageAttachments);
# NOTE(review): this scalar is not used in the visible code; the rewrite map
# is written through the bareword handle rewriteMapHandle instead.
my ($rewriteMapHandle);

# Main sequence. Order matters: the option table must exist before the
# arguments are parsed, and attachments are indexed before pages are
# converted so that page links can be resolved against %pageAttachments.
&setCommands;
&handleParameters;
&init;
&loadExclusions;
&clean;
&processAttachments;
&processPages;
&copyBasePages;
&writeEditLog;
&packageFiles;
&complete;

# Reset the counters and, when a rewrite map was requested, open it on the
# shared bareword handle rewriteMapHandle (used by appendRewriteMap and
# closed by complete).
#
# The map is opened for appending when $appendRewriteMap is set and is
# truncated otherwise. Dies if the map cannot be opened, because every later
# write to the handle would otherwise be lost silently.
sub init {
  print "Starting...\n" unless $quiet;
  $filecount = 0;
  $authorcount = 0;

  # Nothing more to do unless a rewrite map file name was given.
  return unless defined $rewriteMap && $rewriteMap ne "";

  print "REWRITE MAP is $rewriteMap\n" unless $quiet;
  my $mode = $appendRewriteMap ? '>>' : '>';
  # Three-argument open: the file name can never be mistaken for a mode.
  open(rewriteMapHandle, $mode, $rewriteMap)
    or die "Cannot open rewrite map '$rewriteMap': $!\n";
}

# Parse @ARGV against the option table in @commands.
#
# First every entry's DEFAULT closure is run (and the widest PATTERN is
# recorded in $longestPattern for the help layout). Then each argument is
# compared with the PATTERN of every entry in table order; the first entry
# whose pattern matches the whole argument has its ACTION closure run.
# ACTION closures that take a value shift it off @ARGV themselves.
#
# Arguments that match no entry are reported on STDERR and skipped.
sub handleParameters {
  $longestPattern = 0 unless defined $longestPattern;
  foreach my $entry (@commands) {
    $entry->{DEFAULT}->();
    my $width = length($entry->{PATTERN});
    $longestPattern = $width if $width > $longestPattern;
  }

  while (@ARGV) {
    my $arg = shift(@ARGV);
    my $found = 0;
    foreach my $entry (@commands) {
      my $pattern = $entry->{PATTERN};
      # PATTERN is an alternation such as "-r|--page-regexp". It has to be
      # grouped before anchoring, otherwise ^ binds only to the first
      # alternative and $ only to the last, and "-rx" would match "-r".
      if ($arg =~ /^(?:$pattern)$/) {
        $entry->{ACTION}->();
        $found = 1;
        last;
      }
    }
    warn "Ignoring unrecognised argument '$arg'\n" unless $found;
  }
}

# Build the command-line option table in @commands.
#
# Each entry is a hash with:
#   PATTERN  alternation of the short and long flag spelling
#   ACTION   closure run when the flag is seen; flags that take a value
#            shift it off @ARGV themselves
#   DEFAULT  closure run once before parsing to set the option's default
#   DESC     help text shown by help
#   SYNTAX   fragment shown in the usage line
#
# Entries are tried in table order and the first matching PATTERN wins (see
# handleParameters), so keep more specific flags such as -rp/-rw ahead of
# shorter ones such as -r.
sub setCommands {
  @commands = ({
                PATTERN=>"-nh|--no-history",
                ACTION=>sub {$history=0},
                DEFAULT=>sub {$history=1},
                DESC=>"Do not process history - just pages",
                SYNTAX=>"-nh"
               }, {
                PATTERN=>"-q|--quiet",
                ACTION=>sub {$quiet=1},
                DEFAULT=>sub {$quiet=0},
                DESC=>"Don't display anything, just do it",
                SYNTAX=>"-q"
               }, {
                PATTERN=>"-s|--source",
                ACTION=>sub {$src=shift(@ARGV)},
                DEFAULT=>sub {$src = "jsp/pages"},
                DESC=>"Specify the folder containing the JSPWiki pages",
                SYNTAX=>"-s <source folder>"
               }, {
                PATTERN=>"-d|--destination",
                ACTION=>sub {$dest=shift(@ARGV)},
                DEFAULT=>sub {$dest = "moin/wikidata"},
                DESC=>"Specify the folder to contain the MoinMoin pages",
                SYNTAX=>"-d <destination folder>"
               }, {
                PATTERN=>"-x|--exclude",
                ACTION=>sub {$exclusionFile = shift(@ARGV)},
                DEFAULT=>sub {$exclusionFile = dirname($0)."/jsp-exclude.txt"},
                DESC=>"Specify a file listing JSPWiki pages to ignore",
                SYNTAX=>"-x <exclusions file>"
               }, {
                # $attachments is not in the file-level "my" lists, so it is a
                # package global; processAttachments reads it.
                PATTERN=>"-na|--no-attachments",
                ACTION=>sub {$attachments = 0},
                DEFAULT=>sub {$attachments = 1},
                DESC=>"Do not process attachments",
                SYNTAX=>"-na"
               }, {
                PATTERN=>"-as|--attachment-source",
                ACTION=>sub {$attachSrc=shift(@ARGV)},
                DEFAULT=>sub {$attachSrc = "jsp/attachments"},
                DESC=>"Specify the folder containing JSPWiki attachments",
                SYNTAX=>"-as <attachment source folder>"
               }, {
                PATTERN=>"-ad|--attachment-destination",
                ACTION=>sub {$attachDest=shift(@ARGV)},
                DEFAULT=>sub {$attachDest = "pages"},
                DESC=>"Specify the folder to contain MoinMoin attachments",
                SYNTAX=>"-ad <attachment destination folder>"
               }, {
                PATTERN=>"-aa|--append-attachment-links",
                ACTION=>sub {$appendAttachments=1},
                DEFAULT=>sub {$appendAttachments=0},
                DESC=>"Appends links to all attachments to end of each page",
                SYNTAX=>"-aa"
               }, {
                PATTERN=>"-b|--base-folder",
                ACTION=>sub {$baseFolder=shift(@ARGV)},
                DEFAULT=>sub {$baseFolder = "foo/data/text"},
                DESC=>"Specify the folder containing the default MoinMoin pages",
                SYNTAX=>"-b <base folder>"
               }, {
                # NOTE(review): $packageFile is also an undeclared package
                # global and is not read anywhere in the visible code;
                # packageFiles writes a fixed zip name instead.
                PATTERN=>"-p|--package",
                ACTION=>sub {$packageFile = shift(@ARGV)},
                DEFAULT=>sub {$packageFile = "cocoon-wiki.tgz"},
                DESC=>"Specify the tgz package name for content uploading",
                SYNTAX=>"-p <package file>"
               }, {
                PATTERN=>"-rp|--remove-prefix",
                ACTION=>sub {$prefix = shift(@ARGV)},
                DEFAULT=>sub {$prefix = ""},
                DESC=>"Specify a string to be removed from the beginning of a page name",
                SYNTAX=>"-rp <prefix>"
               }, {
                # -rw and -arw share $rewriteMap; only this entry's DEFAULT
                # resets it, the -arw DEFAULT resets just the append flag.
                PATTERN=>"-rw|--create-rewrite-map",
                ACTION=>sub {$rewriteMap = shift(@ARGV)},
                DEFAULT=>sub {$rewriteMap = ""},
                DESC=>"Build an Apache mod_rewrite rewrite map file for redirecting old JSP wiki to new Moin wiki",
                SYNTAX=>"-rw <rewrite map file>"
               }, {
                PATTERN=>"-arw|--append-rewrite-map",
                ACTION=>sub {$rewriteMap = shift(@ARGV); $appendRewriteMap = 1},
                DEFAULT=>sub {$appendRewriteMap = 0},
                DESC=>"Extend an existing Apache mod_rewrite rewrite map file for redirecting old JSP wiki to new Moin wiki",
                SYNTAX=>"-arw <rewrite map file>"
               }, {
                PATTERN=>"-r|--page-regexp",
                ACTION=>sub {$regexp = shift(@ARGV)},
                DEFAULT=>sub {$regexp = ".*"},
                DESC=>"Specify a regular expression that which must match a page for that page to be converted",
                SYNTAX=>"-r <regular expression>"
               }, {
                PATTERN=>"-R|--inverse-page-regexp",
                ACTION=>sub {$regexpInv = shift(@ARGV)},
                DEFAULT=>sub {$regexpInv = ""},
                DESC=>"Specify a regular expression that which must *not* match a page for that page to be converted",
                SYNTAX=>"-R <regular expression>"
               }, {
                # help prints the option list and exits the script.
                PATTERN=>"-h|--help",
                ACTION=>sub {&help},
                DEFAULT=>sub {},
                DESC=>"Display this help",
                SYNTAX=>"-h"
               });
  }             
# Print the one-line synopsis: the script name followed by the SYNTAX
# fragment of every option, in table order, each followed by a space.
sub usage {
  my $synopsis = "jsp2moin.pl ";
  for my $entry (@commands) {
    $synopsis .= "$entry->{SYNTAX} ";
  }
  print $synopsis, "\n";
}

# Print the usage line, then one row per option with its flag pattern padded
# to a common width ($longestPattern) followed by its description, and exit.
sub help {
  usage();
  print "\n";
  for my $entry (@commands) {
    my $flags = $entry->{PATTERN};
    my $padding = " " x ($longestPattern + 1 - length($flags));
    print $flags, $padding, $entry->{DESC}, "\n";
  }
  exit;
}

# Load the exclusion/rename table from $exclusionFile into %exclusions.
#
# File format, one entry per line:
#   PageName            stored as PageName => 1
#   OldName->NewName    stored as OldName => NewName
# Blank lines and lines whose first non-blank character is '#' are ignored.
# Both LF and CRLF line endings are accepted.
#
# A missing or unreadable file is not fatal: a warning is issued (unless
# $quiet) and %exclusions is left as it was.
sub loadExclusions {
  print "Loading exclusions from $exclusionFile...\n" unless $quiet;

  my $fh;
  unless (open($fh, '<', $exclusionFile)) {
    warn "Cannot read exclusions file '$exclusionFile': $!\n" unless $quiet;
    return;
  }

  # A named line variable keeps the caller's $_ untouched.
  while (my $line = <$fh>) {
    $line =~ s/\r?\n$//;
    next if $line =~ /^\s*#/ || $line =~ /^\s*$/;
    if ($line =~ /^(.*)->(.*)$/) {
      $exclusions{$1} = $2;
    } else {
      $exclusions{$line} = 1;
    }
  }
  close $fh;
}

# Remove the output of a previous run from $dest and recreate the empty
# text, backup and attachment directories.
#
# rm is invoked in list form so that a destination path containing spaces or
# shell metacharacters is passed through untouched. Failures are reported
# with a warning rather than aborting the run.
sub clean {
  print "Cleaning...\n" unless $quiet;

  foreach my $stale ("$dest/text", "$dest/backup", "$dest/pages") {
    next unless -e $stale;
    system("rm", "-rf", $stale) == 0
      or warn "Could not remove '$stale'\n";
  }

  foreach my $fresh ("$dest/text", "$dest/backup", "$dest/$attachDest") {
    # The attachment directory may coincide with one that already exists.
    next if -d $fresh;
    mkdir($fresh, 0777)
      or warn "Could not create '$fresh': $!\n";
  }
}

# Copy the newest version of every JSPWiki attachment into the MoinMoin
# attachment tree and record the attachment names per page in
# %pageAttachments.
#
# Source layout:  $attachSrc/<page>-att/<attachment>-dir/<version>.<ext>
# Target layout:  $dest/$attachDest/<encoded page>/attachments/<attachment>
#
# Does nothing when attachments were disabled on the command line.
sub processAttachments {
  return unless $attachments;
  print "Processing attachments...\n" unless $quiet;

  # Newest stored version per "<page>/<attachment>". The full path of the
  # winning file is kept so that its own extension is used, not the
  # extension of whichever version happened to be listed last.
  my %latest;
  foreach my $path (glob("$attachSrc/*-att/*-dir/*")) {
    next if $path =~ /attachment.properties$/;
    next if $path =~ m#/\.{1,2}$#;
    my ($page, $attach, $no, $ext) =
      ($path =~ m#\Q$attachSrc\E/(.*)-att/(.*)-dir/(\d+)\.(.*)$#);
    next unless defined $page;          # not a "<version>.<ext>" file

    my $key = "$page/$attach";
    if (!exists $latest{$key} || $no > $latest{$key}{no}) {
      $latest{$key} = { page => $page, attach => $attach, no => $no, path => $path };
    }
  }

  foreach my $key (sort keys %latest) {
    my $entry = $latest{$key};
    push @{$pageAttachments{$entry->{page}}}, $entry->{attach};

    my $outfile = getOutputFilename($entry->{page});
    my $pageDir = "$dest/$attachDest/$outfile";
    # Either directory may already exist from an earlier attachment.
    mkdir $pageDir, 0777;
    mkdir "$pageDir/attachments", 0777;
    copy($entry->{path}, "$pageDir/attachments/$entry->{attach}")
      or warn "Could not copy attachment '$entry->{path}': $!\n";
    print "\@" unless $quiet;
  }
  print "\n" unless $quiet;
}

# Convert every JSPWiki page found in $src.
#
# For each page that is neither excluded nor filtered out by the page
# regular expressions:
#   - when history is enabled, every old version under $src/OLD/<page> is
#     converted, written to $dest/backup/<name>.<mtime> and logged;
#   - a rewrite-map line is emitted when a rewrite map was requested;
#   - the current version is converted, written to $dest/text/<name> and
#     logged.
# The first version logged for a page is flagged as its creation.
sub processPages {
  print "Processing...\n" unless $quiet;

  foreach my $file (getPages()) {
    my $outfile = getOutputFilename($file);

    # A value of 1 marks a plain exclusion; any other value is a rename
    # target, so compare as a string rather than numerically.
    next if defined $exclusions{$file} && $exclusions{$file} eq "1";
    next if $regexp ne "" && $file !~ /$regexp/;
    next if $regexpInv ne "" && $file =~ /$regexpInv/;
    print $file unless $quiet;

    my %authors = getAuthors($file);
    my $first = 1;
    my $maxAuthor = 0;

    if ($history) {
      # readdir order is arbitrary; walk the versions oldest first so the
      # creation flag lands on the lowest version number.
      my @versions = sort { $a <=> $b || $a cmp $b } getHistory("$src/OLD/$file");
      foreach my $version (@versions) {
        my $versionFile = "$src/OLD/$file/$version.txt";
        my $content = processPage(getContent($versionFile), $file);
        $content = appendAttachments($content, $file) if $appendAttachments;
        my $timestamp = getDate($versionFile);
        addToEditLog($outfile,
                     $authors{$version},
                     $timestamp,
                     $authors{$version},
                     $first);
        writeContent("$dest/backup/$outfile.$timestamp", $content);
        $first = 0;
        $maxAuthor = $version if $version > $maxAuthor;
        print "." unless $quiet;
        $filecount++;
      }
    }
    appendRewriteMap($file, $outfile) if $rewriteMap ne "";

    $authorcount += scalar(keys %authors);

    my $current = "$src/$file.txt";
    # The page name must be passed along, otherwise attachment links in the
    # current version cannot be resolved against the page's attachments.
    my $content = processPage(getContent($current), $file);
    $content = appendAttachments($content, $file) if $appendAttachments;
    my $timestamp = getDate($current);
    writeContent("$dest/text/$outfile", $content);
    # The current version's author is stored one past the newest old version.
    addToEditLog($outfile,
                 $authors{$maxAuthor+1},
                 $timestamp,
                 $authors{$maxAuthor+1},
                 $first);
    print "*\n" unless $quiet;
    $filecount++;
  }
}

# Map a JSPWiki page name to the file name used in the MoinMoin data folder.
#
# Steps:
#   1. apply a rename from %exclusions (entries of the form Old->New);
#   2. encode '_', '.', '-', '+' and '%2F' as _25, _2e, _2d, _2b and _2f;
#   3. strip $prefix from the front, unless that would leave nothing.
#
# A %exclusions value of 1 marks a plain exclusion, not a rename, and is
# ignored here so that such pages keep their own name. The prefix is treated
# as a literal string, never as a regular expression.
sub getOutputFilename {
  my $file = shift;

  my $mapped = $exclusions{$file};
  $file = $mapped if defined $mapped && $mapped ne "1";

  # '_' must be encoded first: every later replacement introduces a '_'.
  $file =~ s/_/_25/g;
  $file =~ s/\./_2e/g;
  $file =~ s/\-/_2d/g;
  $file =~ s/\+/_2b/g;
  $file =~ s/%2F/_2f/g;

  $file =~ s/^\Q$prefix\E// if $prefix ne "" && $file ne $prefix;
  return $file;
}

# Emit one rewrite-map line, "<old page name> <new file name>", on the
# shared rewriteMapHandle.
sub appendRewriteMap {
  my ($infile, $outfile) = @_;
  print rewriteMapHandle $infile, " ", $outfile, "\n";
}
 
# Copy the stock MoinMoin pages from $baseFolder into $dest/text without
# overwriting any page produced by the conversion.
#
# Directories (including '.', '..' and CVS) are skipped. Files are copied
# with File::Copy, so names containing spaces or shell metacharacters are
# handled. An unreadable base folder is reported and otherwise ignored.
sub copyBasePages {
  print "Copying base pages...\n" unless $quiet;

  my $dh;
  unless (opendir($dh, $baseFolder)) {
    warn "Cannot read base folder '$baseFolder': $!\n";
    return;
  }
  my @files = readdir($dh);
  closedir($dh);

  foreach my $file (@files) {
    next if $file eq "CVS";
    # Test the source entry as well as the target: only plain files are
    # copied.
    next if -d "$baseFolder/$file" || -d "$dest/text/$file";

    if (-e "$dest/text/$file") {
      print "$file exists already\n" unless $quiet;
      next;
    }

    copy("$baseFolder/$file", "$dest/text/$file")
      or warn "Could not copy base page '$file': $!\n";
  }
}

# Write the collected edit-log entries to $dest/editlog, oldest first.
#
# Each line is tab separated:
#   page, author, timestamp, host, two empty fields, version
# Dies when the log cannot be written, because the converted wiki would
# otherwise be packaged without its edit history.
sub writeEditLog {
  print "Writing Edit log...\n" unless $quiet;

  my $path = "$dest/editlog";
  open(my $log, '>', $path)
    or die "Cannot write edit log '$path': $!\n";

  # Keys of %editlog are the (unique) timestamps.
  foreach my $stamp (sort { $a <=> $b } keys %editlog) {
    my $entry = $editlog{$stamp};
    print {$log} join("\t",
                      $entry->{page},
                      $entry->{author},
                      $entry->{timestamp},
                      $entry->{host},
                      "",
                      "",
                      $entry->{version}),
                 "\n";
  }

  close($log)
    or die "Cannot finish edit log '$path': $!\n";
}


# Final step: close the rewrite map when one was in use and report
# completion unless running quietly.
sub complete {
  if ($rewriteMap ne "") {
    close(rewriteMapHandle);
  }
  print "Done.\n" unless $quiet;
}

# Read the per-version authors of a page from
# $src/OLD/<page>/page.properties.
#
# Only lines of the form "<version>.author=<name>" are used; every other
# line is skipped. Dots in the author value are replaced by colons. Both LF
# and CRLF line endings are accepted.
#
# Returns a hash of version number => author. The hash is empty when the
# page has no properties file.
sub getAuthors {
  my $file = shift;
  my %authors;

  open(my $props, '<', "$src/OLD/$file/page.properties")
    or return %authors;

  while (my $line = <$props>) {
    $line =~ s/\r?\n$//;
    # Skipping non-matching lines avoids a bogus entry under the empty key.
    next unless $line =~ /^(\d+)\.author=(.*)$/;
    my ($version, $author) = ($1, $2);
    $author =~ s/\./:/g;
    $authors{$version} = $author;
  }
  close($props);
  return %authors;
}

# Record one edit in %editlog.
#
# Arguments: page name, author, timestamp, host, and a flag that is true for
# the first recorded version of the page (logged as SAVENEW, otherwise SAVE).
# Entries are keyed by timestamp, so a timestamp that is already taken is
# incremented until a free one is found.
sub addToEditLog {
  my ($page, $author, $timestamp, $host, $first) = @_;

  while (defined $editlog{$timestamp}) {
    $timestamp++;
  }

  $editlog{$timestamp} = {
    page      => $page,
    author    => $author,
    timestamp => $timestamp,
    host      => $host,
    version   => ($first ? "SAVENEW" : "SAVE"),
  };
}
 
# Return the raw entries of a directory (including '.' and '..'), or an
# empty list when the directory cannot be opened.
sub getDir {
  my ($path) = @_;
  my @entries;
  if (opendir(my $listing, $path)) {
    @entries = readdir($listing);
    closedir($listing);
  }
  return @entries;
}

# Return the names of all pages in $src: every "*.txt" entry with the
# extension removed.
sub getPages {
  my @pages = map { /^(.*)\.txt$/ ? $1 : () } getDir($src);
  return @pages;
}

# Return the stored versions of a page: every "*.txt" entry of the given
# history directory with the extension removed, in directory order.
sub getHistory {
  my ($pageDir) = @_;
  my @versions;
  for my $entry (getDir("$pageDir")) {
    next unless $entry =~ /^(.*)\.txt$/;
    push @versions, $1;
  }
  return @versions;
}

# Return the modification time of a file, as reported by File::stat.
sub getDate {
  my ($path) = @_;
  my $info = stat($path);
  return $info->mtime;
}

# Return the whole content of a file as one string.
#
# The file is opened with three-argument open, so the path is used exactly
# as given (no mode characters or surrounding whitespace are interpreted).
# An unreadable file yields the empty string and a warning.
sub getContent {
  my $file = shift;

  my $fh;
  unless (open($fh, '<', $file)) {
    warn "Cannot read '$file': $!\n";
    return "";
  }

  local $/;                       # slurp mode for this read only
  my $content = <$fh>;
  close($fh);
  return defined $content ? $content : "";
}

# Write a string to a file, replacing any previous content.
#
# The file is opened with three-argument open, so the path is used exactly
# as given. Returns true on success; on failure a warning names the file and
# a false value is returned, so that a lost page does not go unnoticed.
sub writeContent {
  my $file = shift;
  my $content = shift;

  my $out;
  unless (open($out, '>', $file)) {
    warn "Cannot write '$file': $!\n";
    return;
  }
  print {$out} $content;
  unless (close($out)) {
    # Buffered write errors only surface when the handle is closed.
    warn "Cannot finish writing '$file': $!\n";
    return;
  }
  return 1;
}


# Convert the bracketed JSPWiki links in a piece of text to MoinMoin syntax.
#
# Arguments: the text, and the name of the page it belongs to (passed on to
# isAttachmentLink/getAttachmentLink to recognise attachment links).
# Returns the converted text.
#
# The text is consumed from left to right, one "[...]" at a time. Everything
# outside the brackets goes through escapeCamelCase; the bracket content is
# rewritten by the first matching branch below, so branch order matters.
#
# Note: the sub assigns to the global $_ and to $link, which is not declared
# with "my".
sub processLinks {
  $_ = shift;
  my $file = shift;
  my $new = "";
  while (/\[(.*?)\]/m) {
    $link = $1;
    $new .= &escapeCamelCase($`);   # text before the bracket
    $_ = $';                        # continue after the bracket
    if ($link=~/^\[/) {                           # Quoted square brackets
      $link="$link]";
    } elsif ($link=~/^(\d+)$/) {                  # [1] -> [#1]
      $link = "[#$1]";
    } elsif ($link=~/^#(\d+)$/) {                 # [#1] -> Anchor macro
      $link="[[Anchor($1)]]";
    } elsif (&isAttachmentLink($link, $file)) {
      $link = &getAttachmentLink($link, $file);
    } elsif ($link=~/\|/) {
      if ($link=~/^(.*)\s*\|\s*(http:.*)$/) {     # http:// links with a label
        $link = "[$2 $1]";
      } elsif ($link=~/^([^\|]+)\s*\|\s*(.*)/) {  # [SN | Steven Noels] -> [:SN:StevenNoels]
        # $1 must be saved before filterLink runs: it performs its own match.
        my $label = $1;
        $link = filterLink($2);
        $link=~s/\s//g;
        $link=~s/\./_2e/g;
        $link="[:$link:$label]";
      }
    } elsif ($link=~/\s*(http:[^\s]+)\s*/) {      # http:// without label
      $link="[".&filterLink($1)."]";
    } else {
      $link=~s/\s//g;
      if ($link=~/^([A-Z][a-z0-9]+){2,}$/) {      # [WikiPage] -> WikiPage (detect camel case)
        $link=&filterLink($link);
      } else {                                    # [Upayavira] -> [:Upayavira]
        $link =~s/\s//g;
        $link = "[:".&filterLink($link)."]";
      }
    }
    $new.=$link;
  }
  # Whatever follows the last bracket is plain text.
  $new.=&escapeCamelCase($_);
  return $new;
}

# Strip the configured page-name prefix ($prefix) from the front of a link
# target. The link is returned unchanged when no prefix is configured or
# when the link is exactly the prefix.
#
# The prefix is matched as a literal string, so characters such as '(' or
# '+' in it cannot break or alter the match.
sub filterLink {
  my $link = shift;
  $link =~ s/^\Q$prefix\E// if $prefix ne "" && $link ne $prefix;
  return $link;
}
  
# Prefix every CamelCase word (two or more capitalised runs in a row) with
# '!' and return the resulting text.
sub escapeCamelCase {
  my ($text) = @_;
  $text =~ s/(([A-Z][a-z0-9]+){2,})/!$1/gm;
  return $text;
}

# True when getAttachmentLink rewrites the given link for the given page,
# i.e. when its result differs from the link that was passed in.
sub isAttachmentLink {
  my ($link, $file) = @_;
  my $resolved = getAttachmentLink($link, $file);
  return $resolved ne $link;
}

# Resolve a link against the attachments of a page.
#
# Arguments: the bracket content of a link and the page it appears on.
# Returns "attachment:<name> " when the link target (the part after '|' for
# labelled links) contains the name of one of the page's attachments.
# In every other case the link is returned exactly as it was passed in, so
# that isAttachmentLink can tell the two outcomes apart. http: links are
# never treated as attachments.
#
# Attachment names are matched literally; characters such as '+' or '(' in
# a file name have no special meaning.
sub getAttachmentLink {
  my $link = shift;
  my $file = shift;

  my $target = $link;
  if ($link =~ /\|/) {
    if ($link =~ /^(.*)\s*\|\s*(http:.*)$/) {       # http:// links with a label
      return $link;
    } elsif ($link =~ /^([^\|]+)\s*\|\s*(.*)/) {    # [label | target]
      $target = $2;
    }
  } elsif ($link =~ /\s*(http:[^\s]+)\s*/) {        # http:// without label
    return $link;
  }

  foreach my $attachment (@{$pageAttachments{$file}}) {
    if ($target =~ /\s*\Q$attachment\E\s*/) {
      return "attachment:$attachment ";
    }
  }
  # Not an attachment: hand back the untouched link, label included.
  return $link;
}

# Append one "Attachment:" line per attachment of the page to its content.
#
# Arguments: the converted page content and the page name. Returns the
# content, unchanged when the page has no attachments and otherwise followed
# by two line breaks and the attachment lines. A progress mark is printed
# per attachment unless running quietly.
sub appendAttachments {
  my $content = shift;
  my $file = shift;

  my @attachments = @{ $pageAttachments{$file} || [] };
  return $content unless @attachments;

  $content .= "[[BR]]\n[[BR]]\n";
  foreach my $attachment (@attachments) {
    print "\@" unless $quiet;
    $content .= "'''Attachment:''' attachment:$attachment [[BR]]\n";
  }
  return $content;
}

# Convert the markup of one page.
#
# Arguments: the raw page text and the page name. Inline {{...}} snippets
# are first widened to {{{...}}}. The text is then walked block by block:
# everything outside a {{{...}}} block is converted by processLines, the
# blocks themselves are copied through untouched. Returns the converted text.
sub processPage {
  my ($page, $file) = @_;

  $page=~s#([^\{])\{\{([^{}]*?)\}\}(?!\})#$1\{\{\{$2\}\}\}#sg;
  #$page=~s#([^\{])\{{2}([^\{])#$1\{\{\{$2#gm; # Inline code snippets
  #$page=~s#([^\}])\}{2}([^\}])#$1\}\}\}$2#gm; # Inline code snippets

  my $converted = "";
  my $rest = $page;
  while ($rest =~ /\{\{\{.*?\}\}\}/s) {
    # Offsets of the matched block within $rest.
    my ($from, $to) = ($-[0], $+[0]);
    my $plain = substr($rest, 0, $from);
    my $code = substr($rest, $from, $to - $from);
    $rest = substr($rest, $to);
    $converted .= processLines($plain, $file) . $code;
  }
  return $converted . processLines($rest, $file);
}

# Debugging aid: pipe a description, an underline of the same length and the
# given content through the "less" pager.
sub less {
  my ($description, $content) = @_;
  open(my $pager, "|less");
  my $underline = "-" x length($description);
  print $pager "$description\n", $underline, "\n\n";
  print $pager $content;
  close($pager);
}

# Convert the line-level JSPWiki markup of a piece of text to MoinMoin.
#
# Arguments: the text and the name of the page it belongs to (passed on to
# processLinks). Returns the converted text.
#
# Order of work:
#   1. carriage returns are removed;
#   2. "!", "!!" and "!!!" heading lines are converted. This happens before
#      link processing because processLinks prefixes CamelCase words with
#      '!', which would otherwise turn any line starting with such a word
#      into a heading;
#   3. links are converted by processLinks;
#   4. the remaining markup (bold, lists, line breaks, tables) is converted.
#      Tables come after links because link labels use '|' as well.
sub processLines {
  my $text = shift;
  my $file = shift;
  $text = "" unless defined $text;

  my $m = chr(13);
  $text =~ s#$m##g;
  $text =~ s#^[\t ]*\!\!\!(.*)$#= $1 =#gm;      # Largest Heading
  $text =~ s#^[\t ]*\!\!(.*)$#== $1 ==#gm;      # Middle Heading
  $text =~ s#^[\t ]*\!(.*)$#=== $1 ===#gm;      # Smallest Heading

  $_ = &processLinks($text, $file);

  s#^(=+)\s\d+\.#$1 #gm;               # Remove Numbers from headings; Moin adds them itself
  s#__#'''#gm;                         # Bold
  s#^\*\*\*\*#    *#gm;                # Nested Bulletted lists (four levels)
  s#^\*\*\*#   *#gm;                   # Nested Bulletted lists
  s#^\*\*#  *#gm;                      # Nested Bulletted lists
  s#^\*# * #gm;                        # Bulletted lists
  s#^\## 1.#gm;                        # Numbered lists
  s#\\\\#[[BR]]#gm;                    # Line Breaks
  s#\|\|(.*?)(?=\|\|)#\|'''$1'''#gm;   # Table Headers
  s#\|#\|\|#gm;                        # Table Entries
  # ''Italics''                          (not supported in JSPWiki)
  # ^superscript^                        (not supported in JSPWiki)
  # ,,subscript,,                        (not supported in JSPWiki)
  return $_;
}

# Zip the converted wiki (pages, text, backup, editlog) inside $dest and
# move the archive, moin-wiki-site.zip, to the directory the script was
# started from.
#
# Problems are reported with warnings; a failure to return to the starting
# directory is fatal because every later relative path would be wrong.
sub packageFiles {
  print "Making archive...\n" unless $quiet;

  # Backticks keep the trailing newline of pwd's output; it has to go
  # before the value can be used as a path.
  chomp(my $cwd = `pwd`);

  unless (chdir $dest) {
    warn "Cannot enter '$dest': $!\n";
    return;
  }
  system("zip", "-rq", "moin-wiki-site.zip", "pages", "text", "backup", "editlog") == 0
    or warn "zip did not complete successfully\n";
  chdir $cwd
    or die "Cannot return to '$cwd': $!\n";

  # move falls back to copy-and-delete when a plain rename is not possible.
  move("$dest/moin-wiki-site.zip", "$cwd/moin-wiki-site.zip")
    or warn "Could not move archive out of '$dest': $!\n";
}
