#!/usr/local/bin/python

"""
 converts tiki html to MoinMoin markup
 author: Daniela Nicklas <dani@miracle-solutions.de>
"""
import sys
from HTML2MoinMoin import HTML2MoinMoin
import string
import os
import codecs
import StringIO
import re
sys.path.append('Path to MoinMoin-Libs')
from MoinMoin.wikiutil import isStrictWikiname

# Conversion settings
# Directory that is scanned for the HTML files to convert.
sourcedir = "pages"
# Directory that receives the converted pages (created if it is missing).
targetdir = "text"
# Maps a page-name prefix to the category name that is appended to every
# page whose file name starts with that prefix.
page2category = {
    'NameInTiki': 'CategoryNewName',
    'AnothernameInTiki': 'CategoryAnotherOrSameNewName',
}

# Output Ignore
class devnull:
    """Write-only sink: accepts data and throws it away."""

    def write(self, data):
        """Discard ``data``; nothing is stored and None is returned."""
        pass
    
# HTML Parser
class TikiHTML2MoinMoin(HTML2MoinMoin):
    """HTML2MoinMoin parser with special handling for Tiki page markup.

    Output is suppressed from the opening <html> tag until the first
    </table> has been closed.  Links to Tiki pages, edit pages and
    categories are rewritten; every other link is emitted as
    ``[href label]``.
    """

    start_tags = {
        "a"     : "[",
        "b"     : "'''",
        "em"    : "''",
        "tt"    : "{{{",
        "pre"   : "\n{{{",
        "p"     : "\n\n",
        "br"    : "\n\n",
        "h1"    : "\n\n= ",
        "h2"    : "\n\n== ",
        "h3"    : "\n\n=== ",
        "h4"    : "\n\n==== ",
        "h5"    : "\n\n===== ",
        "hr"    : "\n----\n",
        "title" : "",
        "table" : "\n",
        "tr"    : "",
        "td"    : "||"
        }

    end_tags = {
        "a"     : ']',
        "b"     : "'''",
        "em"    : "''",
        "tt"    : "}}}",
        "pre"   : "}}}\n",
        "p"     : "",
        "h1"    : " =\n\n",
        "h2"    : " ==\n\n",
        "h3"    : " ===\n\n",
        "h4"    : " ====\n\n",
        "h5"    : " =====\n\n",
        "table" : "\n",
        "title" : "",
        "tr"    : "||\n",
        "dt"    : ":: "
        }

    def __init__(self):
        """Set up parser state, URL prefixes and the replacement tables."""
        HTML2MoinMoin.__init__(self)
        self.title = 0
        self.heading = 0
        self.head = 0
        self.linebreaks = 1
        self.tablecount = 0
        self.devnull = devnull()
        self.outputbackup = self.output
        # One entry per open <div>: the text to write when it is closed.
        self.div_mode = []
        # One entry per open <a>: either the closing markup to write or one
        # of the markers 'heading', 'cache' and 'fixme'.
        self.a_mode = []
        self.tikipageurl = 'tiki-index.php?page='
        self.tikiediturl = 'tiki-editpage.php?page='
        self.tikicategoryurl = 'tiki-browse_categories.php?parentId='
        self.tikicategory = {
            1 : 'NameInTiki',
            4 : 'AnotherNameIn'
            }
        self.linkreplacements = {
            'HomePage': 'StartSeite',
            'UserPageYourPage': 'YourName'
            }

    def _before_ampersand(self, text):
        """Return ``text`` up to the first '&', or all of it if there is none."""
        return text.split('&', 1)[0]

    def set_ignore(self):
        """Redirect output to the sink, remembering the real output stream."""
        if self.output != self.devnull:
            self.outputbackup = self.output
            self.output = self.devnull

    def unset_ignore(self):
        """Restore the output stream remembered by set_ignore()."""
        self.output = self.outputbackup

    def do_html_start(self, attrs, tag):
        """Start ignoring output at the opening <html> tag."""
        self.set_ignore()

    def do_table_end(self, tag):
        """Count closed tables; the first one switches output back on."""
        self.tablecount = self.tablecount + 1
        if self.tablecount == 1:
            self.unset_ignore()

    def do_h1_start(self, attrs, tag):
        """Mark the start of a heading and write its opening markup."""
        self.heading = 1
        self.write(self.start_tags[tag])

    def do_h1_end(self, tag):
        """Mark the end of a heading and write its closing markup."""
        self.heading = 0
        self.write(self.end_tags[tag])

    def do_a_start(self, attrs, tag):
        """Handle an opening <a>, pushing exactly one entry onto a_mode.

        Inside a heading no link markup is written.  Otherwise the link is
        classified by its ``class`` and ``href`` attributes:

        * class ``wikicache``: output is ignored until the matching </a>
        * Tiki page URL: ``[wiki:Page `` (after applying linkreplacements)
        * Tiki edit URL: a FixMe marker, the link text itself is ignored
        * Tiki category URL: ``[wiki:Category `` looked up in tikicategory
        * any other non-empty href: ``[href ``
        * no href at all: nothing is written
        """
        if self.heading:
            self.a_mode.append('heading')
            return

        href = ''
        at_class = ''
        for att in attrs:
            if att[0] == 'href':
                href = att[1] or ''
            if att[0] == 'class':
                at_class = att[1] or ''

        if at_class == 'wikicache':
            self.set_ignore()
            self.a_mode.append('cache')
        elif href.find(self.tikipageurl) != -1:
            # Cut off further query parameters.  Slicing with find('&')
            # would drop the last character when there is no '&' at all.
            page = self._before_ampersand(href).replace(self.tikipageurl, '')
            page = self.linkreplacements.get(page, page)
            self.write(self.start_tags[tag] + 'wiki:' + page + ' ')
            self.a_mode.append(self.end_tags[tag])
        elif href.find(self.tikiediturl) != -1:
            self.write('- FixMe/EditLink -')
            self.set_ignore()
            self.a_mode.append('fixme')
        elif href.find(self.tikicategoryurl) != -1:
            raw_id = self._before_ampersand(
                href.replace(self.tikicategoryurl, ''))
            try:
                number = int(raw_id)
            except ValueError:
                # Not a numeric id: report it as a mismatch instead of
                # aborting the whole conversion.
                number = raw_id
            category = self.tikicategory.get(number, 'CategoryMissmatch')
            self.write(self.start_tags[tag] + 'wiki:' + category + ' ')
            self.a_mode.append(self.end_tags[tag])
            if number not in self.tikicategory:
                print("CategoryMissmatch: %s" % number)
        elif href != '':
            self.write(self.start_tags[tag])
            self.write(href + " ")
            self.a_mode.append(self.end_tags[tag])
        else:
            # <a> without href (e.g. a named anchor): push an empty closer
            # so that do_a_end() pops the entry belonging to this tag.
            self.a_mode.append('')

    def do_a_end(self, tag):
        """Handle a closing </a> according to the entry pushed at its start."""
        if not self.a_mode:
            # Unbalanced </a>: nothing was opened, so there is nothing to close.
            return
        mode = self.a_mode.pop()
        if mode == 'fixme' or mode == 'cache':
            self.unset_ignore()
        elif mode != 'heading' and not self.heading:
            # 'heading' is only a marker and must never reach the output.
            self.write(mode)

    def do_div_start(self, attrs, tag):
        """Open a <div>; ``class="titlebar"`` is rendered as a level-3 heading.

        Exactly one entry is pushed onto div_mode per <div>, regardless of
        how many attributes the tag carries.
        """
        if ('class', 'titlebar') in attrs:
            self.heading = 1
            self.write(self.start_tags["h3"])
            self.div_mode.append(self.end_tags["h3"])
        else:
            self.div_mode.append("")

    def do_div_end(self, tag):
        """Close a <div>: clear the heading flag and write its closing text."""
        if self.heading:
            self.heading = 0
        if self.div_mode:
            self.write(self.div_mode.pop())

    def do_p_start(self, attrs, tag):
        """Start ignoring output when a <p class="editdate"> is opened."""
        if ('class', 'editdate') in attrs:
            self.set_ignore()

    def handle_data(self, data):
        """Write text data; newlines become spaces unless preformatted is set."""
        data = data.replace("\r", "")
        if self.preformatted:
            self.write(data)
        else:
            self.write(data.replace("\n", " "))

            
                
# Main flow

def main():
#    sys.setdefaultencoding('iso-8859-1')
    # look for source directory
    if not os.access(sourcedir, os.F_OK):
        print "%s is not accessable"%sourcedir
        return ''
    else:
        sourcelist = os.listdir(sourcedir)
        
    # create target directory (if necessary)
    if not os.access(targetdir, os.F_OK):
        os.mkdir(targetdir)

    # first pass: transform htmp
    for sourcefile in sourcelist:
        print sourcefile,
        # open sourcefile
        sf = codecs.open(sourcedir+'/'+sourcefile,'r','UTF-8')
        htmldata = sf.read()
        htmldata = htmldata.encode('iso-8859-1','replace')
        sf.close()
        print ' .',
        # parse it and write output to target file (tf)
        p = TikiHTML2MoinMoin()
        tf = StringIO.StringIO()
        p.output = tf
        p.feed(htmldata)
        p.close()
        
        print '.',
        
        # second pass: delete white spaces
        wikidata = tf.getvalue()
        wikidata=wikidata.replace('\n\n\n','\n')
        
        # open targetfile
        tf = open(targetdir+'/'+sourcefile[:-5], 'w')
        
        # third pass: purify wiki-links and write to file
        wikinamepattern = r'\[wiki\:((?P<wikilink>.*?) (?P<label>.*?))\]'
        matches = re.finditer(wikinamepattern, wikidata)
        lastend = 0
        for match in matches:
            tf.write(wikidata[lastend:match.start()])
            mdict = match.groupdict()
            wikilink = mdict['wikilink']
            label = mdict['label']
            if wikilink == label:
                if isStrictWikiname(wikilink):
                    tf.write(wikilink)
                else:
                    tf.write('["%s"]'%wikilink)
            else:
                tf.write(match.group())
            lastend = match.end()
        tf.write(wikidata[lastend:])

        # put pages in categorys
        for key, value in page2category.items():
            if sourcefile.find(key) == 0:
                if wikidata.find(value) == -1:
                    tf.write('\n'+value)
                    
        # close targetfile
        tf.close
        print '. ->' + sourcefile[:-5]

# Script entry point: the call is unguarded, so the conversion runs whenever
# this module is executed or imported.
main()
