# -*- coding: utf8 -*-
"""
Normalize page names

See http://moinmoin.wikiwikiweb.de/MoinMoinBugs/CanCreateBadNames
"""

import unittest

class InvalidPageNameError(Exception):
    """ Raised when a candidate page name cannot be normalized """


def normalizePageName(candidate):
    """ Return the normalized form of a candidate page name

    A normalized page name contains only alphanumeric Unicode characters,
    with exactly one space between words and no empty path components.

    Leading, trailing or repeated white space is treated as a user slip
    and is repaired quietly, without raising an error.

    Empty path components are dropped, just like the shell does.

    @param candidate: Unicode string with candidate name
    @return: normalized name
    @rtype: Unicode string
    @raise InvalidPageNameError: if the candidate is empty, contains a
        non-alphanumeric character, or has no components left after
        normalization
    """
    # An empty candidate can never become a valid name
    if candidate == '':
        raise InvalidPageNameError(candidate)

    cleaned = []
    for component in candidate.split('/'):
        # split() with no argument discards white space around and
        # between the words of this path component
        words = component.split()
        if not words:
            # Nothing but white space here, e.g. '/  /Name/' -> 'Name'
            continue

        # Every character of every word has to be alphanumeric
        if not all(char.isalnum() for word in words for char in word):
            raise InvalidPageNameError(candidate)

        # Rebuild the component with a single space between words
        cleaned.append(' '.join(words))

    # A name made only of separators and white space normalizes to nothing
    if not cleaned:
        raise InvalidPageNameError(candidate)
    return '/'.join(cleaned)
    

class ValidNamesTestCase(unittest.TestCase):
    """ Check that acceptable candidate names are normalized as expected """

    # Pairs of (candidate name, expected normalized name)
    TESTS = (
        # name, normalized name
        # Remove empty components
        (u'/Page////Page//', u'Page/Page'),
        # Remove extra White Space
        (u' Page \t Name / SubPage ', u'Page Name/SubPage'),
        # German umlauts
        (u'Jürgen Hermann', u'Jürgen Hermann'),
        # Hebrew
        (u'\u05d0\u05d1 \u05d2', u'\u05d0\u05d1 \u05d2'),
        # Add more tests
    )

    def testNormalizePageName(self):
        """ wikiutil: normalize page names """
        for test, expected in self.TESTS:
            result = normalizePageName(test)
            # assertEqual replaces the failUnlessEqual alias, which newer
            # unittest versions no longer provide
            self.assertEqual(result, expected,
                'expected "%s" but got "%s"' % (expected, result))


class InvalidNamesTestCase(unittest.TestCase):
    """ Check that unacceptable candidate names are rejected """

    # Candidate names that must each raise InvalidPageNameError
    TESTS = (
        # Empty
        u'',
        # White Space only
        u'  \t\n  ',
        # Unicode format character (U+200F RIGHT-TO-LEFT MARK) padded
        # with spaces; it is neither white space nor alphanumeric
        u' \u200f ',
        # Sub pages with empty pages or white space
        # (the trailing comma matters: without it the last string here is
        # silently concatenated with the first string of the next group)
        u'///', u' /   / /  / ', u'\u200f/ / ',
        # Non alpha numeric
        u'.', u'-', u'|', u'%', u'&', u'?', u'@', u'!', u'$', u'*', u'(', u')',
        u'<', u'>', u'\u202a',
        # Mix
        u'\u202a/ValidComponent',
        # Add more tests
    )

    def testInvalidPageNames(self):
        """ wikiutil: invalid page name raise exception """
        for test in self.TESTS:
            # assertRaises replaces the failUnlessRaises alias, which newer
            # unittest versions no longer provide
            self.assertRaises(InvalidPageNameError, normalizePageName, test)


if __name__ == '__main__':
    # Build the suite through a TestLoader: unittest.makeSuite is deprecated
    # and absent from newer Python versions, while loadTestsFromTestCase is
    # available on old and new versions alike.
    loader = unittest.TestLoader()
    suite = unittest.TestSuite()
    suite.addTest(loader.loadTestsFromTestCase(ValidNamesTestCase))
    suite.addTest(loader.loadTestsFromTestCase(InvalidNamesTestCase))
    unittest.TextTestRunner(verbosity=2).run(suite)
    
