# HG changeset patch
# User anarcat@titine.anarcat.ath.cx
# Date 1190262113 14400
# Node ID 3fcaf6561a8915f4eb83f5737fe258c6888509a9
# Parent  93be75db205186c2932e6512b9a9c803aba83da1
make nicer headings for latin1 charsets

We use a trivial heuristic to guess whether our nicer heading is really nicer. The converted string is accepted if:

 * it's longer than 1 character
 * it's longer than half the length of the original string

diff -r 93be75db2051 -r 3fcaf6561a89 MoinMoin/wikiutil.py
--- a/MoinMoin/wikiutil.py	Wed Sep 19 21:39:48 2007 +0200
+++ b/MoinMoin/wikiutil.py	Thu Sep 20 00:21:53 2007 -0400
@@ -2154,8 +2154,16 @@ def anchor_name_from_text(text):
     Generate an anchor name from the given text
     This function generates valid HTML IDs.
     '''
-    quoted = urllib.quote_plus(text.encode('utf-7'))
-    res = quoted.replace('%', '.').replace('+', '').replace('_', '')
+    import unicodedata
+    if not isinstance(text, unicode):
+        text = unicode(text, 'utf8')
+    res = re.sub('[^-A-Za-z0-9_:.]+', '-', unicodedata.normalize('NFKD', text).encode('ascii', 'ignore'))
+    # Heuristic to guess whether we did a good job of interpreting the string: if
+    # the resulting string is too short OR
+    # the resulting string is at most half the length of the original,
+    # then we consider that we failed and revert to a systematic utf7 encoding
+    if len(res) <= 1 or len(res) <= (len(text) / 2):
+        res = urllib.quote_plus(text.encode('utf-7')).replace('%', '.').replace('+', '').replace('_', '')
     if not res[:1].isalpha():
         return 'A%s' % res
     return res