# HG changeset patch
# User anarcat@titine.anarcat.ath.cx
# Date 1178839159 14400
# Node ID 7de937813f1a07b3ff98f7de4b68092780ab7e11
# Parent  dc9a3809af61aa74bdb4861f1ab7d02f8b730c0e
factor out the heading uniqueness code into wikiutil

rework the code so that headings with ascii text get readable ids (instead of SHA-1 hashes)

headings whose text is entirely non-ascii receive incrementally numbered ids

all tests show that heading ids are still unique after this change; it also fixes a bug in the Include macro, where the generated heading could get a duplicate id

Ref: MoinMoin:FeatureRequests/NicerHeadingIds

diff -r dc9a3809af61 -r 7de937813f1a MoinMoin/macro/Include.py
--- a/MoinMoin/macro/Include.py	Mon May 07 22:50:51 2007 +0200
+++ b/MoinMoin/macro/Include.py	Thu May 10 19:19:19 2007 -0400
@@ -188,19 +188,8 @@ def execute(macro, text, args_re=re.comp
                               macro.formatter.text(heading) +
                               macro.formatter.heading(0, level))
             else:
-                import sha
-                from MoinMoin import config
-                # this heading id might produce duplicate ids,
-                # if the same page is included multiple times
-                # Encode stuf we feed into sha module.
-                pntt = (inc_name + heading).encode(config.charset)
-                hid = "head-" + sha.new(pntt).hexdigest()
-                request._page_headings.setdefault(pntt, 0)
-                request._page_headings[pntt] += 1
-                if request._page_headings[pntt] > 1:
-                    hid += '-%d' % (request._page_headings[pntt], )
                 result.append(
-                    macro.formatter.heading(1, level, id=hid) +
+                    macro.formatter.heading(1, level, id=wikiutil.unique_heading_id(request._page_headings, heading)) +
                     inc_page.link_to(request, heading, css_class="include-heading-link") +
                     macro.formatter.heading(0, level)
                 )
diff -r dc9a3809af61 -r 7de937813f1a MoinMoin/macro/TableOfContents.py
--- a/MoinMoin/macro/TableOfContents.py	Mon May 07 22:50:51 2007 +0200
+++ b/MoinMoin/macro/TableOfContents.py	Thu May 10 19:19:19 2007 -0400
@@ -8,7 +8,7 @@
     @license: GNU GPL, see COPYING for details.
 """
 
-import re, sha
+import re
 from MoinMoin import config, wikiutil
 
 #Dependencies = ["page"]
@@ -126,9 +126,6 @@ class TableOfContents:
         if not match:
             return
         title_text = match.group('htext').strip()
-        pntt = pagename + title_text
-        self.titles.setdefault(pntt, 0)
-        self.titles[pntt] += 1
 
         # Get new indent level
         newindent = len(match.group('hmarker'))
@@ -148,11 +145,6 @@ class TableOfContents:
             self.result.append(self.macro.formatter.number_list(1))
             self.result.append(self.macro.formatter.listitem(1))
 
-        # Add the heading
-        unique_id = ''
-        if self.titles[pntt] > 1:
-            unique_id = '-%d' % (self.titles[pntt],)
-
         # close last listitem if same level
         if self.indent == newindent:
             self.result.append(self.macro.formatter.listitem(0))
@@ -160,7 +152,7 @@ class TableOfContents:
         if self.indent >= newindent:
             self.result.append(self.macro.formatter.listitem(1))
         self.result.append(self.macro.formatter.anchorlink(1,
-            "head-" + sha.new(pntt.encode(config.charset)).hexdigest() + unique_id) +
+                           wikiutil.unique_heading_id(self.titles, title_text)) +
                            self.macro.formatter.text(title_text) +
                            self.macro.formatter.anchorlink(0))
 
diff -r dc9a3809af61 -r 7de937813f1a MoinMoin/parser/text_moin_wiki.py
--- a/MoinMoin/parser/text_moin_wiki.py	Mon May 07 22:50:51 2007 +0200
+++ b/MoinMoin/parser/text_moin_wiki.py	Thu May 10 19:19:19 2007 -0400
@@ -777,8 +777,6 @@ class Parser:
 
     def _heading_repl(self, word):
         """Handle section headings."""
-        import sha
-
         h = word.strip()
         level = 1
         while h[level:level+1] == '=':
@@ -788,15 +786,8 @@ class Parser:
         # FIXME: needed for Included pages but might still result in unpredictable results
         # when included the same page multiple times
         title_text = h[level:-level].strip()
-        pntt = self.formatter.page.page_name + title_text
-        self.titles.setdefault(pntt, 0)
-        self.titles[pntt] += 1
-
-        unique_id = ''
-        if self.titles[pntt] > 1:
-            unique_id = '-%d' % self.titles[pntt]
         result = self._closeP()
-        result += self.formatter.heading(1, depth, id="head-"+sha.new(pntt.encode(config.charset)).hexdigest()+unique_id)
+        result += self.formatter.heading(1, depth, id=wikiutil.unique_heading_id(self.request._page_headings, title_text))
 
         return (result + self.formatter.text(title_text) +
                 self.formatter.heading(0, depth))
diff -r dc9a3809af61 -r 7de937813f1a MoinMoin/wikiutil.py
--- a/MoinMoin/wikiutil.py	Mon May 07 22:50:51 2007 +0200
+++ b/MoinMoin/wikiutil.py	Thu May 10 19:19:19 2007 -0400
@@ -271,6 +271,25 @@ def make_breakable(text, maxlen):
         else:
             newtext.append(part)
     return " ".join(newtext)
+
+def unique_heading_id(headings, text):
+    """ Generate an id for a heading that is unique to this request, human-readable and HTML-compliant.
+
+    @param headings: dict counting how often each normalized heading text was seen so far (updated in place)
+    @param text: the heading text (unicode, as required by unicodedata.normalize)
+    @return: "head-" + normalized text, followed by "-<count>" for repeated headings or text that reduces to "-"
+    """
+    import unicodedata
+    # HTML 4 ID and NAME tokens must begin with a letter and may only contain letters, digits,
+    # "-", "_", ":" and "." (http://www.w3.org/TR/html4/types.html); every other run of characters becomes "-".
+    pntt = re.sub('[^-A-Za-z0-9_:.]+', '-', unicodedata.normalize('NFKD', text).encode('ascii', 'ignore')).lower() or "-"
+    hid = "head-" + pntt # basic heading structure
+    headings[pntt] = headings.get(pntt, 0) + 1 # count the number of times this heading is found in this request
+    # special case: text without any usable ascii ends up as "-" (an empty result is mapped to "-" above), add a number anyway so it looks nicer
+    if headings[pntt] > 1 or pntt == "-":
+        hid += '-%d' % (headings[pntt], ) # increment the heading id, to avoid duplicates
+    return re.sub('--+', '-', hid) # necessary because the last line might have added another duplicate -
+
 
 ########################################################################
 ### Storage