1 """
   2     MoinMoin - FuzzyLikePages action
   3 
   4     Copyright (c) 2001 by Richard Jones <richard@bizarsoftware.com.au>
   5     Copyright (c) 2001 by Jürgen Hermann <jh@web.de>
   6     Modifications by June Kim
   7     All rights reserved, see COPYING for details.
   8 
    This action lists pages whose titles resemble the current pagename:
    first pages with a similar (fuzzy-matched) title or a title containing
    the current pagename, then pages sharing its initial or final title
    word/letters. Both lists are always shown, however many pages match.
  12 
  13     $Id: LikePages.py,v 1.3 2002/01/29 04:48:18 june Exp $
  14 """
  15 
  16 import re, sys
  17 from MoinMoin import config, user, util, wikiutil, webapi
  18 #from MoinMoin.Page import Page
  19 from cStringIO import StringIO
  20 from MoinMoin.support.difflib import get_close_matches
# Placeholder for a translation function: returns its argument unchanged.
_ = lambda x: x

# Similarity cutoff handed to get_close_matches() by FuzzyNameSearcher.
FUZZY_CUT_OFF=0.5333
# Number of leading/trailing characters SameInitialOrFinalBytesSearcher compares.
PIVOT=4
  25 
  26 def displayLikePages(anOutStream,aPageName):
  27     pageList=wikiutil.getPageList(config.text_dir)
  28     singleSideSearcher=FuzzyAndIncludingNameSearcher(aPageName,pageList)
  29     twoSidesSearcher=SameInitialOrFinalSearcher(aPageName,pageList)
  30     singleSideSearcher.searchAndDisplayAll(sys.stdout)
  31     twoSidesSearcher.searchAndDisplayAll(sys.stdout)
  32 
  33 def execute(pagename, form):
  34     webapi.http_headers()
  35     wikiutil.send_title(_('Like %s'%pagename),\
  36                         pagename=pagename)
  37     displayLikePages(sys.stdout,pagename)
  38     wikiutil.send_footer(pagename)
  39 
  40 
  41 def getUnion(aList1,aList2):
  42     assert aList1 is not None and aList2 is not None
  43     temp=aList1[:]
  44     for eachElement in aList2:
  45         if eachElement not in temp:
  46             temp.append(eachElement)
  47     return temp
  48 
  49 
  50 class SimilarPageSearcher:
  51     def __init__(self,aPageName,anAllPageList):
  52         self.pageName=aPageName
  53         self.similarPages=[]
  54         self.allPageList=anAllPageList[:]
  55         if aPageName in self.allPageList: #the page might be non-existent at the moment
  56             self.allPageList.remove(aPageName)
  57     def getSimilarPages(self):
  58         return self.similarPages
  59     def searchAndDisplayAll(self,anOutStream):
  60         self.searchPages(isSorted=1)
  61         self.display(anOutStream)
  62 
  63 class TwoSidesSearcher(SimilarPageSearcher):
  64     def __init__(self,aPageName,anAllPageList):
  65         SimilarPageSearcher.__init__(self,aPageName,anAllPageList)
  66         self.similarPagesInitial=[];self.similarPagesFinal=[]
  67     def getSimilarPages(self):
  68         temp=[]
  69         temp=getUnion(self.similarPagesInitial,self.similarPagesFinal)
  70         temp.sort()
  71         return temp
  72     def getSimilarPagesInitial(self):
  73         return self.similarPagesInitial
  74     def getSimilarPagesFinal(self):
  75         return self.similarPagesFinal
  76     def display(self,anOutStream,printHeading=1): #TODO: take out html tags and use formatter or something like HTMLgen
  77         if printHeading:
  78             print >>anOutStream, "<b>%s</b>"%_("Pages sharing initial or final title words/letters...")
  79         print >>anOutStream, "<table><tr><td valign=top><ol>"
  80         for eachPageName in self.getSimilarPagesInitial():
  81             print >>anOutStream, '<li><a href="%s">%s</a>'%(
  82                 wikiutil.quoteWikiname(eachPageName),eachPageName)
  83         print >>anOutStream, "</ol></td><td valign=top><ol>"
  84         for eachPageName in self.getSimilarPagesFinal():
  85             print >>anOutStream, '<li><a href="%s">%s</a>'%(
  86                 wikiutil.quoteWikiname(eachPageName),eachPageName)
  87         print >>anOutStream, "</ol></td></tr></table>"
  88 
  89 class FuzzyNameSearcher(SimilarPageSearcher):
  90     def __init__(self,aPageName,anAllPageList):
  91         SimilarPageSearcher.__init__(self,aPageName,anAllPageList)
  92 
  93     def searchPages(self,isSorted=0,maxPages=9):
  94         self.similarPages=get_close_matches(self.pageName,self.allPageList,n=maxPages,cutoff=FUZZY_CUT_OFF)
  95         if isSorted:
  96             self.similarPages.sort()
  97 
  98 class IncludingNameSearcher(SimilarPageSearcher):
  99     def __init__(self,aPageName,anAllPageList):
 100         SimilarPageSearcher.__init__(self,aPageName,anAllPageList)
 101 
 102     def searchPages(self,isSorted=0):
 103         self.similarPages=filter(lambda eachPage,thisPage=self.pageName:\
 104             eachPage.lower().find(thisPage.lower())>=0, self.allPageList)
 105         if isSorted:
 106             self.similarPages.sort()
 107 
 108 class SameInitialOrFinalWordSearcher(TwoSidesSearcher):
 109     def __init__(self,aPageName,anAllPageList):
 110         SimilarPageSearcher.__init__(self,aPageName,anAllPageList)
 111         self.s_re=re.compile('([%s][%s]+)' % (config.upperletters, config.lowerletters))
 112         self.e_re=re.compile('([%s][%s]+)$' % (config.upperletters, config.lowerletters))
 113         self.s_re2=re.compile('([^ ]+)' )
 114         self.e_re2=re.compile('([^ ]+)$')
 115 
 116     def searchPages(self,isSorted=0):
 117         matches,t,t=self._getMatchesStartEnd()
 118         self.similarPagesInitial=[eachPage for eachPage in matches.keys() if matches[eachPage] in [1,3]]
 119         self.similarPagesFinal=[eachPage for eachPage in matches.keys() if matches[eachPage] in [2,3]]
 120         if isSorted:
 121             self.similarPagesInitial.sort()
 122             self.similarPagesFinal.sort()
 123 
 124     def _getMatchesStartEnd(self):
 125         # figure the start and end words
 126         try:
 127             match = self.s_re.match(self.pageName)
 128             start = match.group(1)
 129             s_len = len(start)
 130             match = self.e_re.search(self.pageName)
 131             end = match.group(1)
 132             e_len = len(end)
 133         except:
 134             match = self.s_re2.match(self.pageName)
 135             start = match.group(1)
 136             s_len = len(start)
 137             match = self.e_re2.search(self.pageName)
 138             end = match.group(1)
 139             e_len = len(end)
 140 
 141         # find any matching pages
 142         matches = {}
 143         for anypage in self.allPageList:
 144             if anypage == self.pageName:  # skip current page
 145                 continue
 146             p_len = len(anypage)
 147             if p_len > s_len and anypage[:s_len] == start:
 148                 matches[anypage] = 1
 149             if p_len > e_len and anypage[-e_len:] == end:
 150                 matches[anypage] = matches.get(anypage, 0) + 2
 151         return matches,start,end
 152 
 153 
 154 class SameInitialOrFinalBytesSearcher(TwoSidesSearcher):
 155     theirPivot=PIVOT
 156     def __init__(self,aPageName,anAllPageList):
 157         TwoSidesSearcher.__init__(self,aPageName,anAllPageList)
 158     def searchPages(self,isSorted=0):
 159         if len(self.pageName)<self.theirPivot:
 160             self.similarPagesInitial=self.similarPagesFinal=[]
 161             return
 162         self.initial=self.pageName[:self.theirPivot].lower()
 163         self.final=self.pageName[-self.theirPivot:].lower()
 164         for eachPage in self.allPageList:
 165             lowered=eachPage.lower()
 166             if lowered.startswith(self.initial): # or lowered.endswith(self.final):
 167                 self.similarPagesInitial.append(eachPage)
 168             if lowered.endswith(self.final):
 169                 self.similarPagesFinal.append(eachPage)
 170 
 171         if isSorted:
 172             self.similarPagesInitial.sort()
 173             self.similarPagesFinal.sort()
 174 
 175 
 176 class SameInitialOrFinalSearcher(TwoSidesSearcher):
 177     theirPivot=PIVOT
 178     def __init__(self,aPageName,anAllPageList):
 179         #SimilarPageSearcher.__init__(self,aPageName,anAllPageList)
 180         if wikiutil.isStrictWikiname(aPageName):
 181             self._searcher=SameInitialOrFinalWordSearcher(aPageName,anAllPageList)
 182         else:
 183             self._searcher=SameInitialOrFinalBytesSearcher(aPageName,anAllPageList)
 184     def searchPages(self,isSorted=0):
 185         return self._searcher.searchPages(isSorted)
 186     def getSimilarPages(self):
 187         return self._searcher.getSimilarPages()
 188     def getSimilarPagesInitial(self):
 189         return self._searcher.getSimilarPagesInitial()
 190     def getSimilarPagesFinal(self):
 191         return self._searcher.getSimilarPagesFinal()
 192 
 193 
 194 class SingleSideSearcher(SimilarPageSearcher):
 195     def __init__(self,aPageName,anAllPageList):
 196         SimilarPageSearcher.__init__(self,aPageName,anAllPageList)
 197 
 198     def display(self,anOutStream,printHeading=1): #TODO: take out html tags and use formatter or something like HTMLgen
 199         if printHeading:
 200             print >>anOutStream, "<b>%s</b>"%_("Pages sharing a similar title...")
 201 
 202 
 203         print >>anOutStream, "<table><tr><td valign=top><ol>"
 204         for eachPageName in self.getSimilarPages():
 205             print >>anOutStream, '<li><a href="%s">%s</a>'%(
 206                 wikiutil.quoteWikiname(eachPageName),eachPageName)
 207         print >>anOutStream, "</ol></td></tr></table>"
 208 
 209 
 210 
 211 class FuzzyAndIncludingNameSearcher(SingleSideSearcher):
 212     def __init__(self,aPageName,anAllPageList):
 213         self._searcher1=FuzzyNameSearcher(aPageName,anAllPageList)
 214         self._searcher2=IncludingNameSearcher(aPageName,anAllPageList)
 215     def searchPages(self,isSorted=0):
 216         self._searcher1.searchPages(isSorted)
 217         self._searcher2.searchPages(isSorted)
 218         self.similarPages=getUnion(self._searcher1.getSimilarPages(),self._searcher2.getSimilarPages())
 219         if isSorted:
 220             self.similarPages.sort()
 221 
 222 #class PagesSharingInitialOrFinalWord:
 223 #    def __init__(self,aPageName):
 224 #        self.pageName=aPageName
 225 #
 226 #    def getMatchesStartEnd(self, s_re=re.compile('([%s][%s]+)' % (config.upperletters, config.lowerletters)),
 227 #                                 e_re=re.compile('([%s][%s]+)$' % (config.upperletters, config.lowerletters)),
 228 #                                 s_re2=re.compile('([^ ]+)' ),
 229 #                                 e_re2=re.compile('([^ ]+)$')):
 230 #        # figure the start and end words
 231 #        try:
 232 #            match = s_re.match(self.pageName)
 233 #            start = match.group(1)
 234 #            s_len = len(start)
 235 #            match = e_re.search(self.pageName)
 236 #            end = match.group(1)
 237 #            e_len = len(end)
 238 #        except:
 239 #            match = s_re2.match(self.pageName)
 240 #            start = match.group(1)
 241 #            s_len = len(start)
 242 #            match = e_re2.search(self.pageName)
 243 #            end = match.group(1)
 244 #            e_len = len(end)
 245 #
 246 #        # find any matching pages
 247 #        matches = {}
 248 #        for anypage in wikiutil.getPageList(config.text_dir):
 249 #            if anypage == self.pageName:  # skip current page
 250 #                continue
 251 #            p_len = len(anypage)
 252 #            if p_len > s_len and anypage[:s_len] == start:
 253 #                matches[anypage] = 1
 254 #            if p_len > e_len and anypage[-e_len:] == end:
 255 #                matches[anypage] = matches.get(anypage, 0) + 2
 256 #        return matches,start,end
 257 #
 258 #    def getLikePagesOutput(self,aMatches,aStart,anEnd):
 259 #        # more than one match, list 'em
 260 #        st=StringIO()
 261 #        temp=sys.stdout
 262 #        sys.stdout=st
 263 #        keys = aMatches.keys()
 264 #        keys.sort()
 265 #        self.showMatches(aMatches, keys, 3, "%s...%s" % (aStart, anEnd))
 266 #        self.showMatches(aMatches, keys, 1, "%s..." % (aStart,))
 267 #        self.showMatches(aMatches, keys, 2, "...%s" % (anEnd,))
 268 #        texts=st.getvalue()
 269 #        sys.stdout=temp
 270 #        return texts
 271 #
 272 #    def showMatches(self,matches, keys, match, title):
 273 #        matchcount = matches.values().count(match)
 274 #
 275 #        if matchcount:
 276 #            print '<b>' + _('%(matchcount)d %(matches)s for "%(title)s"') % {
 277 #                'matchcount': matchcount,
 278 #                'matches': (_(' match'), _(' matches'))[matchcount != 1],
 279 #                'title': title} + '</b>'
 280 #            print "<ul>"
 281 #            for key in keys:
 282 #                if matches[key] == match:
 283 #                    page = Page(key)
 284 #                    print '<li><a href="%s">%s</a>' % (
 285 #                        wikiutil.quoteWikiname(page.page_name),
 286 #                        page.split_title())
 287 #            print "</ul>"
 288 #
 289 
 290 #def executeOld(pagename, form):
 291 #    pagesInitialFinal=PagesSharingInitialOrFinalWord(pagename)
 292 #    matches,start,end=pagesInitialFinal.getMatchesStartEnd()
 293 #
 294 #    # no matches :(
 295 #    if not matches:
 296 #        return Page(pagename).send_page(form,
 297 #            msg='<strong>' + _('No pages match "%s"!') % (pagename,) + '</strong>')
 298 #
 299 #    # one match - display it
 300 #    if len(matches) == 1:
 301 #        return Page(matches.keys()[0]).send_page(form,
 302 #            msg='<strong>' + _('Exactly one matching page for "%s" found!') % (pagename,) + '</strong>')
 303 #
 304 #    webapi.http_headers()
 305 #    wikiutil.send_title(_('Multiple matches for "%s...%s"') % (start, end),  
 306 #        pagename=pagename)
 307 #
 308 #    sys.stdout.write(pagesInitialFinal.getLikePagesOutput(matches,start,end))
 309 #    wikiutil.send_footer(pagename)
 310 #

MoinMoin: ActionMarket/FuzzyLikePages/Code (last edited 2008-01-19 01:03:12 by KennethLV)