Attachment 'search.py'
1 """
2 MoinMoin search engine
3
4 @copyright: Florian Festi TODO: email
5 @license: GNU GPL, see COPYING for details
6 """
7
8 import re, time, sys, StringIO
9 from MoinMoin import wikiutil, config
10 from MoinMoin.Page import Page
11
12 #try:
13 # import xapian
14 #except ImportError:
15 # xapian = False
16
17 #############################################################################
18 ### query objects
19 #############################################################################
20
21 class BaseExpression:
22 """ Base class for all search terms """
23
24 def __init__(self):
25 self.negated = 0
26
27 def __str__(self):
28 return unicode(self).encode(config.charset, 'replace')
29
30 def negate(self):
31 """ Negate the result of this term """
32 self.negated = 1
33
34 def search(self, page):
35 """ Search a page
36
37 Returns a list of Match objects, or None if the term didn't find
38 anything (vice versa if negate() was called). Terms containing
39 other terms must call this method to aggregate the results.
40 This base class returns a true value ([Match()]) if negated.
41 """
42 if self.negated:
43 # XXX why?
44 return [Match()]
45 else:
46 return None
47
48 def costs(self):
49 """ estimated time to calculate this term
50
51 The number is relative to other terms and has no real unit.
52 It allows the fast searches to be run first.
53 """
54 return 0
55
56 def highlight_re(self):
57 """ Return a regular expression of what the term searches for
58
59 Used to display the needle in the page.
60 """
61 return ''
62
63 def indexed_query(self):
64 """ Experimental/unused
65
66 May become interface to the indexing search engine
67 """
68 return self
69
70 def _build_re(self, pattern, use_re=False, case=False):
71 """ Make a regular expression out of a text pattern """
72 if case:
73 # case sensitive
74 flags = re.U
75 else:
76 # ignore case
77 flags = re.U | re.I
78
79 if use_re:
80 try:
81 self.search_re = re.compile(pattern, flags)
82 except re.error:
83 pattern = re.escape(pattern)
84 self.pattern = pattern
85 self.search_re = re.compile(pattern, flags)
86 else:
87 pattern = re.escape(pattern)
88 self.search_re = re.compile(pattern, flags)
89 self.pattern = pattern
90
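# Illustrative note (not part of the original file): with use_re=True an
# invalid pattern falls back to a literal search, e.g. (hypothetical values)
#
#     term = TextSearch(u"foo(", use_re=True)  # "foo(" is not a valid regex
#     term.pattern == re.escape(u"foo(")       # escaped and matched literally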
91
92 class AndExpression(BaseExpression):
93 """ A term connecting several subterms with a logical AND """
94
95 operator = ' '
96
97 def __init__(self, *terms):
98 self._subterms = list(terms)
99 self._costs = 0
100 for t in self._subterms:
101 self._costs += t.costs()
102 self.negated = 0
103
104 def append(self, expression):
105 """ Append another term """
106 self._subterms.append(expression)
107 self._costs += expression.costs()
108
109 def subterms(self):
110 return self._subterms
111
112 def costs(self):
113 return self._costs
114
115 def __unicode__(self):
116 result = ''
117 for t in self._subterms:
118 result += self.operator + unicode(t)
119 return u'[' + result[len(self.operator):] + u']'
120
121 def pageFilter(self):
122 """ Return a page filtering function
123
124 This function is used to filter the page list before we search
125 it.
126
127 Return a function that gets a page name and returns bool.
128 """
129 # Sort terms by cost, then get all title searches
130 self.sortByCost()
131 terms = [term for term in self._subterms
132 if isinstance(term, TitleSearch)]
133 if terms:
134 # Create and return a filter function
135 def filter(name):
136 """ A function that return True if all terms filter name """
137 for term in terms:
138 filter = term.pageFilter()
139 if not filter(name):
140 return False
141 return True
142 return filter
143
144 return None
145
146 def sortByCost(self):
147 tmp = [(term.costs(), term) for term in self._subterms]
148 tmp.sort()
149 self._subterms = [item[1] for item in tmp]
150
151 def search(self, page):
152 """ Search for each term, cheap searches first """
153 self.sortByCost()
154 matches = []
155 for term in self._subterms:
156 result = term.search(page)
157 if not result:
158 return None
159 matches.extend(result)
160 return matches
161
162 def highlight_re(self):
163 result = []
164 for s in self._subterms:
165 highlight_re = s.highlight_re()
166 if highlight_re: result.append(highlight_re)
167
168 return '|'.join(result)
169
170 def indexed_query(self):
171 indexed_terms = []
172 sub_terms = []
173 for term in self._subterms:
174 term = term.indexed_query()
175 if isinstance(term, BaseExpression):
176 sub_terms.append(term)
177 else:
178 indexed_terms.append(term)
179
180 if indexed_terms:
181
182 if not sub_terms:
183 return indexed_terms
184
185 def indexed_search(self):
186 if self.indexed_query:
187 indexed_result = self.indexed_query.indexed_query()
188 result = []
189 for foundpage in indexed_result:
190 matches = self.search(foundpage.page)
191 if matches:
192 result.append(foundpage)
193 foundpage.add_matches(matches)
194
195
196 class OrExpression(AndExpression):
197 """ A term connecting several subterms with a logical OR """
198
199 operator = ' or '
200
201 def search(self, page):
202 """ Search page with terms, cheap terms first
203
204 XXX Do we have any reason to sort here? We are not breaking out
205 of the search in any case.
206 """
207 self.sortByCost()
208 matches = []
209 for term in self._subterms:
210 result = term.search(page)
211 if result:
212 matches.extend(result)
213 return matches
214
215
216 class TextSearch(BaseExpression):
217 """ A term that does a normal text search
218
219 Both page content and the page title are searched, using an
220 additional TitleSearch term.
221 """
222
223 def __init__(self, pattern, use_re=False, case=False):
224 """ Init a text search
225
226 @param pattern: pattern to search for, ascii string or unicode
227 @param use_re: treat pattern as re or plain text, bool
228 @param case: do case sensitive search, bool
229 """
230 self._pattern = unicode(pattern)
231 self.negated = 0
232 self._build_re(self._pattern,
233 use_re=use_re, case=case)
234 self.titlesearch = TitleSearch(self._pattern, use_re=use_re, case=case)
235
236 def costs(self):
237 return 10000
238
239 def __unicode__(self):
240 if self.negated: neg = '-'
241 else: neg = ''
242 return u'%s"%s"' % (neg, unicode(self._pattern))
243
244 def highlight_re(self):
245 return u"(%s)" % self._pattern
246
247 def pageFilter(self):
248 """ Page filter function for single text search """
249 return None
250
251 def search(self, page):
252 matches = []
253
254 # Search in page name
255 results = self.titlesearch.search(page)
256 if results:
257 matches.extend(results)
258
259 # Search in page body
260 body = page.get_raw_body()
261 for match in self.search_re.finditer(body):
262 matches.append(TextMatch(match.start(),match.end()))
263
264 # Decide what to do with the results.
265 if ((self.negated and matches) or
266 (not self.negated and not matches)):
267 return None
268 elif matches:
269 return matches
270 else:
271 # XXX why not return None or empty list?
272 return [Match()]
273
274 def indexed_query(self):
275 return xapian.Query(self._pattern)
276
277
278 class TitleSearch(BaseExpression):
279 """ Term searches in pattern in page title only """
280
281 def __init__(self, pattern, use_re=False, case=False):
282 """ Init a title search
283
284 @param pattern: pattern to search for, ascii string or unicode
285 @param use_re: treat pattern as re or plain text, bool
286 @param case: do case sensitive search, bool
287 """
288 self._pattern = pattern
289 self.negated = 0
290 self._build_re(unicode(pattern), use_re=use_re, case=case)
291
292 def costs(self):
293 return 100
294
295 def __unicode__(self):
296 if self.negated: neg = '-'
297 else: neg = ''
298 return u'%s!"%s"' % (neg, unicode(self._pattern))
299
300 def highlight_re(self):
301 return u"(%s)" % self._pattern
302
303 def pageFilter(self):
304 """ Page filter function for single title search """
305 def filter(name):
306 match = self.search_re.search(name)
307 if ((self.negated and match) or
308 (not self.negated and not match)):
309 return False
310 return True
311 return filter
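# Usage sketch for pageFilter (added for illustration; not in the original):
#
#     flt = TitleSearch(u"help").pageFilter()
#     flt(u"HelpContents")   # -> True  (case-insensitive match in the name)
#     flt(u"FrontPage")      # -> False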
312
313 def search(self, page):
314 # Get matches in page name
315 matches = []
316 for match in self.search_re.finditer(page.page_name):
317 matches.append(TitleMatch(match.start(),match.end()))
318
319 if ((self.negated and matches) or
320 (not self.negated and not matches)):
321 return None
322 elif matches:
323 return matches
324 else:
325 # XXX why not return None or empty list?
326 return [Match()]
327
328 def indexed_query(self):
329 return self
330
331
332 class IndexedQuery:
333 """unused and experimental"""
334 def __init__(self, queryobject):
335 self.queryobject = queryobject
336 def indexed_search(self):
337 pass
338 # return list of results
339
340
341 ############################################################################
342 ### Results
343 ############################################################################
344
345 class Match:
346 """ Base class for all Matches (found pieces of pages).
347
348 This class represents an empty True value as returned from negated searches.
349 """
350 # Default match weight
351 _weight = 1.0
352
353 def __init__(self, start=0, end=0):
354 self.start = start
355 self.end = end
356
357 def __len__(self):
358 return self.end - self.start
359
360 def __eq__(self, other):
361 equal = (self.__class__ == other.__class__ and
362 self.start == other.start and
363 self.end == other.end)
364 return equal
365
366 def __ne__(self, other):
367 return not self.__eq__(other)
368
369 def view(self):
370 return ''
371
372 def weight(self):
373 return self._weight
374
375
376 class TextMatch(Match):
377 """ Represents a match in the page content """
378 pass
379
380
381 class MatchInAttachment(Match):
382 """ Represents a match in a attachment content
383
384 Not used yet.
385 """
386 pass
387
388
389 class TitleMatch(Match):
390 """ Represents a match in the page title
391
392 Has more weight than a match in the page content.
393 """
394 # Matches in titles are much more important in wikis. This setting
395 # seems to make all pages that have title matches appear before
396 # pages whose title does not match.
397 _weight = 100.0
398
399
400 class FoundPage:
401 """ Represents a page in a search result """
402
403 def __init__(self, page_name, matches=None, page=None):
404 self.page_name = page_name
405 self.page = page
406 if matches is None:
407 matches = []
408 self._matches = matches
409
410 def weight(self, unique=1):
411 """ returns how important this page is for the terms searched for
412
413 Summarize the weight of all page matches
414
415 @param unique: ignore identical matches
416 @rtype: int
417 @return: page weight
418 """
419 weight = 0
420 for match in self.get_matches(unique=unique):
421 weight += match.weight()
422 # More sophisticated things to be added, like increase
423 # weight of near matches.
424 return weight
425
426 def add_matches(self, matches):
427 """ Add found matches """
428 self._matches.extend(matches)
429
430 def get_matches(self, unique=1, sort='start', type=Match):
431 """ Return all matches of type sorted by sort
432
433 @param unique: return only unique matches (bool)
434 @param sort: match attribute to sort by (string)
435 @param type: type of match to return (Match or sub class)
436 @rtype: list
437 @return: list of matches
438 """
439 if unique:
440 matches = self._unique_matches(type=type)
441 if sort == 'start':
442 # matches already sorted by match.start, finished.
443 return matches
444 else:
445 matches = self._matches
446
447 # Filter by type and sort by sort using fast schwartzian
448 # transform.
449 if sort == 'start':
450 tmp = [(match.start, match) for match in matches
451 if isinstance(match, type)]
452 else:
453 tmp = [(match.weight(), match) for match in matches
454 if isinstance(match, type)]
455 tmp.sort()
456 if sort == 'weight':
457 tmp.reverse()
458 matches = [item[1] for item in tmp]
459
460 return matches
461
462 def _unique_matches(self, type=Match):
463 """ Get a list of unique matches of type
464
465 The result is sorted by match.start, because it's easy to remove
466 duplicates like this.
467
468 @param type: type of match to return
469 @rtype: list
470 @return: list of matches of type, sorted by match.start
471 """
472 # Filter by type and sort by match.start using fast schwartzian
473 # transform.
474 tmp = [(match.start, match) for match in self._matches
475 if isinstance(match, type)]
476 tmp.sort()
477
478 if not len(tmp):
479 return []
480
481 # Get first match into matches list
482 matches = [tmp[0][1]]
483
484 # Add rest of matches ignoring identical matches
485 for item in tmp[1:]:
486 if item[1] == matches[-1]:
487 continue
488 matches.append(item[1])
489
490 return matches
491
492
493 class FoundAttachment(FoundPage):
494 """ Represent an attachment in search results """
495 pass
496
497
498 ##############################################################################
499 ### Parse Query
500 ##############################################################################
501
502
503 class QueryParser:
504 """
505 Converts a string into a tree of query objects
506 using recursive top-down parsing
507 """
508
509 def __init__(self, **kw):
510 """
511 @keyword titlesearch: treat all terms as title searches
512 @keyword case: do case sensitive search
513 @keyword regex: treat all terms as regular expressions
514 """
515 self.titlesearch = kw.get('titlesearch', 0)
516 self.case = kw.get('case', 0)
517 self.regex = kw.get('regex', 0)
518
519 def parse_query(self, query):
520 """ transform an string into a tree of Query objects"""
521 self._query = query
522 result = self._or_expression()
523 if result is None:
524 result = BaseExpression()
525 return result
526
527 def _or_expression(self):
528 result = self._and_expression()
529 if self._query:
530 result = OrExpression(result)
531 while self._query:
532 q = self._and_expression()
533 if q:
534 result.append(q)
535 return result
536
537 def _and_expression(self):
538 result = None
539 while not result and self._query:
540 result = self._single_term()
541 term = self._single_term()
542 if term:
543 result = AndExpression(result, term)
544 else:
545 return result
546 term = self._single_term()
547 while term:
548 result.append(term)
549 term = self._single_term()
550 return result
551
552 def _single_term(self):
553 regex = (r'(?P<NEG>-?)\s*(' + # leading '-'
554 r'(?P<OPS>\(|\)|(or\b(?!$)))|' + # or, (, )
555 r'(?P<MOD>(\w+:)*)' +
556 r'(?P<TERM>("[^"]+")|' +
557 r"('[^']+')|(\S+)))") # search word itself
558 self._query = self._query.strip()
559 match = re.match(regex, self._query, re.U)
560 if not match:
561 return None
562 self._query = self._query[match.end():]
563 ops = match.group("OPS")
564 if ops == '(':
565 result = self._or_expression()
566 if match.group("NEG"): result.negate()
567 return result
568 elif ops == ')':
569 return None
570 elif ops == 'or':
571 return None
572 modifiers = match.group('MOD').split(":")[:-1]
573 text = match.group('TERM')
574 if ((text[0] == text[-1] == '"') or
575 (text[0] == text[-1] == "'")): text = text[1:-1]
576
577 title_search = self.titlesearch
578 regex = self.regex
579 case = self.case
580
581 for m in modifiers:
582 if "title".startswith(m):
583 title_search = True
584 elif "regex".startswith(m):
585 regex = True
586 elif "case".startswith(m):
587 case = True
588
589 if title_search:
590 obj = TitleSearch(text, use_re=regex, case=case)
591 else:
592 obj = TextSearch(text, use_re=regex, case=case)
593
594 if match.group("NEG"):
595 obj.negate()
596 return obj
597
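# Parsing sketch (illustration only, not part of the original file). Given the
# grammar handled by _single_term above, a query string becomes a tree of
# And/Or terms, roughly:
#
#     parser = QueryParser()
#     query = parser.parse_query(u'title:Foo -bar or "exact phrase"')
#     unicode(query)  # -> u'[[!"Foo" -"bar"] or "exact phrase"]'
#     query.costs()   # title-only terms are cheap (100), text searches cost 10000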
598
599 class SearchResults:
600 """ Manage search results, supply different views
601
602 Search results stay valid and can be formatted for
603 many requests, until the wiki content changes.
604
605 For example, one might ask for full page list sorted from A to Z,
606 and then ask for the same list sorted from Z to A. Or sort results
607 by name and then by rank.
608 """
609 # Public functions --------------------------------------------------
610
611 def __init__(self, query, hits, pages, elapsed):
612 self.query = query # the query
613 self.hits = hits # hits list
614 self.sort = None # hits are unsorted initially
615 self.pages = pages # number of pages in the wiki
616 self.elapsed = elapsed # search time
617
618 def sortByWeight(self):
619 """ Sorts found pages by the weight of the matches """
620 tmp = [(hit.weight(), hit.page_name, hit) for hit in self.hits]
621 tmp.sort()
622 tmp.reverse()
623 self.hits = [item[2] for item in tmp]
624 self.sort = 'weight'
625
626 def sortByPagename(self):
627 """ Sorts a list of found pages alphabetical by page name """
628 tmp = [(hit.page_name, hit) for hit in self.hits]
629 tmp.sort()
630 self.hits = [item[1] for item in tmp]
631 self.sort = 'page_name'
632
633 def stats(self, request, formatter):
634 """ Return search statistics, formatted with formatter
635
636 @param request: current request
637 @param formatter: formatter to use
638 @rtype: unicode
639 @return: formatted statistics
640 """
641 _ = request.getText
642 f = formatter
643 output = [
644 f.paragraph(1),
645 # TODO: update to "results of about" in 1.4
646 f.text(_("%(hits)d results out of %(pages)d pages.") %
647 {'hits': len(self.hits), 'pages': self.pages}),
648 u' (%s)' % f.text(_("%.2f seconds") % self.elapsed),
649 f.paragraph(0),
650 ]
651 return ''.join(output)
652
653 def pageList(self, request, formatter, info=0, numbered=1):
654 """ Format a list of found pages
655
656 @param request: current request
657 @param formatter: formatter to use
658 @param info: show match info in title
659 @param numbered: use numbered list for display
660 @rtype: unicode
661 @return: formatted page list
662 """
663 self._reset(request, formatter)
664 f = formatter
665 write = self.buffer.write
666 if numbered:
667 list = f.number_list
668 else:
669 list = f.bullet_list
670 querystr = self.querystring()
671
672 # Add pages formatted as list
673 if self.hits:
674 write(list(1))
675
676 for page in self.hits:
677 matchInfo = ''
678 if info:
679 matchInfo = self.formatInfo(page)
680 item = [
681 f.listitem(1),
682 f.pagelink(1, page.page_name, querystr=querystr),
683 self.formatTitle(page),
684 f.pagelink(0, page.page_name),
685 matchInfo,
686 f.listitem(0),
687 ]
688 write(''.join(item))
689 write(list(0))
690
691 return self.getvalue()
692
693 def pageListWithContext(self, request, formatter, info=1, context=180,
694 maxlines=1):
695 """ Format a list of found pages with context
696
697 The default parameter values will create Google-like search
698 results, as this is the best-known search interface. A good
699 interface is a familiar interface, so unless we have a much better
700 solution (we don't), being like Google is the way to go.
701
702 @param request: current request
703 @param formatter: formatter to use
704 @param info: show match info near the page link
705 @param context: how many characters to show around each match.
706 @param maxlines: how many context lines to show.
707 @rtype: unicode
708 @return: formatted page list with context
709 """
710 self._reset(request, formatter)
711 f = formatter
712 write = self.buffer.write
713 querystr = self.querystring()
714
715 # Add pages formatted as definition list
716 if self.hits:
717 write(f.definition_list(1))
718
719 for page in self.hits:
720 matchInfo = ''
721 if info:
722 matchInfo = self.formatInfo(page)
723 item = [
724 f.definition_term(1),
725 f.pagelink(1, page.page_name, querystr=querystr),
726 self.formatTitle(page),
727 f.pagelink(0, page.page_name),
728 matchInfo,
729 f.definition_term(0),
730 f.definition_desc(1),
731 self.formatContext(page, context, maxlines),
732 f.definition_desc(0),
733 ]
734 write(''.join(item))
735 write(f.definition_list(0))
736
737 return self.getvalue()
738
739 # Private -----------------------------------------------------------
740
741 # These methods are not meant to be used by clients and may change
742 # without notice.
743
744 def formatContext(self, page, context, maxlines):
745 """ Format search context for each matched page
746
747 Try to show the context of the first maxlines interesting matches.
748 """
749 f = self.formatter
750 if not page.page:
751 page.page = Page(self.request, page.page_name)
752 body = page.page.get_raw_body()
753 last = len(body) -1
754 lineCount = 0
755 output = []
756
757 # Get unique text matches sorted by match.start, try to ignore
758 # matches in page header, and show the first maxlines matches.
759 # TODO: when we implement weight algorithm for text matches, we
760 # should get the list of text matches sorted by weight and show
761 # the first maxlines matches.
762 matches = page.get_matches(unique=1, sort='start', type=TextMatch)
763 i, start = self.firstInterestingMatch(page, matches)
764
765 # Format context
766 while i < len(matches) and lineCount < maxlines:
767 match = matches[i]
768
769 # Get context range for this match
770 start, end = self.contextRange(context, match, start, last)
771
772 # Format context lines for matches. Each complete match in
773 # the context will be highlighted, and if the full match is
774 # in the context, we increase the index, and will not show
775 # same match again on a separate line.
776
777 output.append(f.text(u'...'))
778
779 # Get the index of the first match completely within the
780 # context.
781 for j in xrange(0, len(matches)):
782 if matches[j].start >= start:
783 break
784
785 # Add all matches in context and the text between them
786 while 1:
787 match = matches[j]
788 # Ignore matches behind the current position
789 if start < match.end:
790 # Append the text before match
791 if start < match.start:
792 output.append(f.text(body[start:match.start]))
793 # And the match
794 output.append(self.formatMatch(body, match, start))
795 start = match.end
796 # Get the next match, but only if it's completely within the context
797 if j < len(matches) - 1 and matches[j + 1].end <= end:
798 j += 1
799 else:
800 break
801
802 # Add text after last match and finish the line
803 if match.end < end:
804 output.append(f.text(body[match.end:end]))
805 output.append(f.text(u'...'))
806 output.append(f.linebreak(preformatted=0))
807
808 # Increase line and point to the next match
809 lineCount += 1
810 i = j + 1
811
812 output = ''.join(output)
813
814 if not output:
815 # Return the first context characters from the page text
816 output = f.text(page.page.getPageText(length=context))
817 output = output.strip()
818 if not output:
819 # This is a page with no text, only header, for example,
820 # a redirect page.
821 output = f.text(page.page.getPageHeader(length=context))
822
823 return output
824
825 def firstInterestingMatch(self, page, matches):
826 """ Return the first interesting match
827
828 This function is needed only because we don't yet have a weight
829 algorithm for page text matches.
830
831 Try to find the first match in the page text. If we can't find
832 one, we return the first match and start=0.
833
834 @rtype: tuple
835 @return: index of first match, start of text
836 """
837 header = page.page.getPageHeader()
838 start = len(header)
839 # Find first match after start
840 for i in xrange(len(matches)):
841 if matches[i].start >= start:
842 return i, start
843 return 0, 0
844
845 def contextRange(self, context, match, start, last):
846 """ Compute context range
847
848 Add context around each match. If there is no room for context
849 before or after the match, show more context on the other side.
850
851 @param context: context length
852 @param match: current match
853 @param start: context should not start before that index, unless
854 end is past the last character.
855 @param last: last character index
856 @rtype: tuple
857 @return: start, end of context
858 """
859 # Start by giving equal context on both sides of match
860 contextlen = max(context - len(match), 0)
861 cstart = match.start - contextlen / 2
862 cend = match.end + contextlen / 2
863
864 # If the context starts before start, give more context at the end
865 if cstart < start:
866 cend += start - cstart
867 cstart = start
868
869 # But if the end is after last, give context back to the start
870 if cend > last:
871 cstart -= cend - last
872 cend = last
873
874 # Keep context start positive for very short texts
875 cstart = max(cstart, 0)
876
877 return cstart, cend
878
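# Worked example for contextRange (illustrative numbers, not from the original
# source): with context=180 and a match spanning [100, 110), contextlen is
# 170, so cstart = 100 - 85 = 15 and cend = 110 + 85 = 195; the checks above
# then shift the window if it would start before `start` or end past `last`.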
879 def formatTitle(self, page):
880 """ Format page title
881
882 Invoke formatMatch on all unique matches in the page title.
883
884 @param page: found page
885 @rtype: unicode
886 @return: formatted title
887 """
888 # Get unique title matches sorted by match.start
889 matches = page.get_matches(unique=1, sort='start', type=TitleMatch)
890
891 # Format
892 pagename = page.page_name
893 f = self.formatter
894 output = []
895 start = 0
896 for match in matches:
897 # Ignore matches behind the current position
898 if start < match.end:
899 # Append the text before the match
900 if start < match.start:
901 output.append(f.text(pagename[start:match.start]))
902 # And the match
903 output.append(self.formatMatch(pagename, match, start))
904 start = match.end
905 # Add text after match
906 if start < len(pagename):
907 output.append(f.text(pagename[start:]))
908
909 return ''.join(output)
910
911 def formatMatch(self, body, match, location):
912 """ Format single match in text
913
914 Format the part of the match after the current location in the
915 text. Matches behind location are ignored and an empty string is
916 returned.
917
918 @param body: text containing the match
919 @param match: search match in text
920 @param location: current location in text
921 @rtype: unicode
922 @return: formatted match or empty string
923 """
924 start = max(location, match.start)
925 if start < match.end:
926 f = self.formatter
927 output = [
928 f.strong(1),
929 f.text(body[start:match.end]),
930 f.strong(0),
931 ]
932 return ''.join(output)
933 return ''
934
935 def querystring(self):
936 """ Return query string, used in the page link """
937 from MoinMoin.util import web
938
939 querystr = {'highlight': self.query.highlight_re()}
940 querystr = web.makeQueryString(querystr)
941 querystr = wikiutil.escape(querystr)
942 return querystr
943
944 def formatInfo(self, page):
945 """ Return formated match info """
946 # TODO: this will not work with non-html formats
947 template = u'<span class="info"> . . . %s %s</span>'
948 # Count number of unique matches in text of all types
949 count = len(page.get_matches(unique=1))
950 info = template % (count, self.matchLabel[count != 1])
951 return self.formatter.rawHTML(info)
952
953 def getvalue(self):
954 """ Return output in div with css class """
955 write = self.request.write
956 # TODO: this will not work with formatters other than
957 # text_html. We should add a div/section creation method to all
958 # formatters.
959 value = [
960 self.formatter.open('div', attr={'class': 'searchresults'}),
961 self.buffer.getvalue(),
962 self.formatter.close('div'),
963 ]
964 return '\n'.join(value)
965
966 def _reset(self, request, formatter):
967 """ Update internal state before new output
968
969 Do not call this; it should be called only by the instance
970 code.
971
972 Each request might need different translations or other user
973 preferences.
974 """
975 self.buffer = StringIO.StringIO()
976 self.formatter = formatter
977 self.request = request
978 # Use 1 match, 2 matches...
979 _ = request.getText
980 self.matchLabel = (_('match'), _('matches'))
981
982
983 ##############################################################################
984 ### Searching
985 ##############################################################################
986
987 def searchPages(request, query, **kw):
988 """
989 Search the text of all pages for query.
990 @param query: the expression we want to search for
991 @rtype: SearchResults instance
992 @return: search results
993 """
994 from MoinMoin.Page import Page
995 hits = []
996
997 start = time.time()
998
999 filter = query.pageFilter()
1000 if filter:
1001 # Get a list of readable pages, filtered by query page filter.
1002 pages = request.rootpage.getPageList(filter=filter)
1003 else:
1004 # Get an unfiltered list, then filter the hits. This works much
1005 # faster for common cases, and is even faster when you can't
1006 read any page! This might change if we cache the page list
1007 or if storage becomes faster.
1008 pages = request.rootpage.getPageList(user='', exists=0)
1009
1010 # Search through pages
1011 for name in pages:
1012 page = Page(request, name)
1013 result = query.search(page)
1014 if result:
1015 if not filter:
1016 # Filter deleted pages or pages the user can't read.
1017 if not (page.exists() and request.user.may.read(name)):
1018 continue
1019 hits.append(FoundPage(name, result))
1020
1021 elapsed = time.time() - start
1022 count = request.rootpage.getPageCount()
1023 results = SearchResults(query, hits, count, elapsed)
1024 return results
1025
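# End-to-end sketch (for illustration only; `request` and `formatter` come from
# the surrounding wiki code and are not defined here):
#
#     query = QueryParser(titlesearch=0, case=0, regex=0).parse_query(u'wiki engine')
#     results = searchPages(request, query)
#     results.sortByWeight()
#     html = results.pageListWithContext(request, formatter)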