Attachment 'xapian_re_search_opt.patch'
Download 1 diff -r 9fbcd746f135 MoinMoin/search/Xapian/indexing.py
2 --- a/MoinMoin/search/Xapian/indexing.py Fri Mar 12 13:52:00 2010 +0100
3 +++ b/MoinMoin/search/Xapian/indexing.py Sun Mar 14 09:25:38 2010 +0100
4 @@ -97,6 +97,7 @@
5 self.add_field_action('title', INDEX_FREETEXT, weight=100)
6 self.add_field_action('title', STORE_CONTENT)
7 self.add_field_action('content', INDEX_FREETEXT, spell=True)
8 + self.add_field_action('content', STORE_CONTENT)
9 self.add_field_action('domain', INDEX_EXACT)
10 self.add_field_action('domain', STORE_CONTENT)
11 self.add_field_action('lang', INDEX_EXACT)
12 @@ -297,7 +298,7 @@
13 @param page: the page instance
14 """
15 body = page.get_raw_body()
16 -
17 + # TODO: check the category regex below
18 prev, next = (0, 1)
19 pos = 0
20 while next:
21 diff -r 9fbcd746f135 MoinMoin/search/queryparser/expressions.py
22 --- a/MoinMoin/search/queryparser/expressions.py Fri Mar 12 13:52:00 2010 +0100
23 +++ b/MoinMoin/search/queryparser/expressions.py Sun Mar 14 09:25:38 2010 +0100
24 @@ -140,7 +140,32 @@
25 if field_to_check in data:
26 for term in data[field_to_check]:
27 if self.search_re.match(term):
28 - queries.append(connection.query_field(field_to_check, term))
29 + # To speed up the search we try to minimize the length of regex search queries.
30 + # For some search patterns we can shorten the query string,
31 + # e.g. if someone searches for .* then any word in the term matches,
32 + # which means that for showing results we can use the first word.
33 + # For a search pattern like .*text.* we try to match
34 + # (?ims)(?P<value>.\w*text.\w*) against the term. If the result of this match
35 + # also matches the original search pattern, we use this result
36 + # as the query string.
37 + pattern = self.search_re.pattern
38 + if pattern.startswith('.*') and pattern.endswith('.*'):
39 + new_pattern = pattern.replace('.*', '.\w*')
40 + embedded_rawstr = "(?ims)(?P<value>%s)" % new_pattern
41 + match_obj = re.search(embedded_rawstr, term)
42 + if match_obj:
43 + value = match_obj.groups('value')[0]
44 + if self.search_re.match(value):
45 + queries.append(connection.query_field(field_to_check, value))
46 + break
47 + elif pattern == '.*':
48 + queries.append(connection.query_field(field_to_check, term[0]))
49 + break
50 + # queries.append becomes very slow if too much content is appended,
51 + # and the system can run out of memory, so the term is truncated to max_len.
52 + max_len = 3000
53 + queries.append(connection.query_field(field_to_check, term[:max_len]))
54 + break
55 else:
56 # Check all fields
57 for field, terms in data.iteritems():
Attached Files
To refer to attachments on a page, use attachment:filename, as shown below in the list of files. Do NOT use the URL of the [get] link, since this is subject to change and can break easily. You are not allowed to attach a file to this page.