Attachment 'moin-1.3_attsearch.diff'
Download 1 diff -u -r -P /home/wr/install/moin-1.3/MoinMoin/action/AttachFile.py ./action/AttachFile.py
2 --- /home/wr/install/moin-1.3/MoinMoin/action/AttachFile.py 2004-08-18 07:00:26.000000000 +0200
3 +++ ./action/AttachFile.py 2004-08-25 15:59:38.727164243 +0200
4 @@ -51,19 +51,33 @@
5 """ Get directory where attachments for page `pagename` are stored.
6 """
7 if htdocs_access(request):
8 + print "a"
9 # direct file access via webserver, from public htdocs area
10 pagename = wikiutil.quoteWikinameFS(pagename)
11 attach_dir = os.path.join(request.cfg.attachments['dir'], pagename, "attachments")
12 else:
13 + print "b"
14 # send file via CGI, from page storage area
15 attach_dir = wikiutil.getPagePath(request, pagename, "attachments", check_create=create)
16
17 if create and not os.path.isdir(attach_dir):
18 filesys.makeDirs(attach_dir)
19
20 + print "attach_dir=",attach_dir
21 return attach_dir
22
def getTextVersionDir(request, pagename, create=0):
    """ Return the cache directory holding text versions of attachments.

        For a page PageName this is data/cache/AttachSearch/PageName below
        the wiki's data directory.

        @param request: the request object (its cfg.data_dir is read)
        @param pagename: name of the page owning the attachments
        @param create: if true, create the directory when it is missing
        @rtype: string
        @return: path of the text version directory
    """
    # NOTE(review): pagename is used unquoted here, while the htdocs branch
    # of getAttachDir quotes it with quoteWikinameFS - confirm this is wanted.
    cache_path = os.path.join(request.cfg.data_dir, "cache", "AttachSearch", pagename)

    if create:
        if not os.path.isdir(cache_path):
            filesys.makeDirs(cache_path)

    return cache_path
33 +
34 +
35 def getAttachUrl(pagename, filename, request, addts=0):
36 """ Get URL that points to attachment `filename` of page `pagename`.
37
38 diff -u -r -P /home/wr/install/moin-1.3/MoinMoin/action/fullsearch.py ./action/fullsearch.py
39 --- /home/wr/install/moin-1.3/MoinMoin/action/fullsearch.py 2004-08-18 07:00:31.000000000 +0200
40 +++ ./action/fullsearch.py 2004-08-23 16:33:20.000000000 +0200
41 @@ -38,7 +38,8 @@
42 query = search.QueryParser(literal=request.form.has_key('literal'),
43 case=case).parse_query(needle)
44
45 - hits = search.searchPages(request, query)
46 + # hits = search.searchPages(request, query)
47 + hits = search.searchEverything(request, query)
48
49 search.sort_by_weight(hits)
50 formatter = Formatter(request)
51 diff -u -r -P /home/wr/install/moin-1.3/MoinMoin/attach2txt/__init__.py ./attach2txt/__init__.py
52 --- /home/wr/install/moin-1.3/MoinMoin/attach2txt/__init__.py 1970-01-01 01:00:00.000000000 +0100
53 +++ ./attach2txt/__init__.py 2004-08-24 10:49:38.000000000 +0200
54 @@ -0,0 +1,16 @@
55 +# -*- coding: iso-8859-1 -*-
56 +"""
57 + MoinMoin - Attachment converter package
58 +
59 + @copyright: 2004 Willi Richert <w.richert@gmx.net>
60 + @license: GNU GPL, see COPYING for details.
61 +"""
62 +
63 +
from MoinMoin.util import pysupport

# Result of pysupport.getPackageModules() for this package; presumably the
# names of the converter modules found next to this file - TODO confirm.
# (The debug print that used to follow was removed: writing to stdout at
# import time ends up in the HTTP response when running as CGI.)
modules = pysupport.getPackageModules(__file__)

import pdf2txt

# Maps a file suffix (without the dot) to a converter callable taking
# (attachment_filename, text_filename) and returning 0 on success.
converter_mapping = {"pdf": pdf2txt.convert}
71
72 diff -u -r -P /home/wr/install/moin-1.3/MoinMoin/attach2txt/pdf2txt.py ./attach2txt/pdf2txt.py
73 --- /home/wr/install/moin-1.3/MoinMoin/attach2txt/pdf2txt.py 1970-01-01 01:00:00.000000000 +0100
74 +++ ./attach2txt/pdf2txt.py 2004-08-25 16:17:40.954215485 +0200
75 @@ -0,0 +1,26 @@
76 +# -*- coding: iso-8859-1 -*-
77 +"""
78 + MoinMoin - pdf to txt converter.
79 +
80 + You will need pdftotext (xpdf package) in the PATH. Works only on Linux.
81 +
82 + @copyright: 2004 by Willi Richert (w.richert@gmx.net)
83 + @license: GNU GPL, see COPYING for details.
84 +"""
85 +
86 +import os, mimetypes, time, urllib
87 +from MoinMoin import config, user, util, wikiutil
88 +from MoinMoin.Page import Page
89 +from MoinMoin.util import MoinMoinNoFooter, filesys
90 +
converter_name = __name__.split('.')[-1]


def convert(att_fn, txt_fn):
    """ Convert the PDF file `att_fn` to plain text stored in `txt_fn`.

        Runs the external `pdftotext` program, which must be in the PATH.
        If the conversion fails, `txt_fn` is left as an empty file.

        @param att_fn: file name of the PDF attachment
        @param txt_fn: file name the extracted text is written to
        @rtype: int
        @return: 0 on success, 1 if the conversion failed
    """
    # Pass the file names as an argument vector instead of building a shell
    # command line: attachment names are chosen by the uploader and must
    # never be interpreted by a shell.
    try:
        ret = os.spawnlp(os.P_WAIT, "pdftotext", "pdftotext", att_fn, txt_fn)
    except OSError:
        # the process could not be started at all
        ret = -1

    if ret != 0:
        open(txt_fn, "w").close() # empty the file
        return 1
    return 0
102
103 diff -u -r -P /home/wr/install/moin-1.3/MoinMoin/Attachment.py ./Attachment.py
104 --- /home/wr/install/moin-1.3/MoinMoin/Attachment.py 1970-01-01 01:00:00.000000000 +0100
105 +++ ./Attachment.py 2004-08-25 16:14:50.309377429 +0200
106 @@ -0,0 +1,187 @@
107 +# -*- coding: iso-8859-1 -*-
108 +"""
109 + MoinMoin - Attachment class. Derived from Page.py.
110 +
111 + @copyright: 2004 by Willi Richert <w.richert@gmx.net>
112 + @license: GNU GPL, see COPYING for details.
113 +"""
114 +
115 +# Imports
116 +import os.path
117 +from MoinMoin import config
118 +#import MoinMoin.util.web
119 +from MoinMoin.logfile import eventlog
120 +from MoinMoin.action import AttachFile
121 +from MoinMoin.attach2txt import converter_mapping
122 +from MoinMoin.wikiutil import quoteWikinameFS
123 +
class Attachment:
    """Attachment - Manage an (immutable) attachment associated with a page.

    For search, a plain text version of each attachment is cached in
    data/cache/AttachSearch/PageName/filename.txt and regenerated whenever
    the attachment is newer than the cached text.
    """

    def __init__(self, request, att_name, page, **keywords):
        """
        Create attachment object.

        @param request: the request object
        @param att_name: file name of the attachment, without the page name
        @param page: page object owning the attachment (its page_name is read)
        @keyword formatter: optional formatter, used by link_to()
        """
        self.request = request
        self.att_name = att_name
        self.formatter = keywords.get('formatter')

        self._assoc_page_name = page.page_name

        att_dir = AttachFile.getAttachDir(self.request, self._assoc_page_name)
        self.att_filename = os.path.join(att_dir, self.att_name)
        txt_dir = AttachFile.getTextVersionDir(self.request, self._assoc_page_name, create=1)
        self.txt_filename = os.path.join(txt_dir, self.att_name + ".txt")

        self.suffix = os.path.splitext(self.att_name)[1][1:] # suffix without the dot
        self._raw_body = None
        self._raw_body_modified = 0
        self.hilite_re = None

    def exists(self):
        """
        Does this attachment exist?

        @rtype: bool
        @return: true, if the attachment file exists
        """
        return os.path.exists(self.att_filename)

    def size(self):
        """
        Get Attachment size.

        @rtype: int
        @return: length of the raw body if it is already loaded, else the
                 size of the attachment file; 0 for non-existent attachments.
        """
        if self._raw_body is not None:
            return len(self._raw_body)

        try:
            return os.path.getsize(self.att_filename)
        except EnvironmentError:
            import errno, sys
            # a missing file simply has no size; anything else is a real error
            if sys.exc_info()[1].errno == errno.ENOENT:
                return 0
            raise

    def _needs_conversion(self):
        """
        Tell whether the cached text version is missing or outdated.

        @rtype: bool
        @return: true, if the attachment has to be (re)converted
        """
        try:
            if not os.path.isfile(self.txt_filename):
                return 1
            att_mtime = os.path.getmtime(self.att_filename)
            txt_mtime = os.path.getmtime(self.txt_filename)
        except os.error:
            return 1
        # compare the two modification times (not a time with a file name)
        return att_mtime > txt_mtime

    def getTextVersion(self, request, create=0):
        """ Returns the extracted textual content of the attachment, if possible.

        @param request: the request object (unused, kept for compatibility)
        @param create: unused, kept for compatibility
        @rtype: string
        @return: Textual content of the attachment, "" if it cannot be
                 converted (no converter for its suffix or conversion failed).
        """
        if self._needs_conversion():
            # suffixes are matched case-insensitively ("PDF" == "pdf")
            converter = converter_mapping.get(self.suffix.lower())
            if converter is None:
                # no tool available for this kind of attachment
                return ""
            if converter(self.att_filename, self.txt_filename) != 0:
                return ""

        txt = open(self.txt_filename, "r")
        try:
            return txt.read()
        finally:
            txt.close()

    def get_raw_body(self):
        """
        Load the raw textual version of the attachment.

        @rtype: string
        @return: raw text contents of this attachment, "" if not convertable
        """
        if self._raw_body is None:
            self.set_raw_body(self.getTextVersion(self.request))

        return self._raw_body

    def set_raw_body(self, body, modified=0):
        """
        Set the raw body text (prevents loading from disk).

        @param body: raw body text
        @param modified: 1 means that we internally modified the raw text and
            that it is not in sync with the file on disk.
        """
        self._raw_body = body
        self._raw_body_modified = modified

    def link_to(self, request, text=None, querystr=None, anchor=None, **kw):
        """
        Return HTML markup that links to the page owning this attachment.
        See wikiutil.link_tag() for possible keyword parameters.

        @param request: the request object
        @param text: inner text of the link, defaults to the attachment name
        @param querystr: the query string to add after a "?" after the url
        @param anchor: if specified, make a link to this anchor
        @keyword on: opening/closing tag only
        @keyword attachment_indicator: if 1, add attachment indicator after link tag
        @keyword css_class: css class to use
        @rtype: string
        @return: formatted link
        """
        # imported locally, the module header does not provide these names
        import urllib
        from MoinMoin import wikiutil
        from MoinMoin.util import web

        # TODO: link to the attachment itself, e.g. by means of
        # AttachFile.getAttachUrl(self._assoc_page_name, self.att_name, request, addts=0)
        text = text or self.att_name
        fmt = getattr(self, 'formatter', None)

        url = wikiutil.quoteWikinameURL(self._assoc_page_name)
        if querystr:
            url = "%s?%s" % (url, web.makeQueryString(querystr))
        if anchor:
            url = "%s#%s" % (url, urllib.quote_plus(anchor.encode(config.charset)))

        # create a link to attachments if any exist
        attach_link = ''
        if kw.get('attachment_indicator', 0):
            attach_link = AttachFile.getIndicator(request, self._assoc_page_name)

        if not self.exists():
            kw['css_class'] = 'nonexistent'
            if request.user.show_nonexist_qm:
                return wikiutil.link_tag(request, url,
                                         '?', formatter=fmt, **kw) + text + attach_link

        return wikiutil.link_tag(request, url, text, formatter=fmt, **kw) + attach_link
294
295 diff -u -r -P /home/wr/install/moin-1.3/MoinMoin/formatter/text_html.py ./formatter/text_html.py
296 --- /home/wr/install/moin-1.3/MoinMoin/formatter/text_html.py 2004-08-18 07:00:33.000000000 +0200
297 +++ ./formatter/text_html.py 2004-08-24 09:02:59.000000000 +0200
298 @@ -100,6 +100,14 @@
299 apply(FormatterBase.pagelink, (self, on, pagename), kw)
300 return Page(self.request, pagename, formatter=self).link_to(self.request, on=on, **kw)
301
302 + def attachlink(self, on, attname='', **kw):
303 + """ Link to an attachment.
304 +
305 + See wikiutil.link_tag() for possible keyword parameters.
306 + """
307 + apply(FormatterBase.pagelink, (self, on, attname), kw)
308 + return Attachment(self.request, attname, formatter=self).link_to(self.request, on=on, **kw)
309 +
310 def interwikilink(self, on, interwiki='', pagename='', **kw):
311 if not on: return '</a>'
312
313 diff -u -r -P /home/wr/install/moin-1.3/MoinMoin/search.py ./search.py
314 --- /home/wr/install/moin-1.3/MoinMoin/search.py 2004-08-18 07:00:25.000000000 +0200
315 +++ ./search.py 2004-08-25 16:15:01.523658101 +0200
316 @@ -5,10 +5,12 @@
317 @license: GNU GPL, see COPYING for details
318 """
319
320 -import re, time, sys, urllib
321 +import re, time, sys, urllib, os
322 #sys.path.append('..')
323 from MoinMoin import wikiutil, config
324 from MoinMoin.Page import Page
325 +from MoinMoin.action import AttachFile
326 +from Attachment import Attachment
327
328 #try:
329 # import xapian
330 @@ -165,11 +167,11 @@
331 def highlight_re(self):
332 return u"(%s)" % self.pattern
333
334 - def search(self, page):
335 - body = page.get_raw_body()
336 + def search(self, obj): # obj is page or attachment
337 + body = obj.get_raw_body()
338
339 pos = 0
340 - fragments = self.titlesearch.search(page)
341 + fragments = self.titlesearch.search(obj)
342 if fragments is None: fragments = []
343 while 1:
344 match = self.search_re.search(body, pos)
345 @@ -205,8 +207,11 @@
346 def highlight_re(self):
347 return u"(%s)" % self.pattern
348
349 - def search(self, page):
350 - match = self.search_re.search(page.page_name)
351 + def search(self, obj):
352 + if isinstance(obj, Page):
353 + match = self.search_re.search(obj.page_name)
354 + else:
355 + match = self.search_re.search(obj.att_name)
356 if ((self.negated and match) or
357 (not self.negated and not match)):
358 return None
359 @@ -230,10 +235,9 @@
360 ### Results
361 ############################################################################
362
363 -
364 -class FoundPage:
365 - def __init__(self, page_name, matches=[], page=None):
366 - self.page_name = page_name
367 +class FoundObject:
368 + def __init__(self, name, matches=[], page=None):
369 + self.page_name = name
370 self.page = page
371 self._matches = matches
372
373 @@ -252,11 +256,20 @@
374 def get_matches(self):
375 return self._matches[:]
376
377 +
class FoundPage(FoundObject):
    """ Search hit for a wiki page; adds nothing to FoundObject. """
    pass
380
381 -
382 -class FoundAttachment(FoundPage):
class FoundAttachment(FoundObject):
    """
    Search hit for an attachment; currently adds nothing to FoundObject.

    The attachments' text versions are saved in
    data/cache/AttachSearch/PageName/filename
    """
    # TODO: needs to be more attachment like
    pass
390
391 +
392 +
393 class Match:
394 def __init__(self, start=0, end=0):
395 self.start = start
396 @@ -382,6 +395,40 @@
397
398 return hits
399
def searchEverything(request, query, **kw):
    """
    Search the text of all pages and their attachments' content for query.

    Pages the current user may not read are skipped together with their
    attachments.

    @param request: the request object
    @param query: the expression we want to search for
    @rtype: list
    @return: List of FoundPage and FoundAttachment objects
    """
    hits = []
    for page_name in wikiutil.getPageList(request.cfg.text_dir):
        if not request.user.may.read(page_name):
            continue

        page = Page(request, page_name)
        result = query.search(page)
        if result:
            hits.append(FoundPage(page_name, result))

        # search now in all attachments of this page
        attach_dir = AttachFile.getAttachDir(request, page_name)
        if not os.path.isdir(attach_dir):
            continue

        for att in os.listdir(attach_dir):
            # only regular files are attachments, skip anything else
            if not os.path.isfile(os.path.join(attach_dir, att)):
                continue

            result = query.search(Attachment(request, att, page)) # TODO: argument hides as a page
            if result:
                # TODO: append not the page name but the attachment itself directly by means of
                # FoundAttachment (see the class stub above)
                hits.append(FoundAttachment(page_name, result))

    return hits
433 +
434
435 ##############################################################################
436 ### Sort results
Attached Files
To refer to attachments on a page, use attachment:filename, as shown below in the list of files. Do NOT use the URL of the [get] link, since this is subject to change and can break easily. You are not allowed to attach a file to this page.