Attachment 'moin_export.py'
Download 1 # -*- coding: iso-8859-1 -*-
2 """
3 MoinMoin - Dump a MoinMoin wiki to static pages
4
5 Copyright (c) 2002, 2003 by Jürgen Hermann <jh@web.de>
6 All rights reserved, see COPYING for details.
7
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2 of the License, or
11 (at your option) any later version.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 ORS modifications:
19 12.01.04 RS upgrade to 1.1., mark all previous ORS changes
20 20.01.04 RS repair indent errors
21 29.06.04 RS handle KeyboardInterrupt
22 07.09.05 RS upgrade to 1.3.5
23 24.09.05 RS use StringIO to buffer output, enable postprocessing
24 """
__version__ = "20040329"

# If the MoinMoin package is not importable from the default sys.path,
# extend sys.path before the imports further down in this file.

#############################################################################
### Helpers
#############################################################################

##HTML_SUFFIX = ".html" # perhaps put this in config.py as html_suffix?

# HTML fragment intended for the %(logo_html)s slot of page_template.
logo_html = '<img src="moinmoin.png">'

# Assigned to request.cfg.url_prefix by the exporter so that links in the
# generated output are relative.
url_prefix = "."

# Skeleton of a static HTML page.  It is filled with the %-operator and a
# mapping that provides: charset, pagename, theme, logo_html, navibar_html,
# pagehtml and timestamp.
page_template = u'''<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
<html>
<head>
<meta http-equiv="content-type" content="text/html; charset=%(charset)s">
<title>%(pagename)s</title>
<link rel="stylesheet" type="text/css" media="all" charset="utf-8" href="%(theme)s/css/common.css">
<link rel="stylesheet" type="text/css" media="screen" charset="utf-8" href="%(theme)s/css/screen.css">
<link rel="stylesheet" type="text/css" media="print" charset="utf-8" href="%(theme)s/css/print.css">
</head>
<body>
<table>
<tr>
<td>
%(logo_html)s
</td>
<td>
%(navibar_html)s
</td>
</tr>
</table>
<hr>
%(pagehtml)s
<hr>
%(timestamp)s
</body>
</html>
'''

# Export format name -> file extension appended to every exported page name.
# Formats missing from this table fall back to ".html" at the lookup site.
SUFFIX = {
    'html': ".html",
    'plain': ".txt",
    'word': ".docdmp",
    'hhk': ".hhk",
    'hhc': ".hhc",
    'tal': ".html",
    'xxml': ".xml",
    'pdf': ".pdfdmp",
}
67 #############################################################################
68 ### Main program
69 #############################################################################
70 import sys,re,string
71 import os, time, StringIO, codecs, shutil
72 from MoinMoin import config, search, wikiutil, Page
73 from MoinMoin.scripts import _util
74 #from MoinMoin.util import pysupport
75 from MoinMoin.request_ors import RequestExportCLI
76
class MoinExporter(_util.Script):
    """Export wiki pages to files in a target directory.

    Every selected page is rendered through the formatter chosen with
    ``--format`` (default ``html``), buffered in memory and then written to
    ``<target-directory>/<quoted page name><extension>``.  Tracebacks of
    pages that fail to render are collected in ``error.log`` inside the
    target directory.
    """

    def __init__(self):
        """Set up default state and register the command-line options."""
        _util.Script.__init__(self, __name__, "[options] <target-directory>")
        self.pageoptions = {}     # keyword arguments forwarded to Page.Page()
        self.exp_format = "html"  # name of the active export format
        self.errcnt = 0           # pages that raised while being rendered

        # --config=DIR
        self.parser.add_option(
            "--config", metavar="DIR", dest="configdir",
            help="Path to wikiconfig.py (or its directory)"
        )

        # --wiki=URL
        self.parser.add_option(
            "--wiki", metavar="WIKIURL", dest="wiki_url",
            help="URL of wiki to dump (e.g. moinmaster.wikiwikiweb.de)"
        )

        # --page=NAME
        self.parser.add_option(
            "--page", metavar="NAME", dest="page",
            help="Dump a single page (with possibly broken links)"
        )

        # --pattern=REGEX
        self.parser.add_option(
            "--pattern", metavar="PATTERN", dest="pattern",
            help="Dump all pages with this pattern"
        )

        # --format=FORMAT
        self.parser.add_option(
            "--format", metavar="FORMAT", dest="exportformat",
            help="Use export format"
        )

    def setOutputDir(self, outdir):
        """Select (and create, if missing) the directory that receives the export.

        The absolute form of *outdir* is stored in ``self.outputdir`` and
        published to the request as ``export_dir``.  ``self.request`` must
        already be set.  A directory that cannot be created is reported
        through ``_util.fatal``.
        """
        self.outputdir = os.path.abspath(outdir)
        if not os.path.isdir(self.outputdir):
            try:
                os.mkdir(self.outputdir)
            except OSError:
                _util.fatal("Cannot create output directory '%s'!" % self.outputdir)
            else:
                _util.log("Created output directory '%s'!" % self.outputdir)
        self.request.export_dir = self.outputdir

    def setFormatter(self, exp_format="html"):
        """Load the formatter plugin for *exp_format* and make it the active one.

        Side effects:
          * ``self.ext`` becomes the file extension for the format (looked up
            in SUFFIX, ".html" when the format is not listed).
          * ``wikiutil.quoteWikinameURL`` is replaced module-wide by a function
            returning the file-system quoted page name plus ``self.ext``, so
            links and file names in the export agree.
          * ``self.formatter``, ``self.pageoptions["formatter"]`` and
            ``self.exp_format`` are updated.

        If no plugin named ``text_<exp_format>`` is found, the stock
        ``text_html`` formatter is used instead.
        """
        mimetype = u"text/%s" % exp_format
        self.ext = SUFFIX.get(mimetype.split("/")[1], '.html')

        # Modify the behaviour of wikiutil.quoteWikinameURL to add the
        # extension.  self.ext is read at call time, not captured here.
        wikiutil.quoteWikinameURL = (
            lambda pagename, qfn=wikiutil.quoteWikinameFS: qfn(pagename) + self.ext
        )

        # Plugin module names use "_" where the mimetype has "/" or ".".
        plugin_name = mimetype.translate({ord(u'/'): u'_', ord(u'.'): u'_'})
        Formatter = wikiutil.importPlugin(self.request.cfg, "formatter",
                                          plugin_name, "Formatter")
        if Formatter is None:
            # No plugin for this format: fall back to the HTML formatter.
            # NOTE(review): self.ext and self.exp_format keep the requested
            # format in this case - confirm that this is intended.
            sys.stderr.write("\nfallback to text/html!")
            from MoinMoin.formatter.text_html import Formatter

        sys.stderr.write("\nformatter:%s" % str(Formatter))
        self.formatter = Formatter(self.request, targetdir=self.outputdir)
        self.pageoptions["formatter"] = self.formatter

        self.exp_format = exp_format
        sys.stderr.write("\nself.formatter:%s" % str(self.formatter))

    def write(self, *data):
        """Write every item of *data* to the current output buffer ``self.out``."""
        for piece in data:
            self.out.write(piece)

    def dumpPage(self, pagename, targetfile):
        """Render one page and write the result to *targetfile*.

        pagename   -- name of the wiki page to render
        targetfile -- file name, relative to ``self.outputdir``

        The page output is captured in an in-memory buffer by redirecting
        the request, the page's attachments are exported, and the captured
        text is written to the target file using ``config.charset``.

        An exception raised while rendering is logged to ``self.errlog``
        and counted in ``self.errcnt``; whatever was captured up to that
        point is still written.  A KeyboardInterrupt closes the error log
        and terminates the process with exit status 1.

        Returns the captured output (also kept in ``self.result``).
        """
        _util.log('Writing "%s"...' % targetfile)
        self.request.write('Writing page "%s" to "%s"...' % (pagename, targetfile))
        self.request.pragma = {}

        request = self.request
        errlog = self.errlog
        filepath = os.path.join(self.outputdir, targetfile)
        self.out = StringIO.StringIO()
        self.result = ""
        try:
            page = Page.Page(request, pagename, **self.pageoptions)
            try:
                request.reset()
                request.redirect(self.out)
                page.send_page(request, count_hit=0, content_only=0)
                request.exportAttachments(pagename)
                request.redirect()
            except KeyboardInterrupt:
                sys.stderr.write("*** keyboard interrupt,terminating while writing page %s!\n" % pagename)
                errlog.write("~" * 78 + "\n")
                request.redirect()
                errlog.close()
                sys.exit(1)
            except Exception:
                # Best effort: record the failure and go on with the export.
                # The count lives on the instance so that mainloop() can
                # report it; a local counter would be lost on return.
                self.errcnt = getattr(self, "errcnt", 0) + 1
                sys.stderr.write("*** Caught exception while writing page!\n")
                errlog.write("~" * 78 + "\n")
                errlog.write("%s\n" % targetfile)  # page filename
                import traceback
                traceback.print_exc(file=errlog)
                request.redirect()
        finally:
            self.result = self.out.getvalue()
            self.out.close()
            # Word cleanup: a formatter that carries a word_host gets it
            # quit here, so this also happens when the export is interrupted.
            word_host = getattr(self.formatter, "word_host", None)
            if word_host is not None:
                word_host.Quit()

        self.outf = codecs.open(filepath, 'w', config.charset)
        try:
            self.outf.write(self.result)
        finally:
            self.outf.close()
        return self.result

    def postprocessPage(self, pagename, file, output):
        """Post-process the result of dumpPage() for one page.

        pagename -- name of the exported wiki page
        file     -- target file name used by dumpPage() (currently unused)
        output   -- text returned by dumpPage()

        Only the "pdf" export format has a post-processing step: *output*
        is passed to ``parseOdyssey`` together with the path of the PDF
        file, the quoted page name as title and the request user's name as
        author.  All other formats return immediately.
        """
        if self.exp_format != "pdf":
            return

        # The original code referred to the undefined names "pname" and
        # "exporter" and replaced *output* by a hard-coded test text; the
        # real page output of this exporter instance is used instead.
        ptitle = wikiutil.quoteWikinameFS(pagename)
        pfile = ptitle + ".pdf"
        pfilepath = os.path.abspath(os.path.join(self.outputdir, pfile))
        pauthor = self.request.user.name

        from MoinMoin.formatter.text_pdf import parseOdyssey
        parseOdyssey(output, pfilepath, ptitle, pauthor)
        return

    def mainloop(self):
        """moin-exporter main code.

        Validates the command line, loads the wiki configuration, creates
        the request and the formatter, selects the pages (a single
        ``--page`` or a title search for ``--pattern``) and exports them
        one by one.  A summary line is written to stderr when any page
        failed to render.
        """
        if len(sys.argv) == 1:
            self.parser.print_help()
            sys.exit(1)

        if len(self.args) != 1:
            self.parser.error("incorrect number of arguments")

        # Resolve the target directory before the working directory is
        # changed below; otherwise a relative target would end up relative
        # to the configuration directory.
        outdir = os.path.abspath(self.args[0])

        page_pattern = self.options.pattern or ".*"
        exp_format = self.options.exportformat or "html"

        #
        # Load the configuration
        #
        configdir = self.options.configdir
        if configdir:
            # Make the path absolute first, so a bare file name such as
            # "wikiconfig.py" yields its real directory instead of "".
            configdir = os.path.abspath(configdir)
            if os.path.isfile(configdir):
                configdir = os.path.dirname(configdir)
            if not os.path.isdir(configdir):
                _util.fatal("Bad path given to --config parameter")
            sys.path[0:0] = [configdir]
            os.chdir(configdir)

        #
        # Dump the wiki
        #
        sys.stderr.write("\n\nself.options.wiki_url:%s" % self.options.wiki_url)
        request = RequestExportCLI(self.options.wiki_url)
        request.form = request.args = request.setup_args()
        # fix url_prefix so we get relative paths in output html
        request.cfg.url_prefix = url_prefix
        self.request = request
        self.cfg = request.cfg

        self.setOutputDir(outdir)
        self.setFormatter(exp_format)
        if exp_format in ["word", "pdf"]:
            self.request.cfg.default_markup = "wiki_word"

        if self.options.page:
            pages = [self.options.page]
        else:
            # Filter the page list by pattern: case-sensitive regex title search.
            query = search.QueryParser(regex=1, titlesearch=1,
                                       case=1).parse_query(page_pattern)
            results = search.searchPages(self.request, query)
            results.sortByPagename()
            pages = [hit.page_name for hit in results.hits]

        errfile = os.path.join(self.outputdir, 'error.log')
        self.errlog = open(errfile, 'w')
        self.errcnt = 0
        try:
            for pagename in pages:
                # We have the same name in URL and FS; setFormatter() made
                # quoteWikinameURL append the export extension.
                # dumpPage() logs the 'Writing ...' line itself.
                targetfile = wikiutil.quoteWikinameURL(pagename)
                output = self.dumpPage(pagename, targetfile)
                self.postprocessPage(pagename, targetfile, output)
        finally:
            self.errlog.close()

        if self.errcnt:
            sys.stderr.write("*** %d error(s) occurred, see '%s'!\n" % (self.errcnt, errfile))
531
def run():
    """Print the build banner, then create a MoinExporter and run it."""
    sys.stdout.write("this is run from ROBERTS FARM\n")

    exporter = MoinExporter()
    exporter.run()


if __name__ == "__main__":
    run()
539
540 """
541
542
543
544
545
546 """
Attached Files
To refer to attachments on a page, use attachment:filename, as shown below in the list of files. Do NOT use the URL of the [get] link, since this is subject to change and can break easily. You are not allowed to attach a file to this page.