# -*- coding: iso-8859-1 -*-
"""
    MoinMoin - Dump a MoinMoin wiki to static pages

    Copyright (c) 2002, 2003 by Jürgen Hermann
    All rights reserved, see COPYING for details.

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    ORS modifications:
        12.01.04 RS upgrade to 1.1., mark all previous ORS changes
        20.01.04 RS repair indent errors
        29.06.04 RS handle KeyboardInterrupt
        07.09.05 RS upgrade to 1.3.5
        24.09.05 RS use StringIO to buffer output, enable postprocessing
"""

import codecs
import os
import re
import shutil
import string
import StringIO
import sys
import time
import traceback

# If your moin installation is not in sys.path, extend sys.path here,
# before the MoinMoin imports below.
from MoinMoin import config
from MoinMoin import search
from MoinMoin import wikiutil
from MoinMoin import Page
from MoinMoin.scripts import _util
from MoinMoin.request_ors import RequestExportCLI

__version__ = "20040329"

#############################################################################
### Helpers
#############################################################################

# Value for the %(logo_html)s slot of page_template.
# NOTE(review): page_template is not applied anywhere in this script any more.
logo_html = '\n\n'

# Assigned to request.cfg.url_prefix in mainloop() so that the generated
# output uses relative paths.
url_prefix = "."

page_template = u''' %(pagename)s

%(logo_html)s

%(navibar_html)s

%(pagehtml)s

%(timestamp)s '''

# Export format name -> extension of the files written for that format.
SUFFIX = {
    'html': ".html",
    'plain': ".txt",
    'word': ".docdmp",
    'hhk': ".hhk",
    'hhc': ".hhc",
    'tal': ".html",
    'xxml': ".xml",
    'pdf': ".pdfdmp",
}

#############################################################################
### Main program
#############################################################################


class MoinExporter(_util.Script):
    """Command line script that dumps wiki pages into an output directory.

    The single positional argument is the output directory; which pages are
    dumped and in which format is controlled by the options registered in
    __init__.
    """

    def __init__(self):
        """Register the command line options of the exporter."""
        _util.Script.__init__(self, __name__, "[options] ")
        self.pageoptions = {}
        self.exp_format = "html"
        # Number of pages that raised while being dumped (see dumpPage).
        self.errcnt = 0

        # --config=DIR
        self.parser.add_option(
            "--config", metavar="DIR", dest="configdir",
            help="Path to wikiconfig.py (or its directory)"
        )

        # --wiki=URL
        self.parser.add_option(
            "--wiki", metavar="WIKIURL", dest="wiki_url",
            help="URL of wiki to dump (e.g. moinmaster.wikiwikiweb.de)"
        )

        # --page=NAME
        self.parser.add_option(
            "--page", metavar="NAME", dest="page",
            help="Dump a single page (with possibly broken links)"
        )

        # --pattern=REGEX
        self.parser.add_option(
            "--pattern", metavar="PATTERN", dest="pattern",
            help="Dump all pages with this pattern"
        )

        # --format=FORMAT
        self.parser.add_option(
            "--format", metavar="FORMAT", dest="exportformat",
            help="Use export format"
        )

    def setOutputDir(self, outdir):
        """Set the output directory, creating it if it does not exist.

        The absolute path is stored in self.outputdir and also published
        as self.request.export_dir. A directory that cannot be created is
        reported through _util.fatal.
        """
        self.outputdir = os.path.abspath(outdir)
        if not os.path.isdir(self.outputdir):
            try:
                os.mkdir(self.outputdir)
                _util.log("Created output directory '%s'!" % self.outputdir)
            except OSError:
                _util.fatal("Cannot create output directory '%s'!"
                            % self.outputdir)
        self.request.export_dir = self.outputdir

    def setFormatter(self, exp_format="html"):
        """Load the formatter for exp_format and pick the file extension.

        Side effect: wikiutil.quoteWikinameURL is replaced module-wide by a
        function returning quoteWikinameFS(pagename) + self.ext, so link
        targets and the files written by mainloop share the same names.
        When no formatter plugin is found for "text/<exp_format>", the
        text/html formatter is used instead.
        """
        mimetype = u"text/%s" % exp_format

        self.ext = SUFFIX.get(mimetype.split("/")[1], '.html')
        # qfn is bound once to the original quoteWikinameFS; self.ext is
        # looked up on every call.
        wikiutil.quoteWikinameURL = (
            lambda pagename, qfn=wikiutil.quoteWikinameFS:
            qfn(pagename) + self.ext
        )

        Formatter = wikiutil.importPlugin(
            self.request.cfg, "formatter",
            mimetype.translate({ord(u'/'): u'_', ord(u'.'): u'_'}),
            "Formatter")
        if Formatter is None:
            # No plugin for the requested format: fall back to HTML.
            sys.stderr.write("\nfallback to text/html!")
            from MoinMoin.formatter.text_html import Formatter

        sys.stderr.write("\nformatter:%s" % str(Formatter))
        self.formatter = Formatter(self.request, targetdir=self.outputdir)
        self.pageoptions["formatter"] = self.formatter
        self.exp_format = exp_format
        sys.stderr.write("\nself.formatter:%s" % str(self.formatter))

    def write(self, *data):
        """Write every piece of data to the current output buffer."""
        for piece in data:
            self.out.write(piece)

    def _quitWordHost(self):
        """Call Quit() on the formatter's word_host, if it has one set."""
        formatter = self.formatter
        if formatter is None:
            return
        if 'word_host' in vars(formatter) and formatter.word_host is not None:
            formatter.word_host.Quit()

    def dumpPage(self, pagename, targetfile):
        """Render one page into a buffer and write it to targetfile.

        pagename   -- name of the wiki page to export
        targetfile -- file name, relative to self.outputdir

        Returns the rendered output. Whatever was rendered is written to
        the target file even when rendering raised. Such failures are
        logged to self.errlog and counted in self.errcnt. A
        KeyboardInterrupt closes the error log and exits with status 1.
        """
        _util.log('Writing "%s"...' % targetfile)
        self.request.write('Writing page "%s" to "%s"...'
                           % (pagename, targetfile))
        self.request.pragma = {}

        request = self.request
        errlog = self.errlog
        filepath = os.path.join(self.outputdir, targetfile)
        self.out = StringIO.StringIO()
        self.result = ""

        # Nested try/finally + try/except: same semantics as a combined
        # try/except/finally, but also valid before Python 2.5.
        try:
            try:
                page = Page.Page(request, pagename, **self.pageoptions)
                request.reset()
                request.redirect(self.out)
                page.send_page(request, count_hit=0, content_only=0)
                request.exportAttachments(pagename)
                request.redirect()
            except KeyboardInterrupt:
                sys.stderr.write(
                    "*** keyboard interrupt,terminating while writing page %s!\n"
                    % pagename)
                errlog.write("~" * 78 + "\n")
                request.redirect()
                errlog.close()
                sys.exit(1)
            except:
                # Deliberately broad: one broken page must not abort the
                # whole dump; the traceback goes to the error log.
                self.errcnt = getattr(self, 'errcnt', 0) + 1
                sys.stderr.write("*** Caught exception while writing page!\n")
                errlog.write("~" * 78 + "\n")
                errlog.write("%s\n" % targetfile)  # page filename
                traceback.print_exc(None, errlog)
                request.redirect()
                self._quitWordHost()
        finally:
            self.result = self.out.getvalue()
            self.out.close()

        self.outf = codecs.open(filepath, 'w', config.charset)
        try:
            self.outf.write(self.result)
        finally:
            self.outf.close()
        return self.result

    def postprocessPage(self, pagename, file, output):
        """Post-process the dumpPage result of one page.

        Only the "pdf" export format is post-processed: the text is handed
        to MoinMoin.formatter.text_pdf.parseOdyssey, which is given the
        path "<quoted pagename>.pdf" inside self.outputdir. Returns None.
        """
        if self.exp_format != "pdf":
            return

        ptitle = wikiutil.quoteWikinameFS(pagename)
        pfile = ptitle + ".pdf"
        pfilepath = os.path.abspath(os.path.join(self.outputdir, pfile))
        pauthor = self.request.user.name

        # For test purposes the output string is overwritten.
        # NOTE(review): this discards the real page output; the placeholder
        # presumably spanned several lines originally - confirm and remove
        # once the PDF path is meant to use real data.
        output = (
            " BOOK 1 Das ^ist ein _kleiner Teststring. Er geht ueber mehrere"
            " Zeilen --achtung, hier sollte nicht zu lesen sein, dass ein"
            " zweiter abschnitt beginnt!!-- BOOK 2 und das ist auch ganz gut"
            " so"
        )

        # Imported here so the PDF module is only loaded for PDF exports.
        from MoinMoin.formatter.text_pdf import parseOdyssey
        parseOdyssey(output, pfilepath, ptitle, pauthor)
        return

    def mainloop(self):
        """moin-exporter main code.

        Validates the command line, creates the export request, prepares
        output directory and formatter, then dumps and post-processes each
        selected page. Failures are collected in "error.log" inside the
        output directory and summarized on stderr.
        """
        if len(sys.argv) == 1:
            self.parser.print_help()
            sys.exit(1)

        if len(self.args) != 1:
            self.parser.error("incorrect number of arguments")

        #
        # Load the configuration
        #
        configdir = self.options.configdir
        page_pattern = self.options.pattern or ".*"
        exp_format = self.options.exportformat or "html"

        if configdir:
            if os.path.isfile(configdir):
                configdir = os.path.dirname(configdir)
            if not os.path.isdir(configdir):
                _util.fatal("Bad path given to --config parameter")
            configdir = os.path.abspath(configdir)
            sys.path[0:0] = [configdir]
            os.chdir(configdir)

        #
        # Dump the wiki
        #
        sys.stderr.write("\n\nself.options.wiki_url:%s" % self.options.wiki_url)
        request = RequestExportCLI(self.options.wiki_url)
        request.form = request.args = request.setup_args()

        # fix url_prefix so we get relative paths in output html
        request.cfg.url_prefix = url_prefix
        self.request = request
        self.cfg = request.cfg

        # NOTE(review): running the export as a specific wiki user is
        # disabled in the original script.
        self.setOutputDir(self.args[0])
        self.setFormatter(exp_format)
        if exp_format in ["word", "pdf"]:
            self.request.cfg.default_markup = "wiki_word"

        if self.options.page:
            pages = [self.options.page]
        else:
            # Select the pages whose title matches the pattern.
            query = search.QueryParser(
                regex=1, titlesearch=1, case=1).parse_query(page_pattern)
            results = search.searchPages(self.request, query)
            results.sortByPagename()
            pages = [hit.page_name for hit in results.hits]

        errfile = os.path.join(self.outputdir, 'error.log')
        self.errlog = open(errfile, 'w')
        self.errcnt = 0
        try:
            for pagename in pages:
                # we have the same name in URL and FS
                targetfile = wikiutil.quoteWikinameURL(pagename)
                _util.log('Writing "%s"...' % targetfile)
                output = self.dumpPage(pagename, targetfile)
                self.postprocessPage(pagename, targetfile, output)
        finally:
            self.errlog.close()

        if self.errcnt:
            sys.stderr.write("*** %d error(s) occurred, see '%s'!\n"
                             % (self.errcnt, errfile))


def run():
    """Script entry point: announce the variant and run the exporter."""
    sys.stdout.write("this is run from ROBERTS FARM\n")
    MoinExporter().run()


if __name__ == "__main__":
    run()