MoinMoin Exporter Package - Work in Progress
I do not have the time to do this the proper way - isolate the minimum necessary core patches and test this extensively, write unit tests, document it, do i18n...
However, as some people have asked for more info, I have at least created some patch files for all the affected modules and decided to publish the whole thing as-is.
Contents
Patches to core MoinMoin 1.3.5
I put them here as code snippets rather than attachments, so I can comment on questions later on.
MoinMoin.Page
I have reduced this to the core patches relevant for the exporter feature, however I did not do intensive testing of this reduced patch (my work environment contains a lot of other patches in this file). Reasons are commented in the patch. The patches are mostly done to correct the calling sequence for formatter functions (which is wrong in the original but does not matter in the default formatters), to avoid parser reset and to use the text/html formatter even if it is not set as default_formatter.
--- E:\Install\MoinMoin\moin-1.3.5.tar\moin-1.3.5\MoinMoin\Page.py Sun Jul 24 13:06:26 2005 +++ E:\moin13\patch\Page.py Mon Oct 24 15:02:58 2005 @@ -1,13 +1,15 @@ # -*- coding: iso-8859-1 -*- """ MoinMoin - Page class @copyright: 2000-2004 by Jürgen Hermann <jh@web.de> @license: GNU GPL, see COPYING for details. + contains core patches by RobertSeeger (#RS) for the MoinExporter feature + """ import StringIO, os, re, urllib, random, codecs from MoinMoin import config, caching, user, util, wikiutil from MoinMoin.logfile import eventlog from MoinMoin.util import filesys, web @@ -964,17 +967,23 @@ verb, args = (line[1:]+' ').split(' ', 1) verb = verb.lower() args = args.strip() # check the PIs if verb == "format": # markup format - pi_format, pi_formatargs = (args+' ').split(' ',1) - pi_format = pi_format.lower() - pi_formatargs = pi_formatargs.strip() +#RS for some exporter formats we use a modified wiki parser 'wiki_word' +# and we do have to ignore a page format instruction 'wiki' in this case +# to keep using our modified parser + m_pi_format, m_pi_formatargs = (args+' ').split(' ',1) + if m_pi_format!="wiki": + pi_format=m_pi_format + pi_format = pi_format.lower() + pi_formatargs = m_pi_formatargs.strip() +#RS end elif verb == "refresh": if self.cfg.refresh: try: mindelay, targetallowed = self.cfg.refresh args = args.split() if len(args) >= 1: delay = max(int(args[0]), mindelay) @@ -1118,14 +1128,25 @@ request.write(''.join(pi_formtext)) +#RS extensions for export mode + else: +#nearly same as default formatter but used in export + if self.formatter.mimetype=="text/html": + title = self.split_title(request) + wikiutil.send_title(request, title, page=self, link='', msg='', + pagename=self.page_name, print_mode=1, + media='print', pi_refresh=None, + allow_doubleclick=0,trail=None + ) +#RS end # try to load the parser Parser = wikiutil.importPlugin(self.request.cfg, "parser", self.pi_format, "Parser") if Parser is None: # default to plain text formatter (i.e. 
show the page source) del Parser @@ -1157,23 +1178,38 @@ if getattr(request, 'footnotes', None): from MoinMoin.macro.FootNote import emit_footnotes request.write(emit_footnotes(request, self.formatter)) # end wiki content div request.write(self.formatter.endContent()) +#RS for the exporter, self.formatter.endDocument() must be the last activity +# as some formatters really have to close the document. +# also, we should only do self.formatter.endDocument() if we also did +# self.formatter.startDocument() + +# doc_trailer = self.formatter.endDocument() +# # end document output - doc_trailer = self.formatter.endDocument() if not content_only: # send the page footer if self.default_formatter: wikiutil.send_footer(request, self.page_name, print_mode=print_mode) - - request.write(doc_trailer) - + doc_trailer = self.formatter.endDocument() + request.write(doc_trailer) + elif self.formatter.mimetype=="text/html": + wikiutil.send_footer(request, self.page_name, print_mode=print_mode) + doc_trailer = self.formatter.endDocument() + request.write(doc_trailer) + else: + doc_trailer = self.formatter.endDocument() + request.write(doc_trailer) + +#RS end + # cache the pagelinks if do_cache and self.default_formatter and page_exists: cache = caching.CacheEntry(request, self, 'pagelinks') if cache.needsUpdate(self._text_filename()): links = self.formatter.pagelinks cache.update('\n'.join(links) + '\n', True)
Note that I had to remove an (insignificant) line containing '}'}'} from the generated diff, so the line positions may be off; you should apply these patches manually anyway.
MoinMoin.parser.wiki
This is not so important; I don't even recall exactly why I did this, but it should not hurt.
--- E:\Install\MoinMoin\moin-1.3.5.tar\moin-1.3.5\MoinMoin\parser\wiki.py Sat Jul 30 14:51:12 2005 +++ E:\moin13\patch\parser\wiki.py Mon Oct 24 15:22:46 2005 @@ -1,13 +1,14 @@ # -*- coding: iso-8859-1 -*- """ MoinMoin - MoinMoin Wiki Markup Parser @copyright: 2000, 2001, 2002 by Jürgen Hermann <jh@web.de> @license: GNU GPL, see COPYING for details. + contains core patches by RobertSeeger (#RS) for the MoinExporter feature """ import os, re from MoinMoin import config, wikimacro, wikiutil from MoinMoin.Page import Page from MoinMoin.util import web @@ -526,15 +528,17 @@ """Handle definition lists.""" result = [] self._close_item(result) #self.inhibit_p = 1 self.in_dd = 1 result.extend([ self.formatter.definition_term(1), - self.formatter.text(match[1:-3]), +#RS ignore character formatting inside DT + self.formatter.text(match[1:-3].replace("'","")), +#RS end self.formatter.definition_term(0), self.formatter.definition_desc(1), ## CHANGE: no automatic paragraph ##self.formatter.paragraph(1) ]) return ''.join(result)
MoinMoin.formatter.base
I introduced a formatter property self.mimetype to be able to recognize a loaded formatter (e.g. in some of my macros I use the formatter in a different way depending on its mimetype). In addition, I defined a function pure to be called by my modified parser if it finds some "pure" text or characters, as well as a function nbsp to replace the hard-coded use of &nbsp; in cases where the output format is not HTML or similar. I did this back in 1.1, when "pure" text was just ignored by the default wiki parser and formatter; most likely this can be done better, or is not even needed, in 1.3.5.
--- E:\Install\MoinMoin\moin-1.3.5.tar\moin-1.3.5\MoinMoin\formatter\base.py Tue Jul 26 20:46:52 2005 +++ E:\moin13\patch\formatter\base.py Mon Oct 24 15:51:58 2005 @@ -1,13 +1,14 @@ # -*- coding: iso-8859-1 -*- """ MoinMoin - Formatter Base Class @copyright: 2000 - 2004 by Jürgen Hermann <jh@web.de> @license: GNU GPL, see COPYING for details. + contains core patches by RobertSeeger (#RS) for the MoinExporter feature """ from MoinMoin import wikiutil import re, types class FormatterBase: """ This defines the output interface used all over the rest of the code. @@ -18,15 +19,17 @@ """ hardspace = ' ' def __init__(self, request, **kw): self.request = request self._ = request.getText - +#RS additional property "mimetype" + self.mimetype="text/base" +#RS end self._store_pagelinks = kw.get('store_pagelinks', 0) self._terse = kw.get('terse', 0) self.pagelinks = [] self.in_p = 0 self.in_pre = 0 self._highlight_re = None self._base_depth = 0 @@ -107,14 +110,27 @@ return u'<img%s>' % attrstr def smiley(self, text): return text # Text and Text Attributes ########################################### +#RS additional handler for pure and nbsp +#may be obsolete as text is now properly used by parser?? + def pure(self, text): + """ + this handles the "not in any markup" case + used in formatters with "side effects" + """ + return self._text(text) + + def nbsp(self): + return self.hardspace + +#RS end def text(self, text): if not self._highlight_re: return self._text(text) result = [] lastpos = 0 match = self._highlight_re.search(text)
MoinMoin.formatter.text_html
This redefines endDocument() to properly close the HTML tags in export mode (at least in 1.1, Moin did not do this properly in the page handling or formatter calls).
--- E:\Install\MoinMoin\moin-1.3.5.tar\moin-1.3.5\MoinMoin\formatter\text_html.py Tue Jul 26 20:46:52 2005 +++ E:\moin13\patch\formatter\text_html.py Mon Oct 24 15:53:43 2005 @@ -1,13 +1,14 @@ # -*- coding: iso-8859-1 -*- """ MoinMoin - "text/html+css" Formatter @copyright: 2000 - 2004 by Jürgen Hermann <jh@web.de> @license: GNU GPL, see COPYING for details. + contains core patches by RobertSeeger (#RS) for the MoinExporter feature """ from MoinMoin.formatter.base import FormatterBase from MoinMoin import wikiutil, i18n, config from MoinMoin.Page import Page class Formatter(FormatterBase): @@ -15,14 +16,17 @@ Send HTML data. """ hardspace = ' ' def __init__(self, request, **kw): apply(FormatterBase.__init__, (self, request), kw) +#RS additional property "mimetype" + self.mimetype="text/html" +#RS end # inline tags stack. When an inline tag is called, it goes into # the stack. When a block element starts, all inline tags in # the stack are closed. self._inlineStack = [] self._in_li = 0 @@ -144,14 +148,22 @@ # The code that calls us should keep correct calling order. if tag in self._inlineStack: self._inlineStack.remove(tag) return '</%s>' % tag # Public methods ################################################### + def startDocument(self, pagename): + return "" + + def endDocument(self): + if self.request.export_mode: + return "\n<!-- EOD -->\n</body>\n</html>\n" + return "" + def startContent(self, content_id='content', **kwargs): """ Start page content div """ # Setup id if content_id!='content': aid = 'top_%s' % (content_id,) @@ -733,10 +745,15 @@ attrs = self._checkTableAttr(attrs, '') return self.open(tag, newline=1, attr=attrs) return self.close(tag) def escapedText(self, text): return wikiutil.escape(text) +#RS additional handler for nbsp + def nbsp(self): + return ' ' + +#RS end + def rawHTML(self, markup): return markup -
Additions to the configuration
# The physical path to your /wiki url, needed to export smiley images etc.
# The trailing path separator is part of the value shown by the author.
url_prefix_dir = 'E:\\moin13\\share\\moin\\htdocs\\'
You also need to have a way to configure the target export directory - at the moment I have not bothered to integrate this into the config, but rather define it in the used action or command batch.
Additional files
MoinMoin.request_ors
I have derived from the core request classes to encapsulate some general patches and enhancements. I did this only for the CGI (needed if the exporter is called via action) and CLI (needed for batch use of the exporter); you may have to derive for your type of server. The most relevant addition is the attachment export (today I saw a patch to moin_dump.py on MoinMoinPatch that is quite similar in its effect).
MoinMoin.parser.wiki_word
I had to create a modified wiki parser that is only used in the exporter context and only for the text_word and text_pdf formatters. Basically it parses also for unformatted text words and characters which are not needed by the normal parser-formatter combination.
-- RobertSeeger 2005-11-03 17:40:18 updated version suppresses wrong paragraph insertions for my additional regex rules (otherwise table formatting breaks)
The Exporter Files
MoinMoin.scripts.moin_export
This is based on moin_dump.py but has been completely refactored into a class and enhanced to fit my purpose.
The new formatters
-- RobertSeeger 2005-11-03 17:40:18 We have decided to stop work on both the text_word and the text_pdf formatters soon and create a new, more universal formatter using TX Text Control ActiveX Server to produce e.g. DOC, PDF, RTF, HTML. Maybe somebody else can take over the text_pdf source (uses reportlab) and finalize it now that the most important features have been implemented.
-- RobertSeeger 2005-11-30 22:05:54 Here's another situation update: due to the high license costs of TX Text Control ActiveX Server we have stopped work on this version and are now "back in the open world". AlexanderBormann will finalize the PDF version (using reportlab) and integrate a good user interface to deliver the created files to the user. He will then use OpenOffice.org 2.0 to implement first the Word to Wiki converter (probably with some limitations) and finally a new Wiki to Word exporter. Note that there is already a macro to export OOo documents to MoinMoin. Search this wiki for it
MoinMoin.formatter.text_word
The implementation is nearly finished for all the main markup features (however migration to 1.3.5 broke some tables etc.), see an export of page SyntaxReference: SyntaxReference.doc
You need to have Microsoft Word (2000) on your MoinMoin server and should have generated a python wrapper with makepy (oh yes, and you need Mark Hammond's win32 package (win32all build 163), of course). Due to some weird exceptions I have decided to serve the Word control with Visible=1; this means that you will watch Word "typing in" your target document!
If you have a different server environment (e.g. Unix) then you could still use this on a special Windows client machine with Microsoft Word and python with a special setting:
- install MoinMoin source code (but not wiki pages) on client
- establish a fileserver read access to the wiki pages directory on the server
- use the exporter from the command line interface
MoinMoin.formatter.text_pdf
This is still in its early stages and will be finalized by Alexander Bormann in a few weeks. It needs the reportlab package. Here is a preview version that may break e.g. on some macro calls, but it renders something simple like FrontPage (see FrontPage.pdf):
Now handles paging and most image links (smiley, attachment, file) -- AlexanderBormann 2005-10-27 14:32:27
Usage Scenarios
Exporting from a command line batch
This can be used to batch export multiple pages matching PAGEPATTERN - be aware that Word may be slow, though ;-().
rem Batch export of all wiki pages matching PAGEPATTERN.
rem Each command must be on its own line; otherwise the first "set"
rem would take the rest of the script as its value.
set GATEWAY_INTERFACE=CGI/1.1
set DUMPNAME=word
set PYTHONPATH=E:\moin13\Lib\site-packages
set TARGETROOT=E:\moin13
set WIKIURL="http://localhost/moin13/"
rem Target directory for the exported files.
set DUMPPATH=%TARGETROOT%\dump_%DUMPNAME%
set PAGEPATTERN=SyntaxReference
rem Quote the pattern so it is passed as a single argument.
set PAGEPATTERN="%PAGEPATTERN%"
rem stdout goes to dump.log, stderr to dumperr.log.
E:\python23\python.exe -c "from MoinMoin.scripts.moin_export import run; run()" --wiki=%WIKIURL% --format=word --pattern=%PAGEPATTERN% %DUMPPATH% 1>dump.log 2>dumperr.log
pause
Exporting from an Action
To use this, you may have to modify the server to use a different request class from request_ors.py. In my case I modified moin.cgi (or you could try to create such a request inside the action?):
# Use the derived CGI request class instead of the core one.
from MoinMoin.request_ors import RequestORSCGI

request = RequestORSCGI()
# disable my more critical modifications
request.ors_mod_active = 0
Here is a test action that can be applied to a single page to trigger the export of this page (in Word format, but for PDF it looks very similar). We will later provide an action with a proper user dialog (select format, output name ...maybe even return a link to the created file). Right now, the output directory is hardcoded and you must pick up the result file from the server.
1 # Imports
2 import string, time,sys,os,copy
3 from MoinMoin import user, webapi, wikiutil
4 from MoinMoin.PageEditor import PageEditor
5 from MoinMoin.scripts.moin_export import MoinExporter
6
def execute(pagename, request, exp_format="word",
            output_dir=r"E:\moin13\dump_word"):
    """ Export a single page through MoinExporter and report completion.

    The page is dumped into `output_dir` using the formatter selected by
    `exp_format`; afterwards the page is sent back to the browser with a
    status message. The result file has to be picked up from the server.

    @param pagename: name of the page to export
    @param request: the current request object
    @param exp_format: export format name passed to
        MoinExporter.setFormatter(); the original listing names
        "word" (default), "html", "plain" and "pdf"
    @param output_dir: directory on the server that receives the export
    @return: whatever page.send_page() returns
    """
    _ = request.getText
    page = PageEditor(request, pagename)

    # Per the original author's note, the exporter overrides
    # wikiutil.quoteWikinameURL, and below we may switch the default
    # markup; remember both so they can be restored afterwards.
    saved_quote_url = wikiutil.quoteWikinameURL
    saved_markup = request.cfg.default_markup

    try:
        exporter = MoinExporter()
        # Work on a copy because the request is modified below.
        # NOTE(review): copy.copy() is shallow, so `user` and `form` are
        # presumably still shared with the original request - confirm
        # whether that is intended.
        exporter.request = copy.copy(request)
        exporter.setOutputDir(output_dir)

        # Redirect the request to action=print and indicate "export" mode.
        exporter.request.user.show_topbottom = 0
        exporter.request.form['action'] = 'print'
        exporter.request.form['export'] = '1'

        exporter.setFormatter(exp_format)

        # Some export formats need the special "wiki_word" parser.
        if exp_format in ("word", "pdf"):
            request.cfg.default_markup = "wiki_word"

        # This is the plain output of the formatter; for word and pdf the
        # "real" target file is created as a side effect.
        target_name = wikiutil.quoteWikinameFS(pagename) + exporter.ext

        # Right now the same error log file is used for every export.
        errfile = os.path.join(exporter.outputdir, 'error.log')
        errlog = open(errfile, 'w')
        exporter.errlog = errlog
        try:
            exporter.dumpPage(pagename, target_name)
        finally:
            # Close the log even if the dump fails.
            errlog.close()
    finally:
        # Always undo the global modifications, even on errors, so a failed
        # export cannot leave the wiki with the wrong parser or URL quoting.
        wikiutil.quoteWikinameURL = saved_quote_url
        request.cfg.default_markup = saved_markup

    # Translate the template first, then interpolate; escape the page name
    # because the message is emitted as HTML.
    done_msg = _('Export of page %s completed.') % wikiutil.escape(pagename)
    return page.send_page(request, msg='<strong>%s</strong>' % done_msg)