# First prototype
"""Convert files of wiki markup language (MoinMoin 1.21) to XML and PDF.

@copyright: 2004 Uwe Fechner (ufechner AT sk28 DOT de)
@license: GNU GPL

printParagraph uses some code from Chris Arndt,
http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/134571

version: 0.07

NOTE: this module targets Python 2 (it relies on the ``unicode`` builtin).
"""

from reportlab.pdfgen import canvas
import libxml2
import sys
import string
from os import getcwd
from string import strip
import re

# encodings, change as needed
wikiencoding = "iso8859-1"
xmlencoding = "iso8859-1"
pdfencoding = "iso8859-1"

# page geometry, change as needed
pageheight = 1000
pagetop = 200
pageleft = 50
# lowest y position that is still printed on; below it a new PDF page starts
pagebottom = 50
# vertical distance between two printed lines
lineheight = 20
# deepest bullet list nesting level that is recognized
maxlistlevel = 6

docstr = '<?xml version="1.0" encoding="' + xmlencoding + '"?><pdfreport/>'


# the parameters of the libxml functions must be UTF8 encoded
def toUTF8(wikistring):
    """Re-encode a byte string from ``wikiencoding`` to UTF-8."""
    return (unicode(wikistring, wikiencoding).encode('UTF-8'))


# if the pdf library needs something else then UTF8, it must be decoded
def toPdf(xmlcontent):
    """Re-encode a UTF-8 byte string to ``pdfencoding``."""
    return (unicode(xmlcontent, 'UTF-8').encode(pdfencoding))


def _listLevel(line):
    """Return the bullet nesting level of a wiki line, or 0 if it is no bullet.

    A bullet line consists of 1..maxlistlevel leading spaces followed by
    "*"; the number of spaces is the nesting level.  Requiring the leading
    spaces keeps a "*" inside ordinary text from being taken for a bullet,
    and working on the stripped line avoids indexing past the end of
    short lines.
    """
    stripped = line.lstrip(" ")
    indent = len(line) - len(stripped)
    if 1 <= indent <= maxlistlevel and stripped.startswith("*"):
        return indent
    return 0


class XmlReport:
    "A class to convert wiki pages to xml"

    def __init__(self, name):
        """Create an empty xml document whose root carries the given name.

        name -- report name; also used as base name of the output files,
                which are written to the current working directory.
        """
        self.name = name
        self.path = getcwd() + "/"
        # remove indentation when parsing xml files
        libxml2.keepBlanksDefault(0)
        # parse empty default document
        self.doc = libxml2.parseDoc(docstr)
        # save the document root node
        self.root = self.doc.children
        # set the name of the document
        self.root.setProp("name", toUTF8(name))
        # no page element exists until appendWikiPage is called
        self.page = None
        self.clearSection()

    def clearSection(self):
        """Forget the collected paragraph text and all open list nodes."""
        self.section = ""
        # index 0 holds the page node, index n the open "ul" node of level n
        self.enumNodes = [None] * (maxlistlevel + 1)

    def saveAsXml(self):
        """Serialize self.doc and write it to <path><name>.xml."""
        xmltext = self.doc.serialize(format=1)
        filename = self.path + self.name + '.xml'
        print("xml-file: " + filename)
        newfile = open(filename, "w")
        try:
            newfile.write(xmltext)
        finally:
            # close the file even if writing fails
            newfile.close()

    def appendParagraph(self):
        """Append the collected section to the page as a "p" or "h<n>" node.

        Each leading "=" raises the header level by one (up to 6) and
        removes the first and the last character of the section.  The
        section and the list state are cleared afterwards.
        """
        # strip trailing newline
        self.section = self.section.rstrip("\n")
        childname = "p"
        # check, if it is a header
        for j in range(6):
            if self.section and self.section[0] == "=":
                childname = "h" + str(j + 1)
                self.section = self.section[1:-1].strip()
        # append paragraph node
        self.page.newTextChild(None, childname, toUTF8(self.section))
        self.clearSection()

    def _appendListItem(self, level, itemtext):
        """Append one "li" node with itemtext at the given nesting level."""
        # text collected so far precedes the list in the wiki source,
        # so it has to be written to the page before the list
        if self.section:
            self.appendParagraph()
        self.enumNodes[0] = self.page
        # open every list level that is still missing, so that a list
        # may start at a level deeper than 1
        for depth in range(1, level + 1):
            if self.enumNodes[depth] is None:
                parent = self.enumNodes[depth - 1]
                self.enumNodes[depth] = parent.newChild(None, "ul", "")
        self.enumNodes[level].newTextChild(None, "li", toUTF8(itemtext))
        # a shallower item closes the deeper lists; later nested items
        # then get a new "ul" at the right position in the document
        for depth in range(level + 1, maxlistlevel + 1):
            self.enumNodes[depth] = None

    def appendWikiPage(self, pagename):
        """Parse the wiki file pagename and append it as a "page" element.

        Blank lines end a paragraph, lines recognized by _listLevel become
        list items, everything else is collected as paragraph text.
        """
        filename = self.path + '/' + pagename
        print("wiki-file: " + filename)
        # Open input file, read it, and close it
        wikifile = open(filename, "r")
        try:
            text = wikifile.readlines()
        finally:
            wikifile.close()
        # create a page element and append it to the root node
        self.page = self.root.newTextChild(None, "page", None)
        # set the name of the page
        self.page.setProp("name", toUTF8(pagename))
        self.clearSection()
        # parse the wiki page
        for line in text:
            # an empty line ends the current paragraph
            if line == "\n":
                self.appendParagraph()
                continue
            level = _listLevel(line)
            if level:
                self._appendListItem(level, line[level + 1:].strip())
            else:
                self.section = self.section + line
                # ordinary text ends any open list
                self.enumNodes = [None] * (maxlistlevel + 1)
        # make sure, that we don't lose text after the last empty line
        if self.section:
            self.appendParagraph()

    # needed for libxml2 C library
    def free(self):
        """Free the libxml2 document owned by this report."""
        self.doc.freeDoc()


class PdfReport(XmlReport):
    "A class to convert a wiki dom to pdf"

    def __init__(self, name):
        """Create the xml document and an empty canvas for <path><name>.pdf."""
        XmlReport.__init__(self, name)
        self.filename = self.path + self.name + ".pdf"
        self.can = canvas.Canvas(self.filename)
        # the cursor
        self.x = pageleft
        self.y = pageheight - pagetop
        # indentlevel of lists
        self.indentLevel = 0

    def _newPage(self):
        """Finish the current pdf page and move the cursor back to the top."""
        self.can.showPage()
        self.y = pageheight - pagetop

    def pdfPrint(self, text, size=10):
        """Print one line of text at the cursor and move the cursor down.

        text -- the line to print, encoded for the pdf library
        size -- accepted for compatibility; it is not applied to the canvas
        """
        # for debugging
        print(text)
        # continue on a fresh page instead of drawing below the page
        if self.y < pagebottom:
            self._newPage()
        self.can.drawString(self.x, self.y, text)
        self.y -= lineheight

    def printHeader(self, headerNode):
        """Print a header node as "H<level>: <text>"."""
        # the second character of the node name ("h1".."h6") is the level
        size = headerNode.name[1]
        text = toPdf(headerNode.content)
        self.pdfPrint("H" + str(size) + ": " + text)

    def printParagraph(self, paraNode, width=80):
        """Wrap a paragraph node to width characters per line and print it.

        Inter-word space is reduced to one space character.
        Indention is currently also lost.  A single word longer than
        width is printed on a line of its own; an empty paragraph prints
        one empty line.
        """
        text = toPdf(paraNode.content)
        # split paragraphs into a list of words
        words = text.strip().split()
        new_par = []
        line = []
        for word in words:
            # the line is already long enough -> add it to paragraph
            if line and len(' '.join(line + [word])) > width:
                new_par.append(' '.join(line))
                line = []
            line.append(word)
        # last line in paragraph
        new_par.append(' '.join(line))
        # print the lines of the paragraph
        for par in new_par:
            self.pdfPrint(par)

    def printList(self, listNode):
        """Print the items of a "ul" node, recursing into nested lists."""
        self.indentLevel += 1
        child = listNode.children
        while child is not None:
            kind = child.name[0]
            if kind == "u":
                self.printList(child)
            elif kind == "l":
                # print list element
                text = toPdf(child.content)
                self.pdfPrint("LI" + str(self.indentLevel) + ": " + text)
            # anything else is skipped (e.g. text nodes from xml indentation)
            child = child.next
        self.indentLevel -= 1

    def convertToPdf(self):
        """Print every "page" element of the xml dom and save the pdf file."""
        page = self.root.children
        while page is not None:
            if page.name == "page":
                # loop through all children of the page element
                child = page.children
                while child is not None:
                    kind = child.name[0]
                    if kind == "h":
                        self.printHeader(child)
                    elif kind == "p":
                        self.printParagraph(child)
                    elif kind == "u":
                        self.printList(child)
                    child = child.next
                # create pdf page
                self._newPage()
            page = page.next
        # save pdf file
        self.can.save()


report = PdfReport("WikiSchulung")
try:
    report.appendWikiPage("WikiSchulung")
    report.saveAsXml()
    report.convertToPdf()
finally:
    # release the libxml2 document even if the conversion fails
    report.free()