Attachment 'HTML2MoinMoin.py'
Download 1 #!/usr/bin/python2
2
3 """
4 Usage:
5 moinconvert URL
6
7 Retrieves the given URL and converts it to MoinMoin markup. The result is
8 written to stdout.
9 """
10
11 import htmlentitydefs, sys
12
13 from HTMLParser import HTMLParser
14
class HTML2MoinMoin(HTMLParser):
    """Event-driven converter that rewrites HTML as MoinMoin wiki markup.

    Markup is emitted through ``write`` while the parser reports tags and
    text.  Start and end tags are resolved in this order:

    1. a ``do_<tag>_start`` / ``do_<tag>_end`` method, if one exists
       (looked up on the instance, so subclasses can add handlers);
    2. a template from ``start_tags`` / ``end_tags``;
    3. ``do_default_start`` / ``do_default_end``, which only report the
       tag when ``verbose`` is set.
    """

    # Markup written when a tag opens.  Templates are %-formatted with a
    # dict of the tag's attributes, keyed both by attribute name and by
    # position ("0", "1", ...).  The "a" template is filled in by
    # do_a_start, which supplies the link target as "0" and as "href".
    start_tags = {
        "a": " [%(0)s ",
        "b": "'''",
        "em": "''",
        "tt": "{{{",
        "pre": "\n{{{",
        "p": "\n\n",
        "br": "\n\n",
        "h1": "\n\n= ",
        "h2": "\n\n== ",
        "h3": "\n\n=== ",
        "h4": "\n\n==== ",
        "h5": "\n\n===== ",
        "title": "TITLE: ",
        "table": "\n",
        "tr": "",
        "td": "||",
    }

    # Markup written when a tag closes (plain strings, no formatting).
    end_tags = {
        "a": ']',
        "b": "'''",
        "em": "''",
        "tt": "}}}",
        "pre": "}}}\n",
        "p": "",
        "h1": " =\n\n",
        "h2": " ==\n\n",
        "h3": " ===\n\n",
        "h4": " ====\n\n",
        "h5": " =====\n\n",
        "table": "\n",
        "tr": "||\n",
        "dt": ":: ",
    }

    def __init__(self):
        """Set up the parser with output going to ``sys.stdout``."""
        HTMLParser.__init__(self)
        self.output = sys.stdout    # any object with a write() method
        self.list_mode = []         # stack of item markers for open lists
        self.open_links = []        # per open <a>: was link markup written?
        self.preformatted = False   # True while inside <pre>
        self.verbose = 0            # non-zero: report unhandled tags

    def write(self, text):
        """Write ``text`` to the current output object."""
        self.output.write(text)

    # -- lists ---------------------------------------------------------

    def do_ul_start(self, attrs, tag):
        """Open a bulleted list."""
        self.list_mode.append("*")

    def do_ol_start(self, attrs, tag):
        """Open a numbered list."""
        self.list_mode.append("1.")

    def do_dl_start(self, attrs, tag):
        """Open a definition list (its items carry no marker)."""
        self.list_mode.append("")

    def do_ul_end(self, tag):
        """Close the innermost list; harmless when no list is open."""
        self.list_mode = self.list_mode[:-1]

    do_ol_end = do_ul_end
    do_dl_end = do_ul_end

    def _item_prefix(self, fallback_marker):
        """Return newline + indentation + marker for a new list item.

        Indentation is one space per open list.  When no list is open
        (an item outside any list), a single level with
        ``fallback_marker`` is used instead of failing on the empty stack.
        """
        if self.list_mode:
            return "\n" + " " * len(self.list_mode) + self.list_mode[-1]
        return "\n " + fallback_marker

    def do_li_start(self, args, tag):
        """Start a list item using the marker of the innermost list."""
        # The trailing space separates the marker from the item text, so
        # "<li>one" comes out as " * one" / " 1. one" rather than "1.one".
        self.write(self._item_prefix("*") + " ")

    def do_dt_start(self, args, tag):
        """Start a definition term; the "dt" end template adds ":: "."""
        self.write(self._item_prefix(""))

    # -- preformatted text ---------------------------------------------

    def do_pre_start(self, args, tag):
        """Enter preformatted mode and open a ``{{{`` block."""
        self.preformatted = True
        self.write(self.start_tags["pre"])

    def do_pre_end(self, tag):
        """Leave preformatted mode and close the ``}}}`` block."""
        self.preformatted = False
        self.write(self.end_tags["pre"])

    # -- links -----------------------------------------------------------

    def do_a_start(self, attrs, tag):
        """Open a link for an anchor that has a non-empty ``href``.

        The target is taken from the ``href`` attribute by name, not from
        whichever attribute happens to come first.  Anchors without a
        target (``<a name=...>``, bare ``<a>``) produce no markup.
        """
        href = None
        for name, value in attrs:
            if name == "href" and value:
                href = value
                break
        # Remember the decision so the matching </a> closes only what was
        # actually opened.
        self.open_links.append(href is not None)
        if href is not None:
            self.write(self.start_tags["a"] % {"0": href, "href": href})

    def do_a_end(self, tag):
        """Close the link opened by the matching ``<a>``, if any."""
        if self.open_links and self.open_links.pop():
            self.write(self.end_tags["a"])

    # -- HTMLParser callbacks ------------------------------------------

    def handle_starttag(self, tag, attrs):
        """Dispatch a start tag to its handler, template or the default."""
        handler = getattr(self, "do_%s_start" % tag, None)
        if handler is not None:
            handler(attrs, tag)
        elif tag in self.start_tags:
            attr_dict = {}
            for index, (name, value) in enumerate(attrs):
                attr_dict[name] = value
                attr_dict[str(index)] = value
            self.write(self.start_tags[tag] % attr_dict)
        else:
            self.do_default_start(attrs, tag)

    def handle_endtag(self, tag):
        """Dispatch an end tag to its handler, template or the default."""
        handler = getattr(self, "do_%s_end" % tag, None)
        if handler is not None:
            handler(tag)
        elif tag in self.end_tags:
            self.write(self.end_tags[tag])
        else:
            self.do_default_end(tag)

    def handle_data(self, data):
        """Write text, turning newlines into spaces outside ``<pre>``."""
        if self.preformatted:
            self.write(data)
        else:
            self.write(data.replace("\n", " "))

    def handle_charref(self, name):
        """Write a numeric character reference such as ``&#65;``.

        ``name`` is the text after ``&#``: decimal digits, or ``x``/``X``
        followed by hex digits.  ASCII code points are written as the
        character itself.  Anything else is written back as a numeric
        reference rather than decoded, so the output does not depend on
        the character encoding of the page or of the output stream.
        """
        try:
            if name[:1] in ("x", "X"):
                code = int(name[1:], 16)
            else:
                code = int(name)
        except ValueError:
            code = None
        if code is not None and 0 < code < 128:
            self.write(chr(code))
        else:
            self.write("&#%s;" % name)

    def handle_entityref(self, name):
        """Write the replacement text for a named entity.

        Names missing from ``htmlentitydefs.entitydefs`` are written back
        as ``&`` followed by the name.
        """
        if name in htmlentitydefs.entitydefs:
            self.write(htmlentitydefs.entitydefs[name])
        else:
            self.write("&" + name)

    # -- fallbacks -------------------------------------------------------

    def do_default_start(self, attrs, tag):
        """Report an unhandled start tag when ``verbose`` is set."""
        if self.verbose:
            print("Encountered the beginning of a %s tag" % tag)
            # Wrapped in a tuple so any attrs value formats as one item.
            print("Attribs: %s" % (attrs,))

    def do_default_end(self, tag):
        """Report an unhandled end tag when ``verbose`` is set."""
        if self.verbose:
            print("Encountered the end of a %s tag" % tag)
139
140
def main(argv=None):
    """Fetch a URL and write its MoinMoin conversion to stdout.

    argv: argument vector to use; defaults to ``sys.argv``.  ``argv[1]``
    is the URL to retrieve; any further arguments are ignored.

    Exits with a usage message on stderr when the URL is missing.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 2:
        # sys.exit() with a string prints it to stderr and exits non-zero.
        sys.exit("Usage: moinconvert URL")

    import urllib

    response = urllib.urlopen(argv[1])
    try:
        htmldata = response.read()
    finally:
        # Release the connection even if reading fails.
        response.close()

    p = HTML2MoinMoin()
    p.feed(htmldata)
    p.close()
148
149
# Run the converter only when this file is executed as a script.
if __name__ == "__main__":
    main()
Attached Files
To refer to attachments on a page, use attachment:filename, as shown below in the list of files. Do NOT use the URL of the [get] link, since this is subject to change and can break easily. You are not allowed to attach a file to this page.