Tool to convert multiple files between the file encodings known to Python. Useful when translating Hebrew and other RTL languages.
This is not enough to switch a wiki to utf-8. A tool for a complete utf-8 conversion is contained in moin--main--1.3.
1 #!/usr/bin/env python
2 """ convert multiply files between Python known file encodings
3
4 Open files in the command line arguments and save copies of the files
5 in outencoding format, adding format name to the names.
6
7 Author: Nir Soffer nirs at freeshell.org"""
8
9 import os.path, sys, glob, operator
10 import codecs # does the encoding
11
12
def main():
    """Convert every file matched by the command-line patterns.

    Each argument after the program name is treated as a glob pattern and
    expanded here, because the Windows shell does not expand wildcards
    such as ``*`` itself.  Every match that is a regular file is passed to
    convert(); anything else is reported and skipped.

    Exits through sys.exit() with a usage message when no pattern is given.
    """
    # get arguments
    patterns = sys.argv[1:]
    if not patterns:
        sys.exit('Noting to do\nUsage: unicode2utf-8.py [file_pattern...]')

    # Expand ALL patterns before converting anything, so that output files
    # created during this run are never picked up by a later pattern.
    files = []
    for pattern in patterns:
        matches = glob.glob(pattern)
        if not matches:
            # Report instead of silently ignoring a mistyped name/pattern.
            print('Skipping %s: No matching files' % pattern)
        files.extend(matches)

    for path in files:
        # Only regular files can be converted (globs may match directories).
        if os.path.isfile(path):
            convert(path)
        else:
            print('Skipping %s: Not a file' % path)
33
34
def convert(path, inEncoding='utf-16', outEncoding='utf-8'):
    """Write a re-encoded copy of a file next to the original.

    The file at ``path`` is decoded with ``inEncoding`` and written, encoded
    with ``outEncoding``, to a new file whose name has '.<outEncoding>'
    inserted before the extension ('page.txt' -> 'page.utf-8.txt'; a name
    without extension gets the suffix appended: 'README' -> 'README.utf-8').
    The codecs module does the actual transcoding, so any encoding name
    accepted by codecs.open() can be used.

    I/O errors and decode/encode failures are reported on stdout and the
    file is skipped, so one bad file does not abort a batch run.
    Returns None.
    """
    # make new name
    # os.path.splitext only inspects the last path component and returns an
    # empty extension when there is none; a bare rfind('.') would split on a
    # dot in a directory name, or at index -1 when the path has no dot.
    name, ext = os.path.splitext(path)
    newName = name + '.' + outEncoding + ext

    try:
        # read data from the file; 'with' closes the handle even on error
        with codecs.open(path, 'rb', encoding=inEncoding) as inFile:
            data = inFile.read()

        # write to output file
        with codecs.open(newName, 'wb', encoding=outEncoding) as outFile:
            outFile.write(data)
    except (IOError, OSError, UnicodeError) as why:
        # UnicodeError covers input that is not valid inEncoding and text
        # that cannot be represented in outEncoding.
        print('Could not convert %s: %s' % (path, str(why)))
        return

    # log
    print('Converted %s to %s' % (path, newName))
63
64
# Run the conversion only when executed as a script, not when imported.
if __name__ == '__main__':
    main()