Attachment 'PHPUnserialize.py'
Download 1 import types, string, re
2
3 """
4 Unserialize class for the PHP serialization format.
5
6 @version v0.4 BETA
7 @author Scott Hurring; scott at hurring dot com
8 @copyright Copyright (c) 2005 Scott Hurring
9 @license http://opensource.org/licenses/gpl-license.php GNU Public License
10 $Id: PHPUnserialize.py,v 1.1 2006/01/08 21:53:19 shurring Exp $
11
12 Most recent version can be found at:
13 http://hurring.com/code/python/phpserialize/
14
15 Usage:
16 # Create an instance of the unserialize engine
17 u = PHPUnserialize()
18 # unserialize some string into python data
19 data = u.unserialize(serialized_string)
20
21 Please see README.txt for more information.
22 """
23
class PHPUnserialize(object):
    """
    Decode text written in the PHP serialize() format into Python values.

    Supported type letters (matched case-insensitively):
        i -> int
        b -> bool
        d -> float
        n -> None
        s -> str
        a -> dict

    A PHP array always comes back as a dict, even if it was originally
    produced from a Python list or tuple: PHP only has arrays, so the
    original sequence type is lost when serializing.

    Any other type letter, and any truncated or malformed input, raises
    ValueError (a subclass of Exception).

    String lengths are counted in elements of the 'data' argument.
    NOTE(review): PHP writes byte lengths, so text containing multi-byte
    characters should presumably be passed in a one-element-per-byte
    form -- confirm against the callers.

    Usage:
        u = PHPUnserialize()
        data = u.unserialize(serialized_string)
    """

    # "name|" prefix that precedes every value in session-encoded data.
    _SESSION_KEY = re.compile(r'(\w+)\|')

    def __init__(self):
        pass

    def session_decode(self, data):
        """
        Decode session-encoded data ("key|<serialized value>" repeated)
        into a dict mapping each key to its unserialized value.

        Decoding stops at the first position that does not start with a
        "key|" prefix; whatever was decoded up to that point is returned.

        Thanks to Ken Restivo for suggesting session support.
        """
        session = {}
        pos = 0
        while pos < len(data):
            m = self._SESSION_KEY.match(data, pos)
            if not m:
                # No more stuff to decode
                break
            (dtype, consumed, value) = self._unserialize(data, m.end())
            session[m.group(1)] = value
            # Continue right after the value that was just read.
            pos = m.end() + consumed

        return session

    def unserialize(self, data):
        """
        Unserialize the single value found at the start of 'data' and
        return it. Anything following that value is ignored.
        """
        return self._unserialize(data, 0)[2]

    def _unserialize(self, data, offset=0):
        """
        Find the next token and unserialize it.
        Recurse on array.

        offset = raw offset from start of data

        return (type, chars_consumed, value), where 'type' is the
        lower-cased type letter and 'chars_consumed' is the length of the
        token counted from 'offset'.

        Raises ValueError on an unknown type letter, a missing terminator
        or a string shorter than its declared length.
        """
        # str.lower() instead of string.lower(): the latter is Python 2 only.
        dtype = data[offset:offset + 1].lower()

        # 't:' = 2 chars
        dataoffset = offset + 2

        # int / bool / double all look like "t:<text>;"
        if dtype in ('i', 'b', 'd'):
            (chars, text) = self.read_until(data, dataoffset, ';')
            # +1 for end semicolon
            dataoffset += chars + 1
            if dtype == 'i':
                value = int(text)
            elif dtype == 'b':
                value = (int(text) == 1)
            else:
                value = float(text)

        # n => None ("N;" carries no payload)
        elif dtype == 'n':
            value = None

        # s => String: s:<length>:"<chars>";
        elif dtype == 's':
            (chars, text) = self.read_until(data, dataoffset, ':')
            length = int(text)
            # +2 for the colon after the length field and the start quote
            dataoffset += chars + 2

            # read_chars() starts reading at data[offset - 1], hence the +1
            (chars, value) = self.read_chars(data, dataoffset + 1, length)
            if chars != length:
                raise ValueError("String length mismatch")
            # +2 for endquote semicolon
            dataoffset += chars + 2

        # array => Dict: a:<count>:{<key><value>...}
        elif dtype == 'a':
            value = {}

            # How many keys does this array have?
            (chars, text) = self.read_until(data, dataoffset, ':')
            # +2 for the colon after the count field and the opening brace
            dataoffset += chars + 2

            # Loop through and fetch this number of key/value pairs
            for _ in range(int(text)):
                (ktype, consumed, key) = self._unserialize(data, dataoffset)
                dataoffset += consumed

                (vtype, consumed, item) = self._unserialize(data, dataoffset)
                dataoffset += consumed

                value[key] = item

            # +1 for the closing brace
            dataoffset += 1

        # I don't know how to unserialize this
        else:
            raise ValueError("Unknown / Unhandled data type (%s)!" % dtype)

        return (dtype, dataoffset - offset, value)

    def read_until(self, data, offset, stopchar):
        """
        Read from data[offset] until you encounter 'stopchar'.

        return (chars_read, text); the stop character itself is neither
        counted nor included in 'text'.

        Raises ValueError if 'stopchar' does not occur at or after 'offset'.
        """
        end = data.find(stopchar, offset)
        if end == -1:
            # Consumed all the characters and haven't found the stop char
            raise ValueError("Invalid")

        # (chars_read, data)
        return (end - offset, data[offset:end])

    def read_chars(self, data, offset, length):
        """
        Read up to 'length' chars from 'data'.

        For backward compatibility reading starts at data[offset - 1],
        not data[offset]; _unserialize() compensates by passing the
        position of the first character plus one.

        return (chars_read, text). 'chars_read' is the number of chars
        actually available, so it is smaller than 'length' when 'data'
        ends early; callers can compare the two to detect truncation.
        """
        # Clamp at 0 so a small offset can never wrap around to the end
        # of 'data' through negative slice indices.
        start = max(offset - 1, 0)
        end = max(offset - 1 + length, 0)
        text = data[start:end]

        # (chars_read, data)
        return (len(text), text)
Attached Files
To refer to attachments on a page, use attachment:filename, as shown below in the list of files. Do NOT use the URL of the [get] link, since this is subject to change and can break easily. You are not allowed to attach a file to this page.