Attachment 'pycdb.py'

Download

   1 """
   2 pycdb.py - Python implementation of cdb
   3 
   4   by Yusuke Shinyama
   5   * public domain *
   6 
   7   Coding style fixes (and tcdb removal) by Johannes Berg
   8 """
   9 
  10 import os
  11 from struct import pack, unpack
  12 from array import array
  13 
  14 
  15 def cdbhash(s, n=0L):
  16     """calc hash value with a given key"""
  17     return reduce(lambda h, c: ((h * 33) ^ ord(c)) & 0xffffffffL, s, n + 5381L)
  18 
  19 if pack('=i', 1) == pack('>i', 1):
  20     def decode(x):
  21         a = array('I', x)
  22         a.byteswap()
  23         return a
  24     def encode(a):
  25         a.byteswap()
  26         return a.tostring()
  27 else:
  28     def decode(x):
  29         a = array('I', x)
  30         return a
  31     def encode(a):
  32         return a.tostring()
  33 
  34 
  35 def cdbiter(fp, eod):
  36     kloc = 2048
  37     while kloc < eod:
  38         fp.seek(kloc)
  39         (klen, vlen) = unpack('<II', fp.read(8))
  40         k = fp.read(klen)
  41         v = fp.read(vlen)
  42         kloc += 8 + klen + vlen
  43         yield (k, v)
  44     fp.close()
  45 
  46 
  47 class CDBReader:
  48     def __init__(self, cdbname, docache=1):
  49         self.name = cdbname
  50         self._fp = file(cdbname, 'rb')
  51         hash0 = decode(self._fp.read(2048))
  52         self._hash0 = [ (hash0[i], hash0[i+1]) for i in xrange(0, 512, 2) ]
  53         self._hash1 = [ None ] * 256
  54         self._eod = hash0[0]
  55         self._docache = docache
  56         self._cache = {}
  57         self._keyiter = None
  58         self._eachiter = None
  59 
  60     def __getstate__(self):
  61         raise TypeError
  62 
  63     def __setstate__(self, dict):
  64         raise TypeError
  65 
  66     def __getitem__(self, k):
  67         k = str(k)
  68         if k in self._cache:
  69             return self._cache[k]
  70         h = cdbhash(k)
  71         h1 = h & 0xff
  72         (pos_bucket, ncells) = self._hash0[h1]
  73         if ncells == 0:
  74             raise KeyError(k)
  75         hs = self._hash1[h1]
  76         if hs == None:
  77             self._fp.seek(pos_bucket)
  78             hs = decode(self._fp.read(ncells * 8))
  79             self._hash1[h1] = hs
  80         i = ((h >> 8) % ncells) * 2
  81         n = ncells * 2
  82         for _ in xrange(ncells):
  83             p1 = hs[i + 1]
  84             if p1 == 0: raise KeyError(k)
  85             if hs[i] == h:
  86                 self._fp.seek(p1)
  87                 (klen, vlen) = unpack('<II', self._fp.read(8))
  88                 k1 = self._fp.read(klen)
  89                 if k1 == k:
  90                     v1 = self._fp.read(vlen)
  91                     if self._docache:
  92                         self._cache[k] = v1
  93                     return v1
  94             i = (i + 2) % n
  95         raise KeyError(k)
  96 
  97     def get(self, k, failed=None):
  98         try:
  99             return self.__getitem__(k)
 100         except KeyError:
 101             return failed
 102 
 103     def has_key(self, k):
 104         try:
 105             self.__getitem__(k)
 106             return True
 107         except KeyError:
 108             return False
 109 
 110     def __contains__(self, k):
 111         return self.has_key(k)
 112 
 113     def firstkey(self):
 114         self._keyiter = None
 115         return self.nextkey()
 116     
 117     def nextkey(self):
 118         if not self._keyiter:
 119             self._keyiter = ( k for (k, v) in cdbiter(self._fp, self._eod) )
 120         try:
 121             return self._keyiter.next()
 122         except StopIteration:
 123             return None
 124 
 125     def each(self):
 126         if not self._eachiter:
 127             self._eachiter = cdbiter(self._fp, self._eod)
 128         try:
 129             return self._eachiter.next()
 130         except StopIteration:
 131             return None
 132     
 133     def iterkeys(self):
 134         return ( k for (k, v) in cdbiter(self._fp, self._eod) )
 135 
 136     def itervalues(self):
 137         return ( v for (k, v) in cdbiter(self._fp, self._eod) )
 138 
 139     def iteritems(self):
 140         return cdbiter(self._fp, self._eod)
 141 
 142 
 143 class CDBMaker:
 144     def __init__(self, cdbname, tmpname):
 145         self.fn = cdbname
 146         self.fntmp = tmpname
 147         self.numentries = 0
 148         self._fp = file(tmpname, 'wb')
 149         self._pos = 2048
 150         self._bucket = [ array('I') for _ in xrange(256) ]
 151 
 152     def __len__(self):
 153         return self.numentries
 154 
 155     def __getstate__(self):
 156         raise TypeError
 157 
 158     def __setstate__(self, dict):
 159         raise TypeError
 160 
 161     def add(self, k, v):
 162         (k, v) = (str(k), str(v))
 163         (klen, vlen) = (len(k), len(v))
 164         self._fp.seek(self._pos)
 165         self._fp.write(pack('<II', klen, vlen))
 166         self._fp.write(k)
 167         self._fp.write(v)
 168         h = cdbhash(k)
 169         b = self._bucket[h % 256]
 170         b.append(h)
 171         b.append(self._pos)
 172         # sizeof(keylen)+sizeof(datalen)+sizeof(key)+sizeof(data)
 173         self._pos += 8+klen+vlen
 174         self.numentries += 1
 175         return self
 176     
 177     def finish(self):
 178         self._fp.seek(self._pos)
 179         pos_hash = self._pos
 180         # write hashes
 181         for b1 in self._bucket:
 182             if not b1: continue
 183             blen = len(b1)
 184             a = array('I', [0] * blen * 2)
 185             for j in xrange(0, blen, 2):
 186                 (h, p) = (b1[j], b1[j+1])
 187                 i = ((h >> 8) % blen) * 2
 188                 while a[i + 1]:
 189                     i = (i + 2) % len(a)
 190                 a[i] = h
 191                 a[i + 1] = p
 192             self._fp.write(encode(a))
 193         # write header
 194         self._fp.seek(0)
 195         a = array('I')
 196         for b1 in self._bucket:
 197             a.append(pos_hash)
 198             a.append(len(b1))
 199             pos_hash += len(b1)*8
 200         self._fp.write(encode(a))
 201         self._fp.close()
 202         os.rename(self.fntmp, self.fn)
 203 
# Module-level aliases for the writer and reader classes.
# NOTE(review): the names presumably mirror the `cdbmake` / `init` entry
# points of the python-cdb extension module -- confirm.
cdbmake = CDBMaker
init = CDBReader

Attached Files

To refer to attachments on a page, use attachment:filename, as shown below in the list of files. Do NOT use the URL of the [get] link, since this is subject to change and can break easily.
  • [get | view] (2008-07-02 23:46:15, 5.3 KB) [[attachment:pycdb.py]]
 All files | Selected Files: delete move to page copy to page

You are not allowed to attach a file to this page.