#! /usr/bin/env python """ Dan Bernstein's CDB implemented in Python see http://cr.yp.to/cdb.html """ from __future__ import generators import os import struct import mmap def uint32_unpack(buf): return struct.unpack('>= 8 u %= self.hslots u <<= 3 self.kpos = self.hpos + u while self.loop < self.hslots: buf = self.read(8, self.kpos) pos = uint32_unpack(buf[4:]) if not pos: raise KeyError self.loop += 1 self.kpos += 8 if self.kpos == self.hpos + (self.hslots << 3): self.kpos = self.hpos u = uint32_unpack(buf[:4]) if u == self.khash: buf = self.read(8, pos) u = uint32_unpack(buf[:4]) if u == len(key): if self.match(key, pos + 8): dlen = uint32_unpack(buf[4:]) dpos = pos + 8 + len(key) return self.read(dlen, dpos) raise KeyError def __getitem__(self, key): self.findstart() return self.findnext(key) def get(self, key, default=None): self.findstart() try: return self.findnext(key) except KeyError: return default def cdb_dump(infile): """dump a database in djb's cdbdump format""" db = Cdb(infile) for key,value in db.iteritems(): print "+%d,%d:%s->%s" % (len(key), len(value), key, value) print def cdb_make(outfile, items): pos = 2048 tables = {} # { h & 255 : [(h, p)] } # write keys and data outfile.seek(pos) for key, value in items: outfile.write(uint32_pack(len(key)) + uint32_pack(len(value))) h = cdb_hash(key) outfile.write(key) outfile.write(value) tables.setdefault(h & 255, []).append((h, pos)) pos += 8 + len(key) + len(value) final = '' # write hash tables for i in range(256): entries = tables.get(i, []) nslots = 2*len(entries) final += uint32_pack(pos) + uint32_pack(nslots) null = (0, 0) table = [null] * nslots for h, p in entries: n = (h >> 8) % nslots while table[n] is not null: n = (n + 1) % nslots table[n] = (h, p) for h, p in table: outfile.write(uint32_pack(h) + uint32_pack(p)) pos += 8 # write header (pointers to tables and their lengths) outfile.flush() outfile.seek(0) outfile.write(final) class CdbMake(object): def __init__(self, outfile): self.pos = 2048 self.outfile = outfile self.outfile.seek(self.pos) self.tables = {} def add(self, key, value): outfile = self.outfile outfile.write(uint32_pack(len(key)) + uint32_pack(len(value))) h = cdb_hash(key) outfile.write(key) outfile.write(value) self.tables.setdefault(h & 255, []).append((h, self.pos)) self.pos += 8 + len(key) + len(value) def finish(self): final = '' tables = self.tables pos = self.pos outfile = self.outfile # write hash tables for i in range(256): entries = tables.get(i, []) nslots = 2*len(entries) final += uint32_pack(pos) + uint32_pack(nslots) null = (0, 0) table = [null] * nslots for h, p in entries: n = (h >> 8) % nslots while table[n] is not null: n = (n + 1) % nslots table[n] = (h, p) for h, p in table: outfile.write(uint32_pack(h) + uint32_pack(p)) pos += 8 # write header (pointers to tables and their lengths) outfile.flush() outfile.seek(0) outfile.write(final) def test(): #db = Cdb(open("t")) #print db['one'] #print db['two'] #print db['foo'] #print db['us'] #print db.get('ec') #print db.get('notthere') db = open('test.cdb', 'wb') cdb_make(db, [('one', 'Hello'), ('two', 'Goodbye'), ('foo', 'Bar'), ('us', 'United States'), ]) db.close() db = Cdb(open("test.cdb", 'rb')) print db['one'] print db['two'] print db['foo'] print db['us'] print db.get('ec') print db.get('notthere') if __name__ == '__main__': test()