moolb.py (2236B)
1 import hashlib 2 import logging 3 import math 4 5 logg = logging.getLogger(__name__) 6 7 # m = ceil((n * log(p)) / log(1 / pow(2, log(2)))); 8 9 10 class Bloom: 11 12 def __init__(self, bits, rounds, hasher=None, default_data=None): 13 self.bits = bits 14 self.bytes = int(bits / 8) 15 if self.bytes * 8 != self.bits: 16 raise ValueError('Need byte boundary bit value') 17 self.rounds = rounds 18 if hasher == None: 19 logg.debug('moolb using default hasher (SHA256)') 20 hasher = self.__hash 21 self.hasher = self.set_hasher(hasher) 22 23 self.filter = None 24 if default_data != None: 25 self.merge(default_data) 26 else: 27 self.filter = [0] * self.bytes 28 29 30 def merge(self, filter_data): 31 datalen = len(filter_data) 32 if datalen != self.bytes: 33 raise ValueError('expected byte array bit size {}, got {}'.format(self.bits, datalen * 8)) 34 35 if self.filter == None: 36 self.filter = filter_data 37 else: 38 self.filter = list(map(lambda x, y: x | y, filter_data, self.filter)) 39 40 41 def set_hasher(self, hasher): 42 self.hasher = hasher 43 44 45 def add(self, b): 46 for i in range(self.rounds): 47 #salt = str(i) 48 #s = salt.encode('utf-8') 49 s = i.to_bytes(4, byteorder='big') 50 z = self.__hash(b, s) 51 r = int.from_bytes(z, byteorder='big') 52 m = r % self.bits 53 bytepos = math.floor(m / 8) 54 bitpos = m % 8 55 self.filter[bytepos] |= 1 << bitpos 56 return m 57 58 59 def check(self, b): 60 for i in range(self.rounds): 61 #salt = str(i) 62 #s = salt.encode('utf-8') 63 s = i.to_bytes(4, byteorder='big') 64 z = self.__hash(b, s) 65 r = int.from_bytes(z, byteorder='big') 66 m = r % self.bits 67 bytepos = math.floor(m / 8) 68 bitpos = m % 8 69 if not self.filter[bytepos] & 1 << bitpos: 70 return False 71 return True 72 73 74 def to_bytes(self): 75 return bytes(self.filter) 76 77 78 def __hash(self, b, s): 79 h = hashlib.sha256() 80 h.update(b) 81 h.update(s) 82 return h.digest()