moolb

Bloom filter for python3 with pluggable hasher backend
git clone git://git.defalsify.org/python-moolb.git
Log | Files | Refs | README | LICENSE

commit e3dfcb4be8e3c8ad72b0cddf4c6506ec0003469e
parent c51dc47f80f0a6baa285f689b729606cd40b9f8f
Author: nolash <dev@holbrook.no>
Date:   Thu, 29 Oct 2020 20:55:52 +0100

Initial commit, add v to filer

Diffstat:
M main.py | 33 ++++++++++++++++++++++++++-------
1 file changed, 26 insertions(+), 7 deletions(-)

diff --git a/main.py b/main.py
@@ -16,17 +16,14 @@ class BloomFilter:
             raise ValueError('Need byte boundary bit value')
         self.rounds = rounds
         self.filter = numpy.zeros(self.bytes, dtype=numpy.uint8)
+        self.hasher = self.__hash
 
 
     def add(self, b):
         for i in range(self.rounds):
             salt = str(i)
-            salt_str = salt.encode('utf-8')
-            logg.debug('hashing {} {}'.format(b.hex(), salt))
-            h = hashlib.sha256()
-            h.update(b)
-            h.update(salt_str)
-            z = h.digest()
+            s = salt.encode('utf-8')
+            z = self.__hash(b, s)
             r = int.from_bytes(z, byteorder='big')
             m = r % self.bits
             bytepos = math.floor(m / 8)
@@ -36,8 +33,30 @@ class BloomFilter:
         return m
 
 
-    def check(self, s):
+    def check(self, b):
+        for i in range(self.rounds):
+            salt = str(i)
+            s = salt.encode('utf-8')
+            z = self.__hash(b, s)
+            r = int.from_bytes(z, byteorder='big')
+            m = r % self.bits
+            bytepos = math.floor(m / 8)
+            bitpos = m % 8
+            if not self.filter[bytepos] & 1 << bitpos:
+                return False
+        return True
+
+
+    def __hash(self, b, s):
+        logg.debug('hashing {} {}'.format(b.hex(), s.hex()))
+        h = hashlib.sha256()
+        h.update(b)
+        h.update(s)
+        return h.digest()
+
 
 
 f = BloomFilter(8192 * 8, 3)
 f.add(b'1024')
+print(f.check(b'1024'))
+print(f.check(b'1023'))