moolb

Bloom filter for python3 with pluggable hasher backend
git clone git://git.defalsify.org/python-moolb.git
Log | Files | Refs | README | LICENSE

commit bbe443f58ca5411d20b9aa436542f91c457112e1
Author: nolash <dev@holbrook.no>
Date:   Thu, 29 Oct 2020 20:42:41 +0100

Initial commit, add v to filer

Diffstat:
A main.py | 40 ++++++++++++++++++++++++++++++++++++++++
1 file changed, 40 insertions(+), 0 deletions(-)

# main.py -- contents of the new file from "diff --git a/main.py b/main.py",
# with the diff "+" markers stripped and the line structure restored.
"""Minimal Bloom filter backed by a numpy byte array and salted SHA-256."""

import hashlib
import logging
import math

import numpy

logging.basicConfig(level=logging.DEBUG)
logg = logging.getLogger()


class BloomFilter:
    """Fixed-size Bloom filter over UTF-8 encodable strings.

    Each value is hashed ``rounds`` times with SHA-256, salting the digest
    with the decimal round number, and the resulting big-endian integer is
    reduced modulo ``bits`` to select the bit to set.

    Attributes:
        bits: Total number of bits in the filter.
        bytes: Number of bytes backing the filter (``bits / 8``).
        rounds: Number of salted hash rounds applied per value.
        filter: ``numpy.uint8`` array of length ``bytes`` holding the bits.
    """

    def __init__(self, bits, rounds):
        """Allocate an empty filter.

        Args:
            bits: Filter size in bits; must be a positive multiple of 8.
            rounds: Number of hash rounds per value; must be at least 1.

        Raises:
            ValueError: If ``bits`` is not a positive multiple of 8, or if
                ``rounds`` is smaller than 1.
        """
        # Integer arithmetic avoids the float round trip of int(bits / 8).
        byte_count, remainder = divmod(bits, 8)
        if remainder != 0:
            raise ValueError('Need byte boundary bit value')
        if byte_count < 1:
            # A zero-size filter would only fail later, on the modulo in add().
            raise ValueError('Need at least one byte of filter space')
        if rounds < 1:
            # With no rounds add() would have no bit index to return.
            raise ValueError('Need at least one hashing round')
        self.bits = bits
        self.bytes = int(byte_count)
        self.rounds = rounds
        self.filter = numpy.zeros(self.bytes, dtype=numpy.uint8)

    def _bit_indices(self, s):
        """Yield the filter bit index selected for ``s`` in each round."""
        # The value itself is loop-invariant: hash it once, then extend a
        # copy of that state with the per-round salt. The digest is the same
        # as hashing value + salt from scratch.
        base = hashlib.sha256()
        base.update(s.encode('utf-8'))
        for i in range(self.rounds):
            salt = str(i)
            logg.debug('hashing %s %s', s, salt)
            h = base.copy()
            h.update(salt.encode('utf-8'))
            r = int.from_bytes(h.digest(), byteorder='big')
            yield r % self.bits

    def add(self, s):
        """Insert ``s`` into the filter.

        Args:
            s: String value to add.

        Returns:
            The bit index set by the final hash round.
        """
        # NOTE(review): the source diff lost its indentation; the return is
        # placed after the loop so that every round is applied -- confirm
        # against the repository.
        m = None
        for m in self._bit_indices(s):
            bytepos, bitpos = divmod(m, 8)
            self.filter[bytepos] |= 1 << bitpos
            logg.debug('foo %s %s', bytepos, bitpos)
        return m

    def check(self, s):
        """Report whether ``s`` may have been added.

        Args:
            s: String value to look up.

        Returns:
            ``False`` if ``s`` was definitely never added; ``True`` if every
            bit selected for ``s`` is set (which can also happen through
            collisions with other values).
        """
        return all(
            int(self.filter[m // 8]) & (1 << (m % 8))
            for m in self._bit_indices(s)
        )


f = BloomFilter(8192 * 8, 3)
f.add('1024')