moolb

Bloom filter for python3 with pluggable hasher backend
git clone git://git.defalsify.org/python-moolb.git
Log | Files | Refs | README | LICENSE

commit ae667ff39f95808fd62c99ebde9d36a5bfb3c797
parent b3a16cb3829e0622de4be86b2addf0295d942ef1
Author: nolash <dev@holbrook.no>
Date:   Sun, 14 Feb 2021 21:28:24 +0100

Add default data, merge filter

Diffstat:
M CHANGELOG | 3 +++
M moolb/moolb.py | 20 ++++++++++++++++++--
M setup.py | 2 +-
M tests/test_basic.py | 37 +++++++++++++++++++++++++++++++++++--
4 files changed, 57 insertions(+), 5 deletions(-)

diff --git a/CHANGELOG b/CHANGELOG @@ -1,3 +1,6 @@ +- 0.1.1-pending + * Add default data import + * Add filter merge - 0.1.0 * Use bytes for serial instead of str - 0.0.4 diff --git a/moolb/moolb.py b/moolb/moolb.py @@ -10,18 +10,34 @@ logg = logging.getLogger() class Bloom: - def __init__(self, bits, rounds, hasher=None): + def __init__(self, bits, rounds, hasher=None, default_data=None): self.bits = bits self.bytes = int(bits / 8) if self.bytes * 8 != self.bits: raise ValueError('Need byte boundary bit value') self.rounds = rounds - self.filter = [0] * self.bytes if hasher == None: logg.info('using default hasher (SHA256)') hasher = self.__hash self.hasher = self.set_hasher(hasher) + self.filter = None + if default_data != None: + self.merge(default_data) + else: + self.filter = [0] * self.bytes + + + def merge(self, filter_data): + datalen = len(filter_data) + if datalen != self.bytes: + raise ValueError('expected byte array bit size {}, got {}'.format(self.bits, datalen * 8)) + + if self.filter == None: + self.filter = filter_data + else: + self.filter = list(map(lambda x, y: x | y, filter_data, self.filter)) + def set_hasher(self, hasher): self.hasher = hasher diff --git a/setup.py b/setup.py @@ -6,7 +6,7 @@ f.close() setup( name='moolb', - version='0.1.0', + version='0.1.1b1', description='Simple bloom filter with pluggable hash backend', author='Louis Holbrook', author_email='dev@holbrook.no', diff --git a/tests/test_basic.py b/tests/test_basic.py @@ -18,19 +18,52 @@ def hashround(self, b, s): class Test(unittest.TestCase): - def test_default(self): + def test_basic(self): f = Bloom(8192 * 8, 3) f.add(b'1024') self.assertTrue(f.check(b'1024')) self.assertFalse(f.check(b'1023')) - + + def test_defaul(self): + b = bytearray(8192) + b[42] = 13 + f = Bloom(8192 * 8, 3, default_data=b) + self.assertEqual(f.filter[42], 13) + + b = bytearray(8193) + with self.assertRaises(ValueError): + f = Bloom(8192 * 8, 3, default_data=b) + + def test_plug(self): f = Bloom(8192 
* 8, 3, hashround) f.add(b'1024') self.assertTrue(f.check(b'1024')) self.assertFalse(f.check(b'1023')) + + def test_merge(self): + f = Bloom(8 * 8, 3, hashround) + b = bytearray(8) + b[2] = 2 + b[6] = 4 + f.merge(b) + self.assertEqual(f.filter[2], 2) + self.assertEqual(f.filter[6], 4) + + b = bytearray(8) + b[2] = 1 + b[6] = 8 + f.merge(b) + self.assertEqual(f.filter[2], 3) + self.assertEqual(f.filter[6], 12) + + b = bytearray(9) + with self.assertRaises(ValueError): + f.merge(b) + + # def test_dump(self): # f = Bloom(8192 * 8, 3) # f.add(b'1024')