commit ae667ff39f95808fd62c99ebde9d36a5bfb3c797
parent b3a16cb3829e0622de4be86b2addf0295d942ef1
Author: nolash <dev@holbrook.no>
Date: Sun, 14 Feb 2021 21:28:24 +0100
Add default data, merge filter
Diffstat:
4 files changed, 57 insertions(+), 5 deletions(-)
diff --git a/CHANGELOG b/CHANGELOG
@@ -1,3 +1,6 @@
+- 0.1.1-pending
+ * Add default data import
+ * Add filter merge
- 0.1.0
* Use bytes for serial instead of str
- 0.0.4
diff --git a/moolb/moolb.py b/moolb/moolb.py
@@ -10,18 +10,34 @@ logg = logging.getLogger()
class Bloom:
- def __init__(self, bits, rounds, hasher=None):
+ def __init__(self, bits, rounds, hasher=None, default_data=None):
self.bits = bits
self.bytes = int(bits / 8)
if self.bytes * 8 != self.bits:
raise ValueError('Need byte boundary bit value')
self.rounds = rounds
- self.filter = [0] * self.bytes
if hasher == None:
logg.info('using default hasher (SHA256)')
hasher = self.__hash
self.hasher = self.set_hasher(hasher)
+ self.filter = None
+ if default_data != None:
+ self.merge(default_data)
+ else:
+ self.filter = [0] * self.bytes
+
+
+ def merge(self, filter_data):
+ datalen = len(filter_data)
+ if datalen != self.bytes:
+ raise ValueError('expected byte array bit size {}, got {}'.format(self.bits, datalen * 8))
+
+ if self.filter == None:
+ self.filter = filter_data
+ else:
+ self.filter = list(map(lambda x, y: x | y, filter_data, self.filter))
+
def set_hasher(self, hasher):
self.hasher = hasher
diff --git a/setup.py b/setup.py
@@ -6,7 +6,7 @@ f.close()
setup(
name='moolb',
- version='0.1.0',
+ version='0.1.1b1',
description='Simple bloom filter with pluggable hash backend',
author='Louis Holbrook',
author_email='dev@holbrook.no',
diff --git a/tests/test_basic.py b/tests/test_basic.py
@@ -18,19 +18,52 @@ def hashround(self, b, s):
class Test(unittest.TestCase):
- def test_default(self):
+ def test_basic(self):
f = Bloom(8192 * 8, 3)
f.add(b'1024')
self.assertTrue(f.check(b'1024'))
self.assertFalse(f.check(b'1023'))
-
+
+ def test_default(self):
+ b = bytearray(8192)
+ b[42] = 13
+ f = Bloom(8192 * 8, 3, default_data=b)
+ self.assertEqual(f.filter[42], 13)
+
+ b = bytearray(8193)
+ with self.assertRaises(ValueError):
+ f = Bloom(8192 * 8, 3, default_data=b)
+
+
def test_plug(self):
f = Bloom(8192 * 8, 3, hashround)
f.add(b'1024')
self.assertTrue(f.check(b'1024'))
self.assertFalse(f.check(b'1023'))
+
+ def test_merge(self):
+ f = Bloom(8 * 8, 3, hashround)
+ b = bytearray(8)
+ b[2] = 2
+ b[6] = 4
+ f.merge(b)
+ self.assertEqual(f.filter[2], 2)
+ self.assertEqual(f.filter[6], 4)
+
+ b = bytearray(8)
+ b[2] = 1
+ b[6] = 8
+ f.merge(b)
+ self.assertEqual(f.filter[2], 3)
+ self.assertEqual(f.filter[6], 12)
+
+ b = bytearray(9)
+ with self.assertRaises(ValueError):
+ f.merge(b)
+
+
# def test_dump(self):
# f = Bloom(8192 * 8, 3)
# f.add(b'1024')