commit a5aedfb5514f9f668b4de0037cc9d8fd5a1818b4
parent 10b4c6cff5a0c4e8445f556dffc7d5f7bcef42e9
Author: nolash <dev@holbrook.no>
Date: Fri, 30 Oct 2020 09:48:05 +0100
Add test, py repo structure
Diffstat:
8 files changed, 129 insertions(+), 68 deletions(-)
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1 @@
+__pycache__
diff --git a/README.md b/README.md
@@ -0,0 +1 @@
+# Moolb - simple bloom filter with pluggable hash backend
diff --git a/main.py b/main.py
@@ -1,68 +0,0 @@
-import numpy
-import hashlib
-import logging
-import math
-
-logging.basicConfig(level=logging.DEBUG)
-logg = logging.getLogger()
-
-
-# m = ceil((n * log(p)) / log(1 / pow(2, log(2))));
-
-class BloomFilter:
-
- def __init__(self, bits, rounds):
- self.bits = bits
- self.bytes = int(bits / 8)
- if self.bytes * 8 != self.bits:
- raise ValueError('Need byte boundary bit value')
- self.rounds = rounds
- self.filter = numpy.zeros(self.bytes, dtype=numpy.uint8)
- self.hasher = self.set_hasher(self.__hash)
-
-
- def set_hasher(self, hasher):
- self.hasher = hasher
-
-
- def add(self, b):
- for i in range(self.rounds):
- salt = str(i)
- s = salt.encode('utf-8')
- z = self.__hash(b, s)
- r = int.from_bytes(z, byteorder='big')
- m = r % self.bits
- bytepos = math.floor(m / 8)
- bitpos = m % 8
- self.filter[bytepos] |= 1 << bitpos
- logg.debug('foo {} {}'.format(bytepos, bitpos))
- return m
-
-
- def check(self, b):
- for i in range(self.rounds):
- salt = str(i)
- s = salt.encode('utf-8')
- z = self.__hash(b, s)
- r = int.from_bytes(z, byteorder='big')
- m = r % self.bits
- bytepos = math.floor(m / 8)
- bitpos = m % 8
- if not self.filter[bytepos] & 1 << bitpos:
- return False
- return True
-
-
- def __hash(self, b, s):
- logg.debug('hashing {} {}'.format(b.hex(), s.hex()))
- h = hashlib.sha256()
- h.update(b)
- h.update(s)
- return h.digest()
-
-
-
-f = BloomFilter(8192 * 8, 3)
-f.add(b'1024')
-print(f.check(b'1024'))
-print(f.check(b'1023'))
diff --git a/moolb/__init__.py b/moolb/__init__.py
@@ -0,0 +1 @@
+from .moolb import Bloom
diff --git a/moolb/moolb.py b/moolb/moolb.py
@@ -0,0 +1,64 @@
+import numpy
+import hashlib
+import logging
+import math
+
+logging.basicConfig(level=logging.DEBUG)
+logg = logging.getLogger()
+
+
+# m = ceil((n * log(p)) / log(1 / pow(2, log(2))));
+
+class Bloom:
+
+ def __init__(self, bits, rounds, hasher=None):
+ self.bits = bits
+ self.bytes = int(bits / 8)
+ if self.bytes * 8 != self.bits:
+ raise ValueError('Need byte boundary bit value')
+ self.rounds = rounds
+ self.filter = numpy.zeros(self.bytes, dtype=numpy.uint8)
+ if hasher == None:
+ logg.info('using default hasher (SHA256)')
+ hasher = self.__hash
+ self.hasher = self.set_hasher(hasher)
+
+
+ def set_hasher(self, hasher):
+ self.hasher = hasher
+
+
+ def add(self, b):
+ for i in range(self.rounds):
+ salt = str(i)
+ s = salt.encode('utf-8')
+ z = self.__hash(b, s)
+ r = int.from_bytes(z, byteorder='big')
+ m = r % self.bits
+ bytepos = math.floor(m / 8)
+ bitpos = m % 8
+ self.filter[bytepos] |= 1 << bitpos
+ logg.debug('foo {} {}'.format(bytepos, bitpos))
+ return m
+
+
+ def check(self, b):
+ for i in range(self.rounds):
+ salt = str(i)
+ s = salt.encode('utf-8')
+ z = self.__hash(b, s)
+ r = int.from_bytes(z, byteorder='big')
+ m = r % self.bits
+ bytepos = math.floor(m / 8)
+ bitpos = m % 8
+ if not self.filter[bytepos] & 1 << bitpos:
+ return False
+ return True
+
+
+ def __hash(self, b, s):
+ logg.debug('hashing {} {}'.format(b.hex(), s.hex()))
+ h = hashlib.sha256()
+ h.update(b)
+ h.update(s)
+ return h.digest()
diff --git a/setup.cfg b/setup.cfg
@@ -0,0 +1,10 @@
+[metadata]
+classifiers =
+ Programming Language :: Python :: 3
+ Operating System :: OS Independent
+ Development Status :: 3 - Alpha
+ Intended Audience :: Developers
+ Topic :: Software Development :: Libraries
+ License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)
+license_files =
+ LICENSE.txt
diff --git a/setup.py b/setup.py
@@ -0,0 +1,20 @@
+from setuptools import setup
+
+f = open('./README.md', 'r')
+long_description = f.read()
+f.close()
+
+setup(
+ name='moolb',
+ version='0.0.1',
+ description='Simple bloom filter with pluggable hash backend',
+ author='Louis Holbrook',
+ author_email='dev@holbrook.no',
+ license='GPL3',
+ long_description=long_description,
+ long_description_content_type='text/markdown',
+ packages=[
+ 'moolb',
+ ],
+ url='https://gitlab.com/nolash/python-moolb',
+ )
diff --git a/tests/test_basic.py b/tests/test_basic.py
@@ -0,0 +1,32 @@
+import unittest
+import hashlib
+
+from moolb import Bloom
+
+
+def hashround(self, b, s):
+ logg.debug('sha1 hashing {} {}'.format(b.hex(), s.hex()))
+ h = hashlib.sha1()
+ h.update(b)
+ h.update(s)
+ return h.digest()
+
+
+class Test(unittest.TestCase):
+
+ def test_default(self):
+ f = Bloom(8192 * 8, 3)
+ f.add(b'1024')
+ self.assertTrue(f.check(b'1024'))
+ self.assertFalse(f.check(b'1023'))
+
+
+ def test_plug(self):
+ f = Bloom(8192 * 8, 3, hashround)
+ f.add(b'1024')
+ self.assertTrue(f.check(b'1024'))
+ self.assertFalse(f.check(b'1023'))
+
+
+if __name__ == '__main__':
+ unittest.main()