leveldir

Multi-level directory structure data stores in python3
git clone git://git.defalsify.org/python-leveldir.git
Log | Files | Refs | LICENSE

commit ba2df8529ea3b98ea491a336ccec1606da553cca
parent 4947209eba8f66dd2ff29e3967402b6410d66302
Author: nolash <dev@holbrook.no>
Date:   Sun, 27 Jun 2021 09:47:39 +0200

Add numeric subdirs

Diffstat:
M hexdir/__init__.py | 1 -
M hexdir/base.py | 109 ++++---------------------------------------------------------------------------
A hexdir/hex.py | 118 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A hexdir/numeric.py | 42 ++++++++++++++++++++++++++++++++++++++++++
M setup.py | 27 +--------------------------
M tests/test_hexdir.py | 8 +++++---
A tests/test_numdir.py | 43 +++++++++++++++++++++++++++++++++++++++++++
7 files changed, 214 insertions(+), 134 deletions(-)

diff --git a/hexdir/__init__.py b/hexdir/__init__.py @@ -1 +0,0 @@ -from .base import HexDir diff --git a/hexdir/base.py b/hexdir/base.py @@ -1,68 +1,22 @@ # standard imports import os -import stat -import logging -# external imports -from hexathon import valid as valid_hex -logg = logging.getLogger(__name__) +class LevelDir: - -class HexDir: - - def __init__(self, root_path, key_length, levels=2, prefix_length=0): + def __init__(self, root_path, levels, entry_length): self.path = root_path - self.key_length = key_length - self.prefix_length = prefix_length - self.entry_length = key_length + prefix_length - self.__levels = levels + 2 + self.levels = levels + self.entry_length = entry_length fi = None try: fi = os.stat(self.path) self.__verify_directory() except FileNotFoundError: - HexDir.__prepare_directory(self.path) + LevelDir.__prepare_directory(self.path) self.master_file = os.path.join(self.path, 'master') - @property - def levels(self): - return self.__levels - 2 - - - def add(self, key, content, prefix=b''): - l = len(key) - if l != self.key_length: - raise ValueError('expected key length {}, got {}'.format(self.key_length, l)) - l = len(prefix) - if l != self.prefix_length: - raise ValueError('expected prefix length {}, got {}'.format(self.prefix_length, l)) - if not isinstance(content, bytes): - raise ValueError('content must be bytes, got {}'.format(type(content).__name__)) - if prefix != None and not isinstance(prefix, bytes): - raise ValueError('prefix must be bytes, got {}'.format(type(content).__name__)) - key_hex = key.hex() - entry_path = self.to_filepath(key_hex) - - c = self.count() - - os.makedirs(os.path.dirname(entry_path), exist_ok=True) - f = open(entry_path, 'wb') - f.write(content) - f.close() - - f = open(self.master_file, 'ab') - if prefix != None: - f.write(prefix) - f.write(key) - f.close() - - logg.info('created new entry {} idx {} in {}'.format(key_hex, c, entry_path)) - - return c - - def count(self): fi = os.stat(self.master_file) 
c = fi.st_size / self.entry_length @@ -72,62 +26,9 @@ class HexDir: return r - def __cursor(self, idx): - return idx * (self.prefix_length + self.key_length) - - - def set_prefix(self, idx, prefix): - l = len(prefix) - if l != self.prefix_length: - raise ValueError('expected prefix length {}, got {}'.format(self.prefix_length, l)) - if not isinstance(prefix, bytes): - raise ValueError('prefix must be bytes, got {}'.format(type(content).__name__)) - cursor = self.__cursor(idx) - f = open(self.master_file, 'rb+') - f.seek(cursor) - f.write(prefix) - f.close() - - - def get(self, idx): - cursor = self.__cursor(idx) - f = open(self.master_file, 'rb') - f.seek(cursor) - prefix = f.read(self.prefix_length) - key = f.read(self.key_length) - f.close() - return (prefix, key) - - def to_subpath(self, hx): - lead = '' - for i in range(0, self.__levels, 2): - lead += hx[i:i+2] + '/' - return lead.upper() - - - def to_dirpath(self, hx): - sub_path = self.to_subpath(hx) - return os.path.join(self.path, sub_path) - - - def to_filepath(self, hx): - dir_path = self.to_dirpath(hx) - file_path = os.path.join(dir_path, hx.upper()) - return file_path - - - def __verify_directory(self): - #if not stat.S_ISDIR(fi.st_mode): - # raise ValueError('{} is not a directory'.format(self.path)) - f = opendir(self.path) - f.close() - return True - - @staticmethod def __prepare_directory(path): os.makedirs(path, exist_ok=True) state_file = os.path.join(path, 'master') f = open(state_file, 'w') f.close() - diff --git a/hexdir/hex.py b/hexdir/hex.py @@ -0,0 +1,118 @@ +# standard imports +import os +import stat +import logging + +# external imports +from hexathon import valid as valid_hex + +# local imports +from .base import LevelDir + +logg = logging.getLogger(__name__) + + +class HexDir(LevelDir): + + def __init__(self, root_path, key_length, levels=2, prefix_length=0): + super(HexDir, self).__init__(root_path, levels, key_length + prefix_length) + #self.path = root_path + self.key_length = 
key_length + self.prefix_length = prefix_length + self.__levels = levels + 2 + + + def add(self, key, content, prefix=b''): + l = len(key) + if l != self.key_length: + raise ValueError('expected key length {}, got {}'.format(self.key_length, l)) + l = len(prefix) + if l != self.prefix_length: + raise ValueError('expected prefix length {}, got {}'.format(self.prefix_length, l)) + if not isinstance(content, bytes): + raise ValueError('content must be bytes, got {}'.format(type(content).__name__)) + if prefix != None and not isinstance(prefix, bytes): + raise ValueError('prefix must be bytes, got {}'.format(type(content).__name__)) + key_hex = key.hex() + entry_path = self.to_filepath(key_hex) + + c = self.count() + + os.makedirs(os.path.dirname(entry_path), exist_ok=True) + f = open(entry_path, 'wb') + f.write(content) + f.close() + + f = open(self.master_file, 'ab') + if prefix != None: + f.write(prefix) + f.write(key) + f.close() + + logg.info('created new entry {} idx {} in {}'.format(key_hex, c, entry_path)) + + return (c, entry_path) + + + def __cursor(self, idx): + return idx * (self.prefix_length + self.key_length) + + + def set_prefix(self, idx, prefix): + l = len(prefix) + if l != self.prefix_length: + raise ValueError('expected prefix length {}, got {}'.format(self.prefix_length, l)) + if not isinstance(prefix, bytes): + raise ValueError('prefix must be bytes, got {}'.format(type(content).__name__)) + cursor = self.__cursor(idx) + f = open(self.master_file, 'rb+') + f.seek(cursor) + f.write(prefix) + f.close() + + + def get(self, idx): + cursor = self.__cursor(idx) + f = open(self.master_file, 'rb') + f.seek(cursor) + prefix = f.read(self.prefix_length) + key = f.read(self.key_length) + f.close() + return (prefix, key) + + + def to_subpath(self, hx): + lead = '' + for i in range(0, self.__levels, 2): + lead += hx[i:i+2] + '/' + return lead.upper() + + + def to_dirpath(self, hx): + sub_path = self.to_subpath(hx) + return os.path.join(self.path, sub_path) + + 
+ def to_filepath(self, hx): + dir_path = self.to_dirpath(hx) + file_path = os.path.join(dir_path, hx.upper()) + return file_path + + + def __verify_directory(self): + fi = stat(self.path) + if not stat.S_ISDIR(fi.st_mode): + raise ValueError('{} is not a directory'.format(self.path)) + #f = os.listdir(self.path) + #os.listdir(self.path) + #f.close() + return True + + + @staticmethod + def __prepare_directory(path): + os.makedirs(path, exist_ok=True) + state_file = os.path.join(path, 'master') + f = open(state_file, 'w') + f.close() + diff --git a/hexdir/numeric.py b/hexdir/numeric.py @@ -0,0 +1,42 @@ +# standard imports +import math +import os + +# local imports +from .base import LevelDir + + +class NumDir(LevelDir): + + def __init__(self, root_path, thresholds=[1000]): + super(NumDir, self).__init__(root_path, len(thresholds), 8) + fi = os.stat(self.master_file) + self.thresholds = thresholds + self.entry_length = 8 + + + def to_dirpath(self, n): + c = n + x = 0 + d = [] + for t in self.thresholds: + x = math.floor(c / t) + y = x * t + d.append(str(y)) + c -= y + return os.path.join(self.path, *d) + + + def to_filepath(self, n): + path = self.to_dirpath(n) + return os.path.join(path, str(n)) + + + def add(self, n, content, prefix=b''): + path = to_filepath(n) + f = open(path, 'wb') + f.write(content) + + f = open(self.master_file, 'ab') + f.write(n.to_bytes(8, byteorder('big'))) + f.close() diff --git a/setup.py b/setup.py @@ -1,28 +1,3 @@ from setuptools import setup -import configparser -import os - -requirements = [] -f = open('requirements.txt', 'r') -while True: - l = f.readline() - if l == '': - break - requirements.append(l.rstrip()) -f.close() - -test_requirements = [] -f = open('test_requirements.txt', 'r') -while True: - l = f.readline() - if l == '': - break - test_requirements.append(l.rstrip()) -f.close() - - -setup( - install_requires=requirements, - tests_require=test_requirements, - ) +setup() diff --git a/tests/test_hexdir.py 
b/tests/test_hexdir.py @@ -6,7 +6,7 @@ import logging import os # local imports -from hexdir import HexDir +from hexdir.hex import HexDir logging.basicConfig(level=logging.DEBUG) logg = logging.getLogger() @@ -29,8 +29,10 @@ class HexDirTest(unittest.TestCase): content = b'cdef' prefix = b'ab' label = b'\xde\xad\xbe\xef' - self.hexdir.add(label, content, prefix=prefix) + (c, entry_path) = self.hexdir.add(label, content, prefix=prefix) + file_path = os.path.join(self.dir, 'q', 'DE', 'AD', 'BE', label.hex().upper()) + self.assertEqual(file_path, entry_path) f = open(file_path, 'rb') r = f.read() @@ -54,7 +56,7 @@ class HexDirTest(unittest.TestCase): def test_index(self): self.hexdir.add(b'\xde\xad\xbe\xef', b'foo', b'ab') self.hexdir.add(b'\xbe\xef\xfe\xed', b'bar', b'cd') - c = self.hexdir.add(b'\x01\x02\x03\x04', b'baz', b'ef') + (c, entry_path) = self.hexdir.add(b'\x01\x02\x03\x04', b'baz', b'ef') self.assertEqual(c, 2) diff --git a/tests/test_numdir.py b/tests/test_numdir.py @@ -0,0 +1,43 @@ +# standard imports +import unittest +import tempfile +import shutil +import logging +import os + +# local imports +from hexdir.numeric import NumDir + + +logging.basicConfig(level=logging.DEBUG) +logg = logging.getLogger() + + + +class NumDirTest(unittest.TestCase): + + def setUp(self): + self.dir = tempfile.mkdtemp() + self.numdir = NumDir(os.path.join(self.dir, 'n'), [1000, 100]) + logg.debug('setup numdir root {}'.format(self.dir)) + +# def tearDown(self): +# shutil.rmtree(self.dir) +# logg.debug('cleaned numdir root {}'.format(self.dir)) + + def test_path(self): + path = self.numdir.to_filepath(1337) + path_parts = [] + logg.debug(path) + (path, three) = os.path.split(path) + (path, two) = os.path.split(path) + (path, one) = os.path.split(path) + self.assertEqual(three, '1337') + self.assertEqual(two, '300') + self.assertEqual(one, '1000') + + + + +if __name__ == '__main__': + unittest.main()