commit 4fa1edf1f11457632beb82ee755434d7397f8b60
parent 19209618cf3c52e3a90fc2da0720b90ee3b4c54e
Author: nolash <dev@holbrook.no>
Date: Sun, 28 Nov 2021 10:34:43 +0100
Add docstrings, replace filter section strings with enum
Diffstat:
11 files changed, 521 insertions(+), 68 deletions(-)
diff --git a/requirements.txt b/requirements.txt
@@ -1,4 +1,3 @@
-chainsyncer==0.0.2b1
-#chainlib==0.0.2a15
-chainlib==0.0.3rc3
-moolb==0.1.1b2
+chainsyncer==0.0.7
+chainlib==0.0.12
+moolb==0.2.0
diff --git a/run_tests.sh b/run_tests.sh
@@ -1,3 +1,12 @@
+#!/bin/bash
+
+set -a
+set -e
+set -x
+
+default_pythonpath=$PYTHONPATH:.
+export PYTHONPATH=${default_pythonpath:-.}
+
for f in `ls tests`; do
if [ "test_" == ${f:0:5} ]; then
python tests/$f
@@ -6,3 +15,6 @@ for f in `ls tests`; do
fi
fi
done
+set +x
+set +e
+set +a
diff --git a/setup.cfg b/setup.cfg
@@ -1,6 +1,6 @@
[metadata]
name = taint
-version = 0.0.1a1
+version = 0.0.2a1
description = Cryptocurrency forensics for private use
author = Louis Holbrook
author_email = dev@holbrook.no
diff --git a/taint/account.py b/taint/account.py
@@ -7,7 +7,23 @@ from .crypto import Salter
class Account(Salter):
-
+ """Represents a single account in the cache.
+
+ An account is a blockchain address associated with one or more tags. It provides methods to compare addresses, view tags, and merge tags from two accounts, as well as to serialize and deserialize for storage.
+
+ The provided chain_spec will be used to generate the salt to obfuscate the address in the cache.
+
+ :param chain_spec: The chain spec the address is valid for
+ :type chain_spec: chainlib.chain.ChainSpec
+ :param account: The account address
+ :type account: bytes
+ :param label: Human-readable label for account used for logging
+ :type label: str
+ :param tags: Tags to associate account with
+ :type tags: list of bytes
+ :param create_digest: If set to false, account obfuscation will be omitted
+ :type create_digest: boolean
+ """
def __init__(self, chain_spec, account, label=None, tags=[], create_digest=True):
super(Account, self).__init__(chain_spec)
@@ -15,7 +31,8 @@ class Account(Salter):
label = str(account)
self.label = label
self.account_src = None
- if create_digest:
+ self.create_digest = create_digest
+ if self.create_digest:
self.account_src = account
self.account = self.sprinkle(self.account_src)
else:
@@ -26,36 +43,93 @@ class Account(Salter):
def tag(self, value):
+ """Add a tag to the account.
+
+ :param value: Literal tag value
+ :type value: bytes
+ """
self.tags.create(value)
def sum(self):
+ """Get the sum of all the tags for the account.
+
+ :rtype: bytes
+ :returns: Tag sum
+ """
return self.tags.get()
def connect(self, account):
+ """Associate two accounts with each other. After this operation, both accounts will have the same tag sum.
+
+ :param account: Account to merge with
+ :type account: taint.account.Account
+ """
if not isinstance(account, Account):
- raise TypeError('account must be type crypto_account_cache.account.Account')
+ raise TypeError('account must be type taint.account.Account')
self.tags.merge(account.tags)
def is_same(self, account):
+ """Compare two accounts.
+
+ This will not compare the tag state of the accounts.
+
+ :param account: Account to compare
+ :type account: taint.account.Account
+ :rtype: boolean
+ :return: True if the account effectively represents the same underlying blockchain address
+ """
if not isinstance(account, Account):
raise TypeError('account must be type crypto_account_cache.account.Account')
return self.account == account.account
- def is_account(self, account):
- return self.sprinkle(account) == self.account
+# def __eq__(self, account):
+# return self.is_same(account)
+
+
+ def is_account(self, address):
+ """Compare blockchain address to address represented by account object.
+
+ If account obfuscation is being used, the input value has to match the unobfuscated value.
+
+ :param address: Address to compare with
+ :type address: bytes
+ :rtype: boolean
+ :return: True on address match
+ """
+ if self.create_digest:
+ return self.sprinkle(address) == self.account
+ return address == self.account
def serialize(self):
+ """Serialize account object for storage.
+
+ Account serialization consists of serialization of the account's tags, followed by the serialization of the underlying blockchain address.
+
+ :rtype: bytes
+ :return: Serialized data
+ """
b = self.tags.serialize() + self.account
return b
@staticmethod
def from_serialized(b, chain_spec, label=None):
+ """Deserialize account object from storage.
+
+ BUG: deserialization may break if account is not obfuscated, since the address may not end on a 32-byte boundary
+
+ :param chain_spec: Chain spec to instantiate account for
+ :type chain_spec: chainlib.chain.ChainSpec
+ :param label: Human-readable label for logging
+ :type label: str
+ :rtype: taint.account.Account
+ :returns: Deserialized account
+ """
l = len(b)
if l % 32 > 0:
raise ValueError('invalid data length; remainder {} of 32'.format(l % 32))
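A minimal usage sketch of the documented Account API follows; the chain string, the address and ChainSpec.from_chain_str() are assumptions made for illustration, not part of this commit.
# Hypothetical sketch; chain string and address are placeholders.
from chainlib.chain import ChainSpec
from taint.account import Account

chain_spec = ChainSpec.from_chain_str('evm:ethereum:1')  # assumed chain string
addr = bytes.fromhex('ee' * 20)                          # made-up address

alice = Account(chain_spec, addr, label='alice')
alice.tag(b'exchange')                 # add a literal tag value

other = Account(chain_spec, addr, label='alice-too')
other.tag(b'donation')

alice.connect(other)                   # merge tags; per the docstring both sums now match
assert alice.is_same(other)            # same underlying (obfuscated) address
assert alice.is_account(addr)          # raw address matches via sprinkle()

data = alice.serialize()               # tag sum + tags + obfuscated address
restored = Account.from_serialized(data, chain_spec, label='alice')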
diff --git a/taint/cache.py b/taint/cache.py
@@ -1,6 +1,7 @@
# standard imports
import os
import logging
+import enum
# external imports
from moolb import Bloom
@@ -14,7 +15,19 @@ from .account import Account
logg = logging.getLogger().getChild(__name__)
+class CacheAccountEnum(enum.Enum):
+ SUBJECT = 'subject'
+ OBJECT = 'object'
+
+
+class CacheStateEnum(enum.Enum):
+ CACHE = 'cache'
+ EXTRA = 'extra'
+
+
def to_index(block_height, tx_index=None):
+ """Create a cache store serialized index from block height and transaction index
+ """
b = block_height.to_bytes(12, 'big')
if tx_index != None:
b += tx_index.to_bytes(4, 'big')
@@ -22,49 +35,97 @@ def to_index(block_height, tx_index=None):
def from_index(b):
+ """Load ablock height and transaction index from a cache store serialized index
+ """
block_height = int.from_bytes(b[:12], 'big')
tx_index = int.from_bytes(b[12:], 'big')
return (block_height, tx_index)
class CacheBloom:
+ """Bloom filter for a cache state.
+
+ The filter has four parts, all identified by the values of the taint.cache.CacheAccountEnum and taint.cache.CacheStateEnum classes:
+
+ - subject: All subject account addresses being tracked
+ - object: All object account addresses being tracked
+ - cache: All block/tx indexes involving a subject address
+ - extra: All block/tx indexes involving an object address
+
+ Filter values are calculated using sha256 (the default of the underlying "moolb" python module)
+
+ :param bits_size: Bit size of bloom filter
+ :type bits_size: int
+ """
rounds = 3
+ """Number of hashing rounds used to calculate a single cache entry"""
def __init__(self, bits_size):
self.bits_size = bits_size
- self.filter = {
- 'subject': None,
- 'object': None,
- 'cache': None,
- 'extra': None,
- }
+ self.filter = {}
+ for v in CacheAccountEnum:
+ self.filter[v.value] = None
+ for v in CacheStateEnum:
+ self.filter[v.value] = None
def reset(self):
- self.filter['subject'] = Bloom(self.bits_size, CacheBloom.rounds)
- self.filter['object'] = Bloom(self.bits_size, CacheBloom.rounds)
- self.filter['cache'] = Bloom(self.bits_size, CacheBloom.rounds)
- self.filter['extra'] = Bloom(self.bits_size, CacheBloom.rounds)
+ """Empties all filters.
+ """
+ for v in CacheAccountEnum:
+ self.filter[v.value] = Bloom(self.bits_size, CacheBloom.rounds)
+ for v in CacheStateEnum:
+ self.filter[v.value] = Bloom(self.bits_size, CacheBloom.rounds)
def add_raw(self, v, label):
- self.filter[label].add(v)
+ """Add a raw byte value to the bloom filter part with the corresponding label.
+
+ :param v: Value to add
+ :type v: bytes
+ :param label: Filter section to add value to
+ :type label: CacheAccountEnum or CacheStateEnum
+ """
+ self.filter[label.value].add(v)
def serialize(self):
- if self.filter['subject'] == None:
+ """Serialize cache bloom filter state for storage.
+
+ The serialized format of the filter is simply all filter contents concatenated in the following order:
+
+ 1. subject
+ 2. object
+ 3. cache
+ 4. extra
+
+ :rtype: bytes
+ :returns: Serialized cache state
+ """
+ if self.filter[CacheAccountEnum.SUBJECT.value] == None:
logg.warning('serialize called on uninitialized cache bloom')
return b''
- b = self.filter['subject'].to_bytes()
- b += self.filter['object'].to_bytes()
- b += self.filter['cache'].to_bytes()
- b += self.filter['extra'].to_bytes()
+ b = b''
+ for v in CacheAccountEnum:
+ b += self.filter[v.value].to_bytes()
+ for v in CacheStateEnum:
+ b += self.filter[v.value].to_bytes()
+
return b
def deserialize(self, b):
+ """Deserialize a stored cache bloom filter state into instantiated BloomCache object.
+
+ Any existing bloom filter state in the object will be overwritten.
+
+ Client code should use static method taint.cache.BloomCache.from_serialized() instead.
+
+ :param b: Serialized bloom filter state
+ :type b: bytes
+ """
byte_size = int(self.bits_size / 8)
length_expect = byte_size * 4
length_data = len(b)
@@ -72,20 +133,25 @@ class CacheBloom:
raise ValueError('data size mismatch; expected {}, got {}'.format(length_expect, length_data))
cursor = 0
- self.filter['subject'] = Bloom(self.bits_size, CacheBloom.rounds, default_data=b[cursor:cursor+byte_size])
+ for v in CacheAccountEnum:
+ self.filter[v.value] = Bloom(self.bits_size, CacheBloom.rounds, default_data=b[cursor:cursor+byte_size])
+ cursor += byte_size
- cursor += byte_size
- self.filter['object'] = Bloom(self.bits_size, CacheBloom.rounds, default_data=b[cursor:cursor+byte_size])
-
- cursor += byte_size
- self.filter['cache'] = Bloom(self.bits_size, CacheBloom.rounds, default_data=b[cursor:cursor+byte_size])
-
- cursor += byte_size
- self.filter['extra'] = Bloom(self.bits_size, CacheBloom.rounds, default_data=b[cursor:cursor+byte_size])
+ for v in CacheStateEnum:
+ self.filter[v.value] = Bloom(self.bits_size, CacheBloom.rounds, default_data=b[cursor:cursor+byte_size])
+ cursor += byte_size
@staticmethod
def from_serialized(b):
+ """Convenience function to deserialize a stored cache bloom filter state.
+
+ :param b: Serialized bloom filter state
+ :type b: bytes
+ :raises ValueError: If data does not pass integrity check
+ :rtype: taint.cache.CacheBloom
+ :returns: Instantiated cache bloom object
+ """
if len(b) % 4 > 0:
raise ValueError('invalid data length, remainder {} of 4'.format(len(b) % 32))
@@ -96,23 +162,56 @@ class CacheBloom:
def have(self, data, label):
- return self.filter[label].check(data)
+ """Check if value generates a match in bloom filter
+
+ :param data: Data to match
+ :type data: byts
+ :param label: Bloom cache section to match
+ :type label: CacheAccountEnum or CacheStateEnum
+ """
+ return self.filter[label.value].check(data)
def have_index(self, block_height, tx_index=None):
+ """Check if block number or block/tx index exists in bloom cache.
+
+ This will match against either the 'cache' or the 'extra' section.
+
+ :param block_height: Block height to match
+ :type block_height: int
+ :param tx_index: Transaction index to match (optional)
+ :type tx_index: int
+ :rtype: boolean
+ :return: True if bloom filter match in one of the sections
+ """
b = to_index(block_height, tx_index)
- if self.have(b, 'cache'):
+ if self.have(b, CacheStateEnum.CACHE):
return True
- return self.have(b, 'extra')
+ return self.have(b, CacheStateEnum.EXTRA)
def register(self, accounts, block_height, tx_index=None):
+ """Add a match for block number or block/tx index for the specified accounts.
+
+ If none of the given accounts exist in the tracked account filter, no change will be made to state.
+
+ BUG: False positive account matches are not discarded.
+
+ :param accounts: List of blockchain addresses to match
+ :type accounts: list of bytes
+ :param block_height: Block height to register
+ :type block_height: int
+ :param tx_index: Transaction index to register
+ :type tx_index: int
+ :rtype: boolean
+ :return: False if no match in accounts was found.
+ """
subject_match = False
object_match = False
for account in accounts:
- if self.have(account, 'subject'):
+ if self.have(account, CacheAccountEnum.SUBJECT):
subject_match = True
- elif self.have(account, 'object'):
+ elif self.have(account, CacheAccountEnum.OBJECT):
object_match = True
if not subject_match and not object_match:
@@ -120,15 +219,25 @@ class CacheBloom:
b = to_index(block_height, tx_index)
if subject_match:
- self.add_raw(b, 'cache')
+ self.add_raw(b, CacheStateEnum.CACHE)
if object_match:
- self.add_raw(b, 'extra')
+ self.add_raw(b, CacheStateEnum.EXTRA)
return True
class Cache(Salter):
+ """Core session engine for caching and associating block transactions with accounts.
+ If cache_bloom is omitted, a new CacheBloom object will be instantiated as backend, using the provided bits_size.
+
+ :param chain_spec: Chain spec to use cache for.
+ :type chain_spec: chainlib.chain.ChainSpec
+ :param bits_size: Bit size of the underlying bloom filter
+ :type bits_size: int
+ :param cache_bloom: Cache bloom state to initialize cache session with
+ :type cache_bloom: taint.cache.CacheBloom
+ """
def __init__(self, chain_spec, bits_size, cache_bloom=None):
super(Cache, self).__init__(chain_spec)
self.bits_size = bits_size
@@ -149,6 +258,12 @@ class Cache(Salter):
def serialize(self):
+ """Serialize the underlying bloom cache state together with the block range of registered matches.
+
+ :raises AttributeError: If no content has yet been cached
+ :rtype: bytes
+ :return: Serialized cache state
+ """
if self.first_block_height < 0:
raise AttributeError('no content to serialize')
@@ -160,6 +275,15 @@ class Cache(Salter):
@classmethod
def from_serialized(cls, chain_spec, b):
+ """Instantiate a new Cache object from a previously serialized state.
+
+ :param chain_spec: Chain spec to instantiate the Cache object for
+ :type chain_spec: chainlib.chain.ChainSpec
+ :param b: Serialized data
+ :type b: bytes
+ :rtype: taint.cache.Cache
+ :return: Instantiated cache object
+ """
cursor = len(b)-32
bloom = CacheBloom.from_serialized(b[:cursor])
c = cls(chain_spec, bloom.bits_size, cache_bloom=bloom)
@@ -172,14 +296,23 @@ class Cache(Salter):
def divide(self, accounts):
+ """Divides the given accounts into subjects and objects depending on their match in the bloom cache state backend.
+
+ Accounts that do not generate matches will be omitted.
+
+ :param accounts: List of blockchain addresses to process
+ :type accounts: list of bytes
+ :rtype: tuple of lists of bytes
+ :return: list of subjects and list of objects, in that order
+ """
subjects = []
objects = []
for account in accounts:
- if self.cache_bloom.have(account, 'subject'):
+ if self.cache_bloom.have(account, CacheAccountEnum.SUBJECT):
subject = self.subjects[account]
subjects.append(subject)
- elif self.cache_bloom.have(account, 'object'):
+ elif self.cache_bloom.have(account, CacheAccountEnum.OBJECT):
objct = self.objects[account]
objects.append(objct)
@@ -187,26 +320,71 @@ class Cache(Salter):
def add_account(self, account, label):
+ """Add a new account to the bloom cache state, in the corresponding section
+
+ Client code should use taint.cache.Cache.add_subject() or taint.cache.Cache.add_object() instead.
+
+ :param account: account to add
+ :type account: taint.account.Account
+ :param label: bloom cache section
+ :type label: taint.cache.CacheAccountEnum
+ """
self.cache_bloom.add_raw(account.account, label)
def add_subject(self, account):
+ """Convenience function to add an account as a subject.
+
+ :param account: account to add
+ :type account: taint.account.Account
+ :raises TypeError: If account is not right type
+ """
if not isinstance(account, Account):
- raise TypeError('subject must be type crypto_account_cache.account.Account')
- self.add_account(account, 'subject')
+ raise TypeError('subject must be type taint.account.Account')
+ self.add_account(account, CacheAccountEnum.SUBJECT)
logg.debug('added subject {}'.format(account))
self.subjects[account.account] = account
def add_object(self, account):
+ """Convenience function to add an account as a object.
+
+ :param account: account to add
+ :type account: taint.account.Account
+ :raises TypeError: If account is not right type
+ """
+
if not isinstance(account, Account):
- raise TypeError('subject must be type crypto_account_cache.account.Account')
- self.add_account(account, 'object')
+ raise TypeError('subject must be type taint.account.Account')
+ self.add_account(account, CacheAccountEnum.OBJECT)
logg.debug('added object {}'.format(account))
self.objects[account.account] = account
def add_tx(self, sender, recipient, block_height, tx_index, block_hash=None, tx_hash=None, relays=[]):
+ """Add a transaction to the bloom cache state.
+
+ If a subject address is matched, tags will be merged for all subjects involved in the transaction.
+
+ If an object address is matched, tags will be merged for all subjects and the object involved in the transaction.
+
+ :param sender: Blockchain address providing output for the transaction
+ :type sender: bytes
+ :param recipient: Blockchain address providing input for the transaction
+ :type recipient: bytes
+ :param block_height: Block height of transaction
+ :type block_height: int
+ :param tx_index: Transaction index in block
+ :type tx_index: int
+ :param block_hash: Block hash (used for debugging/log output only)
+ :type block_hash: str
+ :param tx_hash: Transaction hash (used for debugging/log output only)
+ :type tx_hash: str
+ :param relays: Additional blockchain addresses to generate match for
+ :type relays: list of bytes
+ :rtype: tuple of lists of bytes
+ :return: Matched subjects and objects, or None if no matching account was found
+ """
accounts = [sender, recipient] + relays
self.cache_bloom.register(accounts, block_height)
match = self.cache_bloom.register(accounts, block_height, tx_index)
@@ -220,7 +398,7 @@ class Cache(Salter):
self.last_block_height = block_height
self.last_tx_index = tx_index
- logg.info('match in {}:{} {}'.format(block_height, tx_index, tx_hash))
+ logg.info('match in {}:{} {}:{}'.format(block_height, tx_index, block_hash, tx_hash))
# TODO: watch out, this currently scales geometrically
(subjects, objects) = self.divide(accounts)
@@ -237,11 +415,40 @@ class Cache(Salter):
def have(self, block_height, tx_index=None):
+ """Check if block number or block/tx index exists in bloom cache state
+
+ :param block_height: Block height to match
+ :type block_height: int
+ :param tx_index: Transaction index to match
+ :type tx_index: int
+ :rtype: boolean
+ :return: True on match
+ """
return self.cache_bloom.have_index(block_height, tx_index)
class CacheSyncBackend(MemBackend):
-
+ """Volatile chainsyncer backend generating matches for all block/tx matched in the bloom cache state.
+
+ Can be used to replay the syncing session for only the block/tx indices known to be of interest.
+
+ TODO: Add a tx_index max value hint on stored blocks to eliminate the need for tx_scan_limit, which can cause transactions to be missed, as well as to reduce resource usage.
+
+ :param cache: Cache object
+ :type cache: taint.cache.Cache
+ :param chain_spec: Chain spec to run the syncer session for
+ :type chain_spec: chainlib.chain.ChainSpec
+ :param object_id: chainsyncer backend object id
+ :type object_id: str
+ :param start_block: Block offset to start syncing at, inclusive
+ :type start_block: int
+ :param target_block: Block to stop syncing at, exclusive
+ :type target_block: int
+ :param tick_callback: Callback called for every processed transaction
+ :type tick_callback: function receiving block_height and tx_index
+ :param tx_scan_limit: Maximum transaction index in a block to scan for
+ :type tx_scan_limit: int
+ """
def __init__(self, cache, chain_spec, object_id, start_block=0, target_block=0, tick_callback=None, tx_scan_limit=500):
if target_block <= start_block:
raise ValueError('target block number must be higher than start block number')
@@ -253,6 +460,13 @@ class CacheSyncBackend(MemBackend):
def get(self):
+ """Advance to the next matched block/tx index in the bloom cache state, and return as a block index result for the chainsyncer sync driver.
+
+ Transaction execution filters for the syncer are not implemented, so the returned filter state will always be 0.
+
+ :rtype: tuple
+ :return: tuple of block_height and tx_index, and a static 0 as filter value
+ """
while self.block_height < self.target_block + 1:
if self.cache.have(self.block_height):
if self.tx_height < self.tx_scan_limit:
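As a quick illustration of the enum-keyed filter sections introduced in this commit, the sketch below exercises CacheBloom directly; the bit size, block/tx index and addresses are arbitrary assumptions.
# Hypothetical sketch; sizes, indices and addresses are arbitrary.
import os
from taint.cache import CacheBloom, CacheAccountEnum, CacheStateEnum, to_index

bloom = CacheBloom(1024)
bloom.reset()                                     # initialize all four sections

alice = os.urandom(20)
bob = os.urandom(20)

bloom.add_raw(alice, CacheAccountEnum.SUBJECT)    # tracked subject address
bloom.add_raw(bob, CacheAccountEnum.OBJECT)       # tracked object address

# register a block/tx index for any tracked account found in a transaction
if bloom.register([alice, bob], 42, 0):
    assert bloom.have(to_index(42, 0), CacheStateEnum.CACHE)
    assert bloom.have_index(42, 0)

state = bloom.serialize()                         # subject|object|cache|extra
restored = CacheBloom.from_serialized(state)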
diff --git a/taint/crypto.py b/taint/crypto.py
@@ -2,8 +2,15 @@
import hashlib
import os
+
class Salter:
+ """Base class to provide cryptographic salt for cache objects that should be obfuscated.
+
+ By default a random base value is generated when the class is loaded. The salt is then deterministically derived from that value and the provided chain spec.
+
+ :param chain_spec: Chain spec to generate the salt with.
+ :type chain_spec: chainlib.chain.ChainSpec
+ """
salt = os.urandom(32)
def __init__(self, chain_spec):
@@ -13,6 +20,14 @@ class Salter:
def sprinkle(self, data):
+ """Hash the given data with the salt
+
+ :param data: Input data
+ :type data: bytes
+ :rtype: bytes
+ :returns: Hashed, salted value
+
+ """
h = hashlib.new('sha256')
if isinstance(data, list):
for d in data:
@@ -24,4 +39,9 @@ class Salter:
def root_key(self):
+ """Returns the salt value generated from the chain spec.
+
+ :rtype: bytes
+ :returns: Salt
+ """
return self.ionized_salt
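The salting primitive can be exercised on its own, roughly as below; the chain string and input bytes are assumptions for illustration.
# Hypothetical sketch; the chain string and input are placeholders.
from chainlib.chain import ChainSpec
from taint.crypto import Salter

chain_spec = ChainSpec.from_chain_str('evm:ethereum:1')
salter = Salter(chain_spec)

digest = salter.sprinkle(b'\xee' * 20)            # salted sha256 of the input
assert len(digest) == 32
assert digest == salter.sprinkle(b'\xee' * 20)    # stable within the process
salt_key = salter.root_key()                      # session key derived from the chain spec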
diff --git a/taint/store/base.py b/taint/store/base.py
@@ -8,3 +8,27 @@ def to_key(k):
else:
k = even(k)
return k
+
+
+class BaseStore:
+
+ def put(self, k, v):
+ """Store value v under key k
+
+ :param k: Key
+ :type k: bytes
+ :param v: Value
+ :type v: bytes
+ """
+ raise NotImplementedError
+
+
+ def get(self, k):
+ """Return value stored under key k
+
+ :param k: Key
+ :type k: bytes
+ :rtype: bytes
+ :return: Value
+ """
+ raise NotImplementedError
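Since BaseStore only defines the two methods above, a minimal in-memory implementation (hypothetical, e.g. for tests) could look like this; FileStore in taint.store.file is the on-disk counterpart.
# Hypothetical in-memory store satisfying the BaseStore interface.
from taint.store.base import BaseStore


class MemStore(BaseStore):

    def __init__(self):
        self.db = {}

    def put(self, k, v):
        """Store value v under key k."""
        self.db[k] = v

    def get(self, k):
        """Return the value stored under key k."""
        return self.db[k]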
diff --git a/taint/store/file.py b/taint/store/file.py
@@ -1,14 +1,14 @@
# standard imports
import os
-import logging
# local imports
-from .base import to_key
+from .base import (
+ to_key,
+ BaseStore,
+ )
-logg = logging.getLogger().getChild(__name__)
-
-class FileStore:
+class FileStore(BaseStore):
def __init__(self, base_dir):
os.makedirs(base_dir, exist_ok=True)
diff --git a/taint/tag.py b/taint/tag.py
@@ -25,7 +25,10 @@ class TagPool:
class Tag:
+ """Represents a collection of tags for a cached object.
+ When a new tag is added, the tag collection is deterministically ordered and summed.
+ """
def __init__(self):
self.tags = []
self.tag_values = {}
@@ -34,6 +37,11 @@ class Tag:
def get(self):
+ """The current deterministic sum of the tags.
+
+ :rtype: bytes
+ :return: Tag digest sum
+ """
if self.dirty:
self.tags.sort()
h = hashlib.new('sha256')
@@ -44,6 +52,17 @@ class Tag:
def add(self, tag, value=None):
+ """Add a tag to the collection.
+
+ Client code should call Tag.create() instead.
+
+ :param tag: Tag value digest
+ :type tag: bytes
+ :param value: Tag value
+ :type value: bytes
+ :rtype: boolean
+ :returns: False if tag digest already exists in object
+ """
if tag in self.tags:
return False
self.tags.append(tag)
@@ -53,6 +72,13 @@ class Tag:
def create(self, value):
+ """Create a new tag record to add to the collection.
+
+ :param value: Tag value
+ :type value: bytes
+ :rtype: bytes
+ :return: Digest of newly added tag
+ """
h = hashlib.new('sha256')
h.update(value)
tag = h.digest()
@@ -61,6 +87,12 @@ class Tag:
def merge(self, tags):
+ """Merge contents of two tag objects. After this operation the sum of each of the tag objects will be identical.
+
+ :param tags: Tag collection to merge with
+ :type tags: taint.tag.Tag
+ :raises TypeError: If argument is not a taint.tag.Tag instance
+ """
if not isinstance(tags, Tag):
raise TypeError('tags must be type taint.tag.Tag')
for tag in tags.tags:
@@ -73,13 +105,28 @@ class Tag:
def serialize(self):
+ """Serialize tags for storage.
+
+ Serialized tags are deterministically ordered.
+
+ :rtype: bytes
+ :returns: Serialized tags
+ """
b = self.get()
for tag in self.tags:
b += tag
return b
- def deserialize(self, b):
+ def deserialize(self, b, skip_check=False):
+ """Deserialize tags into currently instantiated object.
+
+ Deserialization will ADD tags to the current object. If different tags already exist in the object, the resulting collection will not be identical to the serialized data.
+
+ :param b: Serialized tag data
+ :type b: bytes
+ :param skip_check: If set, skip the integrity check of the deserialized data against the tag sum
+ :type skip_check: boolean
+ :raises ValueError: If skip_check is not set and the serialized data does not match the tag object sum
+ """
if len(b) % 32 > 0:
raise ValueError('invalid data length; remainder {} from 32'.format(len(b) % 32))
cursor = 32
@@ -90,9 +137,10 @@ class Tag:
logg.debug('deserialize add {}'.format(tag))
self.add(tag)
- zz = self.get()
- if z != zz:
- raise ValueError('data sum does not match content; expected {}, found {}'.format(zz.hex(), z.hex()))
+ if not skip_check:
+ zz = self.get()
+ if z != zz:
+ raise ValueError('data sum does not match content; expected {}, found {}'.format(zz.hex(), z.hex()))
def __str__(self):
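A short sketch of how the tag collection behaves; the tag values are arbitrary byte strings chosen for illustration.
# Hypothetical sketch; tag values are arbitrary.
from taint.tag import Tag

a = Tag()
b = Tag()

a.create(b'exchange')        # returns the sha256 digest of the tag value
b.create(b'donation')

a.merge(b)                   # per the docstring, both collections now sum identically

data = a.serialize()         # sum followed by the ordered tag digests
c = Tag()
c.deserialize(data)          # raises ValueError if the stored sum does not match
assert c.get() == a.get()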
diff --git a/taint/taint.py b/taint/taint.py
@@ -13,7 +13,19 @@ logg = logging.getLogger().getChild(__name__)
class Tainter(Cache):
-
+ """Frontend object containing code to load and save state of a cache, aswell as chain sync handling.
+
+ :param chain_spec: Chain spec context for the cache
+ :type chain_spec: chainlib.chain.ChainSpec
+ :param bits_size: Bitsize of bloom filter used for cache
+ :type bits_size: int
+ :param result_handler: Callback called once for each registered account found in a transaction.
+ :type result_handler: function
+ :param store: State storage for cache
+ :type store: taint.store.base.BaseStore
+ :param cache_bloom: Cache bloom filter to instantiate
+ :type cache_bloom: taint.cache.CacheBloom
+ """
def __init__(self, chain_spec, bits_size, result_handler=None, store=None, cache_bloom=None):
super(Tainter, self).__init__(chain_spec, bits_size, cache_bloom=cache_bloom)
self.store = store
@@ -21,12 +33,34 @@ class Tainter(Cache):
def add_account(self, account, label):
+ """Add account to be tracked in cache.
+
+ If registered, the result handler will be called with the initial state of the added account.
+
+ The label will only be stored in memory for the given session, and will not be part of state storage.
+
+ :param account: Account to add
+ :type account: taint.account.Account
+ :param label: Filter section to add account to
+ :type label: taint.cache.CacheAccountEnum
+ """
super(Tainter, self).add_account(account, label)
if self.result_handler != None:
self.result_handler.register(account)
def filter(self, conn, block, tx, storer):
+ """Transaction callback for chainsyncer.
+
+ :param conn: RPC connection object
+ :type conn: chainlib.connection.RPCConnection
+ :param block: Block object
+ :type block: chainlib.block.Block
+ :param tx: Transaction object
+ :type tx: chainlib.tx.Tx
+ :param storer: State storage object (e.g. a sql database session)
+ :type storer: any
+ """
for output in tx.outputs:
for inpt in tx.inputs:
sender = bytes.fromhex(strip_0x(output))
@@ -59,12 +93,20 @@ class Tainter(Cache):
def save(self):
+ """Save state of all accounts and the salt used for the session to the cache store.
+ """
for account in self.subjects.values():
self.store.put(account.account, account.serialize())
self.store.put(self.root_key(), self.serialize())
def load_account(self, k, label=None):
+ """Load state for an accounts from a cache store.
+
+ :param k: Account to load, by obfuscated value.
+ :type k: bytes
+ :param label: Label to associate with account, for display use.
+ """
try:
b = self.store.get(k)
except FileNotFoundError:
@@ -73,6 +115,14 @@ class Tainter(Cache):
def load_subject(self, k, label=None):
+ """Load state for an account as subject from the cache store.
+
+ A subject will always merge tags with any other subject or object in the same transaction.
+
+ :param k: Account to load, by obfuscated value.
+ :type k: bytes
+ :param label: Label to associate with account, for display use.
+ """
a = self.load_account(k, label)
if a == None:
return False
@@ -81,6 +131,14 @@ class Tainter(Cache):
def load_object(self, k, label=None):
+ """Load state for an account as object from the cache store.
+
+ An object will only merge tags with other subjects in the same transaction.
+
+ :param k: Account to load, by obfuscated value.
+ :type k: bytes
+ :param label: Label to associate with account, for display use.
+ """
a = self.load_account(k, label)
if a == None:
return False
@@ -90,6 +148,8 @@ class Tainter(Cache):
@staticmethod
def load(store, chain_spec, result_handler=None):
+ """Instantiate new Tainter object with salt stored from previous session.
+ """
a = Salter(chain_spec)
b = store.get(a.root_key())
c = Tainter.from_serialized(chain_spec, b)
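Putting the pieces together, a session might be set up roughly as follows; the store path, chain string, bit size and address are assumptions, and the chainsyncer driver wiring is omitted.
# Hypothetical end-to-end setup; paths, chain string and address are placeholders.
from chainlib.chain import ChainSpec
from taint.account import Account
from taint.cache import CacheBloom
from taint.store.file import FileStore
from taint.taint import Tainter

chain_spec = ChainSpec.from_chain_str('evm:ethereum:1')
store = FileStore('/tmp/taint-session')

bloom = CacheBloom(1024)
bloom.reset()

session = Tainter(chain_spec, 1024, store=store, cache_bloom=bloom)
session.add_subject(Account(chain_spec, bytes.fromhex('ee' * 20), label='alice'))

# run the chainsyncer driver with session.filter as the transaction callback,
# then persist with session.save(); a later run can resume the stored state
# with Tainter.load(store, chain_spec).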
diff --git a/tests/test_bloom.py b/tests/test_bloom.py
@@ -7,6 +7,8 @@ import copy
from taint.cache import (
CacheBloom,
to_index,
+ CacheAccountEnum,
+ CacheStateEnum,
)
@@ -19,24 +21,24 @@ class TestBloom(unittest.TestCase):
self.alice = os.urandom(20)
self.bob = os.urandom(20)
- self.bloom.add_raw(self.alice, 'subject')
- self.bloom.add_raw(self.bob, 'object')
+ self.bloom.add_raw(self.alice, CacheAccountEnum.SUBJECT)
+ self.bloom.add_raw(self.bob, CacheAccountEnum.OBJECT)
def reset_with_accounts(self):
self.bloom.reset()
- self.bloom.add_raw(self.alice, 'subject')
- self.bloom.add_raw(self.bob, 'object')
+ self.bloom.add_raw(self.alice, CacheAccountEnum.SUBJECT)
+ self.bloom.add_raw(self.bob, CacheAccountEnum.OBJECT)
def test_bloom(self):
orig_serial = self.bloom.serialize()
- self.bloom.add_raw(b'\x01', 'subject')
- self.bloom.add_raw(b'\x01', 'object')
- self.bloom.add_raw(b'\x01', 'cache')
- self.bloom.add_raw(b'\x01', 'extra')
+ self.bloom.add_raw(b'\x01', CacheAccountEnum.SUBJECT)
+ self.bloom.add_raw(b'\x01', CacheAccountEnum.OBJECT)
+ self.bloom.add_raw(b'\x01', CacheStateEnum.CACHE)
+ self.bloom.add_raw(b'\x01', CacheStateEnum.EXTRA)
b = self.bloom.serialize()
byte_size = int(1024 / 8)