taint

Crypto forensics for private use
git clone git://git.defalsify.org/taint.git

commit 4fa1edf1f11457632beb82ee755434d7397f8b60
parent 19209618cf3c52e3a90fc2da0720b90ee3b4c54e
Author: nolash <dev@holbrook.no>
Date:   Sun, 28 Nov 2021 10:34:43 +0100

Add docstrings, replace filter section strings with enum
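
For context: the enum change replaces the hard-coded filter section strings ('subject', 'object', 'cache', 'extra') with enum members. In rough outline, as a sketch distilled from the diff below rather than the verbatim module:

    import enum

    class CacheAccountEnum(enum.Enum):
        SUBJECT = 'subject'
        OBJECT = 'object'

    class CacheStateEnum(enum.Enum):
        CACHE = 'cache'
        EXTRA = 'extra'

    # before: bloom.add_raw(address, 'subject')
    # after:  bloom.add_raw(address, CacheAccountEnum.SUBJECT)
    # add_raw() now resolves the underlying filter key through label.value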

Diffstat:
M requirements.txt    |   7 +++----
M run_tests.sh        |  12 ++++++++++++
M setup.cfg           |   2 +-
M taint/account.py    |  84 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----
M taint/cache.py      | 294 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----------
M taint/crypto.py     |  20 ++++++++++++++++++++
M taint/store/base.py |  24 ++++++++++++++++++++++++
M taint/store/file.py |  10 +++++-----
M taint/tag.py        |  56 ++++++++++++++++++++++++++++++++++++++++++++++++++++----
M taint/taint.py      |  62 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
M tests/test_bloom.py |  18 ++++++++++--------
11 files changed, 521 insertions(+), 68 deletions(-)

diff --git a/requirements.txt b/requirements.txt
@@ -1,4 +1,3 @@
-chainsyncer==0.0.2b1
-#chainlib==0.0.2a15
-chainlib==0.0.3rc3
-moolb==0.1.1b2
+chainsyncer==0.0.7
+chainlib==0.0.12
+moolb==0.2.0
diff --git a/run_tests.sh b/run_tests.sh
@@ -1,3 +1,12 @@
+#!/bin/bash
+
+set -a
+set -e
+set -x
+
+default_pythonpath=$PYTHONPATH:.
+export PYTHONPATH=${default_pythonpath:-.}
+
 for f in `ls tests`; do
 	if [ "test_" == ${f:0:5} ]; then
 		python tests/$f
@@ -6,3 +15,6 @@ for f in `ls tests`; do
 		fi
 	fi
 done
+set +x
+set +e
+set +a
diff --git a/setup.cfg b/setup.cfg
@@ -1,6 +1,6 @@
 [metadata]
 name = taint
-version = 0.0.1a1
+version = 0.0.2a1
 description = Cryptocurrency forensics for private use
 author = Louis Holbrook
 author_email = dev@holbrook.no
diff --git a/taint/account.py b/taint/account.py
@@ -7,7 +7,23 @@ from .crypto import Salter
 
 
 class Account(Salter):
-
+    """Represents a single account in the cache.
+
+    An account is a blockchain address associated with one or more tags. It provides methods to compare addresses, view tags, merge tags from two accounts, as well as serializing and deserializing for storage.
+
+    The provided chain_spec will be used to generate the salt to obfuscate the address in the cache.
+
+    :param chain_spec: The chain spec the address is valid for
+    :type chain_spec: chainlib.chain.ChainSpec
+    :param account: The account address
+    :type account: bytes
+    :param label: Human-readable label for account used for logging
+    :type label: str
+    :param tags: Tags to associate account with
+    :type tags: list of bytes
+    :param create_digest: If set to false, account obfuscation will be omitted
+    :type create_digest: boolean
+    """
 
     def __init__(self, chain_spec, account, label=None, tags=[], create_digest=True):
         super(Account, self).__init__(chain_spec)
@@ -15,7 +31,8 @@ class Account(Salter):
             label = str(account)
         self.label = label
         self.account_src = None
-        if create_digest:
+        self.create_digest = create_digest
+        if self.create_digest:
             self.account_src = account
             self.account = self.sprinkle(self.account_src)
         else:
@@ -26,36 +43,93 @@ class Account(Salter):
 
 
     def tag(self, value):
+        """Add a tag to the account.
+
+        :param value: Literal tag value
+        :type value: bytes
+        """
         self.tags.create(value)
 
 
     def sum(self):
+        """Get the sum of all the tags for the account.
+
+        :rtype: bytes
+        :returns: Tag sum
+        """
         return self.tags.get()
 
 
     def connect(self, account):
+        """Associate two accounts with each other. After this operation, both accounts will have the same tag sum.
+
+        :param account: Account to merge with
+        :type account: taint.account.Account
+        """
         if not isinstance(account, Account):
-            raise TypeError('account must be type crypto_account_cache.account.Account')
+            raise TypeError('account must be type taint.account.Account')
         self.tags.merge(account.tags)
 
 
     def is_same(self, account):
+        """Compare two accounts.
+
+        This will not compare the tag state of the accounts.
+
+        :param account: Account to compare
+        :type account: taint.account.Account
+        :rtype: boolean
+        :return: True if the account effectively represents the same underlying blockchain address
+        """
         if not isinstance(account, Account):
             raise TypeError('account must be type crypto_account_cache.account.Account')
         return self.account == account.account
 
 
-    def is_account(self, account):
-        return self.sprinkle(account) == self.account
+#    def __eq__(self, account):
+#        return self.is_same(account)
+
+
+    def is_account(self, address):
+        """Compare blockchain address to address represented by account object.
+
+        If account obfuscation is being used, the input value has to match the unobfuscated value.
+
+        :param address: Address to compare with
+        :type address: bytes
+        :rtype: boolean
+        :return: True on address match
+        """
+        if self.create_digest:
+            return self.sprinkle(address) == self.account
+        return address == self.account
 
 
     def serialize(self):
+        """Serialize account object for storage.
+
+        Account serialization consists of serialization of the account's tags, followed by the serialization of the underlying blockchain address.
+
+        :rtype: bytes
+        :return: Serialized data
+        """
         b = self.tags.serialize() + self.account
         return b
 
 
     @staticmethod
    def from_serialized(b, chain_spec, label=None):
+        """Deserialize account object from storage.
+
+        BUG: deserialization may break if account is not obfuscated, since the address may not end on 32 byte boundary
+
+        :param chain_spec: Chain spec to instantiate account for
+        :type chain_spec: chainlib.chain.ChainSpec
+        :param label: Human-readable label for logging
+        :type label: str
+        :rtype: taint.account.Account
+        :returns: Deserialized account
+        """
         l = len(b)
         if l % 32 > 0:
             raise ValueError('invalid data length; remainder {} of 32'.format(l % 32))
diff --git a/taint/cache.py b/taint/cache.py
@@ -1,6 +1,7 @@
 # standard imports
 import os
 import logging
+import enum
 
 # external imports
 from moolb import Bloom
@@ -14,7 +15,19 @@ from .account import Account
 logg = logging.getLogger().getChild(__name__)
 
 
+class CacheAccountEnum(enum.Enum):
+    SUBJECT = 'subject'
+    OBJECT = 'object'
+
+
+class CacheStateEnum(enum.Enum):
+    CACHE = 'cache'
+    EXTRA = 'extra'
+
+
 def to_index(block_height, tx_index=None):
+    """Create a cache store serialized index from block height and transaction index
+    """
     b = block_height.to_bytes(12, 'big')
     if tx_index != None:
         b += tx_index.to_bytes(4, 'big')
@@ -22,49 +35,97 @@
 def from_index(b):
+    """Load ablock height and transaction index from a cache store serialized index
+    """
     block_height = int.from_bytes(b[:12], 'big')
     tx_index = int.from_bytes(b[12:], 'big')
     return (block_height, tx_index)
 
 
 class CacheBloom:
+    """Bloom filter for a cache state.
+
+    The filter has four parts, all identified by the values of the taint.cache.CacheAccountEnum and taint.cache.CacheStateEnum classes:
+
+    - subject: All subject account addresses being tracked
+    - object: All object account addresses being tracked
+    - cache: All block/tx indexes involving a subject address
+    - extra: All block/tx indexes involving an object address
+
+    Filter values are calculated using sha256 (the default of the underlying "moolb" python module)
+
+    :param bits_size: Bit size of bloom filter
+    :type bits_size: int
+    """
 
     rounds = 3
+    """Number of hashing rounds used to calculate a single cache entry"""
 
     def __init__(self, bits_size):
         self.bits_size = bits_size
-        self.filter = {
-                'subject': None,
-                'object': None,
-                'cache': None,
-                'extra': None,
-                }
+        self.filter = {}
+        for v in CacheAccountEnum:
+            self.filter[v.value] = None
+        for v in CacheStateEnum:
+            self.filter[v.value] = None
 
 
     def reset(self):
-        self.filter['subject'] = Bloom(self.bits_size, CacheBloom.rounds)
-        self.filter['object'] = Bloom(self.bits_size, CacheBloom.rounds)
-        self.filter['cache'] = Bloom(self.bits_size, CacheBloom.rounds)
-        self.filter['extra'] = Bloom(self.bits_size, CacheBloom.rounds)
+        """Empties all filters.
+        """
+        for v in CacheAccountEnum:
+            self.filter[v.value] = Bloom(self.bits_size, CacheBloom.rounds)
+        for v in CacheStateEnum:
+            self.filter[v.value] = Bloom(self.bits_size, CacheBloom.rounds)
 
 
     def add_raw(self, v, label):
-        self.filter[label].add(v)
+        """Add a raw byte value to the bloom filter part with the corresponding label.
+
+        :param v: Value to add
+        :type v: bytes
+        :param label: Filter section to add value to
+        :type label: CacheAccountEnum or CacheStateEnum
+        """
+        self.filter[label.value].add(v)
 
 
     def serialize(self):
-        if self.filter['subject'] == None:
+        """Serialize cache bloom filter state for storage.
+
+        The serialized format of the filter is simply all filter contents concatenated in the following order:
+
+        1. subject
+        2. object
+        3. cache
+        4. extra
+
+        :rtype: bytes
+        :returns: Serialized cache state
+        """
+        if self.filter[CacheAccountEnum.SUBJECT.value] == None:
             logg.warning('serialize called on uninitialized cache bloom')
             return b''
 
-        b = self.filter['subject'].to_bytes()
-        b += self.filter['object'].to_bytes()
-        b += self.filter['cache'].to_bytes()
-        b += self.filter['extra'].to_bytes()
+        b = b''
+        for v in CacheAccountEnum:
+            b += self.filter[v.value].to_bytes()
+        for v in CacheStateEnum:
+            b += self.filter[v.value].to_bytes()
+
         return b
 
 
     def deserialize(self, b):
+        """Deserialize a stored cache bloom filter state into instantiated BloomCache object.
+
+        Any existing bloom filter state in the object will be overwritten.
+
+        Client code should use static method taint.cache.BloomCache.from_serialized() instead.
+
+        :param b: Serialized bloom filter state
+        :type b: bytes
+        """
         byte_size = int(self.bits_size / 8)
         length_expect = byte_size * 4
         length_data = len(b)
@@ -72,20 +133,25 @@ class CacheBloom:
             raise ValueError('data size mismatch; expected {}, got {}'.format(length_expect, length_data))
 
         cursor = 0
-        self.filter['subject'] = Bloom(self.bits_size, CacheBloom.rounds, default_data=b[cursor:cursor+byte_size])
+        for v in CacheAccountEnum:
+            self.filter[v.value] = Bloom(self.bits_size, CacheBloom.rounds, default_data=b[cursor:cursor+byte_size])
+            cursor += byte_size
 
-        cursor += byte_size
-        self.filter['object'] = Bloom(self.bits_size, CacheBloom.rounds, default_data=b[cursor:cursor+byte_size])
-
-        cursor += byte_size
-        self.filter['cache'] = Bloom(self.bits_size, CacheBloom.rounds, default_data=b[cursor:cursor+byte_size])
-
-        cursor += byte_size
-        self.filter['extra'] = Bloom(self.bits_size, CacheBloom.rounds, default_data=b[cursor:cursor+byte_size])
+        for v in CacheStateEnum:
+            self.filter[v.value] = Bloom(self.bits_size, CacheBloom.rounds, default_data=b[cursor:cursor+byte_size])
+            cursor += byte_size
 
 
     @staticmethod
     def from_serialized(b):
+        """Convenience function to deserialize a stored cache bloom filter state.
+
+        :param b: Serialized bloom filter state
+        :type b: bytes
+        :raises ValueError: If data does not pass integrity check
+        :rtype: taint.cache.BloomCache
+        :returns: Instantiated bloom cache objectcache object
+        """
         if len(b) % 4 > 0:
             raise ValueError('invalid data length, remainder {} of 4'.format(len(b) % 32))
@@ -96,23 +162,56 @@ class CacheBloom:
 
 
     def have(self, data, label):
-        return self.filter[label].check(data)
+        """Check if value generates a match in bloom filter
+
+        :param data: Data to match
+        :type data: byts
+        :param label: Bloom cache section to match
+        :type label: CacheAccountEnum or CacheStateEnum
+        """
+        return self.filter[label.value].check(data)
 
 
     def have_index(self, block_height, tx_index=None):
+        """Check if block number or block/tx index exists in bloom cache.
+
+        This will match against any of the 'cache' and 'extra' sections.
+
+        :param block_height: Block height to match
+        :type block_height: int
+        :param tx_index: Transaction index to match (optional)
+        :type tx_index: int
+        :rtype: boolean
+        :return: True if bloom filter match in one of the sections
+        """
         b = to_index(block_height, tx_index)
-        if self.have(b, 'cache'):
+        if self.have(b, CacheStateEnum.CACHE):
             return True
-        return self.have(b, 'extra')
+        return self.have(b, CacheStateEnum.EXTRA)
 
 
     def register(self, accounts, block_height, tx_index=None):
+        """Add a match for block number or block/tx index for the specified accounts.
+
+        If none of the given accounts exist in the tracked account filter, no change will be made to state.
+
+        BUG: False positive accounts matches are not discarded.
+
+        :param accounts: List of blockchain addresses to match
+        :type accounts: list of bytes
+        :param block_height: Block height to register
+        :type block_height: int
+        :param tx_index: Transaction index to register
+        :type tx_index: int
+        :rtype: boolean
+        :return: False if no match in accounts was found.
+        """
         subject_match = False
         object_match = False
         for account in accounts:
-            if self.have(account, 'subject'):
+            if self.have(account, CacheAccountEnum.SUBJECT):
                 subject_match = True
-            elif self.have(account, 'object'):
+            elif self.have(account, CacheAccountEnum.OBJECT):
                 object_match = True
 
         if not subject_match and not object_match:
@@ -120,15 +219,25 @@
         b = to_index(block_height, tx_index)
         if subject_match:
-            self.add_raw(b, 'cache')
+            self.add_raw(b, CacheStateEnum.CACHE)
         if object_match:
-            self.add_raw(b, 'extra')
+            self.add_raw(b, CacheStateEnum.EXTRA)
 
         return True
 
 
 class Cache(Salter):
+    """Core session engine for caching and associating block transactions with accounts.
+    If cache_bloom is omitted, a new CacheBloom object will be instantiated as backend, using the provided bits_size.
+
+    :param chain_spec: Chain spec to use cache for.
+    :type chain_spec: chainlib.chain.ChainSpec
+    :param bits_size: Bit size of underlying bloomfilter
+    :type bits_size: int
+    :param cache_bloom: Cache bloom state to initialize cache session with
+    :type cache_bloom: taint.cache.CacheBloom
+    """
 
     def __init__(self, chain_spec, bits_size, cache_bloom=None):
         super(Cache, self).__init__(chain_spec)
         self.bits_size = bits_size
@@ -149,6 +258,12 @@ class Cache(Salter):
 
 
     def serialize(self):
+        """Serialize the underlying bloom cache state together with the block range of registered matches.
+
+        :raises AttributeError: If no content has yet been cached
+        :rtype: bytes
+        :return: Serialized cache state
+        """
         if self.first_block_height < 0:
             raise AttributeError('no content to serialize')
@@ -160,6 +275,15 @@
     @classmethod
     def from_serialized(cls, chain_spec, b):
+        """Instantiate a new Cache object from a previously serialized state.
+
+        :param chain_spec: Chain spec to instantiate the Cache object for
+        :type chain_spec: chainlib.chain.ChainSpec
+        :param b: Serialized data
+        :type b: bytes
+        :rtype: taint.cache.Cache
+        :return: Instantiated cache object
+        """
         cursor = len(b)-32
         bloom = CacheBloom.from_serialized(b[:cursor])
         c = cls(chain_spec, bloom.bits_size, cache_bloom=bloom)
@@ -172,14 +296,23 @@
 
 
     def divide(self, accounts):
+        """Divides the given accounts into subjects and objects depending on their match in the bloom cache state backend.
+
+        Accounts that do not generate matches will be omitted.
+
+        :param accounts: List of blockchain addresses to process
+        :type account: List of bytes
+        :rtype: tuple of lists of bytes
+        :return: list of subjects and list of objects, in that order
+        """
         subjects = []
         objects = []
         for account in accounts:
-            if self.cache_bloom.have(account, 'subject'):
+            if self.cache_bloom.have(account, CacheAccountEnum.SUBJECT):
                 subject = self.subjects[account]
                 subjects.append(subject)
-            elif self.cache_bloom.have(account, 'object'):
+            elif self.cache_bloom.have(account, CacheAccountEnum.OBJECT):
                 objct = self.objects[account]
                 objects.append(objct)
@@ -187,26 +320,71 @@
 
 
     def add_account(self, account, label):
+        """Add a new account to the bloom cache state, in the corresponding section
+
+        Client code should use taint.cache.Cache.add_subject() or taint.cache.Cache.add_object() instead.
+
+        :param account: account to add
+        :type account: taint.account.Account
+        :param label: bloom cache section
+        :type label: taint.cache.CacheAccountEnum
+        """
         self.cache_bloom.add_raw(account.account, label)
 
 
     def add_subject(self, account):
+        """Convenience function to add an account as a subject.
+
+        :param account: account to add
+        :type account: taint.account.Account
+        :raises TypeError: If account is not right type
+        """
         if not isinstance(account, Account):
-            raise TypeError('subject must be type crypto_account_cache.account.Account')
-        self.add_account(account, 'subject')
+            raise TypeError('subject must be type taint.account.Account')
+        self.add_account(account, CacheAccountEnum.SUBJECT)
         logg.debug('added subject {}'.format(account))
         self.subjects[account.account] = account
 
 
     def add_object(self, account):
+        """Convenience function to add an account as a object.
+
+        :param account: account to add
+        :type account: taint.account.Account
+        :raises TypeError: If account is not right type
+        """
+
         if not isinstance(account, Account):
-            raise TypeError('subject must be type crypto_account_cache.account.Account')
-        self.add_account(account, 'object')
+            raise TypeError('subject must be type taint.account.Account')
+        self.add_account(account, CacheAccountEnum.OBJECT)
         logg.debug('added object {}'.format(account))
         self.objects[account.account] = account
 
 
     def add_tx(self, sender, recipient, block_height, tx_index, block_hash=None, tx_hash=None, relays=[]):
+        """Add a transaction to the bloom cache state.
+
+        If a subject address is matched, tags will be merged for all subjects involved in the transaction.
+
+        If an object address is matched, tags will be merged for all subjects and the object involved in the transaction.
+
+        :param sender: Blockchain addresses providing output for the transaction
+        :type sender: list of bytes
+        :param recipient: Blockchain addresses providing input for the transaction
+        :type recipient: list of bytes
+        :param block_height: Block height of transaction
+        :type block_height: int
+        :param tx_index: Transaction index in block
+        :type tx_index: int
+        :param block_hash: Block hash (used for debugging/log output only)
+        :type block_hash: str
+        :param tx_hash: Transaction hash (used for debugging/log output only)
+        :type tx_hash: str
+        :param relays: Additional blockchain addresses to generate match for
+        :type relays: list of bytes
+        :rtype: tuple of lists of bytes
+        :return: Matched subjects and objects, or None of no matching account was found
+        """
         accounts = [sender, recipient] + relays
         self.cache_bloom.register(accounts, block_height)
         match = self.cache_bloom.register(accounts, block_height, tx_index)
@@ -220,7 +398,7 @@
             self.last_block_height = block_height
             self.last_tx_index = tx_index
 
-        logg.info('match in {}:{} {}'.format(block_height, tx_index, tx_hash))
+        logg.info('match in {}:{} {}:{}'.format(block_height, tx_index, block_hash, tx_hash))
 
         # TODO: watch out, this currently scales geometrically
         (subjects, objects) = self.divide(accounts)
@@ -237,11 +415,40 @@
 
 
     def have(self, block_height, tx_index=None):
+        """Check if block number or block/tx index exists in bloom cache state
+
+        :param block_height: Block height to match
+        :type block_height: int
+        :param tx_index: Transaction index to match
+        :type tx_index: int
+        :rtype: boolean
+        :return: True on match
+        """
         return self.cache_bloom.have_index(block_height, tx_index)
 
 
 class CacheSyncBackend(MemBackend):
-
+    """Volatile chainsyncer backend generating matches for all block/tx matched in the bloom cache state.
+
+    Can be used to replay the syncing session for only the block/tx indices known to be of interest.
+
+    TODO: Add a tx_index max value hint on stored blocks to eliminate the need for the scan_limit, which can cause transactions to be missed, aswell as reduce resource usage.
+
+    :param cache: Cache object
+    :type cache taint.cache.Cache
+    :param chain_spec: Chain spec to run the syncer session for
+    :type chain_spec: chainlib.chain.ChainSpec
+    :param object_id: chainsyncer backend object id
+    :type object_id: str
+    :param start_block: Block offset to start syncing at, inclusive
+    :type start_block: int
+    :param target_block: Block to stop syncing at, exclusive
+    :type target_block: int
+    :param tick_callback: Callback called for every processed transaction
+    :type tick_callback: function receiving block_height and tx_index
+    :param tx_scan_limit: Maximum transaction index in a block to scan for
+    :type tx_scan_limit: int
+    """
     def __init__(self, cache, chain_spec, object_id, start_block=0, target_block=0, tick_callback=None, tx_scan_limit=500):
         if target_block <= start_block:
             raise ValueError('target block number must be higher than start block number')
@@ -253,6 +460,13 @@
 
 
     def get(self):
+        """Advance to the next matched block/tx index in the bloom cache state, and return as a block index result for the chainsyncer sync driver.
+
+        Transaction execution filters for the syncer are not implemented, so the returned filter state will always be 0.
+
+        :rtype: tuple
+        :return: tuple of block_height and tx_index, and a static 0 as filter value
+        """
         while self.block_height < self.target_block + 1:
             if self.cache.have(self.block_height):
                 if self.tx_height < self.tx_scan_limit:
diff --git a/taint/crypto.py b/taint/crypto.py
@@ -2,8 +2,15 @@
 import hashlib
 import os
 
+
 class Salter:
+    """Base class to provide cryptographic salt for cache objects that should be obfuscated.
+
+    By default a random base value will be generated. The salt will be deterministically determined from the value and the provided chain spec.
+    :param chain_spec: Chain spec to generate the salt with.
+    :type chain_spec: chainlib.chain.ChainSpec
+    """
 
     salt = os.urandom(32)
 
     def __init__(self, chain_spec):
@@ -13,6 +20,14 @@ class Salter:
 
 
     def sprinkle(self, data):
+        """Hash the given data with the salt
+
+        :param data: Input data
+        :type data: bytes
+        :rtype: bytes
+        :returns: Hashed, salted value
+
+        """
         h = hashlib.new('sha256')
         if isinstance(data, list):
             for d in data:
@@ -24,4 +39,9 @@ class Salter:
 
 
     def root_key(self):
+        """Returns the salt value generated from the chain spec.
+
+        :rtype: bytes
+        :returns: Salt
+        """
         return self.ionized_salt
diff --git a/taint/store/base.py b/taint/store/base.py
@@ -8,3 +8,27 @@ def to_key(k):
     else:
         k = even(k)
     return k
+
+
+class BaseStore:
+
+    def put(self, k, v):
+        """Store value v under key k
+
+        :param k: Key
+        :type k: bytes
+        :param v: Value
+        :type v: bytes
+        """
+        raise NotImplementedError
+
+
+    def get(self, k):
+        """Return value stored under key k
+
+        :param k: Key
+        :type k: bytes
+        :rtype: bytes
+        :return: Value
+        """
+        raise NotImplementedError
diff --git a/taint/store/file.py b/taint/store/file.py
@@ -1,14 +1,14 @@
 # standard imports
 import os
-import logging
 
 # local imports
-from .base import to_key
+from .base import (
+        to_key,
+        BaseStore,
+        )
 
-logg = logging.getLogger().getChild(__name__)
-
-class FileStore:
+class FileStore(BaseStore):
 
     def __init__(self, base_dir):
         os.makedirs(base_dir, exist_ok=True)
diff --git a/taint/tag.py b/taint/tag.py
@@ -25,7 +25,10 @@ class TagPool:
 
 
 class Tag:
+    """Represents a collection of tags for a cached object.
+    When a new tag is added, the tag collection is deterministically ordered and summed.
+    """
 
     def __init__(self):
         self.tags = []
         self.tag_values = {}
@@ -34,6 +37,11 @@
 
 
     def get(self):
+        """The current deterministic sum of the tags.
+
+        :rtype: bytes
+        :return: Tag digest sum
+        """
         if self.dirty:
             self.tags.sort()
             h = hashlib.new('sha256')
@@ -44,6 +52,17 @@
 
 
     def add(self, tag, value=None):
+        """Add a tag to the collection.
+
+        Client code should call Tag.create() instead.
+
+        :param tag: Tag value digest
+        :type tag: bytes
+        :param value: Tag value
+        :type value: bytes
+        :rtype: boolean
+        :returns: False if tag digest already exists in object
+        """
         if tag in self.tags:
             return False
         self.tags.append(tag)
@@ -53,6 +72,13 @@
 
 
     def create(self, value):
+        """Create a new tag record to add to the collection.
+
+        :param value: Tag value
+        :type value: bytes
+        :rtype: bytes
+        :return: Digest of newly added tag
+        """
         h = hashlib.new('sha256')
         h.update(value)
         tag = h.digest()
@@ -61,6 +87,12 @@
 
 
     def merge(self, tags):
+        """Merge contents of two tag objects. After this operation the sum of each of the tag objects will be identical.
+
+        :param tags: Tag collection to merge with
+        :type tags: taint.tag.Tag
+        :raises TypeError: If argument is not a taint.tag.Tag instance
+        """
         if not isinstance(tags, Tag):
             raise TypeError('tags must be type taint.tag.Tag')
         for tag in tags.tags:
@@ -73,13 +105,28 @@
 
 
     def serialize(self):
+        """Serialize tags for storage.
+
+        Serialized tags are deterministically ordered.
+
+        :rtype: bytes
+        :returns: Serialized tags
+        """
         b = self.get()
         for tag in self.tags:
             b += tag
         return b
 
 
-    def deserialize(self, b):
+    def deserialize(self, b, skip_check=False):
+        """Deserialize tags into currently instantiated object.
+
+        Deserialization will ADD tags to the current object. If different tags already exist in the object, the resulting collection will not be identical to the serialized data.
+
+        :param b: Serialized tag data
+        :type b: bytes
+        :raises ValueError: If skip_check is not set, and serialized data does not match tag object sum
+        """
         if len(b) % 32 > 0:
             raise ValueError('invalid data length; remainder {} from 32'.format(len(b) % 32))
         cursor = 32
@@ -90,9 +137,10 @@
             logg.debug('deserialize add {}'.format(tag))
             self.add(tag)
 
-        zz = self.get()
-        if z != zz:
-            raise ValueError('data sum does not match content; expected {}, found {}'.format(zz.hex(), z.hex()))
+        if not skip_check:
+            zz = self.get()
+            if z != zz:
+                raise ValueError('data sum does not match content; expected {}, found {}'.format(zz.hex(), z.hex()))
 
 
     def __str__(self):
diff --git a/taint/taint.py b/taint/taint.py
@@ -13,7 +13,19 @@ logg = logging.getLogger().getChild(__name__)
 
 
 class Tainter(Cache):
-
+    """Frontend object containing code to load and save state of a cache, aswell as chain sync handling.
+
+    :param chain_spec: Chain spec context for the cache
+    :type chain_spec: chainlib.chain.ChainSpec
+    :param bits_size: Bitsize of bloom filter used for cache
+    :type bits_size: int
+    :param result_handler: Callback called once for each registered account found in a transaction.
+    :type result_handler: function
+    :param store: State storage for cache
+    :type store: taint.store.base.BaseStore
+    :param cache_bloom: Cache bloom filter to instantiate
+    :type cache_bloom: taint.cache.CacheBloom
+    """
     def __init__(self, chain_spec, bits_size, result_handler=None, store=None, cache_bloom=None):
         super(Tainter, self).__init__(chain_spec, bits_size, cache_bloom=cache_bloom)
         self.store = store
@@ -21,12 +33,34 @@
 
 
     def add_account(self, account, label):
+        """Add account to be tracked in cache.
+
+        If registered, the result handler will be called with the initial state of the added account.
+
+        The label will only be stored in memory for the given session, and will not be part of state storage.
+
+        :param account: Account to add
+        :type account: taint.account.Account
+        :param label: Filter section to add account to
+        :type label: taint.cache.CacheAccountEnum
+        """
         super(Tainter, self).add_account(account, label)
         if self.result_handler != None:
             self.result_handler.register(account)
 
 
     def filter(self, conn, block, tx, storer):
+        """Transaction callback for chainsyncer.
+
+        :param conn: RPC connection object
+        :type conn: chainlib.connection.RPCConnection
+        :param block: Block object
+        :type block: chainlib.block.Block
+        :param tx: Transaction object
+        :type tx: chainlib.tx.Tx
+        :param storer: State storage object (e.g. a sql database session)
+        :type storer: any
+        """
         for output in tx.outputs:
             for inpt in tx.inputs:
                 sender = bytes.fromhex(strip_0x(output))
@@ -59,12 +93,20 @@ class Tainter(Cache):
 
 
     def save(self):
+        """Save state of all accounts and the salt used for the session to the cache store.
+        """
         for account in self.subjects.values():
             self.store.put(account.account, account.serialize())
         self.store.put(self.root_key(), self.serialize())
 
 
     def load_account(self, k, label=None):
+        """Load state for an accounts from a cache store.
+
+        :param k: Account to load, by obfuscated value.
+        :type k: bytes
+        :param label: Label to associate with account, for display use.
+        """
         try:
             b = self.store.get(k)
         except FileNotFoundError:
@@ -73,6 +115,14 @@
 
 
     def load_subject(self, k, label=None):
+        """Load state for an account as subject from the cache store.
+
+        A subject will always merge tags with any other subject or object in the same transaction.
+
+        :param k: Account to load, by obfuscated value.
+        :type k: bytes
+        :param label: Label to associate with account, for display use.
+        """
         a = self.load_account(k, label)
         if a == None:
             return False
@@ -81,6 +131,14 @@
 
 
     def load_object(self, k, label=None):
+        """Load state for an account as object from the cache store.
+
+        An object will only merge tags with other subjects in the same transaction.
+
+        :param k: Account to load, by obfuscated value.
+        :type k: bytes
+        :param label: Label to associate with account, for display use.
+        """
         a = self.load_account(k, label)
         if a == None:
             return False
@@ -90,6 +148,8 @@
 
     @staticmethod
     def load(store, chain_spec, result_handler=None):
+        """Instantiate new Tainter object with salt stored from previous session.
+        """
         a = Salter(chain_spec)
         b = store.get(a.root_key())
         c = Tainter.from_serialized(chain_spec, b)
diff --git a/tests/test_bloom.py b/tests/test_bloom.py
@@ -7,6 +7,8 @@ import copy
 from taint.cache import (
         CacheBloom,
         to_index,
+        CacheAccountEnum,
+        CacheStateEnum,
         )
 
 
@@ -19,24 +21,24 @@ class TestBloom(unittest.TestCase):
         self.alice = os.urandom(20)
         self.bob = os.urandom(20)
-        self.bloom.add_raw(self.alice, 'subject')
-        self.bloom.add_raw(self.bob, 'object')
+        self.bloom.add_raw(self.alice, CacheAccountEnum.SUBJECT)
+        self.bloom.add_raw(self.bob, CacheAccountEnum.OBJECT)
 
 
     def reset_with_accounts(self):
         self.bloom.reset()
-        self.bloom.add_raw(self.alice, 'subject')
-        self.bloom.add_raw(self.bob, 'object')
+        self.bloom.add_raw(self.alice, CacheAccountEnum.SUBJECT)
+        self.bloom.add_raw(self.bob, CacheAccountEnum.OBJECT)
 
 
    def test_bloom(self):
        orig_serial = self.bloom.serialize()
 
-        self.bloom.add_raw(b'\x01', 'subject')
-        self.bloom.add_raw(b'\x01', 'object')
-        self.bloom.add_raw(b'\x01', 'cache')
-        self.bloom.add_raw(b'\x01', 'extra')
+        self.bloom.add_raw(b'\x01', CacheAccountEnum.SUBJECT)
+        self.bloom.add_raw(b'\x01', CacheAccountEnum.OBJECT)
+        self.bloom.add_raw(b'\x01', CacheStateEnum.CACHE)
+        self.bloom.add_raw(b'\x01', CacheStateEnum.EXTRA)
 
         b = self.bloom.serialize()
         byte_size = int(1024 / 8)
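
For orientation, a rough usage sketch of the API documented by the docstrings above. This is not code from the repository; the ChainSpec constructor arguments and the random stand-in addresses are assumptions for illustration only.

    import os

    from chainlib.chain import ChainSpec

    from taint.account import Account
    from taint.cache import Cache

    # assumed ChainSpec arguments; adjust for the chain being analyzed
    chain_spec = ChainSpec('evm', 'ethereum', 1)

    # two accounts with random 20-byte stand-in addresses (the tests use os.urandom(20) similarly)
    alice = Account(chain_spec, os.urandom(20), label='alice')
    bob = Account(chain_spec, os.urandom(20), label='bob')

    alice.tag(b'exchange')  # literal tag value, bytes
    bob.tag(b'mixer')

    alice.connect(bob)  # merge tags; per the docstrings both accounts now have the same tag sum
    assert alice.sum() == bob.sum()

    serialized = alice.serialize()  # tag sum + tags + (obfuscated) address, for the store

    # cache session tracking the two accounts; 1024 is the bloom filter bit size used in the tests
    cache = Cache(chain_spec, 1024)
    cache.add_subject(alice)
    cache.add_object(bob)

    # register a transaction between the tracked (obfuscated) account values
    cache.add_tx(alice.account, bob.account, block_height=42, tx_index=0)
    assert cache.have(42, 0)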