feedwarrior

Slim, portable tooling for creating and distributing decentralized append logs
git clone git://git.defalsify.org/logwarrior.git
Log | Files | Refs | README | LICENSE

entry.py (3569B)


      1 # standard imports
      2 import email
      3 import uuid
      4 import logging
      5 import base64
      6 import enum
      7 import time
      8 import gzip
      9 
     10 # local imports
     11 from .common import defaulthashers
     12 
     13 logg = logging.getLogger()
     14 
     15 extensiontype = {
     16     'TASKWARRIOR': uuid.UUID
     17 }
     18 
     19 class extension(enum.Enum):
     20     TASKWARRIOR = 'TASKWARRIOR'
     21     pass
     22 
     23 class entry:
     24 
     25     def __init__(self, uu, message):
     26         self.uuid = uu
     27         self.message = message
     28         self.extensions = {}
     29 
     30 
     31     def add_extension(self, k, v):
     32         if not isinstance(k, extension):
     33             raise ValueError('extension type {} invalid'.format(type(k)))
     34         requiredtyp = extensiontype[k.value]
     35         if not isinstance(v, requiredtyp):
     36             raise ValueError('extension value is {}, but {} is required'.format(type(v).__name__, requiredtyp))
     37         if self.extensions.get(k.value) == None:
     38            self.extensions[k.value] = []
     39         
     40         self.extensions[k.value].append(str(v))
     41 
     42         return True
     43             
     44 
     45     def serialize(self):
     46 
     47         for x in self.extensions.keys():
     48             logg.debug('adding extension header {}'.format(x))
     49             v = ','.join(self.extensions[x])
     50             self.message.add_header('X-FEEDWARRIOR-{}'.format(x), v)
     51 
     52         logg.debug('complete message {}'.format(self.message))
     53 
     54         d = email.utils.parsedate(self.message.get('Date'))
     55         logg.debug('date {} {}'.format(d, self.message.get('Date')))
     56         ts = time.mktime(d)
     57 
     58         return {
     59             'uuid': str(self.uuid),
     60             'timestamp': int(ts),
     61             'payload': self.message.as_string(),
     62                 }
     63 
     64    
     65 
     66 def from_multipart_file(filename, hashers=defaulthashers):
     67     f = None
     68     try:
     69         f = open(filename, 'r')
     70     except FileNotFoundError:
     71         f = gzip.open(filename + '.gz', 'rb')
     72     m = email.message_from_file(f)
     73     f.close()
     74     return from_multipart(m, hashers)
     75 
     76 
     77 def from_multipart(m, hashers=defaulthashers):
     78     if not m.is_multipart():
     79         raise ValueError('{} is not a MIME multipart message'.format(filename))
     80 
     81     # the hasher calculates a uuid from the canonical order of the message contents
     82     # TODO: currently the canonical order is the order of items in the message. this should
     83     # rather be the lexiographical order of the hash integer values of the items.
     84     htops = []
     85     hparts = {}
     86     for h in hashers:
     87         hasher = h()
     88         htops.append(h())
     89         hparts[hasher.name] = hasher
     90 
     91     # TODO: this is a naïve parser. If presumes that the message stucture will
     92     # always be a multipart container on top. Therefore it will always discard the top item
     93     subject = None
     94     i = 0
     95     for p in m.walk():
     96         if i == 0:
     97             subject = p.get('Subject')
     98             i += 1
     99             continue
    100         if p.get_content_maintype() == 'multipart':
    101             logg.warn('recursive multipart is not implemented, skipping part {}'.format(i))
    102 
    103         for htop in htops:
    104             hpart = hparts[htop.name]
    105             hpart.update(p.get_payload(decode=True))
    106             psum = hpart.digest()
    107             htop.update(psum)
    108 
    109         i += 1
    110 
    111     for h in htops:
    112         hasher = hparts[h.name]
    113         msum = hasher.digest()
    114         uu = uuid.UUID(bytes=msum[:16])
    115         #m.add_header('X-FEEDWARRIOR-HASH', htop.name)
    116         header_key = 'X-FEEDWARRIOR-{}'.format(h.name.upper())
    117         m.add_header(header_key, base64.encodebytes(msum).decode('ascii'))
    118 
    119     if subject == None:
    120         subject = str(uu)
    121         logg.info('subject not specified, using uuid {}'.format(subject))
    122 
    123     return entry(uu, m)
    124