entry.py (3569B)
# standard imports
import email
import email.utils
import uuid
import logging
import base64
import enum
import time
import gzip

# local imports
from .common import defaulthashers

logg = logging.getLogger()

# maps every extension name to the python type its values must be an instance of
extensiontype = {
    'TASKWARRIOR': uuid.UUID,
}


class extension(enum.Enum):
    """Names of the extensions that can be attached to an entry."""
    TASKWARRIOR = 'TASKWARRIOR'


class entry:
    """A message together with its uuid and any extension values added to it."""

    def __init__(self, uu, message):
        """Store the uuid and message; the entry starts with no extensions.

        :param uu: identifier of the entry, rendered with str() on serialization
        :param message: message object; serialize() uses its add_header(), get()
            and as_string() methods
        """
        self.uuid = uu
        self.message = message
        self.extensions = {}


    def add_extension(self, k, v):
        """Append the value v to the list of values stored for extension k.

        :param k: member of the extension enum
        :param v: value, must be an instance of the type registered for k in extensiontype
        :raises ValueError: if k is not an extension member or v has the wrong type
        :return: True
        """
        if not isinstance(k, extension):
            raise ValueError('extension type {} invalid'.format(type(k)))
        requiredtyp = extensiontype[k.value]
        if not isinstance(v, requiredtyp):
            raise ValueError('extension value is {}, but {} is required'.format(type(v).__name__, requiredtyp))

        # values are kept in their string form, in insertion order
        self.extensions.setdefault(k.value, []).append(str(v))

        return True


    def serialize(self):
        """Write the extensions into the message headers and return the entry as a dict.

        Every extension becomes one X-FEEDWARRIOR-<name> header holding its
        comma-separated values. The headers are added to the stored message
        itself, so each call adds them again.

        The timestamp is computed from the Date header of the message with
        time.mktime(), i.e. the parsed date fields are interpreted as local time.

        :return: dict with the keys 'uuid' (str), 'timestamp' (int) and 'payload' (str)
        """
        for x, values in self.extensions.items():
            logg.debug('adding extension header {}'.format(x))
            v = ','.join(values)
            self.message.add_header('X-FEEDWARRIOR-{}'.format(x), v)

        logg.debug('complete message {}'.format(self.message))

        d = email.utils.parsedate(self.message.get('Date'))
        logg.debug('date {} {}'.format(d, self.message.get('Date')))
        ts = time.mktime(d)

        return {
            'uuid': str(self.uuid),
            'timestamp': int(ts),
            'payload': self.message.as_string(),
        }


def from_multipart_file(filename, hashers=defaulthashers):
    """Read a MIME multipart message from a file and build an entry from it.

    If filename does not exist, the gzip compressed file filename + '.gz' is
    read instead.

    :param filename: path of the message file
    :param hashers: hasher constructors, passed on to from_multipart
    :return: entry built by from_multipart
    """
    try:
        f = open(filename, 'r')
    except FileNotFoundError:
        # email.message_from_file needs a text stream, so the gzip file must be opened in text mode
        f = gzip.open(filename + '.gz', 'rt')
    # the context manager closes the file also when parsing fails
    with f:
        m = email.message_from_file(f)
    return from_multipart(m, hashers)


def from_multipart(m, hashers=defaulthashers):
    """Build an entry from a MIME multipart message.

    For every hasher one X-FEEDWARRIOR-<HASHER NAME> header with the base64
    encoded digest is added to m. The uuid of the entry is made from the
    first 16 bytes of the digest of the last hasher.

    :param m: parsed message, must be multipart
    :param hashers: sequence of callables; each must return a new hasher object
        with a name attribute and update() and digest() methods
    :raises ValueError: if m is not multipart or no hashers are given
    :return: entry holding the calculated uuid and the message m
    """
    if not m.is_multipart():
        raise ValueError('message is not a MIME multipart message')
    if len(hashers) == 0:
        raise ValueError('at least one hasher is required')

    # the hasher calculates a uuid from the canonical order of the message contents
    # TODO: currently the canonical order is the order of items in the message. this should
    # rather be the lexicographical order of the hash integer values of the items.
    htops = []
    hparts = {}
    for h in hashers:
        hasher = h()
        htops.append(h())
        hparts[hasher.name] = hasher

    # TODO: this is a naïve parser. It presumes that the message structure will
    # always be a multipart container on top. Therefore it will always discard the top item
    subject = None
    for i, p in enumerate(m.walk()):
        if i == 0:
            subject = p.get('Subject')
            continue
        if p.get_content_maintype() == 'multipart' or p.is_multipart():
            # a container has no payload of its own to hash (get_payload(decode=True)
            # returns None for it); its sub-parts are still visited by walk()
            logg.warning('recursive multipart is not implemented, skipping part {}'.format(i))
            continue

        payload = p.get_payload(decode=True)
        for htop in htops:
            # the part hasher is not reset between parts, so its digest covers
            # all payloads seen so far
            hpart = hparts[htop.name]
            hpart.update(payload)
            psum = hpart.digest()
            htop.update(psum)

    # NOTE(review): the digest below is taken from the part hasher, the state of the
    # top hasher is never read. Left as is, since changing it would change the uuid
    # calculated for existing messages - confirm which one is intended.
    for h in htops:
        hasher = hparts[h.name]
        msum = hasher.digest()
        uu = uuid.UUID(bytes=msum[:16])
        header_key = 'X-FEEDWARRIOR-{}'.format(h.name.upper())
        m.add_header(header_key, base64.encodebytes(msum).decode('ascii'))

    if subject is None:
        subject = str(uu)
        logg.info('subject not specified, using uuid {}'.format(subject))

    return entry(uu, m)