commit 24ed2be5033acac9b4c80f5da7a670c0e813fdff
parent 43ebb25273728f2dd0aa06ee90f68f40315221a2
Author: nolash <dev@holbrook.no>
Date: Sun, 28 Jun 2020 17:43:12 +0200
Add entry object, calculate uuid from payload contents
Diffstat:
5 files changed, 94 insertions(+), 9 deletions(-)
diff --git a/src/feedwarrior/__init__.py b/src/feedwarrior/__init__.py
@@ -1,2 +1,3 @@
from .feed import feed
+from . import entry
from .config import load_config
diff --git a/src/feedwarrior/cmd/entry.py b/src/feedwarrior/cmd/entry.py
@@ -1,5 +1,14 @@
# standard imports
import email
+import logging
+import uuid
+import json
+
+# local imports
+import feedwarrior
+
+logg = logging.getLogger()
+
def parse_args(argparser):
argparser.add_argument('-l', required=True, help='log to add entry to')
@@ -11,14 +20,14 @@ def check_args(args):
pass
-def process_as_multipart_file(config, feed, filename):
- f = open(filename, 'r')
- m = email.message_from_file(f)
- f.close()
- if not m.is_multipart():
- raise ValueError('{} is not a MIME multipart message'.format(filename))
- pass
+ return feedwarrior.entry(uu, m.get_payload())
+
def execute(config, feed, args):
- process_as_mime(config, feed, args.path)
- pass
+ entry = feedwarrior.entry.from_multipart_file(args.path)
+ entry_serialized = entry.serialize()
+ uu = str(entry.uuid)
+ logg.debug('adding entry {}'.format(uu))
+ f = open(os.path.join(config.entries_dir, uu)
+ json.dump(f, entry_serialized)
+ f.close()
diff --git a/src/feedwarrior/common.py b/src/feedwarrior/common.py
@@ -1,5 +1,8 @@
# standard imports
import uuid
+import hashlib
+
+defaulthasher = hashlib.sha256
def parse_uuid(uu):
if type(uu).__name__ == 'str':
diff --git a/src/feedwarrior/config.py b/src/feedwarrior/config.py
@@ -1,6 +1,7 @@
# standard imports
import os
import configparser
+import hashlib
class config:
@@ -10,6 +11,7 @@ class config:
self.data_dir = cp['FEEDWARRIOR']['datadir']
self.feeds_dir = os.path.join(cp['FEEDWARRIOR']['datadir'], 'feeds')
self.entries_dir = os.path.join(cp['FEEDWARRIOR']['datadir'], 'entries')
+ self.hasher = hashlib.sha256
def load_config(filename):
return config(filename)
diff --git a/src/feedwarrior/entry.py b/src/feedwarrior/entry.py
@@ -0,0 +1,70 @@
+# standard imports
+import email
+import uuid
+import logging
+import base64
+
+# local imports
+from .common import defaulthasher
+
+logg = logging.getLogger()
+
+
+class entry:
+
+ def __init__(self, uu, payload):
+ self.uuid = uu
+ self.payload = payload
+
+
+ def serialize(self):
+ return {
+ 'uuid': str(self.uuid),
+ 'payload': self.payload,
+ }
+
+
+
+def from_multipart_file(filename, hasher=defaulthasher):
+#def process_as_multipart_file(config, feed, filename):
+ f = open(filename, 'r')
+ m = email.message_from_file(f)
+ f.close()
+ if not m.is_multipart():
+ raise ValueError('{} is not a MIME multipart message'.format(filename))
+
+ # the hasher calculates a uuid from the canonical order of the message contents
+ # TODO: currently the canonical order is the order of items in the message. this should
+ # rather be the lexicographical order of the hash integer values of the items.
+ htop = hasher()
+
+ # TODO: this is a naïve parser. It presumes that the message structure will
+ # always be a multipart container on top. Therefore it will always discard the top item
+ subject = None
+ i = 0
+ for p in m.walk():
+ if i == 0:
+ subject = p.get('Subject')
+ i += 1
+ continue
+ if p.get_content_maintype() == 'multipart':
+ logg.warn('recursive multipart is not implemented, skipping part {}'.format(i))
+
+ hpart = hasher()
+ hpart.update(p.get_payload(decode=True))
+ psum = hpart.digest()
+ htop.update(psum)
+
+ i += 1
+
+ msum = htop.digest()
+ uu = uuid.UUID(bytes=msum[:16])
+ m.add_header('X-FEEDWARRIOR-HASH', htop.name)
+ m.add_header('X-FEEDWARRIOR-DIGEST', base64.encodebytes(msum).decode('ascii'))
+
+ if subject == None:
+ subject = str(uu)
+ logg.info('subject not specified, using uuid {}'.format(subject))
+
+ return entry(uu, m.as_bytes())
+