feed.py (3510B)
# standard imports
import email
import email.utils
import os
import uuid
import copy
import time
import json
import logging
import gzip

# local imports
from feedwarrior.common import parse_uuid
from feedwarrior.adapters import fileadapter

logg = logging.getLogger()


class feed:
    """A feed: a uuid, optional parent feed, timestamps and a list of entry names.

    Entry contents are not held by the feed itself; they are fetched on
    demand through a "getter" object (anything exposing ``get(name)``)
    installed with :meth:`set_getter`.
    """

    def __init__(self, uu=None, parent=None, created=None, updated=None):
        """Create a feed.

        :param uu: feed identifier; a random ``uuid.uuid4()`` is generated when omitted
        :param parent: optional parent ``feed``; a shallow copy of it is stored
        :param created: creation timestamp; defaults to the current time (integer seconds)
        :param updated: last-update timestamp; defaults to ``created``
        :raises ValueError: if ``parent`` is given but is not a ``feed``
        """
        if uu is None:
            uu = uuid.uuid4()
        self.uuid = uu

        self.parent = None
        if parent is not None:
            if not isinstance(parent, feed):
                raise ValueError('wrong type for parent: {}'.format(type(parent).__name__))
            self.parent = copy.copy(parent)

        if created is None:
            created = int(time.time())
        self.created = created
        # An explicitly supplied update time must win; only fall back to the
        # creation time when none was given (otherwise a loaded feed would
        # lose its stored 'updated' value).
        self.updated = created if updated is None else updated

        self.entries = []
        self.entries_cursor = 0
        self.entries_sorted = False
        # Installed later through set_getter(); needed before entries can be read.
        self.getter = None


    def add(self, entry):
        """Append an entry name to the feed's entry list."""
        logg.debug('adding entry {}'.format(entry))
        self.entries.append(entry)


    def serialize(self):
        """Return the feed metadata as a dict.

        Keys are ``uuid`` (as string), ``created`` and ``updated``, plus
        ``parent_uuid`` (as string) when the feed has a parent.
        """
        o = {
            'uuid': str(self.uuid),
            'created': self.created,
            'updated': self.updated,
        }
        if self.parent is not None:
            o['parent_uuid'] = str(self.parent.uuid)

        return o


    # TODO: use index instead
    def _sort_entries(self):
        """Reorder ``self.entries`` by the Date header of each entry's payload.

        Every entry is fetched through the getter, decoded as JSON, and its
        ``payload`` member parsed as an email message. Entries with equal
        timestamps are ordered by entry name. The read cursor is reset to
        the first entry.

        :raises ValueError: if an entry's payload is not a multipart message,
            or its Date header is missing or cannot be parsed
        """
        logg.debug('entries for {} {}'.format(self.uuid, self.entries))
        dated_entries = []
        for e in self.entries:
            entry = self.getter.get(e)
            o = json.loads(entry)
            m = email.message_from_string(o['payload'])
            if not m.is_multipart():
                raise ValueError('invalid entry {}'.format(e))
            d = email.utils.parsedate_tz(m.get('Date'))
            if d is None:
                raise ValueError('missing or unparseable Date header in entry {}'.format(e))
            # mktime_tz honours the UTC offset in the header, so entries
            # written with different offsets still sort by actual instant.
            t = email.utils.mktime_tz(d)
            logg.debug('date {} {}'.format(e, t))
            # Keep the timestamp numeric: comparing its string form would
            # order e.g. '999.0' after '1000.0'.
            dated_entries.append((t, e))

        dated_entries.sort()
        self.entries = [e for (_, e) in dated_entries]

        self.entries_cursor = 0
        self.entries_sorted = True


    def set_getter(self, getter):
        """Install the object used to fetch entry contents via ``getter.get(name)``."""
        self.getter = getter


    def next_entry(self):
        """Return the content of the next entry and advance the cursor.

        Entries are sorted by date on first use.

        :raises IndexError: when all entries have been returned
        """
        if not self.entries_sorted:
            self._sort_entries()
        if self.entries_cursor >= len(self.entries):
            raise IndexError('no more entries')

        e = self.getter.get(self.entries[self.entries_cursor])
        self.entries_cursor += 1
        return e



# TODO: add input checking for timestamps
# TODO: check state of symlink index
def load(data_dir, uu):
    """Load a feed from ``<data_dir>/feeds/<uu>``.

    Metadata is read as JSON from the ``.log`` file in that directory, the
    entry list is taken from the names found in its ``entries``
    subdirectory, and a ``fileadapter`` is installed as the entry getter.

    :param data_dir: base data directory
    :param uu: identifier of the feed to load (converted with ``str()``)
    :return: the loaded ``feed``
    """
    path = os.path.join(data_dir, 'feeds', str(uu))
    feed_meta_path = os.path.join(path, '.log')
    # Context manager ensures the metadata file is closed after parsing.
    with open(feed_meta_path, 'r') as f:
        o = json.load(f)
    uu = parse_uuid(o['uuid'])
    p = None
    if o.get('parent_uuid') is not None:
        puu = parse_uuid(o['parent_uuid'])
        # Only the parent's uuid is known here; its other fields get defaults.
        p = feed(puu)
    feed_loaded = feed(uu, p, int(o['created']), int(o['updated']))

    feed_entries_path = os.path.join(path, 'entries')
    feed_loaded.entries.extend(os.listdir(feed_entries_path))

    fg = fileadapter(data_dir, uu)
    feed_loaded.set_getter(fg)

    return feed_loaded