# Copyright (C) 2004-2021 Aaron Swartz
# Andrey Zelenchuk <azelenchuk@parallels.com>
# Andrey Zelenchuk <azelenchuk@plesk.com>
# Brian Lalor
# Dean Jackson
# Erik Hetzner
# Etienne Millon <me@emillon.org>
# Joey Hess
# Kaashif Hymabaccus <kaashif@kaashif.co.uk>
# Lindsey Smith <lindsey.smith@gmail.com>
# Léo Gaspard <leo@gaspard.io>
# Marcel Ackermann
# Martin 'Joey' Schulze
# Matej Cepl
# Profpatsch <mail@profpatsch.de>
# Raphaël Droz <raphael.droz+floss@gmail.com>
# W. Trevor King <wking@tremily.us>
# ryneeverett <ryneeverett@gmail.com>
#
# This file is part of rss2email.
#
# rss2email is free software: you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
# Foundation, either version 2 of the License, or (at your option) version 3 of
# the License.
#
# rss2email is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along with
# rss2email.  If not, see <http://www.gnu.org/licenses/>.

"""Define the ``Feeds`` class for handling a list of feeds
"""

import codecs as _codecs
import collections as _collections
import os as _os
import json as _json
import pickle as _pickle
import sys as _sys

from . import LOG as _LOG
from . import config as _config
from . import error as _error
from . import feed as _feed

try:
    import fcntl as _fcntl
    # Advisory file locking is only available on POSIX systems.
    UNIX = True
except ImportError:
    UNIX = False

# Path to the filesystem root, '/' on POSIX.1 (IEEE Std 1003.1-2008).
ROOT_PATH = _os.path.splitdrive(_sys.executable)[0] or _os.sep


class Feeds (list):
    """Utility class for rss2email activity.

    >>> import codecs
    >>> import os.path
    >>> import json
    >>> import tempfile
    >>> from .feed import Feed

    Setup a temporary directory to load.

    >>> tmpdir = tempfile.TemporaryDirectory(prefix='rss2email-test-')
    >>> configfile = os.path.join(tmpdir.name, 'rss2email.cfg')
    >>> with open(configfile, 'w') as f:
    ...     count = f.write('[DEFAULT]\\n')
    ...     count = f.write('to = a@b.com\\n')
    ...     count = f.write('[feed.f1]\\n')
    ...     count = f.write('url = http://a.net/feed.atom\\n')
    ...     count = f.write('to = x@y.net\\n')
    ...     count = f.write('[feed.f2]\\n')
    ...     count = f.write('url = http://b.com/rss.atom\\n')
    >>> datafile = os.path.join(tmpdir.name, 'rss2email.json')
    >>> with codecs.open(datafile, 'w', Feeds.datafile_encoding) as f:
    ...     json.dump({
    ...             'version': 1,
    ...             'feeds': [
    ...                 Feed(name='f1').get_state(),
    ...                 Feed(name='f2').get_state(),
    ...                 ],
    ...             }, f)

    >>> feeds = Feeds(configfiles=[configfile,], datafile=datafile)
    >>> feeds.load()
    >>> for feed in feeds:
    ...     print(feed)
    f1 (http://a.net/feed.atom -> x@y.net)
    f2 (http://b.com/rss.atom -> a@b.com)

    You can index feeds by array index or by feed name.

    >>> feeds[0]
    <Feed f1 (http://a.net/feed.atom -> x@y.net)>
    >>> feeds[-1]
    <Feed f2 (http://b.com/rss.atom -> a@b.com)>
    >>> feeds['f1']
    <Feed f1 (http://a.net/feed.atom -> x@y.net)>
    >>> feeds['missing']
    Traceback (most recent call last):
      ...
    IndexError: missing

    Tweak the feed configuration and save.

    >>> feeds[0].to = None
    >>> feeds.save()
    >>> print(open(configfile, 'r').read().rstrip('\\n'))
    ... # doctest: +REPORT_UDIFF, +ELLIPSIS
    [DEFAULT]
    from = user@rss2email.invalid
    ...
    verbose = warning
    <BLANKLINE>
    [feed.f1]
    url = http://a.net/feed.atom
    <BLANKLINE>
    [feed.f2]
    url = http://b.com/rss.atom

    Cleanup the temporary directory.

    >>> tmpdir.cleanup()
    """
    # Version stamp written into the JSON data file; bump it together with
    # a matching branch in _upgrade_state_data().
    datafile_version = 2
    datafile_encoding = 'utf-8'

    def __init__(self, configfiles=None, datafile=None, config=None,
                 datafile_path=None):
        """Initialize the feed list.

        Parameters
        ----------
        configfiles : list of str, optional
            Configuration file paths; defaults to the XDG lookup in
            _get_configfiles().
        datafile : str, optional
            Path to the JSON feed-state file; defaults to the XDG lookup
            in _get_datafile_path().
        config : configparser.ConfigParser, optional
            Parsed configuration; defaults to the shared _config.CONFIG.
        datafile_path : str, optional
            Deprecated keyword alias for ``datafile``, kept for backward
            compatibility; ignored when ``datafile`` is given.
        """
        super(Feeds, self).__init__()
        if configfiles is None:
            configfiles = self._get_configfiles()
        self.configfiles = configfiles
        if datafile is None:
            datafile = datafile_path  # honor the deprecated alias
        if datafile is None:
            datafile = self._get_datafile_path()
        self.datafile_path = _os.path.realpath(datafile)
        if config is None:
            config = _config.CONFIG
        self.config = config
        # Open file handle on the data file while it is loaded/locked;
        # None whenever the file is closed.
        self.datafile = None

    def __getitem__(self, key):
        """Look up a feed by name first, falling back to list position.

        Raises IndexError (not KeyError) for unknown names so doctest
        callers see a single exception type for failed lookups.
        """
        for feed in self:
            if feed.name == key:
                return feed
        try:
            index = int(key)
        except ValueError as e:
            raise IndexError(key) from e
        return super(Feeds, self).__getitem__(index)

    def __append__(self, feed):
        # NOTE(review): this is not a real dunder -- list.append() never
        # dispatches to it, so plain self.append(...) bypasses this hook.
        # Kept under its historical name because renaming it to append()
        # would change the behavior of new_feed() and _load_feeds().
        feed.load_from_config(self.config)
        super(Feeds, self).append(feed)

    def __pop__(self, index=-1):
        # NOTE(review): like __append__, this is never invoked by the
        # normal list.pop() protocol; it must be called explicitly.
        # list.pop() takes no keyword arguments, so pass positionally.
        feed = super(Feeds, self).pop(index)
        if feed.section in self.config:
            self.config.pop(feed.section)
        return feed

    def index(self, index):
        """Return the feed matching ``index`` (position, name, or value).

        Accepts an integer position, a string holding either a feed name
        or an integer position, or a feed instance.  Raises
        _error.FeedIndexError when nothing matches.
        """
        if isinstance(index, int):
            try:
                return self[index]
            except IndexError as e:
                raise _error.FeedIndexError(index=index, feeds=self) from e
        elif isinstance(index, str):
            try:
                index = int(index)
            except ValueError:
                pass
            else:
                return self.index(index)
            for feed in self:
                if feed.name == index:
                    return feed
        try:
            position = super(Feeds, self).index(index)
        except (IndexError, ValueError) as e:
            raise _error.FeedIndexError(index=index, feeds=self) from e
        # Return the feed itself for consistency with the branches above
        # (previously this fell through and returned None on success).
        return self[position]

    def remove(self, feed):
        """Remove ``feed`` and drop its section from the configuration."""
        super(Feeds, self).remove(feed)
        if feed.section in self.config:
            self.config.pop(feed.section)

    def clear(self):
        """Empty the list.

        Uses list.pop(), so configuration sections are left untouched --
        load() relies on this when it rebuilds the list from the config.
        """
        while self:
            self.pop(0)

    def _get_configfiles(self):
        """Get configuration file paths

        Following the XDG Base Directory Specification.
        """
        config_home = _os.environ.get(
            'XDG_CONFIG_HOME',
            _os.path.expanduser(_os.path.join('~', '.config')))
        config_dirs = [config_home]
        config_dirs.extend(
            _os.environ.get(
                'XDG_CONFIG_DIRS',
                _os.path.join(ROOT_PATH, 'etc', 'xdg'),
                ).split(':'))
        # reverse because ConfigParser wants most significant last
        return list(reversed(
                [_os.path.join(config_dir, 'rss2email.cfg')
                 for config_dir in config_dirs]))

    def _get_datafile_path(self):
        """Get the data file path

        Following the XDG Base Directory Specification.
        """
        data_home = _os.environ.get(
            'XDG_DATA_HOME',
            _os.path.expanduser(_os.path.join('~', '.local', 'share')))
        return _os.path.join(data_home, 'rss2email.json')

    def load(self, require=False):
        """Read configuration files and then the feed-state data file.

        With ``require=True``, a missing data file raises
        _error.NoDataFile instead of being created empty.
        """
        _LOG.debug('load feed configuration from {}'.format(self.configfiles))
        if self.configfiles:
            read_configfiles = self.config.read(self.configfiles)
        else:
            read_configfiles = []
        _LOG.debug('loaded configuration from {}'.format(read_configfiles))
        self._load_feeds(require=require)

    def _load_feeds(self, require):
        """Load feed states from the data file, creating it if absent.

        Takes a shared fcntl lock on UNIX, falls back from JSON to the
        legacy Pickle format, upgrades old schema versions, and finally
        merges in any ``feed.*`` config sections that have no state yet.
        """
        _LOG.debug('load feed data from {}'.format(self.datafile_path))
        if not _os.path.exists(self.datafile_path):
            if require:
                raise _error.NoDataFile(feeds=self)
            _LOG.info('feed data file not found at {}'.format(self.datafile_path))
            _LOG.debug('creating an empty data file')
            dirname = _os.path.dirname(self.datafile_path)
            if dirname and not _os.path.isdir(dirname):
                _os.makedirs(dirname, mode=0o700, exist_ok=True)
            with _codecs.open(self.datafile_path, 'w', self.datafile_encoding) as f:
                self._save_feed_states(feeds=[], stream=f)
        try:
            self.datafile = _codecs.open(
                self.datafile_path, 'r', self.datafile_encoding)
        except IOError as e:
            raise _error.DataFileError(feeds=self) from e

        if UNIX:
            # Shared lock: held until _save_feeds() closes self.datafile.
            _fcntl.lockf(self.datafile, _fcntl.LOCK_SH)

        self.clear()

        # Snapshot logging state so it can be restored after the loaders
        # (which may emit noisy fallback messages) have run.
        level = _LOG.level
        handlers = list(_LOG.handlers)
        feeds = []
        try:
            data = _json.load(self.datafile)
        except ValueError as e:
            _LOG.info('could not load data file using JSON')
            data = self._load_pickled_data(self.datafile)
        version = data.get('version', None)
        if version != self.datafile_version:
            data = self._upgrade_state_data(data)
        for state in data['feeds']:
            feed = _feed.Feed(name='dummy-name')
            feed.set_state(state)
            if 'name' not in state:
                raise _error.DataFileError(
                    feeds=self,
                    message='missing feed name in datafile {}'.format(
                        self.datafile_path))
            feeds.append(feed)
        _LOG.setLevel(level)
        _LOG.handlers = handlers
        self.extend(feeds)

        for feed in self:
            feed.load_from_config(self.config)

        feed_names = set(feed.name for feed in self)
        # Feeds without a config section sort after all configured ones.
        order = _collections.defaultdict(lambda: (1e3, ''))
        for i,section in enumerate(self.config.sections()):
            if section.startswith('feed.'):
                name = section[len('feed.'):]
                order[name] = (i, name)
                if name not in feed_names:
                    _LOG.debug(
                        ('feed {} not found in feed file, '
                         'initializing from config').format(name))
                    self.append(_feed.Feed(name=name, config=self.config))
                    feed_names.add(name)
        def key(feed):
            return order[feed.name]
        self.sort(key=key)

    def _load_pickled_data(self, stream):
        """Fallback loader for the pre-JSON Pickle data format.

        ``stream`` is ignored: the text-mode handle opened for JSON is
        unusable for Pickle, so the file is reopened in binary mode.
        """
        _LOG.info('try and load data file using Pickle')
        with open(self.datafile_path, 'rb') as f:
            feeds = list(feed.get_state() for feed in _pickle.load(f))
        return {
            'version': self.datafile_version,
            'feeds': feeds,
            }

    def _upgrade_state_data(self, data):
        """Convert older data-file schemas to ``datafile_version``.

        Version 1 stored ``seen`` as {guid: id}; version 2 wraps each id
        in a dict.  Unknown versions raise NotImplementedError.
        """
        version = data.get('version', 'unknown')
        if version == 1:
            for feed in data['feeds']:
                seen = feed['seen']
                for guid,id_ in seen.items():
                    seen[guid] = {'id': id_}
            return data
        raise NotImplementedError(
            'cannot convert data file from version {} to {}'.format(
                version, self.datafile_version))

    def save(self):
        """Write the configuration and feed states back to disk.

        The config goes to the most significant (last) config file via an
        atomic tmpfile + fsync + os.replace sequence.
        """
        dst_config_file = _os.path.realpath(self.configfiles[-1])
        _LOG.debug('save feed configuration to {}'.format(dst_config_file))
        for feed in self:
            feed.save_to_config()
        dirname = _os.path.dirname(dst_config_file)
        if dirname and not _os.path.isdir(dirname):
            _os.makedirs(dirname, mode=0o700, exist_ok=True)
        tmpfile = dst_config_file + '.tmp'
        with open(tmpfile, 'w') as f:
            self.config.write(f)
            f.flush()
            _os.fsync(f.fileno())
        _os.replace(tmpfile, dst_config_file)
        self._save_feeds()

    def _save_feeds(self):
        """Atomically write feed states, then release the data-file lock."""
        _LOG.debug('save feed data to {}'.format(self.datafile_path))
        dirname = _os.path.dirname(self.datafile_path)
        if dirname and not _os.path.isdir(dirname):
            _os.makedirs(dirname, mode=0o700, exist_ok=True)
        tmpfile = self.datafile_path + '.tmp'
        with _codecs.open(tmpfile, 'w', self.datafile_encoding) as f:
            self._save_feed_states(feeds=self, stream=f)
            f.flush()
            _os.fsync(f.fileno())
        if UNIX:
            # Replace the file, then release the lock by closing the old one.
            _os.replace(tmpfile, self.datafile_path)
            if self.datafile is not None:
                self.datafile.close()  # release the lock
                self.datafile = None
        else:
            # On Windows we cannot replace the file while it is opened. And we have no lock.
            if self.datafile is not None:
                self.datafile.close()
                self.datafile = None
            _os.replace(tmpfile, self.datafile_path)

    def _save_feed_states(self, feeds, stream):
        """Serialize ``feeds`` as versioned JSON onto ``stream``."""
        _json.dump(
            {'version': self.datafile_version,
             'feeds': list(feed.get_state() for feed in feeds),
             },
            stream,
            indent=2,
            separators=(',', ': '),
            )
        stream.write('\n')

    def new_feed(self, name=None, prefix='feed-', **kwargs):
        """Return a new feed, possibly auto-generating a name.

        >>> feeds = Feeds()
        >>> print(feeds.new_feed(name='my-feed'))
        my-feed (None -> a@b.com)
        >>> print(feeds.new_feed())
        feed-0 (None -> a@b.com)
        >>> print(feeds.new_feed())
        feed-1 (None -> a@b.com)
        >>> print(feeds.new_feed(name='feed-1'))
        Traceback (most recent call last):
          ...
        rss2email.error.DuplicateFeedName: duplicate feed name 'feed-1'
        """
        feed_names = [feed.name for feed in self]
        if name is None:
            i = 0
            while True:
                name = '{}{}'.format(prefix, i)
                if name not in feed_names:
                    break
                i += 1
        elif name in feed_names:
            feed = self[name]
            raise _error.DuplicateFeedName(name=feed.name, feed=feed)
        feed = _feed.Feed(name=name, **kwargs)
        self.append(feed)
        return feed