1# Copyright (C) 2004-2021 Aaron Swartz
2#                         Andrey Zelenchuk <azelenchuk@parallels.com>
3#                         Andrey Zelenchuk <azelenchuk@plesk.com>
4#                         Brian Lalor
5#                         Dean Jackson
6#                         Erik Hetzner
7#                         Etienne Millon <me@emillon.org>
8#                         Joey Hess
9#                         Kaashif Hymabaccus <kaashif@kaashif.co.uk>
10#                         Lindsey Smith <lindsey.smith@gmail.com>
11#                         Léo Gaspard <leo@gaspard.io>
12#                         Marcel Ackermann
13#                         Martin 'Joey' Schulze
14#                         Matej Cepl
15#                         Profpatsch <mail@profpatsch.de>
16#                         Raphaël Droz <raphael.droz+floss@gmail.com>
17#                         W. Trevor King <wking@tremily.us>
18#                         ryneeverett <ryneeverett@gmail.com>
19#
20# This file is part of rss2email.
21#
22# rss2email is free software: you can redistribute it and/or modify it under
23# the terms of the GNU General Public License as published by the Free Software
24# Foundation, either version 2 of the License, or (at your option) version 3 of
25# the License.
26#
27# rss2email is distributed in the hope that it will be useful, but WITHOUT ANY
28# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
29# A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
30#
31# You should have received a copy of the GNU General Public License along with
32# rss2email.  If not, see <http://www.gnu.org/licenses/>.
33
34"""Define the ``Feed`` class for handling a list of feeds
35"""
36
37import codecs as _codecs
38import collections as _collections
39import os as _os
40import json as _json
41import pickle as _pickle
42import sys as _sys
43
44from . import LOG as _LOG
45from . import config as _config
46from . import error as _error
47from . import feed as _feed
48
49try:
50    import fcntl as _fcntl
51    UNIX = True
52except ImportError:
53    UNIX = False
54
55# Path to the filesystem root, '/' on POSIX.1 (IEEE Std 1003.1-2008).
56ROOT_PATH = _os.path.splitdrive(_sys.executable)[0] or _os.sep
57
58
59class Feeds (list):
60    """Utility class for rss2email activity.
61
62    >>> import codecs
63    >>> import os.path
64    >>> import json
65    >>> import tempfile
66    >>> from .feed import Feed
67
68    Setup a temporary directory to load.
69
70    >>> tmpdir = tempfile.TemporaryDirectory(prefix='rss2email-test-')
71    >>> configfile = os.path.join(tmpdir.name, 'rss2email.cfg')
72    >>> with open(configfile, 'w') as f:
73    ...     count = f.write('[DEFAULT]\\n')
74    ...     count = f.write('to = a@b.com\\n')
75    ...     count = f.write('[feed.f1]\\n')
76    ...     count = f.write('url = http://a.net/feed.atom\\n')
77    ...     count = f.write('to = x@y.net\\n')
78    ...     count = f.write('[feed.f2]\\n')
79    ...     count = f.write('url = http://b.com/rss.atom\\n')
80    >>> datafile = os.path.join(tmpdir.name, 'rss2email.json')
81    >>> with codecs.open(datafile, 'w', Feeds.datafile_encoding) as f:
82    ...     json.dump({
83    ...             'version': 1,
84    ...             'feeds': [
85    ...                 Feed(name='f1').get_state(),
86    ...                 Feed(name='f2').get_state(),
87    ...                 ],
88    ...             }, f)
89
90    >>> feeds = Feeds(configfiles=[configfile,], datafile=datafile)
91    >>> feeds.load()
92    >>> for feed in feeds:
93    ...     print(feed)
94    f1 (http://a.net/feed.atom -> x@y.net)
95    f2 (http://b.com/rss.atom -> a@b.com)
96
97    You can index feeds by array index or by feed name.
98
99    >>> feeds[0]
100    <Feed f1 (http://a.net/feed.atom -> x@y.net)>
101    >>> feeds[-1]
102    <Feed f2 (http://b.com/rss.atom -> a@b.com)>
103    >>> feeds['f1']
104    <Feed f1 (http://a.net/feed.atom -> x@y.net)>
105    >>> feeds['missing']
106    Traceback (most recent call last):
107      ...
108    IndexError: missing
109
110    Tweak the feed configuration and save.
111
112    >>> feeds[0].to = None
113    >>> feeds.save()
114    >>> print(open(configfile, 'r').read().rstrip('\\n'))
115    ... # doctest: +REPORT_UDIFF, +ELLIPSIS
116    [DEFAULT]
117    from = user@rss2email.invalid
118    ...
119    verbose = warning
120    <BLANKLINE>
121    [feed.f1]
122    url = http://a.net/feed.atom
123    <BLANKLINE>
124    [feed.f2]
125    url = http://b.com/rss.atom
126
127    Cleanup the temporary directory.
128
129    >>> tmpdir.cleanup()
130    """
131    datafile_version = 2
132    datafile_encoding = 'utf-8'
133
134    def __init__(self, configfiles=None, datafile_path=None, config=None):
135        super(Feeds, self).__init__()
136        if configfiles is None:
137            configfiles = self._get_configfiles()
138        self.configfiles = configfiles
139        if datafile_path is None:
140            datafile_path = self._get_datafile_path()
141        self.datafile_path = _os.path.realpath(datafile_path)
142        if config is None:
143            config = _config.CONFIG
144        self.config = config
145        self.datafile = None
146
147    def __getitem__(self, key):
148        for feed in self:
149            if feed.name == key:
150                return feed
151        try:
152            index = int(key)
153        except ValueError as e:
154            raise IndexError(key) from e
155        return super(Feeds, self).__getitem__(index)
156
157    def __append__(self, feed):
158        feed.load_from_config(self.config)
159        feed = super(Feeds, self).append(feed)
160
161    def __pop__(self, index=-1):
162        feed = super(Feeds, self).pop(index=index)
163        if feed.section in self.config:
164            self.config.pop(feed.section)
165        return feed
166
167    def index(self, index):
168        if isinstance(index, int):
169            try:
170                return self[index]
171            except IndexError as e:
172                raise _error.FeedIndexError(index=index, feeds=self) from e
173        elif isinstance(index, str):
174            try:
175                index = int(index)
176            except ValueError:
177                pass
178            else:
179                return self.index(index)
180            for feed in self:
181                if feed.name == index:
182                    return feed
183        try:
184            super(Feeds, self).index(index)
185        except (IndexError, ValueError) as e:
186            raise _error.FeedIndexError(index=index, feeds=self) from e
187
188    def remove(self, feed):
189        super(Feeds, self).remove(feed)
190        if feed.section in self.config:
191            self.config.pop(feed.section)
192
193    def clear(self):
194        while self:
195            self.pop(0)
196
197    def _get_configfiles(self):
198        """Get configuration file paths
199
200        Following the XDG Base Directory Specification.
201        """
202        config_home = _os.environ.get(
203            'XDG_CONFIG_HOME',
204            _os.path.expanduser(_os.path.join('~', '.config')))
205        config_dirs = [config_home]
206        config_dirs.extend(
207            _os.environ.get(
208                'XDG_CONFIG_DIRS',
209                _os.path.join(ROOT_PATH, 'etc', 'xdg'),
210                ).split(':'))
211        # reverse because ConfigParser wants most significant last
212        return list(reversed(
213                [_os.path.join(config_dir, 'rss2email.cfg')
214                 for config_dir in config_dirs]))
215
216    def _get_datafile_path(self):
217        """Get the data file path
218
219        Following the XDG Base Directory Specification.
220        """
221        data_home = _os.environ.get(
222            'XDG_DATA_HOME',
223            _os.path.expanduser(_os.path.join('~', '.local', 'share')))
224        return _os.path.join(data_home, 'rss2email.json')
225
226    def load(self, require=False):
227        _LOG.debug('load feed configuration from {}'.format(self.configfiles))
228        if self.configfiles:
229            read_configfiles = self.config.read(self.configfiles)
230        else:
231            read_configfiles = []
232        _LOG.debug('loaded configuration from {}'.format(read_configfiles))
233        self._load_feeds(require=require)
234
235    def _load_feeds(self, require):
236        _LOG.debug('load feed data from {}'.format(self.datafile_path))
237        if not _os.path.exists(self.datafile_path):
238            if require:
239                raise _error.NoDataFile(feeds=self)
240            _LOG.info('feed data file not found at {}'.format(self.datafile_path))
241            _LOG.debug('creating an empty data file')
242            dirname = _os.path.dirname(self.datafile_path)
243            if dirname and not _os.path.isdir(dirname):
244                _os.makedirs(dirname, mode=0o700, exist_ok=True)
245            with _codecs.open(self.datafile_path, 'w', self.datafile_encoding) as f:
246                self._save_feed_states(feeds=[], stream=f)
247        try:
248            self.datafile = _codecs.open(
249                self.datafile_path, 'r', self.datafile_encoding)
250        except IOError as e:
251            raise _error.DataFileError(feeds=self) from e
252
253        if UNIX:
254            _fcntl.lockf(self.datafile, _fcntl.LOCK_SH)
255
256        self.clear()
257
258        level = _LOG.level
259        handlers = list(_LOG.handlers)
260        feeds = []
261        try:
262            data = _json.load(self.datafile)
263        except ValueError as e:
264            _LOG.info('could not load data file using JSON')
265            data = self._load_pickled_data(self.datafile)
266        version = data.get('version', None)
267        if version != self.datafile_version:
268            data = self._upgrade_state_data(data)
269        for state in data['feeds']:
270            feed = _feed.Feed(name='dummy-name')
271            feed.set_state(state)
272            if 'name' not in state:
273                raise _error.DataFileError(
274                    feeds=self,
275                    message='missing feed name in datafile {}'.format(
276                        self.datafile_path))
277            feeds.append(feed)
278        _LOG.setLevel(level)
279        _LOG.handlers = handlers
280        self.extend(feeds)
281
282        for feed in self:
283            feed.load_from_config(self.config)
284
285        feed_names = set(feed.name for feed in self)
286        order = _collections.defaultdict(lambda: (1e3, ''))
287        for i,section in enumerate(self.config.sections()):
288            if section.startswith('feed.'):
289                name = section[len('feed.'):]
290                order[name] = (i, name)
291                if name not in feed_names:
292                    _LOG.debug(
293                        ('feed {} not found in feed file, '
294                         'initializing from config').format(name))
295                    self.append(_feed.Feed(name=name, config=self.config))
296                    feed_names.add(name)
297        def key(feed):
298            return order[feed.name]
299        self.sort(key=key)
300
301    def _load_pickled_data(self, stream):
302        _LOG.info('try and load data file using Pickle')
303        with open(self.datafile_path, 'rb') as f:
304            feeds = list(feed.get_state() for feed in _pickle.load(f))
305        return {
306            'version': self.datafile_version,
307            'feeds': feeds,
308            }
309
310    def _upgrade_state_data(self, data):
311        version = data.get('version', 'unknown')
312        if version == 1:
313            for feed in data['feeds']:
314                seen = feed['seen']
315                for guid,id_ in seen.items():
316                    seen[guid] = {'id': id_}
317            return data
318        raise NotImplementedError(
319            'cannot convert data file from version {} to {}'.format(
320                version, self.datafile_version))
321
322    def save(self):
323        dst_config_file = _os.path.realpath(self.configfiles[-1])
324        _LOG.debug('save feed configuration to {}'.format(dst_config_file))
325        for feed in self:
326            feed.save_to_config()
327        dirname = _os.path.dirname(dst_config_file)
328        if dirname and not _os.path.isdir(dirname):
329            _os.makedirs(dirname, mode=0o700, exist_ok=True)
330        tmpfile = dst_config_file + '.tmp'
331        with open(tmpfile, 'w') as f:
332            self.config.write(f)
333            f.flush()
334            _os.fsync(f.fileno())
335        _os.replace(tmpfile, dst_config_file)
336        self._save_feeds()
337
338    def _save_feeds(self):
339        _LOG.debug('save feed data to {}'.format(self.datafile_path))
340        dirname = _os.path.dirname(self.datafile_path)
341        if dirname and not _os.path.isdir(dirname):
342            _os.makedirs(dirname, mode=0o700, exist_ok=True)
343        tmpfile = self.datafile_path + '.tmp'
344        with _codecs.open(tmpfile, 'w', self.datafile_encoding) as f:
345            self._save_feed_states(feeds=self, stream=f)
346            f.flush()
347            _os.fsync(f.fileno())
348        if UNIX:
349            # Replace the file, then release the lock by closing the old one.
350            _os.replace(tmpfile, self.datafile_path)
351            if self.datafile is not None:
352                self.datafile.close()  # release the lock
353                self.datafile = None
354        else:
355            # On Windows we cannot replace the file while it is opened. And we have no lock.
356            if self.datafile is not None:
357                self.datafile.close()
358                self.datafile = None
359            _os.replace(tmpfile, self.datafile_path)
360
361    def _save_feed_states(self, feeds, stream):
362        _json.dump(
363            {'version': self.datafile_version,
364             'feeds': list(feed.get_state() for feed in feeds),
365             },
366            stream,
367            indent=2,
368            separators=(',', ': '),
369            )
370        stream.write('\n')
371
372    def new_feed(self, name=None, prefix='feed-', **kwargs):
373        """Return a new feed, possibly auto-generating a name.
374
375        >>> feeds = Feeds()
376        >>> print(feeds.new_feed(name='my-feed'))
377        my-feed (None -> a@b.com)
378        >>> print(feeds.new_feed())
379        feed-0 (None -> a@b.com)
380        >>> print(feeds.new_feed())
381        feed-1 (None -> a@b.com)
382        >>> print(feeds.new_feed(name='feed-1'))
383        Traceback (most recent call last):
384          ...
385        rss2email.error.DuplicateFeedName: duplicate feed name 'feed-1'
386        """
387        feed_names = [feed.name for feed in self]
388        if name is None:
389            i = 0
390            while True:
391                name = '{}{}'.format(prefix, i)
392                if name not in feed_names:
393                    break
394                i += 1
395        elif name in feed_names:
396            feed = self[name]
397            raise _error.DuplicateFeedName(name=feed.name, feed=feed)
398        feed = _feed.Feed(name=name, **kwargs)
399        self.append(feed)
400        return feed
401