1import re
2
3from loguru import logger
4
5from flexget import plugin
6from flexget.entry import Entry
7from flexget.event import event
8
# Plugin-specific logger: the imported loguru logger re-bound with this plugin's name
logger = logger.bind(name='twitterfeed')

# Maximum number of tweets requested in a single timeline call; larger
# fetches are split into chunks of at most this size
CHUNK_SIZE = 200

# Upper bound on the number of tweets fetched when the caller does not ask
# for a specific number (for example when fetching everything newer than a
# since_id, which may be very old)
MAX_TWEETS = 1000
17
18
class TwitterFeed:
    """Create entries from the tweets of a twitter account.

    Example::

      twitterfeed:
        account: <account>
        consumer_key: <consumer_key>
        consumer_secret: <consumer_secret>
        access_token_key: <access_token_key>
        access_token_secret: <access_token_secret>

    By default the 50 most recent tweets are fetched on every run, which
    corresponds to the option::

      all_entries: yes

    To change that default number::

      tweets: 75

    Beware that Twitter only allows 300 requests per 15-minute window.

    If you want to process only tweets newer than the last one seen by the
    previous run::

      all_entries: no

    The behaviour of that option changes when the task's configuration has
    been modified. In that case new tweets are fetched first and, if there
    are fewer than `tweets` of them, older ones are fetched as well so that
    `tweets` of them are returned in total.
    """

    schema = {
        'type': 'object',
        'properties': {
            'account': {'type': 'string'},
            'consumer_key': {'type': 'string'},
            'consumer_secret': {'type': 'string'},
            'access_token_key': {'type': 'string'},
            'access_token_secret': {'type': 'string'},
            'all_entries': {'type': 'boolean', 'default': True},
            'tweets': {'type': 'number', 'default': 50},
        },
        'required': [
            'account',
            'consumer_key',
            'consumer_secret',
            'access_token_secret',
            'access_token_key',
        ],
        'additionalProperties': False,
    }

    def on_task_start(self, task, config):
        """Fail early with a PluginError when the `twitter` module is missing."""
        try:
            import twitter  # noqa
        except ImportError as e:
            raise plugin.PluginError('twitter module required', logger=logger) from e

    def on_task_input(self, task, config):
        """Fetch tweets from the configured account and return them as entries.

        With `all_entries` enabled the last `tweets` tweets are fetched.
        Otherwise only tweets newer than the `since_id` stored in the task's
        simple persistence are fetched, and the id of the first tweet of the
        result is stored as the new `since_id`.

        :param task: the running task; its `simple_persistence`,
            `config_modified` and `name` attributes are used
        :param config: plugin configuration matching `schema`
        :return: list of entries, one per fetched tweet
        :raises plugin.PluginError: if the API client cannot be created or
            the timeline cannot be fetched
        """
        import twitter

        account = config['account']
        wanted = config['tweets']
        logger.debug('Looking at twitter account `{}`', account)

        try:
            self.api = twitter.Api(
                consumer_key=config['consumer_key'],
                consumer_secret=config['consumer_secret'],
                access_token_key=config['access_token_key'],
                access_token_secret=config['access_token_secret'],
            )
        except twitter.TwitterError as ex:
            raise plugin.PluginError(
                'Unable to authenticate to twitter for task %s: %s' % (task.name, ex)
            ) from ex

        if config['all_entries']:
            logger.debug('Fetching {} last tweets from {} timeline', wanted, account)
            tweets = self.get_tweets(account, number=wanted)
        else:
            # Fetching from where we left off last time
            since_id = task.simple_persistence.get('since_id', None)
            if since_id:
                logger.debug('Fetching from tweet id {} from {} timeline', since_id, account)
                kwargs = {'since_id': since_id}
            else:
                logger.debug('No since_id, fetching last {} tweets', wanted)
                kwargs = {'number': wanted}

            tweets = self.get_tweets(account, **kwargs)
            if task.config_modified and len(tweets) < wanted:
                logger.debug('Configuration modified; fetching at least {} tweets', wanted)
                # `max_id` is inclusive in the Twitter timeline API, so start
                # one below the oldest tweet we already have (as `get_tweets`
                # does between chunks); otherwise that tweet is fetched twice.
                max_id = tweets[-1].id - 1 if tweets else None
                remaining_tweets = wanted - len(tweets)
                tweets = tweets + self.get_tweets(account, max_id=max_id, number=remaining_tweets)
            if tweets:
                # The first tweet of the list is remembered as the starting
                # point of the next run (the list is presumably newest first).
                last_tweet = tweets[0]
                logger.debug('New last tweet id: {}', last_tweet.id)
                task.simple_persistence['since_id'] = last_tweet.id

        logger.debug('{} tweets fetched', len(tweets))
        for t in tweets:
            logger.debug('id:{}', t.id)

        return [self.entry_from_tweet(e) for e in tweets]

    def get_tweets(self, account, number=MAX_TWEETS, since_id=None, max_id=None):
        """Fetch tweets from twitter account `account`.

        The timeline is requested in chunks of at most `CHUNK_SIZE` tweets
        (retweets and replies excluded) until `number` tweets have been
        collected or a request returns nothing. Requires `self.api` to have
        been set (done by `on_task_input`).

        :param account: screen name of the account to read
        :param number: maximum number of tweets to return
        :param since_id: passed to the API as the lower tweet id bound
        :param max_id: passed to the API as the upper tweet id bound for the
            first chunk
        :return: list of the fetched tweet objects, in the order received
        :raises plugin.PluginError: if the API call fails
        """
        import twitter

        all_tweets = []
        while number > 0:
            try:
                tweets = self.api.GetUserTimeline(
                    screen_name=account,
                    include_rts=False,
                    exclude_replies=True,
                    count=min(number, CHUNK_SIZE),
                    since_id=since_id,
                    max_id=max_id,
                )
            except twitter.TwitterError as e:
                raise plugin.PluginError(
                    'Unable to fetch timeline for %s: %s' % (account, e)
                ) from e

            if not tweets:
                break

            all_tweets += tweets
            number -= len(tweets)
            # Continue below the last tweet of this chunk so the next request
            # does not return it again.
            max_id = tweets[-1].id - 1

        return all_tweets

    def entry_from_tweet(self, tweet):
        """Build an entry from `tweet`.

        The tweet text becomes the title. Every http(s) URL found in the text
        is stored in `urls`, and the first one (if any) in `url`.
        """
        new_entry = Entry()
        new_entry['title'] = tweet.text
        urls = re.findall(r'(https?://\S+)', tweet.text)
        new_entry['urls'] = urls
        if urls:
            new_entry['url'] = urls[0]
        return new_entry
165
166
@event('plugin.register')
def register_plugin():
    """Register the `twitterfeed` plugin when the `plugin.register` event fires."""
    plugin.register(
        TwitterFeed,
        'twitterfeed',
        api_ver=2,
    )
170