import re

from loguru import logger

from flexget import plugin
from flexget.entry import Entry
from flexget.event import event

logger = logger.bind(name='twitterfeed')

# Size of the chunks when fetching a timeline
CHUNK_SIZE = 200

# Maximum number of tweets to fetch no matter how (if there is no
# since_id for example or a too old since_id)
MAX_TWEETS = 1000


class TwitterFeed:
    """Parses a twitter feed

    Example::

      twitterfeed:
        account: <account>
        consumer_key: <consumer_key>
        consumer_secret: <consumer_secret>
        access_token_key: <access_token_key>
        access_token_secret: <access_token_secret>

    By default, the 50 last tweets are fetched corresponding to the option:
      all_entries: yes

    To change that default number:
      tweets: 75

    Beware that Twitter only allows 300 requests during a 15 minutes
    window.

    If you want to process only new tweets:
      all_entries: no

    That option's behaviour is changed if the corresponding task's
    configuration has been changed. In that case, new tweets are
    fetched and if there are fewer than `tweets`, older ones are
    fetched to have `tweets` of them in total.
    """

    schema = {
        'type': 'object',
        'properties': {
            'account': {'type': 'string'},
            'consumer_key': {'type': 'string'},
            'consumer_secret': {'type': 'string'},
            'access_token_key': {'type': 'string'},
            'access_token_secret': {'type': 'string'},
            'all_entries': {'type': 'boolean', 'default': True},
            'tweets': {'type': 'number', 'default': 50},
        },
        'required': [
            'account',
            'consumer_key',
            'consumer_secret',
            'access_token_secret',
            'access_token_key',
        ],
        'additionalProperties': False,
    }

    def on_task_start(self, task, config):
        """Fail early with a PluginError if the `twitter` module cannot be imported."""
        try:
            import twitter  # noqa
        except ImportError as ex:
            raise plugin.PluginError('twitter module required', logger=logger) from ex

    def on_task_input(self, task, config):
        """Fetch tweets for the configured account and turn them into entries.

        With ``all_entries`` enabled, the last ``tweets`` tweets are fetched.
        Otherwise only tweets newer than the ``since_id`` stored in the task's
        simple persistence are fetched (or the last ``tweets`` ones when no
        ``since_id`` is stored). If the task configuration was modified and
        fewer than ``tweets`` tweets came back, older ones are fetched to
        fill the gap.

        :param task: the running task; its ``name``, ``config_modified`` and
            ``simple_persistence`` attributes are used.
        :param config: plugin configuration matching ``schema``.
        :return: list of entries, one per fetched tweet.
        :raises plugin.PluginError: if the API object cannot be created or
            the timeline cannot be fetched.
        """
        import twitter

        account = config['account']
        logger.debug('Looking at twitter account `{}`', account)

        try:
            self.api = twitter.Api(
                consumer_key=config['consumer_key'],
                consumer_secret=config['consumer_secret'],
                access_token_key=config['access_token_key'],
                access_token_secret=config['access_token_secret'],
            )
        except twitter.TwitterError as ex:
            raise plugin.PluginError(
                'Unable to authenticate to twitter for task %s: %s' % (task.name, ex)
            ) from ex

        if config['all_entries']:
            logger.debug(
                'Fetching {} last tweets from {} timeline', config['tweets'], config['account']
            )
            tweets = self.get_tweets(account, number=config['tweets'])
        else:
            # Fetching from where we left off last time
            since_id = task.simple_persistence.get('since_id', None)
            if since_id:
                logger.debug(
                    'Fetching from tweet id {} from {} timeline', since_id, config['account']
                )
                kwargs = {'since_id': since_id}
            else:
                logger.debug('No since_id, fetching last {} tweets', config['tweets'])
                kwargs = {'number': config['tweets']}

            tweets = self.get_tweets(account, **kwargs)
            if task.config_modified and len(tweets) < config['tweets']:
                logger.debug(
                    'Configuration modified; fetching at least {} tweets', config['tweets']
                )
                # `max_id` is inclusive on the Twitter side, so start strictly
                # below the oldest tweet we already hold; otherwise that tweet
                # would be returned a second time and yield a duplicate entry.
                max_id = tweets[-1].id - 1 if tweets else None
                remaining_tweets = config['tweets'] - len(tweets)
                tweets = tweets + self.get_tweets(
                    account, max_id=max_id, number=remaining_tweets
                )
            if tweets:
                # The first tweet of the list is used as the resume point for
                # the next run.
                last_tweet = tweets[0]
                logger.debug('New last tweet id: {}', last_tweet.id)
                task.simple_persistence['since_id'] = last_tweet.id

        logger.debug('{} tweets fetched', len(tweets))
        for t in tweets:
            logger.debug('id:{}', t.id)

        return [self.entry_from_tweet(e) for e in tweets]

    def get_tweets(self, account, number=MAX_TWEETS, since_id=None, max_id=None):
        """Fetch tweets from twitter account `account`.

        The timeline is requested in chunks of at most ``CHUNK_SIZE`` tweets
        (retweets and replies excluded) until ``number`` tweets have been
        collected or a request returns nothing.

        :param account: screen name of the account to read.
        :param number: maximum number of tweets wanted.
        :param since_id: passed through to the API as ``since_id``.
        :param max_id: passed through to the API as ``max_id`` for the first
            request; subsequent requests continue below the last tweet seen.
        :return: list of tweets in the order returned by the API.
        :raises plugin.PluginError: if the API raises ``twitter.TwitterError``.
        """
        import twitter

        all_tweets = []
        while number > 0:
            try:
                tweets = self.api.GetUserTimeline(
                    screen_name=account,
                    include_rts=False,
                    exclude_replies=True,
                    count=min(number, CHUNK_SIZE),
                    since_id=since_id,
                    max_id=max_id,
                )
            except twitter.TwitterError as e:
                raise plugin.PluginError(
                    'Unable to fetch timeline for %s: %s' % (account, e)
                ) from e

            if not tweets:
                break

            all_tweets += tweets
            number -= len(tweets)
            # Continue strictly below the last tweet of this chunk so the
            # next request does not return it again.
            max_id = tweets[-1].id - 1

        return all_tweets

    def entry_from_tweet(self, tweet):
        """Build an entry from a tweet.

        The tweet text becomes the entry title; every ``http(s)://`` URL
        found in the text is stored under ``urls`` and the first one, if
        any, under ``url``.
        """
        new_entry = Entry()
        new_entry['title'] = tweet.text
        urls = re.findall(r'(https?://\S+)', tweet.text)
        new_entry['urls'] = urls
        if urls:
            new_entry['url'] = urls[0]
        return new_entry


@event('plugin.register')
def register_plugin():
    plugin.register(TwitterFeed, 'twitterfeed', api_ver=2)