#!/usr/bin/env python
# -*- coding: utf-8 -*-

# Copyright(C) 2012 Romain Bignon
#
# This file is part of weboob.
#
# weboob is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# weboob is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with weboob. If not, see <http://www.gnu.org/licenses/>.

from __future__ import print_function

import itertools
import logging
import os
import re
import sys
import urllib
import urlparse
from datetime import datetime, timedelta
from math import log
from random import choice, randint
from threading import Event, Thread

from dateutil.parser import parse as parse_date
from irc.bot import SingleServerIRCBot

from weboob.browser import Browser
from weboob.browser.exceptions import HTTPNotFound
from weboob.browser.pages import HTMLPage
from weboob.core import Weboob
from weboob.exceptions import BrowserHTTPError, BrowserUnavailable
from weboob.tools.application.base import ApplicationStorage
from weboob.tools.misc import get_backtrace, to_unicode
from weboob.tools.storage import StandardStorage

# Runtime configuration, overridable through environment variables.
IRC_CHANNELS = os.getenv('BOOBOT_CHANNELS', '#weboob').split(',')
IRC_NICKNAME = os.getenv('BOOBOT_NICKNAME', 'boobot')
IRC_SERVER = os.getenv('BOOBOT_SERVER', 'dickson.freenode.net')
# Regular expressions matched against the message source; matching senders are ignored.
IRC_IGNORE = [re.compile(i) for i in os.getenv('BOOBOT_IGNORE', '!~?irker@').split(',')]
STORAGE_FILE = os.getenv('BOOBOT_STORAGE', 'boobot.storage')


def fixurl(url):
    """
    Normalize an URL seen on IRC into an encoded byte string.

    The '/#!/' hashbang marker is removed, user and password are
    percent-quoted, the host is IDNA-encoded, the other components are
    UTF-8 encoded and doubled slashes in the path are collapsed.

    :param url: URL as text (converted with to_unicode first)
    :returns: the URL reassembled with urlparse.urlunsplit
    """
    url = to_unicode(url)

    # drop the hashbang marker used by JavaScript-driven URLs
    url = url.replace('/#!/', '/')

    # parse it
    parsed = urlparse.urlsplit(url)

    # divide the netloc further
    userpass, at, hostport = parsed.netloc.rpartition('@')
    user, colon1, pass_ = userpass.partition(':')
    host, colon2, port = hostport.partition(':')

    # encode each component
    scheme = parsed.scheme.encode('utf8')
    user = urllib.quote(user.encode('utf8'))
    colon1 = colon1.encode('utf8')
    pass_ = urllib.quote(pass_.encode('utf8'))
    at = at.encode('utf8')
    host = host.encode('idna')
    colon2 = colon2.encode('utf8')
    port = port.encode('utf8')
    path = '/'.join(pce.encode('utf8') for pce in parsed.path.split('/'))
    # while valid, it is most likely an error
    path = path.replace('//', '/')
    query = parsed.query.encode('utf8')
    fragment = parsed.fragment.encode('utf8')

    # put it back together
    netloc = ''.join((user, colon1, pass_, at, host, colon2, port))
    return urlparse.urlunsplit((scheme, netloc, path, query, fragment))


class BoobotBrowser(Browser):
    """Browser used to describe the URLs posted on the channels."""

    TIMEOUT = 3.0

    def urlinfo(self, url, maxback=2):
        """
        Fetch basic information about an URL.

        A HEAD request is tried first. On a 404, up to `maxback` trailing
        non-alphanumeric characters are stripped from the URL (one per
        retry), as they are likely punctuation from the IRC message. When
        the server answers 501 or 405 to HEAD, a regular request is made
        instead.

        :param url: URL to inspect
        :param maxback: number of trailing characters that may still be stripped
        :returns: tuple (content_type, human readable size or None, title or None)
        :raises: HTTPNotFound, BrowserHTTPError when the URL cannot be fetched
        """
        if urlparse.urlsplit(url).netloc == 'mobile.twitter.com':
            url = url.replace('mobile.twitter.com', 'twitter.com', 1)
        try:
            r = self.open(url, method='HEAD')
            body = False
        except HTTPNotFound:
            if maxback and not url[-1].isalnum():
                return self.urlinfo(url[:-1], maxback-1)
            # bare raise keeps the original traceback
            raise
        except BrowserHTTPError as e:
            if e.response.status_code in (501, 405):
                r = self.open(url)
                body = True
            else:
                raise
        content_type = r.headers.get('Content-Type')
        try:
            size = int(r.headers.get('Content-Length'))
        except (TypeError, ValueError):
            # header missing (None) or not a valid integer
            size = None
            hsize = None
        else:
            hsize = self.human_size(size)
        # Without a Content-Type, fall back on the extension. re.search is
        # required here: re.match only matches at the start of the URL, so an
        # end-anchored extension pattern could never match.
        is_html = ('html' in content_type) if content_type else re.search(r'\.x?html?$', url)
        title = None
        if is_html:
            if not body:
                r = self.open(url)
                # update the size as we might not have it from the headers
                size = len(r.content)
                hsize = self.human_size(size)

            page = HTMLPage(self, r)

            for title in page.doc.xpath('//head/title'):
                title = to_unicode(title.text_content()).strip()
                title = ' '.join(title.split())
            if urlparse.urlsplit(url).netloc.endswith('twitter.com'):
                # for a tweet, prefer the tweet text over the page title
                for title in page.doc.getroot().cssselect('.permalink-tweet .tweet-text'):
                    title = to_unicode(title.text_content()).strip()
                    title = ' '.join(title.splitlines())

        return content_type, hsize, title

    def human_size(self, size):
        """
        Format a size in bytes with a binary unit, e.g. 1536 -> '1.5 KiB'.

        :param size: size in bytes; None, 0 and negative values give '0 B'
        :returns: formatted string
        """
        if not size or size < 0:
            return '0 B'

        units = ('B', 'KiB', 'MiB', 'GiB',
                 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB')
        # clamp so that an out-of-range value can never index outside `units`
        exponent = max(0, min(int(log(size, 1024)), len(units) - 1))
        return "%.1f %s" % (float(size) / pow(1024, exponent), units[exponent])


class Task(object):
    """A message to send on a channel once a given date is reached."""

    def __init__(self, datetime, message, channel=None):
        self.datetime = datetime
        self.message = message
        self.channel = channel


class MyThread(Thread):
    """Background thread running the weboob loop and the periodic checks."""

    daemon = True

    # Words which make a DLFP message worth relaying to the channel.
    KEYWORDS = (
        'weboob', 'videoob', 'havesex', 'havedate', 'monboob', 'boobmsg',
        'flatboob', 'boobill', 'pastoob', 'radioob', 'translaboob', 'traveloob', 'handjoob',
        'boobathon', 'boobank', 'boobtracker', 'comparoob', 'wetboobs',
        'webcontentedit', 'weboorrents', 'assnet',
        'budget insight', 'budget-insight', 'budgetinsight', 'budgea',
    )

    def __init__(self, bot):
        Thread.__init__(self)
        self.weboob = Weboob(storage=StandardStorage(STORAGE_FILE))
        self.weboob.load_backends()
        self.bot = bot
        self.bot.set_weboob(self.weboob)

    def run(self):
        """Wait until every channel is joined, then run the weboob loop."""
        for ev in self.bot.joined.values():
            ev.wait()

        # NOTE(review): the first argument is presumably an interval in seconds -- confirm
        self.weboob.repeat(5, self.check_tasks)
        self.weboob.repeat(300, self.check_board)
        self.weboob.repeat(600, self.check_dlfp)
        self.weboob.repeat(600, self.check_twitter)

        self.weboob.loop()

    def find_keywords(self, text):
        """
        Return the first keyword found in `text` (case-insensitive), or None.
        """
        lowered = text.lower()
        for word in self.KEYWORDS:
            if word in lowered:
                return word
        return None

    def check_twitter(self):
        """Announce the unseen tweets returned by the 'weboob' twitter search."""
        nb_tweets = 10

        for backend in self.weboob.iter_backends(module='twitter'):
            for thread in list(itertools.islice(backend.iter_resources(None, ['search', 'weboob']),
                                                0,
                                                nb_tweets)):

                if not backend.storage.get('lastpurge'):
                    backend.storage.set('lastpurge', datetime.now() - timedelta(days=60))
                    backend.storage.save()

                if thread.id not in backend.storage.get('seen', default={}) and\
                   thread.date > backend.storage.get('lastpurge'):
                    _item = thread.id.split('#')
                    url = 'https://twitter.com/%s/status/%s' % (_item[0], _item[1])
                    for msg in self.bot.on_url(url):
                        self.bot.send_message('%s: %s' % (_item[0], url))
                        self.bot.send_message(msg)

                    backend.set_message_read(backend.fill_thread(thread, ['root']).root)

    def check_dlfp(self):
        """Announce unread DLFP messages containing a keyword, and mark all of them read."""
        for msg in self.weboob.do('iter_unread_messages', backends=['dlfp']):
            word = self.find_keywords(msg.content)
            if word is not None:
                url = msg.signature[msg.signature.find('https://linuxfr'):]
                self.bot.send_message('[DLFP] %s talks about %s: %s' % (
                    msg.sender, word, url))
            self.weboob[msg.backend].set_message_read(msg)

    def check_board(self):
        """Relay new DLFP board messages containing a keyword (bold-highlighted)."""
        def iter_messages(backend):
            return backend.browser.iter_new_board_messages()

        for msg in self.weboob.do(iter_messages, backends=['dlfp']):
            word = self.find_keywords(msg.message)
            if word is not None and msg.login != 'moules':
                # \002 is the IRC control character toggling bold
                message = msg.message.replace(word, '\002%s\002' % word)
                self.bot.send_message('[DLFP] <%s> %s' % (msg.login, message))

    def check_tasks(self):
        """Send and dequeue the scheduled tasks whose date has passed."""
        # iterate over a copy, as tasks are removed from the queue in the loop
        for task in list(self.bot.tasks_queue):
            if task.datetime < datetime.now():
                self.bot.send_message(task.message, task.channel)
                self.bot.tasks_queue.remove(task)

    def stop(self):
        """Ask the weboob loop to stop and release weboob."""
        self.weboob.want_stop()
        self.weboob.deinit()


class Boobot(SingleServerIRCBot):
    """
    IRC bot describing posted URLs and weboob object ids, and handling the
    '%' commands implemented as cmd_* methods.
    """

    def __init__(self, channels, nickname, server, port=6667):
        SingleServerIRCBot.__init__(self, [(server, port)], nickname, nickname)
        # NOTE(review): kept disabled as in the original; presumably the base
        # class already dispatches on_pubmsg by name -- confirm before enabling.
        # self.connection.add_global_handler('pubmsg', self.on_pubmsg)
        self.connection.add_global_handler('join', self.on_join)
        self.connection.add_global_handler('welcome', self.on_welcome)
        self.connection.buffer_class.errors = 'replace'

        self.mainchannel = channels[0]
        # one Event per channel, set once the channel is joined
        self.joined = dict()
        for channel in channels:
            self.joined[channel] = Event()
        self.weboob = None
        self.storage = None

        self.tasks_queue = []

    def set_weboob(self, weboob):
        """Attach the Weboob instance and load the bot storage from it."""
        self.weboob = weboob
        self.storage = ApplicationStorage('boobot', weboob.storage)
        self.storage.load({})

    def on_welcome(self, c, event):
        """Join every configured channel once connected."""
        for channel in self.joined.keys():
            c.join(channel)

    def on_join(self, c, event):
        """Flag the channel as joined."""
        # irclib 5.0 compatibility
        if callable(event.target):
            channel = event.target()
        else:
            channel = event.target
        # A JOIN may be reported for a channel which is not tracked; ignore it
        # instead of failing with a KeyError inside the IRC handler.
        joined = self.joined.get(channel)
        if joined is not None:
            joined.set()

    def send_message(self, msg, channel=None):
        """
        Send `msg` line by line on `channel` (main channel by default).

        Each line is truncated to 450 UTF-8 encoded bytes.
        """
        for m in msg.splitlines():
            # The cut may fall inside a multi-byte character: drop the partial
            # character rather than raising UnicodeDecodeError.
            msg = to_unicode(m).encode('utf-8')[:450].decode('utf-8', 'ignore')
            self.connection.privmsg(to_unicode(channel or self.mainchannel), msg)

    def on_pubmsg(self, c, event):
        """Handle a public message: object ids, URLs and '%' commands."""
        # irclib 5.0 compatibility
        if callable(event.arguments):
            text = ' '.join(event.arguments())
            channel = event.target()
            nick = event.source()
        else:
            text = ' '.join(event.arguments)
            channel = event.target
            nick = event.source
        for ignore in IRC_IGNORE:
            if ignore.search(nick):
                return
        for m in re.findall(r'([\w\d_\-]+@\w+)', text):
            for msg in self.on_boobid(m):
                self.send_message(msg, channel)
        for m in re.findall(u'(https?://[^\\s\xa0+]+)', text):
            for msg in self.on_url(m):
                self.send_message(msg, channel)

        m = re.match(r'^%(?P<cmd>\w+)(?P<args>.*)$', text)
        if m and hasattr(self, 'cmd_%s' % m.groupdict()['cmd']):
            getattr(self, 'cmd_%s' % m.groupdict()['cmd'])(nick, channel, m.groupdict()['args'].strip())

    def cmd_at(self, nick, channel, text):
        """%at DATE MESSAGE: schedule MESSAGE to be sent on the channel at DATE."""
        try:
            when_text, message = text.split(' ', 1)
        except ValueError:
            self.send_message('Syntax: %at [YYYY-MM-DDT]HH:MM[:SS] message', channel)
            return

        try:
            when = parse_date(when_text)
        except (ValueError, OverflowError):
            # answer on the channel the command came from
            self.send_message('Unable to read date %r' % when_text, channel)
            return

        self.tasks_queue.append(Task(when, message, channel))

    def cmd_addquote(self, nick, channel, text):
        """%addquote TEXT: store a quote for the channel."""
        quotes = self.storage.get(channel, 'quotes', default=[])
        quotes.append({'author': nick, 'timestamp': datetime.now(), 'text': text})
        self.storage.set(channel, 'quotes', quotes)
        self.storage.save()
        self.send_message('Quote #%s added' % (len(quotes) - 1), channel)

    def cmd_delquote(self, nick, channel, text):
        """%delquote N: remove the quote number N of the channel."""
        quotes = self.storage.get(channel, 'quotes', default=[])

        try:
            n = int(text)
            quotes.pop(n)
        except (ValueError, IndexError):
            # not a number, or no quote with this number
            self.send_message("Quote #%s not found gros" % text, channel)
            return

        self.storage.set(channel, 'quotes', quotes)
        self.storage.save()
        self.send_message('Quote #%s removed' % n, channel)

    def cmd_searchquote(self, nick, channel, text):
        """%searchquote REGEXP: display a random quote matching REGEXP."""
        try:
            pattern = re.compile(to_unicode(text), re.IGNORECASE | re.UNICODE)
        except Exception as e:
            self.send_message(str(e), channel)
            return

        quotes = []
        for quote in self.storage.get(channel, 'quotes', default=[]):
            if pattern.search(to_unicode(quote['text'])):
                quotes.append(quote)

        try:
            quote = choice(quotes)
        except IndexError:
            self.send_message('No match', channel)
        else:
            self.send_message('%s' % quote['text'], channel)

    def cmd_getquote(self, nick, channel, text):
        """%getquote [N]: display the quote number N, or a random one."""
        quotes = self.storage.get(channel, 'quotes', default=[])
        if len(quotes) == 0:
            return

        try:
            n = int(text)
        except ValueError:
            n = randint(0, len(quotes)-1)

        try:
            quote = quotes[n]
        except IndexError:
            self.send_message('Unable to find quote #%s' % n, channel)
        else:
            self.send_message('[%s] %s' % (n, quote['text']), channel)

    def on_boobid(self, boobid):
        """
        Yield descriptions of the object designated by an 'id@backend' string.

        The first capability of the backend for which an obj_info_* method
        exists on the bot is used.
        """
        _id, backend_name = boobid.split('@', 1)
        if backend_name in self.weboob.backend_instances:
            backend = self.weboob.backend_instances[backend_name]
            for cap in backend.iter_caps():
                # strip the first three characters of the capability class name
                func = 'obj_info_%s' % cap.__name__[3:].lower()
                if hasattr(self, func):
                    try:
                        for msg in getattr(self, func)(backend, _id):
                            yield msg
                    except Exception as e:
                        print(get_backtrace())
                        yield u'Oops: [%s] %s' % (type(e).__name__, e)
                    break

    def on_url(self, url):
        """Yield a one-line description (title, or type and size) of an URL."""
        url = fixurl(url)
        try:
            content_type, hsize, title = BoobotBrowser().urlinfo(url)
            if title:
                yield u'URL: %s' % title
            elif hsize:
                yield u'URL (file): %s, %s' % (content_type, hsize)
            else:
                yield u'URL (file): %s' % content_type
        except BrowserUnavailable as e:
            yield u'URL (error): %s' % e
        except Exception as e:
            print(get_backtrace())
            yield u'Oops: [%s] %s' % (type(e).__name__, e)

    def obj_info_video(self, backend, id):
        """Yield the description of a video."""
        v = backend.get_video(id)
        if v:
            yield u'Video: %s (%s)' % (v.title, v.duration)

    def obj_info_housing(self, backend, id):
        """Yield the description of a housing."""
        h = backend.get_housing(id)
        if h:
            yield u'Housing: %s (%sm² / %s%s)' % (h.title, h.area, h.cost, h.currency)


def main():
    """Start the bot and its background thread; stop the thread on Ctrl-C."""
    logging.basicConfig(level=logging.DEBUG)
    bot = Boobot(IRC_CHANNELS, IRC_NICKNAME, IRC_SERVER)

    thread = MyThread(bot)
    thread.start()

    try:
        bot.start()
    except KeyboardInterrupt:
        print("Stopped.")

    thread.stop()


if __name__ == "__main__":
    sys.exit(main())