1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3
4# Copyright(C) 2012  Romain Bignon
5#
6# This file is part of weboob.
7#
8# weboob is free software: you can redistribute it and/or modify
9# it under the terms of the GNU Lesser General Public License as published by
10# the Free Software Foundation, either version 3 of the License, or
11# (at your option) any later version.
12#
13# weboob is distributed in the hope that it will be useful,
14# but WITHOUT ANY WARRANTY; without even the implied warranty of
15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16# GNU Lesser General Public License for more details.
17#
18# You should have received a copy of the GNU Lesser General Public License
19# along with weboob. If not, see <http://www.gnu.org/licenses/>.
20
21from __future__ import print_function
22
23import itertools
24import logging
25import os
26import re
27import sys
28import urllib
29import urlparse
30from datetime import datetime, timedelta
31from math import log
32from random import choice, randint
33from threading import Event, Thread
34
35from dateutil.parser import parse as parse_date
36from irc.bot import SingleServerIRCBot
37
38from weboob.browser import Browser
39from weboob.browser.exceptions import HTTPNotFound
40from weboob.browser.pages import HTMLPage
41from weboob.core import Weboob
42from weboob.exceptions import BrowserHTTPError, BrowserUnavailable
43from weboob.tools.application.base import ApplicationStorage
44from weboob.tools.misc import get_backtrace, to_unicode
45from weboob.tools.storage import StandardStorage
46
# Runtime configuration, read once from the environment when the module is
# imported; the second argument of each lookup is the built-in default.

# Comma-separated channel list; the first entry is used as the main channel.
IRC_CHANNELS = os.environ.get('BOOBOT_CHANNELS', '#weboob').split(',')
IRC_NICKNAME = os.environ.get('BOOBOT_NICKNAME', 'boobot')
IRC_SERVER = os.environ.get('BOOBOT_SERVER', 'dickson.freenode.net')
# Comma-separated regular expressions; a public message whose source matches
# any of them is ignored by the bot.
IRC_IGNORE = list(map(re.compile, os.environ.get('BOOBOT_IGNORE', '!~?irker@').split(',')))
# Path handed to StandardStorage for the bot's persistent data.
STORAGE_FILE = os.environ.get('BOOBOT_STORAGE', 'boobot.storage')
52
53
def fixurl(url):
    """Normalise a URL picked up from chat so that it can be fetched.

    The URL is converted with ``to_unicode``, the ``/#!/`` marker is dropped,
    and every component is encoded separately: user and password are
    percent-quoted, the host is IDNA-encoded and the remaining parts are
    UTF-8 encoded. Runs of consecutive slashes in the path are collapsed to
    a single slash.

    :param url: URL as typed by a user (byte or unicode string).
    :returns: the URL rebuilt by ``urlparse.urlunsplit`` from the encoded
              components.
    """
    url = to_unicode(url)

    # remove javascript crap
    url = url.replace('/#!/', '/')

    # parse it
    parsed = urlparse.urlsplit(url)

    # divide the netloc further: [user[:pass]@]host[:port]
    userpass, at, hostport = parsed.netloc.rpartition('@')
    user, colon1, pass_ = userpass.partition(':')
    host, colon2, port = hostport.partition(':')

    # encode each component
    scheme = parsed.scheme.encode('utf8')
    user = urllib.quote(user.encode('utf8'))
    colon1 = colon1.encode('utf8')
    pass_ = urllib.quote(pass_.encode('utf8'))
    at = at.encode('utf8')
    host = host.encode('idna')
    colon2 = colon2.encode('utf8')
    port = port.encode('utf8')
    path = parsed.path.encode('utf8')
    # While valid, repeated slashes are most likely an error. Collapse whole
    # runs: a single str.replace('//', '/') pass turns '///' into '//'.
    path = re.sub('/{2,}', '/', path)
    query = parsed.query.encode('utf8')
    fragment = parsed.fragment.encode('utf8')

    # put it back together
    netloc = ''.join((user, colon1, pass_, at, host, colon2, port))
    return urlparse.urlunsplit((scheme, netloc, path, query, fragment))
86
87
class BoobotBrowser(Browser):
    """Browser used to describe the URLs that are pasted on IRC."""

    # Timeout setting inherited by the Browser machinery
    # (presumably seconds -- confirm against weboob.browser.Browser).
    TIMEOUT = 3.0

    def urlinfo(self, url, maxback=2):
        """Fetch basic information about *url*.

        A HEAD request is tried first; when the server answers 501 or 405 a
        regular request is made instead. When the URL is not found and ends
        with a non-alphanumeric character, that character is removed and the
        lookup retried, at most *maxback* times.

        :param url: address to inspect.
        :param maxback: how many trailing characters may be stripped after a
                        "not found" answer.
        :returns: a ``(content_type, human_readable_size, title)`` tuple;
                  size and title are None when they could not be determined.
        :raises HTTPNotFound: when the URL cannot be found and no retry is
                              possible.
        :raises BrowserHTTPError: for any other HTTP error than 501/405.
        """
        if urlparse.urlsplit(url).netloc == 'mobile.twitter.com':
            url = url.replace('mobile.twitter.com', 'twitter.com', 1)
        try:
            r = self.open(url, method='HEAD')
            body = False
        except HTTPNotFound:
            # The URL regex may have swallowed trailing punctuation; retry
            # without the last character while retries remain.
            if maxback and url and not url[-1].isalnum():
                return self.urlinfo(url[:-1], maxback - 1)
            raise
        except BrowserHTTPError as e:
            if e.response.status_code not in (501, 405):
                raise
            # HEAD is not supported by this server: do a full request.
            r = self.open(url)
            body = True

        content_type = r.headers.get('Content-Type')
        try:
            size = int(r.headers.get('Content-Length'))
        except (TypeError, ValueError):
            # header missing (None) or not a number
            hsize = None
        else:
            hsize = self.human_size(size)

        if content_type:
            is_html = 'html' in content_type
        else:
            # No Content-Type: fall back on the extension. re.search is
            # required here, re.match would anchor the pattern at the start
            # of the URL and never match.
            is_html = re.search(r'\.x?html?$', url)

        title = None
        if is_html:
            if not body:
                r = self.open(url)
            # update size as we might not have it from headers
            size = len(r.content)
            hsize = self.human_size(size)

            page = HTMLPage(self, r)

            # The last matching node wins, as each iteration overwrites title.
            for node in page.doc.xpath('//head/title'):
                title = ' '.join(to_unicode(node.text_content()).split())
            if urlparse.urlsplit(url).netloc.endswith('twitter.com'):
                # For tweets, prefer the tweet text over the page title.
                for node in page.doc.getroot().cssselect('.permalink-tweet .tweet-text'):
                    text = to_unicode(node.text_content()).strip()
                    title = ' '.join(text.splitlines())

        return content_type, hsize, title

    def human_size(self, size):
        """Format a byte count with a binary unit, e.g. ``'1.5 KiB'``.

        :param size: number of bytes.
        :returns: ``'0 B'`` for a zero, empty or negative size, otherwise the
                  size with one decimal and the largest fitting unit (capped
                  at YiB).
        """
        if not size or size < 0:
            return '0 B'

        units = ('B', 'KiB', 'MiB', 'GiB',
                 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB')
        # Divide step by step instead of using log(): this avoids floating
        # point rounding at exact powers of 1024 and never indexes past the
        # last unit.
        value = float(size)
        exponent = 0
        while value >= 1024 and exponent < len(units) - 1:
            value /= 1024
            exponent += 1
        return "%.1f %s" % (value, units[exponent])
142
143
class Task(object):
    """A message to deliver once a given point in time has passed.

    Plain value holder with three attributes: ``datetime`` (when the message
    is due), ``message`` (the text to send) and ``channel`` (where to send
    it, None by default).
    """

    def __init__(self, datetime, message, channel=None):
        self.datetime, self.message, self.channel = datetime, message, channel
149
150
class MyThread(Thread):
    """Background thread polling weboob backends and relaying news to IRC.

    It owns the Weboob instance, shares it with the bot, and once every
    channel has been joined it schedules the periodic checks and runs the
    weboob loop.
    """

    daemon = True

    # Names whose mention in a message is worth reporting; the first entry
    # found in the text is the one returned by find_keywords().
    _KEYWORDS = (
        'weboob', 'videoob', 'havesex', 'havedate', 'monboob', 'boobmsg',
        'flatboob', 'boobill', 'pastoob', 'radioob', 'translaboob', 'traveloob', 'handjoob',
        'boobathon', 'boobank', 'boobtracker', 'comparoob', 'wetboobs',
        'webcontentedit', 'weboorrents', 'assnet',
        'budget insight', 'budget-insight', 'budgetinsight', 'budgea',
    )

    def __init__(self, bot):
        """Create the Weboob instance, load its backends and give it to *bot*."""
        Thread.__init__(self)
        self.weboob = Weboob(storage=StandardStorage(STORAGE_FILE))
        self.weboob.load_backends()
        self.bot = bot
        self.bot.set_weboob(self.weboob)

    def run(self):
        """Wait for every channel to be joined, then run the periodic checks."""
        for ev in self.bot.joined.values():
            ev.wait()

        # first argument is the repeat interval passed to weboob
        self.weboob.repeat(5, self.check_tasks)
        self.weboob.repeat(300, self.check_board)
        self.weboob.repeat(600, self.check_dlfp)
        self.weboob.repeat(600, self.check_twitter)

        self.weboob.loop()

    def find_keywords(self, text):
        """Return the first known keyword contained in *text*, or None.

        The comparison is case-insensitive; the keyword is returned in its
        lowercase form. Empty or missing text yields None.
        """
        if not text:
            return None
        lowered = text.lower()
        for word in self._KEYWORDS:
            if word in lowered:
                return word
        return None

    def check_twitter(self):
        """Announce unseen tweets returned by the 'weboob' search."""
        nb_tweets = 10

        for backend in self.weboob.iter_backends(module='twitter'):
            for thread in list(itertools.islice(backend.iter_resources(None, ['search', 'weboob']),
                                                0,
                                                nb_tweets)):

                # First use: pretend the last purge happened 60 days ago so
                # that older tweets are not announced.
                if not backend.storage.get('lastpurge'):
                    backend.storage.set('lastpurge', datetime.now() - timedelta(days=60))
                    backend.storage.save()

                if thread.id not in backend.storage.get('seen', default={}) and\
                   thread.date > backend.storage.get('lastpurge'):
                    # thread ids look like "<author>#<status id>"
                    _item = thread.id.split('#')
                    url = 'https://twitter.com/%s/status/%s' % (_item[0], _item[1])
                    for msg in self.bot.on_url(url):
                        self.bot.send_message('%s: %s' % (_item[0], url))
                        self.bot.send_message(msg)

                    backend.set_message_read(backend.fill_thread(thread, ['root']).root)

    def check_dlfp(self):
        """Report unread DLFP messages that mention a keyword, then mark them read."""
        for msg in self.weboob.do('iter_unread_messages', backends=['dlfp']):
            word = self.find_keywords(msg.content)
            if word is not None:
                # NOTE(review): find() returns -1 when the signature holds no
                # linuxfr link, in which case only its last character is
                # posted -- confirm signatures always carry the link.
                url = msg.signature[msg.signature.find('https://linuxfr'):]
                self.bot.send_message('[DLFP] %s talks about %s: %s' % (
                    msg.sender, word, url))
            self.weboob[msg.backend].set_message_read(msg)

    def check_board(self):
        """Relay new DLFP board messages mentioning a keyword.

        Messages posted by the 'moules' login are skipped. Every occurrence
        of the keyword is wrapped in \\002 (IRC bold) whatever its case, and
        the original spelling is kept.
        """
        def iter_messages(backend):
            return backend.browser.iter_new_board_messages()

        for msg in self.weboob.do(iter_messages, backends=['dlfp']):
            word = self.find_keywords(msg.message)
            if word is None or msg.login == 'moules':
                continue
            # find_keywords() matches case-insensitively, so the highlight
            # has to do the same or "Weboob" would be reported unhighlighted.
            message = re.sub(re.escape(word),
                             lambda found: '\002%s\002' % found.group(0),
                             msg.message,
                             flags=re.IGNORECASE)
            self.bot.send_message('[DLFP] <%s> %s' % (msg.login, message))

    def check_tasks(self):
        """Send and drop every queued task whose time has come."""
        # iterate over a copy: the queue is modified inside the loop
        for task in list(self.bot.tasks_queue):
            if task.datetime < datetime.now():
                self.bot.send_message(task.message, task.channel)
                self.bot.tasks_queue.remove(task)

    def stop(self):
        """Ask the weboob loop to stop and release the Weboob instance."""
        self.weboob.want_stop()
        self.weboob.deinit()
233
234
class Boobot(SingleServerIRCBot):
    """IRC bot that describes pasted URLs and weboob ids, and runs % commands.

    Public messages are scanned for ``id@backend`` references and http(s)
    URLs; a message starting with ``%name`` is dispatched to the ``cmd_name``
    method when it exists.
    """

    def __init__(self, channels, nickname, server, port=6667):
        """Prepare the connection to *server* and the per-channel join events.

        :param channels: channels to join; the first one is the main channel,
                         used when a message is sent without a target.
        :param nickname: nick (also used as real name) of the bot.
        :param server: IRC server host name.
        :param port: IRC server port.
        """
        SingleServerIRCBot.__init__(self, [(server, port)], nickname, nickname)
        # self.connection.add_global_handler('pubmsg', self.on_pubmsg)
        self.connection.add_global_handler('join', self.on_join)
        self.connection.add_global_handler('welcome', self.on_welcome)
        # presumably makes the irc library replace undecodable input bytes
        # instead of raising -- confirm against the irc package documentation
        self.connection.buffer_class.errors = 'replace'

        self.mainchannel = channels[0]
        # channel name -> Event set once the channel has been joined
        self.joined = dict()
        for channel in channels:
            self.joined[channel] = Event()
        self.weboob = None
        self.storage = None

        # Task objects waiting for their due time
        self.tasks_queue = []

    def set_weboob(self, weboob):
        """Attach the Weboob instance and load the bot's own storage from it."""
        self.weboob = weboob
        self.storage = ApplicationStorage('boobot', weboob.storage)
        self.storage.load({})

    def on_welcome(self, c, event):
        """Join every configured channel once the server has welcomed us."""
        for channel in self.joined.keys():
            c.join(channel)

    def on_join(self, c, event):
        """Flag the joined channel so that waiters on ``self.joined`` proceed."""
        # irclib 5.0 compatibility
        if callable(event.target):
            channel = event.target()
        else:
            channel = event.target
        # Ignore join events for channels we were not configured with
        # instead of failing with KeyError.
        joined = self.joined.get(channel)
        if joined is not None:
            joined.set()

    def send_message(self, msg, channel=None):
        """Send *msg* to *channel* (main channel by default), one line at a time.

        Each line is limited to 450 UTF-8 encoded bytes.
        """
        target = to_unicode(channel or self.mainchannel)
        for line in msg.splitlines():
            # The cut is done on bytes and may split a multi-byte character;
            # 'ignore' drops the partial character instead of raising
            # UnicodeDecodeError.
            line = to_unicode(line).encode('utf-8')[:450].decode('utf-8', 'ignore')
            self.connection.privmsg(target, line)

    def on_pubmsg(self, c, event):
        """Handle a public message: describe ids and URLs, then run commands."""
        # irclib 5.0 compatibility
        if callable(event.arguments):
            text = ' '.join(event.arguments())
            channel = event.target()
            nick = event.source()
        else:
            text = ' '.join(event.arguments)
            channel = event.target
            nick = event.source
        for ignore in IRC_IGNORE:
            if ignore.search(nick):
                return
        for m in re.findall(r'([\w\d_\-]+@\w+)', text):
            for msg in self.on_boobid(m):
                self.send_message(msg, channel)
        for m in re.findall(u'(https?://[^\s\xa0+]+)', text):
            for msg in self.on_url(m):
                self.send_message(msg, channel)

        m = re.match(r'^%(?P<cmd>\w+)(?P<args>.*)$', text)
        if m:
            handler = getattr(self, 'cmd_%s' % m.group('cmd'), None)
            if handler is not None:
                handler(nick, channel, m.group('args').strip())

    def cmd_at(self, nick, channel, text):
        """``%at <date> <message>``: queue *message* to be sent at *date*."""
        try:
            when, message = text.split(' ', 1)
        except ValueError:
            self.send_message('Syntax: %at [YYYY-MM-DDT]HH:MM[:SS] message', channel)
            return

        try:
            due = parse_date(when)
        except (ValueError, OverflowError):
            # answer on the channel the command came from
            self.send_message('Unable to read date %r' % when, channel)
            return

        self.tasks_queue.append(Task(due, message, channel))

    def cmd_addquote(self, nick, channel, text):
        """``%addquote <text>``: store a quote for *channel* and report its number."""
        quotes = self.storage.get(channel, 'quotes', default=[])
        quotes.append({'author': nick, 'timestamp': datetime.now(), 'text': text})
        self.storage.set(channel, 'quotes', quotes)
        self.storage.save()
        self.send_message('Quote #%s added' % (len(quotes) - 1), channel)

    def cmd_delquote(self, nick, channel, text):
        """``%delquote <n>``: remove quote number *n* of *channel*."""
        quotes = self.storage.get(channel, 'quotes', default=[])

        try:
            n = int(text)
            quotes.pop(n)
        except (ValueError, IndexError):
            # not a number, or no quote stored at that position
            self.send_message("Quote #%s not found gros" % text, channel)
            return

        self.storage.set(channel, 'quotes', quotes)
        self.storage.save()
        self.send_message('Quote #%s removed' % n, channel)

    def cmd_searchquote(self, nick, channel, text):
        """``%searchquote <regex>``: send a random quote matching the pattern."""
        try:
            pattern = re.compile(to_unicode(text), re.IGNORECASE | re.UNICODE)
        except Exception as e:
            # invalid pattern: tell the user why
            self.send_message(str(e), channel)
            return

        quotes = []
        for quote in self.storage.get(channel, 'quotes', default=[]):
            if pattern.search(to_unicode(quote['text'])):
                quotes.append(quote)

        try:
            quote = choice(quotes)
        except IndexError:
            self.send_message('No match', channel)
        else:
            self.send_message('%s' % quote['text'], channel)

    def cmd_getquote(self, nick, channel, text):
        """``%getquote [n]``: send quote *n*, or a random one without a number.

        Nothing is sent when the channel has no quote at all.
        """
        quotes = self.storage.get(channel, 'quotes', default=[])
        if len(quotes) == 0:
            return

        try:
            n = int(text)
        except ValueError:
            n = randint(0, len(quotes)-1)

        try:
            quote = quotes[n]
        except IndexError:
            self.send_message('Unable to find quote #%s' % n, channel)
        else:
            self.send_message('[%s] %s' % (n, quote['text']), channel)

    def on_boobid(self, boobid):
        """Yield descriptions of the object designated by ``id@backend``.

        The first capability of the backend for which an ``obj_info_<cap>``
        method exists is used; errors are reported as an 'Oops' line.
        """
        _id, backend_name = boobid.split('@', 1)
        if backend_name in self.weboob.backend_instances:
            backend = self.weboob.backend_instances[backend_name]
            for cap in backend.iter_caps():
                # e.g. a capability class named CapVideo maps to obj_info_video
                func = 'obj_info_%s' % cap.__name__[3:].lower()
                if hasattr(self, func):
                    try:
                        for msg in getattr(self, func)(backend, _id):
                            yield msg
                    except Exception as e:
                        print(get_backtrace())
                        yield u'Oops: [%s] %s' % (type(e).__name__, e)
                    break

    def on_url(self, url):
        """Yield one line describing *url*: its title, or its type and size."""
        url = fixurl(url)
        try:
            content_type, hsize, title = BoobotBrowser().urlinfo(url)
            if title:
                yield u'URL: %s' % title
            elif hsize:
                yield u'URL (file): %s, %s' % (content_type, hsize)
            else:
                yield u'URL (file): %s' % content_type
        except BrowserUnavailable as e:
            yield u'URL (error): %s' % e
        except Exception as e:
            print(get_backtrace())
            yield u'Oops: [%s] %s' % (type(e).__name__, e)

    def obj_info_video(self, backend, id):
        """Yield a one-line description of the video *id* of *backend*."""
        v = backend.get_video(id)
        if v:
            yield u'Video: %s (%s)' % (v.title, v.duration)

    def obj_info_housing(self, backend, id):
        """Yield a one-line description of the housing *id* of *backend*."""
        h = backend.get_housing(id)
        if h:
            yield u'Housing: %s (%sm² / %s%s)' % (h.title, h.area, h.cost, h.currency)
410
411
def main():
    """Start the bot and its background thread, and run until interrupted.

    Logging is configured at DEBUG level. The background thread is stopped
    when the bot ends, whether it returned, was interrupted from the
    keyboard or failed with an error.
    """
    logging.basicConfig(level=logging.DEBUG)
    bot = Boobot(IRC_CHANNELS, IRC_NICKNAME, IRC_SERVER)

    thread = MyThread(bot)
    thread.start()

    try:
        bot.start()
    except KeyboardInterrupt:
        print("Stopped.")
    finally:
        # always release weboob, even when bot.start() raised an error
        thread.stop()


if __name__ == "__main__":
    sys.exit(main())
429