1###
2# Copyright (c) 2002-2004, Jeremiah Fincher
3# Copyright (c) 2008-2010, James McCoy
4# All rights reserved.
5#
6# Redistribution and use in source and binary forms, with or without
7# modification, are permitted provided that the following conditions are met:
8#
9#   * Redistributions of source code must retain the above copyright notice,
10#     this list of conditions, and the following disclaimer.
11#   * Redistributions in binary form must reproduce the above copyright notice,
12#     this list of conditions, and the following disclaimer in the
13#     documentation and/or other materials provided with the distribution.
14#   * Neither the name of the author of this software nor the name of
15#     contributors to this software may be used to endorse or promote products
16#     derived from this software without specific prior written consent.
17#
18# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21# ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
22# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28# POSSIBILITY OF SUCH DAMAGE.
29###
30
31import re
32import sys
33import json
34
35import supybot.conf as conf
36import supybot.utils as utils
37import supybot.world as world
38from supybot.commands import *
39import supybot.utils.minisix as minisix
40import supybot.ircmsgs as ircmsgs
41import supybot.ircutils as ircutils
42import supybot.callbacks as callbacks
43from supybot.i18n import PluginInternationalization, internationalizeDocstring
44_ = PluginInternationalization('Google')
45
class Google(callbacks.PluginRegexp):
    """This is a simple plugin to provide access to the Google services we
    all know and love from our favorite IRC bot."""
    threaded = True
    # NOTE(review): presumably orders this plugin's callbacks ahead of the
    # Web plugin's -- confirm against the callbacks framework.
    callBefore = ['Web']
    # Methods triggered by a regexp; googleSnarfer's docstring is its pattern.
    regexps = ['googleSnarfer']

    # Cache of already-coloured spellings, keyed by the matched text (the
    # match is case-insensitive, so "Google" and "google" are separate keys).
    _colorGoogles = {}
    def _getColorGoogle(self, m):
        # Replacement callback for _googleRe: returns the matched word with
        # one mIRC colour per letter, wrapped in bold.
        s = m.group(1)
        ret = self._colorGoogles.get(s)
        if not ret:
            L = list(s)
            # The trailing character of every coloured letter but the last is
            # dropped (presumably the colour-reset code, so that only one
            # terminator ends up in the final string -- confirm in ircutils).
            L[0] = ircutils.mircColor(L[0], 'blue')[:-1]
            L[1] = ircutils.mircColor(L[1], 'red')[:-1]
            L[2] = ircutils.mircColor(L[2], 'yellow')[:-1]
            L[3] = ircutils.mircColor(L[3], 'blue')[:-1]
            L[4] = ircutils.mircColor(L[4], 'green')[:-1]
            L[5] = ircutils.mircColor(L[5], 'red')
            ret = ''.join(L)
            self._colorGoogles[s] = ret
        return ircutils.bold(ret)

    _googleRe = re.compile(r'\b(google)\b', re.I)
    def outFilter(self, irc, msg):
        # Rewrites outgoing PRIVMSGs so that every standalone "google" is
        # coloured, when the colorfulFilter setting is on for the target.
        if msg.command == 'PRIVMSG' and \
           self.registryValue('colorfulFilter', msg.args[0]):
            s = msg.args[1]
            s = self._googleRe.sub(self._getColorGoogle, s)
            msg = ircmsgs.privmsg(msg.args[0], s, msg=msg)
        return msg

    # Matches one result entry of the HTML results page; the named groups
    # (url, title, breadcrumbs, content) become the keys of each result dict.
    _decode_re = re.compile(r'<div class="\w+"><a href="/url\?q=(?P<url>[^"]+)&[^"]+"><div class="(\w| )+">(?P<title>.*?)</div><div class="(\w| )+">(?P<breadcrumbs>.*?)</div></a></div>(?P<content><div class="(\w| )+">.*?</div></div>)', re.DOTALL | re.MULTILINE)
    @classmethod
    def decode(cls, text):
        """Extracts the search results from a results page.

        Returns a list of dicts, one per match of _decode_re in <text>, with
        the keys 'url', 'title', 'breadcrumbs' and 'content'.  'url' is
        cut at the first '&amp;' and then HTML- and URL-unescaped; the other
        values are the raw HTML fragments.
        """
        matches = cls._decode_re.finditer(text)
        results = []
        for match in matches:
            r = match.groupdict()
            r['url'] = utils.web.urlunquote(utils.web.htmlToText(r['url'].split('&amp;')[0]))
            results.append(r)
        return results


    _gsearchUrl = 'https://www.google.com/search'
    def search(self, query, channel, options=None):
        """search("search phrase", channel, options=None)

        Fetches the Google results page for the query and returns its HTML,
        decoded as UTF-8 text.  Use decode() to turn it into results.

        Valid options are:
            smallsearch - True/False (Default: False)
            filter - {active,moderate,off} (Default: the searchFilter
                     setting for the channel)
            language - Restrict search to documents in the given language
                       (Default: the defaultLanguage setting for the channel)
        """
        self.log.warning('The Google plugin search is deprecated since '
                'Google closed their public API and will be removed in a '
                'future release. Please consider switching to an other '
                'plugin for your searches, like '
                '<https://github.com/Hoaas/Supybot-plugins/tree/master/DuckDuckGo>, '
                '<https://github.com/joulez/GoogleCSE>, or '
                '<https://github.com/GLolol/SupyPlugins/tree/master/DDG>.')
        ref = self.registryValue('referer')
        if not ref:
            ref = 'http://%s/%s' % (dynamic.irc.server,
                                    dynamic.irc.nick)
        headers = dict(utils.web.defaultHeaders)
        headers['Referer'] = ref
        opts = {'q': query, 'gbv': '2'}
        # options defaults to None rather than {} so that no dict object is
        # shared between calls.
        for (k, v) in (options or {}).items():
            if k == 'smallsearch':
                if v:
                    opts['rsz'] = 'small'
                else:
                    opts['rsz'] = 'large'
            elif k == 'filter':
                opts['safe'] = v
            elif k == 'language':
                opts['hl'] = v
        defLang = self.registryValue('defaultLanguage', channel)
        if 'hl' not in opts and defLang:
            # Remove the 'lang_' prefix only.  str.strip('lang_') would
            # instead strip any of the characters l, a, n, g and _ from both
            # ends, turning e.g. 'lang_en' into 'e'.
            if defLang.startswith('lang_'):
                defLang = defLang[len('lang_'):]
            opts['hl'] = defLang
        if 'safe' not in opts:
            opts['safe'] = self.registryValue('searchFilter', channel)
        if 'rsz' not in opts:
            opts['rsz'] = 'large'

        text = utils.web.getUrl('%s?%s' % (self._gsearchUrl,
                                           utils.web.urlencode(opts)),
                                headers=headers).decode('utf8')
        return text

    def formatData(self, data, bold=True, max=0, onetoone=False):
        """Formats a results page for replying.

        <data> is the page text returned by search().  At most <max> results
        are used (0 means no limit); each is rendered as "title: url", with
        the title in bold if <bold> is true, or as the bare url when the
        title is empty.

        Returns a list of strings: one per result if <onetoone> is true,
        otherwise a single string with the results joined by '; '.  When
        there are no results the list holds a single "no matches" message.
        """
        data = self.decode(data)
        results = []
        if max:
            data = data[:max]
        for result in data:
            title = utils.web.htmlToText(result['title']\
                                         .encode('utf-8'))
            url = result['url']
            if minisix.PY2:
                url = url.encode('utf-8')
            if title:
                if bold:
                    title = ircutils.bold(title)
                results.append(format('%s: %u', title, url))
            else:
                results.append(url)
        if minisix.PY2:
            # Make every entry unicode so that the join below does not mix
            # byte strings and unicode strings.
            repl = lambda x:x if isinstance(x, unicode) else unicode(x, 'utf8')
            results = list(map(repl, results))
        if not results:
            return [_('No matches found.')]
        elif onetoone:
            return results
        else:
            return [minisix.u('; ').join(results)]

    @internationalizeDocstring
    def lucky(self, irc, msg, args, opts, text):
        """[--snippet] <search>

        Does a google search, but only returns the first result.
        If option --snippet is given, returns also the page text snippet.
        """
        opts = dict(opts)
        data = self.search(text, msg.args[0], {'smallsearch': True})
        data = self.decode(data)
        if data:
            url = data[0]['url']
            if 'snippet' in opts:
                snippet = data[0]['content']
                snippet = " | " + utils.web.htmlToText(snippet, tagReplace='')
            else:
                snippet = ""
            result = url + snippet
            irc.reply(result)
        else:
            irc.reply(_('Google found nothing.'))
    lucky = wrap(lucky, [getopts({'snippet':'',}), 'text'])

    @internationalizeDocstring
    def google(self, irc, msg, args, optlist, text):
        """<search> [--{filter,language} <value>]

        Searches google.com for the given string.  As many results as can fit
        are included.  --language accepts a language abbreviation; --filter
        accepts a filtering level ('active', 'moderate', 'off').
        """
        # NOTE(review): this test indexes optlist like a mapping, but optlist
        # is converted with dict() just below, which suggests it arrives as a
        # sequence of (name, value) pairs; in that case the test is always
        # false.  It also checks a language against the 'safesearch' entry.
        # Confirm the intended validation before changing it.
        if 'language' in optlist and optlist['language'].lower() not in \
           conf.supybot.plugins.Google.safesearch.validStrings:
            irc.errorInvalid('language')
        data = self.search(text, msg.args[0], dict(optlist))
        bold = self.registryValue('bold', msg.args[0])
        max = self.registryValue('maximumResults', msg.args[0])
        # We don't use supybot.reply.oneToOne here, because you generally
        # do not want @google to echo ~20 lines of results, even if you
        # have reply.oneToOne enabled.
        onetoone = self.registryValue('oneToOne', msg.args[0])
        for result in self.formatData(data,
                                  bold=bold, max=max, onetoone=onetoone):
            irc.reply(result)
    # NOTE(review): 'filter' is declared with '' like the valueless
    # --snippet flag of lucky, although the help text says it takes a
    # value -- confirm against the getopts documentation.
    google = wrap(google, [getopts({'language':'something',
                                    'filter':''}),
                           'text'])

    @internationalizeDocstring
    def cache(self, irc, msg, args, url):
        """<url>

        Returns a link to the cached version of <url> if it is available.
        """
        # search() returns the raw page text, so it has to go through
        # decode() before individual results can be inspected.
        page = self.search(url, msg.args[0], {'smallsearch': True})
        results = self.decode(page)
        if results:
            # decode() only produces the groups named in _decode_re, so a
            # 'cacheUrl' entry is not guaranteed: look it up defensively and
            # fall through to the error reply when it is absent.
            cacheUrl = results[0].get('cacheUrl')
            if cacheUrl:
                if minisix.PY2:
                    cacheUrl = cacheUrl.encode('utf-8')
                irc.reply(cacheUrl)
                return
        irc.error(_('Google seems to have no cache for that site.'))
    cache = wrap(cache, ['url'])

    # Captures the text of the element carrying id="resultStats".
    _fight_re = re.compile(r'id="resultStats"[^>]*>(?P<stats>[^<]*)')
    @internationalizeDocstring
    def fight(self, irc, msg, args):
        """<search string> <search string> [<search string> ...]

        Returns the results of each search, in order, from greatest number
        of results to least.
        """
        channel = msg.args[0]
        results = []
        for arg in args:
            text = self.search(arg, channel, {'smallsearch': True})
            match = self._fight_re.search(text)
            # A page without a resultStats element counts as zero results
            # instead of raising AttributeError on the missing match.
            stats = ''
            if match is not None:
                stats = utils.web.htmlToText(match.group('stats'))
            # Keep only the digits, dropping separators and wording.
            digits = ''.join(filter('0123456789'.__contains__, stats))
            count = int(digits) if digits else 0
            results.append((count, arg))
        # Greatest count first; ties fall back to the search string.
        results.sort(reverse=True)
        if self.registryValue('bold', channel):
            bold = ircutils.bold
        else:
            bold = repr
        s = ', '.join([format('%s: %i', bold(term), count)
                       for (count, term) in results])
        irc.reply(s)


    def _translate(self, sourceLang, targetLang, text):
        """Translates <text> through translate.googleapis.com.

        Returns a (translation, language) tuple.  <language> is the third
        element of the decoded response, or 'unknown' when it is missing;
        <translation> is a "no translations" message when the response
        holds no translated segments.
        """
        headers = dict(utils.web.defaultHeaders)
        headers['User-Agent'] = ('Mozilla/5.0 (X11; U; Linux i686) '
                                 'Gecko/20071127 Firefox/2.0.0.11')

        sourceLang = utils.web.urlquote(sourceLang)
        targetLang = utils.web.urlquote(targetLang)

        text = utils.web.urlquote(text)

        fd = utils.web.getUrlFd('http://translate.googleapis.com/translate_a/single'
                                '?client=gtx&dt=t&sl=%s&tl=%s&q='
                                '%s' % (sourceLang, targetLang, text),
                                headers)
        try:
            result = fd.read().decode('utf8')
        finally:
            # Release the connection even if reading or decoding fails.
            fd.close()

        try:
            data = json.loads(result)
        except ValueError:
            # The payload is not strict JSON: fill in elided array elements
            # and retry.  This is only a fallback, because on valid JSON the
            # same textual patch would also rewrite ',,' or '[,' occurring
            # inside the translated text itself.
            while ',,' in result:
                result = result.replace(',,', ',null,')
            while '[,' in result:
                result = result.replace('[,', '[')
            data = json.loads(result)

        try:
            language = data[2]
        except (IndexError, KeyError, TypeError):
            language = 'unknown'

        if data[0]:
            return (''.join(x[0] for x in data[0]), language)
        else:
            return (_('No translations found.'), language)

    @internationalizeDocstring
    def translate(self, irc, msg, args, sourceLang, targetLang, text):
        """<source language> [to] <target language> <text>

        Returns <text> translated from <source language> into <target
        language>. <source language> and <target language> take language
        codes (not language names), which are listed here:
        https://cloud.google.com/translate/docs/languages
        """
        # Not read in this method; kept because this file relies on
        # dynamic-scope lookups (dynamic.irc) that inspect callers' locals.
        channel = msg.args[0]
        (text, language) = self._translate(sourceLang, targetLang, text)
        # NOTE(review): language is passed as reply()'s second positional
        # argument, whereas every other reply() call in this class passes
        # only the text (plus keywords) -- confirm this is intended.
        irc.reply(text, language)
    translate = wrap(translate, ['something', 'to', 'something', 'text'])

    def googleSnarfer(self, irc, msg, match):
        r"^google\s+(.*)$"
        if not self.registryValue('searchSnarfer', msg.args[0]):
            return
        searchString = match.group(1)
        # search() returns the raw page text; decode() turns it into result
        # dicts, the same way lucky() obtains its first result.
        page = self.search(searchString, msg.args[0], {'smallsearch': True})
        results = self.decode(page)
        if results:
            irc.reply(results[0]['url'], prefixNick=False)
    googleSnarfer = urlSnarfer(googleSnarfer)

    def _googleUrl(self, s, channel):
        # Builds the search URL for <s> on the host configured as baseUrl
        # for <channel>.
        s = utils.web.urlquote_plus(s)
        url = r'http://%s/search?q=%s' % \
                (self.registryValue('baseUrl', channel), s)
        return url

    # Page fragments that may hold the calculator answer, tried in order.
    _calcRe1 = re.compile(r'<span class="cwcot".*?>(.*?)</span>', re.I)
    _calcRe2 = re.compile(r'<div class="vk_ans.*?>(.*?)</div>', re.I | re.S)
    _calcRe3 = re.compile(r'<div class="side_div" id="rhs_div">.*?<input class="ucw_data".*?value="(.*?)"', re.I)
    @internationalizeDocstring
    def calc(self, irc, msg, args, expr):
        """<expression>

        Uses Google's calculator to calculate the value of <expression>.
        """
        channel = msg.args[0]
        if not irc.isChannel(channel):
            channel = None
        url = self._googleUrl(expr, channel)
        h = {"User-Agent":"Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1667.0 Safari/537.36"}
        html = utils.web.getUrl(url, headers=h).decode('utf8')
        # Use the first pattern that matches.
        match = None
        for regexp in (self._calcRe1, self._calcRe2, self._calcRe3):
            match = regexp.search(html)
            if match:
                break
        if not match:
            irc.reply("I could not find an output from Google Calc for: %s" % expr)
            return
        s = match.group(1)
        # do some cleanup of text
        # Fractions (<sup>a</sup>&#8260;<sub>b</sub>) become " a/b"; any
        # remaining superscript becomes "^exponent".
        s = re.sub(r'<sup>(.*)</sup>&#8260;<sub>(.*)</sub>', r' \1/\2', s)
        s = re.sub(r'<sup>(.*)</sup>', r'^\1', s)
        s = utils.web.htmlToText(s)
        irc.reply("%s = %s" % (expr, s))
    calc = wrap(calc, ['text'])

    _phoneRe = re.compile(r'Phonebook.*?<font size=-1>(.*?)<a href')
    @internationalizeDocstring
    def phonebook(self, irc, msg, args, phonenumber):
        """<phone number>

        Looks <phone number> up on Google.
        """
        channel = msg.args[0]
        if not irc.isChannel(channel):
            channel = None
        url = self._googleUrl(phonenumber, channel)
        html = utils.web.getUrl(url).decode('utf8')
        m = self._phoneRe.search(html)
        if m is not None:
            s = m.group(1)
            # Drop the bold tags before converting the rest to plain text.
            s = s.replace('<b>', '')
            s = s.replace('</b>', '')
            s = utils.web.htmlToText(s)
            irc.reply(s)
        else:
            irc.reply(_('Google\'s phonebook didn\'t come up with anything.'))
    phonebook = wrap(phonebook, ['text'])
377
378
# Module-level alias for the Google plugin class.
# NOTE(review): presumably the name the plugin loader looks up -- confirm.
Class = Google
380
381
382# vim:set shiftwidth=4 softtabstop=4 expandtab textwidth=79:
383