###
# Copyright (c) 2002-2004, Jeremiah Fincher
# Copyright (c) 2008-2010, James McCoy
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
#   * Redistributions of source code must retain the above copyright notice,
#     this list of conditions, and the following disclaimer.
#   * Redistributions in binary form must reproduce the above copyright notice,
#     this list of conditions, and the following disclaimer in the
#     documentation and/or other materials provided with the distribution.
#   * Neither the name of the author of this software nor the name of
#     contributors to this software may be used to endorse or promote products
#     derived from this software without specific prior written consent.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
###

import re
import sys
import json

import supybot.conf as conf
import supybot.utils as utils
import supybot.world as world
from supybot.commands import *
import supybot.utils.minisix as minisix
import supybot.ircmsgs as ircmsgs
import supybot.ircutils as ircutils
import supybot.callbacks as callbacks
from supybot.i18n import PluginInternationalization, internationalizeDocstring
_ = PluginInternationalization('Google')

class Google(callbacks.PluginRegexp):
    """This is a simple plugin to provide access to the Google services we
    all know and love from our favorite IRC bot."""
    # NOTE: the class docstring, the command docstrings and the docstring of
    # googleSnarfer (which is its trigger regexp) are runtime text, not mere
    # documentation; maintainer notes therefore live in '#' comments.
    threaded = True
    callBefore = ['Web']
    regexps = ['googleSnarfer']

    # Cache of already colorized spellings, keyed by the exact matched text
    # (_googleRe is case-insensitive, so several spellings can occur).
    _colorGoogles = {}
    def _getColorGoogle(self, m):
        """Return a bold, multi-colored rendering of the word matched by m.

        m is a match of _googleRe; group(1) is the six-letter word.  The
        colored form is memoized in _colorGoogles.
        """
        s = m.group(1)
        ret = self._colorGoogles.get(s)
        if not ret:
            L = list(s)
            # [:-1] drops the last character mircColor() appended to each
            # letter (presumably the color terminator -- TODO confirm); only
            # the final letter keeps it.
            L[0] = ircutils.mircColor(L[0], 'blue')[:-1]
            L[1] = ircutils.mircColor(L[1], 'red')[:-1]
            L[2] = ircutils.mircColor(L[2], 'yellow')[:-1]
            L[3] = ircutils.mircColor(L[3], 'blue')[:-1]
            L[4] = ircutils.mircColor(L[4], 'green')[:-1]
            L[5] = ircutils.mircColor(L[5], 'red')
            ret = ''.join(L)
            self._colorGoogles[s] = ret
        return ircutils.bold(ret)

    _googleRe = re.compile(r'\b(google)\b', re.I)
    def outFilter(self, irc, msg):
        """Colorize every "google" in outgoing PRIVMSGs.

        Only active when the 'colorfulFilter' registry value is set for the
        message target; any other message is returned unchanged.
        """
        if msg.command == 'PRIVMSG' and \
           self.registryValue('colorfulFilter', msg.args[0]):
            s = self._googleRe.sub(self._getColorGoogle, msg.args[1])
            msg = ircmsgs.privmsg(msg.args[0], s, msg=msg)
        return msg

    _decode_re = re.compile(r'<div class="\w+"><a href="/url\?q=(?P<url>[^"]+)&[^"]+"><div class="(\w| )+">(?P<title>.*?)</div><div class="(\w| )+">(?P<breadcrumbs>.*?)</div></a></div>(?P<content><div class="(\w| )+">.*?</div></div>)', re.DOTALL | re.MULTILINE)
    @classmethod
    def decode(cls, text):
        """Extract the search results from the HTML of a result page.

        Returns a list with one dict per match of _decode_re, holding the
        keys 'url', 'title', 'breadcrumbs' and 'content'.  'url' is cleaned
        up; the other values are the raw HTML fragments.
        """
        results = []
        for match in cls._decode_re.finditer(text):
            r = match.groupdict()
            # Keep what precedes the first '&', then undo the HTML and URL
            # escaping.
            target = r['url'].split('&')[0]
            r['url'] = utils.web.urlunquote(utils.web.htmlToText(target))
            results.append(r)
        return results


    _gsearchUrl = 'https://www.google.com/search'
    def search(self, query, channel, options=None):
        """search("search phrase", channel, options={})

        Fetches the Google result page for the query and returns its HTML as
        text (use decode() to turn it into result dicts).

        Valid options are:
            smallsearch - True/False (Default: False)
            filter - {active,moderate,off} (Default: "moderate")
            language - Restrict search to documents in the given language
                       (Default: "lang_en")
        """
        self.log.warning('The Google plugin search is deprecated since '
        'Google closed their public API and will be removed in a '
        'future release. Please consider switching to an other '
        'plugin for your searches, like '
        '<https://github.com/Hoaas/Supybot-plugins/tree/master/DuckDuckGo>, '
        '<https://github.com/joulez/GoogleCSE>, or '
        '<https://github.com/GLolol/SupyPlugins/tree/master/DDG>.')
        if options is None:
            options = {}
        ref = self.registryValue('referer')
        if not ref:
            ref = 'http://%s/%s' % (dynamic.irc.server,
                                    dynamic.irc.nick)
        headers = dict(utils.web.defaultHeaders)
        headers['Referer'] = ref
        opts = {'q': query, 'gbv': '2'}
        for (k, v) in options.items():
            if k == 'smallsearch':
                opts['rsz'] = 'small' if v else 'large'
            elif k == 'filter':
                opts['safe'] = v
            elif k == 'language':
                opts['hl'] = v
        defLang = self.registryValue('defaultLanguage', channel)
        if 'hl' not in opts and defLang:
            # str.strip('lang_') removes any of the characters l/a/n/g/_ from
            # both ends ('lang_en' -> 'e', 'lang_nl' -> ''); only the literal
            # prefix must go.
            if defLang.startswith('lang_'):
                defLang = defLang[len('lang_'):]
            opts['hl'] = defLang
        if 'safe' not in opts:
            opts['safe'] = self.registryValue('searchFilter', channel)
        if 'rsz' not in opts:
            opts['rsz'] = 'large'

        url = '%s?%s' % (self._gsearchUrl, utils.web.urlencode(opts))
        return utils.web.getUrl(url, headers=headers).decode('utf8')

    def formatData(self, data, bold=True, max=0, onetoone=False):
        """Turn the HTML of a result page into a list of reply strings.

        data is the text returned by search().  At most max results are used
        (0 means no limit).  Each result is rendered as "title: url", or as
        the bare URL when it has no title; bold makes the title bold.  With
        onetoone the list has one entry per result, otherwise a single entry
        joining all of them with '; '.  Without any result the list holds
        the "No matches found." message.
        """
        data = self.decode(data)
        if max:
            data = data[:max]
        results = []
        for result in data:
            title = utils.web.htmlToText(result['title']\
                                         .encode('utf-8'))
            url = result['url']
            if minisix.PY2:
                url = url.encode('utf-8')
            if title:
                if bold:
                    title = ircutils.bold(title)
                results.append(format('%s: %u', title, url))
            else:
                results.append(url)
        if minisix.PY2:
            repl = lambda x:x if isinstance(x, unicode) else unicode(x, 'utf8')
            results = list(map(repl, results))
        if not results:
            return [_('No matches found.')]
        elif onetoone:
            return results
        else:
            return [minisix.u('; ').join(results)]

    @internationalizeDocstring
    def lucky(self, irc, msg, args, opts, text):
        """[--snippet] <search>

        Does a google search, but only returns the first result.
        If option --snippet is given, returns also the page text snippet.
        """
        opts = dict(opts)
        data = self.search(text, msg.args[0], {'smallsearch': True})
        data = self.decode(data)
        if data:
            url = data[0]['url']
            if 'snippet' in opts:
                snippet = data[0]['content']
                snippet = " | " + utils.web.htmlToText(snippet, tagReplace='')
            else:
                snippet = ""
            result = url + snippet
            irc.reply(result)
        else:
            irc.reply(_('Google found nothing.'))
    lucky = wrap(lucky, [getopts({'snippet':'',}), 'text'])

    @internationalizeDocstring
    def google(self, irc, msg, args, optlist, text):
        """<search> [--{filter,language} <value>]

        Searches google.com for the given string. As many results as can fit
        are included. --language accepts a language abbreviation; --filter
        accepts a filtering level ('active', 'moderate', 'off').
        """
        # NOTE(review): optlist is turned into a dict with dict(optlist)
        # below, which suggests it is a sequence of (name, value) pairs; if
        # so this membership test never matches and the language is never
        # validated.  The registry entry it consults is not visible from this
        # file either -- confirm both before enabling the check.
        if 'language' in optlist and optlist['language'].lower() not in \
           conf.supybot.plugins.Google.safesearch.validStrings:
            irc.errorInvalid('language')
        data = self.search(text, msg.args[0], dict(optlist))
        bold = self.registryValue('bold', msg.args[0])
        limit = self.registryValue('maximumResults', msg.args[0])
        # We don't use supybot.reply.oneToOne here, because you generally
        # do not want @google to echo ~20 lines of results, even if you
        # have reply.oneToOne enabled.
        onetoone = self.registryValue('oneToOne', msg.args[0])
        for result in self.formatData(data,
                                  bold=bold, max=limit, onetoone=onetoone):
            irc.reply(result)
    google = wrap(google, [getopts({'language':'something',
                                    'filter':''}),
                           'text'])

    @internationalizeDocstring
    def cache(self, irc, msg, args, url):
        """<url>

        Returns a link to the cached version of <url> if it is available.
        """
        data = self.search(url, msg.args[0], {'smallsearch': True})
        # search() returns the HTML of the result page, so it has to go
        # through decode() before it can be indexed as a list of dicts.  The
        # dicts built by decode() carry no 'cacheUrl' key, hence .get(): the
        # command reports a missing cache instead of raising.
        results = self.decode(data)
        if results:
            cacheUrl = results[0].get('cacheUrl')
            if cacheUrl:
                if minisix.PY2:
                    cacheUrl = cacheUrl.encode('utf-8')
                irc.reply(cacheUrl)
                return
        irc.error(_('Google seems to have no cache for that site.'))
    cache = wrap(cache, ['url'])

    _fight_re = re.compile(r'id="resultStats"[^>]*>(?P<stats>[^<]*)')
    @internationalizeDocstring
    def fight(self, irc, msg, args):
        """<search string> <search string> [<search string> ...]

        Returns the results of each search, in order, from greatest number
        of results to least.
        """
        # This command is not wrapped: args is the raw argument list and
        # every element is one search string.
        channel = msg.args[0]
        results = []
        for arg in args:
            text = self.search(arg, channel, {'smallsearch': True})
            match = self._fight_re.search(text)
            # A page without the resultStats element counts as zero results.
            stats = ''
            if match is not None:
                stats = utils.web.htmlToText(match.group('stats'))
            # Keep the ASCII digits only, dropping separators and words.
            count = ''.join(filter('0123456789'.__contains__, stats))
            results.append((int(count) if count else 0, arg))
        results.sort(reverse=True)
        if self.registryValue('bold', channel):
            bold = ircutils.bold
        else:
            bold = repr
        s = ', '.join([format('%s: %i', bold(s), i) for (i, s) in results])
        irc.reply(s)


    def _translate(self, sourceLang, targetLang, text):
        """Translate text and return a (translation, language) tuple.

        sourceLang and targetLang are language codes; all three arguments are
        URL-quoted before being sent.  language is the third element of the
        decoded response, or 'unknown' when the response has none.  When the
        response holds no translation, the first element of the tuple is the
        "No translations found." message.
        """
        headers = dict(utils.web.defaultHeaders)
        headers['User-Agent'] = ('Mozilla/5.0 (X11; U; Linux i686) '
                                 'Gecko/20071127 Firefox/2.0.0.11')

        sourceLang = utils.web.urlquote(sourceLang)
        targetLang = utils.web.urlquote(targetLang)

        text = utils.web.urlquote(text)

        fd = utils.web.getUrlFd('http://translate.googleapis.com/translate_a/single'
                                '?client=gtx&dt=t&sl=%s&tl=%s&q='
                                '%s' % (sourceLang, targetLang, text),
                                headers)
        try:
            result = fd.read().decode('utf8')
        finally:
            # Do not leak the connection if reading or decoding fails.
            fd.close()

        # The response leaves array slots empty, which is not valid JSON:
        # fill the inner holes with null and drop a leading one.
        while ',,' in result:
            result = result.replace(',,', ',null,')
        while '[,' in result:
            result = result.replace('[,', '[')
        data = json.loads(result)

        try:
            language = data[2]
        except (IndexError, KeyError, TypeError):
            language = 'unknown'

        if data[0]:
            return (''.join(x[0] for x in data[0]), language)
        else:
            return (_('No translations found.'), language)

    @internationalizeDocstring
    def translate(self, irc, msg, args, sourceLang, targetLang, text):
        """<source language> [to] <target language> <text>

        Returns <text> translated from <source language> into <target
        language>. <source language> and <target language> take language
        codes (not language names), which are listed here:
        https://cloud.google.com/translate/docs/languages
        """
        (text, language) = self._translate(sourceLang, targetLang, text)
        # NOTE(review): language is handed to irc.reply() as its second
        # positional argument; confirm which parameter that is.
        irc.reply(text, language)
    translate = wrap(translate, ['something', 'to', 'something', 'text'])

    def googleSnarfer(self, irc, msg, match):
        r"^google\s+(.*)$"
        if not self.registryValue('searchSnarfer', msg.args[0]):
            return
        searchString = match.group(1)
        data = self.search(searchString, msg.args[0], {'smallsearch': True})
        # search() returns the HTML of the result page; decode() turns it
        # into result dicts whose 'url' is what gets announced.
        results = self.decode(data)
        if results:
            irc.reply(results[0]['url'], prefixNick=False)
    googleSnarfer = urlSnarfer(googleSnarfer)

    def _googleUrl(self, s, channel):
        """Return the search URL for s on the 'baseUrl' host of channel."""
        s = utils.web.urlquote_plus(s)
        url = r'http://%s/search?q=%s' % \
            (self.registryValue('baseUrl', channel), s)
        return url

    _calcRe1 = re.compile(r'<span class="cwcot".*?>(.*?)</span>', re.I)
    _calcRe2 = re.compile(r'<div class="vk_ans.*?>(.*?)</div>', re.I | re.S)
    _calcRe3 = re.compile(r'<div class="side_div" id="rhs_div">.*?<input class="ucw_data".*?value="(.*?)"', re.I)
    @internationalizeDocstring
    def calc(self, irc, msg, args, expr):
        """<expression>

        Uses Google's calculator to calculate the value of <expression>.
        """
        channel = msg.args[0]
        if not irc.isChannel(channel):
            channel = None
        url = self._googleUrl(expr, channel)
        h = {"User-Agent":"Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1667.0 Safari/537.36"}
        html = utils.web.getUrl(url, headers=h).decode('utf8')
        # Try the known page layouts in order; the first one that matches
        # provides the answer.
        for regexp in (self._calcRe1, self._calcRe2, self._calcRe3):
            match = regexp.search(html)
            if match:
                break
        else:
            irc.reply("I could not find an output from Google Calc for: %s" % expr)
            return
        s = match.group(1)
        # do some cleanup of text
        # The fraction slash is accepted both as the literal character and
        # as its numeric entity, since the page is matched before
        # htmlToText() runs (assumes the raw HTML may use either form).
        s = re.sub(r'<sup>(.*)</sup>(?:⁄|&#8260;)<sub>(.*)</sub>', r' \1/\2', s)
        s = re.sub(r'<sup>(.*)</sup>', r'^\1', s)
        s = utils.web.htmlToText(s)
        irc.reply("%s = %s" % (expr, s))
    calc = wrap(calc, ['text'])

    _phoneRe = re.compile(r'Phonebook.*?<font size=-1>(.*?)<a href')
    @internationalizeDocstring
    def phonebook(self, irc, msg, args, phonenumber):
        """<phone number>

        Looks <phone number> up on Google.
        """
        channel = msg.args[0]
        if not irc.isChannel(channel):
            channel = None
        url = self._googleUrl(phonenumber, channel)
        html = utils.web.getUrl(url).decode('utf8')
        m = self._phoneRe.search(html)
        if m is not None:
            s = m.group(1)
            s = s.replace('<b>', '')
            s = s.replace('</b>', '')
            s = utils.web.htmlToText(s)
            irc.reply(s)
        else:
            irc.reply(_('Google\'s phonebook didn\'t come up with anything.'))
    phonebook = wrap(phonebook, ['text'])


Class = Google


# vim:set shiftwidth=4 softtabstop=4 expandtab textwidth=79: