# -*- coding: iso-8859-1 -*-
"""
    This implements a global (and a local) blacklist against wiki spammers.

    Blacklisted regexes live on two wiki pages: BadContent (periodically
    refreshed from a master wiki via XML-RPC) and LocalBadContent
    (maintained locally).  On every page save, the text added by the edit
    is matched against the compiled regexes and the save is refused when
    anything matches.

    @copyright: 2005-2008 MoinMoin:ThomasWaldmann
    @license: GNU GPL, see COPYING for details
"""

import re, time, datetime

from MoinMoin import log
logging = log.getLogger(__name__)

from MoinMoin.security import Permissions
from MoinMoin import caching, wikiutil

# Errors ---------------------------------------------------------------

class Error(Exception):
    """Base class for antispam errors."""

    def __str__(self):
        return repr(self)

class WikirpcError(Error):
    """ Raised when we get xmlrpclib.Fault """

    def __init__(self, msg, fault):
        """ Init with msg and xmlrpclib.Fault dict

        @param msg: human-readable description of what failed
        @param fault: dict carrying 'faultCode' and 'faultString' keys
                      (the fault response returned by the XML-RPC server)
        """
        self.msg = msg
        self.fault = fault

    def __str__(self):
        """ Format the error using the description and data from the fault """
        return self.msg + ": [%(faultCode)s] %(faultString)s" % self.fault


# Functions ------------------------------------------------------------

def makelist(text):
    """ Split text into lines, strip them, skip # comments

    @param text: raw page body, one blacklist regex per line
    @rtype: list
    @return: stripped, non-empty, non-comment lines
    """
    lines = text.splitlines()
    result = []
    for line in lines:
        # ' # ' (with surrounding spaces) starts a rest-of-line comment
        line = line.split(' # ', 1)[0] # rest of line comment
        line = line.strip()
        if line and not line.startswith('#'):
            result.append(line)
    return result


def getblacklist(request, pagename, do_update):
    """ Get blacklist, possibly downloading new copy

    When do_update is true and both the local copy and the last recorded
    failure are older than 30 minutes, ask the antispam master wiki (via
    XML-RPC) whether it has a newer version of the page and fetch it if so.
    On any network/XML-RPC problem the failure cache entry is touched so we
    back off, and the local copy is used.

    @param request: current request (request instance)
    @param pagename: bad content page name (unicode)
    @param do_update: whether to try refreshing the page from the master wiki
    @rtype: tuple
    @return: (page mtime as timestamp, list of blacklisted regular expressions)
    """
    from MoinMoin.PageEditor import PageEditor
    p = PageEditor(request, pagename, uid_override="Antispam subsystem")
    mymtime = wikiutil.version2timestamp(p.mtime_usecs())
    if do_update:
        tooold = time.time() - 1800  # 30 minutes; update/retry interval
        failure = caching.CacheEntry(request, "antispam", "failure", scope='wiki')
        fail_time = failure.mtime() # only update if no failure in the last 30 minutes
        if (mymtime < tooold) and (fail_time < tooold):
            logging.info("%d *BadContent too old, have to check for an update..." % tooold)
            import xmlrpclib
            import socket

            timeout = 15 # time out for reaching the master server via xmlrpc
            old_timeout = socket.getdefaulttimeout()
            socket.setdefaulttimeout(timeout)

            master_url = request.cfg.antispam_master_url
            master = xmlrpclib.ServerProxy(master_url)
            try:
                # Get BadContent info
                master.putClientInfo('ANTISPAM-CHECK', request.url)
                response = master.getPageInfo(pagename)

                # It seems that response is always a dict
                if isinstance(response, dict) and 'faultCode' in response:
                    raise WikirpcError("failed to get BadContent information",
                                       response)

                # Compare date against local BadContent copy
                masterdate = response['lastModified']

                if isinstance(masterdate, datetime.datetime):
                    # for python 2.5
                    mydate = datetime.datetime(*tuple(time.gmtime(mymtime))[0:6])
                else:
                    # for python <= 2.4.x
                    mydate = xmlrpclib.DateTime(tuple(time.gmtime(mymtime)))

                logging.debug("master: %s mine: %s" % (masterdate, mydate))
                if mydate < masterdate:
                    # Get new copy and save
                    logging.info("Fetching page from %s..." % master_url)
                    master.putClientInfo('ANTISPAM-FETCH', request.url)
                    response = master.getPage(pagename)
                    if isinstance(response, dict) and 'faultCode' in response:
                        raise WikirpcError("failed to get BadContent data", response)
                    p._write_file(response)
                    mymtime = wikiutil.version2timestamp(p.mtime_usecs())
                else:
                    failure.update("") # we didn't get a modified version, this avoids
                                       # permanent polling for every save when there
                                       # is no updated master page

            except (socket.error, xmlrpclib.ProtocolError), err:
                logging.error('Timeout / socket / protocol error when accessing %s: %s' % (master_url, str(err)))
                # update cache to wait before the next try
                failure.update("")

            except (xmlrpclib.Fault, ), err:
                logging.error('Fault on %s: %s' % (master_url, str(err)))
                # update cache to wait before the next try
                failure.update("")

            except Error, err:
                # In case of Error, we log the error and use the local BadContent copy.
                logging.error(str(err))

            # set back socket timeout
            socket.setdefaulttimeout(old_timeout)

    # Always return the (possibly just refreshed) local copy.
    blacklist = p.get_raw_body()
    return mymtime, makelist(blacklist)


class SecurityPolicy(Permissions):
    """ Extend the default security policy with antispam feature """

    def save(self, editor, newtext, rev, **kw):
        """ Refuse the save if text added by this edit matches the blacklist.

        Compiled blacklist regexes are cached on request.cfg.cache keyed by
        the newest page mtime, so they are only recompiled when one of the
        blacklist pages changes.  Only the *added* lines (new-text lines not
        present in the old revision) plus the edit comment are checked.

        @raises editor.SaveError: when added text matches a blacklist regex
        """
        BLACKLISTPAGES = ["BadContent", "LocalBadContent"]
        # Edits to the blacklist pages themselves are never spam-checked
        # (otherwise the blacklist entries would match themselves).
        if not editor.page_name in BLACKLISTPAGES:
            request = editor.request

            # Start timing of antispam operation
            request.clock.start('antispam')

            blacklist = []
            latest_mtime = 0
            for pn in BLACKLISTPAGES:
                do_update = (pn != "LocalBadContent" and
                             request.cfg.interwikiname != 'MoinMaster') # MoinMaster wiki shall not fetch updates from itself
                blacklist_mtime, blacklist_entries = getblacklist(request, pn, do_update)
                blacklist += blacklist_entries
                latest_mtime = max(latest_mtime, blacklist_mtime)

            if blacklist:
                # Recompile only when there is no cache yet or a blacklist
                # page is newer than the cached compilation.
                invalid_cache = not getattr(request.cfg.cache, "antispam_blacklist", None)
                if invalid_cache or request.cfg.cache.antispam_blacklist[0] < latest_mtime:
                    mmblcache = []
                    for blacklist_re in blacklist:
                        try:
                            mmblcache.append(re.compile(blacklist_re, re.I))
                        except re.error, err:
                            # A broken regex is skipped, not fatal - the rest
                            # of the blacklist still applies.
                            logging.error("Error in regex '%s': %s. Please check the pages %s." % (
                                          blacklist_re,
                                          str(err),
                                          ', '.join(BLACKLISTPAGES)))
                    request.cfg.cache.antispam_blacklist = (latest_mtime, mmblcache)

                from MoinMoin.Page import Page

                oldtext = ""
                if rev > 0: # rev is the revision of the old page
                    page = Page(request, editor.page_name, rev=rev)
                    oldtext = page.get_raw_body()

                # splitlines(1) keeps line endings, so lines compare exactly;
                # only lines newly introduced by this edit are checked.
                newset = frozenset(newtext.splitlines(1))
                oldset = frozenset(oldtext.splitlines(1))
                difference = newset - oldset
                addedtext = kw.get('comment', u'') + u''.join(difference)

                for blacklist_re in request.cfg.cache.antispam_blacklist[1]:
                    match = blacklist_re.search(addedtext)
                    if match:
                        # Log error and raise SaveError, PageEditor should handle this.
                        _ = editor.request.getText
                        msg = _('Sorry, can not save page because "%(content)s" is not allowed in this wiki.') % {
                            'content': wikiutil.escape(match.group())
                        }
                        logging.info(msg)
                        raise editor.SaveError(msg)
            request.clock.stop('antispam')

        # No problem to save if my base class agree
        return Permissions.save(self, editor, newtext, rev, **kw)