# -*- coding: iso-8859-1 -*-
"""
    This implements a global (and a local) blacklist against wiki spammers.

    @copyright: 2005-2008 MoinMoin:ThomasWaldmann
    @license: GNU GPL, see COPYING for details
"""

import re, time, datetime

from MoinMoin import log
logging = log.getLogger(__name__)

from MoinMoin.security import Permissions
from MoinMoin import caching, wikiutil

# Errors ---------------------------------------------------------------

class Error(Exception):
    """Base class for antispam errors."""

    def __str__(self):
        return repr(self)

class WikirpcError(Error):
    """ Raised when we get xmlrpclib.Fault """

    def __init__(self, msg, fault):
        """ Init with msg and xmlrpclib.Fault dict """
        self.msg = msg
        self.fault = fault

    def __str__(self):
34        """ Format the using description and data from the fault """
        return self.msg + ": [%(faultCode)s]  %(faultString)s" % self.fault

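# Illustrative formatting (hypothetical fault dict, shown for clarity only):
#
#   err = WikirpcError("failed to get BadContent information",
#                      {'faultCode': 1, 'faultString': 'No such page'})
#   str(err)
#   # -> 'failed to get BadContent information: [1]  No such page'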

# Functions ------------------------------------------------------------

def makelist(text):
    """ Split text into lines, strip them, skip # comments """
    lines = text.splitlines()
    result = []
    for line in lines:
        line = line.split(' # ', 1)[0] # strip trailing ' # ' comment
        line = line.strip()
        if line and not line.startswith('#'):
            result.append(line)
    return result
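
# Doctest-style illustration (assumed input, shown for clarity):
#
#   >>> makelist(u"bad-regex-1 # inline comment\n# comment line\n\nbad-regex-2\n")
#   [u'bad-regex-1', u'bad-regex-2']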


def getblacklist(request, pagename, do_update):
53    """ Get blacklist, possibly downloading new copy
54
55    @param request: current request (request instance)
56    @param pagename: bad content page name (unicode)
57    @rtype: list
58    @return: list of blacklisted regular expressions
59    """
    from MoinMoin.PageEditor import PageEditor
    p = PageEditor(request, pagename, uid_override="Antispam subsystem")
    mymtime = wikiutil.version2timestamp(p.mtime_usecs())
    if do_update:
        tooold = time.time() - 1800
        failure = caching.CacheEntry(request, "antispam", "failure", scope='wiki')
        fail_time = failure.mtime() # only update if no failure in the last 30 minutes
        if (mymtime < tooold) and (fail_time < tooold):
            logging.info("%s is older than 30 minutes, have to check for an update..." % pagename)
            import xmlrpclib
            import socket

            timeout = 15 # timeout for reaching the master server via xmlrpc
            old_timeout = socket.getdefaulttimeout()
            socket.setdefaulttimeout(timeout)
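            # xmlrpclib in Python 2 has no per-connection timeout parameter,
            # so the process-wide socket default is changed here and restored
            # at the end of this block.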

            master_url = request.cfg.antispam_master_url
            master = xmlrpclib.ServerProxy(master_url)
            try:
                # Get BadContent info
                master.putClientInfo('ANTISPAM-CHECK', request.url)
                response = master.getPageInfo(pagename)

                # Results and faults both come back as dicts; a fault carries 'faultCode'
                if isinstance(response, dict) and 'faultCode' in response:
                    raise WikirpcError("failed to get BadContent information",
                                       response)

                # Compare date against local BadContent copy
                masterdate = response['lastModified']

                if isinstance(masterdate, datetime.datetime):
                    # for python 2.5
                    mydate = datetime.datetime(*tuple(time.gmtime(mymtime))[0:6])
                else:
                    # for python <= 2.4.x
                    mydate = xmlrpclib.DateTime(tuple(time.gmtime(mymtime)))

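                # mydate mirrors the type the master sent, so the comparison
                # below compares like with like.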
                logging.debug("master: %s mine: %s" % (masterdate, mydate))
                if mydate < masterdate:
                    # Get new copy and save
                    logging.info("Fetching page from %s..." % master_url)
                    master.putClientInfo('ANTISPAM-FETCH', request.url)
                    response = master.getPage(pagename)
                    if isinstance(response, dict) and 'faultCode' in response:
                        raise WikirpcError("failed to get BadContent data", response)
                    p._write_file(response)
                    mymtime = wikiutil.version2timestamp(p.mtime_usecs())
                else:
                    failure.update("") # we didn't get a modified version, this avoids
                                       # permanent polling for every save when there
                                       # is no updated master page

            except (socket.error, xmlrpclib.ProtocolError), err:
                logging.error('Timeout / socket / protocol error when accessing %s: %s' % (master_url, str(err)))
                # update cache to wait before the next try
                failure.update("")

            except xmlrpclib.Fault, err:
                logging.error('Fault on %s: %s' % (master_url, str(err)))
                # update cache to wait before the next try
                failure.update("")

            except Error, err:
                # In case of Error, we log the error and use the local BadContent copy.
                logging.error(str(err))

            # restore the original socket timeout
            socket.setdefaulttimeout(old_timeout)

    blacklist = p.get_raw_body()
    return mymtime, makelist(blacklist)
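
# Minimal usage sketch (assumes a live MoinMoin request object):
#
#   mymtime, patterns = getblacklist(request, u"BadContent", do_update=True)
#   # -> (local page mtime as a UNIX timestamp, list of regex source strings)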


class SecurityPolicy(Permissions):
    """ Extend the default security policy with antispam feature """

    def save(self, editor, newtext, rev, **kw):
        BLACKLISTPAGES = ["BadContent", "LocalBadContent"]
        if editor.page_name not in BLACKLISTPAGES:
            request = editor.request

            # Start timing of antispam operation
            request.clock.start('antispam')

            blacklist = []
            latest_mtime = 0
            for pn in BLACKLISTPAGES:
                do_update = (pn != "LocalBadContent" and
                             request.cfg.interwikiname != 'MoinMaster') # MoinMaster wiki shall not fetch updates from itself
                blacklist_mtime, blacklist_entries = getblacklist(request, pn, do_update)
                blacklist += blacklist_entries
                latest_mtime = max(latest_mtime, blacklist_mtime)

            if blacklist:
                invalid_cache = not getattr(request.cfg.cache, "antispam_blacklist", None)
                if invalid_cache or request.cfg.cache.antispam_blacklist[0] < latest_mtime:
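                    # (Re)compile and cache the patterns on request.cfg.cache:
                    # a (latest_mtime, [compiled regexes]) tuple shared by all
                    # requests served by this process.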
                    mmblcache = []
                    for blacklist_re in blacklist:
                        try:
                            mmblcache.append(re.compile(blacklist_re, re.I))
                        except re.error, err:
                            logging.error("Error in regex '%s': %s. Please check the pages %s." % (
                                          blacklist_re,
                                          str(err),
                                          ', '.join(BLACKLISTPAGES)))
                    request.cfg.cache.antispam_blacklist = (latest_mtime, mmblcache)

                from MoinMoin.Page import Page

                oldtext = ""
                if rev > 0: # rev is the revision of the old page
                    page = Page(request, editor.page_name, rev=rev)
                    oldtext = page.get_raw_body()

                newset = frozenset(newtext.splitlines(1))
                oldset = frozenset(oldtext.splitlines(1))
                difference = newset - oldset
                addedtext = kw.get('comment', u'') + u''.join(difference)
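                # Only lines added by this edit (plus the edit comment) are
                # checked, so content already on the page never blocks a save.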

                for blacklist_re in request.cfg.cache.antispam_blacklist[1]:
                    match = blacklist_re.search(addedtext)
                    if match:
                        # Log error and raise SaveError, PageEditor should handle this.
                        _ = editor.request.getText
                        msg = _('Sorry, can not save page because "%(content)s" is not allowed in this wiki.') % {
                                  'content': wikiutil.escape(match.group())
                              }
                        logging.info(msg)
                        raise editor.SaveError(msg)
            request.clock.stop('antispam')
        # No problem to save if my base class agrees
        return Permissions.save(self, editor, newtext, rev, **kw)


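# Standalone sketch of the matching step above (hypothetical patterns,
# independent of the request machinery):
#
#   import re
#   patterns = [re.compile(p, re.I) for p in [r'cheap-\w+-online', r'casino']]
#   added = u"Win big at our CASINO!"
#   if any(p.search(added) for p in patterns):
#       pass  # the save would be rejected with editor.SaveError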