1 /*
2 ** Copyright (c) 2002 D. Richard Hipp
3 **
4 ** This program is free software; you can redistribute it and/or
5 ** modify it under the terms of the GNU General Public
6 ** License as published by the Free Software Foundation; either
7 ** version 2 of the License, or (at your option) any later version.
8 **
9 ** This program is distributed in the hope that it will be useful,
10 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
11 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 ** General Public License for more details.
13 **
14 ** You should have received a copy of the GNU General Public
15 ** License along with this library; if not, write to the
16 ** Free Software Foundation, Inc., 59 Temple Place - Suite 330,
17 ** Boston, MA 02111-1307, USA.
18 **
19 ** Author contact information:
20 ** drh@hwaci.com
21 ** http://www.hwaci.com/drh/
22 **
23 *******************************************************************************
24 **
25 ** This file contains code used to throttle output to misbehaving spiders.
26 */
27 #include <stdlib.h>
28 #include <math.h>
29 #include <time.h>
30 #include "throttle.h"
31
/*
** Per-second exponential decay constant for the per-IP load figure.
** It approximates ln(2)/3600 (equivalently -ln(0.5)/3600.0), so that
** exp(-DECAY_RATE*3600) is about 0.5: a stored load is halved for every
** hour that passes between accesses.
*/
#define DECAY_RATE 0.00019254

/* Lifetime of the captcha cookie in seconds (24 hours).  Each accepted
** request pushes the expiry this far into the future again.
*/
#define CAPTCHA_EXPIRE (24*60*60)

/* Starting value of the per-IP captcha counter.  throttle() counts it
** down on repeated over-limit hits and locks the client out once it
** reaches zero.
*/
#define CAPTCHA_LOCKOUT 5

/* Number of failed is_edit_allowed() checks that locks a user out.
** Each failure adds (throttle limit)/WIKI_EDIT_LOCKOUT to the load.
*/
#define WIKI_EDIT_LOCKOUT 2
48
49 /*
50 ** Return the name of the captcha cookie
51 */
captcha_cookie_name(void)52 static char *captcha_cookie_name(void){
53 return mprintf("%s_captcha", g.zName);
54 }
55
captcha_set_cookie(void)56 static void captcha_set_cookie(void){
57 /* The captcha cookie is just an expiry time value. Nothing fancy,
58 ** we don't need to keep much information. We do want to update it
59 ** to a later expiry on successful connection...
60 */
61 char *zExpiry = mprintf("%d",time(NULL)+CAPTCHA_EXPIRE);
62 cgi_set_cookie(captcha_cookie_name(), zExpiry, 0, 0);
63 }
64
/*
** Invalidate the captcha cookie by overwriting its value with "0".
** throttle() only accepts a cookie whose numeric value is later than the
** current time, so a value of zero never passes.
*/
static void captcha_clear_cookie(void){
  char *zCookieName = captcha_cookie_name();
  cgi_set_cookie(zCookieName, "0", 0, 0);
}
68
lockout()69 static void lockout(){
70 cgi_reset_content();
71 common_standard_menu(0,0);
72 common_header("Access denied");
73 @ <p>Your access to this website has been temporarily suspended because
74 @ you are using it excessively. You can retry your request later.</p>
75 common_footer();
76 cgi_append_header("Pragma: no-cache\r\n");
77 cgi_set_status(403,"Forbidden");
78 cgi_reply();
79 exit(0);
80 }
81
82 /*
83 ** Check to see if there have been too many recent accesses from the
84 ** same IP address.
85 **
86 ** If there is an overload, the resulting action depends on the exitOnOverload
87 ** parameter. If that parameter is true, an error reply is constructed
88 ** and an exit performed without returning. If the parameter is false,
89 ** a non-zero value is returned.
90 **
91 ** If the needCaptcha flag is non-zero, the client will be required to
92 ** pass the captcha test regardless of the load. However, failure to
93 ** pass the test won't result in eventual lockout.
94 **
95 ** If there is no overload, a zero is returned.
96 */
throttle(int exitOnOverload,int needCaptcha)97 int throttle(int exitOnOverload,int needCaptcha){
98 const char *zLimit;
99 double rLimit;
100 const char *zAddr;
101 char **az;
102 double rLoad;
103 double rLastLoad;
104 int lastAccess;
105 time_t now;
106 int overload;
107 int captcha = CAPTCHA_LOCKOUT;
108 int useCaptcha = atoi(db_config("enable_captcha","0"));
109 const char *zCookie = P(captcha_cookie_name());
110 const char *zUrl = getenv("REQUEST_URI");
111
112 time(&now);
113
114 if( !g.isAnon ) return 0; /* Throttling does not occur for identified users */
115 zLimit = db_config("throttle", 0);
116 if( zLimit==0 ) return 0; /* Throttling is turned off */
117 rLimit = atof(zLimit);
118 if( rLimit<=0.0 ) return 0; /* Throttling is turned off */
119
120 /* Users with valid captcha cookies are okay.
121 */
122 if( useCaptcha && zCookie && zCookie[0] && atoi(zCookie) > now ){
123 /* update the cookie to a new expiry time
124 */
125 captcha_set_cookie();
126 return 0;
127 }
128
129 zAddr = getenv("REMOTE_ADDR");
130 if( zAddr==0 ) return 0; /* No remote IP address provided */
131
132 az = db_query("SELECT load, lastaccess, captcha FROM access_load "
133 "WHERE ipaddr='%q'", zAddr);
134 if( az[0] && az[1] ){
135 rLastLoad = rLoad = atof(az[0]);
136 lastAccess = atoi(az[1]);
137 if( lastAccess>now ) lastAccess = now;
138 rLoad = 1.0 + exp(DECAY_RATE*(lastAccess-now))*rLoad;
139 if( rLoad>rLimit && rLoad<rLimit*2.0 ){
140 /* If the throttler triggers, make sure it locks out the IP address
141 ** for at least 1 hour */
142 rLoad = rLimit*2.0;
143 }
144 if( rLoad>rLimit && rLastLoad>rLimit && az[2] && useCaptcha ){
145 /* Once the client blows the limit, repeated hits on anything
146 ** other than the captcha page will decrement the captcha
147 ** counter, eventually resulting in lockout.
148 */
149 captcha = atoi(az[2])-1;
150 }
151 }else{
152 rLastLoad = rLoad = 1.0;
153 lastAccess = 0;
154 }
155
156 db_execute("REPLACE INTO access_load(ipaddr,load,lastaccess,captcha) "
157 "VALUES('%q',%g,%d,%d)", zAddr, rLoad, now, captcha);
158 overload = rLoad>=rLimit;
159 if( useCaptcha && (overload || needCaptcha) ){
160 if( captcha <= 0 && exitOnOverload ) {
161 /* Captcha lockout count exceeded, client is blocked until the
162 ** load drops again.
163 */
164 lockout();
165 }
166
167 if( zUrl==0 ) zUrl = "index";
168 cgi_redirect(mprintf("captcha?cnxp=%T", zUrl));
169 }else if( overload && exitOnOverload ){
170 /* Just block the client */
171 lockout();
172 }
173 return overload;
174 }
175
176 /*
177 ** WEBPAGE: /honeypot
178 **
179 ** This page gives fair warning to real users not to click on any of the
180 ** hyperlinks.
181 */
honeypot(void)182 void honeypot(void){
183 login_check_credentials();
184 if( !g.isAnon ){
185 cgi_redirect("index");
186 }
187 common_add_action_item("stopper","I am a spider");
188 common_add_action_item("index","I am human");
189 common_add_help_item("CvstracAdminAbuse");
190 common_header("Honey Pot");
191 @ <p>This page is intended to capture spiders that
192 @ ignore the "robots.txt" file.
193 @ If you are not a spider, click on the "I am human" link
194 @ above. If you click on the "I am a spider" link, your access to this
195 @ server will be suspended for about an hour.</p>
196 common_footer();
197 }
198
/*
** WEBPAGE: /stopper
**
** Only robots should come to this page, never legitimate users.  Disable
** any IP address that comes to this page.
**
** When throttling is configured and REMOTE_ADDR is known, the load for
** the address is forced to twice the throttle limit and throttle(1,1) is
** then invoked to act on it.  Otherwise, or if throttle() returns, the
** client is redirected to the index page.
*/
void stopper(void){
  const char *zAddr = getenv("REMOTE_ADDR");
  const char *zLimit = db_config("throttle", 0);
  double rLimit;

  login_check_credentials();
  if( zLimit!=0 && (rLimit = atof(zLimit))>0.0 && zAddr!=0 ){
    time_t now;
    time(&now);
    /* "now" is a time_t, which is not necessarily the same width as int.
    ** Convert it explicitly so the argument matches the %d conversion.
    */
    db_execute("REPLACE INTO access_load(ipaddr,load,lastaccess) "
               "VALUES('%q',%g,%d)", zAddr, rLimit*2.0, (int)now);
    throttle(1,1);
  }
  cgi_redirect("index");
}
220
221
222 /*
223 ** WEBPAGE: /info_throttle
224 **
225 ** Provide information about the current throttling database.
226 */
throttle_info(void)227 void throttle_info(void){
228 const char *zReset = P("reset");
229 const char *zOrderBy;
230 char **az;
231 int limit;
232 int i;
233
234 login_check_credentials();
235 if( !g.okSetup ){
236 login_needed();
237 return;
238 }
239 zOrderBy = PD("ob","1");
240 limit = atoi(PD("limit","50"));
241 if( zOrderBy[0]=='1' ){
242 zOrderBy = "ORDER BY load DESC";
243 }else if( zOrderBy[0]=='2' ){
244 zOrderBy = "ORDER BY ipaddr";
245 }else{
246 zOrderBy = "ORDER BY lastaccess DESC";
247 }
248 if( zReset ){
249 time_t now;
250 time(&now);
251 db_execute("DELETE FROM access_load WHERE lastaccess<%d", now-86400);
252 }
253 common_add_nav_item("setup", "Main Setup Menu");
254 common_add_help_item("CvstracAdminAbuse");
255 common_add_action_item("info_throttle?reset=1","Remove Older Entries");
256 if( limit>0 ){
257 common_add_action_item("info_throttle?limit=-1","View All");
258 }else{
259 common_add_action_item("info_throttle?limit=50","View Top 50");
260 }
261 common_header("Throttle Results");
262 @ Contents of the ACCESS_LOAD table:
263 @ <table border="1" cellspacing="0" cellpadding="2">
264 @ <tr>
265 @ <th><a href="info_throttle?ob=2">IP Address</a></th>
266 @ <th><a href="info_throttle?ob=3">Last Access</a></th>
267 @ <th><a href="info_throttle?ob=1">Load</a></th></tr>
268 az = db_query("SELECT ipaddr, lastaccess, load FROM access_load %s LIMIT %d",
269 zOrderBy, limit);
270 for(i=0; az[i]; i+=3){
271 struct tm *pTm;
272 time_t atime;
273 char zTime[200];
274 atime = atoi(az[i+1]);
275 pTm = localtime(&atime);
276 strftime(zTime, sizeof(zTime), "%Y-%m-%d %H:%M:%S", pTm);
277 @ <tr><td> %h(az[i]) </td>
278 @ <td> %h(zTime) </td>
279 @ <td> %s(az[i+2]) </td></tr>
280 }
281 @ </table>
282 @ <p>
283 @ <a href="info_throttle?reset=1">Remove older entries</a>
284 common_footer();
285 }
286
287 /*
288 ** WEBPAGE: /captcha
289 **
290 ** Generate the captcha page. This is basically a honeypot with a cookie
291 ** for state. Once the client exceeds the throttle threshold, they risk
292 ** getting locked out unless they (eventually) get this question correct
293 ** or slow down on the hits.
294 */
captcha_page(void)295 void captcha_page(void){
296 time_t now = time(NULL);
297 int q1, q2;
298
299 if( atoi(PD("a","0")) == (atoi(PD("q1","-1")) + atoi(PD("q2","-1"))) ){
300 /* User gave the right answer. Set cookie and continue on.
301 */
302 captcha_set_cookie();
303 cgi_redirect(PD("nxp","index"));
304 return;
305 }
306
307 /* Note that we don't do _any_ credential checks in this page... However,
308 ** some flags are needed for sane header generation. For example, we
309 ** want a "Login" in the menu rather than "Logout" so isAnon should be
310 ** set.
311 */
312 g.isAnon = 1;
313
314 common_standard_menu(0, 0);
315 common_add_help_item("CvstracAdminAbuse");
316 common_header("Abbreviated Turing Test");
317
318 /* small numbers */
319 srand(now);
320 q1 = (rand()%5)+1;
321 q2 = (rand()%5)+1;
322
323 @ In order to continue, you must show you're a human. Please answer
324 @ the following mathematical skill testing question (and ensure cookies
325 @ are enabled):
326 @ <p>
327 @ <form action="captcha" method="POST">
328 @ What is <tt>%d(q1) + %d(q2)</tt>?
329 @ <input type="text" name="a" value="" size=4>
330 @ <font size="-1">Hint: %d(q1+q2)</font>
331 @ <input type="hidden" name="q1" value="%d(q1)">
332 @ <input type="hidden" name="q2" value="%d(q2)">
333 if( P("nxp") ){
334 @ <input type="hidden" name="nxp" value="%h(P("nxp"))">
335 }
336 @ </p>
337 @ <p>
338 @ <input type="submit" name="in" value="Submit"></td>
339 @ </p>
340 @ </form>
341 common_footer();
342 }
343
/*
** Count the URLs appearing in zText.  A NULL zText counts as zero links.
**
** Only positions holding 'h', 'f' or 'm' are handed to is_url(); a
** positive result from is_url() counts as one link.
**
** NOTE(review): this assumes is_url() returns the number of characters in
** the URL it recognized (and 0 or less when there is none) -- confirm
** against its definition.
*/
static int count_links(const char *zText){
  int nLink = 0;
  int i;

  if( zText==0 ) return 0;

  for(i=0; zText[i]; i++){
    char c = zText[i];
    int n;

    if( c!='h' && c!='f' && c!='m' ) continue;
    n = is_url(&zText[i]);
    if( n>0 ){
      nLink++;
      /* Move to the last character of the URL; the loop increment then
      ** lands on the first character after it.  Advancing by the full
      ** length here would step over that character, which is the string
      ** terminator when the URL ends the text, and the scan would run
      ** past the end of the buffer.
      */
      i += n-1;
    }
  }
  return nLink;
}
361
362 /* If someone blows the limit, tweak the current throttler counter
363 ** for their IP. Each failure increases it by a defined fraction
364 ** of the throttle limit, which mean they'll get locked out after
365 ** triggering the failure too many times. Currently, that's
366 ** twice.
367 **
368 ** Hopefully this strikes a balance between stopping spammers and
369 ** not annoying legitimate users too much...
370 */
371 static void increase_load(){
372 const char *zAddr = getenv("REMOTE_ADDR");
373 const char *zLimit = db_config("throttle", 0);
374 double rLimit;
375
376 if( zLimit!=0 && (rLimit = atof(zLimit))>0.0 && zAddr!=0 ){
377 time_t now = time(0);
378 char *zLastLoad = db_short_query("SELECT load FROM access_load "
379 "WHERE ipaddr='%q'", zAddr);
380 db_execute("REPLACE INTO access_load(ipaddr,load,lastaccess) "
381 "VALUES('%q',%g,%d)", zAddr,
382 (zLastLoad ? atof(zLastLoad) : 0)
383 + rLimit/WIKI_EDIT_LOCKOUT,
384 now);
385 if(zLastLoad) free(zLastLoad);
386 }
387
388 /* we also need to clear any captcha cookie since having it
389 ** bypasses the throttler. This is also handy since it's not
390 ** IP specific, so users who are behind variable IP's are going
391 ** to still get caught.
392 */
393 captcha_clear_cookie();
394 }
395
396 /*
397 ** Apply any appropriate anti-spam heuristics to the provided wiki edit.
398 **
399 ** Obviously, we're only going to apply this restriction to anonymous
400 ** users. Currently.
401 **
402 ** Returns NULL if the change is acceptable. Otherwise, it returns a string
403 ** containing an explanation for the rejection.
404 */
405 char *is_edit_allowed(const char *zOld, const char *zNew){
406 if( g.isAnon ){
407 const char *zKeys = db_config("keywords","");
408 int nMscore = atoi(db_config("keywords_max_score","0"));
409 int nMax = atoi(db_config("max_links_per_edit","0"));
410
411 /*
412 ** Check for too many "bad words" in the new text. Checking the "diff"
413 ** might be better?
414 */
415 if( nMscore && zKeys[0] ) {
416 db_add_functions();
417 if( db_exists("SELECT 1 WHERE search('%q','%q')>%d",zKeys,zNew,nMscore)){
418 increase_load();
419 return "Forbidden keywords!";
420 }
421 }
422
423 /*
424 ** Check to see if the threshold of external links was exceeded.
425 */
426 if( nMax ){
427 int nOld = count_links(zOld);
428 int nNew = count_links(zNew);
429
430 /* Note that someone could bypass this by replacing a whole bunch of
431 ** links in an existing page. If that starts to happen it might be
432 ** necessary to compare the list of links or something.
433 **
434 ** Some keyword filtering would help a bit, too.
435 */
436 if( nNew - nOld >= nMax ){
437 increase_load();
438 return "Too many external links for one edit!";
439 }
440 }
441 }
442 return 0;
443 }
444