1 /*
2 ** Copyright (c) 2002 D. Richard Hipp
3 **
4 ** This program is free software; you can redistribute it and/or
5 ** modify it under the terms of the GNU General Public
6 ** License as published by the Free Software Foundation; either
7 ** version 2 of the License, or (at your option) any later version.
8 **
9 ** This program is distributed in the hope that it will be useful,
10 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
11 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12 ** General Public License for more details.
13 **
14 ** You should have received a copy of the GNU General Public
15 ** License along with this library; if not, write to the
16 ** Free Software Foundation, Inc., 59 Temple Place - Suite 330,
17 ** Boston, MA  02111-1307, USA.
18 **
19 ** Author contact information:
20 **   drh@hwaci.com
21 **   http://www.hwaci.com/drh/
22 **
23 *******************************************************************************
24 **
25 ** This file contains code used to throttle output to misbehaving spiders.
26 */
27 #include <stdlib.h>
28 #include <math.h>
29 #include <time.h>
30 #include "throttle.h"
31 
/*
** The following is an approximation of -ln(0.5)/3600.0.  Euler's number
** (e) raised to the power -DECAY_RATE*3600 is 0.5, so a stored load value
** decays to half of its size for every hour (3600 seconds) that passes.
*/
36 #define DECAY_RATE  0.00019254
37 
38 /* Number of idle seconds before captcha cookie expires */
39 #define CAPTCHA_EXPIRE 86400
40 
41 /* Maximum number of hits on the captcha page before a lockout */
42 #define CAPTCHA_LOCKOUT 5
43 
44 /* Lock the user out if they fail the is_edit_allowed() check this many
45 ** times.
46 */
47 #define WIKI_EDIT_LOCKOUT 2
48 
49 /*
50 ** Return the name of the captcha cookie
51 */
captcha_cookie_name(void)52 static char *captcha_cookie_name(void){
53   return mprintf("%s_captcha", g.zName);
54 }
55 
captcha_set_cookie(void)56 static void captcha_set_cookie(void){
57   /* The captcha cookie is just an expiry time value. Nothing fancy,
58   ** we don't need to keep much information. We do want to update it
59   ** to a later expiry on successful connection...
60   */
61   char *zExpiry = mprintf("%d",time(NULL)+CAPTCHA_EXPIRE);
62   cgi_set_cookie(captcha_cookie_name(), zExpiry, 0, 0);
63 }
64 
/*
** Invalidate the captcha cookie by overwriting its value with "0".
** throttle() only honours a cookie whose numeric value is greater than
** the current time, so a value of zero never passes that check.
*/
static void captcha_clear_cookie(void){
  char *zCookieName = captcha_cookie_name();
  cgi_set_cookie(zCookieName, "0", 0, 0);
}
68 
lockout()69 static void lockout(){
70   cgi_reset_content();
71   common_standard_menu(0,0);
72   common_header("Access denied");
73   @ <p>Your access to this website has been temporarily suspended because
74   @ you are using it excessively.  You can retry your request later.</p>
75   common_footer();
76   cgi_append_header("Pragma: no-cache\r\n");
77   cgi_set_status(403,"Forbidden");
78   cgi_reply();
79   exit(0);
80 }
81 
82 /*
83 ** Check to see if there have been too many recent accesses from the
84 ** same IP address.
85 **
86 ** If there is an overload, the resulting action depends on the exitOnOverload
87 ** parameter.  If that parameter is true, an error reply is constructed
88 ** and an exit performed without returning.  If the parameter is false,
89 ** a non-zero value is returned.
90 **
91 ** If the needCaptcha flag is non-zero, the client will be required to
92 ** pass the captcha test regardless of the load. However, failure to
93 ** pass the test won't result in eventual lockout.
94 **
95 ** If there is no overload, a zero is returned.
96 */
throttle(int exitOnOverload,int needCaptcha)97 int throttle(int exitOnOverload,int needCaptcha){
98   const char *zLimit;
99   double rLimit;
100   const char *zAddr;
101   char **az;
102   double rLoad;
103   double rLastLoad;
104   int lastAccess;
105   time_t now;
106   int overload;
107   int captcha = CAPTCHA_LOCKOUT;
108   int useCaptcha = atoi(db_config("enable_captcha","0"));
109   const char *zCookie = P(captcha_cookie_name());
110   const char *zUrl = getenv("REQUEST_URI");
111 
112   time(&now);
113 
114   if( !g.isAnon ) return 0; /* Throttling does not occur for identified users */
115   zLimit = db_config("throttle", 0);
116   if( zLimit==0 ) return 0; /* Throttling is turned off */
117   rLimit = atof(zLimit);
118   if( rLimit<=0.0 ) return 0;  /* Throttling is turned off */
119 
120   /* Users with valid captcha cookies are okay.
121   */
122   if( useCaptcha && zCookie && zCookie[0] && atoi(zCookie) > now ){
123     /* update the cookie to a new expiry time
124     */
125     captcha_set_cookie();
126     return 0;
127   }
128 
129   zAddr = getenv("REMOTE_ADDR");
130   if( zAddr==0 ) return 0;  /* No remote IP address provided */
131 
132   az = db_query("SELECT load, lastaccess, captcha FROM access_load "
133                 "WHERE ipaddr='%q'", zAddr);
134   if( az[0] && az[1] ){
135     rLastLoad = rLoad = atof(az[0]);
136     lastAccess = atoi(az[1]);
137     if( lastAccess>now ) lastAccess = now;
138     rLoad = 1.0 + exp(DECAY_RATE*(lastAccess-now))*rLoad;
139     if( rLoad>rLimit && rLoad<rLimit*2.0 ){
140       /* If the throttler triggers, make sure it locks out the IP address
141       ** for at least 1 hour */
142       rLoad = rLimit*2.0;
143     }
144     if( rLoad>rLimit && rLastLoad>rLimit && az[2] && useCaptcha ){
145       /* Once the client blows the limit, repeated hits on anything
146       ** other than the captcha page will decrement the captcha
147       ** counter, eventually resulting in lockout.
148       */
149       captcha = atoi(az[2])-1;
150     }
151   }else{
152     rLastLoad = rLoad = 1.0;
153     lastAccess = 0;
154   }
155 
156   db_execute("REPLACE INTO access_load(ipaddr,load,lastaccess,captcha) "
157              "VALUES('%q',%g,%d,%d)", zAddr, rLoad, now, captcha);
158   overload = rLoad>=rLimit;
159   if( useCaptcha && (overload || needCaptcha) ){
160     if( captcha <= 0 && exitOnOverload ) {
161       /* Captcha lockout count exceeded, client is blocked until the
162       ** load drops again.
163       */
164       lockout();
165     }
166 
167     if( zUrl==0 ) zUrl = "index";
168     cgi_redirect(mprintf("captcha?cnxp=%T", zUrl));
169   }else if( overload && exitOnOverload ){
170     /* Just block the client */
171     lockout();
172   }
173   return overload;
174 }
175 
176 /*
177 ** WEBPAGE: /honeypot
178 **
179 ** This page gives fair warning to real users not to click on any of the
180 ** hyperlinks.
181 */
honeypot(void)182 void honeypot(void){
183   login_check_credentials();
184   if( !g.isAnon ){
185     cgi_redirect("index");
186   }
187   common_add_action_item("stopper","I am a spider");
188   common_add_action_item("index","I am human");
189   common_add_help_item("CvstracAdminAbuse");
190   common_header("Honey Pot");
191   @ <p>This page is intended to capture spiders that
192   @ ignore the "robots.txt" file.
193   @ If you are not a spider, click on the "I am human" link
194   @ above.  If you click on the "I am a spider" link, your access to this
195   @ server will be suspended for about an hour.</p>
196   common_footer();
197 }
198 
/*
** WEBPAGE: /stopper
**
** Only robots should come to this page, never legitimate users.  Disable
** any IP address that comes to this page.
**
** When throttling is configured (a positive "throttle" value) and the
** client address is known, the address is recorded in ACCESS_LOAD with a
** load of twice the limit and throttle(1,1) is invoked to act on it.
** Otherwise, or if throttle() returns, the client is redirected to
** "index".
*/
void stopper(void){
  const char *zAddr = getenv("REMOTE_ADDR");
  const char *zLimit = db_config("throttle", 0);
  double rLimit;

  login_check_credentials();
  if( zLimit!=0 && (rLimit = atof(zLimit))>0.0 && zAddr!=0 ){
    time_t now;
    time(&now);
    /* "%d" consumes an int; convert the time_t explicitly so the argument
    ** type matches the conversion. */
    db_execute("REPLACE INTO access_load(ipaddr,load,lastaccess) "
               "VALUES('%q',%g,%d)", zAddr, rLimit*2.0, (int)now);
    throttle(1,1);
  }
  cgi_redirect("index");
}
220 
221 
222 /*
223 ** WEBPAGE: /info_throttle
224 **
225 ** Provide information about the current throttling database.
226 */
throttle_info(void)227 void throttle_info(void){
228   const char *zReset = P("reset");
229   const char *zOrderBy;
230   char **az;
231   int limit;
232   int i;
233 
234   login_check_credentials();
235   if( !g.okSetup ){
236     login_needed();
237     return;
238   }
239   zOrderBy = PD("ob","1");
240   limit = atoi(PD("limit","50"));
241   if( zOrderBy[0]=='1' ){
242     zOrderBy = "ORDER BY load DESC";
243   }else if( zOrderBy[0]=='2' ){
244     zOrderBy = "ORDER BY ipaddr";
245   }else{
246     zOrderBy = "ORDER BY lastaccess DESC";
247   }
248   if( zReset ){
249     time_t now;
250     time(&now);
251     db_execute("DELETE FROM access_load WHERE lastaccess<%d", now-86400);
252   }
253   common_add_nav_item("setup", "Main Setup Menu");
254   common_add_help_item("CvstracAdminAbuse");
255   common_add_action_item("info_throttle?reset=1","Remove Older Entries");
256   if( limit>0 ){
257     common_add_action_item("info_throttle?limit=-1","View All");
258   }else{
259     common_add_action_item("info_throttle?limit=50","View Top 50");
260   }
261   common_header("Throttle Results");
262   @ Contents of the ACCESS_LOAD table:
263   @ <table border="1" cellspacing="0" cellpadding="2">
264   @ <tr>
265   @ <th><a href="info_throttle?ob=2">IP Address</a></th>
266   @ <th><a href="info_throttle?ob=3">Last Access</a></th>
267   @ <th><a href="info_throttle?ob=1">Load</a></th></tr>
268   az = db_query("SELECT ipaddr, lastaccess, load FROM access_load %s LIMIT %d",
269                zOrderBy, limit);
270   for(i=0; az[i]; i+=3){
271     struct tm *pTm;
272     time_t atime;
273     char zTime[200];
274     atime = atoi(az[i+1]);
275     pTm = localtime(&atime);
276     strftime(zTime, sizeof(zTime), "%Y-%m-%d %H:%M:%S", pTm);
277     @ <tr><td>&nbsp;&nbsp;%h(az[i])&nbsp;&nbsp;</td>
278     @ <td>&nbsp;&nbsp;%h(zTime)&nbsp;&nbsp;</td>
279     @ <td>&nbsp;&nbsp;%s(az[i+2])&nbsp;&nbsp;</td></tr>
280   }
281   @ </table>
282   @ <p>
283   @ <a href="info_throttle?reset=1">Remove older entries</a>
284   common_footer();
285 }
286 
287 /*
288 ** WEBPAGE: /captcha
289 **
290 ** Generate the captcha page. This is basically a honeypot with a cookie
291 ** for state. Once the client exceeds the throttle threshold, they risk
292 ** getting locked out unless they (eventually) get this question correct
293 ** or slow down on the hits.
294 */
captcha_page(void)295 void captcha_page(void){
296   time_t now = time(NULL);
297   int q1, q2;
298 
299   if( atoi(PD("a","0")) == (atoi(PD("q1","-1")) + atoi(PD("q2","-1"))) ){
300     /* User gave the right answer. Set cookie and continue on.
301     */
302     captcha_set_cookie();
303     cgi_redirect(PD("nxp","index"));
304     return;
305   }
306 
307   /* Note that we don't do _any_ credential checks in this page... However,
308   ** some flags are needed for sane header generation. For example, we
309   ** want a "Login" in the menu rather than "Logout" so isAnon should be
310   ** set.
311   */
312   g.isAnon = 1;
313 
314   common_standard_menu(0, 0);
315   common_add_help_item("CvstracAdminAbuse");
316   common_header("Abbreviated Turing Test");
317 
318   /* small numbers */
319   srand(now);
320   q1 = (rand()%5)+1;
321   q2 = (rand()%5)+1;
322 
323   @ In order to continue, you must show you're a human. Please answer
324   @ the following mathematical skill testing question (and ensure cookies
325   @ are enabled):
326   @ <p>
327   @ <form action="captcha" method="POST">
328   @ What is <tt>%d(q1) + %d(q2)</tt>?
329   @ <input type="text" name="a" value="" size=4>
330   @ <font size="-1">Hint: %d(q1+q2)</font>
331   @ <input type="hidden" name="q1" value="%d(q1)">
332   @ <input type="hidden" name="q2" value="%d(q2)">
333   if( P("nxp") ){
334     @ <input type="hidden" name="nxp" value="%h(P("nxp"))">
335   }
336   @ </p>
337   @ <p>
338   @ <input type="submit" name="in" value="Submit"></td>
339   @ </p>
340   @ </form>
341   common_footer();
342 }
343 
/*
** Count the URLs found in zText.  A null zText contains no links.
**
** A URL is whatever is_url() recognises at a position whose first
** character is 'h', 'f' or 'm'.
*/
static int count_links(const char *zText){
  int nLink = 0;

  if( zText!=0 ){
    int i, j;

    for(i=0;zText[i];i++){
      char c = zText[i];
      if( (c=='h' || c=='f' || c=='m') && (j=is_url(&zText[i]))>0 ){
        nLink ++;
        /* Assuming is_url() returns the number of characters in the URL
        ** (TODO confirm), advance to its last character and let the
        ** loop's i++ move to the first character after it.  Advancing by
        ** the full j here would make i++ step over one extra character,
        ** which is the string terminator when the URL ends the text.
        */
        i += j-1;
      }
    }
  }
  return nLink;
}
361 
362 /* If someone blows the limit, tweak the current throttler counter
363 ** for their IP. Each failure increases it by a defined fraction
364 ** of the throttle limit, which mean they'll get locked out after
365 ** triggering the failure too many times. Currently, that's
366 ** twice.
367 **
368 ** Hopefully this strikes a balance between stopping spammers and
369 ** not annoying legitimate users too much...
370 */
371 static void increase_load(){
372   const char *zAddr = getenv("REMOTE_ADDR");
373   const char *zLimit = db_config("throttle", 0);
374   double rLimit;
375 
376   if( zLimit!=0 && (rLimit = atof(zLimit))>0.0 && zAddr!=0 ){
377     time_t now = time(0);
378     char *zLastLoad = db_short_query("SELECT load FROM access_load "
379                                      "WHERE ipaddr='%q'", zAddr);
380     db_execute("REPLACE INTO access_load(ipaddr,load,lastaccess) "
381                "VALUES('%q',%g,%d)", zAddr,
382                (zLastLoad ? atof(zLastLoad) : 0)
383                             + rLimit/WIKI_EDIT_LOCKOUT,
384                now);
385     if(zLastLoad) free(zLastLoad);
386   }
387 
388   /* we also need to clear any captcha cookie since having it
389   ** bypasses the throttler. This is also handy since it's not
390   ** IP specific, so users who are behind variable IP's are going
391   ** to still get caught.
392   */
393   captcha_clear_cookie();
394 }
395 
396 /*
397 ** Apply any appropriate anti-spam heuristics to the provided wiki edit.
398 **
399 ** Obviously, we're only going to apply this restriction to anonymous
400 ** users. Currently.
401 **
402 ** Returns NULL if the change is acceptable. Otherwise, it returns a string
403 ** containing an explanation for the rejection.
404 */
405 char *is_edit_allowed(const char *zOld, const char *zNew){
406   if( g.isAnon ){
407     const char *zKeys = db_config("keywords","");
408     int nMscore = atoi(db_config("keywords_max_score","0"));
409     int nMax = atoi(db_config("max_links_per_edit","0"));
410 
411     /*
412     ** Check for too many "bad words" in the new text. Checking the "diff"
413     ** might be better?
414     */
415     if( nMscore && zKeys[0] ) {
416       db_add_functions();
417       if( db_exists("SELECT 1 WHERE search('%q','%q')>%d",zKeys,zNew,nMscore)){
418         increase_load();
419         return "Forbidden keywords!";
420       }
421     }
422 
423     /*
424     ** Check to see if the threshold of external links was exceeded.
425     */
426     if( nMax ){
427       int nOld = count_links(zOld);
428       int nNew = count_links(zNew);
429 
430       /* Note that someone could bypass this by replacing a whole bunch of
431       ** links in an existing page. If that starts to happen it might be
432       ** necessary to compare the list of links or something.
433       **
434       ** Some keyword filtering would help a bit, too.
435       */
436       if( nNew - nOld >= nMax ){
437         increase_load();
438         return "Too many external links for one edit!";
439       }
440     }
441   }
442   return 0;
443 }
444