1 /*
2    ratproxy
3    --------
4 
5    A simple HTTP proxy to use for code audits of rich web 2.0 applications.
6    Meant to detect JSON-related and other script-accessible content problems as
7    you interact with the tested application and otherwise just mind your business.
8 
9    Please use this tool responsibly and in good faith. Thanks.
10 
11    Author: Michal Zalewski <lcamtuf@google.com>
12 
13    Copyright 2007, 2008 by Google Inc. All Rights Reserved.
14 
15    Licensed under the Apache License, Version 2.0 (the "License");
16    you may not use this file except in compliance with the License.
17    You may obtain a copy of the License at
18 
19      http://www.apache.org/licenses/LICENSE-2.0
20 
21    Unless required by applicable law or agreed to in writing, software
22    distributed under the License is distributed on an "AS IS" BASIS,
23    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
24    See the License for the specific language governing permissions and
25    limitations under the License.
26 
27 
28  */
29 
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <unistd.h>
33 #include <sys/socket.h>
34 #include <netinet/in.h>
35 #include <signal.h>
36 #include <sys/types.h>
37 #include <sys/stat.h>
38 #include <fcntl.h>
39 #include <string.h>
40 #include <sys/wait.h>
41 #include <ctype.h>
42 #include <netdb.h>
43 #include <openssl/md5.h>
44 #include <time.h>
45 
46 #include "config.h"
47 #include "types.h"
48 #include "debug.h"
49 #include "nlist.h"
50 #include "http.h"
51 #include "mime.h"
52 #include "ssl.h"
53 #include "string-inl.h"
54 
55 static struct naive_list  domains;		/* Domains to keep track of   */
56 
57 static _u8  check_png,				/* Check all PNG files?       */
58             dump_urls,				/* Dump all visited URLs?     */
59             all_files,				/* Report all file inclusions */
60             all_flash,				/* Report all Flash documents */
61             get_xsrf,				/* Report GET XSRF status     */
62             bad_js,				/* Report risky Javascript    */
63             all_post,				/* Report all POST requests   */
64             all_cookie,				/* Report all cookie URLs     */
65             picky_cache,			/* Be picky about chdrs       */
66             use_double,				/* Make 2, not 1 extra req    */
67             try_attacks,			/* Validate XSRF/XSS suspects */
68             fix_attacks,			/* Correct XSRF/XSS fallout   */
69             log_active,				/* Log cross-domain content   */
70             log_mixed,				/* Log mixed content          */
71             use_any,				/* Listen on any address      */
72             all_xss;				/* Report all XSS suspects    */
73 
74 static _u32 use_port = DEFAULT_PORT;		/* Proxy port to listen on    */
75 
76 _u8* use_proxy;					/* Upstream proxy             */
77 _u8* trace_dir;					/* Trace directory            */
78 _u32 proxy_port = 8080;				/* Upstream proxy port        */
79 _u8  use_len;					/* Use length, not cksum      */
80 
81 static FILE* outfile;				/* Output file descriptor     */
82 
83 /* Display usage information */
usage(_u8 * argv0)84 static void usage(_u8* argv0) {
85 
86   debug("Usage: %s [ -w logfile ] [ -v logdir ] [ -p port ] [ -d domain ] [ -P host:port ] "
87         "[ -xtifkgmjscael2XCr ]\n"
88         "   -w logfile    - write results to a specified file (default: stdout)\n"
89         "   -v logdir     - write HTTP traces to a specified directory (default: none)\n"
90         "   -p port       - listen on a custom TCP port (default: 8080)\n"
91         "   -d domain     - analyze requests to specified domains only (default: all)\n"
92         "   -P host:port  - use upstream proxy for all requests (format host:port)\n"
93         "   -r            - accept remote connections (default: 127.0.0.1 only)\n"
94         "   -l            - use response length, not checksum, for identity check\n"
95         "   -2            - perform two, not one, page identity check\n"
96         "   -e            - perform pedantic caching headers checks\n"
97         "   -x            - log all XSS candidates\n"
98         "   -t            - log all directory traversal candidates\n"
99         "   -i            - log all PNG files served inline\n"
100         "   -f            - log all Flash applications for analysis (add -v to decompile)\n"
101         "   -s            - log all POST requests for analysis\n"
102         "   -c            - log all cookie setting URLs for analysis\n"
103         "   -g            - perform XSRF token checks on all GET requests\n"
104         "   -j            - report on risky Javascript constructions\n"
105         "   -m            - log all active content referenced across domains\n"
106         "   -X            - disruptively validate XSRF, XSS protections\n"
107         "   -C            - try to auto-correct persistent side effects of -X\n"
108         "   -k            - flag HTTP requests as bad (for HTTPS-only applications)\n"
109         "   -a            - indiscriminately report all visited URLs\n\n"
110 
111         "Example settings suitable for most tests:\n"
112         "  1) Low verbosity  : -v <outdir> -w <outfile> -d <domain> -lfscm\n"
113         "  2) High verbosity : -v <outdir> -w <outfile> -d <domain> -lextifscgjm\n"
114         "  3) Active testing : -v <outdir> -w <outfile> -d <domain> -XClfscm\n\n"
115 
116         "Multiple -d options are allowed. Consult the documentation for more.\n", argv0);
117 
118   exit(1);
119 
120 }
121 
122 
/* printf-style writer for report lines; always targets outfile. */
#define sayf(...) fprintf(outfile, __VA_ARGS__)
124 
125 
126 /* Check hostname against a list of tracked ones. */
host_ok(_u8 * hname)127 static _u8 host_ok(_u8* hname) {
128   _u32 i, hlen;
129 
130   /* If no domains defined, accept all. */
131   if (!domains.c) return 1;
132 
133   hlen = strlen(hname);
134 
135   for (i=0;i<domains.c;i++) {
136     _u32 dlen = strlen(domains.v[i]);
137     if (dlen > hlen) continue;
138     if (!strcmp(hname + (hlen - dlen), domains.v[i])) return 1;
139   }
140 
141   return 0;
142 }
143 
144 
145 /* Test for XSSable payload */
xss_field(_u8 * value,_u8 head)146 static _u8 xss_field(_u8* value, _u8 head) {
147   _u32 c = 0;
148 
149   if (strlen(value) < (head ? MIN_XSS_HEAD : MIN_XSS_LEN)) return 0;
150 
151   while (no_xss_text[c]) {
152     if (!strncasecmp(value,no_xss_text[c],strlen(no_xss_text[c]))) return 0;
153     c++;
154   }
155   return 1;
156 }
157 
158 
159 #define MOD_PRED    1
160 #define MOD_AUTH    2
161 #define MOD_ECHO    4
162 
163 #define NOECHO(_x) ((_x) & ~MOD_ECHO)
164 #define ECHO(_x) ((_x) & MOD_ECHO)
165 
166 /* Check if the page has a predictable URL, user-specific content, echoed parameters. */
get_modifiers(struct http_request * req,struct http_response * res)167 static _u8 get_modifiers(struct http_request* req, struct http_response* res) {
168   FILE *server;
169   static struct http_response* mini = 0;
170   _u32 ret = 0;
171   _u32 fno = 0;
172   _u32 i;
173 
174   /* Test for echoed query parameters in response body... */
175 
176   if (res->is_text && res->payload_len) {
177 
178 #ifdef CHECK_ECHO_PATH
179     if (req->path && strstr(res->payload,req->path)) ret = MOD_ECHO;
180 #endif /* CHECK_ECHO_PATH */
181 
182     for (i=0;!ret && i<req->p.c;i++)
183       if (!req->p.fn[i][0] && xss_field(req->p.v2[i],0) && strstr(res->payload,req->p.v2[i]))
184         { ret = MOD_ECHO; break; }
185 
186   }
187 
188   /* ...and in HTTP header values. */
189 
190   for (i=0;!ret && i<req->p.c;i++)
191     if (!req->p.fn[i][0] && xss_field(req->p.v2[i],1)) {
192       _u32 j;
193       for (j=0;j<res->h.c;j++)
194         if (strstr(res->h.v2[j],req->p.v2[i])) { ret = MOD_ECHO; break; }
195     }
196 
197   /* Check for predictable URLs. */
198 
199   if (!req->xsrf_safe) ret |= MOD_PRED;
200 
201   /* Check for authentication. */
202   /* Some field names may override our checks. */
203 
204   while (auth_fields[fno]) {
205     _u32 i;
206     for (i=0;i<req->p.c;i++) {
207       if (auth_fields[fno][0] == '=') {
208         if (!strcasecmp(req->p.v1[i],auth_fields[fno] + 1)) return ret | MOD_AUTH;
209       } else {
210         if (rp_strcasestr(req->p.v1[i],auth_fields[fno])) return ret | MOD_AUTH;
211       }
212     }
213     fno++;
214   }
215 
216   /* No cookies? Then do not resend. */
217   if (!req->cookies.c) return ret;
218 
219   /* Try to verify that the request requires authentication by replaying it with
220      no cookies. This should have no side effects in sanely written applications. */
221 
222   /* TODO: We should continue also if custom HTTP headers or HTTP auth is detected;
223      we currently bail out on this, however. */
224 
225   if (!mini) {
226 
227     server = open_server_complete(0,req);
228 
229     if (req->from_ssl) {
230       ssl_setup();
231       ssl_start(fileno(server),-1);
232       fclose(server);
233       server = fdopen(ssl_srv_tap,"w+");
234     }
235 
236     mini = send_request(0,server,req,1);
237     if (req->from_ssl) ssl_shutdown();
238 
239     checksum_response(mini);
240 
241     if (use_double) {
242       _u64 temp = mini->cksum;
243 
244       /* ...and do it again! */
245 
246       server = open_server_complete(0,req);
247 
248       if (req->from_ssl) {
249         ssl_setup();
250         ssl_start(fileno(server),-1);
251         fclose(server);
252         server = fdopen(ssl_srv_tap,"w+");
253       }
254 
255       mini = send_request(0,server,req,1);
256       if (req->from_ssl) ssl_shutdown();
257 
258       checksum_response(mini);
259 
260       /* If checksum changes over time, give up. */
261       if (temp != mini->cksum) mini->cksum = res->cksum;
262 
263     }
264 
265 
266   }
267 
268   if (mini->cksum != res->cksum) ret |= MOD_AUTH;
269 
270   return ret;
271 
272 }
273 
274 
275 /* DISRUPTIVE CHECK: Try removing XSRF protection, see what happens. */
try_replay_xsrf(struct http_request * req,struct http_response * res)276 static void try_replay_xsrf(struct http_request* req, struct http_response* res) {
277 
278   FILE *server;
279   struct http_response* not;
280   struct http_request r2;
281   _u32 i;
282   _u8 got_token = 0;
283 
284   if (!req->xsrf_safe || req->authsub) return;
285 
286   memcpy(&r2,req,sizeof(struct http_request));
287 
288   /* Duplicate parameter value pointer array, so that we may modify it at will. */
289 
290   r2.p.v2 = malloc(r2.p.c * sizeof(_u8*));
291   if (!r2.p.v2) fatal("out of memory");
292   memcpy(r2.p.v2,req->p.v2,r2.p.c * sizeof(_u8*));
293 
294   /* Do not run contains_token() checks on file fields. */
295 
296   for (i=0;i<req->p.c;i++)
297     if (!req->p.fn[i][0] && contains_token(req->p.v1[i],req->p.v2[i])) {
298       got_token = 1;
299       r2.p.v2[i] = "0"; /* Clobber value. */
300     }
301 
302   /* Ooops! */
303   if (!got_token) return;
304 
305   /* Rebuild query / payload strings. */
306   reconstruct_request(&r2);
307 
308   server = open_server_complete(0,req);
309 
310   if (req->from_ssl) {
311     ssl_setup();
312     ssl_start(fileno(server),-1);
313     fclose(server);
314     server = fdopen(ssl_srv_tap,"w+");
315   }
316 
317 
318   not = send_request(0,server,&r2,0);
319   if (req->from_ssl) ssl_shutdown();
320 
321   /* Fix potential side effects of our request. */
322 
323   if (fix_attacks) {
324     server = open_server_complete(0,req);
325 
326     if (req->from_ssl) {
327       ssl_setup();
328       ssl_start(fileno(server),-1);
329       fclose(server);
330       server = fdopen(ssl_srv_tap,"w+");
331     }
332 
333     send_request(0,server,req,0); /* sink response */
334     if (req->from_ssl) ssl_shutdown();
335   }
336 
337   checksum_response(not);
338 
339   /* Clobbering all XSRF-ish tokens caused no change? */
340 
341   if (not->cksum == res->cksum) req->xsrf_safe = 0;
342 
343 }
344 
345 
346 
347 
348 /* DISRUPTIVE CHECK: Try injecting XSS payload, see what happens. */
try_replay_xss(struct http_request * req,struct http_response * res)349 static _u8 try_replay_xss(struct http_request* req, struct http_response* res) {
350 
351   FILE *server;
352   struct http_response* not;
353   struct http_request r2;
354   _u32 i;
355   _u8 got_candidate = 0;
356   _u8* cur;
357   _u8 htmlstate = 0, htmlurl = 0;
358 
359   if (!res->is_text) return 0;
360 
361   memcpy(&r2,req,sizeof(struct http_request));
362 
363   /* Duplicate parameter value pointer array, so that we may modify it at will. */
364 
365   r2.p.v2 = malloc(r2.p.c * sizeof(_u8*));
366   if (!r2.p.v2) fatal("out of memory");
367   memcpy(r2.p.v2,req->p.v2,r2.p.c * sizeof(_u8*));
368 
369   for (i=0;i<req->p.c;i++)
370     if (!req->p.fn[i][0] && xss_field(req->p.v2[i],0) && strstr(res->payload,req->p.v2[i])
371 #ifndef XSS_XSRF_TOKENS
372          && !contains_token(req->p.v1[i],req->p.v2[i])
373 #endif /* !XSS_XSRF_TOKENS */
374   ) {
375 
376       /* This does not account for all scenarios possible XSS scenarios, but is a
377          pretty good all-around string. Since we want to minimize the number of
378          requests generated, it will have to do. */
379 
380       r2.p.v2[i] = "qg:qg qg=-->qg\"qg>qg'qg>qg+qg<qg>";
381       got_candidate = 1;
382 
383     }
384 
385   if (!got_candidate) return 0;
386 
387   /* Rebuild query / payload strings. */
388   reconstruct_request(&r2);
389 
390   server = open_server_complete(0,req);
391 
392   if (req->from_ssl) {
393     ssl_setup();
394     ssl_start(fileno(server),-1);
395     fclose(server);
396     server = fdopen(ssl_srv_tap,"w+");
397   }
398 
399   not = send_request(0,server,&r2,0);
400   if (req->from_ssl) ssl_shutdown();
401 
402   /* Fix potential side effects of our request. */
403 
404   if (fix_attacks) {
405     server = open_server_complete(0,req);
406 
407     if (req->from_ssl) {
408       ssl_setup();
409       ssl_start(fileno(server),-1);
410       fclose(server);
411       server = fdopen(ssl_srv_tap,"w+");
412     }
413 
414     send_request(0,server,req,0); /* sink response */
415     if (req->from_ssl) ssl_shutdown();
416   }
417 
418   if (!not->payload_len) return 0;
419 
420   detect_mime(not);
421 
422   if (not->is_text)
423     detect_charset(not);
424 
425   /* Do some minimal and dumbed down HTML parsing on the response to detect q9g
426      strings in dangerous configurations. */
427 
428 #define HS_IN_TAG   1
429 #define HS_IN_DBLQ  2
430 #define HS_IN_SNGQ  4
431 #define HS_IN_COMM  8
432 #define HS_IN_CDATA 16
433 
434   cur = not->payload;
435 
436   while (*cur) {
437 
438     /* Detect successful XSS attempts... */
439 
440     if (!strncasecmp(cur,"qg",2)) {
441 
442       /* <tag foo=bar onload=...> */
443       if (htmlstate == HS_IN_TAG && !strncasecmp(cur+2," qg=",4)) return 1;
444 
445       /* <tag src=foo:bar...> */
446       if (htmlurl && !strncasecmp(cur+2,":qg",3)) return 1;
447 
448       /* <tag><script>... */
449       if (htmlstate == 0 && !strncasecmp(cur+2,"<qg",3)) return 1;
450 
451       /* <tag>+ADw-script+AD4-... */
452       if (htmlstate == 0 && (!not->charset || not->bad_cset) && !strncasecmp(cur+2,"+qg",3)) return 1;
453 
454       /* <tag foo="bar"onload=...> */
455       if (htmlstate == (HS_IN_TAG|HS_IN_DBLQ) && !strncasecmp(cur+2,"\"qg",3)) return 1;
456 
457       /* <tag foo='bar'onload=...> */
458       if (htmlstate == (HS_IN_TAG|HS_IN_SNGQ) && !strncasecmp(cur+2,"'qg",3)) return 1;
459 
460     } else {
461 
462       /* Handle CDATA blocks */
463       if (htmlstate == 0 && !strncasecmp(cur,"<![CDATA[",9)) { htmlstate = HS_IN_CDATA; cur += 9; continue; }
464       if (htmlstate == HS_IN_CDATA && !strncmp(cur,"]]>",3)) { htmlstate = 0; cur += 3; continue; }
465 
466       /* Handle <!-- --> blocks (this depends on rendering mode, but hey). */
467       if (htmlstate == 0 && !strncmp(cur,"<!--",4)) { htmlstate = HS_IN_COMM; cur += 4; continue; }
468       if (htmlstate == HS_IN_COMM && !strncmp(cur,"-->",3)) { htmlstate = 0; cur += 3; continue; }
469 
470       /* Detect what could pass for tag opening / closure... */
471       if (htmlstate == 0 && *cur == '<' && (isalpha(cur[1]) || cur[1] == '!' || cur[1] == '?')) { htmlstate = HS_IN_TAG; cur++; continue; }
472       if (htmlstate == HS_IN_TAG && *cur == '>') { htmlstate = 0; htmlurl = 0; cur++; continue; }
473 
474 
475       /* Handle double quotes around HTML parameters */
476       if (htmlstate == HS_IN_TAG && cur[-1] == '=' && *cur == '"') { htmlstate |= HS_IN_DBLQ; cur++; continue; }
477       if (htmlstate == (HS_IN_TAG|HS_IN_DBLQ) && *cur == '"') { htmlstate = HS_IN_TAG; cur++; continue; }
478 
479       /* Handle single quotes around HTML parameters */
480       if (htmlstate == HS_IN_TAG && cur[-1] == '=' && *cur == '\'') { htmlstate |= HS_IN_SNGQ; cur++; continue; }
481       if (htmlstate == (HS_IN_TAG|HS_IN_SNGQ) && *cur == '\'') { htmlstate = HS_IN_TAG; cur++; continue; }
482 
483       /* Special handling for SRC= and HREF= locations. */
484 
485       if (htmlstate == HS_IN_TAG && isspace(cur[-1]) && !strncasecmp(cur,"href=",5)) {
486         htmlurl = 1; cur += 5; continue;
487       }
488 
489 
490       if (htmlstate == HS_IN_TAG && isspace(cur[-1]) && !strncasecmp(cur,"src=",4)) {
491         htmlurl = 1; cur += 4; continue;
492       }
493 
494       /* Cancel mode if any character other than ", ', or qg: URL is encountered. */
495       if (htmlurl) htmlurl = 0;
496 
497     }
498 
499     cur++;
500 
501   }
502 
503   /* So, no XSS? Bummer. */
504   return 0;
505 
506 }
507 
508 
509 /* Check for publicly cacheable documents. Returns 0 if not public,
510    1 if apparently meant to be public, 2 if partly protected. */
is_public(struct http_request * req,struct http_response * res)511 static _u8 is_public(struct http_request* req, struct http_response* res) {
512   _u8 http10intent;
513 
514   /* "Expires" and "Pragma" should say the same. */
515   if (res->pr10intent && res->ex10intent && res->pr10intent != res->ex10intent) return 2;
516 
517   http10intent = res->ex10intent ? res->ex10intent : res->pr10intent;
518 
519   /* HTTP/1.0 and HTTP/1.1 intents should say the same. */
520   if (http10intent && res->cc11intent && http10intent != res->cc11intent) return 2;
521 
522   /* [Picky] HTTP/1.0 and HTTP/1.1 intents should not appear at all, or appear at once */
523   if (picky_cache && (http10intent ^ res->cc11intent)) {
524     if (strcmp(req->method,"GET")) return 0; /* Non-GET requests won't be cached. */
525     return 2;
526   }
527 
528   if (res->cc11intent == INTENT_PRIV || http10intent == INTENT_PRIV) return 0;
529 
530   /* No interest in making this document private was expressed... */
531 
532   if (strcmp(req->method,"GET")) return 0; /* Non-GET requests won't be cached. */
533   return 1;
534 
535 }
536 
537 
538 
539 static _u8 dump_fn[1024];
540 static _u8 dumped_already;
541 
542 /* Save trace data to file, if requested. */
save_trace(struct http_request * req,struct http_response * res)543 static _u8* save_trace(struct http_request* req, struct http_response* res) {
544   _s32 f;
545   _u32 i;
546   FILE* out;
547 
548   if (!trace_dir) return "-";
549 
550   /* Do not save the same request twice. */
551   if (dumped_already) return dump_fn;
552   dumped_already = 1;
553 
554   sprintf(dump_fn,"%.512s/%08x-%04x.trace",trace_dir,(_u32)time(0),getpid());
555 
556   f = open(dump_fn, O_WRONLY | O_CREAT | O_EXCL, 0600);
557   if (f < 0) {
558     debug(">>> Unable to open trace file '%s'! <<<\n",dump_fn);
559     return "-";
560   }
561   out = fdopen(f,"w");
562 
563   fprintf(out,"== REQUEST TO %s:%u (%u headers, %u byte payload) ==\n\n%s /%s%s%s HTTP/1.0\n",
564      req->host, req->port, req->h.c, req->payload_len,
565      req->method, req->path, req->query ? "?" : "", req->query ? req->query : (_u8*)"");
566 
567   for (i=0;i<req->h.c;i++)
568     fprintf(out,"%s: %s\n", req->h.v1[i], req->h.v2[i]);
569 
570   fprintf(out,"\n");
571   if (req->payload_len)
572     fwrite(req->payload,req->payload_len > MAXTRACEITEM ? MAXTRACEITEM : req->payload_len,1,out);
573 
574   if (req->payload_len > MAXTRACEITEM)
575     fprintf(out,"\n*** DATA TRUNCATED DUE TO SIZE LIMITS ***");
576 
577   fprintf(out,"\n\n== SERVER RESPONSE (%u headers, %u byte payload, detected MIME %s) ==\n\n"
578     "HTTP/1.0 %u \n",
579     res->h.c, res->payload_len, res->mime_type ? res->mime_type : (_u8*)"(none)",
580     res->code);
581 
582   for (i=0;i<res->h.c;i++)
583     fprintf(out,"%s: %s\n", res->h.v1[i], res->h.v2[i]);
584 
585   fprintf(out,"\n");
586   if (res->payload_len)
587     fwrite(res->payload,res->payload_len > MAXTRACEITEM ? MAXTRACEITEM : res->payload_len,1,out);
588 
589   if (res->payload_len > MAXTRACEITEM)
590     fprintf(out,"\n*** DATA TRUNCATED DUE TO SIZE LIMITS ***");
591 
592   fprintf(out,"\n\n== END OF TRANSACTION ==\n");
593 
594   fclose(out);
595   close(f);
596 
597   return dump_fn;
598 
599 }
600 
601 
602 /* Use Flare to decode Flash file, if available. */
decode_flash(struct http_response * res)603 static void decode_flash(struct http_response* res) {
604   _s32 f, pid;
605   _u8 tmp[1024];
606   struct stat st;
607 
608   if (!dumped_already || !res->payload_len) return; /* ? */
609 
610   sprintf(tmp,"%s.swf",dump_fn);
611 
612   f = open(tmp, O_WRONLY | O_CREAT | O_EXCL, 0600);
613   if (f < 0) return;
614 
615   write(f, res->payload, res->payload_len);
616   close(f);
617 
618   if (!(pid = fork())) {
619     /* Flare is way too noisy, let's close stderr. */
620     close(2);
621     execl("./flare","flare",tmp,NULL);
622     execlp("flare","flare",tmp,NULL);
623     exit(1);
624   }
625 
626   if (pid > 0) waitpid(pid, (int*)&f, 0);
627 
628   unlink(tmp);
629 
630   sprintf(tmp,"%s.flr",dump_fn);
631   if (stat(tmp,&st) || !st.st_size) unlink(tmp);
632 
633   /* So we should have a non-zero length .flr file next to a trace file
634      now; ratproxy-report.sh will detect this. */
635 
636 }
637 
638 
639 /* A "fuzzy" comparator to avoid reporting "refresher" cookies where some minor parameters
640    were changed as new cookie arrivals; but to detect blanking or other major overwrites. */
unique_cookies(struct naive_list2 * reqc,struct naive_list2 * resc)641 static _u8 unique_cookies(struct naive_list2* reqc, struct naive_list2* resc) {
642   _u32 i,j;
643 
644   if (!resc->c) return 0; /* No cookies set at all. */
645   if (!reqc->c) return 1; /* All set cookies must be new. */
646 
647   for (i=0;i<resc->c;i++) {
648 
649     for (j=0;j<reqc->c;j++) {
650       if (!strcasecmp(resc->v1[i],reqc->v1[j]) &&       /* Same name   */
651           strlen(resc->v2[i]) == strlen(reqc->v2[j]))   /* Same length */
652             break; /* ...must be a refresher cookie. */
653     }
654 
655     /* No refresher cookie matches for one cookie? Good enough. */
656     if (j == reqc->c) return 1;
657 
658   }
659 
660   /* All cookies were refreshers. */
661   return 0;
662 
663 }
664 
665 
666 /* Cookie renderer, for reporting purposes. */
make_cookies(struct naive_list2 * reqc,struct naive_list2 * resc)667 static _u8* make_cookies(struct naive_list2* reqc, struct naive_list2* resc) {
668   _u8* ret = 0;
669   _u32 i,j;
670   _u8 had_some = 0;
671 
672   if (!resc->c) return "-";
673 
674 #define ALLOC_STRCAT(dest,src) do { \
675     _u32 _sl = strlen(src); \
676     _u32 _dl = 0; \
677     if (dest) _dl = strlen(dest); \
678     dest = realloc(dest,_sl + _dl + 1); \
679     if (!dest) fatal("out of memory"); \
680     strcpy(dest + _dl, src); \
681   } while (0)
682 
683   for (i=0;i<resc->c;i++) {
684 
685     /* Render only newly set cookies! */
686 
687     for (j=0;j<reqc->c;j++) {
688       if (!strcasecmp(resc->v1[i],reqc->v1[j]) &&       /* Same name   */
689           strlen(resc->v2[i]) == strlen(reqc->v2[j]))   /* Same length */
690             break; /* ...must be a refresher cookie. */
691     }
692 
693     if (j == reqc->c) {
694       if (!had_some) had_some = 1; else ALLOC_STRCAT(ret,"; ");
695       ALLOC_STRCAT(ret,resc->v1[i]);
696       ALLOC_STRCAT(ret,"=");
697       ALLOC_STRCAT(ret,resc->v2[i]);
698     }
699 
700   }
701 
702   return ret ? ret : (_u8*)"-";
703 }
704 
705 
706 /* Check for safe JSON prologues. */
is_json_safe(_u8 * str)707 static _u8 is_json_safe(_u8* str) {
708   _u32 i = 0;
709 
710   while (json_safe[i]) {
711     if (!strncmp(str,json_safe[i],strlen(json_safe[i]))) return 1;
712     i++;
713   }
714 
715   return 0;
716 
717 }
718 
719 
720 /* Check for scripts that appear to be standalone or empty (as opposed to
721    JSON-like dynamically generated response snippets for on-page execution). */
standalone_script(_u8 * str)722 static _u8 standalone_script(_u8* str) {
723 
724   if (!str) return 1; /* Empty */
725 
726 skip_more:
727 
728   while (*str && isspace(*str)) str++;
729 
730   if (!strncmp(str,"/*",2)) {
731     str = strstr(str+2, "*/");
732     if (!str) return 1; /* Empty */
733     goto skip_more;
734   }
735 
736   if (!strncmp(str,"//",2)) {
737     str += 2;
738     while (*str && strchr("\r\n",*str)) str++;
739     goto skip_more;
740   }
741 
742   if (*str == '(') { str++; goto skip_more; }
743 
744   if (!*str) return 1; /* Empty */
745 
746   /* This is not very scientific - in fact, there is no good way to
747      settle this - but should be a pretty good predictor in most cases. */
748 
749   if (!strncasecmp(str,"var",3) && isspace(str[3])) return 1; /* Script */
750   if (!strncasecmp(str,"function",8) && isspace(str[8])) return 1; /* Script */
751 
752   return 0; /* Probably JSON */
753 
754 }
755 
756 
757 /* The main request handling and routing routine. */
handle_client(FILE * client)758 static void handle_client(FILE* client) {
759   FILE *server;
760   struct http_request* req;
761   struct http_response* res;
762   _u8 m;
763   _u32 i;
764   _u8 got_xss = 0;
765 
766 #define BEST_MIME (res->sniffed_mime ? res->sniffed_mime : \
767                    (res->mime_type ? res->mime_type : (_u8*)""))
768 
769   /* TODO: Ideally, S() shouldn't do HTML escaping in machine
770      output (just filter | and control chars); but this requires
771      ratproxy-report.sh to be reworked. */
772 
773 // Request printer macros - since most of the data does not change.
774 #define SHOW_REF_MSG(warn,mesg,mod) \
775     sayf("%u|%u|%s|-|%u|%u|%s|http%s://%s:%u/%s%s%s|-|%s|-|%s|-|-|-\n", \
776       warn, mod, mesg, res->code, res->payload_len, res->mime_type ? \
777       res->mime_type : (_u8*)"-", req->from_ssl ? "s" : "", S(req->host,0), req->port, \
778       S(req->path,0), req->query ? "?" : "", req->query ? \
779       S(req->query,0) : (_u8*)"", save_trace(req,res), S(req->referer,0))
780 
781 #define SHOW_MSG(warn,mesg,off_par,mod) \
782     sayf("%u|%u|%s|%s|%u|%u|%s|%s|%s|%s|%s|http%s://%s:%u/%s%s%s|%s|%s|%s\n", \
783       warn, mod ,mesg, off_par ? S(off_par,0) : (_u8*)"-", \
784       res->code, res->payload_len, \
785       res->mime_type ? S(res->mime_type,0) : (_u8*)"-", \
786       res->sniffed_mime ? S(res->sniffed_mime,0) : (_u8*)"-", \
787       res->charset ? S(res->charset,0) : (_u8*)"-", \
788       save_trace(req,res), \
789       S(req->method,0), req->from_ssl ? "s" : "", S(req->host,0), \
790       req->port, S(req->path,0), req->query ? "?" : "", \
791       req->query ? S(req->query,0) : (_u8*)"", \
792       S(make_cookies(&req->cookies,&res->cookies),0), \
793       req->payload_len ? S(stringify_payload(req),0) : (_u8*)"-", \
794       res->payload_len ? S(res->payload,0) : (_u8*)"-")
795 
796   /* First, let's collect and complete the request */
797 
798   req = collect_request(client,0,0);
799 
800   server = open_server_complete(client, req);
801 
802   if (req->is_connect) {
803     ssl_setup();
804     ssl_start(fileno(server),fileno(client));
805 
806     fclose(client); fclose(server);
807     client = fdopen(ssl_cli_tap,"w+");
808     server = fdopen(ssl_srv_tap,"w+");
809     if (!client || !server) fatal("out of memory");
810     req = collect_request(client, req->host, req->port);
811     req->is_connect = 1;
812 
813   }
814 
815   res = send_request(client, server, req, 0);
816   send_response(client,res);
817   if (req->from_ssl) ssl_shutdown();
818 
819   /* Now, if the target is not within the set of tested domains,
820      there are several things we want to check if it originated
821      from within the tested locations. */
822 
823   if (!host_ok(req->host)) {
824     _u8 *refq;
825 
826     if (!req->ref_host) goto skip_tests;
827 
828     /* Requests between non-analyzed sites do not concern us. */
829 
830     if (!host_ok(req->ref_host)) goto skip_tests;
831 
832     /* Referer token leakage test: contains_token() succeeds on "Referer" query */
833 
834     if ((refq=strchr(req->referer,'?'))) {
835       struct naive_list_p p = { 0, 0, 0, 0, 0 };
836       _u32 i;
837 
838       parse_urlencoded(&p,refq + 1);
839 
840       for (i=0;i<p.c;i++)
841         if (contains_token(p.v1[i],p.v2[i])) break;
842 
843       if (i != p.c)
844         SHOW_REF_MSG(3,"Referer may leak session tokens",1);
845 
846     }
847 
848     /* Cross-domain script inclusion check */
849 
850     detect_mime(res);
851 
852     if (rp_strcasestr(BEST_MIME,"script") ||
853         !strcasecmp(BEST_MIME,"application/json")|| !strcasecmp(BEST_MIME,"text/css"))
854       SHOW_REF_MSG(3,"External code inclusion",1);
855 
856     /* POST requests between domains - outgoing. */
857 
858     if (strcmp(req->method,"GET")) {
859       SHOW_REF_MSG(2,"Cross-domain POST requests",0);
860     } else if (log_active) {
861 
862       i = 0;
863       while (active_mime[i]) {
864         if (!strcasecmp(BEST_MIME,active_mime[i])) {
865           SHOW_REF_MSG(1,"References to external active content",1);
866           break;
867         }
868         i++;
869       }
870 
871     }
872 
873     goto skip_tests;
874 
875   }
876 
877   /* All right, everything below pertains to checks on URLs within
878      the tested domain. Let's do some basic information gathering first. */
879 
880   checksum_response(res);
881 
882   detect_mime(res);
883 
884   if (res->is_text)
885     detect_charset(res);
886 
887   if (dump_urls) SHOW_MSG(0,"!All visited URLs",0,0);
888 
889   /* If requested to do so, we need to log non-HTTPS traffic and
890      prioritize it depending on document type. */
891 
892   if (log_mixed && !req->from_ssl) {
893 
894     m = get_modifiers(req,res);
895 
896     if (!strcasecmp(BEST_MIME,"text/html") || rp_strcasestr(BEST_MIME,"script") ||
897         !strcasecmp(BEST_MIME,"application/json") ||
898         !strcasecmp(BEST_MIME,"text/css") || !strcasecmp(BEST_MIME,"application/xhtml+xml"))
899       SHOW_MSG(2,"Potential mixed content",0,m);
900       else SHOW_MSG(0,"Potential mixed content",0,m);
901 
902   }
903 
904   /* If instructed to do so, adjust XSRF "safety" rating based on packet
905      replay now. */
906 
907   if (try_attacks) try_replay_xsrf(req,res);
908 
909   /***********************
910    * HEADER BASED CHECKS *
911    ***********************/
912 
913   if (res->code < 200 || res->code >= 400) {
914 
915     switch (NOECHO(m = get_modifiers(req,res))) {
916 
917       /* No big deal, but warrants an investigation; more important if
918          the content is user-specific. */
919 
920       case 0:
921       case MOD_PRED:
922         SHOW_MSG(0,"HTTP errors",0,m); break;
923       case MOD_AUTH:
924       case MOD_PRED | MOD_AUTH:
925         SHOW_MSG(1,"HTTP errors",0,m); break;
926 
927     }
928 
929   }
930 
931   /* Detect 302 with Location: that contains req->query or req->payload,
932      and lacks XSRF token? */
933 
934   if (res->location && (req->query || req->payload) && !req->xsrf_safe) {
935      _u8* hname = strdup(res->location), *y;
936      if (!hname) fatal("out of memory");
937      if (!strncasecmp(hname,"http://",7)) hname += 7; else
938      if (!strncasecmp(hname,"https://",8)) hname += 8;
939      y = hname;
940      while (isalnum(*y) || *y == '-' || *y == '.') y++;
941      *y = 0;
942 
943      if (hname[0] && ((req->query   && rp_strcasestr(req->query,hname)) ||
944                       (req->payload && rp_strcasestr(req->payload,hname)))) {
945        SHOW_MSG(3,"HTTP redirector",0,1);
946      }
947 
948   }
949 
950   /* If not a HTTP redirector, examine for HTML redirection anyway */
951 
952   if (!res->location && (req->query || req->payload) && res->payload && !req->xsrf_safe) {
953 
954     _u8* mref=rp_strcasestr(res->payload,"HTTP-EQUIV=\"Refresh\"");
955     _u8* hname = mref ? rp_strcasestr(mref + 20, ";URL=") : 0;
956 
957     if (hname) {
958 
959        _u8* mrefend = strchr(mref + 20,'>'), *y;
960 
961        if (mrefend && hname < mrefend) {
962          hname = strdup(hname + 5);
963          if (!hname) fatal("out of memory");
964          if (!strncasecmp(hname,"http://",7)) hname += 7; else
965          if (!strncasecmp(hname,"https://",8)) hname += 8;
966          y = hname;
967          while (isalnum(*y) || *y == '-' || *y == '.') y++;
968          *y = 0;
969 
970          if (hname[0] && ((req->query   && rp_strcasestr(req->query,hname)) ||
971                           (req->payload && rp_strcasestr(req->payload,hname)))) {
972            SHOW_MSG(3,"HTML META redirector",0,1);
973          }
974 
975        }
976 
977      }
978 
979   }
980 
981   if (req->multipart) {
982     m = get_modifiers(req,res);
983     SHOW_MSG(0,"File upload forms",0,m);
984   }
985 
986   if (all_post && req->payload && strcasecmp(req->method,"GET")) {
987     m = get_modifiers(req,res);
988     SHOW_MSG(0,"All POST requests",0,m);
989   }
990 
991   if (unique_cookies(&req->cookies,&res->cookies) && !req->xsrf_safe && (req->payload || req->query)) {
992     m = get_modifiers(req,res);
993     SHOW_MSG(2,"Cookie issuer with no XSRF protection",0,m);
994 
995     /* TODO: Maybe check if query data copied over to cookies. */
996 
997   }
998 
999   if (all_cookie && unique_cookies(&req->cookies,&res->cookies)) {
1000     m = get_modifiers(req,res);
1001     SHOW_MSG(0,"All cookie setting URLs",0,m);
1002   }
1003 
1004   /* If there's a request that requires authentication and accept parameters,
1005      it should probably employ anti-XSRF protection of some sort. */
1006 
1007   if (!req->xsrf_safe && (req->payload || req->query)) {
1008 
1009     m = get_modifiers(req,res);
1010 
1011     if (m & MOD_AUTH) {
1012 
1013       if (!strcasecmp(req->method,"GET")) {
1014         if (get_xsrf)
1015           SHOW_MSG(0,"GET query with no XSRF protection",0,m);
1016       } else
1017         SHOW_MSG(3,"POST query with no XSRF protection",0,m);
1018 
1019     } else {
1020 
1021       /* POST requests that do not require authentication are interesting,
1022          though not necessarily very troubling. */
1023 
1024       if (strcasecmp(req->method,"GET"))
1025         SHOW_MSG(1,"POST query with no XSRF protection",0,m);
1026     }
1027 
1028   }
1029 
1030   if (res->has_multiple) {
1031 
1032     /* Duplicate Content-Type or Content-Disposition headers are a sure
1033        way to get into trouble. */
1034 
1035     switch (NOECHO(m = get_modifiers(req,res))) {
1036 
1037       case 0:
1038         SHOW_MSG(1,"Ambiguous HTTP content headers",0,m); break;
1039       case MOD_PRED:
1040       case MOD_AUTH:
1041         SHOW_MSG(2,"Ambiguous HTTP content headers",0,m); break;
1042       case MOD_PRED | MOD_AUTH:
1043         SHOW_MSG(3,"Ambiguous HTTP content headers",0,m); break;
1044 
1045     }
1046 
1047   }
1048 
1049   /* Unusual, but hey, let's report it because we can. */
1050 
1051   if (res->has_badclen)
1052     SHOW_MSG(3,"Misstated Content-Length",0,0);
1053 
1054   /* POST requests that pass auth tokens between domains. If coming
1055      from an excluded domain, this is more important. */
1056 
1057   if (req->ref_host && strcmp(req->method,"GET") &&
1058       strcasecmp(req->host,req->ref_host)) {
1059     if (!host_ok(req->ref_host))
1060       SHOW_REF_MSG(2,"Cross-domain POST requests",0);
1061     else
1062       SHOW_REF_MSG(1,"Cross-domain POST requests",0);
1063   }
1064 
1065   /* Report caching headers issues (but only once!) */
1066 
1067   if (!req->from_ssl && unique_cookies(&req->cookies,&res->cookies) && is_public(req,res)) {
1068 
1069     switch (NOECHO(m = get_modifiers(req,res))) {
1070       case 0:
1071       case MOD_AUTH:
1072         SHOW_MSG(1,"Bad caching headers","cacheable SetCookie",m);
1073         break;
1074       case MOD_PRED:
1075       case MOD_AUTH | MOD_PRED:
1076         SHOW_MSG(3,"Bad caching headers","cacheable SetCookie",m);
1077         break;
1078     }
1079 
1080   } else if (!req->from_ssl && is_public(req,res) == 2 && res->payload_len && res->code < 300) {
1081 
1082     m = get_modifiers(req,res);
1083 
1084     if (NOECHO(m) == (MOD_AUTH | MOD_PRED)) SHOW_MSG(3,"Bad caching headers","Expires/Date/Cache-Control mismatch",m);
1085       else if (NOECHO(m) == MOD_AUTH) SHOW_MSG(2,"Bad caching headers","Expires/Date/Cache-Control mismatch",m);
1086 
1087   }
1088 
1089   /************************
1090    * PAYLOAD BASED CHECKS *
1091    ************************/
1092 
1093   /* If the document is empty, bail out (everything below relies on non-NULL res->payload). */
1094 
1095   if (!res->payload_len) goto skip_tests;
1096 
1097   /* Cross-domain files are always worth a look. */
1098 
1099   if ((strstr(req->path,"crossdomain.xml") || strstr(req->path,"clientaccesspolicy.xml")) &&
1100       (strstr(res->payload,"<cross-domain-policy>") || strstr(res->payload,"<access-policy>"))) {
1101     m = get_modifiers(req,res);
1102     SHOW_MSG(1,"Cross-domain access policy",0,m);
1103   }
1104 
1105   if (res->is_text && (!res->charset || res->bad_cset)) {
1106 
1107     /* Missing charsets and typos lead to UTF-7 cross-site scripting. */
1108 
1109     if (strcasecmp(BEST_MIME,"text/css")) {
1110 
1111       /* Cases where content is echoed back are higher risk, but we care
1112          about stored attacks too. */
1113 
1114       switch (NOECHO(m = get_modifiers(req,res))) {
1115 
1116         case 0:
1117           SHOW_MSG(ECHO(m) ? 3 : 1,"Bad or no charset declared for renderable file",0,m); break;
1118         case MOD_PRED:
1119         case MOD_AUTH:
1120           SHOW_MSG(ECHO(m) ? 3 : 1,"Bad or no charset declared for renderable file",0,m); break;
1121         case MOD_PRED | MOD_AUTH:
1122           SHOW_MSG(ECHO(m) ? 3 : 2,"Bad or no charset declared for renderable file",0,m); break;
1123 
1124       }
1125 
1126     }
1127 
1128   }
1129 
1130   if (res->mime_type && !strcasecmp(res->mime_type,"text/plain")) {
1131 
1132     /* Modern interactive websites have very few reasons to serve
1133        text/plain documents, and if these documents are user-controlled,
1134        content sniffing can lead to XSS. */
1135 
1136     /* Let's just ignore text/css; the next check will catch it anyway,
1137        and it's nearly guaranteed to be harmless. */
1138 
1139     if (strcasecmp(BEST_MIME,"text/css"))
1140       switch (NOECHO(m = get_modifiers(req,res))) {
1141 
1142       case 0:
1143         SHOW_MSG(1,"MIME type set to text/plain",0,m); break;
1144       case MOD_AUTH:
1145       case MOD_PRED:
1146         SHOW_MSG(ECHO(m) ? 2 : 1,"MIME type set to text/plain",0,m); break;
1147       case MOD_PRED | MOD_AUTH:
1148         SHOW_MSG(ECHO(m) ? 3 : 2,"MIME type set to text/plain",0,m); break;
1149 
1150     }
1151 
1152   }
1153 
1154   if (!res->mime_type) {
1155 
1156     /* Having no MIME type almost always warrants scrutiny, as content
1157        sniffing runs rampant and may have a browser-specific outcome. */
1158 
1159     switch (NOECHO(m = get_modifiers(req,res))) {
1160 
1161       case 0:
1162         SHOW_MSG(1,"MIME type missing",0,m); break;
1163       case MOD_PRED:
1164       case MOD_AUTH:
1165         SHOW_MSG(ECHO(m) ? 2 : 1,"MIME type missing",0,m); break;
1166       case MOD_PRED | MOD_AUTH:
1167         SHOW_MSG(ECHO(m) ? 3 : 2,"MIME type missing",0,m); break;
1168 
1169     }
1170 
1171   }
1172 
1173   /* Let's be annoying here for initial betas, why not?. */
1174 
1175   if (res->payload_len > 10 && res->mime_type && !res->sniffed_mime)
1176     debug(">>> Failed to detect MIME type '%s' (%s:%u/%s?%s), tell lcamtuf@google.com <<<\n",
1177       S(res->mime_type,0), S(req->host,0), req->port, S(req->path,0), req->query ?
1178       S(req->query,0) : (_u8*)"");
1179 
1180   if (res->sniffed_mime && res->mime_type &&
1181       strcasecmp(res->mime_type, res->sniffed_mime)) {
1182 
1183     if (res->is_text) {
1184 
1185       /* MIME mismatch on text formats that are rendered by the browser
1186          is usually a major problem and may lead to XSS. */
1187 
1188       /* Do not be too picky about HTML - XHTML mismatches, though... */
1189 
1190       if (res->mime_type && res->sniffed_mime &&
1191           !strcasecmp(res->mime_type,"text/html") &&
1192           !strcasecmp(res->sniffed_mime,"application/xhtml+xml"))
1193         goto ignore_mime_mismatch;
1194 
1195       if (strcasecmp(BEST_MIME,"text/css"))
1196         switch (NOECHO(m = get_modifiers(req,res))) {
1197 
1198         case 0:
1199           SHOW_MSG(1,"MIME type mismatch on renderable file",0,m);
1200           break;
1201         case MOD_AUTH:
1202         case MOD_PRED:
1203           SHOW_MSG(ECHO(m) ? 2 : 1,"MIME type mismatch on renderable file",0,m); break;
1204         case MOD_PRED | MOD_AUTH:
1205           SHOW_MSG(ECHO(m) ? 3 : 2,"MIME type mismatch on renderable file",0,m); break;
1206 
1207       }
1208 
1209     } else if (!strncasecmp(BEST_MIME,"image/",6)) {
1210 
1211       /* Subtle mismatches with images may have disastrous effects as
1212          content sniffing inevitably kicks in and may lead to HTML
1213          parsing in EXIF or comment data.*/
1214 
1215       switch (NOECHO(m = get_modifiers(req,res))) {
1216 
1217         case 0:
1218           SHOW_MSG(1,"MIME type mismatch on image file",0,m); break;
1219         case MOD_AUTH:
1220         case MOD_PRED:
1221           SHOW_MSG(2,"MIME type mismatch on image file",0,m); break;
1222         case MOD_PRED | MOD_AUTH:
1223           SHOW_MSG(3,"MIME type mismatch on image file",0,m); break;
1224 
1225       }
1226 
1227     } else {
1228 
1229       if (!strcasecmp(res->mime_type,"application/octet-stream")) {
1230 
1231         /* Defaulting to application/octet-stream may trigger content
1232            sniffing. */
1233 
1234         switch (NOECHO(m = get_modifiers(req,res))) {
1235 
1236           case 0:
1237             SHOW_MSG(1,"Generic MIME type used",0,m); break;
1238           case MOD_AUTH:
1239           case MOD_PRED:
1240             SHOW_MSG(ECHO(m) ? 2 : 1,"Generic MIME type used",0,m); break;
1241           case MOD_PRED | MOD_AUTH:
1242             SHOW_MSG(ECHO(m) ? 3 : 2,"Generic MIME type used",0,m); break;
1243 
1244         }
1245 
1246       } else {
1247 
1248         /* Other MIME type mismatches still warrant attention, as this
1249            might be a result of a typo or the like. */
1250 
1251         switch (NOECHO(m = get_modifiers(req,res))) {
1252 
1253           case 0:
1254           case MOD_AUTH:
1255           case MOD_PRED:
1256             SHOW_MSG(1,"MIME type mismatch on binary file",0,m); break;
1257           case MOD_PRED | MOD_AUTH:
1258             SHOW_MSG(2,"MIME type mismatch on binary file",0,m); break;
1259 
1260         }
1261 
1262       }
1263 
1264     }
1265 
1266   }
1267 
1268 ignore_mime_mismatch:
1269 
1270   if ((rp_strcasestr(BEST_MIME,"script") || !strcasecmp(BEST_MIME,"application/json"))) {
1271 
1272     /* JSON is almost always worth inspecting - doubly so if not secured against XSRF. */
1273 
1274     switch (NOECHO(m = get_modifiers(req,res))) {
1275 
1276       case 0:
1277       case MOD_PRED:
1278         break;
1279 
1280       case MOD_AUTH:
1281         SHOW_MSG(standalone_script(res->payload) ? 0 : 1,
1282                  "Dynamic Javascript for direct inclusion",0,m); break;
1283       case MOD_PRED | MOD_AUTH:
1284 
1285         /* TODO: Move this to a proper Javascript analyzer instead. */
1286 
1287         if (standalone_script(res->payload)) {
1288           SHOW_MSG(0,"Dynamic Javascript for direct inclusion",0,m);
1289         } else if (is_json_safe(res->payload)) {
1290           SHOW_MSG(ECHO(m) ? 1 : 0,"Dynamic Javascript for direct inclusion",0,m);
1291         } else {
1292           SHOW_MSG(ECHO(m) ? 3 : 2,"Dynamic Javascript for direct inclusion",0,m);
1293         }
1294         break;
1295 
1296     }
1297 
1298   }
1299 
1300   if (!strcasecmp(BEST_MIME,"image/png") && !res->is_attach) {
1301 
1302     switch (NOECHO(m = get_modifiers(req,res))) {
1303 
1304       case 0:
1305       case MOD_PRED:
1306         if (check_png) SHOW_MSG(2,"Inline PNG image",0,m); break;
1307       case MOD_AUTH:
1308         SHOW_MSG(2,"Inline PNG image",0,m); break;
1309       case MOD_PRED | MOD_AUTH:
1310         SHOW_MSG(3,"Inline PNG image",0,m); break;
1311 
1312     }
1313 
1314   }
1315 
1316   /* Echoed markup in a query is bad. */
1317 
1318   for (i=0;i<req->p.c;i++)
1319     if (!req->p.fn[i][0] && strchr(req->p.v2[i],'<') && strstr(res->payload,req->p.v2[i])) {
1320 
1321       switch (NOECHO(m = get_modifiers(req,res))) {
1322 
1323         case 0:
1324         case MOD_AUTH:
1325           SHOW_MSG(2,"Direct markup echoed back",req->p.v2[i],m); break;
1326         case MOD_PRED:
1327         case MOD_PRED | MOD_AUTH:
1328           SHOW_MSG(3,"Direct markup echoed back",req->p.v2[i],m); break;
1329       }
1330 
1331       break;
1332 
1333     }
1334 
  /* Non-echoed paths in query are often bad, though there are some common patterns
     of false positives. */
1337 
1338   for (i=0;i<req->p.c;i++) if (!req->p.fn[i][0] && strlen(req->p.v2[i]) < MAX_FPATH &&
1339     strcmp(req->p.v1[i],"utmp") /* Analytics-specific. */ ) {
1340     _u8* x = strchr(req->p.v2[i],'/');
1341     _u8* y = strchr(req->p.v2[i],'.');
1342 
1343     if (!x) continue;				/* No slash - no problem       */
1344     if (y && y <= x) continue;			/* "www.foo.com/bar/baz.jpg"   */
1345     if (x[1] == '/') continue;			/* "http://www.foo.com/"       */
1346 
1347     if (isdigit(x[1]) && isdigit(x[2]) && x[3] == '/') continue; /* 01/02/2007 */
1348     if (isdigit(x[1]) && isdigit(x[3]) && x[2] == '/') continue; /* 01/2/2007 */
1349 
1350     do { x++; } while (isalnum(*x) || *x == '_');
1351 
1352     if (*x != '/') continue;			/* "text/plain"                */
1353 
1354     if (strstr(res->payload,req->p.v2[i]))	/* Text simply echoed back?    */
1355       continue;
1356 
1357     switch (NOECHO(m = get_modifiers(req,res))) {
1358 
1359       case 0:
1360         case MOD_AUTH:
1361           SHOW_MSG(2,"File path in query parameters",req->p.v2[i],m); break;
1362         case MOD_PRED:
1363         case MOD_PRED | MOD_AUTH:
1364           SHOW_MSG(3,"File path in query parameters",req->p.v2[i],m); break;
1365 
1366     }
1367 
1368     /* Report only once per URL. */
1369     goto no_more_paths;
1370 
1371   }
1372 
1373   /* Non-echoed filenames are not necessarily evil, but worth examining. */
1374 
1375   if (all_files)
1376     for (i=0;i<req->p.c;i++) if (!req->p.fn[i][0] && strlen(req->p.v2[i]) < MAX_FPATH &&
1377     strcmp(req->p.v1[i],"utmp") /* Analytics-specific again. */ ) {
1378 
1379       _u8* x = req->p.v2[i];
1380       while (isalnum(*x) || *x == '_' || *x == '/') x++;
1381       if (*x == '.' && isalpha(x[1]) && isalpha(x[2]) && strlen(x+1) <= 5 &&
1382           !strstr(res->payload,req->p.v2[i])) {
1383 
1384         m = get_modifiers(req,res);
1385         SHOW_MSG(1,"File name in query parameters",req->p.v2[i],m);
1386         break;
1387 
1388       }
1389     }
1390 
1391 no_more_paths:
1392 
1393   /* Java method names in a query are bad. */
1394 
1395   for (i=0;i<req->p.c;i++) if (!req->p.fn[i][0]) {
1396     _u8* x = strstr(req->p.v2[i],"com.");
1397     if (x && isalpha(x[4]) && strchr(x+4,'.') && !strstr(res->payload,req->p.v2[i]) &&
1398         !strchr(x,'/')) {
1399       switch (NOECHO(m = get_modifiers(req,res))) {
1400 
1401         case 0:
1402         case MOD_AUTH:
1403           SHOW_MSG(2,"Java method call in query parameters",req->p.v2[i],m); break;
1404         case MOD_PRED:
1405         case MOD_PRED | MOD_AUTH:
1406           SHOW_MSG(3,"Java method call in query parameters",req->p.v2[i],m); break;
1407       }
1408 
1409       break;
1410     }
1411   }
1412 
1413   /* Javascript code in a query is bad; ignore alert(...) though, as this is almost
1414      always a sign of manual XSS testing, not a legitimate functionality. */
1415 
1416   for (i=0;i<req->p.c;i++) if (!req->p.fn[i][0]) {
1417     _u8* x = strchr(req->p.v2[i],'(');
1418     if (x && (x == req->p.v2[i] || isalpha(x[-1])) && strchr(x+1,')') &&
1419         !rp_strcasestr(req->p.v2[i],"alert(") &&
1420         strstr(res->payload,req->p.v2[i])) {
1421       switch (NOECHO(m = get_modifiers(req,res))) {
1422 
1423         case 0:
1424         case MOD_AUTH:
1425           SHOW_MSG(2,"Javascript code echoed back",req->p.v2[i],m); break;
1426         case MOD_PRED:
1427         case MOD_PRED | MOD_AUTH:
1428           SHOW_MSG(3,"Javascript code echoed back",req->p.v2[i],m); break;
1429       }
1430 
1431       break;
1432     }
1433   }
1434 
1435   /* SQL statement in a query is bad. */
1436 
1437   for (i=0;i<req->p.c;i++) if (!req->p.fn[i][0]) {
1438     _u8* x = rp_strcasestr(req->p.v2[i],"SELECT");
1439     if (x && rp_strcasestr(x+1,"FROM") && !strstr(res->payload,req->p.v2[i])) {
1440       switch (NOECHO(m = get_modifiers(req,res))) {
1441 
1442         case 0:
1443         case MOD_AUTH:
1444           SHOW_MSG(2,"SQL code in query parameters",req->p.v2[i],m); break;
1445         case MOD_PRED:
1446         case MOD_PRED | MOD_AUTH:
1447           SHOW_MSG(3,"SQL code in query parameters",req->p.v2[i],m); break;
1448       }
1449 
1450       break;
1451     }
1452   }
1453 
1454   /* Check for OGNL-style parameter names. */
1455 
1456   if (!req->non_param)
1457   for (i=0;i<req->p.c;i++) {
1458     if (!req->p.fn[i][0] && req->p.v1[i][0] && req->p.v2[i][0]) {
1459       _u8* x = strchr(req->p.v1[i] + 1, '.');		// 'user.lname'
1460       _u8* x1 = x ? strchr(x + 1, '.') : 0;		// 'user.lname.foo'
1461       _u8* y = strchr(req->p.v1[i] + 1, '[');		// 'users[0].lname'
1462       if (((x && x1) || y) && req->p.v1[i][0] != '[') {
1463         switch (NOECHO(m = get_modifiers(req,res))) {
1464           case 0:
1465           case MOD_AUTH:
1466             SHOW_MSG(1,"Suspicious parameter passing scheme",req->p.v1[i],m); break;
1467           case MOD_PRED:
1468           case MOD_PRED | MOD_AUTH:
1469             SHOW_MSG(2,"Suspicious parameter passing scheme",req->p.v1[i],m); break;
1470         }
1471         break;
1472       }
1473     }
1474   }
1475 
1476   /* Locate generic XSS candidates. */
1477 
1478   if (try_attacks)
1479     if (try_replay_xss(req,res)) {
1480       m = get_modifiers(req,res);
1481       SHOW_MSG(3,"Confirmed XSS vectors",0,m);
1482       got_xss = 1;
1483     }
1484 
1485   if (all_xss && !got_xss && res->is_text)
1486     for (i=0;i<req->p.c;i++)
1487       if (!req->p.fn[i][0] && xss_field(req->p.v2[i],0) && strstr(res->payload,req->p.v2[i])) {
1488         m = get_modifiers(req,res);
1489         if (!rp_strcasestr(BEST_MIME,"script") && strcasecmp(BEST_MIME,"application/json"))
1490           SHOW_MSG(0,"XSS candidates",req->p.v1[i],m);
1491           else SHOW_MSG(1,"XSS candidates (script)",req->p.v1[i],m);
1492         break;
1493       }
1494 
1495   for (i=0;i<req->p.c;i++)
1496     if (!req->p.fn[i][0] && xss_field(req->p.v2[i],1)) {
1497       _u32 j;
1498       for (j=0;j<res->h.c;j++)
1499         if (strstr(res->h.v2[j],req->p.v2[i])) {
1500           m = get_modifiers(req,res);
1501           SHOW_MSG(0,"Request splitting candidates",req->p.v1[i],m);
1502           goto xss_done;
1503         }
1504     }
1505 
1506 xss_done:
1507 
1508   /* Check for what looks like JSON with inline HTML (we skip standalone scripts,
1509      as they often contain static HTML to be rendered). We do some basic quote
1510      state tracking not to get confused by regular arithmetic. No comment
1511      tracking, but that shouldn't break easily. */
1512 
1513   if ((rp_strcasestr(BEST_MIME,"script") || !strcasecmp(BEST_MIME,"application/json")) &&
1514       !standalone_script(res->payload)) {
1515     _u8* p = res->payload, qstate = 0, got_html = 0, esc_next = 0, pv = ' ';
1516 
1517     do {
1518 
1519       if (esc_next) { esc_next = 0; continue; }
1520 
1521       /* TODO: This should be replaced with a proper Javascript analyzer. */
1522 
1523       switch (*p) {
1524         case '\\': esc_next = 1; break;
1525         case '\'': case '"':
1526           if (qstate == *p) qstate = 0; else if (!qstate) qstate = *p;
1527           break;
1528         case '<': if (qstate) got_html = 1; break;
1529         case '>': if (qstate && got_html) got_html = 2; break;
1530       }
1531 
1532     } while (got_html < 2 && (pv=*(p++)));
1533 
1534     if (got_html == 2) {
1535       switch (NOECHO(m = get_modifiers(req,res))) {
1536         case 0: case MOD_PRED: case MOD_AUTH:
1537           SHOW_MSG(1,"Markup in dynamic Javascript",0,m); break;
1538         case MOD_AUTH | MOD_PRED:
1539           SHOW_MSG(ECHO(m) ? 2 : 1,"Markup in dynamic Javascript",0,m); break;
1540       }
1541     }
1542 
1543   }
1544 
1545   if (all_flash && !strcasecmp(BEST_MIME,"application/x-shockwave-flash")) {
1546     m = get_modifiers(req,res);
1547     SHOW_MSG(0,"All Flash applications",0,m);
1548     if (trace_dir) decode_flash(res);
1549   }
1550 
1551   /* TODO: Add more index checks and other troubling server responses. */
1552 
1553   if (strstr(res->payload,">[To Parent Directory]<") ||
1554       strstr(res->payload,"<title>Index of /")) {
1555     m = get_modifiers(req,res);
1556     SHOW_MSG(0,"Directory indexes",0,m);
1557   }
1558 
1559   /* TODO: This should be replaced with a proper Javascript analyzer. */
1560 
1561   if (bad_js && res->is_text && (
1562                  rp_strcasestr(res->payload,".write(") ||
1563                  rp_strcasestr(res->payload,".writeln("))) {
1564     m = get_modifiers(req,res);
1565     SHOW_MSG(1,"Risky Javascript code","document.write",m);
1566   }
1567 
1568   if (bad_js && res->is_text &&
1569                  (rp_strcasestr(res->payload,".innerHtml") ||
1570                  rp_strcasestr(res->payload,".outerHtml"))) {
1571     m = get_modifiers(req,res);
1572     SHOW_MSG(1,"Risky Javascript code","innerHTML",m);
1573   }
1574 
1575   if (bad_js && res->is_text &&
1576                  rp_strcasestr(res->payload,"document.referrer")) {
1577     m = get_modifiers(req,res);
1578     SHOW_MSG(2,"Risky Javascript code","document.referrer",m);
1579   }
1580 
1581   if (bad_js && res->is_text &&
1582                  rp_strcasestr(res->payload,"document.domain")) {
1583     m = get_modifiers(req,res);
1584     SHOW_MSG(2,"Risky Javascript code","document.domain",m);
1585   }
1586 
1587 skip_tests:
1588 
1589   fflush(outfile);
1590   exit(0);
1591 
1592 }
1593 
1594 
listen_loop(void)1595 static void listen_loop(void) {
1596   _s32 lsock, csock, on = 1;
1597   _u32 x;
1598   static struct sockaddr_in saddr;
1599 
1600   lsock=socket(AF_INET, SOCK_STREAM, 0);
1601   if (lsock < 0) pfatal("cannot create socket");
1602 
1603   if (setsockopt(lsock, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(_s32)) == -1)
1604     pfatal("cannot setsockopt()");
1605 
1606   saddr.sin_family      = AF_INET;
1607 
1608   if (!use_any) {
1609     saddr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
1610   } else {
1611     saddr.sin_addr.s_addr = htonl(INADDR_ANY);
1612   }
1613 
1614   saddr.sin_port        = htons(use_port);
1615 
1616   x = sizeof(saddr);
1617 
1618   if (bind(lsock, (struct sockaddr*)&saddr, x)) pfatal("cannot bind to port");
1619   if (listen(lsock, 10)) pfatal("listen() failed");
1620 
1621   debug("[*] Proxy configured successfully. Have fun, and please do not be evil.\n");
1622 
1623   if (use_proxy)
1624     debug("    Upstream proxy is %s:%u\n",use_proxy,proxy_port);
1625 
1626   if (try_attacks)
1627     debug("    WARNING: Disruptive tests enabled. use with care.\n");
1628 
1629   debug("[+] Accepting connections on port %u/tcp (%s)...\n", use_port,
1630         use_any ? "any source" : "local only");
1631 
1632   while ((csock = accept(lsock, (struct sockaddr*)&saddr, &x)) >= 0) {
1633 
1634     /* Bury zombies */
1635     while (waitpid(-1,&x,WNOHANG) > 0);
1636 
1637     if (!fork()) {
1638       FILE* client;
1639       close(lsock);
1640       client = fdopen(csock,"w+");
1641       if (!client) fatal("fdopen() failed");
1642       handle_client(client);
1643       /* Not reached */
1644       exit(0);
1645     }
1646 
1647     close(csock);
1648 
1649   }
1650 
1651   pfatal("accept() failed");
1652 
1653 }
1654 
1655 
1656 
main(int argc,char ** argv)1657 int main(int argc, char** argv) {
1658   _s32 opt;
1659   _u8* x;
1660 
1661   signal(SIGPIPE, SIG_IGN);
1662 
1663   debug("ratproxy version " VERSION " by <lcamtuf@google.com>\n");
1664 
1665   while ((opt = getopt(argc,argv,"+w:v:p:d:P:itxgjmafske2clXCr")) > 0)
1666     switch (opt) {
1667 
1668       case 'w': {
1669           _s32 f;
1670           if (outfile) fatal("multiple -w options make no sense");
1671           unlink(optarg); /* Ignore errors */
1672           f = open(optarg,O_WRONLY|O_CREAT|O_EXCL,0600);
1673           if (f < 0) pfatal("cannot open log file");
1674           outfile = fdopen(f,"w");
1675           if (!outfile) pfatal("fdopen failed");
1676         }
1677         break;
1678 
1679       case 'v': {
1680           if (trace_dir) fatal("multiple -v options make no sense");
1681           trace_dir = optarg;
1682           mkdir(trace_dir,0700); /* Ignore errors */
1683           if (access(trace_dir,X_OK)) pfatal("cannot create -v directory");
1684         }
1685         break;
1686 
1687       case 'p':
1688         use_port = atoi(optarg);
1689         if (!use_port || use_port > 65535) fatal("invalid -p value");
1690         break;
1691 
1692       case 'P':
1693         use_proxy = optarg;
1694         x = strchr(optarg,':');
1695         if (!x) break;
1696         *(x++) = 0;
1697         proxy_port = atoi(x);
1698         if (!proxy_port || proxy_port > 65535) fatal("invalid proxy port");
1699         break;
1700 
1701       case '2':
1702         use_double = 1;
1703         break;
1704 
1705       case 'd':
1706         ADD(domains,optarg);
1707         break;
1708 
1709       case 'i':
1710         check_png = 1;
1711         break;
1712 
1713       case 'e':
1714         picky_cache = 1;
1715         break;
1716 
1717       case 't':
1718         all_files = 1;
1719         break;
1720 
1721       case 'f':
1722         all_flash = 1;
1723         break;
1724 
1725       case 'x':
1726         all_xss = 1;
1727         break;
1728 
1729       case 'g':
1730         get_xsrf = 1;
1731         break;
1732 
1733       case 'j':
1734         bad_js = 1;
1735         break;
1736 
1737       case 'l':
1738         use_len = 1;
1739         break;
1740 
1741       case 's':
1742         all_post = 1;
1743         break;
1744 
1745       case 'a':
1746         dump_urls = 1;
1747         break;
1748 
1749       case 'c':
1750         all_cookie = 1;
1751         break;
1752 
1753       case 'X':
1754         try_attacks = 1;
1755         break;
1756 
1757       case 'm':
1758         log_active = 1;
1759         break;
1760 
1761       case 'C':
1762         fix_attacks = 1;
1763         break;
1764 
1765       case 'k':
1766         log_mixed = 1;
1767          break;
1768 
1769       case 'r':
1770         use_any = 1;
1771         break;
1772 
1773       default:
1774         usage(argv[0]);
1775     }
1776 
1777   if (optind != argc) usage(argv[0]);
1778 
1779   if (optind == 1)
1780     debug("\n[!] WARNING: Running with no command-line config options specified. This is\n"
1781             "    almost certainly not what you want, as most checks are disabled. Please\n"
1782             "    consult the documentation or use --help for more information.\n\n");
1783   else if (!domains.c)
1784     debug("\n[!] WARNING: Running with no 'friendly' domains specified. Many cross-domain\n"
1785             "    checks will not work. Please consult the documentation for advice.\n\n");
1786 
1787   if (!outfile) outfile = stdout;
1788 
1789   listen_loop();
1790 
1791   /* Not reached */
1792   return 0;
1793 
1794 }
1795