1 /*
2    ratproxy - HTTP request handling
3    --------------------------------
4 
5    The following routines take care of HTTP request handling, parsing,
6    and error reporting.
7 
8    Note that this code is one-shot, process is terminated when request
9    handling is done - and as such, we rely on the OS to do garbage
10    collection.
11 
12    Author: Michal Zalewski <lcamtuf@google.com>
13 
14    Copyright 2007, 2008 by Google Inc. All Rights Reserved.
15 
16    Licensed under the Apache License, Version 2.0 (the "License");
17    you may not use this file except in compliance with the License.
18    You may obtain a copy of the License at
19 
20      http://www.apache.org/licenses/LICENSE-2.0
21 
22    Unless required by applicable law or agreed to in writing, software
23    distributed under the License is distributed on an "AS IS" BASIS,
24    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25    See the License for the specific language governing permissions and
26    limitations under the License.
27 
28  */
29 
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <unistd.h>
33 #include <sys/socket.h>
34 #include <netinet/in.h>
35 #include <sys/types.h>
36 #include <sys/stat.h>
37 #include <fcntl.h>
38 #include <string.h>
39 #include <sys/wait.h>
40 #include <ctype.h>
41 #include <netdb.h>
42 #include <openssl/md5.h>
43 #include <time.h>
44 
45 #include "config.h"
46 #include "types.h"
47 #include "debug.h"
48 #include "nlist.h"
49 #include "http.h"
50 #include "ssl.h"
51 #include "string-inl.h"
52 
53 extern _u8* use_proxy;		/* Runtime setting exports from ratproxy. */
54 extern _u32 proxy_port;
55 extern _u8  use_len;
56 
57 static _u8 srv_buf[MAXLINE],	/* libc IO buffers */
58            cli_buf[MAXLINE];
59 
60 
61 /* Read a single line of HTTP headers, strip whitespaces */
grab_line(FILE * where)62 static _u8* grab_line(FILE* where) {
63   static _u8 inbuf[MAXLINE];
64   _u32 l;
65 
66   if (!fgets(inbuf,MAXLINE,where)) return 0;
67 
68   l = strlen(inbuf);
69 
70   /* Excessive line length is bad, let's bail out. */
71   if (l == MAXLINE-1) return 0;
72 
73   while (l && isspace(inbuf[l-1])) inbuf[--l] = 0;
74 
75   return inbuf;
76 }
77 
78 
79 /* Return a generic HTTP error message, end current process.
80    Note that this function should not handle user-controlled data. */
http_error(FILE * client,_u8 * message,_u8 sink)81 static void http_error(FILE* client, _u8* message,_u8 sink) {
82 
83   if (client) {
84     _u8* l;
85 
86     if (sink) while ((l=grab_line(client)) && l[0]);
87 
88     fprintf(client,
89       "HTTP/1.0 500 %s\n"
90       "Content-type: text/html\n\n"
91 
92       "<font face=\"Bitstream Vera Sans Mono,Andale Mono,Lucida Console\">\n"
93       "The proxy is unable to process your request.\n"
94       "<h1><font color=red><b>%s.</b></font></h1>\n", message, message);
95 
96     fflush(client);
97     fclose(client);
98 
99   }
100 
101   debug("[!] WARNING: %s.\n", message);
102   exit(0);
103 
104 }
105 
106 
107 static _u8* BASE16 = "0123456789ABCDEF";
108 
109 /* Decode URL-encoded parameter string */
parse_urlencoded(struct naive_list_p * p,_u8 * string)110 void parse_urlencoded(struct naive_list_p* p, _u8* string) {
111   _u8 val_now = 0;
112   _u8 name[MAXLINE+1], val[MAXLINE+1];
113   _u32 nlen = 0, vlen = 0;
114 
115   name[0] = 0;
116   val[0] = 0;
117 
118   do {
119 
120     _u8 dec = 0;
121 
122     switch (*string) {
123       case '+':
124         dec = ' ';
125         break;
126 
127       case '=':
128         val_now = 1;
129         break;
130 
131       case '%': {
132           _u8 *a, *b;
133 
134           /* Parse %nn code, if valid; default to '?nn' if not, replace with ? if \0. */
135 
136           if (!string[1] || !string[2] || !(a=strchr(BASE16,toupper(string[1]))) ||
137               !(b=strchr(BASE16,toupper(string[2])))) { dec = '?'; break; }
138 
139           dec = (a-BASE16) * 16 + (b-BASE16);
140           string += 2;
141           if (!dec) dec = '?';
142 
143           break;
144 
145         }
146 
147       case '&':
148       case 0:
149 
150         /* Handle parameter terminator; note that we also iterate over \0
151            because of loop condition placement. */
152 
153         if (nlen) {
154           name[nlen] = 0;
155           val[vlen] = 0;
156           DYN_ADDP(*p,name,val,"");
157         }
158 
159         val_now = 0;
160         nlen = 0;
161         vlen = 0;
162         break;
163 
164       default:
165         if (!(dec=*string)) dec = '?';
166 
167     }
168 
169     /* Append decoded char, if any, to field name or value as needed. */
170 
171     if (dec) {
172       if (!val_now) { if (nlen < MAXLINE) name[nlen++] = dec; }
173         else { if (vlen < MAXLINE) val[vlen++] = dec; }
174     }
175 
176   } while (*(string++));
177 
178 }
179 
180 
181 /* Read a line of multipart data from a linear buffer, advance buffer pointer. */
get_multipart_line(_u8 ** buf)182 static _u8* get_multipart_line(_u8** buf) {
183   static _u8* retbuf;
184   _u8* x;
185   _u32 cnt;
186 
187   if (retbuf) free(retbuf);
188 
189   /* We assume \r\n formatting here, which is RFC-mandated and implemtned
190      by well-behaved browsers. */
191 
192   x = strchr(*buf,'\r');
193 
194   if (!x || x[1] != '\n') {
195     _u32 l = strlen(*buf);
196     retbuf = malloc(l + 1);
197     if (!retbuf) fatal("out of memory");
198     strcpy(retbuf,*buf);
199     *buf += l;
200     return retbuf;
201   }
202 
203   cnt = x - *buf;
204 
205   retbuf = malloc(cnt + 1);
206   if (!retbuf) fatal("out of memory");
207   memcpy(retbuf,*buf,cnt);
208   retbuf[cnt] = 0;
209 
210   *buf += cnt + 2;
211 
212   return retbuf;
213 
214 }
215 
216 
217 /* Collect multipart data from a reasonably well-behaved browser. This routine
218    makes multiple assumptions that might be not true for maliciously formatted
219    data, but we do not strive to serve such requests well. */
parse_multipart(struct naive_list_p * p,_u8 * string,_u32 slen)220 void parse_multipart(struct naive_list_p* p, _u8* string, _u32 slen) {
221   _u8* field, *fname;
222   _u8* endptr = string + slen;
223 
224   do {
225 
226     _u8 *l, *end, *c;
227 
228     field = 0;
229     fname = 0;
230 
231     /* Skip boundary */
232     l = get_multipart_line(&string);
233     if (l[0] != '-' || l[1] != '-') return;
234 
235     /* Sink headers, but grab field name if any */
236     while ((l = get_multipart_line(&string)) && l[0]) {
237       if (!strncasecmp(l,"Content-Disposition:",20)) {
238 
239         /* Grab field name. */
240         _u8* f = rp_strcasestr(l,"; name=\"");
241         if (!f) continue;
242         f += 7;
243         c = strchr(++f,'"');
244         if (!c) continue;
245         *c = 0;
246 
247         field = strdup(f);
248         if (!field) fatal("out of memory");
249 
250         /* Grab file name, if any. */
251 
252         f = rp_strcasestr(c + 1,"; filename=\"");
253         if (!f) continue;
254         f += 11;
255         c = strchr(++f,'"');
256         if (!c) continue;
257         *c = 0;
258         fname = strdup(f);
259         if (!fname) fatal("out of memory");
260 
261       }
262 
263     }
264 
265     end = rp_memmem(string,endptr - string, "\r\n--", 4);
266     if (!end) return;
267 
268     if (field)
269       DYN_ADDP_RAWMEM(*p,field,string,end-string,fname ? fname : (_u8*)"");
270 
271     string = end + 2;
272 
273   } while (1);
274 
275 }
276 
277 
278 
279 #define BASE64 "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ+/_-"
280 
281 /* Looks for what could pass for a reasonably robust session token or XSRF protection. */
contains_token(_u8 * name,_u8 * value)282 _u8 contains_token(_u8* name, _u8* value) {
283   _u32 run16 = 0, run16_num = 0,
284        run64 = 0, run64_true = 0, run64_num = 0, run64_up = 0, run64_low = 0;
285   _u8* st = 0;
286   static _u32 tmin,tmax;
287   _u32 fno = 0;
288 
289   if (!tmin) {
290     tmin = time(0);
291     tmax = tmin + (60 * 60 * 24 * 30); /* One month forward */
292     tmin -= (60 * 60 * 24 * 365 * 5);  /* Five years back */
293   }
294 
295   /* Known bad field names - return 0. */
296 
297   fno = 0;
298 
299   while (no_xsrf_fields[fno]) {
300     if (no_xsrf_fields[fno][0] == '=') {
301       if (!strcasecmp(name,no_xsrf_fields[fno] + 1)) return 0;
302     } else {
303       if (rp_strcasestr(name,no_xsrf_fields[fno])) return 0;
304     }
305     fno++;
306   }
307 
308   /* Known safe field names - return 1. */
309 
310   fno = 0;
311 
312   while (xsrf_fields[fno]) {
313     if (xsrf_fields[fno][0] == '=') {
314       if (!strcasecmp(name,xsrf_fields[fno] + 1)) return 1;
315     } else {
316       if (rp_strcasestr(name,xsrf_fields[fno])) return 1;
317     }
318     fno++;
319   }
320 
321   /* URLs are not anti-XSRF tokens, no matter how random they look. */
322 
323   if (!strncmp(value,"http",4)) return 0;
324 
325   /* Iterate over value data, compute base16 / base64 runs, collect
326      basic character disttributin data, rule out patterns such as unix
327      time, and make the call. */
328 
329   do {
330 
331     if (*value && strchr(BASE16,toupper(*value))) {
332 
333       run16++;
334       if (isdigit(*value)) run16_num++;
335 
336     } else {
337 
338       if (run16 >= XSRF_B16_MIN && run16 <= XSRF_B16_MAX &&
339           run16_num >= XSRF_B16_NUM) {
340         _u8 tmp[5];
341         _u32 val;
342 
343         strncpy(tmp,st,4);
344         tmp[4] = 0;
345         val = atoi(tmp);
346 
347         if ((val < tmin / 1000000 || val > tmax / 1000000) &&
348             (st[0] != st[1] || st[0] != st[2])) return 1;
349       }
350 
351       run16 = 0;
352       run16_num = 0;
353 
354     }
355 
356     if (*value && strchr(BASE64,toupper(*value))) {
357 
358       if (!isalpha(*value)) run64_num++;
359       if (isupper(*value)) run64_up++;
360       if (islower(*value)) run64_low++;
361       if (!run16) run64_true = 1;
362       if (!run64) st = value;
363       run64++;
364 
365     } else {
366 
367       if (run64 >= XSRF_B64_MIN && run64 <= XSRF_B64_MAX &&
368           ((run64_num >= XSRF_B64_NUM && run64_up >= XSRF_B64_CASE &&
369             run64_low >= XSRF_B64_CASE) ||
370            (run64_num >= XSRF_B64_NUM2)) && run64_true)
371         if (st[0] != st[1] || st[0] != st[2]) return 1;
372       run64 = 0;
373       run64_num = 0;
374       run64_up = 0;
375       run64_low = 0;
376       run64_true = 0;
377       st = 0;
378 
379     }
380 
381   } while (*(value++));
382 
383   return 0;
384 
385 }
386 
387 
388 /* Try to parse cookie header values. */
parse_cookies(_u8 * str,struct naive_list2 * c)389 static void parse_cookies(_u8* str, struct naive_list2* c) {
390   _u8 name[128], val[128];
391 
392   /* Iterate over cookies. We ignore cookies over 128 bytes for
393      name / value, and "special" values such as expiration date,
394      version, etc. */
395 
396   while (str) {
397     while (isspace(*str)) str++;
398     if (sscanf(str,"%127[^;=]=%127[^;]",name,val) == 2) {
399       if (strcasecmp(name,"expires") && strcasecmp(name,"comment") &&
400           strcasecmp(name,"version") && strcasecmp(name,"max-age") &&
401           strcasecmp(name,"path") && strcasecmp(name,"domain") && name[0] != '$')
402         DYN_ADD2(*c,name,val);
403     }
404 
405     str = strchr(str + 1 ,';');
406     if (str) str++;
407 
408   }
409 
410 }
411 
412 
413 /* Process the entire HTTP request, parse fields, and extract some preliminary signals. */
collect_request(FILE * client,_u8 * ssl_host,_u32 ssl_port)414 struct http_request* collect_request(FILE* client,_u8* ssl_host, _u32 ssl_port) {
415   struct http_request* ret;
416   _u8 *line, *x;
417   _u32 i;
418 
419   /* Begin carefully - on CONNECT requests, we do not want to read more than
420      absolutely necessary. As soon as non-CONNECT is confirmed, we switch
421      to proper buffering. */
422 
423   setvbuf(client, cli_buf, _IONBF, 0);
424 
425   ret = calloc(1, sizeof(struct http_request));
426   if (!ret) fatal("out of memory");
427 
428   line = grab_line(client);
429   if (!line || !line[0]) exit(0);
430 
431   x = strchr(line,' ');
432   if (!x || x == line) http_error(client, "URL address missing or malformed request",1);
433   *(x++) = 0;
434 
435   ret->method = strdup(line);
436   if (!ret->method) fatal("out of memory");
437 
438   if (strcmp(line,"CONNECT")) {
439 
440     /* Ok, safe to handle HTTP at full speed now. */
441 
442     setvbuf(client, cli_buf, _IOFBF, sizeof(cli_buf));
443 
444     if (!ssl_host) {
445 
446       /* Unless coming from within CONNECT, we want a
447          properly specified protocol and so forth. */
448 
449       if (x[0] == '/')
450         http_error(client, "Direct HTTP requests not allowed",1);
451 
452       if (strncmp(x,"http://",7))
453         http_error(client, "Unsupported protocol",1);
454 
455       x += 7;
456 
457     }
458 
459   } else {
460 
461     /* We do not want CONNECT requests within CONNECT requests, really. */
462     if (ssl_host) http_error(client,"Evil CONNECT nesting",1);
463 
464     ret->is_connect = 1;
465 
466   }
467 
468   ret->host = x;
469 
470   x = strchr(ret->host,' ');
471   if (!x) http_error(client,"Missing HTTP protocol version",1);
472 
473   if (strcmp(x," HTTP/1.0") && strcmp(x," HTTP/1.1"))
474     http_error(client,"unsupported HTTP protocol version",1);
475 
476   /* Trim HTTP/1.x part now, we do not need it */
477 
478   *x = 0;
479 
480   if (!ret->is_connect) {
481 
482     ret->path = strchr(ret->host,'/');
483     if (!ret->path) http_error(client,"Incomplete request URL",1);
484     *(ret->path++) = 0;
485 
486   }
487 
488   /* Try to find port, if any */
489 
490   x = strchr(ret->host,':');
491 
492   if (x) {
493 
494     ret->port = atoi(x+1);
495 
496     if (!ret->port || ret->port > 65535)
497       http_error(client,"Illegal port specification",1);
498 
499     if (ret->port < 1024 && ret->port != 80 && ret->port != 443)
500       http_error(client,"Access to this port denied",1);
501 
502     *x = 0;
503 
504   } else {
505     if (ret->is_connect) ret->port = 443;
506       else ret->port = 80;
507   }
508 
509   /* Populate HTTP envelope data with higher-level CONNECT
510      information if one present. */
511 
512   if (ssl_host) {
513     ret->host = ssl_host;
514     ret->port = ssl_port;
515     ret->from_ssl = 1;
516   }
517 
518   if (!ret->host[0])
519     http_error(client,"Host name is missing",1);
520 
521   ret->host = strdup(ret->host);
522   if (!ret->host) fatal("out of memory");
523 
524   /* Grab query data */
525 
526   if (!ret->is_connect && (x = strchr(ret->path,'?'))) {
527 
528     *(x++) = 0;
529     ret->query = strdup(x);
530     if (!ret->query) fatal("out of memory");
531 
532   }
533 
534   /* Grab path data */
535 
536   if (!ret->is_connect) {
537 
538     ret->path = strdup(ret->path);
539     if (!ret->path) fatal("out of memory");
540 
541     x = strrchr(ret->path,'.');
542 
543     if (x) ret->ext = x + 1;
544 
545   }
546 
547   /* Request target is now fully parsed. Let's collect headers, if any. */
548 
549   while (1) {
550 
551     line = grab_line(client);
552 
553     if (!line) http_error(client,"Incomplete or malformed request headers",1);
554 
555     /* Empty line == end of headers */
556     if (!line[0]) break;
557 
558     x = strchr(line,':');
559     if (!x) http_error(client,"Invalid request header",1);
560     *x = 0;
561     while (isspace(*(++x)));
562 
563     if (!strcasecmp(line,"Content-Length")) {
564 
565       ret->payload_len = atoi(x);
566 
567       if (ret->payload_len > MAXPAYLOAD)
568         http_error(client,"Payload size limit exceeded",1);
569 
570     }
571 
572     if (!strncasecmp(line,"Cookie",6))
573       parse_cookies(x,&ret->cookies);
574 
575     if (!strcasecmp(line,"Referer")) {
576       _u8* rh;
577 
578       ret->referer = strdup(x);
579       if (!ret->referer) fatal("out of memory");
580 
581       /* Extract referer host to simplify other checks later on. */
582 
583       if ((rh = strstr(x,"://"))) {
584         _u8* x;
585 
586         rh = strdup(rh + 3);
587         if (!rh) fatal("out of memory");
588         if ((x = strchr(rh,'/'))) *x = 0;
589         if ((x = strchr(rh,':'))) *x = 0;
590 
591         ret->ref_host = rh;
592 
593       }
594 
595     }
596 
597     if (!strcasecmp(line,"X-Ratproxy-Loop"))
598       http_error(client,"Proxy loop detected",1);
599 
600     /* These are specific to publicly documented anti-XSRF features of
601        Google Web Toolkit and Google Data APIs; this might be further
602        extended to accomodate other custom schemes in popular frameworks. */
603 
604     if (!strcasecmp(line,"Authorization") && !strncasecmp(x,"GoogleLogin auth=",17)) {
605       ret->xsrf_safe = 1;
606       ret->authsub = 1;
607     }
608 
609     if (!strcasecmp(line,"Content-Type")) {
610 
611       if (rp_strcasestr(x,"text/x-gwt-rpc")) { ret->xsrf_safe = 1; ret->authsub = 1; }
612 
613       if (rp_strcasestr(x,"multipart/form-data")) ret->multipart = 1;
614       else if (!rp_strcasestr(x,"application/x-www-form-urlencoded")) ret->non_param = 1;
615     }
616 
617     DYN_ADD2(ret->h,line,x);
618 
619   }
620 
621   /* Get POST payload */
622 
623   if (ret->payload_len) {
624 
625     ret->payload = malloc(ret->payload_len + 1);
626     if (!ret->payload) fatal("out of memory");
627 
628     if (fread(ret->payload,ret->payload_len,1,client) != 1)
629       http_error(client,"Premature end of payload data",0);
630 
631     /* To make string matching safe. */
632     ret->payload[ret->payload_len] = 0;
633 
634   }
635 
636   /* Parse GET/POST parameters */
637 
638   if (ret->query) parse_urlencoded(&ret->p, ret->query);
639 
640   ret->ppar_bound = ret->p.c;
641 
642   /* Do not parse payloads of arcane types. */
643 
644   if (ret->payload && !ret->non_param) {
645     if (ret->multipart) parse_multipart(&ret->p, ret->payload, ret->payload_len);
646      else parse_urlencoded(&ret->p, ret->payload);
647   }
648 
649   /* Locate XSRF tokens, if any */
650   /* Do not perform contains_token() checks on file fields. */
651 
652   for (i=0;i<ret->p.c;i++)
653     if (!ret->p.fn[i][0] && contains_token(ret->p.v1[i],ret->p.v2[i]))
654       { ret->xsrf_safe = 1; break; }
655 
656   return ret;
657 
658 }
659 
660 
661 /* Connect to server */
open_server(FILE * client,_u8 * host,_u32 port)662 static FILE* open_server(FILE* client, _u8* host, _u32 port) {
663   FILE* ret;
664   struct sockaddr_in sin;
665   struct hostent* he;
666   _s32 ss;
667 
668   if (!(he = gethostbyname(host)) || !(he->h_addr_list[0]))
669     http_error(client,"Unable to find target host",0);
670 
671   ss = socket(PF_INET, SOCK_STREAM, 0);
672   if (ss < 0) pfatal("socket() failed");
673 
674   sin.sin_family = PF_INET;
675   sin.sin_port   = htons(port);
676 
677   memcpy(&sin.sin_addr, he->h_addr_list[0], 4);
678 
679   if (connect(ss,(struct sockaddr*)&sin,sizeof(struct sockaddr_in)))
680     http_error(client,"Connection to target failed",0);
681 
682   ret = fdopen(ss,"w+");
683   if (!ret) fatal("fdopen() failed");
684 
685   setvbuf(ret, srv_buf, _IOFBF, sizeof(srv_buf));
686 
687   return ret;
688 
689 }
690 
691 
692 /* Connect to server, take proxy CONNECT handling into account */
open_server_complete(FILE * client,struct http_request * r)693 FILE* open_server_complete(FILE* client, struct http_request* r) {
694   FILE* ret;
695   _u8* l;
696 
697   if (use_proxy)
698     ret = open_server(client, use_proxy, proxy_port);
699   else
700     ret = open_server(client, r->host, r->port);
701 
702   if (r->is_connect) {
703 
704     if (use_proxy) {
705       fprintf(ret,"CONNECT %s:%u HTTP/1.0\r\n\r\n",r->host,r->port);
706       fflush(ret);
707 
708       setvbuf(ret, srv_buf, _IONBF, 0);
709       /* Sink proxy response */
710       while ((l=grab_line(ret)) && l[0]);
711     }
712 
713     if (client) {
714       fprintf(client,"HTTP/1.0 200 Go ahead, please.\r\n\r\n");
715       fflush(client);
716     }
717 
718   }
719 
720   return ret;
721 
722 }
723 
724 
725 #define NEEDS_URLENC(x) \
726    (!(x) || !strchr("ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_.",toupper(x)))
727 
728 
729 /* Rewrite GET and POST parameters as needed. */
reconstruct_request(struct http_request * r)730 void reconstruct_request(struct http_request* r) {
731   struct dyn_str p = { 0, 0 }, q = { 0, 0 };
732   _u32 cp = 0, i;
733   _u8 c;
734   _u8 tmp[32];
735 
736   /* Encode params to query string, until ppar boundary is hit. */
737 
738   for (;cp<r->p.c;cp++) {
739 
740     if (cp == r->ppar_bound) break;
741 
742     if (q.l) STR_APPEND_CHAR(q,'&');
743 
744     i = 0;
745     while ((c=r->p.v1[cp][i])) {
746       if (NEEDS_URLENC(c)) {
747         sprintf(tmp,"%%%02X",c);
748       } else {
749         tmp[0] = c;
750         tmp[1] = 0;
751       }
752       STR_APPEND(q,tmp);
753       i++;
754     }
755 
756     STR_APPEND_CHAR(q,'=');
757 
758     i = 0;
759     while ((c=r->p.v2[cp][i])) {
760       if (NEEDS_URLENC(c)) {
761         sprintf(tmp,"%%%02X",c);
762       } else {
763         tmp[0] = c;
764         tmp[1] = 0;
765       }
766       STR_APPEND(q,tmp);
767       i++;
768     }
769 
770   }
771 
772   /* Update query string. */
773   if (q.l) r->query = q.v;
774 
775 
776   /* Deal with the rest of parameters, putting them in a multipart
777      envelope or as urlencoded payload, as needed. */
778 
779   if (r->multipart) {
780 
781     /* Update boundary; be just random enough to prevent accidents. */
782 
783     sprintf(tmp,"ratproxybound%08x",rand());
784     r->use_boundary = strdup(tmp);
785     if (!r->use_boundary) fatal("out of memory");
786 
787     for (;cp<r->p.c;cp++) {
788       STR_APPEND(p,"--");
789       STR_APPEND(p,r->use_boundary);
790       STR_APPEND(p,"\r\nContent-Disposition: form-data; name=\"");
791       STR_APPEND(p,r->p.v1[cp]);
792 
793       if (r->p.fn[cp][0]) {
794         STR_APPEND(p,"\"; filename=\"");
795         STR_APPEND(p,r->p.fn[cp]);
796       }
797 
798       STR_APPEND(p,"\"\r\n\r\n");
799       if (r->p.l2[cp]) {
800         STR_APPEND_RAWMEM(p,r->p.v2[cp],r->p.l2[cp]);
801       } else {
802         STR_APPEND(p,r->p.v2[cp]);
803       }
804 
805       STR_APPEND(p,"\r\n");
806 
807     }
808 
809     STR_APPEND(p,"--");
810     STR_APPEND(p,r->use_boundary);
811     STR_APPEND(p,"--\r\n");
812 
813   } else if (!r->non_param) {
814 
815     for (;cp<r->p.c;cp++) {
816 
817       if (p.l) STR_APPEND_CHAR(p,'&');
818 
819       i = 0;
820       while ((c=r->p.v1[cp][i])) {
821         if (NEEDS_URLENC(c)) {
822           sprintf(tmp,"%%%02X",c);
823         } else {
824           tmp[0] = c;
825           tmp[1] = 0;
826         }
827         STR_APPEND(p,tmp);
828         i++;
829       }
830 
831       STR_APPEND_CHAR(p,'=');
832 
833       i = 0;
834       while ((c=r->p.v2[cp][i])) {
835         if (NEEDS_URLENC(c)) {
836           sprintf(tmp,"%%%02X",c);
837         } else {
838           tmp[0] = c;
839           tmp[1] = 0;
840         }
841         STR_APPEND(p,tmp);
842         i++;
843 
844       }
845 
846     }
847 
848     if (p.l) STR_APPEND(p,"\r\n");
849 
850   } else return; /* Leave payload intact. */
851 
852   /* Update POST string. */
853   if (p.l) {
854     r->payload = p.v;
855     r->payload_len = p.l;
856   }
857 
858   return;
859 
860 }
861 
862 
863 /* Detect and convert GWT RPC syntax where appropriate. This is specific to
864    Google Web Toolkit. */
maybe_gwt_rpc(_u8 * str)865 static _u8* maybe_gwt_rpc(_u8* str) {
866   struct dyn_str p = { 0, 0 };
867   _u8 *c = str, *n;
868   _u32 num = 0;
869 
870   _u32 l = strlen(str);
871   if (l < 3 || str[l-3] != 0xEF || str[l-2] != 0xBF || str[l-1] != 0xBF) return str;
872 
873   STR_APPEND(p,"GWT_RPC[");
874 
875   while ((n = strstr(c,"\xEF\xBF\xBF"))) {
876     *n = 0;
877 
878     if (num > 4) {
879       if (num != 5) STR_APPEND_CHAR(p,',');
880       STR_APPEND_CHAR(p,'\'');
881 
882       if (!strncmp(c,"[L",2)) c += 2;
883 
884       if (!strncmp(c,"com.google.",11) || !strncmp(c,"java.",5)) c = strrchr(c,'.') + 1;
885 
886       /* We *could* escape here, but it's probably not worth the effort. */
887 
888       STR_APPEND(p,c);
889       STR_APPEND_CHAR(p,'\'');
890     }
891 
892     num++;
893     *n = '\xEF';
894     c = n + 3;
895   }
896 
897   STR_APPEND_CHAR(p,']');
898 
899   return p.v;
900 
901 }
902 
903 
904 /* Convert multipart data to URLencoded string, to simplify reporting. */
stringify_payload(struct http_request * r)905 _u8* stringify_payload(struct http_request* r) {
906   struct dyn_str p = { 0, 0 };
907   _u32 cp, i, c;
908   _u8 tmp[32];
909 
910   if (!r->multipart) return maybe_gwt_rpc(r->payload);
911 
912   /* Reconstruct payload from multipart boundary... */
913 
914   for (cp=r->ppar_bound;cp<r->p.c;cp++) {
915 
916     if (p.l) STR_APPEND_CHAR(p,'&');
917 
918     i = 0;
919     while ((c=r->p.v1[cp][i])) {
920       if (NEEDS_URLENC(c)) {
921         sprintf(tmp,"%%%02X",c);
922       } else {
923         tmp[0] = c;
924         tmp[1] = 0;
925       }
926       STR_APPEND(p,tmp);
927       i++;
928     }
929 
930     STR_APPEND_CHAR(p,'=');
931 
932     /* When dealing with a file field, use field name, rather than
933        field data. */
934 
935     if (r->p.fn[cp][0]) {
936       STR_APPEND(p,"FILE[");
937 
938       i = 0;
939       while ((c=r->p.fn[cp][i])) {
940         if (NEEDS_URLENC(c)) {
941           sprintf(tmp,"%%%02X",c);
942         } else {
943           tmp[0] = c;
944           tmp[1] = 0;
945         }
946         STR_APPEND(p,tmp);
947         i++;
948       }
949 
950       STR_APPEND_CHAR(p,']');
951 
952     } else {
953 
954       i = 0;
955       while ((c=r->p.v2[cp][i])) {
956         if (NEEDS_URLENC(c)) {
957           sprintf(tmp,"%%%02X",c);
958         } else {
959           tmp[0] = c;
960           tmp[1] = 0;
961         }
962         STR_APPEND(p,tmp);
963         i++;
964       }
965 
966     }
967 
968   }
969 
970   return p.v;
971 
972 }
973 
974 
975 /* Do a naive date comparison for t-1 sec/min/hr scenarios. */
comp_dates(_u8 * exp,_u8 * dat)976 _u8 comp_dates(_u8* exp, _u8* dat) {
977   _s32 i = strlen(dat), dc = 0;
978 
979   if (i != strlen(exp)) return 1;
980 
981   while (--i >= 0) {
982     if (exp[i] != dat[i]) {
983       if (!isdigit(dat[i]) || exp[i] > dat[i] || ++dc > 1) return 1;
984     }
985   }
986 
987   return 0;
988 }
989 
990 
991 
992 /* Send HTTP request, collect and parse response, spot header-related problems. */
send_request(FILE * client,FILE * server,struct http_request * r,_u8 strip_state)993 struct http_response* send_request(FILE* client, FILE* server, struct http_request* r,
994                                    _u8 strip_state) {
995   struct http_response* ret;
996   _u8 *line, *x;
997   _s32 decl_clen = -1;
998   _u32 i;
999   _u8 port_spec[16] = { 0 };
1000   _u8 *exp_value = 0, *dat_value = 0;
1001 
1002   /* Send the request... unfortunately, we cannot specify :80 on all
1003      standard requests, as some URL rewriters that redirect to https
1004      will copy this over and cause problems. */
1005 
1006   if (!r->from_ssl) {
1007     if (r->port != 80) sprintf(port_spec,":%u",r->port);
1008   } else {
1009     if (r->port != 443) sprintf(port_spec,":%u",r->port);
1010   }
1011 
1012   if (use_proxy && !r->from_ssl)
1013     fprintf(server,
1014       "%s http://%s:%u/%s%s%s HTTP/1.0\r\n"
1015       "Connection: close\r\n"
1016       "Host: %s%s\r\n"
1017       "Accept-Encoding: identity\r\n"
1018       "X-Ratproxy-Loop: 1\r\n"
1019       "Content-Length: %u\r\n", r->method, r->host, r->port, r->path,
1020       r->query ? "?" : "", r->query ? r->query : (_u8*)"",
1021       r->host, port_spec, r->payload_len);
1022   else
1023     fprintf(server,
1024       "%s /%s%s%s HTTP/1.0\r\n"
1025       "Connection: close\r\n"
1026       "Host: %s%s\r\n"
1027       "Accept-Encoding: identity\r\n"
1028       "X-Ratproxy-Loop: 1\r\n"
1029       "Content-Length: %u\r\n", r->method, r->path,
1030       r->query ? "?" : "", r->query ? r->query : (_u8*)"",
1031       r->host, port_spec, r->payload_len);
1032 
1033   if (!strip_state)
1034     for (i=0;i<r->h.c;i++) {
1035 
1036       /* There are several types of headers we'd rather skip
1037          and override elsewhere. */
1038 
1039 #ifdef FORCE_NOCACHE
1040       if (!strncasecmp(r->h.v1[i],"If-",3)) continue;
1041 #endif /* FORCE_NOCACHE */
1042 
1043       if (!strcasecmp(r->h.v1[i],"Host")) continue;
1044       if (!strcasecmp(r->h.v1[i],"Range")) continue;
1045       if (!strcasecmp(r->h.v1[i],"Connection")) continue;
1046       if (!strcasecmp(r->h.v1[i],"Keep-Alive")) continue;
1047       if (!strncasecmp(r->h.v1[i],"Proxy-",6)) continue;
1048       if (!strcasecmp(r->h.v1[i],"Accept-Encoding")) continue;
1049       if (!strcasecmp(r->h.v1[i],"Content-Length")) continue;
1050 
1051       /* Override multipart boundary on requests after rewriting. */
1052 
1053       if (!strcasecmp(r->h.v1[i],"Content-Type") && r->use_boundary) {
1054         fprintf(server,"Content-Type: multipart/form-data; boundary=%s\r\n",r->use_boundary);
1055         continue;
1056       }
1057 
1058       fprintf(server,"%s: %s\r\n",r->h.v1[i],r->h.v2[i]);
1059 
1060     }
1061 
1062   fprintf(server,"\r\n");
1063 
1064   if (r->payload_len)
1065     fwrite(r->payload,r->payload_len,1,server);
1066 
1067   fflush(server);
1068 
1069   /* Ok, sending complete. */
1070 
1071   /* Process the response... */
1072 
1073   ret = calloc(1,sizeof(struct http_response));
1074   if (!ret) fatal("out of memory");
1075 
1076   ret->ext = r->ext;
1077 
1078   line = grab_line(server);
1079 
1080   if (!line || !line[0]) http_error(client,"Malformed HTTP response",0);
1081 
1082   x = strchr(line,' ');
1083   if (!x || x == line) http_error(client,"HTTP response code missing",0);
1084   *(x++) = 0;
1085 
1086   ret->code = atoi(x);
1087   if (ret->code < 100 || ret->code > 999)
1088     http_error(client,"Invalid HTTP response code",0);
1089 
1090   while (1) {
1091 
1092     line = grab_line(server);
1093     if (!line) http_error(client,"Premature end of server headers",0);
1094 
1095     if (!line[0]) break;
1096 
1097     x = strchr(line,':');
1098     if (!x) http_error(client,"Invalid response header",0);
1099     *x = 0;
1100     while (isspace(*(++x)));
1101 
1102     for (i=0;i<ret->h.c;i++)
1103       if (!strcasecmp(line,ret->h.v1[i]) && strcmp(x,ret->h.v2[i]) &&
1104            strncasecmp(line,"Set-Cookie",10) && strncasecmp(line,"X-Cache",7) &&
1105            strncasecmp(line,"Server",7))
1106         ret->has_multiple = 1;
1107 
1108     /* Again, some headers need to be analyzed in more detail or skipped. */
1109 
1110     /* Caching headers checks... */
1111     if (!strcasecmp(line,"Expires")) {
1112       exp_value = strdup(x);
1113       if (!exp_value) fatal("out of memory");
1114     }
1115 
1116     if (!strcasecmp(line,"Date")) {
1117       dat_value = strdup(x);
1118       if (!dat_value) fatal("out of memory");
1119     }
1120 
1121     /* Both "no-store" and "max-age=0" are generally discouraged, but in practice,
1122        should be sufficient, so let's be polite. */
1123 
1124     /* TODO: These checks should be probably more robust to detect typos
1125        such as missing whitespaces. */
1126 
1127     if (!strcasecmp(line,"Cache-Control")) {
1128       if (strstr(x,"no-cache") || strstr(x,"private") ||
1129           strstr(x,"max-age=0") || strstr(x,"no-store"))
1130         ret->cc11intent = INTENT_PRIV; else ret->cc11intent = INTENT_PUB;
1131     }
1132 
1133     if (!strcasecmp(line,"Pragma")) {
1134       if (strstr(x,"no-cache")) ret->pr10intent = INTENT_PRIV;
1135         else ret->pr10intent = INTENT_PUB;
1136     }
1137 
1138     if (!strcasecmp(line,"Connection")) continue;
1139     if (!strcasecmp(line,"Keep-Alive")) continue;
1140     if (!strcasecmp(line,"Content-Range")) continue;
1141 
1142     if (!strcasecmp(line,"Content-Type")) {
1143       _u8 *copy = strdup(x), *y;
1144       if (!copy) fatal("out of memory");
1145 
1146       if ((y = strrchr(copy,';'))) {
1147         *(y++) = 0;
1148         while (isspace(*y)) y++;
1149         if (!strncasecmp(y,"charset=",8)) {
1150           y += 8;
1151           if (*y == '"' && y[strlen(y)-1] == '"') {
1152             y[strlen(y)-1]=0;
1153             y++;
1154           }
1155           ret->charset = y;
1156         }
1157       }
1158 
1159       ret->mime_type  = copy;
1160 
1161     }
1162 
1163     if (!strcasecmp(line,"Content-Disposition")) {
1164       _u8* y;
1165 
1166       ret->is_attach = (strncasecmp(x,"attachment;",11) == 0) ||
1167                        (strcasecmp(x,"attachment") == 0);
1168 
1169       /* If filename is specified, try to grab it (it supersedes
1170          any URL-derived ones). */
1171 
1172       y=strrchr(x,'.');
1173       if (y && y[1] && y[1] != '"') {
1174         ret->ext = strdup(y + 1);
1175         if (!ret->ext) fatal("out of memory");
1176         y = strchr(y + 1,'"');
1177         if (y) *y=0;
1178       }
1179 
1180     }
1181 
1182     if (!strcasecmp(line,"Location")) {
1183       ret->location = strdup(x);
1184       if (!ret->location) fatal("out of memory");
1185     }
1186 
1187     if (!strcasecmp(line,"Set-Cookie")) parse_cookies(x,&ret->cookies);
1188 
1189     if (!strcasecmp(line,"Content-Length")) {
1190 
1191       decl_clen = atoi(x);
1192       if (decl_clen < 0)
1193         http_error(client,"Bogus content length returned by server.",0);
1194 
1195       continue;
1196     }
1197 
1198     DYN_ADD2(ret->h,line,x);
1199 
1200   }
1201 
1202   /* Some final "Expires" parsing for caching headers checks... */
1203 
1204   if (exp_value) {
1205 
1206     _u8* year = 0, *z = strchr(exp_value,',');
1207 
1208     ret->ex10intent = INTENT_PUB;
1209 
1210     /* Try to extract the year, at least roughly... */
1211 
1212     if (!isalnum(exp_value[0])) {
1213 
1214       /* "Expires: -1" is a nasty trick, but it works. */
1215       ret->ex10intent = INTENT_PRIV;
1216 
1217     } else if (dat_value && (!strcmp(exp_value,dat_value) || !comp_dates(exp_value,dat_value))) {
1218 
1219       /* Date == Expires is an alternative and valid method. */
1220       ret->ex10intent = INTENT_PRIV;
1221 
1222     } else {
1223 
1224       if (z && z == exp_value + 3 && strlen(exp_value) > 11) {
1225 
1226         /* Sun, 06 Nov 1994 08:49:37 GMT  ; RFC 822, updated by RFC 1123 */
1227         year = exp_value + 11;
1228         if (*year == ' ') year++;
1229 
1230       } else if (z) {
1231 
1232         /* Sunday, 06-Nov-94 08:49:37 GMT ; RFC 850, obsoleted by RFC 1036 */
1233         year = strchr(z,'-');
1234         if (year) year = strchr(year + 1,'-');
1235         if (year) year++;
1236 
1237       } else if (strlen(x) > 19) {
1238 
1239         /* Sun Nov  6 08:49:37 1994       ; ANSI C's asctime() format */
1240         year = exp_value + 19;
1241         if (*year == ' ') year++;
1242 
1243       }
1244 
1245       if (year) {
1246         _u32 yval = atoi(year);
1247 
1248         if (yval < 1000) {
1249           yval += 1900;			  /* 94 -> 1994, 104 -> 2004 */
1250           if (yval < 1970) yval += 100;   /* 03 -> 2003, 93 -> 1993 */
1251         }
1252 
1253         if (yval < 2008) ret->ex10intent = INTENT_PRIV;
1254 
1255       }
1256 
1257     }
1258 
1259   }
1260 
1261   /* Headers read. Grab the actual payload, regardless of content
1262      length (but note a discrepancy, if present).  */
1263 
1264   while (1) {
1265     _u8 buf[1024];
1266     _s32 i;
1267 
1268     if ((i = fread(buf,1,1024,server)) <= 0) break;
1269 
1270     ret->payload = realloc(ret->payload, ret->payload_len + i + 1);
1271     if (!ret->payload) fatal("out of memory");
1272 
1273     memcpy(ret->payload + ret->payload_len, buf, i);
1274     ret->payload_len += i;
1275 
1276     if (ret->payload_len > MAXPAYLOAD)
1277       http_error(client,"Response size limit exceeded",0);
1278 
1279   }
1280 
1281   if (ret->payload_len)
1282     ret->payload[ret->payload_len] = 0;
1283 
1284   /* Let payload_len < decl_clen slip through - transmission errors happen. */
1285 
1286   if (decl_clen >= 0 && ret->payload_len > decl_clen)
1287     ret->has_badclen = 1;
1288 
1289   fflush(server);
1290   fclose(server);
1291 
1292   return ret;
1293 
1294 }
1295 
1296 
1297 /* Just send data back to client. */
send_response(FILE * client,struct http_response * r)1298 void send_response(FILE* client, struct http_response* r) {
1299   _u32 i;
1300 
1301   setvbuf(client, cli_buf, _IOFBF, sizeof(cli_buf));
1302 
1303   fprintf(client,
1304     "HTTP/1.0 %u Proxied response\r\n"
1305     "Connection: close\r\n"
1306 #ifdef FORCE_NOCACHE
1307     "Pragma: no-cache\r\n"
1308     "Expires: Fri, 01 Jan 1990 00:00:00 GMT\r\n"
1309     "Cache-Control: no-cache, must-revalidate\r\n"
1310 #endif /* FORCE_NOCACHE */
1311     "Content-Length: %u\r\n", r->code, r->payload_len);
1312 
1313   for (i=0;i<r->h.c;i++) {
1314 
1315 #ifdef FORCE_NOCACHE
1316     if (!strcasecmp(r->h1[i],"Expires")) continue;
1317     if (!strcasecmp(r->h1[i],"Last-Modified")) continue;
1318     if (!strcasecmp(r->h1[i],"Cache-Control")) continue;
1319     if (!strcasecmp(r->h1[i],"Pragma")) continue;
1320 #endif /* FORCE_NOCACHE */
1321 
1322     fprintf(client,"%s: %s\r\n",r->h.v1[i],r->h.v2[i]);
1323   }
1324 
1325   fprintf(client,"\r\n");
1326 
1327   if (r->payload_len)
1328     fwrite(r->payload,r->payload_len,1,client);
1329 
1330   fflush(client);
1331   fclose(client);
1332 
1333 }
1334 
1335 
1336 
1337 /* Calculate a checksum for response payload */
checksum_response(struct http_response * r)1338 void checksum_response(struct http_response* r) {
1339   MD5_CTX ctx;
1340   _u8  res[16];
1341 
1342   if (use_len) {
1343     r->cksum = r->payload_len;
1344     return;
1345   }
1346 
1347   if (!r->payload_len) return;
1348 
1349   MD5_Init(&ctx);
1350   MD5_Update(&ctx, r->payload, r->payload_len);
1351   MD5_Final((char*)res, &ctx);
1352 
1353   r->cksum = *(_u64*)res;
1354 
1355 }
1356 
1357 
1358 /* Attempt charset sniffing inside the payload; currently, supports HTML http-equiv only;
1359    kinda fuzzy, but should be good enough. */
1360 
1361 /* TODO: Make this a bit more robust; reversed http-equiv / content order is
1362    not detected, for example. */
1363 
detect_charset(struct http_response * r)1364 void detect_charset(struct http_response* r) {
1365   _u8  sniffed[33];
1366   _u32 i, max;
1367   _u8 got_equiv = 0;
1368 
1369   if (r->payload_len > CHARSNIFF) max = CHARSNIFF; else max = r->payload_len;
1370 
1371   for (i=0;i<max;i++) {
1372 
1373     if (r->payload[i] < 0x20 && !isspace(r->payload[i])) break;
1374 
1375     if (!strncasecmp(r->payload+i,"http-equiv",10)) got_equiv = 1;
1376 
1377     if (r->payload[i] == '>') got_equiv = 0;
1378 
1379     if (got_equiv && !strncasecmp(r->payload+i,"charset=",8)) {
1380       _u32 p = 0;
1381       _u8* cp = r->payload + i + 8;
1382       while (p < 32 && (isalnum(*cp) || *cp == '-' || *cp == '_')) sniffed[p++] = *(cp++);
1383       sniffed[p] = 0;
1384       break;
1385     }
1386 
1387   }
1388 
1389   if (i != max) {
1390     if (r->charset && strcasecmp(sniffed,r->charset)) r->has_multiple = 1;
1391     r->charset = strdup(sniffed);
1392     if (!r->charset) fatal("out of memory");
1393   }
1394 
1395   if (!r->charset) return;
1396 
1397   i = 0;
1398   while (valid_charsets[i]) {
1399     if (!strcasecmp(r->charset,valid_charsets[i])) return;
1400     i++;
1401   }
1402 
1403   /* But note that utf8, iso_8859_2, etc, are not recognized and lead to XSS... */
1404   r->bad_cset = 1;
1405 
1406   if (!r->charset[0]) r->charset = 0;
1407 
1408 }
1409 
1410 
/* Lowercase hexadecimal digit for a value in the 0..15 range; the argument
   indexes the digit table directly and is not range-checked. */
#define TOHEX(c) ("0123456789abcdef"[(c)])
1412 
1413 /* Sanitize output; make sure it's easily reversible, too. */
S(_u8 * string,_u8 nl)1414 _u8* S(_u8* string, _u8 nl) {
1415   _u8* ret = malloc(MAXTOKEN + 10 /* &#x00;...\0 */), *wp = ret;
1416   if (!ret) fatal("out of memory");
1417 
1418   while (*string) {
1419     switch (tolower(*string)) {
1420 
1421       /* Well, we kind-of want to maintain readaibility of text output, so let's
1422          pay the price and let '&' through. */
1423 
1424       case '&':
1425 
1426       /* Quote literally */
1427       case 'a' ... 'z':
1428       case '0' ... '9':
1429       case ' ':  case '+':  case '!':  case '@':  case '#':  case '$':
1430       case '%':  case '^':  case '*':  case '(':  case ')':  case '-':
1431       case '_':  case '=':  case '{':  case '[':  case '}':  case ']':
1432       case ':':  case ';':  case ',':  case '.':  case '?':  case '/':
1433       case '~':  case '`':  case '\\':
1434         *(wp++) = *string;
1435         break;
1436 
1437       /* These can be harmful or confusing, so replace with HTML entities */
1438       case '"':
1439       case '\'':
1440       case '<':
1441       case '>':
1442       case '|':
1443       case 127 ... 255:
1444 
1445 entitify:
1446 
1447         *(wp++) = '&';
1448         *(wp++) = '#';
1449         *(wp++) = 'x';
1450         *(wp++) = TOHEX(*string / 16);
1451         *(wp++) = TOHEX(*string % 16);
1452         *(wp++) = ';';
1453         break;
1454 
1455       /* Replace with shorthand codes */
1456       case '\r':
1457         if (nl) {
1458           *(wp++) = *string;
1459         } else {
1460           *(wp++) = '\\';
1461           *(wp++) = 'r';
1462         }
1463         break;
1464 
1465       case '\n':
1466         if (nl) {
1467           *(wp++) = *string;
1468         } else {
1469           *(wp++) = '\\';
1470           *(wp++) = 'n';
1471         }
1472         break;
1473 
1474       case '\t':
1475         if (nl) {
1476           *(wp++) = *string;
1477         } else {
1478           *(wp++) = '\\';
1479           *(wp++) = 't';
1480         }
1481         break;
1482 
1483       /* Replace with hex tokens */
1484       default:
1485         if (nl) goto entitify;
1486         *(wp++) = '\\';
1487         *(wp++) = 'x';
1488         *(wp++) = TOHEX(*string / 16);
1489         *(wp++) = TOHEX(*string % 16);
1490 
1491     }
1492 
1493     if (wp - ret >= MAXTOKEN) {
1494       *(wp++) = '.';
1495       *(wp++) = '.';
1496       *(wp++) = '.';
1497       break;
1498     }
1499 
1500     string++;
1501 
1502   }
1503 
1504   *(wp++) = 0;
1505   return ret;
1506 
1507 }
1508 
1509 
1510