1 /*
2 ratproxy - HTTP request handling
3 --------------------------------
4
5 The following routines take care of HTTP request handling, parsing,
6 and error reporting.
7
8 Note that this code is one-shot, process is terminated when request
9 handling is done - and as such, we rely on the OS to do garbage
10 collection.
11
12 Author: Michal Zalewski <lcamtuf@google.com>
13
14 Copyright 2007, 2008 by Google Inc. All Rights Reserved.
15
16 Licensed under the Apache License, Version 2.0 (the "License");
17 you may not use this file except in compliance with the License.
18 You may obtain a copy of the License at
19
20 http://www.apache.org/licenses/LICENSE-2.0
21
22 Unless required by applicable law or agreed to in writing, software
23 distributed under the License is distributed on an "AS IS" BASIS,
24 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25 See the License for the specific language governing permissions and
26 limitations under the License.
27
28 */
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <unistd.h>
33 #include <sys/socket.h>
34 #include <netinet/in.h>
35 #include <sys/types.h>
36 #include <sys/stat.h>
37 #include <fcntl.h>
38 #include <string.h>
39 #include <sys/wait.h>
40 #include <ctype.h>
41 #include <netdb.h>
42 #include <openssl/md5.h>
43 #include <time.h>
44
45 #include "config.h"
46 #include "types.h"
47 #include "debug.h"
48 #include "nlist.h"
49 #include "http.h"
50 #include "ssl.h"
51 #include "string-inl.h"
52
53 extern _u8* use_proxy; /* Runtime setting exports from ratproxy. */
54 extern _u32 proxy_port;
55 extern _u8 use_len;
56
57 static _u8 srv_buf[MAXLINE], /* libc IO buffers */
58 cli_buf[MAXLINE];
59
60
61 /* Read a single line of HTTP headers, strip whitespaces */
grab_line(FILE * where)62 static _u8* grab_line(FILE* where) {
63 static _u8 inbuf[MAXLINE];
64 _u32 l;
65
66 if (!fgets(inbuf,MAXLINE,where)) return 0;
67
68 l = strlen(inbuf);
69
70 /* Excessive line length is bad, let's bail out. */
71 if (l == MAXLINE-1) return 0;
72
73 while (l && isspace(inbuf[l-1])) inbuf[--l] = 0;
74
75 return inbuf;
76 }
77
78
79 /* Return a generic HTTP error message, end current process.
80 Note that this function should not handle user-controlled data. */
http_error(FILE * client,_u8 * message,_u8 sink)81 static void http_error(FILE* client, _u8* message,_u8 sink) {
82
83 if (client) {
84 _u8* l;
85
86 if (sink) while ((l=grab_line(client)) && l[0]);
87
88 fprintf(client,
89 "HTTP/1.0 500 %s\n"
90 "Content-type: text/html\n\n"
91
92 "<font face=\"Bitstream Vera Sans Mono,Andale Mono,Lucida Console\">\n"
93 "The proxy is unable to process your request.\n"
94 "<h1><font color=red><b>%s.</b></font></h1>\n", message, message);
95
96 fflush(client);
97 fclose(client);
98
99 }
100
101 debug("[!] WARNING: %s.\n", message);
102 exit(0);
103
104 }
105
106
107 static _u8* BASE16 = "0123456789ABCDEF";
108
109 /* Decode URL-encoded parameter string */
parse_urlencoded(struct naive_list_p * p,_u8 * string)110 void parse_urlencoded(struct naive_list_p* p, _u8* string) {
111 _u8 val_now = 0;
112 _u8 name[MAXLINE+1], val[MAXLINE+1];
113 _u32 nlen = 0, vlen = 0;
114
115 name[0] = 0;
116 val[0] = 0;
117
118 do {
119
120 _u8 dec = 0;
121
122 switch (*string) {
123 case '+':
124 dec = ' ';
125 break;
126
127 case '=':
128 val_now = 1;
129 break;
130
131 case '%': {
132 _u8 *a, *b;
133
134 /* Parse %nn code, if valid; default to '?nn' if not, replace with ? if \0. */
135
136 if (!string[1] || !string[2] || !(a=strchr(BASE16,toupper(string[1]))) ||
137 !(b=strchr(BASE16,toupper(string[2])))) { dec = '?'; break; }
138
139 dec = (a-BASE16) * 16 + (b-BASE16);
140 string += 2;
141 if (!dec) dec = '?';
142
143 break;
144
145 }
146
147 case '&':
148 case 0:
149
150 /* Handle parameter terminator; note that we also iterate over \0
151 because of loop condition placement. */
152
153 if (nlen) {
154 name[nlen] = 0;
155 val[vlen] = 0;
156 DYN_ADDP(*p,name,val,"");
157 }
158
159 val_now = 0;
160 nlen = 0;
161 vlen = 0;
162 break;
163
164 default:
165 if (!(dec=*string)) dec = '?';
166
167 }
168
169 /* Append decoded char, if any, to field name or value as needed. */
170
171 if (dec) {
172 if (!val_now) { if (nlen < MAXLINE) name[nlen++] = dec; }
173 else { if (vlen < MAXLINE) val[vlen++] = dec; }
174 }
175
176 } while (*(string++));
177
178 }
179
180
181 /* Read a line of multipart data from a linear buffer, advance buffer pointer. */
get_multipart_line(_u8 ** buf)182 static _u8* get_multipart_line(_u8** buf) {
183 static _u8* retbuf;
184 _u8* x;
185 _u32 cnt;
186
187 if (retbuf) free(retbuf);
188
189 /* We assume \r\n formatting here, which is RFC-mandated and implemtned
190 by well-behaved browsers. */
191
192 x = strchr(*buf,'\r');
193
194 if (!x || x[1] != '\n') {
195 _u32 l = strlen(*buf);
196 retbuf = malloc(l + 1);
197 if (!retbuf) fatal("out of memory");
198 strcpy(retbuf,*buf);
199 *buf += l;
200 return retbuf;
201 }
202
203 cnt = x - *buf;
204
205 retbuf = malloc(cnt + 1);
206 if (!retbuf) fatal("out of memory");
207 memcpy(retbuf,*buf,cnt);
208 retbuf[cnt] = 0;
209
210 *buf += cnt + 2;
211
212 return retbuf;
213
214 }
215
216
217 /* Collect multipart data from a reasonably well-behaved browser. This routine
218 makes multiple assumptions that might be not true for maliciously formatted
219 data, but we do not strive to serve such requests well. */
parse_multipart(struct naive_list_p * p,_u8 * string,_u32 slen)220 void parse_multipart(struct naive_list_p* p, _u8* string, _u32 slen) {
221 _u8* field, *fname;
222 _u8* endptr = string + slen;
223
224 do {
225
226 _u8 *l, *end, *c;
227
228 field = 0;
229 fname = 0;
230
231 /* Skip boundary */
232 l = get_multipart_line(&string);
233 if (l[0] != '-' || l[1] != '-') return;
234
235 /* Sink headers, but grab field name if any */
236 while ((l = get_multipart_line(&string)) && l[0]) {
237 if (!strncasecmp(l,"Content-Disposition:",20)) {
238
239 /* Grab field name. */
240 _u8* f = rp_strcasestr(l,"; name=\"");
241 if (!f) continue;
242 f += 7;
243 c = strchr(++f,'"');
244 if (!c) continue;
245 *c = 0;
246
247 field = strdup(f);
248 if (!field) fatal("out of memory");
249
250 /* Grab file name, if any. */
251
252 f = rp_strcasestr(c + 1,"; filename=\"");
253 if (!f) continue;
254 f += 11;
255 c = strchr(++f,'"');
256 if (!c) continue;
257 *c = 0;
258 fname = strdup(f);
259 if (!fname) fatal("out of memory");
260
261 }
262
263 }
264
265 end = rp_memmem(string,endptr - string, "\r\n--", 4);
266 if (!end) return;
267
268 if (field)
269 DYN_ADDP_RAWMEM(*p,field,string,end-string,fname ? fname : (_u8*)"");
270
271 string = end + 2;
272
273 } while (1);
274
275 }
276
277
278
279 #define BASE64 "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ+/_-"
280
281 /* Looks for what could pass for a reasonably robust session token or XSRF protection. */
contains_token(_u8 * name,_u8 * value)282 _u8 contains_token(_u8* name, _u8* value) {
283 _u32 run16 = 0, run16_num = 0,
284 run64 = 0, run64_true = 0, run64_num = 0, run64_up = 0, run64_low = 0;
285 _u8* st = 0;
286 static _u32 tmin,tmax;
287 _u32 fno = 0;
288
289 if (!tmin) {
290 tmin = time(0);
291 tmax = tmin + (60 * 60 * 24 * 30); /* One month forward */
292 tmin -= (60 * 60 * 24 * 365 * 5); /* Five years back */
293 }
294
295 /* Known bad field names - return 0. */
296
297 fno = 0;
298
299 while (no_xsrf_fields[fno]) {
300 if (no_xsrf_fields[fno][0] == '=') {
301 if (!strcasecmp(name,no_xsrf_fields[fno] + 1)) return 0;
302 } else {
303 if (rp_strcasestr(name,no_xsrf_fields[fno])) return 0;
304 }
305 fno++;
306 }
307
308 /* Known safe field names - return 1. */
309
310 fno = 0;
311
312 while (xsrf_fields[fno]) {
313 if (xsrf_fields[fno][0] == '=') {
314 if (!strcasecmp(name,xsrf_fields[fno] + 1)) return 1;
315 } else {
316 if (rp_strcasestr(name,xsrf_fields[fno])) return 1;
317 }
318 fno++;
319 }
320
321 /* URLs are not anti-XSRF tokens, no matter how random they look. */
322
323 if (!strncmp(value,"http",4)) return 0;
324
325 /* Iterate over value data, compute base16 / base64 runs, collect
326 basic character disttributin data, rule out patterns such as unix
327 time, and make the call. */
328
329 do {
330
331 if (*value && strchr(BASE16,toupper(*value))) {
332
333 run16++;
334 if (isdigit(*value)) run16_num++;
335
336 } else {
337
338 if (run16 >= XSRF_B16_MIN && run16 <= XSRF_B16_MAX &&
339 run16_num >= XSRF_B16_NUM) {
340 _u8 tmp[5];
341 _u32 val;
342
343 strncpy(tmp,st,4);
344 tmp[4] = 0;
345 val = atoi(tmp);
346
347 if ((val < tmin / 1000000 || val > tmax / 1000000) &&
348 (st[0] != st[1] || st[0] != st[2])) return 1;
349 }
350
351 run16 = 0;
352 run16_num = 0;
353
354 }
355
356 if (*value && strchr(BASE64,toupper(*value))) {
357
358 if (!isalpha(*value)) run64_num++;
359 if (isupper(*value)) run64_up++;
360 if (islower(*value)) run64_low++;
361 if (!run16) run64_true = 1;
362 if (!run64) st = value;
363 run64++;
364
365 } else {
366
367 if (run64 >= XSRF_B64_MIN && run64 <= XSRF_B64_MAX &&
368 ((run64_num >= XSRF_B64_NUM && run64_up >= XSRF_B64_CASE &&
369 run64_low >= XSRF_B64_CASE) ||
370 (run64_num >= XSRF_B64_NUM2)) && run64_true)
371 if (st[0] != st[1] || st[0] != st[2]) return 1;
372 run64 = 0;
373 run64_num = 0;
374 run64_up = 0;
375 run64_low = 0;
376 run64_true = 0;
377 st = 0;
378
379 }
380
381 } while (*(value++));
382
383 return 0;
384
385 }
386
387
388 /* Try to parse cookie header values. */
parse_cookies(_u8 * str,struct naive_list2 * c)389 static void parse_cookies(_u8* str, struct naive_list2* c) {
390 _u8 name[128], val[128];
391
392 /* Iterate over cookies. We ignore cookies over 128 bytes for
393 name / value, and "special" values such as expiration date,
394 version, etc. */
395
396 while (str) {
397 while (isspace(*str)) str++;
398 if (sscanf(str,"%127[^;=]=%127[^;]",name,val) == 2) {
399 if (strcasecmp(name,"expires") && strcasecmp(name,"comment") &&
400 strcasecmp(name,"version") && strcasecmp(name,"max-age") &&
401 strcasecmp(name,"path") && strcasecmp(name,"domain") && name[0] != '$')
402 DYN_ADD2(*c,name,val);
403 }
404
405 str = strchr(str + 1 ,';');
406 if (str) str++;
407
408 }
409
410 }
411
412
413 /* Process the entire HTTP request, parse fields, and extract some preliminary signals. */
collect_request(FILE * client,_u8 * ssl_host,_u32 ssl_port)414 struct http_request* collect_request(FILE* client,_u8* ssl_host, _u32 ssl_port) {
415 struct http_request* ret;
416 _u8 *line, *x;
417 _u32 i;
418
419 /* Begin carefully - on CONNECT requests, we do not want to read more than
420 absolutely necessary. As soon as non-CONNECT is confirmed, we switch
421 to proper buffering. */
422
423 setvbuf(client, cli_buf, _IONBF, 0);
424
425 ret = calloc(1, sizeof(struct http_request));
426 if (!ret) fatal("out of memory");
427
428 line = grab_line(client);
429 if (!line || !line[0]) exit(0);
430
431 x = strchr(line,' ');
432 if (!x || x == line) http_error(client, "URL address missing or malformed request",1);
433 *(x++) = 0;
434
435 ret->method = strdup(line);
436 if (!ret->method) fatal("out of memory");
437
438 if (strcmp(line,"CONNECT")) {
439
440 /* Ok, safe to handle HTTP at full speed now. */
441
442 setvbuf(client, cli_buf, _IOFBF, sizeof(cli_buf));
443
444 if (!ssl_host) {
445
446 /* Unless coming from within CONNECT, we want a
447 properly specified protocol and so forth. */
448
449 if (x[0] == '/')
450 http_error(client, "Direct HTTP requests not allowed",1);
451
452 if (strncmp(x,"http://",7))
453 http_error(client, "Unsupported protocol",1);
454
455 x += 7;
456
457 }
458
459 } else {
460
461 /* We do not want CONNECT requests within CONNECT requests, really. */
462 if (ssl_host) http_error(client,"Evil CONNECT nesting",1);
463
464 ret->is_connect = 1;
465
466 }
467
468 ret->host = x;
469
470 x = strchr(ret->host,' ');
471 if (!x) http_error(client,"Missing HTTP protocol version",1);
472
473 if (strcmp(x," HTTP/1.0") && strcmp(x," HTTP/1.1"))
474 http_error(client,"unsupported HTTP protocol version",1);
475
476 /* Trim HTTP/1.x part now, we do not need it */
477
478 *x = 0;
479
480 if (!ret->is_connect) {
481
482 ret->path = strchr(ret->host,'/');
483 if (!ret->path) http_error(client,"Incomplete request URL",1);
484 *(ret->path++) = 0;
485
486 }
487
488 /* Try to find port, if any */
489
490 x = strchr(ret->host,':');
491
492 if (x) {
493
494 ret->port = atoi(x+1);
495
496 if (!ret->port || ret->port > 65535)
497 http_error(client,"Illegal port specification",1);
498
499 if (ret->port < 1024 && ret->port != 80 && ret->port != 443)
500 http_error(client,"Access to this port denied",1);
501
502 *x = 0;
503
504 } else {
505 if (ret->is_connect) ret->port = 443;
506 else ret->port = 80;
507 }
508
509 /* Populate HTTP envelope data with higher-level CONNECT
510 information if one present. */
511
512 if (ssl_host) {
513 ret->host = ssl_host;
514 ret->port = ssl_port;
515 ret->from_ssl = 1;
516 }
517
518 if (!ret->host[0])
519 http_error(client,"Host name is missing",1);
520
521 ret->host = strdup(ret->host);
522 if (!ret->host) fatal("out of memory");
523
524 /* Grab query data */
525
526 if (!ret->is_connect && (x = strchr(ret->path,'?'))) {
527
528 *(x++) = 0;
529 ret->query = strdup(x);
530 if (!ret->query) fatal("out of memory");
531
532 }
533
534 /* Grab path data */
535
536 if (!ret->is_connect) {
537
538 ret->path = strdup(ret->path);
539 if (!ret->path) fatal("out of memory");
540
541 x = strrchr(ret->path,'.');
542
543 if (x) ret->ext = x + 1;
544
545 }
546
547 /* Request target is now fully parsed. Let's collect headers, if any. */
548
549 while (1) {
550
551 line = grab_line(client);
552
553 if (!line) http_error(client,"Incomplete or malformed request headers",1);
554
555 /* Empty line == end of headers */
556 if (!line[0]) break;
557
558 x = strchr(line,':');
559 if (!x) http_error(client,"Invalid request header",1);
560 *x = 0;
561 while (isspace(*(++x)));
562
563 if (!strcasecmp(line,"Content-Length")) {
564
565 ret->payload_len = atoi(x);
566
567 if (ret->payload_len > MAXPAYLOAD)
568 http_error(client,"Payload size limit exceeded",1);
569
570 }
571
572 if (!strncasecmp(line,"Cookie",6))
573 parse_cookies(x,&ret->cookies);
574
575 if (!strcasecmp(line,"Referer")) {
576 _u8* rh;
577
578 ret->referer = strdup(x);
579 if (!ret->referer) fatal("out of memory");
580
581 /* Extract referer host to simplify other checks later on. */
582
583 if ((rh = strstr(x,"://"))) {
584 _u8* x;
585
586 rh = strdup(rh + 3);
587 if (!rh) fatal("out of memory");
588 if ((x = strchr(rh,'/'))) *x = 0;
589 if ((x = strchr(rh,':'))) *x = 0;
590
591 ret->ref_host = rh;
592
593 }
594
595 }
596
597 if (!strcasecmp(line,"X-Ratproxy-Loop"))
598 http_error(client,"Proxy loop detected",1);
599
600 /* These are specific to publicly documented anti-XSRF features of
601 Google Web Toolkit and Google Data APIs; this might be further
602 extended to accomodate other custom schemes in popular frameworks. */
603
604 if (!strcasecmp(line,"Authorization") && !strncasecmp(x,"GoogleLogin auth=",17)) {
605 ret->xsrf_safe = 1;
606 ret->authsub = 1;
607 }
608
609 if (!strcasecmp(line,"Content-Type")) {
610
611 if (rp_strcasestr(x,"text/x-gwt-rpc")) { ret->xsrf_safe = 1; ret->authsub = 1; }
612
613 if (rp_strcasestr(x,"multipart/form-data")) ret->multipart = 1;
614 else if (!rp_strcasestr(x,"application/x-www-form-urlencoded")) ret->non_param = 1;
615 }
616
617 DYN_ADD2(ret->h,line,x);
618
619 }
620
621 /* Get POST payload */
622
623 if (ret->payload_len) {
624
625 ret->payload = malloc(ret->payload_len + 1);
626 if (!ret->payload) fatal("out of memory");
627
628 if (fread(ret->payload,ret->payload_len,1,client) != 1)
629 http_error(client,"Premature end of payload data",0);
630
631 /* To make string matching safe. */
632 ret->payload[ret->payload_len] = 0;
633
634 }
635
636 /* Parse GET/POST parameters */
637
638 if (ret->query) parse_urlencoded(&ret->p, ret->query);
639
640 ret->ppar_bound = ret->p.c;
641
642 /* Do not parse payloads of arcane types. */
643
644 if (ret->payload && !ret->non_param) {
645 if (ret->multipart) parse_multipart(&ret->p, ret->payload, ret->payload_len);
646 else parse_urlencoded(&ret->p, ret->payload);
647 }
648
649 /* Locate XSRF tokens, if any */
650 /* Do not perform contains_token() checks on file fields. */
651
652 for (i=0;i<ret->p.c;i++)
653 if (!ret->p.fn[i][0] && contains_token(ret->p.v1[i],ret->p.v2[i]))
654 { ret->xsrf_safe = 1; break; }
655
656 return ret;
657
658 }
659
660
661 /* Connect to server */
open_server(FILE * client,_u8 * host,_u32 port)662 static FILE* open_server(FILE* client, _u8* host, _u32 port) {
663 FILE* ret;
664 struct sockaddr_in sin;
665 struct hostent* he;
666 _s32 ss;
667
668 if (!(he = gethostbyname(host)) || !(he->h_addr_list[0]))
669 http_error(client,"Unable to find target host",0);
670
671 ss = socket(PF_INET, SOCK_STREAM, 0);
672 if (ss < 0) pfatal("socket() failed");
673
674 sin.sin_family = PF_INET;
675 sin.sin_port = htons(port);
676
677 memcpy(&sin.sin_addr, he->h_addr_list[0], 4);
678
679 if (connect(ss,(struct sockaddr*)&sin,sizeof(struct sockaddr_in)))
680 http_error(client,"Connection to target failed",0);
681
682 ret = fdopen(ss,"w+");
683 if (!ret) fatal("fdopen() failed");
684
685 setvbuf(ret, srv_buf, _IOFBF, sizeof(srv_buf));
686
687 return ret;
688
689 }
690
691
692 /* Connect to server, take proxy CONNECT handling into account */
open_server_complete(FILE * client,struct http_request * r)693 FILE* open_server_complete(FILE* client, struct http_request* r) {
694 FILE* ret;
695 _u8* l;
696
697 if (use_proxy)
698 ret = open_server(client, use_proxy, proxy_port);
699 else
700 ret = open_server(client, r->host, r->port);
701
702 if (r->is_connect) {
703
704 if (use_proxy) {
705 fprintf(ret,"CONNECT %s:%u HTTP/1.0\r\n\r\n",r->host,r->port);
706 fflush(ret);
707
708 setvbuf(ret, srv_buf, _IONBF, 0);
709 /* Sink proxy response */
710 while ((l=grab_line(ret)) && l[0]);
711 }
712
713 if (client) {
714 fprintf(client,"HTTP/1.0 200 Go ahead, please.\r\n\r\n");
715 fflush(client);
716 }
717
718 }
719
720 return ret;
721
722 }
723
724
725 #define NEEDS_URLENC(x) \
726 (!(x) || !strchr("ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_.",toupper(x)))
727
728
729 /* Rewrite GET and POST parameters as needed. */
reconstruct_request(struct http_request * r)730 void reconstruct_request(struct http_request* r) {
731 struct dyn_str p = { 0, 0 }, q = { 0, 0 };
732 _u32 cp = 0, i;
733 _u8 c;
734 _u8 tmp[32];
735
736 /* Encode params to query string, until ppar boundary is hit. */
737
738 for (;cp<r->p.c;cp++) {
739
740 if (cp == r->ppar_bound) break;
741
742 if (q.l) STR_APPEND_CHAR(q,'&');
743
744 i = 0;
745 while ((c=r->p.v1[cp][i])) {
746 if (NEEDS_URLENC(c)) {
747 sprintf(tmp,"%%%02X",c);
748 } else {
749 tmp[0] = c;
750 tmp[1] = 0;
751 }
752 STR_APPEND(q,tmp);
753 i++;
754 }
755
756 STR_APPEND_CHAR(q,'=');
757
758 i = 0;
759 while ((c=r->p.v2[cp][i])) {
760 if (NEEDS_URLENC(c)) {
761 sprintf(tmp,"%%%02X",c);
762 } else {
763 tmp[0] = c;
764 tmp[1] = 0;
765 }
766 STR_APPEND(q,tmp);
767 i++;
768 }
769
770 }
771
772 /* Update query string. */
773 if (q.l) r->query = q.v;
774
775
776 /* Deal with the rest of parameters, putting them in a multipart
777 envelope or as urlencoded payload, as needed. */
778
779 if (r->multipart) {
780
781 /* Update boundary; be just random enough to prevent accidents. */
782
783 sprintf(tmp,"ratproxybound%08x",rand());
784 r->use_boundary = strdup(tmp);
785 if (!r->use_boundary) fatal("out of memory");
786
787 for (;cp<r->p.c;cp++) {
788 STR_APPEND(p,"--");
789 STR_APPEND(p,r->use_boundary);
790 STR_APPEND(p,"\r\nContent-Disposition: form-data; name=\"");
791 STR_APPEND(p,r->p.v1[cp]);
792
793 if (r->p.fn[cp][0]) {
794 STR_APPEND(p,"\"; filename=\"");
795 STR_APPEND(p,r->p.fn[cp]);
796 }
797
798 STR_APPEND(p,"\"\r\n\r\n");
799 if (r->p.l2[cp]) {
800 STR_APPEND_RAWMEM(p,r->p.v2[cp],r->p.l2[cp]);
801 } else {
802 STR_APPEND(p,r->p.v2[cp]);
803 }
804
805 STR_APPEND(p,"\r\n");
806
807 }
808
809 STR_APPEND(p,"--");
810 STR_APPEND(p,r->use_boundary);
811 STR_APPEND(p,"--\r\n");
812
813 } else if (!r->non_param) {
814
815 for (;cp<r->p.c;cp++) {
816
817 if (p.l) STR_APPEND_CHAR(p,'&');
818
819 i = 0;
820 while ((c=r->p.v1[cp][i])) {
821 if (NEEDS_URLENC(c)) {
822 sprintf(tmp,"%%%02X",c);
823 } else {
824 tmp[0] = c;
825 tmp[1] = 0;
826 }
827 STR_APPEND(p,tmp);
828 i++;
829 }
830
831 STR_APPEND_CHAR(p,'=');
832
833 i = 0;
834 while ((c=r->p.v2[cp][i])) {
835 if (NEEDS_URLENC(c)) {
836 sprintf(tmp,"%%%02X",c);
837 } else {
838 tmp[0] = c;
839 tmp[1] = 0;
840 }
841 STR_APPEND(p,tmp);
842 i++;
843
844 }
845
846 }
847
848 if (p.l) STR_APPEND(p,"\r\n");
849
850 } else return; /* Leave payload intact. */
851
852 /* Update POST string. */
853 if (p.l) {
854 r->payload = p.v;
855 r->payload_len = p.l;
856 }
857
858 return;
859
860 }
861
862
863 /* Detect and convert GWT RPC syntax where appropriate. This is specific to
864 Google Web Toolkit. */
maybe_gwt_rpc(_u8 * str)865 static _u8* maybe_gwt_rpc(_u8* str) {
866 struct dyn_str p = { 0, 0 };
867 _u8 *c = str, *n;
868 _u32 num = 0;
869
870 _u32 l = strlen(str);
871 if (l < 3 || str[l-3] != 0xEF || str[l-2] != 0xBF || str[l-1] != 0xBF) return str;
872
873 STR_APPEND(p,"GWT_RPC[");
874
875 while ((n = strstr(c,"\xEF\xBF\xBF"))) {
876 *n = 0;
877
878 if (num > 4) {
879 if (num != 5) STR_APPEND_CHAR(p,',');
880 STR_APPEND_CHAR(p,'\'');
881
882 if (!strncmp(c,"[L",2)) c += 2;
883
884 if (!strncmp(c,"com.google.",11) || !strncmp(c,"java.",5)) c = strrchr(c,'.') + 1;
885
886 /* We *could* escape here, but it's probably not worth the effort. */
887
888 STR_APPEND(p,c);
889 STR_APPEND_CHAR(p,'\'');
890 }
891
892 num++;
893 *n = '\xEF';
894 c = n + 3;
895 }
896
897 STR_APPEND_CHAR(p,']');
898
899 return p.v;
900
901 }
902
903
904 /* Convert multipart data to URLencoded string, to simplify reporting. */
stringify_payload(struct http_request * r)905 _u8* stringify_payload(struct http_request* r) {
906 struct dyn_str p = { 0, 0 };
907 _u32 cp, i, c;
908 _u8 tmp[32];
909
910 if (!r->multipart) return maybe_gwt_rpc(r->payload);
911
912 /* Reconstruct payload from multipart boundary... */
913
914 for (cp=r->ppar_bound;cp<r->p.c;cp++) {
915
916 if (p.l) STR_APPEND_CHAR(p,'&');
917
918 i = 0;
919 while ((c=r->p.v1[cp][i])) {
920 if (NEEDS_URLENC(c)) {
921 sprintf(tmp,"%%%02X",c);
922 } else {
923 tmp[0] = c;
924 tmp[1] = 0;
925 }
926 STR_APPEND(p,tmp);
927 i++;
928 }
929
930 STR_APPEND_CHAR(p,'=');
931
932 /* When dealing with a file field, use field name, rather than
933 field data. */
934
935 if (r->p.fn[cp][0]) {
936 STR_APPEND(p,"FILE[");
937
938 i = 0;
939 while ((c=r->p.fn[cp][i])) {
940 if (NEEDS_URLENC(c)) {
941 sprintf(tmp,"%%%02X",c);
942 } else {
943 tmp[0] = c;
944 tmp[1] = 0;
945 }
946 STR_APPEND(p,tmp);
947 i++;
948 }
949
950 STR_APPEND_CHAR(p,']');
951
952 } else {
953
954 i = 0;
955 while ((c=r->p.v2[cp][i])) {
956 if (NEEDS_URLENC(c)) {
957 sprintf(tmp,"%%%02X",c);
958 } else {
959 tmp[0] = c;
960 tmp[1] = 0;
961 }
962 STR_APPEND(p,tmp);
963 i++;
964 }
965
966 }
967
968 }
969
970 return p.v;
971
972 }
973
974
975 /* Do a naive date comparison for t-1 sec/min/hr scenarios. */
comp_dates(_u8 * exp,_u8 * dat)976 _u8 comp_dates(_u8* exp, _u8* dat) {
977 _s32 i = strlen(dat), dc = 0;
978
979 if (i != strlen(exp)) return 1;
980
981 while (--i >= 0) {
982 if (exp[i] != dat[i]) {
983 if (!isdigit(dat[i]) || exp[i] > dat[i] || ++dc > 1) return 1;
984 }
985 }
986
987 return 0;
988 }
989
990
991
992 /* Send HTTP request, collect and parse response, spot header-related problems. */
send_request(FILE * client,FILE * server,struct http_request * r,_u8 strip_state)993 struct http_response* send_request(FILE* client, FILE* server, struct http_request* r,
994 _u8 strip_state) {
995 struct http_response* ret;
996 _u8 *line, *x;
997 _s32 decl_clen = -1;
998 _u32 i;
999 _u8 port_spec[16] = { 0 };
1000 _u8 *exp_value = 0, *dat_value = 0;
1001
1002 /* Send the request... unfortunately, we cannot specify :80 on all
1003 standard requests, as some URL rewriters that redirect to https
1004 will copy this over and cause problems. */
1005
1006 if (!r->from_ssl) {
1007 if (r->port != 80) sprintf(port_spec,":%u",r->port);
1008 } else {
1009 if (r->port != 443) sprintf(port_spec,":%u",r->port);
1010 }
1011
1012 if (use_proxy && !r->from_ssl)
1013 fprintf(server,
1014 "%s http://%s:%u/%s%s%s HTTP/1.0\r\n"
1015 "Connection: close\r\n"
1016 "Host: %s%s\r\n"
1017 "Accept-Encoding: identity\r\n"
1018 "X-Ratproxy-Loop: 1\r\n"
1019 "Content-Length: %u\r\n", r->method, r->host, r->port, r->path,
1020 r->query ? "?" : "", r->query ? r->query : (_u8*)"",
1021 r->host, port_spec, r->payload_len);
1022 else
1023 fprintf(server,
1024 "%s /%s%s%s HTTP/1.0\r\n"
1025 "Connection: close\r\n"
1026 "Host: %s%s\r\n"
1027 "Accept-Encoding: identity\r\n"
1028 "X-Ratproxy-Loop: 1\r\n"
1029 "Content-Length: %u\r\n", r->method, r->path,
1030 r->query ? "?" : "", r->query ? r->query : (_u8*)"",
1031 r->host, port_spec, r->payload_len);
1032
1033 if (!strip_state)
1034 for (i=0;i<r->h.c;i++) {
1035
1036 /* There are several types of headers we'd rather skip
1037 and override elsewhere. */
1038
1039 #ifdef FORCE_NOCACHE
1040 if (!strncasecmp(r->h.v1[i],"If-",3)) continue;
1041 #endif /* FORCE_NOCACHE */
1042
1043 if (!strcasecmp(r->h.v1[i],"Host")) continue;
1044 if (!strcasecmp(r->h.v1[i],"Range")) continue;
1045 if (!strcasecmp(r->h.v1[i],"Connection")) continue;
1046 if (!strcasecmp(r->h.v1[i],"Keep-Alive")) continue;
1047 if (!strncasecmp(r->h.v1[i],"Proxy-",6)) continue;
1048 if (!strcasecmp(r->h.v1[i],"Accept-Encoding")) continue;
1049 if (!strcasecmp(r->h.v1[i],"Content-Length")) continue;
1050
1051 /* Override multipart boundary on requests after rewriting. */
1052
1053 if (!strcasecmp(r->h.v1[i],"Content-Type") && r->use_boundary) {
1054 fprintf(server,"Content-Type: multipart/form-data; boundary=%s\r\n",r->use_boundary);
1055 continue;
1056 }
1057
1058 fprintf(server,"%s: %s\r\n",r->h.v1[i],r->h.v2[i]);
1059
1060 }
1061
1062 fprintf(server,"\r\n");
1063
1064 if (r->payload_len)
1065 fwrite(r->payload,r->payload_len,1,server);
1066
1067 fflush(server);
1068
1069 /* Ok, sending complete. */
1070
1071 /* Process the response... */
1072
1073 ret = calloc(1,sizeof(struct http_response));
1074 if (!ret) fatal("out of memory");
1075
1076 ret->ext = r->ext;
1077
1078 line = grab_line(server);
1079
1080 if (!line || !line[0]) http_error(client,"Malformed HTTP response",0);
1081
1082 x = strchr(line,' ');
1083 if (!x || x == line) http_error(client,"HTTP response code missing",0);
1084 *(x++) = 0;
1085
1086 ret->code = atoi(x);
1087 if (ret->code < 100 || ret->code > 999)
1088 http_error(client,"Invalid HTTP response code",0);
1089
1090 while (1) {
1091
1092 line = grab_line(server);
1093 if (!line) http_error(client,"Premature end of server headers",0);
1094
1095 if (!line[0]) break;
1096
1097 x = strchr(line,':');
1098 if (!x) http_error(client,"Invalid response header",0);
1099 *x = 0;
1100 while (isspace(*(++x)));
1101
1102 for (i=0;i<ret->h.c;i++)
1103 if (!strcasecmp(line,ret->h.v1[i]) && strcmp(x,ret->h.v2[i]) &&
1104 strncasecmp(line,"Set-Cookie",10) && strncasecmp(line,"X-Cache",7) &&
1105 strncasecmp(line,"Server",7))
1106 ret->has_multiple = 1;
1107
1108 /* Again, some headers need to be analyzed in more detail or skipped. */
1109
1110 /* Caching headers checks... */
1111 if (!strcasecmp(line,"Expires")) {
1112 exp_value = strdup(x);
1113 if (!exp_value) fatal("out of memory");
1114 }
1115
1116 if (!strcasecmp(line,"Date")) {
1117 dat_value = strdup(x);
1118 if (!dat_value) fatal("out of memory");
1119 }
1120
1121 /* Both "no-store" and "max-age=0" are generally discouraged, but in practice,
1122 should be sufficient, so let's be polite. */
1123
1124 /* TODO: These checks should be probably more robust to detect typos
1125 such as missing whitespaces. */
1126
1127 if (!strcasecmp(line,"Cache-Control")) {
1128 if (strstr(x,"no-cache") || strstr(x,"private") ||
1129 strstr(x,"max-age=0") || strstr(x,"no-store"))
1130 ret->cc11intent = INTENT_PRIV; else ret->cc11intent = INTENT_PUB;
1131 }
1132
1133 if (!strcasecmp(line,"Pragma")) {
1134 if (strstr(x,"no-cache")) ret->pr10intent = INTENT_PRIV;
1135 else ret->pr10intent = INTENT_PUB;
1136 }
1137
1138 if (!strcasecmp(line,"Connection")) continue;
1139 if (!strcasecmp(line,"Keep-Alive")) continue;
1140 if (!strcasecmp(line,"Content-Range")) continue;
1141
1142 if (!strcasecmp(line,"Content-Type")) {
1143 _u8 *copy = strdup(x), *y;
1144 if (!copy) fatal("out of memory");
1145
1146 if ((y = strrchr(copy,';'))) {
1147 *(y++) = 0;
1148 while (isspace(*y)) y++;
1149 if (!strncasecmp(y,"charset=",8)) {
1150 y += 8;
1151 if (*y == '"' && y[strlen(y)-1] == '"') {
1152 y[strlen(y)-1]=0;
1153 y++;
1154 }
1155 ret->charset = y;
1156 }
1157 }
1158
1159 ret->mime_type = copy;
1160
1161 }
1162
1163 if (!strcasecmp(line,"Content-Disposition")) {
1164 _u8* y;
1165
1166 ret->is_attach = (strncasecmp(x,"attachment;",11) == 0) ||
1167 (strcasecmp(x,"attachment") == 0);
1168
1169 /* If filename is specified, try to grab it (it supersedes
1170 any URL-derived ones). */
1171
1172 y=strrchr(x,'.');
1173 if (y && y[1] && y[1] != '"') {
1174 ret->ext = strdup(y + 1);
1175 if (!ret->ext) fatal("out of memory");
1176 y = strchr(y + 1,'"');
1177 if (y) *y=0;
1178 }
1179
1180 }
1181
1182 if (!strcasecmp(line,"Location")) {
1183 ret->location = strdup(x);
1184 if (!ret->location) fatal("out of memory");
1185 }
1186
1187 if (!strcasecmp(line,"Set-Cookie")) parse_cookies(x,&ret->cookies);
1188
1189 if (!strcasecmp(line,"Content-Length")) {
1190
1191 decl_clen = atoi(x);
1192 if (decl_clen < 0)
1193 http_error(client,"Bogus content length returned by server.",0);
1194
1195 continue;
1196 }
1197
1198 DYN_ADD2(ret->h,line,x);
1199
1200 }
1201
1202 /* Some final "Expires" parsing for caching headers checks... */
1203
1204 if (exp_value) {
1205
1206 _u8* year = 0, *z = strchr(exp_value,',');
1207
1208 ret->ex10intent = INTENT_PUB;
1209
1210 /* Try to extract the year, at least roughly... */
1211
1212 if (!isalnum(exp_value[0])) {
1213
1214 /* "Expires: -1" is a nasty trick, but it works. */
1215 ret->ex10intent = INTENT_PRIV;
1216
1217 } else if (dat_value && (!strcmp(exp_value,dat_value) || !comp_dates(exp_value,dat_value))) {
1218
1219 /* Date == Expires is an alternative and valid method. */
1220 ret->ex10intent = INTENT_PRIV;
1221
1222 } else {
1223
1224 if (z && z == exp_value + 3 && strlen(exp_value) > 11) {
1225
1226 /* Sun, 06 Nov 1994 08:49:37 GMT ; RFC 822, updated by RFC 1123 */
1227 year = exp_value + 11;
1228 if (*year == ' ') year++;
1229
1230 } else if (z) {
1231
1232 /* Sunday, 06-Nov-94 08:49:37 GMT ; RFC 850, obsoleted by RFC 1036 */
1233 year = strchr(z,'-');
1234 if (year) year = strchr(year + 1,'-');
1235 if (year) year++;
1236
1237 } else if (strlen(x) > 19) {
1238
1239 /* Sun Nov 6 08:49:37 1994 ; ANSI C's asctime() format */
1240 year = exp_value + 19;
1241 if (*year == ' ') year++;
1242
1243 }
1244
1245 if (year) {
1246 _u32 yval = atoi(year);
1247
1248 if (yval < 1000) {
1249 yval += 1900; /* 94 -> 1994, 104 -> 2004 */
1250 if (yval < 1970) yval += 100; /* 03 -> 2003, 93 -> 1993 */
1251 }
1252
1253 if (yval < 2008) ret->ex10intent = INTENT_PRIV;
1254
1255 }
1256
1257 }
1258
1259 }
1260
1261 /* Headers read. Grab the actual payload, regardless of content
1262 length (but note a discrepancy, if present). */
1263
1264 while (1) {
1265 _u8 buf[1024];
1266 _s32 i;
1267
1268 if ((i = fread(buf,1,1024,server)) <= 0) break;
1269
1270 ret->payload = realloc(ret->payload, ret->payload_len + i + 1);
1271 if (!ret->payload) fatal("out of memory");
1272
1273 memcpy(ret->payload + ret->payload_len, buf, i);
1274 ret->payload_len += i;
1275
1276 if (ret->payload_len > MAXPAYLOAD)
1277 http_error(client,"Response size limit exceeded",0);
1278
1279 }
1280
1281 if (ret->payload_len)
1282 ret->payload[ret->payload_len] = 0;
1283
1284 /* Let payload_len < decl_clen slip through - transmission errors happen. */
1285
1286 if (decl_clen >= 0 && ret->payload_len > decl_clen)
1287 ret->has_badclen = 1;
1288
1289 fflush(server);
1290 fclose(server);
1291
1292 return ret;
1293
1294 }
1295
1296
1297 /* Just send data back to client. */
send_response(FILE * client,struct http_response * r)1298 void send_response(FILE* client, struct http_response* r) {
1299 _u32 i;
1300
1301 setvbuf(client, cli_buf, _IOFBF, sizeof(cli_buf));
1302
1303 fprintf(client,
1304 "HTTP/1.0 %u Proxied response\r\n"
1305 "Connection: close\r\n"
1306 #ifdef FORCE_NOCACHE
1307 "Pragma: no-cache\r\n"
1308 "Expires: Fri, 01 Jan 1990 00:00:00 GMT\r\n"
1309 "Cache-Control: no-cache, must-revalidate\r\n"
1310 #endif /* FORCE_NOCACHE */
1311 "Content-Length: %u\r\n", r->code, r->payload_len);
1312
1313 for (i=0;i<r->h.c;i++) {
1314
1315 #ifdef FORCE_NOCACHE
1316 if (!strcasecmp(r->h1[i],"Expires")) continue;
1317 if (!strcasecmp(r->h1[i],"Last-Modified")) continue;
1318 if (!strcasecmp(r->h1[i],"Cache-Control")) continue;
1319 if (!strcasecmp(r->h1[i],"Pragma")) continue;
1320 #endif /* FORCE_NOCACHE */
1321
1322 fprintf(client,"%s: %s\r\n",r->h.v1[i],r->h.v2[i]);
1323 }
1324
1325 fprintf(client,"\r\n");
1326
1327 if (r->payload_len)
1328 fwrite(r->payload,r->payload_len,1,client);
1329
1330 fflush(client);
1331 fclose(client);
1332
1333 }
1334
1335
1336
1337 /* Calculate a checksum for response payload */
checksum_response(struct http_response * r)1338 void checksum_response(struct http_response* r) {
1339 MD5_CTX ctx;
1340 _u8 res[16];
1341
1342 if (use_len) {
1343 r->cksum = r->payload_len;
1344 return;
1345 }
1346
1347 if (!r->payload_len) return;
1348
1349 MD5_Init(&ctx);
1350 MD5_Update(&ctx, r->payload, r->payload_len);
1351 MD5_Final((char*)res, &ctx);
1352
1353 r->cksum = *(_u64*)res;
1354
1355 }
1356
1357
1358 /* Attempt charset sniffing inside the payload; currently, supports HTML http-equiv only;
1359 kinda fuzzy, but should be good enough. */
1360
1361 /* TODO: Make this a bit more robust; reversed http-equiv / content order is
1362 not detected, for example. */
1363
detect_charset(struct http_response * r)1364 void detect_charset(struct http_response* r) {
1365 _u8 sniffed[33];
1366 _u32 i, max;
1367 _u8 got_equiv = 0;
1368
1369 if (r->payload_len > CHARSNIFF) max = CHARSNIFF; else max = r->payload_len;
1370
1371 for (i=0;i<max;i++) {
1372
1373 if (r->payload[i] < 0x20 && !isspace(r->payload[i])) break;
1374
1375 if (!strncasecmp(r->payload+i,"http-equiv",10)) got_equiv = 1;
1376
1377 if (r->payload[i] == '>') got_equiv = 0;
1378
1379 if (got_equiv && !strncasecmp(r->payload+i,"charset=",8)) {
1380 _u32 p = 0;
1381 _u8* cp = r->payload + i + 8;
1382 while (p < 32 && (isalnum(*cp) || *cp == '-' || *cp == '_')) sniffed[p++] = *(cp++);
1383 sniffed[p] = 0;
1384 break;
1385 }
1386
1387 }
1388
1389 if (i != max) {
1390 if (r->charset && strcasecmp(sniffed,r->charset)) r->has_multiple = 1;
1391 r->charset = strdup(sniffed);
1392 if (!r->charset) fatal("out of memory");
1393 }
1394
1395 if (!r->charset) return;
1396
1397 i = 0;
1398 while (valid_charsets[i]) {
1399 if (!strcasecmp(r->charset,valid_charsets[i])) return;
1400 i++;
1401 }
1402
1403 /* But note that utf8, iso_8859_2, etc, are not recognized and lead to XSS... */
1404 r->bad_cset = 1;
1405
1406 if (!r->charset[0]) r->charset = 0;
1407
1408 }
1409
1410
/* Map a nibble value (0..15) to its lowercase hexadecimal digit. */
#define TOHEX(c) ("0123456789abcdef"[(c)])
1412
1413 /* Sanitize output; make sure it's easily reversible, too. */
S(_u8 * string,_u8 nl)1414 _u8* S(_u8* string, _u8 nl) {
1415 _u8* ret = malloc(MAXTOKEN + 10 /* �...\0 */), *wp = ret;
1416 if (!ret) fatal("out of memory");
1417
1418 while (*string) {
1419 switch (tolower(*string)) {
1420
1421 /* Well, we kind-of want to maintain readaibility of text output, so let's
1422 pay the price and let '&' through. */
1423
1424 case '&':
1425
1426 /* Quote literally */
1427 case 'a' ... 'z':
1428 case '0' ... '9':
1429 case ' ': case '+': case '!': case '@': case '#': case '$':
1430 case '%': case '^': case '*': case '(': case ')': case '-':
1431 case '_': case '=': case '{': case '[': case '}': case ']':
1432 case ':': case ';': case ',': case '.': case '?': case '/':
1433 case '~': case '`': case '\\':
1434 *(wp++) = *string;
1435 break;
1436
1437 /* These can be harmful or confusing, so replace with HTML entities */
1438 case '"':
1439 case '\'':
1440 case '<':
1441 case '>':
1442 case '|':
1443 case 127 ... 255:
1444
1445 entitify:
1446
1447 *(wp++) = '&';
1448 *(wp++) = '#';
1449 *(wp++) = 'x';
1450 *(wp++) = TOHEX(*string / 16);
1451 *(wp++) = TOHEX(*string % 16);
1452 *(wp++) = ';';
1453 break;
1454
1455 /* Replace with shorthand codes */
1456 case '\r':
1457 if (nl) {
1458 *(wp++) = *string;
1459 } else {
1460 *(wp++) = '\\';
1461 *(wp++) = 'r';
1462 }
1463 break;
1464
1465 case '\n':
1466 if (nl) {
1467 *(wp++) = *string;
1468 } else {
1469 *(wp++) = '\\';
1470 *(wp++) = 'n';
1471 }
1472 break;
1473
1474 case '\t':
1475 if (nl) {
1476 *(wp++) = *string;
1477 } else {
1478 *(wp++) = '\\';
1479 *(wp++) = 't';
1480 }
1481 break;
1482
1483 /* Replace with hex tokens */
1484 default:
1485 if (nl) goto entitify;
1486 *(wp++) = '\\';
1487 *(wp++) = 'x';
1488 *(wp++) = TOHEX(*string / 16);
1489 *(wp++) = TOHEX(*string % 16);
1490
1491 }
1492
1493 if (wp - ret >= MAXTOKEN) {
1494 *(wp++) = '.';
1495 *(wp++) = '.';
1496 *(wp++) = '.';
1497 break;
1498 }
1499
1500 string++;
1501
1502 }
1503
1504 *(wp++) = 0;
1505 return ret;
1506
1507 }
1508
1509
1510