1 /*
2 * Copyright 2007 Niels Provos <provos@citi.umich.edu>
3 * All rights reserved.
4 */
5
6 #include <sys/types.h>
7
8 #ifdef HAVE_CONFIG_H
9 #include "config.h"
10 #endif
11
12 #include <sys/time.h>
13 #include <sys/param.h>
14 #include <sys/queue.h>
15 #include <sys/tree.h>
16 #include <stdlib.h>
17 #include <stdio.h>
18 #include <string.h>
19 #include <err.h>
20 #include <unistd.h>
21 #include <assert.h>
22 #include <time.h>
23
24 #include <event.h>
25 #include <evhttp.h>
26
27 #include "spybye.gen.h"
28 #include "status.h"
29 #include "utils.h"
30 #include "log.h"
31 #include "proxy.h"
32
/* Debug flag; declared extern here, so it is defined in another file. */
extern int debug;

/* Global traffic counters; stats_server() prints its num_* fields. */
struct stats statistics;

/* URL pattern lists consulted when classifying a site. */
static struct pattern_obj good_patterns;
static struct pattern_obj bad_patterns;

/*
 * Forward declarations.  site_print_analysis() is defined further down;
 * the other two are not defined in the part of the file shown here.
 */
static int status_patterns(struct pattern_obj *data, struct evbuffer *databuf);
static void site_print_analysis(struct evbuffer *databuf, struct site *site);
static void inform_cache_notfound(struct evhttp_request *request,
    const char *url);
44
45 /* structure where we keep track of sites */
46
/*
 * Smaller of two values; only defined when no header supplied it already.
 * Each argument may be evaluated twice, so avoid side effects in them.
 */
#ifndef MIN
#define MIN(x, y) ((x) < (y) ? (x) : (y))
#endif
50
51 int
site_compare(struct site * a,struct site * b)52 site_compare(struct site *a, struct site *b)
53 {
54 static char atmp[HTTP_MAX_URL], btmp[HTTP_MAX_URL];
55 char *a_url, *b_url;
56 char *a_slash, *b_slash;
57 int host_len, res;
58 assert(strlen(a->url) >= sizeof(HTTP_PREFIX));
59 assert(strlen(b->url) >= sizeof(HTTP_PREFIX));
60
61 a_url = a->url;
62 b_url = b->url;
63 a_slash = strchr(a_url + sizeof(HTTP_PREFIX), '/');
64 b_slash = strchr(b_url + sizeof(HTTP_PREFIX), '/');
65 if (a_slash == NULL) {
66 snprintf(atmp, sizeof(atmp), "%s/", a_url);
67 a_url = atmp;
68 a_slash = strchr(a_url + sizeof(HTTP_PREFIX), '/');
69 assert(a_slash != NULL);
70 }
71
72 if (b_slash == NULL) {
73 snprintf(btmp, sizeof(btmp), "%s/", b_url);
74 b_url = btmp;
75 b_slash = strchr(b_url + sizeof(HTTP_PREFIX), '/');
76 assert(b_slash != NULL);
77 }
78
79 host_len = MIN((int)(a_slash - a_url), (int)(b_slash - b_url));
80 res = strncasecmp(a_url, b_url, host_len);
81 if (res)
82 return (res);
83
84 return strcmp(a_slash, b_slash);
85 }
86
/*
 * Splay tree holding every known site, ordered by site_compare() and
 * linked through the `node` member of struct site.
 */
static SPLAY_HEAD(site_tree, site) root;

SPLAY_PROTOTYPE(site_tree, site, node, site_compare);
SPLAY_GENERATE(site_tree, site, node, site_compare);
91
92 static int
find_url_in_patterns(struct patternq * head,const char * url)93 find_url_in_patterns(struct patternq* head, const char *url)
94 {
95 struct pattern *entry;
96 TAILQ_FOREACH(entry, head, next) {
97 if (match_url(url, entry->pattern_host, entry->pattern_uri))
98 return (1);
99 }
100
101 return (0);
102 }
103
104 int
site_same_as_parent(struct site * site)105 site_same_as_parent(struct site *site)
106 {
107 static char parent_host[1024];
108 struct site *parent = site->parent;
109 char *host, *uri;
110 u_short port;
111
112 if (parent == NULL)
113 return (1);
114
115 while (parent) {
116 if (parent->parent == NULL)
117 break;
118 parent = parent->parent;
119 }
120
121 if (http_hostportfile(parent->url, &host, &port, &uri) == -1)
122 return (0);
123 strlcpy(parent_host, host, sizeof(parent_host));
124
125 return (match_url(site->url, parent_host, NULL));
126 }
127
128 /*
129 * Returns true if the site itself matches the bad patterns list
130 */
131
132 int
site_matches_bad_patterns(struct site * site)133 site_matches_bad_patterns(struct site *site)
134 {
135 return (find_url_in_patterns(&good_patterns.head, site->url));
136 }
137
138 enum DANGER_TYPES
site_child_danger(struct site * site)139 site_child_danger(struct site *site)
140 {
141 enum DANGER_TYPES danger = UNKNOWN;
142
143 if (site_same_as_parent(site))
144 danger = HARMLESS;
145
146 if (find_url_in_patterns(&good_patterns.head, site->url))
147 danger = HARMLESS;
148 if (find_url_in_patterns(&bad_patterns.head, site->url))
149 danger = DANGEROUS;
150
151 return (danger);
152 }
153
154 enum DANGER_TYPES
site_recurse_danger(struct site * site)155 site_recurse_danger(struct site* site)
156 {
157 enum DANGER_TYPES danger = site->danger;
158 struct site *child;
159
160 TAILQ_FOREACH(child, &site->children, next) {
161 enum DANGER_TYPES cur = site_recurse_danger(child);
162 if (cur > danger)
163 danger = cur;
164 }
165
166 return (danger);
167 }
168
169 enum DANGER_TYPES
site_analyze_danger(struct site * site)170 site_analyze_danger(struct site *site)
171 {
172 enum DANGER_TYPES danger = HARMLESS;
173 struct site *child;
174
175 if (site->parent != NULL)
176 return site_child_danger(site);
177
178 if (site->html_size == 0 && TAILQ_FIRST(&site->children) == NULL)
179 return (UNKNOWN);
180
181 TAILQ_FOREACH(child, &site->children, next) {
182 enum DANGER_TYPES cur = site_recurse_danger(child);
183 if (cur > danger)
184 danger = cur;
185 }
186
187 /* find the highest danger of children for the root */
188
189 return (danger);
190 }
191
192 static void
site_dispatch_callbacks(struct site * site)193 site_dispatch_callbacks(struct site *site)
194 {
195 struct site_callback *cb;
196 while ((cb = TAILQ_FIRST(&site->callbacks)) != NULL) {
197 DNFPRINTF(1, (stderr, "[DEBUG] Dispatching callbacks for %s\n",
198 site->url));
199 TAILQ_REMOVE(&site->callbacks, cb, next);
200 (*cb->cb)(site, cb->cb_arg);
201 free(cb);
202 }
203 }
204
205 /*
206 * makes everything up the tree dangerous
207 * XXX: is this the right thing to do???
208 */
209
210 void
site_make_dangerous(struct site * site)211 site_make_dangerous(struct site *site)
212 {
213 /* trigger the callbacks up the tree */
214 while (site) {
215 DNFPRINTF(1, (stderr, "[DEBUG] Making %s dangerous\n",
216 site->url));
217 site->danger = DANGEROUS;
218
219 site_dispatch_callbacks(site);
220 site = site->parent;
221 }
222 }
223
224 int
site_count_dangerous(struct site * site)225 site_count_dangerous(struct site *site)
226 {
227 struct site *child;
228 int total = 0;
229
230 TAILQ_FOREACH(child, &site->children, next) {
231 total += site_count_dangerous(child);
232 }
233
234 if (site->danger == DANGEROUS)
235 total += 1;
236
237 return (total);
238 }
239
240 void
site_complete(int fd,short what,void * arg)241 site_complete(int fd, short what, void *arg)
242 {
243 struct site *site = arg;
244 struct timeval tv;
245
246 site->danger = site_analyze_danger(site);
247
248 gettimeofday(&tv, NULL);
249 timersub(&tv, &site->tv_change, &tv);
250 assert(site->tv_change.tv_sec);
251 if (tv.tv_sec >= IDLE_TIME) {
252 DNFPRINTF(1, (stderr, "[DEBUG] Analysis for %s complete\n",
253 site->url));
254 site->flags |= ANALYSIS_COMPLETE;
255 site_dispatch_callbacks(site);
256 } else {
257 timerclear(&tv);
258 tv.tv_sec = 1;
259 evtimer_add(&site->ev_complete, &tv);
260 }
261 }
262
263 void
site_expire(int fd,short what,void * arg)264 site_expire(int fd, short what, void *arg)
265 {
266 struct site *site = arg;
267
268 fprintf(stderr, "[STATE] Expiring %s\n", site->url);
269 site_free(site);
270 }
271
272 void
site_change_time(struct site * parent,struct timeval * tv)273 site_change_time(struct site *parent, struct timeval *tv)
274 {
275 while (parent != NULL) {
276 /* only expire from the top */
277 if (parent->parent == NULL) {
278 struct timeval tv_timeout;
279 /* update the expiration time */
280 timerclear(&tv_timeout);
281 tv_timeout.tv_sec = STATE_EXPIRATION_TIME;
282
283 evtimer_add(&parent->ev_timeout, &tv_timeout);
284 }
285
286 parent->tv_change = *tv;
287 parent = parent->parent;
288 }
289 }
290
291 void
site_disassociate_parent(struct site * site)292 site_disassociate_parent(struct site *site)
293 {
294 struct timeval tv;
295 struct site *parent = site->parent;
296 if (parent == NULL)
297 return;
298
299 TAILQ_REMOVE(&parent->children, site, next);
300 site->parent = NULL;
301
302 /* make sure that we get an expiration time for this site */
303 gettimeofday(&tv, NULL);
304 site_change_time(site, &tv);
305 }
306
307 struct site *
site_find(const char * url)308 site_find(const char *url)
309 {
310 struct site tmp;
311
312 tmp.url = (char *)url;
313 return (SPLAY_FIND(site_tree, &root, &tmp));
314 }
315
316 struct site *
site_new(const char * url,const char * parent_url)317 site_new(const char *url, const char *parent_url)
318 {
319 struct site *site, tmp, *parent = NULL;
320 struct timeval tv;
321
322 tmp.url = (char *)url;
323 if ((site = SPLAY_FIND(site_tree, &root, &tmp)) != NULL) {
324 /* we already got a match - what now? */
325 goto done;
326 }
327
328 if (parent_url != NULL) {
329 tmp.url = (char *)parent_url;
330 parent = SPLAY_FIND(site_tree, &root, &tmp);
331
332 /* nobody should be able to fake a request */
333 if (parent == NULL)
334 return (NULL);
335 }
336
337 if ((site = calloc(1, sizeof(struct site))) == NULL)
338 err(1, "calloc");
339
340 TAILQ_INIT(&site->callbacks);
341
342 TAILQ_INIT(&site->children);
343 if (parent != NULL) {
344 site->parent = parent;
345 TAILQ_INSERT_TAIL(&parent->children, site, next);
346 }
347
348 if ((site->url = strdup(url)) == NULL)
349 err(1, "strdup");
350
351 site->danger = site_analyze_danger(site);
352 if (site->danger == DANGEROUS) {
353 /* allows us to find callbacks */
354 site_make_dangerous(site);
355 }
356 SPLAY_INSERT(site_tree, &root, site);
357
358 evtimer_set(&site->ev_timeout, site_expire, site);
359 evtimer_set(&site->ev_complete, site_complete, site);
360
361 done:
362 /* update the last time a tree was updated */
363 gettimeofday(&tv, NULL);
364 site_change_time(site, &tv);
365
366 return (site);
367 }
368
369 void
site_free(struct site * site)370 site_free(struct site *site)
371 {
372 struct site *child;
373 struct site_callback *cb;
374
375 SPLAY_REMOVE(site_tree, &root, site);
376
377 event_del(&site->ev_timeout);
378 event_del(&site->ev_complete);
379
380 while ((child = TAILQ_FIRST(&site->children)) != NULL) {
381 TAILQ_REMOVE(&site->children, child, next);
382 child->parent = NULL;
383 site_free(child);
384 }
385
386 while ((cb = TAILQ_FIRST(&site->callbacks)) != NULL) {
387 TAILQ_REMOVE(&site->callbacks, cb, next);
388 (*cb->cb)(site, cb->cb_arg);
389 free(cb);
390 }
391
392 if (site->parent) {
393 TAILQ_REMOVE(&site->parent->children, site, next);
394 }
395
396 if (site->virus_result != NULL)
397 free(site->virus_result);
398 if (site->firstline != NULL)
399 free(site->firstline);
400 if (site->html_data != NULL)
401 free(site->html_data);
402 free(site->url);
403 free(site);
404 }
405
406 void
site_insert_callback(struct site * site,void (* cb)(struct site *,void *),void * cb_arg)407 site_insert_callback(struct site *site,
408 void (*cb)(struct site *, void *), void *cb_arg)
409 {
410 struct site_callback *ctx = malloc(sizeof(struct site_callback));
411 assert(ctx != NULL);
412
413 ctx->cb = cb;
414 ctx->cb_arg = cb_arg;
415 TAILQ_INSERT_TAIL(&site->callbacks, ctx, next);
416 }
417
/*
 * Appends printf-style formatted text to the evbuffer named `databuf`,
 * which must be in scope wherever the macro is used.
 */
#define HTML_PRINT(...) evbuffer_add_printf(databuf, __VA_ARGS__)
419
420 /* stores the data associated with this site */
421
422 void
site_cache_data(struct site * site,const struct evhttp_request * req)423 site_cache_data(struct site *site, const struct evhttp_request *req)
424 {
425 static char firstline[128];
426
427 if (site->firstline != NULL)
428 free(site->firstline);
429 if (site->html_data != NULL)
430 free(site->html_data);
431
432 fprintf(stderr, "[CACHE] Caching %ld bytes for %s (%s)\n",
433 EVBUFFER_LENGTH(req->input_buffer),
434 site->url, danger_to_text(site->danger));
435
436 site->html_size = EVBUFFER_LENGTH(req->input_buffer);
437 site->html_data = malloc(site->html_size);
438 if (site->html_data == NULL)
439 err(1, "malloc");
440
441 memcpy(site->html_data, EVBUFFER_DATA(req->input_buffer),
442 site->html_size);
443
444 snprintf(firstline, sizeof(firstline), "HTTP/1.%d %d %s",
445 req->minor, req->response_code, req->response_code_line);
446 if ((site->firstline = strdup(firstline)) == NULL)
447 err(1, "strdup");
448 }
449
450
451 const char *
danger_to_text(enum DANGER_TYPES danger)452 danger_to_text(enum DANGER_TYPES danger)
453 {
454 switch (danger) {
455 case HARMLESS:
456 return "harmless";
457 case DANGEROUS:
458 return "dangerous";
459 case UNKNOWN:
460 default:
461 return "unknown";
462 }
463 }
464
465 static void
site_print_children(struct evbuffer * databuf,struct site * site,enum DANGER_TYPES desired_level)466 site_print_children(struct evbuffer *databuf, struct site *site,
467 enum DANGER_TYPES desired_level)
468 {
469 struct site *child;
470 TAILQ_FOREACH(child, &site->children, next) {
471 if (child->danger != desired_level)
472 continue;
473
474 HTML_PRINT("<li>");
475 site_print_analysis(databuf, child);
476 HTML_PRINT("</li>");
477 }
478 }
479
480 static void
site_print_analysis(struct evbuffer * databuf,struct site * site)481 site_print_analysis(struct evbuffer *databuf, struct site *site)
482 {
483 char *uri_escaped = evhttp_encode_uri(site->url);
484 char *html_escaped = evhttp_htmlescape(site->url);
485 HTML_PRINT("<span class=%s>%s</span> ",
486 danger_to_text(site->danger),
487 danger_to_text(site->danger));
488
489 HTML_PRINT(
490 "<a href=\"/cache/?url=%s\" target=\"_blank\">%s</a>"
491 " <span class=firstline>%s</span>"
492 " <span class=virus>%s</span>",
493 uri_escaped, html_escaped,
494 site->firstline,
495 site->virus_result != NULL ? site->virus_result : "unknown");
496
497 free(uri_escaped);
498 free(html_escaped);
499 if (TAILQ_FIRST(&site->children) != NULL) {
500 HTML_PRINT("<ul>");
501 site_print_children(databuf, site, DANGEROUS);
502 site_print_children(databuf, site, UNKNOWN);
503 site_print_children(databuf, site, HARMLESS);
504 HTML_PRINT("</ul>");
505 }
506 }
507
508 /* code to display status related html */
509
/*
 * Style sheet sent verbatim by css_server().  The class names defined
 * here (banner, tiny, version, statistics, about, traffic, sites,
 * analysis, firstline, virus, footer and the three danger levels
 * harmless/unknown/dangerous) are the ones used by the HTML generated in
 * this file.
 */
static const char *css_style =
".tiny {\n"
" color: #bbbbcc;\n"
" padding: 2px 0px 0px 2px;"
" margin-bottom: -10em;"
" font-size: 0.5em;\n"
" font-family: Verdana, Arial;\n"
"}\n"
".version {\n"
" width: 100%;"
" color: #8888bb;\n"
" padding: 0px 4px 2px 0px;"
" margin-top: -1em;"
" font-size: 0.5em;\n"
" text-align: right;\n"
" font-family: Verdana, Arial;\n"
"}\n"
".statistics h1 {\n"
" padding: 3px;"
" font-size: small;\n"
" background-color: #ccccee;\n"
" border: 1px solid;\n"
"}\n"
".about {\n"
" width: 90%;\n"
" margin: 10px;\n"
" background-color: #dcdcee;\n"
" font-family: Verdana, Arial;\n"
" border: 1px solid;\n"
" padding: 1em;\n"
"}\n"
".about h1 {\n"
" width: 60%;\n"
" background-color: #ddaa66;\n"
" border: 1px solid;\n"
" margin-top: 1em;\n"
" padding-left: 0.5em;\n"
" font-size: 1em;\n"
"}\n"
".about p {\n"
" font-size: 0.9em;\n"
" margin-left: 2em;\n"
"}\n"
".statistics {\n"
" width: 80%;\n"
" margin: 10px;\n"
" background-color: #dcdcee;\n"
" font-family: Verdana, Arial;\n"
" font-size: 0.8em;\n"
" padding: 1em;\n"
"}\n"
"table.traffic {"
" width: 300px;"
" border-width: 0px 0px 1px 1px;"
" border-spacing: 2px;"
" border-style: inset;"
" border-color: black;"
" border-collapse: collapse;"
"}"
"table.traffic td {"
" border-width: 1px 1px 0px 0px;"
" padding: 2px;"
" border-style: inset;"
" border-color: black;"
" background-color: rgb(255, 250, 220);"
" font-family: Verdana, Arial;\n"
" font-size: small;\n"
"}\n"
"table.sites {"
" border-width: 0px 0px 1px 1px;"
" border-spacing: 2px;"
" border-style: inset;"
" border-color: black;"
" border-collapse: collapse;"
"}"
"table.sites td {"
" border-width: 1px 1px 0px 0px;"
" padding: 2px;"
" border-style: inset;"
" border-color: black;"
" background-color: rgb(255, 250, 220);"
" font-family: Verdana, Arial;\n"
" font-size: small;\n"
"}\n"
".analysis {\n"
" font-family: Verdana, Arial;\n"
" font-size: small;\n"
"}\n"
".harmless { color: green }\n"
".unknown { color: orange }\n"
".firstline {\n"
" font-size: 0.9em;\n"
" font-family: Verdana, Arial;\n"
" font-weight: bold }\n"
".virus {\n"
" font-size: 0.9em;\n"
" font-family: Verdana, Arial;\n"
"}\n"
".dangerous { color: red }\n"
".banner {\n"
" background: #ddddff;\n"
" font-family: Verdana, Arial;\n"
" border: 1px solid;\n"
"}\n"
".banner h1 {\n"
" margin-top: 0em;\n"
" margin-bottom: -0.7em;\n"
" text-align: center;\n"
"}\n"
".banner ul {\n"
" margin-bottom: -0.7em;\n"
"}\n"
".banner li {\n"
" display: inline;\n"
"}\n"
"\n"
".footer {\n"
" font-family: Verdana, Arial;\n"
" font-size: 0.7em;\n"
" text-align: center;\n"
"}\n";
631
632 void
css_server(struct evhttp_request * request,void * arg)633 css_server(struct evhttp_request *request, void *arg)
634 {
635 struct evbuffer *databuf = evbuffer_new();
636 assert(databuf != NULL);
637 evhttp_add_header(request->output_headers, "Content-Type", "text/css");
638 evbuffer_add(databuf, css_style, strlen(css_style));
639
640 /* send along our data */
641 evhttp_send_reply(request, HTTP_OK, "OK", databuf);
642 evbuffer_free(databuf);
643 }
644
/* Appends the copyright footer block to `databuf`. */
static void
print_blurb(struct evbuffer *databuf)
{
	/* the text holds no conversion specifications, so one call suffices */
	HTML_PRINT(
	    "<div class=footer><hr />\n"
	    "<center>"
	    "Copyright (c) 2007 <a href=http://www.citi.umich.edu/u/provos/>"
	    "Niels Provos</a>. All Rights Reserved."
	    "</center>\n"
	    "</div>\n");
}
655
/* Appends the tags that close the document body and the page. */
static void
print_footer(struct evbuffer *databuf)
{
	evbuffer_add_printf(databuf, "</body></html>");
}
661
662 static void
print_header(struct evbuffer * databuf)663 print_header(struct evbuffer *databuf)
664 {
665 extern struct spybye_share spybye_share;
666 extern int behave_as_proxy;
667
668 HTML_PRINT(
669 "<html><head><title>"
670 "SpyBye: At Your Service"
671 "</title></head>");
672 HTML_PRINT("<link rel=stylesheet type=text/css href=/styles/css>\n");
673
674 HTML_PRINT(
675 "<body><div class=banner>\n"
676 "<span class=tiny>sharing %s</span> "
677 "<span class=tiny>proxy %s</span>"
678 "<h1>SpyBye</h1>\n"
679 "<ul>"
680 "<li><a href=\"/\">Main</a> </li>"
681 "<li><a href=\"/stats\">Statistics</a> </li>"
682 "<li><a href=\"/about\">About</a> </li>"
683 "</ul>\n"
684 "<div class=version>Version %s</div>"
685 "</div>\n",
686 spybye_share.evcon_report == NULL ?
687 "disabled" : "enabled",
688 behave_as_proxy ?
689 "on" : "off",
690 VERSION
691 );
692 }
693
/* Appends the centred URL entry form (GET to "/") to `databuf`. */
static void
print_form(struct evbuffer *databuf)
{
	static const char form_markup[] =
	    "<p><center>\n"
	    "<form name=\"input\" action=\"/\" method=\"get\">\n"
	    "Url: <input type=\"text\" name=\"url\" size=100>\n"
	    "<input type=\"submit\" value=\"Submit\">\n"
	    "</form></center>";

	HTML_PRINT("%s", form_markup);
}
704
705 static void
print_done_sites(struct evbuffer * databuf)706 print_done_sites(struct evbuffer *databuf)
707 {
708 struct site *site;
709 extern int behave_as_proxy;
710
711 if (SPLAY_ROOT(&root) == NULL)
712 return;
713
714 HTML_PRINT(
715 "<hr><div class=statistics>\n"
716 "<h1>Recent Site Analysis</h1>\n"
717 "<ul>\n");
718
719 for (site = SPLAY_MIN(site_tree, &root);
720 site != NULL; site = SPLAY_NEXT(site_tree, &root, site)) {
721 int done;
722 if (site->parent != NULL)
723 continue;
724
725 /* if we behave as proxy then all sites are done all the time */
726 done = behave_as_proxy || (site->flags & ANALYSIS_COMPLETE);
727 if (!done) {
728 char *html_escaped = evhttp_htmlescape(site->url);
729 HTML_PRINT(
730 "<li>"
731 "<span class=unknown>pending</span> %s"
732 "</li>", html_escaped);
733 free(html_escaped);
734 } else {
735 char *uri_escaped = evhttp_encode_uri(site->url);
736 char *html_escaped = evhttp_htmlescape(site->url);
737 HTML_PRINT(
738 "<li>"
739 "<span class=%s>%s</span> "
740 "<a href=\"/?url=%s&noiframe=1\">%s</a>"
741 "</li>\n",
742 danger_to_text(site->danger),
743 danger_to_text(site->danger),
744 uri_escaped,
745 html_escaped);
746 free(uri_escaped);
747 free(html_escaped);
748 }
749 }
750
751 HTML_PRINT("</ul></div>");
752 }
753
754 static void
main_server(struct evhttp_request * request,void * arg)755 main_server(struct evhttp_request *request, void *arg)
756 {
757 struct evbuffer *databuf = evbuffer_new();
758 assert(databuf != NULL);
759
760 print_header(databuf);
761
762 if (request->uri[0] != '/')
763 print_form(databuf);
764
765 print_done_sites(databuf);
766
767 print_blurb(databuf);
768
769 print_footer(databuf);
770
771 /* send along our data */
772 evhttp_send_reply(request, HTTP_OK, "OK", databuf);
773 evbuffer_free(databuf);
774 }
775
776 static void
format_dangerousload(struct evhttp_request * request,struct evbuffer * databuf,struct dangerousload * dl)777 format_dangerousload(struct evhttp_request *request,
778 struct evbuffer *databuf, struct dangerousload *dl)
779 {
780 char output[64];
781 struct tm *tm;
782 u_int tmp;
783 time_t seconds;
784 char *parent_url, *danger_url, *escaped;
785 char *virus_scan = "unknown";
786
787 EVTAG_GET(dl, time_in_seconds, &tmp);
788 EVTAG_GET(dl, parent_url, &parent_url);
789 EVTAG_GET(dl, dangerous_url, &danger_url);
790 if (EVTAG_HAS(dl, virus_result))
791 EVTAG_GET(dl, virus_result, &virus_scan);
792
793 seconds = tmp;
794 tm = localtime(&seconds);
795 strftime(output, sizeof(output), "%Y-%m-%d %H:%M:%S", tm);
796
797 escaped = evhttp_encode_uri(parent_url);
798
799 HTML_PRINT(
800 "<tr><td><span class=time>%s</span></td>"
801 "<td><span class=harmless>"
802 "<a href=\"/?url=%s\">%s</a></span></td>"
803 "<td><span class=dangerous>%s</span></li></td>"
804 "<td><span class=harmless>%s</span></li></td>"
805 "</tr>",
806 output, escaped, parent_url, danger_url, virus_scan);
807
808 free(escaped);
809 }
810
811 static void
stats_server(struct evhttp_request * request,void * arg)812 stats_server(struct evhttp_request *request, void *arg)
813 {
814 extern struct dangerq danger;
815 struct dangerous_container *entry;
816 struct evbuffer *databuf = evbuffer_new();
817 char good_time[30], bad_time[30];
818 struct tm *tm;
819 int count = 0;
820 assert(databuf != NULL);
821
822 print_header(databuf);
823
824 if (request->uri[0] != '/')
825 print_form(databuf);
826
827 HTML_PRINT("<hr />");
828
829 /* some basic statistics */
830 HTML_PRINT(
831 "<div class=statistics>\n"
832 "<h1>Traffic Statistics</h1>\n"
833 "<table><tr><td valign=top>"
834 "<table class=traffic>"
835 "<tr><td>Requests</td><td>%d</td></tr>\n"
836 "<tr><td>Harmless</td><td>%d</td></tr>\n"
837 "<tr><td>Unknown</td><td>%d</td></tr>\n"
838 "<tr><td>Dangerous</td><td>%d</td></tr>\n"
839 "</table></td><td valign=top>",
840 statistics.num_requests,
841 statistics.num_harmless,
842 statistics.num_unknown,
843 statistics.num_dangerous);
844
845 tm = localtime((time_t *)&good_patterns.tv_load.tv_sec);
846 strftime(good_time, sizeof(good_time), "%Y-%m-%d %H:%M:%S", tm);
847 tm = localtime((time_t *)&bad_patterns.tv_load.tv_sec);
848 strftime(bad_time, sizeof(bad_time), "%Y-%m-%d %H:%M:%S", tm);
849
850 HTML_PRINT(
851 "<table class=traffic>"
852 "<tr><td>Bad Patterns</td><td>%d</td><td>%s</td></tr>\n"
853 "<tr><td>Good Patterns</td><td>%d</td><td>%s</td></tr>\n"
854 "</table></td></tr></table>",
855 bad_patterns.count,
856 bad_time,
857 good_patterns.count,
858 good_time
859 );
860
861 HTML_PRINT("</div><div style=\"clear:both;\"></div>\n");
862
863 HTML_PRINT(
864 "<div class=statistics>\n"
865 "<h1>Dangerous Sites</h1>\n"
866 "<table class=sites>");
867
868 TAILQ_FOREACH(entry, &danger, next) {
869 if (count++ > MAX_RECENT_RESULTS)
870 break;
871
872 format_dangerousload(request, databuf, entry->dl);
873 }
874
875 HTML_PRINT("</table></div>\n");
876 print_blurb(databuf);
877
878 print_footer(databuf);
879
880 /* send along our data */
881 evhttp_send_reply(request, HTTP_OK, "OK", databuf);
882 evbuffer_free(databuf);
883 }
884
885 static void
about_server(struct evhttp_request * request,void * arg)886 about_server(struct evhttp_request *request, void *arg)
887 {
888 struct evbuffer *databuf = evbuffer_new();
889 assert(databuf != NULL);
890
891 print_header(databuf);
892
893 if (request->uri[0] != '/')
894 print_form(databuf);
895
896 HTML_PRINT("<hr>"
897 "<div class=about>"
898 "<h1>What is SpyBye?</h1>"
899 "<p>SpyBye is a tool to help web masters determine if their web "
900 "pages are hosting browser exploits that can infect visiting "
901 "users with malware. It functions as an HTTP proxy server and "
902 "intercepts all browser requests. SpyBye uses a few simple rules "
903 "to determine if embedded links on your web page are harmlesss, "
904 "unknown or maybe even dangerous.</p>"
905 "<h1>Why did you write SpyBye?</h1>"
906 "<p>It has become increasingly common for web sites to get "
907 "compromised. This can happen either due to vulnerable "
908 "web applications that you run or due to compromised servers "
909 "via vectors completely out of your control. Nonetheless, it "
910 "is important for web masters to be able to tell if their pages "
911 "are dangerous to their users. SpyBye provides a very simple "
912 "mechanism to determine how a site works on the HTTP level. "
913 "This often gives us clues about potentially dangerous content. "
914 "I hope that SpyBye can be of use to anyone who wants to verify "
915 "if their web site could be compromised and dangerous.</p>"
916 "<p>The unoffical explanation is that I needed some code to "
917 "test <a href=http://www.monkey.org/~provos/libevent>"
918 "libevent</a>'s HTTP layer; writing a proxy exercises most "
919 "of the code paths.</p>"
920 "<h1>How does SpyBye work?</h1>"
921 "<p>SpyBye operates as a proxy server and gets to see all the "
922 "web fetches that your browser makes. It applies very simple "
923 "rules to each URL that is fetched as a result of loading a "
924 "web page. These rules allows us to classify a URL into three "
925 "categories: harmless, unknown or dangerous. Although, there is "
926 "great margin of error, the categories allow a web master to "
927 "look at the URLs and determine if they should be there or not. "
928 "If you see that a URL is being fetched that you would not "
929 "expect, it's a good indication you have been copromised.</p>"
930 "<h1>Disclaimer</h1>"
931 "<p>SpyBye does not protect you from getting exploited yourself. "
932 "It tries to take reasonable precautions to avoid infection while "
933 "using it. However, ideally, you would run your browser in a "
934 "virtual machine and revert to a clean snapshot when done. "
935 "You have been warned. Today's malware is capable of rendering "
936 "your computer unusable - and empty your bank accounts! "
937 "<span style=\"font-size: 0.25em\">"
938 "THIS SOFTWARE IS PROVIDED BY THE AUTHOR "
939 "``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, "
940 "BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY "
941 "AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO "
942 "EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, "
943 "INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES "
944 "(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE "
945 "GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS "
946 "INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, "
947 "WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING "
948 "NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF "
949 "THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE."
950 "</span>"
951 "</p>"
952 "</div>");
953
954 print_blurb(databuf);
955
956 print_footer(databuf);
957
958 /* send along our data */
959 evhttp_send_reply(request, HTTP_OK, "OK", databuf);
960 evbuffer_free(databuf);
961 }
962
963
964 void
results_server(struct evhttp_request * request,void * arg)965 results_server(struct evhttp_request *request, void *arg)
966 {
967 struct site *site, tmp;
968 struct evkeyvalq args;
969 struct evbuffer *databuf = evbuffer_new();
970 const char *url = NULL;
971 int done = 0;
972 assert(databuf != NULL);
973
974 TAILQ_INIT(&args);
975
976 evhttp_parse_query(request->uri, &args);
977
978 url = evhttp_find_header(&args, "url");
979
980 if (url == NULL)
981 goto fail;
982
983 tmp.url = (char *)url;
984 site = SPLAY_FIND(site_tree, &root, &tmp);
985
986 HTML_PRINT(
987 "<html><head><title>"
988 "SpyBye: Results</title>\n");
989 done = site == NULL ||
990 (site->flags & ANALYSIS_COMPLETE) ||
991 !event_pending(&site->ev_complete, EV_TIMEOUT, NULL);
992 if (!done)
993 HTML_PRINT("<meta http-equiv=\"refresh\" content=\"2\">\n");
994
995 HTML_PRINT("</head>");
996 HTML_PRINT("<link rel=stylesheet type=text/css href=/styles/css>\n");
997
998 HTML_PRINT("<body>");
999
1000 tmp.url = (char *)url;
1001 if (site != NULL) {
1002 char *url_escaped = evhttp_htmlescape(site->url);
1003 HTML_PRINT("%s %s found %d dangerous links.<p>\n",
1004 done ? "<span class=harmless>Complete</span>" :
1005 "<span class=unknown>Pending</span>",
1006 url_escaped, site_count_dangerous(site));
1007 free(url_escaped);
1008
1009 HTML_PRINT("<div class=analysis>\n");
1010 site_print_analysis(databuf, site);
1011 HTML_PRINT("</div>\n");
1012 }
1013
1014 if (done) {
1015 HTML_PRINT("<p>The analysis of this URL is complete. "
1016 "Take a look at all URLs that have been marked "
1017 "either <b>unknown</b> or <b>dangerous</b>");
1018 } else {
1019 /* completion timers fire only in non-proxy mode */
1020 struct timeval tv;
1021 gettimeofday(&tv, NULL);
1022 timersub(&tv, &site->tv_change, &tv);
1023
1024 HTML_PRINT("<p>Analysis is going to take %d more seconds.",
1025 IDLE_TIME - tv.tv_sec);
1026 }
1027
1028 HTML_PRINT("</body></html>");
1029
1030 evhttp_clear_headers(&args);
1031
1032 /* send along our data */
1033 evhttp_send_reply(request, HTTP_OK, "OK", databuf);
1034 evbuffer_free(databuf);
1035 return;
1036
1037 fail:
1038 evhttp_clear_headers(&args);
1039 evhttp_send_error(request, HTTP_BADREQUEST, "You must be kidding.");
1040 }
1041
1042 void
query_server(struct evhttp_request * request,void * arg)1043 query_server(struct evhttp_request *request, void *arg)
1044 {
1045 static char fixed_url[PATH_MAX];
1046 struct evkeyvalq args;
1047 struct evbuffer *databuf = evbuffer_new();
1048 struct site *site = NULL;
1049 char *url_relative = NULL;
1050 const char *no_iframe_arg = NULL;
1051 const char *url = NULL;
1052 int no_iframe = 0;
1053 assert(databuf != NULL);
1054
1055 TAILQ_INIT(&args);
1056
1057 evhttp_parse_query(request->uri, &args);
1058
1059 url = evhttp_find_header(&args, "url");
1060 no_iframe_arg = evhttp_find_header(&args, "noiframe");
1061 no_iframe = no_iframe_arg != NULL && strcmp(no_iframe_arg, "1") == 0;
1062
1063 print_header(databuf);
1064
1065 if (request->uri[0] != '/' && !no_iframe)
1066 print_form(databuf);
1067
1068 print_blurb(databuf);
1069
1070 if (url == NULL) {
1071 HTML_PRINT("Did not receive a URL. You loose.");
1072 goto done;
1073 }
1074
1075 if (http_hostportfile(url, NULL, NULL, NULL) == -1) {
1076 /* if they did not prefix with http://, try to fix for them */
1077 strlcpy(fixed_url, HTTP_PREFIX, sizeof(fixed_url));
1078 strlcat(fixed_url, url, sizeof(fixed_url));
1079 url = fixed_url;
1080 if (http_hostportfile(url, NULL, NULL, NULL) == -1) {
1081 HTML_PRINT("Did not receive a URL. You loose.");
1082 goto done;
1083 }
1084 }
1085
1086 site = site_new(url, NULL);
1087 url_relative = evhttp_encode_uri(url);
1088
1089 if (site == NULL)
1090 goto fail;
1091
1092 HTML_PRINT(
1093 "<iframe src=\"/results/?url=%s\" "
1094 "width=100%% height=50%%>\n"
1095 "Missing iframe support</iframe><p>\n",
1096 url_relative);
1097 free(url_relative);
1098
1099 /*
1100 * do not inject an iframe for the site to be tested if the analysis
1101 * is complete; or if the user directly requested that no iframe be
1102 * displayed.
1103 */
1104 if ((site->flags & ANALYSIS_COMPLETE) == 0 && !no_iframe) {
1105 char *url_escaped = evhttp_htmlescape(url);
1106 HTML_PRINT(
1107 "<iframe src=\"%s\" "
1108 "width=100%% height=50%% security=restricted>\n"
1109 "Missing iframe support"
1110 "</iframe>\n",
1111 url_escaped);
1112 free(url_escaped);
1113
1114 /* start the completion timer */
1115 site_complete(-1, 0, site);
1116 }
1117
1118 done:
1119 print_footer(databuf);
1120
1121 evhttp_clear_headers(&args);
1122
1123 /* send along our data */
1124 evhttp_send_reply(request, HTTP_OK, "OK", databuf);
1125 evbuffer_free(databuf);
1126 return;
1127
1128 fail:
1129 evhttp_clear_headers(&args);
1130 evhttp_send_error(request, HTTP_SERVUNAVAIL, "Another eval is on");
1131 }
1132
1133 void
cache_server(struct evhttp_request * request,void * arg)1134 cache_server(struct evhttp_request *request, void *arg)
1135 {
1136 struct timeval tv;
1137 struct evkeyvalq args;
1138 struct evbuffer *databuf = evbuffer_new();
1139 struct site *site;
1140 const char *url = NULL;
1141 char *escaped;
1142 assert(databuf != NULL);
1143
1144 TAILQ_INIT(&args);
1145
1146 evhttp_parse_query(request->uri, &args);
1147
1148 url = evhttp_find_header(&args, "url");
1149 site = site_find(url);
1150
1151 if (site == NULL || site->html_data == NULL)
1152 goto fail;
1153
1154 evhttp_clear_headers(&args);
1155
1156 /* somebody showed interst in this page - let it not expire yet */
1157 gettimeofday(&tv, NULL);
1158 site_change_time(site, &tv);
1159
1160 /* NUL terminate */
1161 evbuffer_add(databuf, site->html_data, site->html_size);
1162 evbuffer_add(databuf, "", 1);
1163
1164 escaped = evhttp_htmlescape((char *)EVBUFFER_DATA(databuf));
1165
1166 evbuffer_drain(databuf, -1);
1167
1168 HTML_PRINT("<html><head><title>raw dump</title></head><body>");
1169 HTML_PRINT("<pre>%s</pre>", escaped);
1170 free(escaped);
1171 HTML_PRINT("</body></html>");
1172
1173 /* send along our data */
1174 evhttp_send_reply(request, HTTP_OK, "OK", databuf);
1175 evbuffer_free(databuf);
1176 return;
1177
1178 fail:
1179 inform_cache_notfound(request, url);
1180 evhttp_clear_headers(&args);
1181 }
1182
1183 int
spybye_handle_request(struct evhttp_request * request,void * arg)1184 spybye_handle_request(struct evhttp_request *request, void *arg)
1185 {
1186 char *host, *uri;
1187 u_short port;
1188
1189 if (http_hostportfile(request->uri, &host, &port, &uri) == -1) {
1190 /* if it's not fully qualified assume we can just use the uri */
1191 uri = request->uri;
1192 }
1193
1194 /*
1195 * this is a little bit silly, we are not taking advantage of
1196 * the http layer dispatch support.
1197 */
1198 if (strcmp(uri, "/styles/css") == 0) {
1199 css_server(request, arg);
1200 return (0);
1201 } else if (strcmp(uri, "/control.js") == 0) {
1202 serve_control_javascript(request, arg);
1203 return (0);
1204 } else if (strcmp(uri, "/") == 0) {
1205 main_server(request, arg);
1206 return (0);
1207 } else if (strcmp(uri, "/stats") == 0) {
1208 stats_server(request, arg);
1209 return (0);
1210 } else if (strcmp(uri, "/about") == 0) {
1211 about_server(request, arg);
1212 return (0);
1213 } else if (strncmp(uri, "/?", 2) == 0) {
1214 query_server(request, arg);
1215 return (0);
1216 } else if (strncmp(uri, "/results/", 9) == 0) {
1217 results_server(request, arg);
1218 return (0);
1219 } else if (strncmp(uri, "/cache/", 7) == 0) {
1220 cache_server(request, arg);
1221 return (0);
1222 }
1223
1224 return (-1);
1225 }
1226
1227 static void
status_free_patterns(struct patternq * head)1228 status_free_patterns(struct patternq *head)
1229 {
1230 struct pattern *entry;
1231
1232 while ((entry = TAILQ_FIRST(head)) != NULL) {
1233 TAILQ_REMOVE(head, entry, next);
1234 if (entry->pattern_host != NULL)
1235 free(entry->pattern_host);
1236 if (entry->pattern_uri != NULL)
1237 free(entry->pattern_uri);
1238 free(entry);
1239 }
1240 }
1241
1242 static void
patterns_web_done(struct evhttp_request * request,void * arg)1243 patterns_web_done(struct evhttp_request *request, void *arg)
1244 {
1245 struct pattern_obj *patterns = arg;
1246 if (request == NULL || request->response_code != HTTP_OK ||
1247 EVBUFFER_LENGTH(request->input_buffer) == 0) {
1248 fprintf(stderr, "[PATTERN] Failed to read patterns from %s\n",
1249 patterns->location);
1250 return;
1251 }
1252
1253 status_free_patterns(&patterns->head);
1254 status_patterns(patterns, request->input_buffer);
1255 }
1256
1257 static void
patterns_refresh(int fd,short what,void * arg)1258 patterns_refresh(int fd, short what, void *arg)
1259 {
1260 struct timeval tv;
1261 struct pattern_obj *patterns = arg;
1262
1263 timerclear(&tv);
1264 tv.tv_sec = PATTERN_REFRESH_SECONDS;
1265 evtimer_add(&patterns->ev_refresh, &tv);
1266
1267 fprintf(stderr, "[PATTERN] Refreshing patterns from %s\n",
1268 patterns->location);
1269
1270 if (strncasecmp(HTTP_PREFIX, patterns->location,
1271 strlen(HTTP_PREFIX))) {
1272 /* from file */
1273 struct evbuffer *data = read_data(patterns->location);
1274 /* xxx - need to check successful read */
1275 status_free_patterns(&patterns->head);
1276 status_patterns(patterns, data);
1277 evbuffer_free(data);
1278 } else {
1279 if (patterns->evcon != NULL)
1280 evhttp_connection_free(patterns->evcon);
1281 patterns->evcon = read_from_web_prepare(patterns->location,
1282 patterns_web_done, patterns);
1283 }
1284 }
1285
1286 void
status_init(const char * goodness,const char * badness)1287 status_init(const char *goodness, const char *badness)
1288 {
1289 struct timeval tv;
1290 struct evbuffer *data;
1291 SPLAY_INIT(&root);
1292
1293 good_patterns.location = goodness;
1294 bad_patterns.location = badness;
1295
1296 evtimer_set(&good_patterns.ev_refresh,
1297 patterns_refresh, &good_patterns);
1298 evtimer_set(&bad_patterns.ev_refresh,
1299 patterns_refresh, &bad_patterns);
1300
1301 timerclear(&tv);
1302 tv.tv_sec = PATTERN_REFRESH_SECONDS;
1303 evtimer_add(&good_patterns.ev_refresh, &tv);
1304 evtimer_add(&bad_patterns.ev_refresh, &tv);
1305
1306 /* initial setup of the contents */
1307 if (strlen(goodness)) {
1308 data =
1309 strncasecmp(HTTP_PREFIX, goodness, strlen(HTTP_PREFIX)) ?
1310 read_data(goodness) : read_from_web(goodness);
1311 status_good_patterns(data);
1312 evbuffer_free(data);
1313 }
1314
1315 if (strlen(badness)) {
1316 data =
1317 strncasecmp(HTTP_PREFIX, badness, strlen(HTTP_PREFIX)) ?
1318 read_data(badness) : read_from_web(badness);
1319 status_bad_patterns(data);
1320 evbuffer_free(data);
1321 }
1322 }
1323
1324 static int
status_patterns(struct pattern_obj * data,struct evbuffer * databuf)1325 status_patterns(struct pattern_obj *data, struct evbuffer *databuf)
1326 {
1327 char *line;
1328 int count = 0;
1329
1330 while ((line = evbuffer_readline(databuf)) != NULL) {
1331 struct pattern *pattern;
1332 char *host = line, *uri;
1333 if (line[0] == '#' || !strlen(line))
1334 continue;
1335
1336 uri = strchr(line, ' ');
1337 if (uri != NULL)
1338 *uri++ = '\0';
1339
1340 pattern = malloc(sizeof(struct pattern));
1341 if (pattern == NULL)
1342 err(1, "malloc");
1343 pattern->pattern_host = strdup(host);
1344 if (uri != NULL)
1345 pattern->pattern_uri = strdup(uri);
1346 else
1347 pattern->pattern_uri = NULL;
1348
1349 if (pattern->pattern_host == NULL ||
1350 (uri != NULL && pattern->pattern_uri == NULL))
1351 err(1, "strdup");
1352
1353 TAILQ_INSERT_TAIL(&data->head, pattern, next);
1354 DNFPRINTF(2,(stderr, "[PATTERN] Adding pattern: %s/%s\n",
1355 host, uri));
1356 count++;
1357
1358 free(line);
1359 }
1360 data->count = count;
1361 gettimeofday(&data->tv_load, NULL);
1362
1363 return (count);
1364 }
1365
1366 void
status_good_patterns(struct evbuffer * data)1367 status_good_patterns(struct evbuffer *data)
1368 {
1369 int count;
1370 TAILQ_INIT(&good_patterns.head);
1371 count = status_patterns(&good_patterns, data);
1372
1373 fprintf(stderr, "[PATTERN] Added %d good patterns\n", count);
1374 }
1375
1376 void
status_bad_patterns(struct evbuffer * data)1377 status_bad_patterns(struct evbuffer *data)
1378 {
1379 int count;
1380 TAILQ_INIT(&bad_patterns.head);
1381 count = status_patterns(&bad_patterns, data);
1382
1383 fprintf(stderr, "[PATTERN] Added %d bad patterns\n", count);
1384 }
1385
1386
1387 /* error messaging */
1388
1389 static void
inform_cache_notfound(struct evhttp_request * request,const char * url)1390 inform_cache_notfound(struct evhttp_request *request, const char *url)
1391 {
1392 struct evbuffer *databuf = evbuffer_new();
1393 char *html_escaped = evhttp_htmlescape(url != NULL ? url : "<unknown>");
1394 assert(databuf != NULL);
1395
1396 evbuffer_add_printf(databuf,
1397 "<html><head><title>Cache Not Found</title></head>"
1398 "<body><div style=\"border: solid 1px; padding: 2px; "
1399 "width: 40%%; "
1400 "background-color: #dcdcee; font-family: Verdana, Arial;\">"
1401 "<h2>Cache Not Found</h2>\n"
1402 "The URL %s that you requested could not be found in the cache. "
1403 "It's possible that it could not be fetched from its web "
1404 "server.</div></body></html>",
1405 html_escaped);
1406 free(html_escaped);
1407
1408 /* we cannot allow this request */
1409 evhttp_send_reply(request, HTTP_NOTFOUND, "Not Found", databuf);
1410 evbuffer_free(databuf);
1411 }
1412