1 /*
2  * Copyright 2007 Niels Provos <provos@citi.umich.edu>
3  * All rights reserved.
4  */
5 
6 #include <sys/types.h>
7 
8 #ifdef HAVE_CONFIG_H
9 #include "config.h"
10 #endif
11 
12 #include <sys/time.h>
13 #include <sys/param.h>
14 #include <sys/queue.h>
15 #include <sys/tree.h>
16 #include <stdlib.h>
17 #include <stdio.h>
18 #include <string.h>
19 #include <err.h>
20 #include <unistd.h>
21 #include <assert.h>
22 #include <time.h>
23 
24 #include <event.h>
25 #include <evhttp.h>
26 
27 #include "spybye.gen.h"
28 #include "status.h"
29 #include "utils.h"
30 #include "log.h"
31 #include "proxy.h"
32 
/* Defined in another translation unit. */
extern int debug;

/* Counters (requests, harmless, unknown, dangerous) shown by stats_server(). */
struct stats statistics;

/*
 * URL pattern lists used to classify sites; see site_child_danger().
 * stats_server() also reports each list's count and load time.
 */
static struct pattern_obj good_patterns;
static struct pattern_obj bad_patterns;

/*
 * Forward declarations.  site_print_analysis() and site_print_children()
 * call each other, so one of them has to be declared ahead of its
 * definition.
 */
static int status_patterns(struct pattern_obj *data, struct evbuffer *databuf);
static void site_print_analysis(struct evbuffer *databuf, struct site *site);
static void inform_cache_notfound(struct evhttp_request *request,
    const char *url);
44 
45 /* structure where we keep track of sites */
46 
/*
 * Smaller of two values; only defined when no earlier header provided MIN.
 * Each argument may be evaluated twice, so do not pass expressions with
 * side effects.
 */
#ifndef MIN
#define MIN(x, y) ((x) < (y) ? (x) : (y))
#endif
50 
51 int
site_compare(struct site * a,struct site * b)52 site_compare(struct site *a, struct site *b)
53 {
54 	static char atmp[HTTP_MAX_URL], btmp[HTTP_MAX_URL];
55 	char *a_url, *b_url;
56 	char *a_slash, *b_slash;
57 	int host_len, res;
58 	assert(strlen(a->url) >= sizeof(HTTP_PREFIX));
59 	assert(strlen(b->url) >= sizeof(HTTP_PREFIX));
60 
61 	a_url = a->url;
62 	b_url = b->url;
63 	a_slash = strchr(a_url + sizeof(HTTP_PREFIX), '/');
64 	b_slash = strchr(b_url + sizeof(HTTP_PREFIX), '/');
65 	if (a_slash == NULL) {
66 		snprintf(atmp, sizeof(atmp), "%s/", a_url);
67 		a_url = atmp;
68 		a_slash = strchr(a_url + sizeof(HTTP_PREFIX), '/');
69 		assert(a_slash != NULL);
70 	}
71 
72 	if (b_slash == NULL) {
73 		snprintf(btmp, sizeof(btmp), "%s/", b_url);
74 		b_url = btmp;
75 		b_slash = strchr(b_url + sizeof(HTTP_PREFIX), '/');
76 		assert(b_slash != NULL);
77 	}
78 
79 	host_len = MIN((int)(a_slash - a_url), (int)(b_slash - b_url));
80 	res = strncasecmp(a_url, b_url, host_len);
81 	if (res)
82 		return (res);
83 
84 	return strcmp(a_slash, b_slash);
85 }
86 
/*
 * Splay tree holding every site created by site_new(); entries are ordered
 * by site_compare() and linked through the "node" member of struct site.
 */
static SPLAY_HEAD(site_tree, site) root;

SPLAY_PROTOTYPE(site_tree, site, node, site_compare);
SPLAY_GENERATE(site_tree, site, node, site_compare);
91 
92 static int
find_url_in_patterns(struct patternq * head,const char * url)93 find_url_in_patterns(struct patternq* head, const char *url)
94 {
95 	struct pattern *entry;
96 	TAILQ_FOREACH(entry, head, next) {
97 		if (match_url(url, entry->pattern_host, entry->pattern_uri))
98 			return (1);
99 	}
100 
101 	return (0);
102 }
103 
104 int
site_same_as_parent(struct site * site)105 site_same_as_parent(struct site *site)
106 {
107 	static char parent_host[1024];
108 	struct site *parent = site->parent;
109 	char *host, *uri;
110 	u_short port;
111 
112 	if (parent == NULL)
113 		return (1);
114 
115 	while (parent) {
116 		if (parent->parent == NULL)
117 			break;
118 		parent = parent->parent;
119 	}
120 
121 	if (http_hostportfile(parent->url, &host, &port, &uri) == -1)
122 		return (0);
123 	strlcpy(parent_host, host, sizeof(parent_host));
124 
125 	return (match_url(site->url, parent_host, NULL));
126 }
127 
128 /*
129  * Returns true if the site itself matches the bad patterns list
130  */
131 
132 int
site_matches_bad_patterns(struct site * site)133 site_matches_bad_patterns(struct site *site)
134 {
135 	return (find_url_in_patterns(&good_patterns.head, site->url));
136 }
137 
138 enum DANGER_TYPES
site_child_danger(struct site * site)139 site_child_danger(struct site *site)
140 {
141 	enum DANGER_TYPES danger = UNKNOWN;
142 
143 	if (site_same_as_parent(site))
144 		danger = HARMLESS;
145 
146 	if (find_url_in_patterns(&good_patterns.head, site->url))
147 		danger = HARMLESS;
148 	if (find_url_in_patterns(&bad_patterns.head, site->url))
149 		danger = DANGEROUS;
150 
151 	return (danger);
152 }
153 
154 enum DANGER_TYPES
site_recurse_danger(struct site * site)155 site_recurse_danger(struct site* site)
156 {
157 	enum DANGER_TYPES danger = site->danger;
158 	struct site *child;
159 
160 	TAILQ_FOREACH(child, &site->children, next) {
161 		enum DANGER_TYPES cur = site_recurse_danger(child);
162 		if (cur > danger)
163 			danger = cur;
164 	}
165 
166 	return (danger);
167 }
168 
169 enum DANGER_TYPES
site_analyze_danger(struct site * site)170 site_analyze_danger(struct site *site)
171 {
172 	enum DANGER_TYPES danger = HARMLESS;
173 	struct site *child;
174 
175 	if (site->parent != NULL)
176 		return site_child_danger(site);
177 
178 	if (site->html_size == 0 && TAILQ_FIRST(&site->children) == NULL)
179 		return (UNKNOWN);
180 
181 	TAILQ_FOREACH(child, &site->children, next) {
182 		enum DANGER_TYPES cur = site_recurse_danger(child);
183 		if (cur > danger)
184 			danger = cur;
185 	}
186 
187 	/* find the highest danger of children for the root */
188 
189 	return (danger);
190 }
191 
192 static void
site_dispatch_callbacks(struct site * site)193 site_dispatch_callbacks(struct site *site)
194 {
195 	struct site_callback *cb;
196 	while ((cb = TAILQ_FIRST(&site->callbacks)) != NULL) {
197 		DNFPRINTF(1, (stderr, "[DEBUG] Dispatching callbacks for %s\n",
198 			site->url));
199 		TAILQ_REMOVE(&site->callbacks, cb, next);
200 		(*cb->cb)(site, cb->cb_arg);
201 		free(cb);
202 	}
203 }
204 
205 /*
206  * makes everything up the tree dangerous
207  * XXX: is this the right thing to do???
208  */
209 
210 void
site_make_dangerous(struct site * site)211 site_make_dangerous(struct site *site)
212 {
213 	/* trigger the callbacks up the tree */
214 	while (site) {
215 		DNFPRINTF(1, (stderr, "[DEBUG] Making %s dangerous\n",
216 			site->url));
217 		site->danger = DANGEROUS;
218 
219 		site_dispatch_callbacks(site);
220 		site = site->parent;
221 	}
222 }
223 
224 int
site_count_dangerous(struct site * site)225 site_count_dangerous(struct site *site)
226 {
227 	struct site *child;
228 	int total = 0;
229 
230 	TAILQ_FOREACH(child, &site->children, next) {
231 		total += site_count_dangerous(child);
232 	}
233 
234 	if (site->danger == DANGEROUS)
235 		total += 1;
236 
237 	return (total);
238 }
239 
240 void
site_complete(int fd,short what,void * arg)241 site_complete(int fd, short what, void *arg)
242 {
243 	struct site *site = arg;
244 	struct timeval tv;
245 
246 	site->danger = site_analyze_danger(site);
247 
248 	gettimeofday(&tv, NULL);
249 	timersub(&tv, &site->tv_change, &tv);
250 	assert(site->tv_change.tv_sec);
251 	if (tv.tv_sec >= IDLE_TIME) {
252 		DNFPRINTF(1, (stderr, "[DEBUG] Analysis for %s complete\n",
253 			site->url));
254 		site->flags |= ANALYSIS_COMPLETE;
255 		site_dispatch_callbacks(site);
256 	} else {
257 		timerclear(&tv);
258 		tv.tv_sec = 1;
259 		evtimer_add(&site->ev_complete, &tv);
260 	}
261 }
262 
263 void
site_expire(int fd,short what,void * arg)264 site_expire(int fd, short what, void *arg)
265 {
266 	struct site *site = arg;
267 
268 	fprintf(stderr, "[STATE] Expiring %s\n", site->url);
269 	site_free(site);
270 }
271 
272 void
site_change_time(struct site * parent,struct timeval * tv)273 site_change_time(struct site *parent, struct timeval *tv)
274 {
275 	while (parent != NULL) {
276 		/* only expire from the top */
277 		if (parent->parent == NULL) {
278 			struct timeval tv_timeout;
279 			/* update the expiration time */
280 			timerclear(&tv_timeout);
281 			tv_timeout.tv_sec = STATE_EXPIRATION_TIME;
282 
283 			evtimer_add(&parent->ev_timeout, &tv_timeout);
284 		}
285 
286 		parent->tv_change = *tv;
287 		parent = parent->parent;
288 	}
289 }
290 
291 void
site_disassociate_parent(struct site * site)292 site_disassociate_parent(struct site *site)
293 {
294 	struct timeval tv;
295 	struct site *parent = site->parent;
296 	if (parent == NULL)
297 		return;
298 
299 	TAILQ_REMOVE(&parent->children, site, next);
300 	site->parent = NULL;
301 
302 	/* make sure that we get an expiration time for this site */
303 	gettimeofday(&tv, NULL);
304 	site_change_time(site, &tv);
305 }
306 
307 struct site *
site_find(const char * url)308 site_find(const char *url)
309 {
310 	struct site tmp;
311 
312 	tmp.url = (char *)url;
313 	return (SPLAY_FIND(site_tree, &root, &tmp));
314 }
315 
316 struct site *
site_new(const char * url,const char * parent_url)317 site_new(const char *url, const char *parent_url)
318 {
319 	struct site *site, tmp, *parent = NULL;
320 	struct timeval tv;
321 
322 	tmp.url = (char *)url;
323 	if ((site = SPLAY_FIND(site_tree, &root, &tmp)) != NULL) {
324 		/* we already got a match - what now? */
325 		goto done;
326 	}
327 
328 	if (parent_url != NULL) {
329 		tmp.url = (char *)parent_url;
330 		parent = SPLAY_FIND(site_tree, &root, &tmp);
331 
332 		/* nobody should be able to fake a request */
333 		if (parent == NULL)
334 			return (NULL);
335 	}
336 
337 	if ((site = calloc(1, sizeof(struct site))) == NULL)
338 		err(1, "calloc");
339 
340 	TAILQ_INIT(&site->callbacks);
341 
342 	TAILQ_INIT(&site->children);
343 	if (parent != NULL) {
344 		site->parent = parent;
345 		TAILQ_INSERT_TAIL(&parent->children, site, next);
346 	}
347 
348 	if ((site->url = strdup(url)) == NULL)
349 		err(1, "strdup");
350 
351 	site->danger = site_analyze_danger(site);
352 	if (site->danger == DANGEROUS) {
353 		/* allows us to find callbacks */
354 		site_make_dangerous(site);
355 	}
356 	SPLAY_INSERT(site_tree, &root, site);
357 
358 	evtimer_set(&site->ev_timeout, site_expire, site);
359 	evtimer_set(&site->ev_complete, site_complete, site);
360 
361 done:
362 	/* update the last time a tree was updated */
363 	gettimeofday(&tv, NULL);
364 	site_change_time(site, &tv);
365 
366 	return (site);
367 }
368 
369 void
site_free(struct site * site)370 site_free(struct site *site)
371 {
372 	struct site *child;
373 	struct site_callback *cb;
374 
375 	SPLAY_REMOVE(site_tree, &root, site);
376 
377 	event_del(&site->ev_timeout);
378 	event_del(&site->ev_complete);
379 
380 	while ((child = TAILQ_FIRST(&site->children)) != NULL) {
381 		TAILQ_REMOVE(&site->children, child, next);
382 		child->parent = NULL;
383 		site_free(child);
384 	}
385 
386 	while ((cb = TAILQ_FIRST(&site->callbacks)) != NULL) {
387 		TAILQ_REMOVE(&site->callbacks, cb, next);
388 		(*cb->cb)(site, cb->cb_arg);
389 		free(cb);
390 	}
391 
392 	if (site->parent) {
393 		TAILQ_REMOVE(&site->parent->children, site, next);
394 	}
395 
396 	if (site->virus_result != NULL)
397 		free(site->virus_result);
398 	if (site->firstline != NULL)
399 		free(site->firstline);
400 	if (site->html_data != NULL)
401 		free(site->html_data);
402 	free(site->url);
403 	free(site);
404 }
405 
406 void
site_insert_callback(struct site * site,void (* cb)(struct site *,void *),void * cb_arg)407 site_insert_callback(struct site *site,
408     void (*cb)(struct site *, void *), void *cb_arg)
409 {
410 	struct site_callback *ctx = malloc(sizeof(struct site_callback));
411 	assert(ctx != NULL);
412 
413 	ctx->cb = cb;
414 	ctx->cb_arg = cb_arg;
415 	TAILQ_INSERT_TAIL(&site->callbacks, ctx, next);
416 }
417 
/*
 * printf-style append to the evbuffer named "databuf"; a variable of that
 * name must be in scope wherever the macro is used.
 */
#define HTML_PRINT(...) evbuffer_add_printf(databuf, __VA_ARGS__)
419 
420 /* stores the data associated with this site */
421 
422 void
site_cache_data(struct site * site,const struct evhttp_request * req)423 site_cache_data(struct site *site, const struct evhttp_request *req)
424 {
425 	static char firstline[128];
426 
427 	if (site->firstline != NULL)
428 		free(site->firstline);
429 	if (site->html_data != NULL)
430 		free(site->html_data);
431 
432 	fprintf(stderr, "[CACHE] Caching %ld bytes for %s (%s)\n",
433 	    EVBUFFER_LENGTH(req->input_buffer),
434 	    site->url, danger_to_text(site->danger));
435 
436 	site->html_size = EVBUFFER_LENGTH(req->input_buffer);
437 	site->html_data = malloc(site->html_size);
438 	if (site->html_data == NULL)
439 		err(1, "malloc");
440 
441 	memcpy(site->html_data, EVBUFFER_DATA(req->input_buffer),
442 	    site->html_size);
443 
444 	snprintf(firstline, sizeof(firstline), "HTTP/1.%d %d %s",
445 	    req->minor, req->response_code, req->response_code_line);
446 	if ((site->firstline = strdup(firstline)) == NULL)
447 		err(1, "strdup");
448 }
449 
450 
451 const char *
danger_to_text(enum DANGER_TYPES danger)452 danger_to_text(enum DANGER_TYPES danger)
453 {
454 	switch (danger) {
455 	case HARMLESS:
456 		return "harmless";
457 	case DANGEROUS:
458 		return "dangerous";
459 	case UNKNOWN:
460 	default:
461 		return "unknown";
462 	}
463 }
464 
465 static void
site_print_children(struct evbuffer * databuf,struct site * site,enum DANGER_TYPES desired_level)466 site_print_children(struct evbuffer *databuf, struct site *site,
467     enum DANGER_TYPES desired_level)
468 {
469 	struct site *child;
470 	TAILQ_FOREACH(child, &site->children, next) {
471 		if (child->danger != desired_level)
472 			continue;
473 
474 		HTML_PRINT("<li>");
475 		site_print_analysis(databuf, child);
476 		HTML_PRINT("</li>");
477 	}
478 }
479 
480 static void
site_print_analysis(struct evbuffer * databuf,struct site * site)481 site_print_analysis(struct evbuffer *databuf, struct site *site)
482 {
483 	char *uri_escaped = evhttp_encode_uri(site->url);
484 	char *html_escaped = evhttp_htmlescape(site->url);
485 	HTML_PRINT("<span class=%s>%s</span> ",
486 	    danger_to_text(site->danger),
487 	    danger_to_text(site->danger));
488 
489 	HTML_PRINT(
490 		"<a href=\"/cache/?url=%s\" target=\"_blank\">%s</a>"
491 		" <span class=firstline>%s</span>"
492 		" <span class=virus>%s</span>",
493 		uri_escaped, html_escaped,
494 		site->firstline,
495 		site->virus_result != NULL ? site->virus_result : "unknown");
496 
497 	free(uri_escaped);
498 	free(html_escaped);
499 	if (TAILQ_FIRST(&site->children) != NULL) {
500 		HTML_PRINT("<ul>");
501 		site_print_children(databuf, site, DANGEROUS);
502 		site_print_children(databuf, site, UNKNOWN);
503 		site_print_children(databuf, site, HARMLESS);
504 		HTML_PRINT("</ul>");
505 	}
506 }
507 
508 /* code to display status related html */
509 
/*
 * Stylesheet text that css_server() sends as its response body.  The class
 * selectors (.banner, .tiny, .version, .statistics, .footer, ...) match
 * the class attributes emitted by the page-building helpers in this file.
 */
static const char *css_style =
    ".tiny {\n"
    "  color: #bbbbcc;\n"
    "  padding: 2px 0px 0px 2px;"
    "  margin-bottom: -10em;"
    "  font-size: 0.5em;\n"
    "  font-family: Verdana, Arial;\n"
    "}\n"
    ".version {\n"
    "  width: 100%;"
    "  color: #8888bb;\n"
    "  padding: 0px 4px 2px 0px;"
    "  margin-top: -1em;"
    "  font-size: 0.5em;\n"
    "  text-align: right;\n"
    "  font-family: Verdana, Arial;\n"
    "}\n"
    ".statistics h1 {\n"
    "  padding: 3px;"
    "  font-size: small;\n"
    "  background-color: #ccccee;\n"
    "  border: 1px solid;\n"
    "}\n"
    ".about {\n"
    "  width: 90%;\n"
    "  margin: 10px;\n"
    "  background-color: #dcdcee;\n"
    "  font-family: Verdana, Arial;\n"
    "  border: 1px solid;\n"
    "  padding: 1em;\n"
    "}\n"
    ".about h1 {\n"
    "  width: 60%;\n"
    "  background-color: #ddaa66;\n"
    "  border: 1px solid;\n"
    "  margin-top: 1em;\n"
    "  padding-left: 0.5em;\n"
    "  font-size: 1em;\n"
    "}\n"
    ".about p {\n"
    "  font-size: 0.9em;\n"
    "  margin-left: 2em;\n"
    "}\n"
    ".statistics {\n"
    "  width: 80%;\n"
    "  margin: 10px;\n"
    "  background-color: #dcdcee;\n"
    "  font-family: Verdana, Arial;\n"
    "  font-size: 0.8em;\n"
    "  padding: 1em;\n"
    "}\n"
    "table.traffic {"
    "  width: 300px;"
    "  border-width: 0px 0px 1px 1px;"
    "  border-spacing: 2px;"
    "  border-style: inset;"
    "  border-color: black;"
    "  border-collapse: collapse;"
    "}"
    "table.traffic td {"
    "  border-width: 1px 1px 0px 0px;"
    "  padding: 2px;"
    "  border-style: inset;"
    "  border-color: black;"
    "  background-color: rgb(255, 250, 220);"
    "  font-family: Verdana, Arial;\n"
    "  font-size: small;\n"
    "}\n"
    "table.sites {"
    "  border-width: 0px 0px 1px 1px;"
    "  border-spacing: 2px;"
    "  border-style: inset;"
    "  border-color: black;"
    "  border-collapse: collapse;"
    "}"
    "table.sites td {"
    "  border-width: 1px 1px 0px 0px;"
    "  padding: 2px;"
    "  border-style: inset;"
    "  border-color: black;"
    "  background-color: rgb(255, 250, 220);"
    "  font-family: Verdana, Arial;\n"
    "  font-size: small;\n"
    "}\n"
    ".analysis {\n"
    "  font-family: Verdana, Arial;\n"
    "  font-size: small;\n"
    "}\n"
    ".harmless { color: green }\n"
    ".unknown { color: orange }\n"
    ".firstline {\n"
    "  font-size: 0.9em;\n"
    "  font-family: Verdana, Arial;\n"
    "  font-weight: bold }\n"
    ".virus {\n"
    "  font-size: 0.9em;\n"
    "  font-family: Verdana, Arial;\n"
    "}\n"
    ".dangerous { color: red }\n"
    ".banner {\n"
    "  background: #ddddff;\n"
    "  font-family: Verdana, Arial;\n"
    "  border: 1px solid;\n"
    "}\n"
    ".banner h1 {\n"
    "  margin-top: 0em;\n"
    "  margin-bottom: -0.7em;\n"
    "  text-align: center;\n"
    "}\n"
    ".banner ul {\n"
    "  margin-bottom: -0.7em;\n"
    "}\n"
    ".banner li {\n"
    "  display: inline;\n"
    "}\n"
    "\n"
    ".footer {\n"
    "  font-family: Verdana, Arial;\n"
    "  font-size: 0.7em;\n"
    "  text-align: center;\n"
    "}\n";
631 
632 void
css_server(struct evhttp_request * request,void * arg)633 css_server(struct evhttp_request *request, void *arg)
634 {
635 	struct evbuffer *databuf = evbuffer_new();
636 	assert(databuf != NULL);
637 	evhttp_add_header(request->output_headers, "Content-Type", "text/css");
638 	evbuffer_add(databuf, css_style, strlen(css_style));
639 
640 	/* send along our data */
641 	evhttp_send_reply(request, HTTP_OK, "OK", databuf);
642 	evbuffer_free(databuf);
643 }
644 
/* Appends the copyright footer block to databuf. */
static void
print_blurb(struct evbuffer *databuf)
{
	evbuffer_add_printf(databuf, "<div class=footer><hr />\n");
	evbuffer_add_printf(databuf,
	    "<center>"
	    "Copyright (c) 2007 <a href=http://www.citi.umich.edu/u/provos/>"
	    "Niels Provos</a>.  All Rights Reserved."
	    "</center>\n");
	evbuffer_add_printf(databuf, "</div>\n");
}
655 
/* Appends the closing body and html tags to databuf. */
static void
print_footer(struct evbuffer *databuf)
{
	evbuffer_add_printf(databuf, "</body></html>");
}
661 
662 static void
print_header(struct evbuffer * databuf)663 print_header(struct evbuffer *databuf)
664 {
665 	extern struct spybye_share spybye_share;
666 	extern int behave_as_proxy;
667 
668 	HTML_PRINT(
669 		"<html><head><title>"
670 		"SpyBye: At Your Service"
671 		"</title></head>");
672 	HTML_PRINT("<link rel=stylesheet type=text/css href=/styles/css>\n");
673 
674 	HTML_PRINT(
675 		"<body><div class=banner>\n"
676 		"<span class=tiny>sharing %s</span> "
677 		"<span class=tiny>proxy %s</span>"
678 		"<h1>SpyBye</h1>\n"
679 		"<ul>"
680 		"<li><a href=\"/\">Main</a> </li>"
681 		"<li><a href=\"/stats\">Statistics</a> </li>"
682 		"<li><a href=\"/about\">About</a> </li>"
683 		"</ul>\n"
684 		"<div class=version>Version %s</div>"
685 		"</div>\n",
686 		spybye_share.evcon_report == NULL ?
687 		"disabled" : "enabled",
688 		behave_as_proxy ?
689 		"on" : "off",
690 		VERSION
691 	    );
692 }
693 
/*
 * Appends the URL entry form to databuf.  The form submits its "url" field
 * to "/" with GET.
 */
static void
print_form(struct evbuffer *databuf)
{
	evbuffer_add_printf(databuf,
	    "<p><center>\n"
	    "<form name=\"input\" action=\"/\" method=\"get\">\n"
	    "Url: <input type=\"text\" name=\"url\" size=100>\n"
	    "<input type=\"submit\" value=\"Submit\">\n"
	    "</form></center>");
}
704 
705 static void
print_done_sites(struct evbuffer * databuf)706 print_done_sites(struct evbuffer *databuf)
707 {
708 	struct site *site;
709 	extern int behave_as_proxy;
710 
711 	if (SPLAY_ROOT(&root) == NULL)
712 		return;
713 
714 	HTML_PRINT(
715 		"<hr><div class=statistics>\n"
716 		"<h1>Recent Site Analysis</h1>\n"
717 		"<ul>\n");
718 
719 	for (site = SPLAY_MIN(site_tree, &root);
720 	    site != NULL; site = SPLAY_NEXT(site_tree, &root, site)) {
721 		int done;
722 		if (site->parent != NULL)
723 			continue;
724 
725 		/* if we behave as proxy then all sites are done all the time */
726 		done = behave_as_proxy || (site->flags & ANALYSIS_COMPLETE);
727 		if (!done) {
728 			char *html_escaped = evhttp_htmlescape(site->url);
729 			HTML_PRINT(
730 				"<li>"
731 				"<span class=unknown>pending</span> %s"
732 				"</li>", html_escaped);
733 			free(html_escaped);
734 		} else {
735 			char *uri_escaped = evhttp_encode_uri(site->url);
736 			char *html_escaped = evhttp_htmlescape(site->url);
737 			HTML_PRINT(
738 				"<li>"
739 				"<span class=%s>%s</span> "
740 				"<a href=\"/?url=%s&noiframe=1\">%s</a>"
741 				"</li>\n",
742 				danger_to_text(site->danger),
743 				danger_to_text(site->danger),
744 				uri_escaped,
745 				html_escaped);
746 			free(uri_escaped);
747 			free(html_escaped);
748 		}
749 	}
750 
751 	HTML_PRINT("</ul></div>");
752 }
753 
754 static void
main_server(struct evhttp_request * request,void * arg)755 main_server(struct evhttp_request *request, void *arg)
756 {
757 	struct evbuffer *databuf = evbuffer_new();
758 	assert(databuf != NULL);
759 
760 	print_header(databuf);
761 
762 	if (request->uri[0] != '/')
763 		print_form(databuf);
764 
765 	print_done_sites(databuf);
766 
767 	print_blurb(databuf);
768 
769 	print_footer(databuf);
770 
771 	/* send along our data */
772 	evhttp_send_reply(request, HTTP_OK, "OK", databuf);
773 	evbuffer_free(databuf);
774 }
775 
776 static void
format_dangerousload(struct evhttp_request * request,struct evbuffer * databuf,struct dangerousload * dl)777 format_dangerousload(struct evhttp_request *request,
778     struct evbuffer *databuf, struct dangerousload *dl)
779 {
780 	char output[64];
781 	struct tm *tm;
782 	u_int tmp;
783 	time_t seconds;
784 	char *parent_url, *danger_url, *escaped;
785 	char *virus_scan = "unknown";
786 
787 	EVTAG_GET(dl, time_in_seconds, &tmp);
788 	EVTAG_GET(dl, parent_url, &parent_url);
789 	EVTAG_GET(dl, dangerous_url, &danger_url);
790 	if (EVTAG_HAS(dl, virus_result))
791 		EVTAG_GET(dl, virus_result, &virus_scan);
792 
793 	seconds = tmp;
794 	tm = localtime(&seconds);
795 	strftime(output, sizeof(output), "%Y-%m-%d %H:%M:%S", tm);
796 
797 	escaped = evhttp_encode_uri(parent_url);
798 
799 	HTML_PRINT(
800 		"<tr><td><span class=time>%s</span></td>"
801 		"<td><span class=harmless>"
802 		"<a href=\"/?url=%s\">%s</a></span></td>"
803 		"<td><span class=dangerous>%s</span></li></td>"
804 		"<td><span class=harmless>%s</span></li></td>"
805 		"</tr>",
806 		output, escaped, parent_url, danger_url, virus_scan);
807 
808 	free(escaped);
809 }
810 
811 static void
stats_server(struct evhttp_request * request,void * arg)812 stats_server(struct evhttp_request *request, void *arg)
813 {
814 	extern struct dangerq danger;
815 	struct dangerous_container *entry;
816 	struct evbuffer *databuf = evbuffer_new();
817 	char good_time[30], bad_time[30];
818 	struct tm *tm;
819 	int count = 0;
820 	assert(databuf != NULL);
821 
822 	print_header(databuf);
823 
824 	if (request->uri[0] != '/')
825 		print_form(databuf);
826 
827 	HTML_PRINT("<hr />");
828 
829 	/* some basic statistics */
830 	HTML_PRINT(
831 		"<div class=statistics>\n"
832 		"<h1>Traffic Statistics</h1>\n"
833 		"<table><tr><td valign=top>"
834 		"<table class=traffic>"
835 		"<tr><td>Requests</td><td>%d</td></tr>\n"
836 		"<tr><td>Harmless</td><td>%d</td></tr>\n"
837 		"<tr><td>Unknown</td><td>%d</td></tr>\n"
838 		"<tr><td>Dangerous</td><td>%d</td></tr>\n"
839 		"</table></td><td valign=top>",
840 		statistics.num_requests,
841 		statistics.num_harmless,
842 		statistics.num_unknown,
843 		statistics.num_dangerous);
844 
845 	tm = localtime((time_t *)&good_patterns.tv_load.tv_sec);
846 	strftime(good_time, sizeof(good_time), "%Y-%m-%d %H:%M:%S", tm);
847 	tm = localtime((time_t *)&bad_patterns.tv_load.tv_sec);
848 	strftime(bad_time, sizeof(bad_time), "%Y-%m-%d %H:%M:%S", tm);
849 
850 	HTML_PRINT(
851 		"<table class=traffic>"
852 		"<tr><td>Bad Patterns</td><td>%d</td><td>%s</td></tr>\n"
853 		"<tr><td>Good Patterns</td><td>%d</td><td>%s</td></tr>\n"
854 		"</table></td></tr></table>",
855 		bad_patterns.count,
856 		bad_time,
857 		good_patterns.count,
858 		good_time
859 	    );
860 
861 	HTML_PRINT("</div><div style=\"clear:both;\"></div>\n");
862 
863 	HTML_PRINT(
864 		"<div class=statistics>\n"
865 		"<h1>Dangerous Sites</h1>\n"
866 		"<table class=sites>");
867 
868 	TAILQ_FOREACH(entry, &danger, next) {
869 		if (count++ > MAX_RECENT_RESULTS)
870 			break;
871 
872 		format_dangerousload(request, databuf, entry->dl);
873 	}
874 
875 	HTML_PRINT("</table></div>\n");
876 	print_blurb(databuf);
877 
878 	print_footer(databuf);
879 
880 	/* send along our data */
881 	evhttp_send_reply(request, HTTP_OK, "OK", databuf);
882 	evbuffer_free(databuf);
883 }
884 
885 static void
about_server(struct evhttp_request * request,void * arg)886 about_server(struct evhttp_request *request, void *arg)
887 {
888 	struct evbuffer *databuf = evbuffer_new();
889 	assert(databuf != NULL);
890 
891 	print_header(databuf);
892 
893 	if (request->uri[0] != '/')
894 		print_form(databuf);
895 
896 	HTML_PRINT("<hr>"
897 	    "<div class=about>"
898 	    "<h1>What is SpyBye?</h1>"
899 	    "<p>SpyBye is a tool to help web masters determine if their web "
900 	    "pages are hosting browser exploits that can infect visiting "
901 	    "users with malware.  It functions as an HTTP proxy server and "
902 	    "intercepts all browser requests.  SpyBye uses a few simple rules "
903 	    "to determine if embedded links on your web page are harmlesss, "
904 	    "unknown or maybe even dangerous.</p>"
905 	    "<h1>Why did you write SpyBye?</h1>"
906 	    "<p>It has become increasingly common for web sites to get "
907 	    "compromised.  This can happen either due to vulnerable "
908 	    "web applications that you run or due to compromised servers "
909 	    "via vectors completely out of your control.  Nonetheless, it "
910 	    "is important for web masters to be able to tell if their pages "
911 	    "are dangerous to their users.  SpyBye provides a very simple "
912 	    "mechanism to determine how a site works on the HTTP level. "
913 	    "This often gives us clues about potentially dangerous content. "
914 	    "I hope that SpyBye can be of use to anyone who wants to verify "
915 	    "if their web site could be compromised and dangerous.</p>"
916 	    "<p>The unoffical explanation is that I needed some code to "
917 	    "test <a href=http://www.monkey.org/~provos/libevent>"
918 	    "libevent</a>'s HTTP layer; writing a proxy exercises most "
919 	    "of the code paths.</p>"
920 	    "<h1>How does SpyBye work?</h1>"
921 	    "<p>SpyBye operates as a proxy server and gets to see all the "
922 	    "web fetches that your browser makes.   It applies very simple "
923 	    "rules to each URL that is fetched as a result of loading a "
924 	    "web page.  These rules allows us to classify a URL into three "
925 	    "categories: harmless, unknown or dangerous.  Although, there is "
926 	    "great margin of error, the categories allow a web master to "
927 	    "look at the URLs and determine if they should be there or not. "
928 	    "If you see that a URL is being fetched that you would not "
929 	    "expect, it's a good indication you have been copromised.</p>"
930 	    "<h1>Disclaimer</h1>"
931 	    "<p>SpyBye does not protect you from getting exploited yourself. "
932 	    "It tries to take reasonable precautions to avoid infection while "
933 	    "using it.  However, ideally, you would run your browser in a "
934 	    "virtual machine and revert to a clean snapshot when done. "
935 	    "You have been warned.  Today's malware is capable of rendering "
936 	    "your computer unusable - and empty your bank accounts! "
937 	    "<span style=\"font-size: 0.25em\">"
938 	    "THIS SOFTWARE IS PROVIDED BY THE AUTHOR "
939 	    "``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, "
940 	    "BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY "
941 	    "AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO "
942 	    "EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, "
943 	    "INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES "
944 	    "(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE "
945 	    "GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS "
946 	    "INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, "
947 	    "WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING "
948 	    "NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF "
949 	    "THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE."
950 	    "</span>"
951 	    "</p>"
952 	    "</div>");
953 
954 	print_blurb(databuf);
955 
956 	print_footer(databuf);
957 
958 	/* send along our data */
959 	evhttp_send_reply(request, HTTP_OK, "OK", databuf);
960 	evbuffer_free(databuf);
961 }
962 
963 
964 void
results_server(struct evhttp_request * request,void * arg)965 results_server(struct evhttp_request *request, void *arg)
966 {
967 	struct site *site, tmp;
968 	struct evkeyvalq args;
969 	struct evbuffer *databuf = evbuffer_new();
970 	const char *url = NULL;
971 	int done = 0;
972 	assert(databuf != NULL);
973 
974 	TAILQ_INIT(&args);
975 
976 	evhttp_parse_query(request->uri, &args);
977 
978 	url = evhttp_find_header(&args, "url");
979 
980 	if (url == NULL)
981 		goto fail;
982 
983 	tmp.url = (char *)url;
984 	site = SPLAY_FIND(site_tree, &root, &tmp);
985 
986 	HTML_PRINT(
987 		"<html><head><title>"
988 		"SpyBye: Results</title>\n");
989 	done = site == NULL ||
990 	    (site->flags & ANALYSIS_COMPLETE) ||
991 	    !event_pending(&site->ev_complete, EV_TIMEOUT, NULL);
992 	if (!done)
993 		HTML_PRINT("<meta http-equiv=\"refresh\" content=\"2\">\n");
994 
995 	HTML_PRINT("</head>");
996 	HTML_PRINT("<link rel=stylesheet type=text/css href=/styles/css>\n");
997 
998 	HTML_PRINT("<body>");
999 
1000 	tmp.url = (char *)url;
1001 	if (site != NULL) {
1002 		char *url_escaped = evhttp_htmlescape(site->url);
1003 		HTML_PRINT("%s %s found %d dangerous links.<p>\n",
1004 		    done ? "<span class=harmless>Complete</span>" :
1005 		    "<span class=unknown>Pending</span>",
1006 		    url_escaped, site_count_dangerous(site));
1007 		free(url_escaped);
1008 
1009 		HTML_PRINT("<div class=analysis>\n");
1010 		site_print_analysis(databuf, site);
1011 		HTML_PRINT("</div>\n");
1012 	}
1013 
1014 	if (done) {
1015 		HTML_PRINT("<p>The analysis of this URL is complete. "
1016 		    "Take a look at all URLs that have been marked "
1017 		    "either <b>unknown</b> or <b>dangerous</b>");
1018 	} else {
1019 		/* completion timers fire only in non-proxy mode */
1020 		struct timeval tv;
1021 		gettimeofday(&tv, NULL);
1022 		timersub(&tv, &site->tv_change, &tv);
1023 
1024 		HTML_PRINT("<p>Analysis is going to take %d more seconds.",
1025 		    IDLE_TIME - tv.tv_sec);
1026 	}
1027 
1028 	HTML_PRINT("</body></html>");
1029 
1030 	evhttp_clear_headers(&args);
1031 
1032 	/* send along our data */
1033 	evhttp_send_reply(request, HTTP_OK, "OK", databuf);
1034 	evbuffer_free(databuf);
1035 	return;
1036 
1037 fail:
1038 	evhttp_clear_headers(&args);
1039 	evhttp_send_error(request, HTTP_BADREQUEST, "You must be kidding.");
1040 }
1041 
1042 void
query_server(struct evhttp_request * request,void * arg)1043 query_server(struct evhttp_request *request, void *arg)
1044 {
1045 	static char fixed_url[PATH_MAX];
1046 	struct evkeyvalq args;
1047 	struct evbuffer *databuf = evbuffer_new();
1048 	struct site *site = NULL;
1049 	char *url_relative = NULL;
1050 	const char *no_iframe_arg = NULL;
1051 	const char *url = NULL;
1052 	int no_iframe = 0;
1053 	assert(databuf != NULL);
1054 
1055 	TAILQ_INIT(&args);
1056 
1057 	evhttp_parse_query(request->uri, &args);
1058 
1059 	url = evhttp_find_header(&args, "url");
1060 	no_iframe_arg = evhttp_find_header(&args, "noiframe");
1061 	no_iframe = no_iframe_arg != NULL && strcmp(no_iframe_arg, "1") == 0;
1062 
1063 	print_header(databuf);
1064 
1065 	if (request->uri[0] != '/' && !no_iframe)
1066 		print_form(databuf);
1067 
1068 	print_blurb(databuf);
1069 
1070 	if (url == NULL) {
1071 		HTML_PRINT("Did not receive a URL.  You loose.");
1072 		goto done;
1073 	}
1074 
1075 	if (http_hostportfile(url, NULL, NULL, NULL) == -1) {
1076 		/* if they did not prefix with http://, try to fix for them */
1077 		strlcpy(fixed_url, HTTP_PREFIX, sizeof(fixed_url));
1078 		strlcat(fixed_url, url, sizeof(fixed_url));
1079 		url = fixed_url;
1080 		if (http_hostportfile(url, NULL, NULL, NULL) == -1) {
1081 			HTML_PRINT("Did not receive a URL.  You loose.");
1082 			goto done;
1083 		}
1084 	}
1085 
1086 	site = site_new(url, NULL);
1087 	url_relative = evhttp_encode_uri(url);
1088 
1089 	if (site == NULL)
1090 		goto fail;
1091 
1092 	HTML_PRINT(
1093 		"<iframe src=\"/results/?url=%s\" "
1094 		"width=100%% height=50%%>\n"
1095 		"Missing iframe support</iframe><p>\n",
1096 		url_relative);
1097 	free(url_relative);
1098 
1099 	/*
1100 	 * do not inject an iframe for the site to be tested if the analysis
1101 	 * is complete; or if the user directly requested that no iframe be
1102 	 * displayed.
1103 	 */
1104 	if ((site->flags & ANALYSIS_COMPLETE) == 0 && !no_iframe) {
1105 		char *url_escaped = evhttp_htmlescape(url);
1106 		HTML_PRINT(
1107 			"<iframe src=\"%s\" "
1108 			"width=100%% height=50%% security=restricted>\n"
1109 			"Missing iframe support"
1110 			"</iframe>\n",
1111 			url_escaped);
1112 		free(url_escaped);
1113 
1114 		/* start the completion timer */
1115 		site_complete(-1, 0, site);
1116 	}
1117 
1118 done:
1119 	print_footer(databuf);
1120 
1121 	evhttp_clear_headers(&args);
1122 
1123 	/* send along our data */
1124 	evhttp_send_reply(request, HTTP_OK, "OK", databuf);
1125 	evbuffer_free(databuf);
1126 	return;
1127 
1128 fail:
1129 	evhttp_clear_headers(&args);
1130 	evhttp_send_error(request, HTTP_SERVUNAVAIL, "Another eval is on");
1131 }
1132 
1133 void
cache_server(struct evhttp_request * request,void * arg)1134 cache_server(struct evhttp_request *request, void *arg)
1135 {
1136 	struct timeval tv;
1137 	struct evkeyvalq args;
1138 	struct evbuffer *databuf = evbuffer_new();
1139 	struct site *site;
1140 	const char *url = NULL;
1141 	char *escaped;
1142 	assert(databuf != NULL);
1143 
1144 	TAILQ_INIT(&args);
1145 
1146 	evhttp_parse_query(request->uri, &args);
1147 
1148 	url = evhttp_find_header(&args, "url");
1149 	site = site_find(url);
1150 
1151 	if (site == NULL || site->html_data == NULL)
1152 		goto fail;
1153 
1154 	evhttp_clear_headers(&args);
1155 
1156 	/* somebody showed interst in this page - let it not expire yet */
1157 	gettimeofday(&tv, NULL);
1158 	site_change_time(site, &tv);
1159 
1160 	/* NUL terminate */
1161 	evbuffer_add(databuf, site->html_data, site->html_size);
1162 	evbuffer_add(databuf, "", 1);
1163 
1164 	escaped = evhttp_htmlescape((char *)EVBUFFER_DATA(databuf));
1165 
1166 	evbuffer_drain(databuf, -1);
1167 
1168 	HTML_PRINT("<html><head><title>raw dump</title></head><body>");
1169 	HTML_PRINT("<pre>%s</pre>", escaped);
1170 	free(escaped);
1171 	HTML_PRINT("</body></html>");
1172 
1173 	/* send along our data */
1174 	evhttp_send_reply(request, HTTP_OK, "OK", databuf);
1175 	evbuffer_free(databuf);
1176 	return;
1177 
1178 fail:
1179 	inform_cache_notfound(request, url);
1180 	evhttp_clear_headers(&args);
1181 }
1182 
1183 int
spybye_handle_request(struct evhttp_request * request,void * arg)1184 spybye_handle_request(struct evhttp_request *request, void *arg)
1185 {
1186 	char *host, *uri;
1187 	u_short port;
1188 
1189 	if (http_hostportfile(request->uri, &host, &port, &uri) == -1) {
1190 		/* if it's not fully qualified assume we can just use the uri */
1191 		uri = request->uri;
1192 	}
1193 
1194 	/*
1195 	 * this is a little bit silly, we are not taking advantage of
1196 	 * the http layer dispatch support.
1197 	 */
1198 	if (strcmp(uri, "/styles/css") == 0) {
1199 		css_server(request, arg);
1200 		return (0);
1201 	} else if (strcmp(uri, "/control.js") == 0) {
1202 		serve_control_javascript(request, arg);
1203 		return (0);
1204 	} else if (strcmp(uri, "/") == 0) {
1205 		main_server(request, arg);
1206 		return (0);
1207 	} else if (strcmp(uri, "/stats") == 0) {
1208 		stats_server(request, arg);
1209 		return (0);
1210 	} else if (strcmp(uri, "/about") == 0) {
1211 		about_server(request, arg);
1212 		return (0);
1213 	} else if (strncmp(uri, "/?", 2) == 0) {
1214 		query_server(request, arg);
1215 		return (0);
1216 	} else if (strncmp(uri, "/results/", 9) == 0) {
1217 		results_server(request, arg);
1218 		return (0);
1219 	} else if (strncmp(uri, "/cache/", 7) == 0) {
1220 		cache_server(request, arg);
1221 		return (0);
1222 	}
1223 
1224 	return (-1);
1225 }
1226 
1227 static void
status_free_patterns(struct patternq * head)1228 status_free_patterns(struct patternq *head)
1229 {
1230 	struct pattern *entry;
1231 
1232 	while ((entry = TAILQ_FIRST(head)) != NULL) {
1233 		TAILQ_REMOVE(head, entry, next);
1234 		if (entry->pattern_host != NULL)
1235 			free(entry->pattern_host);
1236 		if (entry->pattern_uri != NULL)
1237 			free(entry->pattern_uri);
1238 		free(entry);
1239 	}
1240 }
1241 
1242 static void
patterns_web_done(struct evhttp_request * request,void * arg)1243 patterns_web_done(struct evhttp_request *request, void *arg)
1244 {
1245 	struct pattern_obj *patterns = arg;
1246 	if (request == NULL || request->response_code != HTTP_OK ||
1247 	    EVBUFFER_LENGTH(request->input_buffer) == 0) {
1248 		fprintf(stderr, "[PATTERN] Failed to read patterns from %s\n",
1249 		    patterns->location);
1250 		return;
1251 	}
1252 
1253 	status_free_patterns(&patterns->head);
1254 	status_patterns(patterns, request->input_buffer);
1255 }
1256 
1257 static void
patterns_refresh(int fd,short what,void * arg)1258 patterns_refresh(int fd, short what, void *arg)
1259 {
1260 	struct timeval tv;
1261 	struct pattern_obj *patterns = arg;
1262 
1263 	timerclear(&tv);
1264 	tv.tv_sec = PATTERN_REFRESH_SECONDS;
1265 	evtimer_add(&patterns->ev_refresh, &tv);
1266 
1267 	fprintf(stderr, "[PATTERN] Refreshing patterns from %s\n",
1268 	    patterns->location);
1269 
1270 	if (strncasecmp(HTTP_PREFIX, patterns->location,
1271 		strlen(HTTP_PREFIX))) {
1272 		/* from file */
1273 		struct evbuffer *data = read_data(patterns->location);
1274 		/* xxx - need to check successful read */
1275 		status_free_patterns(&patterns->head);
1276 		status_patterns(patterns, data);
1277 		evbuffer_free(data);
1278 	} else {
1279 		if (patterns->evcon != NULL)
1280 			evhttp_connection_free(patterns->evcon);
1281 		patterns->evcon = read_from_web_prepare(patterns->location,
1282 		    patterns_web_done, patterns);
1283 	}
1284 }
1285 
1286 void
status_init(const char * goodness,const char * badness)1287 status_init(const char *goodness, const char *badness)
1288 {
1289 	struct timeval tv;
1290 	struct evbuffer *data;
1291 	SPLAY_INIT(&root);
1292 
1293 	good_patterns.location = goodness;
1294 	bad_patterns.location = badness;
1295 
1296 	evtimer_set(&good_patterns.ev_refresh,
1297 	    patterns_refresh, &good_patterns);
1298 	evtimer_set(&bad_patterns.ev_refresh,
1299 	    patterns_refresh, &bad_patterns);
1300 
1301 	timerclear(&tv);
1302 	tv.tv_sec = PATTERN_REFRESH_SECONDS;
1303 	evtimer_add(&good_patterns.ev_refresh, &tv);
1304 	evtimer_add(&bad_patterns.ev_refresh, &tv);
1305 
1306 	/* initial setup of the contents */
1307 	if (strlen(goodness)) {
1308 		data =
1309 		    strncasecmp(HTTP_PREFIX, goodness, strlen(HTTP_PREFIX)) ?
1310 		    read_data(goodness) : read_from_web(goodness);
1311 		status_good_patterns(data);
1312 		evbuffer_free(data);
1313 	}
1314 
1315 	if (strlen(badness)) {
1316 		data =
1317 		    strncasecmp(HTTP_PREFIX, badness, strlen(HTTP_PREFIX)) ?
1318 		    read_data(badness) : read_from_web(badness);
1319 		status_bad_patterns(data);
1320 		evbuffer_free(data);
1321 	}
1322 }
1323 
1324 static int
status_patterns(struct pattern_obj * data,struct evbuffer * databuf)1325 status_patterns(struct pattern_obj *data, struct evbuffer *databuf)
1326 {
1327 	char *line;
1328 	int count = 0;
1329 
1330 	while ((line = evbuffer_readline(databuf)) != NULL) {
1331 		struct pattern *pattern;
1332 		char *host = line, *uri;
1333 		if (line[0] == '#' || !strlen(line))
1334 			continue;
1335 
1336 		uri = strchr(line, ' ');
1337 		if (uri != NULL)
1338 			*uri++ = '\0';
1339 
1340 		pattern = malloc(sizeof(struct pattern));
1341 		if (pattern == NULL)
1342 			err(1, "malloc");
1343 		pattern->pattern_host = strdup(host);
1344 		if (uri != NULL)
1345 			pattern->pattern_uri = strdup(uri);
1346 		else
1347 			pattern->pattern_uri = NULL;
1348 
1349 		if (pattern->pattern_host == NULL ||
1350 		    (uri != NULL && pattern->pattern_uri == NULL))
1351 			err(1, "strdup");
1352 
1353 		TAILQ_INSERT_TAIL(&data->head, pattern, next);
1354 		DNFPRINTF(2,(stderr, "[PATTERN] Adding pattern: %s/%s\n",
1355 			host, uri));
1356 		count++;
1357 
1358 		free(line);
1359 	}
1360 	data->count = count;
1361 	gettimeofday(&data->tv_load, NULL);
1362 
1363 	return (count);
1364 }
1365 
1366 void
status_good_patterns(struct evbuffer * data)1367 status_good_patterns(struct evbuffer *data)
1368 {
1369 	int count;
1370 	TAILQ_INIT(&good_patterns.head);
1371 	count = status_patterns(&good_patterns, data);
1372 
1373 	fprintf(stderr, "[PATTERN] Added %d good patterns\n", count);
1374 }
1375 
1376 void
status_bad_patterns(struct evbuffer * data)1377 status_bad_patterns(struct evbuffer *data)
1378 {
1379 	int count;
1380 	TAILQ_INIT(&bad_patterns.head);
1381 	count = status_patterns(&bad_patterns, data);
1382 
1383 	fprintf(stderr, "[PATTERN] Added %d bad patterns\n", count);
1384 }
1385 
1386 
1387 /* error messaging */
1388 
1389 static void
inform_cache_notfound(struct evhttp_request * request,const char * url)1390 inform_cache_notfound(struct evhttp_request *request, const char *url)
1391 {
1392 	struct evbuffer *databuf = evbuffer_new();
1393 	char *html_escaped = evhttp_htmlescape(url != NULL ? url : "<unknown>");
1394 	assert(databuf != NULL);
1395 
1396 	evbuffer_add_printf(databuf,
1397 	    "<html><head><title>Cache Not Found</title></head>"
1398 	    "<body><div style=\"border: solid 1px; padding: 2px; "
1399 	    "width: 40%%; "
1400 	    "background-color: #dcdcee; font-family: Verdana, Arial;\">"
1401 	    "<h2>Cache Not Found</h2>\n"
1402 	    "The URL %s that you requested could not be found in the cache. "
1403 	    "It's possible that it could not be fetched from its web "
1404 	    "server.</div></body></html>",
1405 	    html_escaped);
1406 	free(html_escaped);
1407 
1408 	/* we cannot allow this request */
1409 	evhttp_send_reply(request, HTTP_NOTFOUND, "Not Found", databuf);
1410 	evbuffer_free(databuf);
1411 }
1412