1 /* visitors -- very fast web logs analyzer.
2  *
3  * Copyright (C) 2004-2006 Salvatore Sanfilippo <antirez@invece.org>
4  * All Rights Reserved.
5  *
6  * This software is released under the terms of the GPL license version 2.
7  * Read the COPYING file in this distribution for more details. */
8 
9 #include <stdio.h>
10 #include <stdlib.h>
11 #include <string.h>
12 #include <time.h>
13 #include <stdarg.h>
14 #include <errno.h>
15 #include <locale.h>
16 #include <ctype.h>
17 
18 #include "aht.h"
19 #include "antigetopt.h"
20 #include "sleep.h"
21 #include "blacklist.h"
22 
/* Max length of an error stored in the visitors handle */
#define VI_ERROR_MAX 1024
/* Max length of a log line */
#define VI_LINE_MAX 4096
/* Max number of filenames in the command line */
#define VI_FILENAMES_MAX 1024
/* Max number of prefixes in the command line */
#define VI_PREFIXES_MAX 1024
/* Max number of --grep --exclude patterns in the command line */
#define VI_GREP_PATTERNS_MAX 1024
/* Abbreviation length for HTML outputs */
#define VI_HTML_ABBR_LEN 100
/* Max length of a log entry date */
#define VI_DATE_MAX 64
/* Version as a string */
#define VI_VERSION_STR "0.7"
39 
40 /*------------------------------- data structures ----------------------------*/
41 
/* visitors handle.
 *
 * Allocated and initialized by vi_new() and released by vi_free().
 * Every hashtable member is set up through vi_ht_init() in vi_new()
 * and destroyed by vi_reset_hashtables(). */
struct vih {
	int startt; /* set to time(NULL) by vi_new() */
	int endt; /* set to time(NULL) by vi_new() */
	int processed; /* zeroed by vi_new() */
	int invalid; /* zeroed by vi_new() */
        int blacklisted; /* incremented by vi_is_blacklisted_url() on a match */
	int hour[24]; /* zeroed by vi_reset_combined_maps() */
	int weekday[7]; /* zeroed by vi_reset_combined_maps() */
	int weekdayhour[7][24]; /* hour and weekday combined data */
	int monthday[12][31]; /* month and day combined data */
	struct hashtable visitors;
	struct hashtable googlevisitors;
	struct hashtable pages;
	struct hashtable images;
	struct hashtable error404;
	struct hashtable pageviews;
	struct hashtable pageviews_grouped;
	struct hashtable referers;
	struct hashtable referersage;
	struct hashtable date;
	struct hashtable googledate;
        struct hashtable adsensed;
	struct hashtable month;
	struct hashtable googlemonth;
	struct hashtable agents;
	struct hashtable googled;
	struct hashtable googlevisits;
	struct hashtable googlekeyphrases;
	struct hashtable googlekeyphrasesage;
	struct hashtable trails;
	struct hashtable tld;
	struct hashtable os;
	struct hashtable browsers;
	struct hashtable robots;
        struct hashtable googlehumanlanguage;
        struct hashtable screenres;
        struct hashtable screendepth;
	char *error; /* strdup()ed by vi_set_error(); NULL when no error is set */
};
82 
/* info associated with a line of log.
 * NOTE(review): the code that fills this structure is not in this part
 * of the file; the string members presumably point to the individual
 * fields of one parsed log line -- confirm against the parser. */
struct logline {
	char *host;
	char *date;
	char *hour;
	char *timezone;
	char *req;
	char *ref;
	char *agent;
	time_t time;
	struct tm tm;
};
95 
/* output module structure. See below for the definition of
 * the text and html output modules.
 *
 * This is a table of callbacks: every callback receives the FILE
 * pointer it has to write to as first argument. The global 'Output'
 * pointer selects the module in use. */
struct outputmodule {
	void (*print_header)(FILE *fp);
	void (*print_footer)(FILE *fp);
	void (*print_title)(FILE *fp, char *title);
	void (*print_subtitle)(FILE *fp, char *title);
	void (*print_numkey_info)(FILE *fp, char *key, int val);
	void (*print_keykey_entry)(FILE *fp, char *key1, char *key2, int num);
	void (*print_numkey_entry)(FILE *fp, char *key, int val, char *link,
			int num);
	void (*print_numkeybar_entry)(FILE *fp, char *key, int max, int tot,
			int this);
	void (*print_numkeycomparativebar_entry)(FILE *fp, char *key, int tot,
			int this);
	void (*print_bidimentional_map)(FILE *fp, int xlen, int ylen,
			char **xlabel, char **ylabel, int *value);
	void (*print_hline)(FILE *fp);
	void (*print_credits)(FILE *fp);
	void (*print_report_link)(FILE *fp, char *report);
};
117 
/* Just a string with cached length.
 * Used for the Config_prefix array: vi_is_internal_link() relies on
 * 'len' to avoid calling strlen() on every prefix for every URL. */
struct vistring {
	char *str;
	int len; /* cached length of 'str' */
};
123 
/* Grep pattern for --grep --exclude.
 * Entries are created by ConfigAddGrepPattern(), that stores in 'pattern'
 * a heap allocated copy of the user pattern wrapped as "*<pattern>*". */
#define VI_PATTERNTYPE_GREP 0
#define VI_PATTERNTYPE_EXCLUDE 1
struct greppat {
    int type; /* the 'type' argument given to ConfigAddGrepPattern() */
    char *pattern;
};
131 
132 /* ---------------------- global configuration parameters ------------------- */
int Config_debug = 0;
int Config_max_referers = 20;
int Config_max_referers_age = 20;
int Config_max_pages = 20;
int Config_max_images = 20;
int Config_max_error404 = 20;
int Config_max_agents = 20;
int Config_max_googled = 20;
int Config_max_adsensed = 20;
int Config_max_google_keyphrases = 20;
int Config_max_google_keyphrases_age = 20;
int Config_max_trails = 20;
int Config_max_tld = 20;
int Config_max_robots = 20;
int Config_process_agents = 0;
int Config_process_google = 0;
int Config_process_google_keyphrases = 0;
int Config_process_google_keyphrases_age = 0;
int Config_process_google_human_language = 0;
int Config_process_web_trails = 0;
int Config_process_weekdayhour_map = 0;
int Config_process_monthday_map = 0;
int Config_process_referers_age = 0;
int Config_process_tld = 0;
int Config_process_os = 0;
int Config_process_browsers = 0;
int Config_process_error404 = 0;
int Config_process_pageviews = 0;
int Config_process_monthly_visitors = 1;
int Config_process_robots = 0;
int Config_process_screen_info = 0;
int Config_graphviz_mode = 0;
int Config_graphviz_ignorenode_google = 0;
int Config_graphviz_ignorenode_external = 0;
int Config_graphviz_ignorenode_noreferer = 0;
int Config_tail_mode = 0;
int Config_stream_mode = 0;
int Config_update_every = 60*10; /* update every 10 minutes by default. */
int Config_reset_every = 0;	/* never reset by default */
int Config_time_delta = 0;	/* adjustable time difference, in hours:
				 * parse_date() adds Config_time_delta*3600
				 * seconds to every parsed date. */
int Config_filter_spam = 0;
int Config_ignore_404 = 0;
char *Config_output_file = NULL; /* stdout if not set. */
struct outputmodule *Output = NULL; /* initialized to 'text' in main() */

/* Prefixes, tested by vi_is_internal_link() */
int Config_prefix_num = 0;	/* number of set prefixes */
struct vistring Config_prefix[VI_PREFIXES_MAX];

/* Grep/Exclude array, filled by ConfigAddGrepPattern() */
struct greppat Config_grep_pattern[VI_GREP_PATTERNS_MAX];
int Config_grep_pattern_num = 0;    /* number of set patterns */
185 
186 /*----------------------------------- Tables ---------------------------------*/
/* Two-letter weekday labels, Monday first. */
static char *vi_wdname[7] = {"Mo", "Tu", "We", "Th", "Fr", "Sa", "Su"};
/* Days per month table (February counted as 29), currently compiled out. */
#if 0
static int vi_monthdays[12] = {31, 29, 31, 30, 31, 30 , 31, 31, 30, 31, 30, 31};
#endif
191 
192 /* -------------------------------- prototypes ------------------------------ */
/* Forward declarations: vi_clear_error() is called by vi_free() before
 * its definition appears in the file. */
void vi_clear_error(struct vih *vih);
void vi_tail(int filec, char **filev);
195 
196 /*------------------- Options parsing help functions ------------------------ */
ConfigAddGrepPattern(char * pattern,int type)197 void ConfigAddGrepPattern(char *pattern, int type)
198 {
199     char *s;
200     int len = strlen(pattern);
201 
202     if (Config_grep_pattern_num == VI_GREP_PATTERNS_MAX) {
203         fprintf(stderr, "Too many grep/exclude options specified\n");
204         exit(1);
205     }
206     s = malloc(strlen(pattern)+3);
207     s[0] = '*';
208     memcpy(s+1, pattern, len);
209     s[len+1] = '*';
210     s[len+2] = '\0';
211     Config_grep_pattern[Config_grep_pattern_num].type = type;
212     Config_grep_pattern[Config_grep_pattern_num].pattern = s;
213     Config_grep_pattern_num++;
214 }
215 
216 /*------------------------------ support functions -------------------------- */
/* Tell if 's' looks like a link to Google.
 * The test is a plain prefix comparison against "http://www.google."
 * so every host starting this way is accepted, whatever follows.
 * Returns 1 when the prefix matches, 0 otherwise. */
int vi_is_google_link(char *s)
{
	static const char prefix[] = "http://www.google.";

	return strncmp(s, prefix, sizeof(prefix)-1) == 0;
}
224 
/* Returns 1 if the user agent string contains "Googlebot" or
 * "googlebot", 0 otherwise. */
int vi_is_googlebot_agent(char *agent) {
	return (strstr(agent, "Googlebot") != NULL ||
		strstr(agent, "googlebot") != NULL);
}
231 
/* Returns 1 if the user agent string contains "Mediapartners-Google"
 * (the AdSense crawler signature), 0 otherwise. */
int vi_is_adsensebot_agent(char *agent) {
	return strstr(agent, "Mediapartners-Google") != NULL;
}
237 
/* Returns 1 if the user agent string contains "Yahoo! Slurp",
 * 0 otherwise. */
int vi_is_yahoobot_agent(char *agent) {
	return strstr(agent, "Yahoo! Slurp") != NULL;
}
242 
/* Returns 1 if the user agent string contains "msn.com/msnbot.htm",
 * 0 otherwise. */
int vi_is_msbot_agent(char *agent) {
	return strstr(agent, "msn.com/msnbot.htm") != NULL;
}
247 
/* Try to guess if a given agent string belongs to a crawler/bot
 * of some kind. This function MUST be conservative, because
 * false negatives are acceptable while false positives aren't.
 *
 * Returns 1 if the agent contains one of a few well known
 * substrings, 0 otherwise. The comparison is case sensitive. */
int vi_is_genericbot_agent(char *agent) {
	static const char *const markers[] = {
		"crawler", "Crawler", "bot/", "Bot/", "bot.htm", "+http://"
	};
	size_t i;

	for (i = 0; i < sizeof(markers)/sizeof(markers[0]); i++) {
		if (strstr(agent, markers[i]) != NULL)
			return 1;
	}
	return 0;
}
260 
/* Returns 1 if the agent is recognized by one of the specific bot
 * detectors (Googlebot, AdSense bot, Yahoo bot, MSN bot), 0 otherwise.
 * The detectors are tried in this order, stopping at the first hit. */
int vi_is_bot_agent(char *agent) {
    if (vi_is_googlebot_agent(agent)) return 1;
    if (vi_is_adsensebot_agent(agent)) return 1;
    if (vi_is_yahoobot_agent(agent)) return 1;
    if (vi_is_msbot_agent(agent)) return 1;
    return 0;
}
268 
269 /* Returns non-zero if the url matches some user-specified prefix.
270  * being a link "internal" to the site. Otherwise zero is returned.
271  *
272  * When there is a match, the value returned is the length of
273  * the matching prefix. */
vi_is_internal_link(char * url)274 int vi_is_internal_link(char *url)
275 {
276 	int i, l;
277 
278 	if (!Config_prefix_num) return 0; /* no prefixes set? */
279 	l = strlen(url);
280 	for (i = 0; i < Config_prefix_num; i++) {
281 		if (Config_prefix[i].len <= l &&
282 		    !strncasecmp(url, Config_prefix[i].str,
283 			    Config_prefix[i].len))
284 		{
285 			return Config_prefix[i].len;
286 		}
287 	}
288 	return 0;
289 }
290 
/* Returns 1 if the URL 's' seems an image or another kind of static
 * resource (CSS, javascript, flash), judging by its extension.
 * Extensions are recognized only when fully lowercase or fully
 * uppercase. Strings shorter than five characters are never
 * considered images. Otherwise 0 is returned. */
int vi_is_image(char *s)
{
	static const char *const ext[] = {
		".css", ".jpg", ".gif", ".png", ".ico", ".swf", ".js", ".jpeg",
		".CSS", ".JPG", ".GIF", ".PNG", ".ICO", ".SWF", ".JS", ".JPEG"
	};
	int len = strlen(s);
	size_t i;

	/* Five is the length of the longest extension: with this guard
	 * no comparison below can start before the beginning of 's'. */
	if (len < 5) return 0;
	for (i = 0; i < sizeof(ext)/sizeof(ext[0]); i++) {
		size_t extlen = strlen(ext[i]);

		if (memcmp(s + len - extlen, ext[i], extlen) == 0)
			return 1;
	}
	return 0;
}
316 
/* Returns 1 if the URL 's' seems a real page, 0 otherwise.
 *
 * An URL is considered a page if:
 *  - it ends with a slash, or
 *  - it ends with one of the known page extensions (recognized when
 *    fully lowercase or fully uppercase), or
 *  - its last path component has no extension at all, that is, there
 *    is no '.' in the string or the last '/' follows the last '.'.
 *
 * The empty string has no extension and so is reported as a page. */
int vi_is_pageview(char *s)
{
	static const char *const ext[] = {
		".html", ".htm", ".php", ".asp", ".jsp", ".xdl", ".xhtml",
		".xml", ".cgi", ".pl", ".shtml",
		".HTML", ".HTM", ".PHP", ".ASP", ".JSP", ".XDL", ".XHTML",
		".XML", ".CGI", ".PL", ".SHTML"
	};
	size_t len = strlen(s);
	size_t i;
	char *dot, *slash;

	/* Check the length first: s[len-1] is not valid for "". */
	if (len > 0 && s[len-1] == '/') return 1;
	for (i = 0; i < sizeof(ext)/sizeof(ext[0]); i++) {
		size_t extlen = strlen(ext[i]);

		/* Every extension is compared for its full length, and
		 * only if the string is long enough to contain it. */
		if (len >= extlen &&
		    memcmp(s + len - extlen, ext[i], extlen) == 0)
			return 1;
	}
	dot = strrchr(s, '.');
	if (!dot) return 1;
	slash = strrchr(s, '/');
	if (slash && slash > dot) return 1;
	return 0;
}
354 
/* Returns 1 if 'ip' seems a string representing an IP address
 * like "1.2.3.4", 0 otherwise. Since 'ip' is always an IP or a
 * hostname, the function just checks that every character of the
 * string is in the "[0-9.]" set. The empty string passes the test. */
int vi_is_numeric_address(char *ip)
{
	const char *p;

	for (p = ip; *p != '\0'; p++) {
		if (*p != '.' && (*p < '0' || *p > '9'))
			return 0;
	}
	return 1;
}
364 
365 /* returns the time converted into a time_t value.
366  * On error (time_t) -1 is returned.
367  * Note that this function is specific for the following format:
368  * "10/May/2004:04:15:33". Works if the month is not an abbreviation, or if the
369  * year is abbreviated to only the last two digits.
370  * The time can be omitted like in "10/May/2004". */
parse_date(char * s,struct tm * tmptr)371 time_t parse_date(char *s, struct tm *tmptr)
372 {
373 	struct tm tm;
374 	time_t t;
375 	char *months[] = {
376 		"jan", "feb", "mar", "apr", "may", "jun",
377 		"jul", "aug", "sep", "oct", "nov", "dec",
378 	};
379 	char *day, *month, *year, *time = NULL;
380 	char monthaux[32];
381 	int i, len;
382 
383 	/* make a copy to mess with it */
384 	len = strlen(s);
385 	if (len >= 32) goto fmterr;
386 	memcpy(monthaux, s, len);
387 	monthaux[len] = '\0';
388 
389 	/* Inizialize the tm structure. We just fill three fields */
390 	tm.tm_sec = 0;
391 	tm.tm_min = 0;
392 	tm.tm_hour = 0;
393 	tm.tm_mday = 0;
394 	tm.tm_mon = 0;
395 	tm.tm_year = 0;
396 	tm.tm_wday = 0;
397 	tm.tm_yday = 0;
398 	tm.tm_isdst = -1;
399 
400 	/* search delimiters */
401 	day = monthaux;
402 	if ((month = strchr(day, '/')) == NULL) goto fmterr;
403 	*month++ = '\0';
404 	if ((year = strchr(month, '/')) == NULL) goto fmterr;
405 	*year++ = '\0';
406 	/* time, optional for this parser. */
407 	if ((time = strchr(year, ':')) != NULL) {
408 		*time++ = '\0';
409 	}
410 	/* convert day */
411 	tm.tm_mday = atoi(day);
412 	if (tm.tm_mday < 1 || tm.tm_mday > 31) goto fmterr;
413 	/* convert month */
414 	if (strlen(month) < 3) goto fmterr;
415 	month[0] = tolower(month[0]);
416 	month[1] = tolower(month[1]);
417 	month[2] = tolower(month[2]);
418 	for (i = 0; i < 12; i++) {
419 		if (memcmp(month, months[i], 3) == 0) break;
420 	}
421 	if (i == 12) goto fmterr;
422 	tm.tm_mon = i;
423 	/* convert year */
424 	tm.tm_year = atoi(year);
425 	if (tm.tm_year > 100) {
426 		if (tm.tm_year < 1900 || tm.tm_year > 2500) goto fmterr;
427 		tm.tm_year -= 1900;
428 	} else {
429 		/* if the year is in two-digits form, the 0 - 68 range
430 		 * is converted to 2000 - 2068 */
431 		if (tm.tm_year < 69)
432 			tm.tm_year += 100;
433 	}
434 	/* convert time */
435 	if (time) { /* format is HH:MM:SS */
436 		if (strlen(time) < 8) goto fmterr;
437 		tm.tm_hour = ((time[0]-'0')*10)+(time[1]-'0');
438 		if (tm.tm_hour < 0 || tm.tm_hour > 23) goto fmterr;
439 		tm.tm_min = ((time[3]-'0')*10)+(time[4]-'0');
440 		if (tm.tm_min < 0 || tm.tm_min > 59) goto fmterr;
441 		tm.tm_sec = ((time[6]-'0')*10)+(time[7]-'0');
442 		if (tm.tm_sec < 0 || tm.tm_sec > 60) goto fmterr;
443 	}
444 	t = mktime(&tm);
445 	if (t == (time_t)-1) goto fmterr;
446 	t += (Config_time_delta*3600);
447 	if (tmptr) {
448 		struct tm *auxtm;
449 
450 		if ((auxtm = localtime(&t)) != NULL)
451 			*tmptr = *auxtm;
452 	}
453 	return t;
454 
455 fmterr: /* format error */
456 	return (time_t) -1;
457 }
458 
/* Returns 1 if the given date, in the format accepted by parse_date(),
 * is a Saturday or a Sunday. Zero is returned otherwise, and also when
 * the date can't be parsed. */
int vi_is_weekend(char *s)
{
	struct tm tm;

	if (parse_date(s, &tm) == (time_t)-1)
		return 0;
	/* tm_wday counts the days since Sunday. */
	return tm.tm_wday == 0 || tm.tm_wday == 6;
}
471 
472 #if 0
/* Returns 1 if 'year' is a leap year, 0 otherwise: a year is leap if
 * it is a multiple of 4, with the exception of the multiples of 100
 * that are not also multiples of 400. */
int isleap(int year)
{
	if (year % 4 != 0) return 0;
	if (year % 100 != 0) return 1;
	return year % 400 == 0;
}
483 #endif
484 
/* URL decoding and white spaces trimming function.
 * Input: the encoded string 's'.
 * Output: the decoded string written at 'd' that has room for at least 'n'
 * bytes of data, nul term included. The output is truncated if needed
 * and is always nul terminated when 'n' is at least 1; nothing is
 * written if 'n' is less than 1.
 *
 * '+' is decoded as a space, "%XX" as the byte with hex value XX.
 * A '%' that is not followed by two hex digits is copied verbatim.
 * Spaces at the start and at the end of the decoded string are
 * removed. */
void vi_urldecode(char *d, char *s, int n)
{
	char *start = d;

	if (n < 1) return;
	while(*s && n > 1) {
		int c = (unsigned char) *s;

		if (c == '+') {
			c = ' ';
		} else if (c == '%' &&
			   isxdigit((unsigned char) s[1]) &&
			   isxdigit((unsigned char) s[2]))
		{
			/* isxdigit() fails on the nul term, so s[2] is
			 * never read past the end of the string. */
			int high = toupper((unsigned char) s[1]);
			int low = toupper((unsigned char) s[2]);

			high = (high <= '9') ? high - '0' : (high - 'A') + 10;
			low = (low <= '9') ? low - '0' : (low - 'A') + 10;
			c = (high << 4)+low;
			s += 2;
		}
		/* Left trim: skip spaces as long as nothing was written. */
		if (c != ' ' || d != start) {
			*d++ = c;
			n--;
		}
		s++;
	}
	*d = '\0';
	/* Right trim, without ever moving 'd' before 'start'. */
	while (d > start && d[-1] == ' ')
		*--d = '\0';
}
525 
/* URL encoding function
 * Input: the unencoded string 's'.
 * Output: the url-encoded string written at 'd' that has room for at least 'n'
 * bytes of data, nul term included. Nothing is written if 'n' is less
 * than 1.
 *
 * ASCII letters and digits are copied verbatim, the space is encoded
 * as '+', the newline as "%0d%0a", every other byte as "%xx" with
 * lowercase hex digits. The encoding stops when the next encoded
 * character does not fit in the output: sequences are never truncated
 * in the middle. */
void vi_urlencode(char *d, char *s, int n)
{
	static const char hexset[] = "0123456789abcdef";
	int room;

	if (n < 1) return;
	/* One byte is reserved for the nul term. */
	for (room = n-1; *s != '\0' && room > 0; s++) {
		int c = *s;

		if (c == ' ') {
			*d++ = '+';
			room--;
		} else if (c == '\n') {
			if (room < 6) break;
			memcpy(d, "%0d%0a", 6);
			d += 6;
			room -= 6;
		} else if ((c >= '0' && c <= '9') ||
			   (c >= 'a' && c <= 'z') ||
			   (c >= 'A' && c <= 'Z'))
		{
			*d++ = c;
			room--;
		} else {
			unsigned int t = (unsigned) c;

			if (room < 3) break;
			*d++ = '%';
			*d++ = hexset[(t >> 4) & 0x0F];
			*d++ = hexset[t & 0x0F];
			room -= 3;
		}
	}
	*d = '\0';
}
565 
/* Convert a nul-term string to lowercase in place.
 * Every character is converted to unsigned char before being passed
 * to tolower(), whose behavior is undefined for negative values
 * (as bytes >= 0x80 are where plain char is signed). */
void vi_strtolower(char *s)
{
	for (; *s != '\0'; s++)
		*s = (char) tolower((unsigned char) *s);
}
574 
/* Note: the following functions vi_strlcpy and vi_strlcat are (possibly)
 * modified versions of OpenBSD's functions. Original copyright notice:
 * Copyright (c) 1998 Todd C. Miller <Todd.Miller@courtesan.com>
 * Originally under the BSD license. */

/* Copy 'src' to 'dst', that is 'siz' bytes long. At most siz-1
 * characters are copied and the result is always nul terminated,
 * unless 'siz' is zero. Returns the length of 'src': if the returned
 * value is >= siz the output was truncated. */
int vi_strlcpy(char *dst, char *src, int siz)
{
        const char *in = src;
        char *out = dst;
        int room = siz;

        if (room != 0) {
                /* Copy as many bytes as will fit */
                while (--room != 0) {
                        if ((*out++ = *in++) == '\0')
                                return(in - src - 1); /* whole string copied */
                }
                /* Not enough room in dst, add NUL */
                *out = '\0';
        }
        /* Traverse the rest of src in order to compute its length */
        while (*in++)
                ;
        return(in - src - 1);   /* count does not include NUL */
}
601 
/* Append 'src' to the string in 'dst', a buffer of 'siz' bytes. The
 * result is nul terminated as long as 'dst' was terminated within
 * its first 'siz' bytes. Returns the length of the string the function
 * tried to create, that is the initial length of 'dst' (but no more
 * than 'siz') plus the length of 'src': if the returned value is
 * >= siz the output was truncated. */
int vi_strlcat(char *dst, const char *src, int siz)
{
        size_t room = siz;
        size_t used = 0;
        size_t left;
        const char *in;
        char *out;

        /* Find the end of dst but don't go past the end of the buffer */
        while (used < room && dst[used] != '\0')
                used++;
        out = dst + used;
        left = room - used;

        if (left == 0)
                return(used + strlen(src));
        for (in = src; *in != '\0'; in++) {
                /* The last free byte is reserved for the NUL */
                if (left != 1) {
                        *out++ = *in;
                        left--;
                }
        }
        *out = '\0';

        return(used + (in - src));      /* count does not include NUL */
}
628 
629 /* Returns non-zero if the url matches one of the keywords in
630  * blacklist.h, otherwise zero is returned. Warning!!! This function
631  * run time is proportional to the size of blacklist.h, so it is
632  * very slow. */
vi_is_blacklisted_url(struct vih * vih,char * url)633 int vi_is_blacklisted_url(struct vih *vih, char *url)
634 {
635     unsigned int i;
636 
637     for (i = 0; i < VI_BLACKLIST_LEN; i++) {
638         if (strstr(url, vi_blacklist[i])) {
639             vih->blacklisted++;
640             return 1;
641         }
642     }
643     return 0;
644 }
645 
/* Glob-style pattern matching.
 *
 * Match the first 'stringLen' bytes of 'string' against the first
 * 'patternLen' bytes of 'pattern'. If 'nocase' is non-zero the
 * comparison of literal characters and ranges is case insensitive.
 * Returns 1 if the whole string matches the whole pattern, otherwise 0.
 *
 * Supported syntax:
 *   *        matches every sequence of characters, including the empty one
 *   ?        matches exactly one character
 *   [abc]    matches one character in the set; [^abc] negates the set,
 *            [a-z] is a range, \ escapes the next character of the set
 *   \x       matches the character x literally
 *
 * The function never reads 'pattern' and 'string' beyond the given
 * lengths, so the two buffers are not required to be nul terminated.
 * Range bounds are compared as unsigned char values. */
int vi_match_len(const char *pattern, int patternLen,
        const char *string, int stringLen, int nocase)
{
    while(patternLen) {
        switch(pattern[0]) {
        case '*':
            /* Consecutive stars are the same as a single one. */
            while (patternLen > 1 && pattern[1] == '*') {
                pattern++;
                patternLen--;
            }
            if (patternLen == 1)
                return 1; /* match */
            /* Try to match the rest of the pattern against every
             * suffix of the string. */
            while(stringLen) {
                if (vi_match_len(pattern+1, patternLen-1,
                            string, stringLen, nocase))
                    return 1; /* match */
                string++;
                stringLen--;
            }
            return 0; /* no match */
        case '?':
            if (stringLen == 0)
                return 0; /* no match */
            string++;
            stringLen--;
            break;
        case '[':
        {
            int not, match;

            /* A class always consumes one character: without this
             * test a negated class was able to "match" the end of the
             * string, turning stringLen into a negative value. */
            if (stringLen == 0)
                return 0; /* no match */
            pattern++;
            patternLen--;
            not = patternLen > 0 && pattern[0] == '^';
            if (not) {
                pattern++;
                patternLen--;
            }
            match = 0;
            while(1) {
                if (patternLen == 0) {
                    /* Class not terminated: handle it as if it was
                     * closed just after the last character. */
                    pattern--;
                    patternLen++;
                    break;
                } else if (pattern[0] == '\\' && patternLen >= 2) {
                    pattern++;
                    patternLen--;
                    if (pattern[0] == string[0])
                        match = 1;
                } else if (pattern[0] == ']') {
                    break;
                } else if (patternLen >= 3 && pattern[1] == '-') {
                    int start = (unsigned char) pattern[0];
                    int end = (unsigned char) pattern[2];
                    int c = (unsigned char) string[0];
                    if (start > end) {
                        int t = start;
                        start = end;
                        end = t;
                    }
                    if (nocase) {
                        start = tolower(start);
                        end = tolower(end);
                        c = tolower(c);
                    }
                    pattern += 2;
                    patternLen -= 2;
                    if (c >= start && c <= end)
                        match = 1;
                } else {
                    if (!nocase) {
                        if (pattern[0] == string[0])
                            match = 1;
                    } else {
                        if (tolower((unsigned char)pattern[0]) ==
                            tolower((unsigned char)string[0]))
                            match = 1;
                    }
                }
                pattern++;
                patternLen--;
            }
            if (not)
                match = !match;
            if (!match)
                return 0; /* no match */
            string++;
            stringLen--;
            break;
        }
        case '\\':
            if (patternLen >= 2) {
                pattern++;
                patternLen--;
            }
            /* fall through */
        default:
            if (stringLen == 0)
                return 0; /* no match */
            if (!nocase) {
                if (pattern[0] != string[0])
                    return 0; /* no match */
            } else {
                if (tolower((unsigned char)pattern[0]) !=
                    tolower((unsigned char)string[0]))
                    return 0; /* no match */
            }
            string++;
            stringLen--;
            break;
        }
        pattern++;
        patternLen--;
        if (stringLen == 0) {
            /* The string is over: what is left of the pattern can
             * match only if it is composed just of stars. */
            while(patternLen > 0 && *pattern == '*') {
                pattern++;
                patternLen--;
            }
            break;
        }
    }
    if (patternLen == 0 && stringLen == 0)
        return 1;
    return 0;
}
768 
/* Like vi_match_len but handier when used against nul-term strings:
 * the lengths of 'pattern' and 'string' are computed with strlen(). */
int vi_match(const char *pattern, const char *string, int nocase)
{
    return vi_match_len(pattern, strlen(pattern),
            string, strlen(string), nocase);
}
776 
777 /*-------------------------- visitors handler functions --------------------- */
/* Init the hashtable with methods suitable for an "occurrences counter":
 * keys are hashed and compared as strings and are released using the
 * ht_destructor_free method, while no destructor is set for the values
 * (see vi_counter_incr(): the value pointer itself is used as counter,
 * so there is nothing to free). */
void vi_ht_init(struct hashtable *ht)
{
	ht_init(ht);
	ht_set_hash(ht, ht_hash_string);
	ht_set_key_destructor(ht, ht_destructor_free);
	ht_set_val_destructor(ht, ht_no_destructor);
	ht_set_key_compare(ht, ht_compare_string);
}
787 
788 /* Reset the weekday/hour info in the visitors handler. */
vi_reset_combined_maps(struct vih * vih)789 void vi_reset_combined_maps(struct vih *vih)
790 {
791 	int i, j;
792 
793 	for (i = 0; i < 24; i++) {
794 		vih->hour[i] = 0;
795 		for (j = 0; j < 7; j++)
796 			vih->weekdayhour[j][i] = 0;
797 	}
798 	for (i = 0; i < 7; i++) vih->weekday[i] = 0;
799 	for (i = 0; i < 31; i++)
800 		for (j = 0; j < 12; j++)
801 			vih->monthday[j][i] = 0;
802 }
803 
804 /* Reset the hashtables from the handler, that are left
805  * in a reusable state (but all empty). */
vi_reset_hashtables(struct vih * vih)806 void vi_reset_hashtables(struct vih *vih)
807 {
808 	ht_destroy(&vih->visitors);
809 	ht_destroy(&vih->googlevisitors);
810 	ht_destroy(&vih->pages);
811 	ht_destroy(&vih->images);
812 	ht_destroy(&vih->error404);
813 	ht_destroy(&vih->pageviews);
814 	ht_destroy(&vih->pageviews_grouped);
815 	ht_destroy(&vih->referers);
816 	ht_destroy(&vih->referersage);
817 	ht_destroy(&vih->agents);
818 	ht_destroy(&vih->googled);
819 	ht_destroy(&vih->adsensed);
820 	ht_destroy(&vih->googlekeyphrases);
821 	ht_destroy(&vih->googlekeyphrasesage);
822 	ht_destroy(&vih->googlevisits);
823 	ht_destroy(&vih->trails);
824 	ht_destroy(&vih->tld);
825 	ht_destroy(&vih->os);
826 	ht_destroy(&vih->browsers);
827 	ht_destroy(&vih->date);
828 	ht_destroy(&vih->googledate);
829 	ht_destroy(&vih->month);
830 	ht_destroy(&vih->googlemonth);
831 	ht_destroy(&vih->robots);
832 	ht_destroy(&vih->googlehumanlanguage);
833 	ht_destroy(&vih->screenres);
834 	ht_destroy(&vih->screendepth);
835 }
836 
/* Reset handler information to support the --reset option in
 * stream mode: the time-based counters are zeroed and all the
 * hashtables are emptied. The other fields of the handle (processed,
 * invalid, blacklisted, startt, endt, error) are not touched. */
void vi_reset(struct vih *vih)
{
	vi_reset_combined_maps(vih);
	vi_reset_hashtables(vih);
}
844 
845 /* Return a new visitors handle.
846  * On out of memory NULL is returned.
847  * The handle obtained with this call must be released with vi_free()
848  * when no longer useful. */
vi_new(void)849 struct vih *vi_new(void)
850 {
851 	struct vih *vih;
852 
853 	if ((vih = malloc(sizeof(*vih))) == NULL)
854 		return NULL;
855 	/* Initialization */
856 	vih->startt = vih->endt = time(NULL);
857 	vih->processed = 0;
858 	vih->invalid = 0;
859         vih->blacklisted = 0;
860 	vi_reset_combined_maps(vih);
861 	vih->error = NULL;
862 	vi_ht_init(&vih->visitors);
863 	vi_ht_init(&vih->googlevisitors);
864 	vi_ht_init(&vih->pages);
865 	vi_ht_init(&vih->images);
866 	vi_ht_init(&vih->error404);
867 	vi_ht_init(&vih->pageviews);
868 	vi_ht_init(&vih->pageviews_grouped);
869 	vi_ht_init(&vih->referers);
870 	vi_ht_init(&vih->referersage);
871 	vi_ht_init(&vih->agents);
872 	vi_ht_init(&vih->googled);
873 	vi_ht_init(&vih->adsensed);
874 	vi_ht_init(&vih->googlevisits);
875 	vi_ht_init(&vih->googlekeyphrases);
876 	vi_ht_init(&vih->googlekeyphrasesage);
877 	vi_ht_init(&vih->trails);
878 	vi_ht_init(&vih->tld);
879 	vi_ht_init(&vih->os);
880 	vi_ht_init(&vih->browsers);
881 	vi_ht_init(&vih->date);
882 	vi_ht_init(&vih->month);
883 	vi_ht_init(&vih->googledate);
884 	vi_ht_init(&vih->googlemonth);
885 	vi_ht_init(&vih->robots);
886 	vi_ht_init(&vih->googlehumanlanguage);
887 	vi_ht_init(&vih->screenres);
888 	vi_ht_init(&vih->screendepth);
889 	return vih;
890 }
891 
/* Free a handle created with vi_new(): the hashtables are destroyed,
 * the error is cleared, and finally the handle itself is released.
 * Passing NULL is allowed and does nothing. */
void vi_free(struct vih *vih)
{
	if (vih != NULL) {
		vi_reset_hashtables(vih);
		vi_clear_error(vih);
		free(vih);
	}
}
900 
901 /* Add a new entry in the counter hashtable. If the key does not
902  * exists creates a new entry with "1" as number of hits, otherwise
903  * increment the old value.
904  *
905  * Return the value of hits after the increment or creation. If the
906  * returned value is greater than one, the key was already seen.
907  *
908  * Return 0 on out of memory.
909  *
910  * NOTE: the pointer of the "value" part of the hashtable entry is
911  * used as a counter casting it to a "long" integer. */
vi_counter_incr(struct hashtable * ht,char * key)912 int vi_counter_incr(struct hashtable *ht, char *key)
913 {
914 	char *k;
915 	unsigned int idx;
916 	int r;
917 	long val;
918 
919 	r = ht_search(ht, key, &idx);
920 	if (r == HT_NOTFOUND) {
921 		k = strdup(key);
922 		if (k == NULL) return 0;
923 		if (ht_add(ht, k, (void*)1) != HT_OK) {
924 			free(k);
925 			return 0;
926 		}
927 		return 1;
928 	} else {
929 		val = (long) ht_value(ht, idx);
930 		val++;
931 		ht_value(ht, idx) = (void*) val;
932 		return val;
933 	}
934 }
935 
936 /* Similar to vi_counter_incr, but only read the old value of
937  * the counter without to alter it. If the specified key does not
938  * exists zero is returned. */
vi_counter_val(struct hashtable * ht,char * key)939 int vi_counter_val(struct hashtable *ht, char *key)
940 {
941 	unsigned int idx;
942 	int r;
943 	long val;
944 
945 	r = ht_search(ht, key, &idx);
946 	if (r == HT_NOTFOUND) {
947 		return 0;
948 	} else {
949 		val = (long) ht_value(ht, idx);
950 		return val;
951 	}
952 }
953 
954 /* Set a key/value pair inside the hash table with
955  * a create-else-replace semantic.
956  *
957  * Return non-zero on out of memory. */
vi_replace(struct hashtable * ht,char * key,char * value)958 int vi_replace(struct hashtable *ht, char *key, char *value)
959 {
960 	char *k, *v;
961 
962 	k = strdup(key);
963 	v = strdup(value);
964 	if (!k || !v) goto err;
965 	if (ht_replace(ht, k, v) != HT_OK)
966 		goto err;
967 	return 0;
968 err:
969 	if (k) free(k);
970 	if (v) free(v);
971 	return 1;
972 }
973 
974 /* Replace the time value of the given key with the new one if this
975  * is newer/older of the old one. If the key is new, it's just added
976  * to the hash table with the specified time as value.
977  *
978  * If the 'ifolder' flag is set, values are replaced with older one,
979  * otherwise with newer.
980  * This function is only used by wrappers replace_if_older() and
981  * replace_if_newer().
982  *
983  * Return 0 on success, non-zero on out of memory. */
vi_replace_time(struct hashtable * ht,char * key,time_t time,int ifolder)984 int vi_replace_time(struct hashtable *ht, char *key, time_t time, int ifolder)
985 {
986 	char *k = NULL;
987 	unsigned int idx;
988 	int r;
989 
990 	r = ht_search(ht, key, &idx);
991 	if (r == HT_NOTFOUND) {
992 		k = strdup(key);
993 		if (!k) goto err;
994 		if (ht_add(ht, k, (void*)time) != HT_OK) goto err;
995 	} else {
996 		time_t oldt = (time_t) ht_value(ht, idx);
997 		/* Update the date if this one is older/nwer. */
998 		if (ifolder) {
999 			if (time < oldt)
1000 				ht_value(ht, idx) = (void*) time;
1001 		} else {
1002 			if (time > oldt)
1003 				ht_value(ht, idx) = (void*) time;
1004 		}
1005 	}
1006 	return 0;
1007 err:
1008 	if (k) free(k);
1009 	return 1;
1010 }
1011 
/* Keep the oldest time seen for 'key': thin wrapper that calls
 * vi_replace_time() with the 'ifolder' flag set.
 * Return 0 on success, non-zero on out of memory. */
int vi_replace_if_older(struct hashtable *ht, char *key, time_t time)
{
	const int keep_older = 1;

	return vi_replace_time(ht, key, time, keep_older);
}
1017 
/* Keep the newest time seen for 'key': thin wrapper that calls
 * vi_replace_time() with the 'ifolder' flag cleared.
 * Return 0 on success, non-zero on out of memory. */
int vi_replace_if_newer(struct hashtable *ht, char *key, time_t time)
{
	const int keep_older = 0;

	return vi_replace_time(ht, key, time, keep_older);
}
1023 
1024 /* Set an error in the visitors handle */
vi_set_error(struct vih * vih,char * fmt,...)1025 void vi_set_error(struct vih *vih, char *fmt, ...)
1026 {
1027 	va_list ap;
1028 	char buf[VI_ERROR_MAX];
1029 
1030 	va_start(ap, fmt);
1031 	vsnprintf(buf, VI_ERROR_MAX, fmt, ap);
1032 	buf[VI_ERROR_MAX-1] = '\0';
1033 	free(vih->error);
1034 	vih->error = strdup(buf);
1035 	va_end(ap);
1036 }
1037 
1038 /* Get the error */
vi_get_error(struct vih * vih)1039 char *vi_get_error(struct vih *vih)
1040 {
1041 	if (!vih->error) {
1042 		return "No error";
1043 	}
1044 	return vih->error;
1045 }
1046 
1047 /* Clear the error */
vi_clear_error(struct vih * vih)1048 void vi_clear_error(struct vih *vih)
1049 {
1050 	free(vih->error);
1051 	vih->error = NULL;
1052 }
1053 
1054 /*----------------------------------- parsing   ----------------------------- */
1055 /* Parse a line of log, and fill the logline structure with
1056  * appropriate values. On error (bad line format) non-zero is returned. */
vi_parse_line(struct logline * ll,char * l)1057 int vi_parse_line(struct logline *ll, char *l)
1058 {
1059 	char *date, *hour, *timezone, *host, *agent, *req, *ref, *p;
1060 	char *agent_start = NULL, *req_end = NULL, *ref_end = NULL;
1061         int agent_without_parens = 0;
1062 
1063 	/* Seek the start of the different components */
1064 
1065 	/* host */
1066 	host = l;
1067 	/* date */
1068 	if ((date = strchr(l, '[')) == NULL) return 1;
1069 	date++;
1070 	/* Identify user-agent start char. */
1071 	if ((agent = strchr(l, '(')) == NULL) {
1072                 /* Bad... user agent without (...) string, makes
1073                  * the detection a bit slower and guessworkish. */
1074 
1075                 /* Check if the count of '"' chars in the string
1076                  * is equal to six. If so, it's very likely that the
1077                  * last field inside "" is the User Agent string, so
1078                  * we get it. */
1079                 char *aux = l, *last = NULL;
1080                 int count = 0;
1081 
1082                 /* Count '"' chars, save the last occurence found. */
1083                 while (*aux) {
1084                     if (*aux == '"') {
1085                         count++;
1086                         last = aux;
1087                     }
1088                     aux++;
1089                 }
1090 
1091                 if (count == 6) {
1092                     /* Ok! it seems like Combined log format.
1093                      * Set a flag and get it later when the
1094                      * rest of the log file is splitted. Now it's
1095                      * too early to add \0 chars inside the line. */
1096                     agent_without_parens = 1;
1097                     agent_start = last-1;
1098                     while(*agent_start != '"')
1099                         agent_start--;
1100                 } else {
1101                     /* No way... no user agent detected in this line. */
1102 		    agent = "";
1103                 }
1104 	} else {
1105                 /* User agent with () inside. Simple to detect, just
1106                  * search the left and the right '"' chars enclosing
1107                  * it. */
1108 		p = agent;
1109 		while (p >= l) {
1110 			if (*p == '"') {
1111 				agent_start = p;
1112 				break;
1113 			}
1114 			p--;
1115 		}
1116 	}
1117 	/* req */
1118 	if ((req = strstr(l, "\"GET")) != NULL ||
1119 	    (req = strstr(l, "\"POST")) != NULL ||
1120 	    (req = strstr(l, "\"HEAD")) != NULL ||
1121 	    (req = strstr(l, "\"get")) != NULL ||
1122 	    (req = strstr(l, "\"post")) != NULL ||
1123 	    (req = strstr(l, "\"head")) != NULL)
1124 	{
1125 		req++;
1126 	} else {
1127 		req = "";
1128 	}
1129 	/* ref */
1130 	if ((ref = strstr(l, "\"http")) != NULL ||
1131 	    (ref = strstr(l, "\"HTTP")) != NULL)
1132 	{
1133 		ref++;
1134 	} else {
1135 		ref = "";
1136 	}
1137 
1138 	/* Nul-term the components */
1139 
1140 	/* host */
1141 	if ((p = strchr(host, ' ')) == NULL) return 1;
1142 	*p = '\0';
1143 	/* date */
1144 	if ((p = strchr(date, ']')) == NULL) return 1;
1145 	*p = '\0';
1146 	ll->time = parse_date(date, &ll->tm);
1147 	if (ll->time == (time_t)-1) return 1;
1148 	/* hour */
1149 	if ((p = strchr(date, ':')) == NULL) return 1;
1150 	hour = p+1;
1151 	*p = '\0';
1152 	/* timezone */
1153 	if ((p = strchr(hour, ' ')) == NULL) return 1;
1154 	timezone = p+1;
1155 	*p = '\0';
1156 	/* req */
1157 	if ((p = strchr(req, '"')) == NULL) {
1158 		req = "";
1159 	} else {
1160 		req_end = p;
1161 		*p = '\0';
1162 		if ((p = strchr(req, ' ')) != NULL) {
1163 			req = p+1;
1164 			if ((p = strchr(req, ' ')) != NULL)
1165 				*p = '\0';
1166 		}
1167 	}
1168 	/* ref */
1169 	if ((p = strchr(ref, '"')) == NULL) {
1170 		ref = "";
1171 	} else {
1172 		ref_end = p;
1173 		*p = '\0';
1174 	}
1175 	/* agent */
1176         if (agent_without_parens) {
1177             /* User agent without (...) inside in a string with six '"' chars.
1178              * Just search for the end. */
1179             char *aux = strchr(agent_start+1, '"');
1180             if (!aux) {
1181                 /* No way! */
1182                 agent = "";
1183             } else {
1184                 *aux = '\0';
1185                 agent = agent_start+1;
1186             }
1187         } else if ((p = strchr(agent, ')')) == NULL) {
1188 		agent = "";
1189 	} else {
1190 		char *aux;
1191 
1192 		aux = strchr(p, '"');
1193 		if (aux)
1194 			*aux = '\0';
1195 		else
1196 			*(p+1) = '\0';
1197 		if (agent_start) {
1198 			if ((!req_end || (req_end != agent_start)) &&
1199 			    (!ref_end || (ref_end != agent_start))) {
1200 				agent = agent_start+1;
1201 			}
1202 		}
1203 	}
1204 
1205 	/* Fill the struture */
1206 	ll->host = host;
1207 	ll->date = date;
1208 	ll->hour = hour;
1209 	ll->timezone = timezone;
1210 	ll->agent = agent;
1211 	ll->req = req;
1212 	ll->ref = ref;
1213 	return 0;
1214 }
1215 
1216 /* process the weekday and hour information */
vi_process_date_and_hour(struct vih * vih,int weekday,int hour)1217 void vi_process_date_and_hour(struct vih *vih, int weekday, int hour)
1218 {
1219 	/* Note, the following sanity check is useless in theory. */
1220 	if (weekday < 0 || weekday > 6 || hour < 0 || hour > 23) return;
1221 	vih->weekday[weekday]++;
1222 	vih->hour[hour]++;
1223 	/* store the combined info. We always compute this information
1224 	 * even if the report is disabled because it's cheap. */
1225 	vih->weekdayhour[weekday][hour]++;
1226 }
1227 
1228 /* process the month and day information */
vi_process_month_and_day(struct vih * vih,int month,int day)1229 void vi_process_month_and_day(struct vih *vih, int month, int day)
1230 {
1231 	if (month < 0 || month > 11 || day < 0 || day > 30) return;
1232 	vih->monthday[month][day]++;
1233 }
1234 
1235 /* Process unique visitors populating the relative hash table.
1236  * Return non-zero on out of memory. This is also used to populate
1237  * the hashtable used for the "pageviews per user" statistics.
1238  *
1239  * Note that the last argument 'seen', is an integer passed by reference
1240  * that is set to '1' if this is not a new visit (otherwise it's set to zero) */
vi_process_visitors_per_day(struct vih * vih,char * host,char * agent,char * date,char * ref,char * req,int * seen)1241 int vi_process_visitors_per_day(struct vih *vih, char *host, char *agent, char *date, char *ref, char *req, int *seen)
1242 {
1243 	char visday[VI_LINE_MAX], *p, *month = "fixme if I'm here!";
1244         char buf[64];
1245 	int res, host_len, agent_len, date_len, hash_len;
1246         unsigned long h;
1247 
1248         /* Ignore visits from Bots */
1249         if (vi_is_bot_agent(agent)) {
1250             if (seen != NULL) seen = 0;
1251             return 0;
1252         }
1253 
1254         /* Build an unique identifier for this visit
1255          * adding together host, date and hash(user agent) */
1256 	host_len = strlen(host);
1257 	agent_len = strlen(agent);
1258 	date_len = strlen(date);
1259         h = djb_hash((unsigned char*) agent, agent_len);
1260         sprintf(buf, "%lu", h);
1261         hash_len = strlen(buf);
1262 	if (host_len+agent_len+date_len+4 > VI_LINE_MAX)
1263 		return 0;
1264 	p = visday;
1265 	memcpy(p, host, host_len); p += host_len;
1266 	*p++ = '|';
1267 	memcpy(p, date, date_len); p += date_len;
1268 	*p++ = '|';
1269 	memcpy(p, buf, hash_len); p += hash_len;
1270 	*p = '\0';
1271         /* fprintf(stderr, "%s\n", visday); */
1272 
1273 	if (Config_process_monthly_visitors) {
1274 		/* Skip the day number. */
1275 		month = strchr(date, '/');
1276 		if (!month) return 0; /* should never happen */
1277 		month++;
1278 	}
1279 
1280 	/* Visits with Google as referer are also stored in another hash
1281 	 * table. */
1282 	if (vi_is_google_link(ref)) {
1283 		res = vi_counter_incr(&vih->googlevisitors, visday);
1284 		if (res == 0) return 1; /* out of memory */
1285 		if (res == 1) { /* new visit! */
1286 			res = vi_counter_incr(&vih->googledate, date);
1287 			if (res == 0) return 1; /* out of memory */
1288 			if (Config_process_monthly_visitors) {
1289 				res = vi_counter_incr(&vih->googlemonth, month);
1290 				if (res == 0) return 1; /* out of memory */
1291 			}
1292 		}
1293 	}
1294 	/* Populate the 'pageviews per visitor' hash table */
1295 	if (Config_process_pageviews && vi_is_pageview(req)) {
1296 		res = vi_counter_incr(&vih->pageviews, visday);
1297 		if (res == 0) return 1; /* out of memory */
1298 	}
1299 	/* Mark the visit in the non-google-specific hashtable */
1300 	res = vi_counter_incr(&vih->visitors, visday);
1301 	if (res == 0) return 1; /* out of memory */
1302 	if (res > 1) {
1303 		if (seen) *seen = 1;
1304 		return 0; /* visit alredy seen. */
1305 	}
1306 	if (seen) *seen = 0; /* new visitor */
1307 	res = vi_counter_incr(&vih->date, date);
1308 	if (res == 0) return 1;
1309 	if (Config_process_monthly_visitors) {
1310 		res = vi_counter_incr(&vih->month, month);
1311 		if (res == 0) return 1;
1312 	}
1313 	return 0;
1314 }
1315 
1316 /* Process referers populating the relative hash tables.
1317  * Return non-zero on out of memory. */
vi_process_referer(struct vih * vih,char * ref,time_t age)1318 int vi_process_referer(struct vih *vih, char *ref, time_t age)
1319 {
1320 	int res;
1321 
1322         /* Check the url against the blacklist if needed
1323          * this can be very slow... */
1324         if (Config_filter_spam && vi_is_blacklisted_url(vih, ref))
1325             return 0;
1326 	/* Don't count internal referer (specified by the user
1327 	 * using --prefix options), nor google referers. */
1328 	if (vi_is_internal_link(ref))
1329 		return !vi_counter_incr(&vih->referers, "Internal Link");
1330 	if (vi_is_google_link(ref))
1331 		return !vi_counter_incr(&vih->referers, "Google Search Engine");
1332 	res = vi_counter_incr(&vih->referers, ref);
1333 	if (res == 0) return 1;
1334 	/* Process the referers age if enabled */
1335 	if (Config_process_referers_age) {
1336 		if (vi_replace_if_older(&vih->referersage, ref, age)) return 1;
1337 	}
1338 	return 0;
1339 }
1340 
1341 /* Process requested URLs. Split the entries in two hash tables,
1342  * one for pages and one for images.
1343  * Return non-zero on out of memory. */
vi_process_page_request(struct vih * vih,char * url)1344 int vi_process_page_request(struct vih *vih, char *url)
1345 {
1346 	int res;
1347 	char urldecoded[VI_LINE_MAX];
1348 
1349 	vi_urldecode(urldecoded, url, VI_LINE_MAX);
1350 	if (vi_is_image(url))
1351 		res = vi_counter_incr(&vih->images, urldecoded);
1352 	else
1353 		res = vi_counter_incr(&vih->pages, urldecoded);
1354 	if (res == 0) return 1;
1355 	return 0;
1356 }
1357 
1358 /* Process log lines for 404 errors report. */
vi_process_error404(struct vih * vih,char * l,char * url,int * is404)1359 int vi_process_error404(struct vih *vih, char *l, char *url, int *is404)
1360 {
1361 	char urldecoded[VI_LINE_MAX];
1362 
1363         if (is404) *is404 = 0;
1364 	vi_urldecode(urldecoded, url, VI_LINE_MAX);
1365 	if (strstr(l, " 404 ") && !strstr(l, " 200 ")) {
1366                 if (is404) *is404 = 1;
1367 		return !vi_counter_incr(&vih->error404, urldecoded);
1368         }
1369 	return 0;
1370 }
1371 
1372 /* Process agents populating the relative hash table.
1373  * Return non-zero on out of memory. */
vi_process_agents(struct vih * vih,char * agent)1374 int vi_process_agents(struct vih *vih, char *agent)
1375 {
1376 	int res;
1377 
1378 	res = vi_counter_incr(&vih->agents, agent);
1379 	if (res == 0) return 1;
1380 	return 0;
1381 }
1382 
/* Match the string 's' against a table of keywords and increment the
 * counter of the first matching one.
 *
 * The table 't' is a sequence of (pattern, label) pairs terminated by
 * a NULL pattern. A pattern matches when it is a substring of 's';
 * the empty pattern matches everything. The counter incremented is
 * the one named by the label, or by the pattern itself when the
 * label is NULL.
 * Return zero on success, non-zero on out of memory. */
int vi_counter_incr_matchtable(struct hashtable *ht, char *s, char **t)
{
	char **pair;

	for (pair = t; pair[0] != NULL; pair += 2) {
		char *needle = pair[0];
		char *label;

		if (needle[0] != '\0' && strstr(s, needle) == NULL)
			continue; /* try the next pair */
		label = pair[1] ? pair[1] : needle;
		/* Only the first match is counted. */
		return vi_counter_incr(ht, label) == 0;
	}
	return 0;
}
1400 
1401 /* Process Operating Systems populating the relative hash table.
1402  * Return non-zero on out of memory. */
vi_process_os(struct vih * vih,char * agent)1403 int vi_process_os(struct vih *vih, char *agent)
1404 {
1405 	/* Order may matter. */
1406 	char *oslist[] = {
1407 		"Windows", NULL,
1408 		"Win98", "Windows",
1409 		"Win95", "Windows",
1410 		"WinNT", "Windows",
1411 		"Win32", "Windows",
1412 		"Linux", NULL,
1413 		"-linux-", "Linux",
1414 		"Macintosh", NULL,
1415 		"Mac_PowerPC", "Macintosh",
1416 		"SunOS", NULL,
1417 		"FreeBSD", NULL,
1418 		"OpenBSD", NULL,
1419 		"NetBSD", NULL,
1420 		"BEOS", NULL,
1421 		"", "Unknown",
1422 		NULL, NULL,
1423 	};
1424 	return vi_counter_incr_matchtable(&vih->os, agent, oslist);
1425 }
1426 
1427 /* Process browsers information. */
vi_process_browsers(struct vih * vih,char * agent)1428 int vi_process_browsers(struct vih *vih, char *agent)
1429 {
1430 	/* Note that the order matters. For example Safari
1431 	 * send an user agent where there is the string "Gecko"
1432 	 * so it must be before Gecko. */
1433 	char *browserslist[] = {
1434 		"Opera", NULL,
1435 		"MSIE 4", "Explorer 4.x",
1436 		"MSIE 5", "Explorer 5.x",
1437 		"MSIE 6", "Explorer 6.x",
1438 		"MSIE", "Explorer unknown version",
1439 		"Safari", NULL,
1440 		"Konqueror", NULL,
1441 		"Galeon", NULL,
1442 		"Firefox", NULL,
1443 		"MultiZilla", NULL,
1444 		"Gecko", "Other Mozilla based",
1445 		"Wget", NULL,
1446 		"Lynx", NULL,
1447 		"Links ", "Links",
1448 		"ELinks ", "Links",
1449 		"Elinks ", "Links",
1450 		"w3m", "W3M",
1451 		"NATSU-MICAN", NULL,
1452 		"msnbot", "MSNbot",
1453 		"Slurp", "Yahoo Slurp",
1454 		"Jeeves", "Ask Jeeves",
1455 		"ZyBorg", NULL,
1456 		"asteria", NULL,
1457 		"contype", "Explorer",
1458 		"Gigabot", NULL,
1459 		"Windows-Media-Player", "Windows-MP",
1460 		"NSPlayer", NULL,
1461 		"Googlebot", "GoogleBot",
1462 		"googlebot", "GoogleBot",
1463 		"", "Unknown",
1464 		NULL, NULL,
1465 	};
1466 	return vi_counter_incr_matchtable(&vih->browsers, agent, browserslist);
1467 }
1468 
1469 /* Process req/agents to get information about pages retrivied by Google.
1470  * Return non-zero on out of memory. */
vi_process_googled(struct vih * vih,char * req,char * agent,time_t age)1471 int vi_process_googled(struct vih *vih, char *req, char *agent, time_t age)
1472 {
1473         if (vi_is_googlebot_agent(agent)) {
1474 	    return vi_replace_if_newer(&vih->googled, req, age);
1475         } else if (vi_is_adsensebot_agent(agent)) {
1476 	    return vi_replace_if_newer(&vih->adsensed, req, age);
1477         }
1478         return 0;
1479 }
1480 
1481 /* Process screen resolution and color depth info, if the javascript
1482  * code needed was inserted in the pages (see the README file). */
vi_process_screen_info(struct vih * vih,char * req)1483 int vi_process_screen_info(struct vih *vih, char *req) {
1484     char *p;
1485 
1486     if ((p = strstr(req, "visitors-screen-res-check.jpg?"))) {
1487         char buf[64];
1488 
1489         p += 30;
1490         if (p[0] == '\0' || strstr(p, "undefined")) goto parseerror;
1491         vi_strlcpy(buf, p, 64);
1492         /* The string is somethink like: 1024x768x32, so we
1493          * search for the second 'x' char. */
1494         p = strchr(buf,'x'); if (!p) goto parseerror;
1495         p = strchr(p+1,'x'); if (!p) goto parseerror;
1496         *p = '\0'; p++;
1497         /* Populate the screen resolution hash table */
1498         if (vi_counter_incr(&vih->screenres, buf) == 0)
1499             return 1;
1500         /* ... and the screen color depth one. */
1501         if (vi_counter_incr(&vih->screendepth, p) == 0)
1502             return 1;
1503     }
1504 parseerror:
1505     return 0;
1506 }
1507 
1508 /* Process accesses with the referer from google.
1509  * This is used to populate the keyphrases hashtable.
1510  * TODO: url decoding */
vi_process_google_keyphrases(struct vih * vih,char * ref,time_t age)1511 int vi_process_google_keyphrases(struct vih *vih, char *ref, time_t age)
1512 {
1513 	char *s, *p, *e;
1514 	int res, page;
1515 	char urldecoded[VI_LINE_MAX];
1516 	char buf[64];
1517 
1518 	if (!vi_is_google_link(ref)) return 0;
1519         /* Try to process gogoe human language info first. */
1520         if (Config_process_google_human_language) {
1521             s = strstr(ref+18, "&hl=");
1522             if (s == NULL) s = strstr(ref+18, "?hl=");
1523             if (s && s[4] && s[5]) {
1524                 buf[0] = s[4];
1525                 buf[1] = s[5];
1526                 buf[2] = '\0';
1527 	        if (vi_counter_incr(&vih->googlehumanlanguage, buf) == 0)
1528                     return 1;
1529             }
1530         }
1531 
1532 	/* It's possible to start the search for the query 18 chars
1533 	 * after the start of the referer because all the
1534 	 * google links will start with "http://www.google.". */
1535 	if ((s = strstr(ref+18, "?q=")) == NULL &&
1536 	    (s = strstr(ref+18, "&q=")) == NULL) return 0;
1537 	if ((p = strstr(ref+18, "&start=")) == NULL)
1538 		p = strstr(ref+18, "?start=");
1539 	if ((e = strchr(s+3, '&')) != NULL)
1540 		*e = '\0';
1541 	if (p && (e = strchr(p+7, '&')) != NULL)
1542 		*e = '\0';
1543 	if (!strncmp(s+3, "cache:", 6))
1544 		return !vi_counter_incr(&vih->googlekeyphrases, "Google Cache Access");
1545 	vi_urldecode(urldecoded, s+3, VI_LINE_MAX);
1546 	vi_strtolower(urldecoded);
1547 	page = p ? (1+(atoi(p+7)/10)) : 1;
1548 	snprintf(buf, 64, " (page %d)", page);
1549 	buf[63] = '\0';
1550 	vi_strlcat(urldecoded, buf, VI_LINE_MAX);
1551 	res = vi_counter_incr(&vih->googlekeyphrases, urldecoded);
1552 	if (e) *e = '&';
1553 	if (res == 0) return 1;
1554 	/* Process keyphrases by first time */
1555 	if (Config_process_google_keyphrases_age) {
1556 		if (vi_replace_if_older(&vih->googlekeyphrasesage,
1557 					urldecoded, age)) return 1;
1558 	}
1559 	return 0;
1560 }
1561 
1562 /* Process robots information. For visitors every client accessing
1563  * to robots.txt is considered a robot.
1564  * Returns 1 on out of memory, otherwise zero is returned. */
vi_process_robots(struct vih * vih,char * req,char * agent)1565 int vi_process_robots(struct vih *vih, char *req, char *agent)
1566 {
1567 	if (strncmp(req, "/robots.txt", 11) != 0) return 0;
1568 	if (strstr(agent, "MSIECrawler")) return 0;
1569 	return !vi_counter_incr(&vih->robots, agent);
1570 }
1571 
1572 /* Process referer -> request pairs for web trails */
vi_process_web_trails(struct vih * vih,char * ref,char * req)1573 int vi_process_web_trails(struct vih *vih, char *ref, char *req)
1574 {
1575 	int res, plen, google;
1576 	char buf[VI_LINE_MAX];
1577 	char *src;
1578 
1579 	if (vi_is_image(req)) return 0;
1580 	plen = vi_is_internal_link(ref);
1581 	google = vi_is_google_link(ref);
1582 	if (plen) {
1583 		src = (ref[plen] == '\0') ? "/" : ref+plen;
1584 	} else if (google) {
1585 		if (Config_graphviz_ignorenode_google) return 0;
1586 		src = "Google";
1587 	} else if (ref[0] != '\0') {
1588 		if (Config_graphviz_ignorenode_external) return 0;
1589 		src = "External Link";
1590 	} else {
1591 		if (Config_graphviz_ignorenode_noreferer) return 0;
1592 		src = "No Referer";
1593 	}
1594 	if (!strcmp(src, req)) return 0; /* avoid self references */
1595 
1596 	snprintf(buf, VI_LINE_MAX, "%s -> %s", src, req);
1597 	buf[VI_LINE_MAX-1] = '\0';
1598 	res = vi_counter_incr(&vih->trails, buf);
1599 	if (res == 0) return 1;
1600 	return 0;
1601 }
1602 
1603 /* Process Top Level Domains.
1604  * Returns zero on success. Non zero is returned on out of memory. */
vi_process_tld(struct vih * vih,char * hostname)1605 int vi_process_tld(struct vih *vih, char *hostname)
1606 {
1607 	char *tld;
1608 	int res;
1609 
1610 	if (vi_is_numeric_address(hostname)) {
1611 		tld = "numeric IP";
1612 	} else {
1613 		tld = strrchr(hostname, '.');
1614 		if (!tld) return 0;
1615 		tld++;
1616 	}
1617 	res = vi_counter_incr(&vih->tld, tld);
1618 	if (res == 0) return 1;
1619 	return 0;
1620 }
1621 
1622 /* Match a log line against --grep and --exclude patters to check
1623  * if the line must be processed or not. */
vi_match_line(char * line)1624 int vi_match_line(char *line)
1625 {
1626     int i;
1627 
1628     for (i = 0; i < Config_grep_pattern_num; i++) {
1629         char *pattern = Config_grep_pattern[i].pattern;
1630         int nocase = 1;
1631 
1632         /* Patterns starting with 'cs:' are matched in a case-sensitive
1633          * way after the 'cs:' prefix is discarded. */
1634         if (pattern[0] == 'c' && pattern[1] == 's' && pattern[2] == ':') {
1635             nocase = 0;
1636             pattern += 3;
1637         }
1638         if (vi_match(Config_grep_pattern[i].pattern, line, nocase)) {
1639             if (Config_grep_pattern[i].type == VI_PATTERNTYPE_EXCLUDE)
1640                 return 0;
1641         } else {
1642             if (Config_grep_pattern[i].type == VI_PATTERNTYPE_GREP)
1643                 return 0;
1644         }
1645     }
1646     return 1;
1647 }
1648 
1649 /* Process a line of log. Returns non-zero on error. */
vi_process_line(struct vih * vih,char * l)1650 int vi_process_line(struct vih *vih, char *l)
1651 {
1652 	struct logline ll;
1653 	char origline[VI_LINE_MAX];
1654 
1655         /* Test the line against --grep --exclude patterns before
1656          * to process it. */
1657         if (Config_grep_pattern_num) {
1658             if (vi_match_line(l) == 0)
1659                 return 0; /* No match? skip. */
1660         }
1661 
1662 	vih->processed++;
1663 	/* Take a copy of the original log line before to
1664 	 * copy it. Will be useful for some processing.
1665 	 * Do it only if required in order to speedup. */
1666 	if (Config_process_error404 || Config_debug)
1667 		vi_strlcpy(origline, l, VI_LINE_MAX);
1668 	/* Split the line and run all the selected processing. */
1669 	if (vi_parse_line(&ll, l) == 0) {
1670 		int seen, is404;
1671 
1672                 /* We process 404 errors first, in order to skip
1673                  * all the other reports if --ignore-404 option is active. */
1674 		if (Config_process_error404 &&
1675 		    vi_process_error404(vih, origline, ll.req, &is404))
1676                         goto oom;
1677                 /* Process screen info if needed. */
1678                 if (Config_process_screen_info && is404)
1679                     if (vi_process_screen_info(vih, ll.req)) goto oom;
1680                 /* 404 error AND --ignore-404? Stop processing of this line. */
1681                 if (Config_ignore_404 && is404)
1682                     return 0;
1683 
1684                 /* Now it's time to process unique visitors. The 'save'
1685                  * local var saves if this log line is about a new visit
1686                  * or not. Some report is generated only against the first
1687                  * line of every visitor, other reports are generated
1688                  * for every single log line. */
1689 		if (vi_process_visitors_per_day(vih, ll.host, ll.agent,
1690 					ll.date, ll.ref, ll.req, &seen))
1691 			goto oom;
1692 
1693 		/* The following are processed for every log line */
1694 		if (vi_process_page_request(vih, ll.req)) goto oom;
1695 		if (Config_process_google &&
1696 		    vi_process_googled(vih, ll.req, ll.agent, ll.time))
1697 			goto oom;
1698 		if (Config_process_web_trails &&
1699 		    vi_process_web_trails(vih, ll.ref, ll.req)) goto oom;
1700 		if (Config_process_google_keyphrases &&
1701 		    vi_process_google_keyphrases(vih, ll.ref, ll.time))
1702 			goto oom;
1703 
1704 		/* The following are processed only for new visits */
1705 		if (seen) return 0;
1706 		vi_process_date_and_hour(vih, (ll.tm.tm_wday+6)%7,
1707 				ll.tm.tm_hour);
1708 		vi_process_month_and_day(vih, ll.tm.tm_mon, ll.tm.tm_mday-1);
1709 		if (vi_process_referer(vih, ll.ref, ll.time)) goto oom;
1710 		if (Config_process_agents &&
1711 		    vi_process_agents(vih, ll.agent)) goto oom;
1712 		if (Config_process_os &&
1713 		    vi_process_os(vih, ll.agent)) goto oom;
1714 		if (Config_process_browsers &&
1715 		    vi_process_browsers(vih, ll.agent)) goto oom;
1716 		if (Config_process_tld &&
1717 		    vi_process_tld(vih, ll.host)) goto oom;
1718 		if (Config_process_robots &&
1719 		    vi_process_robots(vih, ll.req, ll.agent)) goto oom;
1720 		return 0;
1721 	} else {
1722 		vih->invalid++;
1723                 if (Config_debug)
1724                     fprintf(stderr, "Invalid line: %s\n", origline);
1725 		return 0;
1726 	}
1727 oom:
1728 	vi_set_error(vih, "Out of memory processing data");
1729 	return 1;
1730 }
1731 
1732 /* Process the specified log file. Returns zero on success.
1733  * On error non zero is returned and an error is set in the handle. */
vi_scan(struct vih * vih,char * filename)1734 int vi_scan(struct vih *vih, char *filename)
1735 {
1736 	FILE *fp;
1737 	char buf[VI_LINE_MAX];
1738 	int use_stdin = 0;
1739 
1740 	if (filename[0] == '-' && filename[1] == '\0') {
1741 		/* If we are in stream mode, just return. Stdin
1742 		 * is implicit in this mode and will be read
1743 		 * after all the other files are processed. */
1744 		if (Config_stream_mode) return 0;
1745 		fp = stdin;
1746 		use_stdin = 1;
1747 	} else {
1748 		if ((fp = fopen(filename, "r")) == NULL) {
1749 			vi_set_error(vih, "Unable to open '%s': '%s'", filename, strerror(errno));
1750 			return 1;
1751 		}
1752 	}
1753 	while (fgets(buf, VI_LINE_MAX, fp) != NULL) {
1754 		if (vi_process_line(vih, buf)) {
1755 			fclose(fp);
1756 			fprintf(stderr, "%s: %s\n", filename, vi_get_error(vih));
1757 			return 1;
1758 		}
1759 	}
1760 	if (!use_stdin)
1761 		fclose(fp);
1762 	vih->endt = time(NULL);
1763 	return 0;
1764 }
1765 
1766 /* Postprocessing of pageviews per visit data.
1767  * The source hashtable entries are in the form: uniqe-visitor -> pageviews.
1768  * After the postprocessing we obtain another hashtable in the form:
1769  * pageviews-range -> quantity. This hashtable can be used directly
1770  * with generic output functions to generate the output. */
vi_postprocess_pageviews(struct vih * vih)1771 int vi_postprocess_pageviews(struct vih *vih)
1772 {
1773 	void **table;
1774 	int len = ht_used(&vih->pageviews), i;
1775 
1776 	if ((table = ht_get_array(&vih->pageviews)) == NULL) {
1777 		fprintf(stderr, "Out of memory in vi_postprocess_pageviews()\n");
1778 		return 1;
1779 	}
1780 	/* Run the hashtable in order to populate 'pageviews_grouped' */
1781 	for (i = 0; i < len; i++) {
1782 		int pv = (long) table[(i*2)+1]; /* pageviews of visit */
1783 		int res;
1784 		char *key;
1785 
1786 		if (pv == 1) key = "1";
1787 		else if (pv == 2) key = "2";
1788 		else if (pv == 3) key = "3";
1789 		else if (pv == 4) key = "4";
1790 		else if (pv == 5) key = "5";
1791 		else if (pv == 6) key = "6";
1792 		else if (pv == 7) key = "7";
1793 		else if (pv == 8) key = "8";
1794 		else if (pv == 9) key = "9";
1795 		else if (pv == 10) key = "10";
1796 		else if (pv >= 11 && pv <= 20) key = "11-20";
1797 		else if (pv >= 21 && pv <= 30) key = "21-30";
1798 		else key = "> 30";
1799 
1800 		res = vi_counter_incr(&vih->pageviews_grouped, key);
1801 		if (res == 0) {
1802 			free(table);
1803 			return 1; /* out of memory */
1804 		}
1805 	}
1806 	free(table);
1807 	return 0;
1808 }
1809 
/* Run the postprocessing of the raw data collected that is needed
 * in order to generate the reports (currently only the grouping of
 * pageviews). Meant to be called from vi_print_report().
 * Return zero on success; on failure an "Out of memory" error is set
 * in the handle and non-zero is returned. */
int vi_postprocess(struct vih *vih)
{
	if (vi_postprocess_pageviews(vih) == 0)
		return 0;
	vi_set_error(vih, "Out of memory");
	return 1;
}
1820 
1821 /* ---------------------------- text output module -------------------------- */
/* Text output has no header: nothing is written to 'fp'. */
void om_text_print_header(FILE *fp)
{
	(void) fp; /* unused */
}
1827 
/* Text output has no footer: nothing is written to 'fp'. */
void om_text_print_footer(FILE *fp)
{
	(void) fp; /* unused */
}
1833 
/* Write a report title to 'fp' as a single "=== title ===" line. */
void om_text_print_title(FILE *fp, char *title)
{
	fprintf(fp,
		"=== %s ===\n",
		title);
}
1838 
/* Write a report subtitle to 'fp' as a single "--- subtitle" line. */
void om_text_print_subtitle(FILE *fp, char *subtitle)
{
	fprintf(fp,
		"--- %s\n",
		subtitle);
}
1843 
/* Write a "key: number" information to 'fp' as a "* key: val" line. */
void om_text_print_numkey_info(FILE *fp, char *key, int val)
{
	fprintf(fp,
		"* %s: %d\n",
		key, val);
}
1848 
/* Write a numbered "key1: key2" entry of a report to 'fp';
 * 'num' is the position of the entry in the list. */
void om_text_print_keykey_entry(FILE *fp, char *key1, char *key2, int num)
{
	fprintf(fp,
		"%d)    %s: %s\n",
		num, key1, key2);
}
1853 
/* Write a numbered "key: value" entry of a report to 'fp';
 * 'num' is the position of the entry in the list.
 * The 'link' argument is not used by the text output. */
void om_text_print_numkey_entry(FILE *fp, char *key, int val, char *link,
		int num)
{
	(void) link; /* unused, avoid warning */
	fprintf(fp,
		"%d)    %s: %d\n",
		num, key, val);
}
1860 
/* Print a bar 'cols' characters wide followed by a percentage.
 * c1 and c2 are the characters used for the left (filled) and right
 * (empty) parts. 'max' is the maximum value of the bar: the filled
 * part is proportional to this/max. 'tot' is the "total" needed to
 * compute the percentage value (100*this/tot).
 *
 * A zero 'max' or 'tot' is treated as one to avoid divisions by zero.
 * The filled part is clamped to the range 0..cols, so values of
 * 'this' that are negative or greater than 'max' can't write outside
 * the bar. Nothing is printed if memory can't be allocated. */
void om_text_print_bar(FILE *fp, int max, int tot, int this, int cols,
		char c1, char c2)
{
	int l;
	float p;
	char *bar;

	if (cols < 0) cols = 0;
	if (tot == 0) tot++;
	if (max == 0) max++;
	l = ((float)(cols*this))/max;
	/* Never fill less than nothing or more than the bar itself. */
	if (l < 0) l = 0;
	if (l > cols) l = cols;
	p = ((float)(100*this))/tot;
	bar = malloc(cols+1);
	if (!bar) return;
	memset(bar, c2, cols);
	memset(bar, c1, l);
	bar[cols] = '\0';
	fprintf(fp, "%s %02.1f%%", bar, p);
	free(bar);
}
1883 
/* Write one "key: value |bar percentage" line. The bar is 44 columns wide,
 * filled with '#' in proportion to this/max and padded with spaces. */
void om_text_print_numkeybar_entry(FILE *fp, char *key, int max, int tot, int this)
{
	const int barcols = 44;

	fprintf(fp, "   %-12s: %-9d |", key, this);
	om_text_print_bar(fp, max, tot, this, barcols, '#', ' ');
	fputc('\n', fp);
}
1890 
/* Write one "key: value |bar percentage" line where the bar compares 'this'
 * against 'tot': 44 columns, '#' for the 'this' part, '.' for the rest. */
void om_text_print_numkeycomparativebar_entry(FILE *fp, char *key, int tot, int this)
{
	const int barcols = 44;

	fprintf(fp, "   %s: %-10d |", key, this);
	om_text_print_bar(fp, tot, tot, this, barcols, '#', '.');
	fputc('\n', fp);
}
1897 
/* Print a bidimensional map as ASCII art.
 *
 * fp     - output stream.
 * xlen   - number of columns, ylen - number of rows.
 * xlabel - 'xlen' column labels, printed vertically under the map.
 * ylabel - 'ylen' row labels, printed on the left of every row.
 * value  - xlen*ylen values stored row by row (value[y*xlen+x]).
 *
 * Every cell is drawn with a character of the " .-+#" palette selected
 * in proportion to the maximum value of the map. Values <= 0 are drawn
 * with the first palette character, so a negative value can't index
 * outside the palette. If the memory needed to print the x-labels can't
 * be allocated, a message is written to stderr and the labels are
 * omitted. */
void om_text_print_bidimentional_map(FILE *fp, int xlen, int ylen,
			char **xlabel, char **ylabel, int *value)
{
	static const char asciipal[] = " .-+#";
	const int pallen = (int) sizeof(asciipal) - 1;
	int x, y, l, max = 0;
	char **p;

	/* Get the max value */
	l = xlen*ylen;
	for (x = 0; x < l; x++)
		if (max < value[x])
			max = value[x];
	if (max == 0) max++; /* avoid division by zero */
	/* print the map */
	for (y = 0; y < ylen; y++) {
		fprintf(fp, "%15s: ", ylabel[y]);
		for (x = 0; x < xlen; x++) {
			int val = value[(y*xlen)+x];
			int coloridx = 0;

			/* The product is computed as long long since
			 * (pallen-1)*val may not fit in an int. */
			if (val > 0)
				coloridx = (int) (((long long)(pallen-1)*val)/max);
			fputc(asciipal[coloridx], fp);
		}
		fprintf(fp, "\n");
	}
	fprintf(fp, "\n");
	/* print the x-labels in vertical: p[x] points to the next
	 * character to print for the x-th label. At least one element is
	 * requested so that a NULL return always means out of memory. */
	p = malloc(sizeof(*p) * (size_t) (xlen > 0 ? xlen : 1));
	if (p == NULL) {
		fprintf(stderr, "Out of memory in om_text_print_bidimentional_map()\n");
		return;
	}
	for (x = 0; x < xlen; x++)
		p[x] = xlabel[x];
	while(1) {
		int sentinel = 0; /* characters printed in this line */

		fprintf(fp, "%15s  ", "");
		for (x = 0; x < xlen; x++) {
			if (*(p[x]) != '\0') {
				fputc(*(p[x]), fp);
				p[x]++;
				sentinel++;
			} else {
				fputc(' ', fp);
			}
		}
		fputc('\n', fp);
		/* Stop after the first line made only of padding */
		if (sentinel == 0) break;
	}
	free(p);
}
1949 
/* The text output separates sections with an empty line. */
void om_text_print_hline(FILE *fp)
{
	fputc('\n', fp);
}
1954 
om_text_print_credits(FILE * fp)1955 void om_text_print_credits(FILE *fp)
1956 {
1957 	fprintf(fp, "Statistics generated with VISITORS version %s\n"
1958 	       "http://www.hping.org/visitors for more information\n",
1959 	       VI_VERSION_STR);
1960 }
1961 
/* Write the name of a report as a "-> report" line (the text output has
 * no real links). */
void om_text_print_report_link(FILE *fp, char *report)
{
	fprintf(fp,
		"-> %s\n",
		report);
}
1967 
1968 struct outputmodule OutputModuleText = {
1969 	om_text_print_header,
1970 	om_text_print_footer,
1971 	om_text_print_title,
1972 	om_text_print_subtitle,
1973 	om_text_print_numkey_info,
1974 	om_text_print_keykey_entry,
1975 	om_text_print_numkey_entry,
1976 	om_text_print_numkeybar_entry,
1977 	om_text_print_numkeycomparativebar_entry,
1978 	om_text_print_bidimentional_map,
1979 	om_text_print_hline,
1980 	om_text_print_credits,
1981 	om_text_print_report_link,
1982 };
1983 
1984 /* ---------------------------- html output module -------------------------- */
/* Write the string 's' to 'fp' replacing the characters ' " & < > with
 * the corresponding HTML entities. At most 'maxlen' characters of 's' are
 * written: if the string is longer the output is abbreviated with "...". */
void om_html_entities_abbr(FILE *fp, char *s, int maxlen)
{
	for (; *s != '\0'; s++) {
		const char *entity = NULL;

		if (maxlen == 0) {
			fputs("...", fp);
			return;
		}
		maxlen--;
		switch(*s) {
		case '\'': entity = "&#39;"; break;
		case '"': entity = "&#34;"; break;
		case '&': entity = "&amp;"; break;
		case '<': entity = "&lt;"; break;
		case '>': entity = "&gt;"; break;
		default: break;
		}
		if (entity != NULL)
			fputs(entity, fp);
		else
			fputc(*s, fp);
	}
}
2004 
2005 /* A wrapper to om_html_entities_abbr() with a fixed abbreviation length */
om_html_entities(FILE * fp,char * s)2006 void om_html_entities(FILE *fp, char *s)
2007 {
2008 	om_html_entities_abbr(fp, s, VI_HTML_ABBR_LEN);
2009 }
2010 
/* Write the beginning of the HTML document: the <html> and <head> tags,
 * the embedded style sheet used by all the reports, and the opening tag
 * of the main table, which om_html_print_footer() closes. */
void om_html_print_header(FILE *fp)
{
	/* The text contains no '%', so it can be written verbatim */
	fputs(
"<html>\n"
"<head>\n"
"<style>\n"
"BODY, TD, B, LI, U, DIV, SPAN {\n"
"	background-color: #ffffff;\n"
"	color: #000000;\n"
"	font-family: Verdana, Arial, Helvetica, Sans-Serif;\n"
"	font-size: 10px;\n"
"}\n"
"A {\n"
"	color: #0066ff;\n"
"	text-decoration: none;\n"
"}\n"
"A:visited {\n"
"	color: #000099;\n"
"	text-decoration: none;\n"
"}\n"
"A:active {\n"
"	color: #26a0be;\n"
"	text-decoration: none;\n"
"}\n"
"A:hover {\n"
"	color: #ffffff;\n"
"	text-decoration: none;\n"
"	background-color: #26a0be;\n"
"}\n"
".barfill {\n"
"	background-color: #96ef94;\n"
"	border-left: 1px;\n"
"	border-right: 1px;\n"
"	border-top: 1px;\n"
"	border-bottom: 1px;\n"
"	border-color: #4c934a;\n"
"	border-style: solid;\n"
"	font-size: 10px;\n"
"	height: 3px;\n"
"	line-height: 4px;\n"
"}\n"
".barempty {\n"
"	font-size: 10px;\n"
"	line-height: 4px;\n"
"}\n"
".barleft {\n"
"	background-color: #ff9696;\n"
"	border-left: 1px;\n"
"	border-right: 1px;\n"
"	border-top: 1px;\n"
"	border-bottom: 1px;\n"
"	border-color: #4c934a;\n"
"	border-style: solid;\n"
"	font-size: 10px;\n"
"	height: 3px;\n"
"	line-height: 4px;\n"
"}\n"
".barright {\n"
"	background-color: #f8f8f8;\n"
"	border-left: 0px;\n"
"	border-right: 1px;\n"
"	border-top: 1px;\n"
"	border-bottom: 1px;\n"
"	border-color: #4c934a;\n"
"	border-style: solid;\n"
"	font-size: 10px;\n"
"	height: 3px;\n"
"	line-height: 4px;\n"
"}\n"
".title {\n"
"	background-color: #007f9e;\n"
"	font-size: 12px;\n"
"	font-weight: bold;\n"
"	padding: 3px;\n"
"	color: #ffffff;\n"
"}\n"
".reportlink {\n"
"	background-color: #ffffff;\n"
"	font-size: 12px;\n"
"	font-weight: bold;\n"
"	color: #000000;\n"
"	padding-left: 3px;\n"
"}\n"
".subtitle {\n"
"	background-color: #007f9e;\n"
"	font-size: 12px;\n"
"	font-weight: normal;\n"
"	padding: 3px;\n"
"	color: #ffffff;\n"
"}\n"
".info {\n"
"	background-color: #badfee;\n"
"	font-size: 12px;\n"
"	padding-left: 3px;\n"
"	padding-right: 3px;\n"
"}\n"
".keyentry {\n"
"	font-size: 10px;\n"
"	padding-left: 2px;\n"
"	border-bottom: 1px dashed #bcbcbc;\n"
"}\n"
".keyentrywe {\n"
"	background-color: #f0f090;\n"
"	font-size: 10px;\n"
"	padding-left: 2px;\n"
"	border-bottom: 1px dashed #bcbcbc;\n"
"}\n"
".valueentry {\n"
"	font-size: 10px;\n"
"	padding-left: 2px;\n"
"	color: #905d14;\n"
"	border-bottom: 1px dashed #f6c074;\n"
"}\n"
".credits {\n"
"	font-size: 12px;\n"
"	font-weight: bold;\n"
"}\n"
".maintable {\n"
"	border-style: solid;\n"
"	border-color: #0b4b5b;\n"
"	border-width: 1px;\n"
"}\n"
"</style>\n"
"</head>\n"
"<body><table border=\"0\" cellpadding=\"0\" cellspacing=\"0\" class=\"maintable\">\n"
	, fp);
}
2138 
/* Close the main table and the HTML document. */
void om_html_print_footer(FILE *fp)
{
	fputs("</table></body></html>\n", fp);
}
2143 
/* Write a report title as a table row spanning the three columns.
 * The row also contains an anchor named after the title, so that
 * "#title" links can jump to the report. */
void om_html_print_title(FILE *fp, char *title)
{
	fprintf(fp,
		"<tr><td align=\"center\" class=\"title\" colspan=\"3\"><a name=\"%s\"></a>",
		title);
	om_html_entities(fp, title);
	fputs("</td></tr>\n", fp);
}
2150 
/* Write a report subtitle as a table row spanning the three columns. */
void om_html_print_subtitle(FILE *fp, char *subtitle)
{
	fputs("<tr><td align=\"center\" class=\"subtitle\" colspan=\"3\">", fp);
	om_html_entities(fp, subtitle);
	fputs("</td></tr>\n", fp);
}
2157 
/* Write an informative "key val" pair as a table row spanning the three
 * columns. */
void om_html_print_numkey_info(FILE *fp, char *key, int val)
{
	fputs("<tr><td align=\"left\" colspan=\"3\" class=\"info\">", fp);
	om_html_entities(fp, key);
	fprintf(fp, " %d", val);
	fputs("</td></tr>\n", fp);
}
2165 
/* Write a numbered table row made of two strings.
 *
 * fp   - output stream.
 * key1 - string of the second column.
 * key2 - string of the third column. If it starts with "http://" it is
 *        printed as a link to itself.
 * num  - the entry number, printed in the first column.
 *
 * NOTE(review): key2 may originate from the log file and can contain any
 * character — confirm against the callers. It is therefore escaped when
 * used as href attribute too: printed raw, a '"' would close the
 * attribute and allow markup injection in the report. The href is
 * escaped but never abbreviated, since truncating it would break the
 * link. */
void om_html_print_keykey_entry(FILE *fp, char *key1, char *key2, int num)
{
	fprintf(fp, "<tr><td align=\"left\" class=\"keyentry\">");
	fprintf(fp, "%d)", num);
	fprintf(fp, "<td align=\"left\" class=\"valueentry\">");
	om_html_entities(fp, key1);
	fprintf(fp, "</td><td align=\"left\" class=\"keyentry\">");
	if (!strncmp(key2, "http://", 7)) {
		fprintf(fp, "<a class=\"url\" href=\"");
		/* A limit greater than the string length means that
		 * the string is never abbreviated. */
		om_html_entities_abbr(fp, key2, (int) strlen(key2) + 1);
		fprintf(fp, "\">");
		om_html_entities(fp, key2);
		fprintf(fp, "</a>");
	} else {
		om_html_entities(fp, key2);
	}
	fprintf(fp, "</td></tr>\n");
}
2182 
/* Write a numbered table row made of a number and a string.
 *
 * fp   - output stream.
 * key  - string of the third column.
 * val  - number of the second column.
 * link - if not NULL the key is printed as a link to this URL.
 *        Otherwise a key starting with "http://" is printed as a link
 *        to itself, and any other key as simple text.
 * num  - the entry number, printed in the first column.
 *
 * NOTE(review): 'key' (and possibly 'link') may originate from the log
 * file and can contain any character — confirm against the callers.
 * Both are therefore escaped when used as href attribute too: printed
 * raw, a '"' would close the attribute and allow markup injection in
 * the report. The href is escaped but never abbreviated, since
 * truncating it would break the link. */
void om_html_print_numkey_entry(FILE *fp, char *key, int val, char *link,
		int num)
{
	char *href = NULL;

	if (link != NULL)
		href = link;
	else if (!strncmp(key, "http://", 7))
		href = key;

	fprintf(fp, "<tr><td align=\"left\" class=\"keyentry\">");
	fprintf(fp, "%d)", num);
	fprintf(fp, "<td align=\"left\" class=\"valueentry\">");
	fprintf(fp, "%d", val);
	fprintf(fp, "</td><td align=\"left\" class=\"keyentry\">");
	if (href != NULL) {
		fprintf(fp, "<a class=\"url\" href=\"");
		/* A limit greater than the string length means that
		 * the string is never abbreviated. */
		om_html_entities_abbr(fp, href, (int) strlen(href) + 1);
		fprintf(fp, "\">");
		om_html_entities(fp, key);
		fprintf(fp, "</a>");
	} else {
		om_html_entities(fp, key);
	}
	fprintf(fp, "</td></tr>\n");
}
2204 
/* Write a bar as a one-row table 400 units wide made of two cells: the
 * left one takes 'l' percent of the width and uses the CSS class
 * 'leftclass', the right one takes the remaining 100-l percent and uses
 * 'rightclass'. A cell gets a "&nbsp;" as content unless its width is
 * zero percent. */
void om_html_print_bar(FILE *fp, int l, char *leftclass, char *rightclass)
{
	const char *leftfill = l ? "&nbsp;" : "";
	const char *rightfill = (l!=100) ? "&nbsp;" : "";
	int rightwidth = 100-l;

	fprintf(fp, "<table cellpadding=\"0\" cellspacing=\"0\" width=\"400\" border=\"0\">\n");
	fprintf(fp, "<tr><td align=\"center\" class=\"%s\" width=\"%d%%\">%s</td>\n",
		leftclass, l, leftfill);
	fprintf(fp, "<td align=\"center\" class=\"%s\" width=\"%d%%\">%s</td></tr>\n",
		rightclass, rightwidth, rightfill);
	fprintf(fp, "</table>\n");
}
2212 
/* Write a table row with the key, the value with its percentage over
 * 'tot', and a bar whose filled part is proportional to this/max.
 * Rows for which vi_is_weekend(key) is true use the "keyentrywe" CSS
 * class for the key cell instead of "keyentry". A 'tot' or 'max' of zero
 * is treated as 1 to avoid a division by zero. */
void om_html_print_numkeybar_entry(FILE *fp, char *key, int max, int tot, int this)
{
	int weekend, barwidth;
	float percent;

	if (tot == 0) tot = 1;
	if (max == 0) max = 1;
	barwidth = ((float)(100*this))/max;
	percent = ((float)(100*this))/tot;
	weekend = vi_is_weekend(key);

	fputs(weekend ? "<tr><td align=\"left\" class=\"keyentrywe\">"
		      : "<tr><td align=\"left\" class=\"keyentry\">", fp);
	om_html_entities(fp, key);
	fputs("&nbsp;&nbsp;&nbsp;</td><td align=\"left\" class=\"valueentry\">", fp);
	fprintf(fp, "%d (%02.1f%%)", this, percent);
	fputs("</td><td align=\"left\" class=\"bar\">", fp);
	om_html_print_bar(fp, barwidth, "barfill", "barempty");
	fputs("</td></tr>\n", fp);
}
2235 
/* Write a table row with the key, the value with its percentage over
 * 'tot', and a two-part bar ("barleft"/"barright" classes) whose left
 * part is as wide as the integer part of that percentage.
 * Rows for which vi_is_weekend(key) is true use the "keyentrywe" CSS
 * class for the key cell instead of "keyentry". A 'tot' of zero is
 * treated as 1 to avoid a division by zero. */
void om_html_print_numkeycomparativebar_entry(FILE *fp, char *key, int tot, int this)
{
	int weekend, barwidth;
	float percent;

	if (tot == 0) tot = 1;
	percent = ((float)(100*this))/tot;
	barwidth = (int) percent;
	weekend = vi_is_weekend(key);

	fputs(weekend ? "<tr><td align=\"left\" class=\"keyentrywe\">"
		      : "<tr><td align=\"left\" class=\"keyentry\">", fp);
	om_html_entities(fp, key);
	fputs("&nbsp;&nbsp;&nbsp;</td><td align=\"left\" class=\"valueentry\">", fp);
	fprintf(fp, "%d (%02.1f%%)", this, percent);
	fputs("</td><td align=\"left\" class=\"bar\">", fp);
	om_html_print_bar(fp, barwidth, "barleft", "barright");
	fputs("</td></tr>\n", fp);
}
2257 
/* Write a bidimensional map as a nested HTML table: one row per y-label
 * and one colored cell per value (value[y*xlen+x]), followed by a final
 * row holding the x-labels. The background color of a cell is the color
 * #AABBFF scaled by value/max, where max is the greatest value of the
 * map (1 if no value is greater than zero). */
void om_html_print_bidimentional_map(FILE *fp, int xlen, int ylen,
			char **xlabel, char **ylabel, int *value)
{
	int col, row, i, cells, max = 0;

	/* Find the max value, used to scale the colors */
	cells = xlen*ylen;
	for (i = 0; i < cells; i++) {
		if (value[i] > max)
			max = value[i];
	}
	if (max == 0) max = 1; /* avoid division by zero */

	/* The map itself */
	fputs("<tr><td colspan=\"3\" align=\"center\">", fp);
	fputs("<table border=\"0\" cellpadding=\"0\" cellspacing=\"0\">", fp);
	for (row = 0; row < ylen; row++) {
		int *rowvalues = value + (row*xlen);

		fputs("<tr>", fp);
		fprintf(fp, "<td class=\"valueentry\">%s</td>", ylabel[row]);
		for (col = 0; col < xlen; col++) {
			int val = rowvalues[col];
			int r = (0xAA*val)/max;
			int g = (0xBB*val)/max;
			int b = (0xFF*val)/max;

			fprintf(fp, "<td style=\"background-color: #%02X%02X%02X;\">&nbsp;</td>\n", r, g, b);
		}
		fputs("</tr>\n", fp);
	}

	/* The x-labels row */
	fputs("<tr><td>&nbsp;</td>", fp);
	for (col = 0; col < xlen; col++)
		fprintf(fp, "<td class=\"keyentry\">%s</td>", xlabel[col]);
	fputs("</tr></table></td></tr>", fp);
}
2292 
/* The HTML output separates sections with an empty table row. */
void om_html_print_hline(FILE *fp)
{
	fputs("<tr><td colspan=\"3\">&nbsp;</td></tr>", fp);
}
2297 
om_html_print_credits(FILE * fp)2298 void om_html_print_credits(FILE *fp)
2299 {
2300 	fprintf(fp, "<tr><td colspan=\"3\" align=\"center\" class=\"credits\">Statistics generated with <a href=\"http://www.hping.org/visitors\">VISITORS Web Log Analyzer</a> version %s\n</td></tr>", VI_VERSION_STR);
2301 }
2302 
/* Write a table row containing a link to the "#report" anchor of the
 * same document, using the report name as link text. */
void om_html_print_report_link(FILE *fp, char *report)
{
	fprintf(fp,
		"<tr><td align=\"left\" class=\"reportlink\" colspan=\"3\"><a href=\"#%s\">",
		report);
	om_html_entities(fp, report);
	fputs("</a></td></tr>\n", fp);
}
2310 
2311 struct outputmodule OutputModuleHtml = {
2312 	om_html_print_header,
2313 	om_html_print_footer,
2314 	om_html_print_title,
2315 	om_html_print_subtitle,
2316 	om_html_print_numkey_info,
2317 	om_html_print_keykey_entry,
2318 	om_html_print_numkey_entry,
2319 	om_html_print_numkeybar_entry,
2320 	om_html_print_numkeycomparativebar_entry,
2321 	om_html_print_bidimentional_map,
2322 	om_html_print_hline,
2323 	om_html_print_credits,
2324 	om_html_print_report_link,
2325 };
2326 
2327 
2328 /* ---------------------------------- output -------------------------------- */
vi_print_statistics(struct vih * vih)2329 void vi_print_statistics(struct vih *vih)
2330 {
2331 	time_t elapsed = vih->endt - vih->startt;
2332 
2333 	if (elapsed == 0) elapsed++;
2334 	fprintf(stderr, "--\n%d lines processed in %ld seconds\n"
2335 	       "%d invalid lines, %d blacklisted referers\n",
2336 			vih->processed, (long) elapsed,
2337 			vih->invalid, vih->blacklisted);
2338 }
2339 
vi_print_hours_report(FILE * fp,struct vih * vih)2340 void vi_print_hours_report(FILE *fp, struct vih *vih)
2341 {
2342 	int i, max = 0, tot = 0;
2343 	for (i = 0; i < 24; i++) {
2344 		if (vih->hour[i] > max)
2345 			max = vih->hour[i];
2346 		tot += vih->hour[i];
2347 	}
2348 	Output->print_title(fp, "Hours distribution");
2349 	Output->print_subtitle(fp, "Percentage of hits in every hour of the day");
2350 	for (i = 0; i < 24; i++) {
2351 		char buf[8];
2352 		sprintf(buf, "%02d", i);
2353 		Output->print_numkeybar_entry(fp, buf, max, tot, vih->hour[i]);
2354 	}
2355 }
2356 
vi_print_weekdays_report(FILE * fp,struct vih * vih)2357 void vi_print_weekdays_report(FILE *fp, struct vih *vih)
2358 {
2359 	int i, max = 0, tot = 0;
2360 	for (i = 0; i < 7; i++) {
2361 		if (vih->weekday[i] > max)
2362 			max = vih->weekday[i];
2363 		tot += vih->weekday[i];
2364 	}
2365 	Output->print_title(fp, "Weekdays distribution");
2366 	Output->print_subtitle(fp, "Percentage of hits in every day of the week");
2367 	for (i = 0; i < 7; i++) {
2368 		Output->print_numkeybar_entry(fp, vi_wdname[i], max, tot, vih->weekday[i]);
2369 	}
2370 }
2371 
/* Generic date comparison used to sort a table with qsort(3). It is only
 * called by the wrappers that follow.
 *
 * 'a' and 'b' point to arrays of pointers; the element at index 'off' of
 * each one is the date string, parsed with parse_date(). The result is
 * the sign of the comparison multiplied by 'mul' (1 for ascending order,
 * -1 for descending). A date that can't be parsed is considered greater
 * than any valid date; two unparsable dates are considered equal. */
int qsort_cmp_dates_generic(const void *a, const void *b, int off, int mul)
{
	const time_t invalid = (time_t) -1;
	char *dateA = (char*) ((void**) a)[off];
	char *dateB = (char*) ((void**) b)[off];
	time_t ta = parse_date(dateA, NULL);
	time_t tb = parse_date(dateB, NULL);

	if (ta == invalid || tb == invalid) {
		if (ta == tb) return 0; /* both invalid */
		return (ta == invalid) ? 1*mul : -1*mul;
	}
	if (ta == tb) return 0;
	return (ta > tb) ? 1*mul : -1*mul;
}
2391 
/* qsort(3) callback comparing dates in the log format, ascending order.
 * The date is the key (first pointer) of the hashtable entries. */
int qsort_cmp_dates_key(const void *a, const void *b)
{
	const int key_offset = 0, ascending = 1;

	return qsort_cmp_dates_generic(a, b, key_offset, ascending);
}
2397 
2398 /* Compare dates (only the month/year part) in the log format:
2399  * hashtable key part version */
qsort_cmp_months_key(const void * a,const void * b)2400 int qsort_cmp_months_key(const void *a, const void *b)
2401 {
2402 	int ret;
2403 	char dateA[VI_DATE_MAX];
2404 	char dateB[VI_DATE_MAX];
2405 	void *savedA, *savedB; /* backups of the original pointers */
2406 	void **A = (void**) a;
2407 	void **B = (void**) b;
2408 
2409 	/* We use an hack here, in order to call qsort_cmp_dates_generic
2410 	 * even in this case, we substitute the hashtable entries
2411 	 * with versions of the strings prefixed with "01", so they
2412 	 * will be parseble by parse_date().
2413 	 * In pratice for "May/2004" we instead put "01/May/2004" and so on. */
2414 	savedA = *A;
2415 	savedB = *B;
2416 	dateA[0] = dateB[0] = '0';
2417 	dateA[1] = dateB[1] = '1';
2418 	dateA[2] = dateB[2] = '/';
2419 	dateA[3] = dateB[3] = '\0';
2420 	vi_strlcat(dateA, (char*)*A, VI_DATE_MAX);
2421 	vi_strlcat(dateB, (char*)*B, VI_DATE_MAX);
2422 	*A = dateA;
2423 	*B = dateB;
2424 	ret = qsort_cmp_dates_generic(a, b, 0, 1);
2425 	/* Restore */
2426 	*A = savedA;
2427 	*B = savedB;
2428 	return ret;
2429 }
2430 
/* qsort(3) callback comparing dates in the log format. The date is the
 * value (second pointer) of the hashtable entries. The order is reversed:
 * more recent dates first. */
int qsort_cmp_dates_value(const void *a, const void *b)
{
	const int value_offset = 1, descending = -1;

	return qsort_cmp_dates_generic(a, b, value_offset, descending);
}
2437 
/* qsort(3) callback for tables of key/value pointer pairs where the value
 * (second pointer) is a long stored in the pointer itself.
 * Entries are sorted by value in descending order. */
int qsort_cmp_long_value(const void *a, const void *b)
{
	long la = (long) ((void**) a)[1];
	long lb = (long) ((void**) b)[1];

	/* -1 if la is the greatest, 1 if lb is the greatest, 0 if equal */
	return (la < lb) - (la > lb);
}
2448 
/* qsort(3) callback for tables of key/value pointer pairs where the value
 * (second pointer) is a time_t stored in the pointer itself.
 * Entries are sorted by time in descending order. */
int qsort_cmp_time_value(const void *a, const void *b)
{
	time_t ta = (time_t) ((void**) a)[1];
	time_t tb = (time_t) ((void**) b)[1];

	/* -1 if ta is the greatest, 1 if tb is the greatest, 0 if equal */
	return (ta < tb) - (ta > tb);
}
2459 
vi_print_visits_report(FILE * fp,struct vih * vih)2460 void vi_print_visits_report(FILE *fp, struct vih *vih)
2461 {
2462 	int days = ht_used(&vih->date), i, tot = 0, max = 0;
2463 	int months;
2464 	void **table;
2465 
2466 	Output->print_title(fp, "Unique visitors in each day");
2467 	Output->print_subtitle(fp, "Multiple hits with the same IP, user agent and access day, are considered a single visit");
2468 	Output->print_numkey_info(fp, "Number of unique visitors",
2469 			ht_used(&vih->visitors));
2470 	Output->print_numkey_info(fp, "Different days in logfile",
2471 			ht_used(&vih->date));
2472 
2473 	if ((table = ht_get_array(&vih->date)) == NULL) {
2474 		fprintf(stderr, "Out Of Memory in print_visits_report()\n");
2475 		return;
2476 	}
2477 	qsort(table, days, sizeof(void*)*2, qsort_cmp_dates_key);
2478 	for (i = 0; i < days; i++) {
2479 		long value = (long) table[(i*2)+1];
2480 		if (value > max)
2481 			max = value;
2482 		tot += value;
2483 	}
2484 	for (i = 0; i < days; i++) {
2485 		char *key = table[i*2];
2486 		long value = (long) table[(i*2)+1];
2487 		Output->print_numkeybar_entry(fp, key, max, tot, value);
2488 	}
2489 	free(table);
2490         Output->print_hline(fp);
2491 
2492 	/* Montly */
2493 	if (Config_process_monthly_visitors == 0) return;
2494 	tot = max = 0;
2495 	months = ht_used(&vih->month);
2496 	Output->print_title(fp, "Unique visitors in each month");
2497 	Output->print_subtitle(fp, "Multiple hits with the same IP, user agent and access day, are considered a single visit");
2498 	Output->print_numkey_info(fp, "Number of unique visitors",
2499 			ht_used(&vih->visitors));
2500 	Output->print_numkey_info(fp, "Different months in logfile",
2501 			ht_used(&vih->month));
2502 
2503 	if ((table = ht_get_array(&vih->month)) == NULL) {
2504 		fprintf(stderr, "Out Of Memory in print_visits_report()\n");
2505 		return;
2506 	}
2507 	qsort(table, months, sizeof(void*)*2, qsort_cmp_months_key);
2508 	for (i = 0; i < months; i++) {
2509 		long value = (long) table[(i*2)+1];
2510 		if (value > max)
2511 			max = value;
2512 		tot += value;
2513 	}
2514 	for (i = 0; i < months; i++) {
2515 		char *key = table[i*2];
2516 		long value = (long) table[(i*2)+1];
2517 		Output->print_numkeybar_entry(fp, key, max, tot, value);
2518 	}
2519 	free(table);
2520 }
2521 
2522 /* A report to compare visits originating from google VS all the rest. */
vi_print_googlevisits_report(FILE * fp,struct vih * vih)2523 void vi_print_googlevisits_report(FILE *fp, struct vih *vih)
2524 {
2525 	int days = ht_used(&vih->date), i, months;
2526 	void **table;
2527 
2528 	Output->print_title(fp, "Unique visitors from Google in each day");
2529 	Output->print_subtitle(fp, "The red part of the bar expresses the percentage of visits originated from Google");
2530 	Output->print_numkey_info(fp, "Number of unique visitors",
2531 			ht_used(&vih->visitors));
2532 	Output->print_numkey_info(fp, "Number of unique visitors from google",
2533 			ht_used(&vih->googlevisitors));
2534 	Output->print_numkey_info(fp, "Different days in logfile",
2535 			ht_used(&vih->date));
2536 
2537 	if ((table = ht_get_array(&vih->date)) == NULL) {
2538 		fprintf(stderr, "Out Of Memory in print_visits_report()\n");
2539 		return;
2540 	}
2541 	qsort(table, days, sizeof(void*)*2, qsort_cmp_dates_key);
2542 	for (i = 0; i < days; i++) {
2543 		char *key = table[i*2];
2544 		long value = (long) table[(i*2)+1];
2545 		long googlevalue;
2546 
2547 		googlevalue = vi_counter_val(&vih->googledate, key);
2548 		Output->print_numkeycomparativebar_entry(fp, key, value, googlevalue);
2549 	}
2550 	free(table);
2551         Output->print_hline(fp);
2552 
2553 	/* Montly */
2554 	if (Config_process_monthly_visitors == 0) return;
2555 	months = ht_used(&vih->month);
2556 	Output->print_title(fp, "Unique visitors from Google in each month");
2557 	Output->print_subtitle(fp, "The red part of the bar expresses the percentage of visits originated from Google");
2558 	Output->print_numkey_info(fp, "Number of unique visitors",
2559 			ht_used(&vih->visitors));
2560 	Output->print_numkey_info(fp, "Number of unique visitors from google",
2561 			ht_used(&vih->googlevisitors));
2562 	Output->print_numkey_info(fp, "Different months in logfile",
2563 			ht_used(&vih->month));
2564 
2565 	if ((table = ht_get_array(&vih->month)) == NULL) {
2566 		fprintf(stderr, "Out Of Memory in print_visits_report()\n");
2567 		return;
2568 	}
2569 	qsort(table, months, sizeof(void*)*2, qsort_cmp_months_key);
2570 	for (i = 0; i < months; i++) {
2571 		char *key = table[i*2];
2572 		long value = (long) table[(i*2)+1];
2573 		long googlevalue;
2574 
2575 		googlevalue = vi_counter_val(&vih->googlemonth, key);
2576 		Output->print_numkeycomparativebar_entry(fp, key, value, googlevalue);
2577 	}
2578 	free(table);
2579 }
2580 
vi_print_generic_keyval_report(FILE * fp,char * title,char * subtitle,char * info,int maxlines,struct hashtable * ht,int (* compar)(const void *,const void *))2581 void vi_print_generic_keyval_report(FILE *fp, char *title, char *subtitle,
2582 		char *info, int maxlines,
2583 		struct hashtable *ht,
2584 		int(*compar)(const void *, const void *))
2585 {
2586 	int items = ht_used(ht), i;
2587 	void **table;
2588 
2589 	Output->print_title(fp, title);
2590 	Output->print_subtitle(fp, subtitle);
2591 	Output->print_numkey_info(fp, info, items);
2592 	if ((table = ht_get_array(ht)) == NULL) {
2593 		fprintf(stderr, "Out of memory in print_generic_report()\n");
2594 		return;
2595 	}
2596 	qsort(table, items, sizeof(void*)*2, compar);
2597 	for (i = 0; i < items; i++) {
2598 		char *key = table[i*2];
2599 		long value = (long) table[(i*2)+1];
2600 		if (i >= maxlines) break;
2601 		if (key[0] == '\0')
2602 			Output->print_numkey_entry(fp, "none", value, NULL,
2603 					i+1);
2604 		else
2605 			Output->print_numkey_entry(fp, key, value, NULL, i+1);
2606 	}
2607 	free(table);
2608 }
2609 
vi_print_generic_keyvalbar_report(FILE * fp,char * title,char * subtitle,char * info,int maxlines,struct hashtable * ht,int (* compar)(const void *,const void *))2610 void vi_print_generic_keyvalbar_report(FILE *fp, char *title, char *subtitle,
2611 		char *info, int maxlines,
2612 		struct hashtable *ht,
2613 		int(*compar)(const void *, const void *))
2614 {
2615 	int items = ht_used(ht), i, max = 0, tot = 0;
2616 	void **table;
2617 
2618 	Output->print_title(fp, title);
2619 	Output->print_subtitle(fp, subtitle);
2620 	Output->print_numkey_info(fp, info, items);
2621 	if ((table = ht_get_array(ht)) == NULL) {
2622 		fprintf(stderr, "Out of memory in print_generic_report()\n");
2623 		return;
2624 	}
2625 	qsort(table, items, sizeof(void*)*2, compar);
2626 	for (i = 0; i < items; i++) {
2627 		long value = (long) table[(i*2)+1];
2628 		tot += value;
2629 		if (value > max) max = value;
2630 	}
2631 	for (i = 0; i < items; i++) {
2632 		char *key = table[i*2];
2633 		long value = (long) table[(i*2)+1];
2634 		if (i >= maxlines) break;
2635 		if (key[0] == '\0')
2636 			Output->print_numkeybar_entry(fp, "none", max, tot, value);
2637 		else
2638 			Output->print_numkeybar_entry(fp, key, max, tot, value);
2639 	}
2640 	free(table);
2641 }
2642 
2643 /* This is similar to the generic key/val report, but
2644  * different enough to be better served by a specific function. */
vi_print_keyphrases_report(FILE * fp,char * title,char * subtitle,char * info,int maxlines,struct hashtable * ht,int (* compar)(const void *,const void *))2645 void vi_print_keyphrases_report(FILE *fp, char *title, char *subtitle,
2646 		char *info, int maxlines,
2647 		struct hashtable *ht,
2648 		int(*compar)(const void *, const void *))
2649 {
2650 	int items = ht_used(ht), i;
2651 	void **table;
2652 
2653 	Output->print_title(fp, title);
2654 	Output->print_subtitle(fp, subtitle);
2655 	Output->print_numkey_info(fp, info, items);
2656 	if ((table = ht_get_array(ht)) == NULL) {
2657 		fprintf(stderr, "Out of memory in print_keyphrases_report()\n");
2658 		return;
2659 	}
2660 	qsort(table, items, sizeof(void*)*2, compar);
2661 	for (i = 0; i < items; i++) {
2662 		char *key = table[i*2];
2663 		long value = (long) table[(i*2)+1];
2664 		if (i >= maxlines) break;
2665 		if (key[0] == '\0')
2666 			Output->print_numkey_entry(fp, "none", value, NULL,
2667 					i+1);
2668 		else {
2669 			char *p;
2670 			char link[VI_LINE_MAX];
2671 			char aux[VI_LINE_MAX];
2672 			char encodedkey[VI_LINE_MAX];
2673 
2674 			vi_strlcpy(link, "http://www.google.com/search?q=", VI_LINE_MAX);
2675 			vi_strlcpy(aux, key, VI_LINE_MAX);
2676 			p = strrchr(aux, '(');
2677 			if (p) {
2678 				if (p > aux) p--; /* seek the space on left */
2679 				*p = '\0';
2680 			}
2681 			vi_urlencode(encodedkey, aux, VI_LINE_MAX);
2682 			vi_strlcat(link, encodedkey, VI_LINE_MAX);
2683 			Output->print_numkey_entry(fp, key, value, link, i+1);
2684 		}
2685 	}
2686 	free(table);
2687 }
2688 
vi_print_referers_report(FILE * fp,struct vih * vih)2689 void vi_print_referers_report(FILE *fp, struct vih *vih)
2690 {
2691 	vi_print_generic_keyval_report(
2692 			fp,
2693 			"Referers",
2694 			"Referers ordered by visits (google excluded)",
2695 			"Different referers",
2696 			Config_max_referers,
2697 			&vih->referers,
2698 			qsort_cmp_long_value);
2699 }
2700 
vi_print_pages_report(FILE * fp,struct vih * vih)2701 void vi_print_pages_report(FILE *fp, struct vih *vih)
2702 {
2703 	vi_print_generic_keyval_report(
2704 			fp,
2705 			"Requested pages",
2706 			"Page requests ordered by hits",
2707 			"Different pages requested",
2708 			Config_max_pages,
2709 			&vih->pages,
2710 			qsort_cmp_long_value);
2711 }
2712 
vi_print_error404_report(FILE * fp,struct vih * vih)2713 void vi_print_error404_report(FILE *fp, struct vih *vih)
2714 {
2715 	vi_print_generic_keyval_report(
2716 			fp,
2717 			"404 Errors",
2718 			"Requests for missing documents",
2719 			"Different missing documents requested",
2720 			Config_max_error404,
2721 			&vih->error404,
2722 			qsort_cmp_long_value);
2723 }
2724 
vi_print_pageviews_report(FILE * fp,struct vih * vih)2725 void vi_print_pageviews_report(FILE *fp, struct vih *vih)
2726 {
2727 	vi_print_generic_keyvalbar_report(
2728 			fp,
2729 			"Pageviews per visit",
2730 			"Number of pages requested per visit",
2731 			"Only documents are counted (not images). Reported ranges:",
2732 			100,
2733 			&vih->pageviews_grouped,
2734 			qsort_cmp_long_value);
2735 }
2736 
vi_print_images_report(FILE * fp,struct vih * vih)2737 void vi_print_images_report(FILE *fp, struct vih *vih)
2738 {
2739 	vi_print_generic_keyval_report(
2740 			fp,
2741 			"Requested images and CSS",
2742 			"Images and CSS requests ordered by hits",
2743 			"Different images and CSS requested",
2744 			Config_max_images,
2745 			&vih->images,
2746 			qsort_cmp_long_value);
2747 }
2748 
vi_print_agents_report(FILE * fp,struct vih * vih)2749 void vi_print_agents_report(FILE *fp, struct vih *vih)
2750 {
2751 	vi_print_generic_keyval_report(
2752 			fp,
2753 			"User agents",
2754 			"The entire user agent string ordered by visits",
2755 			"Different agents",
2756 			Config_max_agents,
2757 			&vih->agents,
2758 			qsort_cmp_long_value);
2759 }
2760 
vi_print_os_report(FILE * fp,struct vih * vih)2761 void vi_print_os_report(FILE *fp, struct vih *vih)
2762 {
2763 	vi_print_generic_keyvalbar_report(
2764 			fp,
2765 			"Operating Systems",
2766 			"Operating Systems by visits",
2767 			"Different operating systems listed",
2768 			100,
2769 			&vih->os,
2770 			qsort_cmp_long_value);
2771 }
2772 
vi_print_browsers_report(FILE * fp,struct vih * vih)2773 void vi_print_browsers_report(FILE *fp, struct vih *vih)
2774 {
2775 	vi_print_generic_keyvalbar_report(
2776 			fp,
2777 			"Browsers",
2778 			"Browsers used by visits",
2779 			"Different browsers listed",
2780 			100,
2781 			&vih->browsers,
2782 			qsort_cmp_long_value);
2783 }
2784 
vi_print_trails_report(FILE * fp,struct vih * vih)2785 void vi_print_trails_report(FILE *fp, struct vih *vih)
2786 {
2787 	vi_print_generic_keyval_report(
2788 			fp,
2789 			"Web trails",
2790 			"Referer -> Target common moves",
2791 			"Total number of trails",
2792 			Config_max_trails,
2793 			&vih->trails,
2794 			qsort_cmp_long_value);
2795 }
2796 
vi_print_google_keyphrases_report(FILE * fp,struct vih * vih)2797 void vi_print_google_keyphrases_report(FILE *fp, struct vih *vih)
2798 {
2799 	vi_print_keyphrases_report(
2800 			fp,
2801 			"Google Keyphrases",
2802 			"Keyphrases used in google searches ordered by visits",
2803 			"Total number of keyphrases",
2804 			Config_max_google_keyphrases,
2805 			&vih->googlekeyphrases,
2806 			qsort_cmp_long_value);
2807 }
2808 
vi_print_tld_report(FILE * fp,struct vih * vih)2809 void vi_print_tld_report(FILE *fp, struct vih *vih)
2810 {
2811 	vi_print_generic_keyvalbar_report(
2812 			fp,
2813 			"Domains",
2814 			"Top Level Domains sorted by visits",
2815 			"Total number of Top Level Domains",
2816 			Config_max_tld,
2817 			&vih->tld,
2818 			qsort_cmp_long_value);
2819 }
2820 
vi_print_robots_report(FILE * fp,struct vih * vih)2821 void vi_print_robots_report(FILE *fp, struct vih *vih)
2822 {
2823 	vi_print_generic_keyval_report(
2824 			fp,
2825 			"Robots and web spiders",
2826 			"Agents requesting robots.txt. MSIECrawler excluded.",
2827 			"Total number of different robots",
2828 			Config_max_robots,
2829 			&vih->robots,
2830 			qsort_cmp_long_value);
2831 }
2832 
2833 /* Print a generic report where the two report items are strings
2834  * (usually url and date). Used to print the 'googled' and 'referers age'
2835  * reports. */
vi_print_generic_keytime_report(FILE * fp,char * title,char * subtitle,char * info,int maxlines,struct hashtable * ht,int (* compar)(const void *,const void *))2836 void vi_print_generic_keytime_report(FILE *fp, char *title, char *subtitle,
2837 		char *info, int maxlines,
2838 		struct hashtable *ht,
2839 		int(*compar)(const void *, const void *))
2840 {
2841 	int items = ht_used(ht), i;
2842 	void **table;
2843 
2844 	Output->print_title(fp, title);
2845 	Output->print_subtitle(fp, subtitle);
2846 	Output->print_numkey_info(fp, info, items);
2847 	if ((table = ht_get_array(ht)) == NULL) {
2848 		fprintf(stderr, "Out Of Memory in print_generic_keytime_report()\n");
2849 		return;
2850 	}
2851 	qsort(table, items, sizeof(void*)*2, compar);
2852 	for (i = 0; i < items; i++) {
2853 		struct tm *tm;
2854 		char ftime[1024];
2855 		char *url = table[i*2];
2856 		time_t time = (time_t) table[(i*2)+1];
2857 		if (i >= maxlines) break;
2858 		tm = localtime(&time);
2859 		if (tm) {
2860 			ftime[0] = '\0';
2861 			strftime(ftime, 1024, "%d/%b/%Y", tm);
2862 			Output->print_keykey_entry(fp, ftime,
2863 					(url[0] == '\0') ? "none" : url, i+1);
2864 		}
2865 	}
2866 	free(table);
2867 }
2868 
vi_print_googled_report(FILE * fp,struct vih * vih)2869 void vi_print_googled_report(FILE *fp, struct vih *vih)
2870 {
2871 	vi_print_generic_keytime_report(
2872 			fp,
2873 			"Googled pages",
2874 			"Pages accessed by the Google crawler, last access reported",
2875 			"Number of pages googled",
2876 			Config_max_googled,
2877 			&vih->googled,
2878 			qsort_cmp_time_value);
2879 }
2880 
vi_print_adsensed_report(FILE * fp,struct vih * vih)2881 void vi_print_adsensed_report(FILE *fp, struct vih *vih)
2882 {
2883 	vi_print_generic_keytime_report(
2884 			fp,
2885 			"Adsensed pages",
2886 			"Pages accessed by the Adsense crawler, last access reported",
2887 			"Number of pages adsensed",
2888 			Config_max_adsensed,
2889 			&vih->adsensed,
2890 			qsort_cmp_time_value);
2891 }
2892 
vi_print_referers_age_report(FILE * fp,struct vih * vih)2893 void vi_print_referers_age_report(FILE *fp, struct vih *vih)
2894 {
2895 	vi_print_generic_keytime_report(
2896 			fp,
2897 			"Referers by first time",
2898 			"Referers ordered by first time date, newer on top (referers from google excluded)",
2899 			"Different referers",
2900 			Config_max_referers_age,
2901 			&vih->referersage,
2902 			qsort_cmp_time_value);
2903 }
2904 
vi_print_google_keyphrases_age_report(FILE * fp,struct vih * vih)2905 void vi_print_google_keyphrases_age_report(FILE *fp, struct vih *vih)
2906 {
2907 	vi_print_generic_keytime_report(
2908 			fp,
2909 			"Google Keyphrases by first time",
2910 			"Keyphrases ordered by first time date, newer on top",
2911 			"Different referers",
2912 			Config_max_google_keyphrases_age,
2913 			&vih->googlekeyphrasesage,
2914 			qsort_cmp_time_value);
2915 }
2916 
vi_print_google_human_language_report(FILE * fp,struct vih * vih)2917 void vi_print_google_human_language_report(FILE *fp, struct vih *vih)
2918 {
2919 	vi_print_generic_keyval_report(
2920 			fp,
2921 			"Google Human Language",
2922 			"The 'hl' field in the query string of google searches",
2923 			"Different human languages",
2924 			1000,
2925 			&vih->googlehumanlanguage,
2926 			qsort_cmp_long_value);
2927 }
2928 
vi_print_screen_res_report(FILE * fp,struct vih * vih)2929 void vi_print_screen_res_report(FILE *fp, struct vih *vih) {
2930 	vi_print_generic_keyval_report(
2931 			fp,
2932 			"Screen resolution",
2933 			"user screen width x height resolution",
2934 			"Different resolutions",
2935 			1000,
2936 			&vih->screenres,
2937 			qsort_cmp_long_value);
2938 }
2939 
vi_print_screen_depth_report(FILE * fp,struct vih * vih)2940 void vi_print_screen_depth_report(FILE *fp, struct vih *vih) {
2941 	vi_print_generic_keyval_report(
2942 			fp,
2943 			"Screen color depth",
2944 			"user screen color depth in bits per pixel",
2945 			"Different color depths",
2946 			1000,
2947 			&vih->screendepth,
2948 			qsort_cmp_long_value);
2949 }
2950 
vi_print_information_report(FILE * fp,struct vih * vih)2951 void vi_print_information_report(FILE *fp, struct vih *vih)
2952 {
2953 	char buf[VI_LINE_MAX];
2954 	time_t now = time(NULL);
2955 	snprintf(buf, VI_LINE_MAX, "Generated: %s", ctime(&now));
2956 	Output->print_title(fp, "General information");
2957 	Output->print_subtitle(fp, "Information about analyzed log files");
2958 	Output->print_subtitle(fp, buf);
2959 	Output->print_numkey_info(fp, "Number of entries processed", vih->processed);
2960 	Output->print_numkey_info(fp, "Number of invalid entries", vih->invalid);
2961 	Output->print_numkey_info(fp, "Processing time in seconds", (vih->endt)-(vih->startt));
2962 }
2963 
vi_print_report_links(FILE * fp)2964 void vi_print_report_links(FILE *fp)
2965 {
2966 	void *l[] = {
2967 	"Unique visitors in each day", NULL,
2968 	"Unique visitors in each month", &Config_process_monthly_visitors,
2969 	"Unique visitors from Google in each day", NULL,
2970 	"Unique visitors from Google in each month", &Config_process_monthly_visitors,
2971 	"Pageviews per visit", &Config_process_pageviews,
2972 	"Weekday-Hour combined map", &Config_process_weekdayhour_map,
2973 	"Month-Day combined map", &Config_process_monthday_map,
2974 	"Requested pages", NULL,
2975 	"Requested images and CSS", NULL,
2976 	"Referers", NULL,
2977 	"Referers by first time", &Config_process_referers_age,
2978 	"Robots and web spiders", &Config_process_robots,
2979 	"User agents", &Config_process_agents,
2980 	"Operating Systems", &Config_process_os,
2981 	"Browsers", &Config_process_browsers,
2982 	"404 Errors", &Config_process_error404,
2983 	"Domains", &Config_process_tld,
2984 	"Googled pages", &Config_process_google,
2985 	"Adsensed pages", &Config_process_google,
2986 	"Google Keyphrases", &Config_process_google_keyphrases,
2987 	"Google Keyphrases by first time", &Config_process_google_keyphrases_age,
2988 	"Google Human Language", &Config_process_google_human_language,
2989         "Screen resolution", &Config_process_screen_info,
2990         "Screen color depth", &Config_process_screen_info,
2991 	"Web trails", &Config_process_web_trails,
2992 	"Weekday distribution", NULL,
2993 	"Hours distribution", NULL,
2994 	};
2995 	unsigned int i, num = 0;
2996 
2997 	Output->print_title(fp, "Generated reports");
2998 	Output->print_subtitle(fp, "Click on the report name you want to see");
2999 	for (i = 0; i < sizeof(l)/sizeof(void*); i += 2) {
3000 		int active = l[i+1] == NULL ? 1 : *((int*)l[i+1]);
3001 		if (active) num++;
3002 	}
3003 	Output->print_numkey_info(fp, "Number of reports generated", num);
3004 	for (i = 0; i < sizeof(l)/sizeof(void*); i += 2) {
3005 		int active = l[i+1] == NULL ? 1 : *((int*)l[i+1]);
3006 		if (active)
3007 			Output->print_report_link(fp, (char*)l[i]);
3008 	}
3009 }
3010 
vi_print_weekdayhour_map_report(FILE * fp,struct vih * vih)3011 void vi_print_weekdayhour_map_report(FILE *fp, struct vih *vih)
3012 {
3013 	char *xlabel[24] = {
3014 		"00", "01", "02", "03", "04", "05", "06", "07",
3015 		"08", "09", "10", "11", "12", "13", "14", "15",
3016 		"16", "17", "18", "19", "20", "21", "22", "23"};
3017 	char **ylabel = vi_wdname;
3018 	int j, minj = 0, maxj = 0;
3019 	int *hw = (int*) vih->weekdayhour;
3020 	char buf[VI_LINE_MAX];
3021 
3022 	/* Check idexes of minimum and maximum in the array. */
3023 	for (j = 0; j < 24*7; j++) {
3024 		if (hw[j] > hw[maxj])
3025 			maxj = j;
3026 		if (hw[j] < hw[minj])
3027 			minj = j;
3028 	}
3029 
3030 	Output->print_title(fp, "Weekday-Hour combined map");
3031 	Output->print_subtitle(fp, "Brighter means higher level of hits");
3032 	snprintf(buf, VI_LINE_MAX, "Hour with max traffic starting at %s %s:00 with hits",
3033 			ylabel[maxj/24], xlabel[maxj%24]);
3034 	Output->print_numkey_info(fp, buf, hw[maxj]);
3035 	snprintf(buf, VI_LINE_MAX, "Hour with min traffic starting at %s %s:00 with hits",
3036 			ylabel[minj/24], xlabel[minj%24]);
3037 	Output->print_numkey_info(fp, buf, hw[minj]);
3038 	Output->print_hline(fp);
3039 	Output->print_bidimentional_map(fp, 24, 7, xlabel, ylabel, hw);
3040 }
3041 
vi_print_monthday_map_report(FILE * fp,struct vih * vih)3042 void vi_print_monthday_map_report(FILE *fp, struct vih *vih)
3043 {
3044 	char *xlabel[31] = {
3045 		"01", "02", "03", "04", "05", "06", "07", "08",
3046 		"09", "10", "11", "12", "13", "14", "15", "16",
3047 		"17", "18", "19", "20", "21", "22", "23", "24",
3048 		"25", "26", "27", "28", "29", "30", "31"};
3049 	char *ylabel[12] = {
3050 		"Jan", "Feb", "Mar", "Apr", "May", "Jun",
3051 		"Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
3052 	};
3053 	int j, minj = 0, maxj = 0;
3054 	int *md = (int*) vih->monthday;
3055 	char buf[VI_LINE_MAX];
3056 
3057 	/* Check idexes of minimum and maximum in the array. */
3058 	for (j = 0; j < 12*31; j++) {
3059 		if (md[j] > md[maxj])
3060 			maxj = j;
3061 		if (md[j] != 0 && (md[j] < md[minj] || md[minj] == 0))
3062 			minj = j;
3063 	}
3064 
3065 	Output->print_title(fp, "Month-Day combined map");
3066 	Output->print_subtitle(fp, "Brighter means higher level of hits");
3067 	snprintf(buf, VI_LINE_MAX, "Day with max traffic is %s %s with hits",
3068 			ylabel[maxj/31], xlabel[maxj%31]);
3069 	Output->print_numkey_info(fp, buf, md[maxj]);
3070 	snprintf(buf, VI_LINE_MAX, "Day with min traffic is %s %s with hits",
3071 			ylabel[minj/31], xlabel[minj%31]);
3072 	Output->print_numkey_info(fp, buf, md[minj]);
3073 	Output->print_hline(fp);
3074 	Output->print_bidimentional_map(fp, 31, 12, xlabel, ylabel, md);
3075 }
3076 
vi_print_hline(FILE * fp)3077 void vi_print_hline(FILE *fp)
3078 {
3079 	Output->print_hline(fp);
3080 }
3081 
vi_print_credits(FILE * fp)3082 void vi_print_credits(FILE *fp)
3083 {
3084 	Output->print_credits(fp);
3085 }
3086 
vi_print_header(FILE * fp)3087 void vi_print_header(FILE *fp)
3088 {
3089 	Output->print_header(fp);
3090 }
3091 
vi_print_footer(FILE * fp)3092 void vi_print_footer(FILE *fp)
3093 {
3094 	Output->print_footer(fp);
3095 }
3096 
3097 /* Generate the report writing it to the output file 'of'.
3098  * If op is NULL, output the report to standard output.
3099  * On success zero is returned. Otherwise the function returns
3100  * non-zero and set an error in the vih handler. */
vi_print_report(char * of,struct vih * vih)3101 int vi_print_report(char *of, struct vih *vih)
3102 {
3103 	FILE *fp;
3104 
3105 	if (of == NULL) {
3106 		fp = stdout;
3107 	} else {
3108 		fp = fopen(of, "w");
3109 		if (fp == NULL) {
3110 			vi_set_error(vih, "Writing the report to '%s': %s",
3111 					of, strerror(errno));
3112 			return 1;
3113 		}
3114 	}
3115 
3116         /* Disable specific reports when there is no data. */
3117         if (ht_used(&vih->screenres) == 0)
3118                 Config_process_screen_info = 0;
3119 	/* Do some data postprocessing needed to generate reports */
3120 	if (vi_postprocess(vih))
3121 		return 1;
3122 	/* Report generation */
3123 	vi_print_header(fp);
3124 	vi_print_credits(fp);
3125 	vi_print_hline(fp);
3126 	vi_print_information_report(fp, vih);
3127 	vi_print_hline(fp);
3128 	vi_print_report_links(fp);
3129 	vi_print_hline(fp);
3130 	vi_print_visits_report(fp, vih);
3131 	vi_print_hline(fp);
3132 	vi_print_googlevisits_report(fp, vih);
3133 	vi_print_hline(fp);
3134 	if (Config_process_weekdayhour_map) {
3135 		vi_print_weekdayhour_map_report(fp, vih);
3136 		vi_print_hline(fp);
3137 	}
3138 	if (Config_process_monthday_map) {
3139 		vi_print_monthday_map_report(fp, vih);
3140 		vi_print_hline(fp);
3141 	}
3142 	if (Config_process_pageviews) {
3143 		vi_print_pageviews_report(fp, vih);
3144 		vi_print_hline(fp);
3145 	}
3146 	vi_print_pages_report(fp, vih);
3147 	vi_print_hline(fp);
3148 	vi_print_images_report(fp, vih);
3149 	vi_print_hline(fp);
3150 	vi_print_referers_report(fp, vih);
3151 	vi_print_hline(fp);
3152 	if (Config_process_referers_age) {
3153 		vi_print_referers_age_report(fp, vih);
3154 		vi_print_hline(fp);
3155 	}
3156 	if (Config_process_robots) {
3157 		vi_print_robots_report(fp, vih);
3158 		vi_print_hline(fp);
3159 	}
3160 	if (Config_process_agents) {
3161 		vi_print_agents_report(fp, vih);
3162 		vi_print_hline(fp);
3163 	}
3164 	if (Config_process_os) {
3165 		vi_print_os_report(fp, vih);
3166 		vi_print_hline(fp);
3167 	}
3168 	if (Config_process_browsers) {
3169 		vi_print_browsers_report(fp, vih);
3170 		vi_print_hline(fp);
3171 	}
3172 	if (Config_process_error404) {
3173 		vi_print_error404_report(fp, vih);
3174 		vi_print_hline(fp);
3175 	}
3176 	if (Config_process_tld) {
3177 		vi_print_tld_report(fp, vih);
3178 		vi_print_hline(fp);
3179 	}
3180 	if (Config_process_google) {
3181 		vi_print_googled_report(fp, vih);
3182 		vi_print_hline(fp);
3183 		vi_print_adsensed_report(fp, vih);
3184 		vi_print_hline(fp);
3185 	}
3186 	if (Config_process_google_keyphrases) {
3187 		vi_print_google_keyphrases_report(fp, vih);
3188 		vi_print_hline(fp);
3189 	}
3190 	if (Config_process_google_keyphrases) {
3191 		vi_print_google_keyphrases_age_report(fp, vih);
3192 		vi_print_hline(fp);
3193 	}
3194         if (Config_process_google_human_language) {
3195 		vi_print_google_human_language_report(fp, vih);
3196 		vi_print_hline(fp);
3197         }
3198         if (Config_process_screen_info) {
3199                 vi_print_screen_res_report(fp, vih);
3200                 vi_print_hline(fp);
3201                 vi_print_screen_depth_report(fp, vih);
3202                 vi_print_hline(fp);
3203         }
3204 	if (Config_process_web_trails) {
3205 		vi_print_trails_report(fp, vih);
3206 		vi_print_hline(fp);
3207 	}
3208 	vi_print_weekdays_report(fp, vih);
3209 	vi_print_hline(fp);
3210 	vi_print_hours_report(fp, vih);
3211 	vi_print_hline(fp);
3212 	vi_print_credits(fp);
3213 	vi_print_hline(fp);
3214 	vi_print_footer(fp);
3215 	if (of != NULL)
3216 		fclose(fp);
3217 	return 0;
3218 }
3219 
3220 /* ------------------------- graphviz graph generation ---------------------- */
vi_print_graphviz(struct vih * vih)3221 void vi_print_graphviz(struct vih *vih)
3222 {
3223 	int items = ht_used(&vih->trails), i, max = 0, tot = 0;
3224 	void **table;
3225 
3226 	printf("digraph webtrails {\n");
3227 	printf("\tgraph [splines=true overlap=false rankdir=LR]\n");
3228 	printf("\tnode [color=lightblue2,style=\"filled\"]\n");
3229 	printf("\tedge [style=bold]\n");
3230 	if ((table = ht_get_array(&vih->trails)) == NULL) {
3231 		fprintf(stderr, "Out of memory in vi_print_graphviz()\n");
3232 		return;
3233 	}
3234 	qsort(table, items, sizeof(void*)*2, qsort_cmp_long_value);
3235 	for (i = 0; i < items; i++) {
3236 		long value = (long) table[(i*2)+1];
3237 		tot += value;
3238 		if (i > Config_max_trails) continue;
3239 		if (max < value)
3240 			max = value;
3241 	}
3242 	if (max == 0) max = 1; /* avoid division by zero */
3243 	if (tot == 0) tot = 1;
3244 	for (i = 0; i < items; i++) {
3245 		int color;
3246 		char *key = table[i*2];
3247 		char *t;
3248 		long value = (long) table[(i*2)+1];
3249 		float percentage = ((float)value/tot)*100;
3250 		if (i > Config_max_trails) break;
3251 		color = (value*255)/max;
3252 		t = strstr(key, " -> ");
3253 		*t = '\0'; /* alter */
3254 		printf("\t\"%s\" -> \"%s\" [color=\"#%02X00%02X\" label=\"%.2f\"]\n", key, t+4, color, 255-color, percentage);
3255 		*t = ' '; /* restore */
3256 	}
3257 	if (!Config_graphviz_ignorenode_google)
3258 		printf("\tGoogle [color=\"#c0ffc0\"]\n");
3259 	if (!Config_graphviz_ignorenode_external)
3260 		printf("\t\"External Link\" [color=\"#c0ffc0\"]\n");
3261 	if (!Config_graphviz_ignorenode_noreferer)
3262 		printf("\t\"No Referer\" [color=\"#c0ffc0\"]\n");
3263 	free(table);
3264 	printf("}\n");
3265 }
3266 
3267 /* -------------------------------- stream mode ----------------------------- */
vi_stream_mode(struct vih * vih)3268 void vi_stream_mode(struct vih *vih)
3269 {
3270 	time_t lastupdate_t, lastreset_t, now_t;
3271 
3272 	lastupdate_t = lastreset_t = time(NULL);
3273 	while(1) {
3274 		char buf[VI_LINE_MAX];
3275 
3276 		if (fgets(buf, VI_LINE_MAX, stdin) == NULL) {
3277 			vi_sleep(1);
3278 			continue;
3279 		}
3280 		if (vi_process_line(vih, buf)) {
3281 			fprintf(stderr, "%s\n", vi_get_error(vih));
3282 		}
3283 		now_t = time(NULL);
3284 		/* update */
3285 		if ((now_t - lastupdate_t) >= Config_update_every) {
3286 			lastupdate_t = now_t;
3287 			if (vi_print_report(Config_output_file, vih)) {
3288 				fprintf(stderr, "%s\n", vi_get_error(vih));
3289 			}
3290 		}
3291 		/* reset */
3292 		if (Config_reset_every &&
3293 		    ((now_t - lastreset_t) >= Config_reset_every))
3294 		{
3295 			lastreset_t = now_t;
3296 			vi_reset(vih);
3297 		}
3298 	}
3299 }
3300 
3301 /* ----------------------------------- main --------------------------------- */
3302 
/* Command line switch IDs. The order defines the numeric value of each
 * identifier, starting from zero. */
enum {
	OPT_MAXREFERERS,
	OPT_MAXPAGES,
	OPT_MAXIMAGES,
	OPT_USERAGENTS,
	OPT_ALL,
	OPT_MAXLINES,
	OPT_GOOGLE,
	OPT_MAXGOOGLED,
	OPT_MAXUSERAGENTS,
	OPT_OUTPUT,
	OPT_VERSION,
	OPT_HELP,
	OPT_PREFIX,
	OPT_TRAILS,
	OPT_GOOGLEKEYPHRASES,
	OPT_GOOGLEKEYPHRASESAGE,
	OPT_MAXGOOGLEKEYPHRASES,
	OPT_MAXGOOGLEKEYPHRASESAGE,
	OPT_MAXTRAILS,
	OPT_GRAPHVIZ,
	OPT_WEEKDAYHOUR_MAP,
	OPT_MONTHDAY_MAP,
	OPT_REFERERSAGE,
	OPT_MAXREFERERSAGE,
	OPT_TAIL,
	OPT_TLD,
	OPT_MAXTLD,
	OPT_STREAM,
	OPT_OUTPUTFILE,
	OPT_UPDATEEVERY,
	OPT_RESETEVERY,
	OPT_OS,
	OPT_BROWSERS,
	OPT_ERROR404,
	OPT_MAXERROR404,
	OPT_TIMEDELTA,
	OPT_PAGEVIEWS,
	OPT_ROBOTS,
	OPT_MAXROBOTS,
	OPT_GRAPHVIZ_ignorenode_GOOGLE,
	OPT_GRAPHVIZ_ignorenode_EXTERNAL,
	OPT_GRAPHVIZ_ignorenode_NOREFERER,
	OPT_GOOGLEHUMANLANGUAGE,
	OPT_FILTERSPAM,
	OPT_MAXADSENSED,
	OPT_GREP,
	OPT_EXCLUDE,
	OPT_IGNORE404,
	OPT_DEBUG,
	OPT_SCREENINFO
};
3305 
3306 /* command line switches definition:
3307  * the rule with short options is to take upper case the
3308  * 'special' options (the option a normal user should not use) */
3309 static struct ago_optlist visitors_optlist[] = {
3310 	{ 'A',  "all",			OPT_ALL,		AGO_NOARG},
3311 	{ 'T',  "trails",		OPT_TRAILS,		AGO_NOARG},
3312 	{ 'G',	"google",		OPT_GOOGLE,		AGO_NOARG},
3313 	{ 'K',	"google-keyphrases",	OPT_GOOGLEKEYPHRASES,	AGO_NOARG},
3314 	{ 'Z',	"google-keyphrases-age", OPT_GOOGLEKEYPHRASESAGE, AGO_NOARG},
3315         { 'H',  "google-human-language", OPT_GOOGLEHUMANLANGUAGE, AGO_NOARG},
3316 	{ 'U',	"user-agents",		OPT_USERAGENTS,		AGO_NOARG},
3317 	{ 'W',  "weekday-hour-map",	OPT_WEEKDAYHOUR_MAP,	AGO_NOARG},
3318 	{ 'M',  "month-day-map",	OPT_MONTHDAY_MAP,	AGO_NOARG},
3319 	{ 'R',  "referers-age",		OPT_REFERERSAGE,	AGO_NOARG},
3320 	{ 'D',  "domains",		OPT_TLD,		AGO_NOARG},
3321 	{ 'O',  "operating-systems",	OPT_OS,			AGO_NOARG},
3322 	{ 'B',  "browsers",		OPT_BROWSERS,		AGO_NOARG},
3323 	{ 'X',  "error404",		OPT_ERROR404,		AGO_NOARG},
3324 	{ 'Y',  "pageviews",		OPT_PAGEVIEWS,		AGO_NOARG},
3325 	{ 'S',	"robots",		OPT_ROBOTS,		AGO_NOARG},
3326 	{ '\0',	"screen-info",		OPT_SCREENINFO,		AGO_NOARG},
3327 	{ '\0', "stream",		OPT_STREAM,		AGO_NOARG},
3328 	{ '\0', "update-every",		OPT_UPDATEEVERY,	AGO_NEEDARG},
3329 	{ '\0',	"reset-every",		OPT_RESETEVERY,		AGO_NEEDARG},
3330 	{ 'f',	"output-file",		OPT_OUTPUTFILE,		AGO_NEEDARG},
3331 	{ 'm',	"max-lines",		OPT_MAXLINES,		AGO_NEEDARG},
3332 	{ 'r',	"max-referers",		OPT_MAXREFERERS,	AGO_NEEDARG},
3333 	{ 'p',	"max-pages",		OPT_MAXPAGES,		AGO_NEEDARG},
3334 	{ 'i',	"max-images",		OPT_MAXIMAGES,		AGO_NEEDARG},
3335 	{ 'x',	"max-error404",		OPT_MAXERROR404,	AGO_NEEDARG},
3336 	{ 'u',	"max-useragents",	OPT_MAXUSERAGENTS,	AGO_NEEDARG},
3337 	{ 't',	"max-trails",		OPT_MAXTRAILS,		AGO_NEEDARG},
3338 	{ 'g',	"max-googled",		OPT_MAXGOOGLED,		AGO_NEEDARG},
3339 	{ '\0',	"max-adsensed",		OPT_MAXADSENSED,	AGO_NEEDARG},
3340 	{ 'k',	"max-google-keyphrases",OPT_MAXGOOGLEKEYPHRASES,AGO_NEEDARG},
3341 	{ 'z',	"max-google-keyphrases-age",OPT_MAXGOOGLEKEYPHRASESAGE,
3342 		AGO_NEEDARG},
3343 	{ 'a',	"max-referers-age",	OPT_MAXREFERERSAGE,	AGO_NEEDARG},
3344 	{ 'd',	"max-domains",		OPT_MAXTLD,		AGO_NEEDARG},
3345 	{ 's',	"max-robots",		OPT_MAXROBOTS,		AGO_NEEDARG},
3346         { '\0', "grep",                 OPT_GREP,               AGO_NEEDARG},
3347         { '\0', "exclude",              OPT_EXCLUDE,            AGO_NEEDARG},
3348 	{ 'P',  "prefix",		OPT_PREFIX,		AGO_NEEDARG},
3349 	{ 'o',  "output",		OPT_OUTPUT,		AGO_NEEDARG},
3350 	{ 'V',  "graphviz",		OPT_GRAPHVIZ,		AGO_NOARG},
3351 	{ '\0', "graphviz-ignorenode-google", OPT_GRAPHVIZ_ignorenode_GOOGLE,
3352 		AGO_NOARG},
3353 	{ '\0', "graphviz-ignorenode-external", OPT_GRAPHVIZ_ignorenode_EXTERNAL,
3354 		AGO_NOARG},
3355 	{ '\0', "graphviz-ignorenode-noreferer", OPT_GRAPHVIZ_ignorenode_NOREFERER,
3356 		AGO_NOARG},
3357 	{ 'v',  "version",		OPT_VERSION,		AGO_NOARG},
3358 	{ '\0', "tail",			OPT_TAIL,		AGO_NOARG},
3359 	{ '\0', "time-delta",		OPT_TIMEDELTA,		AGO_NEEDARG},
3360         { '\0', "filter-spam",          OPT_FILTERSPAM,         AGO_NOARG},
3361         { '\0', "ignore-404",           OPT_IGNORE404,          AGO_NOARG},
3362 	{ '\0',	"debug",		OPT_DEBUG,		AGO_NOARG},
3363 	{ 'h',	"help",			OPT_HELP,		AGO_NOARG},
3364 	AGO_LIST_TERM
3365 };
3366 
visitors_show_help(void)3367 void visitors_show_help(void)
3368 {
3369 	int i;
3370 
3371 	printf("Usage: visitors [options] <filename> [<filename> ...]\n");
3372 	printf("Available options:\n");
3373 	for (i = 0; visitors_optlist[i].ao_long != NULL; i++) {
3374 		if (visitors_optlist[i].ao_short != '\0') {
3375 			printf("  -%c ", visitors_optlist[i].ao_short);
3376 		} else {
3377 			printf("     ");
3378 		}
3379 		printf("--%-30s %s\n",
3380 				visitors_optlist[i].ao_long,
3381 				(visitors_optlist[i].ao_flags & AGO_NEEDARG) ?
3382 					"<argument>" : "");
3383 	}
3384         printf("\nNOTE: --filter-spam can be *very* slow. Use with care.\n\n");
3385 	printf("For more information visit http://www.hping.org/visitors\n"
3386 	       "Visitors is Copyright(C) 2004-2006 Salvatore Sanfilippo <antirez@invece.org>\n");
3387 }
3388 
main(int argc,char ** argv)3389 int main(int argc, char **argv)
3390 {
3391 	int i, o;
3392 	struct vih *vih;
3393 	char *filenames[VI_FILENAMES_MAX];
3394 	int filenamec = 0;
3395 
3396 	/* Handle command line options */
3397 	while((o = antigetopt(argc, argv, visitors_optlist)) != AGO_EOF) {
3398 		switch(o) {
3399 		case AGO_UNKNOWN:
3400 		case AGO_REQARG:
3401 		case AGO_AMBIG:
3402 			ago_gnu_error("visitors", o);
3403 			visitors_show_help();
3404 			exit(1);
3405 			break;
3406 		case OPT_HELP:
3407 			visitors_show_help();
3408 			exit(0);
3409 			break;
3410 		case OPT_VERSION:
3411 			printf("Visitors %s\n", VI_VERSION_STR);
3412 			exit(0);
3413 		case OPT_MAXREFERERS:
3414 			Config_max_referers = atoi(ago_optarg);
3415 			break;
3416 		case OPT_MAXPAGES:
3417 			Config_max_pages = atoi(ago_optarg);
3418 			break;
3419 		case OPT_MAXIMAGES:
3420 			Config_max_images = atoi(ago_optarg);
3421 			break;
3422 		case OPT_MAXERROR404:
3423 			Config_max_error404 = atoi(ago_optarg);
3424 			break;
3425 		case OPT_MAXUSERAGENTS:
3426 			Config_max_agents = atoi(ago_optarg);
3427 			break;
3428 		case OPT_MAXTRAILS:
3429 			Config_max_trails = atoi(ago_optarg);
3430 			break;
3431 		case OPT_MAXGOOGLED:
3432 			Config_max_googled = atoi(ago_optarg);
3433 			break;
3434 		case OPT_MAXADSENSED:
3435 			Config_max_adsensed = atoi(ago_optarg);
3436 			break;
3437 		case OPT_MAXGOOGLEKEYPHRASES:
3438 			Config_max_google_keyphrases = atoi(ago_optarg);
3439 			break;
3440 		case OPT_MAXGOOGLEKEYPHRASESAGE:
3441 			Config_max_google_keyphrases_age = atoi(ago_optarg);
3442 			break;
3443 		case OPT_MAXREFERERSAGE:
3444 			Config_max_referers_age = atoi(ago_optarg);
3445 			break;
3446 		case OPT_MAXTLD:
3447 			Config_max_tld = atoi(ago_optarg);
3448 			break;
3449 		case OPT_MAXROBOTS:
3450 			Config_max_robots = atoi(ago_optarg);
3451 			break;
3452 		case OPT_USERAGENTS:
3453 			Config_process_agents = 1;
3454 			break;
3455 		case OPT_GOOGLE:
3456 			Config_process_google = 1;
3457 			break;
3458 		case OPT_GOOGLEKEYPHRASES:
3459 			Config_process_google_keyphrases = 1;
3460 			break;
3461 		case OPT_GOOGLEKEYPHRASESAGE:
3462 			Config_process_google_keyphrases_age = 1;
3463 			break;
3464 		case OPT_GOOGLEHUMANLANGUAGE:
3465                         Config_process_google_keyphrases = 1;
3466 			Config_process_google_human_language = 1;
3467 			break;
3468 		case OPT_TLD:
3469 			Config_process_tld = 1;
3470 			break;
3471 		case OPT_OS:
3472 			Config_process_os = 1;
3473 			break;
3474 		case OPT_BROWSERS:
3475 			Config_process_browsers = 1;
3476 			break;
3477 		case OPT_ERROR404:
3478 			Config_process_error404 = 1;
3479 			break;
3480 		case OPT_PAGEVIEWS:
3481 			Config_process_pageviews = 1;
3482 			break;
3483 		case OPT_ROBOTS:
3484 			Config_process_robots = 1;
3485 			break;
3486 		case OPT_ALL:
3487 			Config_process_agents = 1;
3488 			Config_process_google = 1;
3489 			Config_process_google_keyphrases = 1;
3490 			Config_process_google_keyphrases_age = 1;
3491 			Config_process_google_human_language = 1;
3492 			Config_process_weekdayhour_map = 1;
3493 			Config_process_monthday_map = 1;
3494 			Config_process_referers_age = 1;
3495 			Config_process_tld = 1;
3496 			Config_process_os = 1;
3497 			Config_process_browsers = 1;
3498 			Config_process_error404 = 1;
3499 			Config_process_pageviews = 1;
3500 			Config_process_robots = 1;
3501                         Config_process_screen_info = 1;
3502 			break;
3503 		case OPT_PREFIX:
3504 			if (Config_prefix_num < VI_PREFIXES_MAX) {
3505 				Config_prefix[Config_prefix_num].str = ago_optarg;
3506 				Config_prefix[Config_prefix_num].len = strlen(ago_optarg);
3507 				Config_prefix_num++;
3508 			} else {
3509 				fprintf(stderr, "Error: too many prefixes specified\n");
3510 				exit(1);
3511 			}
3512 			break;
3513 		case OPT_TRAILS:
3514 			Config_process_web_trails = 1;
3515 			break;
3516 		case OPT_MAXLINES:
3517 			{
3518 				int aux = atoi(ago_optarg);
3519 				Config_max_referers = aux;
3520 				Config_max_pages = aux;
3521 				Config_max_images = aux;
3522 				Config_max_error404 = aux;
3523 				Config_max_agents = aux;
3524 				Config_max_googled = aux;
3525 				Config_max_adsensed = aux;
3526 				Config_max_trails = aux;
3527 				Config_max_google_keyphrases = aux;
3528 				Config_max_google_keyphrases_age = aux;
3529 				Config_max_referers_age = aux;
3530 				Config_max_tld = aux;
3531 				Config_max_robots = aux;
3532 			}
3533 			break;
3534 		case OPT_OUTPUT:
3535 			if (!strcasecmp(ago_optarg, "text"))
3536 				Output = &OutputModuleText;
3537 			else if (!strcasecmp(ago_optarg, "html"))
3538 				Output = &OutputModuleHtml;
3539 			else {
3540 				fprintf(stderr, "Unknown output module '%s'\n",
3541 						ago_optarg);
3542 				exit(1);
3543 			}
3544 			break;
3545 		case OPT_GRAPHVIZ:
3546 			Config_graphviz_mode = 1;
3547 			Config_process_web_trails = 1;
3548 			break;
3549 		case OPT_GRAPHVIZ_ignorenode_GOOGLE:
3550 			Config_graphviz_ignorenode_google = 1;
3551 			break;
3552 		case OPT_GRAPHVIZ_ignorenode_EXTERNAL:
3553 			Config_graphviz_ignorenode_external= 1;
3554 			break;
3555 		case OPT_GRAPHVIZ_ignorenode_NOREFERER:
3556 			Config_graphviz_ignorenode_noreferer = 1;
3557 			break;
3558 		case OPT_TAIL:
3559 			Config_tail_mode = 1;
3560 			break;
3561 		case OPT_WEEKDAYHOUR_MAP:
3562 			Config_process_weekdayhour_map = 1;
3563 			break;
3564 		case OPT_MONTHDAY_MAP:
3565 			Config_process_monthday_map = 1;
3566 			break;
3567 		case OPT_REFERERSAGE:
3568 			Config_process_referers_age = 1;
3569 			break;
3570 		case OPT_STREAM:
3571 			Config_stream_mode = 1;
3572 			break;
3573 		case OPT_OUTPUTFILE:
3574 			Config_output_file = ago_optarg;
3575 			break;
3576 		case OPT_UPDATEEVERY:
3577 			Config_update_every = atoi(ago_optarg);
3578 			break;
3579 		case OPT_RESETEVERY:
3580 			Config_reset_every = atoi(ago_optarg);
3581 			break;
3582 		case OPT_TIMEDELTA:
3583 			Config_time_delta = atoi(ago_optarg);
3584 			break;
3585                 case OPT_FILTERSPAM:
3586                         Config_filter_spam = 1;
3587                         break;
3588                 case OPT_GREP:
3589                         ConfigAddGrepPattern(ago_optarg, VI_PATTERNTYPE_GREP);
3590                         break;
3591                 case OPT_EXCLUDE:
3592                         ConfigAddGrepPattern(ago_optarg, VI_PATTERNTYPE_EXCLUDE);
3593                         break;
3594                 case OPT_IGNORE404:
3595                         Config_ignore_404 = 1;
3596                         break;
3597                 case OPT_DEBUG:
3598                         Config_debug = 1;
3599                         break;
3600                 case OPT_SCREENINFO:
3601                         Config_process_screen_info = 1;
3602                         break;
3603 		case AGO_ALONE:
3604 			if (filenamec < VI_FILENAMES_MAX)
3605 				filenames[filenamec++] = ago_optarg;
3606 			break;
3607 		}
3608 	}
3609 	/* If the user specified the 'tail' mode, we
3610 	 * just emulate a "tail -f" for the specified files. */
3611 	if (Config_tail_mode) {
3612 		vi_tail(filenamec, filenames);
3613 		return 0;
3614 	}
3615 	/* Check if at least one file was specified */
3616 	if (filenamec == 0 && !Config_stream_mode) {
3617 		fprintf(stderr, "No logfile specified\n");
3618 		visitors_show_help();
3619 		exit(1);
3620 	}
3621 	/* If the prefix was not set, but the user asks for
3622 	 * web trails, notify it and exit. */
3623 	if (Config_process_web_trails && !Config_prefix_num) {
3624 		fprintf(stderr, "At least one prefix must be specified (using --prefix) for web trails\nExample: --prefix http://your.site.org\n");
3625 		exit(1);
3626 	}
3627         /* If screen-info is enabled, error 404 must be too, auto-enable it. */
3628         if (Config_process_screen_info && !Config_process_error404) {
3629             fprintf(stderr, "Note: 404 error processing enabled for screen-info report\n");
3630             Config_process_error404 = 1;
3631         }
3632 	/* If stream-mode is enabled, --output-file should be specified. */
3633 	if (Config_stream_mode && Config_output_file == NULL) {
3634 		fprintf(stderr, "--stream requires --output-file\n");
3635 		exit(1);
3636 	}
3637 	/* Set the default output module */
3638 	if (Output == NULL)
3639 		Output = &OutputModuleHtml;
3640 	/* Change to "C" locale for date/time related functions */
3641 	setlocale(LC_ALL, "C");
3642 	/* Process all the log files specified. */
3643 	vih = vi_new();
3644 	for (i = 0; i < filenamec; i++) {
3645 		if (vi_scan(vih, filenames[i])) {
3646 			fprintf(stderr, "%s: %s\n", filenames[i], vi_get_error(vih));
3647 			exit(1);
3648 		}
3649 	}
3650 	if (Config_graphviz_mode) {
3651 		vi_print_graphviz(vih);
3652 	} else {
3653 		if (vi_print_report(Config_output_file, vih)) {
3654 			fprintf(stderr, "%s\n", vi_get_error(vih));
3655 			exit(1);
3656 		}
3657 		if (Config_stream_mode) {
3658 			vi_stream_mode(vih);
3659 		}
3660 	}
3661 	vi_print_statistics(vih);
3662         /* The following is commented in releases as to free the hashtable
3663          * memory is very slow, it's better to just exit the program.
3664          * Still it is important to be able to re-enable a good cleanup
3665          * in order to run visitors against valgrind to check for memory
3666          * leaks. */
3667         /* vi_free(vih); */
3668 	return 0;
3669 }
3670