1 /*
2 AWFFull - A Webalizer Fork, Full o' features
3
4 awffull.c
5 The main program
6
7 Copyright (C) 1997-2001 Bradford L. Barrett (brad@mrunix.net)
8 Copyright 2002, 2004 by Stanislaw Yurievich Pusep
9 Copyright (C) 2004-2008 by Stephen McInerney (spm@stedee.id.au)
10 Copyright (C) 2006 by Alexander Lazic (al-awffull@none.at)
11 Copyright (C) 2006 by Benoit Rouits (brouits@free.fr)
12
13 This file is part of AWFFull.
14
15 AWFFull is free software: you can redistribute it and/or modify
16 it under the terms of the GNU General Public License as published by
17 the Free Software Foundation, either version 3 of the License, or
18 (at your option) any later version.
19
20 AWFFull is distributed in the hope that it will be useful,
21 but WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 GNU General Public License for more details.
24
25 You should have received a copy of the GNU General Public License
26 along with AWFFull. If not, see <http://www.gnu.org/licenses/>.
27
28 This software uses the gd graphics library, which is copyright by
29 Quest Protein Database Center, Cold Spring Harbor Labs. Please
30 see the documentation supplied with the library for additional
31 information and license terms, or visit www.boutell.com/gd/ for the
32 most recent version of the library and supporting documentation.
33
34 */
35
36 /*********************************************/
37 /* STANDARD INCLUDES */
38 /*********************************************/
39 #include "awffull.h" /* main header */
40 #ifdef HAVE_ICONV
41 #include <iconv.h>
42 #endif
43
44 /* internal function prototypes */
45
46 void clear_month(void); /* clear monthly stuff */
47 char *unescape(char *); /* unescape URL's */
48 char from_hex(char); /* convert hex to dec */
49 int isurlchar(unsigned char); /* valid URL char fnc. */
50 static char *save_opt(char *); /* save conf option */
51 void srch_string(char *, char *); /* srch str analysis */
52 char *get_domain(char *); /* return domain name */
53 char *our_gzgets(gzFile, char *, int); /* our gzgets */
54 int do_agent_mangling(char *);
55 void option_checks(void); /* Various early checks */
56 void *process_log_line(void *);
57 bool isaffirmitive(char *); /* Is the passed in string == to Y | y | N | n | Yes or No
58 or any other case combo of same. Can also accept True or False */
59
60 int response_code_index(int);
61 static void process_end_of_month(void); /* Do all the bits for an end of month */
62 static void init_run_counters(void);
63
64 /* The cleaner functions. These de-gunk the individual parts of a log entry. */
65 int cleanup_host(char *);
66 int cleanup_user(char *);
67 int cleanup_date_time(char *, struct tm *);
68 int cleanup_url(char *);
69 int cleanup_refer(char *, char *);
70 int cleanup_agent(char *);
71
72
73 /*********************************************/
74 /* GLOBAL VARIABLES */
75 /*********************************************/
76
77 const char *version = PACKAGE_VERSION; /* program version */
78
79 struct global_settings g_settings; /* All Settings & Flags */
80 struct global_counters g_counters; /* All counters */
81 struct global_run_counters g_run_counters;
82
83
84 char *log_fname = NULL; /* log file pointer */
85 char *blank_str = ""; /* blank string */
86
87 #if HAVE_GEOIP_H
88 GeoIP *gi; /* GeoIP access */
89 #endif
90
91 int g_cur_year = 0, g_cur_month = 1, /* year/month/day/hour */
92 g_cur_day = 0, g_cur_hour = 0, /* tracking variables */
93 g_cur_min = 0, g_cur_sec = 0;
94
95 unsigned long cur_tstamp = 0; /* Timestamp... */
96 unsigned long rec_tstamp = 0;
97 unsigned long req_tstamp = 0;
98 unsigned long epoch; /* used for timestamp adj. */
99
100 int gz_log = 0; /* gziped log? (0=no) */
101
102 unsigned long ht_hit = 0, mh_hit = 0; /* hourly hits totals */
103
104 struct utsname system_info; /* system info structure */
105
106 unsigned long ul_bogus = 0; /* Dummy counter for groups */
107
108 time_t now; /* used by current_time funct */
109 struct tm *tp; /* to generate timestamp */
110 char timestamp[64]; /* for the reports */
111 time_t temp_time_squid; /* For pulling in squid times */
112
113 gzFile gzlog_fp; /* gzip logfile pointer */
114 FILE *log_fp; /* regular logfile pointer */
115
116 char buffer[BUFSIZE]; /* log file record buffer */
117 char tmp_buf[BUFSIZE]; /* used to temp save above */
118
119 CLISTPTR *top_ctrys = NULL; /* Top countries table */
120
121 #define GZ_BUFSIZE 16384 /* our_getfs buffer size */
122 char f_buf[GZ_BUFSIZE]; /* our_getfs buffer */
123 char *f_cp = f_buf + GZ_BUFSIZE; /* pointer into the buffer */
124 int f_end; /* count to end of buffer */
125
126 char hit_color[] = DKGREEN; /* graph hit color */
127 char file_color[] = BLUE; /* graph file color */
128 char site_color[] = ORANGE; /* graph site color */
129 char kbyte_color[] = RED; /* graph kbyte color */
130 char page_color[] = CYAN; /* graph page color */
131 char visit_color[] = YELLOW; /* graph visit color */
132 char bookm_color[] = PURPLE; /* graph bookm color */
133 char pie_color1[] = DKGREEN; /* pie additional color 1 */
134 char pie_color2[] = ORANGE; /* pie additional color 2 */
135 char pie_color3[] = BLUE; /* pie additional color 3 */
136 char pie_color4[] = RED; /* pie additional color 4 */
137
138 static char const ab_month_name[][4] = {
139 "Jan", "Feb", "Mar", "Apr", "May", "Jun",
140 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
141 };
142
143 #ifdef HAVE_ICONV
144 iconv_t cd_from_sjis, cd_from_eucj;
145 #endif
146
147 /*********************************************/
148 /* MAIN - start here */
149 /*********************************************/
150
151 int
main(int argc,char * argv[])152 main(int argc, char *argv[])
153 {
154 int i, j; /* generic counters */
155
156 extern char *optarg; /* used for command line */
157 extern int optind; /* parsing routine 'getopt' */
158 extern int opterr;
159
160 bool loaded_default_config = false; /* Did we load the default config file? */
161
162 time_t start_time, end_time; /* program timers */
163 float temp_time; /* temporary time storage */
164 struct tms mytms; /* bogus tms structure */
165
166 bool good_rec = false; /* true if we had at least ONE good record */
167
168 bool isapage = false; /* Flag. Is this a page or not? */
169 bool isnewsite = false; /* Flag. True if this is a new site */
170
171 struct tm time_rec; /* Gotta convert that string'ed time into a timerec first */
172 struct tm prev_time_rec; /* Hang onto the pevious time_rec in case of out of sequence errors */
173
174 char str_previous_time[MAXDATETIME] = "";
175 struct log_struct parsed_log; /* The log entries post PCRE'ing */
176
177 char *current_locale = NULL; /* the locale, as returned from setlocale */
178 char *message_catalog_dir = NULL; /* Directory for all the translations */
179 char *message_domain = NULL; /* current message domain for translations */
180 char *envlang, *envlanguage;
181
182 #if ENABLE_NLS
183 /* Reduce Surprises. Unset most LC_* env settings. Basically, only let LANG and/or LANGUAGE change stuff.
184 * Too many cross polination mess ups otherwise. */
185 /*
186 unsetenv("LC_CTYPE");
187 unsetenv("LC_MESSAGES");
188 unsetenv("LC_COLLATE");
189 unsetenv("LC_MONETARY");
190 unsetenv("LC_NUMERIC");
191 unsetenv("LC_TIME");
192 unsetenv("LC_PAPER");
193 unsetenv("LC_NAME");
194 unsetenv("LC_ADDRESS");
195 unsetenv("LC_TELEPHONE");
196 unsetenv("LC_MEASUREMENT");
197 unsetenv("LC_IDENTIFICATION");
198 */
199
200 current_locale = setlocale(LC_ALL, "");
201 message_domain = textdomain(PACKAGE);
202 message_catalog_dir = bindtextdomain(PACKAGE, LOCALEDIR);
203 envlang = getenv("LANG");
204 envlanguage = getenv("LANGUAGE");
205 if (envlang != NULL) {
206 if (strncmp("ja_JP", envlang, 5) == 0) {
207 g_settings.graphs.use_kanji = true;
208 }
209 } else if (envlanguage != NULL) {
210 if (strncmp("ja_JP", envlanguage, 5) == 0) {
211 g_settings.graphs.use_kanji = true;
212 }
213 }
214 #endif
215
216 assign_messages(); /* Load up AWFFull's general messages, country names etc */
217
218 /* initalize epoch */
219 epoch = jdate(1, 1, 1970); /* used for timestamp adj. */
220
221 /* Initialise all flags and base settings */
222 set_defaults();
223 init_run_counters();
224
225 /* check for default config file */
226 if (!access(g_settings.settings.config_filename, F_OK)) {
227 get_config(g_settings.settings.config_filename);
228 loaded_default_config = true;
229 }
230
231 process_options(argc, argv);
232 if (loaded_default_config == true) {
233 VPRINT(VERBOSE1, "%s: %s\n", _("Initially processed default config file"), g_settings.settings.config_filename);
234 }
235 display_options();
236 option_checks();
237
238 #if ENABLE_NLS
239 VPRINT(VERBOSE2, "Lang: %s\nLanguage: %s\n", envlang, envlanguage);
240 VPRINT(VERBOSE2, "Current Locale: %s\nMessage Catalog: %s\nNessage Domain: %s\n", current_locale, message_catalog_dir, message_domain);
241 #endif
242
243 if (argc - optind != 0) {
244 log_fname = argv[optind];
245 }
246 if (log_fname && (log_fname[0] == '-')) {
247 log_fname = NULL; /* force STDIN? */
248 }
249
250 /* check for gzipped file - .gz */
251 if (log_fname) {
252 if (!strcmp((log_fname + strlen(log_fname) - 3), ".gz")) {
253 gz_log = 1;
254 }
255 }
256
257 /* setup our internal variables */
258 init_counters(); /* initalize main counters */
259
260 for (i = 0; i < MAXHASH; i++) {
261 sm_htab[i] = sd_htab[i] = NULL; /* initalize hash tables */
262 um_htab[i] = NULL;
263 rm_htab[i] = NULL;
264 am_htab[i] = NULL;
265 sr_htab[i] = NULL;
266 }
267
268 /* open log file */
269 if (gz_log) {
270 gzlog_fp = gzopen(log_fname, "rb");
271 if (gzlog_fp == Z_NULL) {
272 /* Error: Can't open log file ... */
273 ERRVPRINT(VERBOSE0, "%s %s\n", _("Error: Can't open log file"), log_fname);
274 exit(1);
275 }
276 } else {
277 if (log_fname) {
278 log_fp = fopen(log_fname, "r");
279 if (log_fp == NULL) {
280 /* Error: Can't open log file ... */
281 ERRVPRINT(VERBOSE0, "%s %s\n", _("Error: Can't open log file"), log_fname);
282 exit(1);
283 }
284 }
285 }
286
287 /* Using logfile ... */
288 VPRINT(VERBOSE1, "%s %s", _("Using logfile"), log_fname ? log_fname : "STDIN");
289 if (gz_log) {
290 VPRINT(VERBOSE1, " (gzip)");
291 }
292 VPRINT(VERBOSE1, "\n");
293
294
295 /* switch directories if needed */
296 if (g_settings.settings.out_dir) {
297 if (chdir(g_settings.settings.out_dir) != 0) {
298 /* Error: Can't change directory to ... */
299 ERRVPRINT(VERBOSE0, "%s %s\n", _("Error: Can't change directory to"), g_settings.settings.out_dir);
300 exit(1);
301 }
302 }
303
304 /* Creating output in ... */
305 VPRINT(VERBOSE1, "%s %s\n", _("Creating output in"), g_settings.settings.out_dir ? g_settings.settings.out_dir : _("current directory"));
306
307 /* prep hostname */
308 if (!g_settings.settings.hostname) {
309 if (uname(&system_info)) {
310 g_settings.settings.hostname = "localhost";
311 } else {
312 g_settings.settings.hostname = system_info.nodename;
313 }
314 }
315
316 /* Hostname for reports is ... */
317 VPRINT(VERBOSE1, "%s '%s'\n", _("Hostname for reports is"), g_settings.settings.hostname);
318 VPRINT(VERBOSE1, "%s %s\n", _("Using CSS file:"), g_settings.settings.css_filename);
319
320 /* get past history */
321 if (g_settings.flags.ignore_history) {
322 VPRINT(VERBOSE1, "%s\n", _("Ignoring previous history..."));
323 } else {
324 get_history();
325 }
326
327 if (g_settings.flags.incremental) {
328 i = restore_state();
329 if (i > 0) {
330 /* Error: Unable to restore run data (error num) */
331 ERRVPRINT(VERBOSE0, "%s (%d)\n", _("Error: Unable to restore run data"), i);
332 exit(1);
333 } else if (i == 0) {
334 g_settings.flags.incremental_duplicate_check = true;
335 }
336 /* Do nothing on an i == -1 --> No previous data to load */
337 }
338
339 /* Allocate memory for our TOP countries array */
340 if (g_settings.top.countries > 0) {
341 top_ctrys = XMALLOC(CLISTPTR, g_settings.top.countries);
342 }
343
344 /* Do any graphical output Setup */
345 initialise_graphs();
346
347 start_time = times(&mytms);
348
349 #ifdef HAVE_ICONV
350 cd_from_sjis = iconv_open("UTF-8", "Shift_JIS");
351 cd_from_eucj = iconv_open("UTF-8", "EUC-JP");
352 #endif
353
354 /*********************************************
355 * MAIN PROCESS LOOP - read through log file *
356 *********************************************/
357 while ((gz_log) ? (our_gzgets(gzlog_fp, buffer, BUFSIZE) != Z_NULL) : (fgets(buffer, BUFSIZE, log_fname ? log_fp : stdin) != NULL)) {
358 g_run_counters.total_run++;
359 if (strlen(buffer) == (BUFSIZE - 1)) {
360 ERRVPRINT(VERBOSE1, "%s %lu", _("Error: Skipping oversized log record:"), g_run_counters.total_run);
361 ERRVPRINT(VERBOSE3, " ==> %s\n", buffer);
362 g_run_counters.bad_run++; /* bump bad record counter */
363
364 /* get the rest of the record */
365 while ((gz_log) ? (our_gzgets(gzlog_fp, buffer, BUFSIZE) != Z_NULL) : (fgets(buffer, BUFSIZE, log_fname ? log_fp : stdin) != NULL)) {
366 ERRVPRINT(VERBOSE3, " %s\n", buffer);
367 if (strlen(buffer) < BUFSIZE - 1) {
368 break;
369 }
370 }
371 continue; /* go get next record if any */
372 }
373
374 /* got a record... */
375 VPRINT(VERBOSE3, "==> %s", buffer);
376 if (parse_record(buffer, &parsed_log)) { /* parse the record */
377 /*********************************************
378 * DO SOME PRE-PROCESS FORMATTING *
379 *********************************************/
380 cleanup_host(parsed_log.hostname);
381 cleanup_user(parsed_log.ident);
382 cleanup_url(parsed_log.url);
383 cleanup_refer(parsed_log.refer, parsed_log.srchstr);
384 cleanup_agent(parsed_log.agent);
385
386
387 /*********************************************
388 * PASSED MINIMAL CHECKS, DO A LITTLE MORE *
389 *********************************************/
390
391 /********************************************
392 * PROCESS RECORD *
393 ********************************************/
394 if (strncmp(str_previous_time, parsed_log.datetime, MAXDATETIME) != 0) {
395 /* strptime is $$$ - avoid if same date/time */
396 memset(&time_rec, 0, sizeof(time_rec));
397 if (g_settings.settings.log_type == LOG_FTP) {
398 strptime(parsed_log.datetime, DATE_TIME_XFERLOG_FORMAT, &time_rec);
399 } else if (g_settings.settings.log_type == LOG_SQUID) {
400 temp_time_squid = strtoul(parsed_log.datetime, NULL, 10);
401 localtime_r(&temp_time_squid, &time_rec);
402 } else {
403 /* Alternate date/time extraction for Web Logs. Effectively revert to webalizer method.
404 * Issue with FreeBSD and localised extraction. See list discusion 2008/03/2 */
405 /*strptime(parsed_log.datetime, DATE_TIME_FORMAT, &time_rec); */
406 time_rec.tm_mday = atoi(&parsed_log.datetime[0]);
407 time_rec.tm_year = atoi(&parsed_log.datetime[7]) - 1900;
408 time_rec.tm_hour = atoi(&parsed_log.datetime[12]);
409 time_rec.tm_min = atoi(&parsed_log.datetime[15]);
410 time_rec.tm_sec = atoi(&parsed_log.datetime[18]);
411
412 /* Find the month */
413 j = 0;
414 i = g_cur_month - 1;
415 while (j < 12) {
416 if (strncmp(ab_month_name[i], &parsed_log.datetime[3], 3) == 0) {
417 time_rec.tm_mon = i;
418 break;
419 }
420 i++;
421 j++;
422 if (i >= 12)
423 i = 0;
424 }
425 if (j >= 12) {
426 /* If we get here? Something is broken! */
427 ERRVPRINT(VERBOSE1, _("Error! Corrupted Date/Time Record. Line: %lu Value: %s\n"), g_run_counters.total_run, parsed_log.datetime);
428 g_run_counters.bad_run++;
429 continue;
430 }
431 }
432 /*ERRVPRINT(0, " Rec Date-Time: %d/%d/%d:%d:%d:%d\n", time_rec.tm_mday,time_rec.tm_mon,time_rec.tm_year,time_rec.tm_hour,time_rec.tm_min,time_rec.tm_sec); */
433 time_rec.tm_isdst = -1; /* stop mktime from resetting for daylight savings */
434 rec_tstamp = mktime(&time_rec);
435 if (rec_tstamp < 0) {
436 ERRVPRINT(VERBOSE1, _("Error! Corrupted Date/Time Record. Line: %lu Value: %s\n"), g_run_counters.total_run, parsed_log.datetime);
437 g_run_counters.bad_run++;
438 continue;
439 }
440 strlcpy(str_previous_time, parsed_log.datetime, MAXDATETIME);
441 }
442
443 /* Do we need to check for duplicate records? (incremental mode) */
444 if (g_settings.flags.incremental_duplicate_check == true) {
445 /* check if less than/equal to last record processed */
446 if (rec_tstamp <= cur_tstamp) {
447 /* if it is, assume we have already processed and ignore it */
448 g_run_counters.ignored_run++;
449 VPRINT(VERBOSE4, "IGNORE_DUPCHK: %s", buffer);
450 continue;
451 } else {
452 /* if it isn't.. disable any more checks this run */
453 g_settings.flags.incremental_duplicate_check = false;
454 /* now check if it's a new month */
455 if (g_cur_month != (time_rec.tm_mon + 1)) {
456 clear_month();
457 g_cur_month = time_rec.tm_mon + 1;
458 g_cur_year = time_rec.tm_year + 1900;
459 g_cur_day = time_rec.tm_mday;
460 g_cur_hour = time_rec.tm_hour;
461 g_cur_min = time_rec.tm_min;
462 g_cur_sec = time_rec.tm_sec;
463 g_counters.month.first_day = g_counters.month.last_day = time_rec.tm_mday; /* reset first and last day */
464 cur_tstamp = rec_tstamp;
465 }
466 }
467 }
468
469 /* Ignore records that are too far incorrect of the previous timestamp - older records as in */
470 /* TODO - Auto FoldSeq.Err when we would try and revert a day */
471 if (rec_tstamp < cur_tstamp) {
472 if (!g_settings.flags.fold_seq_err && ((rec_tstamp + SLOP_VAL) < cur_tstamp)) {
473 g_run_counters.ignored_run++;
474 VPRINT(VERBOSE4, "IGNORE_SEQERR: %s", buffer);
475 continue;
476 } else {
477 rec_tstamp = cur_tstamp;
478 memcpy(&time_rec, &prev_time_rec, sizeof(prev_time_rec));
479 }
480 }
481 cur_tstamp = rec_tstamp; /* update current timestamp */
482
483 /* first time through? */
484 if (g_settings.flags.is_first_run == true) {
485 /* if yes, init our date vars */
486 g_cur_month = time_rec.tm_mon + 1;
487 g_cur_year = time_rec.tm_year + 1900;
488 g_cur_day = time_rec.tm_mday;
489 g_cur_hour = time_rec.tm_hour;
490 g_cur_min = time_rec.tm_min;
491 g_cur_sec = time_rec.tm_sec;
492 if (g_counters.month.first_day == 0) {
493 g_counters.month.first_day = time_rec.tm_mday;
494 }
495 g_counters.month.last_day = time_rec.tm_mday;
496 g_settings.flags.is_first_run = false;
497 }
498
499 /* We don't track below "hour", so always update seconds and minutes */
500 g_cur_sec = time_rec.tm_sec;
501 g_cur_min = time_rec.tm_min;
502
503 /* We're now past all the "is bad time?" checks. ie. This is now a known "good" time record.
504 * So can save the current timerec as the 'previous' timerec.
505 * Just don't try and reference 'previous' time after this point! */
506 memcpy(&prev_time_rec, &time_rec, sizeof(prev_time_rec));
507
508 /* check for hour change */
509 if (g_cur_hour != time_rec.tm_hour) {
510 /* if yes, init hourly stuff */
511 if (ht_hit > mh_hit)
512 mh_hit = ht_hit;
513 ht_hit = 0;
514 g_cur_hour = time_rec.tm_hour;
515 }
516
517 /* check for day change */
518 if (g_cur_day != time_rec.tm_mday) {
519 /* if yes, init daily stuff */
520 g_counters.day.visit[g_cur_day - 1] = tot_visit(sd_htab);
521 del_hlist(sd_htab);
522 segment_htab_cleanup(seg_ref_htab);
523 g_cur_day = time_rec.tm_mday;
524 }
525
526 /* check for month change */
527 if (g_cur_month != (time_rec.tm_mon + 1)) {
528 /* if yes, do monthly stuff */
529 process_end_of_month();
530 clear_month();
531 g_cur_month = time_rec.tm_mon + 1; /* update our flags */
532 g_cur_year = time_rec.tm_year + 1900;
533 g_counters.month.first_day = time_rec.tm_mday;
534 }
535
536 g_counters.month.last_day = time_rec.tm_mday; /* update new last day, *after* we do any end of month calculations */
537
538 /* Segmenting Check(s)
539 * Inverse of the Ignore checks
540 * To pass segmenting, a given log line MUST match all checks.
541 * Can then be filtered for Includes/Ignores.
542 * Segmenting is the highest precedence
543 * Ignore/Include checks will only be done AFTER Segmenting
544 */
545 if (g_settings.flags.segmenting == true) {
546 if (!segment_by_country(parsed_log.hostname)) {
547 VPRINT(VERBOSE4, "IGNORESEG_CTRY: %s", buffer);
548 g_run_counters.ignored_run++;
549 continue;
550 }
551 VPRINT(VERBOSE3, "SEGMENT: Country OK: %s\n", parsed_log.hostname);
552 if (!segment_by_referer(parsed_log.refer, parsed_log.hostname, rec_tstamp)) {
553 VPRINT(VERBOSE4, "IGNORESEG_REF: %s", buffer);
554 g_run_counters.ignored_run++;
555 continue;
556 }
557 VPRINT(VERBOSE3, "SEGMENT: Referrer OK: %s --> %s\n", parsed_log.hostname, parsed_log.refer);
558 // if (! segment_by_searchstr(parsed_log.srchstr)) {
559 // total_ignore++;
560 // continue;
561 // }
562 // VPRINT(VERBOSE1, "SEGMENT: Search String OK: %s\n", parsed_log.srchstr);
563 }
564
565 /* Ignore/Include check */
566 /* TODO: Build up a hash tab of ignored entries for fast/cached matching??? */
567 if ((isinlist(include_sites, parsed_log.hostname) == NULL) &&
568 (isinlist(include_urls, parsed_log.url) == NULL) && (isinlist(include_refs, parsed_log.refer) == NULL) && (isinlist(include_agents, parsed_log.agent) == NULL)
569 && (isinlist(include_users, parsed_log.ident) == NULL)) {
570 if (isinlist(ignored_sites, parsed_log.hostname) != NULL) {
571 g_run_counters.ignored_run++;
572 VPRINT(VERBOSE5, "IGNORES: %s", buffer);
573 continue;
574 }
575 if (isinlist(ignored_urls, parsed_log.url) != NULL) {
576 g_run_counters.ignored_run++;
577 VPRINT(VERBOSE5, "IGNOREU: %s", buffer);
578 continue;
579 }
580 if (isinlist(ignored_agents, parsed_log.agent) != NULL) {
581 g_run_counters.ignored_run++;
582 VPRINT(VERBOSE5, "IGNOREA: %s", buffer);
583 continue;
584 }
585 if (isinlist(ignored_refs, parsed_log.refer) != NULL) {
586 g_run_counters.ignored_run++;
587 VPRINT(VERBOSE5, "IGNORER: %s", buffer);
588 continue;
589 }
590 if (isinlist(ignored_users, parsed_log.ident) != NULL) {
591 g_run_counters.ignored_run++;
592 VPRINT(VERBOSE5, "IGNOREZ: %s", buffer);
593 continue;
594 }
595 }
596 VPRINT(VERBOSE4, "LOGLINE: %s", buffer);
597
598 /* By this stage we have a known Good Record */
599 good_rec = true;
600
601 /* Bump response code totals */
602 response[response_code_index(parsed_log.resp_code)].count++;
603
604 /* now save in the various hash tables... */
605 if (parsed_log.resp_code == RC_OK || parsed_log.resp_code == RC_PARTIALCONTENT) {
606 i = 1;
607 } else {
608 i = 0;
609 }
610
611 /* Pages (pageview) calculation */
612 isapage = parse_is_page(parsed_log.url);
613 if (isapage) {
614 g_counters.month.page++;
615 g_counters.day.page[time_rec.tm_mday - 1]++;
616 g_counters.hour.page[time_rec.tm_hour]++;
617
618 /* do search string stuff if needed */
619 if (g_settings.top.search) {
620 srch_string(parsed_log.refer, parsed_log.srchstr);
621 }
622 }
623
624 /* URL/ident hash table (only if valid response code) */
625 if ((parsed_log.resp_code == RC_OK) || (parsed_log.resp_code == RC_NOMOD) || (parsed_log.resp_code == RC_PARTIALCONTENT)) {
626 /* URL hash table */
627 if (put_unode
628 (parsed_log.url, OBJ_REG, (unsigned long) 1, parsed_log.xfer_size, &g_counters.month.url, (unsigned long) 0, (unsigned long) 0, (unsigned long) 0,
629 parsed_log.resp_code, um_htab)) {
630 /* Error adding URL node, skipping ... */
631 ERRVPRINT(VERBOSE1, "%s %s\n", _("Error adding URL node, skipping"), parsed_log.url);
632 }
633
634 /* ident (username) hash table */
635 if (put_inode(parsed_log.ident, OBJ_REG, 1, (unsigned long) i, parsed_log.xfer_size, &g_counters.month.user, 0, rec_tstamp, im_htab, isapage)) {
636 /* Error adding ident node, skipping .... */
637 ERRVPRINT(VERBOSE1, "%s %s\n", _("Error adding Username node, skipping"), parsed_log.ident);
638 }
639 }
640
641 if (g_settings.top.error) {
642 if (parsed_log.resp_code == RC_NOTFOUND) {
643 if (put_enode(parsed_log.url, parsed_log.refer ? parsed_log.refer : _("Direct Request"), OBJ_REG, (unsigned long) 1, &g_counters.generic.error_month, ep_htab)) {
644 ERRVPRINT(VERBOSE1, _("Warning: Can't add %s with referer %s to errorpagehash"), parsed_log.url, parsed_log.refer);
645 }
646 }
647 }
648
649 /* referrer hash table */
650 if (g_settings.top.refs) {
651 if (parsed_log.refer[0] != '\0' && isapage)
652 if (put_rnode(parsed_log.refer, OBJ_REG, (unsigned long) 1, &g_counters.month.ref, rm_htab)) {
653 ERRVPRINT(VERBOSE1, "%s %s\n", _("Error adding Referrer node, skipping"), parsed_log.refer);
654 }
655 }
656
657 isnewsite = false;
658 /* hostname (site) hash table - daily */
659 if (put_hnode
660 (parsed_log.hostname, OBJ_REG, 1, (unsigned long) i, parsed_log.xfer_size, &g_counters.day.site[g_cur_day - 1], 0, isapage, rec_tstamp, parsed_log.url, "", sd_htab,
661 false, isapage, &isnewsite)) {
662 /* Error adding host node (daily), skipping .... */
663 ERRVPRINT(VERBOSE1, "%s %s\n", _("Error adding host node (daily), skipping"), parsed_log.hostname);
664 }
665 if (isnewsite) {
666 g_counters.hour.site[time_rec.tm_hour]++;
667 }
668
669 isnewsite = false;
670 /* hostname (site) hash table - monthly */
671 if (put_hnode
672 (parsed_log.hostname, OBJ_REG, 1, (unsigned long) i, parsed_log.xfer_size, &g_counters.month.site, 0, isapage, rec_tstamp, parsed_log.url, "", sm_htab, false,
673 isapage, &isnewsite)) {
674 /* Error adding host node (monthly), skipping .... */
675 ERRVPRINT(VERBOSE1, "%s %s\n", _("Error adding host node (monthly), skipping"), parsed_log.hostname);
676 }
677
678 /* user agent hash table */
679 if (g_settings.top.agents) {
680 if (parsed_log.agent[0] != '\0' && isapage)
681 if (put_anode(parsed_log.agent, OBJ_REG, (unsigned long) 1, &g_counters.month.agent, am_htab)) {
682 ERRVPRINT(VERBOSE1, "%s %s\n", _("Error adding User Agent node, skipping"), parsed_log.agent);
683 }
684 }
685
686 /* bump monthly/daily/hourly totals */
687 g_counters.month.hit++;
688 ht_hit++; /* daily/hourly hits */
689 g_counters.month.vol += parsed_log.xfer_size; /* total xfer size */
690 g_counters.day.vol[time_rec.tm_mday - 1] += parsed_log.xfer_size; /* daily xfer total */
691 g_counters.day.hit[time_rec.tm_mday - 1]++; /* daily hits total */
692 g_counters.hour.vol[time_rec.tm_hour] += parsed_log.xfer_size; /* hourly xfer total */
693 g_counters.hour.hit[time_rec.tm_hour]++; /* hourly hits total */
694
695 /* if file data was sent, increase file counters */
696 if (parsed_log.resp_code == RC_OK || parsed_log.resp_code == RC_PARTIALCONTENT) {
697 g_counters.month.file++;
698 g_counters.day.file[time_rec.tm_mday - 1]++;
699 g_counters.hour.file[time_rec.tm_hour]++;
700 }
701 }
702
703 /*********************************************
704 * BAD RECORD *
705 *********************************************/
706
707 else {
708 /* If first record, check if stupid Netscape header stuff */
709 if ((g_run_counters.total_run == 1) && (strncmp(buffer, "format=", 7) == 0)) {
710 /* Skipping Netscape header record */
711 VPRINT(VERBOSE1, "%s\n", _("Skipping Netscape header record"));
712 /* count it as ignored... */
713 g_run_counters.ignored_run++;
714 } else {
715 /* really bad record... */
716 g_run_counters.bad_run++;
717 ERRVPRINT(VERBOSE1, "%s (%lu)\n", _("Skipping bad record"), g_run_counters.total_run);
718 }
719 }
720 } /* ---- END MAIN WHILE LOOP ---- */
721
722 /*********************************************
723 * DONE READING LOG FILE - final processing *
724 *********************************************/
725
726 /* close log file if needed */
727 if (gz_log)
728 gzclose(gzlog_fp);
729 else if (log_fname)
730 fclose(log_fp);
731
732 if (good_rec) { /* were any good records? */
733 g_counters.day.visit[g_cur_day - 1] = tot_visit(sd_htab);
734 g_counters.month.visit = tot_visit(sm_htab);
735 if (ht_hit > mh_hit) {
736 mh_hit = ht_hit;
737 }
738
739 if (g_run_counters.total_run > (g_run_counters.ignored_run + g_run_counters.bad_run)) { /* did we process any? */
740 // g_counters.generic.bad += total_bad;
741 // g_counters.generic.ignored += total_ignore;
742 if (g_settings.flags.incremental) {
743 segment_htab_cleanup(seg_ref_htab);
744 if (save_state()) { /* incremental stuff */
745 /* Error: Unable to save current run data */
746 ERRVPRINT(VERBOSE1, "%s\n", _("Error: Unable to save current run data"));
747 unlink(g_settings.settings.state_filename);
748 }
749 }
750 process_end_of_month();
751 // month_update_exit(rec_tstamp); /* calculate exit pages */
752 // write_month_html(); /* write monthly HTML file */
753 write_main_index(); /* write main HTML file */
754 put_history(); /* write history */
755 }
756
757 if (g_settings.flags.display_match_counts) {
758 VPRINT(VERBOSE0, "%s\n", _("List Match Counts:"));
759 show_matched(group_sites, "GroupSite");
760 show_matched(group_urls, "GroupURL");
761 show_matched(group_refs, "GroupReferrer");
762 show_matched(group_agents, "GroupAgent");
763 show_matched(group_users, "GroupUser");
764
765 // show_matched(hidden_sites, "HideSite");
766 // show_matched(hidden_urls, "HideURL");
767 // show_matched(hidden_refs, "HideReferrer");
768 // show_matched(hidden_agents, "HideAgent");
769 // show_matched(hidden_users, "HideUser");
770 show_matched(ignored_sites, "IgnoreSite");
771 show_matched(ignored_urls, "IgnoreURL");
772 show_matched(ignored_refs, "IgnoreReferrer");
773 show_matched(ignored_agents, "IgnoreAgent");
774 show_matched(ignored_users, "IgnoreUser");
775
776 show_matched(include_sites, "IncludeSite");
777 show_matched(include_urls, "IncludeURL");
778 show_matched(include_refs, "IncludeReferrer");
779 show_matched(include_agents, "IncludeAgent");
780 show_matched(include_users, "IncludeUser");
781 // show_matched(page_type, "");
782 // show_matched(not_page_type, "");
783 show_matched(search_list, "SearchEngine");
784 show_matched(assign_country, "AssignToCountry");
785 }
786
787 end_time = times(&mytms); /* display timing totals? */
788 if (g_settings.flags.time_me || (g_settings.settings.verbosity >= 1)) {
789 printf("%lu %s ", g_run_counters.total_run, _("records"));
790 if (g_run_counters.ignored_run) {
791 printf("(%lu %s", g_run_counters.ignored_run, _("ignored"));
792 if (g_run_counters.bad_run)
793 printf(", %lu %s) ", g_run_counters.bad_run, _("bad"));
794 else
795 printf(") ");
796 } else if (g_run_counters.bad_run)
797 printf("(%lu %s) ", g_run_counters.bad_run, _("bad"));
798
799 /* get processing time (end-start) */
800 temp_time = (float) (end_time - start_time) / sysconf(_SC_CLK_TCK);
801 printf("%s %.2f %s", _("in"), temp_time, _("seconds"));
802
803 /* calculate records per second */
804 if (temp_time)
805 i = ((int) ((float) g_run_counters.total_run / temp_time));
806 else
807 i = 0;
808
809 if ((i > 0) && (i <= g_run_counters.total_run))
810 printf(_(", %d l/sec\n"), i);
811 else
812 printf("\n");
813 }
814
815 del_htabs();
816 #ifdef HAVE_ICONV
817 iconv_close(cd_from_sjis);
818 iconv_close(cd_from_eucj);
819 #endif
820 /* Whew, all done! Exit with completion status (0) */
821 exit(0);
822 } else {
823 #ifdef HAVE_ICONV
824 iconv_close(cd_from_sjis);
825 iconv_close(cd_from_eucj);
826 #endif
827 /* No valid records found... exit with error (1) */
828 VPRINT(VERBOSE1, "%s\n", _("No valid records found!"));
829 exit(1);
830 }
831 } /* ---- END OF MAIN ---- */
832
833 /*********************************************
834 * GET_CONFIG - get configuration file info *
835 *********************************************/
836
837 void
get_config(const char * fname)838 get_config(const char *fname)
839 {
840 const char *kwords[] = { "undefined", /* 0 = undefined keyword 0 */
841 "outputdir", /* Output directory 1 */
842 "logfile", /* Log file to use for input 2 */
843 "reporttitle", /* Title for reports 3 */
844 "hostname", /* Hostname to use 4 */
845 "ignorehist", /* Ignore history file 5 */
846 "quiet", /* Run in quiet mode 6 */
847 "timeme", /* Produce timing results 7 */
848 "debug", /* Produce debug information 8 - Deprecated */
849 "hourlygraph", /* Hourly stats graph 9 */
850 "hourlystats", /* Hourly stats table 10 */
851 "topsites", /* Top sites 11 */
852 "topurls", /* Top URL's 12 */
853 "topreferrers", /* Top Referrers 13 */
854 "topagents", /* Top User Agents 14 */
855 "topcountries", /* Top Countries 15 */
856 "hidesite", /* Sites to hide 16 */
857 "hideurl", /* URL's to hide 17 */
858 "hidereferrer", /* Referrers to hide 18 */
859 "hideagent", /* User Agents to hide 19 */
860 "indexalias", /* Aliases for index.html 20 */
861 "htmlhead", /* HTML Top1 code 21 */
862 "htmlpost", /* HTML Top2 code 22 */
863 "htmltail", /* HTML Tail code 23 */
864 "mangleagents", /* Mangle User Agents 24 */
865 "ignoresite", /* Sites to ignore 25 */
866 "ignoreurl", /* Url's to ignore 26 */
867 "ignorereferrer", /* Referrers to ignore 27 */
868 "ignoreagent", /* User Agents to ignore 28 */
869 "reallyquiet", /* Dont display ANY messages 29 */
870 "gmttime", /* Local or UTC time? 30 */
871 "groupurl", /* Group URL's 31 */
872 "groupsite", /* Group Sites 32 */
873 "groupreferrer", /* Group Referrers 33 */
874 "groupagent", /* Group Agents 34 */
875 "groupshading", /* Shade Grouped entries 35 */
876 "grouphighlight", /* BOLD Grouped entries 36 */
877 "incremental", /* Incremental runs 37 */
878 "incrementalname", /* Filename for state data 38 */
879 "historyname", /* Filename for history data 39 */
880 "htmlextension", /* HTML filename extension 40 */
881 "htmlpre", /* HTML code at beginning 41 */
882 "htmlbody", /* HTML body code 42 */
883 "htmlend", /* HTML code at end 43 */
884 "usehttps", /* Use https:// on URL's 44 */
885 "includesite", /* Sites to always include 45 */
886 "includeurl", /* URL's to always include 46 */
887 "includereferrer", /* Referrers to include 47 */
888 "includeagent", /* User Agents to include 48 */
889 "pagetype", /* Page Type (pageview) 49 */
890 "visittimeout", /* Visit timeout (seconds) 50 */
891 "graphlegend", /* Graph Legends (yes/no) 51 */
892 "graphlines", /* Graph Lines (0=none) 52 */
893 "foldseqerr", /* Fold sequence errors 53 */
894 "countrygraph", /* Display ctry graph (0=no) 54 */
895 "topksites", /* Top sites (by KBytes) 55 */
896 "topkurls", /* Top URL's (by KBytes) 56 */
897 "topentry", /* Top Entry Pages 57 */
898 "topexit", /* Top Exit Pages 58 */
899 "topsearch", /* Top Search Strings 59 */
900 "logtype", /* Log Type (clf/ftp/squid) 60 */
901 "searchengine", /* SearchEngine strings 61 */
902 "groupdomains", /* Group domains (n=level) 62 */
903 "hideallsites", /* Hide ind. sites (0=no) 63 */
904 "allsites", /* List all sites? 64 */
905 "allurls", /* List all URLs? 65 */
906 "allreferrers", /* List all Referrers? 66 */
907 "allagents", /* List all User Agents? 67 */
908 "allsearchstr", /* List all Search Strings? 68 */
909 "allusers", /* List all Users? 69 */
910 "topusers", /* Top Usernames to show 70 */
911 "hideuser", /* Usernames to hide 71 */
912 "ignoreuser", /* Usernames to ignore 72 */
913 "includeuser", /* Usernames to include 73 */
914 "groupuser", /* Usernames to group 74 */
915 "dumppath", /* Path for dump files 75 */
916 "dumpextension", /* Dump filename extension 76 */
917 "dumpheader", /* Dump header as first rec? 77 */
918 "dumpsites", /* Dump sites tab file 78 */
919 "dumpurls", /* Dump urls tab file 79 */
920 "dumpreferrers", /* Dump referrers tab file 80 */
921 "dumpagents", /* Dump user agents tab file 81 */
922 "dumpusers", /* Dump usernames tab file 82 */
923 "dumpsearchstr", /* Dump search str tab file 83 */
924 "dnscache", /* DNS Cache file name 84 */
925 "dnschildren", /* DNS Children (0=no DNS) 85 */
926 "dailygraph", /* Daily Graph (0=no) 86 */
927 "dailystats", /* Daily Stats (0=no) 87 */
928 "geoip", /* Use GeoIP library (0=no) 88 */
929 "geoipdatabase", /* GeoIP database 89 */
930 "indexmonths", /* Number Months on Main Page 90 */
931 "graphindexx", /* Size of Main Graph X 91 */
932 "graphindexy", /* Size of Main Graph Y 92 */
933 "graphdailyx", /* Size of Daily Graph X 93 */
934 "graphdailyy", /* Size of Daily Graph Y 94 */
935 "graphhourlyx", /* Size of Hourly Graph X 95 */
936 "graphhourlyy", /* Size of Hourly Graph Y 96 */
937 "graphpiex", /* Size of Pie Graph X 97 */
938 "graphpiey", /* Size of Pie Graph Y 98 */
939 "topurlsbyhitsgraph", /* Display Top URL's by Hits graph (0=no) 99 */
940 "topurlsbyvolgraph", /* Display Top URL's by Volume graph (0=no) 100 */
941 "topexitpagesgraph", /* Display Top Exit Pages Pie Chart 101 */
942 "topentrypagesgraph", /* Display Top Entry Pages Pie Chart 102 */
943 "topsitesbypagesgraph", /* Display TOP Sites by Volume Graph 103 */
944 "topsitesbyvolgraph", /* Display TOP Sites by Pages Graph 104 */
945 "topagentsgraph", /* Display TOP Agents Graph (by Pages) 105 */
946 "colorhit", /* Hit Color (def=00805c) 106 */
947 "colorfile", /* File Color (def=0000ff) 107 */
948 "colorsite", /* Site Color (def=ff8000) 108 */
949 "colorkbyte", /* Kbyte Color (def=ff0000) 109 */
950 "colorpage", /* Page Color (def=00c0ff) 110 */
951 "colorvisit", /* Visit Color (def=ffff00) 111 */
952 "colorbookm", /* Bookm Color (def=ff00ff) 112 */
953 "piecolor1", /* Pie Color 1 (def=800080) 113 */
954 "piecolor2", /* Pie Color 2 (def=80ffc0) 114 */
955 "piecolor3", /* Pie Color 3 (def=ff00ff) 115 */
956 "piecolor4", /* Pie Color 4 (def=ffc480) 116 */
957 "notpagetype", /* Opposite of PageType - specify what is NOT a page 117 */
958 "top404errors", /* Display TOP 404 Errors 118 */
959 "all404errors", /* Display All 404 Errors 119 */
960 "assigntocountry", /* Assign this address to a country code 120 */
961 "groupandhideagent", /* Group & Hide Agents 121 */
962 "groupandhidesite", /* Group & Hide Sites 122 */
963 "groupandhidereferrer", /* Group & Hide Referrer 123 */
964 "groupandhideurl", /* Group & Hide Referrer 124 */
965 "groupandhideuser", /* Group & Hide User 125 */
966 "dumpcountries", /* Dump countries tab file 126 */
967 "dumpentrypages", /* Dump Entry Pages tab file 127 */
968 "dumpexitpages", /* Dump Exit Pages tab file 128 */
969 "cssfilename", /* CSS File filename 129 */
970 "yearlysubtotals", /* Display Yearly Subtotals on main page 130 */
971 "trackpartialrequests", /* Track 206 Requests 131 */
972 "flagslocation", /* Display Country Flags if not null 132 */
973 "allentrypages", /* Display All Entry Pages 133 */
974 "allexitpages", /* Display All Exit Pages 134 */
975 "disablefilechecks", /* Disable Report File Checks 135 */
976 "segcountry", /* Segmenting by Country 136 */
977 "segreferer", /* Segmenting by Referer 137 */
978 "ignoreindexalias" /* Ignore Index Alias Settings 138 */
979 };
980 FILE *fp;
981
982 char config_buffer[BUFSIZE];
983 char keyword[32];
984 char keyword_org[32];
985 char value[132];
986 char *cp1, *cp2, *cp3;
987 int i, key, count;
988 int num_kwords = sizeof(kwords) / sizeof(char *);
989
990 if ((fp = fopen(fname, "r")) == NULL) {
991 ERRVPRINT(VERBOSE1, "%s %s\n", _("Error: Unable to open configuration file"), fname);
992 return;
993 }
994
995 VPRINT(VERBOSE1, "%s %s\n", _("Using config file:"), fname);
996
997 while ((fgets(config_buffer, BUFSIZE, fp)) != NULL) {
998 /* skip comments and blank lines */
999 if ((config_buffer[0] == '#') || isspace((int) config_buffer[0])) {
1000 continue;
1001 }
1002
1003 /* Get keyword */
1004 cp1 = config_buffer;
1005 cp2 = keyword;
1006 cp3 = keyword_org;
1007 count = 31;
1008 /* Convert read in keyword to lower case.
1009 * Maintain copy of original case'd keyword in case of error - for display. */
1010 while ((isalnum((int) *cp1)) && (count > 0)) {
1011 *cp2 = tolower(*cp1);
1012 *cp3 = *cp1;
1013 cp1++;
1014 cp2++;
1015 cp3++;
1016 count--;
1017 }
1018 *cp2 = '\0';
1019 *cp3 = '\0';
1020
1021 /* Get value */
1022 cp2 = value;
1023 count = 131;
1024 while ((*cp1 != '\n') && (*cp1 != '\0') && (isspace((int) *cp1))) {
1025 cp1++;
1026 }
1027 while ((*cp1 != '\n') && (*cp1 != '\0') && (count > 0)) {
1028 *cp2++ = *cp1++;
1029 count--;
1030 }
1031 *cp2-- = '\0';
1032 while ((isspace((int) *cp2)) && (cp2 != value)) {
1033 *cp2-- = '\0';
1034 }
1035
1036 /* check if blank keyword/value */
1037 if ((keyword[0] == '\0') || (value[0] == '\0'))
1038 continue;
1039
1040 key = 0;
1041 for (i = 0; i < num_kwords; i++) {
1042 if (!strcmp(keyword, kwords[i])) {
1043 key = i;
1044 break;
1045 }
1046 }
1047
1048 if (key == 0) { /* Invalid keyword */
1049 ERRVPRINT(VERBOSE1, "%s '%s' (%s)\n", _("Warning: Invalid keyword"), keyword_org, fname);
1050 continue;
1051 }
1052
1053 VPRINT(VERBOSE5, "New Key: %d, Value: %s\n", key, value);
1054
1055 switch (key) {
1056 case 1:
1057 g_settings.settings.out_dir = save_opt(value);
1058 break; /* OutputDir */
1059 case 2:
1060 log_fname = save_opt(value);
1061 break; /* LogFile */
1062 case 3:
1063 g_settings.settings.title_message = save_opt(value);
1064 break; /* ReportTitle */
1065 case 4:
1066 g_settings.settings.hostname = save_opt(value);
1067 break; /* HostName */
1068 case 5:
1069 g_settings.flags.ignore_history = isaffirmitive(value);
1070 break; /* IgnoreHist */
1071 case 6:
1072 ERRVPRINT(VERBOSE1, "%s\n", _("Use of \"Quiet (-q)\" has been deprecated."));
1073 break; /* Quiet */
1074 case 7:
1075 g_settings.flags.time_me = isaffirmitive(value);
1076 break; /* TimeMe */
1077 case 8:
1078 ERRVPRINT(VERBOSE1, "%s\n", _("Use of \"Debug\" has been deprecated."));
1079 break; /* Debug */
1080 case 9:
1081 g_settings.graphs.hourly = isaffirmitive(value);
1082 break; /* HourlyGraph */
1083 case 10:
1084 g_settings.stats.hourly = isaffirmitive(value);
1085 break; /* HourlyStats */
1086 case 11:
1087 g_settings.top.sites = atoi(value);
1088 break; /* TopSites */
1089 case 12:
1090 g_settings.top.urls = atoi(value);
1091 break; /* TopURLs */
1092 case 13:
1093 g_settings.top.refs = atoi(value);
1094 break; /* TopRefs */
1095 case 14:
1096 g_settings.top.agents = atoi(value);
1097 break; /* TopAgents */
1098 case 15:
1099 g_settings.top.countries = atoi(value);
1100 break; /* TopCountries */
1101 case 16:
1102 add_list_member(value, &hidden_sites, USESPACE);
1103 break; /* HideSite */
1104 case 17:
1105 add_list_member(value, &hidden_urls, USESPACE);
1106 break; /* HideURL */
1107 case 18:
1108 add_list_member(value, &hidden_refs, USESPACE);
1109 break; /* HideReferrer */
1110 case 19:
1111 add_list_member(value, &hidden_agents, USESPACE);
1112 break; /* HideAgent */
1113 case 20:
1114 add_list_member(value, &index_alias, USESPACE);
1115 break; /* IndexAlias */
1116 case 21:
1117 add_list_member(value, &html_head, IGNORESPACE);
1118 break; /* HTMLHead */
1119 case 22:
1120 add_list_member(value, &html_post, IGNORESPACE);
1121 break; /* HTMLPost */
1122 case 23:
1123 add_list_member(value, &html_tail, IGNORESPACE);
1124 break; /* HTMLTail */
1125 case 24:
1126 g_settings.settings.mangle_agent = atoi(value);
1127 break; /* MangleAgents */
1128 case 25:
1129 add_list_member(value, &ignored_sites, USESPACE);
1130 break; /* IgnoreSite */
1131 case 26:
1132 add_list_member(value, &ignored_urls, USESPACE);
1133 break; /* IgnoreURL */
1134 case 27:
1135 add_list_member(value, &ignored_refs, USESPACE);
1136 break; /* IgnoreReferrer */
1137 case 28:
1138 add_list_member(value, &ignored_agents, USESPACE);
1139 break; /* IgnoreAgent */
1140 case 29:
1141 ERRVPRINT(VERBOSE1, "%s\n", _("Use of \"ReallyQuiet (-Q)\" has been deprecated."));
1142 break; /* ReallyQuiet */
1143 case 30:
1144 g_settings.flags.local_time = !isaffirmitive(value);
1145 break; /* GMTTime */
1146 case 31:
1147 add_list_member(value, &group_urls, USESPACE);
1148 break; /* GroupURL */
1149 case 32:
1150 add_list_member(value, &group_sites, USESPACE);
1151 break; /* GroupSite */
1152 case 33:
1153 add_list_member(value, &group_refs, USESPACE);
1154 break; /* GroupReferrer */
1155 case 34:
1156 add_list_member(value, &group_agents, USESPACE);
1157 break; /* GroupAgent */
1158 case 35:
1159 g_settings.flags.shade_groups = isaffirmitive(value);
1160 break; /* GroupShading */
1161 case 36:
1162 g_settings.flags.highlight_groups = isaffirmitive(value);
1163 break; /* GroupHighlight */
1164 case 37:
1165 g_settings.flags.incremental = isaffirmitive(value);
1166 break; /* Incremental */
1167 case 38:
1168 g_settings.settings.state_filename = save_opt(value);
1169 break; /* State FName */
1170 case 39:
1171 g_settings.settings.history_filename = save_opt(value);
1172 break; /* History FName */
1173 case 40:
1174 g_settings.settings.html_ext = save_opt(value);
1175 break; /* HTML extension */
1176 case 41:
1177 add_list_member(value, &html_pre, IGNORESPACE);
1178 break; /* HTML Pre code */
1179 case 42:
1180 add_list_member(value, &html_body, IGNORESPACE);
1181 break; /* HTML Body code */
1182 case 43:
1183 add_list_member(value, &html_end, IGNORESPACE);
1184 break; /* HTML End code */
1185 case 44:
1186 g_settings.flags.use_https = isaffirmitive(value);
1187 break; /* Use https:// */
1188 case 45:
1189 add_list_member(value, &include_sites, USESPACE);
1190 break; /* IncludeSite */
1191 case 46:
1192 add_list_member(value, &include_urls, USESPACE);
1193 break; /* IncludeURL */
1194 case 47:
1195 add_list_member(value, &include_refs, USESPACE);
1196 break; /* IncludeReferrer */
1197 case 48:
1198 add_list_member(value, &include_agents, USESPACE);
1199 break; /* IncludeAgent */
1200 case 49:
1201 add_list_member(value, &page_type, USESPACE);
1202 break; /* PageType */
1203 case 50:
1204 g_settings.settings.visit_timeout = atoi(value);
1205 break; /* VisitTimeout */
1206 case 51:
1207 g_settings.graphs.legend = isaffirmitive(value);
1208 break; /* GraphLegend */
1209 case 52:
1210 if (atoi(value) > 0) {
1211 g_settings.graphs.lines = true;
1212 } else {
1213 g_settings.graphs.lines = isaffirmitive(value);
1214 }
1215 break; /* GraphLines */
1216 case 53:
1217 g_settings.flags.fold_seq_err = isaffirmitive(value);
1218 break; /* FoldSeqErr */
1219 case 54:
1220 g_settings.graphs.country = isaffirmitive(value);
1221 break; /* CountryGraph */
1222 case 55:
1223 g_settings.top.sites_by_vol = atoi(value);
1224 break; /* TopKSites (KB) */
1225 case 56:
1226 g_settings.top.urls_by_vol = atoi(value);
1227 break; /* TopKUrls (KB) */
1228 case 57:
1229 g_settings.top.entry = atoi(value);
1230 break; /* Top Entry pgs */
1231 case 58:
1232 g_settings.top.exit = atoi(value);
1233 break; /* Top Exit pages */
1234 case 59:
1235 g_settings.top.search = atoi(value);
1236 break; /* Top Search pgs */
1237 case 60:
1238 g_settings.flags.force_log_type = true;
1239 if (strncmp(value, "auto", 4) == 0) {
1240 g_settings.settings.log_type = LOG_AUTO;
1241 g_settings.flags.force_log_type = false;
1242 } else if (strncmp(value, "clf", 3) == 0) {
1243 g_settings.settings.log_type = LOG_CLF;
1244 } else if (strncmp(value, "ftp", 3) == 0) {
1245 g_settings.settings.log_type = LOG_FTP;
1246 } else if (strncmp(value, "squid", 5) == 0) {
1247 g_settings.settings.log_type = LOG_SQUID;
1248 } else if (strncmp(value, "combined", 8) == 0) {
1249 g_settings.settings.log_type = LOG_COMBINED;
1250 } else if (strncmp(value, "domino", 6) == 0) {
1251 g_settings.settings.log_type = LOG_DOMINO;
1252 } else {
1253 ERRVPRINT(VERBOSE0, "%s %s\n", _("Unknown Log Type:"), value);
1254 exit(1);
1255 }
1256 break; /* LogType */
1257 case 61:
1258 add_list_member(value, &search_list, USESPACE);
1259 break; /* SearchEngine */
1260 case 62:
1261 g_settings.settings.group_domains = atoi(value);
1262 break; /* GroupDomains */
1263 case 63:
1264 g_settings.flags.hide_sites = isaffirmitive(value);
1265 break; /* HideAllSites */
1266 case 64:
1267 g_settings.all.sites = isaffirmitive(value);
1268 break; /* All Sites? */
1269 case 65:
1270 g_settings.all.urls = isaffirmitive(value);
1271 break; /* All URL's? */
1272 case 66:
1273 g_settings.all.refs = isaffirmitive(value);
1274 break; /* All Refs */
1275 case 67:
1276 g_settings.all.agents = isaffirmitive(value);
1277 break; /* All Agents? */
1278 case 68:
1279 g_settings.all.search = isaffirmitive(value);
1280 break; /* All Srch str */
1281 case 69:
1282 g_settings.all.users = isaffirmitive(value);
1283 break; /* All Users? */
1284 case 70:
1285 g_settings.top.users = atoi(value);
1286 break; /* TopUsers */
1287 case 71:
1288 add_list_member(value, &hidden_users, USESPACE);
1289 break; /* HideUser */
1290 case 72:
1291 add_list_member(value, &ignored_users, USESPACE);
1292 break; /* IgnoreUser */
1293 case 73:
1294 add_list_member(value, &include_users, USESPACE);
1295 break; /* IncludeUser */
1296 case 74:
1297 add_list_member(value, &group_users, USESPACE);
1298 break; /* GroupUser */
1299 case 75:
1300 g_settings.dump.dump_path = save_opt(value);
1301 break; /* DumpPath */
1302 case 76:
1303 g_settings.dump.dump_ext = save_opt(value);
1304 break; /* Dumpfile ext */
1305 case 77:
1306 g_settings.dump.header = isaffirmitive(value);
1307 break; /* DumpHeader? */
1308 case 78:
1309 g_settings.dump.sites = isaffirmitive(value);
1310 break; /* DumpSites? */
1311 case 79:
1312 g_settings.dump.urls = isaffirmitive(value);
1313 break; /* DumpURLs? */
1314 case 80:
1315 g_settings.dump.refs = isaffirmitive(value);
1316 break; /* DumpReferrers? */
1317 case 81:
1318 g_settings.dump.agents = isaffirmitive(value);
1319 break; /* DumpAgents? */
1320 case 82:
1321 g_settings.dump.users = isaffirmitive(value);
1322 break; /* DumpUsers? */
1323 case 83:
1324 g_settings.dump.search = isaffirmitive(value);
1325 break; /* DumpSrchStrs? */
1326 case 84: /* Disable DNSCache and DNSChildren */
1327 case 85:
1328 ERRVPRINT(VERBOSE1, "%s '%s' (%s)\n", _("Warning: Invalid keyword"), keyword, fname);
1329 break;
1330 case 86:
1331 g_settings.graphs.daily = isaffirmitive(value);
1332 break; /* HourlyGraph */
1333 case 87:
1334 g_settings.stats.daily = isaffirmitive(value);
1335 break; /* HourlyStats */
1336 case 88:
1337 g_settings.flags.use_geoip = isaffirmitive(value);
1338 break; /* Use GeoIP */
1339 case 89:
1340 g_settings.settings.geoip_database = save_opt(value);
1341 break; /* GeoIP Database File */
1342 case 90:
1343 g_settings.settings.index_months = atoi(value);
1344 break; /* Months to Display */
1345 case 91:
1346 g_settings.graphs.index_x = atoi(value);
1347 break; /* Size of Main Graph X */
1348 case 92:
1349 g_settings.graphs.index_y = atoi(value);
1350 break; /* Size of Main Graph Y */
1351 case 93:
1352 g_settings.graphs.daily_x = atoi(value);
1353 break; /* Size of Daily Graph X */
1354 case 94:
1355 g_settings.graphs.daily_y = atoi(value);
1356 break; /* Size of Daily Graph Y */
1357 case 95:
1358 g_settings.graphs.hourly_x = atoi(value);
1359 break; /* Size of Hourly Graph X */
1360 case 96:
1361 g_settings.graphs.hourly_y = atoi(value);
1362 break; /* Size of Hourly Graph Y */
1363 case 97:
1364 g_settings.graphs.pie_x = atoi(value);
1365 break; /* Size of Pie Graph X */
1366 case 98:
1367 g_settings.graphs.pie_y = atoi(value);
1368 break; /* Size of Pie Graph Y */
1369 case 99:
1370 g_settings.graphs.url_by_hits = isaffirmitive(value);
1371 break; /* URLs by HITS Graph */
1372 case 100:
1373 g_settings.graphs.url_by_vol = isaffirmitive(value);
1374 break; /* URLs by Volume Graph */
1375 case 101:
1376 if (value[0] == 'h') {
1377 g_settings.graphs.exit_pages = 1;
1378 }
1379 if (value[0] == 'v') {
1380 g_settings.graphs.exit_pages = 2;
1381 }
1382 break; /* Top Exit Pages Pie Chart */
1383 case 102:
1384 if (value[0] == 'h') {
1385 g_settings.graphs.entry_pages = 1;
1386 }
1387 if (value[0] == 'v') {
1388 g_settings.graphs.entry_pages = 2;
1389 }
1390 break; /* Top Entry Pages Pie Chart */
1391 case 103:
1392 g_settings.graphs.sites_by_pages = isaffirmitive(value);
1393 break; /* TOP Sites by Pages Graph */
1394 case 104:
1395 g_settings.graphs.sites_by_vol = isaffirmitive(value);
1396 break; /* TOP Sites by Volume Graph */
1397 case 105:
1398 g_settings.graphs.agents = isaffirmitive(value);
1399 break; /* TOP User Agents (by pages) Pie Chart */
1400 case 106:
1401 strncpy(hit_color + 1, value, 6);
1402 break; /* Hit Color (def=00805c) 106 */
1403 case 107:
1404 strncpy(file_color + 1, value, 6);
1405 break; /* File Color (def=0000ff) 107 */
1406 case 108:
1407 strncpy(site_color + 1, value, 6);
1408 break; /* Site Color (def=ff8000) 108 */
1409 case 109:
1410 strncpy(kbyte_color + 1, value, 6);
1411 break; /* Kbyte Color (def=ff0000) 109 */
1412 case 110:
1413 strncpy(page_color + 1, value, 6);
1414 break; /* Page Color (def=00c0ff) 110 */
1415 case 111:
1416 strncpy(visit_color + 1, value, 6);
1417 break; /* Visit Color (def=ffff00) 111 */
1418 case 112:
1419 ERRVPRINT(VERBOSE1, "%s\n", _("Use of \"ColorBookM\" has been deprecated."));
1420 break; /* Bookm Color (def=ff00ff) 112 */
1421 case 113:
1422 strncpy(pie_color1 + 1, value, 6);
1423 break; /* Pie Color 1 (def=800080) 113 */
1424 case 114:
1425 strncpy(pie_color2 + 1, value, 6);
1426 break; /* Pie Color 2 (def=80ffc0) 114 */
1427 case 115:
1428 strncpy(pie_color3 + 1, value, 6);
1429 break; /* Pie Color 3 (def=ff00ff) 115 */
1430 case 116:
1431 strncpy(pie_color4 + 1, value, 6);
1432 break; /* Pie Color 4 (def=ffc480) 116 */
1433 case 117:
1434 add_list_member(value, ¬_page_type, USESPACE);
1435 break; /* NotPageType */
1436 case 118:
1437 g_settings.top.error = atoi(value);
1438 break; /* Top404Error */
1439 case 119:
1440 g_settings.all.errors = isaffirmitive(value);
1441 break; /* All 404errors? */
1442 case 120:
1443 add_list_member(value, &assign_country, USESPACE);
1444 break; /* Assign Address to Country */
1445 case 121:
1446 add_list_member(value, &hidden_agents, USESPACE);
1447 add_list_member(value, &group_agents, USESPACE);
1448 break; /* GroupAndHideAgent */
1449 case 122:
1450 add_list_member(value, &hidden_sites, USESPACE);
1451 add_list_member(value, &group_sites, USESPACE);
1452 break; /* GroupAndHideSite */
1453 case 123:
1454 add_list_member(value, &hidden_refs, USESPACE);
1455 add_list_member(value, &group_refs, USESPACE);
1456 break; /* GroupAndHideReferrer */
1457 case 124:
1458 add_list_member(value, &hidden_urls, USESPACE);
1459 add_list_member(value, &group_urls, USESPACE);
1460 break; /* GroupAndHideURL */
1461 case 125:
1462 add_list_member(value, &hidden_users, USESPACE);
1463 add_list_member(value, &group_users, USESPACE);
1464 break; /* GroupAndHideUser */
1465 case 126:
1466 g_settings.dump.countries = isaffirmitive(value);
1467 break; /* DumpCountries? */
1468 case 127:
1469 g_settings.dump.entry_pages = isaffirmitive(value);
1470 break; /* DumpEntryPages? */
1471 case 128:
1472 g_settings.dump.exit_pages = isaffirmitive(value);
1473 break; /* DumpExitPages? */
1474 case 129:
1475 g_settings.settings.css_filename = save_opt(value);
1476 break; /* CSS file filename */
1477 case 130:
1478 g_settings.flags.display_yearly_subtotals = isaffirmitive(value);
1479 break; /* DumpExitPages? */
1480 case 131:
1481 g_settings.flags.track_206_reqs = isaffirmitive(value);
1482 break; /* TrackPartialRequests? */
1483 case 132:
1484 g_settings.settings.flags_location = save_opt(value);
1485 break; /* FlagsLocation */
1486 case 133:
1487 g_settings.all.entry = isaffirmitive(value);
1488 break; /* Display All Entry Pages */
1489 case 134:
1490 g_settings.all.exit = isaffirmitive(value);
1491 break; /* Display All Exit Pages */
1492 case 135:
1493 g_settings.flags.disable_report_file_checks = true;
1494 break; /* Disable Report File Checks */
1495 case 136:
1496 add_list_member(strtoupper(value), &seg_countries, IGNORESPACE);
1497 g_settings.flags.segmenting = true;
1498 g_settings.flags.segcountry = true;
1499 break; /* Segmenting by Country */
1500 case 137:
1501 add_list_member(value, &seg_referers, IGNORESPACE);
1502 g_settings.flags.segmenting = true;
1503 g_settings.flags.segreferer = true;
1504 break; /* Segmenting by Referer */
1505 case 138:
1506 g_settings.flags.ignore_index_alias = isaffirmitive(value);
1507 break; /* IgnoreIndexAlias */
1508 }
1509 }
1510 if ((page_type != NULL) && (not_page_type != NULL)) {
1511 ERRVPRINT(VERBOSE0, "%s\n", _("FATAL! You may not specify both PageType and NotPageType in the config file.%s"));
1512 exit(1);
1513 }
1514
1515 fclose(fp);
1516 }
1517
1518
1519 /********************************************
1520 * option_checks *
1521 * *
1522 * Check various options for funky stuff. *
1523 * Alert/Exit as necessary. *
1524 ********************************************/
1525 void
option_checks()1526 option_checks()
1527 {
1528 int max_ctry; /* max countries defined */
1529 int i;
1530
1531 /* Be polite and announce yourself... */
1532 uname(&system_info);
1533 VPRINT(VERBOSE1, "%s (%s %s) %s\n", PACKAGE_STRING, system_info.sysname, system_info.release, _("English"));
1534
1535 /* GEOIP Checks */
1536 #if HAVE_GEOIP_H
1537 if (g_settings.flags.use_geoip) {
1538 g_settings.flags.have_geoip = true;
1539 gi = GeoIP_open(g_settings.settings.geoip_database, GEOIP_MEMORY_CACHE);
1540 if (gi == NULL) {
1541 ERRVPRINT(VERBOSE0, "%s: %s\n", _("FATAL. Unable to open the GeoIP database"), g_settings.settings.geoip_database);
1542 exit(1);
1543 }
1544 VPRINT(VERBOSE1, "%s: %s\n", _("Using GeoIP for IP Address Lookups"), g_settings.settings.geoip_database);
1545 }
1546 #endif
1547 if (g_settings.flags.use_geoip && !g_settings.flags.have_geoip) {
1548 VPRINT(VERBOSE1, "%s\n", _("GeoIP is not available in this binary. Ignoring request to use."));
1549 }
1550
1551 if (page_type == NULL) { /* check if page types present */
1552 if ((g_settings.settings.log_type == LOG_AUTO) || (g_settings.settings.log_type == LOG_CLF) || (g_settings.settings.log_type == LOG_COMBINED)
1553 || (g_settings.settings.log_type == LOG_SQUID)) {
1554 add_list_member("htm", &page_type, USESPACE); /* if no page types specified, we */
1555 add_list_member("html", &page_type, USESPACE); /* use the default ones here... */
1556 add_list_member("php", &page_type, USESPACE);
1557 if (!isinlist(page_type, (char *) g_settings.settings.html_ext))
1558 add_list_member((char *) g_settings.settings.html_ext, &page_type, USESPACE);
1559 } else
1560 add_list_member("txt", &page_type, USESPACE); /* FTP logs default to .txt */
1561 }
1562
1563 if (g_settings.flags.ignore_index_alias == false) {
1564 /* add default index. alias */
1565 add_list_member("index.", &index_alias, USESPACE);
1566 }
1567
1568 for (max_ctry = 0; ctry[max_ctry].desc; max_ctry++);
1569 if (g_settings.top.countries > max_ctry) {
1570 g_settings.top.countries = max_ctry; /* force upper limit */
1571 }
1572
1573 if (g_settings.settings.log_type == LOG_FTP) {
1574 /* disable stuff for ftp logs */
1575 g_settings.top.entry = g_settings.top.exit = 0;
1576 g_settings.top.search = 0;
1577 } else {
1578 if (search_list == NULL) {
1579 /* If no search engines defined, define some :) */
1580 add_list_member("google. q=", &search_list, USESPACE);
1581 add_list_member("yahoo. p=", &search_list, USESPACE);
1582 add_list_member("msn. q=", &search_list, USESPACE);
1583 add_list_member("search.aol. query=", &search_list, USESPACE);
1584 add_list_member("altavista.com q=", &search_list, USESPACE);
1585 add_list_member("netscape.com query=", &search_list, USESPACE);
1586 add_list_member("ask.com q=", &search_list, USESPACE);
1587 add_list_member("alltheweb.com query=", &search_list, USESPACE);
1588 add_list_member("lycos.com query=", &search_list, USESPACE);
1589 add_list_member("hotbot. query=", &search_list, USESPACE);
1590 add_list_member("mamma.com query=", &search_list, USESPACE);
1591 add_list_member("search. q=", &search_list, USESPACE); /* Generic Catchall... */
1592 }
1593 }
1594
1595 /* ensure entry/exits don't exceed urls */
1596 i = (g_settings.top.urls > g_settings.top.urls_by_vol) ? g_settings.top.urls : g_settings.top.urls_by_vol;
1597 if (g_settings.top.entry > i)
1598 g_settings.top.entry = i;
1599 if (g_settings.top.exit > i)
1600 g_settings.top.exit = i;
1601
1602 }
1603
1604 /*********************************************/
1605 /* SAVE_OPT - save option from config file */
1606 /*********************************************/
1607
1608 static char *
save_opt(char * str)1609 save_opt(char *str)
1610 {
1611 char *cp1;
1612 size_t string_length;
1613
1614 string_length = strlen(str);
1615 cp1 = XMALLOC(char, string_length + 1);
1616
1617 strlcpy(cp1, str, string_length + 1);
1618
1619 return cp1;
1620 }
1621
1622 /*********************************************/
/* CLEAR_MONTH - initialize monthly stuff    */
1624 /*********************************************/
1625
1626 void
clear_month(void)1627 clear_month(void)
1628 {
1629 int i;
1630
1631 init_counters(); /* reset monthly counters */
1632 del_htabs(); /* clear hash tables */
1633 if (g_settings.top.countries != 0) {
1634 for (i = 0; i < g_settings.top.countries; i++) {
1635 top_ctrys[i] = NULL;
1636 }
1637 }
1638 }
1639
1640 /*********************************************/
1641 /* INIT_COUNTERS - prep counters for use */
1642 /*********************************************/
1643
1644 void
init_counters(void)1645 init_counters(void)
1646 {
1647 int i;
1648
1649 memset(&g_counters, 0, sizeof(g_counters));
1650
1651 for (i = 0; i < TOTAL_RC; i++)
1652 response[i].count = 0;
1653 for (i = 0; ctry[i].desc; i++) { /* country totals */
1654 ctry[i].count = 0;
1655 ctry[i].files = 0;
1656 ctry[i].xfer = 0;
1657 ctry[i].pages = 0;
1658 }
1659 mh_hit = 0;
1660 g_counters.month.first_day = 0;
1661 g_counters.month.last_day = 0;
1662 }
1663
1664 static void
init_run_counters(void)1665 init_run_counters(void)
1666 {
1667 memset(&g_run_counters, 0, sizeof(g_run_counters));
1668 }
1669
1670
1671 static void
process_end_of_month(void)1672 process_end_of_month(void)
1673 {
1674 g_counters.month.visit = tot_visit(sm_htab);
1675 g_counters.generic.bad_month += g_run_counters.bad_run;
1676 g_counters.generic.ignored_month += g_run_counters.ignored_run;
1677 update_history_array();
1678 month_update_exit(req_tstamp); /* process exit pages */
1679
1680 write_month_html(); /* generate HTML for month */
1681 /* Update Grand Total Bad/Ignore Counters, before zeroing the actual counters. */
1682 // clear_month();
1683
1684 }
1685
1686 /*********************************************/
1687 /* CURRENT_TIME - return date/time as a string */
1688 /*********************************************/
1689
1690 char *
current_time(void)1691 current_time(void)
1692 {
1693 /* get system time */
1694 now = time(NULL);
1695 /* convert to timestamp string */
1696 if (g_settings.flags.local_time) {
1697 strftime(timestamp, sizeof(timestamp), "%d-%b-%Y %H:%M %Z", localtime(&now));
1698 } else {
1699 strftime(timestamp, sizeof(timestamp), "%d-%b-%Y %H:%M GMT", gmtime(&now));
1700 }
1701
1702 return timestamp;
1703 }
1704
1705 /*********************************************/
1706 /* ISURLCHAR - checks for valid URL chars */
1707 /*********************************************/
1708
int
isurlchar(unsigned char ch)
{
    /* Return 1 if 'ch' is acceptable inside a URL, 0 otherwise.
     * Accepted: letters, digits, any byte above 127, and the punctuation
     * listed below. */

    /* strchr() also matches the terminating NUL of the set it searches, so
     * NUL has to be rejected explicitly; it is never a valid URL character. */
    if (ch == '\0')
        return 0;
    if (isalnum((int) ch))
        return 1;                               /* allow letters, numbers...    */
    if (ch > 127)
        return 1;                               /* allow extended chars...      */
    return (strchr(":/\\.,' *-+_@~()[]$", ch) != NULL); /* and a few special ones */
}
1718
1719 /*********************************************/
1720 /* CTRY_IDX - create unique # from domain */
1721 /*********************************************/
1722
unsigned long
ctry_idx(char *str)
{
    /* Fold a domain string into a number: each character contributes
     * (c - 'a' + 1), the last character at bit 0, the one before it five
     * bits higher, and so on.
     *
     * All arithmetic is done in unsigned long, so it is well defined for
     * any input: characters below 'a' wrap modulo the width of unsigned
     * long, and characters whose weight has been shifted out of the type
     * contribute nothing.  (The previous int shift was undefined for
     * negative values and for strings of seven or more characters.) */
    unsigned long idx = 0;
    unsigned long weight = 1;                   /* 2^(5 * position from the end) */
    char *cp1 = str + strlen(str);

    while ((cp1 > str) && (weight != 0)) {
        --cp1;
        idx += (unsigned long) (*cp1 - 'a' + 1) * weight;
        weight <<= 5;
    }
    return idx;
}
1736
1737 /*********************************************/
1738 /* FROM_HEX - convert hex char to decimal */
1739 /*********************************************/
1740
char
from_hex(char c)
{
    /* Convert one hexadecimal digit character to its value 0..15.
     * Any character that is not 0-9, A-F or a-f yields 0.  Each range is
     * tested explicitly: treating "everything else" as lower case made
     * characters such as 'X' or '_' come out as 1..9. */
    if (c >= '0' && c <= '9')
        return (char) (c - '0');                /* 0-9 */
    if (c >= 'A' && c <= 'F')
        return (char) (c - 'A' + 10);           /* A-F */
    if (c >= 'a' && c <= 'f')
        return (char) (c - 'a' + 10);           /* a-f */
    return 0;                                   /* return 0 if bad... */
}
1749
1750 /*********************************************/
1751 /* UNESCAPE - convert escape seqs to chars */
1752 /*********************************************/
1753
/* Value of a hexadecimal digit.  Only called with characters that have
 * already passed isxdigit(), so the final branch can only be 'A'..'F'. */
static unsigned char
unescape_hex_value(unsigned char c)
{
    if (c >= '0' && c <= '9')
        return (unsigned char) (c - '0');
    if (c >= 'a' && c <= 'f')
        return (unsigned char) (c - 'a' + 10);
    return (unsigned char) (c - 'A' + 10);
}

char *
unescape(char *str)
{
    /*
     * Decode escape sequences in 'str' in place and return 'str'
     * (NULL is returned for a NULL argument).  Two passes are made:
     *
     *   1. backslash escapes as found in Apache logs: "\xHH" becomes the
     *      byte HH and "\\" becomes a single backslash;
     *   2. URL escapes: "%HH" becomes the byte HH.
     *
     * A decoded byte below 32, or equal to 127, is replaced by '_'.
     * An escape is only decoded when both hex digits are present; anything
     * else (including a truncated "%H" at the end of the string) is copied
     * through unchanged, so the read pointer can never pass the terminator.
     * The result is never longer than the input.
     */
    unsigned char *cp1;                         /* read position  - unsigned so we */
    unsigned char *cp2;                         /* write position - can do > 127   */

    if (!str)
        return NULL;                            /* make sure strings valid */

    /* for apache log's escape code. */
    cp1 = cp2 = (unsigned char *) str;
    while (*cp1) {
        /* && short-circuits, so nothing beyond the terminator is read */
        if (cp1[0] == '\\' && cp1[1] == 'x' && isxdigit(cp1[2]) && isxdigit(cp1[3])) {
            unsigned char decoded =
                (unsigned char) (unescape_hex_value(cp1[2]) * 16 + unescape_hex_value(cp1[3]));

            if ((decoded < 32) || (decoded == 127))
                decoded = '_';
            *cp2++ = decoded;
            cp1 += 4;
        } else if (cp1[0] == '\\' && cp1[1] == '\\') {
            *cp2++ = '\\';
            cp1 += 2;
        } else {
            *cp2++ = *cp1++;
        }
    }
    *cp2 = '\0';

    /* URL (%HH) escapes */
    cp1 = cp2 = (unsigned char *) str;
    while (*cp1) {
        if (cp1[0] == '%' && isxdigit(cp1[1]) && isxdigit(cp1[2])) {
            unsigned char decoded =
                (unsigned char) (unescape_hex_value(cp1[1]) * 16 + unescape_hex_value(cp1[2]));

            if ((decoded < 32) || (decoded == 127))
                decoded = '_';                  /* make '_' if its bad */
            *cp2++ = decoded;
            cp1 += 3;
        } else {
            *cp2++ = *cp1++;                    /* if not, just continue */
        }
    }
    *cp2 = '\0';                                /* don't forget terminator */
    return str;                                 /* return the string */
}
1804
1805 #ifdef HAVE_ICONV
1806
/*********************************************/
/* SCORE_XXX - calculate score               */
/*********************************************/

/*
 * Scores how well 'str' reads as EUC-JP.
 *
 * Each printable ASCII byte (0x20..0x7e) adds 1 and each two byte
 * kanji (both bytes in 0xa1..0xfe) adds 2.  A 0x8e byte followed by
 * a byte in 0xa1..0xdf (half width kana), a lone 0x8f byte and bytes
 * below 0x20 are accepted without adding to the score.  Any other
 * byte makes the whole string invalid.
 *
 * Returns the score, or -1 for a NULL or invalid string.
 */
int score_eucj(unsigned char *str)
{
    enum { EUCJ_LEAD, EUCJ_KANJI2, EUCJ_KANA2 };
    int expect = EUCJ_LEAD;     /* what the next byte has to be */
    int total = 0;
    int invalid = 0;
    const unsigned char *p;

    if (str == NULL)
        return -1;

    for (p = str; *p != 0; p++) {
        const unsigned char c = *p;

        if (expect == EUCJ_KANJI2) {
            /* second byte of a kanji */
            if (c >= 0xa1 && c <= 0xfe)
                total += 2;
            else
                invalid = 1;
            expect = EUCJ_LEAD;
        } else if (expect == EUCJ_KANA2) {
            /* byte following 0x8e; scores nothing */
            if (c < 0xa1 || c > 0xdf)
                invalid = 1;
            expect = EUCJ_LEAD;
        } else if (c >= 0x20 && c <= 0x7e) {
            total++;            /* ASCII */
        } else if (c >= 0xa1 && c <= 0xfe) {
            expect = EUCJ_KANJI2;
        } else if (c == 0x8e) {
            expect = EUCJ_KANA2;
        } else if (c != 0x8f && c >= 0x20) {
            invalid = 1;        /* neither 0x8f nor a control char */
        }
    }
    return invalid ? -1 : total;
}
1843
/*
 * Scores how well 'str' reads as Shift-JIS.
 *
 * Each printable ASCII byte (0x20..0x7e) adds 1.  A lead byte in
 * 0x81..0x9f or 0xe0..0xfc followed by a byte in 0x40..0x7e or
 * 0x80..0xfc adds 2.  Single bytes in 0xa1..0xdf (kana) and bytes
 * below 0x20 are accepted without adding to the score.  Any other
 * byte makes the whole string invalid.
 *
 * Returns the score, or -1 for a NULL or invalid string.
 */
int score_sjis(unsigned char *str)
{
    int want_trail = 0;         /* non-zero right after a lead byte */
    int total = 0;
    int invalid = 0;

    if (str == NULL)
        return -1;

    while (*str != 0) {
        const unsigned char c = *str++;

        if (want_trail) {
            want_trail = 0;
            if ((c >= 0x40 && c <= 0x7e) || (c >= 0x80 && c <= 0xfc))
                total += 2;
            else
                invalid = 1;
            continue;
        }

        if (c >= 0x20 && c <= 0x7e) {
            total++;            /* ASCII */
        } else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xfc)) {
            want_trail = 1;
        } else if (c >= 0x20 && (c < 0xa1 || c > 0xdf)) {
            invalid = 1;        /* neither kana nor a control char */
        }
    }
    return invalid ? -1 : total;
}
1872
/*
 * Scores how well 'str' reads as UTF-8.
 *
 * Each printable ASCII byte (0x20..0x7e) adds 1.  A completed two,
 * three or four byte sequence adds 1, 3 or 4 respectively; lead bytes
 * are 0xc0..0xdf, 0xe0..0xef and 0xf0..0xf7, continuation bytes are
 * 0x80..0xbf.  Bytes below 0x20 are accepted without adding to the
 * score.  Any other byte, or a wrong byte where a continuation is
 * expected, makes the whole string invalid.
 *
 * Returns the score, or -1 for a NULL or invalid string.
 */
int score_utf8(unsigned char *str)
{
    int pending = 0;            /* continuation bytes still expected */
    int award = 0;              /* points granted when the sequence completes */
    int total = 0;
    int invalid = 0;

    if (str == NULL)
        return -1;

    while (*str != 0) {
        const unsigned char c = *str++;

        if (pending > 0) {
            if (c >= 0x80 && c <= 0xbf) {
                pending--;
                if (pending == 0)
                    total += award;
            } else {
                invalid = 1;
                pending = 0;    /* the offending byte is consumed, not rescanned */
            }
            continue;
        }

        if (c >= 0x20 && c <= 0x7e) {
            total++;            /* ASCII */
        } else if (c >= 0xc0 && c <= 0xdf) {
            pending = 1;
            award = 1;
        } else if (c >= 0xe0 && c <= 0xef) {
            pending = 2;
            award = 3;
        } else if (c >= 0xf0 && c <= 0xf7) {
            pending = 3;
            award = 4;
        } else if (c >= 0x20) {
            invalid = 1;        /* not a control char either */
        }
    }
    return invalid ? -1 : total;
}
1919
1920 #endif
1921
1922 /*********************************************/
1923 /* SRCH_STRING - get search strings from ref */
1924 /*********************************************/
1925 void
srch_string(char * refer,char * ptr)1926 srch_string(char *refer, char *ptr)
1927 {
1928 unsigned char tmpbuf[BUFSIZE];
1929 unsigned char srch[80] = "";
1930 unsigned char *cp1, *cp2, *cps;
1931 int sp_flg = 0;
1932 #ifdef HAVE_ICONV
1933 int sjis, eucj, utf8;
1934 unsigned char tmpbuf2[BUFSIZE];
1935 unsigned char *cp3;
1936 size_t inlen, outlen;
1937 #endif
1938
1939 /* Check if search engine referrer or return */
1940 if ((cps = isinlist(search_list, refer)) == NULL)
1941 return;
1942
1943 /* Try to find query variable */
1944 srch[0] = '?';
1945 srch[sizeof(srch) - 1] = '\0';
1946 strcpy(&srch[1], cps); /* First, try "?..." */
1947 if ((cp1 = strstr(ptr, srch)) == NULL) {
1948 srch[0] = '&'; /* Next, try "&..." */
1949 if ((cp1 = strstr(ptr, srch)) == NULL)
1950 return; /* If not found, split... */
1951 }
1952 cp2 = tmpbuf;
1953
1954 while (*cp1 != '=' && *cp1 != '\0') {
1955 cp1++;
1956 }
1957 if (*cp1 != '\0') {
1958 cp1++;
1959 }
1960
1961 while (*cp1 != '&' && *cp1 != '\0') {
1962 if (*cp1 == '"' || *cp1 == ',' || *cp1 == '?') {
1963 cp1++;
1964 continue;
1965 } /* skip bad ones.. */
1966 else {
1967 if (*cp1 == '+')
1968 *cp1 = ' '; /* change + to space */
1969 if (sp_flg && *cp1 == ' ') {
1970 cp1++;
1971 continue;
1972 } /* compress spaces */
1973 if (*cp1 == ' ')
1974 sp_flg = 1;
1975 else
1976 sp_flg = 0; /* (flag spaces here) */
1977 *cp2++ = tolower(*cp1); /* normal character */
1978 cp1++;
1979 }
1980 }
1981 *cp2 = '\0';
1982 cp2 = tmpbuf;
1983 if (tmpbuf[0] == '?')
1984 tmpbuf[0] = ' '; /* format fix ? */
1985 while (*cp2 != 0 && isspace(*cp2))
1986 cp2++; /* skip leading sps. */
1987 if (*cp2 == '\0')
1988 return;
1989
1990 /* any trailing spaces? */
1991 cp1 = cp2 + strlen(cp2) - 1;
1992 while (cp1 != cp2)
1993 if (isspace(*cp1))
1994 *cp1-- = '\0';
1995 else
1996 break;
1997
1998 #ifdef HAVE_ICONV
1999 utf8 = score_utf8(cp2);
2000 sjis = score_sjis(cp2);
2001 eucj = score_eucj(cp2);
2002 if (sjis > utf8 && sjis > eucj) {
2003 iconv(cd_from_sjis, NULL, 0, NULL, 0);
2004 cp3 = cp2;
2005 inlen = strlen(cp2) + 1;
2006 cp1 = tmpbuf2;
2007 outlen = sizeof(tmpbuf2);
2008 if (iconv(cd_from_sjis,
2009 (const char **)&cp3, &inlen, (char**)&cp1, &outlen) >= 0 &&
2010 inlen == 0) {
2011 cp2 = tmpbuf2;
2012 }
2013 } else if (eucj > utf8 && eucj > sjis) {
2014 iconv(cd_from_eucj, NULL, 0, NULL, 0);
2015 cp3 = cp2;
2016 inlen = strlen(cp2) + 1;
2017 cp1 = tmpbuf2;
2018 outlen = sizeof(tmpbuf2);
2019 if (iconv(cd_from_eucj,
2020 (const char **)&cp3, &inlen, (char**)&cp1, &outlen) >= 0 &&
2021 inlen == 0) {
2022 cp2 = tmpbuf2;
2023 }
2024 }
2025 #endif
2026
2027 /* strip invalid chars */
2028 cp1 = cp2;
2029 while (*cp1 != '\0') {
2030 if (((*cp1 > 0) && (*cp1 < 32)) || (*cp1 == 127))
2031 *cp1 = '_';
2032 cp1++;
2033 }
2034
2035 if (put_snode(cp2, (unsigned long) 1, sr_htab)) {
2036 /* Error adding search string node, skipping .... */
2037 ERRVPRINT(VERBOSE1, "%s %s\n", _("Error adding Search String Node, skipping"), tmpbuf);
2038 }
2039 return;
2040 }
2041
2042 /*********************************************/
2043 /* GET_DOMAIN - Get domain portion of host */
2044 /*********************************************/
2045
2046 char *
get_domain(char * str)2047 get_domain(char *str)
2048 {
2049 char *cp;
2050 int i = g_settings.settings.group_domains + 1;
2051
2052 cp = str + strlen(str) - 1;
2053 if (isdigit((int) *cp))
2054 return NULL; /* ignore IP addresses */
2055
2056 while (cp != str) {
2057 if (*cp == '.')
2058 if (!(--i))
2059 return ++cp;
2060 cp--;
2061 }
2062 return cp;
2063 }
2064
2065 /*********************************************/
2066 /* OUR_GZGETS - enhanced gzgets for log only */
2067 /*********************************************/
2068
2069 char *
our_gzgets(gzFile fp,char * buf,int size)2070 our_gzgets(gzFile fp, char *buf, int size)
2071 {
2072 char *out_cp = buf; /* point to output */
2073
2074 while (1) {
2075 if (f_cp > (f_buf + f_end - 1)) { /* load? */
2076 f_end = gzread(fp, f_buf, GZ_BUFSIZE);
2077 if (f_end <= 0)
2078 return Z_NULL;
2079 f_cp = f_buf;
2080 }
2081
2082 if (--size) { /* more? */
2083 *out_cp++ = *f_cp;
2084 if (*f_cp++ == '\n') {
2085 *out_cp = '\0';
2086 return buf;
2087 }
2088 } else {
2089 *out_cp = '\0';
2090 return buf;
2091 }
2092 }
2093 }
2094
/*****************************************************************/
/*                                                               */
/* JDATE  - Julian date calculator                               */
/*                                                               */
/* Calculates the number of days since Jan 1, 0000.              */
/*                                                               */
/* Originally written by Bradford L. Barrett (03/17/1988)        */
/* Returns an unsigned long value representing the number of     */
/* days since January 1, 0000.                                   */
/*                                                               */
/* Note: Due to the changes made by Pope Gregory XIII in the     */
/*       16th Century (Feb 24, 1582), dates before 1583 will     */
/*       not return a truly accurate number (will be at least    */
/*       10 days off).  Somehow, I don't think this will         */
/*       present much of a problem for most situations :)        */
/*                                                               */
/* Usage: days = jdate(day, month, year)                         */
/*        'month' is 1 based (1 = January) and is used as an     */
/*        index without being range checked.                     */
/*                                                               */
/* The number returned is adjusted by 5 to facilitate day of     */
/* week calculations.  The mod of the returned value gives the   */
/* day of the week the date is.  (ie: dow = days % 7 ) where     */
/* dow will return 0=Sunday, 1=Monday, 2=Tuesday, etc...         */
/*                                                               */
/*****************************************************************/

unsigned long
jdate(int day, int month, int year)
{
    /* days that precede the first of each month in a non leap year */
    static const int days_before[] = { 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334 };
    const int is_leap = (year % 4 == 0) && ((year % 100 != 0) || (year % 400 == 0));
    unsigned long total;

    /* base number, including leap and centennial year corrections */
    total = (unsigned long) year * 365;
    total += day;
    total += days_before[month - 1];
    total += (year + 4) / 4;
    total -= (year / 100) - (year / 400);

    /* the leap day of this year has not happened yet before March 1st */
    if (is_leap && month < 3)
        total--;

    return total + 5;
}
2137
2138
2139 /************************************************************************
2140 * do_agent_mangling *
2141 * *
2142 * Tries to reduce a complex Agent string down to a simpler level. *
2143 * *
2144 * Arguments: *
2145 * char *agent The Agent to reduce. This function will "mangle" this! *
2146 * *
2147 * Returns: *
2148 * int. 0 on Success. *
2149 * *
2150 * TODO: Return something else if fails! *
2151 * TODO: Do this nicer in PCRE or equiv. *
2152 ************************************************************************/
2153 int
do_agent_mangling(char * agent)2154 do_agent_mangling(char *agent)
2155 {
2156 char *cp1, *cp2; /* generic char pointers */
2157 char *agent_start; /* Start and End of the Agent string - mainly bounds checking */
2158 char *agent_end;
2159
2160 agent_start = cp2 = agent;
2161 agent_end = agent + sizeof(agent) - 2;
2162
2163 cp1 = strstr(agent_start, "ompatible"); /* check known fakers */
2164 if (cp1 != NULL) {
2165 while (*cp1 != ';' && *cp1 != '\0' && (cp1 < agent_end)) {
2166 cp1++;
2167 }
2168 /* kludge for Mozilla/3.01 (compatible;) */
2169 if (*cp1++ == ';' && strcmp(cp1, ")\"")) { /* success! */
2170 while (*cp1 == ' ' && (cp1 < agent_end)) {
2171 cp1++; /* eat spaces */
2172 }
2173 while (*cp1 != '.' && *cp1 != '\0' && *cp1 != ';' && (cp1 < agent_end) && (cp2 < agent_end)) {
2174 *cp2++ = *cp1++;
2175 }
2176 if (g_settings.settings.mangle_agent < 5) {
2177 while (*cp1 != '.' && *cp1 != ';' && *cp1 != '\0' && (cp1 < agent_end) && (cp2 < agent_end)) {
2178 *cp2++ = *cp1++;
2179 }
2180 if (*cp1 != ';' && *cp1 != '\0' && (cp1 < agent_end - 2) && (cp2 < agent_end - 2)) {
2181 *cp2++ = *cp1++;
2182 *cp2++ = *cp1++;
2183 }
2184 }
2185 if (g_settings.settings.mangle_agent < 4) {
2186 if (*cp1 >= '0' && *cp1 <= '9' && (cp1 < agent_end) && (cp2 < agent_end)) {
2187 *cp2++ = *cp1++;
2188 }
2189 }
2190 if (g_settings.settings.mangle_agent < 3) {
2191 while (*cp1 != ';' && *cp1 != '\0' && *cp1 != '(' && (cp1 < agent_end) && (cp2 < agent_end)) {
2192 *cp2++ = *cp1++;
2193 }
2194 }
2195 if (g_settings.settings.mangle_agent < 2) {
2196 /* Level 1 - try to get OS */
2197 cp1 = strstr(agent_start, ")");
2198 if (cp1 != NULL) {
2199 *cp2++ = ' ';
2200 *cp2++ = '(';
2201 while (*cp1 != ';' && *cp1 != '(' && cp1 > agent_start) {
2202 cp1--;
2203 }
2204 if (cp1 != agent_start && *cp1 != '\0' && (cp1 < agent_end)) {
2205 cp1++;
2206 }
2207 while (*cp1 == ' ' && *cp1 != '\0' && (cp1 < agent_end)) {
2208 cp1++;
2209 }
2210 while (*cp1 != ')' && *cp1 != '\0' && cp1 > cp2 && (cp1 < agent_end) && (cp2 < agent_end)) {
2211 *cp2++ = *cp1++;
2212 }
2213 *cp2++ = ')';
2214 }
2215 }
2216 *cp2 = '\0';
2217 } else { /* nothing after "compatible", should we mangle? */
2218 /* not for now */
2219 }
2220 } else {
2221 cp1 = strstr(agent_start, "Opera"); /* Opera flavor */
2222 if (cp1 != NULL) {
2223 while (*cp1 != '/' && *cp1 != ' ' && *cp1 != '\0' && (cp1 < agent_end) && (cp2 < agent_end)) {
2224 *cp2++ = *cp1++;
2225 }
2226 while (*cp1 != '.' && *cp1 != '\0' && (cp1 < agent_end) && (cp2 < agent_end)) {
2227 *cp2++ = *cp1++;
2228 }
2229 if (g_settings.settings.mangle_agent < 5) {
2230 while (*cp1 != '.' && *cp1 != '\0' && (cp1 < agent_end - 2) && (cp2 < agent_end - 2)) {
2231 *cp2++ = *cp1++;
2232 }
2233 *cp2++ = *cp1++;
2234 *cp2++ = *cp1++;
2235 }
2236 if (g_settings.settings.mangle_agent < 4)
2237 if (*cp1 >= '0' && *cp1 <= '9' && (cp1 < agent_end) && (cp2 < agent_end)) {
2238 *cp2++ = *cp1++;
2239 }
2240 if (g_settings.settings.mangle_agent < 3)
2241 while (*cp1 != ' ' && *cp1 != '\0' && *cp1 != '(' && (cp1 < agent_end) && (cp2 < agent_end)) {
2242 *cp2++ = *cp1++;
2243 }
2244 if (g_settings.settings.mangle_agent < 2) {
2245 cp1 = strstr(agent_start, "(");
2246 if (cp1 != NULL) {
2247 cp1++;
2248 *cp2++ = ' ';
2249 *cp2++ = '(';
2250 while (*cp1 != ';' && *cp1 != ')' && *cp1 != '\0' && (cp1 < agent_end) && (cp2 < agent_end - 1)) {
2251 *cp2++ = *cp1++;
2252 }
2253 *cp2++ = ')';
2254 }
2255 }
2256 *cp2 = '\0';
2257 } else {
2258 cp1 = strstr(agent_start, "Mozilla"); /* Netscape flavor */
2259 if (cp1 != NULL) {
2260 while (*cp1 != '/' && *cp1 != ' ' && *cp1 != '\0' && (cp1 < agent_end) && (cp2 < agent_end)) {
2261 *cp2++ = *cp1++;
2262 }
2263 if (*cp1 == ' ') {
2264 *cp1 = '/';
2265 }
2266 while (*cp1 != '.' && *cp1 != '\0' && (cp1 < agent_end) && (cp2 < agent_end)) {
2267 *cp2++ = *cp1++;
2268 }
2269 if (g_settings.settings.mangle_agent < 5) {
2270 while (*cp1 != '.' && *cp1 != '\0' && (cp1 < agent_end - 2) && (cp2 < agent_end - 2)) {
2271 *cp2++ = *cp1++;
2272 }
2273 *cp2++ = *cp1++;
2274 *cp2++ = *cp1++;
2275 }
2276 if (g_settings.settings.mangle_agent < 4)
2277 if (*cp1 >= '0' && *cp1 <= '9') {
2278 *cp2++ = *cp1++;
2279 }
2280 if (g_settings.settings.mangle_agent < 3) {
2281 while (*cp1 != ' ' && *cp1 != '\0' && *cp1 != '(' && (cp1 < agent_end) && (cp2 < agent_end)) {
2282 *cp2++ = *cp1++;
2283 }
2284 }
2285 if (g_settings.settings.mangle_agent < 2) {
2286 /* Level 1 - Try to get OS */
2287 cp1 = strstr(agent_start, "(");
2288 if (cp1 != NULL) {
2289 cp1++;
2290 *cp2++ = ' ';
2291 *cp2++ = '(';
2292 while (*cp1 != ';' && *cp1 != ')' && *cp1 != '\0' && (cp1 < agent_end) && (cp2 < agent_end - 1)) {
2293 *cp2++ = *cp1++;
2294 }
2295 *cp2++ = ')';
2296 }
2297 }
2298 *cp2 = '\0';
2299 }
2300 }
2301 }
2302 return (0);
2303 }
2304
2305
2306 /************************************************************************
2307 * response_code_index *
2308 * *
2309 * Returns the index for a given response code *
2310 * *
2311 * Arguments: *
2312 * int resp_code: The response code to have the index discovered for. *
2313 * *
2314 * Returns: *
2315 * int. Response Code Index *
2316 ************************************************************************/
2317 int
response_code_index(int resp_code)2318 response_code_index(int resp_code)
2319 {
2320 int i;
2321
2322 switch (resp_code) {
2323 case RC_CONTINUE:
2324 i = IDX_CONTINUE;
2325 break;
2326 case RC_SWITCHPROTO:
2327 i = IDX_SWITCHPROTO;
2328 break;
2329 case RC_OK:
2330 i = IDX_OK;
2331 break;
2332 case RC_CREATED:
2333 i = IDX_CREATED;
2334 break;
2335 case RC_ACCEPTED:
2336 i = IDX_ACCEPTED;
2337 break;
2338 case RC_NONAUTHINFO:
2339 i = IDX_NONAUTHINFO;
2340 break;
2341 case RC_NOCONTENT:
2342 i = IDX_NOCONTENT;
2343 break;
2344 case RC_RESETCONTENT:
2345 i = IDX_RESETCONTENT;
2346 break;
2347 case RC_PARTIALCONTENT:
2348 i = IDX_PARTIALCONTENT;
2349 break;
2350 case RC_MULTIPLECHOICES:
2351 i = IDX_MULTIPLECHOICES;
2352 break;
2353 case RC_MOVEDPERM:
2354 i = IDX_MOVEDPERM;
2355 break;
2356 case RC_MOVEDTEMP:
2357 i = IDX_MOVEDTEMP;
2358 break;
2359 case RC_SEEOTHER:
2360 i = IDX_SEEOTHER;
2361 break;
2362 case RC_NOMOD:
2363 i = IDX_NOMOD;
2364 break;
2365 case RC_USEPROXY:
2366 i = IDX_USEPROXY;
2367 break;
2368 case RC_MOVEDTEMPORARILY:
2369 i = IDX_MOVEDTEMPORARILY;
2370 break;
2371 case RC_BAD:
2372 i = IDX_BAD;
2373 break;
2374 case RC_UNAUTH:
2375 i = IDX_UNAUTH;
2376 break;
2377 case RC_PAYMENTREQ:
2378 i = IDX_PAYMENTREQ;
2379 break;
2380 case RC_FORBIDDEN:
2381 i = IDX_FORBIDDEN;
2382 break;
2383 case RC_NOTFOUND:
2384 i = IDX_NOTFOUND;
2385 break;
2386 case RC_METHODNOTALLOWED:
2387 i = IDX_METHODNOTALLOWED;
2388 break;
2389 case RC_NOTACCEPTABLE:
2390 i = IDX_NOTACCEPTABLE;
2391 break;
2392 case RC_PROXYAUTHREQ:
2393 i = IDX_PROXYAUTHREQ;
2394 break;
2395 case RC_TIMEOUT:
2396 i = IDX_TIMEOUT;
2397 break;
2398 case RC_CONFLICT:
2399 i = IDX_CONFLICT;
2400 break;
2401 case RC_GONE:
2402 i = IDX_GONE;
2403 break;
2404 case RC_LENGTHREQ:
2405 i = IDX_LENGTHREQ;
2406 break;
2407 case RC_PREFAILED:
2408 i = IDX_PREFAILED;
2409 break;
2410 case RC_REQENTTOOLARGE:
2411 i = IDX_REQENTTOOLARGE;
2412 break;
2413 case RC_REQURITOOLARGE:
2414 i = IDX_REQURITOOLARGE;
2415 break;
2416 case RC_UNSUPMEDIATYPE:
2417 i = IDX_UNSUPMEDIATYPE;
2418 break;
2419 case RC_RNGNOTSATISFIABLE:
2420 i = IDX_RNGNOTSATISFIABLE;
2421 break;
2422 case RC_EXPECTATIONFAILED:
2423 i = IDX_EXPECTATIONFAILED;
2424 break;
2425 case RC_SERVERERR:
2426 i = IDX_SERVERERR;
2427 break;
2428 case RC_NOTIMPLEMENTED:
2429 i = IDX_NOTIMPLEMENTED;
2430 break;
2431 case RC_BADGATEWAY:
2432 i = IDX_BADGATEWAY;
2433 break;
2434 case RC_UNAVAIL:
2435 i = IDX_UNAVAIL;
2436 break;
2437 case RC_GATEWAYTIMEOUT:
2438 i = IDX_GATEWAYTIMEOUT;
2439 break;
2440 case RC_BADHTTPVER:
2441 i = IDX_BADHTTPVER;
2442 break;
2443 default:
2444 i = IDX_UNDEFINED;
2445 break;
2446 }
2447 return (i);
2448 }
2449
2450
/************************************************************************
 * cleanup_host
 *
 * Normalises a host field: an empty hostname is replaced by the
 * translated text "Unknown", anything else is lower-cased in place.
 *
 * Arguments:
 * char *hostname: The hostname. Is modified by this function.  The
 *                 buffer must have room for the translated "Unknown"
 *                 text plus its terminator.
 *
 * Returns:
 * int. Always 0.
 ************************************************************************/
int
cleanup_host(char *hostname)
{
    if (hostname[0] == '\0') {
        /* Catch blank hostnames */
        const char *unknown = _("Unknown");

        /* +1 copies the terminator too, so the result is a proper string */
        memcpy(hostname, unknown, strlen(unknown) + 1);
    } else {
        strtolower(hostname);
    }
    return (0);
}
2474
2475
/************************************************************************
 * cleanup_user
 *
 * Tidies a user/ident field.  An empty field becomes "-".  Otherwise
 * the field is cut at the first character that compares below 32 or
 * is a double quote.  The result is then passed through unescape().
 *
 * Arguments:
 * char *ident: The user/ident field. Is modified by this function.
 *
 * Returns:
 * int. Always 0.
 ************************************************************************/
int
cleanup_user(char *ident)
{
    if (ident[0] != 0) {
        char *end = ident;      /* will mark where the field gets cut */

        while (*end >= 32 && *end != '"') {
            end++;
        }
        *end = '\0';
    } else {
        /* fix username if needed */
        ident[0] = '-';
        ident[1] = '\0';
    }

    /* unescape user name */
    unescape(ident);

    return (0);
}
2509
2510
2511 /************************************************************************
2512 * cleanup_url *
2513 * *
2514 * Does what the name says, given a URL, remove all junk and clean it *
2515 * *
2516 * Arguments: *
2517 * char *url: The URL. Is modified by this function. *
2518 * *
2519 * Returns: *
2520 * int. 1 on failure, 0 on success. *
2521 ************************************************************************/
2522 int
cleanup_url(char * url)2523 cleanup_url(char *url)
2524 {
2525 char *cp1; /* generic char pointers */
2526 LISTPTR lptr; /* generic list pointer */
2527
2528 /* un-escape URL */
2529 unescape(url);
2530
2531 if (g_settings.flags.ignore_index_alias == false) {
2532 /* strip query portion of cgi scripts */
2533 cp1 = url;
2534 while (*cp1 != '\0')
2535 if (!isurlchar(*cp1)) {
2536 *cp1 = '\0';
2537 break;
2538 } else
2539 cp1++;
2540 if (url[0] == '\0') {
2541 url[0] = '/';
2542 url[1] = '\0';
2543 }
2544
2545 /* strip off index.html (or any aliases) */
2546 lptr = index_alias;
2547 while (lptr != NULL) {
2548 if ((cp1 = strstr(url, lptr->string)) != NULL) {
2549 if ((cp1 == url) || (*(cp1 - 1) == '/')) {
2550 *cp1 = '\0';
2551 if (url[0] == '\0') {
2552 url[0] = '/';
2553 url[1] = '\0';
2554 }
2555 break;
2556 }
2557 }
2558 lptr = lptr->next;
2559 }
2560 }
2561 return (0);
2562 }
2563
2564
2565 /************************************************************************
2566 * cleanup_refer *
2567 * *
2568 * Does what the name says, given a refer field, remove all junk and *
2569 * clean it. *
2570 * Will also extract a search string if appropriate. *
2571 * *
2572 * Arguments: *
2573 * char *refer: The Referer Field. Is modified by this function. *
2574 * char *srchstr: The search string, which *may* be extracted *
2575 * *
2576 * Returns: *
2577 * int. 1 on failure, 0 on success. *
2578 ************************************************************************/
2579 int
cleanup_refer(char * refer,char * srchstr)2580 cleanup_refer(char *refer, char *srchstr)
2581 {
2582 char *cp1, *cp2; /* generic char pointers */
2583
2584 /* unescape referrer */
2585 unescape(refer);
2586 unescape(refer); /* XXX */
2587
2588 /* fix referrer field */
2589 cp1 = refer;
2590 cp2 = refer;
2591 if (*cp2 != '\0') {
2592 while (*cp1 != '\0') {
2593 if ((*cp1 < 32 && *cp1 > 0) || *cp1 == 127 || *cp1 == '<') {
2594 *cp1 = 0;
2595 } else {
2596 *cp2++ = *cp1++;
2597 }
2598 }
2599 cp2 = '\0';
2600 }
2601
2602 /* strip query portion of cgi referrals */
2603 cp1 = refer;
2604 if (*cp1 != '\0') {
2605 while (*cp1 != '\0') {
2606 if (!isurlchar(*cp1)) {
2607 /* Save query portion in srchstr */
2608 strlcpy(srchstr, cp1, MAXSRCH - 1);
2609 *cp1++ = '\0';
2610 break;
2611 } else {
2612 cp1++;
2613 }
2614 }
2615 /* handle null referrer */
2616 if (refer[0] == '\0') {
2617 refer[0] = '-';
2618 refer[1] = '\0';
2619 }
2620 }
2621
2622 /* if HTTP request, lowercase http://sitename/ portion */
2623 cp1 = refer;
2624 if ((*cp1 == 'h') || (*cp1 == 'H')) {
2625 while ((*cp1 != '/') && (*cp1 != '\0')) {
2626 *cp1 = tolower(*cp1);
2627 cp1++;
2628 }
2629 /* now do hostname */
2630 if ((*cp1 == '/') && (*(cp1 + 1) == '/')) {
2631 cp1++;
2632 cp1++;
2633 }
2634 while ((*cp1 != '/') && (*cp1 != '\0')) {
2635 *cp1 = tolower(*cp1);
2636 cp1++;
2637 }
2638 }
2639 return (0);
2640 }
2641
2642
2643 /************************************************************************
2644 * cleanup_agent *
2645 * *
2646 * Does what the name says, given an agent field, remove all junk and *
2647 * clean it. *
2648 * *
2649 * Arguments: *
2650 * char *agent: The Agent Field. Is modified by this function. *
2651 * *
2652 * Returns: *
2653 * int. 1 on failure, 0 on success. *
2654 ************************************************************************/
2655 int
cleanup_agent(char * agent)2656 cleanup_agent(char *agent)
2657 {
2658 char *cp1, *cp2, *cp3; /* generic char pointers */
2659
2660 /* Do we need to mangle? */
2661 if (g_settings.settings.mangle_agent) {
2662 do_agent_mangling(agent);
2663 }
2664
2665 /* fix user agent field */
2666 cp1 = agent;
2667 cp3 = cp2 = cp1++;
2668 if ((*cp2 != '\0') && ((*cp2 == '"') || (*cp2 == '('))) {
2669 while (*cp1 |= '\0') {
2670 cp3 = cp2;
2671 *cp2++ = *cp1++;
2672 }
2673 *cp3 = '\0';
2674 }
2675
2676 cp1 = agent;
2677 while (*cp1 != 0) { /* get rid of more common _bad_ chars ;) */
2678 if ((*cp1 < 32) || (*cp1 == 127) || (*cp1 == '<') || (*cp1 == '>')) {
2679 *cp1 = '\0';
2680 break;
2681 } else {
2682 cp1++;
2683 }
2684 }
2685
2686 return (0);
2687 }
2688
2689
2690 /************************************************************************
2691 * isaffirmitive
2692 *
2693 * Will return true for any obvious, case insensitive, affirmative
2694 * value.
2695 * False for otherwise.
2696 *
2697 * TODO: Make Language independent?
2698 * Assumes value is a string of up to 20 chars long!
2699 ***********************************************************************/
2700 bool
isaffirmitive(char * value)2701 isaffirmitive(char *value)
2702 {
2703 int i;
2704 int length;
2705 char lowered_value[21];
2706
2707 length = strlen(value);
2708 if (length > 20) {
2709 length = 20;
2710 } else if (length <= 0) {
2711 return false;
2712 }
2713
2714 for (i = 0; i < length; i++) {
2715 lowered_value[i] = tolower(value[i]);
2716 }
2717 lowered_value[i] = '\0';
2718
2719 if (strncmp(lowered_value, "yes", length) == 0 || strncmp(lowered_value, "true", length) == 0 || strncmp(lowered_value, "y", length) == 0) {
2720 return true;
2721 }
2722 if (!(strncmp(lowered_value, "no", length) == 0 || strncmp(lowered_value, "false", length) == 0 || strncmp(lowered_value, "n", length) == 0)) {
2723 ERRVPRINT(VERBOSE1, "%s: %s\n", _("Invalid Yes/No choice. Defaulting to No. Was"), value);
2724 }
2725 return false;
2726 }
2727
2728
/************************************************************************
 * strtoupper
 *
 * Converts a string to Upper Case, in place, using toupper().
 * Returns a pointer to the string.
 ***********************************************************************/
char *
strtoupper(char *str)
{
    char *p;

    for (p = str; *p != '\0'; p++) {
        /* toupper() needs a value in unsigned char range */
        *p = (char) toupper((unsigned char) *p);
    }
    return str;
}
2746
2747
/************************************************************************
 * strtolower
 *
 * Converts a string to Lower Case, in place, using tolower().
 * Returns a pointer to the string.
 ***********************************************************************/
char *
strtolower(char *str)
{
    char *p;

    for (p = str; *p != '\0'; p++) {
        /* tolower() needs a value in unsigned char range */
        *p = (char) tolower((unsigned char) *p);
    }
    return str;
}
2765
2766 /************************************************************************
2767 ************************************************************************
2768 * END OF FILE *
2769 ************************************************************************/
2770