1 /*
2     AWFFull - A Webalizer Fork, Full o' features
3 
4     awffull.c
5         The main program
6 
7     Copyright (C) 1997-2001  Bradford L. Barrett (brad@mrunix.net)
8     Copyright 2002, 2004 by Stanislaw Yurievich Pusep
9     Copyright (C) 2004-2008 by Stephen McInerney (spm@stedee.id.au)
10     Copyright (C) 2006 by Alexander Lazic (al-awffull@none.at)
11     Copyright (C) 2006 by Benoit Rouits (brouits@free.fr)
12 
13     This file is part of AWFFull.
14 
15     AWFFull is free software: you can redistribute it and/or modify
16     it under the terms of the GNU General Public License as published by
17     the Free Software Foundation, either version 3 of the License, or
18     (at your option) any later version.
19 
20     AWFFull is distributed in the hope that it will be useful,
21     but WITHOUT ANY WARRANTY; without even the implied warranty of
22     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
23     GNU General Public License for more details.
24 
25     You should have received a copy of the GNU General Public License
26     along with AWFFull.  If not, see <http://www.gnu.org/licenses/>.
27 
28     This software uses the gd graphics library, which is copyright by
29     Quest Protein Database Center, Cold Spring Harbor Labs.  Please
30     see the documentation supplied with the library for additional
31     information and license terms, or visit www.boutell.com/gd/ for the
32     most recent version of the library and supporting documentation.
33 
34 */
35 
36 /*********************************************/
37 /* STANDARD INCLUDES                         */
38 /*********************************************/
39 #include "awffull.h"                            /* main header              */
40 #ifdef HAVE_ICONV
41 #include <iconv.h>
42 #endif
43 
44 /* Internal function prototypes. Several of these (option_checks, clear_month, srch_string, the cleanup_* helpers, ...) are called from main() below. */
45 
46 void clear_month(void);                         /* clear monthly stuff */
47 char *unescape(char *);                         /* unescape URLs       */
48 char from_hex(char);                            /* convert hex to dec  */
49 int isurlchar(unsigned char);                   /* valid URL char fnc. */
50 static char *save_opt(char *);                  /* save conf option    */
51 void srch_string(char *, char *);               /* srch str analysis   */
52 char *get_domain(char *);                       /* return domain name  */
53 char *our_gzgets(gzFile, char *, int);          /* our gzgets          */
54 int do_agent_mangling(char *);
55 void option_checks(void);                       /* Various early checks */
56 void *process_log_line(void *);
57 bool isaffirmitive(char *);                     /* Is the passed in string equal to Y | y | N | n | Yes or No,
58                                                    or any other case combination of the same? Can also accept True or False. */
59 
60 int response_code_index(int);                   /* maps a response code to an index into response[] (see use in main) */
61 static void process_end_of_month(void);         /* Do all the bits for an end of month */
62 static void init_run_counters(void);            /* presumably resets g_run_counters - TODO confirm against definition */
63 
64 /* The cleaner functions. These de-gunk the individual parts of a log entry.
   Note: main() calls these on each parsed record and does not check the int results. */
65 int cleanup_host(char *);
66 int cleanup_user(char *);
67 int cleanup_date_time(char *, struct tm *);
68 int cleanup_url(char *);
69 int cleanup_refer(char *, char *);
70 int cleanup_agent(char *);
71 
72 
73 /*********************************************/
74 /* GLOBAL VARIABLES                          */
75 /*********************************************/
76 
77 const char *version = PACKAGE_VERSION;          /* program version          */
78 
79 struct global_settings g_settings;              /* All Settings & Flags     */
80 struct global_counters g_counters;              /* All counters             */
81 struct global_run_counters g_run_counters;      /* per-run record counters (total_run, bad_run, ignored_run are bumped in main) */
82 
83 
84 char *log_fname = NULL;                         /* log file name; NULL means read from STDIN */
85 char *blank_str = "";                           /* blank string             */
86 
87 #if HAVE_GEOIP_H
88 GeoIP *gi;                                      /* GeoIP access             */
89 #endif
90 
91 int g_cur_year = 0, g_cur_month = 1,            /* year/month/day/hour      */
92     g_cur_day = 0, g_cur_hour = 0,              /* tracking variables       */
93     g_cur_min = 0, g_cur_sec = 0;
94 
95 unsigned long cur_tstamp = 0;                   /* timestamp of the most recently accepted record */
96 unsigned long rec_tstamp = 0;                   /* mktime() result for the record being processed */
97 unsigned long req_tstamp = 0;
98 unsigned long epoch;                            /* used for timestamp adj.  */
99 
100 int gz_log = 0;                                 /* gzipped log? (0=no)      */
101 
102 unsigned long ht_hit = 0, mh_hit = 0;           /* hits in the current hour / highest hourly hits seen */
103 
104 struct utsname system_info;                     /* system info structure    */
105 
106 unsigned long ul_bogus = 0;                     /* Dummy counter for groups */
107 
108 time_t now;                                     /* used by current_time function */
109 struct tm *tp;                                  /* to generate timestamp    */
110 char timestamp[64];                             /* for the reports          */
111 time_t temp_time_squid;                         /* For pulling in squid times */
112 
113 gzFile gzlog_fp;                                /* gzip logfile pointer     */
114 FILE *log_fp;                                   /* regular logfile pointer  */
115 
116 char buffer[BUFSIZE];                           /* log file record buffer   */
117 char tmp_buf[BUFSIZE];                          /* used to temp save above  */
118 
119 CLISTPTR *top_ctrys = NULL;                     /* Top countries table; allocated in main when top.countries > 0 */
120 
121 #define GZ_BUFSIZE 16384                        /* read buffer size (presumably for our_gzgets - TODO confirm) */
122 char f_buf[GZ_BUFSIZE];                         /* read buffer (presumably for our_gzgets - TODO confirm) */
123 char *f_cp = f_buf + GZ_BUFSIZE;                /* pointer into the buffer; starts one past its end */
124 int f_end;                                      /* count to end of buffer   */
125 
126 char hit_color[] = DKGREEN;                     /* graph hit color          */
127 char file_color[] = BLUE;                       /* graph file color         */
128 char site_color[] = ORANGE;                     /* graph site color         */
129 char kbyte_color[] = RED;                       /* graph kbyte color        */
130 char page_color[] = CYAN;                       /* graph page color         */
131 char visit_color[] = YELLOW;                    /* graph visit color        */
132 char bookm_color[] = PURPLE;                    /* graph bookm color        */
133 char pie_color1[] = DKGREEN;                    /* pie additional color 1  */
134 char pie_color2[] = ORANGE;                     /* pie additional color 2  */
135 char pie_color3[] = BLUE;                       /* pie additional color 3  */
136 char pie_color4[] = RED;                        /* pie additional color 4  */
137 
138 static char const ab_month_name[][4] = {        /* abbreviated month names; main() matches these against the month field of web log dates */
139     "Jan", "Feb", "Mar", "Apr", "May", "Jun",
140     "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
141 };
142 
143 #ifdef HAVE_ICONV
144 iconv_t cd_from_sjis, cd_from_eucj;             /* iconv descriptors, opened in main(): Shift_JIS -> UTF-8 and EUC-JP -> UTF-8 */
145 #endif
146 
147 /*********************************************/
148 /* MAIN - start here                         */
149 /*********************************************/
150 
151 int
main(int argc,char * argv[])152 main(int argc, char *argv[])
153 {
154     int i, j;                                   /* generic counters            */
155 
156     extern char *optarg;                        /* used for command line       */
157     extern int optind;                          /* parsing routine 'getopt'    */
158     extern int opterr;
159 
160     bool loaded_default_config = false;         /* Did we load the default config file? */
161 
162     time_t start_time, end_time;                /* program timers              */
163     float temp_time;                            /* temporary time storage      */
164     struct tms mytms;                           /* bogus tms structure         */
165 
166     bool good_rec = false;                      /* true if we had at least ONE good record   */
167 
168     bool isapage = false;                       /* Flag. Is this a page or not? */
169     bool isnewsite = false;                     /* Flag. True if this is a new site */
170 
171     struct tm time_rec;                         /* Gotta convert that string'ed time into a timerec first */
172     struct tm prev_time_rec;                    /* Hang onto the pevious time_rec in case of out of sequence errors */
173 
174     char str_previous_time[MAXDATETIME] = "";
175     struct log_struct parsed_log;               /* The log entries post PCRE'ing */
176 
177     char *current_locale = NULL;                /* the locale, as returned from setlocale */
178     char *message_catalog_dir = NULL;           /* Directory for all the translations */
179     char *message_domain = NULL;                /* current message domain for translations */
180     char *envlang, *envlanguage;
181 
182 #if ENABLE_NLS
183     /* Reduce Surprises. Unset most LC_* env settings. Basically, only let LANG and/or LANGUAGE change stuff.
184      * Too many cross polination mess ups otherwise. */
185     /*
186        unsetenv("LC_CTYPE");
187        unsetenv("LC_MESSAGES");
188        unsetenv("LC_COLLATE");
189        unsetenv("LC_MONETARY");
190        unsetenv("LC_NUMERIC");
191        unsetenv("LC_TIME");
192        unsetenv("LC_PAPER");
193        unsetenv("LC_NAME");
194        unsetenv("LC_ADDRESS");
195        unsetenv("LC_TELEPHONE");
196        unsetenv("LC_MEASUREMENT");
197        unsetenv("LC_IDENTIFICATION");
198      */
199 
200     current_locale = setlocale(LC_ALL, "");
201     message_domain = textdomain(PACKAGE);
202     message_catalog_dir = bindtextdomain(PACKAGE, LOCALEDIR);
203     envlang = getenv("LANG");
204     envlanguage = getenv("LANGUAGE");
205     if (envlang != NULL) {
206         if (strncmp("ja_JP", envlang, 5) == 0) {
207             g_settings.graphs.use_kanji = true;
208         }
209     } else if (envlanguage != NULL) {
210         if (strncmp("ja_JP", envlanguage, 5) == 0) {
211             g_settings.graphs.use_kanji = true;
212         }
213     }
214 #endif
215 
216     assign_messages();                          /* Load up AWFFull's general messages, country names etc */
217 
218     /* initalize epoch */
219     epoch = jdate(1, 1, 1970);                  /* used for timestamp adj.     */
220 
221     /* Initialise all flags and base settings */
222     set_defaults();
223     init_run_counters();
224 
225     /* check for default config file */
226     if (!access(g_settings.settings.config_filename, F_OK)) {
227         get_config(g_settings.settings.config_filename);
228         loaded_default_config = true;
229     }
230 
231     process_options(argc, argv);
232     if (loaded_default_config == true) {
233         VPRINT(VERBOSE1, "%s: %s\n", _("Initially processed default config file"), g_settings.settings.config_filename);
234     }
235     display_options();
236     option_checks();
237 
238 #if ENABLE_NLS
239     VPRINT(VERBOSE2, "Lang: %s\nLanguage: %s\n", envlang, envlanguage);
240     VPRINT(VERBOSE2, "Current Locale: %s\nMessage Catalog: %s\nNessage Domain: %s\n", current_locale, message_catalog_dir, message_domain);
241 #endif
242 
243     if (argc - optind != 0) {
244         log_fname = argv[optind];
245     }
246     if (log_fname && (log_fname[0] == '-')) {
247         log_fname = NULL;                       /* force STDIN?   */
248     }
249 
250     /* check for gzipped file - .gz */
251     if (log_fname) {
252         if (!strcmp((log_fname + strlen(log_fname) - 3), ".gz")) {
253             gz_log = 1;
254         }
255     }
256 
257     /* setup our internal variables */
258     init_counters();                            /* initalize main counters         */
259 
260     for (i = 0; i < MAXHASH; i++) {
261         sm_htab[i] = sd_htab[i] = NULL;         /* initalize hash tables           */
262         um_htab[i] = NULL;
263         rm_htab[i] = NULL;
264         am_htab[i] = NULL;
265         sr_htab[i] = NULL;
266     }
267 
268     /* open log file */
269     if (gz_log) {
270         gzlog_fp = gzopen(log_fname, "rb");
271         if (gzlog_fp == Z_NULL) {
272             /* Error: Can't open log file ... */
273             ERRVPRINT(VERBOSE0, "%s %s\n", _("Error: Can't open log file"), log_fname);
274             exit(1);
275         }
276     } else {
277         if (log_fname) {
278             log_fp = fopen(log_fname, "r");
279             if (log_fp == NULL) {
280                 /* Error: Can't open log file ... */
281                 ERRVPRINT(VERBOSE0, "%s %s\n", _("Error: Can't open log file"), log_fname);
282                 exit(1);
283             }
284         }
285     }
286 
287     /* Using logfile ... */
288     VPRINT(VERBOSE1, "%s %s", _("Using logfile"), log_fname ? log_fname : "STDIN");
289     if (gz_log) {
290         VPRINT(VERBOSE1, " (gzip)");
291     }
292     VPRINT(VERBOSE1, "\n");
293 
294 
295     /* switch directories if needed */
296     if (g_settings.settings.out_dir) {
297         if (chdir(g_settings.settings.out_dir) != 0) {
298             /* Error: Can't change directory to ... */
299             ERRVPRINT(VERBOSE0, "%s %s\n", _("Error: Can't change directory to"), g_settings.settings.out_dir);
300             exit(1);
301         }
302     }
303 
304     /* Creating output in ... */
305     VPRINT(VERBOSE1, "%s %s\n", _("Creating output in"), g_settings.settings.out_dir ? g_settings.settings.out_dir : _("current directory"));
306 
307     /* prep hostname */
308     if (!g_settings.settings.hostname) {
309         if (uname(&system_info)) {
310             g_settings.settings.hostname = "localhost";
311         } else {
312             g_settings.settings.hostname = system_info.nodename;
313         }
314     }
315 
316     /* Hostname for reports is ... */
317     VPRINT(VERBOSE1, "%s '%s'\n", _("Hostname for reports is"), g_settings.settings.hostname);
318     VPRINT(VERBOSE1, "%s %s\n", _("Using CSS file:"), g_settings.settings.css_filename);
319 
320     /* get past history */
321     if (g_settings.flags.ignore_history) {
322         VPRINT(VERBOSE1, "%s\n", _("Ignoring previous history..."));
323     } else {
324         get_history();
325     }
326 
327     if (g_settings.flags.incremental) {
328         i = restore_state();
329         if (i > 0) {
330             /* Error: Unable to restore run data (error num) */
331             ERRVPRINT(VERBOSE0, "%s (%d)\n", _("Error: Unable to restore run data"), i);
332             exit(1);
333         } else if (i == 0) {
334             g_settings.flags.incremental_duplicate_check = true;
335         }
336         /* Do nothing on an i == -1 --> No previous data to load */
337     }
338 
339     /* Allocate memory for our TOP countries array */
340     if (g_settings.top.countries > 0) {
341         top_ctrys = XMALLOC(CLISTPTR, g_settings.top.countries);
342     }
343 
344     /* Do any graphical output Setup */
345     initialise_graphs();
346 
347     start_time = times(&mytms);
348 
349 #ifdef HAVE_ICONV
350     cd_from_sjis = iconv_open("UTF-8", "Shift_JIS");
351     cd_from_eucj = iconv_open("UTF-8", "EUC-JP");
352 #endif
353 
354     /*********************************************
355      * MAIN PROCESS LOOP - read through log file *
356      *********************************************/
357     while ((gz_log) ? (our_gzgets(gzlog_fp, buffer, BUFSIZE) != Z_NULL) : (fgets(buffer, BUFSIZE, log_fname ? log_fp : stdin) != NULL)) {
358         g_run_counters.total_run++;
359         if (strlen(buffer) == (BUFSIZE - 1)) {
360             ERRVPRINT(VERBOSE1, "%s %lu", _("Error: Skipping oversized log record:"), g_run_counters.total_run);
361             ERRVPRINT(VERBOSE3, "  ==> %s\n", buffer);
362             g_run_counters.bad_run++;           /* bump bad record counter      */
363 
364             /* get the rest of the record */
365             while ((gz_log) ? (our_gzgets(gzlog_fp, buffer, BUFSIZE) != Z_NULL) : (fgets(buffer, BUFSIZE, log_fname ? log_fp : stdin) != NULL)) {
366                 ERRVPRINT(VERBOSE3, "      %s\n", buffer);
367                 if (strlen(buffer) < BUFSIZE - 1) {
368                     break;
369                 }
370             }
371             continue;                           /* go get next record if any    */
372         }
373 
374         /* got a record... */
375         VPRINT(VERBOSE3, "==> %s", buffer);
376         if (parse_record(buffer, &parsed_log)) {        /* parse the record             */
377             /*********************************************
378              * DO SOME PRE-PROCESS FORMATTING            *
379              *********************************************/
380             cleanup_host(parsed_log.hostname);
381             cleanup_user(parsed_log.ident);
382             cleanup_url(parsed_log.url);
383             cleanup_refer(parsed_log.refer, parsed_log.srchstr);
384             cleanup_agent(parsed_log.agent);
385 
386 
387             /*********************************************
388              * PASSED MINIMAL CHECKS, DO A LITTLE MORE   *
389              *********************************************/
390 
391             /********************************************
392              * PROCESS RECORD                           *
393              ********************************************/
394             if (strncmp(str_previous_time, parsed_log.datetime, MAXDATETIME) != 0) {
395                 /* strptime is $$$ - avoid if same date/time */
396                 memset(&time_rec, 0, sizeof(time_rec));
397                 if (g_settings.settings.log_type == LOG_FTP) {
398                     strptime(parsed_log.datetime, DATE_TIME_XFERLOG_FORMAT, &time_rec);
399                 } else if (g_settings.settings.log_type == LOG_SQUID) {
400                     temp_time_squid = strtoul(parsed_log.datetime, NULL, 10);
401                     localtime_r(&temp_time_squid, &time_rec);
402                 } else {
403                     /* Alternate date/time extraction for Web Logs. Effectively revert to webalizer method.
404                      * Issue with FreeBSD and localised extraction. See list discusion 2008/03/2 */
405                     /*strptime(parsed_log.datetime, DATE_TIME_FORMAT, &time_rec); */
406                     time_rec.tm_mday = atoi(&parsed_log.datetime[0]);
407                     time_rec.tm_year = atoi(&parsed_log.datetime[7]) - 1900;
408                     time_rec.tm_hour = atoi(&parsed_log.datetime[12]);
409                     time_rec.tm_min = atoi(&parsed_log.datetime[15]);
410                     time_rec.tm_sec = atoi(&parsed_log.datetime[18]);
411 
412                     /* Find the month */
413                     j = 0;
414                     i = g_cur_month - 1;
415                     while (j < 12) {
416                         if (strncmp(ab_month_name[i], &parsed_log.datetime[3], 3) == 0) {
417                             time_rec.tm_mon = i;
418                             break;
419                         }
420                         i++;
421                         j++;
422                         if (i >= 12)
423                             i = 0;
424                     }
425                     if (j >= 12) {
426                         /* If we get here? Something is broken! */
427                         ERRVPRINT(VERBOSE1, _("Error! Corrupted Date/Time Record. Line: %lu Value: %s\n"), g_run_counters.total_run, parsed_log.datetime);
428                         g_run_counters.bad_run++;
429                         continue;
430                     }
431                 }
432                 /*ERRVPRINT(0, " Rec  Date-Time: %d/%d/%d:%d:%d:%d\n", time_rec.tm_mday,time_rec.tm_mon,time_rec.tm_year,time_rec.tm_hour,time_rec.tm_min,time_rec.tm_sec); */
433                 time_rec.tm_isdst = -1;         /* stop mktime from resetting for daylight savings */
434                 rec_tstamp = mktime(&time_rec);
435                 if (rec_tstamp < 0) {
436                     ERRVPRINT(VERBOSE1, _("Error! Corrupted Date/Time Record. Line: %lu Value: %s\n"), g_run_counters.total_run, parsed_log.datetime);
437                     g_run_counters.bad_run++;
438                     continue;
439                 }
440                 strlcpy(str_previous_time, parsed_log.datetime, MAXDATETIME);
441             }
442 
443             /* Do we need to check for duplicate records? (incremental mode)   */
444             if (g_settings.flags.incremental_duplicate_check == true) {
445                 /* check if less than/equal to last record processed            */
446                 if (rec_tstamp <= cur_tstamp) {
447                     /* if it is, assume we have already processed and ignore it  */
448                     g_run_counters.ignored_run++;
449                     VPRINT(VERBOSE4, "IGNORE_DUPCHK: %s", buffer);
450                     continue;
451                 } else {
452                     /* if it isn't.. disable any more checks this run            */
453                     g_settings.flags.incremental_duplicate_check = false;
454                     /* now check if it's a new month                             */
455                     if (g_cur_month != (time_rec.tm_mon + 1)) {
456                         clear_month();
457                         g_cur_month = time_rec.tm_mon + 1;
458                         g_cur_year = time_rec.tm_year + 1900;
459                         g_cur_day = time_rec.tm_mday;
460                         g_cur_hour = time_rec.tm_hour;
461                         g_cur_min = time_rec.tm_min;
462                         g_cur_sec = time_rec.tm_sec;
463                         g_counters.month.first_day = g_counters.month.last_day = time_rec.tm_mday;      /* reset first and last day */
464                         cur_tstamp = rec_tstamp;
465                     }
466                 }
467             }
468 
469             /* Ignore records that are too far incorrect of the previous timestamp - older records as in */
470             /* TODO - Auto FoldSeq.Err when we would try and revert a day */
471             if (rec_tstamp < cur_tstamp) {
472                 if (!g_settings.flags.fold_seq_err && ((rec_tstamp + SLOP_VAL) < cur_tstamp)) {
473                     g_run_counters.ignored_run++;
474                     VPRINT(VERBOSE4, "IGNORE_SEQERR: %s", buffer);
475                     continue;
476                 } else {
477                     rec_tstamp = cur_tstamp;
478                     memcpy(&time_rec, &prev_time_rec, sizeof(prev_time_rec));
479                 }
480             }
481             cur_tstamp = rec_tstamp;            /* update current timestamp */
482 
483             /* first time through? */
484             if (g_settings.flags.is_first_run == true) {
485                 /* if yes, init our date vars */
486                 g_cur_month = time_rec.tm_mon + 1;
487                 g_cur_year = time_rec.tm_year + 1900;
488                 g_cur_day = time_rec.tm_mday;
489                 g_cur_hour = time_rec.tm_hour;
490                 g_cur_min = time_rec.tm_min;
491                 g_cur_sec = time_rec.tm_sec;
492                 if (g_counters.month.first_day == 0) {
493                     g_counters.month.first_day = time_rec.tm_mday;
494                 }
495                 g_counters.month.last_day = time_rec.tm_mday;
496                 g_settings.flags.is_first_run = false;
497             }
498 
499             /* We don't track below "hour", so always update seconds and minutes */
500             g_cur_sec = time_rec.tm_sec;
501             g_cur_min = time_rec.tm_min;
502 
503             /* We're now past all the "is bad time?" checks. ie. This is now a known "good" time record.
504              *   So can save the current timerec as the 'previous' timerec.
505              * Just don't try and reference 'previous' time after this point! */
506             memcpy(&prev_time_rec, &time_rec, sizeof(prev_time_rec));
507 
508             /* check for hour change  */
509             if (g_cur_hour != time_rec.tm_hour) {
510                 /* if yes, init hourly stuff */
511                 if (ht_hit > mh_hit)
512                     mh_hit = ht_hit;
513                 ht_hit = 0;
514                 g_cur_hour = time_rec.tm_hour;
515             }
516 
517             /* check for day change   */
518             if (g_cur_day != time_rec.tm_mday) {
519                 /* if yes, init daily stuff */
520                 g_counters.day.visit[g_cur_day - 1] = tot_visit(sd_htab);
521                 del_hlist(sd_htab);
522                 segment_htab_cleanup(seg_ref_htab);
523                 g_cur_day = time_rec.tm_mday;
524             }
525 
526             /* check for month change */
527             if (g_cur_month != (time_rec.tm_mon + 1)) {
528                 /* if yes, do monthly stuff */
529                 process_end_of_month();
530                 clear_month();
531                 g_cur_month = time_rec.tm_mon + 1;      /* update our flags        */
532                 g_cur_year = time_rec.tm_year + 1900;
533                 g_counters.month.first_day = time_rec.tm_mday;
534             }
535 
536             g_counters.month.last_day = time_rec.tm_mday;       /* update new last day, *after* we do any end of month calculations */
537 
538             /* Segmenting Check(s)
539              * Inverse of the Ignore checks
540              * To pass segmenting, a given log line MUST match all checks.
541              *   Can then be filtered for Includes/Ignores.
542              * Segmenting is the highest precedence
543              *   Ignore/Include checks will only be done AFTER Segmenting
544              */
545             if (g_settings.flags.segmenting == true) {
546                 if (!segment_by_country(parsed_log.hostname)) {
547                     VPRINT(VERBOSE4, "IGNORESEG_CTRY: %s", buffer);
548                     g_run_counters.ignored_run++;
549                     continue;
550                 }
551                 VPRINT(VERBOSE3, "SEGMENT: Country OK: %s\n", parsed_log.hostname);
552                 if (!segment_by_referer(parsed_log.refer, parsed_log.hostname, rec_tstamp)) {
553                     VPRINT(VERBOSE4, "IGNORESEG_REF: %s", buffer);
554                     g_run_counters.ignored_run++;
555                     continue;
556                 }
557                 VPRINT(VERBOSE3, "SEGMENT: Referrer OK: %s --> %s\n", parsed_log.hostname, parsed_log.refer);
558 //                if (! segment_by_searchstr(parsed_log.srchstr)) {
559 //                    total_ignore++;
560 //                    continue;
561 //                }
562 //                VPRINT(VERBOSE1, "SEGMENT: Search String OK: %s\n", parsed_log.srchstr);
563             }
564 
565             /* Ignore/Include check */
566             /* TODO: Build up a hash tab of ignored entries for fast/cached matching??? */
567             if ((isinlist(include_sites, parsed_log.hostname) == NULL) &&
568                 (isinlist(include_urls, parsed_log.url) == NULL) && (isinlist(include_refs, parsed_log.refer) == NULL) && (isinlist(include_agents, parsed_log.agent) == NULL)
569                 && (isinlist(include_users, parsed_log.ident) == NULL)) {
570                 if (isinlist(ignored_sites, parsed_log.hostname) != NULL) {
571                     g_run_counters.ignored_run++;
572                     VPRINT(VERBOSE5, "IGNORES: %s", buffer);
573                     continue;
574                 }
575                 if (isinlist(ignored_urls, parsed_log.url) != NULL) {
576                     g_run_counters.ignored_run++;
577                     VPRINT(VERBOSE5, "IGNOREU: %s", buffer);
578                     continue;
579                 }
580                 if (isinlist(ignored_agents, parsed_log.agent) != NULL) {
581                     g_run_counters.ignored_run++;
582                     VPRINT(VERBOSE5, "IGNOREA: %s", buffer);
583                     continue;
584                 }
585                 if (isinlist(ignored_refs, parsed_log.refer) != NULL) {
586                     g_run_counters.ignored_run++;
587                     VPRINT(VERBOSE5, "IGNORER: %s", buffer);
588                     continue;
589                 }
590                 if (isinlist(ignored_users, parsed_log.ident) != NULL) {
591                     g_run_counters.ignored_run++;
592                     VPRINT(VERBOSE5, "IGNOREZ: %s", buffer);
593                     continue;
594                 }
595             }
596             VPRINT(VERBOSE4, "LOGLINE: %s", buffer);
597 
598             /* By this stage we have a known Good Record */
599             good_rec = true;
600 
601             /* Bump response code totals */
602             response[response_code_index(parsed_log.resp_code)].count++;
603 
604             /* now save in the various hash tables... */
605             if (parsed_log.resp_code == RC_OK || parsed_log.resp_code == RC_PARTIALCONTENT) {
606                 i = 1;
607             } else {
608                 i = 0;
609             }
610 
611             /* Pages (pageview) calculation */
612             isapage = parse_is_page(parsed_log.url);
613             if (isapage) {
614                 g_counters.month.page++;
615                 g_counters.day.page[time_rec.tm_mday - 1]++;
616                 g_counters.hour.page[time_rec.tm_hour]++;
617 
618                 /* do search string stuff if needed     */
619                 if (g_settings.top.search) {
620                     srch_string(parsed_log.refer, parsed_log.srchstr);
621                 }
622             }
623 
624             /* URL/ident hash table (only if valid response code) */
625             if ((parsed_log.resp_code == RC_OK) || (parsed_log.resp_code == RC_NOMOD) || (parsed_log.resp_code == RC_PARTIALCONTENT)) {
626                 /* URL hash table */
627                 if (put_unode
628                     (parsed_log.url, OBJ_REG, (unsigned long) 1, parsed_log.xfer_size, &g_counters.month.url, (unsigned long) 0, (unsigned long) 0, (unsigned long) 0,
629                      parsed_log.resp_code, um_htab)) {
630                     /* Error adding URL node, skipping ... */
631                     ERRVPRINT(VERBOSE1, "%s %s\n", _("Error adding URL node, skipping"), parsed_log.url);
632                 }
633 
634                 /* ident (username) hash table */
635                 if (put_inode(parsed_log.ident, OBJ_REG, 1, (unsigned long) i, parsed_log.xfer_size, &g_counters.month.user, 0, rec_tstamp, im_htab, isapage)) {
636                     /* Error adding ident node, skipping .... */
637                     ERRVPRINT(VERBOSE1, "%s %s\n", _("Error adding Username node, skipping"), parsed_log.ident);
638                 }
639             }
640 
641             if (g_settings.top.error) {
642                 if (parsed_log.resp_code == RC_NOTFOUND) {
643                     if (put_enode(parsed_log.url, parsed_log.refer ? parsed_log.refer : _("Direct Request"), OBJ_REG, (unsigned long) 1, &g_counters.generic.error_month, ep_htab)) {
644                         ERRVPRINT(VERBOSE1, _("Warning: Can't add %s with referer %s to errorpagehash"), parsed_log.url, parsed_log.refer);
645                     }
646                 }
647             }
648 
649             /* referrer hash table */
650             if (g_settings.top.refs) {
651                 if (parsed_log.refer[0] != '\0' && isapage)
652                     if (put_rnode(parsed_log.refer, OBJ_REG, (unsigned long) 1, &g_counters.month.ref, rm_htab)) {
653                         ERRVPRINT(VERBOSE1, "%s %s\n", _("Error adding Referrer node, skipping"), parsed_log.refer);
654                     }
655             }
656 
657             isnewsite = false;
658             /* hostname (site) hash table - daily */
659             if (put_hnode
660                 (parsed_log.hostname, OBJ_REG, 1, (unsigned long) i, parsed_log.xfer_size, &g_counters.day.site[g_cur_day - 1], 0, isapage, rec_tstamp, parsed_log.url, "", sd_htab,
661                  false, isapage, &isnewsite)) {
662                 /* Error adding host node (daily), skipping .... */
663                 ERRVPRINT(VERBOSE1, "%s %s\n", _("Error adding host node (daily), skipping"), parsed_log.hostname);
664             }
665             if (isnewsite) {
666                 g_counters.hour.site[time_rec.tm_hour]++;
667             }
668 
669             isnewsite = false;
670             /* hostname (site) hash table - monthly */
671             if (put_hnode
672                 (parsed_log.hostname, OBJ_REG, 1, (unsigned long) i, parsed_log.xfer_size, &g_counters.month.site, 0, isapage, rec_tstamp, parsed_log.url, "", sm_htab, false,
673                  isapage, &isnewsite)) {
674                 /* Error adding host node (monthly), skipping .... */
675                 ERRVPRINT(VERBOSE1, "%s %s\n", _("Error adding host node (monthly), skipping"), parsed_log.hostname);
676             }
677 
678             /* user agent hash table */
679             if (g_settings.top.agents) {
680                 if (parsed_log.agent[0] != '\0' && isapage)
681                     if (put_anode(parsed_log.agent, OBJ_REG, (unsigned long) 1, &g_counters.month.agent, am_htab)) {
682                         ERRVPRINT(VERBOSE1, "%s %s\n", _("Error adding User Agent node, skipping"), parsed_log.agent);
683                     }
684             }
685 
686             /* bump monthly/daily/hourly totals        */
687             g_counters.month.hit++;
688             ht_hit++;                           /* daily/hourly hits    */
689             g_counters.month.vol += parsed_log.xfer_size;       /* total xfer size      */
690             g_counters.day.vol[time_rec.tm_mday - 1] += parsed_log.xfer_size;   /* daily xfer total     */
691             g_counters.day.hit[time_rec.tm_mday - 1]++; /* daily hits total     */
692             g_counters.hour.vol[time_rec.tm_hour] += parsed_log.xfer_size;      /* hourly xfer total    */
693             g_counters.hour.hit[time_rec.tm_hour]++;    /* hourly hits total    */
694 
695             /* if file data was sent, increase file counters */
696             if (parsed_log.resp_code == RC_OK || parsed_log.resp_code == RC_PARTIALCONTENT) {
697                 g_counters.month.file++;
698                 g_counters.day.file[time_rec.tm_mday - 1]++;
699                 g_counters.hour.file[time_rec.tm_hour]++;
700             }
701         }
702 
703         /*********************************************
704          * BAD RECORD                                *
705          *********************************************/
706 
707         else {
708             /* If first record, check if stupid Netscape header stuff      */
709             if ((g_run_counters.total_run == 1) && (strncmp(buffer, "format=", 7) == 0)) {
710                 /* Skipping Netscape header record */
711                 VPRINT(VERBOSE1, "%s\n", _("Skipping Netscape header record"));
712                 /* count it as ignored... */
713                 g_run_counters.ignored_run++;
714             } else {
715                 /* really bad record... */
716                 g_run_counters.bad_run++;
717                 ERRVPRINT(VERBOSE1, "%s (%lu)\n", _("Skipping bad record"), g_run_counters.total_run);
718             }
719         }
720     }                                           /* ---- END MAIN WHILE LOOP ---- */
721 
722     /*********************************************
723      * DONE READING LOG FILE - final processing  *
724      *********************************************/
725 
726     /* close log file if needed */
727     if (gz_log)
728         gzclose(gzlog_fp);
729     else if (log_fname)
730         fclose(log_fp);
731 
732     if (good_rec) {                             /* were any good records?   */
733         g_counters.day.visit[g_cur_day - 1] = tot_visit(sd_htab);
734         g_counters.month.visit = tot_visit(sm_htab);
735         if (ht_hit > mh_hit) {
736             mh_hit = ht_hit;
737         }
738 
739         if (g_run_counters.total_run > (g_run_counters.ignored_run + g_run_counters.bad_run)) { /* did we process any?   */
740 //            g_counters.generic.bad += total_bad;
741 //            g_counters.generic.ignored += total_ignore;
742             if (g_settings.flags.incremental) {
743                 segment_htab_cleanup(seg_ref_htab);
744                 if (save_state()) {             /* incremental stuff        */
745                     /* Error: Unable to save current run data */
746                     ERRVPRINT(VERBOSE1, "%s\n", _("Error: Unable to save current run data"));
747                     unlink(g_settings.settings.state_filename);
748                 }
749             }
750             process_end_of_month();
751 //            month_update_exit(rec_tstamp);      /* calculate exit pages     */
752 //            write_month_html();                 /* write monthly HTML file  */
753             write_main_index();                 /* write main HTML file     */
754             put_history();                      /* write history            */
755         }
756 
757         if (g_settings.flags.display_match_counts) {
758             VPRINT(VERBOSE0, "%s\n", _("List Match Counts:"));
759             show_matched(group_sites, "GroupSite");
760             show_matched(group_urls, "GroupURL");
761             show_matched(group_refs, "GroupReferrer");
762             show_matched(group_agents, "GroupAgent");
763             show_matched(group_users, "GroupUser");
764 
765 //            show_matched(hidden_sites, "HideSite");
766 //            show_matched(hidden_urls, "HideURL");
767 //            show_matched(hidden_refs, "HideReferrer");
768 //            show_matched(hidden_agents, "HideAgent");
769 //            show_matched(hidden_users, "HideUser");
770             show_matched(ignored_sites, "IgnoreSite");
771             show_matched(ignored_urls, "IgnoreURL");
772             show_matched(ignored_refs, "IgnoreReferrer");
773             show_matched(ignored_agents, "IgnoreAgent");
774             show_matched(ignored_users, "IgnoreUser");
775 
776             show_matched(include_sites, "IncludeSite");
777             show_matched(include_urls, "IncludeURL");
778             show_matched(include_refs, "IncludeReferrer");
779             show_matched(include_agents, "IncludeAgent");
780             show_matched(include_users, "IncludeUser");
781 //            show_matched(page_type, "");
782 //            show_matched(not_page_type, "");
783             show_matched(search_list, "SearchEngine");
784             show_matched(assign_country, "AssignToCountry");
785         }
786 
787         end_time = times(&mytms);               /* display timing totals?   */
788         if (g_settings.flags.time_me || (g_settings.settings.verbosity >= 1)) {
789             printf("%lu %s ", g_run_counters.total_run, _("records"));
790             if (g_run_counters.ignored_run) {
791                 printf("(%lu %s", g_run_counters.ignored_run, _("ignored"));
792                 if (g_run_counters.bad_run)
793                     printf(", %lu %s) ", g_run_counters.bad_run, _("bad"));
794                 else
795                     printf(") ");
796             } else if (g_run_counters.bad_run)
797                 printf("(%lu %s) ", g_run_counters.bad_run, _("bad"));
798 
799             /* get processing time (end-start) */
800             temp_time = (float) (end_time - start_time) / sysconf(_SC_CLK_TCK);
801             printf("%s %.2f %s", _("in"), temp_time, _("seconds"));
802 
803             /* calculate records per second */
804             if (temp_time)
805                 i = ((int) ((float) g_run_counters.total_run / temp_time));
806             else
807                 i = 0;
808 
809             if ((i > 0) && (i <= g_run_counters.total_run))
810                 printf(_(", %d l/sec\n"), i);
811             else
812                 printf("\n");
813         }
814 
815         del_htabs();
816 #ifdef HAVE_ICONV
817 	iconv_close(cd_from_sjis);
818 	iconv_close(cd_from_eucj);
819 #endif
820         /* Whew, all done! Exit with completion status (0) */
821         exit(0);
822     } else {
823 #ifdef HAVE_ICONV
824 	iconv_close(cd_from_sjis);
825 	iconv_close(cd_from_eucj);
826 #endif
827         /* No valid records found... exit with error (1) */
828         VPRINT(VERBOSE1, "%s\n", _("No valid records found!"));
829         exit(1);
830     }
831 }                                               /* ---- END OF MAIN ---- */
832 
833 /*********************************************
834  * GET_CONFIG - get configuration file info  *
835  *********************************************/
836 
837 void
get_config(const char * fname)838 get_config(const char *fname)
839 {
840     const char *kwords[] = { "undefined",       /* 0 = undefined keyword       0  */
841         "outputdir",                            /* Output directory            1  */
842         "logfile",                              /* Log file to use for input   2  */
843         "reporttitle",                          /* Title for reports           3  */
844         "hostname",                             /* Hostname to use             4  */
845         "ignorehist",                           /* Ignore history file         5  */
846         "quiet",                                /* Run in quiet mode           6  */
847         "timeme",                               /* Produce timing results      7  */
848         "debug",                                /* Produce debug information   8  - Deprecated */
849         "hourlygraph",                          /* Hourly stats graph          9  */
850         "hourlystats",                          /* Hourly stats table         10  */
851         "topsites",                             /* Top sites                  11  */
852         "topurls",                              /* Top URL's                  12  */
853         "topreferrers",                         /* Top Referrers              13  */
854         "topagents",                            /* Top User Agents            14  */
855         "topcountries",                         /* Top Countries              15  */
856         "hidesite",                             /* Sites to hide              16  */
857         "hideurl",                              /* URL's to hide              17  */
858         "hidereferrer",                         /* Referrers to hide          18  */
859         "hideagent",                            /* User Agents to hide        19  */
860         "indexalias",                           /* Aliases for index.html     20  */
861         "htmlhead",                             /* HTML Top1 code             21  */
862         "htmlpost",                             /* HTML Top2 code             22  */
863         "htmltail",                             /* HTML Tail code             23  */
864         "mangleagents",                         /* Mangle User Agents         24  */
865         "ignoresite",                           /* Sites to ignore            25  */
866         "ignoreurl",                            /* Url's to ignore            26  */
867         "ignorereferrer",                       /* Referrers to ignore        27  */
868         "ignoreagent",                          /* User Agents to ignore      28  */
869         "reallyquiet",                          /* Don't display ANY messages 29  */
870         "gmttime",                              /* Local or UTC time?         30  */
871         "groupurl",                             /* Group URL's                31  */
872         "groupsite",                            /* Group Sites                32  */
873         "groupreferrer",                        /* Group Referrers            33  */
874         "groupagent",                           /* Group Agents               34  */
875         "groupshading",                         /* Shade Grouped entries      35  */
876         "grouphighlight",                       /* BOLD Grouped entries       36  */
877         "incremental",                          /* Incremental runs           37  */
878         "incrementalname",                      /* Filename for state data    38  */
879         "historyname",                          /* Filename for history data  39  */
880         "htmlextension",                        /* HTML filename extension    40  */
881         "htmlpre",                              /* HTML code at beginning     41  */
882         "htmlbody",                             /* HTML body code             42  */
883         "htmlend",                              /* HTML code at end           43  */
884         "usehttps",                             /* Use https:// on URL's      44  */
885         "includesite",                          /* Sites to always include    45  */
886         "includeurl",                           /* URL's to always include    46  */
887         "includereferrer",                      /* Referrers to include       47  */
888         "includeagent",                         /* User Agents to include     48  */
889         "pagetype",                             /* Page Type (pageview)       49  */
890         "visittimeout",                         /* Visit timeout (seconds)    50  */
891         "graphlegend",                          /* Graph Legends (yes/no)     51  */
892         "graphlines",                           /* Graph Lines (0=none)       52  */
893         "foldseqerr",                           /* Fold sequence errors       53  */
894         "countrygraph",                         /* Display ctry graph (0=no)  54  */
895         "topksites",                            /* Top sites (by KBytes)      55  */
896         "topkurls",                             /* Top URL's (by KBytes)      56  */
897         "topentry",                             /* Top Entry Pages            57  */
898         "topexit",                              /* Top Exit Pages             58  */
899         "topsearch",                            /* Top Search Strings         59  */
900         "logtype",                              /* Log Type (clf/ftp/squid)   60  */
901         "searchengine",                         /* SearchEngine strings       61  */
902         "groupdomains",                         /* Group domains (n=level)    62  */
903         "hideallsites",                         /* Hide ind. sites (0=no)     63  */
904         "allsites",                             /* List all sites?            64  */
905         "allurls",                              /* List all URLs?             65  */
906         "allreferrers",                         /* List all Referrers?        66  */
907         "allagents",                            /* List all User Agents?      67  */
908         "allsearchstr",                         /* List all Search Strings?   68  */
909         "allusers",                             /* List all Users?            69  */
910         "topusers",                             /* Top Usernames to show      70  */
911         "hideuser",                             /* Usernames to hide          71  */
912         "ignoreuser",                           /* Usernames to ignore        72  */
913         "includeuser",                          /* Usernames to include       73  */
914         "groupuser",                            /* Usernames to group         74  */
915         "dumppath",                             /* Path for dump files        75  */
916         "dumpextension",                        /* Dump filename extension    76  */
917         "dumpheader",                           /* Dump header as first rec?  77  */
918         "dumpsites",                            /* Dump sites tab file        78  */
919         "dumpurls",                             /* Dump urls tab file         79  */
920         "dumpreferrers",                        /* Dump referrers tab file    80  */
921         "dumpagents",                           /* Dump user agents tab file  81  */
922         "dumpusers",                            /* Dump usernames tab file    82  */
923         "dumpsearchstr",                        /* Dump search str tab file   83  */
924         "dnscache",                             /* DNS Cache file name        84  */
925         "dnschildren",                          /* DNS Children (0=no DNS)    85  */
926         "dailygraph",                           /* Daily Graph (0=no)         86  */
927         "dailystats",                           /* Daily Stats (0=no)         87  */
928         "geoip",                                /* Use GeoIP library (0=no)   88  */
929         "geoipdatabase",                        /* GeoIP database             89  */
930         "indexmonths",                          /* Number Months on Main Page 90  */
931         "graphindexx",                          /* Size of Main Graph X       91  */
932         "graphindexy",                          /* Size of Main Graph Y       92  */
933         "graphdailyx",                          /* Size of Daily Graph X      93  */
934         "graphdailyy",                          /* Size of Daily Graph Y      94  */
935         "graphhourlyx",                         /* Size of Hourly Graph X     95  */
936         "graphhourlyy",                         /* Size of Hourly Graph Y     96  */
937         "graphpiex",                            /* Size of Pie Graph X        97  */
938         "graphpiey",                            /* Size of Pie Graph Y        98  */
939         "topurlsbyhitsgraph",                   /* Display Top URL's by Hits graph (0=no)    99  */
940         "topurlsbyvolgraph",                    /* Display Top URL's by Volume graph (0=no) 100  */
941         "topexitpagesgraph",                    /* Display Top Exit Pages Pie Chart         101  */
942         "topentrypagesgraph",                   /* Display Top Entry Pages Pie Chart        102  */
943         "topsitesbypagesgraph",                 /* Display TOP Sites by Pages Graph         103  */
944         "topsitesbyvolgraph",                   /* Display TOP Sites by Volume Graph        104  */
945         "topagentsgraph",                       /* Display TOP Agents Graph (by Pages)      105  */
946         "colorhit",                             /* Hit Color   (def=00805c)   106  */
947         "colorfile",                            /* File Color  (def=0000ff)   107  */
948         "colorsite",                            /* Site Color  (def=ff8000)   108  */
949         "colorkbyte",                           /* Kbyte Color (def=ff0000)   109  */
950         "colorpage",                            /* Page Color  (def=00c0ff)   110  */
951         "colorvisit",                           /* Visit Color (def=ffff00)   111  */
952         "colorbookm",                           /* Bookm Color (def=ff00ff)   112  */
953         "piecolor1",                            /* Pie Color 1 (def=800080)   113  */
954         "piecolor2",                            /* Pie Color 2 (def=80ffc0)   114  */
955         "piecolor3",                            /* Pie Color 3 (def=ff00ff)   115  */
956         "piecolor4",                            /* Pie Color 4 (def=ffc480)   116  */
957         "notpagetype",                          /* Opposite of PageType - specify what is NOT a page     117 */
958         "top404errors",                         /* Display TOP 404 Errors     118  */
959         "all404errors",                         /* Display All 404 Errors     119  */
960         "assigntocountry",                      /* Assign this address to a country code    120 */
961         "groupandhideagent",                    /* Group & Hide Agents        121  */
962         "groupandhidesite",                     /* Group & Hide Sites         122  */
963         "groupandhidereferrer",                 /* Group & Hide Referrer      123 */
964         "groupandhideurl",                      /* Group & Hide URL           124 */
965         "groupandhideuser",                     /* Group & Hide User          125 */
966         "dumpcountries",                        /* Dump countries tab file    126 */
967         "dumpentrypages",                       /* Dump Entry Pages tab file  127 */
968         "dumpexitpages",                        /* Dump Exit Pages tab file   128 */
969         "cssfilename",                          /* CSS File filename          129 */
970         "yearlysubtotals",                      /* Display Yearly Subtotals on main page    130 */
971         "trackpartialrequests",                 /* Track 206 Requests         131 */
972         "flagslocation",                        /* Display Country Flags if not null        132 */
973         "allentrypages",                        /* Display All Entry Pages    133 */
974         "allexitpages",                         /* Display All Exit Pages     134 */
975         "disablefilechecks",                    /* Disable Report File Checks 135 */
976         "segcountry",                           /* Segmenting by Country      136 */
977         "segreferer",                           /* Segmenting by Referer      137 */
978         "ignoreindexalias"                      /* Ignore Index Alias Settings 138 */
979     };
980     FILE *fp;
981 
982     char config_buffer[BUFSIZE];
983     char keyword[32];
984     char keyword_org[32];
985     char value[132];
986     char *cp1, *cp2, *cp3;
987     int i, key, count;
988     int num_kwords = sizeof(kwords) / sizeof(char *);
989 
990     if ((fp = fopen(fname, "r")) == NULL) {
991         ERRVPRINT(VERBOSE1, "%s %s\n", _("Error: Unable to open configuration file"), fname);
992         return;
993     }
994 
995     VPRINT(VERBOSE1, "%s %s\n", _("Using config file:"), fname);
996 
997     while ((fgets(config_buffer, BUFSIZE, fp)) != NULL) {
998         /* skip comments and blank lines */
999         if ((config_buffer[0] == '#') || isspace((int) config_buffer[0])) {
1000             continue;
1001         }
1002 
1003         /* Get keyword */
1004         cp1 = config_buffer;
1005         cp2 = keyword;
1006         cp3 = keyword_org;
1007         count = 31;
1008         /* Convert read in keyword to lower case.
1009          * Maintain copy of original case'd keyword in case of error - for display. */
1010         while ((isalnum((int) *cp1)) && (count > 0)) {
1011             *cp2 = tolower(*cp1);
1012             *cp3 = *cp1;
1013             cp1++;
1014             cp2++;
1015             cp3++;
1016             count--;
1017         }
1018         *cp2 = '\0';
1019         *cp3 = '\0';
1020 
1021         /* Get value */
1022         cp2 = value;
1023         count = 131;
1024         while ((*cp1 != '\n') && (*cp1 != '\0') && (isspace((int) *cp1))) {
1025             cp1++;
1026         }
1027         while ((*cp1 != '\n') && (*cp1 != '\0') && (count > 0)) {
1028             *cp2++ = *cp1++;
1029             count--;
1030         }
1031         *cp2-- = '\0';
1032         while ((isspace((int) *cp2)) && (cp2 != value)) {
1033             *cp2-- = '\0';
1034         }
1035 
1036         /* check if blank keyword/value */
1037         if ((keyword[0] == '\0') || (value[0] == '\0'))
1038             continue;
1039 
1040         key = 0;
1041         for (i = 0; i < num_kwords; i++) {
1042             if (!strcmp(keyword, kwords[i])) {
1043                 key = i;
1044                 break;
1045             }
1046         }
1047 
1048         if (key == 0) {                         /* Invalid keyword       */
1049             ERRVPRINT(VERBOSE1, "%s '%s' (%s)\n", _("Warning: Invalid keyword"), keyword_org, fname);
1050             continue;
1051         }
1052 
1053         VPRINT(VERBOSE5, "New Key: %d, Value: %s\n", key, value);
1054 
1055         switch (key) {
1056         case 1:
1057             g_settings.settings.out_dir = save_opt(value);
1058             break;                              /* OutputDir      */
1059         case 2:
1060             log_fname = save_opt(value);
1061             break;                              /* LogFile        */
1062         case 3:
1063             g_settings.settings.title_message = save_opt(value);
1064             break;                              /* ReportTitle    */
1065         case 4:
1066             g_settings.settings.hostname = save_opt(value);
1067             break;                              /* HostName       */
1068         case 5:
1069             g_settings.flags.ignore_history = isaffirmitive(value);
1070             break;                              /* IgnoreHist     */
1071         case 6:
1072             ERRVPRINT(VERBOSE1, "%s\n", _("Use of \"Quiet (-q)\" has been deprecated."));
1073             break;                              /* Quiet          */
1074         case 7:
1075             g_settings.flags.time_me = isaffirmitive(value);
1076             break;                              /* TimeMe         */
1077         case 8:
1078             ERRVPRINT(VERBOSE1, "%s\n", _("Use of \"Debug\" has been deprecated."));
1079             break;                              /* Debug          */
1080         case 9:
1081             g_settings.graphs.hourly = isaffirmitive(value);
1082             break;                              /* HourlyGraph    */
1083         case 10:
1084             g_settings.stats.hourly = isaffirmitive(value);
1085             break;                              /* HourlyStats    */
1086         case 11:
1087             g_settings.top.sites = atoi(value);
1088             break;                              /* TopSites       */
1089         case 12:
1090             g_settings.top.urls = atoi(value);
1091             break;                              /* TopURLs        */
1092         case 13:
1093             g_settings.top.refs = atoi(value);
1094             break;                              /* TopRefs        */
1095         case 14:
1096             g_settings.top.agents = atoi(value);
1097             break;                              /* TopAgents      */
1098         case 15:
1099             g_settings.top.countries = atoi(value);
1100             break;                              /* TopCountries   */
1101         case 16:
1102             add_list_member(value, &hidden_sites, USESPACE);
1103             break;                              /* HideSite       */
1104         case 17:
1105             add_list_member(value, &hidden_urls, USESPACE);
1106             break;                              /* HideURL        */
1107         case 18:
1108             add_list_member(value, &hidden_refs, USESPACE);
1109             break;                              /* HideReferrer   */
1110         case 19:
1111             add_list_member(value, &hidden_agents, USESPACE);
1112             break;                              /* HideAgent      */
1113         case 20:
1114             add_list_member(value, &index_alias, USESPACE);
1115             break;                              /* IndexAlias     */
1116         case 21:
1117             add_list_member(value, &html_head, IGNORESPACE);
1118             break;                              /* HTMLHead       */
1119         case 22:
1120             add_list_member(value, &html_post, IGNORESPACE);
1121             break;                              /* HTMLPost       */
1122         case 23:
1123             add_list_member(value, &html_tail, IGNORESPACE);
1124             break;                              /* HTMLTail       */
1125         case 24:
1126             g_settings.settings.mangle_agent = atoi(value);
1127             break;                              /* MangleAgents   */
1128         case 25:
1129             add_list_member(value, &ignored_sites, USESPACE);
1130             break;                              /* IgnoreSite     */
1131         case 26:
1132             add_list_member(value, &ignored_urls, USESPACE);
1133             break;                              /* IgnoreURL      */
1134         case 27:
1135             add_list_member(value, &ignored_refs, USESPACE);
1136             break;                              /* IgnoreReferrer */
1137         case 28:
1138             add_list_member(value, &ignored_agents, USESPACE);
1139             break;                              /* IgnoreAgent    */
1140         case 29:
1141             ERRVPRINT(VERBOSE1, "%s\n", _("Use of \"ReallyQuiet (-Q)\" has been deprecated."));
1142             break;                              /* ReallyQuiet    */
1143         case 30:
1144             g_settings.flags.local_time = !isaffirmitive(value);
1145             break;                              /* GMTTime        */
1146         case 31:
1147             add_list_member(value, &group_urls, USESPACE);
1148             break;                              /* GroupURL       */
1149         case 32:
1150             add_list_member(value, &group_sites, USESPACE);
1151             break;                              /* GroupSite      */
1152         case 33:
1153             add_list_member(value, &group_refs, USESPACE);
1154             break;                              /* GroupReferrer  */
1155         case 34:
1156             add_list_member(value, &group_agents, USESPACE);
1157             break;                              /* GroupAgent     */
1158         case 35:
1159             g_settings.flags.shade_groups = isaffirmitive(value);
1160             break;                              /* GroupShading   */
1161         case 36:
1162             g_settings.flags.highlight_groups = isaffirmitive(value);
1163             break;                              /* GroupHighlight */
1164         case 37:
1165             g_settings.flags.incremental = isaffirmitive(value);
1166             break;                              /* Incremental    */
1167         case 38:
1168             g_settings.settings.state_filename = save_opt(value);
1169             break;                              /* State FName    */
1170         case 39:
1171             g_settings.settings.history_filename = save_opt(value);
1172             break;                              /* History FName  */
1173         case 40:
1174             g_settings.settings.html_ext = save_opt(value);
1175             break;                              /* HTML extension */
1176         case 41:
1177             add_list_member(value, &html_pre, IGNORESPACE);
1178             break;                              /* HTML Pre code  */
1179         case 42:
1180             add_list_member(value, &html_body, IGNORESPACE);
1181             break;                              /* HTML Body code */
1182         case 43:
1183             add_list_member(value, &html_end, IGNORESPACE);
1184             break;                              /* HTML End code  */
1185         case 44:
1186             g_settings.flags.use_https = isaffirmitive(value);
1187             break;                              /* Use https://   */
1188         case 45:
1189             add_list_member(value, &include_sites, USESPACE);
1190             break;                              /* IncludeSite    */
1191         case 46:
1192             add_list_member(value, &include_urls, USESPACE);
1193             break;                              /* IncludeURL     */
1194         case 47:
1195             add_list_member(value, &include_refs, USESPACE);
1196             break;                              /* IncludeReferrer */
1197         case 48:
1198             add_list_member(value, &include_agents, USESPACE);
1199             break;                              /* IncludeAgent   */
1200         case 49:
1201             add_list_member(value, &page_type, USESPACE);
1202             break;                              /* PageType       */
1203         case 50:
1204             g_settings.settings.visit_timeout = atoi(value);
1205             break;                              /* VisitTimeout   */
1206         case 51:
1207             g_settings.graphs.legend = isaffirmitive(value);
1208             break;                              /* GraphLegend    */
1209         case 52:
1210             if (atoi(value) > 0) {
1211                 g_settings.graphs.lines = true;
1212             } else {
1213                 g_settings.graphs.lines = isaffirmitive(value);
1214             }
1215             break;                              /* GraphLines     */
1216         case 53:
1217             g_settings.flags.fold_seq_err = isaffirmitive(value);
1218             break;                              /* FoldSeqErr     */
1219         case 54:
1220             g_settings.graphs.country = isaffirmitive(value);
1221             break;                              /* CountryGraph   */
1222         case 55:
1223             g_settings.top.sites_by_vol = atoi(value);
1224             break;                              /* TopKSites (KB) */
1225         case 56:
1226             g_settings.top.urls_by_vol = atoi(value);
1227             break;                              /* TopKUrls (KB)  */
1228         case 57:
1229             g_settings.top.entry = atoi(value);
1230             break;                              /* Top Entry pgs  */
1231         case 58:
1232             g_settings.top.exit = atoi(value);
1233             break;                              /* Top Exit pages */
1234         case 59:
1235             g_settings.top.search = atoi(value);
1236             break;                              /* Top Search pgs */
1237         case 60:
1238             g_settings.flags.force_log_type = true;
1239             if (strncmp(value, "auto", 4) == 0) {
1240                 g_settings.settings.log_type = LOG_AUTO;
1241                 g_settings.flags.force_log_type = false;
1242             } else if (strncmp(value, "clf", 3) == 0) {
1243                 g_settings.settings.log_type = LOG_CLF;
1244             } else if (strncmp(value, "ftp", 3) == 0) {
1245                 g_settings.settings.log_type = LOG_FTP;
1246             } else if (strncmp(value, "squid", 5) == 0) {
1247                 g_settings.settings.log_type = LOG_SQUID;
1248             } else if (strncmp(value, "combined", 8) == 0) {
1249                 g_settings.settings.log_type = LOG_COMBINED;
1250             } else if (strncmp(value, "domino", 6) == 0) {
1251                 g_settings.settings.log_type = LOG_DOMINO;
1252             } else {
1253                 ERRVPRINT(VERBOSE0, "%s %s\n", _("Unknown Log Type:"), value);
1254                 exit(1);
1255             }
1256             break;                              /* LogType        */
1257         case 61:
1258             add_list_member(value, &search_list, USESPACE);
1259             break;                              /* SearchEngine   */
1260         case 62:
1261             g_settings.settings.group_domains = atoi(value);
1262             break;                              /* GroupDomains   */
1263         case 63:
1264             g_settings.flags.hide_sites = isaffirmitive(value);
1265             break;                              /* HideAllSites   */
1266         case 64:
1267             g_settings.all.sites = isaffirmitive(value);
1268             break;                              /* All Sites?     */
1269         case 65:
1270             g_settings.all.urls = isaffirmitive(value);
1271             break;                              /* All URL's?     */
1272         case 66:
1273             g_settings.all.refs = isaffirmitive(value);
1274             break;                              /* All Refs       */
1275         case 67:
1276             g_settings.all.agents = isaffirmitive(value);
1277             break;                              /* All Agents?    */
1278         case 68:
1279             g_settings.all.search = isaffirmitive(value);
1280             break;                              /* All Srch str   */
1281         case 69:
1282             g_settings.all.users = isaffirmitive(value);
1283             break;                              /* All Users?     */
1284         case 70:
1285             g_settings.top.users = atoi(value);
1286             break;                              /* TopUsers       */
1287         case 71:
1288             add_list_member(value, &hidden_users, USESPACE);
1289             break;                              /* HideUser       */
1290         case 72:
1291             add_list_member(value, &ignored_users, USESPACE);
1292             break;                              /* IgnoreUser     */
1293         case 73:
1294             add_list_member(value, &include_users, USESPACE);
1295             break;                              /* IncludeUser    */
1296         case 74:
1297             add_list_member(value, &group_users, USESPACE);
1298             break;                              /* GroupUser      */
1299         case 75:
1300             g_settings.dump.dump_path = save_opt(value);
1301             break;                              /* DumpPath       */
1302         case 76:
1303             g_settings.dump.dump_ext = save_opt(value);
1304             break;                              /* Dumpfile ext   */
1305         case 77:
1306             g_settings.dump.header = isaffirmitive(value);
1307             break;                              /* DumpHeader?    */
1308         case 78:
1309             g_settings.dump.sites = isaffirmitive(value);
1310             break;                              /* DumpSites?     */
1311         case 79:
1312             g_settings.dump.urls = isaffirmitive(value);
1313             break;                              /* DumpURLs?      */
1314         case 80:
1315             g_settings.dump.refs = isaffirmitive(value);
1316             break;                              /* DumpReferrers? */
1317         case 81:
1318             g_settings.dump.agents = isaffirmitive(value);
1319             break;                              /* DumpAgents?    */
1320         case 82:
1321             g_settings.dump.users = isaffirmitive(value);
1322             break;                              /* DumpUsers?     */
1323         case 83:
1324             g_settings.dump.search = isaffirmitive(value);
1325             break;                              /* DumpSrchStrs?  */
1326         case 84:                               /* Disable DNSCache and DNSChildren */
1327         case 85:
1328             ERRVPRINT(VERBOSE1, "%s '%s' (%s)\n", _("Warning: Invalid keyword"), keyword, fname);
1329             break;
1330         case 86:
1331             g_settings.graphs.daily = isaffirmitive(value);
1332             break;                              /* HourlyGraph    */
1333         case 87:
1334             g_settings.stats.daily = isaffirmitive(value);
1335             break;                              /* HourlyStats    */
1336         case 88:
1337             g_settings.flags.use_geoip = isaffirmitive(value);
1338             break;                              /* Use GeoIP         */
1339         case 89:
1340             g_settings.settings.geoip_database = save_opt(value);
1341             break;                              /* GeoIP Database File */
1342         case 90:
1343             g_settings.settings.index_months = atoi(value);
1344             break;                              /* Months to Display */
1345         case 91:
1346             g_settings.graphs.index_x = atoi(value);
1347             break;                              /* Size of Main Graph X */
1348         case 92:
1349             g_settings.graphs.index_y = atoi(value);
1350             break;                              /* Size of Main Graph Y */
1351         case 93:
1352             g_settings.graphs.daily_x = atoi(value);
1353             break;                              /* Size of Daily Graph X        */
1354         case 94:
1355             g_settings.graphs.daily_y = atoi(value);
1356             break;                              /* Size of Daily Graph Y        */
1357         case 95:
1358             g_settings.graphs.hourly_x = atoi(value);
1359             break;                              /* Size of Hourly Graph X       */
1360         case 96:
1361             g_settings.graphs.hourly_y = atoi(value);
1362             break;                              /* Size of Hourly Graph Y       */
1363         case 97:
1364             g_settings.graphs.pie_x = atoi(value);
1365             break;                              /* Size of Pie Graph X  */
1366         case 98:
1367             g_settings.graphs.pie_y = atoi(value);
1368             break;                              /* Size of Pie Graph Y  */
1369         case 99:
1370             g_settings.graphs.url_by_hits = isaffirmitive(value);
1371             break;                              /* URLs by HITS Graph   */
1372         case 100:
1373             g_settings.graphs.url_by_vol = isaffirmitive(value);
1374             break;                              /* URLs by Volume Graph   */
1375         case 101:
1376             if (value[0] == 'h') {
1377                 g_settings.graphs.exit_pages = 1;
1378             }
1379             if (value[0] == 'v') {
1380                 g_settings.graphs.exit_pages = 2;
1381             }
1382             break;                              /* Top Exit Pages Pie Chart   */
1383         case 102:
1384             if (value[0] == 'h') {
1385                 g_settings.graphs.entry_pages = 1;
1386             }
1387             if (value[0] == 'v') {
1388                 g_settings.graphs.entry_pages = 2;
1389             }
1390             break;                              /* Top Entry Pages Pie Chart   */
1391         case 103:
1392             g_settings.graphs.sites_by_pages = isaffirmitive(value);
1393             break;                              /* TOP Sites by Pages Graph */
1394         case 104:
1395             g_settings.graphs.sites_by_vol = isaffirmitive(value);
1396             break;                              /* TOP Sites by Volume Graph */
1397         case 105:
1398             g_settings.graphs.agents = isaffirmitive(value);
1399             break;                              /* TOP User Agents (by pages) Pie Chart */
1400         case 106:
1401             strncpy(hit_color + 1, value, 6);
1402             break;                              /* Hit Color   (def=00805c)   106  */
1403         case 107:
1404             strncpy(file_color + 1, value, 6);
1405             break;                              /* File Color  (def=0000ff)   107  */
1406         case 108:
1407             strncpy(site_color + 1, value, 6);
1408             break;                              /* Site Color  (def=ff8000)   108  */
1409         case 109:
1410             strncpy(kbyte_color + 1, value, 6);
1411             break;                              /* Kbyte Color (def=ff0000)   109  */
1412         case 110:
1413             strncpy(page_color + 1, value, 6);
1414             break;                              /* Page Color  (def=00c0ff)   110  */
1415         case 111:
1416             strncpy(visit_color + 1, value, 6);
1417             break;                              /* Visit Color (def=ffff00)   111  */
1418         case 112:
1419             ERRVPRINT(VERBOSE1, "%s\n", _("Use of \"ColorBookM\" has been deprecated."));
1420             break;                              /* Bookm Color (def=ff00ff)   112  */
1421         case 113:
1422             strncpy(pie_color1 + 1, value, 6);
1423             break;                              /* Pie Color 1 (def=800080)   113  */
1424         case 114:
1425             strncpy(pie_color2 + 1, value, 6);
1426             break;                              /* Pie Color 2 (def=80ffc0)   114  */
1427         case 115:
1428             strncpy(pie_color3 + 1, value, 6);
1429             break;                              /* Pie Color 3 (def=ff00ff)   115  */
1430         case 116:
1431             strncpy(pie_color4 + 1, value, 6);
1432             break;                              /* Pie Color 4 (def=ffc480)   116  */
1433         case 117:
1434             add_list_member(value, &not_page_type, USESPACE);
1435             break;                              /* NotPageType       */
1436         case 118:
1437             g_settings.top.error = atoi(value);
1438             break;                              /* Top404Error       */
1439         case 119:
1440             g_settings.all.errors = isaffirmitive(value);
1441             break;                              /* All 404errors?     */
1442         case 120:
1443             add_list_member(value, &assign_country, USESPACE);
1444             break;                              /* Assign Address to Country */
1445         case 121:
1446             add_list_member(value, &hidden_agents, USESPACE);
1447             add_list_member(value, &group_agents, USESPACE);
1448             break;                              /* GroupAndHideAgent     */
1449         case 122:
1450             add_list_member(value, &hidden_sites, USESPACE);
1451             add_list_member(value, &group_sites, USESPACE);
1452             break;                              /* GroupAndHideSite      */
1453         case 123:
1454             add_list_member(value, &hidden_refs, USESPACE);
1455             add_list_member(value, &group_refs, USESPACE);
1456             break;                              /* GroupAndHideReferrer  */
1457         case 124:
1458             add_list_member(value, &hidden_urls, USESPACE);
1459             add_list_member(value, &group_urls, USESPACE);
1460             break;                              /* GroupAndHideURL       */
1461         case 125:
1462             add_list_member(value, &hidden_users, USESPACE);
1463             add_list_member(value, &group_users, USESPACE);
1464             break;                              /* GroupAndHideUser      */
1465         case 126:
1466             g_settings.dump.countries = isaffirmitive(value);
1467             break;                              /* DumpCountries?        */
1468         case 127:
1469             g_settings.dump.entry_pages = isaffirmitive(value);
1470             break;                              /* DumpEntryPages?       */
1471         case 128:
1472             g_settings.dump.exit_pages = isaffirmitive(value);
1473             break;                              /* DumpExitPages?        */
1474         case 129:
1475             g_settings.settings.css_filename = save_opt(value);
1476             break;                              /* CSS file filename     */
1477         case 130:
1478             g_settings.flags.display_yearly_subtotals = isaffirmitive(value);
1479             break;                              /* DumpExitPages?        */
1480         case 131:
1481             g_settings.flags.track_206_reqs = isaffirmitive(value);
1482             break;                              /* TrackPartialRequests? */
1483         case 132:
1484             g_settings.settings.flags_location = save_opt(value);
1485             break;                              /*  FlagsLocation        */
1486         case 133:
1487             g_settings.all.entry = isaffirmitive(value);
1488             break;                              /* Display All Entry Pages */
1489         case 134:
1490             g_settings.all.exit = isaffirmitive(value);
1491             break;                              /* Display All Exit Pages  */
1492         case 135:
1493             g_settings.flags.disable_report_file_checks = true;
1494             break;                              /* Disable Report File Checks */
1495         case 136:
1496             add_list_member(strtoupper(value), &seg_countries, IGNORESPACE);
1497             g_settings.flags.segmenting = true;
1498             g_settings.flags.segcountry = true;
1499             break;                              /* Segmenting by Country */
1500         case 137:
1501             add_list_member(value, &seg_referers, IGNORESPACE);
1502             g_settings.flags.segmenting = true;
1503             g_settings.flags.segreferer = true;
1504             break;                              /* Segmenting by Referer */
1505         case 138:
1506             g_settings.flags.ignore_index_alias = isaffirmitive(value);
1507             break;                              /* IgnoreIndexAlias      */
1508         }
1509     }
1510     if ((page_type != NULL) && (not_page_type != NULL)) {
1511         ERRVPRINT(VERBOSE0, "%s\n", _("FATAL! You may not specify both PageType and NotPageType in the config file.%s"));
1512         exit(1);
1513     }
1514 
1515     fclose(fp);
1516 }
1517 
1518 
1519 /********************************************
1520  * option_checks                            *
1521  *                                          *
1522  * Check various options for funky stuff.   *
1523  * Alert/Exit as necessary.                 *
1524  ********************************************/
1525 void
option_checks()1526 option_checks()
1527 {
1528     int max_ctry;                               /* max countries defined       */
1529     int i;
1530 
1531     /* Be polite and announce yourself... */
1532     uname(&system_info);
1533     VPRINT(VERBOSE1, "%s (%s %s) %s\n", PACKAGE_STRING, system_info.sysname, system_info.release, _("English"));
1534 
1535     /* GEOIP Checks */
1536 #if HAVE_GEOIP_H
1537     if (g_settings.flags.use_geoip) {
1538         g_settings.flags.have_geoip = true;
1539         gi = GeoIP_open(g_settings.settings.geoip_database, GEOIP_MEMORY_CACHE);
1540         if (gi == NULL) {
1541             ERRVPRINT(VERBOSE0, "%s: %s\n", _("FATAL. Unable to open the GeoIP database"), g_settings.settings.geoip_database);
1542             exit(1);
1543         }
1544         VPRINT(VERBOSE1, "%s: %s\n", _("Using GeoIP for IP Address Lookups"), g_settings.settings.geoip_database);
1545     }
1546 #endif
1547     if (g_settings.flags.use_geoip && !g_settings.flags.have_geoip) {
1548         VPRINT(VERBOSE1, "%s\n", _("GeoIP is not available in this binary. Ignoring request to use."));
1549     }
1550 
1551     if (page_type == NULL) {                    /* check if page types present     */
1552         if ((g_settings.settings.log_type == LOG_AUTO) || (g_settings.settings.log_type == LOG_CLF) || (g_settings.settings.log_type == LOG_COMBINED)
1553             || (g_settings.settings.log_type == LOG_SQUID)) {
1554             add_list_member("htm", &page_type, USESPACE);       /* if no page types specified, we  */
1555             add_list_member("html", &page_type, USESPACE);      /* use the default ones here...    */
1556             add_list_member("php", &page_type, USESPACE);
1557             if (!isinlist(page_type, (char *) g_settings.settings.html_ext))
1558                 add_list_member((char *) g_settings.settings.html_ext, &page_type, USESPACE);
1559         } else
1560             add_list_member("txt", &page_type, USESPACE);       /* FTP logs default to .txt        */
1561     }
1562 
1563     if (g_settings.flags.ignore_index_alias == false) {
1564         /* add default index. alias */
1565         add_list_member("index.", &index_alias, USESPACE);
1566     }
1567 
1568     for (max_ctry = 0; ctry[max_ctry].desc; max_ctry++);
1569     if (g_settings.top.countries > max_ctry) {
1570         g_settings.top.countries = max_ctry;    /* force upper limit */
1571     }
1572 
1573     if (g_settings.settings.log_type == LOG_FTP) {
1574         /* disable stuff for ftp logs */
1575         g_settings.top.entry = g_settings.top.exit = 0;
1576         g_settings.top.search = 0;
1577     } else {
1578         if (search_list == NULL) {
1579             /* If no search engines defined, define some :) */
1580             add_list_member("google.        q=", &search_list, USESPACE);
1581             add_list_member("yahoo.         p=", &search_list, USESPACE);
1582             add_list_member("msn.           q=", &search_list, USESPACE);
1583             add_list_member("search.aol.    query=", &search_list, USESPACE);
1584             add_list_member("altavista.com  q=", &search_list, USESPACE);
1585             add_list_member("netscape.com   query=", &search_list, USESPACE);
1586             add_list_member("ask.com        q=", &search_list, USESPACE);
1587             add_list_member("alltheweb.com  query=", &search_list, USESPACE);
1588             add_list_member("lycos.com      query=", &search_list, USESPACE);
1589             add_list_member("hotbot.        query=", &search_list, USESPACE);
1590             add_list_member("mamma.com      query=", &search_list, USESPACE);
1591             add_list_member("search.        q=", &search_list, USESPACE);       /* Generic Catchall... */
1592         }
1593     }
1594 
1595     /* ensure entry/exits don't exceed urls */
1596     i = (g_settings.top.urls > g_settings.top.urls_by_vol) ? g_settings.top.urls : g_settings.top.urls_by_vol;
1597     if (g_settings.top.entry > i)
1598         g_settings.top.entry = i;
1599     if (g_settings.top.exit > i)
1600         g_settings.top.exit = i;
1601 
1602 }
1603 
1604 /*********************************************/
1605 /* SAVE_OPT - save option from config file   */
1606 /*********************************************/
1607 
1608 static char *
save_opt(char * str)1609 save_opt(char *str)
1610 {
1611     char *cp1;
1612     size_t string_length;
1613 
1614     string_length = strlen(str);
1615     cp1 = XMALLOC(char, string_length + 1);
1616 
1617     strlcpy(cp1, str, string_length + 1);
1618 
1619     return cp1;
1620 }
1621 
1622 /*********************************************/
/* CLEAR_MONTH - initialize monthly stuff    */
1624 /*********************************************/
1625 
1626 void
clear_month(void)1627 clear_month(void)
1628 {
1629     int i;
1630 
1631     init_counters();                            /* reset monthly counters  */
1632     del_htabs();                                /* clear hash tables       */
1633     if (g_settings.top.countries != 0) {
1634         for (i = 0; i < g_settings.top.countries; i++) {
1635             top_ctrys[i] = NULL;
1636         }
1637     }
1638 }
1639 
1640 /*********************************************/
1641 /* INIT_COUNTERS - prep counters for use     */
1642 /*********************************************/
1643 
1644 void
init_counters(void)1645 init_counters(void)
1646 {
1647     int i;
1648 
1649     memset(&g_counters, 0, sizeof(g_counters));
1650 
1651     for (i = 0; i < TOTAL_RC; i++)
1652         response[i].count = 0;
1653     for (i = 0; ctry[i].desc; i++) {            /* country totals */
1654         ctry[i].count = 0;
1655         ctry[i].files = 0;
1656         ctry[i].xfer = 0;
1657         ctry[i].pages = 0;
1658     }
1659     mh_hit = 0;
1660     g_counters.month.first_day = 0;
1661     g_counters.month.last_day = 0;
1662 }
1663 
1664 static void
init_run_counters(void)1665 init_run_counters(void)
1666 {
1667     memset(&g_run_counters, 0, sizeof(g_run_counters));
1668 }
1669 
1670 
1671 static void
process_end_of_month(void)1672 process_end_of_month(void)
1673 {
1674     g_counters.month.visit = tot_visit(sm_htab);
1675     g_counters.generic.bad_month += g_run_counters.bad_run;
1676     g_counters.generic.ignored_month += g_run_counters.ignored_run;
1677     update_history_array();
1678     month_update_exit(req_tstamp);              /* process exit pages      */
1679 
1680     write_month_html();                         /* generate HTML for month */
1681     /* Update Grand Total Bad/Ignore Counters, before zeroing the actual counters. */
1682 //    clear_month();
1683 
1684 }
1685 
1686 /*********************************************/
1687 /* CURRENT_TIME - return date/time as a string   */
1688 /*********************************************/
1689 
1690 char *
current_time(void)1691 current_time(void)
1692 {
1693     /* get system time */
1694     now = time(NULL);
1695     /* convert to timestamp string */
1696     if (g_settings.flags.local_time) {
1697         strftime(timestamp, sizeof(timestamp), "%d-%b-%Y %H:%M %Z", localtime(&now));
1698     } else {
1699         strftime(timestamp, sizeof(timestamp), "%d-%b-%Y %H:%M GMT", gmtime(&now));
1700     }
1701 
1702     return timestamp;
1703 }
1704 
1705 /*********************************************/
1706 /* ISURLCHAR - checks for valid URL chars    */
1707 /*********************************************/
1708 
/*
 * Return 1 if `ch` is acceptable inside a URL, 0 otherwise.
 *
 * Accepted: letters and digits, any byte above 127 (extended characters)
 * and the punctuation listed in the set below.  The NUL byte is rejected
 * explicitly: strchr() treats the terminator as part of the string it
 * searches, so without the guard a NUL would be reported as valid.
 */
int
isurlchar(unsigned char ch)
{
    if (ch == '\0')
        return 0;                               /* never a URL character        */
    if (isalnum((int) ch))
        return 1;                               /* allow letters, numbers...    */
    if (ch > 127)
        return 1;                               /* allow extended chars...      */
    return (strchr(":/\\.,' *-+_@~()[]$", ch) != NULL); /* and a few special ones */
}
1718 
1719 /*********************************************/
1720 /* CTRY_IDX - create unique # from domain    */
1721 /*********************************************/
1722 
/*
 * Build a numeric index from a domain/country label.
 *
 * Characters are read from the end of `str` towards the start; each one is
 * mapped to (c - 'a' + 1) and weighted by 32^position, i.e. 5 bits per
 * character with the last character in the lowest bits.  For example
 * "com" yields (3 << 10) + (15 << 5) + 13.
 *
 * All arithmetic is done in unsigned long, so long labels simply wrap
 * modulo the width of unsigned long instead of shifting an int by more
 * than its width (undefined behaviour).  Results for short lowercase
 * labels are unchanged.  A NULL `str` yields 0.
 */
unsigned long
ctry_idx(char *str)
{
    unsigned long idx = 0;
    unsigned long weight = 1;                   /* 32^position, wraps to 0 when exhausted */
    size_t remaining;

    if (str == NULL)
        return 0;

    for (remaining = strlen(str); remaining > 0 && weight != 0; remaining--) {
        long digit = (long) str[remaining - 1] - 'a' + 1;

        idx += (unsigned long) digit * weight;
        weight <<= 5;                           /* next character sits 5 bits higher */
    }
    return idx;
}
1736 
1737 /*********************************************/
1738 /* FROM_HEX - convert hex char to decimal    */
1739 /*********************************************/
1740 
/*
 * Convert one hexadecimal digit character to its value (0-15).
 * Accepts '0'-'9', 'A'-'F' and 'a'-'f'; any other character yields 0.
 *
 * Each range is tested explicitly.  Treating "everything else" as a
 * lowercase digit and range-checking the result afterwards lets the
 * characters 'W' through '`' slip through as 0-9.
 */
char
from_hex(char c)
{
    if (c >= '0' && c <= '9')
        return c - '0';                         /* 0-9                     */
    if (c >= 'A' && c <= 'F')
        return c - 'A' + 10;                    /* A-F                     */
    if (c >= 'a' && c <= 'f')
        return c - 'a' + 10;                    /* a-f                     */
    return 0;                                   /* not a hex digit         */
}
1749 
1750 /*********************************************/
1751 /* UNESCAPE - convert escape seqs to chars   */
1752 /*********************************************/
1753 
/*
 * Decode escape sequences in `str` in place and return `str`
 * (NULL is returned unchanged for a NULL argument).
 *
 * Two passes are made over the buffer:
 *   1. Apache log escapes: "\xHH" becomes the byte HH, "\\" becomes "\".
 *   2. URL escapes: "%HH" becomes the byte HH.  A '%' not followed by a
 *      hex digit is kept literally.
 * In both passes a decoded byte below 32, or equal to 127, is replaced by
 * '_'.  The result is never longer than the input.
 *
 * An escape cut short by the end of the string ("%H" as the last two
 * characters) is dropped, and scanning stops at the terminator rather
 * than stepping over it.
 */
char *
unescape(char *str)
{
    unsigned char *cp1 = (unsigned char *) str; /* read cursor; unsigned so we  */
    unsigned char *cp2 = (unsigned char *) str; /* can compare against > 127    */

    if (!str)
        return NULL;                            /* make sure strings valid */

    /* Pass 1: Apache log escape codes. */
    while (*cp1) {
        if (*cp1 == '\\' && *(cp1 + 1) == 'x' && isxdigit(*(cp1 + 2)) && isxdigit(*(cp1 + 3))) {
            *cp2 = from_hex(*(cp1 + 2)) * 16 + from_hex(*(cp1 + 3));
            if ((*cp2 < 32) || (*cp2 == 127))
                *cp2 = '_';                     /* hide control characters */
            cp1 += 4;
            cp2++;
        } else if (*cp1 == '\\' && *(cp1 + 1) == '\\') {
            *cp2 = '\\';                        /* escaped backslash       */
            cp1 += 2;
            cp2++;
        } else {
            *cp2++ = *cp1++;
        }
    }
    *cp2 = *cp1;                                /* terminate pass 1 output */

    /* Pass 2: %HH URL escapes, over the output of pass 1. */
    cp1 = cp2 = (unsigned char *) str;
    while (*cp1) {
        if (*cp1 != '%') {
            *cp2++ = *cp1++;                    /* ordinary character      */
            continue;
        }

        cp1++;                                  /* step over the '%'       */
        if (!isxdigit(*cp1)) {
            *cp2++ = '%';                       /* not an escape, keep '%' */
            continue;
        }

        *cp2 = from_hex(*cp1++) * 16;           /* high nibble             */
        if (*cp1 == '\0')
            break;                              /* truncated escape: drop it, */
                                                /* terminator written below   */
        *cp2 += from_hex(*cp1++);               /* low nibble              */
        if ((*cp2 < 32) || (*cp2 == 127))
            *cp2 = '_';                         /* make '_' if its bad     */
        cp2++;
    }
    *cp2 = *cp1;                                /* don't forget terminator */
    return str;                                 /* return the string       */
}
1804 
1805 #ifdef HAVE_ICONV
1806 
1807 /*********************************************/
1808 /* SCORE_XXX - calculate score               */
1809 /*********************************************/
1810 
/*
 * Score how plausibly `str` is EUC-JP text.
 * Each printable ASCII byte (0x20-0x7e) earns 1 point and each two-byte
 * kanji (lead and trail both 0xa1-0xfe) earns 2.  Control bytes, the 0x8f
 * prefix and half-width kana (0x8e followed by 0xa1-0xdf) earn nothing.
 * Returns the score, or -1 if `str` is NULL or any byte does not fit.
 */
int score_eucj(unsigned char *str)
{
    enum { EUCJ_START, EUCJ_KANJI_TRAIL, EUCJ_KANA_TRAIL } state = EUCJ_START;
    const unsigned char *p;
    int points = 0;
    int malformed = 0;

    if (str == NULL) {
        return -1;
    }

    for (p = str; *p != 0; p++) {
        const unsigned char c = *p;

        if (state == EUCJ_KANJI_TRAIL) {
            /* second byte of a kanji */
            if (c >= 0xa1 && c <= 0xfe) {
                points += 2;
            } else {
                malformed = 1;
            }
            state = EUCJ_START;
        } else if (state == EUCJ_KANA_TRAIL) {
            /* half-width kana payload, worth no points */
            if (c < 0xa1 || c > 0xdf) {
                malformed = 1;
            }
            state = EUCJ_START;
        } else if (c >= 0x20 && c <= 0x7e) {
            points++;                           /* ASCII */
        } else if (c >= 0xa1 && c <= 0xfe) {
            state = EUCJ_KANJI_TRAIL;           /* first byte of a kanji */
        } else if (c == 0x8e) {
            state = EUCJ_KANA_TRAIL;            /* kana prefix */
        } else if (c != 0x8f && c >= 0x20) {
            /* neither the supplementary-kanji prefix nor a control byte */
            malformed = 1;
        }
    }

    return malformed ? -1 : points;
}
1843 
/*
 * Score how plausibly `str` is Shift_JIS text.
 * Each printable ASCII byte (0x20-0x7e) earns 1 point and each two-byte
 * character (lead 0x81-0x9f or 0xe0-0xfc, trail 0x40-0x7e or 0x80-0xfc)
 * earns 2.  Control bytes and single-byte kana (0xa1-0xdf) earn nothing.
 * Returns the score, or -1 if `str` is NULL or any byte does not fit.
 */
int score_sjis(unsigned char *str)
{
    const unsigned char *p;
    int awaiting_trail = 0;                     /* set right after a lead byte */
    int points = 0;
    int malformed = 0;

    if (str == NULL) {
        return -1;
    }

    for (p = str; *p != 0; p++) {
        const unsigned char c = *p;

        if (awaiting_trail) {
            /* second byte of a double-byte character */
            if ((c >= 0x40 && c <= 0x7e) || (c >= 0x80 && c <= 0xfc)) {
                points += 2;
            } else {
                malformed = 1;
            }
            awaiting_trail = 0;
        } else if (c >= 0x20 && c <= 0x7e) {
            points++;                           /* ASCII */
        } else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xfc)) {
            awaiting_trail = 1;                 /* lead byte */
        } else if (c >= 0x20 && !(c >= 0xa1 && c <= 0xdf)) {
            /* neither a control byte nor single-byte kana */
            malformed = 1;
        }
    }

    return malformed ? -1 : points;
}
1872 
/*
 * Score how plausibly `str` is UTF-8 text.
 * Each printable ASCII byte (0x20-0x7e) earns 1 point; a completed 2-, 3-
 * or 4-byte sequence (lead 0xc0-0xdf, 0xe0-0xef, 0xf0-0xf7, followed by
 * continuation bytes 0x80-0xbf) earns 1, 3 or 4 points respectively.
 * Control bytes earn nothing.  A sequence still open at the end of the
 * string is not counted and not penalised.
 * Returns the score, or -1 if `str` is NULL or any byte does not fit.
 */
int score_utf8(unsigned char *str)
{
    const unsigned char *p;
    int pending = 0;                            /* continuation bytes still expected   */
    int award = 0;                              /* points granted when sequence closes */
    int points = 0;
    int malformed = 0;

    if (str == NULL) {
        return -1;
    }

    for (p = str; *p != 0; p++) {
        const unsigned char c = *p;

        if (pending > 0) {
            if (c >= 0x80 && c <= 0xbf) {
                pending--;
                if (pending == 0) {
                    points += award;            /* sequence complete */
                }
            } else {
                /* broken sequence: flag it and resynchronise on the next byte */
                malformed = 1;
                pending = 0;
            }
            continue;
        }

        if (c >= 0x20 && c <= 0x7e) {
            points++;                           /* ASCII */
        } else if (c >= 0xc0 && c <= 0xdf) {
            pending = 1;                        /* two-byte sequence (greek etc.) */
            award = 1;
        } else if (c >= 0xe0 && c <= 0xef) {
            pending = 2;                        /* three-byte sequence (kanji etc.) */
            award = 3;
        } else if (c >= 0xf0 && c <= 0xf7) {
            pending = 3;                        /* four-byte sequence */
            award = 4;
        } else if (c >= 0x20) {
            malformed = 1;                      /* stray continuation, 0x7f or 0xf8+ */
        }
    }

    return malformed ? -1 : points;
}
1919 
1920 #endif
1921 
1922 /*********************************************/
1923 /* SRCH_STRING - get search strings from ref */
1924 /*********************************************/
1925 void
srch_string(char * refer,char * ptr)1926 srch_string(char *refer, char *ptr)
1927 {
1928     unsigned char tmpbuf[BUFSIZE];
1929     unsigned char srch[80] = "";
1930     unsigned char *cp1, *cp2, *cps;
1931     int sp_flg = 0;
1932 #ifdef HAVE_ICONV
1933     int sjis, eucj, utf8;
1934     unsigned char tmpbuf2[BUFSIZE];
1935     unsigned char *cp3;
1936     size_t inlen, outlen;
1937 #endif
1938 
1939     /* Check if search engine referrer or return  */
1940     if ((cps = isinlist(search_list, refer)) == NULL)
1941         return;
1942 
1943     /* Try to find query variable */
1944     srch[0] = '?';
1945     srch[sizeof(srch) - 1] = '\0';
1946     strcpy(&srch[1], cps);                      /* First, try "?..."      */
1947     if ((cp1 = strstr(ptr, srch)) == NULL) {
1948         srch[0] = '&';                          /* Next, try "&..."       */
1949         if ((cp1 = strstr(ptr, srch)) == NULL)
1950             return;                             /* If not found, split... */
1951     }
1952     cp2 = tmpbuf;
1953 
1954     while (*cp1 != '=' && *cp1 != '\0') {
1955         cp1++;
1956     }
1957     if (*cp1 != '\0') {
1958         cp1++;
1959     }
1960 
1961     while (*cp1 != '&' && *cp1 != '\0') {
1962         if (*cp1 == '"' || *cp1 == ',' || *cp1 == '?') {
1963             cp1++;
1964             continue;
1965         } /* skip bad ones..    */
1966         else {
1967             if (*cp1 == '+')
1968                 *cp1 = ' ';                     /* change + to space  */
1969             if (sp_flg && *cp1 == ' ') {
1970                 cp1++;
1971                 continue;
1972             }                                   /* compress spaces    */
1973             if (*cp1 == ' ')
1974                 sp_flg = 1;
1975             else
1976                 sp_flg = 0;                     /* (flag spaces here) */
1977             *cp2++ = tolower(*cp1);             /* normal character   */
1978             cp1++;
1979         }
1980     }
1981     *cp2 = '\0';
1982     cp2 = tmpbuf;
1983     if (tmpbuf[0] == '?')
1984         tmpbuf[0] = ' ';                        /* format fix ?       */
1985     while (*cp2 != 0 && isspace(*cp2))
1986         cp2++;                                  /* skip leading sps.  */
1987     if (*cp2 == '\0')
1988         return;
1989 
1990     /* any trailing spaces? */
1991     cp1 = cp2 + strlen(cp2) - 1;
1992     while (cp1 != cp2)
1993         if (isspace(*cp1))
1994             *cp1-- = '\0';
1995         else
1996             break;
1997 
1998 #ifdef HAVE_ICONV
1999     utf8 = score_utf8(cp2);
2000     sjis = score_sjis(cp2);
2001     eucj = score_eucj(cp2);
2002     if (sjis > utf8 && sjis > eucj) {
2003 	iconv(cd_from_sjis, NULL, 0, NULL, 0);
2004 	cp3 = cp2;
2005 	inlen = strlen(cp2) + 1;
2006 	cp1 = tmpbuf2;
2007 	outlen = sizeof(tmpbuf2);
2008 	if (iconv(cd_from_sjis,
2009 		  (const char **)&cp3, &inlen, (char**)&cp1, &outlen) >= 0 &&
2010 	    inlen == 0) {
2011 	    cp2 = tmpbuf2;
2012 	}
2013     } else if (eucj > utf8 && eucj > sjis) {
2014 	iconv(cd_from_eucj, NULL, 0, NULL, 0);
2015 	cp3 = cp2;
2016 	inlen = strlen(cp2) + 1;
2017 	cp1 = tmpbuf2;
2018 	outlen = sizeof(tmpbuf2);
2019 	if (iconv(cd_from_eucj,
2020 		  (const char **)&cp3, &inlen, (char**)&cp1, &outlen) >= 0 &&
2021 	    inlen == 0) {
2022 	    cp2 = tmpbuf2;
2023 	}
2024     }
2025 #endif
2026 
2027     /* strip invalid chars */
2028     cp1 = cp2;
2029     while (*cp1 != '\0') {
2030         if (((*cp1 > 0) && (*cp1 < 32)) || (*cp1 == 127))
2031             *cp1 = '_';
2032         cp1++;
2033     }
2034 
2035     if (put_snode(cp2, (unsigned long) 1, sr_htab)) {
2036         /* Error adding search string node, skipping .... */
2037         ERRVPRINT(VERBOSE1, "%s %s\n", _("Error adding Search String Node, skipping"), tmpbuf);
2038     }
2039     return;
2040 }
2041 
2042 /*********************************************/
2043 /* GET_DOMAIN - Get domain portion of host   */
2044 /*********************************************/
2045 
2046 char *
get_domain(char * str)2047 get_domain(char *str)
2048 {
2049     char *cp;
2050     int i = g_settings.settings.group_domains + 1;
2051 
2052     cp = str + strlen(str) - 1;
2053     if (isdigit((int) *cp))
2054         return NULL;                            /* ignore IP addresses */
2055 
2056     while (cp != str) {
2057         if (*cp == '.')
2058             if (!(--i))
2059                 return ++cp;
2060         cp--;
2061     }
2062     return cp;
2063 }
2064 
2065 /*********************************************/
2066 /* OUR_GZGETS - enhanced gzgets for log only */
2067 /*********************************************/
2068 
2069 char *
our_gzgets(gzFile fp,char * buf,int size)2070 our_gzgets(gzFile fp, char *buf, int size)
2071 {
2072     char *out_cp = buf;                         /* point to output */
2073 
2074     while (1) {
2075         if (f_cp > (f_buf + f_end - 1)) {       /* load? */
2076             f_end = gzread(fp, f_buf, GZ_BUFSIZE);
2077             if (f_end <= 0)
2078                 return Z_NULL;
2079             f_cp = f_buf;
2080         }
2081 
2082         if (--size) {                           /* more? */
2083             *out_cp++ = *f_cp;
2084             if (*f_cp++ == '\n') {
2085                 *out_cp = '\0';
2086                 return buf;
2087             }
2088         } else {
2089             *out_cp = '\0';
2090             return buf;
2091         }
2092     }
2093 }
2094 
/*****************************************************************/
/*                                                               */
/* JDATE  - Julian date calculator                               */
/*                                                               */
/* Calculates the number of days since Jan 1, 0000.              */
/*                                                               */
/* Originally written by Bradford L. Barrett (03/17/1988)        */
/* Returns an unsigned long value representing the number of     */
/* days since January 1, 0000.                                   */
/*                                                               */
/* Note: Due to the changes made by Pope Gregory XIII in the     */
/*       16th Century (Feb 24, 1582), dates before 1583 will     */
/*       not return a truly accurate number (will be at least    */
/*       10 days off).  Somehow, I don't think this will         */
/*       present much of a problem for most situations :)        */
/*                                                               */
/* Usage: days = jdate(day, month, year)                         */
/*        month is 1 based (1 = January ... 12 = December) and   */
/*        is used unchecked as an index into the month table.    */
/*                                                               */
/* The number returned is adjusted by 5 to facilitate day of     */
/* week calculations.  The mod of the returned value gives the   */
/* day of the week the date is.  (ie: dow = days % 7 ) where     */
/* dow will return 0=Sunday, 1=Monday, 2=Tuesday, etc...         */
/*                                                               */
/*****************************************************************/

unsigned long
jdate(int day, int month, int year)
{
    /* days elapsed before the first of each month in a non-leap year */
    static const int days_before_month[] = { 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334 };
    unsigned long total;
    int leap_year;

    /* whole years, then the position inside the current year */
    total = (unsigned long) year * 365;
    total += day;
    total += days_before_month[month - 1];

    /* leap days: one every 4 years, minus centuries, plus every 400th year */
    total += (year + 4) / 4;
    total -= (year / 100) - (year / 400);

    /* the count above already includes this year's leap day; take it back
     * for dates that fall before March 1st */
    leap_year = (year % 4 == 0) && ((year % 100 != 0) || (year % 400 == 0));
    if (leap_year && month < 3) {
        total--;
    }

    /* shift so that (result % 7) yields 0 for Sunday */
    return (total + 5);
}
2137 
2138 
2139 /************************************************************************
2140  * do_agent_mangling                                                    *
2141  *                                                                      *
2142  * Tries to reduce a complex Agent string down to a simpler level.      *
2143  *                                                                      *
2144  * Arguments:                                                           *
2145  * char *agent The Agent to reduce. This function will "mangle" this!   *
2146  *                                                                      *
2147  * Returns:                                                             *
2148  * int. 0 on Success.                                                   *
2149  *                                                                      *
2150  * TODO: Return something else if fails!                                *
2151  * TODO: Do this nicer in PCRE or equiv.                                *
2152  ************************************************************************/
2153 int
do_agent_mangling(char * agent)2154 do_agent_mangling(char *agent)
2155 {
2156     char *cp1, *cp2;                            /* generic char pointers       */
2157     char *agent_start;                          /* Start and End of the Agent string - mainly bounds checking */
2158     char *agent_end;
2159 
2160     agent_start = cp2 = agent;
2161     agent_end = agent + sizeof(agent) - 2;
2162 
2163     cp1 = strstr(agent_start, "ompatible");     /* check known fakers */
2164     if (cp1 != NULL) {
2165         while (*cp1 != ';' && *cp1 != '\0' && (cp1 < agent_end)) {
2166             cp1++;
2167         }
2168         /* kludge for Mozilla/3.01 (compatible;) */
2169         if (*cp1++ == ';' && strcmp(cp1, ")\"")) {      /* success! */
2170             while (*cp1 == ' ' && (cp1 < agent_end)) {
2171                 cp1++;                          /* eat spaces */
2172             }
2173             while (*cp1 != '.' && *cp1 != '\0' && *cp1 != ';' && (cp1 < agent_end) && (cp2 < agent_end)) {
2174                 *cp2++ = *cp1++;
2175             }
2176             if (g_settings.settings.mangle_agent < 5) {
2177                 while (*cp1 != '.' && *cp1 != ';' && *cp1 != '\0' && (cp1 < agent_end) && (cp2 < agent_end)) {
2178                     *cp2++ = *cp1++;
2179                 }
2180                 if (*cp1 != ';' && *cp1 != '\0' && (cp1 < agent_end - 2) && (cp2 < agent_end - 2)) {
2181                     *cp2++ = *cp1++;
2182                     *cp2++ = *cp1++;
2183                 }
2184             }
2185             if (g_settings.settings.mangle_agent < 4) {
2186                 if (*cp1 >= '0' && *cp1 <= '9' && (cp1 < agent_end) && (cp2 < agent_end)) {
2187                     *cp2++ = *cp1++;
2188                 }
2189             }
2190             if (g_settings.settings.mangle_agent < 3) {
2191                 while (*cp1 != ';' && *cp1 != '\0' && *cp1 != '(' && (cp1 < agent_end) && (cp2 < agent_end)) {
2192                     *cp2++ = *cp1++;
2193                 }
2194             }
2195             if (g_settings.settings.mangle_agent < 2) {
2196                 /* Level 1 - try to get OS */
2197                 cp1 = strstr(agent_start, ")");
2198                 if (cp1 != NULL) {
2199                     *cp2++ = ' ';
2200                     *cp2++ = '(';
2201                     while (*cp1 != ';' && *cp1 != '(' && cp1 > agent_start) {
2202                         cp1--;
2203                     }
2204                     if (cp1 != agent_start && *cp1 != '\0' && (cp1 < agent_end)) {
2205                         cp1++;
2206                     }
2207                     while (*cp1 == ' ' && *cp1 != '\0' && (cp1 < agent_end)) {
2208                         cp1++;
2209                     }
2210                     while (*cp1 != ')' && *cp1 != '\0' && cp1 > cp2 && (cp1 < agent_end) && (cp2 < agent_end)) {
2211                         *cp2++ = *cp1++;
2212                     }
2213                     *cp2++ = ')';
2214                 }
2215             }
2216             *cp2 = '\0';
2217         } else {                                /* nothing after "compatible", should we mangle? */
2218             /* not for now */
2219         }
2220     } else {
2221         cp1 = strstr(agent_start, "Opera");     /* Opera flavor         */
2222         if (cp1 != NULL) {
2223             while (*cp1 != '/' && *cp1 != ' ' && *cp1 != '\0' && (cp1 < agent_end) && (cp2 < agent_end)) {
2224                 *cp2++ = *cp1++;
2225             }
2226             while (*cp1 != '.' && *cp1 != '\0' && (cp1 < agent_end) && (cp2 < agent_end)) {
2227                 *cp2++ = *cp1++;
2228             }
2229             if (g_settings.settings.mangle_agent < 5) {
2230                 while (*cp1 != '.' && *cp1 != '\0' && (cp1 < agent_end - 2) && (cp2 < agent_end - 2)) {
2231                     *cp2++ = *cp1++;
2232                 }
2233                 *cp2++ = *cp1++;
2234                 *cp2++ = *cp1++;
2235             }
2236             if (g_settings.settings.mangle_agent < 4)
2237                 if (*cp1 >= '0' && *cp1 <= '9' && (cp1 < agent_end) && (cp2 < agent_end)) {
2238                     *cp2++ = *cp1++;
2239                 }
2240             if (g_settings.settings.mangle_agent < 3)
2241                 while (*cp1 != ' ' && *cp1 != '\0' && *cp1 != '(' && (cp1 < agent_end) && (cp2 < agent_end)) {
2242                     *cp2++ = *cp1++;
2243                 }
2244             if (g_settings.settings.mangle_agent < 2) {
2245                 cp1 = strstr(agent_start, "(");
2246                 if (cp1 != NULL) {
2247                     cp1++;
2248                     *cp2++ = ' ';
2249                     *cp2++ = '(';
2250                     while (*cp1 != ';' && *cp1 != ')' && *cp1 != '\0' && (cp1 < agent_end) && (cp2 < agent_end - 1)) {
2251                         *cp2++ = *cp1++;
2252                     }
2253                     *cp2++ = ')';
2254                 }
2255             }
2256             *cp2 = '\0';
2257         } else {
2258             cp1 = strstr(agent_start, "Mozilla");       /* Netscape flavor      */
2259             if (cp1 != NULL) {
2260                 while (*cp1 != '/' && *cp1 != ' ' && *cp1 != '\0' && (cp1 < agent_end) && (cp2 < agent_end)) {
2261                     *cp2++ = *cp1++;
2262                 }
2263                 if (*cp1 == ' ') {
2264                     *cp1 = '/';
2265                 }
2266                 while (*cp1 != '.' && *cp1 != '\0' && (cp1 < agent_end) && (cp2 < agent_end)) {
2267                     *cp2++ = *cp1++;
2268                 }
2269                 if (g_settings.settings.mangle_agent < 5) {
2270                     while (*cp1 != '.' && *cp1 != '\0' && (cp1 < agent_end - 2) && (cp2 < agent_end - 2)) {
2271                         *cp2++ = *cp1++;
2272                     }
2273                     *cp2++ = *cp1++;
2274                     *cp2++ = *cp1++;
2275                 }
2276                 if (g_settings.settings.mangle_agent < 4)
2277                     if (*cp1 >= '0' && *cp1 <= '9') {
2278                         *cp2++ = *cp1++;
2279                     }
2280                 if (g_settings.settings.mangle_agent < 3) {
2281                     while (*cp1 != ' ' && *cp1 != '\0' && *cp1 != '(' && (cp1 < agent_end) && (cp2 < agent_end)) {
2282                         *cp2++ = *cp1++;
2283                     }
2284                 }
2285                 if (g_settings.settings.mangle_agent < 2) {
2286                     /* Level 1 - Try to get OS */
2287                     cp1 = strstr(agent_start, "(");
2288                     if (cp1 != NULL) {
2289                         cp1++;
2290                         *cp2++ = ' ';
2291                         *cp2++ = '(';
2292                         while (*cp1 != ';' && *cp1 != ')' && *cp1 != '\0' && (cp1 < agent_end) && (cp2 < agent_end - 1)) {
2293                             *cp2++ = *cp1++;
2294                         }
2295                         *cp2++ = ')';
2296                     }
2297                 }
2298                 *cp2 = '\0';
2299             }
2300         }
2301     }
2302     return (0);
2303 }
2304 
2305 
2306 /************************************************************************
2307  * response_code_index                                                  *
2308  *                                                                      *
2309  * Returns the index for a given response code                          *
2310  *                                                                      *
2311  * Arguments:                                                           *
2312  * int resp_code: The response code to have the index discovered for.   *
2313  *                                                                      *
2314  * Returns:                                                             *
2315  * int. Response Code Index                                             *
2316  ************************************************************************/
2317 int
response_code_index(int resp_code)2318 response_code_index(int resp_code)
2319 {
2320     int i;
2321 
2322     switch (resp_code) {
2323     case RC_CONTINUE:
2324         i = IDX_CONTINUE;
2325         break;
2326     case RC_SWITCHPROTO:
2327         i = IDX_SWITCHPROTO;
2328         break;
2329     case RC_OK:
2330         i = IDX_OK;
2331         break;
2332     case RC_CREATED:
2333         i = IDX_CREATED;
2334         break;
2335     case RC_ACCEPTED:
2336         i = IDX_ACCEPTED;
2337         break;
2338     case RC_NONAUTHINFO:
2339         i = IDX_NONAUTHINFO;
2340         break;
2341     case RC_NOCONTENT:
2342         i = IDX_NOCONTENT;
2343         break;
2344     case RC_RESETCONTENT:
2345         i = IDX_RESETCONTENT;
2346         break;
2347     case RC_PARTIALCONTENT:
2348         i = IDX_PARTIALCONTENT;
2349         break;
2350     case RC_MULTIPLECHOICES:
2351         i = IDX_MULTIPLECHOICES;
2352         break;
2353     case RC_MOVEDPERM:
2354         i = IDX_MOVEDPERM;
2355         break;
2356     case RC_MOVEDTEMP:
2357         i = IDX_MOVEDTEMP;
2358         break;
2359     case RC_SEEOTHER:
2360         i = IDX_SEEOTHER;
2361         break;
2362     case RC_NOMOD:
2363         i = IDX_NOMOD;
2364         break;
2365     case RC_USEPROXY:
2366         i = IDX_USEPROXY;
2367         break;
2368     case RC_MOVEDTEMPORARILY:
2369         i = IDX_MOVEDTEMPORARILY;
2370         break;
2371     case RC_BAD:
2372         i = IDX_BAD;
2373         break;
2374     case RC_UNAUTH:
2375         i = IDX_UNAUTH;
2376         break;
2377     case RC_PAYMENTREQ:
2378         i = IDX_PAYMENTREQ;
2379         break;
2380     case RC_FORBIDDEN:
2381         i = IDX_FORBIDDEN;
2382         break;
2383     case RC_NOTFOUND:
2384         i = IDX_NOTFOUND;
2385         break;
2386     case RC_METHODNOTALLOWED:
2387         i = IDX_METHODNOTALLOWED;
2388         break;
2389     case RC_NOTACCEPTABLE:
2390         i = IDX_NOTACCEPTABLE;
2391         break;
2392     case RC_PROXYAUTHREQ:
2393         i = IDX_PROXYAUTHREQ;
2394         break;
2395     case RC_TIMEOUT:
2396         i = IDX_TIMEOUT;
2397         break;
2398     case RC_CONFLICT:
2399         i = IDX_CONFLICT;
2400         break;
2401     case RC_GONE:
2402         i = IDX_GONE;
2403         break;
2404     case RC_LENGTHREQ:
2405         i = IDX_LENGTHREQ;
2406         break;
2407     case RC_PREFAILED:
2408         i = IDX_PREFAILED;
2409         break;
2410     case RC_REQENTTOOLARGE:
2411         i = IDX_REQENTTOOLARGE;
2412         break;
2413     case RC_REQURITOOLARGE:
2414         i = IDX_REQURITOOLARGE;
2415         break;
2416     case RC_UNSUPMEDIATYPE:
2417         i = IDX_UNSUPMEDIATYPE;
2418         break;
2419     case RC_RNGNOTSATISFIABLE:
2420         i = IDX_RNGNOTSATISFIABLE;
2421         break;
2422     case RC_EXPECTATIONFAILED:
2423         i = IDX_EXPECTATIONFAILED;
2424         break;
2425     case RC_SERVERERR:
2426         i = IDX_SERVERERR;
2427         break;
2428     case RC_NOTIMPLEMENTED:
2429         i = IDX_NOTIMPLEMENTED;
2430         break;
2431     case RC_BADGATEWAY:
2432         i = IDX_BADGATEWAY;
2433         break;
2434     case RC_UNAVAIL:
2435         i = IDX_UNAVAIL;
2436         break;
2437     case RC_GATEWAYTIMEOUT:
2438         i = IDX_GATEWAYTIMEOUT;
2439         break;
2440     case RC_BADHTTPVER:
2441         i = IDX_BADHTTPVER;
2442         break;
2443     default:
2444         i = IDX_UNDEFINED;
2445         break;
2446     }
2447     return (i);
2448 }
2449 
2450 
/************************************************************************
 * cleanup_host
 *
 * Normalises a host field: a blank host becomes the (translated) text
 * "Unknown", anything else is lower-cased in place.
 *
 * Arguments:
 * char *hostname: The hostname. Is modified by this function.
 *                 NOTE(review): the buffer must be able to hold the
 *                 translated "Unknown" text plus its terminator; its size
 *                 is not visible here - confirm against the caller.
 *
 * Returns:
 * int. 1 on failure (NULL hostname), 0 on success.
 ************************************************************************/
int
cleanup_host(char *hostname)
{
    if (hostname == NULL) {
        return (1);
    }

    if (hostname[0] == '\0') {
        /* Catch blank hostnames.  The terminator has to be copied as well:
         * only hostname[0] is known to be NUL, the bytes behind it are not. */
        strcpy(hostname, _("Unknown"));
    } else {
        strtolower(hostname);
    }
    return (0);
}
2474 
2475 
/************************************************************************
 * cleanup_user
 *
 * Tidies a user/ident field: an empty field becomes "-", otherwise the
 * field is cut at the first character below 32 or at the first '"'.
 * The result is then run through unescape().
 *
 * Arguments:
 * char *ident: The user/ident field. Is modified by this function.
 *
 * Returns:
 * int. Always 0.
 ************************************************************************/
int
cleanup_user(char *ident)
{
    char *scan;                                 /* walks the field */

    if (ident[0] != 0) {
        /* keep the leading run of acceptable characters only */
        for (scan = ident; *scan >= 32 && *scan != '"'; scan++) {
            /* nothing: just advance */
        }
        *scan = '\0';
    } else {
        /* no user given */
        ident[0] = '-';
        ident[1] = '\0';
    }

    /* unescape user name */
    unescape(ident);

    return (0);
}
2509 
2510 
2511 /************************************************************************
2512  * cleanup_url                                                          *
2513  *                                                                      *
2514  * Does what the name says, given a URL, remove all junk and clean it   *
2515  *                                                                      *
2516  * Arguments:                                                           *
2517  * char *url: The URL. Is modified by this function.                    *
2518  *                                                                      *
2519  * Returns:                                                             *
2520  * int. 1 on failure, 0 on success.                                     *
2521  ************************************************************************/
2522 int
cleanup_url(char * url)2523 cleanup_url(char *url)
2524 {
2525     char *cp1;                                  /* generic char pointers       */
2526     LISTPTR lptr;                               /* generic list pointer        */
2527 
2528     /* un-escape URL */
2529     unescape(url);
2530 
2531     if (g_settings.flags.ignore_index_alias == false) {
2532         /* strip query portion of cgi scripts */
2533         cp1 = url;
2534         while (*cp1 != '\0')
2535             if (!isurlchar(*cp1)) {
2536                 *cp1 = '\0';
2537                 break;
2538             } else
2539                 cp1++;
2540         if (url[0] == '\0') {
2541             url[0] = '/';
2542             url[1] = '\0';
2543         }
2544 
2545         /* strip off index.html (or any aliases) */
2546         lptr = index_alias;
2547         while (lptr != NULL) {
2548             if ((cp1 = strstr(url, lptr->string)) != NULL) {
2549                 if ((cp1 == url) || (*(cp1 - 1) == '/')) {
2550                     *cp1 = '\0';
2551                     if (url[0] == '\0') {
2552                         url[0] = '/';
2553                         url[1] = '\0';
2554                     }
2555                     break;
2556                 }
2557             }
2558             lptr = lptr->next;
2559         }
2560     }
2561     return (0);
2562 }
2563 
2564 
2565 /************************************************************************
2566  * cleanup_refer                                                        *
2567  *                                                                      *
2568  * Does what the name says, given a refer field, remove all junk and    *
2569  *  clean it.                                                           *
2570  * Will also extract a search string if appropriate.                    *
2571  *                                                                      *
2572  * Arguments:                                                           *
2573  * char *refer: The Referer Field. Is modified by this function.        *
2574  * char *srchstr: The search string, which *may* be extracted           *
2575  *                                                                      *
2576  * Returns:                                                             *
2577  * int. 1 on failure, 0 on success.                                     *
2578  ************************************************************************/
2579 int
cleanup_refer(char * refer,char * srchstr)2580 cleanup_refer(char *refer, char *srchstr)
2581 {
2582     char *cp1, *cp2;                            /* generic char pointers       */
2583 
2584     /* unescape referrer */
2585     unescape(refer);
2586     unescape(refer);	/* XXX */
2587 
2588     /* fix referrer field */
2589     cp1 = refer;
2590     cp2 = refer;
2591     if (*cp2 != '\0') {
2592         while (*cp1 != '\0') {
2593             if ((*cp1 < 32 && *cp1 > 0) || *cp1 == 127 || *cp1 == '<') {
2594                 *cp1 = 0;
2595             } else {
2596                 *cp2++ = *cp1++;
2597             }
2598         }
2599         cp2 = '\0';
2600     }
2601 
2602     /* strip query portion of cgi referrals */
2603     cp1 = refer;
2604     if (*cp1 != '\0') {
2605         while (*cp1 != '\0') {
2606             if (!isurlchar(*cp1)) {
2607                 /* Save query portion in srchstr */
2608                 strlcpy(srchstr, cp1, MAXSRCH - 1);
2609                 *cp1++ = '\0';
2610                 break;
2611             } else {
2612                 cp1++;
2613             }
2614         }
2615         /* handle null referrer */
2616         if (refer[0] == '\0') {
2617             refer[0] = '-';
2618             refer[1] = '\0';
2619         }
2620     }
2621 
2622     /* if HTTP request, lowercase http://sitename/ portion */
2623     cp1 = refer;
2624     if ((*cp1 == 'h') || (*cp1 == 'H')) {
2625         while ((*cp1 != '/') && (*cp1 != '\0')) {
2626             *cp1 = tolower(*cp1);
2627             cp1++;
2628         }
2629         /* now do hostname */
2630         if ((*cp1 == '/') && (*(cp1 + 1) == '/')) {
2631             cp1++;
2632             cp1++;
2633         }
2634         while ((*cp1 != '/') && (*cp1 != '\0')) {
2635             *cp1 = tolower(*cp1);
2636             cp1++;
2637         }
2638     }
2639     return (0);
2640 }
2641 
2642 
2643 /************************************************************************
2644  * cleanup_agent                                                        *
2645  *                                                                      *
2646  * Does what the name says, given an agent field, remove all junk and   *
2647  *  clean it.                                                           *
2648  *                                                                      *
2649  * Arguments:                                                           *
2650  * char *agent: The Agent Field. Is modified by this function.          *
2651  *                                                                      *
2652  * Returns:                                                             *
2653  * int. 1 on failure, 0 on success.                                     *
2654  ************************************************************************/
2655 int
cleanup_agent(char * agent)2656 cleanup_agent(char *agent)
2657 {
2658     char *cp1, *cp2, *cp3;                      /* generic char pointers       */
2659 
2660     /* Do we need to mangle? */
2661     if (g_settings.settings.mangle_agent) {
2662         do_agent_mangling(agent);
2663     }
2664 
2665     /* fix user agent field */
2666     cp1 = agent;
2667     cp3 = cp2 = cp1++;
2668     if ((*cp2 != '\0') && ((*cp2 == '"') || (*cp2 == '('))) {
2669         while (*cp1 |= '\0') {
2670             cp3 = cp2;
2671             *cp2++ = *cp1++;
2672         }
2673         *cp3 = '\0';
2674     }
2675 
2676     cp1 = agent;
2677     while (*cp1 != 0) {                         /* get rid of more common _bad_ chars ;)   */
2678         if ((*cp1 < 32) || (*cp1 == 127) || (*cp1 == '<') || (*cp1 == '>')) {
2679             *cp1 = '\0';
2680             break;
2681         } else {
2682             cp1++;
2683         }
2684     }
2685 
2686     return (0);
2687 }
2688 
2689 
2690 /************************************************************************
2691  * isaffirmitive
2692  *
2693  * Will return true for any obvious, case insensitive, affirmative
2694  * value.
2695  * False for otherwise.
2696  *
2697  * TODO: Make Language independent?
2698  * Assumes value is a string of up to 20 chars long!
2699  ***********************************************************************/
2700 bool
isaffirmitive(char * value)2701 isaffirmitive(char *value)
2702 {
2703     int i;
2704     int length;
2705     char lowered_value[21];
2706 
2707     length = strlen(value);
2708     if (length > 20) {
2709         length = 20;
2710     } else if (length <= 0) {
2711         return false;
2712     }
2713 
2714     for (i = 0; i < length; i++) {
2715         lowered_value[i] = tolower(value[i]);
2716     }
2717     lowered_value[i] = '\0';
2718 
2719     if (strncmp(lowered_value, "yes", length) == 0 || strncmp(lowered_value, "true", length) == 0 || strncmp(lowered_value, "y", length) == 0) {
2720         return true;
2721     }
2722     if (!(strncmp(lowered_value, "no", length) == 0 || strncmp(lowered_value, "false", length) == 0 || strncmp(lowered_value, "n", length) == 0)) {
2723         ERRVPRINT(VERBOSE1, "%s: %s\n", _("Invalid Yes/No choice. Defaulting to No. Was"), value);
2724     }
2725     return false;
2726 }
2727 
2728 
/************************************************************************
 * strtoupper
 *
 * Converts a string to Upper Case, in place.
 * Returns a pointer to the string (NULL if str is NULL).
 ***********************************************************************/
char *
strtoupper(char *str)
{
    char *cp;

    if (str == NULL) {
        return NULL;
    }

    for (cp = str; *cp != '\0'; cp++) {
        /* the <ctype.h> functions need a value in unsigned char range */
        *cp = (char) toupper((unsigned char) *cp);
    }
    return str;
}
2746 
2747 
/************************************************************************
 * strtolower
 *
 * Converts a string to Lower Case, in place.
 * Returns a pointer to the string (NULL if str is NULL).
 ***********************************************************************/
char *
strtolower(char *str)
{
    char *cp;

    if (str == NULL) {
        return NULL;
    }

    for (cp = str; *cp != '\0'; cp++) {
        /* the <ctype.h> functions need a value in unsigned char range */
        *cp = (char) tolower((unsigned char) *cp);
    }
    return str;
}
2765 
2766 /************************************************************************
2767  ************************************************************************
2768  *                      END OF FILE                                     *
2769  ************************************************************************/
2770