1 /*
2     webalizer - a web server log analysis program
3 
4     Copyright (C) 1997-2013  Bradford L. Barrett
5 
6     This program is free software; you can redistribute it and/or modify
7     it under the terms of the GNU General Public License as published by
8     the Free Software Foundation; either version 2 of the License, or
9     (at your option) any later version, and provided that the above
10     copyright and permission notice is included with all distributed
11     copies of this or derived software.
12 
13     This program is distributed in the hope that it will be useful,
14     but WITHOUT ANY WARRANTY; without even the implied warranty of
15     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16     GNU General Public License for more details.
17 
18     You should have received a copy of the GNU General Public License
19     along with this program; if not, write to the Free Software
20     Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
21 
22 */
23 
24 /*********************************************/
25 /* STANDARD INCLUDES                         */
26 /*********************************************/
27 
28 /* Fix broken Zlib 64 bitness */
29 #if _FILE_OFFSET_BITS == 64
30 #ifndef _LARGEFILE64_SOURCE
31 #define _LARGEFILE64_SOURCE 1
32 #endif
33 #endif
34 
35 #include <time.h>
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <string.h>
39 #include <errno.h>
40 #include <unistd.h>                           /* normal stuff             */
41 #include <locale.h>
42 #include <ctype.h>
43 #include <sys/utsname.h>
44 #include <zlib.h>
45 #include <sys/stat.h>
46 
47 /* ensure getopt */
48 #ifdef HAVE_GETOPT_H
49 #include <getopt.h>
50 #endif
51 
52 /* ensure sys/types */
53 #ifndef _SYS_TYPES_H
54 #include <sys/types.h>
55 #endif
56 
57 /* Need socket header? */
58 #ifdef HAVE_SYS_SOCKET_H
59 #include <sys/socket.h>
60 #endif
61 
62 /* some systems need this */
63 #ifdef HAVE_MATH_H
64 #include <math.h>
65 #endif
66 
67 #ifdef USE_DNS
68 #include <netdb.h>
69 #include <netinet/in.h>
70 #include <arpa/inet.h>
71 #include <db.h>
72 #endif  /* USE_DNS */
73 
74 #ifdef USE_GEOIP
75 #include <maxminddb.h>
76 #endif
77 
78 #ifdef USE_BZIP
79 #include <bzlib.h>
80 int bz2_rewind(void **, char *, char *);
81 #endif
82 
83 #include "webalizer.h"                         /* main header              */
84 #include "output.h"
85 #include "parser.h"
86 #include "preserve.h"
87 #include "hashtab.h"
88 #include "linklist.h"
89 #include "webalizer_lang.h"                    /* lang. support            */
90 #ifdef USE_DNS
91 #include "dns_resolv.h"
92 #endif
93 
94 /* internal function prototypes */
95 
96 void    clear_month();                              /* clear monthly stuff */
97 char    *unescape(char *);                          /* unescape URLs       */
98 void    print_opts(char *);                         /* print options       */
99 void    print_version();                            /* print version info  */
100 int     isurlchar(unsigned char, int);              /* valid URL char fnc. */
101 void    get_config(char *);                         /* Read a config file  */
102 static  char *save_opt(char *);                     /* save conf option    */
103 void    srch_string(char *);                        /* search str analysis */
104 char	*get_domain(char *);                        /* return domain name  */
105 void    agent_mangle(char *);                       /* reformat user agent */
106 char    *our_gzgets(void *, char *, int);           /* our gzgets          */
107 int     ouricmp(char *, char *);                    /* case ins. compare   */
108 int     isipaddr(char *);                           /* is IP address test  */
109 
110 /*********************************************/
111 /* GLOBAL VARIABLES                          */
112 /*********************************************/
113 
114 char    *version     = "2.23";                /* program version          */
115 char    *editlvl     = "08";                  /* edit level               */
116 char    *moddate     = "26-Aug-2013";         /* modification date        */
117 char    *copyright   = "Copyright 1997-2013 by Bradford L. Barrett"; /* copyright notice */
118 
119 int     verbose      = 2;                     /* 2=verbose,1=err, 0=none  */
120 int     debug_mode   = 0;                     /* debug mode flag          */
121 int     time_me      = 0;                     /* timing display flag      */
122 int     local_time   = 1;                     /* 1=localtime 0=GMT (UTC)  */
123 int     hist_gap     = 0;                     /* 1=error w/hist, save bkp */
124 int     ignore_hist  = 0;                     /* history flag (1=skip)    */
125 int     ignore_state = 0;                     /* state flag (1=skip)      */
126 int     default_index= 1;                     /* add index. alias (1=yes) */
127 int     hourly_graph = 1;                     /* hourly graph display     */
128 int     hourly_stats = 1;                     /* hourly stats table       */
129 int     daily_graph  = 1;                     /* daily graph display      */
130 int     daily_stats  = 1;                     /* daily stats table        */
131 int     ctry_graph   = 1;                     /* country graph display    */
132 int     shade_groups = 1;                     /* Group shading 0=no 1=yes */
133 int     hlite_groups = 1;                     /* Group hlite 0=no 1=yes   */
134 int     mangle_agent = 0;                     /* mangle user agents       */
135 int     incremental  = 0;                     /* incremental mode 1=yes   */
136 int     use_https    = 0;                     /* use 'https://' on URLs   */
137 int     htaccess     = 0;                     /* create .htaccess? (0=no) */
138 int     stripcgi     = 1;                     /* strip url cgi (0=no)     */
139 int     normalize    = 1;                     /* normalize CLF URL (0=no) */
140 int     trimsquid    = 0;                     /* trim squid urls (0=no)   */
141 int     searchcasei  = 1;                     /* case insensitive search  */
142 int     visit_timeout= 1800;                  /* visit timeout (seconds)  */
143 int     graph_legend = 1;                     /* graph legend (1=yes)     */
144 int     graph_lines  = 2;                     /* graph lines (0=none)     */
145 int     fold_seq_err = 0;                     /* fold seq err (0=no)      */
146 int     log_type     = LOG_CLF;               /* log type (default=CLF)   */
147 int     group_domains= 0;                     /* Group domains 0=none     */
148 int     hide_sites   = 0;                     /* Hide ind. sites (0=no)   */
149 int     link_referrer= 0;                     /* Link referrers (0=no)    */
150 char    *hname       = NULL;                  /* hostname for reports     */
151 char    *state_fname = "webalizer.current";   /* run state file name      */
152 char    *hist_fname  = "webalizer.hist";      /* name of history file     */
153 char    *html_ext    = "html";                /* HTML file suffix         */
154 char    *dump_ext    = "tab";                 /* Dump file suffix         */
155 char    *conf_fname  = NULL;                  /* name of config file      */
156 char    *log_fname   = NULL;                  /* log file name            */
157 char    *out_dir     = NULL;                  /* output directory         */
158 char    *blank_str   = "";                    /* blank string             */
159 char    *geodb_fname = NULL;                  /* GeoDB database filename  */
160 char    *dns_cache   = NULL;                  /* DNS cache file name      */
161 int     dns_children = 0;                     /* DNS children (0=don't do)*/
162 int     cache_ips    = 0;                     /* CacheIPs in DB (0=no)    */
163 int     cache_ttl    = 7;                     /* DNS Cache TTL (days)     */
164 int     geodb        = 0;                     /* Use GeoDB (0=no)         */
165 int     graph_mths   = 12;                    /* # months in index graph  */
166 int     index_mths   = 12;                    /* # months in index table  */
167 int     year_hdrs    = 1;                     /* index year separators    */
168 int     year_totals  = 1;                     /* index year subtotals     */
169 int     use_flags    = 0;                     /* Show flags in ctry table */
170 char    *flag_dir    = "flags";               /* location of flag icons   */
171 
172 #ifdef USE_GEOIP
173 int     geoip        = 0;                     /* Use GeoIP (0=no)         */
174 char    *geoip_db    = NULL;                  /* GeoIP database filename  */
175 int	mmdb_open    = MMDB_FILE_OPEN_ERROR;  /* MMDB_open() status code  */
176 MMDB_s  mmdb;                                 /* GeoIP database handle    */
177 #endif
178 
179 int     ntop_sites   = 30;                    /* top n sites to display   */
180 int     ntop_sitesK  = 10;                    /* top n sites (by kbytes)  */
181 int     ntop_urls    = 30;                    /* top n url's to display   */
182 int     ntop_urlsK   = 10;                    /* top n url's (by kbytes)  */
183 int     ntop_entry   = 10;                    /* top n entry url's        */
184 int     ntop_exit    = 10;                    /* top n exit url's         */
185 int     ntop_refs    = 30;                    /* top n referrers          */
186 int     ntop_agents  = 15;                    /* top n user agents        */
187 int     ntop_ctrys   = 30;                    /* top n countries          */
188 int     ntop_search  = 20;                    /* top n search strings     */
189 int     ntop_users   = 20;                    /* top n users to display   */
190 
191 int     all_sites    = 0;                     /* List All sites (0=no)    */
192 int     all_urls     = 0;                     /* List All URLs  (0=no)    */
193 int     all_refs     = 0;                     /* List All Referrers       */
194 int     all_agents   = 0;                     /* List All User Agents     */
195 int     all_search   = 0;                     /* List All Search Strings  */
196 int     all_users    = 0;                     /* List All Usernames       */
197 
198 int     dump_sites   = 0;                     /* Dump tab delimited sites */
199 int     dump_urls    = 0;                     /* Dump URLs                */
200 int     dump_refs    = 0;                     /* Dump referrers           */
201 int     dump_agents  = 0;                     /* Dump user agents         */
202 int     dump_users   = 0;                     /* Dump usernames           */
203 int     dump_search  = 0;                     /* Dump search strings      */
204 int     dump_header  = 0;                     /* Dump header as first rec */
205 char    *dump_path   = NULL;                  /* Path for dump files      */
206 
207 int        cur_year=0, cur_month=0,           /* year/month/day/hour      */
208            cur_day=0, cur_hour=0,             /* tracking variables       */
209            cur_min=0, cur_sec=0;
210 
211 u_int64_t  cur_tstamp=0;                      /* Timestamp...             */
212 u_int64_t  rec_tstamp=0;
213 u_int64_t  req_tstamp=0;
214 u_int64_t  epoch;                             /* used for timestamp adj.  */
215 
216 int        check_dup=0;                       /* check for dup flag       */
217 int        gz_log=COMP_NONE;                  /* log compression (COMP_*) */
218 
219 double     t_xfer=0.0;                        /* monthly total xfer value */
220 u_int64_t  t_hit=0,t_file=0,t_site=0,         /* monthly total vars       */
221            t_url=0,t_ref=0,t_agent=0,
222            t_page=0, t_visit=0, t_user=0;
223 
224 double     tm_xfer[31];                       /* daily transfer totals    */
225 
226 u_int64_t  tm_hit[31], tm_file[31],           /* daily total arrays       */
227            tm_site[31], tm_page[31],
228            tm_visit[31];
229 
230 u_int64_t  dt_site;                           /* daily 'sites' total      */
231 
232 u_int64_t  ht_hit=0, mh_hit=0;                /* hourly hits totals       */
233 
234 u_int64_t  th_hit[24], th_file[24],           /* hourly total arrays      */
235            th_page[24];
236 
237 double     th_xfer[24];                       /* hourly transfer totals   */
238 
239 int        f_day,l_day;                       /* first/last day vars      */
240 
241 struct     utsname system_info;               /* system info structure    */
242 
243 u_int64_t  ul_bogus =0;                       /* Dummy counter for groups */
244 
245 struct     log_struct log_rec;                /* expanded log storage     */
246 
247 void       *zlog_fp;                          /* compressed logfile ptr   */
248 FILE       *log_fp;                           /* regular logfile pointer  */
249 
250 char       buffer[BUFSIZE];                   /* log file record buffer   */
251 char       tmp_buf[BUFSIZE];                  /* scratch copy of 'buffer' */
252 
253 CLISTPTR   *top_ctrys    = NULL;              /* Top countries table      */
254 
255 #define    GZ_BUFSIZE 16384                   /* our_getfs buffer size    */
256 char       f_buf[GZ_BUFSIZE];                 /* our_getfs buffer         */
257 char       *f_cp=f_buf+GZ_BUFSIZE;            /* pointer into the buffer  */
258 int        f_end=0;                           /* count to end of buffer   */
259 
260 char    hit_color[]   = "#00805c";            /* graph hit color          */
261 char    file_color[]  = "#0040ff";            /* graph file color         */
262 char    site_color[]  = "#ff8000";            /* graph site color         */
263 char    kbyte_color[] = "#ff0000";            /* graph kbyte color        */
264 char    page_color[]  = "#00e0ff";            /* graph page color         */
265 char    visit_color[] = "#ffff00";            /* graph visit color        */
266 char    misc_color[]  = "#00e0ff";            /* graph misc color         */
267 char    pie_color1[]  = "#800080";            /* pie additional color 1   */
268 char    pie_color2[]  = "#80ffc0";            /* pie additional color 2   */
269 char    pie_color3[]  = "#ff00ff";            /* pie additional color 3   */
270 char    pie_color4[]  = "#ffc080";            /* pie additional color 4   */
271 
272 /*********************************************/
273 /* MAIN - start here                         */
274 /*********************************************/
275 
main(int argc,char * argv[])276 int main(int argc, char *argv[])
277 {
278    int      i;                           /* generic counter             */
279    char     *cp1, *cp2, *cp3;            /* generic char pointers       */
280    char     host_buf[MAXHOST+1];         /* used to save hostname       */
281 
282    NLISTPTR lptr;                        /* generic list pointer        */
283 
284    extern char *optarg;                  /* used for command line       */
285    extern int optind;                    /* parsing routine 'getopt'    */
286    extern int opterr;
287 
288    time_t start_time, end_time;          /* program timers              */
289    float  temp_time;                     /* temporary time storage      */
290 
291    int    rec_year,rec_month=1,rec_day,rec_hour,rec_min,rec_sec;
292 
293    int       good_rec    =0;             /* 1 if we had a good record   */
294    u_int64_t total_rec   =0;             /* Total Records Processed     */
295    u_int64_t total_ignore=0;             /* Total Records Ignored       */
296    u_int64_t total_bad   =0;             /* Total Bad Records           */
297 
298    int    max_ctry;                      /* max countries defined       */
299 
300    /* month names used for parsing logfile (shouldn't be lang specific) */
301    char *log_month[12]={ "jan", "feb", "mar",
302                          "apr", "may", "jun",
303                          "jul", "aug", "sep",
304                          "oct", "nov", "dec"};
305 
306    /* stat struct for files */
307    struct stat log_stat;
308 
309    /* Assume that LC_CTYPE is what the user wants for non-ASCII chars   */
310    setlocale(LC_CTYPE,"");
311 
312    /* initalize epoch */
313    epoch=jdate(1,1,1970);                /* used for timestamp adj.     */
314 
315    sprintf(tmp_buf,"%s/webalizer.conf",ETCDIR);
316    /* check for default config file */
317    if (!access("webalizer.conf",F_OK))
318       get_config("webalizer.conf");
319    else if (!access(tmp_buf,F_OK))
320       get_config(tmp_buf);
321 
322    /* get command line options */
323    opterr = 0;     /* disable parser errors */
324    while ((i=getopt(argc,argv,"a:A:bc:C:dD:e:E:fF:g:GhHiI:jJ:k:K:l:Lm:M:n:N:o:O:pP:qQr:R:s:S:t:Tu:U:vVwW:x:XYz:Z"))!=EOF)
325    {
326       switch (i)
327       {
328         case 'a': add_nlist(optarg,&hidden_agents); break; /* Hide agents   */
329         case 'A': ntop_agents=atoi(optarg);  break;  /* Top agents          */
330         case 'b': ignore_state=1;            break;  /* Ignore state file   */
331         case 'c': get_config(optarg);        break;  /* Config file         */
332         case 'C': ntop_ctrys=atoi(optarg);   break;  /* Top countries       */
333         case 'd': debug_mode=1;              break;  /* Debug               */
334 	case 'D': dns_cache=optarg;          break;  /* DNS Cache filename  */
335         case 'e': ntop_entry=atoi(optarg);   break;  /* Top entry pages     */
336         case 'E': ntop_exit=atoi(optarg);    break;  /* Top exit pages      */
337         case 'f': fold_seq_err=1;            break;  /* Fold sequence errs  */
338         case 'F': log_type=(tolower(optarg[0])=='f')?
339                    LOG_FTP:(tolower(optarg[0])=='s')?
340                    LOG_SQUID:(tolower(optarg[0])=='w')?
341                    LOG_W3C:LOG_CLF;          break;  /* define log type     */
342 	case 'g': group_domains=atoi(optarg); break; /* GroupDomains (0=no) */
343         case 'G': hourly_graph=0;            break;  /* no hourly graph     */
344         case 'h': print_opts(argv[0]);       break;  /* help                */
345         case 'H': hourly_stats=0;            break;  /* no hourly stats     */
346         case 'i': ignore_hist=1;             break;  /* Ignore history      */
347         case 'I': add_nlist(optarg,&index_alias); break; /* Index alias     */
348         case 'j': geodb=1;                   break;  /* Enable GeoDB        */
349         case 'J': geodb_fname=optarg;        break;  /* GeoDB db filename   */
350         case 'k': graph_mths=atoi(optarg);   break;  /* # months idx graph  */
351         case 'K': index_mths=atoi(optarg);   break;  /* # months idx table  */
352         case 'l': graph_lines=atoi(optarg);  break;  /* Graph Lines         */
353         case 'L': graph_legend=0;            break;  /* Graph Legends       */
354         case 'm': visit_timeout=atoi(optarg); break; /* Visit Timeout       */
355         case 'M': mangle_agent=atoi(optarg); break;  /* mangle user agents  */
356         case 'n': hname=optarg;              break;  /* Hostname            */
357         case 'N': dns_children=atoi(optarg); break;  /* # of DNS children   */
358         case 'o': out_dir=optarg;            break;  /* Output directory    */
359         case 'O': add_nlist(optarg,&omit_page); break; /* pages not counted */
360         case 'p': incremental=1;             break;  /* Incremental run     */
361         case 'P': add_nlist(optarg,&page_type); break; /* page view types   */
362         case 'q': verbose=1;                 break;  /* Quiet (verbose=1)   */
363         case 'Q': verbose=0;                 break;  /* Really Quiet        */
364         case 'r': add_nlist(optarg,&hidden_refs);   break; /* Hide referrer */
365         case 'R': ntop_refs=atoi(optarg);    break;  /* Top referrers       */
366         case 's': add_nlist(optarg,&hidden_sites);  break; /* Hide site     */
367         case 'S': ntop_sites=atoi(optarg);   break;  /* Top sites           */
368         case 't': msg_title=optarg;          break;  /* Report title        */
369         case 'T': time_me=1;                 break;  /* TimeMe              */
370         case 'u': add_nlist(optarg,&hidden_urls);   break; /* hide URL      */
371         case 'U': ntop_urls=atoi(optarg);    break;  /* Top urls            */
372         case 'v': verbose=2; debug_mode=1;   break;  /* Verbose             */
373         case 'V': print_version();           break;  /* Version             */
374 #ifdef USE_GEOIP
375         case 'w': geoip=1;                   break;  /* Enable GeoIP        */
376         case 'W': geoip_db=optarg;           break;  /* GeoIP database name */
377 #endif
378         case 'x': html_ext=optarg;           break;  /* HTML file extension */
379         case 'X': hide_sites=1;              break;  /* Hide ind. sites     */
380         case 'Y': ctry_graph=0;              break;  /* Supress ctry graph  */
381         case 'Z': normalize=0;               break;  /* Dont normalize URLs */
382         case 'z': use_flags=1; flag_dir=optarg; break; /* Ctry flag dir     */
383       }
384    }
385 
386    if (argc - optind != 0) log_fname = argv[optind];
387    if ( log_fname && (log_fname[0]=='-')) log_fname=NULL; /* force STDIN?   */
388 
389    /* check for gzipped file - .gz */
390    if (log_fname) if (!strcmp((log_fname+strlen(log_fname)-3),".gz"))
391       gz_log=COMP_GZIP;
392 
393 #ifdef USE_BZIP
394    /* check for bzip file - .bz2 */
395    if (log_fname) if (!strcmp((log_fname+strlen(log_fname)-4),".bz2"))
396       gz_log=COMP_BZIP;
397 #endif
398 
399    /* setup our internal variables */
400    init_counters();                      /* initalize (zero) main counters  */
401    memset(hist, 0, sizeof(hist));        /* initalize (zero) history array  */
402 
403    /* add default index. alias if needed */
404    if (default_index) add_nlist("index.",&index_alias);
405 
406    if (page_type==NULL)                  /* check if page types present     */
407    {
408       if ((log_type==LOG_CLF)||(log_type==LOG_SQUID)||(log_type==LOG_W3C))
409       {
410          add_nlist("htm*"  ,&page_type); /* if no page types specified, we  */
411          add_nlist("cgi"   ,&page_type); /* use the default ones here...    */
412          if (!isinlist(page_type,html_ext)) add_nlist(html_ext,&page_type);
413       }
414       else add_nlist("txt" ,&page_type); /* FTP logs default to .txt        */
415    }
416 
417    for (max_ctry=0;ctry[max_ctry].desc;max_ctry++);
418    if (ntop_ctrys > max_ctry) ntop_ctrys = max_ctry;   /* force upper limit */
419    if (graph_lines> 20)       graph_lines= 20;         /* keep graphs sane! */
420    if (graph_mths<12)         graph_mths=12;
421    if (graph_mths>GRAPHMAX)   graph_mths=GRAPHMAX;
422    if (index_mths<12)         index_mths=12;
423    if (index_mths>HISTSIZE)   index_mths=HISTSIZE;
424 
425    if (log_type == LOG_FTP)
426    {
427       /* disable stuff for ftp logs */
428       ntop_entry=ntop_exit=0;
429       ntop_search=0;
430    }
431    else
432    {
433       if (search_list==NULL)
434       {
435          /* If no search engines defined, define some :) */
436          add_glist(".google.       q="      ,&search_list);
437          add_glist("yahoo.com      p="      ,&search_list);
438          add_glist("altavista.com  q="      ,&search_list);
439          add_glist("aolsearch.     query="  ,&search_list);
440          add_glist("ask.co         q="      ,&search_list);
441          add_glist("eureka.com     q="      ,&search_list);
442          add_glist("lycos.com      query="  ,&search_list);
443          add_glist("hotbot.com     MT="     ,&search_list);
444          add_glist("msn.com        q="      ,&search_list);
445          add_glist("infoseek.com   qt="     ,&search_list);
446          add_glist("webcrawler searchText=" ,&search_list);
447          add_glist("excite         search=" ,&search_list);
448          add_glist("netscape.com   query="  ,&search_list);
449          add_glist("mamma.com      query="  ,&search_list);
450          add_glist("alltheweb.com  q="      ,&search_list);
451          add_glist("northernlight.com qr="  ,&search_list);
452       }
453    }
454 
455    /* ensure entry/exits don't exceed urls */
456    i=(ntop_urls>ntop_urlsK)?ntop_urls:ntop_urlsK;
457    if (ntop_entry>i) ntop_entry=i;
458    if (ntop_exit>i)  ntop_exit=i;
459 
460    for (i=0;i<MAXHASH;i++)
461    {
462       sm_htab[i]=sd_htab[i]=NULL;        /* initalize hash tables           */
463       um_htab[i]=NULL;
464       rm_htab[i]=NULL;
465       am_htab[i]=NULL;
466       sr_htab[i]=NULL;
467    }
468 
469    /* Be polite and announce yourself... */
470    if (verbose>1)
471    {
472       uname(&system_info);
473       printf("Webalizer V%s-%s (%s %s %s) %s\n", version,editlvl,
474               system_info.sysname, system_info.release,
475               system_info.machine,language);
476    }
477 
478 #ifndef USE_DNS
479    if (strstr(argv[0],"webazolver")!=0)
480       /* DNS support not present, aborting... */
481       { printf("%s\n",msg_dns_abrt); exit(1); }
482 #else
483    /* Force sane values for cache TTL */
484    if (cache_ttl<1)   cache_ttl=1;
485    if (cache_ttl>100) cache_ttl=100;
486 #endif  /* USE_DNS */
487 
488    /* open log file */
489    if (log_fname)
490    {
491       /* stat the file */
492       if ( !(lstat(log_fname, &log_stat)) )
493       {
494          /* check if the file a symlink */
495          if ( S_ISLNK(log_stat.st_mode) )
496          {
497             if (verbose)
498             fprintf(stderr,"%s %s (symlink)\n",msg_log_err,log_fname);
499             exit(EBADF);
500          }
501       }
502 
503       if (gz_log)
504       {
505          /* open compressed file */
506 #ifdef USE_BZIP
507          if (gz_log==COMP_BZIP)
508             zlog_fp = BZ2_bzopen(log_fname,"rb");
509          else
510 #endif
511          zlog_fp = gzopen(log_fname, "rb");
512          if (zlog_fp==Z_NULL)
513          {
514             /* Error: Can't open log file ... */
515             fprintf(stderr, "%s %s (%d)\n",msg_log_err,log_fname,ENOENT);
516             exit(ENOENT);
517          }
518       }
519       else
520       {
521          /* open regular file */
522          log_fp = fopen(log_fname,"r");
523          if (log_fp==NULL)
524          {
525             /* Error: Can't open log file ... */
526             fprintf(stderr, "%s %s\n",msg_log_err,log_fname);
527             exit(1);
528          }
529       }
530    }
531 
532    /* Using logfile ... */
533    if (verbose>1)
534    {
535       printf("%s %s (",msg_log_use,log_fname?log_fname:"STDIN");
536       if (gz_log==COMP_GZIP) printf("gzip-");
537 #ifdef USE_BZIP
538       if (gz_log==COMP_BZIP) printf("bzip-");
539 #endif
540       switch (log_type)
541       {
542          /* display log file type hint */
543          case LOG_CLF:   printf("clf)\n");   break;
544          case LOG_FTP:   printf("ftp)\n");   break;
545          case LOG_SQUID: printf("squid)\n"); break;
546          case LOG_W3C:   printf("w3c)\n");   break;
547       }
548    }
549 
550    /* switch directories if needed */
551    if (out_dir)
552    {
553       if (chdir(out_dir) != 0)
554       {
555          /* Error: Can't change directory to ... */
556          fprintf(stderr, "%s %s\n",msg_dir_err,out_dir);
557          exit(1);
558       }
559    }
560 
561 #ifdef USE_DNS
562    if (strstr(argv[0],"webazolver")!=0)
563    {
564       if (!dns_children) dns_children=5;  /* default dns children if needed */
565       if (!dns_cache)
566       {
567          /* No cache file specified, aborting... */
568          fprintf(stderr,"%s\n",msg_dns_nocf);     /* Must have a cache file */
569          exit(1);
570       }
571    }
572 
573    if (dns_cache && dns_children)    /* run-time resolution */
574    {
575       if (dns_children > MAXCHILD) dns_children=MAXCHILD;
576       /* DNS Lookup (#children): */
577       if (verbose>1) printf("%s (%d): ",msg_dns_rslv,dns_children);
578       fflush(stdout);
579       (gz_log)?dns_resolver(zlog_fp):dns_resolver(log_fp);
580 #ifdef USE_BZIP
581       (gz_log==COMP_BZIP)?bz2_rewind(&zlog_fp, log_fname, "rb"):
582 #endif
583       (gz_log==COMP_GZIP)?gzrewind(zlog_fp):
584       (log_fname)?rewind(log_fp):exit(0);
585    }
586 
587    if (strstr(argv[0],"webazolver")!=0) exit(0);   /* webazolver exits here */
588 
589    if (dns_cache)
590    {
591       if (!open_cache()) { dns_cache=NULL; dns_db=NULL; }
592       else
593       {
594          /* Using DNS cache file <filaneme> */
595          if (verbose>1) printf("%s %s\n",msg_dns_usec,dns_cache);
596       }
597    }
598 
599    /* Open GeoDB? */
600    if (geodb)
601    {
602       geo_db=geodb_open(geodb_fname);
603       if (geo_db==NULL)
604       {
605          if (verbose) printf("%s: %s\n",msg_geo_open,
606             (geodb_fname)?geodb_fname:msg_geo_dflt);
607          if (verbose) printf("GeoDB %s\n",msg_geo_nolu);
608          geodb=0;
609       }
610       else if (verbose>1) printf("%s %s\n",
611          msg_geo_use,geodb_ver(geo_db,buffer));
612 #ifdef USE_GEOIP
613       if (geoip) geoip=0;   /* Disable GeoIP if using GeoDB */
614 #endif
615    }
616 #endif  /* USE_DNS */
617 
618 #ifdef USE_GEOIP
619    /* open GeoIP database */
620    if (geoip)
621    {
622       if (geoip_db!=NULL)
623          mmdb_open = MMDB_open(geoip_db, MMDB_MODE_MMAP, &mmdb);
624       else
625          mmdb_open = MMDB_open("GeoLite2-Country.mmdb", MMDB_MODE_MMAP, &mmdb);
626 
627       /* Did we open one? */
628       if (mmdb_open!=MMDB_SUCCESS)
629       {
630          /* couldn't open.. warn user */
631          if (verbose) printf("GeoIP %s\n",msg_geo_nolu);
632          geoip=0;
633       }
634    }
635 #endif /* USE_GEOIP */
636 
637    /* Creating output in ... */
638    if (verbose>1)
639       printf("%s %s\n",msg_dir_use,out_dir?out_dir:msg_cur_dir);
640 
641    /* prep hostname */
642    if (!hname)
643    {
644       if (uname(&system_info)) hname="localhost";
645       else hname=system_info.nodename;
646    }
647 
648    /* Hostname for reports is ... */
649    if (strlen(hname)) if (verbose>1) printf("%s '%s'\n",msg_hostname,hname);
650 
651    /* get past history */
652    if (ignore_hist) { if (verbose>1) printf("%s\n",msg_ign_hist); }
653    else get_history();
654 
655    if (incremental)                      /* incremental processing?         */
656    {
657       if ((i=restore_state()))           /* restore internal data structs   */
658       {
659          /* Error: Unable to restore run data (error num) */
660          /* if (verbose) fprintf(stderr,"%s (%d)\n",msg_bad_data,i); */
661          fprintf(stderr,"%s (%d)\n",msg_bad_data,i);
662          exit(1);
663       }
664    }
665 
666    /* Allocate memory for our TOP countries array */
667    if (ntop_ctrys  != 0)
668    { if ( (top_ctrys=calloc(ntop_ctrys,sizeof(CLISTPTR))) == NULL)
669     /* Can't get memory, Top Countries disabled! */
670     {if (verbose) fprintf(stderr,"%s\n",msg_nomem_tc); ntop_ctrys=0;}}
671 
672    /* get processing start time */
673    start_time = time(NULL);
674 
675    /*********************************************/
676    /* MAIN PROCESS LOOP - read through log file */
677    /*********************************************/
678 
679    while ( (gz_log)?(our_gzgets(zlog_fp,buffer,BUFSIZE) != Z_NULL):
680            (fgets(buffer,BUFSIZE,log_fname?log_fp:stdin) != NULL))
681    {
682       total_rec++;
683       if (strlen(buffer) == (BUFSIZE-1))
684       {
685          if (verbose)
686          {
687             fprintf(stderr,"%s",msg_big_rec);
688             if (debug_mode) fprintf(stderr,":\n%s",buffer);
689             else fprintf(stderr,"\n");
690          }
691 
692          total_bad++;                     /* bump bad record counter      */
693 
694          /* get the rest of the record */
695          while ( (gz_log)?(our_gzgets(zlog_fp,buffer,BUFSIZE)!=Z_NULL):
696                  (fgets(buffer,BUFSIZE,log_fname?log_fp:stdin)!=NULL))
697          {
698             if (strlen(buffer) < BUFSIZE-1)
699             {
700                if (debug_mode && verbose) fprintf(stderr,"%s\n",buffer);
701                break;
702             }
703             if (debug_mode && verbose) fprintf(stderr,"%s",buffer);
704          }
705          continue;                        /* go get next record if any    */
706       }
707 
708       /* got a record... */
709       strcpy(tmp_buf, buffer);            /* save buffer in case of error */
710       if (parse_record(buffer))           /* parse the record             */
711       {
712          /*********************************************/
713          /* PASSED MINIMAL CHECKS, DO A LITTLE MORE   */
714          /*********************************************/
715 
716          /* convert month name to lowercase */
717          for (i=4;i<7;i++)
718             log_rec.datetime[i]=tolower(log_rec.datetime[i]);
719 
720          /* lowercase sitename/IPv6 addresses */
721          cp1=log_rec.hostname;
722          while (*cp1++!='\0') *cp1=tolower(*cp1);
723 
724          /* get year/month/day/hour/min/sec values    */
725          for (i=0;i<12;i++)
726          {
727             if (strncmp(log_month[i],&log_rec.datetime[4],3)==0)
728                { rec_month = i+1; break; }
729          }
730 
731          rec_year=atoi(&log_rec.datetime[8]);    /* get year number (int)   */
732          rec_day =atoi(&log_rec.datetime[1]);    /* get day number          */
733          rec_hour=atoi(&log_rec.datetime[13]);   /* get hour number         */
734          rec_min =atoi(&log_rec.datetime[16]);   /* get minute number       */
735          rec_sec =atoi(&log_rec.datetime[19]);   /* get second number       */
736 
737          /* Kludge for Netscape server time (0-24?) error                   */
738          if (rec_hour>23) rec_hour=0;
739 
740          /* minimal sanity check on date */
741          if ((i>=12)||(rec_min>59)||(rec_sec>60)||(rec_year<1990))
742          {
743             total_bad++;                /* if a bad date, bump counter      */
744             if (verbose)
745             {
746                fprintf(stderr,"%s: %s [%llu]",
747                  msg_bad_date,log_rec.datetime,total_rec);
748                if (debug_mode) fprintf(stderr,":\n%s\n",tmp_buf);
749                else fprintf(stderr,"\n");
750             }
751             continue;                   /* and ignore this record           */
752          }
753 
754          /*********************************************/
755          /* GOOD RECORD, CHECK INCREMENTAL/TIMESTAMPS */
756          /*********************************************/
757 
758          /* Flag as a good one */
759          good_rec = 1;
760 
         /* get current record's timestamp (seconds since epoch) */
762          req_tstamp=cur_tstamp;
763          rec_tstamp=((jdate(rec_day,rec_month,rec_year)-epoch)*86400)+
764                      (rec_hour*3600)+(rec_min*60)+rec_sec;
765 
766          /* Do we need to check for duplicate records? (incremental mode)   */
767          if (check_dup)
768          {
769             /* check if less than/equal to last record processed            */
770             if ( rec_tstamp <= cur_tstamp )
771             {
772                /* if it is, assume we have already processed and ignore it  */
773                total_ignore++;
774                continue;
775             }
776             else
777             {
778                /* if it isn't.. disable any more checks this run            */
779                check_dup=0;
780                /* now check if it's a new month                             */
781                if ( (cur_month != rec_month) || (cur_year != rec_year) )
782                {
783                   clear_month();
784                   cur_sec   = rec_sec;          /* set current counters     */
785                   cur_min   = rec_min;
786                   cur_hour  = rec_hour;
787                   cur_day   = rec_day;
788                   cur_month = rec_month;
789                   cur_year  = rec_year;
790                   cur_tstamp= rec_tstamp;
791                   f_day=l_day=rec_day;          /* reset first and last day */
792                }
793             }
794          }
795 
796          /* check for out of sequence records */
797          if (rec_tstamp/3600 < cur_tstamp/3600)
798          {
799             if (!fold_seq_err && ((rec_tstamp+SLOP_VAL)/3600<cur_tstamp/3600) )
800                { total_ignore++; continue; }
801             else
802             {
803                rec_sec   = cur_sec;             /* if folding sequence      */
804                rec_min   = cur_min;             /* errors, just make it     */
805                rec_hour  = cur_hour;            /* look like the last       */
806                rec_day   = cur_day;             /* good records timestamp   */
807                rec_month = cur_month;
808                rec_year  = cur_year;
809                rec_tstamp= cur_tstamp;
810             }
811          }
812          cur_tstamp=rec_tstamp;                 /* update current timestamp */
813 
814          /*********************************************/
815          /* DO SOME PRE-PROCESS FORMATTING            */
816          /*********************************************/
817 
818          /* un-escape URL */
819          unescape(log_rec.url);
820 
821          /* fix URL field */
822          cp1 = cp2 = log_rec.url;
823          /* handle null '-' case here... */
824          if (*++cp1 == '-') strcpy(log_rec.url,"/INVALID-URL");
825          else
826          {
827             /* strip actual URL out of request */
828             while  ( (*cp1 != ' ') && (*cp1 != '\0') ) cp1++;
829             if (*cp1 != '\0')
830             {
831                /* scan to begin of actual URL field */
832                while ((*cp1 == ' ') && (*cp1 != '\0')) cp1++;
833                /* remove duplicate / if needed */
834                while (( *cp1=='/') && (*(cp1+1)=='/')) cp1++;
835                while (( *cp1!='\0')&&(*cp1!='"')) *cp2++=*cp1++;
836                *cp2='\0';
837             }
838          }
839 
840          /* strip query portion of cgi scripts */
841          cp1 = log_rec.url;
842          while (*cp1 != '\0')
843            if (!isurlchar(*cp1, stripcgi)) { *cp1 = '\0'; break; }
844            else cp1++;
845          if (log_rec.url[0]=='\0')
846            { log_rec.url[0]='/'; log_rec.url[1]='\0'; }
847 
848          /* Normalize URL */
849          if (log_type==LOG_CLF && log_rec.resp_code!=RC_NOTFOUND && normalize)
850          {
851             if ( ((cp2=strstr(log_rec.url,"://"))!=NULL)&&(cp2<log_rec.url+6) )
852             {
853                cp1=cp2+3;
854                /* see if a '/' is present after it  */
855                if ( (cp2=strchr(cp1,(int)'/'))==NULL) cp1--;
856                else cp1=cp2;
857                /* Ok, now shift url string          */
858                cp2=log_rec.url; while (*cp1!='\0') *cp2++=*cp1++; *cp2='\0';
859             }
860             /* extra sanity checks on URL string */
861             while ((cp2=strstr(log_rec.url,"/./")))
862                { cp1=cp2+2; while (*cp1!='\0') *cp2++=*cp1++; *cp2='\0'; }
863             if (log_rec.url[0]!='/')
864             {
865                if ( log_rec.resp_code==RC_OK             ||
866                     log_rec.resp_code==RC_PARTIALCONTENT ||
867                     log_rec.resp_code==RC_NOMOD)
868                {
869                   if (debug_mode)
870                      fprintf(stderr,"Converted URL '%s' to '/'\n",log_rec.url);
871                   log_rec.url[0]='/';
872                   log_rec.url[1]='\0';
873                }
874                else
875                {
876                   if (debug_mode)
877                      fprintf(stderr,"Invalid URL: '%s'\n",log_rec.url);
878                   strcpy(log_rec.url,"/INVALID-URL");
879                }
880             }
881             while ( log_rec.url[ (i=strlen(log_rec.url)-1) ] == '?' )
882                log_rec.url[i]='\0';   /* drop trailing ?s if any */
883          }
884          else
885          {
886             /* check for service (ie: http://) and lowercase if found */
887             if (((cp2=strstr(log_rec.url,"://"))!= NULL)&&(cp2<log_rec.url+6))
888             {
889                cp1=log_rec.url;
890                while (cp1!=cp2)
891                {
892                   if ( (*cp1>='A') && (*cp1<='Z')) *cp1 += 'a'-'A';
893                   cp1++;
894                }
895             }
896          }
897 
898          /* strip off index.html (or any aliases) */
899          lptr=index_alias;
900          while (lptr!=NULL)
901          {
902             if ((cp1=strstr(log_rec.url,lptr->string))!=NULL)
903             {
904                if (*(cp1-1)=='/')
905                {
906                   if ( !stripcgi && (cp2=strchr(cp1,'?'))!=NULL )
907                   { while(*cp2) *cp1++=*cp2++; *cp1='\0'; }
908                   else *cp1='\0';
909                   break;
910                }
911             }
912             lptr=lptr->next;
913          }
914 
915          /* unescape referrer */
916          unescape(log_rec.refer);
917 
918          /* fix referrer field */
919          cp1 = log_rec.refer;
920          cp3 = cp2 = cp1++;
921          if ( (*cp2 != '\0') && (*cp2 == '"') )
922          {
923             while ( *cp1 != '\0' )
924             {
925                cp3=cp2;
926                if (((unsigned char)*cp1<32&&(unsigned char)*cp1>0) ||
927                     *cp1==127 || (unsigned char)*cp1=='<') *cp1=0;
928                else *cp2++=*cp1++;
929             }
930             *cp3 = '\0';
931          }
932 
933          /* get query portion of cgi referrals */
934          cp1 = log_rec.refer;
935          if (*cp1 != '\0')
936          {
937             while (*cp1 != '\0')
938             {
939                if (!isurlchar(*cp1, 1))
940                {
                  /* Save query portion in log_rec.srchstr */
942                   strncpy(log_rec.srchstr,(char *)cp1,MAXSRCH);
943 #ifdef USE_FULL_CGI_REFS
944                   *cp1++;
945 #else
946                   *cp1++='\0';
947 #endif
948                   break;
949                }
950                else cp1++;
951             }
952             /* handle null referrer */
953             if (log_rec.refer[0]=='\0')
954               { log_rec.refer[0]='-'; log_rec.refer[1]='\0'; }
955          }
956 
957          /* if HTTP request, lowercase http://sitename/ portion */
958          cp1 = log_rec.refer;
959          if ( (*cp1=='h') || (*cp1=='H'))
960          {
961             while ( (*cp1!='/') && (*cp1!='\0'))
962             {
963                if ( (*cp1>='A') && (*cp1<='Z')) *cp1 += 'a'-'A';
964                cp1++;
965             }
966             /* now do hostname */
967             if ( (*cp1=='/') && ( *(cp1+1)=='/')) {cp1++; cp1++;}
968             while ( (*cp1!='/') && (*cp1!='\0'))
969             {
970                if ( (*cp1>='A') && (*cp1<='Z')) *cp1 += 'a'-'A';
971                cp1++;
972             }
973          }
974 
975          /* Do we need to mangle? */
976          if (mangle_agent) agent_mangle(log_rec.agent);
977 
978          /* if necessary, shrink referrer to fit storage */
979          if (strlen(log_rec.refer)>=MAXREFH)
980          {
981             if (verbose) fprintf(stderr,"%s [%llu]\n",
982                 msg_big_ref,total_rec);
983             log_rec.refer[MAXREFH-1]='\0';
984          }
985 
986          /* if necessary, shrink URL to fit storage */
987          if (strlen(log_rec.url)>=MAXURLH)
988          {
989             if (verbose) fprintf(stderr,"%s [%llu]\n",
990                 msg_big_req,total_rec);
991             log_rec.url[MAXURLH-1]='\0';
992          }
993 
994          /* fix user agent field */
995          cp1 = log_rec.agent;
996          cp3 = cp2 = cp1++;
997          if ( (*cp2 != '\0') && ((*cp2 == '"')||(*cp2 == '(')) )
998          {
999             while (*cp1 != '\0') { cp3 = cp2; *cp2++ = *cp1++; }
1000             *cp3 = '\0';
1001          }
1002          cp1 = log_rec.agent;    /* CHANGE !!! */
1003          while (*cp1 != 0)       /* get rid of more common _bad_ chars ;)   */
1004          {
1005             if ( ((unsigned char)*cp1 < 32) ||
1006                  ((unsigned char)*cp1==127) ||
1007                  (*cp1=='<') || (*cp1=='>') )
1008                { *cp1='\0'; break; }
1009             else cp1++;
1010          }
1011 
1012          /* fix username if needed */
1013          if (log_rec.ident[0]==0)
1014           {  log_rec.ident[0]='-'; log_rec.ident[1]='\0'; }
1015          else
1016          {
1017             cp3=log_rec.ident;
1018             while ((unsigned char)*cp3>=32 && *cp3!='"') cp3++;
1019             *cp3='\0';
1020          }
1021          /* unescape user name */
1022          unescape(log_rec.ident);
1023 
1024          /********************************************/
1025          /* PROCESS RECORD                           */
1026          /********************************************/
1027 
1028          /* first time through? */
1029          if (cur_month == 0)
1030          {
1031              /* if yes, init our date vars */
1032              cur_month=rec_month; cur_year=rec_year;
1033              cur_day=rec_day; cur_hour=rec_hour;
1034              cur_min=rec_min; cur_sec=rec_sec;
1035              f_day=rec_day;
1036          }
1037 
1038          /* adjust last day processed if different */
1039          if (rec_day > l_day) l_day = rec_day;
1040 
1041          /* update min/sec stuff */
1042          if (cur_sec != rec_sec) cur_sec = rec_sec;
1043          if (cur_min != rec_min) cur_min = rec_min;
1044 
1045          /* check for hour change  */
1046          if (cur_hour != rec_hour)
1047          {
1048             /* if yes, init hourly stuff */
1049             if (ht_hit > mh_hit) mh_hit = ht_hit;
1050             ht_hit = 0;
1051             cur_hour = rec_hour;
1052          }
1053 
1054          /* check for day change   */
1055          if (cur_day != rec_day)
1056          {
1057             /* if yes, init daily stuff */
1058             tm_site[cur_day-1]=dt_site; dt_site=0;
1059             tm_visit[cur_day-1]=tot_visit(sd_htab);
1060             del_hlist(sd_htab);
1061             cur_day = rec_day;
1062          }
1063 
1064          /* check for month change */
1065          if ( (cur_month != rec_month) || (cur_year != rec_year) )
1066          {
1067             /* if yes, do monthly stuff */
1068             t_visit=tot_visit(sm_htab);
1069             month_update_exit(req_tstamp);    /* process exit pages      */
1070             update_history();
1071             write_month_html();               /* generate HTML for month */
1072             clear_month();
1073             cur_month = rec_month;            /* update our flags        */
1074             cur_year  = rec_year;
1075             f_day=l_day=rec_day;
1076          }
1077 
1078          /* save hostname for later */
1079          strncpy(host_buf, log_rec.hostname, sizeof(log_rec.hostname));
1080 
1081 #ifdef USE_DNS
1082          /* Resolve IP address if needed */
1083          if (dns_db)
1084          {
1085             struct addrinfo hints, *ares;
1086             memset(&hints, 0, sizeof(hints));
1087             hints.ai_family = AF_UNSPEC;
1088             hints.ai_socktype = SOCK_STREAM;
1089             hints.ai_flags = AI_NUMERICHOST;
1090             if (0 == getaddrinfo(log_rec.hostname, "0", &hints, &ares))
1091             {
1092                freeaddrinfo(ares);
1093                resolve_dns(&log_rec);
1094 
1095 #ifdef USE_IP_AS_HOSTNAME
1096                /* restore the host's IP-address if the host's name has not been resolved */
1097                if (!strcmp(log_rec.hostname,"\020\002"))
1098                        strncpy(log_rec.hostname, host_buf, sizeof(host_buf));
1099 #endif
1100 
1101             }
1102          }
1103 #endif
1104          /* lowercase hostname and validity check */
1105          cp1 = log_rec.hostname; i=0;
1106 
1107          if ( (!isalnum((unsigned char)*cp1)) && (*cp1!=':') )
1108             strncpy(log_rec.hostname, "Invalid", 8);
1109          else
1110          {
1111             while (*cp1 != '\0')  /* loop through string */
1112             {
1113                if ( (*cp1>='A') && (*cp1<='Z') )
1114                   { *cp1++ += 'a'-'A'; continue; }
1115                if ( *cp1=='.' ) i++;
1116                if ( (isalnum((unsigned char)*cp1)) ||
1117                     (*cp1=='.')||(*cp1=='-')       ||
1118                     (*cp1==':')||((*cp1=='_')&&(i==0)) ) cp1++;
1119                else
1120                {
1121                   /* Invalid hostname found! */
1122                   if (strcmp(log_rec.hostname, host_buf))
1123                      strcpy(log_rec.hostname, host_buf);
1124                   else strncpy(log_rec.hostname,"Invalid",8);
1125                   break;
1126                }
1127             }
1128             if (*cp1 == '\0')   /* did we make it to the end? */
1129             {
1130                if (!isalnum((unsigned char)*(cp1-1)))
1131                   strncpy(log_rec.hostname,"Invalid",8);
1132             }
1133          }
1134 
1135          /* Catch blank hostnames here */
1136          if (log_rec.hostname[0]=='\0')
1137             strncpy(log_rec.hostname,"Unknown",8);
1138 
1139          /* Ignore/Include check */
1140          if ( (isinlist(include_sites,log_rec.hostname)==NULL) &&
1141               (isinlist(include_urls,log_rec.url)==NULL)       &&
1142               (isinlist(include_refs,log_rec.refer)==NULL)     &&
1143               (isinlist(include_agents,log_rec.agent)==NULL)   &&
1144               (isinlist(include_users,log_rec.ident)==NULL)    )
1145          {
1146             if (isinlist(ignored_sites,log_rec.hostname)!=NULL)
1147               { total_ignore++; continue; }
1148             if (isinlist(ignored_urls,log_rec.url)!=NULL)
1149               { total_ignore++; continue; }
1150             if (isinlist(ignored_agents,log_rec.agent)!=NULL)
1151               { total_ignore++; continue; }
1152             if (isinlist(ignored_refs,log_rec.refer)!=NULL)
1153               { total_ignore++; continue; }
1154             if (isinlist(ignored_users,log_rec.ident)!=NULL)
1155               { total_ignore++; continue; }
1156          }
1157 
1158          /* Bump response code totals */
1159          switch (log_rec.resp_code) {
1160           case RC_CONTINUE:         i=IDX_CONTINUE;         break;
1161           case RC_SWITCHPROTO:      i=IDX_SWITCHPROTO;      break;
1162           case RC_OK:               i=IDX_OK;               break;
1163           case RC_CREATED:          i=IDX_CREATED;          break;
1164           case RC_ACCEPTED:         i=IDX_ACCEPTED;         break;
1165           case RC_NONAUTHINFO:      i=IDX_NONAUTHINFO;      break;
1166           case RC_NOCONTENT:        i=IDX_NOCONTENT;        break;
1167           case RC_RESETCONTENT:     i=IDX_RESETCONTENT;     break;
1168           case RC_PARTIALCONTENT:   i=IDX_PARTIALCONTENT;   break;
1169           case RC_MULTIPLECHOICES:  i=IDX_MULTIPLECHOICES;  break;
1170           case RC_MOVEDPERM:        i=IDX_MOVEDPERM;        break;
1171           case RC_MOVEDTEMP:        i=IDX_MOVEDTEMP;        break;
1172           case RC_SEEOTHER:         i=IDX_SEEOTHER;         break;
1173           case RC_NOMOD:            i=IDX_NOMOD;            break;
1174           case RC_USEPROXY:         i=IDX_USEPROXY;         break;
1175  	  case RC_MOVEDTEMPORARILY: i=IDX_MOVEDTEMPORARILY; break;
1176           case RC_BAD:              i=IDX_BAD;              break;
1177           case RC_UNAUTH:           i=IDX_UNAUTH;           break;
1178           case RC_PAYMENTREQ:       i=IDX_PAYMENTREQ;       break;
1179           case RC_FORBIDDEN:        i=IDX_FORBIDDEN;        break;
1180           case RC_NOTFOUND:         i=IDX_NOTFOUND;         break;
1181           case RC_METHODNOTALLOWED: i=IDX_METHODNOTALLOWED; break;
1182           case RC_NOTACCEPTABLE:    i=IDX_NOTACCEPTABLE;    break;
1183           case RC_PROXYAUTHREQ:     i=IDX_PROXYAUTHREQ;     break;
1184           case RC_TIMEOUT:          i=IDX_TIMEOUT;          break;
1185           case RC_CONFLICT:         i=IDX_CONFLICT;         break;
1186           case RC_GONE:             i=IDX_GONE;             break;
1187           case RC_LENGTHREQ:        i=IDX_LENGTHREQ;        break;
1188           case RC_PREFAILED:        i=IDX_PREFAILED;        break;
1189           case RC_REQENTTOOLARGE:   i=IDX_REQENTTOOLARGE;   break;
1190           case RC_REQURITOOLARGE:   i=IDX_REQURITOOLARGE;   break;
1191           case RC_UNSUPMEDIATYPE:   i=IDX_UNSUPMEDIATYPE;   break;
1192 	  case RC_RNGNOTSATISFIABLE:i=IDX_RNGNOTSATISFIABLE;break;
1193 	  case RC_EXPECTATIONFAILED:i=IDX_EXPECTATIONFAILED;break;
1194           case RC_SERVERERR:        i=IDX_SERVERERR;        break;
1195           case RC_NOTIMPLEMENTED:   i=IDX_NOTIMPLEMENTED;   break;
1196           case RC_BADGATEWAY:       i=IDX_BADGATEWAY;       break;
1197           case RC_UNAVAIL:          i=IDX_UNAVAIL;          break;
1198           case RC_GATEWAYTIMEOUT:   i=IDX_GATEWAYTIMEOUT;   break;
1199           case RC_BADHTTPVER:       i=IDX_BADHTTPVER;       break;
1200           default:                  i=IDX_UNDEFINED;        break;
1201          }
1202          response[i].count++;
1203 
1204          /* now save in the various hash tables... */
1205          if (log_rec.resp_code==RC_OK || log_rec.resp_code==RC_PARTIALCONTENT)
1206             i=1; else i=0;
1207 
1208          /* URL/ident hash table (only if valid response code) */
1209          if ((log_rec.resp_code==RC_OK)||(log_rec.resp_code==RC_NOMOD)||
1210              (log_rec.resp_code==RC_PARTIALCONTENT))
1211          {
1212             /* URL hash table */
1213             if (put_unode(log_rec.url,OBJ_REG,(u_int64_t)1,
1214                 log_rec.xfer_size,&t_url,(u_int64_t)0,(u_int64_t)0,um_htab))
1215             {
1216                if (verbose)
1217                /* Error adding URL node, skipping ... */
1218                fprintf(stderr,"%s %s\n", msg_nomem_u, log_rec.url);
1219             }
1220 
1221             /* ident (username) hash table */
1222             if (put_inode(log_rec.ident,OBJ_REG,
1223                 1,(u_int64_t)i,log_rec.xfer_size,&t_user,
1224                 0,rec_tstamp,im_htab))
1225             {
1226                if (verbose)
1227                /* Error adding ident node, skipping .... */
1228                fprintf(stderr,"%s %s\n", msg_nomem_i, log_rec.ident);
1229             }
1230          }
1231 
1232          /* referrer hash table */
1233          if (ntop_refs)
1234          {
1235             if (log_rec.refer[0]!='\0')
1236              if (put_rnode(log_rec.refer,OBJ_REG,(u_int64_t)1,&t_ref,rm_htab))
1237              {
1238               if (verbose)
1239               fprintf(stderr,"%s %s\n", msg_nomem_r, log_rec.refer);
1240              }
1241          }
1242 
1243          /* hostname (site) hash table - daily */
1244          if (put_hnode(log_rec.hostname,OBJ_REG,
1245              1,(u_int64_t)i,log_rec.xfer_size,&dt_site,
1246              0,rec_tstamp,"",sd_htab))
1247          {
1248             if (verbose)
1249             /* Error adding host node (daily), skipping .... */
1250             fprintf(stderr,"%s %s\n",msg_nomem_dh, log_rec.hostname);
1251          }
1252 
1253          /* hostname (site) hash table - monthly */
1254          if (put_hnode(log_rec.hostname,OBJ_REG,
1255              1,(u_int64_t)i,log_rec.xfer_size,&t_site,
1256              0,rec_tstamp,"",sm_htab))
1257          {
1258             if (verbose)
1259             /* Error adding host node (monthly), skipping .... */
1260             fprintf(stderr,"%s %s\n", msg_nomem_mh, log_rec.hostname);
1261          }
1262 
1263          /* user agent hash table */
1264          if (ntop_agents)
1265          {
1266             if (log_rec.agent[0]!='\0')
1267              if (put_anode(log_rec.agent,OBJ_REG,(u_int64_t)1,&t_agent,am_htab))
1268              {
1269               if (verbose)
1270               fprintf(stderr,"%s %s\n", msg_nomem_a, log_rec.agent);
1271              }
1272          }
1273 
1274          /* bump monthly/daily/hourly totals        */
1275          t_hit++; ht_hit++;                         /* daily/hourly hits    */
1276          t_xfer += log_rec.xfer_size;               /* total xfer size      */
1277          tm_xfer[rec_day-1] += log_rec.xfer_size;   /* daily xfer total     */
1278          tm_hit[rec_day-1]++;                       /* daily hits total     */
1279          th_xfer[rec_hour] += log_rec.xfer_size;    /* hourly xfer total    */
1280          th_hit[rec_hour]++;                        /* hourly hits total    */
1281 
1282          /* if RC_OK, increase file counters */
1283          if (log_rec.resp_code == RC_OK)
1284          {
1285             t_file++;
1286             tm_file[rec_day-1]++;
1287             th_file[rec_hour]++;
1288          }
1289 
1290          /* Pages (pageview) calculation */
1291          if (ispage(log_rec.url))
1292          {
1293             t_page++;
1294             tm_page[rec_day-1]++;
1295             th_page[rec_hour]++;
1296 
1297             /* do search string stuff if needed     */
1298             if (ntop_search) srch_string(log_rec.srchstr);
1299          }
1300 
1301          /*********************************************/
1302          /* RECORD PROCESSED - DO GROUPS HERE         */
1303          /*********************************************/
1304 
1305          /* URL Grouping */
1306          if ( (cp1=isinglist(group_urls,log_rec.url))!=NULL)
1307          {
1308             if (put_unode(cp1,OBJ_GRP,(u_int64_t)1,log_rec.xfer_size,
1309                 &ul_bogus,(u_int64_t)0,(u_int64_t)0,um_htab))
1310             {
1311                if (verbose)
1312                /* Error adding URL node, skipping ... */
1313                fprintf(stderr,"%s %s\n", msg_nomem_u, cp1);
1314             }
1315          }
1316 
1317          /* Site Grouping */
1318          if ( (cp1=isinglist(group_sites,log_rec.hostname))!=NULL)
1319          {
1320             if (put_hnode(cp1,OBJ_GRP,1,
1321                           (u_int64_t)(log_rec.resp_code==RC_OK)?1:0,
1322                           log_rec.xfer_size,&ul_bogus,
1323                           0,rec_tstamp,"",sm_htab))
1324             {
1325                if (verbose)
1326                /* Error adding Site node, skipping ... */
1327                fprintf(stderr,"%s %s\n", msg_nomem_mh, cp1);
1328             }
1329          }
1330          else
1331          {
1332             /* Domain Grouping */
1333             if (group_domains)
1334             {
1335                cp1 = get_domain(log_rec.hostname);
1336                if (cp1 != NULL)
1337                {
1338                   if (put_hnode(cp1,OBJ_GRP,1,
1339                       (u_int64_t)(log_rec.resp_code==RC_OK)?1:0,
1340                       log_rec.xfer_size,&ul_bogus,
1341                       0,rec_tstamp,"",sm_htab))
1342                   {
1343                      if (verbose)
1344                      /* Error adding Site node, skipping ... */
1345                      fprintf(stderr,"%s %s\n", msg_nomem_mh, cp1);
1346                   }
1347                }
1348             }
1349          }
1350 
1351          /* Referrer Grouping */
1352          if ( (cp1=isinglist(group_refs,log_rec.refer))!=NULL)
1353          {
1354             if (put_rnode(cp1,OBJ_GRP,(u_int64_t)1,&ul_bogus,rm_htab))
1355             {
1356                if (verbose)
1357                /* Error adding Referrer node, skipping ... */
1358                fprintf(stderr,"%s %s\n", msg_nomem_r, cp1);
1359             }
1360          }
1361 
1362          /* User Agent Grouping */
1363          if ( (cp1=isinglist(group_agents,log_rec.agent))!=NULL)
1364          {
1365             if (put_anode(cp1,OBJ_GRP,(u_int64_t)1,&ul_bogus,am_htab))
1366             {
1367                if (verbose)
1368                /* Error adding User Agent node, skipping ... */
1369                fprintf(stderr,"%s %s\n", msg_nomem_a, cp1);
1370             }
1371          }
1372 
1373          /* Ident (username) Grouping */
1374          if ( (cp1=isinglist(group_users,log_rec.ident))!=NULL)
1375          {
1376             if (put_inode(cp1,OBJ_GRP,1,
1377                           (u_int64_t)(log_rec.resp_code==RC_OK)?1:0,
1378                           log_rec.xfer_size,&ul_bogus,
1379                           0,rec_tstamp,im_htab))
1380             {
1381                if (verbose)
1382                /* Error adding Username node, skipping ... */
1383                fprintf(stderr,"%s %s\n", msg_nomem_i, cp1);
1384             }
1385          }
1386       }
1387 
1388       /*********************************************/
1389       /* BAD RECORD                                */
1390       /*********************************************/
1391 
1392       else
1393       {
1394          /* If first record, check if stupid Netscape header stuff      */
1395          if ( (total_rec==1) && (strncmp(buffer,"format=",7)==0) )
1396          {
1397             /* Skipping Netscape header record */
1398             if (verbose>1) printf("%s\n",msg_ign_nscp);
1399             /* count it as ignored... */
1400             total_ignore++;
1401          }
1402          else
1403          {
1404             /* Check if it's a W3C header or IIS Null-Character line */
1405             if ((buffer[0]=='\0') || (buffer[0]=='#'))
1406             {
1407                total_ignore++;
1408             }
1409             else
1410             {
1411                /* really bad record... */
1412                total_bad++;
1413                if (verbose)
1414                {
1415                   fprintf(stderr,"%s (%llu)",msg_bad_rec,total_rec);
1416                   if (debug_mode) fprintf(stderr,":\n%s\n",tmp_buf);
1417                   else fprintf(stderr,"\n");
1418                }
1419             }
1420          }
1421       }
1422    }
1423 
1424    /*********************************************/
1425    /* DONE READING LOG FILE - final processing  */
1426    /*********************************************/
1427 
1428    /* close log file if needed */
1429 #ifdef USE_BZIP
1430    if (gz_log) (gz_log==COMP_BZIP)?BZ2_bzclose(zlog_fp):gzclose(zlog_fp);
1431 #else
1432    if (gz_log) gzclose(zlog_fp);
1433 #endif
1434    else if (log_fname) fclose(log_fp);
1435 
1436    if (good_rec)                             /* were any good records?   */
1437    {
1438       tm_site[cur_day-1]=dt_site;            /* If yes, clean up a bit   */
1439       tm_visit[cur_day-1]=tot_visit(sd_htab);
1440       t_visit=tot_visit(sm_htab);
1441       if (ht_hit > mh_hit) mh_hit = ht_hit;
1442 
1443       if (total_rec > (total_ignore+total_bad)) /* did we process any?   */
1444       {
1445          if (incremental)
1446          {
1447             if (save_state())                /* incremental stuff        */
1448             {
1449                /* Error: Unable to save current run data */
1450                if (verbose) fprintf(stderr,"%s\n",msg_data_err);
1451                unlink(state_fname);
1452             }
1453          }
1454          month_update_exit(rec_tstamp);      /* calculate exit pages     */
1455          update_history();
1456          write_month_html();                 /* write monthly HTML file  */
1457          put_history();                      /* write history            */
1458       }
1459       if (hist[0].month!=0) write_main_index(); /* write main HTML file  */
1460 
1461       /* get processing end time */
1462       end_time = time(NULL);
1463 
1464       /* display end of processing statistics */
1465       if (time_me || (verbose>1))
1466       {
1467          printf("%llu %s ",total_rec, msg_records);
1468          if (total_ignore)
1469          {
1470             printf("(%llu %s",total_ignore,msg_ignored);
1471             if (total_bad) printf(", %llu %s) ",total_bad,msg_bad);
1472                else        printf(") ");
1473          }
1474          else if (total_bad) printf("(%llu %s) ",total_bad,msg_bad);
1475 
         /* total processing time in seconds */
1477          temp_time = difftime(end_time, start_time);
1478          if (temp_time==0) temp_time=1;
1479          printf("%s %.0f %s", msg_in, temp_time, msg_seconds);
1480 
1481          /* calculate records per second */
1482          if (temp_time)
1483            i=( (int)( (float)total_rec/temp_time ) );
1484          else i=0;
1485 
1486          if ( (i>0) && (i<=total_rec) ) printf(", %d/sec\n", i);
1487             else  printf("\n");
1488       }
1489 
1490 #ifdef USE_DNS
1491       /* Close DNS cache file */
1492       if (dns_db) close_cache();
1493       /* Close GeoDB database */
1494       if (geo_db) geodb_close(geo_db);
1495 #endif
1496 
1497 #ifdef USE_GEOIP
1498       /* Close GeoIP database */
1499       if (mmdb_open==MMDB_SUCCESS) {
1500          MMDB_close(&mmdb);
1501          mmdb_open=MMDB_FILE_OPEN_ERROR;
1502       }
1503 #endif
1504 
1505       /* Whew, all done! Exit with completion status (0) */
1506       exit(0);
1507    }
1508    else
1509    {
1510       /* No valid records found... exit with error (1) */
1511       if (verbose) printf("%s\n",msg_no_vrec);
1512       if (hist[0].month!=0) write_main_index(); /* write main HTML file     */
1513       exit(1);
1514    }
1515 }
1516 
1517 /*********************************************/
1518 /* GET_CONFIG - get configuration file info  */
1519 /*********************************************/
1520 
get_config(char * fname)1521 void get_config(char *fname)
1522 {
1523    char *kwords[]= { "Undefined",         /* 0 = undefined keyword       0  */
1524                      "OutputDir",         /* Output directory            1  */
1525                      "LogFile",           /* Log file to use for input   2  */
1526                      "ReportTitle",       /* Title for reports           3  */
1527                      "HostName",          /* Hostname to use             4  */
1528                      "IgnoreHist",        /* Ignore history file         5  */
1529                      "Quiet",             /* Run in quiet mode           6  */
1530                      "TimeMe",            /* Produce timing results      7  */
1531                      "Debug",             /* Produce debug information   8  */
1532                      "HourlyGraph",       /* Hourly stats graph          9  */
1533                      "HourlyStats",       /* Hourly stats table         10  */
1534                      "TopSites",          /* Top sites                  11  */
1535                      "TopURLs",           /* Top URLs                   12  */
1536                      "TopReferrers",      /* Top Referrers              13  */
1537                      "TopAgents",         /* Top User Agents            14  */
1538                      "TopCountries",      /* Top Countries              15  */
1539                      "HideSite",          /* Sites to hide              16  */
1540                      "HideURL",           /* URLs to hide               17  */
1541                      "HideReferrer",      /* Referrers to hide          18  */
1542                      "HideAgent",         /* User Agents to hide        19  */
1543                      "IndexAlias",        /* Aliases for index.html     20  */
1544                      "HTMLHead",          /* HTML Top1 code             21  */
1545                      "HTMLPost",          /* HTML Top2 code             22  */
1546                      "HTMLTail",          /* HTML Tail code             23  */
1547                      "MangleAgents",      /* Mangle User Agents         24  */
1548                      "IgnoreSite",        /* Sites to ignore            25  */
1549                      "IgnoreURL",         /* Url's to ignore            26  */
1550                      "IgnoreReferrer",    /* Referrers to ignore        27  */
1551                      "IgnoreAgent",       /* User Agents to ignore      28  */
1552                      "ReallyQuiet",       /* Dont display ANY messages  29  */
1553                      "GMTTime",           /* Local or UTC time?         30  */
1554                      "GroupURL",          /* Group URLs                 31  */
1555                      "GroupSite",         /* Group Sites                32  */
1556                      "GroupReferrer",     /* Group Referrers            33  */
1557                      "GroupAgent",        /* Group Agents               34  */
1558                      "GroupShading",      /* Shade Grouped entries      35  */
1559                      "GroupHighlight",    /* BOLD Grouped entries       36  */
1560                      "Incremental",       /* Incremental runs           37  */
1561                      "IncrementalName",   /* Filename for state data    38  */
1562                      "HistoryName",       /* Filename for history data  39  */
1563                      "HTMLExtension",     /* HTML filename extension    40  */
1564                      "HTMLPre",           /* HTML code at beginning     41  */
1565                      "HTMLBody",          /* HTML body code             42  */
1566                      "HTMLEnd",           /* HTML code at end           43  */
1567                      "UseHTTPS",          /* Use https:// on URLs       44  */
1568                      "IncludeSite",       /* Sites to always include    45  */
1569                      "IncludeURL",        /* URLs to always include     46  */
1570                      "IncludeReferrer",   /* Referrers to include       47  */
1571                      "IncludeAgent",      /* User Agents to include     48  */
1572                      "PageType",          /* Page Type (pageview)       49  */
1573                      "VisitTimeout",      /* Visit timeout (seconds)    50  */
1574                      "GraphLegend",       /* Graph Legends (yes/no)     51  */
1575                      "GraphLines",        /* Graph Lines (0=none)       52  */
1576                      "FoldSeqErr",        /* Fold sequence errors       53  */
1577                      "CountryGraph",      /* Display ctry graph (0=no)  54  */
1578                      "TopKSites",         /* Top sites (by KBytes)      55  */
1579                      "TopKURLs",          /* Top URLs  (by KBytes)      56  */
1580                      "TopEntry",          /* Top Entry Pages            57  */
1581                      "TopExit",           /* Top Exit Pages             58  */
1582                      "TopSearch",         /* Top Search Strings         59  */
1583                      "LogType",           /* Log Type (clf/ftp/squid)   60  */
1584                      "SearchEngine",      /* SearchEngine strings       61  */
1585                      "GroupDomains",      /* Group domains (n=level)    62  */
1586                      "HideAllSites",      /* Hide ind. sites (0=no)     63  */
1587                      "AllSites",          /* List all sites?            64  */
1588                      "AllURLs",           /* List all URLs?             65  */
1589                      "AllReferrers",      /* List all Referrers?        66  */
1590                      "AllAgents",         /* List all User Agents?      67  */
1591                      "AllSearchStr",      /* List all Search Strings?   68  */
1592                      "AllUsers",          /* List all Users?            69  */
1593                      "TopUsers",          /* Top Usernames to show      70  */
1594                      "HideUser",          /* Usernames to hide          71  */
1595                      "IgnoreUser",        /* Usernames to ignore        72  */
1596                      "IncludeUser",       /* Usernames to include       73  */
1597                      "GroupUser",         /* Usernames to group         74  */
1598                      "DumpPath",          /* Path for dump files        75  */
1599                      "DumpExtension",     /* Dump filename extension    76  */
1600                      "DumpHeader",        /* Dump header as first rec?  77  */
1601                      "DumpSites",         /* Dump sites tab file        78  */
1602                      "DumpURLs",          /* Dump urls tab file         79  */
1603                      "DumpReferrers",     /* Dump referrers tab file    80  */
1604                      "DumpAgents",        /* Dump user agents tab file  81  */
1605                      "DumpUsers",         /* Dump usernames tab file    82  */
1606                      "DumpSearchStr",     /* Dump search str tab file   83  */
1607                      "DNSCache",          /* DNS Cache file name        84  */
1608                      "DNSChildren",       /* DNS Children (0=no DNS)    85  */
1609                      "DailyGraph",        /* Daily Graph (0=no)         86  */
1610                      "DailyStats",        /* Daily Stats (0=no)         87  */
1611                      "LinkReferrer",      /* Link referrer (0=no)       88  */
1612                      "PagePrefix",        /* PagePrefix - treat as page 89  */
1613                      "ColorHit",          /* Hit Color   (def=00805c)   90  */
1614                      "ColorFile",         /* File Color  (def=0040ff)   91  */
1615                      "ColorSite",         /* Site Color  (def=ff8000)   92  */
1616                      "ColorKbyte",        /* Kbyte Color (def=ff0000)   93  */
1617                      "ColorPage",         /* Page Color  (def=00e0ff)   94  */
1618                      "ColorVisit",        /* Visit Color (def=ffff00)   95  */
1619                      "ColorMisc",         /* Misc Color  (def=00e0ff)   96  */
1620                      "PieColor1",         /* Pie Color 1 (def=800080)   97  */
1621                      "PieColor2",         /* Pie Color 2 (def=80ffc0)   98  */
1622                      "PieColor3",         /* Pie Color 3 (def=ff00ff)   99  */
1623                      "PieColor4",         /* Pie Color 4 (def=ffc080)   100 */
1624                      "CacheIPs",          /* Cache IPs in DNS DB (0=no) 101 */
1625                      "CacheTTL",          /* DNS Cache entry TTL (days) 102 */
1626                      "GeoDB",             /* GeoDB lookups (0=no)       103 */
1627                      "GeoDBDatabase",     /* GeoDB database filename    104 */
1628                      "StripCGI",          /* Strip CGI in URLS (0=no)   105 */
1629                      "TrimSquidURL",      /* Trim squid URLs (0=none)   106 */
1630                      "OmitPage",          /* URLs not counted as pages  107 */
1631                      "HTAccess",          /* Write .httaccess files?    108 */
1632                      "IgnoreState",       /* Ignore state file (0=no)   109 */
1633                      "DefaultIndex",      /* Default index.* (1=yes)    110 */
1634                      "GeoIP",             /* Use GeoIP? (1=yes)         111 */
1635                      "GeoIPDatabase",     /* Database to use for GeoIP  112 */
1636                      "NormalizeURL",      /* Normalize CLF URLs (1=yes) 113 */
1637                      "IndexMonths",       /* # months for main page     114 */
1638                      "GraphMonths",       /* # months for yearly graph  115 */
1639                      "YearHeaders",       /* use year headers? (1=yes)  116 */
1640                      "YearTotals",        /* show year subtotals (0=no) 117 */
1641                      "CountryFlags",      /* show country flags? (0-no) 118 */
1642                      "FlagDir",           /* directory w/flag images    119 */
1643                      "SearchCaseI"        /* srch str case insensitive  120 */
1644                    };
1645 
1646    FILE *fp;
1647 
1648    char buffer[BUFSIZE];
1649    char keyword[MAXKWORD];
1650    char value[MAXKVAL];
1651    char *cp1, *cp2;
1652    int  i,key,count;
1653    int	num_kwords=sizeof(kwords)/sizeof(char *);
1654 
1655    if ( (fp=fopen(fname,"r")) == NULL)
1656    {
1657       if (verbose)
1658       fprintf(stderr,"%s %s\n",msg_bad_conf,fname);
1659       return;
1660    }
1661 
1662    while ( (fgets(buffer,BUFSIZE,fp)) != NULL)
1663    {
1664       /* skip comments and blank lines */
1665       if ( (buffer[0]=='#') || isspace((unsigned char)buffer[0]) ) continue;
1666 
1667       /* Get keyword */
1668       cp1=buffer;cp2=keyword;count=MAXKWORD-1;
1669       while ( (isalnum((unsigned char)*cp1)) && count )
1670          { *cp2++ = *cp1++; count--; }
1671       *cp2='\0';
1672 
1673       /* Get value */
1674       cp2=value; count=MAXKVAL-1;
1675       while ((*cp1!='\n')&&(*cp1!='\0')&&(isspace((unsigned char)*cp1))) cp1++;
1676       while ((*cp1!='\n')&&(*cp1!='\0')&&count ) { *cp2++ = *cp1++; count--; }
1677       *cp2--='\0';
1678       while ((isspace((unsigned char)*cp2)) && (cp2 != value) ) *cp2--='\0';
1679 
1680       /* check if blank keyword/value */
1681       if ( (keyword[0]=='\0') || (value[0]=='\0') ) continue;
1682 
1683       key=0;
1684       for (i=0;i<num_kwords;i++)
1685          if (!ouricmp(keyword,kwords[i])) { key=i; break; }
1686 
1687       if (key==0) { printf("%s '%s' (%s)\n",       /* Invalid keyword       */
1688                     msg_bad_key,keyword,fname);
1689                     continue;
1690                   }
1691 
1692       switch (key)
1693       {
1694         case 1:  out_dir=save_opt(value);          break; /* OutputDir      */
1695         case 2:  log_fname=save_opt(value);        break; /* LogFile        */
1696         case 3:  msg_title=save_opt(value);        break; /* ReportTitle    */
1697         case 4:  hname=save_opt(value);            break; /* HostName       */
1698         case 5:  ignore_hist=
1699                     (tolower(value[0])=='y')?1:0;  break; /* IgnoreHist     */
1700         case 6:  verbose=
1701                     (tolower(value[0])=='y')?1:2;  break; /* Quiet          */
1702         case 7:  time_me=
1703                     (tolower(value[0])=='n')?0:1;  break; /* TimeMe         */
1704         case 8:  debug_mode=
1705                     (tolower(value[0])=='y')?1:0;  break; /* Debug          */
1706         case 9:  hourly_graph=
1707                     (tolower(value[0])=='n')?0:1;  break; /* HourlyGraph    */
1708         case 10: hourly_stats=
1709                     (tolower(value[0])=='n')?0:1;  break; /* HourlyStats    */
1710         case 11: ntop_sites = atoi(value);         break; /* TopSites       */
1711         case 12: ntop_urls = atoi(value);          break; /* TopURLs        */
1712         case 13: ntop_refs = atoi(value);          break; /* TopRefs        */
1713         case 14: ntop_agents = atoi(value);        break; /* TopAgents      */
1714         case 15: ntop_ctrys = atoi(value);         break; /* TopCountries   */
1715         case 16: add_nlist(value,&hidden_sites);   break; /* HideSite       */
1716         case 17: add_nlist(value,&hidden_urls);    break; /* HideURL        */
1717         case 18: add_nlist(value,&hidden_refs);    break; /* HideReferrer   */
1718         case 19: add_nlist(value,&hidden_agents);  break; /* HideAgent      */
1719         case 20: add_nlist(value,&index_alias);    break; /* IndexAlias     */
1720         case 21: add_nlist(value,&html_head);      break; /* HTMLHead       */
1721         case 22: add_nlist(value,&html_post);      break; /* HTMLPost       */
1722         case 23: add_nlist(value,&html_tail);      break; /* HTMLTail       */
1723         case 24: mangle_agent=atoi(value);         break; /* MangleAgents   */
1724         case 25: add_nlist(value,&ignored_sites);  break; /* IgnoreSite     */
1725         case 26: add_nlist(value,&ignored_urls);   break; /* IgnoreURL      */
1726         case 27: add_nlist(value,&ignored_refs);   break; /* IgnoreReferrer */
1727         case 28: add_nlist(value,&ignored_agents); break; /* IgnoreAgent    */
1728         case 29: if (tolower(value[0])=='y')
1729                     verbose=0;                     break; /* ReallyQuiet    */
1730         case 30: local_time=
1731                     (tolower(value[0])=='y')?0:1;  break; /* GMTTime        */
1732         case 31: add_glist(value,&group_urls);     break; /* GroupURL       */
1733         case 32: add_glist(value,&group_sites);    break; /* GroupSite      */
1734         case 33: add_glist(value,&group_refs);     break; /* GroupReferrer  */
1735         case 34: add_glist(value,&group_agents);   break; /* GroupAgent     */
1736         case 35: shade_groups=
1737                     (tolower(value[0])=='n')?0:1;  break; /* GroupShading   */
1738         case 36: hlite_groups=
1739                     (tolower(value[0])=='n')?0:1;  break; /* GroupHighlight */
1740         case 37: incremental=
1741                     (tolower(value[0])=='y')?1:0;  break; /* Incremental    */
1742         case 38: state_fname=save_opt(value);      break; /* State FName    */
1743         case 39: hist_fname=save_opt(value);       break; /* History FName  */
1744         case 40: html_ext=save_opt(value);         break; /* HTML extension */
1745         case 41: add_nlist(value,&html_pre);       break; /* HTML Pre code  */
1746         case 42: add_nlist(value,&html_body);      break; /* HTML Body code */
1747         case 43: add_nlist(value,&html_end);       break; /* HTML End code  */
1748         case 44: use_https=
1749                     (tolower(value[0])=='y')?1:0;  break; /* Use https://   */
1750         case 45: add_nlist(value,&include_sites);  break; /* IncludeSite    */
1751         case 46: add_nlist(value,&include_urls);   break; /* IncludeURL     */
1752         case 47: add_nlist(value,&include_refs);   break; /* IncludeReferrer*/
1753         case 48: add_nlist(value,&include_agents); break; /* IncludeAgent   */
1754         case 49: add_nlist(value,&page_type);      break; /* PageType       */
1755         case 50: visit_timeout=atoi(value);        break; /* VisitTimeout   */
1756         case 51: graph_legend=
1757                     (tolower(value[0])=='n')?0:1;  break; /* GraphLegend    */
1758         case 52: graph_lines = atoi(value);        break; /* GraphLines     */
1759         case 53: fold_seq_err=
1760                     (tolower(value[0])=='y')?1:0;  break; /* FoldSeqErr     */
1761         case 54: ctry_graph=
1762                     (tolower(value[0])=='n')?0:1;  break; /* CountryGraph   */
1763         case 55: ntop_sitesK = atoi(value);        break; /* TopKSites (KB) */
1764         case 56: ntop_urlsK  = atoi(value);        break; /* TopKUrls (KB)  */
1765         case 57: ntop_entry  = atoi(value);        break; /* Top Entry pgs  */
1766         case 58: ntop_exit   = atoi(value);        break; /* Top Exit pages */
1767         case 59: ntop_search = atoi(value);        break; /* Top Search pgs */
1768         case 60: log_type=(tolower(value[0])=='f')?
1769                  LOG_FTP:((tolower(value[0])=='s')?
1770                  LOG_SQUID:((tolower(value[0])=='w')?
1771                  LOG_W3C:LOG_CLF));                break; /* LogType        */
1772         case 61: add_glist(value,&search_list);    break; /* SearchEngine   */
1773         case 62: group_domains=atoi(value);        break; /* GroupDomains   */
1774         case 63: hide_sites=
1775                     (tolower(value[0])=='y')?1:0;  break; /* HideAllSites   */
1776         case 64: all_sites=
1777                     (tolower(value[0])=='y')?1:0;  break; /* All Sites?     */
1778         case 65: all_urls=
1779                     (tolower(value[0])=='y')?1:0;  break; /* All URLs?      */
1780         case 66: all_refs=
1781                     (tolower(value[0])=='y')?1:0;  break; /* All Refs       */
1782         case 67: all_agents=
1783                     (tolower(value[0])=='y')?1:0;  break; /* All Agents?    */
1784         case 68: all_search=
1785                     (tolower(value[0])=='y')?1:0;  break; /* All Srch str   */
1786         case 69: all_users=
1787                     (tolower(value[0])=='y')?1:0;  break; /* All Users?     */
1788         case 70: ntop_users=atoi(value);           break; /* TopUsers       */
1789         case 71: add_nlist(value,&hidden_users);   break; /* HideUser       */
1790         case 72: add_nlist(value,&ignored_users);  break; /* IgnoreUser     */
1791         case 73: add_nlist(value,&include_users);  break; /* IncludeUser    */
1792         case 74: add_glist(value,&group_users);    break; /* GroupUser      */
1793         case 75: dump_path=save_opt(value);        break; /* DumpPath       */
1794         case 76: dump_ext=save_opt(value);         break; /* Dumpfile ext   */
1795         case 77: dump_header=
1796                     (tolower(value[0])=='y')?1:0;  break; /* DumpHeader?    */
1797         case 78: dump_sites=
1798                     (tolower(value[0])=='y')?1:0;  break; /* DumpSites?     */
1799         case 79: dump_urls=
1800                     (tolower(value[0])=='y')?1:0;  break; /* DumpURLs?      */
1801         case 80: dump_refs=
1802                     (tolower(value[0])=='y')?1:0;  break; /* DumpReferrers? */
1803         case 81: dump_agents=
1804                     (tolower(value[0])=='y')?1:0;  break; /* DumpAgents?    */
1805         case 82: dump_users=
1806                     (tolower(value[0])=='y')?1:0;  break; /* DumpUsers?     */
1807         case 83: dump_search=
1808                     (tolower(value[0])=='y')?1:0;  break; /* DumpSrchStrs?  */
1809 #ifdef USE_DNS
1810         case 84: dns_cache=save_opt(value);        break; /* DNSCache fname */
1811         case 85: dns_children=atoi(value);         break; /* DNSChildren    */
1812 #else
1813         case 84: /* Disable DNSCache and DNSChildren if DNS is not enabled  */
1814         case 85: printf("%s '%s' (%s)\n",msg_bad_key,keyword,fname); break;
1815 #endif  /* USE_DNS */
1816         case 86: daily_graph=
1817                     (tolower(value[0])=='n')?0:1;  break; /* HourlyGraph    */
1818         case 87: daily_stats=
1819                     (tolower(value[0])=='n')?0:1;  break; /* HourlyStats    */
1820         case 88: link_referrer=
1821                     (tolower(value[0])=='y')?1:0;  break; /* LinkReferrer   */
1822         case 89: add_nlist(value,&page_prefix);    break; /* PagePrefix     */
1823         case 90: strncpy(hit_color+1,  value, 6);  break; /* ColorHit       */
1824         case 91: strncpy(file_color+1, value, 6);  break; /* ColorFile      */
1825         case 92: strncpy(site_color+1, value, 6);  break; /* ColorSite      */
1826         case 93: strncpy(kbyte_color+1,value, 6);  break; /* ColorKbyte     */
1827         case 94: strncpy(page_color+1, value, 6);  break; /* ColorPage      */
1828         case 95: strncpy(visit_color+1,value, 6);  break; /* ColorVisit     */
1829         case 96: strncpy(misc_color+1, value, 6);  break; /* ColorMisc      */
1830         case 97: strncpy(pie_color1+1, value, 6);  break; /* PieColor1      */
1831         case 98: strncpy(pie_color2+1, value, 6);  break; /* PieColor2      */
1832         case 99: strncpy(pie_color3+1, value, 6);  break; /* PieColor3      */
1833         case 100:strncpy(pie_color4+1, value, 6);  break; /* PieColor4      */
1834 #ifdef USE_DNS
1835         case 101: cache_ips=
1836                     (tolower(value[0])=='y')?1:0;  break; /* CacheIPs       */
1837         case 102: cache_ttl=atoi(value);           break; /* CacheTTL days  */
1838         case 103: geodb=
1839                     (tolower(value[0])=='y')?1:0;  break; /* GeoDB          */
1840         case 104: geodb_fname=save_opt(value);     break; /* GeoDBDatabase  */
1841 #else
1842         case 101: /* Disable CacheIPs/CacheTTL/GeoDB/GeoDBDatabase if none  */
1843         case 102:
1844         case 103:
1845         case 104: printf("%s '%s' (%s)\n",msg_bad_key,keyword,fname); break;
1846 #endif  /* USE_DNS */
1847         case 105: stripcgi=
1848                     (tolower(value[0])=='n')?0:1;  break; /* StripCGI       */
1849         case 106: trimsquid=atoi(value);           break; /* TrimSquidURL   */
1850         case 107: add_nlist(value,&omit_page);     break; /* OmitPage       */
1851         case 108: htaccess=
1852                     (tolower(value[0])=='y')?1:0;  break; /* HTAccess       */
1853         case 109: ignore_state=
1854                     (tolower(value[0])=='y')?1:0;  break; /* IgnoreState    */
1855         case 110: default_index=
1856                     (tolower(value[0])=='n')?0:1;  break; /* DefaultIndex   */
1857 #ifdef USE_GEOIP
1858         case 111: geoip=
1859                     (tolower(value[0])=='y')?1:0;  break; /* GeoIP          */
1860         case 112: geoip_db=save_opt(value);        break; /* GeoIPDatabase  */
1861 #else
1862         case 111: /* Disable GeoIP and GeoIPDatabase if not enabled         */
1863         case 112: printf("%s '%s' (%s)\n",msg_bad_key,keyword,fname); break;
1864 #endif
1865         case 113: normalize=
1866                     (tolower(value[0])=='n')?0:1;  break; /* NormalizeURL   */
1867         case 114: index_mths=atoi(value);          break; /* IndexMonths    */
1868         case 115: graph_mths=atoi(value);          break; /* GraphMonths    */
1869         case 116: year_hdrs=
1870                     (tolower(value[0])=='n')?0:1;  break; /* YearHeaders    */
1871         case 117: year_totals=
1872                     (tolower(value[0])=='n')?0:1;  break; /* YearTotals     */
1873         case 118: use_flags=
1874                     (tolower(value[0])=='y')?1:0;  break; /* CountryFlags   */
1875         case 119: use_flags=1; flag_dir=save_opt(value); break; /* FlagDir  */
1876         case 120: searchcasei=
1877                     (tolower(value[0])=='n')?0:1;  break; /* SearchCaseI    */
1878       }
1879    }
1880    fclose(fp);
1881 }
1882 
1883 /*********************************************/
1884 /* SAVE_OPT - save option from config file   */
1885 /*********************************************/
1886 
/*
 * Return a freshly malloc'ed copy of 'str', or NULL if the allocation
 * fails.  The caller owns the returned memory.
 */
static char *save_opt(char *str)
{
   char *copy=malloc(strlen(str)+1);          /* +1 for the terminator    */

   if (copy!=NULL) strcpy(copy,str);
   return copy;
}
1896 
1897 /*********************************************/
/* CLEAR_MONTH - initialize monthly stuff    */
1899 /*********************************************/
1900 
clear_month()1901 void clear_month()
1902 {
1903    int i;
1904 
1905    init_counters();                  /* reset monthly counters  */
1906    del_htabs();                      /* clear hash tables       */
1907    if (ntop_ctrys!=0 ) for (i=0;i<ntop_ctrys;i++)  top_ctrys[i]=NULL;
1908 }
1909 
1910 /*********************************************/
1911 /* INIT_COUNTERS - prep counters for use     */
1912 /*********************************************/
1913 
init_counters()1914 void init_counters()
1915 {
1916    int i;
1917    for (i=0;i<TOTAL_RC;i++) response[i].count = 0;
1918    for (i=0;i<31;i++)  /* monthly totals      */
1919    {
1920     tm_xfer[i]=0.0;
1921     tm_hit[i]=tm_file[i]=tm_site[i]=tm_page[i]=tm_visit[i]=0;
1922    }
1923    for (i=0;i<24;i++)  /* hourly totals       */
1924    {
1925       th_hit[i]=th_file[i]=th_page[i]=0;
1926       th_xfer[i]=0.0;
1927    }
1928    for (i=0;ctry[i].desc;i++) /* country totals */
1929    {
1930       ctry[i].count=0;
1931       ctry[i].files=0;
1932       ctry[i].xfer=0;
1933    }
1934    t_hit=t_file=t_site=t_url=t_ref=t_agent=t_page=t_visit=t_user=0;
1935    t_xfer=0.0;
1936    mh_hit = dt_site = 0;
1937    f_day=l_day=1;
1938 }
1939 
1940 /*********************************************/
1941 /* PRINT_OPTS - print command line options   */
1942 /*********************************************/
1943 
print_opts(char * pname)1944 void print_opts(char *pname)
1945 {
1946    int i;
1947 
1948    printf("%s: %s %s\n",h_usage1,pname,h_usage2);
1949    for (i=0;h_msg[i];i++) printf("%s\n",h_msg[i]);
1950    exit(1);
1951 }
1952 
1953 /*********************************************/
1954 /* PRINT_VERSION                             */
1955 /*********************************************/
1956 
print_version()1957 void print_version()
1958 {
1959    char buf[128]="";
1960    uname(&system_info);
1961 
1962    printf("Webalizer V%s-%s (%s %s %s) %s\n%s\n",
1963       version,editlvl,
1964       system_info.sysname,system_info.release,system_info.machine,
1965       language,copyright);
1966 
1967 #ifdef USE_DNS
1968    strncpy(&buf[strlen(buf)],"DNS/GeoDB ",11);
1969 #endif
1970 #ifdef USE_BZIP
1971    strncpy(&buf[strlen(buf)],"BZip2 ",7);
1972 #endif
1973 #ifdef USE_GEOIP
1974    strncpy(&buf[strlen(buf)],"GeoIP ",7);
1975 #endif
1976 
1977    if (debug_mode)
1978    {
1979       printf("Mod date: %s  Options: ",moddate);
1980       if (buf[0]!=0) printf("%s",buf);
1981       else           printf("none");
1982       printf("\n");
1983 #if USE_DNS
1984       printf("Default GeoDB dir : %s\n",GEODB_LOC);
1985 #endif
1986       printf("Default config dir: %s\n",ETCDIR);
1987       printf("\n");
1988    }
1989    else printf("\n");
1990    exit(1);
1991 }
1992 
1993 /*********************************************/
1994 /* CUR_TIME - return date/time as a string   */
1995 /*********************************************/
1996 
cur_time()1997 char *cur_time()
1998 {
1999    time_t     now;
2000    static     char timestamp[48];
2001 
2002    /* get system time */
2003    now = time(NULL);
2004    /* convert to timestamp string */
2005    if (local_time)
2006       strftime(timestamp,sizeof(timestamp),"%d-%b-%Y %H:%M %Z",
2007             localtime(&now));
2008    else
2009       strftime(timestamp,sizeof(timestamp),"%d-%b-%Y %H:%M GMT",
2010             gmtime(&now));
2011 
2012    return timestamp;
2013 }
2014 
2015 /*********************************************/
2016 /* ISPAGE - determine if an HTML page or not */
2017 /*********************************************/
2018 
ispage(char * str)2019 int ispage(char *str)
2020 {
2021    NLISTPTR t;
2022    char *cp1, *cp2;
2023 
2024    if (isinlist(omit_page,str)!=NULL) return 0;
2025 
2026    cp1=cp2=str;
2027    while (*cp1!='\0') { if (*cp1=='.') cp2=cp1; cp1++; }
2028    if ((cp2++==str)||(*(--cp1)=='/')) return 1;
2029    t=page_prefix;
2030    while(t!=NULL)
2031    {
2032       /* Check if a PagePrefix matches */
2033       if(strncmp(str,t->string,strlen(t->string))==0) return 1;
2034       t=t->next;
2035    }
2036    return (isinlist(page_type,cp2)!=NULL);
2037 }
2038 
2039 /*********************************************/
2040 /* ISURLCHAR - checks for valid URL chars    */
2041 /*********************************************/
2042 
/*
 * Return 1 if 'ch' is acceptable inside a URL, 0 otherwise.
 *
 * Letters, digits and all characters above 127 are always accepted.
 * Of the remaining characters a fixed punctuation set is accepted; when
 * 'flag' is zero that set additionally contains ';', '?', '&' and '='
 * (the characters used by CGI query strings).
 *
 * The NUL character is never accepted.
 */
int isurlchar(unsigned char ch, int flag)
{
   /* strchr() treats the terminating NUL as part of the string, so it  */
   /* must be rejected before the set lookups below.                    */
   if (ch == '\0')  return 0;

   if (isalnum(ch)) return 1;                /* allow letters, numbers...    */
   if (ch > 127)    return 1;                /* allow extended chars...      */
   if (flag)                                 /* and filter some others       */
      return (strchr(":/\\.,' *!-+_@~()[]!",ch)!=NULL);    /* strip cgi vars */
   else
      return (strchr(":/\\.,' *!-+_@~()[]!;?&=",ch)!=NULL); /* keep cgi vars */
}
2052 
2053 /*********************************************/
2054 /* CTRY_IDX - create unique # from TLD       */
2055 /*********************************************/
2056 
/*
 * Pack a domain string into a single integer.
 *
 * Characters are consumed from the end of str towards the front.  Each
 * contributes (ch - 'a' + 1) shifted left: the last character sits at
 * bit 0 and the shift then grows by 7 once and by 5 for every further
 * character.  An empty string yields 0.
 *
 * NOTE(review): the shift is evaluated in int arithmetic, so long
 * inputs can exceed the range of int.  Left untouched because values
 * computed elsewhere may need to agree with this result - confirm.
 */
u_int64_t ctry_idx(char *str)
{
   u_int64_t idx=0;
   int       remaining=strlen(str);
   int       shift=0;
   char      *cp=str+remaining;

   while (remaining>0)
   {
      cp--;                                  /* step back one character */
      idx+=((*cp-'a'+1)<<shift);
      shift+=(shift==0)?7:5;                 /* 7 bits first, then 5    */
      remaining--;
   }
   return idx;
}
2066 
2067 /*********************************************/
2068 /* UN_IDX - get TLD from index #             */
2069 /*********************************************/
2070 
/*
 * Turn a packed index back into its text form.
 *
 * Returns a pointer to a static, NUL-terminated buffer that is reused
 * on every call.  An index of 0 yields an empty string.
 *
 * If the low 7 bits are greater than 32 the value is decoded as a
 * two-character code: a letter taken from the bits above bit 6,
 * followed by the character (low7 - 32).  Otherwise six positions are
 * decoded (7 bits for the last one, 5 bits for each of the others),
 * empty positions become blanks, and leading blanks are shifted out.
 */
char *un_idx(u_int64_t idx)
{
   static char buf[8];
   int    pos, low, bits;

   memset(buf, 0, sizeof(buf));
   if (idx==0) return buf;

   low=(idx&0x7f);
   if (low>32)                       /* only for a1, a2 and o1 */
   {
      buf[0]=(idx>>7)+'a';
      buf[1]=low-32;
      return buf;
   }

   /* last character lives in the low 7 bits */
   buf[5]=(idx&0x7f)+'a'-1;

   /* remaining characters are 5-bit fields starting at bit 7 */
   for (pos=4; pos>=0; pos--)
   {
      bits=(idx>>(((5-pos)*5)+2))&0x1f;
      if (bits) buf[pos]=bits+'a'-1;
      else      buf[pos]=' ';
   }

   /* left-justify: drop leading blanks one position at a time */
   while (buf[0]==' ')
   {
      for (pos=0; pos<6; pos++) buf[pos]=buf[pos+1];
   }
   return buf;
}
2086 
2087 /*********************************************/
2088 /* FROM_HEX - convert hex char to decimal    */
2089 /*********************************************/
2090 
/*
 * Convert one hexadecimal digit character to its value.
 *
 * Accepts '0'-'9', 'A'-'F' and 'a'-'f' and returns 0..15.
 * Any other character returns 0.  Each range is tested explicitly so
 * that characters lying just below 'a' (such as 'X' or '_') are not
 * mistaken for digits.
 */
char from_hex(char c)                           /* convert hex to dec      */
{
   if (c>='0' && c<='9') return c-'0';          /* 0-9?                    */
   if (c>='A' && c<='F') return c-'A'+10;       /* A-F?                    */
   if (c>='a' && c<='f') return c-'a'+10;       /* a-f?                    */
   return 0;                                    /* return 0 if bad...      */
}
2098 
2099 /*********************************************/
2100 /* UNESCAPE - convert escape seqs to chars   */
2101 /*********************************************/
2102 
unescape(char * str)2103 char *unescape(char *str)
2104 {
2105    unsigned char *cp1=(unsigned char *)str;     /* force unsigned so we    */
2106    unsigned char *cp2=cp1;                      /* can do > 127            */
2107 
2108    if (!str) return NULL;                       /* make sure strings valid */
2109 
2110    while (*cp1)
2111    {
2112       if (*cp1=='%')                            /* Found an escape?        */
2113       {
2114          cp1++;
2115          if (isxdigit(*cp1))                    /* ensure a hex digit      */
2116          {
2117             if (*cp1) *cp2=from_hex(*cp1++)*16; /* convert hex to an ASCII */
2118             if (*cp1) *cp2+=from_hex(*cp1);     /* (hopefully) character   */
2119             if ((*cp2<32)||(*cp2==127)) *cp2='_'; /* make '_' if its bad   */
2120             if (*cp1) { cp2++; cp1++; }
2121          }
2122          else *cp2++='%';
2123       }
2124       else *cp2++ = *cp1++;                     /* if not, just continue   */
2125    }
2126    *cp2=*cp1;                                   /* don't forget terminator */
2127    return str;                                  /* return the string       */
2128 }
2129 
2130 /*********************************************/
2131 /* OURICMP - Case insensitive string compare */
2132 /*********************************************/
2133 
/*
 * Case-insensitive comparison of str1 against the start of str2.
 *
 * Returns 0 when every character of str1 matches the corresponding
 * character of str2 (ignoring case), i.e. when str1 is a prefix of
 * str2 or equal to it; an empty str1 therefore always returns 0.
 * Returns 1 on the first mismatch.
 */
int ouricmp(char *str1, char *str2)
{
   const unsigned char *a=(const unsigned char *)str1;
   const unsigned char *b=(const unsigned char *)str2;

   for ( ; *a!=0; a++, b++)
   {
      if (tolower(*a)!=tolower(*b)) return 1;   /* mismatch */
   }
   return 0;                                    /* ran off end of str1 */
}
2141 
2142 /*********************************************/
2143 /* SRCH_STRING - get search strings from ref */
2144 /*********************************************/
2145 
srch_string(char * ptr)2146 void srch_string(char *ptr)
2147 {
2148    /* ptr should point to unescaped query string */
2149    char tmpbuf[BUFSIZE];
2150    char srch[80]="";
2151    unsigned char *cp1, *cp2, *cps;
2152    int  sp_flg=0;
2153 
2154    /* Check if search engine referrer or return  */
2155    if ( (cps=(unsigned char *)isinglist(search_list,log_rec.refer))==NULL)
2156       return;
2157 
2158    /* Try to find query variable */
2159    srch[0]='?'; srch[sizeof(srch)-1] = '\0';
2160    strncpy(&srch[1],(char *)cps,sizeof(srch)-2);   /* First, try "?..."      */
2161    if ((cp1=(unsigned char *)strstr(ptr,srch))==NULL)
2162    {
2163       srch[0]='&';                                 /* Next, try "&..."       */
2164       if ((cp1=(unsigned char *)strstr(ptr,srch))==NULL) return;
2165    }
2166    cp2=(unsigned char *)tmpbuf;
2167    while (*cp1!='=' && *cp1!=0) cp1++; if (*cp1!=0) cp1++;
2168    while (*cp1!='&' && *cp1!=0)
2169    {
2170       if (*cp1=='"' || *cp1==',' || *cp1=='?')
2171           { cp1++; continue; }                         /* skip bad ones..    */
2172       else
2173       {
2174          if (*cp1=='+') *cp1=' ';                      /* change + to space  */
2175          if (sp_flg && *cp1==' ') { cp1++; continue; } /* compress spaces    */
2176          if (*cp1==' ') sp_flg=1; else sp_flg=0;       /* (flag spaces here) */
2177          if (searchcasei)
2178             *cp2++=tolower(*cp1++);                    /* normal character   */
2179          else *cp2++=*cp1++;
2180       }
2181    }
2182    *cp2=0; cp2=(unsigned char *)tmpbuf;
2183    if (tmpbuf[0]=='?') tmpbuf[0]=' ';                  /* format fix ?       */
2184    while( *cp2!=0 && isspace((unsigned char)*cp2) ) cp2++;     /* skip sps.  */
2185    if (*cp2==0) return;
2186 
2187    /* any trailing spaces? */
2188    cp1=cp2+strlen((char *)cp2)-1;
2189    while (cp1!=cp2) if (isspace((unsigned char)*cp1)) *cp1--='\0'; else break;
2190 
2191    /* strip invalid chars */
2192    cp1=cp2;
2193    while (*cp1!=0) { if ((*cp1<32)||(*cp1==127)) *cp1='_'; cp1++; }
2194 
2195    if (put_snode((char *)cp2,(u_int64_t)1,sr_htab))
2196    {
2197       if (verbose)
2198       /* Error adding search string node, skipping .... */
2199       fprintf(stderr,"%s %s\n", msg_nomem_sc, tmpbuf);
2200    }
2201    return;
2202 }
2203 
2204 /*********************************************/
2205 /* GET_DOMAIN - Get domain portion of host   */
2206 /*********************************************/
2207 
get_domain(char * str)2208 char *get_domain(char *str)
2209 {
2210    char *cp;
2211    int  i=group_domains+1;
2212 
2213    if (isipaddr(str)) return NULL;
2214    cp = str+strlen(str)-1;
2215 
2216    while (cp!=str)
2217    {
2218       if (*cp=='.')
2219          if (!(--i)) return ++cp;
2220       cp--;
2221    }
2222    return cp;
2223 }
2224 
2225 /*********************************************/
2226 /* AGENT_MANGLE - Re-format user agent       */
2227 /*********************************************/
2228 
/*
 * Shorten the user agent string held in log_rec.agent, in place.
 *
 * The incoming str argument is overwritten immediately, so the function
 * always operates on log_rec.agent.  cp1/cp3 are read cursors and cp2
 * is the write cursor; all of them point into the same buffer.
 *
 * Three shapes of agent string are recognised, tried in this order:
 *   1. strings containing "ompatible" (with special handling when
 *      "Opera" also appears),
 *   2. strings containing "Opera",
 *   3. strings containing "Mozilla".
 * Anything else is left unchanged.  The file-level setting mangle_agent
 * selects how much detail is kept: each "mangle_agent<N" test below
 * copies a little more of the original text.
 */
void agent_mangle(char *str)
{
   char *cp1, *cp2, *cp3;

   /* work directly on the log record; the parameter value is discarded */
   str=cp2=log_rec.agent;
   cp1=strstr(str,"ompatible"); /* check known fakers */
   if (cp1!=NULL)
   {
      /* move to the ';' that follows "compatible" (or to end of string) */
      while (*cp1!=';'&&*cp1!='\0') cp1++;
      /* kludge for Mozilla/3.01 (compatible;) */
      /* proceed only if a ';' was found and what follows is not just )" */
      if (*cp1++==';' && strcmp(cp1,")\""))  /* success! */
      {
         /* Opera can hide as MSIE */
         cp3=strstr(str,"Opera");
         if (cp3!=NULL)
         {
            /* copy from "Opera" up to the first '.', turning '/' into ' ' */
            while (*cp3!='.'&&*cp3!='\0')
            {
               if(*cp3=='/') *cp2++=' ';
               else *cp2++=*cp3;
               cp3++;
            }
            cp1=cp3;
         }
         else
         {
            while (*cp1 == ' ') cp1++; /* eat spaces */
            /* copy the token after "compatible;" up to '.', ';' or end */
            while (*cp1!='.'&&*cp1!='\0'&&*cp1!=';') *cp2++=*cp1++;
         }
         if (mangle_agent<5)
         {
            /* keep the '.' and the character after it, when present */
            while (*cp1!='.'&&*cp1!=';'&&*cp1!='\0') *cp2++=*cp1++;
            if (*cp1!=';'&&*cp1!='\0') { *cp2++=*cp1++; *cp2++=*cp1++; }
         }
         if (mangle_agent<4)
            /* keep one further digit */
            if (*cp1>='0'&&*cp1<='9') *cp2++=*cp1++;
         if (mangle_agent<3)
            /* keep the rest of the token up to ';', '(', blank or end */
            while (*cp1!=';'&&*cp1!='\0'&&*cp1!='('&&*cp1!=' ') *cp2++=*cp1++;
         if (mangle_agent<2)
         {
            /* Level 1 - try to get OS */
            cp1=strstr(cp1,")");
            if (cp1!=NULL)
            {
               *cp2++=' ';
               *cp2++='(';
               /* back up from ')' to the preceding ';' or '(' and copy */
               /* the text between it and ')' inside the new parens     */
               while (*cp1!=';'&&*cp1!='('&&cp1!=str) cp1--;
               if (cp1!=str&&*cp1!='\0') cp1++;
               while (*cp1==' '&&*cp1!='\0') cp1++;
               while (*cp1!=')'&&*cp1!='\0') *cp2++=*cp1++;
               *cp2++=')';
            }
         }
         *cp2='\0';
      }
      else
      {
         /* nothing after "compatible", should we mangle? */
         /* not for now */
      }
   }
   else
   {
      cp1=strstr(str,"Opera");  /* Opera flavor         */
      if (cp1!=NULL)
      {
         /* copy the product name, then up to the first '.', with any */
         /* '/' replaced by a blank                                   */
         while (*cp1!='/'&&*cp1!=' '&&*cp1!='\0') *cp2++=*cp1++;
         while (*cp1!='.'&&*cp1!='\0')
         {
            if(*cp1=='/') *cp2++=' ';
            else *cp2++=*cp1;
            cp1++;
         }
         if (mangle_agent<5)
         {
            while (*cp1!='.'&&*cp1!='\0') *cp2++=*cp1++;
            /* NOTE(review): these two copies are unconditional; if no  */
            /* '.' was found cp1 is already at the terminator and the   */
            /* second copy reads the byte after it - confirm intent.    */
            *cp2++=*cp1++;
            *cp2++=*cp1++;
         }
         if (mangle_agent<4)
            if (*cp1>='0'&&*cp1<='9') *cp2++=*cp1++;
         if (mangle_agent<3)
            while (*cp1!=' '&&*cp1!='\0'&&*cp1!='(') *cp2++=*cp1++;
         if (mangle_agent<2)
         {
            /* append the first field inside the next "(...)" group */
            cp1=strstr(cp1,"(");
            if (cp1!=NULL)
            {
               cp1++;
               *cp2++=' ';
               *cp2++='(';
               while (*cp1!=';'&&*cp1!=')'&&*cp1!='\0') *cp2++=*cp1++;
               *cp2++=')';
            }
         }
         *cp2='\0';
      }
      else
      {
         cp1=strstr(str,"Mozilla");  /* Netscape flavor      */
         if (cp1!=NULL)
         {
            /* copy the product name; a blank separator becomes '/' */
            while (*cp1!='/'&&*cp1!=' '&&*cp1!='\0') *cp2++=*cp1++;
            if (*cp1==' ') *cp1='/';
            while (*cp1!='.'&&*cp1!='\0') *cp2++=*cp1++;
            if (mangle_agent<5)
            {
               while (*cp1!='.'&&*cp1!='\0') *cp2++=*cp1++;
               /* NOTE(review): unconditional two-character copy, same */
               /* end-of-string concern as in the Opera branch above   */
               *cp2++=*cp1++;
               *cp2++=*cp1++;
            }
            if (mangle_agent<4)
               if (*cp1>='0'&&*cp1<='9') *cp2++=*cp1++;
            if (mangle_agent<3)
               while (*cp1!=' '&&*cp1!='\0'&&*cp1!='(') *cp2++=*cp1++;
            if (mangle_agent<2)
            {
               /* Level 1 - Try to get OS */
               cp1=strstr(cp1,"(");
               if (cp1!=NULL)
               {
                  cp1++;
                  *cp2++=' ';
                  *cp2++='(';
                  while (*cp1!=';'&&*cp1!=')'&&*cp1!='\0') *cp2++=*cp1++;
                  *cp2++=')';
               }
            }
            *cp2='\0';
         }
      }
   }
}
2362 
2363 /*********************************************/
2364 /* OUR_GZGETS - enhanced gzgets for log only */
2365 /*********************************************/
2366 
/*
 * Read one line from a compressed log through the file-level read
 * buffer (f_buf, with f_cp as the read cursor and f_end as the number
 * of bytes last loaded).
 *
 * fp    handle passed to gzread() or, when built with USE_BZIP and
 *       gz_log equals COMP_BZIP, to BZ2_bzread()
 * buf   destination for the line
 * size  capacity of buf; at most size-1 characters are stored
 *
 * Characters are copied up to and including the first newline, or
 * until size-1 characters have been stored; buf is then terminated
 * and returned.  If the input ends cleanly in the middle of a line,
 * the characters collected so far are terminated and returned, so a
 * final line without a newline is not lost.  Z_NULL is returned when
 * nothing could be read, on a read error, or when buf is NULL or
 * size is less than 1.
 */
char *our_gzgets(void *fp, char *buf, int size)
{
   char *out_cp=buf;      /* point to output */

   /* a non-positive size would never reach the termination branch */
   if (buf==NULL || size<1) return Z_NULL;

   while (1)
   {
      if (f_cp>(f_buf+f_end-1))     /* load? */
      {
#ifdef USE_BZIP
         f_end=(gz_log==COMP_BZIP)?
            BZ2_bzread(fp, f_buf, GZ_BUFSIZE):
            gzread(fp, f_buf, GZ_BUFSIZE);
#else
         f_end=gzread(fp, f_buf, GZ_BUFSIZE);
#endif
         if (f_end<=0)
         {
            /* clean end of data with a partial line pending: return it */
            if (f_end==0 && out_cp!=buf) { *out_cp='\0'; return buf; }
            return Z_NULL;
         }
         f_cp=f_buf;
      }

      if (--size)                   /* more? */
      {
         *out_cp++ = *f_cp;
         if (*f_cp++ == '\n') { *out_cp='\0'; return buf; }
      }
      else { *out_cp='\0'; return buf; }
   }
}
2393 
2394 #ifdef USE_BZIP
2395 /*********************************************/
2396 /* bz2_rewind - our 'rewind' for bz2 files   */
2397 /*********************************************/
2398 
/*
 * Restart reading a bzip2 file from the beginning by closing and
 * reopening it.
 *
 * fp     in/out: handle to close; receives the newly opened handle
 * fname  path of the file to reopen
 * mode   not used; the file is always reopened with "rb"
 *
 * The file-level read buffer is cleared and its counters are reset.
 * Returns 0 on success, -1 if the file could not be reopened.
 */
int bz2_rewind( void **fp, char *fname, char *mode )
{
   /* close the current stream and open a fresh one */
   BZ2_bzclose( *fp );
   *fp = BZ2_bzopen( fname, "rb");

   /* reset buffer counters */
   memset(f_buf, 0, sizeof(f_buf));
   f_end=0;
   f_cp=f_buf+GZ_BUFSIZE;

   return (*fp == Z_NULL) ? -1 : 0;
}
2408 #endif /* USE_BZIP */
2409 
2410 /*********************************************/
2411 /* ISIPADDR - Determine if str is IP address */
2412 /*********************************************/
2413 
/*
 * Classify a host string by its characters.
 *
 * Returns:
 *    1  only digits and dots, with exactly three dots (IPv4)
 *    2  contains ':' and '.', made up solely of ":.abcdef0123456789"
 *       (IPv4 embedded in IPv6)
 *    3  contains ':' but no '.', same character set (IPv6)
 *    0  no ':' and some character other than digit or dot (hostname)
 *   -1  contains ':' plus a character outside the set above, or is
 *       digits and dots with a dot count other than three (this
 *       includes the empty string)
 *
 * Only the character set and separator counts are examined; numeric
 * ranges are not checked.
 */
int isipaddr(char *str)
{
   const char *p=str;
   int  colons=0, dots=0;

   if (strchr(str,':')!=NULL)
   {
      /* Possible IPv6 Address */
      for ( ; *p!='\0' && strchr(":.abcdef0123456789",*p)!=NULL; p++)
      {
         if (*p=='.')      dots++;
         else if (*p==':') colons++;
      }

      if (*p!='\0') return -1;                    /* bad hostname (has ':') */
      return (colons>0 && dots>0) ? 2 : 3;        /* IPv4/IPv6 or IPv6      */
   }

   /* Not an IPv6 address, check for IPv4 */
   for ( ; *p!='\0' && strchr(".0123456789",*p)!=NULL; p++)
   {
      if (*p=='.') dots++;
   }
   if (*p!='\0') return 0;                        /* hostname     */
   return (dots==3) ? 1 : -1;                     /* IPv4 or bad  */
}
2446 
2447 /*****************************************************************/
2448 /*                                                               */
2449 /* JDATE  - Julian date calculator                               */
2450 /*                                                               */
2451 /* Calculates the number of days since Jan 1, 0000.              */
2452 /*                                                               */
2453 /* Originally written by Bradford L. Barrett (03/17/1988)        */
/* Returns an unsigned 64-bit value representing the number of   */
2455 /* days since January 1, 0000.                                   */
2456 /*                                                               */
2457 /* Note: Due to the changes made by Pope Gregory XIII in the     */
/*       16th Century (Feb 24, 1582), dates before 1583 will     */
/*       not return a truly accurate number (will be at least    */
2460 /*       10 days off).  Somehow, I don't think this will         */
2461 /*       present much of a problem for most situations :)        */
2462 /*                                                               */
2463 /* Usage: days = jdate(day, month, year)                         */
2464 /*                                                               */
2465 /* The number returned is adjusted by 5 to facilitate day of     */
2466 /* week calculations.  The mod of the returned value gives the   */
2467 /* day of the week the date is.  (ie: dow = days % 7 ) where     */
2468 /* dow will return 0=Sunday, 1=Monday, 2=Tuesday, etc...         */
2469 /*                                                               */
2470 /*****************************************************************/
2471 
/*
 * Compute a day number for the given calendar date.
 *
 * day    day of month
 * month  month number; used as index month-1 into a 12-entry table,
 *        so it must be in the range 1..12
 * year   full year
 *
 * Returns the day count plus 5, so that (result % 7) gives the day of
 * the week with 0 = Sunday.
 */
u_int64_t jdate( int day, int month, int year )
{
   /* days elapsed before the first of each month in a non-leap year */
   static const int days_before[12] =
      {0,31,59,90,120,151,181,212,243,273,304,334};
   u_int64_t total;
   int leap_days, is_leap;

   /* leap days contributed by the 4 / 100 / 400 year rules */
   leap_days = ((year+4)/4) - ((year/100)-(year/400));

   /* base number: whole years, months so far, day of month */
   total  = (u_int64_t)year*365;
   total += day;
   total += days_before[month-1];
   total += leap_days;

   /* in a leap year the extra day has not happened yet before March 1st */
   is_leap = (year % 4 == 0) && ((year % 100 != 0) || (year % 400 == 0));
   if (is_leap && (month < 3)) total--;

   /* shift so that the value modulo 7 is the day of the week */
   return(total+5);
}
2492