1 /*
2 webalizer - a web server log analysis program
3
4 Copyright (C) 1997-2013 Bradford L. Barrett
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version, and provided that the above
10 copyright and permission notice is included with all distributed
11 copies of this or derived software.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
21
22 */
23
24 /*********************************************/
25 /* STANDARD INCLUDES */
26 /*********************************************/
27
28 /* Fix broken Zlib 64 bitness */
29 #if _FILE_OFFSET_BITS == 64
30 #ifndef _LARGEFILE64_SOURCE
31 #define _LARGEFILE64_SOURCE 1
32 #endif
33 #endif
34
35 #include <time.h>
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <string.h>
39 #include <errno.h>
40 #include <unistd.h> /* normal stuff */
41 #include <locale.h>
42 #include <ctype.h>
43 #include <sys/utsname.h>
44 #include <zlib.h>
45 #include <sys/stat.h>
46
47 /* ensure getopt */
48 #ifdef HAVE_GETOPT_H
49 #include <getopt.h>
50 #endif
51
52 /* ensure sys/types */
53 #ifndef _SYS_TYPES_H
54 #include <sys/types.h>
55 #endif
56
57 /* Need socket header? */
58 #ifdef HAVE_SYS_SOCKET_H
59 #include <sys/socket.h>
60 #endif
61
62 /* some systems need this */
63 #ifdef HAVE_MATH_H
64 #include <math.h>
65 #endif
66
67 #ifdef USE_DNS
68 #include <netdb.h>
69 #include <netinet/in.h>
70 #include <arpa/inet.h>
71 #include <db.h>
72 #endif /* USE_DNS */
73
74 #ifdef USE_GEOIP
75 #include <maxminddb.h>
76 #endif
77
78 #ifdef USE_BZIP
79 #include <bzlib.h>
80 int bz2_rewind(void **, char *, char *);
81 #endif
82
83 #include "webalizer.h" /* main header */
84 #include "output.h"
85 #include "parser.h"
86 #include "preserve.h"
87 #include "hashtab.h"
88 #include "linklist.h"
89 #include "webalizer_lang.h" /* lang. support */
90 #ifdef USE_DNS
91 #include "dns_resolv.h"
92 #endif
93
94 /* internal function prototypes */
95
96 void clear_month(); /* clear monthly stuff (main calls it when a record starts a new month) */
97 char *unescape(char *); /* unescape URLs; main also applies it to the referrer and ident fields */
98 void print_opts(char *); /* print options (-h); main passes argv[0] */
99 void print_version(); /* print version info (-V) */
100 int isurlchar(unsigned char, int); /* valid URL char test; int arg is a flag (main passes stripcgi or 1) */
101 void get_config(char *); /* read a config file (default files and -c) */
102 static char *save_opt(char *); /* save conf option */
103 void srch_string(char *); /* search string analysis */
104 char *get_domain(char *); /* return domain name */
105 void agent_mangle(char *); /* reformat user agent (main calls it when mangle_agent is set) */
106 char *our_gzgets(void *, char *, int); /* our gzgets: line reader main uses for compressed logs */
107 int ouricmp(char *, char *); /* case insensitive compare */
108 int isipaddr(char *); /* is IP address test */
109
110 /*********************************************/
111 /* GLOBAL VARIABLES */
112 /*********************************************/
113
114 char *version = "2.23"; /* program version */
115 char *editlvl = "08"; /* edit level */
116 char *moddate = "26-Aug-2013"; /* modification date */
117 char *copyright = "Copyright 1997-2013 by Bradford L. Barrett"; /* copyright notice */
118
119 int verbose = 2; /* 2=verbose, 1=errors only, 0=none (-q sets 1, -Q sets 0) */
120 int debug_mode = 0; /* debug mode flag (-d, also set by -v) */
121 int time_me = 0; /* timing display flag (-T) */
122 int local_time = 1; /* 1=localtime 0=GMT (UTC) */
123 int hist_gap = 0; /* 1=error w/hist, save bkp */
124 int ignore_hist = 0; /* history flag (1=skip, -i) */
125 int ignore_state = 0; /* state flag (1=skip, -b) */
126 int default_index= 1; /* add the default index. alias (1=yes) */
127 int hourly_graph = 1; /* hourly graph display (-G disables) */
128 int hourly_stats = 1; /* hourly stats table (-H disables) */
129 int daily_graph = 1; /* daily graph display */
130 int daily_stats = 1; /* daily stats table */
131 int ctry_graph = 1; /* country graph display (-Y disables) */
132 int shade_groups = 1; /* Group shading 0=no 1=yes */
133 int hlite_groups = 1; /* Group hlite 0=no 1=yes */
134 int mangle_agent = 0; /* mangle user agents (0=no, -M) */
135 int incremental = 0; /* incremental mode 1=yes (-p) */
136 int use_https = 0; /* use 'https://' on URLs */
137 int htaccess = 0; /* create .htaccess? (0=no) */
138 int stripcgi = 1; /* strip url cgi (0=no) */
139 int normalize = 1; /* normalize CLF URL (0=no, -Z disables) */
140 int trimsquid = 0; /* trim squid urls (0=no) */
141 int searchcasei = 1; /* case insensitive search */
142 int visit_timeout= 1800; /* visit timeout (seconds, -m) */
143 int graph_legend = 1; /* graph legend (1=yes, -L disables) */
144 int graph_lines = 2; /* graph lines (0=none, -l); main caps it at 20 */
145 int fold_seq_err = 0; /* fold seq err (0=no, -f) */
146 int log_type = LOG_CLF; /* log type (default=CLF, -F) */
147 int group_domains= 0; /* Group domains 0=none (-g) */
148 int hide_sites = 0; /* Hide ind. sites (0=no, -X) */
149 int link_referrer= 0; /* Link referrers (0=no) */
150 char *hname = NULL; /* hostname for reports (-n) */
151 char *state_fname = "webalizer.current"; /* run state file name */
152 char *hist_fname = "webalizer.hist"; /* name of history file */
153 char *html_ext = "html"; /* HTML file suffix (-x) */
154 char *dump_ext = "tab"; /* Dump file suffix */
155 char *conf_fname = NULL; /* name of config file */
156 char *log_fname = NULL; /* log file name (NULL = read STDIN) */
157 char *out_dir = NULL; /* output directory (-o, NULL = current dir) */
158 char *blank_str = ""; /* blank string */
159 char *geodb_fname = NULL; /* GeoDB database filename (-J) */
160 char *dns_cache = NULL; /* DNS cache file name (-D) */
161 int dns_children = 0; /* DNS children (0=none, -N) */
162 int cache_ips = 0; /* CacheIPs in DB (0=no) */
163 int cache_ttl = 7; /* DNS Cache TTL (days); main forces 1..100 when USE_DNS */
164 int geodb = 0; /* Use GeoDB (0=no, -j) */
165 int graph_mths = 12; /* # months in index graph (-k); main clamps to 12..GRAPHMAX */
166 int index_mths = 12; /* # months in index table (-K); main clamps to 12..HISTSIZE */
167 int year_hdrs = 1; /* index year separators */
168 int year_totals = 1; /* index year subtotals */
169 int use_flags = 0; /* Show flags in ctry table (set by -z) */
170 char *flag_dir = "flags"; /* location of flag icons (-z argument) */
171
172 #ifdef USE_GEOIP
173 int geoip = 0; /* Use GeoIP (0=no, -w); main clears it when GeoDB is used or the open fails */
174 char *geoip_db = NULL; /* GeoIP database filename (-W); NULL = GeoLite2-Country.mmdb */
175 int mmdb_open = MMDB_FILE_OPEN_ERROR; /* MMDB_open() status; MMDB_SUCCESS once opened */
176 MMDB_s mmdb; /* GeoIP database handle */
177 #endif
178
179 int ntop_sites = 30; /* top n sites to display (-S) */
180 int ntop_sitesK = 10; /* top n sites (by kbytes) */
181 int ntop_urls = 30; /* top n URLs to display (-U) */
182 int ntop_urlsK = 10; /* top n URLs (by kbytes) */
183 int ntop_entry = 10; /* top n entry URLs (-e); capped at max of ntop_urls/ntop_urlsK, 0 for FTP logs */
184 int ntop_exit = 10; /* top n exit URLs (-E); capped at max of ntop_urls/ntop_urlsK, 0 for FTP logs */
185 int ntop_refs = 30; /* top n referrers to display (-R) */
186 int ntop_agents = 15; /* top n user agents to display (-A) */
187 int ntop_ctrys = 30; /* top n countries to display (-C); capped at the number of defined countries */
188 int ntop_search = 20; /* top n search strings (0 for FTP logs) */
189 int ntop_users = 20; /* top n users to display */
190
191 int all_sites = 0; /* List All sites (0=no) */
192 int all_urls = 0; /* List All URLs (0=no) */
193 int all_refs = 0; /* List All Referrers (0=no) */
194 int all_agents = 0; /* List All User Agents (0=no) */
195 int all_search = 0; /* List All Search Strings (0=no) */
196 int all_users = 0; /* List All Usernames (0=no) */
197
198 int dump_sites = 0; /* Dump tab delimited sites */
199 int dump_urls = 0; /* Dump tab delimited URLs */
200 int dump_refs = 0; /* Dump tab delimited Referrers */
201 int dump_agents = 0; /* Dump tab delimited User Agents */
202 int dump_users = 0; /* Dump tab delimited Usernames */
203 int dump_search = 0; /* Dump tab delimited Search strings */
204 int dump_header = 0; /* Dump header as first rec */
205 char *dump_path = NULL; /* Path for dump files */
206
207 int cur_year=0, cur_month=0, /* year/month/day/hour/min/sec */
208 cur_day=0, cur_hour=0, /* tracking variables; cur_month==0 */
209 cur_min=0, cur_sec=0; /* means no record processed yet */
210
211 u_int64_t cur_tstamp=0; /* timestamp of last accepted record (seconds since epoch) */
212 u_int64_t rec_tstamp=0; /* timestamp of the record being processed */
213 u_int64_t req_tstamp=0; /* cur_tstamp value saved before it is updated */
214 u_int64_t epoch; /* jdate(1,1,1970), used for timestamp adj. */
215
216 int check_dup=0; /* check for dup flag (incremental mode) */
217 int gz_log=COMP_NONE; /* log compression: COMP_NONE, COMP_GZIP or COMP_BZIP */
218
219 double t_xfer=0.0; /* monthly total xfer value */
220 u_int64_t t_hit=0,t_file=0,t_site=0, /* monthly total vars */
221 t_url=0,t_ref=0,t_agent=0,
222 t_page=0, t_visit=0, t_user=0;
223
224 double tm_xfer[31]; /* daily transfer totals */
225
226 u_int64_t tm_hit[31], tm_file[31], /* daily total arrays */
227 tm_site[31], tm_page[31],
228 tm_visit[31];
229
230 u_int64_t dt_site; /* daily 'sites' total */
231
232 u_int64_t ht_hit=0, mh_hit=0; /* hourly hits totals */
233
234 u_int64_t th_hit[24], th_file[24], /* hourly total arrays */
235 th_page[24];
236
237 double th_xfer[24]; /* hourly transfer totals */
238
239 int f_day,l_day; /* first/last day vars */
240
241 struct utsname system_info; /* system info structure (filled by uname) */
242
243 u_int64_t ul_bogus =0; /* Dummy counter for groups */
244
245 struct log_struct log_rec; /* expanded log storage */
246
247 void *zlog_fp; /* compressed logfile ptr (gzopen or BZ2_bzopen handle) */
248 FILE *log_fp; /* regular logfile pointer */
249
250 char buffer[BUFSIZE]; /* log file record buffer */
251 char tmp_buf[BUFSIZE]; /* copy of buffer kept for error output; also scratch for the config path */
252
253 CLISTPTR *top_ctrys = NULL; /* Top countries table (ntop_ctrys entries, allocated in main) */
254
255 #define GZ_BUFSIZE 16384 /* our_getfs buffer size */
256 char f_buf[GZ_BUFSIZE]; /* our_getfs buffer; NOTE(review): our_getfs presumably means our_gzgets - confirm */
257 char *f_cp=f_buf+GZ_BUFSIZE; /* pointer into the buffer (starts one past the end) */
258 int f_end=0; /* count to end of buffer */
259
260 char hit_color[] = "#00805c"; /* graph hit color */
261 char file_color[] = "#0040ff"; /* graph file color */
262 char site_color[] = "#ff8000"; /* graph site color */
263 char kbyte_color[] = "#ff0000"; /* graph kbyte color */
264 char page_color[] = "#00e0ff"; /* graph page color */
265 char visit_color[] = "#ffff00"; /* graph visit color */
266 char misc_color[] = "#00e0ff"; /* graph misc color */
267 char pie_color1[] = "#800080"; /* pie additional color 1 */
268 char pie_color2[] = "#80ffc0"; /* pie additional color 2 */
269 char pie_color3[] = "#ff00ff"; /* pie additional color 3 */
270 char pie_color4[] = "#ffc080"; /* pie additional color 4 */
271
272 /*********************************************/
273 /* MAIN - start here */
274 /*********************************************/
275
main(int argc,char * argv[])276 int main(int argc, char *argv[])
277 {
278 int i; /* generic counter */
279 char *cp1, *cp2, *cp3; /* generic char pointers */
280 char host_buf[MAXHOST+1]; /* used to save hostname */
281
282 NLISTPTR lptr; /* generic list pointer */
283
284 extern char *optarg; /* used for command line */
285 extern int optind; /* parsing routine 'getopt' */
286 extern int opterr;
287
288 time_t start_time, end_time; /* program timers */
289 float temp_time; /* temporary time storage */
290
291 int rec_year,rec_month=1,rec_day,rec_hour,rec_min,rec_sec;
292
293 int good_rec =0; /* 1 if we had a good record */
294 u_int64_t total_rec =0; /* Total Records Processed */
295 u_int64_t total_ignore=0; /* Total Records Ignored */
296 u_int64_t total_bad =0; /* Total Bad Records */
297
298 int max_ctry; /* max countries defined */
299
300 /* month names used for parsing logfile (shouldn't be lang specific) */
301 char *log_month[12]={ "jan", "feb", "mar",
302 "apr", "may", "jun",
303 "jul", "aug", "sep",
304 "oct", "nov", "dec"};
305
306 /* stat struct for files */
307 struct stat log_stat;
308
309 /* Assume that LC_CTYPE is what the user wants for non-ASCII chars */
310 setlocale(LC_CTYPE,"");
311
312 /* initalize epoch */
313 epoch=jdate(1,1,1970); /* used for timestamp adj. */
314
315 sprintf(tmp_buf,"%s/webalizer.conf",ETCDIR);
316 /* check for default config file */
317 if (!access("webalizer.conf",F_OK))
318 get_config("webalizer.conf");
319 else if (!access(tmp_buf,F_OK))
320 get_config(tmp_buf);
321
322 /* get command line options */
323 opterr = 0; /* disable parser errors */
324 while ((i=getopt(argc,argv,"a:A:bc:C:dD:e:E:fF:g:GhHiI:jJ:k:K:l:Lm:M:n:N:o:O:pP:qQr:R:s:S:t:Tu:U:vVwW:x:XYz:Z"))!=EOF)
325 {
326 switch (i)
327 {
328 case 'a': add_nlist(optarg,&hidden_agents); break; /* Hide agents */
329 case 'A': ntop_agents=atoi(optarg); break; /* Top agents */
330 case 'b': ignore_state=1; break; /* Ignore state file */
331 case 'c': get_config(optarg); break; /* Config file */
332 case 'C': ntop_ctrys=atoi(optarg); break; /* Top countries */
333 case 'd': debug_mode=1; break; /* Debug */
334 case 'D': dns_cache=optarg; break; /* DNS Cache filename */
335 case 'e': ntop_entry=atoi(optarg); break; /* Top entry pages */
336 case 'E': ntop_exit=atoi(optarg); break; /* Top exit pages */
337 case 'f': fold_seq_err=1; break; /* Fold sequence errs */
338 case 'F': log_type=(tolower(optarg[0])=='f')?
339 LOG_FTP:(tolower(optarg[0])=='s')?
340 LOG_SQUID:(tolower(optarg[0])=='w')?
341 LOG_W3C:LOG_CLF; break; /* define log type */
342 case 'g': group_domains=atoi(optarg); break; /* GroupDomains (0=no) */
343 case 'G': hourly_graph=0; break; /* no hourly graph */
344 case 'h': print_opts(argv[0]); break; /* help */
345 case 'H': hourly_stats=0; break; /* no hourly stats */
346 case 'i': ignore_hist=1; break; /* Ignore history */
347 case 'I': add_nlist(optarg,&index_alias); break; /* Index alias */
348 case 'j': geodb=1; break; /* Enable GeoDB */
349 case 'J': geodb_fname=optarg; break; /* GeoDB db filename */
350 case 'k': graph_mths=atoi(optarg); break; /* # months idx graph */
351 case 'K': index_mths=atoi(optarg); break; /* # months idx table */
352 case 'l': graph_lines=atoi(optarg); break; /* Graph Lines */
353 case 'L': graph_legend=0; break; /* Graph Legends */
354 case 'm': visit_timeout=atoi(optarg); break; /* Visit Timeout */
355 case 'M': mangle_agent=atoi(optarg); break; /* mangle user agents */
356 case 'n': hname=optarg; break; /* Hostname */
357 case 'N': dns_children=atoi(optarg); break; /* # of DNS children */
358 case 'o': out_dir=optarg; break; /* Output directory */
359 case 'O': add_nlist(optarg,&omit_page); break; /* pages not counted */
360 case 'p': incremental=1; break; /* Incremental run */
361 case 'P': add_nlist(optarg,&page_type); break; /* page view types */
362 case 'q': verbose=1; break; /* Quiet (verbose=1) */
363 case 'Q': verbose=0; break; /* Really Quiet */
364 case 'r': add_nlist(optarg,&hidden_refs); break; /* Hide referrer */
365 case 'R': ntop_refs=atoi(optarg); break; /* Top referrers */
366 case 's': add_nlist(optarg,&hidden_sites); break; /* Hide site */
367 case 'S': ntop_sites=atoi(optarg); break; /* Top sites */
368 case 't': msg_title=optarg; break; /* Report title */
369 case 'T': time_me=1; break; /* TimeMe */
370 case 'u': add_nlist(optarg,&hidden_urls); break; /* hide URL */
371 case 'U': ntop_urls=atoi(optarg); break; /* Top urls */
372 case 'v': verbose=2; debug_mode=1; break; /* Verbose */
373 case 'V': print_version(); break; /* Version */
374 #ifdef USE_GEOIP
375 case 'w': geoip=1; break; /* Enable GeoIP */
376 case 'W': geoip_db=optarg; break; /* GeoIP database name */
377 #endif
378 case 'x': html_ext=optarg; break; /* HTML file extension */
379 case 'X': hide_sites=1; break; /* Hide ind. sites */
380 case 'Y': ctry_graph=0; break; /* Supress ctry graph */
381 case 'Z': normalize=0; break; /* Dont normalize URLs */
382 case 'z': use_flags=1; flag_dir=optarg; break; /* Ctry flag dir */
383 }
384 }
385
386 if (argc - optind != 0) log_fname = argv[optind];
387 if ( log_fname && (log_fname[0]=='-')) log_fname=NULL; /* force STDIN? */
388
389 /* check for gzipped file - .gz */
390 if (log_fname) if (!strcmp((log_fname+strlen(log_fname)-3),".gz"))
391 gz_log=COMP_GZIP;
392
393 #ifdef USE_BZIP
394 /* check for bzip file - .bz2 */
395 if (log_fname) if (!strcmp((log_fname+strlen(log_fname)-4),".bz2"))
396 gz_log=COMP_BZIP;
397 #endif
398
399 /* setup our internal variables */
400 init_counters(); /* initalize (zero) main counters */
401 memset(hist, 0, sizeof(hist)); /* initalize (zero) history array */
402
403 /* add default index. alias if needed */
404 if (default_index) add_nlist("index.",&index_alias);
405
406 if (page_type==NULL) /* check if page types present */
407 {
408 if ((log_type==LOG_CLF)||(log_type==LOG_SQUID)||(log_type==LOG_W3C))
409 {
410 add_nlist("htm*" ,&page_type); /* if no page types specified, we */
411 add_nlist("cgi" ,&page_type); /* use the default ones here... */
412 if (!isinlist(page_type,html_ext)) add_nlist(html_ext,&page_type);
413 }
414 else add_nlist("txt" ,&page_type); /* FTP logs default to .txt */
415 }
416
417 for (max_ctry=0;ctry[max_ctry].desc;max_ctry++);
418 if (ntop_ctrys > max_ctry) ntop_ctrys = max_ctry; /* force upper limit */
419 if (graph_lines> 20) graph_lines= 20; /* keep graphs sane! */
420 if (graph_mths<12) graph_mths=12;
421 if (graph_mths>GRAPHMAX) graph_mths=GRAPHMAX;
422 if (index_mths<12) index_mths=12;
423 if (index_mths>HISTSIZE) index_mths=HISTSIZE;
424
425 if (log_type == LOG_FTP)
426 {
427 /* disable stuff for ftp logs */
428 ntop_entry=ntop_exit=0;
429 ntop_search=0;
430 }
431 else
432 {
433 if (search_list==NULL)
434 {
435 /* If no search engines defined, define some :) */
436 add_glist(".google. q=" ,&search_list);
437 add_glist("yahoo.com p=" ,&search_list);
438 add_glist("altavista.com q=" ,&search_list);
439 add_glist("aolsearch. query=" ,&search_list);
440 add_glist("ask.co q=" ,&search_list);
441 add_glist("eureka.com q=" ,&search_list);
442 add_glist("lycos.com query=" ,&search_list);
443 add_glist("hotbot.com MT=" ,&search_list);
444 add_glist("msn.com q=" ,&search_list);
445 add_glist("infoseek.com qt=" ,&search_list);
446 add_glist("webcrawler searchText=" ,&search_list);
447 add_glist("excite search=" ,&search_list);
448 add_glist("netscape.com query=" ,&search_list);
449 add_glist("mamma.com query=" ,&search_list);
450 add_glist("alltheweb.com q=" ,&search_list);
451 add_glist("northernlight.com qr=" ,&search_list);
452 }
453 }
454
455 /* ensure entry/exits don't exceed urls */
456 i=(ntop_urls>ntop_urlsK)?ntop_urls:ntop_urlsK;
457 if (ntop_entry>i) ntop_entry=i;
458 if (ntop_exit>i) ntop_exit=i;
459
460 for (i=0;i<MAXHASH;i++)
461 {
462 sm_htab[i]=sd_htab[i]=NULL; /* initalize hash tables */
463 um_htab[i]=NULL;
464 rm_htab[i]=NULL;
465 am_htab[i]=NULL;
466 sr_htab[i]=NULL;
467 }
468
469 /* Be polite and announce yourself... */
470 if (verbose>1)
471 {
472 uname(&system_info);
473 printf("Webalizer V%s-%s (%s %s %s) %s\n", version,editlvl,
474 system_info.sysname, system_info.release,
475 system_info.machine,language);
476 }
477
478 #ifndef USE_DNS
479 if (strstr(argv[0],"webazolver")!=0)
480 /* DNS support not present, aborting... */
481 { printf("%s\n",msg_dns_abrt); exit(1); }
482 #else
483 /* Force sane values for cache TTL */
484 if (cache_ttl<1) cache_ttl=1;
485 if (cache_ttl>100) cache_ttl=100;
486 #endif /* USE_DNS */
487
488 /* open log file */
489 if (log_fname)
490 {
491 /* stat the file */
492 if ( !(lstat(log_fname, &log_stat)) )
493 {
494 /* check if the file a symlink */
495 if ( S_ISLNK(log_stat.st_mode) )
496 {
497 if (verbose)
498 fprintf(stderr,"%s %s (symlink)\n",msg_log_err,log_fname);
499 exit(EBADF);
500 }
501 }
502
503 if (gz_log)
504 {
505 /* open compressed file */
506 #ifdef USE_BZIP
507 if (gz_log==COMP_BZIP)
508 zlog_fp = BZ2_bzopen(log_fname,"rb");
509 else
510 #endif
511 zlog_fp = gzopen(log_fname, "rb");
512 if (zlog_fp==Z_NULL)
513 {
514 /* Error: Can't open log file ... */
515 fprintf(stderr, "%s %s (%d)\n",msg_log_err,log_fname,ENOENT);
516 exit(ENOENT);
517 }
518 }
519 else
520 {
521 /* open regular file */
522 log_fp = fopen(log_fname,"r");
523 if (log_fp==NULL)
524 {
525 /* Error: Can't open log file ... */
526 fprintf(stderr, "%s %s\n",msg_log_err,log_fname);
527 exit(1);
528 }
529 }
530 }
531
532 /* Using logfile ... */
533 if (verbose>1)
534 {
535 printf("%s %s (",msg_log_use,log_fname?log_fname:"STDIN");
536 if (gz_log==COMP_GZIP) printf("gzip-");
537 #ifdef USE_BZIP
538 if (gz_log==COMP_BZIP) printf("bzip-");
539 #endif
540 switch (log_type)
541 {
542 /* display log file type hint */
543 case LOG_CLF: printf("clf)\n"); break;
544 case LOG_FTP: printf("ftp)\n"); break;
545 case LOG_SQUID: printf("squid)\n"); break;
546 case LOG_W3C: printf("w3c)\n"); break;
547 }
548 }
549
550 /* switch directories if needed */
551 if (out_dir)
552 {
553 if (chdir(out_dir) != 0)
554 {
555 /* Error: Can't change directory to ... */
556 fprintf(stderr, "%s %s\n",msg_dir_err,out_dir);
557 exit(1);
558 }
559 }
560
561 #ifdef USE_DNS
562 if (strstr(argv[0],"webazolver")!=0)
563 {
564 if (!dns_children) dns_children=5; /* default dns children if needed */
565 if (!dns_cache)
566 {
567 /* No cache file specified, aborting... */
568 fprintf(stderr,"%s\n",msg_dns_nocf); /* Must have a cache file */
569 exit(1);
570 }
571 }
572
573 if (dns_cache && dns_children) /* run-time resolution */
574 {
575 if (dns_children > MAXCHILD) dns_children=MAXCHILD;
576 /* DNS Lookup (#children): */
577 if (verbose>1) printf("%s (%d): ",msg_dns_rslv,dns_children);
578 fflush(stdout);
579 (gz_log)?dns_resolver(zlog_fp):dns_resolver(log_fp);
580 #ifdef USE_BZIP
581 (gz_log==COMP_BZIP)?bz2_rewind(&zlog_fp, log_fname, "rb"):
582 #endif
583 (gz_log==COMP_GZIP)?gzrewind(zlog_fp):
584 (log_fname)?rewind(log_fp):exit(0);
585 }
586
587 if (strstr(argv[0],"webazolver")!=0) exit(0); /* webazolver exits here */
588
589 if (dns_cache)
590 {
591 if (!open_cache()) { dns_cache=NULL; dns_db=NULL; }
592 else
593 {
594 /* Using DNS cache file <filaneme> */
595 if (verbose>1) printf("%s %s\n",msg_dns_usec,dns_cache);
596 }
597 }
598
599 /* Open GeoDB? */
600 if (geodb)
601 {
602 geo_db=geodb_open(geodb_fname);
603 if (geo_db==NULL)
604 {
605 if (verbose) printf("%s: %s\n",msg_geo_open,
606 (geodb_fname)?geodb_fname:msg_geo_dflt);
607 if (verbose) printf("GeoDB %s\n",msg_geo_nolu);
608 geodb=0;
609 }
610 else if (verbose>1) printf("%s %s\n",
611 msg_geo_use,geodb_ver(geo_db,buffer));
612 #ifdef USE_GEOIP
613 if (geoip) geoip=0; /* Disable GeoIP if using GeoDB */
614 #endif
615 }
616 #endif /* USE_DNS */
617
618 #ifdef USE_GEOIP
619 /* open GeoIP database */
620 if (geoip)
621 {
622 if (geoip_db!=NULL)
623 mmdb_open = MMDB_open(geoip_db, MMDB_MODE_MMAP, &mmdb);
624 else
625 mmdb_open = MMDB_open("GeoLite2-Country.mmdb", MMDB_MODE_MMAP, &mmdb);
626
627 /* Did we open one? */
628 if (mmdb_open!=MMDB_SUCCESS)
629 {
630 /* couldn't open.. warn user */
631 if (verbose) printf("GeoIP %s\n",msg_geo_nolu);
632 geoip=0;
633 }
634 }
635 #endif /* USE_GEOIP */
636
637 /* Creating output in ... */
638 if (verbose>1)
639 printf("%s %s\n",msg_dir_use,out_dir?out_dir:msg_cur_dir);
640
641 /* prep hostname */
642 if (!hname)
643 {
644 if (uname(&system_info)) hname="localhost";
645 else hname=system_info.nodename;
646 }
647
648 /* Hostname for reports is ... */
649 if (strlen(hname)) if (verbose>1) printf("%s '%s'\n",msg_hostname,hname);
650
651 /* get past history */
652 if (ignore_hist) { if (verbose>1) printf("%s\n",msg_ign_hist); }
653 else get_history();
654
655 if (incremental) /* incremental processing? */
656 {
657 if ((i=restore_state())) /* restore internal data structs */
658 {
659 /* Error: Unable to restore run data (error num) */
660 /* if (verbose) fprintf(stderr,"%s (%d)\n",msg_bad_data,i); */
661 fprintf(stderr,"%s (%d)\n",msg_bad_data,i);
662 exit(1);
663 }
664 }
665
666 /* Allocate memory for our TOP countries array */
667 if (ntop_ctrys != 0)
668 { if ( (top_ctrys=calloc(ntop_ctrys,sizeof(CLISTPTR))) == NULL)
669 /* Can't get memory, Top Countries disabled! */
670 {if (verbose) fprintf(stderr,"%s\n",msg_nomem_tc); ntop_ctrys=0;}}
671
672 /* get processing start time */
673 start_time = time(NULL);
674
675 /*********************************************/
676 /* MAIN PROCESS LOOP - read through log file */
677 /*********************************************/
678
679 while ( (gz_log)?(our_gzgets(zlog_fp,buffer,BUFSIZE) != Z_NULL):
680 (fgets(buffer,BUFSIZE,log_fname?log_fp:stdin) != NULL))
681 {
682 total_rec++;
683 if (strlen(buffer) == (BUFSIZE-1))
684 {
685 if (verbose)
686 {
687 fprintf(stderr,"%s",msg_big_rec);
688 if (debug_mode) fprintf(stderr,":\n%s",buffer);
689 else fprintf(stderr,"\n");
690 }
691
692 total_bad++; /* bump bad record counter */
693
694 /* get the rest of the record */
695 while ( (gz_log)?(our_gzgets(zlog_fp,buffer,BUFSIZE)!=Z_NULL):
696 (fgets(buffer,BUFSIZE,log_fname?log_fp:stdin)!=NULL))
697 {
698 if (strlen(buffer) < BUFSIZE-1)
699 {
700 if (debug_mode && verbose) fprintf(stderr,"%s\n",buffer);
701 break;
702 }
703 if (debug_mode && verbose) fprintf(stderr,"%s",buffer);
704 }
705 continue; /* go get next record if any */
706 }
707
708 /* got a record... */
709 strcpy(tmp_buf, buffer); /* save buffer in case of error */
710 if (parse_record(buffer)) /* parse the record */
711 {
712 /*********************************************/
713 /* PASSED MINIMAL CHECKS, DO A LITTLE MORE */
714 /*********************************************/
715
716 /* convert month name to lowercase */
717 for (i=4;i<7;i++)
718 log_rec.datetime[i]=tolower(log_rec.datetime[i]);
719
720 /* lowercase sitename/IPv6 addresses */
721 cp1=log_rec.hostname;
722 while (*cp1++!='\0') *cp1=tolower(*cp1);
723
724 /* get year/month/day/hour/min/sec values */
725 for (i=0;i<12;i++)
726 {
727 if (strncmp(log_month[i],&log_rec.datetime[4],3)==0)
728 { rec_month = i+1; break; }
729 }
730
731 rec_year=atoi(&log_rec.datetime[8]); /* get year number (int) */
732 rec_day =atoi(&log_rec.datetime[1]); /* get day number */
733 rec_hour=atoi(&log_rec.datetime[13]); /* get hour number */
734 rec_min =atoi(&log_rec.datetime[16]); /* get minute number */
735 rec_sec =atoi(&log_rec.datetime[19]); /* get second number */
736
737 /* Kludge for Netscape server time (0-24?) error */
738 if (rec_hour>23) rec_hour=0;
739
740 /* minimal sanity check on date */
741 if ((i>=12)||(rec_min>59)||(rec_sec>60)||(rec_year<1990))
742 {
743 total_bad++; /* if a bad date, bump counter */
744 if (verbose)
745 {
746 fprintf(stderr,"%s: %s [%llu]",
747 msg_bad_date,log_rec.datetime,total_rec);
748 if (debug_mode) fprintf(stderr,":\n%s\n",tmp_buf);
749 else fprintf(stderr,"\n");
750 }
751 continue; /* and ignore this record */
752 }
753
754 /*********************************************/
755 /* GOOD RECORD, CHECK INCREMENTAL/TIMESTAMPS */
756 /*********************************************/
757
758 /* Flag as a good one */
759 good_rec = 1;
760
761 /* get current records timestamp (seconds since epoch) */
762 req_tstamp=cur_tstamp;
763 rec_tstamp=((jdate(rec_day,rec_month,rec_year)-epoch)*86400)+
764 (rec_hour*3600)+(rec_min*60)+rec_sec;
765
766 /* Do we need to check for duplicate records? (incremental mode) */
767 if (check_dup)
768 {
769 /* check if less than/equal to last record processed */
770 if ( rec_tstamp <= cur_tstamp )
771 {
772 /* if it is, assume we have already processed and ignore it */
773 total_ignore++;
774 continue;
775 }
776 else
777 {
778 /* if it isn't.. disable any more checks this run */
779 check_dup=0;
780 /* now check if it's a new month */
781 if ( (cur_month != rec_month) || (cur_year != rec_year) )
782 {
783 clear_month();
784 cur_sec = rec_sec; /* set current counters */
785 cur_min = rec_min;
786 cur_hour = rec_hour;
787 cur_day = rec_day;
788 cur_month = rec_month;
789 cur_year = rec_year;
790 cur_tstamp= rec_tstamp;
791 f_day=l_day=rec_day; /* reset first and last day */
792 }
793 }
794 }
795
796 /* check for out of sequence records */
797 if (rec_tstamp/3600 < cur_tstamp/3600)
798 {
799 if (!fold_seq_err && ((rec_tstamp+SLOP_VAL)/3600<cur_tstamp/3600) )
800 { total_ignore++; continue; }
801 else
802 {
803 rec_sec = cur_sec; /* if folding sequence */
804 rec_min = cur_min; /* errors, just make it */
805 rec_hour = cur_hour; /* look like the last */
806 rec_day = cur_day; /* good records timestamp */
807 rec_month = cur_month;
808 rec_year = cur_year;
809 rec_tstamp= cur_tstamp;
810 }
811 }
812 cur_tstamp=rec_tstamp; /* update current timestamp */
813
814 /*********************************************/
815 /* DO SOME PRE-PROCESS FORMATTING */
816 /*********************************************/
817
818 /* un-escape URL */
819 unescape(log_rec.url);
820
821 /* fix URL field */
822 cp1 = cp2 = log_rec.url;
823 /* handle null '-' case here... */
824 if (*++cp1 == '-') strcpy(log_rec.url,"/INVALID-URL");
825 else
826 {
827 /* strip actual URL out of request */
828 while ( (*cp1 != ' ') && (*cp1 != '\0') ) cp1++;
829 if (*cp1 != '\0')
830 {
831 /* scan to begin of actual URL field */
832 while ((*cp1 == ' ') && (*cp1 != '\0')) cp1++;
833 /* remove duplicate / if needed */
834 while (( *cp1=='/') && (*(cp1+1)=='/')) cp1++;
835 while (( *cp1!='\0')&&(*cp1!='"')) *cp2++=*cp1++;
836 *cp2='\0';
837 }
838 }
839
840 /* strip query portion of cgi scripts */
841 cp1 = log_rec.url;
842 while (*cp1 != '\0')
843 if (!isurlchar(*cp1, stripcgi)) { *cp1 = '\0'; break; }
844 else cp1++;
845 if (log_rec.url[0]=='\0')
846 { log_rec.url[0]='/'; log_rec.url[1]='\0'; }
847
848 /* Normalize URL */
849 if (log_type==LOG_CLF && log_rec.resp_code!=RC_NOTFOUND && normalize)
850 {
851 if ( ((cp2=strstr(log_rec.url,"://"))!=NULL)&&(cp2<log_rec.url+6) )
852 {
853 cp1=cp2+3;
854 /* see if a '/' is present after it */
855 if ( (cp2=strchr(cp1,(int)'/'))==NULL) cp1--;
856 else cp1=cp2;
857 /* Ok, now shift url string */
858 cp2=log_rec.url; while (*cp1!='\0') *cp2++=*cp1++; *cp2='\0';
859 }
860 /* extra sanity checks on URL string */
861 while ((cp2=strstr(log_rec.url,"/./")))
862 { cp1=cp2+2; while (*cp1!='\0') *cp2++=*cp1++; *cp2='\0'; }
863 if (log_rec.url[0]!='/')
864 {
865 if ( log_rec.resp_code==RC_OK ||
866 log_rec.resp_code==RC_PARTIALCONTENT ||
867 log_rec.resp_code==RC_NOMOD)
868 {
869 if (debug_mode)
870 fprintf(stderr,"Converted URL '%s' to '/'\n",log_rec.url);
871 log_rec.url[0]='/';
872 log_rec.url[1]='\0';
873 }
874 else
875 {
876 if (debug_mode)
877 fprintf(stderr,"Invalid URL: '%s'\n",log_rec.url);
878 strcpy(log_rec.url,"/INVALID-URL");
879 }
880 }
881 while ( log_rec.url[ (i=strlen(log_rec.url)-1) ] == '?' )
882 log_rec.url[i]='\0'; /* drop trailing ?s if any */
883 }
884 else
885 {
886 /* check for service (ie: http://) and lowercase if found */
887 if (((cp2=strstr(log_rec.url,"://"))!= NULL)&&(cp2<log_rec.url+6))
888 {
889 cp1=log_rec.url;
890 while (cp1!=cp2)
891 {
892 if ( (*cp1>='A') && (*cp1<='Z')) *cp1 += 'a'-'A';
893 cp1++;
894 }
895 }
896 }
897
898 /* strip off index.html (or any aliases) */
899 lptr=index_alias;
900 while (lptr!=NULL)
901 {
902 if ((cp1=strstr(log_rec.url,lptr->string))!=NULL)
903 {
904 if (*(cp1-1)=='/')
905 {
906 if ( !stripcgi && (cp2=strchr(cp1,'?'))!=NULL )
907 { while(*cp2) *cp1++=*cp2++; *cp1='\0'; }
908 else *cp1='\0';
909 break;
910 }
911 }
912 lptr=lptr->next;
913 }
914
915 /* unescape referrer */
916 unescape(log_rec.refer);
917
918 /* fix referrer field */
919 cp1 = log_rec.refer;
920 cp3 = cp2 = cp1++;
921 if ( (*cp2 != '\0') && (*cp2 == '"') )
922 {
923 while ( *cp1 != '\0' )
924 {
925 cp3=cp2;
926 if (((unsigned char)*cp1<32&&(unsigned char)*cp1>0) ||
927 *cp1==127 || (unsigned char)*cp1=='<') *cp1=0;
928 else *cp2++=*cp1++;
929 }
930 *cp3 = '\0';
931 }
932
933 /* get query portion of cgi referrals */
934 cp1 = log_rec.refer;
935 if (*cp1 != '\0')
936 {
937 while (*cp1 != '\0')
938 {
939 if (!isurlchar(*cp1, 1))
940 {
941 /* Save query portion in log.rec.srchstr */
942 strncpy(log_rec.srchstr,(char *)cp1,MAXSRCH);
943 #ifdef USE_FULL_CGI_REFS
944 *cp1++;
945 #else
946 *cp1++='\0';
947 #endif
948 break;
949 }
950 else cp1++;
951 }
952 /* handle null referrer */
953 if (log_rec.refer[0]=='\0')
954 { log_rec.refer[0]='-'; log_rec.refer[1]='\0'; }
955 }
956
957 /* if HTTP request, lowercase http://sitename/ portion */
958 cp1 = log_rec.refer;
959 if ( (*cp1=='h') || (*cp1=='H'))
960 {
961 while ( (*cp1!='/') && (*cp1!='\0'))
962 {
963 if ( (*cp1>='A') && (*cp1<='Z')) *cp1 += 'a'-'A';
964 cp1++;
965 }
966 /* now do hostname */
967 if ( (*cp1=='/') && ( *(cp1+1)=='/')) {cp1++; cp1++;}
968 while ( (*cp1!='/') && (*cp1!='\0'))
969 {
970 if ( (*cp1>='A') && (*cp1<='Z')) *cp1 += 'a'-'A';
971 cp1++;
972 }
973 }
974
975 /* Do we need to mangle? */
976 if (mangle_agent) agent_mangle(log_rec.agent);
977
978 /* if necessary, shrink referrer to fit storage */
979 if (strlen(log_rec.refer)>=MAXREFH)
980 {
981 if (verbose) fprintf(stderr,"%s [%llu]\n",
982 msg_big_ref,total_rec);
983 log_rec.refer[MAXREFH-1]='\0';
984 }
985
986 /* if necessary, shrink URL to fit storage */
987 if (strlen(log_rec.url)>=MAXURLH)
988 {
989 if (verbose) fprintf(stderr,"%s [%llu]\n",
990 msg_big_req,total_rec);
991 log_rec.url[MAXURLH-1]='\0';
992 }
993
994 /* fix user agent field */
995 cp1 = log_rec.agent;
996 cp3 = cp2 = cp1++;
997 if ( (*cp2 != '\0') && ((*cp2 == '"')||(*cp2 == '(')) )
998 {
999 while (*cp1 != '\0') { cp3 = cp2; *cp2++ = *cp1++; }
1000 *cp3 = '\0';
1001 }
1002 cp1 = log_rec.agent; /* CHANGE !!! */
1003 while (*cp1 != 0) /* get rid of more common _bad_ chars ;) */
1004 {
1005 if ( ((unsigned char)*cp1 < 32) ||
1006 ((unsigned char)*cp1==127) ||
1007 (*cp1=='<') || (*cp1=='>') )
1008 { *cp1='\0'; break; }
1009 else cp1++;
1010 }
1011
1012 /* fix username if needed */
1013 if (log_rec.ident[0]==0)
1014 { log_rec.ident[0]='-'; log_rec.ident[1]='\0'; }
1015 else
1016 {
1017 cp3=log_rec.ident;
1018 while ((unsigned char)*cp3>=32 && *cp3!='"') cp3++;
1019 *cp3='\0';
1020 }
1021 /* unescape user name */
1022 unescape(log_rec.ident);
1023
1024 /********************************************/
1025 /* PROCESS RECORD */
1026 /********************************************/
1027
1028 /* first time through? */
1029 if (cur_month == 0)
1030 {
1031 /* if yes, init our date vars */
1032 cur_month=rec_month; cur_year=rec_year;
1033 cur_day=rec_day; cur_hour=rec_hour;
1034 cur_min=rec_min; cur_sec=rec_sec;
1035 f_day=rec_day;
1036 }
1037
1038 /* adjust last day processed if different */
1039 if (rec_day > l_day) l_day = rec_day;
1040
1041 /* update min/sec stuff */
1042 if (cur_sec != rec_sec) cur_sec = rec_sec;
1043 if (cur_min != rec_min) cur_min = rec_min;
1044
1045 /* check for hour change */
1046 if (cur_hour != rec_hour)
1047 {
1048 /* if yes, init hourly stuff */
1049 if (ht_hit > mh_hit) mh_hit = ht_hit;
1050 ht_hit = 0;
1051 cur_hour = rec_hour;
1052 }
1053
1054 /* check for day change */
1055 if (cur_day != rec_day)
1056 {
1057 /* if yes, init daily stuff */
1058 tm_site[cur_day-1]=dt_site; dt_site=0;
1059 tm_visit[cur_day-1]=tot_visit(sd_htab);
1060 del_hlist(sd_htab);
1061 cur_day = rec_day;
1062 }
1063
1064 /* check for month change */
1065 if ( (cur_month != rec_month) || (cur_year != rec_year) )
1066 {
1067 /* if yes, do monthly stuff */
1068 t_visit=tot_visit(sm_htab);
1069 month_update_exit(req_tstamp); /* process exit pages */
1070 update_history();
1071 write_month_html(); /* generate HTML for month */
1072 clear_month();
1073 cur_month = rec_month; /* update our flags */
1074 cur_year = rec_year;
1075 f_day=l_day=rec_day;
1076 }
1077
1078 /* save hostname for later */
1079 strncpy(host_buf, log_rec.hostname, sizeof(log_rec.hostname));
1080
1081 #ifdef USE_DNS
1082 /* Resolve IP address if needed */
1083 if (dns_db)
1084 {
1085 struct addrinfo hints, *ares;
1086 memset(&hints, 0, sizeof(hints));
1087 hints.ai_family = AF_UNSPEC;
1088 hints.ai_socktype = SOCK_STREAM;
1089 hints.ai_flags = AI_NUMERICHOST;
1090 if (0 == getaddrinfo(log_rec.hostname, "0", &hints, &ares))
1091 {
1092 freeaddrinfo(ares);
1093 resolve_dns(&log_rec);
1094
1095 #ifdef USE_IP_AS_HOSTNAME
1096 /* restore the host's IP-address if the host's name has not been resolved */
1097 if (!strcmp(log_rec.hostname,"\020\002"))
1098 strncpy(log_rec.hostname, host_buf, sizeof(host_buf));
1099 #endif
1100
1101 }
1102 }
1103 #endif
1104 /* lowercase hostname and validity check */
1105 cp1 = log_rec.hostname; i=0;
1106
1107 if ( (!isalnum((unsigned char)*cp1)) && (*cp1!=':') )
1108 strncpy(log_rec.hostname, "Invalid", 8);
1109 else
1110 {
1111 while (*cp1 != '\0') /* loop through string */
1112 {
1113 if ( (*cp1>='A') && (*cp1<='Z') )
1114 { *cp1++ += 'a'-'A'; continue; }
1115 if ( *cp1=='.' ) i++;
1116 if ( (isalnum((unsigned char)*cp1)) ||
1117 (*cp1=='.')||(*cp1=='-') ||
1118 (*cp1==':')||((*cp1=='_')&&(i==0)) ) cp1++;
1119 else
1120 {
1121 /* Invalid hostname found! */
1122 if (strcmp(log_rec.hostname, host_buf))
1123 strcpy(log_rec.hostname, host_buf);
1124 else strncpy(log_rec.hostname,"Invalid",8);
1125 break;
1126 }
1127 }
1128 if (*cp1 == '\0') /* did we make it to the end? */
1129 {
1130 if (!isalnum((unsigned char)*(cp1-1)))
1131 strncpy(log_rec.hostname,"Invalid",8);
1132 }
1133 }
1134
1135 /* Catch blank hostnames here */
1136 if (log_rec.hostname[0]=='\0')
1137 strncpy(log_rec.hostname,"Unknown",8);
1138
1139 /* Ignore/Include check */
1140 if ( (isinlist(include_sites,log_rec.hostname)==NULL) &&
1141 (isinlist(include_urls,log_rec.url)==NULL) &&
1142 (isinlist(include_refs,log_rec.refer)==NULL) &&
1143 (isinlist(include_agents,log_rec.agent)==NULL) &&
1144 (isinlist(include_users,log_rec.ident)==NULL) )
1145 {
1146 if (isinlist(ignored_sites,log_rec.hostname)!=NULL)
1147 { total_ignore++; continue; }
1148 if (isinlist(ignored_urls,log_rec.url)!=NULL)
1149 { total_ignore++; continue; }
1150 if (isinlist(ignored_agents,log_rec.agent)!=NULL)
1151 { total_ignore++; continue; }
1152 if (isinlist(ignored_refs,log_rec.refer)!=NULL)
1153 { total_ignore++; continue; }
1154 if (isinlist(ignored_users,log_rec.ident)!=NULL)
1155 { total_ignore++; continue; }
1156 }
1157
1158 /* Bump response code totals */
1159 switch (log_rec.resp_code) {
1160 case RC_CONTINUE: i=IDX_CONTINUE; break;
1161 case RC_SWITCHPROTO: i=IDX_SWITCHPROTO; break;
1162 case RC_OK: i=IDX_OK; break;
1163 case RC_CREATED: i=IDX_CREATED; break;
1164 case RC_ACCEPTED: i=IDX_ACCEPTED; break;
1165 case RC_NONAUTHINFO: i=IDX_NONAUTHINFO; break;
1166 case RC_NOCONTENT: i=IDX_NOCONTENT; break;
1167 case RC_RESETCONTENT: i=IDX_RESETCONTENT; break;
1168 case RC_PARTIALCONTENT: i=IDX_PARTIALCONTENT; break;
1169 case RC_MULTIPLECHOICES: i=IDX_MULTIPLECHOICES; break;
1170 case RC_MOVEDPERM: i=IDX_MOVEDPERM; break;
1171 case RC_MOVEDTEMP: i=IDX_MOVEDTEMP; break;
1172 case RC_SEEOTHER: i=IDX_SEEOTHER; break;
1173 case RC_NOMOD: i=IDX_NOMOD; break;
1174 case RC_USEPROXY: i=IDX_USEPROXY; break;
1175 case RC_MOVEDTEMPORARILY: i=IDX_MOVEDTEMPORARILY; break;
1176 case RC_BAD: i=IDX_BAD; break;
1177 case RC_UNAUTH: i=IDX_UNAUTH; break;
1178 case RC_PAYMENTREQ: i=IDX_PAYMENTREQ; break;
1179 case RC_FORBIDDEN: i=IDX_FORBIDDEN; break;
1180 case RC_NOTFOUND: i=IDX_NOTFOUND; break;
1181 case RC_METHODNOTALLOWED: i=IDX_METHODNOTALLOWED; break;
1182 case RC_NOTACCEPTABLE: i=IDX_NOTACCEPTABLE; break;
1183 case RC_PROXYAUTHREQ: i=IDX_PROXYAUTHREQ; break;
1184 case RC_TIMEOUT: i=IDX_TIMEOUT; break;
1185 case RC_CONFLICT: i=IDX_CONFLICT; break;
1186 case RC_GONE: i=IDX_GONE; break;
1187 case RC_LENGTHREQ: i=IDX_LENGTHREQ; break;
1188 case RC_PREFAILED: i=IDX_PREFAILED; break;
1189 case RC_REQENTTOOLARGE: i=IDX_REQENTTOOLARGE; break;
1190 case RC_REQURITOOLARGE: i=IDX_REQURITOOLARGE; break;
1191 case RC_UNSUPMEDIATYPE: i=IDX_UNSUPMEDIATYPE; break;
1192 case RC_RNGNOTSATISFIABLE:i=IDX_RNGNOTSATISFIABLE;break;
1193 case RC_EXPECTATIONFAILED:i=IDX_EXPECTATIONFAILED;break;
1194 case RC_SERVERERR: i=IDX_SERVERERR; break;
1195 case RC_NOTIMPLEMENTED: i=IDX_NOTIMPLEMENTED; break;
1196 case RC_BADGATEWAY: i=IDX_BADGATEWAY; break;
1197 case RC_UNAVAIL: i=IDX_UNAVAIL; break;
1198 case RC_GATEWAYTIMEOUT: i=IDX_GATEWAYTIMEOUT; break;
1199 case RC_BADHTTPVER: i=IDX_BADHTTPVER; break;
1200 default: i=IDX_UNDEFINED; break;
1201 }
1202 response[i].count++;
1203
1204 /* now save in the various hash tables... */
1205 if (log_rec.resp_code==RC_OK || log_rec.resp_code==RC_PARTIALCONTENT)
1206 i=1; else i=0;
1207
1208 /* URL/ident hash table (only if valid response code) */
1209 if ((log_rec.resp_code==RC_OK)||(log_rec.resp_code==RC_NOMOD)||
1210 (log_rec.resp_code==RC_PARTIALCONTENT))
1211 {
1212 /* URL hash table */
1213 if (put_unode(log_rec.url,OBJ_REG,(u_int64_t)1,
1214 log_rec.xfer_size,&t_url,(u_int64_t)0,(u_int64_t)0,um_htab))
1215 {
1216 if (verbose)
1217 /* Error adding URL node, skipping ... */
1218 fprintf(stderr,"%s %s\n", msg_nomem_u, log_rec.url);
1219 }
1220
1221 /* ident (username) hash table */
1222 if (put_inode(log_rec.ident,OBJ_REG,
1223 1,(u_int64_t)i,log_rec.xfer_size,&t_user,
1224 0,rec_tstamp,im_htab))
1225 {
1226 if (verbose)
1227 /* Error adding ident node, skipping .... */
1228 fprintf(stderr,"%s %s\n", msg_nomem_i, log_rec.ident);
1229 }
1230 }
1231
1232 /* referrer hash table */
1233 if (ntop_refs)
1234 {
1235 if (log_rec.refer[0]!='\0')
1236 if (put_rnode(log_rec.refer,OBJ_REG,(u_int64_t)1,&t_ref,rm_htab))
1237 {
1238 if (verbose)
1239 fprintf(stderr,"%s %s\n", msg_nomem_r, log_rec.refer);
1240 }
1241 }
1242
1243 /* hostname (site) hash table - daily */
1244 if (put_hnode(log_rec.hostname,OBJ_REG,
1245 1,(u_int64_t)i,log_rec.xfer_size,&dt_site,
1246 0,rec_tstamp,"",sd_htab))
1247 {
1248 if (verbose)
1249 /* Error adding host node (daily), skipping .... */
1250 fprintf(stderr,"%s %s\n",msg_nomem_dh, log_rec.hostname);
1251 }
1252
1253 /* hostname (site) hash table - monthly */
1254 if (put_hnode(log_rec.hostname,OBJ_REG,
1255 1,(u_int64_t)i,log_rec.xfer_size,&t_site,
1256 0,rec_tstamp,"",sm_htab))
1257 {
1258 if (verbose)
1259 /* Error adding host node (monthly), skipping .... */
1260 fprintf(stderr,"%s %s\n", msg_nomem_mh, log_rec.hostname);
1261 }
1262
1263 /* user agent hash table */
1264 if (ntop_agents)
1265 {
1266 if (log_rec.agent[0]!='\0')
1267 if (put_anode(log_rec.agent,OBJ_REG,(u_int64_t)1,&t_agent,am_htab))
1268 {
1269 if (verbose)
1270 fprintf(stderr,"%s %s\n", msg_nomem_a, log_rec.agent);
1271 }
1272 }
1273
1274 /* bump monthly/daily/hourly totals */
1275 t_hit++; ht_hit++; /* daily/hourly hits */
1276 t_xfer += log_rec.xfer_size; /* total xfer size */
1277 tm_xfer[rec_day-1] += log_rec.xfer_size; /* daily xfer total */
1278 tm_hit[rec_day-1]++; /* daily hits total */
1279 th_xfer[rec_hour] += log_rec.xfer_size; /* hourly xfer total */
1280 th_hit[rec_hour]++; /* hourly hits total */
1281
1282 /* if RC_OK, increase file counters */
1283 if (log_rec.resp_code == RC_OK)
1284 {
1285 t_file++;
1286 tm_file[rec_day-1]++;
1287 th_file[rec_hour]++;
1288 }
1289
1290 /* Pages (pageview) calculation */
1291 if (ispage(log_rec.url))
1292 {
1293 t_page++;
1294 tm_page[rec_day-1]++;
1295 th_page[rec_hour]++;
1296
1297 /* do search string stuff if needed */
1298 if (ntop_search) srch_string(log_rec.srchstr);
1299 }
1300
1301 /*********************************************/
1302 /* RECORD PROCESSED - DO GROUPS HERE */
1303 /*********************************************/
1304
1305 /* URL Grouping */
1306 if ( (cp1=isinglist(group_urls,log_rec.url))!=NULL)
1307 {
1308 if (put_unode(cp1,OBJ_GRP,(u_int64_t)1,log_rec.xfer_size,
1309 &ul_bogus,(u_int64_t)0,(u_int64_t)0,um_htab))
1310 {
1311 if (verbose)
1312 /* Error adding URL node, skipping ... */
1313 fprintf(stderr,"%s %s\n", msg_nomem_u, cp1);
1314 }
1315 }
1316
1317 /* Site Grouping */
1318 if ( (cp1=isinglist(group_sites,log_rec.hostname))!=NULL)
1319 {
1320 if (put_hnode(cp1,OBJ_GRP,1,
1321 (u_int64_t)(log_rec.resp_code==RC_OK)?1:0,
1322 log_rec.xfer_size,&ul_bogus,
1323 0,rec_tstamp,"",sm_htab))
1324 {
1325 if (verbose)
1326 /* Error adding Site node, skipping ... */
1327 fprintf(stderr,"%s %s\n", msg_nomem_mh, cp1);
1328 }
1329 }
1330 else
1331 {
1332 /* Domain Grouping */
1333 if (group_domains)
1334 {
1335 cp1 = get_domain(log_rec.hostname);
1336 if (cp1 != NULL)
1337 {
1338 if (put_hnode(cp1,OBJ_GRP,1,
1339 (u_int64_t)(log_rec.resp_code==RC_OK)?1:0,
1340 log_rec.xfer_size,&ul_bogus,
1341 0,rec_tstamp,"",sm_htab))
1342 {
1343 if (verbose)
1344 /* Error adding Site node, skipping ... */
1345 fprintf(stderr,"%s %s\n", msg_nomem_mh, cp1);
1346 }
1347 }
1348 }
1349 }
1350
1351 /* Referrer Grouping */
1352 if ( (cp1=isinglist(group_refs,log_rec.refer))!=NULL)
1353 {
1354 if (put_rnode(cp1,OBJ_GRP,(u_int64_t)1,&ul_bogus,rm_htab))
1355 {
1356 if (verbose)
1357 /* Error adding Referrer node, skipping ... */
1358 fprintf(stderr,"%s %s\n", msg_nomem_r, cp1);
1359 }
1360 }
1361
1362 /* User Agent Grouping */
1363 if ( (cp1=isinglist(group_agents,log_rec.agent))!=NULL)
1364 {
1365 if (put_anode(cp1,OBJ_GRP,(u_int64_t)1,&ul_bogus,am_htab))
1366 {
1367 if (verbose)
1368 /* Error adding User Agent node, skipping ... */
1369 fprintf(stderr,"%s %s\n", msg_nomem_a, cp1);
1370 }
1371 }
1372
1373 /* Ident (username) Grouping */
1374 if ( (cp1=isinglist(group_users,log_rec.ident))!=NULL)
1375 {
1376 if (put_inode(cp1,OBJ_GRP,1,
1377 (u_int64_t)(log_rec.resp_code==RC_OK)?1:0,
1378 log_rec.xfer_size,&ul_bogus,
1379 0,rec_tstamp,im_htab))
1380 {
1381 if (verbose)
1382 /* Error adding Username node, skipping ... */
1383 fprintf(stderr,"%s %s\n", msg_nomem_i, cp1);
1384 }
1385 }
1386 }
1387
1388 /*********************************************/
1389 /* BAD RECORD */
1390 /*********************************************/
1391
1392 else
1393 {
1394 /* If first record, check if stupid Netscape header stuff */
1395 if ( (total_rec==1) && (strncmp(buffer,"format=",7)==0) )
1396 {
1397 /* Skipping Netscape header record */
1398 if (verbose>1) printf("%s\n",msg_ign_nscp);
1399 /* count it as ignored... */
1400 total_ignore++;
1401 }
1402 else
1403 {
1404 /* Check if it's a W3C header or IIS Null-Character line */
1405 if ((buffer[0]=='\0') || (buffer[0]=='#'))
1406 {
1407 total_ignore++;
1408 }
1409 else
1410 {
1411 /* really bad record... */
1412 total_bad++;
1413 if (verbose)
1414 {
1415 fprintf(stderr,"%s (%llu)",msg_bad_rec,total_rec);
1416 if (debug_mode) fprintf(stderr,":\n%s\n",tmp_buf);
1417 else fprintf(stderr,"\n");
1418 }
1419 }
1420 }
1421 }
1422 }
1423
1424 /*********************************************/
1425 /* DONE READING LOG FILE - final processing */
1426 /*********************************************/
1427
1428 /* close log file if needed */
1429 #ifdef USE_BZIP
1430 if (gz_log) (gz_log==COMP_BZIP)?BZ2_bzclose(zlog_fp):gzclose(zlog_fp);
1431 #else
1432 if (gz_log) gzclose(zlog_fp);
1433 #endif
1434 else if (log_fname) fclose(log_fp);
1435
1436 if (good_rec) /* were any good records? */
1437 {
1438 tm_site[cur_day-1]=dt_site; /* If yes, clean up a bit */
1439 tm_visit[cur_day-1]=tot_visit(sd_htab);
1440 t_visit=tot_visit(sm_htab);
1441 if (ht_hit > mh_hit) mh_hit = ht_hit;
1442
1443 if (total_rec > (total_ignore+total_bad)) /* did we process any? */
1444 {
1445 if (incremental)
1446 {
1447 if (save_state()) /* incremental stuff */
1448 {
1449 /* Error: Unable to save current run data */
1450 if (verbose) fprintf(stderr,"%s\n",msg_data_err);
1451 unlink(state_fname);
1452 }
1453 }
1454 month_update_exit(rec_tstamp); /* calculate exit pages */
1455 update_history();
1456 write_month_html(); /* write monthly HTML file */
1457 put_history(); /* write history */
1458 }
1459 if (hist[0].month!=0) write_main_index(); /* write main HTML file */
1460
1461 /* get processing end time */
1462 end_time = time(NULL);
1463
1464 /* display end of processing statistics */
1465 if (time_me || (verbose>1))
1466 {
1467 printf("%llu %s ",total_rec, msg_records);
1468 if (total_ignore)
1469 {
1470 printf("(%llu %s",total_ignore,msg_ignored);
1471 if (total_bad) printf(", %llu %s) ",total_bad,msg_bad);
1472 else printf(") ");
1473 }
1474 else if (total_bad) printf("(%llu %s) ",total_bad,msg_bad);
1475
1476 /* totoal processing time in seconds */
1477 temp_time = difftime(end_time, start_time);
1478 if (temp_time==0) temp_time=1;
1479 printf("%s %.0f %s", msg_in, temp_time, msg_seconds);
1480
1481 /* calculate records per second */
1482 if (temp_time)
1483 i=( (int)( (float)total_rec/temp_time ) );
1484 else i=0;
1485
1486 if ( (i>0) && (i<=total_rec) ) printf(", %d/sec\n", i);
1487 else printf("\n");
1488 }
1489
1490 #ifdef USE_DNS
1491 /* Close DNS cache file */
1492 if (dns_db) close_cache();
1493 /* Close GeoDB database */
1494 if (geo_db) geodb_close(geo_db);
1495 #endif
1496
1497 #ifdef USE_GEOIP
1498 /* Close GeoIP database */
1499 if (mmdb_open==MMDB_SUCCESS) {
1500 MMDB_close(&mmdb);
1501 mmdb_open=MMDB_FILE_OPEN_ERROR;
1502 }
1503 #endif
1504
1505 /* Whew, all done! Exit with completion status (0) */
1506 exit(0);
1507 }
1508 else
1509 {
1510 /* No valid records found... exit with error (1) */
1511 if (verbose) printf("%s\n",msg_no_vrec);
1512 if (hist[0].month!=0) write_main_index(); /* write main HTML file */
1513 exit(1);
1514 }
1515 }
1516
1517 /*********************************************/
1518 /* GET_CONFIG - get configuration file info */
1519 /*********************************************/
1520
get_config(char * fname)1521 void get_config(char *fname)
1522 {
1523 char *kwords[]= { "Undefined", /* 0 = undefined keyword 0 */
1524 "OutputDir", /* Output directory 1 */
1525 "LogFile", /* Log file to use for input 2 */
1526 "ReportTitle", /* Title for reports 3 */
1527 "HostName", /* Hostname to use 4 */
1528 "IgnoreHist", /* Ignore history file 5 */
1529 "Quiet", /* Run in quiet mode 6 */
1530 "TimeMe", /* Produce timing results 7 */
1531 "Debug", /* Produce debug information 8 */
1532 "HourlyGraph", /* Hourly stats graph 9 */
1533 "HourlyStats", /* Hourly stats table 10 */
1534 "TopSites", /* Top sites 11 */
1535 "TopURLs", /* Top URLs 12 */
1536 "TopReferrers", /* Top Referrers 13 */
1537 "TopAgents", /* Top User Agents 14 */
1538 "TopCountries", /* Top Countries 15 */
1539 "HideSite", /* Sites to hide 16 */
1540 "HideURL", /* URLs to hide 17 */
1541 "HideReferrer", /* Referrers to hide 18 */
1542 "HideAgent", /* User Agents to hide 19 */
1543 "IndexAlias", /* Aliases for index.html 20 */
1544 "HTMLHead", /* HTML Top1 code 21 */
1545 "HTMLPost", /* HTML Top2 code 22 */
1546 "HTMLTail", /* HTML Tail code 23 */
1547 "MangleAgents", /* Mangle User Agents 24 */
1548 "IgnoreSite", /* Sites to ignore 25 */
1549 "IgnoreURL", /* Url's to ignore 26 */
1550 "IgnoreReferrer", /* Referrers to ignore 27 */
1551 "IgnoreAgent", /* User Agents to ignore 28 */
1552 "ReallyQuiet", /* Dont display ANY messages 29 */
1553 "GMTTime", /* Local or UTC time? 30 */
1554 "GroupURL", /* Group URLs 31 */
1555 "GroupSite", /* Group Sites 32 */
1556 "GroupReferrer", /* Group Referrers 33 */
1557 "GroupAgent", /* Group Agents 34 */
1558 "GroupShading", /* Shade Grouped entries 35 */
1559 "GroupHighlight", /* BOLD Grouped entries 36 */
1560 "Incremental", /* Incremental runs 37 */
1561 "IncrementalName", /* Filename for state data 38 */
1562 "HistoryName", /* Filename for history data 39 */
1563 "HTMLExtension", /* HTML filename extension 40 */
1564 "HTMLPre", /* HTML code at beginning 41 */
1565 "HTMLBody", /* HTML body code 42 */
1566 "HTMLEnd", /* HTML code at end 43 */
1567 "UseHTTPS", /* Use https:// on URLs 44 */
1568 "IncludeSite", /* Sites to always include 45 */
1569 "IncludeURL", /* URLs to always include 46 */
1570 "IncludeReferrer", /* Referrers to include 47 */
1571 "IncludeAgent", /* User Agents to include 48 */
1572 "PageType", /* Page Type (pageview) 49 */
1573 "VisitTimeout", /* Visit timeout (seconds) 50 */
1574 "GraphLegend", /* Graph Legends (yes/no) 51 */
1575 "GraphLines", /* Graph Lines (0=none) 52 */
1576 "FoldSeqErr", /* Fold sequence errors 53 */
1577 "CountryGraph", /* Display ctry graph (0=no) 54 */
1578 "TopKSites", /* Top sites (by KBytes) 55 */
1579 "TopKURLs", /* Top URLs (by KBytes) 56 */
1580 "TopEntry", /* Top Entry Pages 57 */
1581 "TopExit", /* Top Exit Pages 58 */
1582 "TopSearch", /* Top Search Strings 59 */
1583 "LogType", /* Log Type (clf/ftp/squid) 60 */
1584 "SearchEngine", /* SearchEngine strings 61 */
1585 "GroupDomains", /* Group domains (n=level) 62 */
1586 "HideAllSites", /* Hide ind. sites (0=no) 63 */
1587 "AllSites", /* List all sites? 64 */
1588 "AllURLs", /* List all URLs? 65 */
1589 "AllReferrers", /* List all Referrers? 66 */
1590 "AllAgents", /* List all User Agents? 67 */
1591 "AllSearchStr", /* List all Search Strings? 68 */
1592 "AllUsers", /* List all Users? 69 */
1593 "TopUsers", /* Top Usernames to show 70 */
1594 "HideUser", /* Usernames to hide 71 */
1595 "IgnoreUser", /* Usernames to ignore 72 */
1596 "IncludeUser", /* Usernames to include 73 */
1597 "GroupUser", /* Usernames to group 74 */
1598 "DumpPath", /* Path for dump files 75 */
1599 "DumpExtension", /* Dump filename extension 76 */
1600 "DumpHeader", /* Dump header as first rec? 77 */
1601 "DumpSites", /* Dump sites tab file 78 */
1602 "DumpURLs", /* Dump urls tab file 79 */
1603 "DumpReferrers", /* Dump referrers tab file 80 */
1604 "DumpAgents", /* Dump user agents tab file 81 */
1605 "DumpUsers", /* Dump usernames tab file 82 */
1606 "DumpSearchStr", /* Dump search str tab file 83 */
1607 "DNSCache", /* DNS Cache file name 84 */
1608 "DNSChildren", /* DNS Children (0=no DNS) 85 */
1609 "DailyGraph", /* Daily Graph (0=no) 86 */
1610 "DailyStats", /* Daily Stats (0=no) 87 */
1611 "LinkReferrer", /* Link referrer (0=no) 88 */
1612 "PagePrefix", /* PagePrefix - treat as page 89 */
1613 "ColorHit", /* Hit Color (def=00805c) 90 */
1614 "ColorFile", /* File Color (def=0040ff) 91 */
1615 "ColorSite", /* Site Color (def=ff8000) 92 */
1616 "ColorKbyte", /* Kbyte Color (def=ff0000) 93 */
1617 "ColorPage", /* Page Color (def=00e0ff) 94 */
1618 "ColorVisit", /* Visit Color (def=ffff00) 95 */
1619 "ColorMisc", /* Misc Color (def=00e0ff) 96 */
1620 "PieColor1", /* Pie Color 1 (def=800080) 97 */
1621 "PieColor2", /* Pie Color 2 (def=80ffc0) 98 */
1622 "PieColor3", /* Pie Color 3 (def=ff00ff) 99 */
1623 "PieColor4", /* Pie Color 4 (def=ffc080) 100 */
1624 "CacheIPs", /* Cache IPs in DNS DB (0=no) 101 */
1625 "CacheTTL", /* DNS Cache entry TTL (days) 102 */
1626 "GeoDB", /* GeoDB lookups (0=no) 103 */
1627 "GeoDBDatabase", /* GeoDB database filename 104 */
1628 "StripCGI", /* Strip CGI in URLS (0=no) 105 */
1629 "TrimSquidURL", /* Trim squid URLs (0=none) 106 */
1630 "OmitPage", /* URLs not counted as pages 107 */
1631 "HTAccess", /* Write .httaccess files? 108 */
1632 "IgnoreState", /* Ignore state file (0=no) 109 */
1633 "DefaultIndex", /* Default index.* (1=yes) 110 */
1634 "GeoIP", /* Use GeoIP? (1=yes) 111 */
1635 "GeoIPDatabase", /* Database to use for GeoIP 112 */
1636 "NormalizeURL", /* Normalize CLF URLs (1=yes) 113 */
1637 "IndexMonths", /* # months for main page 114 */
1638 "GraphMonths", /* # months for yearly graph 115 */
1639 "YearHeaders", /* use year headers? (1=yes) 116 */
1640 "YearTotals", /* show year subtotals (0=no) 117 */
1641 "CountryFlags", /* show country flags? (0-no) 118 */
1642 "FlagDir", /* directory w/flag images 119 */
1643 "SearchCaseI" /* srch str case insensitive 120 */
1644 };
1645
1646 FILE *fp;
1647
1648 char buffer[BUFSIZE];
1649 char keyword[MAXKWORD];
1650 char value[MAXKVAL];
1651 char *cp1, *cp2;
1652 int i,key,count;
1653 int num_kwords=sizeof(kwords)/sizeof(char *);
1654
1655 if ( (fp=fopen(fname,"r")) == NULL)
1656 {
1657 if (verbose)
1658 fprintf(stderr,"%s %s\n",msg_bad_conf,fname);
1659 return;
1660 }
1661
1662 while ( (fgets(buffer,BUFSIZE,fp)) != NULL)
1663 {
1664 /* skip comments and blank lines */
1665 if ( (buffer[0]=='#') || isspace((unsigned char)buffer[0]) ) continue;
1666
1667 /* Get keyword */
1668 cp1=buffer;cp2=keyword;count=MAXKWORD-1;
1669 while ( (isalnum((unsigned char)*cp1)) && count )
1670 { *cp2++ = *cp1++; count--; }
1671 *cp2='\0';
1672
1673 /* Get value */
1674 cp2=value; count=MAXKVAL-1;
1675 while ((*cp1!='\n')&&(*cp1!='\0')&&(isspace((unsigned char)*cp1))) cp1++;
1676 while ((*cp1!='\n')&&(*cp1!='\0')&&count ) { *cp2++ = *cp1++; count--; }
1677 *cp2--='\0';
1678 while ((isspace((unsigned char)*cp2)) && (cp2 != value) ) *cp2--='\0';
1679
1680 /* check if blank keyword/value */
1681 if ( (keyword[0]=='\0') || (value[0]=='\0') ) continue;
1682
1683 key=0;
1684 for (i=0;i<num_kwords;i++)
1685 if (!ouricmp(keyword,kwords[i])) { key=i; break; }
1686
1687 if (key==0) { printf("%s '%s' (%s)\n", /* Invalid keyword */
1688 msg_bad_key,keyword,fname);
1689 continue;
1690 }
1691
1692 switch (key)
1693 {
1694 case 1: out_dir=save_opt(value); break; /* OutputDir */
1695 case 2: log_fname=save_opt(value); break; /* LogFile */
1696 case 3: msg_title=save_opt(value); break; /* ReportTitle */
1697 case 4: hname=save_opt(value); break; /* HostName */
1698 case 5: ignore_hist=
1699 (tolower(value[0])=='y')?1:0; break; /* IgnoreHist */
1700 case 6: verbose=
1701 (tolower(value[0])=='y')?1:2; break; /* Quiet */
1702 case 7: time_me=
1703 (tolower(value[0])=='n')?0:1; break; /* TimeMe */
1704 case 8: debug_mode=
1705 (tolower(value[0])=='y')?1:0; break; /* Debug */
1706 case 9: hourly_graph=
1707 (tolower(value[0])=='n')?0:1; break; /* HourlyGraph */
1708 case 10: hourly_stats=
1709 (tolower(value[0])=='n')?0:1; break; /* HourlyStats */
1710 case 11: ntop_sites = atoi(value); break; /* TopSites */
1711 case 12: ntop_urls = atoi(value); break; /* TopURLs */
1712 case 13: ntop_refs = atoi(value); break; /* TopRefs */
1713 case 14: ntop_agents = atoi(value); break; /* TopAgents */
1714 case 15: ntop_ctrys = atoi(value); break; /* TopCountries */
1715 case 16: add_nlist(value,&hidden_sites); break; /* HideSite */
1716 case 17: add_nlist(value,&hidden_urls); break; /* HideURL */
1717 case 18: add_nlist(value,&hidden_refs); break; /* HideReferrer */
1718 case 19: add_nlist(value,&hidden_agents); break; /* HideAgent */
1719 case 20: add_nlist(value,&index_alias); break; /* IndexAlias */
1720 case 21: add_nlist(value,&html_head); break; /* HTMLHead */
1721 case 22: add_nlist(value,&html_post); break; /* HTMLPost */
1722 case 23: add_nlist(value,&html_tail); break; /* HTMLTail */
1723 case 24: mangle_agent=atoi(value); break; /* MangleAgents */
1724 case 25: add_nlist(value,&ignored_sites); break; /* IgnoreSite */
1725 case 26: add_nlist(value,&ignored_urls); break; /* IgnoreURL */
1726 case 27: add_nlist(value,&ignored_refs); break; /* IgnoreReferrer */
1727 case 28: add_nlist(value,&ignored_agents); break; /* IgnoreAgent */
1728 case 29: if (tolower(value[0])=='y')
1729 verbose=0; break; /* ReallyQuiet */
1730 case 30: local_time=
1731 (tolower(value[0])=='y')?0:1; break; /* GMTTime */
1732 case 31: add_glist(value,&group_urls); break; /* GroupURL */
1733 case 32: add_glist(value,&group_sites); break; /* GroupSite */
1734 case 33: add_glist(value,&group_refs); break; /* GroupReferrer */
1735 case 34: add_glist(value,&group_agents); break; /* GroupAgent */
1736 case 35: shade_groups=
1737 (tolower(value[0])=='n')?0:1; break; /* GroupShading */
1738 case 36: hlite_groups=
1739 (tolower(value[0])=='n')?0:1; break; /* GroupHighlight */
1740 case 37: incremental=
1741 (tolower(value[0])=='y')?1:0; break; /* Incremental */
1742 case 38: state_fname=save_opt(value); break; /* State FName */
1743 case 39: hist_fname=save_opt(value); break; /* History FName */
1744 case 40: html_ext=save_opt(value); break; /* HTML extension */
1745 case 41: add_nlist(value,&html_pre); break; /* HTML Pre code */
1746 case 42: add_nlist(value,&html_body); break; /* HTML Body code */
1747 case 43: add_nlist(value,&html_end); break; /* HTML End code */
1748 case 44: use_https=
1749 (tolower(value[0])=='y')?1:0; break; /* Use https:// */
1750 case 45: add_nlist(value,&include_sites); break; /* IncludeSite */
1751 case 46: add_nlist(value,&include_urls); break; /* IncludeURL */
1752 case 47: add_nlist(value,&include_refs); break; /* IncludeReferrer*/
1753 case 48: add_nlist(value,&include_agents); break; /* IncludeAgent */
1754 case 49: add_nlist(value,&page_type); break; /* PageType */
1755 case 50: visit_timeout=atoi(value); break; /* VisitTimeout */
1756 case 51: graph_legend=
1757 (tolower(value[0])=='n')?0:1; break; /* GraphLegend */
1758 case 52: graph_lines = atoi(value); break; /* GraphLines */
1759 case 53: fold_seq_err=
1760 (tolower(value[0])=='y')?1:0; break; /* FoldSeqErr */
1761 case 54: ctry_graph=
1762 (tolower(value[0])=='n')?0:1; break; /* CountryGraph */
1763 case 55: ntop_sitesK = atoi(value); break; /* TopKSites (KB) */
1764 case 56: ntop_urlsK = atoi(value); break; /* TopKUrls (KB) */
1765 case 57: ntop_entry = atoi(value); break; /* Top Entry pgs */
1766 case 58: ntop_exit = atoi(value); break; /* Top Exit pages */
1767 case 59: ntop_search = atoi(value); break; /* Top Search pgs */
1768 case 60: log_type=(tolower(value[0])=='f')?
1769 LOG_FTP:((tolower(value[0])=='s')?
1770 LOG_SQUID:((tolower(value[0])=='w')?
1771 LOG_W3C:LOG_CLF)); break; /* LogType */
1772 case 61: add_glist(value,&search_list); break; /* SearchEngine */
1773 case 62: group_domains=atoi(value); break; /* GroupDomains */
1774 case 63: hide_sites=
1775 (tolower(value[0])=='y')?1:0; break; /* HideAllSites */
1776 case 64: all_sites=
1777 (tolower(value[0])=='y')?1:0; break; /* All Sites? */
1778 case 65: all_urls=
1779 (tolower(value[0])=='y')?1:0; break; /* All URLs? */
1780 case 66: all_refs=
1781 (tolower(value[0])=='y')?1:0; break; /* All Refs */
1782 case 67: all_agents=
1783 (tolower(value[0])=='y')?1:0; break; /* All Agents? */
1784 case 68: all_search=
1785 (tolower(value[0])=='y')?1:0; break; /* All Srch str */
1786 case 69: all_users=
1787 (tolower(value[0])=='y')?1:0; break; /* All Users? */
1788 case 70: ntop_users=atoi(value); break; /* TopUsers */
1789 case 71: add_nlist(value,&hidden_users); break; /* HideUser */
1790 case 72: add_nlist(value,&ignored_users); break; /* IgnoreUser */
1791 case 73: add_nlist(value,&include_users); break; /* IncludeUser */
1792 case 74: add_glist(value,&group_users); break; /* GroupUser */
1793 case 75: dump_path=save_opt(value); break; /* DumpPath */
1794 case 76: dump_ext=save_opt(value); break; /* Dumpfile ext */
1795 case 77: dump_header=
1796 (tolower(value[0])=='y')?1:0; break; /* DumpHeader? */
1797 case 78: dump_sites=
1798 (tolower(value[0])=='y')?1:0; break; /* DumpSites? */
1799 case 79: dump_urls=
1800 (tolower(value[0])=='y')?1:0; break; /* DumpURLs? */
1801 case 80: dump_refs=
1802 (tolower(value[0])=='y')?1:0; break; /* DumpReferrers? */
1803 case 81: dump_agents=
1804 (tolower(value[0])=='y')?1:0; break; /* DumpAgents? */
1805 case 82: dump_users=
1806 (tolower(value[0])=='y')?1:0; break; /* DumpUsers? */
1807 case 83: dump_search=
1808 (tolower(value[0])=='y')?1:0; break; /* DumpSrchStrs? */
1809 #ifdef USE_DNS
1810 case 84: dns_cache=save_opt(value); break; /* DNSCache fname */
1811 case 85: dns_children=atoi(value); break; /* DNSChildren */
1812 #else
1813 case 84: /* Disable DNSCache and DNSChildren if DNS is not enabled */
1814 case 85: printf("%s '%s' (%s)\n",msg_bad_key,keyword,fname); break;
1815 #endif /* USE_DNS */
1816 case 86: daily_graph=
1817 (tolower(value[0])=='n')?0:1; break; /* HourlyGraph */
1818 case 87: daily_stats=
1819 (tolower(value[0])=='n')?0:1; break; /* HourlyStats */
1820 case 88: link_referrer=
1821 (tolower(value[0])=='y')?1:0; break; /* LinkReferrer */
1822 case 89: add_nlist(value,&page_prefix); break; /* PagePrefix */
1823 case 90: strncpy(hit_color+1, value, 6); break; /* ColorHit */
1824 case 91: strncpy(file_color+1, value, 6); break; /* ColorFile */
1825 case 92: strncpy(site_color+1, value, 6); break; /* ColorSite */
1826 case 93: strncpy(kbyte_color+1,value, 6); break; /* ColorKbyte */
1827 case 94: strncpy(page_color+1, value, 6); break; /* ColorPage */
1828 case 95: strncpy(visit_color+1,value, 6); break; /* ColorVisit */
1829 case 96: strncpy(misc_color+1, value, 6); break; /* ColorMisc */
1830 case 97: strncpy(pie_color1+1, value, 6); break; /* PieColor1 */
1831 case 98: strncpy(pie_color2+1, value, 6); break; /* PieColor2 */
1832 case 99: strncpy(pie_color3+1, value, 6); break; /* PieColor3 */
1833 case 100:strncpy(pie_color4+1, value, 6); break; /* PieColor4 */
1834 #ifdef USE_DNS
1835 case 101: cache_ips=
1836 (tolower(value[0])=='y')?1:0; break; /* CacheIPs */
1837 case 102: cache_ttl=atoi(value); break; /* CacheTTL days */
1838 case 103: geodb=
1839 (tolower(value[0])=='y')?1:0; break; /* GeoDB */
1840 case 104: geodb_fname=save_opt(value); break; /* GeoDBDatabase */
1841 #else
1842 case 101: /* Disable CacheIPs/CacheTTL/GeoDB/GeoDBDatabase if none */
1843 case 102:
1844 case 103:
1845 case 104: printf("%s '%s' (%s)\n",msg_bad_key,keyword,fname); break;
1846 #endif /* USE_DNS */
1847 case 105: stripcgi=
1848 (tolower(value[0])=='n')?0:1; break; /* StripCGI */
1849 case 106: trimsquid=atoi(value); break; /* TrimSquidURL */
1850 case 107: add_nlist(value,&omit_page); break; /* OmitPage */
1851 case 108: htaccess=
1852 (tolower(value[0])=='y')?1:0; break; /* HTAccess */
1853 case 109: ignore_state=
1854 (tolower(value[0])=='y')?1:0; break; /* IgnoreState */
1855 case 110: default_index=
1856 (tolower(value[0])=='n')?0:1; break; /* DefaultIndex */
1857 #ifdef USE_GEOIP
1858 case 111: geoip=
1859 (tolower(value[0])=='y')?1:0; break; /* GeoIP */
1860 case 112: geoip_db=save_opt(value); break; /* GeoIPDatabase */
1861 #else
1862 case 111: /* Disable GeoIP and GeoIPDatabase if not enabled */
1863 case 112: printf("%s '%s' (%s)\n",msg_bad_key,keyword,fname); break;
1864 #endif
1865 case 113: normalize=
1866 (tolower(value[0])=='n')?0:1; break; /* NormalizeURL */
1867 case 114: index_mths=atoi(value); break; /* IndexMonths */
1868 case 115: graph_mths=atoi(value); break; /* GraphMonths */
1869 case 116: year_hdrs=
1870 (tolower(value[0])=='n')?0:1; break; /* YearHeaders */
1871 case 117: year_totals=
1872 (tolower(value[0])=='n')?0:1; break; /* YearTotals */
1873 case 118: use_flags=
1874 (tolower(value[0])=='y')?1:0; break; /* CountryFlags */
1875 case 119: use_flags=1; flag_dir=save_opt(value); break; /* FlagDir */
1876 case 120: searchcasei=
1877 (tolower(value[0])=='n')?0:1; break; /* SearchCaseI */
1878 }
1879 }
1880 fclose(fp);
1881 }
1882
1883 /*********************************************/
1884 /* SAVE_OPT - save option from config file */
1885 /*********************************************/
1886
/*
 * save_opt - make a heap copy of a configuration value.
 *
 *   str   NUL-terminated text to duplicate.  It is handed straight to
 *         strlen(), so it must not be NULL.
 *
 * Returns a freshly malloc()'d copy of str, or NULL when the allocation
 * fails.  Nothing in this function releases the copy.
 */
static char *save_opt(char *str)
{
   size_t nbytes = strlen(str) + 1;          /* text plus terminator   */
   char  *copy   = malloc(nbytes);

   if (copy == NULL) return NULL;

   memcpy(copy, str, nbytes);
   return copy;
}
1896
1897 /*********************************************/
/* CLEAR_MONTH - initialize monthly stuff    */
1899 /*********************************************/
1900
clear_month()1901 void clear_month()
1902 {
1903 int i;
1904
1905 init_counters(); /* reset monthly counters */
1906 del_htabs(); /* clear hash tables */
1907 if (ntop_ctrys!=0 ) for (i=0;i<ntop_ctrys;i++) top_ctrys[i]=NULL;
1908 }
1909
1910 /*********************************************/
1911 /* INIT_COUNTERS - prep counters for use */
1912 /*********************************************/
1913
init_counters()1914 void init_counters()
1915 {
1916 int i;
1917 for (i=0;i<TOTAL_RC;i++) response[i].count = 0;
1918 for (i=0;i<31;i++) /* monthly totals */
1919 {
1920 tm_xfer[i]=0.0;
1921 tm_hit[i]=tm_file[i]=tm_site[i]=tm_page[i]=tm_visit[i]=0;
1922 }
1923 for (i=0;i<24;i++) /* hourly totals */
1924 {
1925 th_hit[i]=th_file[i]=th_page[i]=0;
1926 th_xfer[i]=0.0;
1927 }
1928 for (i=0;ctry[i].desc;i++) /* country totals */
1929 {
1930 ctry[i].count=0;
1931 ctry[i].files=0;
1932 ctry[i].xfer=0;
1933 }
1934 t_hit=t_file=t_site=t_url=t_ref=t_agent=t_page=t_visit=t_user=0;
1935 t_xfer=0.0;
1936 mh_hit = dt_site = 0;
1937 f_day=l_day=1;
1938 }
1939
1940 /*********************************************/
1941 /* PRINT_OPTS - print command line options */
1942 /*********************************************/
1943
print_opts(char * pname)1944 void print_opts(char *pname)
1945 {
1946 int i;
1947
1948 printf("%s: %s %s\n",h_usage1,pname,h_usage2);
1949 for (i=0;h_msg[i];i++) printf("%s\n",h_msg[i]);
1950 exit(1);
1951 }
1952
1953 /*********************************************/
1954 /* PRINT_VERSION */
1955 /*********************************************/
1956
print_version()1957 void print_version()
1958 {
1959 char buf[128]="";
1960 uname(&system_info);
1961
1962 printf("Webalizer V%s-%s (%s %s %s) %s\n%s\n",
1963 version,editlvl,
1964 system_info.sysname,system_info.release,system_info.machine,
1965 language,copyright);
1966
1967 #ifdef USE_DNS
1968 strncpy(&buf[strlen(buf)],"DNS/GeoDB ",11);
1969 #endif
1970 #ifdef USE_BZIP
1971 strncpy(&buf[strlen(buf)],"BZip2 ",7);
1972 #endif
1973 #ifdef USE_GEOIP
1974 strncpy(&buf[strlen(buf)],"GeoIP ",7);
1975 #endif
1976
1977 if (debug_mode)
1978 {
1979 printf("Mod date: %s Options: ",moddate);
1980 if (buf[0]!=0) printf("%s",buf);
1981 else printf("none");
1982 printf("\n");
1983 #if USE_DNS
1984 printf("Default GeoDB dir : %s\n",GEODB_LOC);
1985 #endif
1986 printf("Default config dir: %s\n",ETCDIR);
1987 printf("\n");
1988 }
1989 else printf("\n");
1990 exit(1);
1991 }
1992
1993 /*********************************************/
1994 /* CUR_TIME - return date/time as a string */
1995 /*********************************************/
1996
cur_time()1997 char *cur_time()
1998 {
1999 time_t now;
2000 static char timestamp[48];
2001
2002 /* get system time */
2003 now = time(NULL);
2004 /* convert to timestamp string */
2005 if (local_time)
2006 strftime(timestamp,sizeof(timestamp),"%d-%b-%Y %H:%M %Z",
2007 localtime(&now));
2008 else
2009 strftime(timestamp,sizeof(timestamp),"%d-%b-%Y %H:%M GMT",
2010 gmtime(&now));
2011
2012 return timestamp;
2013 }
2014
2015 /*********************************************/
2016 /* ISPAGE - determine if an HTML page or not */
2017 /*********************************************/
2018
ispage(char * str)2019 int ispage(char *str)
2020 {
2021 NLISTPTR t;
2022 char *cp1, *cp2;
2023
2024 if (isinlist(omit_page,str)!=NULL) return 0;
2025
2026 cp1=cp2=str;
2027 while (*cp1!='\0') { if (*cp1=='.') cp2=cp1; cp1++; }
2028 if ((cp2++==str)||(*(--cp1)=='/')) return 1;
2029 t=page_prefix;
2030 while(t!=NULL)
2031 {
2032 /* Check if a PagePrefix matches */
2033 if(strncmp(str,t->string,strlen(t->string))==0) return 1;
2034 t=t->next;
2035 }
2036 return (isinlist(page_type,cp2)!=NULL);
2037 }
2038
2039 /*********************************************/
2040 /* ISURLCHAR - checks for valid URL chars */
2041 /*********************************************/
2042
/*
 * isurlchar - test whether a character is acceptable inside a URL.
 *
 *   ch     character to test
 *   flag   non-zero: the CGI delimiters ; ? & = are NOT accepted
 *          zero:     they are accepted as well
 *
 * Returns 1 for letters, digits, any value above 127 and the listed
 * punctuation characters; 0 for everything else.  The NUL character is
 * always rejected.
 */
int isurlchar(unsigned char ch, int flag)
{
   /* strchr() treats the terminator as part of the string, so without  */
   /* this test a NUL would be reported as a valid URL character        */
   if (ch == '\0')  return 0;

   if (isalnum(ch)) return 1;                /* allow letters, numbers... */
   if (ch > 127)    return 1;                /* allow extended chars...   */
   if (flag)                                 /* and filter some others    */
      return (strchr(":/\\.,' *!-+_@~()[]!",ch)!=NULL);     /* strip cgi vars */
   else
      return (strchr(":/\\.,' *!-+_@~()[]!;?&=",ch)!=NULL); /* keep cgi vars  */
}
2052
2053 /*********************************************/
2054 /* CTRY_IDX - create unique # from TLD */
2055 /*********************************************/
2056
/*
 * ctry_idx - pack a short name into a single number.
 *
 *   str   NUL-terminated name.
 *
 * Working from the last character backwards, each character contributes
 * (ch - 'a' + 1) shifted left by 0, 7, 12, 17, 22, ... bits (the first
 * step is 7 bits wide, every later one 5).  An empty string yields 0.
 *
 * NOTE(review): the shift is carried out in int before being added to
 * the 64-bit sum, so long names can exceed what an int shift can hold.
 * That arithmetic is kept exactly as it was, because widening it would
 * change the numbers produced for some six-letter names -- confirm
 * against any precomputed index values before changing it.
 */
u_int64_t ctry_idx(char *str)
{
   u_int64_t idx   = 0;
   int       shift = 0;
   char     *cp    = str + strlen(str);      /* one past the last char */

   while (cp > str)
   {
      --cp;
      idx   += ((*cp - 'a' + 1) << shift);
      shift += (shift == 0) ? 7 : 5;
   }
   return idx;
}
2066
2067 /*********************************************/
2068 /* UN_IDX - get TLD from index # */
2069 /*********************************************/
2070
/*
 * un_idx - turn an index number back into its name.
 *
 *   idx   value in the packing produced by ctry_idx().
 *
 * Returns a pointer to a static 8-byte buffer that is cleared and
 * refilled on every call.  idx == 0 gives an empty string.
 *
 * Two decodings exist:
 *   - low 7 bits greater than 32: a two character result made of
 *     (idx >> 7) + 'a' followed by (low bits - 32).  The original
 *     comment says this is only for a1, a2 and o1.
 *   - otherwise: six positions are decoded (7 bits for the last, 5 bits
 *     for each earlier one); empty positions become spaces and leading
 *     spaces are then shifted out.
 */
char *un_idx(u_int64_t idx)
{
   static char buf[8];
   int  low, field, pos;

   memset(buf, 0, sizeof(buf));
   if (idx == 0) return buf;

   low = (int)(idx & 0x7f);
   if (low > 32)                             /* only for a1, a2 and o1 */
   {
      buf[0] = (char)((idx >> 7) + 'a');
      buf[1] = (char)(low - 32);
      return buf;
   }

   buf[5] = (char)(low + 'a' - 1);           /* last letter, 7 bit field */
   for (pos = 4; pos >= 0; pos--)
   {
      field    = (int)((idx >> (((5 - pos) * 5) + 2)) & 0x1f);
      buf[pos] = field ? (char)(field + 'a' - 1) : ' ';
   }

   /* slide left while padded; buf[6] is 0 so the string shrinks by one */
   while (buf[0] == ' ')
      memmove(buf, buf + 1, 6);

   return buf;
}
2086
2087 /*********************************************/
2088 /* FROM_HEX - convert hex char to decimal */
2089 /*********************************************/
2090
/*
 * from_hex - value of a single hexadecimal digit.
 *
 *   c   character to convert; '0'-'9', 'A'-'F' and 'a'-'f' are valid.
 *
 * Returns 0..15 for a valid digit and 0 for anything else.
 *
 * Each range is tested explicitly.  Treating every non-digit,
 * non-uppercase character as lowercase hex lets characters such as
 * 'X' or '_' slip through with a value between 0 and 9.
 */
char from_hex(char c)                           /* convert hex to dec      */
{
   if (c>='0' && c<='9') return c-'0';          /* 0-9?                    */
   if (c>='A' && c<='F') return c-'A'+10;       /* A-F?                    */
   if (c>='a' && c<='f') return c-'a'+10;       /* a-f?                    */
   return 0;                                    /* return 0 if bad...      */
}
2098
2099 /*********************************************/
2100 /* UNESCAPE - convert escape seqs to chars */
2101 /*********************************************/
2102
/*
 * unescape - decode %XX escape sequences in place.
 *
 *   str   string to decode (may be NULL).
 *
 * Returns str, or NULL when str is NULL.
 *
 * Details visible in the code:
 *   - a '%' that is not followed by a hex digit is kept as a literal '%'
 *   - only the first character after '%' is checked with isxdigit(); the
 *     second is passed to from_hex() as it is
 *   - decoded values below 32, and the value 127, are replaced by '_'
 *   - if the string ends right after the first hex digit, the unfinished
 *     escape is dropped
 */
char *unescape(char *str)
{
   unsigned char *in =(unsigned char *)str;  /* unsigned so values > 127 */
   unsigned char *out=in;                    /* compare as expected      */

   if (str==NULL) return NULL;

   while (*in!='\0')
   {
      if (*in!='%')                          /* ordinary character       */
      {
         *out++ = *in++;
         continue;
      }

      in++;                                  /* step over the '%'        */
      if (!isxdigit(*in))                    /* not an escape after all  */
      {
         *out++='%';
         continue;
      }

      *out=from_hex(*in++)*16;               /* high nibble              */
      if (*in!='\0') *out+=from_hex(*in);    /* low nibble, if present   */
      if ((*out<32)||(*out==127)) *out='_';  /* make '_' if its bad      */
      if (*in!='\0') { out++; in++; }
   }
   *out='\0';                                /* don't forget terminator  */
   return str;
}
2129
2130 /*********************************************/
2131 /* OURICMP - Case insensitive string compare */
2132 /*********************************************/
2133
/*
 * ouricmp - case insensitive comparison, bounded by str1.
 *
 *   str1   string that drives the comparison
 *   str2   string compared against
 *
 * Returns 0 when every character of str1 matches the character at the
 * same position of str2, ignoring case -- that is, when str1 is a
 * prefix of (or equal to) str2 -- and 1 otherwise.  An empty str1
 * always gives 0.
 */
int ouricmp(char *str1, char *str2)
{
   const unsigned char *lhs=(const unsigned char *)str1;
   const unsigned char *rhs=(const unsigned char *)str2;

   for ( ; *lhs!=0; lhs++, rhs++)
      if (tolower(*lhs)!=tolower(*rhs)) return 1;

   return 0;
}
2141
2142 /*********************************************/
2143 /* SRCH_STRING - get search strings from ref */
2144 /*********************************************/
2145
srch_string(char * ptr)2146 void srch_string(char *ptr)
2147 {
2148 /* ptr should point to unescaped query string */
2149 char tmpbuf[BUFSIZE];
2150 char srch[80]="";
2151 unsigned char *cp1, *cp2, *cps;
2152 int sp_flg=0;
2153
2154 /* Check if search engine referrer or return */
2155 if ( (cps=(unsigned char *)isinglist(search_list,log_rec.refer))==NULL)
2156 return;
2157
2158 /* Try to find query variable */
2159 srch[0]='?'; srch[sizeof(srch)-1] = '\0';
2160 strncpy(&srch[1],(char *)cps,sizeof(srch)-2); /* First, try "?..." */
2161 if ((cp1=(unsigned char *)strstr(ptr,srch))==NULL)
2162 {
2163 srch[0]='&'; /* Next, try "&..." */
2164 if ((cp1=(unsigned char *)strstr(ptr,srch))==NULL) return;
2165 }
2166 cp2=(unsigned char *)tmpbuf;
2167 while (*cp1!='=' && *cp1!=0) cp1++; if (*cp1!=0) cp1++;
2168 while (*cp1!='&' && *cp1!=0)
2169 {
2170 if (*cp1=='"' || *cp1==',' || *cp1=='?')
2171 { cp1++; continue; } /* skip bad ones.. */
2172 else
2173 {
2174 if (*cp1=='+') *cp1=' '; /* change + to space */
2175 if (sp_flg && *cp1==' ') { cp1++; continue; } /* compress spaces */
2176 if (*cp1==' ') sp_flg=1; else sp_flg=0; /* (flag spaces here) */
2177 if (searchcasei)
2178 *cp2++=tolower(*cp1++); /* normal character */
2179 else *cp2++=*cp1++;
2180 }
2181 }
2182 *cp2=0; cp2=(unsigned char *)tmpbuf;
2183 if (tmpbuf[0]=='?') tmpbuf[0]=' '; /* format fix ? */
2184 while( *cp2!=0 && isspace((unsigned char)*cp2) ) cp2++; /* skip sps. */
2185 if (*cp2==0) return;
2186
2187 /* any trailing spaces? */
2188 cp1=cp2+strlen((char *)cp2)-1;
2189 while (cp1!=cp2) if (isspace((unsigned char)*cp1)) *cp1--='\0'; else break;
2190
2191 /* strip invalid chars */
2192 cp1=cp2;
2193 while (*cp1!=0) { if ((*cp1<32)||(*cp1==127)) *cp1='_'; cp1++; }
2194
2195 if (put_snode((char *)cp2,(u_int64_t)1,sr_htab))
2196 {
2197 if (verbose)
2198 /* Error adding search string node, skipping .... */
2199 fprintf(stderr,"%s %s\n", msg_nomem_sc, tmpbuf);
2200 }
2201 return;
2202 }
2203
2204 /*********************************************/
2205 /* GET_DOMAIN - Get domain portion of host */
2206 /*********************************************/
2207
get_domain(char * str)2208 char *get_domain(char *str)
2209 {
2210 char *cp;
2211 int i=group_domains+1;
2212
2213 if (isipaddr(str)) return NULL;
2214 cp = str+strlen(str)-1;
2215
2216 while (cp!=str)
2217 {
2218 if (*cp=='.')
2219 if (!(--i)) return ++cp;
2220 cp--;
2221 }
2222 return cp;
2223 }
2224
2225 /*********************************************/
2226 /* AGENT_MANGLE - Re-format user agent */
2227 /*********************************************/
2228
agent_mangle(char * str)2229 void agent_mangle(char *str)
2230 {
2231 char *cp1, *cp2, *cp3;
2232
2233 str=cp2=log_rec.agent;
2234 cp1=strstr(str,"ompatible"); /* check known fakers */
2235 if (cp1!=NULL)
2236 {
2237 while (*cp1!=';'&&*cp1!='\0') cp1++;
2238 /* kludge for Mozilla/3.01 (compatible;) */
2239 if (*cp1++==';' && strcmp(cp1,")\"")) /* success! */
2240 {
2241 /* Opera can hide as MSIE */
2242 cp3=strstr(str,"Opera");
2243 if (cp3!=NULL)
2244 {
2245 while (*cp3!='.'&&*cp3!='\0')
2246 {
2247 if(*cp3=='/') *cp2++=' ';
2248 else *cp2++=*cp3;
2249 cp3++;
2250 }
2251 cp1=cp3;
2252 }
2253 else
2254 {
2255 while (*cp1 == ' ') cp1++; /* eat spaces */
2256 while (*cp1!='.'&&*cp1!='\0'&&*cp1!=';') *cp2++=*cp1++;
2257 }
2258 if (mangle_agent<5)
2259 {
2260 while (*cp1!='.'&&*cp1!=';'&&*cp1!='\0') *cp2++=*cp1++;
2261 if (*cp1!=';'&&*cp1!='\0') { *cp2++=*cp1++; *cp2++=*cp1++; }
2262 }
2263 if (mangle_agent<4)
2264 if (*cp1>='0'&&*cp1<='9') *cp2++=*cp1++;
2265 if (mangle_agent<3)
2266 while (*cp1!=';'&&*cp1!='\0'&&*cp1!='('&&*cp1!=' ') *cp2++=*cp1++;
2267 if (mangle_agent<2)
2268 {
2269 /* Level 1 - try to get OS */
2270 cp1=strstr(cp1,")");
2271 if (cp1!=NULL)
2272 {
2273 *cp2++=' ';
2274 *cp2++='(';
2275 while (*cp1!=';'&&*cp1!='('&&cp1!=str) cp1--;
2276 if (cp1!=str&&*cp1!='\0') cp1++;
2277 while (*cp1==' '&&*cp1!='\0') cp1++;
2278 while (*cp1!=')'&&*cp1!='\0') *cp2++=*cp1++;
2279 *cp2++=')';
2280 }
2281 }
2282 *cp2='\0';
2283 }
2284 else
2285 {
2286 /* nothing after "compatible", should we mangle? */
2287 /* not for now */
2288 }
2289 }
2290 else
2291 {
2292 cp1=strstr(str,"Opera"); /* Opera flavor */
2293 if (cp1!=NULL)
2294 {
2295 while (*cp1!='/'&&*cp1!=' '&&*cp1!='\0') *cp2++=*cp1++;
2296 while (*cp1!='.'&&*cp1!='\0')
2297 {
2298 if(*cp1=='/') *cp2++=' ';
2299 else *cp2++=*cp1;
2300 cp1++;
2301 }
2302 if (mangle_agent<5)
2303 {
2304 while (*cp1!='.'&&*cp1!='\0') *cp2++=*cp1++;
2305 *cp2++=*cp1++;
2306 *cp2++=*cp1++;
2307 }
2308 if (mangle_agent<4)
2309 if (*cp1>='0'&&*cp1<='9') *cp2++=*cp1++;
2310 if (mangle_agent<3)
2311 while (*cp1!=' '&&*cp1!='\0'&&*cp1!='(') *cp2++=*cp1++;
2312 if (mangle_agent<2)
2313 {
2314 cp1=strstr(cp1,"(");
2315 if (cp1!=NULL)
2316 {
2317 cp1++;
2318 *cp2++=' ';
2319 *cp2++='(';
2320 while (*cp1!=';'&&*cp1!=')'&&*cp1!='\0') *cp2++=*cp1++;
2321 *cp2++=')';
2322 }
2323 }
2324 *cp2='\0';
2325 }
2326 else
2327 {
2328 cp1=strstr(str,"Mozilla"); /* Netscape flavor */
2329 if (cp1!=NULL)
2330 {
2331 while (*cp1!='/'&&*cp1!=' '&&*cp1!='\0') *cp2++=*cp1++;
2332 if (*cp1==' ') *cp1='/';
2333 while (*cp1!='.'&&*cp1!='\0') *cp2++=*cp1++;
2334 if (mangle_agent<5)
2335 {
2336 while (*cp1!='.'&&*cp1!='\0') *cp2++=*cp1++;
2337 *cp2++=*cp1++;
2338 *cp2++=*cp1++;
2339 }
2340 if (mangle_agent<4)
2341 if (*cp1>='0'&&*cp1<='9') *cp2++=*cp1++;
2342 if (mangle_agent<3)
2343 while (*cp1!=' '&&*cp1!='\0'&&*cp1!='(') *cp2++=*cp1++;
2344 if (mangle_agent<2)
2345 {
2346 /* Level 1 - Try to get OS */
2347 cp1=strstr(cp1,"(");
2348 if (cp1!=NULL)
2349 {
2350 cp1++;
2351 *cp2++=' ';
2352 *cp2++='(';
2353 while (*cp1!=';'&&*cp1!=')'&&*cp1!='\0') *cp2++=*cp1++;
2354 *cp2++=')';
2355 }
2356 }
2357 *cp2='\0';
2358 }
2359 }
2360 }
2361 }
2362
2363 /*********************************************/
2364 /* OUR_GZGETS - enhanced gzgets for log only */
2365 /*********************************************/
2366
our_gzgets(void * fp,char * buf,int size)2367 char *our_gzgets(void *fp, char *buf, int size)
2368 {
2369 char *out_cp=buf; /* point to output */
2370 while (1)
2371 {
2372 if (f_cp>(f_buf+f_end-1)) /* load? */
2373 {
2374 #ifdef USE_BZIP
2375 f_end=(gz_log==COMP_BZIP)?
2376 BZ2_bzread(fp, f_buf, GZ_BUFSIZE):
2377 gzread(fp, f_buf, GZ_BUFSIZE);
2378 #else
2379 f_end=gzread(fp, f_buf, GZ_BUFSIZE);
2380 #endif
2381 if (f_end<=0) return Z_NULL;
2382 f_cp=f_buf;
2383 }
2384
2385 if (--size) /* more? */
2386 {
2387 *out_cp++ = *f_cp;
2388 if (*f_cp++ == '\n') { *out_cp='\0'; return buf; }
2389 }
2390 else { *out_cp='\0'; return buf; }
2391 }
2392 }
2393
2394 #ifdef USE_BZIP
2395 /*********************************************/
2396 /* bz2_rewind - our 'rewind' for bz2 files */
2397 /*********************************************/
2398
bz2_rewind(void ** fp,char * fname,char * mode)2399 int bz2_rewind( void **fp, char *fname, char *mode )
2400 {
2401 BZ2_bzclose( *fp );
2402 *fp = BZ2_bzopen( fname, "rb");
2403 f_cp=f_buf+GZ_BUFSIZE; f_end=0; /* reset buffer counters */
2404 memset(f_buf, 0, sizeof(f_buf));
2405 if (*fp == Z_NULL) return -1;
2406 else return 0;
2407 }
2408 #endif /* USE_BZIP */
2409
2410 /*********************************************/
2411 /* ISIPADDR - Determine if str is IP address */
2412 /*********************************************/
2413
/*
 * isipaddr - classify a host string.
 *
 *   str   NUL-terminated host name or address.
 *
 * Returns
 *    0   host name (no ':' and some character other than digits/dots)
 *    1   IPv4: only digits and dots, with exactly three dots
 *    2   contains ':' and '.', all characters from ":.abcdef0123456789"
 *    3   contains ':' but no '.', all characters from the same set
 *   -1   bad host name: digits/dots only but not three dots (this
 *        includes the empty string), or a ':' together with a character
 *        outside the set above (upper case hex digits are outside it)
 */
int isipaddr(char *str)
{
   size_t ok;                                /* length of the valid prefix */
   int    dots=0;
   char  *p;

   if (strchr(str,':')!=NULL)
   {
      /* Possible IPv6 Address */
      ok=strspn(str,":.abcdef0123456789");
      if (str[ok]!='\0') return -1;          /* bad hostname (has ':')     */
      return (strchr(str,'.')!=NULL) ? 2     /* IPv4/IPv6                  */
                                     : 3;    /* IPv6                       */
   }

   /* Not an IPv6 address, check for IPv4 */
   ok=strspn(str,".0123456789");
   if (str[ok]!='\0') return 0;              /* hostname                   */

   for (p=str; *p!='\0'; p++)
      if (*p=='.') dots++;

   return (dots==3) ? 1                      /* IPv4                       */
                    : -1;                    /* bad hostname               */
}
2446
2447 /*****************************************************************/
2448 /* */
2449 /* JDATE - Julian date calculator */
2450 /* */
2451 /* Calculates the number of days since Jan 1, 0000. */
2452 /* */
2453 /* Originally written by Bradford L. Barrett (03/17/1988) */
2454 /* Returns an unsigned long value representing the number of */
2455 /* days since January 1, 0000. */
2456 /* */
2457 /* Note: Due to the changes made by Pope Gregory XIII in the */
/*       16th Century (Feb 24, 1582), dates before 1583 will     */
/*       not return a truly accurate number (will be at least    */
2460 /* 10 days off). Somehow, I don't think this will */
2461 /* present much of a problem for most situations :) */
2462 /* */
2463 /* Usage: days = jdate(day, month, year) */
2464 /* */
2465 /* The number returned is adjusted by 5 to facilitate day of */
2466 /* week calculations. The mod of the returned value gives the */
2467 /* day of the week the date is. (ie: dow = days % 7 ) where */
2468 /* dow will return 0=Sunday, 1=Monday, 2=Tuesday, etc... */
2469 /* */
2470 /*****************************************************************/
2471
/*
 * jdate - day number for a calendar date.
 *
 *   day     day of the month
 *   month   month, 1..12 (used to index a 12 entry table, so other
 *           values are out of range)
 *   year    four digit year
 *
 * Returns the computed day count plus 5, so that (result % 7) gives the
 * day of the week as described in the header comment above.
 */
u_int64_t jdate( int day, int month, int year )
{
   /* days that precede the first day of each month in a normal year */
   static const int mtable[] = {0,31,59,90,120,151,181,212,243,273,304,334};

   int       is_leap;
   u_int64_t days;                           /* value returned */

   /* base number including leap and centennial year corrections */
   days  = (u_int64_t)year*365;
   days += day;
   days += mtable[month-1];
   days += (year+4)/4;
   days -= (year/100)-(year/400);

   /* in a leap year the extra day has not happened yet before March 1st */
   is_leap = (year % 4 == 0) && !((year % 100 == 0) && (year % 400 != 0));
   if (is_leap && (month < 3)) days--;

   return days+5;
}
2492