/***             analog 6.0             http://www.analog.cx/             ***/
/*** This program is copyright (c) Stephen R. E. Turner 1995 - 2004 except as
 *** stated otherwise.
 ***
 *** This program is free software. You can redistribute it and/or modify it
 *** under the terms of version 2 of the GNU General Public License, which you
 *** should have received with it.
 ***
 *** This program is distributed in the hope that it will be useful, but
 *** without any warranty, expressed or implied.   ***/

/*** process.c; process some data ***/
13 
14 #include "anlghea3.h"
15 
process_data(Logfile * logfilep,Hashtable ** hash,Arraydata ** arraydata,choice * count,choice * code2type,choice datacols[ITEM_NUMBER][OUTCOME_NUMBER][DATACOLS_NUMBER][2],choice data2cols[ITEM_NUMBER][DATA_NUMBER],unsigned int * no_cols,Include ** wanthead,Include * ispagehead,Alias ** aliashead,Include * argshead,Include * refargshead,Dateman * dman,Tree ** tree,Derv ** derv,choice * alltrees,choice * alldervs,choice * lowmem,logical case_insensitive,logical usercase_insensitive,unsigned char convfloor,logical multibyte,char * dirsuffix,unsigned int dirsufflength,unsigned int granularity)16 void process_data(Logfile *logfilep, Hashtable **hash,
17 		  Arraydata **arraydata, choice *count, choice *code2type,
18 		  choice datacols[ITEM_NUMBER][OUTCOME_NUMBER][DATACOLS_NUMBER][2],
19 		  choice data2cols[ITEM_NUMBER][DATA_NUMBER],
20 		  unsigned int *no_cols, Include **wanthead,
21 		  Include *ispagehead, Alias **aliashead, Include *argshead,
22 		  Include *refargshead, Dateman *dman, Tree **tree,
23 		  Derv **derv, choice *alltrees, choice *alldervs,
24 		  choice *lowmem, logical case_insensitive,
25 		  logical usercase_insensitive, unsigned char convfloor,
26 		  logical multibyte, char *dirsuffix,
27 		  unsigned int dirsufflength, unsigned int granularity) {
28   extern unsigned int year, month, date, hour, minute, code;
29   extern unsigned long unixtime, proctime;
30   extern char am;
31   extern double bytes;
32   extern Memman mm[], mmq, mms, *amemman;
33   extern choice *rep2type;
34   extern Hashentry *unwanted_entry, *blank_entry;
35   extern Hashindex *dummy_item;
36 
37   static Hashindex *gp[ITEM_NUMBER];
38   unsigned long data[DATA2_NUMBER];
39   Hashentry *item[ITEM_NUMBER];
40   logical wanttree[ITEM_NUMBER];
41   logical isitpage, last7;
42   choice ispage = UNSET;
43   choice wanted = TRUE, rc, outcome;
44   timecode_t timecode = FIRST_TIME;
45   char *name, *namestart, *nameend;
46   size_t len;
47   choice i, j, k;
48 
49   /*** check whether this line is wanted ***/
50 
51   if (count[INP_CODE] != 0) {
52     if (code == IGNORE_CODE) {
53       for (j = 0; j < ITEM_NUMBER; j++) {   /* reset strings */
54 	if (count[j] != 0)
55 	  mm[j].next_pos = mm[j].curr_pos;
56       }
57       mmq.next_pos = mmq.curr_pos;
58       mms.next_pos = mms.curr_pos;
59       logfilep->data[LOGDATA_UNKNOWN]++;
60       return;
61     }
62     else if (code2type[code] == UNWANTED)
63       wanted = FALSE;
64   }
65   if (wanted && count[INP_DATE] > 0) {
66     if (count[INP_UNIXTIME])
67       wanted = wantunixtime(&timecode, dman, unixtime, logfilep->tz);
68     else {
69       if (count[INP_AM]) {
70 	if (hour > 12) {
71 	  corrupt_line(logfilep, "Hour greater than 12", -1);
72 	  return;
73 	}
74 	else if (hour == 12)
75 	  hour = 0;
76 	if (am == 'p')
77 	  hour += 12;
78       }
79       wanted = wantdate(&timecode, dman, hour, minute, date, month, year,
80 			logfilep->tz);
81     }
82     if (wanted == ERR) { /* corrupt date */
83       corrupt_line(logfilep, "Corrupt date or time", -1);
84       return;
85     }
86   }  /* end count[INP_DATE] > 0 */
87   for (i = 0; i < ITEM_NUMBER; i++) {
88     wanttree[i] = FALSE;
89     if (!wanted) {
90       for (j = i; j < ITEM_NUMBER; j++) {  /* reset not-yet-hashed strings */
91 	if (count[j] != 0)                 /* NB i is now (unwanted i) + 1 */
92 	  mm[j].next_pos = mm[j].curr_pos;
93       }
94       mmq.next_pos = mmq.curr_pos;
95       mms.next_pos = mms.curr_pos;
96       logfilep->data[LOGDATA_UNWANTED]++;
97       return;
98     }
99     if (i == ITEM_HOST)
100       prealiasS(&(mm[ITEM_HOST]), &mms);
101     name = (char *)(mm[i].curr_pos);
102     if (count[i] == 0 || IS_EMPTY_STRING(name) ||
103 	(name[0] == '-' && name[1] == '\0')) {
104       item[i] = blank_entry; /* or unwanted_; but we get wanted right anyway */
105       wanted = (wanthead[i] == NULL || included("", FALSE, wanthead[i]));
106       /* wanthead[i] == NULL is tested again in included() but it often saves
107 	 a call to that function, because blankness is common. */
108     }
109     else {
110       if (i == ITEM_FILE || i == ITEM_REFERRER) {
111 	if ((j = prealias(&(mm[i]), &(mm[ITEM_VHOST]), item[ITEM_VHOST], &mmq,
112 			  (logical)((i == ITEM_FILE)?case_insensitive:FALSE),
113 			  (i == ITEM_FILE)?(logfilep->prefix):NULL,
114 			  logfilep->prefixlen, logfilep->pvpos,
115 			  (i == ITEM_FILE)?argshead:refargshead)) < 0) {
116 	  if (j == -1)
117 	    corrupt_line(logfilep,
118 			 "%v in file prefix but no VHOST in line", -1);
119 	  else
120 	    corrupt_line(logfilep, "Filename too long", -1);
121 	  return;
122 	}
123       }
124       if (lowmem[i] == 0) {
125 	if (gp[i] == NULL || !STREQ(name, gp[i]->name)) {
126 	  gp[i] = hashfind(&mm[i], &(hash[i]), no_cols[i], wanthead[i], UNSET,
127 			   ispagehead, aliashead[i], dirsuffix, dirsufflength,
128 			   usercase_insensitive, 0, FALSE, i, FALSE);
129 	}     /* if name the same as last time, don't need */
130 	else  /* to hashfind again, or save the name */
131 	  mm[i].next_pos = mm[i].curr_pos;
132 	item[i] = (Hashentry *)(gp[i]->other);
133 	wanted = (choice)(ENTRY_WANTED(item[i]));
134       }
135 
136       else if (lowmem[i] == 1) {
137 	if ((rc = do_alias(name, amemman, aliashead[i], dirsuffix,
138 			   dirsufflength, usercase_insensitive, 0, FALSE, i))
139 	    == FALSE) {
140 	  item[i] = hashfind(&mm[i], &(hash[i]), no_cols[i], wanthead[i],
141 			     UNSET, ispagehead, NULL, dirsuffix, dirsufflength,
142 			     usercase_insensitive, 0, FALSE, i, TRUE)->own;
143 	}
144 	else if (rc == TRUE) {
145 	  mm[i].next_pos = mm[i].curr_pos;  /* don't save string */
146 	  item[i] = hashfind(amemman, &(hash[i]), no_cols[i], wanthead[i],
147 			     UNSET, ispagehead, NULL, dirsuffix, dirsufflength,
148 			     usercase_insensitive, 0, FALSE, i, TRUE)->own;
149 	}
150 	else { /* rc == ERR */
151 	  mm[i].next_pos = mm[i].curr_pos;
152 	  if (included("", FALSE, wanthead[i]))
153 	    item[i] = blank_entry;
154 	  else
155 	    item[i] = unwanted_entry;
156 	}
157 	wanted = (choice)(ENTRY_WANTED(item[i]));
158       }
159 
160       else { /* lowmem[i] >= 2 */
161 	if ((rc = do_alias(name, amemman, aliashead[i], dirsuffix,
162 			   dirsufflength, usercase_insensitive, 0, FALSE, i))
163 	    == TRUE) {
164 	  mm[i].next_pos = mm[i].curr_pos;  /* don't save old string */
165 	  len = strlen((char *)(amemman->curr_pos));
166 	  memcpy(submalloc(&(mm[i]), len + 1), amemman->curr_pos, len + 1);
167 	  name = (char *)(mm[i].curr_pos); /* which might have changed */
168 	  amemman->next_pos = amemman->curr_pos;
169 	}
170 	if (rc == ERR) {
171 	  if (included("", FALSE, wanthead[i])) {
172 	    item[i] = blank_entry;
173 	    if (i == ITEM_FILE)
174 	      ispage = FALSE;
175 	  }
176 	  else
177 	    wanted = FALSE;
178 	  mm[i].next_pos = mm[i].curr_pos;
179 	}
180 	else {
181 	  isitpage = pageq(name, ispagehead, i);
182 	  if (i == ITEM_FILE)
183 	    ispage = (choice)isitpage;
184 	  if (included(name, isitpage, wanthead[i])) {
185 	    if (lowmem[i] == 2) {
186 	      item[i] = hashfind(&(mm[i]), &(hash[i]), no_cols[i], wanthead[i],
187 				 isitpage, ispagehead, NULL, dirsuffix,
188 				 dirsufflength, usercase_insensitive, 0, FALSE,
189 				 i, TRUE)->own;
190 	    }
191 	    else {
192 	      item[i] = blank_entry;
193 	      wanttree[i] = TRUE;
194 	      mm[i].next_pos = mm[i].curr_pos;
195 	    }
196 	  }
197 	  else {
198 	    wanted = FALSE;
199 	    mm[i].next_pos = mm[i].curr_pos;
200 	  }
201 	}
202       }  /* end lowmem[i] >= 2 */
203     }
204   }      /* end for i */
205   if (!wanted) {
206     logfilep->data[LOGDATA_UNWANTED]++;
207     return;
208   }
209 
210   /*** now add it to the hash tables ***/
211 
212   /* add to logfile from and to if wanted, whatever status code */
213   if (timecode != FIRST_TIME)
214     logfilep->from = MIN(logfilep->from, timecode);
215   logfilep->to = MAX(logfilep->to, timecode);
216   last7 = (timecode > dman->last7from && timecode <= dman->last7to);
217   if (ispage == UNSET)            /* NB blank_entry has ispage FALSE */
218     ispage = (choice)(item[ITEM_FILE]->ispage);
219   if (count[INP_BYTES] == 0)
220     bytes = 0;
221   if (count[INP_CODE] == 0) {
222     outcome = SUCCESS;
223     if (count[ITEM_FILE] == 2) {
224       logfilep->data[LOGDATA_SUCC]++;
225       logfilep->data[LOGDATA_SUCC7] += (unsigned long)last7;
226       logfilep->data[LOGDATA_PAGES] += (unsigned long)ispage;
227       logfilep->data[LOGDATA_PAGES7] +=
228 	(unsigned long)((logical)ispage && last7);
229     }
230     else {
231       logfilep->data[LOGDATA_UNKNOWN]++;
232       logfilep->data[LOGDATA_UNKNOWN7] += (unsigned long)last7;
233     }
234   }
235   else if (code <= 199) {
236     outcome = INFO;
237     logfilep->data[LOGDATA_INFO]++;
238     logfilep->data[LOGDATA_INFO7] += (unsigned long)last7;
239   }
240   else switch (outcome = code2type[code]) {
241   case SUCCESS:
242     logfilep->data[LOGDATA_SUCC]++;
243     logfilep->data[LOGDATA_SUCC7] += (unsigned long)last7;
244     logfilep->data[LOGDATA_PAGES] += (unsigned long)ispage;
245     logfilep->data[LOGDATA_PAGES7] +=
246       (unsigned long)((logical)ispage && last7);
247     break;
248   case FAILURE:
249     logfilep->data[LOGDATA_FAIL]++;
250     logfilep->data[LOGDATA_FAIL7] += (unsigned long)last7;
251     break;
252   case REDIRECT:
253     logfilep->data[LOGDATA_REDIR]++;
254     logfilep->data[LOGDATA_REDIR7] += (unsigned long)last7;
255     break;
256   case INFO:
257     logfilep->data[LOGDATA_INFO]++;
258     logfilep->data[LOGDATA_INFO7] += (unsigned long)last7;
259     break;
260   }
261 
262   /* NB any change in what to count when will require corresponding change to
263      end of strtoinfmt() and to fmt munching in correct() */
264   if (count[INP_CODE] == 2)
265     arrayscore(arraydata[REP_CODE - FIRST_ARRAYREP], code, 1,
266 	       (unsigned long)last7, 0, 0, 0., 0., timecode);
267   if (outcome != INFO) {
268     if (outcome == SUCCESS) {
269       if (count[INP_DATE] == 2)  /* only if file present: see strtoinfmt() */
270 	datehash(timecode, dman, 1, (unsigned long)ispage, bytes, granularity);
271       if (count[INP_BYTES] == 2) {
272 	arrayscore(arraydata[REP_SIZE - FIRST_ARRAYREP], bytes, 1,
273 		   (unsigned long)last7, (unsigned long)ispage,
274 		   (unsigned long)((logical)ispage && last7), bytes,
275 		   last7?bytes:0., timecode);
276 	logfilep->bytes += bytes;
277 	if (last7)
278 	  logfilep->bytes7 += bytes;
279       }
280       if (count[INP_PROCTIME] == 2)
281 	arrayscore(arraydata[REP_PROCTIME - FIRST_ARRAYREP], proctime, 1,
282 		   (unsigned long)last7, (unsigned long)ispage,
283 		   (unsigned long)((logical)ispage && last7), bytes,
284 		   last7?bytes:0., timecode);
285 
286       if (alltrees[0] != REP_NUMBER || alldervs[0] != REP_NUMBER) {
287 	/* for LOWMEM 3, run through alltrees then alldervs */
288 	/* NB these (POSSTREE/POSSDERV in init.c) only count successes */
289 	for (k = 0; k <= 1; k++) {
290 	  for (i = 0; (k?(alldervs[i]):(alltrees[i])) != REP_NUMBER; i++) {
291 	    j = rep2type[k?(alldervs[i]):(alltrees[i])];
292 	    if (wanttree[j]) {
293 	      dummy_item->name = mm[j].curr_pos;
294 	      /* mm.curr_pos is marked for deletion, but still intact at
295 		 present */
296 	      dummy_item->own->data[data2cols[j][REQUESTS]] = 1;
297 	      if (data2cols[j][REQUESTS7] >= 0)  /* see comment in genrep() */
298 		dummy_item->own->data[data2cols[j][REQUESTS7]] =
299 		  (unsigned long)last7;
300 	      if (data2cols[j][PAGES] >= 0)
301 		dummy_item->own->data[data2cols[j][PAGES]] =
302 		  (unsigned long)ispage;
303 	      if (data2cols[j][PAGES7] >= 0)
304 		dummy_item->own->data[data2cols[j][PAGES7]] =
305 		  (unsigned long)((logical)ispage && last7);
306 	      if (data2cols[j][SUCCDATE] >= 0)
307 		dummy_item->own->data[data2cols[j][SUCCDATE]] = timecode;
308 	      if (data2cols[j][SUCCFIRSTD] >= 0)
309 		dummy_item->own->data[data2cols[j][SUCCFIRSTD]] = timecode;
310 	      dummy_item->own->bytes = bytes;
311 	      dummy_item->own->bytes7 = last7?bytes:0.;
312 	      if (k)
313 		makederived(derv[alldervs[i] - FIRST_DERVREP], dummy_item,
314 			    NULL, convfloor, multibyte, alldervs[i],
315 			    datacols[j], no_cols[j]);
316 	      else {
317 		namestart = NULL;
318 		tree[G(alltrees[i])]->cutfn(&namestart, &nameend,
319 					    dummy_item->name, FALSE);
320 		(void)treefind(namestart, nameend,
321 			       &(tree[G(alltrees[i])]->tree), dummy_item,
322 			       tree[G(alltrees[i])]->cutfn, FALSE, TRUE, FALSE,
323 			       tree[G(alltrees[i])]->space, datacols[j],
324 			       no_cols[j]);
325 	      }
326 	    }
327 	  }
328 	}
329       }  /* there are trees or dervs */
330     }    /* outcome == SUCCESS */
331     data[REQUESTS2] = 1;
332     data[REQUESTS72] = (unsigned long)last7;
333     data[PAGES2] = (unsigned long)ispage;
334     data[PAGES72] = (unsigned long)((logical)ispage && last7);
335     data[DATE2] = timecode;
336     data[FIRSTD2] = timecode;
337     for (i = 0; i < ITEM_NUMBER; i++) {
338       if (count[i] == 2 && !ENTRY_BLANK(item[i]))
339 	hashscore(item[i], data, datacols[i][outcome], outcome, bytes);
340     }
341   }      /* end if outcome != INFO */
342 }
343 
/* Report the current record as corrupt and move on to the next line.
 *
 * The record text is taken from the global record_start; the global pos is
 * rewound to it and advanced by parsenonnewline().  The text is printed via
 * debug() in category 'C', cut to 500 characters if longer.
 *
 *   logfilep  logfile being read; its data[LOGDATA_CORRUPT] is incremented.
 *   message   explanation printed after the record when n < 0.
 *   n         when n >= 0, and 'C' appears in debug_args, a line consisting
 *             of "C: ", n spaces and a '*' is written to errfile instead of
 *             the message (presumably to point at the offending column --
 *             confirm against callers).
 */
void corrupt_line(Logfile *logfilep, char *message, ptrdiff_t n) {
  extern FILE *errfile;
  extern char *debug_args, *record_start, *pos;

  pos = record_start;
  parsenonnewline(logfilep);  /* skip to after next newline */
  *pos = '\0';                /* terminate the record so it can be printed */
  if (pos - record_start > 500)  /* debug might only handle 509 chars */
    *(record_start + 500) = '\0';
  debug('C', "%s", record_start);
  if (n < 0)
    debug('C', "  (%s)", message);
  else if (strchr(debug_args, 'C')) {
    fprintf(errfile, "C: ");
    /* emit n spaces in total: ten at a time, then the remainder singly */
    for ( ; n >= 10; n -= 10)
      fprintf(errfile, "          ");
    for ( ; n > 0; n--)
      fputc(' ', errfile);
    fprintf(errfile, "*\n");
  }
  logfilep->data[LOGDATA_CORRUPT]++;
  *pos = '\n';                /* put back the newline overwritten above */
  (void)parsenewline(logfilep, NULL, '\0');
}
368 
/* Add one set of counts to the first bucket of array whose threshold is at
 * least amount.
 *
 * Buckets are scanned from index 0.  A bucket with a threshold below -0.5
 * accepts any amount, and the scan has no other stopping condition, so the
 * caller must supply an array that ends with such a bucket.
 *
 *   array            buckets to search.
 *   amount           value compared against each bucket's threshold.
 *   reqs, reqs7, pages, pages7, bytes, bytes7
 *                    added to the like-named fields of the chosen bucket.
 *   timecode         widens the bucket's firstdate..lastdate range.
 */
void arrayscore(Arraydata *array, double amount, unsigned long reqs,
                unsigned long reqs7, unsigned long pages, unsigned long pages7,
                double bytes, double bytes7, timecode_t timecode) {
  choice i;

  for (i = 0; ; i++) {  /* last threshold must be -1 to ensure termination */
    if (amount <= array[i].threshold || array[i].threshold < -0.5) {
      array[i].reqs += reqs;
      array[i].reqs7 += reqs7;
      array[i].pages += pages;
      array[i].pages7 += pages7;
      array[i].bytes += bytes;
      array[i].bytes7 += bytes7;
      array[i].lastdate = MAX(array[i].lastdate, timecode);
      array[i].firstdate = MIN(array[i].firstdate, timecode);
      return;
    }
  }
}
388