1 /*** analog 6.0 http://www.analog.cx/ ***/
2 /*** This program is copyright (c) Stephen R. E. Turner 1995 - 2004 except as
3 *** stated otherwise.
4 ***
5 *** This program is free software. You can redistribute it and/or modify it
6 *** under the terms of version 2 of the GNU General Public License, which you
7 *** should have received with it.
8 ***
9 *** This program is distributed in the hope that it will be useful, but
10 *** without any warranty, expressed or implied. ***/
11
12 /*** process.c; process some data ***/
13
14 #include "anlghea3.h"
15
process_data(Logfile * logfilep,Hashtable ** hash,Arraydata ** arraydata,choice * count,choice * code2type,choice datacols[ITEM_NUMBER][OUTCOME_NUMBER][DATACOLS_NUMBER][2],choice data2cols[ITEM_NUMBER][DATA_NUMBER],unsigned int * no_cols,Include ** wanthead,Include * ispagehead,Alias ** aliashead,Include * argshead,Include * refargshead,Dateman * dman,Tree ** tree,Derv ** derv,choice * alltrees,choice * alldervs,choice * lowmem,logical case_insensitive,logical usercase_insensitive,unsigned char convfloor,logical multibyte,char * dirsuffix,unsigned int dirsufflength,unsigned int granularity)16 void process_data(Logfile *logfilep, Hashtable **hash,
17 Arraydata **arraydata, choice *count, choice *code2type,
18 choice datacols[ITEM_NUMBER][OUTCOME_NUMBER][DATACOLS_NUMBER][2],
19 choice data2cols[ITEM_NUMBER][DATA_NUMBER],
20 unsigned int *no_cols, Include **wanthead,
21 Include *ispagehead, Alias **aliashead, Include *argshead,
22 Include *refargshead, Dateman *dman, Tree **tree,
23 Derv **derv, choice *alltrees, choice *alldervs,
24 choice *lowmem, logical case_insensitive,
25 logical usercase_insensitive, unsigned char convfloor,
26 logical multibyte, char *dirsuffix,
27 unsigned int dirsufflength, unsigned int granularity) {
28 extern unsigned int year, month, date, hour, minute, code;
29 extern unsigned long unixtime, proctime;
30 extern char am;
31 extern double bytes;
32 extern Memman mm[], mmq, mms, *amemman;
33 extern choice *rep2type;
34 extern Hashentry *unwanted_entry, *blank_entry;
35 extern Hashindex *dummy_item;
36
37 static Hashindex *gp[ITEM_NUMBER];
38 unsigned long data[DATA2_NUMBER];
39 Hashentry *item[ITEM_NUMBER];
40 logical wanttree[ITEM_NUMBER];
41 logical isitpage, last7;
42 choice ispage = UNSET;
43 choice wanted = TRUE, rc, outcome;
44 timecode_t timecode = FIRST_TIME;
45 char *name, *namestart, *nameend;
46 size_t len;
47 choice i, j, k;
48
49 /*** check whether this line is wanted ***/
50
51 if (count[INP_CODE] != 0) {
52 if (code == IGNORE_CODE) {
53 for (j = 0; j < ITEM_NUMBER; j++) { /* reset strings */
54 if (count[j] != 0)
55 mm[j].next_pos = mm[j].curr_pos;
56 }
57 mmq.next_pos = mmq.curr_pos;
58 mms.next_pos = mms.curr_pos;
59 logfilep->data[LOGDATA_UNKNOWN]++;
60 return;
61 }
62 else if (code2type[code] == UNWANTED)
63 wanted = FALSE;
64 }
65 if (wanted && count[INP_DATE] > 0) {
66 if (count[INP_UNIXTIME])
67 wanted = wantunixtime(&timecode, dman, unixtime, logfilep->tz);
68 else {
69 if (count[INP_AM]) {
70 if (hour > 12) {
71 corrupt_line(logfilep, "Hour greater than 12", -1);
72 return;
73 }
74 else if (hour == 12)
75 hour = 0;
76 if (am == 'p')
77 hour += 12;
78 }
79 wanted = wantdate(&timecode, dman, hour, minute, date, month, year,
80 logfilep->tz);
81 }
82 if (wanted == ERR) { /* corrupt date */
83 corrupt_line(logfilep, "Corrupt date or time", -1);
84 return;
85 }
86 } /* end count[INP_DATE] > 0 */
87 for (i = 0; i < ITEM_NUMBER; i++) {
88 wanttree[i] = FALSE;
89 if (!wanted) {
90 for (j = i; j < ITEM_NUMBER; j++) { /* reset not-yet-hashed strings */
91 if (count[j] != 0) /* NB i is now (unwanted i) + 1 */
92 mm[j].next_pos = mm[j].curr_pos;
93 }
94 mmq.next_pos = mmq.curr_pos;
95 mms.next_pos = mms.curr_pos;
96 logfilep->data[LOGDATA_UNWANTED]++;
97 return;
98 }
99 if (i == ITEM_HOST)
100 prealiasS(&(mm[ITEM_HOST]), &mms);
101 name = (char *)(mm[i].curr_pos);
102 if (count[i] == 0 || IS_EMPTY_STRING(name) ||
103 (name[0] == '-' && name[1] == '\0')) {
104 item[i] = blank_entry; /* or unwanted_; but we get wanted right anyway */
105 wanted = (wanthead[i] == NULL || included("", FALSE, wanthead[i]));
106 /* wanthead[i] == NULL is tested again in included() but it often saves
107 a call to that function, because blankness is common. */
108 }
109 else {
110 if (i == ITEM_FILE || i == ITEM_REFERRER) {
111 if ((j = prealias(&(mm[i]), &(mm[ITEM_VHOST]), item[ITEM_VHOST], &mmq,
112 (logical)((i == ITEM_FILE)?case_insensitive:FALSE),
113 (i == ITEM_FILE)?(logfilep->prefix):NULL,
114 logfilep->prefixlen, logfilep->pvpos,
115 (i == ITEM_FILE)?argshead:refargshead)) < 0) {
116 if (j == -1)
117 corrupt_line(logfilep,
118 "%v in file prefix but no VHOST in line", -1);
119 else
120 corrupt_line(logfilep, "Filename too long", -1);
121 return;
122 }
123 }
124 if (lowmem[i] == 0) {
125 if (gp[i] == NULL || !STREQ(name, gp[i]->name)) {
126 gp[i] = hashfind(&mm[i], &(hash[i]), no_cols[i], wanthead[i], UNSET,
127 ispagehead, aliashead[i], dirsuffix, dirsufflength,
128 usercase_insensitive, 0, FALSE, i, FALSE);
129 } /* if name the same as last time, don't need */
130 else /* to hashfind again, or save the name */
131 mm[i].next_pos = mm[i].curr_pos;
132 item[i] = (Hashentry *)(gp[i]->other);
133 wanted = (choice)(ENTRY_WANTED(item[i]));
134 }
135
136 else if (lowmem[i] == 1) {
137 if ((rc = do_alias(name, amemman, aliashead[i], dirsuffix,
138 dirsufflength, usercase_insensitive, 0, FALSE, i))
139 == FALSE) {
140 item[i] = hashfind(&mm[i], &(hash[i]), no_cols[i], wanthead[i],
141 UNSET, ispagehead, NULL, dirsuffix, dirsufflength,
142 usercase_insensitive, 0, FALSE, i, TRUE)->own;
143 }
144 else if (rc == TRUE) {
145 mm[i].next_pos = mm[i].curr_pos; /* don't save string */
146 item[i] = hashfind(amemman, &(hash[i]), no_cols[i], wanthead[i],
147 UNSET, ispagehead, NULL, dirsuffix, dirsufflength,
148 usercase_insensitive, 0, FALSE, i, TRUE)->own;
149 }
150 else { /* rc == ERR */
151 mm[i].next_pos = mm[i].curr_pos;
152 if (included("", FALSE, wanthead[i]))
153 item[i] = blank_entry;
154 else
155 item[i] = unwanted_entry;
156 }
157 wanted = (choice)(ENTRY_WANTED(item[i]));
158 }
159
160 else { /* lowmem[i] >= 2 */
161 if ((rc = do_alias(name, amemman, aliashead[i], dirsuffix,
162 dirsufflength, usercase_insensitive, 0, FALSE, i))
163 == TRUE) {
164 mm[i].next_pos = mm[i].curr_pos; /* don't save old string */
165 len = strlen((char *)(amemman->curr_pos));
166 memcpy(submalloc(&(mm[i]), len + 1), amemman->curr_pos, len + 1);
167 name = (char *)(mm[i].curr_pos); /* which might have changed */
168 amemman->next_pos = amemman->curr_pos;
169 }
170 if (rc == ERR) {
171 if (included("", FALSE, wanthead[i])) {
172 item[i] = blank_entry;
173 if (i == ITEM_FILE)
174 ispage = FALSE;
175 }
176 else
177 wanted = FALSE;
178 mm[i].next_pos = mm[i].curr_pos;
179 }
180 else {
181 isitpage = pageq(name, ispagehead, i);
182 if (i == ITEM_FILE)
183 ispage = (choice)isitpage;
184 if (included(name, isitpage, wanthead[i])) {
185 if (lowmem[i] == 2) {
186 item[i] = hashfind(&(mm[i]), &(hash[i]), no_cols[i], wanthead[i],
187 isitpage, ispagehead, NULL, dirsuffix,
188 dirsufflength, usercase_insensitive, 0, FALSE,
189 i, TRUE)->own;
190 }
191 else {
192 item[i] = blank_entry;
193 wanttree[i] = TRUE;
194 mm[i].next_pos = mm[i].curr_pos;
195 }
196 }
197 else {
198 wanted = FALSE;
199 mm[i].next_pos = mm[i].curr_pos;
200 }
201 }
202 } /* end lowmem[i] >= 2 */
203 }
204 } /* end for i */
205 if (!wanted) {
206 logfilep->data[LOGDATA_UNWANTED]++;
207 return;
208 }
209
210 /*** now add it to the hash tables ***/
211
212 /* add to logfile from and to if wanted, whatever status code */
213 if (timecode != FIRST_TIME)
214 logfilep->from = MIN(logfilep->from, timecode);
215 logfilep->to = MAX(logfilep->to, timecode);
216 last7 = (timecode > dman->last7from && timecode <= dman->last7to);
217 if (ispage == UNSET) /* NB blank_entry has ispage FALSE */
218 ispage = (choice)(item[ITEM_FILE]->ispage);
219 if (count[INP_BYTES] == 0)
220 bytes = 0;
221 if (count[INP_CODE] == 0) {
222 outcome = SUCCESS;
223 if (count[ITEM_FILE] == 2) {
224 logfilep->data[LOGDATA_SUCC]++;
225 logfilep->data[LOGDATA_SUCC7] += (unsigned long)last7;
226 logfilep->data[LOGDATA_PAGES] += (unsigned long)ispage;
227 logfilep->data[LOGDATA_PAGES7] +=
228 (unsigned long)((logical)ispage && last7);
229 }
230 else {
231 logfilep->data[LOGDATA_UNKNOWN]++;
232 logfilep->data[LOGDATA_UNKNOWN7] += (unsigned long)last7;
233 }
234 }
235 else if (code <= 199) {
236 outcome = INFO;
237 logfilep->data[LOGDATA_INFO]++;
238 logfilep->data[LOGDATA_INFO7] += (unsigned long)last7;
239 }
240 else switch (outcome = code2type[code]) {
241 case SUCCESS:
242 logfilep->data[LOGDATA_SUCC]++;
243 logfilep->data[LOGDATA_SUCC7] += (unsigned long)last7;
244 logfilep->data[LOGDATA_PAGES] += (unsigned long)ispage;
245 logfilep->data[LOGDATA_PAGES7] +=
246 (unsigned long)((logical)ispage && last7);
247 break;
248 case FAILURE:
249 logfilep->data[LOGDATA_FAIL]++;
250 logfilep->data[LOGDATA_FAIL7] += (unsigned long)last7;
251 break;
252 case REDIRECT:
253 logfilep->data[LOGDATA_REDIR]++;
254 logfilep->data[LOGDATA_REDIR7] += (unsigned long)last7;
255 break;
256 case INFO:
257 logfilep->data[LOGDATA_INFO]++;
258 logfilep->data[LOGDATA_INFO7] += (unsigned long)last7;
259 break;
260 }
261
262 /* NB any change in what to count when will require corresponding change to
263 end of strtoinfmt() and to fmt munching in correct() */
264 if (count[INP_CODE] == 2)
265 arrayscore(arraydata[REP_CODE - FIRST_ARRAYREP], code, 1,
266 (unsigned long)last7, 0, 0, 0., 0., timecode);
267 if (outcome != INFO) {
268 if (outcome == SUCCESS) {
269 if (count[INP_DATE] == 2) /* only if file present: see strtoinfmt() */
270 datehash(timecode, dman, 1, (unsigned long)ispage, bytes, granularity);
271 if (count[INP_BYTES] == 2) {
272 arrayscore(arraydata[REP_SIZE - FIRST_ARRAYREP], bytes, 1,
273 (unsigned long)last7, (unsigned long)ispage,
274 (unsigned long)((logical)ispage && last7), bytes,
275 last7?bytes:0., timecode);
276 logfilep->bytes += bytes;
277 if (last7)
278 logfilep->bytes7 += bytes;
279 }
280 if (count[INP_PROCTIME] == 2)
281 arrayscore(arraydata[REP_PROCTIME - FIRST_ARRAYREP], proctime, 1,
282 (unsigned long)last7, (unsigned long)ispage,
283 (unsigned long)((logical)ispage && last7), bytes,
284 last7?bytes:0., timecode);
285
286 if (alltrees[0] != REP_NUMBER || alldervs[0] != REP_NUMBER) {
287 /* for LOWMEM 3, run through alltrees then alldervs */
288 /* NB these (POSSTREE/POSSDERV in init.c) only count successes */
289 for (k = 0; k <= 1; k++) {
290 for (i = 0; (k?(alldervs[i]):(alltrees[i])) != REP_NUMBER; i++) {
291 j = rep2type[k?(alldervs[i]):(alltrees[i])];
292 if (wanttree[j]) {
293 dummy_item->name = mm[j].curr_pos;
294 /* mm.curr_pos is marked for deletion, but still intact at
295 present */
296 dummy_item->own->data[data2cols[j][REQUESTS]] = 1;
297 if (data2cols[j][REQUESTS7] >= 0) /* see comment in genrep() */
298 dummy_item->own->data[data2cols[j][REQUESTS7]] =
299 (unsigned long)last7;
300 if (data2cols[j][PAGES] >= 0)
301 dummy_item->own->data[data2cols[j][PAGES]] =
302 (unsigned long)ispage;
303 if (data2cols[j][PAGES7] >= 0)
304 dummy_item->own->data[data2cols[j][PAGES7]] =
305 (unsigned long)((logical)ispage && last7);
306 if (data2cols[j][SUCCDATE] >= 0)
307 dummy_item->own->data[data2cols[j][SUCCDATE]] = timecode;
308 if (data2cols[j][SUCCFIRSTD] >= 0)
309 dummy_item->own->data[data2cols[j][SUCCFIRSTD]] = timecode;
310 dummy_item->own->bytes = bytes;
311 dummy_item->own->bytes7 = last7?bytes:0.;
312 if (k)
313 makederived(derv[alldervs[i] - FIRST_DERVREP], dummy_item,
314 NULL, convfloor, multibyte, alldervs[i],
315 datacols[j], no_cols[j]);
316 else {
317 namestart = NULL;
318 tree[G(alltrees[i])]->cutfn(&namestart, &nameend,
319 dummy_item->name, FALSE);
320 (void)treefind(namestart, nameend,
321 &(tree[G(alltrees[i])]->tree), dummy_item,
322 tree[G(alltrees[i])]->cutfn, FALSE, TRUE, FALSE,
323 tree[G(alltrees[i])]->space, datacols[j],
324 no_cols[j]);
325 }
326 }
327 }
328 }
329 } /* there are trees or dervs */
330 } /* outcome == SUCCESS */
331 data[REQUESTS2] = 1;
332 data[REQUESTS72] = (unsigned long)last7;
333 data[PAGES2] = (unsigned long)ispage;
334 data[PAGES72] = (unsigned long)((logical)ispage && last7);
335 data[DATE2] = timecode;
336 data[FIRSTD2] = timecode;
337 for (i = 0; i < ITEM_NUMBER; i++) {
338 if (count[i] == 2 && !ENTRY_BLANK(item[i]))
339 hashscore(item[i], data, datacols[i][outcome], outcome, bytes);
340 }
341 } /* end if outcome != INFO */
342 }
343
/* Report the current logfile line as corrupt, count it in
   logfilep->data[LOGDATA_CORRUPT], and move on to the next line.

   The line is taken from record_start up to wherever parsenonnewline()
   leaves pos, and is passed to debug() under category 'C', cut to 500
   characters if it is longer.

   message: printed in brackets after the line when n is negative.
   n:       when non-negative, message is not printed; instead, if
            debug_args contains 'C', a further line "C: " followed by n
            spaces and a '*' is written to errfile.
            NOTE(review): this presumably puts the '*' under the n'th
            character of the line printed by debug() -- confirm that debug()
            starts its output with the same "C: " prefix. */
void corrupt_line(Logfile *logfilep, char *message, ptrdiff_t n) {
  extern FILE *errfile;
  extern char *debug_args, *record_start, *pos;

  pos = record_start;
  parsenonnewline(logfilep);    /* skip to after next newline */
  *pos = '\0';                  /* terminate the record for printing */
  if (pos - record_start > 500) /* debug might only handle 509 chars */
    *(record_start + 500) = '\0';
  debug('C', "%s", record_start);
  if (n < 0)
    debug('C', " (%s)", message);
  else if (strchr(debug_args, 'C')) {
    fprintf(errfile, "C: ");
    /* Emit exactly n spaces so the marker lands n columns in.  This is done
       one character at a time so that the amount of padding cannot drift
       from n. */
    for ( ; n > 0; n--)
      fputc(' ', errfile);
    fprintf(errfile, "*\n");
  }
  logfilep->data[LOGDATA_CORRUPT]++;
  *pos = '\n';                  /* put a newline where the '\0' was written */
  (void)parsenewline(logfilep, NULL, '\0');
}
368
/* Add the given totals to one bucket of `array': the first bucket whose
   threshold is at least `amount', or failing that the first bucket with a
   negative threshold (tested as < -0.5).

   reqs, reqs7, pages, pages7, bytes and bytes7 are added to the bucket's
   fields of the same names; timecode widens the bucket's firstdate/lastdate
   range.

   The search has no other stopping condition, so the array must end with a
   negative-threshold bucket (the original note asks for a threshold of -1). */
void arrayscore(Arraydata *array, double amount, unsigned long reqs,
                unsigned long reqs7, unsigned long pages, unsigned long pages7,
                double bytes, double bytes7, timecode_t timecode) {
  Arraydata *slot = array;

  /* walk forward until a bucket accepts `amount' */
  while (!(amount <= slot->threshold || slot->threshold < -0.5))
    slot++;

  slot->reqs += reqs;
  slot->reqs7 += reqs7;
  slot->pages += pages;
  slot->pages7 += pages7;
  slot->bytes += bytes;
  slot->bytes7 += bytes7;
  slot->lastdate = MAX(slot->lastdate, timecode);
  slot->firstdate = MIN(slot->firstdate, timecode);
}
388