1 /*
2  * haproxy log statistics reporter
3  *
4  * Copyright 2000-2012 Willy Tarreau <w@1wt.eu>
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version
9  * 2 of the License, or (at your option) any later version.
10  *
11  */
12 
13 #include <errno.h>
14 #include <fcntl.h>
15 #include <stdio.h>
16 #include <stdlib.h>
17 #include <syslog.h>
18 #include <string.h>
19 #include <unistd.h>
20 #include <ctype.h>
21 #include <time.h>
22 
23 #include <eb32tree.h>
24 #include <eb64tree.h>
25 #include <ebistree.h>
26 #include <ebsttree.h>
27 
/* 1-based positions of the log fields this tool looks at, as passed to
 * field_start(). main() adds the "-s" skip count to the first lookup on a
 * line and then moves between fields using the difference of two constants.
 */
#define SOURCE_FIELD 5      /* looked up first when counting per IP (-ic) */
#define ACCEPT_FIELD 6      /* accept date; main() requires it to start with '[' */
#define SERVER_FIELD 8      /* presumably the server name - not used in this part of the file */
#define TIME_FIELD 9        /* timers separated by '/' */
#define STATUS_FIELD 10     /* status code, parsed with str2ic() */
#define BYTES_SENT_FIELD 11 /* presumably the byte count - not used in this part of the file */
#define TERM_CODES_FIELD 14 /* termination code; first two chars compared for -tcn/-TCN */
#define CONN_FIELD 15       /* presumably connection counts - not used in this part of the file */
#define QUEUE_LEN_FIELD 16  /* queue lengths separated by '/' (see -Q/-QS) */
#define METH_FIELD 17       /* presumably the HTTP method - not used in this part of the file */
#define URL_FIELD 18        /* presumably the URL - not used in this part of the file */
#define MAXLINE 16384       /* NOTE(review): looks like the line buffer size for fgets2() - confirm */
#define QBITS 4             /* number of high-order bits insert_timer() keeps via quantify() */

/* SEP(c): true for any byte <= ' ', which includes the terminating NUL. */
#define SEP(c) ((unsigned char)(c) <= ' ')
/* SKIP_CHAR(p,c): advance <p> just past the next occurrence of <c>. If a byte
 * <= ' ' (separator or NUL) is met first, <p> is left pointing at that byte.
 * <p> must be a modifiable lvalue; it is evaluated several times.
 */
#define SKIP_CHAR(p,c) do { while (1) { int __c = (unsigned char)*p++; if (__c == c) break; if (__c <= ' ') { p--; break; } } } while (0)
44 
/* Roots of the trees holding the collected values, one per category:
 * [0] = err/date, [1] = req, [2] = conn, [3] = resp, [4] = data
 * All roots are initialised with EB_ROOT_UNIQUE. timers[0] is the tree
 * main() walks when producing the -ac/-ad output.
 */
static struct eb_root timers[5] = {
	EB_ROOT_UNIQUE, EB_ROOT_UNIQUE, EB_ROOT_UNIQUE,
	EB_ROOT_UNIQUE, EB_ROOT_UNIQUE,
};
50 
/* One entry of a value tree. insert_timer() and insert_value() store the
 * (possibly quantified) value in node.key.
 */
struct timer {
	struct eb32_node node;  /* tree linkage; node.key holds the value */
	unsigned int count;     /* presumably the number of occurrences of this key - maintained by callers not shown here */
};
55 
/* Per-server statistics. The server name is stored right after the structure
 * (see the comment on <node>), so <node> must remain the last member.
 */
struct srv_st {
	unsigned int st_cnt[6]; /* 0xx to 5xx */
	unsigned int nb_ct, nb_rt, nb_ok;      /* NOTE(review): presumably counts of connect times, response times and OK requests - confirm */
	unsigned long long cum_ct, cum_rt;     /* NOTE(review): presumably cumulated connect / response times - confirm */
	struct ebmb_node node;
	/* don't put anything else here, the server name will be there */
};
63 
/* Per-URL statistics. The two tree nodes share storage through a union, so an
 * entry can be linked through only one of them at a time.
 * NOTE(review): presumably indexed by URL while reading and re-inserted by
 * value for sorted output - confirm against the URL reporting code.
 */
struct url_stat {
	union {
		struct ebpt_node url;
		struct eb64_node val;
	} node;
	char *url;
	unsigned long long total_time;    /* sum(all reqs' times) */
	unsigned long long total_time_ok; /* sum(all OK reqs' times) */
	unsigned long long total_bytes_sent; /* sum(all bytes sent) */
	unsigned int nb_err, nb_req;      /* number of requests in error / total number of requests (per field names) */
};
75 
/* Bits of the global <filter> word. The option noted next to each bit is the
 * command-line switch that sets it in main().
 */
#define FILT_COUNT_ONLY		0x01	/* -c */
#define FILT_INVERT		0x02	/* not set in the part of the file reviewed here; -v toggles <filter_invert> instead */
#define FILT_QUIET		0x04	/* -q */
#define FILT_ERRORS_ONLY	0x08	/* -e, and -E together with FILT_INVERT_ERRORS */
#define FILT_ACC_DELAY		0x10	/* -ad <delay> */
#define FILT_ACC_COUNT		0x20	/* -ac <count> */
#define FILT_GRAPH_TIMERS	0x40	/* -gt */
#define FILT_PERCENTILE		0x80	/* -pct */
#define FILT_TIME_RESP         0x100	/* -rt <time>, and -RT together with FILT_INVERT_TIME_RESP */

#define FILT_INVERT_ERRORS     0x200	/* -E */
#define FILT_INVERT_TIME_RESP  0x400	/* -RT */

#define FILT_COUNT_STATUS      0x800	/* -st */
#define FILT_COUNT_SRV_STATUS 0x1000	/* -srv */
#define FILT_COUNT_TERM_CODES 0x2000	/* -tc */

#define FILT_COUNT_URL_ONLY  0x004000	/* -u */
#define FILT_COUNT_URL_COUNT 0x008000	/* -uc */
#define FILT_COUNT_URL_ERR   0x010000	/* -ue */
#define FILT_COUNT_URL_TTOT  0x020000	/* -ut */
#define FILT_COUNT_URL_TAVG  0x040000	/* -ua */
#define FILT_COUNT_URL_TTOTO 0x080000	/* -uto */
#define FILT_COUNT_URL_TAVGO 0x100000	/* -uao */

#define FILT_HTTP_ONLY       0x200000	/* -H */
#define FILT_TERM_CODE_NAME  0x400000	/* -tcn <code>, and -TCN together with the invert bit */
#define FILT_INVERT_TERM_CODE_NAME 0x800000	/* -TCN */

#define FILT_HTTP_STATUS           0x1000000	/* -hs, and -HS together with the invert bit */
#define FILT_INVERT_HTTP_STATUS    0x2000000	/* -HS */
#define FILT_QUEUE_ONLY            0x4000000	/* -Q */
#define FILT_QUEUE_SRV_ONLY        0x8000000	/* -QS */

#define FILT_COUNT_URL_BAVG       0x10000000	/* -uba */
#define FILT_COUNT_URL_BTOT       0x20000000	/* -ubt */

/* any of the per-URL output modes; main() selects filter_count_url() for them */
#define FILT_COUNT_URL_ANY   (FILT_COUNT_URL_ONLY|FILT_COUNT_URL_COUNT|FILT_COUNT_URL_ERR| \
			      FILT_COUNT_URL_TTOT|FILT_COUNT_URL_TAVG|FILT_COUNT_URL_TTOTO|FILT_COUNT_URL_TAVGO| \
			      FILT_COUNT_URL_BAVG|FILT_COUNT_URL_BTOT)

#define FILT_COUNT_COOK_CODES 0x40000000	/* -cc */
#define FILT_COUNT_IP_COUNT   0x80000000	/* -ic */

/* Bits of the global <filter2> word. */
#define FILT2_TIMESTAMP	0x01	/* -time [min][:max] */
121 
/* Global state shared between main() and the filter functions. */
unsigned int filter = 0;        /* bitmask of FILT_* flags built from the command line */
unsigned int filter2 = 0;       /* bitmask of FILT2_* flags built from the command line */
unsigned int filter_invert = 0; /* toggled by each -v; XORed with the input filters' verdict */
const char *line;               /* line currently being processed, as returned by fgets2() */
int linenum = 0;                /* incremented for every line read by the main loop */
int parse_err = 0;              /* incremented for lines whose expected fields could not be parsed */
int lines_out = 0;              /* lines counted/emitted so far, compared against lines_max */
int lines_max = -1;             /* set by -m <lines>; a negative value means no limit */
130 
/* Line reader defined elsewhere. main() reads lines from it until it returns
 * NULL.
 */
const char *fgets2(FILE *stream);

/* Per-line handlers defined elsewhere. main() picks one of them from the
 * output option and calls it for every line that passed the input filters,
 * with pointers to the accept-date field, to the timers field (NULL when no
 * input filter needed it) and to its spare pre-allocated timer node.
 * filter_count_ip() additionally receives the source field.
 */
void filter_count_url(const char *accept_field, const char *time_field, struct timer **tptr);
void filter_count_ip(const char *source_field, const char *accept_field, const char *time_field, struct timer **tptr);
void filter_count_srv_status(const char *accept_field, const char *time_field, struct timer **tptr);
void filter_count_cook_codes(const char *accept_field, const char *time_field, struct timer **tptr);
void filter_count_term_codes(const char *accept_field, const char *time_field, struct timer **tptr);
void filter_count_status(const char *accept_field, const char *time_field, struct timer **tptr);
void filter_graphs(const char *accept_field, const char *time_field, struct timer **tptr);
void filter_output_line(const char *accept_field, const char *time_field, struct timer **tptr);
void filter_accept_holes(const char *accept_field, const char *time_field, struct timer **tptr);
142 
/* Write the short command-line synopsis to <output>. When <msg> is not NULL
 * it is written verbatim just before the synopsis (no separator is added, so
 * the caller supplies its own trailing newline if it wants one).
 */
void usage(FILE *output, const char *msg)
{
	static const char synopsis[] =
		"Usage: halog [-h|--help] for long help\n"
		"       halog [-q] [-c] [-m <lines>]\n"
		"       {-cc|-gt|-pct|-st|-tc|-srv|-u|-uc|-ue|-ua|-ut|-uao|-uto|-uba|-ubt|-ic}\n"
		"       [-s <skip>] [-e|-E] [-H] [-rt|-RT <time>] [-ad <delay>] [-ac <count>]\n"
		"       [-v] [-Q|-QS] [-tcn|-TCN <termcode>] [ -hs|-HS [min][:[max]] ] [ -time [min][:[max]] ] < log\n"
		"\n";

	if (msg != NULL)
		fputs(msg, output);
	fputs(synopsis, output);
}
156 
/* Report a fatal command-line error: print <msg> followed by the short usage
 * text on stderr (through usage()), then terminate the process with exit
 * status 1. This function does not return.
 */
void die(const char *msg)
{
	usage(stderr, msg);
	exit(1);
}
162 
help()163 void help()
164 {
165 	usage(stdout, NULL);
166 	printf(
167 	       "Input filters (several filters may be combined) :\n"
168 	       " -H                      only match lines containing HTTP logs (ignore TCP)\n"
169 	       " -E                      only match lines without any error (no 5xx status)\n"
170 	       " -e                      only match lines with errors (status 5xx or negative)\n"
171 	       " -rt|-RT <time>          only match response times larger|smaller than <time>\n"
172 	       " -Q|-QS                  only match queued requests (any queue|server queue)\n"
173 	       " -tcn|-TCN <code>        only match requests with/without termination code <code>\n"
174 	       " -hs|-HS <[min][:][max]> only match requests with HTTP status codes within/not\n"
175 	       "                         within min..max. Any of them may be omitted. Exact\n"
176 	       "                         code is checked for if no ':' is specified.\n"
177 	       " -time <[min][:max]>     only match requests recorded between timestamps.\n"
178 	       "                         Any of them may be omitted.\n"
179 	       "Modifiers\n"
180 	       " -v                      invert the input filtering condition\n"
181 	       " -q                      don't report errors/warnings\n"
182 	       " -m <lines>              limit output to the first <lines> lines\n"
183                " -s <skip_n_fields>      skip n fields from the beginning of a line (default %d)\n"
184                "                         you can also use -n to start from earlier then field %d\n"
185                "\n"
186 	       "Output filters - only one may be used at a time\n"
187 	       " -c    only report the number of lines that would have been printed\n"
188 	       " -pct  output connect and response times percentiles\n"
189 	       " -st   output number of requests per HTTP status code\n"
190 	       " -cc   output number of requests per cookie code (2 chars)\n"
191 	       " -tc   output number of requests per termination code (2 chars)\n"
192 	       " -srv  output statistics per server (time, requests, errors)\n"
193 	       " -u*   output statistics per URL (time, requests, errors)\n"
194 	       "       Additional characters indicate the output sorting key :\n"
195 	       "       -u : by URL, -uc : request count, -ue : error count\n"
196 	       "       -ua : average response time, -ut : average total time\n"
197 	       "       -uao, -uto: average times computed on valid ('OK') requests\n"
198 	       "       -uba, -ubt: average bytes returned, total bytes returned\n",
199                SOURCE_FIELD,SOURCE_FIELD
200 	       );
201 	exit(0);
202 }
203 
204 
/* Return a pointer to the character that terminates the field starting at
 * <p>: the first ASCII space or the terminating NUL, whichever comes first.
 * Other control characters (tabs included) are considered part of the field.
 */

#if defined(__i386__)
/* this one is always faster on 32-bits */
static inline const char *field_stop(const char *p)
{
	asm(
	    /* Look for spaces */
	    "4:                  \n\t"
	    "inc   %0            \n\t"
	    "cmpb  $0x20, -1(%0) \n\t"
	    "ja    4b            \n\t"
	    "jz    3f            \n\t"

	    /* we only get there for control chars 0..31. Leave if we find '\0' */
	    "cmpb  $0x0, -1(%0)  \n\t"
	    "jnz   4b            \n\t"

	    /* return %0-1 = position of the last char we checked */
	    "3:                  \n\t"
	    "dec   %0            \n\t"
	    : "=r" (p)
	    : "0" (p)
	    );
	return p;
}
#else
const char *field_stop(const char *p)
{
	for (;; p++) {
		unsigned char ch = *p;

		/* only a space or the end of the string ends a field */
		if (ch == ' ' || ch == '\0')
			return p;
	}
}
#endif
246 
/* Return a pointer to the first character of field number <field> (counted
 * from 1) in string <p>. Fields are separated by runs of one or more ASCII
 * spaces; any other character, control characters such as tabs included,
 * belongs to a field. When the string ends before the requested field is
 * reached, the returned pointer designates the terminating NUL, so callers
 * test the pointed-to character to detect a missing field.
 */
const char *field_start(const char *p, int field)
{
#ifndef PREFER_ASM
	for (;;) {
		/* move over the separator(s) in front of the next field */
		while (*p == ' ')
			p++;

		/* end of line reached before the wanted field */
		if (*p == '\0')
			return p;

		/* <p> now sits on the first character of a field */
		if (--field == 0)
			return p;

		/* not the wanted one: run to the space (or NUL) ending it */
		for (p++; *p != ' '; p++) {
			if (*p == '\0')
				return p;
		}
	}
#else
	/* This version works optimally on i386 and x86_64 but the code above
	 * shows similar performance. However, depending on the version of GCC
	 * used, inlining rules change and it may have difficulties to make
	 * efficient use of this code at other locations and could result in
	 * worse performance (eg: gcc 4.4). You may want to experience.
	 */
	asm(
	    /* skip spaces */
	    "1:                  \n\t"
	    "inc   %0            \n\t"
	    "cmpb  $0x20, -1(%0) \n\t"
	    "ja    2f            \n\t"
	    "jz    1b            \n\t"

	    /* we only get there for control chars 0..31. Leave if we find '\0' */
	    "cmpb  $0x0, -1(%0)  \n\t"
	    "jz    3f            \n\t"

	    /* start of field at [%0-1]. Check if we need to skip more fields */
	    "2:                  \n\t"
	    "dec   %1            \n\t"
	    "jz    3f            \n\t"

	    /* Look for spaces */
	    "4:                  \n\t"
	    "inc   %0            \n\t"
	    "cmpb  $0x20, -1(%0) \n\t"
	    "jz    1b            \n\t"
	    "ja    4b            \n\t"

	    /* we only get there for control chars 0..31. Leave if we find '\0' */
	    "cmpb  $0x0, -1(%0)  \n\t"
	    "jnz   4b            \n\t"

	    /* return %0-1 = position of the last char we checked */
	    "3:                  \n\t"
	    "dec   %0            \n\t"
	    : "=r" (p)
	    : "r" (field), "0" (p)
	    );
	return p;
#endif
}
329 
/* Keep only the <bits> most significant set bits of <i>, clearing all the
 * lower ones. Returns 0 when <bits> is zero or negative, and <i> unchanged
 * when it does not have more than <bits> significant bits.
 */
static inline unsigned int quantify_u32(unsigned int i, int bits)
{
	int high;

	/* a negative count would turn into an oversized shift below */
	if (bits <= 0)
		return 0;

	/* position of the highest set bit; the 1..32 range is the original
	 * author's note, fls_auto() being defined outside of this file.
	 */
	high = i ? fls_auto(i) : 0;

	if (high <= bits)
		return i;

	/* The mask is built from an unsigned constant: high - bits may reach
	 * 31, for which shifting a signed 1 would be undefined behaviour.
	 */
	return i & ~((1U << (high - bits)) - 1);
}
348 
/* Keep only the <bits> higher bits of the absolute value of <i>, as well as
 * its sign.
 */
static inline int quantify(int i, int bits)
{
	if (i >= 0)
		return quantify_u32(i, bits);

	/* Take the magnitude in unsigned arithmetic: negating the most
	 * negative int as a signed value would overflow.
	 */
	return -quantify_u32(0U - (unsigned int)i, bits);
}
358 
359 /* Insert timer value <v> into tree <r>. A pre-allocated node must be passed
360  * in <alloc>. It may be NULL, in which case the function will allocate it
361  * itself. It will be reset to NULL once consumed. The caller is responsible
362  * for freeing the node once not used anymore. The node where the value was
363  * inserted is returned.
364  */
insert_timer(struct eb_root * r,struct timer ** alloc,int v)365 struct timer *insert_timer(struct eb_root *r, struct timer **alloc, int v)
366 {
367 	struct timer *t = *alloc;
368 	struct eb32_node *n;
369 
370 	if (!t) {
371 		t = calloc(sizeof(*t), 1);
372 		if (unlikely(!t)) {
373 			fprintf(stderr, "%s: not enough memory\n", __FUNCTION__);
374 			exit(1);
375 		}
376 	}
377 	t->node.key = quantify(v, QBITS);         // keep only the higher QBITS bits
378 
379 	n = eb32i_insert(r, &t->node);
380 	if (n == &t->node)
381 		t = NULL;  /* node inserted, will malloc next time */
382 
383 	*alloc = t;
384 	return container_of(n, struct timer, node);
385 }
386 
387 /* Insert value value <v> into tree <r>. A pre-allocated node must be passed
388  * in <alloc>. It may be NULL, in which case the function will allocate it
389  * itself. It will be reset to NULL once consumed. The caller is responsible
390  * for freeing the node once not used anymore. The node where the value was
391  * inserted is returned.
392  */
insert_value(struct eb_root * r,struct timer ** alloc,int v)393 struct timer *insert_value(struct eb_root *r, struct timer **alloc, int v)
394 {
395 	struct timer *t = *alloc;
396 	struct eb32_node *n;
397 
398 	if (!t) {
399 		t = calloc(sizeof(*t), 1);
400 		if (unlikely(!t)) {
401 			fprintf(stderr, "%s: not enough memory\n", __FUNCTION__);
402 			exit(1);
403 		}
404 	}
405 	t->node.key = v;
406 
407 	n = eb32i_insert(r, &t->node);
408 	if (n == &t->node)
409 		t = NULL;  /* node inserted, will malloc next time */
410 
411 	*alloc = t;
412 	return container_of(n, struct timer, node);
413 }
414 
/* Parse an optionally negative decimal integer at the start of <s> and
 * return it. A single leading '-' is accepted (no '+', no leading blanks).
 * Parsing stops at the first character which is not a decimal digit, so the
 * result is 0 when no digit is found. No overflow detection is performed.
 */
int str2ic(const char *s)
{
	int negative = (*s == '-');
	int result = 0;
	unsigned int digit;

	if (negative)
		s++;

	/* anything outside '0'..'9' yields a value above 9 once made unsigned */
	for (; (digit = (unsigned int)(*s - '0')) <= 9; s++) {
		if (negative)
			result = result * 10 - (int)digit;
		else
			result = result * 10 + (int)digit;
	}
	return result;
}
443 
444 
/* Convert the first <len> characters of <s> to an unsigned integer, reading
 * them as decimal digits. The characters are not validated and the result
 * silently wraps on overflow. Returns 0 when <len> is zero or negative.
 */
static inline unsigned int __strl2ui(const char *s, int len)
{
	unsigned int acc = 0;
	int idx;

	for (idx = 0; idx < len; idx++)
		acc = acc * 10 + (unsigned int)((unsigned char)s[idx] - '0');
	return acc;
}
455 
/* Externally visible wrapper around __strl2ui(): converts the first <len>
 * characters of <s> to an unsigned integer and returns the result unchanged.
 */
unsigned int strl2ui(const char *s, int len)
{
	return __strl2ui(s, len);
}
460 
/* Convert "[04/Dec/2008:09:49:40.555]" to the time of the day expressed in
 * milliseconds. Everything up to and including the first ':' is skipped, then
 * hours, minutes and seconds are read as decimal numbers, each one ended by
 * any non-digit character. Milliseconds are only read when the seconds are
 * followed by a '.'. Returns -1 when the string ends before the first ':' or
 * right after one of the numbers.
 */
int convert_date(const char *field)
{
	/* what a NUL or a '.' looks like once '0' has been subtracted */
	const unsigned char at_nul = (unsigned char)(0 - '0');
	const unsigned char at_dot = (unsigned char)('.' - '0');
	/* parts[0..3] = hours, minutes, seconds, milliseconds */
	unsigned int parts[4] = { 0, 0, 0, 0 };
	const char *cur = field;
	unsigned char digit = 0;
	int idx;

	/* skip the date */
	for (;;) {
		unsigned char ch = *cur++;

		if (ch == ':')
			break;
		if (ch == '\0')
			return -1;
	}

	for (idx = 0; idx < 4; idx++) {
		/* milliseconds are present only after a '.' */
		if (idx == 3 && digit != at_dot)
			break;

		for (;;) {
			digit = *cur++ - '0';
			if (digit > 9)
				break;
			parts[idx] = parts[idx] * 10 + digit;
		}

		/* the number must be followed by something */
		if (digit == at_nul)
			return -1;
	}

	return ((parts[0] * 60 + parts[1]) * 60 + parts[2]) * 1000 + parts[3];
}
529 
/* Convert "[04/Dec/2008:09:49:40.555]" to a unix timestamp with a resolution
 * of one second (anything following the seconds is ignored). The date is
 * interpreted in the local time zone through mktime(), which is also left to
 * decide whether daylight saving time applies to that date.
 *
 * Returns -1 when the field cannot be parsed: empty string, string ending
 * before the hour/minute part is complete, unknown month name, or date that
 * mktime() cannot represent. Separators other than the '/' ending the month
 * are not validated, and a missing terminator after the seconds is tolerated.
 *
 * The result computed for the last (year, month, day, hour, minute) is cached
 * in static variables so that consecutive lines logged within the same minute
 * do not call mktime() again; the function is therefore not reentrant.
 */
int convert_date_to_timestamp(const char *field)
{
	unsigned int d, mo, y, h, m, s;
	unsigned char c;
	const char *e;
	struct tm timeinfo = { 0 };
	time_t res;
	/* last successfully converted minute and its timestamp */
	static unsigned int last_d, last_mo, last_y, last_h, last_m;
	static int last_valid;
	static int last_res;

	d = mo = y = h = m = s = 0;
	e = field;

	/* skip the leading '[', but never walk past the end of the string */
	if (!*(e++))
		goto out_err;

	/* day + '/' */
	while (1) {
		c = *(e++) - '0';
		if (c > 9)
			break;
		d = d * 10 + c;
	}
	if (c == (unsigned char)(0 - '0'))
		goto out_err;

	/* month: only the letters needed to tell the names apart are checked.
	 * A NUL never matches any expected letter, so truncated names end up
	 * in one of the error branches.
	 */
	c = *(e++);
	switch (c) {
	case 'F':
		mo = 2;
		break;
	case 'S':
		mo = 9;
		break;
	case 'O':
		mo = 10;
		break;
	case 'N':
		mo = 11;
		break;
	case 'D':
		mo = 12;
		break;
	case 'A':
		c = *(e++);
		if (c == 'p')
			mo = 4;
		else if (c == 'u')
			mo = 8;
		else
			goto out_err;
		break;
	case 'J':
		c = *(e++);
		if (c == 'a')
			mo = 1;
		else if (c == 'u') {
			c = *(e++);
			if (c == 'n')
				mo = 6;
			else if (c == 'l')
				mo = 7;
			else
				goto out_err;
		} else
			goto out_err;
		break;
	case 'M':
		/* the second letter is the same for Mar and May */
		if (!*(e++))
			goto out_err;
		c = *(e++);
		if (c == 'r')
			mo = 3;
		else if (c == 'y')
			mo = 5;
		else
			goto out_err;
		break;
	default:
		goto out_err;
	}

	/* skip what remains of the month name up to and including the '/' */
	do {
		c = *(e++);
		if (!c)
			goto out_err;
	} while (c != '/');

	/* year + ':' */
	while (1) {
		c = *(e++) - '0';
		if (c > 9)
			break;
		y = y * 10 + c;
	}
	if (c == (unsigned char)(0 - '0'))
		goto out_err;

	/* hour + ':' */
	while (1) {
		c = *(e++) - '0';
		if (c > 9)
			break;
		h = h * 10 + c;
	}
	if (c == (unsigned char)(0 - '0'))
		goto out_err;

	/* minute + ':' */
	while (1) {
		c = *(e++) - '0';
		if (c > 9)
			break;
		m = m * 10 + c;
	}
	if (c == (unsigned char)(0 - '0'))
		goto out_err;

	/* second + '.' or ']' (the end of the string is accepted as well) */
	while (1) {
		c = *(e++) - '0';
		if (c > 9)
			break;
		s = s * 10 + c;
	}

	/* same minute as the previous call: only the seconds differ */
	if (last_valid &&
	    last_m == m && last_h == h && last_d == d &&
	    last_mo == mo && last_y == y)
		return last_res + s;

	timeinfo.tm_sec = 0;
	timeinfo.tm_min = m;
	timeinfo.tm_hour = h;
	timeinfo.tm_mday = d;
	timeinfo.tm_mon = mo - 1;
	timeinfo.tm_year = y - 1900;
	timeinfo.tm_isdst = -1; /* let mktime() find out whether DST applies */

	res = mktime(&timeinfo);
	if (res == (time_t)-1)
		goto out_err;

	last_res = res;
	last_d = d;
	last_mo = mo;
	last_y = y;
	last_h = h;
	last_m = m;
	last_valid = 1;

	return last_res + s;
 out_err:
	return -1;
}
677 
truncated_line(int linenum,const char * line)678 void truncated_line(int linenum, const char *line)
679 {
680 	if (!(filter & FILT_QUIET))
681 		fprintf(stderr, "Truncated line %d: %s\n", linenum, line);
682 }
683 
main(int argc,char ** argv)684 int main(int argc, char **argv)
685 {
686 	const char *b, *p, *time_field, *accept_field, *source_field;
687 	const char *filter_term_code_name = NULL;
688 	const char *output_file = NULL;
689 	int f, last;
690 	struct timer *t = NULL;
691 	struct eb32_node *n;
692 	struct url_stat *ustat = NULL;
693 	int val, test;
694 	unsigned int uval;
695 	int filter_acc_delay = 0, filter_acc_count = 0;
696 	int filter_time_resp = 0;
697 	int filt_http_status_low = 0, filt_http_status_high = 0;
698 	int filt2_timestamp_low = 0, filt2_timestamp_high = 0;
699 	int skip_fields = 1;
700 
701 	void (*line_filter)(const char *accept_field, const char *time_field, struct timer **tptr) = NULL;
702 
703 	argc--; argv++;
704 	while (argc > 0) {
705 		if (*argv[0] != '-')
706 			break;
707 
708 		if (strcmp(argv[0], "-ad") == 0) {
709 			if (argc < 2) die("missing option for -ad");
710 			argc--; argv++;
711 			filter |= FILT_ACC_DELAY;
712 			filter_acc_delay = atol(*argv);
713 		}
714 		else if (strcmp(argv[0], "-ac") == 0) {
715 			if (argc < 2) die("missing option for -ac");
716 			argc--; argv++;
717 			filter |= FILT_ACC_COUNT;
718 			filter_acc_count = atol(*argv);
719 		}
720 		else if (strcmp(argv[0], "-rt") == 0) {
721 			if (argc < 2) die("missing option for -rt");
722 			argc--; argv++;
723 			filter |= FILT_TIME_RESP;
724 			filter_time_resp = atol(*argv);
725 		}
726 		else if (strcmp(argv[0], "-RT") == 0) {
727 			if (argc < 2) die("missing option for -RT");
728 			argc--; argv++;
729 			filter |= FILT_TIME_RESP | FILT_INVERT_TIME_RESP;
730 			filter_time_resp = atol(*argv);
731 		}
732 		else if (strcmp(argv[0], "-s") == 0) {
733 			if (argc < 2) die("missing option for -s");
734 			argc--; argv++;
735 			skip_fields = atol(*argv);
736 		}
737 		else if (strcmp(argv[0], "-m") == 0) {
738 			if (argc < 2) die("missing option for -m");
739 			argc--; argv++;
740 			lines_max = atol(*argv);
741 		}
742 		else if (strcmp(argv[0], "-e") == 0)
743 			filter |= FILT_ERRORS_ONLY;
744 		else if (strcmp(argv[0], "-E") == 0)
745 			filter |= FILT_ERRORS_ONLY | FILT_INVERT_ERRORS;
746 		else if (strcmp(argv[0], "-H") == 0)
747 			filter |= FILT_HTTP_ONLY;
748 		else if (strcmp(argv[0], "-Q") == 0)
749 			filter |= FILT_QUEUE_ONLY;
750 		else if (strcmp(argv[0], "-QS") == 0)
751 			filter |= FILT_QUEUE_SRV_ONLY;
752 		else if (strcmp(argv[0], "-c") == 0)
753 			filter |= FILT_COUNT_ONLY;
754 		else if (strcmp(argv[0], "-q") == 0)
755 			filter |= FILT_QUIET;
756 		else if (strcmp(argv[0], "-v") == 0)
757 			filter_invert = !filter_invert;
758 		else if (strcmp(argv[0], "-gt") == 0)
759 			filter |= FILT_GRAPH_TIMERS;
760 		else if (strcmp(argv[0], "-pct") == 0)
761 			filter |= FILT_PERCENTILE;
762 		else if (strcmp(argv[0], "-st") == 0)
763 			filter |= FILT_COUNT_STATUS;
764 		else if (strcmp(argv[0], "-srv") == 0)
765 			filter |= FILT_COUNT_SRV_STATUS;
766 		else if (strcmp(argv[0], "-cc") == 0)
767 			filter |= FILT_COUNT_COOK_CODES;
768 		else if (strcmp(argv[0], "-tc") == 0)
769 			filter |= FILT_COUNT_TERM_CODES;
770 		else if (strcmp(argv[0], "-tcn") == 0) {
771 			if (argc < 2) die("missing option for -tcn");
772 			argc--; argv++;
773 			filter |= FILT_TERM_CODE_NAME;
774 			filter_term_code_name = *argv;
775 		}
776 		else if (strcmp(argv[0], "-TCN") == 0) {
777 			if (argc < 2) die("missing option for -TCN");
778 			argc--; argv++;
779 			filter |= FILT_TERM_CODE_NAME | FILT_INVERT_TERM_CODE_NAME;
780 			filter_term_code_name = *argv;
781 		}
782 		else if (strcmp(argv[0], "-hs") == 0 || strcmp(argv[0], "-HS") == 0) {
783 			char *sep, *str;
784 
785 			if (argc < 2) die("missing option for -hs/-HS ([min]:[max])");
786 			filter |= FILT_HTTP_STATUS;
787 			if (argv[0][1] == 'H')
788 				filter |= FILT_INVERT_HTTP_STATUS;
789 
790 			argc--; argv++;
791 			str = *argv;
792 			sep = strchr(str, ':');  /* [min]:[max] */
793 			if (!sep)
794 				sep = str; /* make max point to min */
795 			else
796 				*sep++ = 0;
797 			filt_http_status_low = *str ? atol(str) : 0;
798 			filt_http_status_high = *sep ? atol(sep) : 65535;
799 		}
800 		else if (strcmp(argv[0], "-time") == 0) {
801 			char *sep, *str;
802 
803 			if (argc < 2) die("missing option for -time ([min]:[max])");
804 			filter2 |= FILT2_TIMESTAMP;
805 
806 			argc--; argv++;
807 			str = *argv;
808 			sep = strchr(str, ':');  /* [min]:[max] */
809 			filt2_timestamp_low = *str ? atol(str) : 0;
810 			if (!sep)
811 				filt2_timestamp_high = 0xFFFFFFFF;
812 			else
813 				filt2_timestamp_high = atol(++sep);
814 		}
815 		else if (strcmp(argv[0], "-u") == 0)
816 			filter |= FILT_COUNT_URL_ONLY;
817 		else if (strcmp(argv[0], "-uc") == 0)
818 			filter |= FILT_COUNT_URL_COUNT;
819 		else if (strcmp(argv[0], "-ue") == 0)
820 			filter |= FILT_COUNT_URL_ERR;
821 		else if (strcmp(argv[0], "-ua") == 0)
822 			filter |= FILT_COUNT_URL_TAVG;
823 		else if (strcmp(argv[0], "-ut") == 0)
824 			filter |= FILT_COUNT_URL_TTOT;
825 		else if (strcmp(argv[0], "-uao") == 0)
826 			filter |= FILT_COUNT_URL_TAVGO;
827 		else if (strcmp(argv[0], "-uto") == 0)
828 			filter |= FILT_COUNT_URL_TTOTO;
829 		else if (strcmp(argv[0], "-uba") == 0)
830 			filter |= FILT_COUNT_URL_BAVG;
831 		else if (strcmp(argv[0], "-ubt") == 0)
832 			filter |= FILT_COUNT_URL_BTOT;
833 		else if (strcmp(argv[0], "-ic") == 0)
834 			filter |= FILT_COUNT_IP_COUNT;
835 		else if (strcmp(argv[0], "-o") == 0) {
836 			if (output_file)
837 				die("Fatal: output file name already specified.\n");
838 			if (argc < 2)
839 				die("Fatal: missing output file name.\n");
840 			output_file = argv[1];
841 		}
842 		else if (strcmp(argv[0], "-h") == 0 || strcmp(argv[0], "--help") == 0)
843 			help();
844 		argc--;
845 		argv++;
846 	}
847 
848 	if (!filter)
849 		die("No action specified.\n");
850 
851 	if (filter & FILT_ACC_COUNT && !filter_acc_count)
852 		filter_acc_count=1;
853 
854 	if (filter & FILT_ACC_DELAY && !filter_acc_delay)
855 		filter_acc_delay = 1;
856 
857 
858 	/* by default, all lines are printed */
859 	line_filter = filter_output_line;
860 	if (filter & (FILT_ACC_COUNT|FILT_ACC_DELAY))
861 		line_filter = filter_accept_holes;
862 	else if (filter & (FILT_GRAPH_TIMERS|FILT_PERCENTILE))
863 		line_filter = filter_graphs;
864 	else if (filter & FILT_COUNT_STATUS)
865 		line_filter = filter_count_status;
866 	else if (filter & FILT_COUNT_COOK_CODES)
867 		line_filter = filter_count_cook_codes;
868 	else if (filter & FILT_COUNT_TERM_CODES)
869 		line_filter = filter_count_term_codes;
870 	else if (filter & FILT_COUNT_SRV_STATUS)
871 		line_filter = filter_count_srv_status;
872 	else if (filter & FILT_COUNT_URL_ANY)
873 		line_filter = filter_count_url;
874 	else if (filter & FILT_COUNT_ONLY)
875 		line_filter = NULL;
876 
877 #if defined(POSIX_FADV_SEQUENTIAL)
878 	/* around 20% performance improvement is observed on Linux with this
879 	 * on cold-cache. Surprisingly, WILLNEED is less performant. Don't
880 	 * use NOREUSE as it flushes the cache and prevents easy data
881 	 * manipulation on logs!
882 	 */
883 	posix_fadvise(0, 0, 0, POSIX_FADV_SEQUENTIAL);
884 #endif
885 
886 	if (!line_filter && /* FILT_COUNT_ONLY ( see above), and no input filter (see below) */
887 	    !(filter & (FILT_HTTP_ONLY|FILT_TIME_RESP|FILT_ERRORS_ONLY|FILT_HTTP_STATUS|FILT_QUEUE_ONLY|FILT_QUEUE_SRV_ONLY|FILT_TERM_CODE_NAME)) &&
888 		!(filter2 & (FILT2_TIMESTAMP))) {
889 		/* read the whole file at once first, ignore it if inverted output */
890 		if (!filter_invert)
891 			while ((lines_max < 0 || lines_out < lines_max) && fgets2(stdin) != NULL)
892 				lines_out++;
893 
894 		goto skip_filters;
895 	}
896 
897 	while ((line = fgets2(stdin)) != NULL) {
898 		linenum++;
899 		time_field = NULL; accept_field = NULL;
900 		source_field = NULL;
901 
902 		test = 1;
903 
904 		/* for any line we process, we first ensure that there is a field
905 		 * looking like the accept date field (beginning with a '[').
906 		 */
907 		if (filter & FILT_COUNT_IP_COUNT) {
908 			/* we need the IP first */
909 			source_field = field_start(line, SOURCE_FIELD + skip_fields);
910 			accept_field = field_start(source_field, ACCEPT_FIELD - SOURCE_FIELD + 1);
911 		}
912 		else
913 			accept_field = field_start(line, ACCEPT_FIELD + skip_fields);
914 
915 		if (unlikely(*accept_field != '[')) {
916 			parse_err++;
917 			continue;
918 		}
919 
920 		/* the day of month field is begin 01 and 31 */
921 		if (accept_field[1] < '0' || accept_field[1] > '3') {
922 			parse_err++;
923 			continue;
924 		}
925 
926 		if (filter2 & FILT2_TIMESTAMP) {
927 			uval = convert_date_to_timestamp(accept_field);
928 			test &= (uval>=filt2_timestamp_low && uval<=filt2_timestamp_high) ;
929 		}
930 
931 		if (filter & FILT_HTTP_ONLY) {
932 			/* only report lines with at least 4 timers */
933 			if (!time_field) {
934 				time_field = field_start(accept_field, TIME_FIELD - ACCEPT_FIELD + 1);
935 				if (unlikely(!*time_field)) {
936 					truncated_line(linenum, line);
937 					continue;
938 				}
939 			}
940 
941 			field_stop(time_field + 1);
942 			/* we have field TIME_FIELD in [time_field]..[e-1] */
943 			p = time_field;
944 			f = 0;
945 			while (!SEP(*p)) {
946 				if (++f == 4)
947 					break;
948 				SKIP_CHAR(p, '/');
949 			}
950 			test &= (f >= 4);
951 		}
952 
953 		if (filter & FILT_TIME_RESP) {
954 			int tps;
955 
956 			/* only report lines with response times larger than filter_time_resp */
957 			if (!time_field) {
958 				time_field = field_start(accept_field, TIME_FIELD - ACCEPT_FIELD + 1);
959 				if (unlikely(!*time_field)) {
960 					truncated_line(linenum, line);
961 					continue;
962 				}
963 			}
964 
965 			field_stop(time_field + 1);
966 			/* we have field TIME_FIELD in [time_field]..[e-1], let's check only the response time */
967 
968 			p = time_field;
969 			f = 0;
970 			while (!SEP(*p)) {
971 				tps = str2ic(p);
972 				if (tps < 0) {
973 					tps = -1;
974 				}
975 				if (++f == 4)
976 					break;
977 				SKIP_CHAR(p, '/');
978 			}
979 
980 			if (unlikely(f < 4)) {
981 				parse_err++;
982 				continue;
983 			}
984 
985 			test &= (tps >= filter_time_resp) ^ !!(filter & FILT_INVERT_TIME_RESP);
986 		}
987 
988 		if (filter & (FILT_ERRORS_ONLY | FILT_HTTP_STATUS)) {
989 			/* Check both error codes (-1, 5xx) and status code ranges */
990 			if (time_field)
991 				b = field_start(time_field, STATUS_FIELD - TIME_FIELD + 1);
992 			else
993 				b = field_start(accept_field, STATUS_FIELD - ACCEPT_FIELD + 1);
994 
995 			if (unlikely(!*b)) {
996 				truncated_line(linenum, line);
997 				continue;
998 			}
999 
1000 			val = str2ic(b);
1001 			if (filter & FILT_ERRORS_ONLY)
1002 				test &= (val < 0 || (val >= 500 && val <= 599)) ^ !!(filter & FILT_INVERT_ERRORS);
1003 
1004 			if (filter & FILT_HTTP_STATUS)
1005 				test &= (val >= filt_http_status_low && val <= filt_http_status_high) ^ !!(filter & FILT_INVERT_HTTP_STATUS);
1006 		}
1007 
1008 		if (filter & (FILT_QUEUE_ONLY|FILT_QUEUE_SRV_ONLY)) {
1009 			/* Check if the server's queue is non-nul */
1010 			if (time_field)
1011 				b = field_start(time_field, QUEUE_LEN_FIELD - TIME_FIELD + 1);
1012 			else
1013 				b = field_start(accept_field, QUEUE_LEN_FIELD - ACCEPT_FIELD + 1);
1014 
1015 			if (unlikely(!*b)) {
1016 				truncated_line(linenum, line);
1017 				continue;
1018 			}
1019 
1020 			if (*b == '0') {
1021 				if (filter & FILT_QUEUE_SRV_ONLY) {
1022 					test = 0;
1023 				}
1024 				else {
1025 					do {
1026 						b++;
1027 						if (*b == '/') {
1028 							b++;
1029 							break;
1030 						}
1031 					} while (*b);
1032 					test &= ((unsigned char)(*b - '1') < 9);
1033 				}
1034 			}
1035 		}
1036 
1037 		if (filter & FILT_TERM_CODE_NAME) {
1038 			/* only report corresponding termination code name */
1039 			if (time_field)
1040 				b = field_start(time_field, TERM_CODES_FIELD - TIME_FIELD + 1);
1041 			else
1042 				b = field_start(accept_field, TERM_CODES_FIELD - ACCEPT_FIELD + 1);
1043 
1044 			if (unlikely(!*b)) {
1045 				truncated_line(linenum, line);
1046 				continue;
1047 			}
1048 
1049 			test &= (b[0] == filter_term_code_name[0] && b[1] == filter_term_code_name[1]) ^ !!(filter & FILT_INVERT_TERM_CODE_NAME);
1050 		}
1051 
1052 
1053 		test ^= filter_invert;
1054 		if (!test)
1055 			continue;
1056 
1057 		/************** here we process inputs *******************/
1058 
1059 		if (line_filter) {
1060 			if (filter & FILT_COUNT_IP_COUNT)
1061 				filter_count_ip(source_field, accept_field, time_field, &t);
1062 			else
1063 				line_filter(accept_field, time_field, &t);
1064 		}
1065 		else
1066 			lines_out++; /* FILT_COUNT_ONLY was used, so we're just counting lines */
1067 		if (lines_max >= 0 && lines_out >= lines_max)
1068 			break;
1069 	}
1070 
1071  skip_filters:
1072 	/*****************************************************
1073 	 * Here we've finished reading all input. Depending on the
1074 	 * filters, we may still have some analysis to run on the
1075 	 * collected data and to output data in a new format.
1076 	 *************************************************** */
1077 
1078 	if (t)
1079 		free(t);
1080 
1081 	if (filter & FILT_COUNT_ONLY) {
1082 		printf("%d\n", lines_out);
1083 		exit(0);
1084 	}
1085 
1086 	if (filter & (FILT_ACC_COUNT|FILT_ACC_DELAY)) {
1087 		/* sort and count all timers. Output will look like this :
1088 		 * <accept_date> <delta_ms from previous one> <nb entries>
1089 		 */
1090 		n = eb32_first(&timers[0]);
1091 
1092 		if (n)
1093 			last = n->key;
1094 		while (n) {
1095 			unsigned int d, h, m, s, ms;
1096 
1097 			t = container_of(n, struct timer, node);
1098 			h = n->key;
1099 			d = h - last;
1100 			last = h;
1101 
1102 			if (d >= filter_acc_delay && t->count >= filter_acc_count) {
1103 				ms = h % 1000; h = h / 1000;
1104 				s = h % 60; h = h / 60;
1105 				m = h % 60; h = h / 60;
1106 				printf("%02d:%02d:%02d.%03d %d %d %d\n", h, m, s, ms, last, d, t->count);
1107 				lines_out++;
1108 				if (lines_max >= 0 && lines_out >= lines_max)
1109 					break;
1110 			}
1111 			n = eb32_next(n);
1112 		}
1113 	}
1114 	else if (filter & FILT_GRAPH_TIMERS) {
1115 		/* sort all timers */
1116 		for (f = 0; f < 5; f++) {
1117 			struct eb32_node *n;
1118 			int val;
1119 
1120 			val = 0;
1121 			n = eb32_first(&timers[f]);
1122 			while (n) {
1123 				int i;
1124 				double d;
1125 
1126 				t = container_of(n, struct timer, node);
1127 				last = n->key;
1128 				val = t->count;
1129 
1130 				i = (last < 0) ? -last : last;
1131 				i = fls_auto(i) - QBITS;
1132 
1133 				if (i > 0)
1134 					d = val / (double)(1 << i);
1135 				else
1136 					d = val;
1137 
1138 				if (d > 0.0)
1139 					printf("%d %d %f\n", f, last, d+1.0);
1140 
1141 				n = eb32_next(n);
1142 			}
1143 		}
1144 	}
1145 	else if (filter & FILT_PERCENTILE) {
1146 		/* report timers by percentile :
1147 		 *    <percent> <total> <max_req_time> <max_conn_time> <max_resp_time> <max_data_time>
1148 		 * We don't count errs.
1149 		 */
1150 		struct eb32_node *n[5];
1151 		unsigned long cum[5];
1152 		double step;
1153 
1154 		if (!lines_out)
1155 			goto empty;
1156 
1157 		for (f = 1; f < 5; f++) {
1158 			n[f] = eb32_first(&timers[f]);
1159 			cum[f] = container_of(n[f], struct timer, node)->count;
1160 		}
1161 
1162 		for (step = 1; step <= 1000;) {
1163 			unsigned int thres = lines_out * (step / 1000.0);
1164 
1165 			printf("%3.1f %d ", step/10.0, thres);
1166 			for (f = 1; f < 5; f++) {
1167 				struct eb32_node *next;
1168 				while (cum[f] < thres) {
1169 					/* need to find other keys */
1170 					next = eb32_next(n[f]);
1171 					if (!next)
1172 						break;
1173 					n[f] = next;
1174 					cum[f] += container_of(next, struct timer, node)->count;
1175 				}
1176 
1177 				/* value still within $step % of total */
1178 				printf("%d ", n[f]->key);
1179 			}
1180 			putchar('\n');
1181 			if (step >= 100 && step < 900)
1182 				step += 50;  // jump 5% by 5% between those steps.
1183 			else if (step >= 20 && step < 980)
1184 				step += 10;
1185 			else
1186 				step += 1;
1187 		}
1188 	}
1189 	else if (filter & FILT_COUNT_STATUS) {
1190 		/* output all statuses in the form of <status> <occurrences> */
1191 		n = eb32_first(&timers[0]);
1192 		while (n) {
1193 			t = container_of(n, struct timer, node);
1194 			printf("%d %d\n", n->key, t->count);
1195 			lines_out++;
1196 			if (lines_max >= 0 && lines_out >= lines_max)
1197 				break;
1198 			n = eb32_next(n);
1199 		}
1200 	}
1201 	else if (filter & FILT_COUNT_SRV_STATUS) {
1202 		struct ebmb_node *srv_node;
1203 		struct srv_st *srv;
1204 
1205 		printf("#srv_name 1xx 2xx 3xx 4xx 5xx other tot_req req_ok pct_ok avg_ct avg_rt\n");
1206 
1207 		srv_node = ebmb_first(&timers[0]);
1208 		while (srv_node) {
1209 			int tot_rq;
1210 
1211 			srv = container_of(srv_node, struct srv_st, node);
1212 
1213 			tot_rq = 0;
1214 			for (f = 0; f <= 5; f++)
1215 				tot_rq += srv->st_cnt[f];
1216 
1217 			printf("%s %d %d %d %d %d %d %d %d %.1f %d %d\n",
1218 			       srv_node->key, srv->st_cnt[1], srv->st_cnt[2],
1219 			       srv->st_cnt[3], srv->st_cnt[4], srv->st_cnt[5], srv->st_cnt[0],
1220 			       tot_rq,
1221 			       srv->nb_ok, (double)srv->nb_ok * 100.0 / (tot_rq?tot_rq:1),
1222 			       (int)(srv->cum_ct / (srv->nb_ct?srv->nb_ct:1)), (int)(srv->cum_rt / (srv->nb_rt?srv->nb_rt:1)));
1223 			srv_node = ebmb_next(srv_node);
1224 			lines_out++;
1225 			if (lines_max >= 0 && lines_out >= lines_max)
1226 				break;
1227 		}
1228 	}
1229 	else if (filter & (FILT_COUNT_TERM_CODES|FILT_COUNT_COOK_CODES)) {
1230 		/* output all statuses in the form of <code> <occurrences> */
1231 		n = eb32_first(&timers[0]);
1232 		while (n) {
1233 			t = container_of(n, struct timer, node);
1234 			printf("%c%c %d\n", (n->key >> 8), (n->key) & 255, t->count);
1235 			lines_out++;
1236 			if (lines_max >= 0 && lines_out >= lines_max)
1237 				break;
1238 			n = eb32_next(n);
1239 		}
1240 	}
1241 	else if (filter & (FILT_COUNT_URL_ANY|FILT_COUNT_IP_COUNT)) {
1242 		struct eb_node *node, *next;
1243 
1244 		if (!(filter & FILT_COUNT_URL_ONLY)) {
1245 			/* we have to sort on another criterion. We'll use timers[1] for the
1246 			 * destination tree.
1247 			 */
1248 
1249 			timers[1] = EB_ROOT; /* reconfigure to accept duplicates */
1250 			for (node = eb_first(&timers[0]); node; node = next) {
1251 				next = eb_next(node);
1252 				eb_delete(node);
1253 
1254 				ustat = container_of(node, struct url_stat, node.url.node);
1255 
1256 				if (filter & (FILT_COUNT_URL_COUNT|FILT_COUNT_IP_COUNT))
1257 					ustat->node.val.key = ustat->nb_req;
1258 				else if (filter & FILT_COUNT_URL_ERR)
1259 					ustat->node.val.key = ustat->nb_err;
1260 				else if (filter & FILT_COUNT_URL_TTOT)
1261 					ustat->node.val.key = ustat->total_time;
1262 				else if (filter & FILT_COUNT_URL_TAVG)
1263 					ustat->node.val.key = ustat->nb_req ? ustat->total_time / ustat->nb_req : 0;
1264 				else if (filter & FILT_COUNT_URL_TTOTO)
1265 					ustat->node.val.key = ustat->total_time_ok;
1266 				else if (filter & FILT_COUNT_URL_TAVGO)
1267 					ustat->node.val.key = (ustat->nb_req - ustat->nb_err) ? ustat->total_time_ok / (ustat->nb_req - ustat->nb_err) : 0;
1268 				else if (filter & FILT_COUNT_URL_BAVG)
1269 					ustat->node.val.key = ustat->nb_req ? ustat->total_bytes_sent / ustat->nb_req : 0;
1270 				else if (filter & FILT_COUNT_URL_BTOT)
1271 					ustat->node.val.key = ustat->total_bytes_sent;
1272 				else
1273 					ustat->node.val.key = 0;
1274 
1275 				eb64_insert(&timers[1], &ustat->node.val);
1276 			}
1277 			/* switch trees */
1278 			timers[0] = timers[1];
1279 		}
1280 
1281 		if (FILT_COUNT_IP_COUNT)
1282 			printf("#req err ttot tavg oktot okavg bavg btot src\n");
1283 		else
1284 			printf("#req err ttot tavg oktot okavg bavg btot url\n");
1285 
1286 		/* scan the tree in its reverse sorting order */
1287 		node = eb_last(&timers[0]);
1288 		while (node) {
1289 			ustat = container_of(node, struct url_stat, node.url.node);
1290 			printf("%d %d %Ld %Ld %Ld %Ld %Ld %Ld %s\n",
1291 			       ustat->nb_req,
1292 			       ustat->nb_err,
1293 			       ustat->total_time,
1294 			       ustat->nb_req ? ustat->total_time / ustat->nb_req : 0,
1295 			       ustat->total_time_ok,
1296 			       (ustat->nb_req - ustat->nb_err) ? ustat->total_time_ok / (ustat->nb_req - ustat->nb_err) : 0,
1297 			       ustat->nb_req ? ustat->total_bytes_sent / ustat->nb_req : 0,
1298 			       ustat->total_bytes_sent,
1299 			       ustat->url);
1300 
1301 			node = eb_prev(node);
1302 			lines_out++;
1303 			if (lines_max >= 0 && lines_out >= lines_max)
1304 				break;
1305 		}
1306 	}
1307 
1308  empty:
1309 	if (!(filter & FILT_QUIET))
1310 		fprintf(stderr, "%d lines in, %d lines out, %d parsing errors\n",
1311 			linenum, lines_out, parse_err);
1312 	exit(0);
1313 }
1314 
filter_output_line(const char * accept_field,const char * time_field,struct timer ** tptr)1315 void filter_output_line(const char *accept_field, const char *time_field, struct timer **tptr)
1316 {
1317 	puts(line);
1318 	lines_out++;
1319 }
1320 
filter_accept_holes(const char * accept_field,const char * time_field,struct timer ** tptr)1321 void filter_accept_holes(const char *accept_field, const char *time_field, struct timer **tptr)
1322 {
1323 	struct timer *t2;
1324 	int val;
1325 
1326 	val = convert_date(accept_field);
1327 	if (unlikely(val < 0)) {
1328 		truncated_line(linenum, line);
1329 		return;
1330 	}
1331 
1332 	t2 = insert_value(&timers[0], tptr, val);
1333 	t2->count++;
1334 	return;
1335 }
1336 
filter_count_status(const char * accept_field,const char * time_field,struct timer ** tptr)1337 void filter_count_status(const char *accept_field, const char *time_field, struct timer **tptr)
1338 {
1339 	struct timer *t2;
1340 	const char *b;
1341 	int val;
1342 
1343 	if (time_field)
1344 		b = field_start(time_field, STATUS_FIELD - TIME_FIELD + 1);
1345 	else
1346 		b = field_start(accept_field, STATUS_FIELD - ACCEPT_FIELD + 1);
1347 
1348 	if (unlikely(!*b)) {
1349 		truncated_line(linenum, line);
1350 		return;
1351 	}
1352 
1353 	val = str2ic(b);
1354 
1355 	t2 = insert_value(&timers[0], tptr, val);
1356 	t2->count++;
1357 }
1358 
filter_count_cook_codes(const char * accept_field,const char * time_field,struct timer ** tptr)1359 void filter_count_cook_codes(const char *accept_field, const char *time_field, struct timer **tptr)
1360 {
1361 	struct timer *t2;
1362 	const char *b;
1363 	int val;
1364 
1365 	if (time_field)
1366 		b = field_start(time_field, TERM_CODES_FIELD - TIME_FIELD + 1);
1367 	else
1368 		b = field_start(accept_field, TERM_CODES_FIELD - ACCEPT_FIELD + 1);
1369 
1370 	if (unlikely(!*b)) {
1371 		truncated_line(linenum, line);
1372 		return;
1373 	}
1374 
1375 	val = 256 * b[2] + b[3];
1376 
1377 	t2 = insert_value(&timers[0], tptr, val);
1378 	t2->count++;
1379 }
1380 
filter_count_term_codes(const char * accept_field,const char * time_field,struct timer ** tptr)1381 void filter_count_term_codes(const char *accept_field, const char *time_field, struct timer **tptr)
1382 {
1383 	struct timer *t2;
1384 	const char *b;
1385 	int val;
1386 
1387 	if (time_field)
1388 		b = field_start(time_field, TERM_CODES_FIELD - TIME_FIELD + 1);
1389 	else
1390 		b = field_start(accept_field, TERM_CODES_FIELD - ACCEPT_FIELD + 1);
1391 
1392 	if (unlikely(!*b)) {
1393 		truncated_line(linenum, line);
1394 		return;
1395 	}
1396 
1397 	val = 256 * b[0] + b[1];
1398 
1399 	t2 = insert_value(&timers[0], tptr, val);
1400 	t2->count++;
1401 }
1402 
filter_count_srv_status(const char * accept_field,const char * time_field,struct timer ** tptr)1403 void filter_count_srv_status(const char *accept_field, const char *time_field, struct timer **tptr)
1404 {
1405 	const char *b, *e, *p;
1406 	int f, err, array[5];
1407 	struct ebmb_node *srv_node;
1408 	struct srv_st *srv;
1409 	int val;
1410 
1411 	/* the server field is before the status field, so let's
1412 	 * parse them in the proper order.
1413 	 */
1414 	b = field_start(accept_field, SERVER_FIELD - ACCEPT_FIELD + 1);
1415 	if (unlikely(!*b)) {
1416 		truncated_line(linenum, line);
1417 		return;
1418 	}
1419 
1420 	e = field_stop(b + 1);  /* we have the server name in [b]..[e-1] */
1421 
1422 	/* the chance that a server name already exists is extremely high,
1423 	 * so let's perform a normal lookup first.
1424 	 */
1425 	srv_node = ebst_lookup_len(&timers[0], b, e - b);
1426 	srv = container_of(srv_node, struct srv_st, node);
1427 
1428 	if (!srv_node) {
1429 		/* server not yet in the tree, let's create it */
1430 		srv = (void *)calloc(1, sizeof(struct srv_st) + e - b + 1);
1431 		srv_node = &srv->node;
1432 		memcpy(&srv_node->key, b, e - b);
1433 		srv_node->key[e - b] = '\0';
1434 		ebst_insert(&timers[0], srv_node);
1435 	}
1436 
1437 	/* let's collect the connect and response times */
1438 	if (!time_field) {
1439 		time_field = field_start(e, TIME_FIELD - SERVER_FIELD);
1440 		if (unlikely(!*time_field)) {
1441 			truncated_line(linenum, line);
1442 			return;
1443 		}
1444 	}
1445 
1446 	e = field_stop(time_field + 1);
1447 	/* we have field TIME_FIELD in [time_field]..[e-1] */
1448 
1449 	p = time_field;
1450 	err = 0;
1451 	f = 0;
1452 	while (!SEP(*p)) {
1453 		array[f] = str2ic(p);
1454 		if (array[f] < 0) {
1455 			array[f] = -1;
1456 			err = 1;
1457 		}
1458 		if (++f == 5)
1459 			break;
1460 		SKIP_CHAR(p, '/');
1461 	}
1462 
1463 	if (unlikely(f < 5)){
1464 		parse_err++;
1465 		return;
1466 	}
1467 
1468 	/* OK we have our timers in array[2,3] */
1469 	if (!err)
1470 		srv->nb_ok++;
1471 
1472 	if (array[2] >= 0) {
1473 		srv->cum_ct += array[2];
1474 		srv->nb_ct++;
1475 	}
1476 
1477 	if (array[3] >= 0) {
1478 		srv->cum_rt += array[3];
1479 		srv->nb_rt++;
1480 	}
1481 
1482 	/* we're interested in the 5 HTTP status classes (1xx ... 5xx), and
1483 	 * the invalid ones which will be reported as 0.
1484 	 */
1485 	b = field_start(e, STATUS_FIELD - TIME_FIELD);
1486 	if (unlikely(!*b)) {
1487 		truncated_line(linenum, line);
1488 		return;
1489 	}
1490 
1491 	val = 0;
1492 	if (*b >= '1' && *b <= '5')
1493 		val = *b - '0';
1494 
1495 	srv->st_cnt[val]++;
1496 }
1497 
filter_count_url(const char * accept_field,const char * time_field,struct timer ** tptr)1498 void filter_count_url(const char *accept_field, const char *time_field, struct timer **tptr)
1499 {
1500 	struct url_stat *ustat = NULL;
1501 	struct ebpt_node *ebpt_old;
1502 	const char *b, *e;
1503 	int f, err, array[5];
1504 	int val;
1505 
1506 	/* let's collect the response time */
1507 	if (!time_field) {
1508 		time_field = field_start(accept_field, TIME_FIELD - ACCEPT_FIELD + 1);  // avg 115 ns per line
1509 		if (unlikely(!*time_field)) {
1510 			truncated_line(linenum, line);
1511 			return;
1512 		}
1513 	}
1514 
1515 	/* we have the field TIME_FIELD starting at <time_field>. We'll
1516 	 * parse the 5 timers to detect errors, it takes avg 55 ns per line.
1517 	 */
1518 	e = time_field; err = 0; f = 0;
1519 	while (!SEP(*e)) {
1520 		array[f] = str2ic(e);
1521 		if (array[f] < 0) {
1522 			array[f] = -1;
1523 			err = 1;
1524 		}
1525 		if (++f == 5)
1526 			break;
1527 		SKIP_CHAR(e, '/');
1528 	}
1529 	if (f < 5) {
1530 		parse_err++;
1531 		return;
1532 	}
1533 
1534 	/* OK we have our timers in array[3], and err is >0 if at
1535 	 * least one -1 was seen. <e> points to the first char of
1536 	 * the last timer. Let's prepare a new node with that.
1537 	 */
1538 	if (unlikely(!ustat))
1539 		ustat = calloc(1, sizeof(*ustat));
1540 
1541 	ustat->nb_err = err;
1542 	ustat->nb_req = 1;
1543 
1544 	/* use array[4] = total time in case of error */
1545 	ustat->total_time = (array[3] >= 0) ? array[3] : array[4];
1546 	ustat->total_time_ok = (array[3] >= 0) ? array[3] : 0;
1547 
1548 	e = field_start(e, BYTES_SENT_FIELD - TIME_FIELD + 1);
1549 	val = str2ic(e);
1550 	ustat->total_bytes_sent = val;
1551 
1552 	/* the line may be truncated because of a bad request or anything like this,
1553 	 * without a method. Also, if it does not begin with an quote, let's skip to
1554 	 * the next field because it's a capture. Let's fall back to the "method" itself
1555 	 * if there's nothing else.
1556 	 */
1557 	e = field_start(e, METH_FIELD - BYTES_SENT_FIELD + 1);
1558 	while (*e != '"' && *e) {
1559 		/* Note: some syslog servers escape quotes ! */
1560 		if (*e == '\\' && e[1] == '"')
1561 			break;
1562 		e = field_start(e, 2);
1563 	}
1564 
1565 	if (unlikely(!*e)) {
1566 		truncated_line(linenum, line);
1567 		free(ustat);
1568 		return;
1569 	}
1570 
1571 	b = field_start(e, URL_FIELD - METH_FIELD + 1); // avg 40 ns per line
1572 	if (!*b)
1573 		b = e;
1574 
1575 	/* stop at end of field or first ';' or '?', takes avg 64 ns per line */
1576 	e = b;
1577 	do {
1578 		if (*e == ' ' || *e == '?' || *e == ';') {
1579 			*(char *)e = 0;
1580 			break;
1581 		}
1582 		e++;
1583 	} while (*e);
1584 
1585 	/* now instead of copying the URL for a simple lookup, we'll link
1586 	 * to it from the node we're trying to insert. If it returns a
1587 	 * different value, it was already there. Otherwise we just have
1588 	 * to dynamically realloc an entry using strdup().
1589 	 */
1590 	ustat->node.url.key = (char *)b;
1591 	ebpt_old = ebis_insert(&timers[0], &ustat->node.url);
1592 
1593 	if (ebpt_old != &ustat->node.url) {
1594 		struct url_stat *ustat_old;
1595 		/* node was already there, let's update previous one */
1596 		ustat_old = container_of(ebpt_old, struct url_stat, node.url);
1597 		ustat_old->nb_req ++;
1598 		ustat_old->nb_err += ustat->nb_err;
1599 		ustat_old->total_time += ustat->total_time;
1600 		ustat_old->total_time_ok += ustat->total_time_ok;
1601 		ustat_old->total_bytes_sent += ustat->total_bytes_sent;
1602 	} else {
1603 		ustat->url = ustat->node.url.key = strdup(ustat->node.url.key);
1604 		ustat = NULL; /* node was used */
1605 	}
1606 }
1607 
filter_count_ip(const char * source_field,const char * accept_field,const char * time_field,struct timer ** tptr)1608 void filter_count_ip(const char *source_field, const char *accept_field, const char *time_field, struct timer **tptr)
1609 {
1610 	struct url_stat *ustat = NULL;
1611 	struct ebpt_node *ebpt_old;
1612 	const char *b, *e;
1613 	int f, err, array[5];
1614 	int val;
1615 
1616 	/* let's collect the response time */
1617 	if (!time_field) {
1618 		time_field = field_start(accept_field, TIME_FIELD - ACCEPT_FIELD + 1);  // avg 115 ns per line
1619 		if (unlikely(!*time_field)) {
1620 			truncated_line(linenum, line);
1621 			return;
1622 		}
1623 	}
1624 
1625 	/* we have the field TIME_FIELD starting at <time_field>. We'll
1626 	 * parse the 5 timers to detect errors, it takes avg 55 ns per line.
1627 	 */
1628 	e = time_field; err = 0; f = 0;
1629 	while (!SEP(*e)) {
1630 		if (f == 0 || f == 4) {
1631 			array[f] = str2ic(e);
1632 			if (array[f] < 0) {
1633 				array[f] = -1;
1634 				err = 1;
1635 			}
1636 		}
1637 		if (++f == 5)
1638 			break;
1639 		SKIP_CHAR(e, '/');
1640 	}
1641 	if (f < 5) {
1642 		parse_err++;
1643 		return;
1644 	}
1645 
1646 	/* OK we have our timers in array[0], and err is >0 if at
1647 	 * least one -1 was seen. <e> points to the first char of
1648 	 * the last timer. Let's prepare a new node with that.
1649 	 */
1650 	if (unlikely(!ustat))
1651 		ustat = calloc(1, sizeof(*ustat));
1652 
1653 	ustat->nb_err = err;
1654 	ustat->nb_req = 1;
1655 
1656 	/* use array[4] = total time in case of error */
1657 	ustat->total_time = (array[0] >= 0) ? array[0] : array[4];
1658 	ustat->total_time_ok = (array[0] >= 0) ? array[0] : 0;
1659 
1660 	e = field_start(e, BYTES_SENT_FIELD - TIME_FIELD + 1);
1661 	val = str2ic(e);
1662 	ustat->total_bytes_sent = val;
1663 
1664 	/* the source might be IPv4 or IPv6, so we always strip the port by
1665 	 * removing the last colon.
1666 	 */
1667 	b = source_field;
1668 	e = field_stop(b + 1);
1669 	while (e > b && e[-1] != ':')
1670 		e--;
1671 	*(char *)(e - 1) = '\0';
1672 
1673 	/* now instead of copying the src for a simple lookup, we'll link
1674 	 * to it from the node we're trying to insert. If it returns a
1675 	 * different value, it was already there. Otherwise we just have
1676 	 * to dynamically realloc an entry using strdup(). We're using the
1677 	 * <url> field of the node to store the source address.
1678 	 */
1679 	ustat->node.url.key = (char *)b;
1680 	ebpt_old = ebis_insert(&timers[0], &ustat->node.url);
1681 
1682 	if (ebpt_old != &ustat->node.url) {
1683 		struct url_stat *ustat_old;
1684 		/* node was already there, let's update previous one */
1685 		ustat_old = container_of(ebpt_old, struct url_stat, node.url);
1686 		ustat_old->nb_req ++;
1687 		ustat_old->nb_err += ustat->nb_err;
1688 		ustat_old->total_time += ustat->total_time;
1689 		ustat_old->total_time_ok += ustat->total_time_ok;
1690 		ustat_old->total_bytes_sent += ustat->total_bytes_sent;
1691 	} else {
1692 		ustat->url = ustat->node.url.key = strdup(ustat->node.url.key);
1693 		ustat = NULL; /* node was used */
1694 	}
1695 }
1696 
filter_graphs(const char * accept_field,const char * time_field,struct timer ** tptr)1697 void filter_graphs(const char *accept_field, const char *time_field, struct timer **tptr)
1698 {
1699 	struct timer *t2;
1700 	const char *p;
1701 	int f, err, array[5];
1702 
1703 	if (!time_field) {
1704 		time_field = field_start(accept_field, TIME_FIELD - ACCEPT_FIELD + 1);
1705 		if (unlikely(!*time_field)) {
1706 			truncated_line(linenum, line);
1707 			return;
1708 		}
1709 	}
1710 
1711 	field_stop(time_field + 1);
1712 	/* we have field TIME_FIELD in [time_field]..[e-1] */
1713 
1714 	p = time_field;
1715 	err = 0;
1716 	f = 0;
1717 	while (!SEP(*p)) {
1718 		array[f] = str2ic(p);
1719 		if (array[f] < 0) {
1720 			array[f] = -1;
1721 			err = 1;
1722 		}
1723 		if (++f == 5)
1724 			break;
1725 		SKIP_CHAR(p, '/');
1726 	}
1727 
1728 	if (unlikely(f < 5)) {
1729 		parse_err++;
1730 		return;
1731 	}
1732 
1733 	/* if we find at least one negative time, we count one error
1734 	 * with a time equal to the total session time. This will
1735 	 * emphasize quantum timing effects associated to known
1736 	 * timeouts. Note that on some buggy machines, it is possible
1737 	 * that the total time is negative, hence the reason to reset
1738 	 * it.
1739 	 */
1740 
1741 	if (filter & FILT_GRAPH_TIMERS) {
1742 		if (err) {
1743 			if (array[4] < 0)
1744 				array[4] = -1;
1745 			t2 = insert_timer(&timers[0], tptr, array[4]);  // total time
1746 			t2->count++;
1747 		} else {
1748 			int v;
1749 
1750 			t2 = insert_timer(&timers[1], tptr, array[0]); t2->count++;  // req
1751 			t2 = insert_timer(&timers[2], tptr, array[2]); t2->count++;  // conn
1752 			t2 = insert_timer(&timers[3], tptr, array[3]); t2->count++;  // resp
1753 
1754 			v = array[4] - array[0] - array[1] - array[2] - array[3]; // data time
1755 			if (v < 0 && !(filter & FILT_QUIET))
1756 				fprintf(stderr, "ERR: %s (%d %d %d %d %d => %d)\n",
1757 					line, array[0], array[1], array[2], array[3], array[4], v);
1758 			t2 = insert_timer(&timers[4], tptr, v); t2->count++;
1759 			lines_out++;
1760 		}
1761 	} else { /* percentile */
1762 		if (err) {
1763 			if (array[4] < 0)
1764 				array[4] = -1;
1765 			t2 = insert_value(&timers[0], tptr, array[4]);  // total time
1766 			t2->count++;
1767 		} else {
1768 			int v;
1769 
1770 			t2 = insert_value(&timers[1], tptr, array[0]); t2->count++;  // req
1771 			t2 = insert_value(&timers[2], tptr, array[2]); t2->count++;  // conn
1772 			t2 = insert_value(&timers[3], tptr, array[3]); t2->count++;  // resp
1773 
1774 			v = array[4] - array[0] - array[1] - array[2] - array[3]; // data time
1775 			if (v < 0 && !(filter & FILT_QUIET))
1776 				fprintf(stderr, "ERR: %s (%d %d %d %d %d => %d)\n",
1777 					line, array[0], array[1], array[2], array[3], array[4], v);
1778 			t2 = insert_value(&timers[4], tptr, v); t2->count++;
1779 			lines_out++;
1780 		}
1781 	}
1782 }
1783 
1784 
1785 /*
1786  * Local variables:
1787  *  c-indent-level: 8
1788  *  c-basic-offset: 8
1789  * End:
1790  */
1791