xref: /openbsd/usr.sbin/btrace/bt_parse.y (revision d89ec533)
1 /*	$OpenBSD: bt_parse.y,v 1.45 2021/11/12 16:57:24 claudio Exp $	*/
2 
3 /*
4  * Copyright (c) 2019-2021 Martin Pieuchot <mpi@openbsd.org>
5  * Copyright (c) 2019 Tobias Heider <tobhe@openbsd.org>
6  * Copyright (c) 2015 Ted Unangst <tedu@openbsd.org>
7  *
8  * Permission to use, copy, modify, and distribute this software for any
9  * purpose with or without fee is hereby granted, provided that the above
10  * copyright notice and this permission notice appear in all copies.
11  *
12  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
13  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
14  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
15  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
16  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
17  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
18  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
19  */
20 
21 /*
22  * B tracing language parser.
23  *
24  * The dialect of the language understood by this parser aims to be
25  * compatible with the one understood by bpftrace(8), see:
26  *
27  * https://github.com/iovisor/bpftrace/blob/master/docs/reference_guide.md
28  *
29  */
30 
31 %{
32 #include <sys/queue.h>
33 
34 #include <assert.h>
35 #include <ctype.h>
36 #include <err.h>
37 #include <limits.h>
38 #include <stdarg.h>
39 #include <stdint.h>
40 #include <stdio.h>
41 
42 #include "bt_parser.h"
43 
44 /* Name for the default map @[], hopefully nobody will use this one ;) */
45 #define UNNAMED_MAP	"___unnamed_map_doesnt_have_any_name"
46 
47 /* Number of rules to evaluate. */
48 struct bt_ruleq		g_rules = TAILQ_HEAD_INITIALIZER(g_rules);
49 
50 /* Number of probes except BEGIN/END. */
51 int		 	g_nprobes;
52 
53 /* List of global variables, including maps. */
54 SLIST_HEAD(, bt_var)	 g_variables;
55 
56 /* List of local variables, cleaned for each new rule. */
57 SLIST_HEAD(, bt_var)	l_variables;
58 
59 struct bt_arg 		g_nullba = BA_INITIALIZER(0, B_AT_LONG);
60 struct bt_arg		g_maxba = BA_INITIALIZER(LONG_MAX, B_AT_LONG);
61 
62 struct bt_rule	*br_new(struct bt_probe *, struct bt_filter *,
63 		     struct bt_stmt *);
64 struct bt_probe	*bp_new(const char *, const char *, const char *, int32_t);
65 struct bt_arg	*ba_append(struct bt_arg *, struct bt_arg *);
66 struct bt_arg	*ba_op(enum bt_argtype, struct bt_arg *, struct bt_arg *);
67 struct bt_stmt	*bs_new(enum bt_action, struct bt_arg *, struct bt_var *);
68 struct bt_stmt	*bs_append(struct bt_stmt *, struct bt_stmt *);
69 
70 struct bt_var	*bg_lookup(const char *);
71 struct bt_stmt	*bg_store(const char *, struct bt_arg *);
72 struct bt_arg	*bg_find(const char *);
73 struct bt_var	*bg_get(const char *);
74 
75 struct bt_var	*bl_lookup(const char *);
76 struct bt_stmt	*bl_store(const char *, struct bt_arg *);
77 struct bt_arg	*bl_find(const char *);
78 
79 struct bt_arg	*bm_find(const char *, struct bt_arg *);
80 struct bt_stmt	*bm_insert(const char *, struct bt_arg *, struct bt_arg *);
81 struct bt_stmt	*bm_op(enum bt_action, struct bt_arg *, struct bt_arg *);
82 
83 struct bt_stmt	*bh_inc(const char *, struct bt_arg *, struct bt_arg *);
84 
85 /*
86  * Lexer
87  */
88 const char	*pbuf;
89 size_t		 plen;
90 size_t		 pindex;
91 int		 perrors = 0;
92 
93 typedef struct {
94 	union {
95 		long			 number;
96 		int			 i;
97 		const char		*string;
98 		struct bt_probe		*probe;
99 		struct bt_filter	*filter;
100 		struct bt_stmt		*stmt;
101 		struct bt_arg		*arg;
102 	} v;
103 	const char			*filename;
104 	int				 lineno;
105 	int				 colno;
106 } yystype;
107 #define YYSTYPE yystype
108 
109 static void	 yyerror(const char *, ...);
110 static int	 yylex(void);
111 
112 static int pflag;
113 %}
114 
115 %token	<v.i>		ERROR ENDFILT
116 %token	<v.i>		OP_EQ OP_NE OP_LE OP_LT OP_GE OP_GT OP_LAND OP_LOR
117 /* Builtins */
118 %token	<v.i>		BUILTIN BEGIN END HZ IF
119 /* Functions and Map operators */
120 %token  <v.i>		F_DELETE F_PRINT
121 %token	<v.i>		MFUNC FUNC0 FUNC1 FUNCN OP1 OP2 OP4 MOP0 MOP1
122 %token	<v.string>	STRING CSTRING
123 %token	<v.number>	NUMBER
124 
125 %type	<v.string>	gvar lvar
126 %type	<v.i>		beginend
127 %type	<v.probe>	plist probe pname
128 %type	<v.filter>	filter
129 %type	<v.stmt>	action stmt stmtblck stmtlist block
130 %type	<v.arg>		pat vargs mentry mpat pargs staticv
131 %type	<v.arg>		expr term fterm variable factor func
132 %%
133 
134 grammar	: /* empty */
135 	| grammar '\n'
136 	| grammar rule
137 	| grammar error
138 	;
139 
140 rule	: plist filter action		{ br_new($1, $2, $3); }
141 	;
142 
143 beginend: BEGIN	| END ;
144 
145 plist	: plist ',' probe		{ $$ = bp_append($1, $3); }
146 	| probe
147 	;
148 
149 probe	: { pflag = 1; } pname		{ $$ = $2; pflag = 0; }
150 	| beginend			{ $$ = bp_new(NULL, NULL, NULL, $1); }
151 	;
152 
153 pname	: STRING ':' STRING ':' STRING	{ $$ = bp_new($1, $3, $5, 0); }
154 	| STRING ':' HZ ':' NUMBER	{ $$ = bp_new($1, "hz", NULL, $5); }
155 	;
156 
157 staticv	: '$' NUMBER			{ $$ = get_varg($2); }
158 	| '$' '#'			{ $$ = get_nargs(); }
159 	;
160 
161 gvar	: '@' STRING			{ $$ = $2; }
162 	| '@'				{ $$ = UNNAMED_MAP; }
163 	;
164 
165 lvar	: '$' STRING			{ $$ = $2; }
166 	;
167 
168 mentry	: gvar '[' vargs ']'		{ $$ = bm_find($1, $3); }
169 	;
170 
171 mpat	: MOP0 '(' ')'			{ $$ = ba_new(NULL, $1); }
172 	| MOP1 '(' pat ')'		{ $$ = ba_new($3, $1); }
173 	| pat
174 	;
175 
176 pat	: CSTRING			{ $$ = ba_new($1, B_AT_STR); }
177 	| expr
178 	;
179 
180 filter	: /* empty */			{ $$ = NULL; }
181 	| '/' expr ENDFILT		{ $$ = bc_new(NULL, B_AT_OP_NE, $2); }
182 	;
183 
184 /*
185  * Give higher precedence to:
186  *  1. && and ||
187  *  2. ==, !=, <<, <, >=, >, +, =, &, ^, |
188  *  3. * and /
189  */
190 expr	: expr OP_LAND term	{ $$ = ba_op(B_AT_OP_LAND, $1, $3); }
191 	| expr OP_LOR term	{ $$ = ba_op(B_AT_OP_LOR, $1, $3); }
192 	| term
193 	;
194 
195 term	: term OP_EQ fterm	{ $$ = ba_op(B_AT_OP_EQ, $1, $3); }
196 	| term OP_NE fterm	{ $$ = ba_op(B_AT_OP_NE, $1, $3); }
197 	| term OP_LE fterm	{ $$ = ba_op(B_AT_OP_LE, $1, $3); }
198 	| term OP_LT fterm	{ $$ = ba_op(B_AT_OP_LT, $1, $3); }
199 	| term OP_GE fterm	{ $$ = ba_op(B_AT_OP_GE, $1, $3); }
200 	| term OP_GT fterm	{ $$ = ba_op(B_AT_OP_GT, $1, $3); }
201 	| term '+' fterm	{ $$ = ba_op(B_AT_OP_PLUS, $1, $3); }
202 	| term '-' fterm	{ $$ = ba_op(B_AT_OP_MINUS, $1, $3); }
203 	| term '&' fterm	{ $$ = ba_op(B_AT_OP_BAND, $1, $3); }
204 	| term '^' fterm	{ $$ = ba_op(B_AT_OP_XOR, $1, $3); }
205 	| term '|' fterm	{ $$ = ba_op(B_AT_OP_BOR, $1, $3); }
206 	| fterm
207 	;
208 
209 fterm	: fterm '*' factor	{ $$ = ba_op(B_AT_OP_MULT, $1, $3); }
210 	| fterm '/' factor	{ $$ = ba_op(B_AT_OP_DIVIDE, $1, $3); }
211 	| factor
212 	;
213 
214 variable: lvar			{ $$ = bl_find($1); }
215 	| gvar			{ $$ = bg_find($1); }
216 	;
217 
218 factor : '(' expr ')'		{ $$ = $2; }
219 	| NUMBER		{ $$ = ba_new($1, B_AT_LONG); }
220 	| BUILTIN		{ $$ = ba_new(NULL, $1); }
221 	| staticv
222 	| variable
223 	| mentry
224 	| func
225 	;
226 
227 func	: STR '(' staticv ')'		{ $$ = ba_new($3, B_AT_FN_STR); }
228 	| STR '(' staticv ',' pat ')'	{ $$ = ba_op(B_AT_FN_STR, $3, $5); }
229 	;
230 
231 vargs	: pat
232 	| vargs ',' pat			{ $$ = ba_append($1, $3); }
233 	;
234 
235 pargs	: expr
236 	| gvar ',' pat			{ $$ = ba_append(bg_find($1), $3); }
237 	;
238 
239 NL	: /* empty */
240 	| '\n'
241 	;
242 
243 stmt	: ';' NL			{ $$ = NULL; }
244 	| gvar '=' pat			{ $$ = bg_store($1, $3); }
245 	| lvar '=' pat			{ $$ = bl_store($1, $3); }
246 	| gvar '[' vargs ']' '=' mpat	{ $$ = bm_insert($1, $3, $6); }
247 	| FUNCN '(' vargs ')'		{ $$ = bs_new($1, $3, NULL); }
248 	| FUNC1 '(' pat ')'		{ $$ = bs_new($1, $3, NULL); }
249 	| MFUNC '(' variable ')'	{ $$ = bs_new($1, $3, NULL); }
250 	| FUNC0 '(' ')'			{ $$ = bs_new($1, NULL, NULL); }
251 	| F_DELETE '(' mentry ')'	{ $$ = bm_op($1, $3, NULL); }
252 	| F_PRINT '(' pargs ')'		{ $$ = bs_new($1, $3, NULL); }
253 	| gvar '=' OP1 '(' pat ')'	{ $$ = bh_inc($1, $5, NULL); }
254 	| gvar '=' OP4 '(' pat ',' vargs ')'	{ $$ = bh_inc($1, $5, $7); }
255 	;
256 
257 stmtblck: IF '(' expr ')' block			{ $$ = bt_new($3, $5); }
258 	;
259 
260 stmtlist: stmtlist stmtblck		{ $$ = bs_append($1, $2); }
261 	| stmtlist stmt			{ $$ = bs_append($1, $2); }
262 	| stmtblck
263 	| stmt
264 	;
265 
266 block	: '{' stmt ';' '}'			{ $$ = $2; }
267 	| stmt ';'
268 	;
269 
270 action	: '{' stmtlist '}'		{ $$ = $2; }
271 	;
272 
273 %%
274 
275 struct bt_arg*
276 get_varg(int index)
277 {
278 	extern int nargs;
279 	extern char **vargs;
280 	const char *errstr = NULL;
281 	long val;
282 
283 	if (0 < index && index <= nargs) {
284 		val = (long)strtonum(vargs[index-1], LONG_MIN, LONG_MAX,
285 		    &errstr);
286 		if (errstr == NULL)
287 			return ba_new(val, B_AT_LONG);
288 		return ba_new(vargs[index-1], B_AT_STR);
289 	}
290 
291 	return ba_new(0L, B_AT_NIL);
292 }
293 
294 struct bt_arg*
295 get_nargs(void)
296 {
297 	extern int nargs;
298 
299 	return ba_new((long) nargs, B_AT_LONG);
300 }
301 
302 /* Create a new rule, representing  "probe / filter / { action }" */
303 struct bt_rule *
304 br_new(struct bt_probe *probe, struct bt_filter *filter, struct bt_stmt *head)
305 {
306 	struct bt_rule *br;
307 
308 	br = calloc(1, sizeof(*br));
309 	if (br == NULL)
310 		err(1, "bt_rule: calloc");
311 	/* SLIST_INSERT_HEAD() nullify the next pointer. */
312 	SLIST_FIRST(&br->br_probes) = probe;
313 	br->br_filter = filter;
314 	/* SLIST_INSERT_HEAD() nullify the next pointer. */
315 	SLIST_FIRST(&br->br_action) = head;
316 
317 	SLIST_FIRST(&br->br_variables) = SLIST_FIRST(&l_variables);
318 	SLIST_INIT(&l_variables);
319 
320 	do {
321 		if (probe->bp_type != B_PT_PROBE)
322 			continue;
323 		g_nprobes++;
324 	} while ((probe = SLIST_NEXT(probe, bp_next)) != NULL);
325 
326 	TAILQ_INSERT_TAIL(&g_rules, br, br_next);
327 
328 	return br;
329 }
330 
331 /* Create a new condition */
332 struct bt_filter *
333 bc_new(struct bt_arg *term, enum bt_argtype op, struct bt_arg *ba)
334 {
335 	struct bt_filter *bf;
336 
337 	bf = calloc(1, sizeof(*bf));
338 	if (bf == NULL)
339 		err(1, "bt_filter: calloc");
340 
341 	bf->bf_condition = bs_new(B_AC_TEST, ba_op(op, term, ba), NULL);
342 
343 	return bf;
344 }
345 
346 /* Create a new if/else test */
347 struct bt_stmt *
348 bt_new(struct bt_arg *ba, struct bt_stmt *condbs)
349 {
350 	struct bt_arg *bop;
351 
352 	bop = ba_op(B_AT_OP_NE, NULL, ba);
353 
354 	return bs_new(B_AC_TEST, bop, (struct bt_var *)condbs);
355 
356 }
357 /* Create a new probe */
358 struct bt_probe *
359 bp_new(const char *prov, const char *func, const char *name, int32_t rate)
360 {
361 	struct bt_probe *bp;
362 	enum bt_ptype ptype;
363 
364 	if (rate < 0 || rate > INT32_MAX)
365 		errx(1, "only positive values permitted");
366 
367 	if (prov == NULL && func == NULL && name == NULL)
368 		ptype = rate; /* BEGIN or END */
369 	else
370 		ptype = B_PT_PROBE;
371 
372 	bp = calloc(1, sizeof(*bp));
373 	if (bp == NULL)
374 		err(1, "bt_probe: calloc");
375 	bp->bp_prov = prov;
376 	bp->bp_func = func;
377 	bp->bp_name = name;
378 	bp->bp_rate = rate;
379 	bp->bp_type = ptype;
380 
381 	return bp;
382 }
383 
384 /*
385  * Link two probes together, to build a probe list attached to
386  * a single action.
387  */
388 struct bt_probe *
389 bp_append(struct bt_probe *bp0, struct bt_probe *bp1)
390 {
391 	struct bt_probe *bp = bp0;
392 
393 	assert(bp1 != NULL);
394 
395 	if (bp0 == NULL)
396 		return bp1;
397 
398 	while (SLIST_NEXT(bp, bp_next) != NULL)
399 		bp = SLIST_NEXT(bp, bp_next);
400 
401 	SLIST_INSERT_AFTER(bp, bp1, bp_next);
402 
403 	return bp0;
404 }
405 
406 /* Create a new argument */
407 struct bt_arg *
408 ba_new0(void *val, enum bt_argtype type)
409 {
410 	struct bt_arg *ba;
411 
412 	ba = calloc(1, sizeof(*ba));
413 	if (ba == NULL)
414 		err(1, "bt_arg: calloc");
415 	ba->ba_value = val;
416 	ba->ba_type = type;
417 
418 	return ba;
419 }
420 
421 /*
422  * Link two arguments together, to build an argument list used in
423  * function calls.
424  */
425 struct bt_arg *
426 ba_append(struct bt_arg *da0, struct bt_arg *da1)
427 {
428 	struct bt_arg *ba = da0;
429 
430 	assert(da1 != NULL);
431 
432 	if (da0 == NULL)
433 		return da1;
434 
435 	while (SLIST_NEXT(ba, ba_next) != NULL)
436 		ba = SLIST_NEXT(ba, ba_next);
437 
438 	SLIST_INSERT_AFTER(ba, da1, ba_next);
439 
440 	return da0;
441 }
442 
443 /* Create an operator argument */
444 struct bt_arg *
445 ba_op(enum bt_argtype op, struct bt_arg *da0, struct bt_arg *da1)
446 {
447 	return ba_new(ba_append(da0, da1), op);
448 }
449 
450 /* Create a new statement: function call or assignment. */
451 struct bt_stmt *
452 bs_new(enum bt_action act, struct bt_arg *head, struct bt_var *var)
453 {
454 	struct bt_stmt *bs;
455 
456 	bs = calloc(1, sizeof(*bs));
457 	if (bs == NULL)
458 		err(1, "bt_stmt: calloc");
459 	bs->bs_act = act;
460 	bs->bs_var = var;
461 	/* SLIST_INSERT_HEAD() nullify the next pointer. */
462 	SLIST_FIRST(&bs->bs_args) = head;
463 
464 	return bs;
465 }
466 
467 /* Link two statements together, to build an 'action'. */
468 struct bt_stmt *
469 bs_append(struct bt_stmt *ds0, struct bt_stmt *ds1)
470 {
471 	struct bt_stmt *bs = ds0;
472 
473 	if (ds0 == NULL)
474 		return ds1;
475 
476 	if (ds1 == NULL)
477 		return ds0;
478 
479 	while (SLIST_NEXT(bs, bs_next) != NULL)
480 		bs = SLIST_NEXT(bs, bs_next);
481 
482 	SLIST_INSERT_AFTER(bs, ds1, bs_next);
483 
484 	return ds0;
485 }
486 
487 const char *
488 bv_name(struct bt_var *bv)
489 {
490 	if (strncmp(bv->bv_name, UNNAMED_MAP, strlen(UNNAMED_MAP)) == 0)
491 		return "";
492 	return bv->bv_name;
493 }
494 
495 /* Allocate a variable. */
496 struct bt_var *
497 bv_new(const char *vname)
498 {
499 	struct bt_var *bv;
500 
501 	bv = calloc(1, sizeof(*bv));
502 	if (bv == NULL)
503 		err(1, "bt_var: calloc");
504 	bv->bv_name = vname;
505 
506 	return bv;
507 }
508 
509 /* Return the global variable corresponding to `vname'. */
510 struct bt_var *
511 bg_lookup(const char *vname)
512 {
513 	struct bt_var *bv;
514 
515 	SLIST_FOREACH(bv, &g_variables, bv_next) {
516 		if (strcmp(vname, bv->bv_name) == 0)
517 			break;
518 	}
519 
520 	return bv;
521 }
522 
523 /* Find or allocate a global variable corresponding to `vname' */
524 struct bt_var *
525 bg_get(const char *vname)
526 {
527 	struct bt_var *bv;
528 
529 	bv = bg_lookup(vname);
530 	if (bv == NULL) {
531 		bv = bv_new(vname);
532 		SLIST_INSERT_HEAD(&g_variables, bv, bv_next);
533 	}
534 
535 	return bv;
536 }
537 
538 /* Create an "argument" that points to an existing untyped variable. */
539 struct bt_arg *
540 bg_find(const char *vname)
541 {
542 	return ba_new(bg_get(vname), B_AT_VAR);
543 }
544 
545 /* Create a 'store' statement to assign a value to a global variable. */
546 struct bt_stmt *
547 bg_store(const char *vname, struct bt_arg *vval)
548 {
549 	return bs_new(B_AC_STORE, vval, bg_get(vname));
550 }
551 
552 /* Return the local variable corresponding to `vname'. */
553 struct bt_var *
554 bl_lookup(const char *vname)
555 {
556 	struct bt_var *bv;
557 
558 	SLIST_FOREACH(bv, &l_variables, bv_next) {
559 		if (strcmp(vname, bv->bv_name) == 0)
560 			break;
561 	}
562 
563 	return bv;
564 }
565 
566 /* Find or create a local variable corresponding to `vname' */
567 struct bt_arg *
568 bl_find(const char *vname)
569 {
570 	struct bt_var *bv;
571 
572 	bv = bl_lookup(vname);
573 	if (bv == NULL) {
574 		bv = bv_new(vname);
575 		SLIST_INSERT_HEAD(&l_variables, bv, bv_next);
576 	}
577 
578 	return ba_new(bv, B_AT_VAR);
579 }
580 
581 /* Create a 'store' statement to assign a value to a local variable. */
582 struct bt_stmt *
583 bl_store(const char *vname, struct bt_arg *vval)
584 {
585 	struct bt_var *bv;
586 
587 	bv = bl_lookup(vname);
588 	if (bv == NULL) {
589 		bv = bv_new(vname);
590 		SLIST_INSERT_HEAD(&l_variables, bv, bv_next);
591 	}
592 
593 	return bs_new(B_AC_STORE, vval, bv);
594 }
595 
596 struct bt_stmt *
597 bm_op(enum bt_action mact, struct bt_arg *ba, struct bt_arg *mval)
598 {
599 	return bs_new(mact, ba, (struct bt_var *)mval);
600 }
601 
602 /* Create a 'map store' statement to assign a value to a map entry. */
603 struct bt_stmt *
604 bm_insert(const char *mname, struct bt_arg *mkey, struct bt_arg *mval)
605 {
606 	struct bt_arg *ba;
607 
608 	ba = ba_new(bg_get(mname), B_AT_MAP);
609 	ba->ba_key = mkey;
610 
611 	return bs_new(B_AC_INSERT, ba, (struct bt_var *)mval);
612 }
613 
614 /* Create an argument that points to a variable and attach a key to it. */
615 struct bt_arg *
616 bm_find(const char *vname, struct bt_arg *mkey)
617 {
618 	struct bt_arg *ba;
619 
620 	ba = ba_new(bg_get(vname), B_AT_MAP);
621 	ba->ba_key = mkey;
622 	return ba;
623 }
624 
625 /*
626  * Histograms implemented using associative arrays (maps).  In the case
627  * of linear histograms `ba_key' points to a list of (min, max, step)
628  * necessary to "bucketize" any value.
629  */
630 struct bt_stmt *
631 bh_inc(const char *hname, struct bt_arg *hval, struct bt_arg *hrange)
632 {
633 	struct bt_arg *ba;
634 
635 	if (hrange == NULL) {
636 		/* Power-of-2 histogram */
637 	} else {
638 		long min = 0, max;
639 		int count = 0;
640 
641 		/* Linear histogram */
642 		for (ba = hrange; ba != NULL; ba = SLIST_NEXT(ba, ba_next)) {
643 			if (++count > 3)
644 				yyerror("too many arguments");
645 			if (ba->ba_type != B_AT_LONG)
646 				yyerror("type invalid");
647 
648 			switch (count) {
649 			case 1:
650 				min = (long)ba->ba_value;
651 				if (min >= 0)
652 					break;
653 				yyerror("negative minium");
654 			case 2:
655 				max = (long)ba->ba_value;
656 				if (max > min)
657 					break;
658 				yyerror("maximum smaller than minium (%d < %d)",
659 				    max,  min);
660 			case 3:
661 				break;
662 			default:
663 				assert(0);
664 			}
665 		}
666 		if (count < 3)
667 			yyerror("%d missing arguments", 3 - count);
668 	}
669 
670 	ba = ba_new(bg_get(hname), B_AT_HIST);
671 	ba->ba_key = hrange;
672 	return bs_new(B_AC_BUCKETIZE, ba, (struct bt_var *)hval);
673 }
674 
675 struct keyword {
676 	const char	*word;
677 	int		 token;
678 	int		 type;
679 };
680 
681 int
682 kw_cmp(const void *str, const void *xkw)
683 {
684 	return (strcmp(str, ((const struct keyword *)xkw)->word));
685 }
686 
687 struct keyword *
688 lookup(char *s)
689 {
690 	static const struct keyword kws[] = {
691 		{ "BEGIN",	BEGIN,		B_PT_BEGIN },
692 		{ "END",	END,		B_PT_END },
693 		{ "arg0",	BUILTIN,	B_AT_BI_ARG0 },
694 		{ "arg1",	BUILTIN,	B_AT_BI_ARG1 },
695 		{ "arg2",	BUILTIN,	B_AT_BI_ARG2 },
696 		{ "arg3",	BUILTIN,	B_AT_BI_ARG3 },
697 		{ "arg4",	BUILTIN,	B_AT_BI_ARG4 },
698 		{ "arg5",	BUILTIN,	B_AT_BI_ARG5 },
699 		{ "arg6",	BUILTIN,	B_AT_BI_ARG6 },
700 		{ "arg7",	BUILTIN,	B_AT_BI_ARG7 },
701 		{ "arg8",	BUILTIN,	B_AT_BI_ARG8 },
702 		{ "arg9",	BUILTIN,	B_AT_BI_ARG9 },
703 		{ "clear",	MFUNC,		B_AC_CLEAR },
704 		{ "comm",	BUILTIN,	B_AT_BI_COMM },
705 		{ "count",	MOP0, 		B_AT_MF_COUNT },
706 		{ "cpu",	BUILTIN,	B_AT_BI_CPU },
707 		{ "delete",	F_DELETE,	B_AC_DELETE },
708 		{ "exit",	FUNC0,		B_AC_EXIT },
709 		{ "hist",	OP1,		0 },
710 		{ "hz",		HZ,		0 },
711 		{ "if",		IF,		0 },
712 		{ "kstack",	BUILTIN,	B_AT_BI_KSTACK },
713 		{ "lhist",	OP4,		0 },
714 		{ "max",	MOP1,		B_AT_MF_MAX },
715 		{ "min",	MOP1,		B_AT_MF_MIN },
716 		{ "nsecs",	BUILTIN,	B_AT_BI_NSECS },
717 		{ "pid",	BUILTIN,	B_AT_BI_PID },
718 		{ "print",	F_PRINT,	B_AC_PRINT },
719 		{ "printf",	FUNCN,		B_AC_PRINTF },
720 		{ "probe",	BUILTIN,	B_AT_BI_PROBE },
721 		{ "retval",	BUILTIN,	B_AT_BI_RETVAL },
722 		{ "str",	STR,		B_AT_FN_STR },
723 		{ "sum",	MOP1,		B_AT_MF_SUM },
724 		{ "tid",	BUILTIN,	B_AT_BI_TID },
725 		{ "time",	FUNC1,		B_AC_TIME },
726 		{ "ustack",	BUILTIN,	B_AT_BI_USTACK },
727 		{ "zero",	MFUNC,		B_AC_ZERO },
728 	};
729 
730 	return bsearch(s, kws, nitems(kws), sizeof(kws[0]), kw_cmp);
731 }
732 
733 int
734 peek(void)
735 {
736 	if (pbuf != NULL) {
737 		if (pindex < plen)
738 			return pbuf[pindex];
739 	}
740 	return EOF;
741 }
742 
743 int
744 lgetc(void)
745 {
746 	if (pbuf != NULL) {
747 		if (pindex < plen) {
748 			yylval.colno++;
749 			return pbuf[pindex++];
750 		}
751 	}
752 	return EOF;
753 }
754 
755 void
756 lungetc(void)
757 {
758 	if (pbuf != NULL && pindex > 0) {
759 		yylval.colno--;
760 		pindex--;
761 	}
762 }
763 
764 int
765 yylex(void)
766 {
767 	unsigned char	 buf[1024];
768 	unsigned char	*ebuf, *p, *str;
769 	int		 c;
770 
771 	ebuf = buf + sizeof(buf);
772 	p = buf;
773 
774 again:
775 	/* skip whitespaces */
776 	for (c = lgetc(); isspace(c); c = lgetc()) {
777 		if (c == '\n') {
778 			yylval.lineno++;
779 			yylval.colno = 0;
780 		}
781 	}
782 
783 	/* skip single line comments and shell magic */
784 	if ((c == '/' && peek() == '/') ||
785 	    (yylval.lineno == 1 && yylval.colno == 1 && c == '#' &&
786 	     peek() == '!')) {
787 		for (c = lgetc(); c != EOF; c = lgetc()) {
788 			if (c == '\n') {
789 				yylval.lineno++;
790 				yylval.colno = 0;
791 				goto again;
792 			}
793 		}
794 	}
795 
796 	/* skip multi line comments */
797 	if (c == '/' && peek() == '*') {
798 		int pc;
799 
800 		for (pc = 0, c = lgetc(); c != EOF; c = lgetc()) {
801 			if (pc == '*' && c == '/')
802 				goto again;
803 			else if (c == '\n')
804 				yylval.lineno++;
805 			pc = c;
806 		}
807 	}
808 
809 	switch (c) {
810 	case '!':
811 	case '=':
812 		if (peek() == '=') {
813 			lgetc();
814 			return (c == '=') ? OP_EQ : OP_NE;
815 		}
816 		return c;
817 	case '<':
818 		if (peek() == '=') {
819 			lgetc();
820 			return OP_LE;
821 		}
822 		return OP_LT;
823 	case '>':
824 		if (peek() == '=') {
825 			lgetc();
826 			return OP_GE;
827 		}
828 		return OP_GT;
829 	case '&':
830 		if (peek() == '&') {
831 			lgetc();
832 			return OP_LAND;
833 		}
834 		return c;
835 	case '|':
836 		if (peek() == '|') {
837 			lgetc();
838 			return OP_LOR;
839 		}
840 		return c;
841 	case '/':
842 		if (peek() == '{' || peek() == '/' || peek() == '\n') {
843 			return ENDFILT;
844 		}
845 		/* FALLTHROUGH */
846 	case ',':
847 	case '(':
848 	case ')':
849 	case '{':
850 	case '}':
851 	case ':':
852 	case ';':
853 		return c;
854 	case EOF:
855 		return 0;
856 	case '"':
857 		/* parse C-like string */
858 		while ((c = lgetc()) != EOF && c != '"') {
859 			if (c == '\\') {
860 				c = lgetc();
861 				switch (c) {
862 				case '\\':	c = '\\';	break;
863 				case '\'':	c = '\'';	break;
864 				case '"':	c = '"';	break;
865 				case 'a':	c = '\a';	break;
866 				case 'b':	c = '\b';	break;
867 				case 'e':	c = 033;	break;
868 				case 'f':	c = '\f';	break;
869 				case 'n':	c = '\n';	break;
870 				case 'r':	c = '\r';	break;
871 				case 't':	c = '\t';	break;
872 				case 'v':	c = '\v';	break;
873 				default:
874 					yyerror("'%c' unsuported escape", c);
875 					return ERROR;
876 				}
877 			}
878 			*p++ = c;
879 			if (p == ebuf) {
880 				yyerror("too long line");
881 				return ERROR;
882 			}
883 		}
884 		if (c == EOF) {
885 			yyerror("\"%s\" invalid EOF", buf);
886 			return ERROR;
887 		}
888 		*p++ = '\0';
889 		if ((str = strdup(buf)) == NULL)
890 			err(1, "%s", __func__);
891 		yylval.v.string = str;
892 		return CSTRING;
893 	default:
894 		break;
895 	}
896 
897 #define allowed_to_end_number(x) \
898     (isspace(x) || x == ')' || x == '/' || x == '{' || x == ';' || x == ']' || x == ',')
899 
900 	/* parsing number */
901 	if (isdigit(c)) {
902 		do {
903 			*p++ = c;
904 			if (p == ebuf) {
905 				yyerror("too long line");
906 				return ERROR;
907 			}
908 		} while ((c = lgetc()) != EOF && isdigit(c));
909 		lungetc();
910 		if (c == EOF || allowed_to_end_number(c)) {
911 			const char *errstr = NULL;
912 
913 			*p = '\0';
914 			yylval.v.number = strtonum(buf, LONG_MIN, LONG_MAX,
915 			    &errstr);
916 			if (errstr) {
917 				yyerror("invalid number '%s' (%s)", buf,
918 				    errstr);
919 				return ERROR;
920 			}
921 			return NUMBER;
922 		} else {
923 			while (p > buf + 1) {
924 				--p;
925 				lungetc();
926 			}
927 			c = *--p;
928 		}
929 	}
930 
931 #define allowed_in_string(x) (isalnum(c) || c == '_')
932 
933 	/* parsing next word */
934 	if (allowed_in_string(c)) {
935 		struct keyword *kwp;
936 		do {
937 			*p++ = c;
938 			if (p == ebuf) {
939 				yyerror("too long line");
940 				return ERROR;
941 			}
942 		} while ((c = lgetc()) != EOF && (allowed_in_string(c)));
943 		lungetc();
944 		*p = '\0';
945 		kwp = lookup(buf);
946 		if (kwp == NULL) {
947 			if ((yylval.v.string = strdup(buf)) == NULL)
948 				err(1, "%s", __func__);
949 			return STRING;
950 		}
951 		if (pflag) {
952 			/*
953 			 * Probe lexer backdoor, interpret the token as a string
954 			 * rather than a keyword. Otherwise, reserved keywords
955 			 * would conflict with syscall names. The exception to
956 			 * this is 'hz', which hopefully will never be a
957 			 * syscall.
958 			 */
959 			if (kwp->token != HZ) {
960 				yylval.v.string = kwp->word;
961 				return STRING;
962 			}
963 		}
964 		yylval.v.i = kwp->type;
965 		return kwp->token;
966 	}
967 
968 	if (c == '\n') {
969 		yylval.lineno++;
970 		yylval.colno = 0;
971 	}
972 	if (c == EOF)
973 		return 0;
974 	return c;
975 }
976 
977 void
978 pprint_syntax_error(void)
979 {
980 	char line[BUFSIZ];
981 	int c, indent = yylval.colno;
982 	size_t i;
983 
984 	strlcpy(line, &pbuf[pindex - yylval.colno], sizeof(line));
985 
986 	for (i = 0; line[i] != '\0' && (c = line[i]) != '\n'; i++) {
987 		if (c == '\t')
988 			indent += (8 - 1);
989 		fputc(c, stderr);
990 	}
991 
992 	fprintf(stderr, "\n%*c\n", indent, '^');
993 }
994 
995 void
996 yyerror(const char *fmt, ...)
997 {
998 	const char *prefix;
999 	va_list	va;
1000 
1001 	prefix = (yylval.filename != NULL) ? yylval.filename : getprogname();
1002 
1003 	fprintf(stderr, "%s:%d:%d: ", prefix, yylval.lineno, yylval.colno);
1004 	va_start(va, fmt);
1005 	vfprintf(stderr, fmt, va);
1006 	va_end(va);
1007 	fprintf(stderr, ":\n");
1008 
1009 	pprint_syntax_error();
1010 
1011 	perrors++;
1012 }
1013 
1014 int
1015 btparse(const char *str, size_t len, const char *filename, int debug)
1016 {
1017 	if (debug > 0)
1018 		yydebug = 1;
1019 	pbuf = str;
1020 	plen = len;
1021 	pindex = 0;
1022 	yylval.filename = filename;
1023 	yylval.lineno = 1;
1024 
1025 	yyparse();
1026 	if (perrors)
1027 		return perrors;
1028 
1029 	assert(SLIST_EMPTY(&l_variables));
1030 
1031 	return 0;
1032 }
1033