xref: /openbsd/usr.sbin/btrace/bt_parse.y (revision 139d07b5)
1 /*	$OpenBSD: bt_parse.y,v 1.62 2025/01/23 11:17:32 mpi Exp $	*/
2 
3 /*
4  * Copyright (c) 2019-2023 Martin Pieuchot <mpi@openbsd.org>
5  * Copyright (c) 2019 Tobias Heider <tobhe@openbsd.org>
6  * Copyright (c) 2015 Ted Unangst <tedu@openbsd.org>
7  *
8  * Permission to use, copy, modify, and distribute this software for any
9  * purpose with or without fee is hereby granted, provided that the above
10  * copyright notice and this permission notice appear in all copies.
11  *
12  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
13  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
14  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
15  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
16  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
17  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
18  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
19  */
20 
21 /*
22  * B tracing language parser.
23  *
24  * The dialect of the language understood by this parser aims to be
25  * compatible with the one understood by bpftrace(8), see:
26  *
27  * https://github.com/iovisor/bpftrace/blob/master/docs/reference_guide.md
28  *
29  */
30 
31 %{
32 #include <sys/queue.h>
33 
34 #include <assert.h>
35 #include <ctype.h>
36 #include <err.h>
37 #include <errno.h>
38 #include <limits.h>
39 #include <stdarg.h>
40 #include <stdint.h>
41 #include <stdio.h>
42 
43 #include "bt_parser.h"
44 
45 /* Name for the default map @[], hopefully nobody will use this one ;) */
46 #define UNNAMED_MAP	"___unnamed_map_doesnt_have_any_name"
47 
48 /* List of rules to evaluate. */
49 struct bt_ruleq		g_rules = TAILQ_HEAD_INITIALIZER(g_rules);
50 
51 /* Number of probes except BEGIN/END. */
52 int		 	g_nprobes;
53 
54 /* List of global variables, including maps. */
55 SLIST_HEAD(, bt_var)	 g_variables;
56 
57 /* List of local variables, cleaned for each new rule. */
58 SLIST_HEAD(, bt_var)	l_variables;
59 
60 struct bt_arg 		g_nullba = BA_INITIALIZER(0, B_AT_LONG);
61 struct bt_arg		g_maxba = BA_INITIALIZER(LONG_MAX, B_AT_LONG);
62 
63 struct bt_rule	*br_new(struct bt_probe *, struct bt_filter *,
64 		     struct bt_stmt *);
65 struct bt_probe	*bp_new(const char *, const char *, const char *, long);
66 struct bt_arg	*ba_append(struct bt_arg *, struct bt_arg *);
67 struct bt_arg	*ba_op(enum bt_argtype, struct bt_arg *, struct bt_arg *);
68 struct bt_stmt	*bs_new(enum bt_action, struct bt_arg *, struct bt_var *);
69 struct bt_stmt	*bs_append(struct bt_stmt *, struct bt_stmt *);
70 
71 struct bt_var	*bg_lookup(const char *);
72 struct bt_stmt	*bg_store(const char *, struct bt_arg *);
73 struct bt_arg	*bg_find(const char *);
74 struct bt_var	*bg_get(const char *);
75 
76 struct bt_arg	*bi_find(struct bt_arg *, unsigned long);
77 
78 struct bt_var	*bl_lookup(const char *);
79 struct bt_stmt	*bl_store(const char *, struct bt_arg *);
80 struct bt_arg	*bl_find(const char *);
81 
82 struct bt_arg	*bm_find(const char *, struct bt_arg *);
83 struct bt_stmt	*bm_insert(const char *, struct bt_arg *, struct bt_arg *);
84 struct bt_stmt	*bm_op(enum bt_action, struct bt_arg *, struct bt_arg *);
85 
86 struct bt_stmt	*bh_inc(const char *, struct bt_arg *, struct bt_arg *);
87 
88 /*
89  * Lexer
90  */
91 const char	*pbuf;
92 size_t		 plen;
93 size_t		 pindex;
94 int		 perrors = 0;
95 
/*
 * Semantic value of a token or non-terminal, together with the input
 * position tracked by the lexer.
 */
typedef struct {
	union {
		long			 number;	/* NUMBER */
		int			 i;		/* keywords, operators */
		const char		*string;	/* STRING, CSTRING, GVAR, LVAR */
		struct bt_probe		*probe;		/* plist, probe, pname */
		struct bt_filter	*filter;	/* filter */
		struct bt_stmt		*stmt;		/* statements and actions */
		struct bt_arg		*arg;		/* PVAR, PNUM, expressions */
	} v;
	const char			*filename;	/* used to prefix errors */
	int				 lineno;	/* current line */
	int				 colno;		/* current column */
} yystype;
#define YYSTYPE yystype
111 
112 static void	 yyerror(const char *, ...);
113 static int	 yylex(void);
114 
115 static int 	 pflag = 0;		/* probe parsing context flag */
116 static int 	 beflag = 0;		/* BEGIN/END parsing context flag */
117 %}
118 
119 %token	<v.i>		ERROR ENDFILT
120 %token	<v.i>		OP_EQ OP_NE OP_LE OP_LT OP_GE OP_GT OP_LAND OP_LOR
121 /* Builtins */
122 %token	<v.i>		BUILTIN BEGIN ELSE END IF STR
123 /* Functions and Map operators */
124 %token  <v.i>		F_DELETE F_PRINT
125 %token	<v.i>		MFUNC FUNC0 FUNC1 FUNCN OP1 OP2 OP4 MOP0 MOP1
126 %token	<v.string>	STRING CSTRING GVAR LVAR
127 %token	<v.arg>		PVAR PNUM
128 %token	<v.number>	NUMBER
129 
130 %type	<v.i>		beginend
131 %type	<v.probe>	plist probe pname
132 %type	<v.filter>	filter
133 %type	<v.stmt>	action stmt stmtblck stmtlist block
134 %type	<v.arg>		vargs mentry mpat pargs
135 %type	<v.arg>		expr term fterm variable factor func
136 %%
137 
/* A script is a sequence of rules, newlines are allowed between them. */
grammar	: /* empty */
	| grammar '\n'
	| grammar rule
	| grammar error
	;

/* One rule: a list of probes, an optional filter and an action. */
rule	: plist filter action
		{
			br_new($1, $2, $3);
			beflag = 0;
		}
	;

beginend: BEGIN
	| END
	;

/* Comma separated list of probes sharing the same filter and action. */
plist	: plist ',' probe
		{ $$ = bp_append($1, $3); }
	| probe
	;

/*
 * The mid-rule actions set the context flags (`pflag', `beflag') that
 * yylex() consults when it recognizes a keyword.
 */
probe	: { pflag = 1; } pname
		{
			$$ = $2;
			pflag = 0;
		}
	| { beflag = 1; } beginend
		{ $$ = bp_new(NULL, NULL, NULL, $2); }
	;

/* "provider:function:name" or "provider:unit:number". */
pname	: STRING ':' STRING ':' STRING
		{ $$ = bp_new($1, $3, $5, 0); }
	| STRING ':' STRING ':' NUMBER
		{ $$ = bp_new($1, $3, NULL, $5); }
	;
160 
/* A map entry: a global variable followed by a key between brackets. */
mentry	: GVAR '[' vargs ']'
		{ $$ = bm_find($1, $3); }
	;

/* Value stored in a map entry: a map operator or a plain expression. */
mpat	: MOP0 '(' ')'
		{ $$ = ba_new(NULL, $1); }
	| MOP1 '(' expr ')'
		{ $$ = ba_new($3, $1); }
	| expr
	;

/* Optional filter, turned into a B_AT_OP_NE test on its expression. */
filter	: /* empty */
		{ $$ = NULL; }
	| '/' expr ENDFILT
		{ $$ = bc_new(NULL, B_AT_OP_NE, $2); }
	;
172 
/*
 * Binary operators, from the lowest to the highest precedence:
 *  1. && and ||
 *  2. ==, !=, <=, <, >=, >, +, -, &, ^, |
 *  3. *, /, %
 *
 * Every level is left-recursive, operators of the same level are thus
 * grouped from left to right.
 */
expr	: expr OP_LAND term
		{ $$ = ba_op(B_AT_OP_LAND, $1, $3); }
	| expr OP_LOR term
		{ $$ = ba_op(B_AT_OP_LOR, $1, $3); }
	| term
	;

term	: term OP_EQ fterm
		{ $$ = ba_op(B_AT_OP_EQ, $1, $3); }
	| term OP_NE fterm
		{ $$ = ba_op(B_AT_OP_NE, $1, $3); }
	| term OP_LE fterm
		{ $$ = ba_op(B_AT_OP_LE, $1, $3); }
	| term OP_LT fterm
		{ $$ = ba_op(B_AT_OP_LT, $1, $3); }
	| term OP_GE fterm
		{ $$ = ba_op(B_AT_OP_GE, $1, $3); }
	| term OP_GT fterm
		{ $$ = ba_op(B_AT_OP_GT, $1, $3); }
	| term '+' fterm
		{ $$ = ba_op(B_AT_OP_PLUS, $1, $3); }
	| term '-' fterm
		{ $$ = ba_op(B_AT_OP_MINUS, $1, $3); }
	| term '&' fterm
		{ $$ = ba_op(B_AT_OP_BAND, $1, $3); }
	| term '^' fterm
		{ $$ = ba_op(B_AT_OP_XOR, $1, $3); }
	| term '|' fterm
		{ $$ = ba_op(B_AT_OP_BOR, $1, $3); }
	| fterm
	;

fterm	: fterm '*' factor
		{ $$ = ba_op(B_AT_OP_MULT, $1, $3); }
	| fterm '/' factor
		{ $$ = ba_op(B_AT_OP_DIVIDE, $1, $3); }
	| fterm '%' factor
		{ $$ = ba_op(B_AT_OP_MODULO, $1, $3); }
	| factor
	;
203 
/* A local or global variable, or a member of a tuple variable. */
variable: LVAR
		{ $$ = bl_find($1); }
	| GVAR
		{ $$ = bg_find($1); }
	| variable '.' NUMBER
		{ $$ = bi_find($1, $3); }
	;

/* Operands: parenthesized expressions, tuples, literals, variables... */
factor	: '(' expr ')'
		{ $$ = $2; }
	| '(' vargs ',' expr ')'
		{ $$ = ba_new(ba_append($2, $4), B_AT_TUPLE); }
	| NUMBER
		{ $$ = ba_new($1, B_AT_LONG); }
	| BUILTIN
		{ $$ = ba_new(NULL, $1); }
	| CSTRING
		{ $$ = ba_new($1, B_AT_STR); }
	| PVAR
	| PNUM
	| variable
	| mentry
	| func
	;

/* str() applied to a positional parameter, with an optional 2nd argument. */
func	: STR '(' PVAR ')'
		{ $$ = ba_new($3, B_AT_FN_STR); }
	| STR '(' PVAR ',' expr ')'
		{ $$ = ba_op(B_AT_FN_STR, $3, $5); }
	;

/* Comma separated list of expressions. */
vargs	: expr
	| vargs ',' expr
		{ $$ = ba_append($1, $3); }
	;

/* Arguments of print(): an expression, or a global variable and one. */
pargs	: expr
	| GVAR ',' expr
		{ $$ = ba_append(bg_find($1), $3); }
	;
232 
NL	: /* empty */
	| '\n'
	;

/* A single statement: assignment, map update or function call. */
stmt	: ';' NL
		{ $$ = NULL; }
	| GVAR '=' expr
		{ $$ = bg_store($1, $3); }
	| LVAR '=' expr
		{ $$ = bl_store($1, $3); }
	| GVAR '[' vargs ']' '=' mpat
		{ $$ = bm_insert($1, $3, $6); }
	| FUNCN '(' vargs ')'
		{ $$ = bs_new($1, $3, NULL); }
	| FUNC1 '(' expr ')'
		{ $$ = bs_new($1, $3, NULL); }
	| MFUNC '(' variable ')'
		{ $$ = bs_new($1, $3, NULL); }
	| FUNC0 '(' ')'
		{ $$ = bs_new($1, NULL, NULL); }
	| F_DELETE '(' mentry ')'
		{ $$ = bm_op($1, $3, NULL); }
	| F_PRINT '(' pargs ')'
		{ $$ = bs_new($1, $3, NULL); }
	| GVAR '=' OP1 '(' expr ')'
		{ $$ = bh_inc($1, $5, NULL); }
	| GVAR '=' OP4 '(' expr ',' vargs ')'
		{ $$ = bh_inc($1, $5, $7); }
	;
250 
/* if / else if / else constructs. */
stmtblck: IF '(' expr ')' block
		{ $$ = bt_new($3, $5, NULL); }
	| IF '(' expr ')' block ELSE block
		{ $$ = bt_new($3, $5, $7); }
	| IF '(' expr ')' block ELSE stmtblck
		{ $$ = bt_new($3, $5, $7); }
	;

/* Statements are chained in the order in which they are written. */
stmtlist: stmtlist stmtblck
		{ $$ = bs_append($1, $2); }
	| stmtlist stmt
		{ $$ = bs_append($1, $2); }
	| stmtblck
	| stmt
	;

/* Body of a condition: a braced action or a single statement. */
block	: action
	| stmt ';'
	;

action	: '{' stmtlist '}'
		{ $$ = $2; }
	| '{' '}'
		{ $$ = NULL; }
	;
269 
270 %%
271 
272 struct bt_arg*
273 get_varg(int index)
274 {
275 	extern int nargs;
276 	extern char **vargs;
277 	const char *errstr = NULL;
278 	long val;
279 
280 	if (1 <= index && index <= nargs) {
281 		val = (long)strtonum(vargs[index-1], LONG_MIN, LONG_MAX,
282 		    &errstr);
283 		if (errstr == NULL)
284 			return ba_new(val, B_AT_LONG);
285 		return ba_new(vargs[index-1], B_AT_STR);
286 	}
287 
288 	return ba_new(0L, B_AT_NIL);
289 }
290 
291 struct bt_arg*
get_nargs(void)292 get_nargs(void)
293 {
294 	extern int nargs;
295 
296 	return ba_new((long) nargs, B_AT_LONG);
297 }
298 
299 /* Create a new rule, representing  "probe / filter / { action }" */
300 struct bt_rule *
br_new(struct bt_probe * probe,struct bt_filter * filter,struct bt_stmt * head)301 br_new(struct bt_probe *probe, struct bt_filter *filter, struct bt_stmt *head)
302 {
303 	struct bt_rule *br;
304 
305 	br = calloc(1, sizeof(*br));
306 	if (br == NULL)
307 		err(1, "bt_rule: calloc");
308 	/* SLIST_INSERT_HEAD() nullify the next pointer. */
309 	SLIST_FIRST(&br->br_probes) = probe;
310 	br->br_filter = filter;
311 	/* SLIST_INSERT_HEAD() nullify the next pointer. */
312 	SLIST_FIRST(&br->br_action) = head;
313 
314 	SLIST_FIRST(&br->br_variables) = SLIST_FIRST(&l_variables);
315 	SLIST_INIT(&l_variables);
316 
317 	do {
318 		if (probe->bp_type != B_PT_PROBE)
319 			continue;
320 		g_nprobes++;
321 	} while ((probe = SLIST_NEXT(probe, bp_next)) != NULL);
322 
323 	TAILQ_INSERT_TAIL(&g_rules, br, br_next);
324 
325 	return br;
326 }
327 
328 /* Create a new condition */
329 struct bt_filter *
bc_new(struct bt_arg * term,enum bt_argtype op,struct bt_arg * ba)330 bc_new(struct bt_arg *term, enum bt_argtype op, struct bt_arg *ba)
331 {
332 	struct bt_filter *bf;
333 
334 	bf = calloc(1, sizeof(*bf));
335 	if (bf == NULL)
336 		err(1, "bt_filter: calloc");
337 
338 	bf->bf_condition = bs_new(B_AC_TEST, ba_op(op, term, ba), NULL);
339 
340 	return bf;
341 }
342 
343 /* Create a new if/else test */
344 struct bt_stmt *
bt_new(struct bt_arg * ba,struct bt_stmt * condbs,struct bt_stmt * elsebs)345 bt_new(struct bt_arg *ba, struct bt_stmt *condbs, struct bt_stmt *elsebs)
346 {
347 	struct bt_arg *bop;
348 	struct bt_cond *bc;
349 
350 	bop = ba_op(B_AT_OP_NE, NULL, ba);
351 
352 	bc = calloc(1, sizeof(*bc));
353 	if (bc == NULL)
354 		err(1, "bt_cond: calloc");
355 	bc->bc_condbs = condbs;
356 	bc->bc_elsebs = elsebs;
357 
358 	return bs_new(B_AC_TEST, bop, (struct bt_var *)bc);
359 }
360 
/*
 * Convert `value', expressed in `unit', to a period in nanoseconds.
 *
 * interval and profile support the same units: "hz", "us", "ms" and "s".
 *
 * Return 0, after reporting the error with yyerror(), if the unit is
 * unknown, if `value' is smaller than 1 or if it is too large for the
 * unit.
 */
static uint64_t
bp_unit_to_nsec(const char *unit, long value)
{
	static const struct {
		const char *name;
		enum { UNIT_HZ, UNIT_US, UNIT_MS, UNIT_S } id;
		long long max;
	} units[] = {
		{ .name = "hz", .id = UNIT_HZ, .max = 1000000LL },
		{ .name = "us", .id = UNIT_US, .max = LLONG_MAX / 1000 },
		{ .name = "ms", .id = UNIT_MS, .max = LLONG_MAX / 1000000 },
		{ .name = "s", .id = UNIT_S, .max = LLONG_MAX / 1000000000 },
	};
	size_t i;

	for (i = 0; i < nitems(units); i++) {
		if (strcmp(units[i].name, unit) != 0)
			continue;

		/*
		 * yyerror() returns to its caller.  Stop here, otherwise a
		 * null frequency ends up in a division by zero and a value
		 * that is too large wraps around when it is multiplied.
		 */
		if (value < 1) {
			yyerror("Number is invalid: %ld", value);
			return 0;
		}
		if (value > units[i].max) {
			yyerror("Number is too large: %ld", value);
			return 0;
		}

		switch (units[i].id) {
		case UNIT_HZ:
			return (1000000000LLU / value);
		case UNIT_US:
			return (value * 1000LLU);
		case UNIT_MS:
			return (value * 1000000LLU);
		case UNIT_S:
			return (value * 1000000000LLU);
		}
	}
	yyerror("Invalid unit: %s", unit);
	return 0;
}
400 
401 /* Create a new probe */
402 struct bt_probe *
bp_new(const char * prov,const char * func,const char * name,long number)403 bp_new(const char *prov, const char *func, const char *name, long number)
404 {
405 	struct bt_probe *bp;
406 	enum bt_ptype ptype;
407 
408 	if (prov == NULL && func == NULL && name == NULL)
409 		ptype = number; /* BEGIN or END */
410 	else
411 		ptype = B_PT_PROBE;
412 
413 	bp = calloc(1, sizeof(*bp));
414 	if (bp == NULL)
415 		err(1, "bt_probe: calloc");
416 	bp->bp_prov = prov;
417 	bp->bp_func = func;
418 	bp->bp_name = name;
419 	if (ptype == B_PT_PROBE && name == NULL)
420 		bp->bp_nsecs = bp_unit_to_nsec(func, number);
421 	bp->bp_type = ptype;
422 
423 	return bp;
424 }
425 
426 /*
427  * Link two probes together, to build a probe list attached to
428  * a single action.
429  */
430 struct bt_probe *
bp_append(struct bt_probe * bp0,struct bt_probe * bp1)431 bp_append(struct bt_probe *bp0, struct bt_probe *bp1)
432 {
433 	struct bt_probe *bp = bp0;
434 
435 	assert(bp1 != NULL);
436 
437 	if (bp0 == NULL)
438 		return bp1;
439 
440 	while (SLIST_NEXT(bp, bp_next) != NULL)
441 		bp = SLIST_NEXT(bp, bp_next);
442 
443 	SLIST_INSERT_AFTER(bp, bp1, bp_next);
444 
445 	return bp0;
446 }
447 
448 /* Create a new argument */
449 struct bt_arg *
ba_new0(void * val,enum bt_argtype type)450 ba_new0(void *val, enum bt_argtype type)
451 {
452 	struct bt_arg *ba;
453 
454 	ba = calloc(1, sizeof(*ba));
455 	if (ba == NULL)
456 		err(1, "bt_arg: calloc");
457 	ba->ba_value = val;
458 	ba->ba_type = type;
459 
460 	return ba;
461 }
462 
463 /*
464  * Link two arguments together, to build an argument list used in
465  * operators, tuples and function calls.
466  */
467 struct bt_arg *
ba_append(struct bt_arg * da0,struct bt_arg * da1)468 ba_append(struct bt_arg *da0, struct bt_arg *da1)
469 {
470 	struct bt_arg *ba = da0;
471 
472 	assert(da1 != NULL);
473 
474 	if (da0 == NULL)
475 		return da1;
476 
477 	while (SLIST_NEXT(ba, ba_next) != NULL)
478 		ba = SLIST_NEXT(ba, ba_next);
479 
480 	SLIST_INSERT_AFTER(ba, da1, ba_next);
481 
482 	return da0;
483 }
484 
485 /* Create an operator argument */
486 struct bt_arg *
ba_op(enum bt_argtype op,struct bt_arg * da0,struct bt_arg * da1)487 ba_op(enum bt_argtype op, struct bt_arg *da0, struct bt_arg *da1)
488 {
489 	return ba_new(ba_append(da0, da1), op);
490 }
491 
492 /* Create a new statement: function call or assignment. */
493 struct bt_stmt *
bs_new(enum bt_action act,struct bt_arg * head,struct bt_var * var)494 bs_new(enum bt_action act, struct bt_arg *head, struct bt_var *var)
495 {
496 	struct bt_stmt *bs;
497 
498 	bs = calloc(1, sizeof(*bs));
499 	if (bs == NULL)
500 		err(1, "bt_stmt: calloc");
501 	bs->bs_act = act;
502 	bs->bs_var = var;
503 	/* SLIST_INSERT_HEAD() nullify the next pointer. */
504 	SLIST_FIRST(&bs->bs_args) = head;
505 
506 	return bs;
507 }
508 
509 /* Link two statements together, to build an 'action'. */
510 struct bt_stmt *
bs_append(struct bt_stmt * ds0,struct bt_stmt * ds1)511 bs_append(struct bt_stmt *ds0, struct bt_stmt *ds1)
512 {
513 	struct bt_stmt *bs = ds0;
514 
515 	if (ds0 == NULL)
516 		return ds1;
517 
518 	if (ds1 == NULL)
519 		return ds0;
520 
521 	while (SLIST_NEXT(bs, bs_next) != NULL)
522 		bs = SLIST_NEXT(bs, bs_next);
523 
524 	SLIST_INSERT_AFTER(bs, ds1, bs_next);
525 
526 	return ds0;
527 }
528 
529 const char *
bv_name(struct bt_var * bv)530 bv_name(struct bt_var *bv)
531 {
532 	if (strncmp(bv->bv_name, UNNAMED_MAP, strlen(UNNAMED_MAP)) == 0)
533 		return "";
534 	return bv->bv_name;
535 }
536 
537 /* Allocate a variable. */
538 struct bt_var *
bv_new(const char * vname)539 bv_new(const char *vname)
540 {
541 	struct bt_var *bv;
542 
543 	bv = calloc(1, sizeof(*bv));
544 	if (bv == NULL)
545 		err(1, "bt_var: calloc");
546 	bv->bv_name = vname;
547 
548 	return bv;
549 }
550 
551 /* Return the global variable corresponding to `vname'. */
552 struct bt_var *
bg_lookup(const char * vname)553 bg_lookup(const char *vname)
554 {
555 	struct bt_var *bv;
556 
557 	SLIST_FOREACH(bv, &g_variables, bv_next) {
558 		if (strcmp(vname, bv->bv_name) == 0)
559 			break;
560 	}
561 
562 	return bv;
563 }
564 
565 /* Find or allocate a global variable corresponding to `vname' */
566 struct bt_var *
bg_get(const char * vname)567 bg_get(const char *vname)
568 {
569 	struct bt_var *bv;
570 
571 	bv = bg_lookup(vname);
572 	if (bv == NULL) {
573 		bv = bv_new(vname);
574 		SLIST_INSERT_HEAD(&g_variables, bv, bv_next);
575 	}
576 
577 	return bv;
578 }
579 
580 /* Create an "argument" that points to an existing untyped variable. */
581 struct bt_arg *
bg_find(const char * vname)582 bg_find(const char *vname)
583 {
584 	return ba_new(bg_get(vname), B_AT_VAR);
585 }
586 
587 /* Create a 'store' statement to assign a value to a global variable. */
588 struct bt_stmt *
bg_store(const char * vname,struct bt_arg * vval)589 bg_store(const char *vname, struct bt_arg *vval)
590 {
591 	return bs_new(B_AC_STORE, vval, bg_get(vname));
592 }
593 
594 /* Return the local variable corresponding to `vname'. */
595 struct bt_var *
bl_lookup(const char * vname)596 bl_lookup(const char *vname)
597 {
598 	struct bt_var *bv;
599 
600 	SLIST_FOREACH(bv, &l_variables, bv_next) {
601 		if (strcmp(vname, bv->bv_name) == 0)
602 			break;
603 	}
604 
605 	return bv;
606 }
607 
608 /* Find or create a local variable corresponding to `vname' */
609 struct bt_arg *
bl_find(const char * vname)610 bl_find(const char *vname)
611 {
612 	struct bt_var *bv;
613 
614 	bv = bl_lookup(vname);
615 	if (bv == NULL) {
616 		bv = bv_new(vname);
617 		SLIST_INSERT_HEAD(&l_variables, bv, bv_next);
618 	}
619 
620 	return ba_new(bv, B_AT_VAR);
621 }
622 
623 /* Create a 'store' statement to assign a value to a local variable. */
624 struct bt_stmt *
bl_store(const char * vname,struct bt_arg * vval)625 bl_store(const char *vname, struct bt_arg *vval)
626 {
627 	struct bt_var *bv;
628 
629 	bv = bl_lookup(vname);
630 	if (bv == NULL) {
631 		bv = bv_new(vname);
632 		SLIST_INSERT_HEAD(&l_variables, bv, bv_next);
633 	}
634 
635 	return bs_new(B_AC_STORE, vval, bv);
636 }
637 
638 /* Create an argument that points to a tuple variable and a given index */
639 struct bt_arg *
bi_find(struct bt_arg * ba,unsigned long index)640 bi_find(struct bt_arg *ba, unsigned long index)
641 {
642 	struct bt_var *bv = ba->ba_value;
643 
644 	ba = ba_new(bv, B_AT_TMEMBER);
645 	ba->ba_key = (void *)index;
646 	return ba;
647 }
648 
649 struct bt_stmt *
bm_op(enum bt_action mact,struct bt_arg * ba,struct bt_arg * mval)650 bm_op(enum bt_action mact, struct bt_arg *ba, struct bt_arg *mval)
651 {
652 	return bs_new(mact, ba, (struct bt_var *)mval);
653 }
654 
655 /* Create a 'map store' statement to assign a value to a map entry. */
656 struct bt_stmt *
bm_insert(const char * mname,struct bt_arg * mkey,struct bt_arg * mval)657 bm_insert(const char *mname, struct bt_arg *mkey, struct bt_arg *mval)
658 {
659 	struct bt_arg *ba;
660 
661 	if (mkey->ba_type == B_AT_TUPLE)
662 		yyerror("tuple cannot be used as map key");
663 
664 	ba = ba_new(bg_get(mname), B_AT_MAP);
665 	ba->ba_key = mkey;
666 
667 	return bs_new(B_AC_INSERT, ba, (struct bt_var *)mval);
668 }
669 
670 /* Create an argument that points to a map variable and attach a key to it. */
671 struct bt_arg *
bm_find(const char * vname,struct bt_arg * mkey)672 bm_find(const char *vname, struct bt_arg *mkey)
673 {
674 	struct bt_arg *ba;
675 
676 	ba = ba_new(bg_get(vname), B_AT_MAP);
677 	ba->ba_key = mkey;
678 	return ba;
679 }
680 
681 /*
682  * Histograms implemented using associative arrays (maps).  In the case
683  * of linear histograms `ba_key' points to a list of (min, max, step)
684  * necessary to "bucketize" any value.
685  */
686 struct bt_stmt *
bh_inc(const char * hname,struct bt_arg * hval,struct bt_arg * hrange)687 bh_inc(const char *hname, struct bt_arg *hval, struct bt_arg *hrange)
688 {
689 	struct bt_arg *ba;
690 
691 	if (hrange == NULL) {
692 		/* Power-of-2 histogram */
693 	} else {
694 		long min = 0, max;
695 		int count = 0;
696 
697 		/* Linear histogram */
698 		for (ba = hrange; ba != NULL; ba = SLIST_NEXT(ba, ba_next)) {
699 			if (++count > 3)
700 				yyerror("too many arguments");
701 			if (ba->ba_type != B_AT_LONG)
702 				yyerror("type invalid");
703 
704 			switch (count) {
705 			case 1:
706 				min = (long)ba->ba_value;
707 				if (min >= 0)
708 					break;
709 				yyerror("negative minimum");
710 			case 2:
711 				max = (long)ba->ba_value;
712 				if (max > min)
713 					break;
714 				yyerror("maximum smaller than minimum (%d < %d)",
715 				    max,  min);
716 			case 3:
717 				break;
718 			default:
719 				assert(0);
720 			}
721 		}
722 		if (count < 3)
723 			yyerror("%d missing arguments", 3 - count);
724 	}
725 
726 	ba = ba_new(bg_get(hname), B_AT_HIST);
727 	ba->ba_key = hrange;
728 	return bs_new(B_AC_BUCKETIZE, ba, (struct bt_var *)hval);
729 }
730 
/* Entry of the table of keywords searched by lookup(). */
struct keyword {
	const char	*word;		/* spelling of the keyword */
	int		 token;		/* token returned by the lexer */
	int		 type;		/* semantic value of the token */
};

/*
 * Comparison function for bsearch(3): compare the string `str' with
 * the spelling of the keyword `xkw'.
 */
int
kw_cmp(const void *str, const void *xkw)
{
	const struct keyword *kw = xkw;
	const char *key = str;

	return strcmp(key, kw->word);
}
742 
743 struct keyword *
lookup(char * s)744 lookup(char *s)
745 {
746 	static const struct keyword kws[] = {
747 		{ "BEGIN",	BEGIN,		B_PT_BEGIN },
748 		{ "END",	END,		B_PT_END },
749 		{ "arg0",	BUILTIN,	B_AT_BI_ARG0 },
750 		{ "arg1",	BUILTIN,	B_AT_BI_ARG1 },
751 		{ "arg2",	BUILTIN,	B_AT_BI_ARG2 },
752 		{ "arg3",	BUILTIN,	B_AT_BI_ARG3 },
753 		{ "arg4",	BUILTIN,	B_AT_BI_ARG4 },
754 		{ "arg5",	BUILTIN,	B_AT_BI_ARG5 },
755 		{ "arg6",	BUILTIN,	B_AT_BI_ARG6 },
756 		{ "arg7",	BUILTIN,	B_AT_BI_ARG7 },
757 		{ "arg8",	BUILTIN,	B_AT_BI_ARG8 },
758 		{ "arg9",	BUILTIN,	B_AT_BI_ARG9 },
759 		{ "clear",	MFUNC,		B_AC_CLEAR },
760 		{ "comm",	BUILTIN,	B_AT_BI_COMM },
761 		{ "count",	MOP0, 		B_AT_MF_COUNT },
762 		{ "cpu",	BUILTIN,	B_AT_BI_CPU },
763 		{ "delete",	F_DELETE,	B_AC_DELETE },
764 		{ "else",	ELSE,		0 },
765 		{ "exit",	FUNC0,		B_AC_EXIT },
766 		{ "hist",	OP1,		0 },
767 		{ "if",		IF,		0 },
768 		{ "kstack",	BUILTIN,	B_AT_BI_KSTACK },
769 		{ "lhist",	OP4,		0 },
770 		{ "max",	MOP1,		B_AT_MF_MAX },
771 		{ "min",	MOP1,		B_AT_MF_MIN },
772 		{ "nsecs",	BUILTIN,	B_AT_BI_NSECS },
773 		{ "pid",	BUILTIN,	B_AT_BI_PID },
774 		{ "print",	F_PRINT,	B_AC_PRINT },
775 		{ "printf",	FUNCN,		B_AC_PRINTF },
776 		{ "probe",	BUILTIN,	B_AT_BI_PROBE },
777 		{ "retval",	BUILTIN,	B_AT_BI_RETVAL },
778 		{ "str",	STR,		B_AT_FN_STR },
779 		{ "sum",	MOP1,		B_AT_MF_SUM },
780 		{ "tid",	BUILTIN,	B_AT_BI_TID },
781 		{ "time",	FUNC1,		B_AC_TIME },
782 		{ "ustack",	BUILTIN,	B_AT_BI_USTACK },
783 		{ "zero",	MFUNC,		B_AC_ZERO },
784 	};
785 
786 	return bsearch(s, kws, nitems(kws), sizeof(kws[0]), kw_cmp);
787 }
788 
789 int
peek(void)790 peek(void)
791 {
792 	if (pbuf != NULL) {
793 		if (pindex < plen)
794 			return pbuf[pindex];
795 	}
796 	return EOF;
797 }
798 
799 int
lgetc(void)800 lgetc(void)
801 {
802 	if (pbuf != NULL) {
803 		if (pindex < plen) {
804 			yylval.colno++;
805 			return pbuf[pindex++];
806 		}
807 	}
808 	return EOF;
809 }
810 
811 void
lungetc(void)812 lungetc(void)
813 {
814 	if (pbuf != NULL && pindex > 0) {
815 		yylval.colno--;
816 		pindex--;
817 	}
818 }
819 
/* Return 1 if the character `x' may directly follow a number, 0 if not. */
static inline int
allowed_to_end_number(int x)
{
	switch (x) {
	case ')':
	case '/':
	case '{':
	case ';':
	case ']':
	case ',':
	case '=':
		return 1;
	default:
		return isspace(x) != 0;
	}
}
826 
/*
 * Return 1 if the character `x' may be part of a word, that is if it is
 * alphanumeric or an underscore, 0 if not.
 */
static inline int
allowed_in_string(int x)
{
	if (x == '_')
		return 1;

	return isalnum(x) ? 1 : 0;
}
832 
833 static int
skip(void)834 skip(void)
835 {
836 	int c;
837 
838 again:
839 	/* skip whitespaces */
840 	for (c = lgetc(); isspace(c); c = lgetc()) {
841 		if (c == '\n') {
842 			yylval.lineno++;
843 			yylval.colno = 0;
844 		}
845 	}
846 
847 	/* skip single line comments and shell magic */
848 	if ((c == '/' && peek() == '/') ||
849 	    (yylval.lineno == 1 && yylval.colno == 1 && c == '#' &&
850 	     peek() == '!')) {
851 		for (c = lgetc(); c != EOF; c = lgetc()) {
852 			if (c == '\n') {
853 				yylval.lineno++;
854 				yylval.colno = 0;
855 				goto again;
856 			}
857 		}
858 	}
859 
860 	/* skip multi line comments */
861 	if (c == '/' && peek() == '*') {
862 		int pc;
863 
864 		for (pc = 0, c = lgetc(); c != EOF; c = lgetc()) {
865 			if (pc == '*' && c == '/')
866 				goto again;
867 			else if (c == '\n')
868 				yylval.lineno++;
869 			pc = c;
870 		}
871 	}
872 
873 	return c;
874 }
875 
876 int
yylex(void)877 yylex(void)
878 {
879 	unsigned char	 buf[1024];
880 	unsigned char	*ebuf, *p, *str;
881 	int		 c;
882 
883 	ebuf = buf + sizeof(buf);
884 	p = buf;
885 
886 again:
887 	c = skip();
888 
889 	switch (c) {
890 	case '!':
891 	case '=':
892 		if (peek() == '=') {
893 			lgetc();
894 			return (c == '=') ? OP_EQ : OP_NE;
895 		}
896 		return c;
897 	case '<':
898 		if (peek() == '=') {
899 			lgetc();
900 			return OP_LE;
901 		}
902 		return OP_LT;
903 	case '>':
904 		if (peek() == '=') {
905 			lgetc();
906 			return OP_GE;
907 		}
908 		return OP_GT;
909 	case '&':
910 		if (peek() == '&') {
911 			lgetc();
912 			return OP_LAND;
913 		}
914 		return c;
915 	case '|':
916 		if (peek() == '|') {
917 			lgetc();
918 			return OP_LOR;
919 		}
920 		return c;
921 	case '/':
922 		while (isspace(peek())) {
923 			if (lgetc() == '\n') {
924 				yylval.lineno++;
925 				yylval.colno = 0;
926 			}
927 		}
928 		if (peek() == '{' || peek() == '/' || peek() == '\n')
929 			return ENDFILT;
930 		/* FALLTHROUGH */
931 	case ',':
932 	case '(':
933 	case ')':
934 	case '{':
935 	case '}':
936 	case ':':
937 	case ';':
938 		return c;
939 	case '$':
940 		c = lgetc();
941 		if (c == '#') {
942 			yylval.v.arg = get_nargs();
943 			return PNUM;
944 		} else if (isdigit(c)) {
945 			do {
946 				*p++ = c;
947 				if (p == ebuf) {
948 					yyerror("line too long");
949 					return ERROR;
950 				}
951 			} while ((c = lgetc()) != EOF && isdigit(c));
952 			lungetc();
953 			*p = '\0';
954 			if (c == EOF || allowed_to_end_number(c)) {
955 				const char *errstr = NULL;
956 				int num;
957 
958 				num = strtonum(buf, 1, INT_MAX, &errstr);
959 				if (errstr) {
960 					yyerror("'$%s' is %s", buf, errstr);
961 					return ERROR;
962 				}
963 
964 				yylval.v.arg = get_varg(num);
965 				return PVAR;
966 			}
967 		} else if (isalpha(c)) {
968 			do {
969 				*p++ = c;
970 				if (p == ebuf) {
971 					yyerror("line too long");
972 					return ERROR;
973 				}
974 			} while ((c = lgetc()) != EOF && allowed_in_string(c));
975 			lungetc();
976 			*p = '\0';
977 			if ((yylval.v.string = strdup(buf)) == NULL)
978 				err(1, "%s", __func__);
979 			return LVAR;
980 		}
981 		yyerror("'$%s%c' is an invalid variable name", buf, c);
982 		return ERROR;
983 		break;
984 	case '@':
985 		c = lgetc();
986 		/* check for unnamed map '@' */
987 		if (isalpha(c)) {
988 			do {
989 				*p++ = c;
990 				if (p == ebuf) {
991 					yyerror("line too long");
992 					return ERROR;
993 				}
994 			} while ((c = lgetc()) != EOF && allowed_in_string(c));
995 			lungetc();
996 			*p = '\0';
997 			if ((yylval.v.string = strdup(buf)) == NULL)
998 				err(1, "%s", __func__);
999 			return GVAR;
1000 		} else if (allowed_to_end_number(c) || c == '[') {
1001 			lungetc();
1002 			*p = '\0';
1003 			yylval.v.string = UNNAMED_MAP;
1004 			return GVAR;
1005 		}
1006 		yyerror("'@%s%c' is an invalid variable name", buf, c);
1007 		return ERROR;
1008 		break;
1009 	case EOF:
1010 		return 0;
1011 	case '"':
1012 		/* parse C-like string */
1013 		while ((c = lgetc()) != EOF) {
1014 			if (c == '"') {
1015 				/* handle multi-line strings */
1016 				c = skip();
1017 				if (c == '"')
1018 					continue;
1019 				else
1020 					lungetc();
1021 				break;
1022 			}
1023 			if (c == '\\') {
1024 				c = lgetc();
1025 				switch (c) {
1026 				case '\\':	c = '\\';	break;
1027 				case '\'':	c = '\'';	break;
1028 				case '"':	c = '"';	break;
1029 				case 'a':	c = '\a';	break;
1030 				case 'b':	c = '\b';	break;
1031 				case 'e':	c = 033;	break;
1032 				case 'f':	c = '\f';	break;
1033 				case 'n':	c = '\n';	break;
1034 				case 'r':	c = '\r';	break;
1035 				case 't':	c = '\t';	break;
1036 				case 'v':	c = '\v';	break;
1037 				default:
1038 					yyerror("'%c' unsupported escape", c);
1039 					return ERROR;
1040 				}
1041 			}
1042 			*p++ = c;
1043 			if (p == ebuf) {
1044 				yyerror("line too long");
1045 				return ERROR;
1046 			}
1047 		}
1048 		if (c == EOF) {
1049 			yyerror("\"%s\" invalid EOF", buf);
1050 			return ERROR;
1051 		}
1052 		*p++ = '\0';
1053 		if ((str = strdup(buf)) == NULL)
1054 			err(1, "%s", __func__);
1055 		yylval.v.string = str;
1056 		return CSTRING;
1057 	default:
1058 		break;
1059 	}
1060 
1061 	/* parsing number */
1062 	if (isdigit(c)) {
1063 		do {
1064 			*p++ = c;
1065 			if (p == ebuf) {
1066 				yyerror("line too long");
1067 				return ERROR;
1068 			}
1069 		} while ((c = lgetc()) != EOF &&
1070 		    (isxdigit(c) || c == 'x' || c == 'X'));
1071 		lungetc();
1072 		if (c == EOF || allowed_to_end_number(c)) {
1073 			*p = '\0';
1074 			errno = 0;
1075 			yylval.v.number = strtol(buf, NULL, 0);
1076 			if (errno == ERANGE) {
1077 				/*
1078 				 * Characters are already validated, so only
1079 				 * check ERANGE.
1080 				 */
1081 				yyerror("%sflow", (yylval.v.number == LONG_MIN)
1082 				    ? "under" : "over");
1083 				return ERROR;
1084 			}
1085 			return NUMBER;
1086 		} else {
1087 			while (p > buf + 1) {
1088 				--p;
1089 				lungetc();
1090 			}
1091 			c = *--p;
1092 		}
1093 	}
1094 
1095 	/* parsing next word */
1096 	if (allowed_in_string(c)) {
1097 		struct keyword *kwp;
1098 		do {
1099 			*p++ = c;
1100 			if (p == ebuf) {
1101 				yyerror("line too long");
1102 				return ERROR;
1103 			}
1104 		} while ((c = lgetc()) != EOF && (allowed_in_string(c)));
1105 		lungetc();
1106 		*p = '\0';
1107 		kwp = lookup(buf);
1108 		if (kwp == NULL) {
1109 			if ((yylval.v.string = strdup(buf)) == NULL)
1110 				err(1, "%s", __func__);
1111 			return STRING;
1112 		}
1113 		if (pflag) {
1114 			/*
1115 			 * Probe lexer backdoor, interpret the token as a string
1116 			 * rather than a keyword. Otherwise, reserved keywords
1117 			 * would conflict with syscall names.
1118 			 */
1119 			yylval.v.string = kwp->word;
1120 			return STRING;
1121 		} else if (beflag) {
1122 			/* Interpret tokens in a BEGIN/END context. */
1123 			if (kwp->type >= B_AT_BI_ARG0 &&
1124 			    kwp->type <= B_AT_BI_ARG9)
1125 				yyerror("the %s builtin cannot be used with "
1126 				    "BEGIN or END probes", kwp->word);
1127 		}
1128 		yylval.v.i = kwp->type;
1129 		return kwp->token;
1130 	}
1131 
1132 	if (c == '\n') {
1133 		yylval.lineno++;
1134 		yylval.colno = 0;
1135 	}
1136 	if (c == EOF)
1137 		return 0;
1138 	return c;
1139 }
1140 
1141 void
pprint_syntax_error(void)1142 pprint_syntax_error(void)
1143 {
1144 	char line[BUFSIZ];
1145 	int c, indent = yylval.colno;
1146 	size_t i;
1147 
1148 	strlcpy(line, &pbuf[pindex - yylval.colno], sizeof(line));
1149 
1150 	for (i = 0; line[i] != '\0' && (c = line[i]) != '\n'; i++) {
1151 		if (c == '\t')
1152 			indent += (8 - 1);
1153 		fputc(c, stderr);
1154 	}
1155 
1156 	fprintf(stderr, "\n%*c\n", indent, '^');
1157 }
1158 
1159 void
yyerror(const char * fmt,...)1160 yyerror(const char *fmt, ...)
1161 {
1162 	const char *prefix;
1163 	va_list	va;
1164 
1165 	prefix = (yylval.filename != NULL) ? yylval.filename : getprogname();
1166 
1167 	fprintf(stderr, "%s:%d:%d: ", prefix, yylval.lineno, yylval.colno);
1168 	va_start(va, fmt);
1169 	vfprintf(stderr, fmt, va);
1170 	va_end(va);
1171 	fprintf(stderr, ":\n");
1172 
1173 	pprint_syntax_error();
1174 
1175 	perrors++;
1176 }
1177 
1178 int
btparse(const char * str,size_t len,const char * filename,int debug)1179 btparse(const char *str, size_t len, const char *filename, int debug)
1180 {
1181 	if (debug > 0)
1182 		yydebug = 1;
1183 	pbuf = str;
1184 	plen = len;
1185 	pindex = 0;
1186 	yylval.filename = filename;
1187 	yylval.lineno = 1;
1188 
1189 	yyparse();
1190 	if (perrors)
1191 		return perrors;
1192 
1193 	assert(SLIST_EMPTY(&l_variables));
1194 
1195 	return 0;
1196 }
1197