xref: /openbsd/usr.sbin/btrace/bt_parse.y (revision 771fbea0)
1 /*	$OpenBSD: bt_parse.y,v 1.34 2021/04/22 11:53:13 mpi Exp $	*/
2 
3 /*
4  * Copyright (c) 2019-2021 Martin Pieuchot <mpi@openbsd.org>
5  * Copyright (c) 2019 Tobias Heider <tobhe@openbsd.org>
6  * Copyright (c) 2015 Ted Unangst <tedu@openbsd.org>
7  *
8  * Permission to use, copy, modify, and distribute this software for any
9  * purpose with or without fee is hereby granted, provided that the above
10  * copyright notice and this permission notice appear in all copies.
11  *
12  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
13  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
14  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
15  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
16  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
17  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
18  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
19  */
20 
21 /*
22  * B tracing language parser.
23  *
24  * The dialect of the language understood by this parser aims to be
25  * compatible with the one understood by bpftrace(8), see:
26  *
27  * https://github.com/iovisor/bpftrace/blob/master/docs/reference_guide.md
28  *
29  */
30 
31 %{
32 #include <sys/queue.h>
33 
34 #include <assert.h>
35 #include <ctype.h>
36 #include <err.h>
37 #include <limits.h>
38 #include <stdarg.h>
39 #include <stdint.h>
40 #include <stdio.h>
41 
42 #include "bt_parser.h"
43 
44 /* Name for the default map @[], hopefully nobody will use this one ;) */
45 #define UNNAMED_MAP	"___unnamed_map_doesnt_have_any_name"
46 
/* Queue of rules to evaluate. */
48 struct bt_ruleq		g_rules = TAILQ_HEAD_INITIALIZER(g_rules);
49 
50 /* Number of probes except BEGIN/END. */
51 int		 	g_nprobes;
52 
53 /* List of global variables, including maps. */
54 SLIST_HEAD(, bt_var)	 g_variables;
55 
56 /* List of local variables, cleaned for each new rule. */
57 SLIST_HEAD(, bt_var)	l_variables;
58 
59 struct bt_rule	*br_new(struct bt_probe *, struct bt_filter *, struct bt_stmt *,
60 		     enum bt_rtype);
61 struct bt_probe	*bp_new(const char *, const char *, const char *, int32_t);
62 struct bt_arg	*ba_append(struct bt_arg *, struct bt_arg *);
63 struct bt_arg	*ba_op(enum bt_argtype, struct bt_arg *, struct bt_arg *);
64 struct bt_stmt	*bs_new(enum bt_action, struct bt_arg *, struct bt_var *);
65 struct bt_stmt	*bs_append(struct bt_stmt *, struct bt_stmt *);
66 
67 struct bt_var	*bg_lookup(const char *);
68 struct bt_stmt	*bg_store(const char *, struct bt_arg *);
69 struct bt_arg	*bg_find(const char *);
70 struct bt_var	*bg_get(const char *);
71 
72 struct bt_var	*bl_lookup(const char *);
73 struct bt_stmt	*bl_store(const char *, struct bt_arg *);
74 struct bt_arg	*bl_find(const char *);
75 
76 struct bt_arg	*bm_find(const char *, struct bt_arg *);
77 struct bt_stmt	*bm_insert(const char *, struct bt_arg *, struct bt_arg *);
78 struct bt_stmt	*bm_op(enum bt_action, struct bt_arg *, struct bt_arg *);
79 
80 struct bt_stmt	*bh_inc(const char *, struct bt_arg *, struct bt_arg *);
81 
82 /*
83  * Lexer
84  */
85 const char	*pbuf;
86 size_t		 plen;
87 size_t		 pindex;
88 int		 perrors = 0;
89 
90 typedef struct {
91 	union {
92 		long			 number;
93 		int			 i;
94 		const char		*string;
95 		struct bt_probe		*probe;
96 		struct bt_filter	*filter;
97 		struct bt_stmt		*stmt;
98 		struct bt_arg		*arg;
99 	} v;
100 	const char			*filename;
101 	int				 lineno;
102 	int				 colno;
103 } yystype;
104 #define YYSTYPE yystype
105 
106 static void	 yyerror(const char *, ...);
107 static int	 yylex(void);
108 
109 static int pflag;
110 %}
111 
112 %token	<v.i>		ERROR ENDFILT OP_EQ OP_NE OP_LE OP_GE OP_LAND OP_LOR
113 /* Builtins */
114 %token	<v.i>		BUILTIN BEGIN END HZ
115 /* Functions and Map operators */
116 %token  <v.i>		F_DELETE F_PRINT FUNC0 FUNC1 FUNCN OP1 OP4 MOP0 MOP1
117 %token	<v.string>	STRING CSTRING
118 %token	<v.number>	NUMBER
119 
120 %type	<v.string>	gvar lvar
121 %type	<v.number>	staticv
122 %type	<v.i>		beginend
123 %type	<v.probe>	probe pname
124 %type	<v.filter>	filter
125 %type	<v.stmt>	action stmt stmtlist
126 %type	<v.arg>		expr vargs mentry mexpr pargs term
127 
128 %right	'='
129 %nonassoc OP_EQ OP_NE OP_LE OP_GE OP_LAND OP_LOR
130 %left	'&' '|'
131 %left	'+' '-'
132 %left	'/' '*'
133 %%
134 
135 grammar	: /* empty */
136 	| grammar '\n'
137 	| grammar rule
138 	| grammar error
139 	;
140 
141 rule	: beginend action		{ br_new(NULL, NULL, $2, $1); }
142 	| probe filter action		{ br_new($1, $2, $3, B_RT_PROBE); }
143 	;
144 
145 beginend: BEGIN	| END ;
146 
147 probe	: { pflag = 1; } pname		{ $$ = $2; pflag = 0; }
148 
149 pname	: STRING ':' STRING ':' STRING	{ $$ = bp_new($1, $3, $5, 0); }
150 	| STRING ':' HZ ':' NUMBER	{ $$ = bp_new($1, "hz", NULL, $5); }
151 	;
152 
153 staticv	: NUMBER
154 	| '$' NUMBER			{ $$ = get_varg($2); }
155 	;
156 
157 gvar	: '@' STRING			{ $$ = $2; }
158 	| '@'				{ $$ = UNNAMED_MAP; }
159 	;
160 
161 lvar	: '$' STRING			{ $$ = $2; }
162 	;
163 
164 mentry	: gvar '[' vargs ']'		{ $$ = bm_find($1, $3); }
165 	;
166 
167 mexpr	: MOP0 '(' ')'			{ $$ = ba_new(NULL, $1); }
168 	| MOP1 '(' expr ')'		{ $$ = ba_new($3, $1); }
169 	| expr
170 	;
171 
172 expr	: CSTRING			{ $$ = ba_new($1, B_AT_STR); }
173 	| term
174 	;
175 
176 filter	: /* empty */			{ $$ = NULL; }
177 	| '/' term ENDFILT		{ $$ = bc_new(NULL, B_AT_OP_NE, $2); }
178 	;
179 
180 term	: '(' term ')'			{ $$ = $2; }
181 	| term OP_EQ term		{ $$ = ba_op(B_AT_OP_EQ, $1, $3); }
182 	| term OP_NE term		{ $$ = ba_op(B_AT_OP_NE, $1, $3); }
183 	| term OP_LE term		{ $$ = ba_op(B_AT_OP_LE, $1, $3); }
184 	| term OP_GE term		{ $$ = ba_op(B_AT_OP_GE, $1, $3); }
185 	| term OP_LAND term		{ $$ = ba_op(B_AT_OP_LAND, $1, $3); }
186 	| term OP_LOR term		{ $$ = ba_op(B_AT_OP_LOR, $1, $3); }
187 	| term '+' term			{ $$ = ba_op(B_AT_OP_PLUS, $1, $3); }
188 	| term '-' term			{ $$ = ba_op(B_AT_OP_MINUS, $1, $3); }
189 	| term '*' term			{ $$ = ba_op(B_AT_OP_MULT, $1, $3); }
190 	| term '/' term			{ $$ = ba_op(B_AT_OP_DIVIDE, $1, $3); }
191 	| term '&' term			{ $$ = ba_op(B_AT_OP_BAND, $1, $3); }
192 	| term '|' term			{ $$ = ba_op(B_AT_OP_BOR, $1, $3); }
193 	| staticv			{ $$ = ba_new($1, B_AT_LONG); }
194 	| BUILTIN			{ $$ = ba_new(NULL, $1); }
195 	| lvar				{ $$ = bl_find($1); }
196 	| gvar				{ $$ = bg_find($1); }
197 	| mentry
198 	;
199 
200 
201 vargs	: expr
202 	| vargs ',' expr	{ $$ = ba_append($1, $3); }
203 	;
204 
205 pargs	: term
206 	| gvar ',' expr		{ $$ = ba_append(bg_find($1), $3); }
207 	;
208 
209 NL	: /* empty */ | '\n'
210 		;
211 
212 stmt	: ';' NL			{ $$ = NULL; }
213 	| gvar '=' expr			{ $$ = bg_store($1, $3); }
214 	| lvar '=' expr			{ $$ = bl_store($1, $3); }
215 	| gvar '[' vargs ']' '=' mexpr	{ $$ = bm_insert($1, $3, $6); }
216 	| FUNCN '(' vargs ')'		{ $$ = bs_new($1, $3, NULL); }
217 	| FUNC1 '(' expr ')'		{ $$ = bs_new($1, $3, NULL); }
218 	| FUNC0 '(' ')'			{ $$ = bs_new($1, NULL, NULL); }
219 	| F_DELETE '(' mentry ')'	{ $$ = bm_op($1, $3, NULL); }
220 	| F_PRINT '(' pargs ')'		{ $$ = bs_new($1, $3, NULL); }
221 	| gvar '=' OP1 '(' expr ')'	{ $$ = bh_inc($1, $5, NULL); }
222 	| gvar '=' OP4 '(' expr ',' vargs ')'	{ $$ = bh_inc($1, $5, $7); }
223 	;
224 
225 stmtlist: stmt
226 	| stmtlist stmt			{ $$ = bs_append($1, $2); }
227 	;
228 
229 action	: '{' stmtlist '}'		{ $$ = $2; }
230 	;
231 
232 %%
233 
/*
 * Return the value of the positional parameter `$index'.
 *
 * Only `$1' is accepted: any other index trips the assertion.  The
 * values live in the `vargs' array, which is defined outside this file.
 */
int
get_varg(int index)
{
	extern int vargs[];
	int slot;

	assert(index == 1);

	/* Positional parameters are 1-based, the array is 0-based. */
	slot = index - 1;
	return vargs[slot];
}
243 
244 /* Create a new rule, representing  "probe / filter / { action }" */
245 struct bt_rule *
246 br_new(struct bt_probe *probe, struct bt_filter *filter, struct bt_stmt *head,
247     enum bt_rtype rtype)
248 {
249 	struct bt_rule *br;
250 
251 	br = calloc(1, sizeof(*br));
252 	if (br == NULL)
253 		err(1, "bt_rule: calloc");
254 	br->br_probe = probe;
255 	br->br_filter = filter;
256 	/* SLIST_INSERT_HEAD() nullify the next pointer. */
257 	SLIST_FIRST(&br->br_action) = head;
258 	br->br_type = rtype;
259 
260 	SLIST_FIRST(&br->br_variables) = SLIST_FIRST(&l_variables);
261 	SLIST_INIT(&l_variables);
262 
263 	if (rtype == B_RT_PROBE) {
264 		g_nprobes++;
265 		TAILQ_INSERT_TAIL(&g_rules, br, br_next);
266 	} else {
267 		TAILQ_INSERT_HEAD(&g_rules, br, br_next);
268 	}
269 
270 	return br;
271 }
272 
273 /* Create a new condition */
274 struct bt_filter *
275 bc_new(struct bt_arg *term, enum bt_argtype op, struct bt_arg *ba)
276 {
277 	struct bt_filter *bf;
278 
279 	bf = calloc(1, sizeof(*bf));
280 	if (bf == NULL)
281 		err(1, "bt_filter: calloc");
282 
283 	bf->bf_condition = bs_new(B_AC_TEST, ba_op(op, term, ba), NULL);
284 
285 	return bf;
286 }
287 
288 /* Create a new probe */
289 struct bt_probe *
290 bp_new(const char *prov, const char *func, const char *name, int32_t rate)
291 {
292 	struct bt_probe *bp;
293 
294 	if (rate < 0 || rate > INT32_MAX)
295 		errx(1, "only positive values permitted");
296 
297 	bp = calloc(1, sizeof(*bp));
298 	if (bp == NULL)
299 		err(1, "bt_probe: calloc");
300 	bp->bp_prov = prov;
301 	bp->bp_func = func;
302 	bp->bp_name = name;
303 	bp->bp_rate = rate;
304 
305 	return bp;
306 }
307 
308 /* Create a new argument */
309 struct bt_arg *
310 ba_new0(void *val, enum bt_argtype type)
311 {
312 	struct bt_arg *ba;
313 
314 	ba = calloc(1, sizeof(*ba));
315 	if (ba == NULL)
316 		err(1, "bt_arg: calloc");
317 	ba->ba_value = val;
318 	ba->ba_type = type;
319 
320 	return ba;
321 }
322 
323 /*
324  * Link two arguments together, to build an argument list used in
325  * function calls.
326  */
327 struct bt_arg *
328 ba_append(struct bt_arg *da0, struct bt_arg *da1)
329 {
330 	struct bt_arg *ba = da0;
331 
332 	assert(da1 != NULL);
333 
334 	if (da0 == NULL)
335 		return da1;
336 
337 	while (SLIST_NEXT(ba, ba_next) != NULL)
338 		ba = SLIST_NEXT(ba, ba_next);
339 
340 	SLIST_INSERT_AFTER(ba, da1, ba_next);
341 
342 	return da0;
343 }
344 
345 /* Create an operator argument */
346 struct bt_arg *
347 ba_op(enum bt_argtype op, struct bt_arg *da0, struct bt_arg *da1)
348 {
349 	return ba_new(ba_append(da0, da1), op);
350 }
351 
352 /* Create a new statement: function call or assignment. */
353 struct bt_stmt *
354 bs_new(enum bt_action act, struct bt_arg *head, struct bt_var *var)
355 {
356 	struct bt_stmt *bs;
357 
358 	bs = calloc(1, sizeof(*bs));
359 	if (bs == NULL)
360 		err(1, "bt_stmt: calloc");
361 	bs->bs_act = act;
362 	bs->bs_var = var;
363 	/* SLIST_INSERT_HEAD() nullify the next pointer. */
364 	SLIST_FIRST(&bs->bs_args) = head;
365 
366 	return bs;
367 }
368 
369 /* Link two statements together, to build an 'action'. */
370 struct bt_stmt *
371 bs_append(struct bt_stmt *ds0, struct bt_stmt *ds1)
372 {
373 	struct bt_stmt *bs = ds0;
374 
375 	if (ds0 == NULL)
376 		return ds1;
377 
378 	if (ds1 == NULL)
379 		return ds0;
380 
381 	while (SLIST_NEXT(bs, bs_next) != NULL)
382 		bs = SLIST_NEXT(bs, bs_next);
383 
384 	SLIST_INSERT_AFTER(bs, ds1, bs_next);
385 
386 	return ds0;
387 }
388 
389 const char *
390 bv_name(struct bt_var *bv)
391 {
392 	if (strncmp(bv->bv_name, UNNAMED_MAP, strlen(UNNAMED_MAP)) == 0)
393 		return "";
394 	return bv->bv_name;
395 }
396 
397 /* Allocate a variable. */
398 struct bt_var *
399 bv_new(const char *vname)
400 {
401 	struct bt_var *bv;
402 
403 	bv = calloc(1, sizeof(*bv));
404 	if (bv == NULL)
405 		err(1, "bt_var: calloc");
406 	bv->bv_name = vname;
407 
408 	return bv;
409 }
410 
411 /* Return the global variable corresponding to `vname'. */
412 struct bt_var *
413 bg_lookup(const char *vname)
414 {
415 	struct bt_var *bv;
416 
417 	SLIST_FOREACH(bv, &g_variables, bv_next) {
418 		if (strcmp(vname, bv->bv_name) == 0)
419 			break;
420 	}
421 
422 	return bv;
423 }
424 
425 /* Find or allocate a global variable corresponding to `vname' */
426 struct bt_var *
427 bg_get(const char *vname)
428 {
429 	struct bt_var *bv;
430 
431 	bv = bg_lookup(vname);
432 	if (bv == NULL) {
433 		bv = bv_new(vname);
434 		SLIST_INSERT_HEAD(&g_variables, bv, bv_next);
435 	}
436 
437 	return bv;
438 }
439 
440 /* Create an "argument" that points to an existing untyped variable. */
441 struct bt_arg *
442 bg_find(const char *vname)
443 {
444 	struct bt_var *bv;
445 
446 	bv = bg_lookup(vname);
447 	if (bv == NULL)
448 		yyerror("variable '%s' accessed before being set", vname);
449 
450 	return ba_new(bv, B_AT_VAR);
451 }
452 
453 /* Create a 'store' statement to assign a value to a global variable. */
454 struct bt_stmt *
455 bg_store(const char *vname, struct bt_arg *vval)
456 {
457 	return bs_new(B_AC_STORE, vval, bg_get(vname));
458 }
459 
460 /* Return the local variable corresponding to `vname'. */
461 struct bt_var *
462 bl_lookup(const char *vname)
463 {
464 	struct bt_var *bv;
465 
466 	SLIST_FOREACH(bv, &l_variables, bv_next) {
467 		if (strcmp(vname, bv->bv_name) == 0)
468 			break;
469 	}
470 
471 	return bv;
472 }
473 
474 /* Find or create a local variable corresponding to `vname' */
475 struct bt_arg *
476 bl_find(const char *vname)
477 {
478 	struct bt_var *bv;
479 
480 	bv = bl_lookup(vname);
481 	if (bv == NULL) {
482 		bv = bv_new(vname);
483 		SLIST_INSERT_HEAD(&l_variables, bv, bv_next);
484 	}
485 
486 	return ba_new(bv, B_AT_VAR);
487 }
488 
489 /* Create a 'store' statement to assign a value to a local variable. */
490 struct bt_stmt *
491 bl_store(const char *vname, struct bt_arg *vval)
492 {
493 	struct bt_var *bv;
494 
495 	bv = bl_lookup(vname);
496 	if (bv == NULL) {
497 		bv = bv_new(vname);
498 		SLIST_INSERT_HEAD(&l_variables, bv, bv_next);
499 	}
500 
501 	return bs_new(B_AC_STORE, vval, bv);
502 }
503 
504 struct bt_stmt *
505 bm_op(enum bt_action mact, struct bt_arg *ba, struct bt_arg *mval)
506 {
507 	return bs_new(mact, ba, (struct bt_var *)mval);
508 }
509 
510 /* Create a 'map store' statement to assign a value to a map entry. */
511 struct bt_stmt *
512 bm_insert(const char *mname, struct bt_arg *mkey, struct bt_arg *mval)
513 {
514 	struct bt_arg *ba;
515 
516 	ba = ba_new(bg_get(mname), B_AT_MAP);
517 	ba->ba_key = mkey;
518 
519 	return bs_new(B_AC_INSERT, ba, (struct bt_var *)mval);
520 }
521 
522 /* Create an argument that points to a variable and attach a key to it. */
523 struct bt_arg *
524 bm_find(const char *vname, struct bt_arg *mkey)
525 {
526 	struct bt_var *bv;
527 	struct bt_arg *ba;
528 
529 	bv = bg_lookup(vname);
530 	if (bv == NULL)
531 		yyerror("variable '%s' accessed before being set", vname);
532 
533 	ba = ba_new(bv, B_AT_MAP);
534 	ba->ba_key = mkey;
535 	return ba;
536 }
537 
538 /*
539  * Histograms implemented using associative arrays (maps).  In the case
540  * of linear histograms `ba_key' points to a list of (min, max, step)
541  * necessary to "bucketize" any value.
542  */
543 struct bt_stmt *
544 bh_inc(const char *hname, struct bt_arg *hval, struct bt_arg *hrange)
545 {
546 	struct bt_arg *ba;
547 
548 	if (hrange == NULL) {
549 		/* Power-of-2 histogram */
550 	} else {
551 		long min = 0, max;
552 		int count = 0;
553 
554 		/* Linear histogram */
555 		for (ba = hrange; ba != NULL; ba = SLIST_NEXT(ba, ba_next)) {
556 			if (++count > 3)
557 				yyerror("too many arguments");
558 			if (ba->ba_type != B_AT_LONG)
559 				yyerror("type invalid");
560 
561 			switch (count) {
562 			case 1:
563 				min = (long)ba->ba_value;
564 				if (min >= 0)
565 					break;
566 				yyerror("negative minium");
567 			case 2:
568 				max = (long)ba->ba_value;
569 				if (max > min)
570 					break;
571 				yyerror("maximum smaller than minium (%d < %d)",
572 				    max,  min);
573 			case 3:
574 				break;
575 			default:
576 				assert(0);
577 			}
578 		}
579 		if (count < 3)
580 			yyerror("%d missing arguments", 3 - count);
581 	}
582 
583 	ba = ba_new(bg_get(hname), B_AT_HIST);
584 	ba->ba_key = hrange;
585 	return bs_new(B_AC_BUCKETIZE, ba, (struct bt_var *)hval);
586 }
587 
/* Entry of the table of reserved words known to the lexer. */
struct keyword {
	const char	*word;		/* spelling, compared by kw_cmp() */
	int		 token;		/* token returned by yylex() */
	int		 type;		/* value stored in yylval.v.i */
};
593 
594 int
595 kw_cmp(const void *str, const void *xkw)
596 {
597 	return (strcmp(str, ((const struct keyword *)xkw)->word));
598 }
599 
600 struct keyword *
601 lookup(char *s)
602 {
603 	static const struct keyword kws[] = {
604 		{ "BEGIN",	BEGIN,		B_RT_BEGIN },
605 		{ "END",	END,		B_RT_END },
606 		{ "arg0",	BUILTIN,	B_AT_BI_ARG0 },
607 		{ "arg1",	BUILTIN,	B_AT_BI_ARG1 },
608 		{ "arg2",	BUILTIN,	B_AT_BI_ARG2 },
609 		{ "arg3",	BUILTIN,	B_AT_BI_ARG3 },
610 		{ "arg4",	BUILTIN,	B_AT_BI_ARG4 },
611 		{ "arg5",	BUILTIN,	B_AT_BI_ARG5 },
612 		{ "arg6",	BUILTIN,	B_AT_BI_ARG6 },
613 		{ "arg7",	BUILTIN,	B_AT_BI_ARG7 },
614 		{ "arg8",	BUILTIN,	B_AT_BI_ARG8 },
615 		{ "arg9",	BUILTIN,	B_AT_BI_ARG9 },
616 		{ "clear",	FUNC1,		B_AC_CLEAR },
617 		{ "comm",	BUILTIN,	B_AT_BI_COMM },
618 		{ "count",	MOP0, 		B_AT_MF_COUNT },
619 		{ "cpu",	BUILTIN,	B_AT_BI_CPU },
620 		{ "delete",	F_DELETE,	B_AC_DELETE },
621 		{ "exit",	FUNC0,		B_AC_EXIT },
622 		{ "hist",	OP1,		0 },
623 		{ "hz",		HZ,		0 },
624 		{ "kstack",	BUILTIN,	B_AT_BI_KSTACK },
625 		{ "lhist",	OP4,		0 },
626 		{ "max",	MOP1,		B_AT_MF_MAX },
627 		{ "min",	MOP1,		B_AT_MF_MIN },
628 		{ "nsecs",	BUILTIN,	B_AT_BI_NSECS },
629 		{ "pid",	BUILTIN,	B_AT_BI_PID },
630 		{ "print",	F_PRINT,	B_AC_PRINT },
631 		{ "printf",	FUNCN,		B_AC_PRINTF },
632 		{ "retval",	BUILTIN,	B_AT_BI_RETVAL },
633 		{ "sum",	MOP1,		B_AT_MF_SUM },
634 		{ "tid",	BUILTIN,	B_AT_BI_TID },
635 		{ "time",	FUNC1,		B_AC_TIME },
636 		{ "ustack",	BUILTIN,	B_AT_BI_USTACK },
637 		{ "zero",	FUNC1,		B_AC_ZERO },
638 	};
639 
640 	return bsearch(s, kws, nitems(kws), sizeof(kws[0]), kw_cmp);
641 }
642 
643 int
644 peek(void)
645 {
646 	if (pbuf != NULL) {
647 		if (pindex < plen)
648 			return pbuf[pindex];
649 	}
650 	return EOF;
651 }
652 
653 int
654 lgetc(void)
655 {
656 	if (pbuf != NULL) {
657 		if (pindex < plen) {
658 			yylval.colno++;
659 			return pbuf[pindex++];
660 		}
661 	}
662 	return EOF;
663 }
664 
665 void
666 lungetc(void)
667 {
668 	if (pbuf != NULL && pindex > 0) {
669 		yylval.colno--;
670 		pindex--;
671 	}
672 }
673 
674 int
675 yylex(void)
676 {
677 	unsigned char	 buf[1024];
678 	unsigned char	*ebuf, *p, *str;
679 	int		 c;
680 
681 	ebuf = buf + sizeof(buf);
682 	p = buf;
683 
684 again:
685 	/* skip whitespaces */
686 	for (c = lgetc(); isspace(c); c = lgetc()) {
687 		if (c == '\n') {
688 			yylval.lineno++;
689 			yylval.colno = 0;
690 		}
691 	}
692 
693 	/* skip single line comments and shell magic */
694 	if ((c == '/' && peek() == '/') ||
695 	    (yylval.lineno == 1 && yylval.colno == 1 && c == '#' &&
696 	     peek() == '!')) {
697 		for (c = lgetc(); c != EOF; c = lgetc()) {
698 			if (c == '\n') {
699 				yylval.lineno++;
700 				yylval.colno = 0;
701 				goto again;
702 			}
703 		}
704 	}
705 
706 	/* skip multi line comments */
707 	if (c == '/' && peek() == '*') {
708 		int pc;
709 
710 		for (pc = 0, c = lgetc(); c != EOF; c = lgetc()) {
711 			if (pc == '*' && c == '/')
712 				goto again;
713 			else if (c == '\n')
714 				yylval.lineno++;
715 			pc = c;
716 		}
717 	}
718 
719 	switch (c) {
720 	case '!':
721 	case '=':
722 		if (peek() == '=') {
723 			lgetc();
724 			return (c == '=') ? OP_EQ : OP_NE;
725 		}
726 	case '&':
727 		if (peek() == '&') {
728 			lgetc();
729 			return OP_LAND;
730 		}
731 	case '|':
732 		if (peek() == '|') {
733 			lgetc();
734 			return OP_LOR;
735 		}
736 	case '/':
737 		if (peek() == '{' || peek() == '/' || peek() == '\n') {
738 			return ENDFILT;
739 		}
740 		/* FALLTHROUGH */
741 	case ',':
742 	case '(':
743 	case ')':
744 	case '{':
745 	case '}':
746 	case ':':
747 	case ';':
748 		return c;
749 	case EOF:
750 		return 0;
751 	case '"':
752 		/* parse C-like string */
753 		while ((c = lgetc()) != EOF && c != '"') {
754 			if (c == '\\') {
755 				c = lgetc();
756 				switch (c) {
757 				case '\\':	c = '\\';	break;
758 				case '\'':	c = '\'';	break;
759 				case '"':	c = '"';	break;
760 				case 'a':	c = '\a';	break;
761 				case 'b':	c = '\b';	break;
762 				case 'e':	c = 033;	break;
763 				case 'f':	c = '\f';	break;
764 				case 'n':	c = '\n';	break;
765 				case 'r':	c = '\r';	break;
766 				case 't':	c = '\t';	break;
767 				case 'v':	c = '\v';	break;
768 				default:
769 					yyerror("'%c' unsuported escape", c);
770 					return ERROR;
771 				}
772 			}
773 			*p++ = c;
774 			if (p == ebuf) {
775 				yyerror("too long line");
776 				return ERROR;
777 			}
778 		}
779 		if (c == EOF) {
780 			yyerror("\"%s\" invalid EOF", buf);
781 			return ERROR;
782 		}
783 		*p++ = '\0';
784 		if ((str = strdup(buf)) == NULL)
785 			err(1, "%s", __func__);
786 		yylval.v.string = str;
787 		return CSTRING;
788 	default:
789 		break;
790 	}
791 
792 #define allowed_to_end_number(x) \
793     (isspace(x) || x == ')' || x == '/' || x == '{' || x == ';' || x == ']' || x == ',')
794 
795 	/* parsing number */
796 	if (isdigit(c)) {
797 		do {
798 			*p++ = c;
799 			if (p == ebuf) {
800 				yyerror("too long line");
801 				return ERROR;
802 			}
803 		} while ((c = lgetc()) != EOF && isdigit(c));
804 		lungetc();
805 		if (c == EOF || allowed_to_end_number(c)) {
806 			const char *errstr = NULL;
807 
808 			*p = '\0';
809 			yylval.v.number = strtonum(buf, LONG_MIN, LONG_MAX,
810 			    &errstr);
811 			if (errstr) {
812 				yyerror("invalid number '%s' (%s)", buf,
813 				    errstr);
814 				return ERROR;
815 			}
816 			return NUMBER;
817 		} else {
818 			while (p > buf + 1) {
819 				--p;
820 				lungetc();
821 			}
822 			c = *--p;
823 		}
824 	}
825 
826 #define allowed_in_string(x) (isalnum(c) || c == '_')
827 
828 	/* parsing next word */
829 	if (allowed_in_string(c)) {
830 		struct keyword *kwp;
831 		do {
832 			*p++ = c;
833 			if (p == ebuf) {
834 				yyerror("too long line");
835 				return ERROR;
836 			}
837 		} while ((c = lgetc()) != EOF && (allowed_in_string(c)));
838 		lungetc();
839 		*p = '\0';
840 		kwp = lookup(buf);
841 		if (kwp == NULL) {
842 			if ((yylval.v.string = strdup(buf)) == NULL)
843 				err(1, "%s", __func__);
844 			return STRING;
845 		}
846 		if (pflag) {
847 			/*
848 			 * Probe lexer backdoor, interpret the token as a string
849 			 * rather than a keyword. Otherwise, reserved keywords
850 			 * would conflict with syscall names. The exception to
851 			 * this is 'hz', which hopefully will never be a
852 			 * syscall.
853 			 */
854 			if (kwp->token != HZ) {
855 				yylval.v.string = kwp->word;
856 				return STRING;
857 			}
858 		}
859 		yylval.v.i = kwp->type;
860 		return kwp->token;
861 	}
862 
863 	if (c == '\n') {
864 		yylval.lineno++;
865 		yylval.colno = 0;
866 	}
867 	if (c == EOF)
868 		return 0;
869 	return c;
870 }
871 
872 void
873 pprint_syntax_error(void)
874 {
875 	char line[BUFSIZ];
876 	int c, indent = yylval.colno;
877 	size_t i;
878 
879 	strlcpy(line, &pbuf[pindex - yylval.colno], sizeof(line));
880 
881 	for (i = 0; line[i] != '\0' && (c = line[i]) != '\n'; i++) {
882 		if (c == '\t')
883 			indent += (8 - 1);
884 		fputc(c, stderr);
885 	}
886 
887 	fprintf(stderr, "\n%*c\n", indent, '^');
888 }
889 
890 void
891 yyerror(const char *fmt, ...)
892 {
893 	const char *prefix;
894 	va_list	va;
895 
896 	prefix = (yylval.filename != NULL) ? yylval.filename : getprogname();
897 
898 	fprintf(stderr, "%s:%d:%d: ", prefix, yylval.lineno, yylval.colno);
899 	va_start(va, fmt);
900 	vfprintf(stderr, fmt, va);
901 	va_end(va);
902 	fprintf(stderr, ":\n");
903 
904 	pprint_syntax_error();
905 
906 	perrors++;
907 }
908 
909 int
910 btparse(const char *str, size_t len, const char *filename, int debug)
911 {
912 	if (debug > 0)
913 		yydebug = 1;
914 	pbuf = str;
915 	plen = len;
916 	pindex = 0;
917 	yylval.filename = filename;
918 	yylval.lineno = 1;
919 
920 	yyparse();
921 
922 	assert(SLIST_EMPTY(&l_variables));
923 
924 	return perrors;
925 }
926