xref: /openbsd/usr.bin/awk/awkgram.y (revision 4bdff4be)
1 /*	$OpenBSD: awkgram.y,v 1.16 2023/09/10 14:59:00 millert Exp $	*/
2 /****************************************************************
3 Copyright (C) Lucent Technologies 1997
4 All Rights Reserved
5 
6 Permission to use, copy, modify, and distribute this software and
7 its documentation for any purpose and without fee is hereby
8 granted, provided that the above copyright notice appear in all
9 copies and that both that the copyright notice and this
10 permission notice and warranty disclaimer appear in supporting
11 documentation, and that the name Lucent Technologies or any of
12 its entities not be used in advertising or publicity pertaining
13 to distribution of the software without specific, written prior
14 permission.
15 
16 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
17 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
18 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
19 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
20 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
21 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
22 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
23 THIS SOFTWARE.
24 ****************************************************************/
25 
26 %{	/* C prologue: copied verbatim into the generated parser */
27 #include <stdio.h>
28 #include <string.h>
29 #include "awk.h"
30 
31 void checkdup(Node *list, Cell *item);	/* defined after the grammar; reports duplicate parameter names */
32 int yywrap(void) { return(1); }	/* always returns 1 (lex convention: no further input) */
33 
34 Node	*beginloc = 0;	/* BEGIN actions accumulated by pa_stat via linkum() */
35 Node	*endloc = 0;	/* END actions accumulated by pa_stat via linkum() */
36 bool	infunc	= false;	/* = true if in arglist or body of func */
37 int	inloop	= 0;	/* >= 1 if in while, for, do; can't be bool, since loops can nest */
38 char	*curfname = 0;	/* current function name */
39 Node	*arglist = 0;	/* list of args for current function */
40 %}
41 
42 %union {	/* semantic value types selected by the <p>, <cp>, <i>, <s> tags below */
43 	Node	*p;	/* parse-tree node */
44 	Cell	*cp;	/* symbol/constant cell */
45 	int	i;	/* integer value (e.g. a token or operator code) */
46 	char	*s;	/* string, used for regular-expression text */
47 }
48 
49 %token	<i>	FIRSTTOKEN	/* must be first */
50 %token	<p>	PROGRAM PASTAT PASTAT2 XBEGIN XEND
51 %token	<i>	NL ',' '{' '(' '|' ';' '/' ')' '}' '[' ']'
52 %token	<i>	ARRAY
53 %token	<i>	MATCH NOTMATCH MATCHOP
54 %token	<i>	FINAL DOT ALL CCL NCCL CHAR OR STAR QUEST PLUS EMPTYRE ZERO
55 %token	<i>	AND BOR APPEND EQ GE GT LE LT NE IN
56 %token	<i>	ARG BLTIN BREAK CLOSE CONTINUE DELETE DO EXIT FOR FUNC
57 %token	<i>	GENSUB SUB GSUB IF INDEX LSUBSTR MATCHFCN NEXT NEXTFILE
58 %token	<i>	ADD MINUS MULT DIVIDE MOD
59 %token	<i>	ASSIGN ASGNOP ADDEQ SUBEQ MULTEQ DIVEQ MODEQ POWEQ
60 %token	<i>	PRINT PRINTF SPRINTF
61 %token	<p>	ELSE INTEST CONDEXPR
62 %token	<i>	POSTINCR PREINCR POSTDECR PREDECR
63 %token	<cp>	VAR IVAR VARNF CALL NUMBER STRING	/* these tokens carry a Cell * */
64 %token	<s>	REGEXPR	/* carries the regular-expression text as char * */
65 
66 %type	<p>	pas pattern ppattern plist pplist patlist prarg term re	/* nonterminals whose value is a Node * */
67 %type	<p>	pa_pat pa_stat pa_stats
68 %type	<s>	reg_expr
69 %type	<p>	simple_stmt opt_simple_stmt stmt stmtlist
70 %type	<p>	var varname funcname varlist
71 %type	<p>	for if else while
72 %type	<i>	do st
73 %type	<i>	pst opt_pst lbrace rbrace rparen comma nl opt_nl and bor
74 %type	<i>	subop print
75 %type	<cp>	string
76 
77 %right	ASGNOP	/* precedence table starts here; in yacc, later declarations bind tighter */
78 %right	'?'
79 %right	':'
80 %left	BOR
81 %left	AND
82 %left	GETLINE
83 %nonassoc APPEND EQ GE GT LE LT NE MATCHOP IN '|'
84 %left	ARG BLTIN BREAK CALL CLOSE CONTINUE DELETE DO EXIT FOR FUNC
85 %left	GSUB IF INDEX LSUBSTR MATCHFCN NEXT NUMBER
86 %left	PRINT PRINTF RETURN SPLIT SPRINTF STRING SUB SUBSTR
87 %left	REGEXPR VAR VARNF IVAR WHILE '('
88 %left	CAT	/* used via %prec for concatenation by juxtaposition */
89 %left	'+' '-'
90 %left	'*' '/' '%'
91 %left	NOT UMINUS UPLUS
92 %right	POWER
93 %right	DECR INCR
94 %left	INDIRECT
95 %token	LASTTOKEN	/* must be last */
96 
97 %%
98 
99 program:	/* start symbol (first rule of the grammar) */
100 	  pas	{ if (errorflag==0)	/* the PROGRAM tree is stored in winner only when errorflag is 0 */
101 			winner = (Node *)stat3(PROGRAM, beginloc, $1, endloc); }
102 	| error	{ yyclearin; bracecheck(); SYNTAX("bailing out"); }	/* unrecoverable parse error */
103 	;
104 
105 and:	/* AND followed by any number of NL tokens */
106 	  AND | and NL
107 	;
108 
109 bor:	/* BOR followed by any number of NL tokens */
110 	  BOR | bor NL
111 	;
112 
113 comma:	/* ',' followed by any number of NL tokens */
114 	  ',' | comma NL
115 	;
116 
117 do:	/* DO followed by any number of NL tokens */
118 	  DO | do NL
119 	;
120 
121 else:	/* ELSE followed by any number of NL tokens */
122 	  ELSE | else NL
123 	;
124 
125 for:	/* for statements; the mid-rule action bumps inloop around the body (stmt checks it for break/continue) */
126 	  FOR '(' opt_simple_stmt ';' opt_nl pattern ';' opt_nl opt_simple_stmt rparen {inloop++;} stmt
127 		{ --inloop; $$ = stat4(FOR, $3, notnull($6), $9, $12); }	/* init, condition, step, body */
128 	| FOR '(' opt_simple_stmt ';'  ';' opt_nl opt_simple_stmt rparen {inloop++;} stmt
129 		{ --inloop; $$ = stat4(FOR, $3, NIL, $7, $10); }	/* no condition: NIL in its place */
130 	| FOR '(' varname IN varname rparen {inloop++;} stmt
131 		{ --inloop; $$ = stat3(IN, $3, makearr($5), $8); }	/* for (var in array): builds an IN node */
132 	;
133 
134 funcname:	/* name in a function definition; setfname() checks it and records curfname */
135 	  VAR	{ setfname($1); }
136 	| CALL	{ setfname($1); }
137 	;
138 
139 if:	/* "if (cond)" header; its value is the condition passed through notnull() */
140 	  IF '(' pattern rparen		{ $$ = notnull($3); }
141 	;
142 
143 lbrace:	/* '{' followed by any number of NL tokens */
144 	  '{' | lbrace NL
145 	;
146 
147 nl:	/* one or more NL tokens */
148 	  NL | nl NL
149 	;
150 
151 opt_nl:	/* zero or more NL tokens */
152 	  /* empty */	{ $$ = 0; }
153 	| nl
154 	;
155 
156 opt_pst:	/* optional run of statement separators (see pst) */
157 	  /* empty */	{ $$ = 0; }
158 	| pst
159 	;
160 
161 
162 opt_simple_stmt:	/* optional simple_stmt; value is 0 when absent */
163 	  /* empty */			{ $$ = 0; }
164 	| simple_stmt
165 	;
166 
167 pas:	/* program body: pattern-action statements with optional separators around them */
168 	  opt_pst			{ $$ = 0; }	/* no pattern-action statements at all */
169 	| opt_pst pa_stats opt_pst	{ $$ = $2; }
170 	;
171 
172 pa_pat:	/* pattern part of a pattern-action statement, passed through notnull() */
173 	  pattern	{ $$ = notnull($1); }
174 	;
175 
176 pa_stat:	/* one top-level item: pattern-action, range pattern, BEGIN/END block, or function definition */
177 	  pa_pat			{ $$ = stat2(PASTAT, $1, stat2(PRINT, rectonode(), NIL)); }	/* pattern only: action defaults to PRINT of rectonode() */
178 	| pa_pat lbrace stmtlist '}'	{ $$ = stat2(PASTAT, $1, $3); }
179 	| pa_pat ',' opt_nl pa_pat		{ $$ = pa2stat($1, $4, stat2(PRINT, rectonode(), NIL)); }	/* two patterns, default PRINT action */
180 	| pa_pat ',' opt_nl pa_pat lbrace stmtlist '}'	{ $$ = pa2stat($1, $4, $6); }
181 	| lbrace stmtlist '}'		{ $$ = stat2(PASTAT, NIL, $2); }	/* action without a pattern */
182 	| XBEGIN lbrace stmtlist '}'
183 		{ beginloc = linkum(beginloc, $3); $$ = 0; }	/* appended to beginloc; contributes no node here */
184 	| XEND lbrace stmtlist '}'
185 		{ endloc = linkum(endloc, $3); $$ = 0; }	/* appended to endloc; contributes no node here */
186 	| FUNC funcname '(' varlist rparen {infunc = true;} lbrace stmtlist '}'
187 		{ infunc = false; curfname=0; defn((Cell *)$2, $4, $8); $$ = 0; }	/* mid-rule action is $6, so the body is $8 */
188 	;
189 
190 pa_stats:	/* one or more pa_stat items chained with linkum() */
191 	  pa_stat
192 	| pa_stats opt_pst pa_stat	{ $$ = linkum($1, $3); }
193 	;
194 
195 patlist:	/* comma-separated list of one or more patterns, chained with linkum() */
196 	  pattern
197 	| patlist comma pattern		{ $$ = linkum($1, $3); }
198 	;
199 
200 ppattern:	/* like pattern, but without the EQ/GE/GT/LE/LT/NE and '|' GETLINE alternatives; used by pplist/prarg (print arguments) */
201 	  var ASGNOP ppattern		{ $$ = op2($2, $1, $3); }
202 	| ppattern '?' ppattern ':' ppattern %prec '?'
203 	 	{ $$ = op3(CONDEXPR, notnull($1), $3, $5); }
204 	| ppattern bor ppattern %prec BOR
205 		{ $$ = op2(BOR, notnull($1), notnull($3)); }
206 	| ppattern and ppattern %prec AND
207 		{ $$ = op2(AND, notnull($1), notnull($3)); }
208 	| ppattern MATCHOP reg_expr	{ $$ = op3($2, NIL, $1, (Node*)makedfa($3, 0)); free($3); }	/* literal regex: compiled here, text freed */
209 	| ppattern MATCHOP ppattern
210 		{ if (constnode($3)) {	/* constant right operand: compile its string now */
211 			$$ = op3($2, NIL, $1, (Node*)makedfa(strnode($3), 0));
212 			free($3);
213 		  } else	/* otherwise keep the expression; (Node *)1 presumably flags a run-time regex -- confirm in run.c */
214 			$$ = op3($2, (Node *)1, $1, $3); }
215 	| ppattern IN varname		{ $$ = op2(INTEST, $1, makearr($3)); }
216 	| '(' plist ')' IN varname	{ $$ = op2(INTEST, $2, makearr($5)); }
217 	| ppattern term %prec CAT	{ $$ = op2(CAT, $1, $2); }	/* juxtaposition builds a CAT node */
218 	| re
219 	| term
220 	;
221 
222 pattern:	/* general expression: assignment, ?:, logical ops, comparisons, matching, "in", cmd | getline, concatenation */
223 	  var ASGNOP pattern		{ $$ = op2($2, $1, $3); }
224 	| pattern '?' pattern ':' pattern %prec '?'
225 	 	{ $$ = op3(CONDEXPR, notnull($1), $3, $5); }
226 	| pattern bor pattern %prec BOR
227 		{ $$ = op2(BOR, notnull($1), notnull($3)); }
228 	| pattern and pattern %prec AND
229 		{ $$ = op2(AND, notnull($1), notnull($3)); }
230 	| pattern EQ pattern		{ $$ = op2($2, $1, $3); }	/* comparisons use the token's own value ($2) as the node type */
231 	| pattern GE pattern		{ $$ = op2($2, $1, $3); }
232 	| pattern GT pattern		{ $$ = op2($2, $1, $3); }
233 	| pattern LE pattern		{ $$ = op2($2, $1, $3); }
234 	| pattern LT pattern		{ $$ = op2($2, $1, $3); }
235 	| pattern NE pattern		{ $$ = op2($2, $1, $3); }
236 	| pattern MATCHOP reg_expr	{ $$ = op3($2, NIL, $1, (Node*)makedfa($3, 0)); free($3); }	/* literal regex: compiled here, text freed */
237 	| pattern MATCHOP pattern
238 		{ if (constnode($3)) {	/* constant right operand: compile its string now */
239 			$$ = op3($2, NIL, $1, (Node*)makedfa(strnode($3), 0));
240 			free($3);
241 		  } else	/* otherwise keep the expression; (Node *)1 presumably flags a run-time regex -- confirm in run.c */
242 			$$ = op3($2, (Node *)1, $1, $3); }
243 	| pattern IN varname		{ $$ = op2(INTEST, $1, makearr($3)); }
244 	| '(' plist ')' IN varname	{ $$ = op2(INTEST, $2, makearr($5)); }
245 	| pattern '|' GETLINE var	{	/* cmd | getline var; reported as a syntax error when `safe` is set */
246 			if (safe) SYNTAX("cmd | getline is unsafe");
247 			else $$ = op3(GETLINE, $4, itonp($2), $1); }
248 	| pattern '|' GETLINE		{	/* cmd | getline with no variable */
249 			if (safe) SYNTAX("cmd | getline is unsafe");
250 			else $$ = op3(GETLINE, (Node*)0, itonp($2), $1); }
251 	| pattern term %prec CAT	{ $$ = op2(CAT, $1, $2); }	/* juxtaposition builds a CAT node */
252 	| re
253 	| term
254 	;
255 
256 plist:	/* comma-separated list of at least two patterns */
257 	  pattern comma pattern		{ $$ = linkum($1, $3); }
258 	| plist comma pattern		{ $$ = linkum($1, $3); }
259 	;
260 
261 pplist:	/* comma-separated list of one or more ppatterns */
262 	  ppattern
263 	| pplist comma ppattern		{ $$ = linkum($1, $3); }
264 	;
265 
266 prarg:	/* argument list of print/printf */
267 	  /* empty */			{ $$ = rectonode(); }	/* no arguments: rectonode() is used instead */
268 	| pplist
269 	| '(' plist ')'			{ $$ = $2; }	/* parenthesized list of two or more patterns */
270 	;
271 
272 print:	/* either output keyword; the value is the token itself (default $$ = $1) */
273 	  PRINT | PRINTF
274 	;
275 
276 pst:	/* one or more statement separators: NL or ';' in any mix */
277 	  NL | ';' | pst NL | pst ';'
278 	;
279 
280 rbrace:	/* '}' followed by any number of NL tokens */
281 	  '}' | rbrace NL
282 	;
283 
284 re:	/* a bare /regex/ used as an expression, optionally negated */
285 	   reg_expr
286 		{ $$ = op3(MATCH, NIL, rectonode(), (Node*)makedfa($1, 0)); free($1); }	/* MATCH of rectonode() against the compiled regex */
287 	| NOT re	{ $$ = op1(NOT, notnull($2)); }
288 	;
289 
290 reg_expr:	/* /regex/ literal; startreg() runs after the opening '/' and before REGEXPR is read */
291 	  '/' {startreg();} REGEXPR '/'		{ $$ = $3; }	/* the mid-rule action is $2, so the text is $3 */
292 	;
293 
294 rparen:	/* ')' followed by any number of NL tokens */
295 	  ')' | rparen NL
296 	;
297 
298 simple_stmt:	/* print/printf (with optional redirection), delete, or an expression statement */
299 	  print prarg '|' term		{	/* output piped to term; syntax error when `safe` is set */
300 			if (safe) SYNTAX("print | is unsafe");
301 			else $$ = stat3($1, $2, itonp($3), $4); }
302 	| print prarg APPEND term	{	/* output appended (>>) to term; syntax error when `safe` is set */
303 			if (safe) SYNTAX("print >> is unsafe");
304 			else $$ = stat3($1, $2, itonp($3), $4); }
305 	| print prarg GT term		{	/* output redirected (>) to term; syntax error when `safe` is set */
306 			if (safe) SYNTAX("print > is unsafe");
307 			else $$ = stat3($1, $2, itonp($3), $4); }
308 	| print prarg			{ $$ = stat3($1, $2, NIL, NIL); }	/* no redirection */
309 	| DELETE varname '[' patlist ']' { $$ = stat2(DELETE, makearr($2), $4); }	/* delete with a subscript list */
310 	| DELETE varname		 { $$ = stat2(DELETE, makearr($2), 0); }	/* delete with no subscript */
311 	| pattern			{ $$ = exptostat($1); }	/* expression used as a statement */
312 	| error				{ yyclearin; SYNTAX("illegal statement"); }
313 	;
314 
315 st:	/* statement terminator: newline(s), or ';' with optional newlines */
316 	  nl
317 	| ';' opt_nl
318 	;
319 
320 stmt:	/* a complete statement, including its terminator where one is required */
321 	  BREAK st		{ if (!inloop) SYNTAX("break illegal outside of loops");	/* node is still built after the error */
322 				  $$ = stat1(BREAK, NIL); }
323 	| CONTINUE st		{  if (!inloop) SYNTAX("continue illegal outside of loops");
324 				  $$ = stat1(CONTINUE, NIL); }
325 	| do {inloop++;} stmt {--inloop;} WHILE '(' pattern ')' st	/* mid-rule actions occupy $2 and $4: body is $3, condition is $7 */
326 		{ $$ = stat2(DO, $3, notnull($7)); }
327 	| EXIT pattern st	{ $$ = stat1(EXIT, $2); }
328 	| EXIT st		{ $$ = stat1(EXIT, NIL); }
329 	| for
330 	| if stmt else stmt	{ $$ = stat3(IF, $1, $2, $4); }
331 	| if stmt		{ $$ = stat3(IF, $1, $2, NIL); }
332 	| lbrace stmtlist rbrace { $$ = $2; }	/* braced block */
333 	| NEXT st	{ if (infunc)
334 				SYNTAX("next is illegal inside a function");
335 			  $$ = stat1(NEXT, NIL); }
336 	| NEXTFILE st	{ if (infunc)
337 				SYNTAX("nextfile is illegal inside a function");
338 			  $$ = stat1(NEXTFILE, NIL); }
339 	| RETURN pattern st	{ $$ = stat1(RETURN, $2); }
340 	| RETURN st		{ $$ = stat1(RETURN, NIL); }
341 	| simple_stmt st	/* no action: yacc default $$ = $1 */
342 	| while {inloop++;} stmt	{ --inloop; $$ = stat2(WHILE, $1, $3); }	/* mid-rule action is $2, body is $3 */
343 	| ';' opt_nl		{ $$ = 0; }	/* empty statement */
344 	;
345 
346 stmtlist:	/* one or more statements chained with linkum() */
347 	  stmt
348 	| stmtlist stmt		{ $$ = linkum($1, $2); }
349 	;
350 
351 subop:	/* either substitution keyword; the value is the token itself (default $$ = $1) */
352 	  SUB | GSUB
353 	;
354 
355 string:	/* one or more adjacent STRING tokens, merged with catstr() */
356 	  STRING
357 	| string STRING		{ $$ = catstr($1, $2); }
358 	;
359 
360 term:	/* operand-level expressions: arithmetic, unary operators, built-in and user calls, getline, constants, variables */
361  	  term '/' ASGNOP term		{ $$ = op2(DIVEQ, $1, $4); }	/* '/' followed by ASGNOP becomes a DIVEQ node */
362  	| term '+' term			{ $$ = op2(ADD, $1, $3); }
363 	| term '-' term			{ $$ = op2(MINUS, $1, $3); }
364 	| term '*' term			{ $$ = op2(MULT, $1, $3); }
365 	| term '/' term			{ $$ = op2(DIVIDE, $1, $3); }
366 	| term '%' term			{ $$ = op2(MOD, $1, $3); }
367 	| term POWER term		{ $$ = op2(POWER, $1, $3); }
368 	| '-' term %prec UMINUS		{ $$ = op1(UMINUS, $2); }
369 	| '+' term %prec UMINUS		{ $$ = op1(UPLUS, $2); }
370 	| NOT term %prec UMINUS		{ $$ = op1(NOT, notnull($2)); }
371 	| BLTIN '(' ')'			{ $$ = op2(BLTIN, itonp($1), rectonode()); }	/* empty argument list: rectonode() is passed */
372 	| BLTIN '(' patlist ')'		{ $$ = op2(BLTIN, itonp($1), $3); }
373 	| BLTIN				{ $$ = op2(BLTIN, itonp($1), rectonode()); }	/* no parentheses: same as the empty list */
374 	| CALL '(' ')'			{ $$ = op2(CALL, celltonode($1,CVAR), NIL); }
375 	| CALL '(' patlist ')'		{ $$ = op2(CALL, celltonode($1,CVAR), $3); }
376 	| CLOSE term			{ $$ = op1(CLOSE, $2); }
377 	| DECR var			{ $$ = op1(PREDECR, $2); }
378 	| INCR var			{ $$ = op1(PREINCR, $2); }
379 	| var DECR			{ $$ = op1(POSTDECR, $1); }
380 	| var INCR			{ $$ = op1(POSTINCR, $1); }
381 	| GENSUB '(' reg_expr comma pattern comma pattern ')'	/* 3-arg form: target defaults to rectonode() */
382 		{ $$ = op5(GENSUB, NIL, (Node*)makedfa($3, 1), $5, $7, rectonode()); free($3); }	/* free the regex text, as the other reg_expr rules do */
383 	| GENSUB '(' pattern comma pattern comma pattern ')'
384 		{ if (constnode($3)) {	/* constant first argument: compile its string now */
385 			$$ = op5(GENSUB, NIL, (Node *)makedfa(strnode($3), 1), $5, $7, rectonode());
386 			free($3);
387 		  } else
388 			$$ = op5(GENSUB, (Node *)1, $3, $5, $7, rectonode());
389 		}
390 	| GENSUB '(' reg_expr comma pattern comma pattern comma pattern ')'	/* 4-arg form: explicit target */
391 		{ $$ = op5(GENSUB, NIL, (Node*)makedfa($3, 1), $5, $7, $9); free($3); }	/* free the regex text, as the other reg_expr rules do */
392 	| GENSUB '(' pattern comma pattern comma pattern comma pattern ')'
393 		{ if (constnode($3)) {	/* constant first argument: compile its string now */
394 			$$ = op5(GENSUB, NIL, (Node *)makedfa(strnode($3),1), $5,$7,$9);
395 			free($3);
396 		  } else
397 			$$ = op5(GENSUB, (Node *)1, $3, $5, $7, $9);
398 		}
399 	| GETLINE var LT term		{ $$ = op3(GETLINE, $2, itonp($3), $4); }	/* getline var < file */
400 	| GETLINE LT term		{ $$ = op3(GETLINE, NIL, itonp($2), $3); }	/* getline < file */
401 	| GETLINE var			{ $$ = op3(GETLINE, $2, NIL, NIL); }
402 	| GETLINE			{ $$ = op3(GETLINE, NIL, NIL, NIL); }
403 	| INDEX '(' pattern comma pattern ')'
404 		{ $$ = op2(INDEX, $3, $5); }
405 	| INDEX '(' pattern comma reg_expr ')'	/* accepted only so a specific error can be reported */
406 		{ SYNTAX("index() doesn't permit regular expressions");
407 		  $$ = op2(INDEX, $3, (Node*)$5); }
408 	| '(' pattern ')'		{ $$ = $2; }
409 	| MATCHFCN '(' pattern comma reg_expr ')'
410 		{ $$ = op3(MATCHFCN, NIL, $3, (Node*)makedfa($5, 1)); free($5); }
411 	| MATCHFCN '(' pattern comma pattern ')'
412 		{ if (constnode($5)) {	/* constant second argument: compile its string now */
413 			$$ = op3(MATCHFCN, NIL, $3, (Node*)makedfa(strnode($5), 1));
414 			free($5);
415 		  } else
416 			$$ = op3(MATCHFCN, (Node *)1, $3, $5); }
417 	| NUMBER			{ $$ = celltonode($1, CCON); }
418 	| SPLIT '(' pattern comma varname comma pattern ')'     /* string */
419 		{ $$ = op4(SPLIT, $3, makearr($5), $7, (Node*)STRING); }
420 	| SPLIT '(' pattern comma varname comma reg_expr ')'    /* const /regexp/ */
421 		{ $$ = op4(SPLIT, $3, makearr($5), (Node*)makedfa($7, 1), (Node *)REGEXPR); free($7); }
422 	| SPLIT '(' pattern comma varname ')'
423 		{ $$ = op4(SPLIT, $3, makearr($5), NIL, (Node*)STRING); }  /* default */
424 	| SPRINTF '(' patlist ')'	{ $$ = op1($1, $3); }
425 	| string	 		{ $$ = celltonode($1, CCON); }
426 	| subop '(' reg_expr comma pattern ')'	/* 2-arg sub/gsub: target defaults to rectonode() */
427 		{ $$ = op4($1, NIL, (Node*)makedfa($3, 1), $5, rectonode()); free($3); }
428 	| subop '(' pattern comma pattern ')'
429 		{ if (constnode($3)) {	/* constant first argument: compile its string now */
430 			$$ = op4($1, NIL, (Node*)makedfa(strnode($3), 1), $5, rectonode());
431 			free($3);
432 		  } else
433 			$$ = op4($1, (Node *)1, $3, $5, rectonode()); }
434 	| subop '(' reg_expr comma pattern comma var ')'	/* 3-arg sub/gsub: explicit target variable */
435 		{ $$ = op4($1, NIL, (Node*)makedfa($3, 1), $5, $7); free($3); }
436 	| subop '(' pattern comma pattern comma var ')'
437 		{ if (constnode($3)) {	/* constant first argument: compile its string now */
438 			$$ = op4($1, NIL, (Node*)makedfa(strnode($3), 1), $5, $7);
439 			free($3);
440 		  } else
441 			$$ = op4($1, (Node *)1, $3, $5, $7); }
442 	| SUBSTR '(' pattern comma pattern comma pattern ')'
443 		{ $$ = op3(SUBSTR, $3, $5, $7); }
444 	| SUBSTR '(' pattern comma pattern ')'
445 		{ $$ = op3(SUBSTR, $3, $5, NIL); }	/* length omitted: NIL */
446 	| var
447 	;
448 
449 var:	/* assignable operand: plain name, subscripted array element, or an INDIRECT reference */
450 	  varname
451 	| varname '[' patlist ']'	{ $$ = op2(ARRAY, makearr($1), $3); }
452 	| IVAR				{ $$ = op1(INDIRECT, celltonode($1, CVAR)); }
453 	| INDIRECT term	 		{ $$ = op1(INDIRECT, $2); }
454 	;
455 
456 varlist:	/* parameter list of a function definition; every alternative also updates the global arglist */
457 	  /* nothing */		{ arglist = $$ = 0; }
458 	| VAR			{ arglist = $$ = celltonode($1,CVAR); }
459 	| varlist comma VAR	{
460 			checkdup($1, $3);	/* report a parameter name that already appears in the list */
461 			arglist = $$ = linkum($1,celltonode($3,CVAR)); }
462 	;
463 
464 varname:	/* a name token (VAR, ARG or VARNF) converted to a Node */
465 	  VAR			{ $$ = celltonode($1, CVAR); }
466 	| ARG 			{ $$ = op1(ARG, itonp($1)); }
467 	| VARNF			{ $$ = op1(VARNF, (Node *) $1); }
468 	;
469 
470 
471 while:	/* "while (cond)" header; its value is the condition passed through notnull() */
472 	  WHILE '(' pattern rparen	{ $$ = notnull($3); }
473 	;
474 
475 %%
476 
477 void setfname(Cell *p)
478 {
479 	if (isarr(p))
480 		SYNTAX("%s is an array, not a function", p->nval);
481 	else if (isfcn(p))
482 		SYNTAX("you can't define function %s more than once", p->nval);
483 	curfname = p->nval;
484 }
485 
486 int constnode(Node *p)
487 {
488 	return isvalue(p) && ((Cell *) (p->narg[0]))->csub == CCON;
489 }
490 
491 char *strnode(Node *p)
492 {
493 	return ((Cell *)(p->narg[0]))->sval;
494 }
495 
496 Node *notnull(Node *n)
497 {
498 	switch (n->nobj) {
499 	case LE: case LT: case EQ: case NE: case GT: case GE:
500 	case BOR: case AND: case NOT:
501 		return n;
502 	default:
503 		return op2(NE, n, nullnode);
504 	}
505 }
506 
507 void checkdup(Node *vl, Cell *cp)	/* check if name already in list */
508 {
509 	char *s = cp->nval;
510 	for ( ; vl; vl = vl->nnext) {
511 		if (strcmp(s, ((Cell *)(vl->narg[0]))->nval) == 0) {
512 			SYNTAX("duplicate argument %s", s);
513 			break;
514 		}
515 	}
516 }
517