1 /*
2    Changes by Gunnar Ritter, Freiburg i. Br., Germany, December 2002.
3 
4    Sccsid @(#)awk.g.y	1.9 (gritter) 5/14/06>
5  */
6 /* UNIX(R) Regular Expression Tools
7 
8    Copyright (C) 2001 Caldera International, Inc.
9 
10    This program is free software; you can redistribute it and/or modify
11    it under the terms of the GNU General Public License as published by
12    the Free Software Foundation; either version 2 of the License, or
13    (at your option) any later version.
14 
15    This program is distributed in the hope that it will be useful,
16    but WITHOUT ANY WARRANTY; without even the implied warranty of
17    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18    GNU General Public License for more details.
19 
20    You should have received a copy of the GNU General Public License
21    along with this program; if not, write to:
22        Free Software Foundation, Inc.
23        59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
24 */
25 /*		copyright	"%c%" 	*/
26 
27 /*	from RCS Header: awk.g.y 1.2 91/06/25 	*/
28 
29 %{
30 #include "awk.h"
31 #include <unistd.h>
32 #include <inttypes.h>
33 #include <pfmt.h>
/*
 * End-of-input hook for the lex-generated scanner.  Always returns 1,
 * which by lex convention means there is no further input to switch to.
 */
int yywrap(void) { return(1); }
35 #ifndef	DEBUG
36 #	define	PUTS(x)
37 #endif
38 Node	*beginloc = 0, *endloc = 0;
39 int	infunc	= 0;	/* = 1 if in arglist or body of func */
40 unsigned char	*curfname = 0;
41 Node	*arglist = 0;	/* list of args for current function */
42 static void setfname(Cell *);
43 static int constnode(Node *);
44 static unsigned char *strnode(Node *);
45 static Node *notnull(Node *);
46 extern	const char illstat[];
47 
48 extern int	yylex(void);
49 %}
50 
/*
 * Semantic value carried by every token and nonterminal.  The <tag> used
 * in the %token/%type declarations selects the member:
 *   p  - parse tree node (Node *)
 *   cp - symbol table cell (Cell *)
 *   i  - integer value, wide enough to hold a pointer (intptr_t)
 *   s  - unsigned character string
 */
51 %union {
52 	Node	*p;
53 	Cell	*cp;
54 	intptr_t	i;
55 	unsigned char	*s;
56 }
57 
/*
 * Token declarations.  The <tag> names the %union member that holds the
 * token's value.  FIRSTTOKEN has to stay the first declared token (see
 * its own comment), so do not reorder these lines.
 */
58 %token	<i>	FIRSTTOKEN	/* must be first */
59 %token	<p>	PROGRAM PASTAT PASTAT2 XBEGIN XEND
60 %token	<i>	NL ',' '{' '(' '|' ';' '/' ')' '}' '[' ']'
61 %token	<i>	ARRAY
62 %token	<i>	MATCH NOTMATCH MATCHOP
63 %token	<i>	FINAL DOT ALL CCL NCCL CHAR MCHAR OR STAR QUEST PLUS
64 %token	<i>	AND BOR APPEND EQ GE GT LE LT NE IN
65 %token	<i>	ARG BLTIN BREAK CONTINUE DELETE DO EXIT FOR FUNC
66 %token	<i>	SUB GSUB IF INDEX LSUBSTR MATCHFCN NEXT
67 %token	<i>	ADD MINUS MULT DIVIDE MOD
68 %token	<i>	ASSIGN ASGNOP ADDEQ SUBEQ MULTEQ DIVEQ MODEQ POWEQ
69 %token	<i>	PRINT PRINTF SPRINTF
70 %token	<p>	ELSE INTEST CONDEXPR
71 %token	<i>	POSTINCR PREINCR POSTDECR PREDECR
72 %token	<cp>	VAR IVAR VARNF CALL NUMBER STRING FIELD
73 %token	<s>	REGEXPR
74 
/*
 * Value types of the nonterminals: <p> rules yield parse tree nodes,
 * <s> yields a regular expression string, <i> rules yield integers
 * (mostly the value of the token they wrap).
 */
75 %type	<p>	pas pattern ppattern plist pplist patlist prarg term re
76 %type	<p>	pa_pat pa_stat pa_stats
77 %type	<s>	reg_expr
78 %type	<p>	simple_stmt opt_simple_stmt stmt stmtlist
79 %type	<p>	var varname funcname varlist
80 %type	<p>	for if while
81 %type	<i>	pst opt_pst lbrace rparen comma nl opt_nl and bor
82 %type	<i>	subop print
83 
/*
 * Precedence and associativity table.  By yacc convention each later
 * line binds more tightly than the lines before it, so the order of
 * these lines is significant.  Several names here (GETLINE, CAT, NOT,
 * UMINUS, POWER, DECR, INCR, INDIRECT, ...) are declared only by this
 * table.  LASTTOKEN has to remain the last declared token.
 */
84 %right	ASGNOP
85 %right	'?'
86 %right	':'
87 %left	BOR
88 %left	AND
89 %left	GETLINE
90 %nonassoc APPEND EQ GE GT LE LT NE MATCHOP IN '|'
91 %left	ARG BLTIN BREAK CALL CONTINUE DELETE DO EXIT FOR FIELD FUNC
92 %left	GSUB IF INDEX LSUBSTR MATCHFCN NEXT NUMBER
93 %left	PRINT PRINTF RETURN SPLIT SPRINTF STRING SUB SUBSTR
94 %left	REGEXPR VAR VARNF IVAR WHILE '('
95 %left	CAT
96 %left	'+' '-'
97 %left	'*' '/' '%'
98 %left	NOT UMINUS
99 %right	POWER
100 %right	DECR INCR
101 %left	INDIRECT
102 %token	LASTTOKEN	/* must be last */
103 
104 %%
105 
/*
 * Start symbol.  When no error has been flagged, the finished parse tree
 * (a PROGRAM node built from the BEGIN list, the pattern-action list and
 * the END list) is stored in winner.  On a syntax error the lookahead is
 * discarded, bracecheck() is called and a "Bailing out" error is reported.
 */
106 program:
107 	  pas	{ if (errorflag==0)
108 			winner = (Node *)stat3(PROGRAM, beginloc, $1, endloc); }
109 	| error	{ yyclearin; bracecheck(); vyyerror(":95:Bailing out"); }
110 	;
111 
/* AND operator token followed by any number of newlines. */
112 and:
113 	  AND | and NL
114 	;
115 
/* BOR operator token followed by any number of newlines. */
116 bor:
117 	  BOR | bor NL
118 	;
119 
/* A comma followed by any number of newlines. */
120 comma:
121 	  ',' | comma NL
122 	;
123 
/*
 * DO keyword followed by any number of newlines.  This nonterminal has
 * no %type; the empty action presumably exists to suppress yacc's
 * default "$$ = $1" for the typed DO token -- TODO confirm.
 */
124 do:
125 	  DO { } | do NL
126 	;
127 
/*
 * ELSE keyword followed by any number of newlines.  This nonterminal has
 * no %type; the empty action presumably exists to suppress yacc's
 * default "$$ = $1" for the typed ELSE token -- TODO confirm.
 */
128 else:
129 	  ELSE { } | else NL
130 	;
131 
/*
 * for statements, including their body:
 *   - for (init; cond; step) stmt  -> FOR node, cond passed through notnull()
 *   - for (init; ; step) stmt      -> FOR node with NIL as the condition
 *   - for (name in array) stmt     -> IN node, second name converted by makearr()
 */
132 for:
133 	  FOR '(' opt_simple_stmt ';' pattern ';' opt_simple_stmt rparen stmt
134 		{ $$ = stat4(FOR, $3, notnull($5), $7, $9); }
135 	| FOR '(' opt_simple_stmt ';'  ';' opt_simple_stmt rparen stmt
136 		{ $$ = stat4(FOR, $3, NIL, $6, $8); }
137 	| FOR '(' varname IN varname rparen stmt
138 		{ $$ = stat3(IN, $3, makearr($5), $7); }
139 	;
140 
/*
 * Name in a function definition; either a VAR or a CALL token.  The cell
 * is checked and recorded by setfname().  No $$ is assigned explicitly;
 * pa_stat later casts this rule's value back to Cell *, so it presumably
 * relies on yacc's default value copy from $1 -- TODO confirm.
 */
141 funcname:
142 	  VAR	{ setfname($1); }
143 	| CALL	{ setfname($1); }
144 	;
145 
/* Head of an if statement; the value is the condition after notnull(). */
146 if:
147 	  IF '(' pattern rparen		{ $$ = notnull($3); }
148 	;
149 
/* An opening brace followed by any number of newlines. */
150 lbrace:
151 	  '{' | lbrace NL
152 	;
153 
/* One or more newline tokens. */
154 nl:
155 	  NL | nl NL
156 	;
157 
/* Zero or more newline tokens; the value is 0 when there are none. */
158 opt_nl:
159 	  /* empty */	{ $$ = 0; }
160 	| nl
161 	;
162 
/* Optional run of pattern-statement separators (see pst); 0 when absent. */
163 opt_pst:
164 	  /* empty */	{ $$ = 0; }
165 	| pst
166 	;
167 
168 
/* Optional simple statement (used in for headers); 0 when absent. */
169 opt_simple_stmt:
170 	  /* empty */			{ $$ = 0; }
171 	| simple_stmt
172 	;
173 
/*
 * The whole program body: a list of pattern-action statements with
 * optional separators before and after.  The value is 0 when the list
 * is absent.
 */
174 pas:
175 	  opt_pst			{ $$ = 0; }
176 	| opt_pst pa_stats opt_pst	{ $$ = $2; }
177 	;
178 
/* Pattern of a pattern-action statement, passed through notnull(). */
179 pa_pat:
180 	  pattern	{ $$ = notnull($1); }
181 	;
182 
/*
 * One top-level item of the program:
 *   - pattern alone               -> PASTAT whose action is PRINT of rectonode()
 *   - pattern { stmts }           -> PASTAT with the given action
 *   - pat , pat [ { stmts } ]     -> two-pattern statement built by pa2stat(),
 *                                    with the same default PRINT action
 *   - { stmts }                   -> PASTAT with NIL as the pattern
 *   - XBEGIN / XEND blocks        -> appended to beginloc / endloc with
 *                                    linkum(); the rule itself yields 0
 *   - function definition         -> infunc is raised by the mid-rule action
 *                                    before the body is parsed (that action
 *                                    occupies position 6, so the body is $8)
 *                                    and lowered afterwards; curfname is
 *                                    cleared and defn() records the function;
 *                                    the rule yields 0
 */
183 pa_stat:
184 	  pa_pat			{ $$ = stat2(PASTAT, $1, stat2(PRINT, rectonode(), NIL)); }
185 	| pa_pat lbrace stmtlist '}'	{ $$ = stat2(PASTAT, $1, $3); }
186 	| pa_pat ',' pa_pat		{ $$ = pa2stat($1, $3, stat2(PRINT, rectonode(), NIL)); }
187 	| pa_pat ',' pa_pat lbrace stmtlist '}'	{ $$ = pa2stat($1, $3, $5); }
188 	| lbrace stmtlist '}'		{ $$ = stat2(PASTAT, NIL, $2); }
189 	| XBEGIN lbrace stmtlist '}'
190 		{ beginloc = linkum(beginloc, $3); $$ = 0; }
191 	| XEND lbrace stmtlist '}'
192 		{ endloc = linkum(endloc, $3); $$ = 0; }
193 	| FUNC funcname '(' varlist rparen {infunc++;} lbrace stmtlist '}'
194 		{ infunc--; curfname=0; defn((Cell *)$2, $4, $8); $$ = 0; }
195 	;
196 
/*
 * List of pattern-action statements, optionally separated by pst
 * tokens; the items are chained together with linkum().
 */
197 pa_stats:
198 	  pa_stat
199 	| pa_stats opt_pst pa_stat	{ $$ = linkum($1, $3); }
200 	;
201 
/* One or more patterns separated by commas, chained with linkum(). */
202 patlist:
203 	  pattern
204 	| patlist comma pattern	{ $$ = linkum($1, $3); }
205 	;
206 
/*
 * Restricted form of pattern, used through pplist for unparenthesized
 * print arguments.  Compared with pattern it has no comparison
 * operators (EQ, GE, GT, LE, LT, NE), no "NOT pattern" and no
 * "'|' GETLINE" forms -- presumably so that '>' and '|' after print
 * arguments are read as output redirection; TODO confirm.
 *
 * For MATCHOP, a constant right operand is compiled immediately with
 * makedfa() and the first op3 argument is NIL; otherwise the operand is
 * kept as an expression and the first argument is (Node *)1.
 */
207 ppattern:
208 	  var ASGNOP ppattern		{ $$ = op2($2, $1, $3); }
209 	| ppattern '?' ppattern ':' ppattern %prec '?'
210 	 	{ $$ = op3(CONDEXPR, notnull($1), $3, $5); }
211 	| ppattern bor ppattern %prec BOR
212 		{ $$ = op2(BOR, notnull($1), notnull($3)); }
213 	| ppattern and ppattern %prec AND
214 		{ $$ = op2(AND, notnull($1), notnull($3)); }
215 	| ppattern MATCHOP reg_expr	{ $$ = op3($2, NIL, $1, (Node*)makedfa($3, 0)); }
216 	| ppattern MATCHOP ppattern
217 		{ if (constnode($3))
218 			$$ = op3($2, NIL, $1, (Node*)makedfa(strnode($3), 0));
219 		  else
220 			$$ = op3($2, (Node *)1, $1, $3); }
221 	| ppattern IN varname		{ $$ = op2(INTEST, $1, makearr($3)); }
222 	| '(' plist ')' IN varname	{ $$ = op2(INTEST, $2, makearr($5)); }
223 	| ppattern term %prec CAT	{ $$ = op2(CAT, $1, $2); }
224 	| re
225 	| term
226 	;
227 
/*
 * General expression: assignment, conditional, logical and/or, negation,
 * comparisons, regular expression matching, array membership (IN),
 * "expr | getline [var]", concatenation of adjacent terms (CAT), plain
 * regular expressions and terms.
 *
 * Operands of BOR/AND and the condition of '?' go through notnull().
 * "NOT pattern" is built as NOT applied to an NE comparison of the
 * operand with the "$zero&null" symbol table entry.  For MATCHOP, a
 * constant right operand is compiled immediately with makedfa() and the
 * first op3 argument is NIL; otherwise the operand is kept as an
 * expression and the first argument is (Node *)1.
 */
228 pattern:
229 	  var ASGNOP pattern		{ $$ = op2($2, $1, $3); }
230 	| pattern '?' pattern ':' pattern %prec '?'
231 	 	{ $$ = op3(CONDEXPR, notnull($1), $3, $5); }
232 	| pattern bor pattern %prec BOR
233 		{ $$ = op2(BOR, notnull($1), notnull($3)); }
234 	| pattern and pattern %prec AND
235 		{ $$ = op2(AND, notnull($1), notnull($3)); }
236 	| NOT pattern
237 		{ $$ = op1(NOT, op2(NE,$2,valtonode(lookup("$zero&null",symtab),CCON))); }
238 	| pattern EQ pattern		{ $$ = op2($2, $1, $3); }
239 	| pattern GE pattern		{ $$ = op2($2, $1, $3); }
240 	| pattern GT pattern		{ $$ = op2($2, $1, $3); }
241 	| pattern LE pattern		{ $$ = op2($2, $1, $3); }
242 	| pattern LT pattern		{ $$ = op2($2, $1, $3); }
243 	| pattern NE pattern		{ $$ = op2($2, $1, $3); }
244 	| pattern MATCHOP reg_expr	{ $$ = op3($2, NIL, $1, (Node*)makedfa($3, 0)); }
245 	| pattern MATCHOP pattern
246 		{ if (constnode($3))
247 			$$ = op3($2, NIL, $1, (Node*)makedfa(strnode($3), 0));
248 		  else
249 			$$ = op3($2, (Node *)1, $1, $3); }
250 	| pattern IN varname		{ $$ = op2(INTEST, $1, makearr($3)); }
251 	| '(' plist ')' IN varname	{ $$ = op2(INTEST, $2, makearr($5)); }
252 	| pattern '|' GETLINE var	{ $$ = op3(GETLINE, $4, (Node*)$2, $1); }
253 	| pattern '|' GETLINE		{ $$ = op3(GETLINE, (Node*)0, (Node*)$2, $1); }
254 	| pattern term %prec CAT	{ $$ = op2(CAT, $1, $2); }
255 	| re
256 	| term
257 	;
258 
/*
 * Comma-separated list of at least two patterns, chained with linkum().
 * Used inside parentheses by prarg and by the "(list) IN array" forms.
 */
259 plist:
260 	  pattern comma pattern		{ $$ = linkum($1, $3); }
261 	| plist comma pattern		{ $$ = linkum($1, $3); }
262 	;
263 
/* One or more ppatterns separated by commas, chained with linkum(). */
264 pplist:
265 	  ppattern
266 	| pplist comma ppattern		{ $$ = linkum($1, $3); }
267 	;
268 
/*
 * Argument list of print/printf: nothing (the value is then
 * rectonode()), an unparenthesized pplist, or a parenthesized plist.
 */
269 prarg:
270 	  /* empty */			{ $$ = rectonode(); }
271 	| pplist
272 	| '(' plist ')'			{ $$ = $2; }
273 	;
274 
/* Either output keyword; the value is that of the PRINT or PRINTF token. */
275 print:
276 	  PRINT | PRINTF
277 	;
278 
/* One or more separators between pattern-action statements: NL or ';'. */
279 pst:
280 	  NL | ';' | pst NL | pst ';'
281 	;
282 
/*
 * A closing brace followed by any number of newlines.  This nonterminal
 * has no %type; the empty action presumably exists to suppress yacc's
 * default "$$ = $1" for the typed '}' token -- TODO confirm.
 */
283 rbrace:
284 	  '}' { } | rbrace NL
285 	;
286 
/*
 * A regular expression used as an expression on its own: a MATCH of
 * rectonode() against the expression compiled by makedfa(), or the
 * negation (NOT) of such a match.
 */
287 re:
288 	 reg_expr
289 		{ $$ = op3(MATCH, NIL, rectonode(), (Node*)makedfa($1,0)); }
290 	| NOT re	{$$ = op1(NOT, notnull($2)); }
291 	;
292 
/*
 * A /regexp/ literal.  startreg() is called from a mid-rule action right
 * after the opening '/', before the REGEXPR token is read; that action
 * occupies position 2, so the REGEXPR string is $3.
 */
293 reg_expr:
294 	  '/' {startreg();} REGEXPR '/'		{ $$ = $3; }
295 	;
296 
/* A closing parenthesis followed by any number of newlines. */
297 rparen:
298 	  ')' | rparen NL
299 	;
300 
/*
 * Statements without a terminator:
 *   - print/printf, optionally followed by '|', APPEND or GT and a
 *     destination term; the redirection token's value is stored in the
 *     node cast to Node *
 *   - delete of one array element
 *   - delete of a bare name, which is reported as an error but still
 *     yields a DELETE node
 *   - an expression used as a statement (exptostat())
 *   - error recovery: the lookahead is discarded and illstat is reported;
 *     this alternative assigns no $$
 */
301 simple_stmt:
302 	  print prarg '|' term		{ $$ = stat3($1, $2, (Node *) $3, $4); }
303 	| print prarg APPEND term	{ $$ = stat3($1, $2, (Node *) $3, $4); }
304 	| print prarg GT term		{ $$ = stat3($1, $2, (Node *) $3, $4); }
305 	| print prarg			{ $$ = stat3($1, $2, NIL, NIL); }
306 	| DELETE varname '[' patlist ']' { $$ = stat2(DELETE, makearr($2), $4); }
307 	| DELETE varname		{ yyclearin; vyyerror(":96:You can only delete array[element]"); $$ = stat1(DELETE, $2); }
308 	| pattern			{ $$ = exptostat($1); }
309 	| error				{ yyclearin; vyyerror(illstat); }
310 	;
311 
/*
 * Statement terminator: one or more newlines, or a ';' optionally
 * followed by newlines.  The rule carries no value.
 */
312 st:
313 	  nl { } | ';' opt_nl { }
314 	;
315 
/*
 * A complete statement: break, continue, do-while, exit, for, if with
 * optional else, a braced statement list, next, return, a terminated
 * simple statement, while, or an empty statement (';', value 0).
 *
 * Conditions of do-while go through notnull().  NEXT inside a function
 * (infunc non-zero) is reported as an error, but the NEXT node is still
 * built.
 */
316 stmt:
317 	  BREAK st		{ $$ = stat1(BREAK, NIL); }
318 	| CONTINUE st		{ $$ = stat1(CONTINUE, NIL); }
319 	| do stmt WHILE '(' pattern ')' st
320 		{ $$ = stat2(DO, $2, notnull($5)); }
321 	| EXIT pattern st	{ $$ = stat1(EXIT, $2); }
322 	| EXIT st		{ $$ = stat1(EXIT, NIL); }
323 	| for
324 	| if stmt else stmt	{ $$ = stat3(IF, $1, $2, $4); }
325 	| if stmt		{ $$ = stat3(IF, $1, $2, NIL); }
326 	| lbrace stmtlist rbrace { $$ = $2; }
327 	| NEXT st	{ if (infunc)
328 				vyyerror(":97:Next is illegal inside a function");
329 			  $$ = stat1(NEXT, NIL); }
330 	| RETURN pattern st	{ $$ = stat1(RETURN, $2); }
331 	| RETURN st		{ $$ = stat1(RETURN, NIL); }
332 	| simple_stmt st
333 	| while stmt		{ $$ = stat2(WHILE, $1, $2); }
334 	| ';' opt_nl		{ $$ = 0; }
335 	;
336 
/* One or more statements, chained together with linkum(). */
337 stmtlist:
338 	  stmt
339 	| stmtlist stmt		{ $$ = linkum($1, $2); }
340 	;
341 
/* Either substitution keyword; the value is that of the SUB or GSUB token. */
342 subop:
343 	  SUB | GSUB
344 	;
345 
/*
 * Terms: arithmetic, unary operators, built-in and user function calls,
 * increment/decrement, getline forms, string functions, constants,
 * parenthesized expressions and variables.
 *
 * Notes on the actions:
 *   - unary '+' yields its operand unchanged; the operand of NOT goes
 *     through notnull()
 *   - BLTIN without arguments (with or without "()") receives rectonode()
 *     as its argument
 *   - INDEX with a /regexp/ second argument is reported as an error
 *   - MATCHFCN and subop accept either a /regexp/ literal or an
 *     expression as the pattern; a constant expression is compiled at
 *     once with makedfa(..., 1) and the first argument is NIL, otherwise
 *     the expression is kept and the first argument is (Node *)1
 *   - subop without a target operates on rectonode()
 *   - SPLIT stores STRING or REGEXPR (cast to Node *) as its last
 *     argument to record the kind of separator; with no separator the
 *     third argument is NIL
 */
346 term:
347 	  term '+' term			{ $$ = op2(ADD, $1, $3); }
348 	| term '-' term			{ $$ = op2(MINUS, $1, $3); }
349 	| term '*' term			{ $$ = op2(MULT, $1, $3); }
350 	| term '/' term			{ $$ = op2(DIVIDE, $1, $3); }
351 	| term '%' term			{ $$ = op2(MOD, $1, $3); }
352 	| term POWER term		{ $$ = op2(POWER, $1, $3); }
353 	| '-' term %prec UMINUS		{ $$ = op1(UMINUS, $2); }
354 	| '+' term %prec UMINUS		{ $$ = $2; }
355 	| NOT term %prec UMINUS		{ $$ = op1(NOT, notnull($2)); }
356 	| BLTIN '(' ')'			{ $$ = op2(BLTIN, (Node *) $1, rectonode()); }
357 	| BLTIN '(' patlist ')'		{ $$ = op2(BLTIN, (Node *) $1, $3); }
358 	| BLTIN				{ $$ = op2(BLTIN, (Node *) $1, rectonode()); }
359 	| CALL '(' ')'			{ $$ = op2(CALL, valtonode($1,CVAR), NIL); }
360 	| CALL '(' patlist ')'		{ $$ = op2(CALL, valtonode($1,CVAR), $3); }
361 	| DECR var			{ $$ = op1(PREDECR, $2); }
362 	| INCR var			{ $$ = op1(PREINCR, $2); }
363 	| var DECR			{ $$ = op1(POSTDECR, $1); }
364 	| var INCR			{ $$ = op1(POSTINCR, $1); }
365 	| GETLINE var LT term		{ $$ = op3(GETLINE, $2, (Node *)$3, $4); }
366 	| GETLINE LT term		{ $$ = op3(GETLINE, NIL, (Node *)$2, $3); }
367 	| GETLINE var			{ $$ = op3(GETLINE, $2, NIL, NIL); }
368 	| GETLINE			{ $$ = op3(GETLINE, NIL, NIL, NIL); }
369 	| INDEX '(' pattern comma pattern ')'
370 		{ $$ = op2(INDEX, $3, $5); }
371 	| INDEX '(' pattern comma reg_expr ')'
372 		{ vyyerror(":98:Index() doesn't permit regular expressions");
373 		  $$ = op2(INDEX, $3, (Node*)$5); }
374 	| '(' pattern ')'		{ $$ = $2; }
375 	| MATCHFCN '(' pattern comma reg_expr ')'
376 		{ $$ = op3(MATCHFCN, NIL, $3, (Node*)makedfa($5, 1)); }
377 	| MATCHFCN '(' pattern comma pattern ')'
378 		{ if (constnode($5))
379 			$$ = op3(MATCHFCN, NIL, $3, (Node*)makedfa(strnode($5), 1));
380 		  else
381 			$$ = op3(MATCHFCN, (Node *)1, $3, $5); }
382 	| NUMBER			{ $$ = valtonode($1, CCON); }
383 	| SPLIT '(' pattern comma varname comma pattern ')'     /* string */
384 		{ $$ = op4(SPLIT, $3, makearr($5), $7, (Node*)STRING); }
385 	| SPLIT '(' pattern comma varname comma reg_expr ')'    /* const /regexp/ */
386 		{ $$ = op4(SPLIT, $3, makearr($5), (Node*)makedfa($7, 1), (Node *)REGEXPR); }
387 	| SPLIT '(' pattern comma varname ')'
388 		{ $$ = op4(SPLIT, $3, makearr($5), NIL, (Node*)STRING); }  /* default */
389 	| SPRINTF '(' patlist ')'	{ $$ = op1($1, $3); }
390 	| STRING	 		{ $$ = valtonode($1, CCON); }
391 	| subop '(' reg_expr comma pattern ')'
392 		{ $$ = op4($1, NIL, (Node*)makedfa($3, 1), $5, rectonode()); }
393 	| subop '(' pattern comma pattern ')'
394 		{ if (constnode($3))
395 			$$ = op4($1, NIL, (Node*)makedfa(strnode($3), 1), $5, rectonode());
396 		  else
397 			$$ = op4($1, (Node *)1, $3, $5, rectonode()); }
398 	| subop '(' reg_expr comma pattern comma var ')'
399 		{ $$ = op4($1, NIL, (Node*)makedfa($3, 1), $5, $7); }
400 	| subop '(' pattern comma pattern comma var ')'
401 		{ if (constnode($3))
402 			$$ = op4($1, NIL, (Node*)makedfa(strnode($3), 1), $5, $7);
403 		  else
404 			$$ = op4($1, (Node *)1, $3, $5, $7); }
405 	| SUBSTR '(' pattern comma pattern comma pattern ')'
406 		{ $$ = op3(SUBSTR, $3, $5, $7); }
407 	| SUBSTR '(' pattern comma pattern ')'
408 		{ $$ = op3(SUBSTR, $3, $5, NIL); }
409 	| var
410 	;
411 
/*
 * Something that can be assigned to: a plain name, an array element
 * (the name is converted with makearr()), a FIELD token, or an
 * indirection -- either an IVAR token or INDIRECT applied to a term.
 */
412 var:
413 	  varname
414 	| varname '[' patlist ']'	{ $$ = op2(ARRAY, makearr($1), $3); }
415 	| FIELD				{ $$ = valtonode($1, CFLD); }
416 	| IVAR				{ $$ = op1(INDIRECT, valtonode($1, CVAR)); }
417 	| INDIRECT term	 		{ $$ = op1(INDIRECT, $2); }
418 	;
419 
/*
 * Parameter list of a function definition: empty, or VAR names
 * separated by commas.  Every alternative also stores the list built
 * so far in the global arglist.
 */
420 varlist:
421 	  /* nothing */		{ arglist = $$ = 0; }
422 	| VAR			{ arglist = $$ = valtonode($1,CVAR); }
423 	| varlist comma VAR	{ arglist = $$ = linkum($1,valtonode($3,CVAR)); }
424 	;
425 
/*
 * A name: VAR becomes a CVAR value node; ARG and VARNF become operator
 * nodes carrying the token value cast to Node *.
 */
426 varname:
427 	  VAR			{ $$ = valtonode($1, CVAR); }
428 	| ARG 			{ $$ = op1(ARG, (Node *) $1); }
429 	| VARNF			{ $$ = op1(VARNF, (Node *) $1); }
430 	;
431 
432 
/* Head of a while statement; the value is the condition after notnull(). */
433 while:
434 	  WHILE '(' pattern rparen	{ $$ = notnull($3); }
435 	;
436 
437 %%
438 
439 static void
440 setfname(Cell *p)
441 {
442 	if (isarr(p))
443 		vyyerror(":99:%s is an array, not a function", p->nval);
444 	else if (isfunc(p))
445 		vyyerror(":100:You cannot define function %s more than once", p->nval);
446 	curfname = p->nval;
447 }
448 
449 static int
450 constnode(Node *p)
451 {
452 	return p->ntype == NVALUE && ((Cell *) (p->narg[0]))->csub == CCON;
453 }
454 
455 static unsigned char *strnode(Node *p)
456 {
457 	return ((Cell *)(p->narg[0]))->sval;
458 }
459 
460 static Node *notnull(Node *n)
461 {
462 	switch (n->nobj) {
463 	case LE: case LT: case EQ: case NE: case GT: case GE:
464 	case BOR: case AND: case NOT:
465 		return n;
466 	default:
467 		return op2(NE, n, nullnode);
468 	}
469 }
470