1 /*
2 Changes by Gunnar Ritter, Freiburg i. Br., Germany, December 2002.
3
4 Sccsid @(#)awk.g.y 1.9 (gritter) 5/14/06>
5 */
6 /* UNIX(R) Regular Expression Tools
7
8 Copyright (C) 2001 Caldera International, Inc.
9
10 This program is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 2 of the License, or
13 (at your option) any later version.
14
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with this program; if not, write to:
22 Free Software Foundation, Inc.
23 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24 */
25 /* copyright "%c%" */
26
27 /* from RCS Header: awk.g.y 1.2 91/06/25 */
28
29 %{
30 #include "awk.h"
31 #include <unistd.h>
32 #include <inttypes.h>
33 #include <pfmt.h>
/*
 * yywrap: end-of-input hook in the lex/yacc convention.
 * Takes no arguments and always returns 1.
 */
int
yywrap(void)
{
	return 1;
}
/* When DEBUG is not defined, PUTS(x) expands to nothing. */
35 #ifndef DEBUG
36 # define PUTS(x)
37 #endif
/* Statement lists extended by the XBEGIN / XEND productions through linkum()
 * and handed to stat3(PROGRAM, ...) by the `program` rule. */
38 Node *beginloc = 0, *endloc = 0;
/* In this file infunc is incremented after the parameter list of a FUNC
 * definition and decremented when the definition's closing brace is reduced. */
39 int infunc = 0; /* = 1 if in arglist or body of func */
/* Set by setfname() to the nval of the function being defined; reset to 0
 * at the end of the FUNC production. */
40 unsigned char *curfname = 0;
/* Assigned by every alternative of the `varlist` rule. */
41 Node *arglist = 0; /* list of args for current function */
/* Helpers defined after the second %% of this grammar file. */
42 static void setfname(Cell *);
43 static int constnode(Node *);
44 static unsigned char *strnode(Node *);
45 static Node *notnull(Node *);
/* Message passed to vyyerror() by the `error` alternative of simple_stmt;
 * defined outside this file. */
46 extern const char illstat[];
47
/* Lexer entry point; defined outside this file. */
48 extern int yylex(void);
49 %}
50
/* Semantic value type shared by tokens and nonterminals. The <p>, <cp>,
 * <i> and <s> tags used in the %token / %type lines select a member. */
51 %union {
52 Node *p;
53 Cell *cp;
54 intptr_t i;
55 unsigned char *s;
56 }
57
/* Token declarations. The <tag> names the %union member that holds each
 * token's semantic value: <i> integer, <p> Node *, <cp> Cell *,
 * <s> unsigned char *. FIRSTTOKEN is required to stay first (see its own
 * comment); the matching LASTTOKEN is declared after the precedence table. */
58 %token <i> FIRSTTOKEN /* must be first */
59 %token <p> PROGRAM PASTAT PASTAT2 XBEGIN XEND
60 %token <i> NL ',' '{' '(' '|' ';' '/' ')' '}' '[' ']'
61 %token <i> ARRAY
62 %token <i> MATCH NOTMATCH MATCHOP
63 %token <i> FINAL DOT ALL CCL NCCL CHAR MCHAR OR STAR QUEST PLUS
64 %token <i> AND BOR APPEND EQ GE GT LE LT NE IN
65 %token <i> ARG BLTIN BREAK CONTINUE DELETE DO EXIT FOR FUNC
66 %token <i> SUB GSUB IF INDEX LSUBSTR MATCHFCN NEXT
67 %token <i> ADD MINUS MULT DIVIDE MOD
68 %token <i> ASSIGN ASGNOP ADDEQ SUBEQ MULTEQ DIVEQ MODEQ POWEQ
69 %token <i> PRINT PRINTF SPRINTF
70 %token <p> ELSE INTEST CONDEXPR
71 %token <i> POSTINCR PREINCR POSTDECR PREDECR
72 %token <cp> VAR IVAR VARNF CALL NUMBER STRING FIELD
73 %token <s> REGEXPR
74
/* Semantic value types of the nonterminals. Nonterminals not listed here
 * (do, else, rbrace, st, program) have no declared value type in this file. */
75 %type <p> pas pattern ppattern plist pplist patlist prarg term re
76 %type <p> pa_pat pa_stat pa_stats
77 %type <s> reg_expr
78 %type <p> simple_stmt opt_simple_stmt stmt stmtlist
79 %type <p> var varname funcname varlist
80 %type <p> for if while
81 %type <i> pst opt_pst lbrace rparen comma nl opt_nl and bor
82 %type <i> subop print
83
/* Precedence and associativity table. In yacc each successive line binds
 * more tightly than the one before it, so ASGNOP is lowest and INDIRECT
 * highest; do not reorder. Names that appear only in this table (GETLINE,
 * RETURN, SPLIT, SUBSTR, WHILE, CAT, NOT, UMINUS, POWER, DECR, INCR,
 * INDIRECT) are declared as tokens by these lines and have no <tag>.
 * CAT and UMINUS are referenced by the grammar only through %prec. */
84 %right ASGNOP
85 %right '?'
86 %right ':'
87 %left BOR
88 %left AND
89 %left GETLINE
90 %nonassoc APPEND EQ GE GT LE LT NE MATCHOP IN '|'
91 %left ARG BLTIN BREAK CALL CONTINUE DELETE DO EXIT FOR FIELD FUNC
92 %left GSUB IF INDEX LSUBSTR MATCHFCN NEXT NUMBER
93 %left PRINT PRINTF RETURN SPLIT SPRINTF STRING SUB SUBSTR
94 %left REGEXPR VAR VARNF IVAR WHILE '('
95 %left CAT
96 %left '+' '-'
97 %left '*' '/' '%'
98 %left NOT UMINUS
99 %right POWER
100 %right DECR INCR
101 %left INDIRECT
102 %token LASTTOKEN /* must be last */
103
104 %%
105
/* program: first rule of the grammar (no %start is declared, so this is
 * the start symbol). When errorflag is 0 the result is stored in `winner`
 * as stat3(PROGRAM, beginloc, <pattern-action list>, endloc). On a syntax
 * error the lookahead is cleared, bracecheck() is called and the
 * ":95:Bailing out" message is reported through vyyerror(). */
106 program:
107 pas { if (errorflag==0)
108 winner = (Node *)stat3(PROGRAM, beginloc, $1, endloc); }
109 | error { yyclearin; bracecheck(); vyyerror(":95:Bailing out"); }
110 ;
111
/* and: an AND token followed by zero or more NL tokens. */
112 and:
113 AND | and NL
114 ;
115
/* bor: a BOR token followed by zero or more NL tokens. */
116 bor:
117 BOR | bor NL
118 ;
119
/* comma: a ',' followed by zero or more NL tokens. */
120 comma:
121 ',' | comma NL
122 ;
123
/* do: a DO token followed by zero or more NL tokens. `do` has no %type;
 * the empty action is presumably there to avoid the default `$$ = $1`
 * on an untyped nonterminal -- NOTE(review): confirm. */
124 do:
125 DO { } | do NL
126 ;
127
/* else: an ELSE token followed by zero or more NL tokens. `else` has no
 * %type; the empty action is presumably there to avoid the default
 * `$$ = $1` on an untyped nonterminal -- NOTE(review): confirm. */
128 else:
129 ELSE { } | else NL
130 ;
131
/* for: a complete for statement including its body. Three forms:
 *   1. init ; condition ; step  -> stat4(FOR, init, notnull(cond), step, body)
 *   2. init ; <empty>   ; step  -> stat4(FOR, init, NIL, step, body)
 *   3. varname IN varname       -> stat3(IN, var, makearr(array), body)
 * The $n indices differ between forms 1 and 2 because form 2 has no
 * pattern symbol between the two ';'. */
132 for:
133 FOR '(' opt_simple_stmt ';' pattern ';' opt_simple_stmt rparen stmt
134 { $$ = stat4(FOR, $3, notnull($5), $7, $9); }
135 | FOR '(' opt_simple_stmt ';' ';' opt_simple_stmt rparen stmt
136 { $$ = stat4(FOR, $3, NIL, $6, $8); }
137 | FOR '(' varname IN varname rparen stmt
138 { $$ = stat3(IN, $3, makearr($5), $7); }
139 ;
140
/* funcname: the name in a function definition, given as either a VAR or a
 * CALL token; both alternatives pass the token's Cell to setfname().
 * Neither action assigns $$. The FUNC alternative of pa_stat reads this
 * symbol's value as (Cell *)$2, which relies on yacc having pre-loaded $$
 * with $1 -- NOTE(review): confirm for the yacc implementation in use. */
141 funcname:
142 VAR { setfname($1); }
143 | CALL { setfname($1); }
144 ;
145
/* if: the `IF ( pattern )` header only (the body is handled in stmt).
 * Its value is the condition passed through notnull(). */
146 if:
147 IF '(' pattern rparen { $$ = notnull($3); }
148 ;
149
/* lbrace: a '{' followed by zero or more NL tokens. */
150 lbrace:
151 '{' | lbrace NL
152 ;
153
/* nl: one or more NL tokens. */
154 nl:
155 NL | nl NL
156 ;
157
/* opt_nl: zero or more NL tokens; the value is 0 when empty. */
158 opt_nl:
159 /* empty */ { $$ = 0; }
160 | nl
161 ;
162
/* opt_pst: an optional run of NL / ';' separators (see pst); the value is
 * 0 when empty. */
163 opt_pst:
164 /* empty */ { $$ = 0; }
165 | pst
166 ;
167
168
/* opt_simple_stmt: an optional simple_stmt, used for the init and step
 * parts of the for rule; the value is 0 when empty. */
169 opt_simple_stmt:
170 /* empty */ { $$ = 0; }
171 | simple_stmt
172 ;
173
/* pas: the whole list of pattern-action statements, with optional
 * separators before and after. The value is 0 when there are no
 * statements, otherwise the pa_stats list. */
174 pas:
175 opt_pst { $$ = 0; }
176 | opt_pst pa_stats opt_pst { $$ = $2; }
177 ;
178
/* pa_pat: a pattern used as the selector of a pattern-action statement;
 * its value is the pattern passed through notnull(). */
179 pa_pat:
180 pattern { $$ = notnull($1); }
181 ;
182
/* pa_stat: one top-level item of the program.
 *   - pattern alone: PASTAT whose action is stat2(PRINT, rectonode(), NIL)
 *   - pattern { stmts }: PASTAT with the given statement list
 *   - pat , pat [ { stmts } ]: built with pa2stat(); same default PRINT
 *     action when no braces are given
 *   - { stmts }: PASTAT with a NIL pattern
 *   - XBEGIN / XEND blocks: appended to beginloc / endloc with linkum();
 *     the rule value is 0
 *   - FUNC definition: the mid-rule action {infunc++;} occupies position
 *     $6, so lbrace is $7 and stmtlist is $8. The final action decrements
 *     infunc, clears curfname and calls defn(); the rule value is 0. */
183 pa_stat:
184 pa_pat { $$ = stat2(PASTAT, $1, stat2(PRINT, rectonode(), NIL)); }
185 | pa_pat lbrace stmtlist '}' { $$ = stat2(PASTAT, $1, $3); }
186 | pa_pat ',' pa_pat { $$ = pa2stat($1, $3, stat2(PRINT, rectonode(), NIL)); }
187 | pa_pat ',' pa_pat lbrace stmtlist '}' { $$ = pa2stat($1, $3, $5); }
188 | lbrace stmtlist '}' { $$ = stat2(PASTAT, NIL, $2); }
189 | XBEGIN lbrace stmtlist '}'
190 { beginloc = linkum(beginloc, $3); $$ = 0; }
191 | XEND lbrace stmtlist '}'
192 { endloc = linkum(endloc, $3); $$ = 0; }
193 | FUNC funcname '(' varlist rparen {infunc++;} lbrace stmtlist '}'
194 { infunc--; curfname=0; defn((Cell *)$2, $4, $8); $$ = 0; }
195 ;
196
/* pa_stats: one or more pa_stat items, optionally separated by NL / ';'
 * runs, joined left to right with linkum(). */
197 pa_stats:
198 pa_stat
199 | pa_stats opt_pst pa_stat { $$ = linkum($1, $3); }
200 ;
201
/* patlist: one or more patterns separated by `comma`, joined with
 * linkum(). Used for subscripts and call/builtin argument lists. */
202 patlist:
203 pattern
204 | patlist comma pattern { $$ = linkum($1, $3); }
205 ;
206
/* ppattern: the expression form reached from prarg via pplist, i.e. the
 * unparenthesized arguments of print / printf. It mirrors `pattern` but
 * has no alternatives for the relational operators (EQ GE GT LE LT NE),
 * for `NOT pattern`, or for `'|' GETLINE` -- presumably so that GT and '|'
 * after a print argument list are taken as redirection by simple_stmt
 * (NOTE(review): confirm).
 * For MATCHOP with a non-regexp right operand, a constant operand
 * (constnode()) is compiled immediately with makedfa(); otherwise the
 * node is built with (Node *)1 in the first slot and the operand kept. */
207 ppattern:
208 var ASGNOP ppattern { $$ = op2($2, $1, $3); }
209 | ppattern '?' ppattern ':' ppattern %prec '?'
210 { $$ = op3(CONDEXPR, notnull($1), $3, $5); }
211 | ppattern bor ppattern %prec BOR
212 { $$ = op2(BOR, notnull($1), notnull($3)); }
213 | ppattern and ppattern %prec AND
214 { $$ = op2(AND, notnull($1), notnull($3)); }
215 | ppattern MATCHOP reg_expr { $$ = op3($2, NIL, $1, (Node*)makedfa($3, 0)); }
216 | ppattern MATCHOP ppattern
217 { if (constnode($3))
218 $$ = op3($2, NIL, $1, (Node*)makedfa(strnode($3), 0));
219 else
220 $$ = op3($2, (Node *)1, $1, $3); }
221 | ppattern IN varname { $$ = op2(INTEST, $1, makearr($3)); }
222 | '(' plist ')' IN varname { $$ = op2(INTEST, $2, makearr($5)); }
223 | ppattern term %prec CAT { $$ = op2(CAT, $1, $2); }
224 | re
225 | term
226 ;
227
/* pattern: the general expression rule.
 *   - assignment: op2() with the ASGNOP token's value as the operator
 *   - ?: / BOR / AND: the tested operands are wrapped in notnull()
 *   - NOT pattern: op1(NOT, op2(NE, operand, <"$zero&null" looked up in
 *     symtab as a CCON value node>))
 *   - relational operators: op2() with the token's own value as operator
 *   - MATCHOP: a /regexp/ or constant right operand is compiled with
 *     makedfa(..., 0); any other operand is kept, with (Node *)1 in the
 *     first slot of the op3 node
 *   - IN: op2(INTEST, subscript(s), makearr(array))
 *   - pattern '|' GETLINE [var]: op3(GETLINE, var-or-0, (Node *)<value of
 *     the '|' token>, command)
 *   - juxtaposition `pattern term`: op2(CAT, ...) at CAT precedence */
228 pattern:
229 var ASGNOP pattern { $$ = op2($2, $1, $3); }
230 | pattern '?' pattern ':' pattern %prec '?'
231 { $$ = op3(CONDEXPR, notnull($1), $3, $5); }
232 | pattern bor pattern %prec BOR
233 { $$ = op2(BOR, notnull($1), notnull($3)); }
234 | pattern and pattern %prec AND
235 { $$ = op2(AND, notnull($1), notnull($3)); }
236 | NOT pattern
237 { $$ = op1(NOT, op2(NE,$2,valtonode(lookup("$zero&null",symtab),CCON))); }
238 | pattern EQ pattern { $$ = op2($2, $1, $3); }
239 | pattern GE pattern { $$ = op2($2, $1, $3); }
240 | pattern GT pattern { $$ = op2($2, $1, $3); }
241 | pattern LE pattern { $$ = op2($2, $1, $3); }
242 | pattern LT pattern { $$ = op2($2, $1, $3); }
243 | pattern NE pattern { $$ = op2($2, $1, $3); }
244 | pattern MATCHOP reg_expr { $$ = op3($2, NIL, $1, (Node*)makedfa($3, 0)); }
245 | pattern MATCHOP pattern
246 { if (constnode($3))
247 $$ = op3($2, NIL, $1, (Node*)makedfa(strnode($3), 0));
248 else
249 $$ = op3($2, (Node *)1, $1, $3); }
250 | pattern IN varname { $$ = op2(INTEST, $1, makearr($3)); }
251 | '(' plist ')' IN varname { $$ = op2(INTEST, $2, makearr($5)); }
252 | pattern '|' GETLINE var { $$ = op3(GETLINE, $4, (Node*)$2, $1); }
253 | pattern '|' GETLINE { $$ = op3(GETLINE, (Node*)0, (Node*)$2, $1); }
254 | pattern term %prec CAT { $$ = op2(CAT, $1, $2); }
255 | re
256 | term
257 ;
258
/* plist: two or more patterns separated by `comma`, joined with linkum().
 * Unlike patlist, a single pattern does not match this rule. */
259 plist:
260 pattern comma pattern { $$ = linkum($1, $3); }
261 | plist comma pattern { $$ = linkum($1, $3); }
262 ;
263
/* pplist: one or more ppattern items separated by `comma`, joined with
 * linkum(). */
264 pplist:
265 ppattern
266 | pplist comma ppattern { $$ = linkum($1, $3); }
267 ;
268
/* prarg: the argument list of print / printf. An empty list yields
 * rectonode(); otherwise the value is the pplist, or the plist inside
 * parentheses. */
269 prarg:
270 /* empty */ { $$ = rectonode(); }
271 | pplist
272 | '(' plist ')' { $$ = $2; }
273 ;
274
/* print: either PRINT or PRINTF; the value is that token's value, which
 * simple_stmt passes to stat3() as the statement type. */
275 print:
276 PRINT | PRINTF
277 ;
278
/* pst: one or more NL or ';' tokens in any mix. */
279 pst:
280 NL | ';' | pst NL | pst ';'
281 ;
282
/* rbrace: a '}' followed by zero or more NL tokens. `rbrace` has no
 * %type; the empty action is presumably there to avoid the default
 * `$$ = $1` on an untyped nonterminal -- NOTE(review): confirm. */
283 rbrace:
284 '}' { } | rbrace NL
285 ;
286
/* re: a regular expression used as an expression by itself. A bare
 * /regexp/ becomes op3(MATCH, NIL, rectonode(), makedfa(regexp, 0));
 * `NOT re` wraps the operand in notnull() and negates it with op1(NOT). */
287 re:
288 reg_expr
289 { $$ = op3(MATCH, NIL, rectonode(), (Node*)makedfa($1,0)); }
290 | NOT re {$$ = op1(NOT, notnull($2)); }
291 ;
292
/* reg_expr: a /regexp/ literal. The mid-rule action calls startreg()
 * right after the opening '/' is recognized and occupies position $2, so
 * the REGEXPR token is $3; its string is the value of the rule. */
293 reg_expr:
294 '/' {startreg();} REGEXPR '/' { $$ = $3; }
295 ;
296
/* rparen: a ')' followed by zero or more NL tokens. */
297 rparen:
298 ')' | rparen NL
299 ;
300
/* simple_stmt: a statement without its terminator.
 *   - print / printf with '|', APPEND or GT followed by a term: stat3()
 *     receives the redirection token's value cast to Node * as its third
 *     argument and the term as its fourth
 *   - print / printf without redirection: both are NIL
 *   - DELETE varname [ patlist ]: stat2(DELETE, makearr(name), subscripts)
 *   - DELETE varname alone: reports error ":96:..." and still builds
 *     stat1(DELETE, name)
 *   - any pattern: converted with exptostat()
 *   - error: clears the lookahead and reports illstat; $$ is not assigned */
301 simple_stmt:
302 print prarg '|' term { $$ = stat3($1, $2, (Node *) $3, $4); }
303 | print prarg APPEND term { $$ = stat3($1, $2, (Node *) $3, $4); }
304 | print prarg GT term { $$ = stat3($1, $2, (Node *) $3, $4); }
305 | print prarg { $$ = stat3($1, $2, NIL, NIL); }
306 | DELETE varname '[' patlist ']' { $$ = stat2(DELETE, makearr($2), $4); }
307 | DELETE varname { yyclearin; vyyerror(":96:You can only delete array[element]"); $$ = stat1(DELETE, $2); }
308 | pattern { $$ = exptostat($1); }
309 | error { yyclearin; vyyerror(illstat); }
310 ;
311
/* st: statement terminator -- either one or more NL tokens, or a ';'
 * followed by optional NL tokens. It has no %type and both actions are
 * empty. */
312 st:
313 nl { } | ';' opt_nl { }
314 ;
315
/* stmt: one complete statement.
 *   - BREAK / CONTINUE / EXIT / RETURN build stat1() nodes; EXIT and
 *     RETURN carry the optional pattern, or NIL
 *   - do stmt WHILE ( pattern ): stat2(DO, body, notnull(cond))
 *   - `for` and `simple_stmt st` have no action, so the value is the
 *     default $1
 *   - if stmt [else stmt]: stat3(IF, cond, then, else-or-NIL)
 *   - { stmtlist }: the value is the inner list
 *   - NEXT: when infunc is nonzero error ":97:..." is reported; the NEXT
 *     node is built in either case
 *   - while stmt: stat2(WHILE, cond, body)
 *   - a lone ';': the value is 0 */
316 stmt:
317 BREAK st { $$ = stat1(BREAK, NIL); }
318 | CONTINUE st { $$ = stat1(CONTINUE, NIL); }
319 | do stmt WHILE '(' pattern ')' st
320 { $$ = stat2(DO, $2, notnull($5)); }
321 | EXIT pattern st { $$ = stat1(EXIT, $2); }
322 | EXIT st { $$ = stat1(EXIT, NIL); }
323 | for
324 | if stmt else stmt { $$ = stat3(IF, $1, $2, $4); }
325 | if stmt { $$ = stat3(IF, $1, $2, NIL); }
326 | lbrace stmtlist rbrace { $$ = $2; }
327 | NEXT st { if (infunc)
328 vyyerror(":97:Next is illegal inside a function");
329 $$ = stat1(NEXT, NIL); }
330 | RETURN pattern st { $$ = stat1(RETURN, $2); }
331 | RETURN st { $$ = stat1(RETURN, NIL); }
332 | simple_stmt st
333 | while stmt { $$ = stat2(WHILE, $1, $2); }
334 | ';' opt_nl { $$ = 0; }
335 ;
336
/* stmtlist: one or more stmt items joined left to right with linkum(). */
337 stmtlist:
338 stmt
339 | stmtlist stmt { $$ = linkum($1, $2); }
340 ;
341
/* subop: either SUB or GSUB; the token's value becomes the operator
 * passed to op4() in the substitution alternatives of term. */
342 subop:
343 SUB | GSUB
344 ;
345
/* term: arithmetic expressions, built-in calls and primaries.
 *   - binary + - * / % and POWER map to op2() with ADD, MINUS, MULT,
 *     DIVIDE, MOD, POWER
 *   - unary '-' builds op1(UMINUS); unary '+' returns its operand
 *     unchanged; NOT term wraps the operand in notnull()
 *   - BLTIN with no arguments or with `()` operates on rectonode()
 *   - CALL passes the callee as valtonode($1, CVAR) and the argument list
 *     or NIL
 *   - pre/post INCR and DECR build PREINCR, PREDECR, POSTINCR, POSTDECR
 *   - GETLINE forms build op3(GETLINE, var-or-NIL, (Node *)<LT value>-or-
 *     NIL, source-or-NIL)
 *   - INDEX with a /regexp/ second argument reports error ":98:..." but
 *     still builds the node
 *   - MATCHFCN, SPLIT and subop compile a /regexp/ or constant argument
 *     with makedfa(..., 1). For MATCHFCN and subop a non-constant
 *     argument is kept and (Node *)1 is stored in the first slot. SPLIT
 *     records the separator kind as (Node *)STRING or (Node *)REGEXPR in
 *     its last slot
 *   - subop without a target argument operates on rectonode()
 *   - SPRINTF uses the token's own value as the op1() operator
 *   - SUBSTR with two arguments passes NIL as the length */
346 term:
347 term '+' term { $$ = op2(ADD, $1, $3); }
348 | term '-' term { $$ = op2(MINUS, $1, $3); }
349 | term '*' term { $$ = op2(MULT, $1, $3); }
350 | term '/' term { $$ = op2(DIVIDE, $1, $3); }
351 | term '%' term { $$ = op2(MOD, $1, $3); }
352 | term POWER term { $$ = op2(POWER, $1, $3); }
353 | '-' term %prec UMINUS { $$ = op1(UMINUS, $2); }
354 | '+' term %prec UMINUS { $$ = $2; }
355 | NOT term %prec UMINUS { $$ = op1(NOT, notnull($2)); }
356 | BLTIN '(' ')' { $$ = op2(BLTIN, (Node *) $1, rectonode()); }
357 | BLTIN '(' patlist ')' { $$ = op2(BLTIN, (Node *) $1, $3); }
358 | BLTIN { $$ = op2(BLTIN, (Node *) $1, rectonode()); }
359 | CALL '(' ')' { $$ = op2(CALL, valtonode($1,CVAR), NIL); }
360 | CALL '(' patlist ')' { $$ = op2(CALL, valtonode($1,CVAR), $3); }
361 | DECR var { $$ = op1(PREDECR, $2); }
362 | INCR var { $$ = op1(PREINCR, $2); }
363 | var DECR { $$ = op1(POSTDECR, $1); }
364 | var INCR { $$ = op1(POSTINCR, $1); }
365 | GETLINE var LT term { $$ = op3(GETLINE, $2, (Node *)$3, $4); }
366 | GETLINE LT term { $$ = op3(GETLINE, NIL, (Node *)$2, $3); }
367 | GETLINE var { $$ = op3(GETLINE, $2, NIL, NIL); }
368 | GETLINE { $$ = op3(GETLINE, NIL, NIL, NIL); }
369 | INDEX '(' pattern comma pattern ')'
370 { $$ = op2(INDEX, $3, $5); }
371 | INDEX '(' pattern comma reg_expr ')'
372 { vyyerror(":98:Index() doesn't permit regular expressions");
373 $$ = op2(INDEX, $3, (Node*)$5); }
374 | '(' pattern ')' { $$ = $2; }
375 | MATCHFCN '(' pattern comma reg_expr ')'
376 { $$ = op3(MATCHFCN, NIL, $3, (Node*)makedfa($5, 1)); }
377 | MATCHFCN '(' pattern comma pattern ')'
378 { if (constnode($5))
379 $$ = op3(MATCHFCN, NIL, $3, (Node*)makedfa(strnode($5), 1));
380 else
381 $$ = op3(MATCHFCN, (Node *)1, $3, $5); }
382 | NUMBER { $$ = valtonode($1, CCON); }
383 | SPLIT '(' pattern comma varname comma pattern ')' /* string */
384 { $$ = op4(SPLIT, $3, makearr($5), $7, (Node*)STRING); }
385 | SPLIT '(' pattern comma varname comma reg_expr ')' /* const /regexp/ */
386 { $$ = op4(SPLIT, $3, makearr($5), (Node*)makedfa($7, 1), (Node *)REGEXPR); }
387 | SPLIT '(' pattern comma varname ')'
388 { $$ = op4(SPLIT, $3, makearr($5), NIL, (Node*)STRING); } /* default */
389 | SPRINTF '(' patlist ')' { $$ = op1($1, $3); }
390 | STRING { $$ = valtonode($1, CCON); }
391 | subop '(' reg_expr comma pattern ')'
392 { $$ = op4($1, NIL, (Node*)makedfa($3, 1), $5, rectonode()); }
393 | subop '(' pattern comma pattern ')'
394 { if (constnode($3))
395 $$ = op4($1, NIL, (Node*)makedfa(strnode($3), 1), $5, rectonode());
396 else
397 $$ = op4($1, (Node *)1, $3, $5, rectonode()); }
398 | subop '(' reg_expr comma pattern comma var ')'
399 { $$ = op4($1, NIL, (Node*)makedfa($3, 1), $5, $7); }
400 | subop '(' pattern comma pattern comma var ')'
401 { if (constnode($3))
402 $$ = op4($1, NIL, (Node*)makedfa(strnode($3), 1), $5, $7);
403 else
404 $$ = op4($1, (Node *)1, $3, $5, $7); }
405 | SUBSTR '(' pattern comma pattern comma pattern ')'
406 { $$ = op3(SUBSTR, $3, $5, $7); }
407 | SUBSTR '(' pattern comma pattern ')'
408 { $$ = op3(SUBSTR, $3, $5, NIL); }
409 | var
410 ;
411
/* var: something that can be assigned to or incremented.
 *   - a plain varname (value passed through unchanged)
 *   - varname [ patlist ]: op2(ARRAY, makearr(name), subscripts)
 *   - FIELD: valtonode($1, CFLD)
 *   - IVAR: op1(INDIRECT, valtonode($1, CVAR))
 *   - INDIRECT term: op1(INDIRECT, term) */
412 var:
413 varname
414 | varname '[' patlist ']' { $$ = op2(ARRAY, makearr($1), $3); }
415 | FIELD { $$ = valtonode($1, CFLD); }
416 | IVAR { $$ = op1(INDIRECT, valtonode($1, CVAR)); }
417 | INDIRECT term { $$ = op1(INDIRECT, $2); }
418 ;
419
/* varlist: the parameter list of a function definition -- empty, one VAR,
 * or several separated by `comma`. Every alternative stores the list
 * built so far both in $$ and in the global arglist. */
420 varlist:
421 /* nothing */ { arglist = $$ = 0; }
422 | VAR { arglist = $$ = valtonode($1,CVAR); }
423 | varlist comma VAR { arglist = $$ = linkum($1,valtonode($3,CVAR)); }
424 ;
425
/* varname: a simple name. VAR becomes valtonode($1, CVAR). ARG and VARNF
 * become op1() nodes that carry the token's value cast to Node *. */
426 varname:
427 VAR { $$ = valtonode($1, CVAR); }
428 | ARG { $$ = op1(ARG, (Node *) $1); }
429 | VARNF { $$ = op1(VARNF, (Node *) $1); }
430 ;
431
432
/* while: the `WHILE ( pattern )` header only (the body is handled in
 * stmt). Its value is the condition passed through notnull(). */
433 while:
434 WHILE '(' pattern rparen { $$ = notnull($3); }
435 ;
436
437 %%
438
439 static void
440 setfname(Cell *p)
441 {
442 if (isarr(p))
443 vyyerror(":99:%s is an array, not a function", p->nval);
444 else if (isfunc(p))
445 vyyerror(":100:You cannot define function %s more than once", p->nval);
446 curfname = p->nval;
447 }
448
449 static int
450 constnode(Node *p)
451 {
452 return p->ntype == NVALUE && ((Cell *) (p->narg[0]))->csub == CCON;
453 }
454
455 static unsigned char *strnode(Node *p)
456 {
457 return ((Cell *)(p->narg[0]))->sval;
458 }
459
460 static Node *notnull(Node *n)
461 {
462 switch (n->nobj) {
463 case LE: case LT: case EQ: case NE: case GT: case GE:
464 case BOR: case AND: case NOT:
465 return n;
466 default:
467 return op2(NE, n, nullnode);
468 }
469 }
470