/* $OpenBSD: awkgram.y,v 1.16 2023/09/10 14:59:00 millert Exp $ */
/****************************************************************
Copyright (C) Lucent Technologies 1997
All Rights Reserved

Permission to use, copy, modify, and distribute this software and
its documentation for any purpose and without fee is hereby
granted, provided that the above copyright notice appear in all
copies and that both that the copyright notice and this
permission notice and warranty disclaimer appear in supporting
documentation, and that the name Lucent Technologies or any of
its entities not be used in advertising or publicity pertaining
to distribution of the software without specific, written prior
permission.

LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
THIS SOFTWARE.
****************************************************************/

%{
#include <stdio.h>
#include <string.h>
#include "awk.h"

void checkdup(Node *list, Cell *item);

/* Unconditionally answers 1. */
int yywrap(void) { return 1; }

/* Parser state shared between the grammar actions and the helpers below. */
Node	*beginloc = NULL;	/* statement lists linked together by the XBEGIN rule */
Node	*endloc = NULL;		/* statement lists linked together by the XEND rule */
bool	infunc	= false;	/* true while in the arglist or body of a function */
int	inloop	= 0;		/* loop depth (while, for, do); a counter, not a bool, because loops can nest */
char	*curfname = NULL;	/* name of the function currently being defined */
Node	*arglist = NULL;	/* argument list of the function currently being defined */
%}

/* Semantic value carried by tokens and nonterminals. */
%union {
	Node	*p;
	Cell	*cp;
	int	i;
	char	*s;
}

/*
 * Token declarations.  The declaration order is significant: FIRSTTOKEN
 * has to stay first (and LASTTOKEN, declared after the precedence table,
 * last), so do not reorder these lines.
 */
%token	<i>	FIRSTTOKEN	/* must be first */
%token	<p>	PROGRAM PASTAT PASTAT2 XBEGIN XEND
%token	<i>	NL ',' '{' '(' '|' ';' '/' ')' '}' '[' ']'
%token	<i>	ARRAY
%token	<i>	MATCH NOTMATCH MATCHOP
%token	<i>	FINAL DOT ALL CCL NCCL CHAR OR STAR QUEST PLUS EMPTYRE ZERO
%token	<i>	AND BOR APPEND EQ GE GT LE LT NE IN
%token	<i>	ARG BLTIN BREAK CLOSE CONTINUE DELETE DO EXIT FOR FUNC
%token	<i>	GENSUB SUB GSUB IF INDEX LSUBSTR MATCHFCN NEXT NEXTFILE
%token	<i>	ADD MINUS MULT DIVIDE MOD
%token	<i>	ASSIGN ASGNOP ADDEQ SUBEQ MULTEQ DIVEQ MODEQ POWEQ
%token	<i>	PRINT PRINTF SPRINTF
%token	<p>	ELSE INTEST CONDEXPR
%token	<i>	POSTINCR PREINCR POSTDECR PREDECR
%token	<cp>	VAR IVAR VARNF CALL NUMBER STRING
%token	<s>	REGEXPR

/* Value types of the nonterminals. */
%type	<p>	pas pattern ppattern plist pplist patlist prarg term re
%type	<p>	pa_pat pa_stat pa_stats
%type	<s>	reg_expr
%type	<p>	simple_stmt opt_simple_stmt stmt stmtlist
%type	<p>	var varname funcname varlist
%type	<p>	for if else while
%type	<i>	do st
%type	<i>	pst opt_pst lbrace rbrace rparen comma nl opt_nl and bor
%type	<i>	subop print
%type	<cp>	string

/* Precedence table, lowest binding first (continues below). */
%right	ASGNOP
%right	'?'
%right	':'
%left	BOR
%left	AND
%left	GETLINE
%nonassoc APPEND EQ GE GT LE LT NE MATCHOP IN '|'
%left	ARG BLTIN BREAK CALL CLOSE CONTINUE DELETE DO EXIT FOR FUNC
%left	GSUB IF INDEX LSUBSTR MATCHFCN NEXT NUMBER
%left	PRINT PRINTF RETURN SPLIT SPRINTF STRING SUB SUBSTR
%left	REGEXPR VAR VARNF IVAR WHILE '('
%left	CAT
%left	'+' '-'
%left	'*' '/' '%'
%left	NOT UMINUS UPLUS
%right	POWER
%right	DECR INCR
%left	INDIRECT
%token	LASTTOKEN	/* must be last */

%%

/* Start symbol: build the PROGRAM node unless an error was flagged. */
program:
	  pas
		{
			if (errorflag == 0) {
				winner = (Node *)stat3(PROGRAM, beginloc, $1, endloc);
			}
		}
	| error
		{ yyclearin; bracecheck(); SYNTAX("bailing out"); }
	;

/* Tokens that may be followed by any number of newlines. */
and:
	  AND
	| and NL
	;

bor:
	  BOR
	| bor NL
	;

comma:
	  ','
	| comma NL
	;

do:
	  DO
	| do NL
	;

else:
	  ELSE
	| else NL
	;

/* The three for-loop forms; inloop is raised only while the body is parsed. */
for:
	  FOR '(' opt_simple_stmt ';' opt_nl pattern ';' opt_nl opt_simple_stmt rparen {inloop++;} stmt
		{
			--inloop;
			$$ = stat4(FOR, $3, notnull($6), $9, $12);
		}
	| FOR '(' opt_simple_stmt ';' ';' opt_nl opt_simple_stmt rparen {inloop++;} stmt
		{
			--inloop;
			$$ = stat4(FOR, $3, NIL, $7, $10);
		}
	| FOR '(' varname IN varname rparen {inloop++;} stmt
		{
			--inloop;
			$$ = stat3(IN, $3, makearr($5), $8);
		}
	;

/* Name following FUNC; setfname() validates it and records curfname. */
funcname:
	  VAR	{ setfname($1); }
	| CALL	{ setfname($1); }
	;

if:
	  IF '(' pattern rparen
		{ $$ = notnull($3); }
	;

lbrace:
	  '{'
	| lbrace NL
	;

nl:
	  NL
	| nl NL
	;

opt_nl:
	  /* empty */	{ $$ = 0; }
	| nl
	;

opt_pst:
	  /* empty */	{ $$ = 0; }
	| pst
	;


opt_simple_stmt:
	  /* empty */	{ $$ = NIL; }
	| simple_stmt
	;

/* Whole program body: pattern-action statements with optional separators. */
pas:
	  opt_pst
		{ $$ = NIL; }
	| opt_pst pa_stats opt_pst
		{ $$ = $2; }
	;

pa_pat:
	  pattern
		{ $$ = notnull($1); }
	;

/*
 * One top-level item.  XBEGIN/XEND bodies are chained onto beginloc/endloc
 * and function definitions go through defn(); those three yield no node.
 */
pa_stat:
	  pa_pat
		{ $$ = stat2(PASTAT, $1, stat2(PRINT, rectonode(), NIL)); }
	| pa_pat lbrace stmtlist '}'
		{ $$ = stat2(PASTAT, $1, $3); }
	| pa_pat ',' opt_nl pa_pat
		{ $$ = pa2stat($1, $4, stat2(PRINT, rectonode(), NIL)); }
	| pa_pat ',' opt_nl pa_pat lbrace stmtlist '}'
		{ $$ = pa2stat($1, $4, $6); }
	| lbrace stmtlist '}'
		{ $$ = stat2(PASTAT, NIL, $2); }
	| XBEGIN lbrace stmtlist '}'
		{
			beginloc = linkum(beginloc, $3);
			$$ = NIL;
		}
	| XEND lbrace stmtlist '}'
		{
			endloc = linkum(endloc, $3);
			$$ = NIL;
		}
	| FUNC funcname '(' varlist rparen {infunc = true;} lbrace stmtlist '}'
		{
			infunc = false;
			curfname = 0;
			defn((Cell *)$2, $4, $8);
			$$ = NIL;
		}
	;

pa_stats:
	  pa_stat
	| pa_stats opt_pst pa_stat
		{ $$ = linkum($1, $3); }
	;

patlist:
	  pattern
	| patlist comma pattern
		{ $$ = linkum($1, $3); }
	;

/*
 * Expression form used for unparenthesized print arguments (see pplist and
 * prarg); unlike "pattern" it has no relational-operator or "| getline"
 * productions.
 */
ppattern:
	  var ASGNOP ppattern
		{ $$ = op2($2, $1, $3); }
	| ppattern '?' ppattern ':' ppattern %prec '?'
		{ $$ = op3(CONDEXPR, notnull($1), $3, $5); }
	| ppattern bor ppattern %prec BOR
		{ $$ = op2(BOR, notnull($1), notnull($3)); }
	| ppattern and ppattern %prec AND
		{ $$ = op2(AND, notnull($1), notnull($3)); }
	| ppattern MATCHOP reg_expr
		{
			$$ = op3($2, NIL, $1, (Node*)makedfa($3, 0));
			free($3);
		}
	| ppattern MATCHOP ppattern
		{
			/* a constant right-hand side is compiled right here */
			if (constnode($3)) {
				$$ = op3($2, NIL, $1, (Node*)makedfa(strnode($3), 0));
				free($3);
			} else {
				$$ = op3($2, (Node *)1, $1, $3);
			}
		}
	| ppattern IN varname
		{ $$ = op2(INTEST, $1, makearr($3)); }
	| '(' plist ')' IN varname
		{ $$ = op2(INTEST, $2, makearr($5)); }
	| ppattern term %prec CAT
		{ $$ = op2(CAT, $1, $2); }
	| re
	| term
	;

/* General expression. */
pattern:
	  var ASGNOP pattern
		{ $$ = op2($2, $1, $3); }
	| pattern '?' pattern ':' pattern %prec '?'
		{ $$ = op3(CONDEXPR, notnull($1), $3, $5); }
	| pattern bor pattern %prec BOR
		{ $$ = op2(BOR, notnull($1), notnull($3)); }
	| pattern and pattern %prec AND
		{ $$ = op2(AND, notnull($1), notnull($3)); }
	| pattern EQ pattern
		{ $$ = op2($2, $1, $3); }
	| pattern GE pattern
		{ $$ = op2($2, $1, $3); }
	| pattern GT pattern
		{ $$ = op2($2, $1, $3); }
	| pattern LE pattern
		{ $$ = op2($2, $1, $3); }
	| pattern LT pattern
		{ $$ = op2($2, $1, $3); }
	| pattern NE pattern
		{ $$ = op2($2, $1, $3); }
	| pattern MATCHOP reg_expr
		{
			$$ = op3($2, NIL, $1, (Node*)makedfa($3, 0));
			free($3);
		}
	| pattern MATCHOP pattern
		{
			/* a constant right-hand side is compiled right here */
			if (constnode($3)) {
				$$ = op3($2, NIL, $1, (Node*)makedfa(strnode($3), 0));
				free($3);
			} else {
				$$ = op3($2, (Node *)1, $1, $3);
			}
		}
	| pattern IN varname
		{ $$ = op2(INTEST, $1, makearr($3)); }
	| '(' plist ')' IN varname
		{ $$ = op2(INTEST, $2, makearr($5)); }
	| pattern '|' GETLINE var
		{
			if (safe) {
				SYNTAX("cmd | getline is unsafe");
			} else {
				$$ = op3(GETLINE, $4, itonp($2), $1);
			}
		}
	| pattern '|' GETLINE
		{
			if (safe) {
				SYNTAX("cmd | getline is unsafe");
			} else {
				$$ = op3(GETLINE, NIL, itonp($2), $1);
			}
		}
	| pattern term %prec CAT
		{ $$ = op2(CAT, $1, $2); }
	| re
	| term
	;

/* Comma-separated list of at least two patterns. */
plist:
	  pattern comma pattern
		{ $$ = linkum($1, $3); }
	| plist comma pattern
		{ $$ = linkum($1, $3); }
	;

pplist:
	  ppattern
	| pplist comma ppattern
		{ $$ = linkum($1, $3); }
	;

/* Arguments of print/printf; with none given, rectonode() is used. */
prarg:
	  /* empty */
		{ $$ = rectonode(); }
	| pplist
	| '(' plist ')'
		{ $$ = $2; }
	;

print:
	  PRINT
	| PRINTF
	;

pst:
	  NL
	| ';'
	| pst NL
	| pst ';'
	;

rbrace:
	  '}'
	| rbrace NL
	;

/* A bare /regex/ becomes a MATCH against rectonode(). */
re:
	  reg_expr
		{
			$$ = op3(MATCH, NIL, rectonode(), (Node*)makedfa($1, 0));
			free($1);
		}
	| NOT re
		{ $$ = op1(NOT, notnull($2)); }
	;

/* startreg() runs right after the opening slash is seen. */
reg_expr:
	  '/' {startreg();} REGEXPR '/'
		{ $$ = $3; }
	;

rparen:
	  ')'
	| rparen NL
	;

/*
 * Statements that may also appear in the init/increment slots of a for
 * loop.  The output-redirection forms of print are rejected with a syntax
 * error when the `safe` flag is set.
 */
simple_stmt:
	  print prarg '|' term		{
			if (safe) SYNTAX("print | is unsafe");
			else $$ = stat3($1, $2, itonp($3), $4); }
	| print prarg APPEND term	{
			if (safe) SYNTAX("print >> is unsafe");
			else $$ = stat3($1, $2, itonp($3), $4); }
	| print prarg GT term		{
			if (safe) SYNTAX("print > is unsafe");
			else $$ = stat3($1, $2, itonp($3), $4); }
	| print prarg			{ $$ = stat3($1, $2, NIL, NIL); }
	| DELETE varname '[' patlist ']' { $$ = stat2(DELETE, makearr($2), $4); }
	| DELETE varname		{ $$ = stat2(DELETE, makearr($2), 0); }
	| pattern			{ $$ = exptostat($1); }
	| error				{ yyclearin; SYNTAX("illegal statement"); }
	;

/* Statement terminator: newline(s), or ';' optionally followed by newlines. */
st:
	  nl
	| ';' opt_nl
	;

/*
 * Full statements.  inloop is incremented only around loop bodies, so
 * break/continue can be diagnosed outside loops; next/nextfile are
 * diagnosed while infunc is set.
 */
stmt:
	  BREAK st		{ if (!inloop) SYNTAX("break illegal outside of loops");
				  $$ = stat1(BREAK, NIL); }
	| CONTINUE st		{ if (!inloop) SYNTAX("continue illegal outside of loops");
				  $$ = stat1(CONTINUE, NIL); }
	| do {inloop++;} stmt {--inloop;} WHILE '(' pattern ')' st
		{ $$ = stat2(DO, $3, notnull($7)); }
	| EXIT pattern st	{ $$ = stat1(EXIT, $2); }
	| EXIT st		{ $$ = stat1(EXIT, NIL); }
	| for
	| if stmt else stmt	{ $$ = stat3(IF, $1, $2, $4); }
	| if stmt		{ $$ = stat3(IF, $1, $2, NIL); }
	| lbrace stmtlist rbrace { $$ = $2; }
	| NEXT st	{ if (infunc)
				SYNTAX("next is illegal inside a function");
			  $$ = stat1(NEXT, NIL); }
	| NEXTFILE st	{ if (infunc)
				SYNTAX("nextfile is illegal inside a function");
			  $$ = stat1(NEXTFILE, NIL); }
	| RETURN pattern st	{ $$ = stat1(RETURN, $2); }
	| RETURN st		{ $$ = stat1(RETURN, NIL); }
	| simple_stmt st
	| while {inloop++;} stmt	{ --inloop; $$ = stat2(WHILE, $1, $3); }
	| ';' opt_nl		{ $$ = 0; }
	;

stmtlist:
	  stmt
	| stmtlist stmt		{ $$ = linkum($1, $2); }
	;

subop:
	  SUB
	| GSUB
	;

/* Adjacent STRING tokens are merged with catstr(). */
string:
	  STRING
	| string STRING		{ $$ = catstr($1, $2); }
	;

/*
 * Primary expressions, arithmetic, and built-in calls.
 *
 * Ownership rule for regular expressions: every production that hands a
 * reg_expr string to makedfa() frees that string afterwards.  The GENSUB
 * reg_expr productions previously skipped the free() and leaked the
 * string; they now follow the same rule as MATCHFCN, SPLIT and subop.
 * When the regex argument is a general pattern, a constant operand is
 * compiled at parse time (first operand NIL); otherwise the node is
 * passed through with (Node *)1 as the first operand.
 */
term:
	  term '/' ASGNOP term		{ $$ = op2(DIVEQ, $1, $4); }
	| term '+' term			{ $$ = op2(ADD, $1, $3); }
	| term '-' term			{ $$ = op2(MINUS, $1, $3); }
	| term '*' term			{ $$ = op2(MULT, $1, $3); }
	| term '/' term			{ $$ = op2(DIVIDE, $1, $3); }
	| term '%' term			{ $$ = op2(MOD, $1, $3); }
	| term POWER term		{ $$ = op2(POWER, $1, $3); }
	| '-' term %prec UMINUS		{ $$ = op1(UMINUS, $2); }
	| '+' term %prec UMINUS		{ $$ = op1(UPLUS, $2); }
	| NOT term %prec UMINUS		{ $$ = op1(NOT, notnull($2)); }
	| BLTIN '(' ')'			{ $$ = op2(BLTIN, itonp($1), rectonode()); }
	| BLTIN '(' patlist ')'		{ $$ = op2(BLTIN, itonp($1), $3); }
	| BLTIN				{ $$ = op2(BLTIN, itonp($1), rectonode()); }
	| CALL '(' ')'			{ $$ = op2(CALL, celltonode($1,CVAR), NIL); }
	| CALL '(' patlist ')'		{ $$ = op2(CALL, celltonode($1,CVAR), $3); }
	| CLOSE term			{ $$ = op1(CLOSE, $2); }
	| DECR var			{ $$ = op1(PREDECR, $2); }
	| INCR var			{ $$ = op1(PREINCR, $2); }
	| var DECR			{ $$ = op1(POSTDECR, $1); }
	| var INCR			{ $$ = op1(POSTINCR, $1); }
	| GENSUB '(' reg_expr comma pattern comma pattern ')'
		{ $$ = op5(GENSUB, NIL, (Node*)makedfa($3, 1), $5, $7, rectonode());
		  free($3);	/* regex text is no longer needed once compiled */ }
	| GENSUB '(' pattern comma pattern comma pattern ')'
		{ if (constnode($3)) {
			$$ = op5(GENSUB, NIL, (Node *)makedfa(strnode($3), 1), $5, $7, rectonode());
			free($3);
		  } else
			$$ = op5(GENSUB, (Node *)1, $3, $5, $7, rectonode());
		}
	| GENSUB '(' reg_expr comma pattern comma pattern comma pattern ')'
		{ $$ = op5(GENSUB, NIL, (Node*)makedfa($3, 1), $5, $7, $9);
		  free($3);	/* regex text is no longer needed once compiled */ }
	| GENSUB '(' pattern comma pattern comma pattern comma pattern ')'
		{ if (constnode($3)) {
			$$ = op5(GENSUB, NIL, (Node *)makedfa(strnode($3),1), $5,$7,$9);
			free($3);
		  } else
			$$ = op5(GENSUB, (Node *)1, $3, $5, $7, $9);
		}
	| GETLINE var LT term		{ $$ = op3(GETLINE, $2, itonp($3), $4); }
	| GETLINE LT term		{ $$ = op3(GETLINE, NIL, itonp($2), $3); }
	| GETLINE var			{ $$ = op3(GETLINE, $2, NIL, NIL); }
	| GETLINE			{ $$ = op3(GETLINE, NIL, NIL, NIL); }
	| INDEX '(' pattern comma pattern ')'
		{ $$ = op2(INDEX, $3, $5); }
	| INDEX '(' pattern comma reg_expr ')'
		{ SYNTAX("index() doesn't permit regular expressions");
		  $$ = op2(INDEX, $3, (Node*)$5); }
	| '(' pattern ')'		{ $$ = $2; }
	| MATCHFCN '(' pattern comma reg_expr ')'
		{ $$ = op3(MATCHFCN, NIL, $3, (Node*)makedfa($5, 1)); free($5); }
	| MATCHFCN '(' pattern comma pattern ')'
		{ if (constnode($5)) {
			$$ = op3(MATCHFCN, NIL, $3, (Node*)makedfa(strnode($5), 1));
			free($5);
		  } else
			$$ = op3(MATCHFCN, (Node *)1, $3, $5); }
	| NUMBER			{ $$ = celltonode($1, CCON); }
	| SPLIT '(' pattern comma varname comma pattern ')'     /* string */
		{ $$ = op4(SPLIT, $3, makearr($5), $7, (Node*)STRING); }
	| SPLIT '(' pattern comma varname comma reg_expr ')'    /* const /regexp/ */
		{ $$ = op4(SPLIT, $3, makearr($5), (Node*)makedfa($7, 1), (Node *)REGEXPR); free($7); }
	| SPLIT '(' pattern comma varname ')'
		{ $$ = op4(SPLIT, $3, makearr($5), NIL, (Node*)STRING); }  /* default */
	| SPRINTF '(' patlist ')'	{ $$ = op1($1, $3); }
	| string			{ $$ = celltonode($1, CCON); }
	| subop '(' reg_expr comma pattern ')'
		{ $$ = op4($1, NIL, (Node*)makedfa($3, 1), $5, rectonode()); free($3); }
	| subop '(' pattern comma pattern ')'
		{ if (constnode($3)) {
			$$ = op4($1, NIL, (Node*)makedfa(strnode($3), 1), $5, rectonode());
			free($3);
		  } else
			$$ = op4($1, (Node *)1, $3, $5, rectonode()); }
	| subop '(' reg_expr comma pattern comma var ')'
		{ $$ = op4($1, NIL, (Node*)makedfa($3, 1), $5, $7); free($3); }
	| subop '(' pattern comma pattern comma var ')'
		{ if (constnode($3)) {
			$$ = op4($1, NIL, (Node*)makedfa(strnode($3), 1), $5, $7);
			free($3);
		  } else
			$$ = op4($1, (Node *)1, $3, $5, $7); }
	| SUBSTR '(' pattern comma pattern comma pattern ')'
		{ $$ = op3(SUBSTR, $3, $5, $7); }
	| SUBSTR '(' pattern comma pattern ')'
		{ $$ = op3(SUBSTR, $3, $5, NIL); }
	| var
	;

/* Variable references: plain names, subscripted arrays, and INDIRECT forms. */
var:
	  varname
	| varname '[' patlist ']'
		{ $$ = op2(ARRAY, makearr($1), $3); }
	| IVAR
		{ $$ = op1(INDIRECT, celltonode($1, CVAR)); }
	| INDIRECT term
		{ $$ = op1(INDIRECT, $2); }
	;

/* Formal parameter list of a function; arglist always mirrors the result. */
varlist:
	  /* nothing */
		{ arglist = $$ = 0; }
	| VAR
		{ arglist = $$ = celltonode($1,CVAR); }
	| varlist comma VAR
		{
			checkdup($1, $3);
			arglist = $$ = linkum($1,celltonode($3,CVAR));
		}
	;

varname:
	  VAR
		{ $$ = celltonode($1, CVAR); }
	| ARG
		{ $$ = op1(ARG, itonp($1)); }
	| VARNF
		{ $$ = op1(VARNF, (Node *) $1); }
	;


while:
	  WHILE '(' pattern rparen
		{ $$ = notnull($3); }
	;

%%

/*
 * Record p as the function currently being defined.  A syntax error is
 * reported when p is an array or is already a function; curfname is set
 * from p->nval in every case.
 */
void setfname(Cell *p)
{
	if (isarr(p)) {
		SYNTAX("%s is an array, not a function", p->nval);
	} else if (isfcn(p)) {
		SYNTAX("you can't define function %s more than once", p->nval);
	}
	curfname = p->nval;
}

/* Return nonzero when p is a value node whose cell has csub == CCON. */
int constnode(Node *p)
{
	if (!isvalue(p))
		return 0;
	return ((Cell *) (p->narg[0]))->csub == CCON;
}

/* Return the sval of the cell stored in p's first argument slot. */
char *strnode(Node *p)
{
	Cell *cell = (Cell *)(p->narg[0]);

	return cell->sval;
}

/*
 * Return n unchanged when it is a comparison or a BOR/AND/NOT node;
 * otherwise wrap it as (n NE nullnode).
 */
Node *notnull(Node *n)
{
	switch (n->nobj) {
	case LE:
	case LT:
	case EQ:
	case NE:
	case GT:
	case GE:
	case BOR:
	case AND:
	case NOT:
		return n;
	default:
		break;
	}
	return op2(NE, n, nullnode);
}

/*
 * Check whether the name of cp already occurs in the list vl; report a
 * syntax error on the first match and stop looking.
 */
void checkdup(Node *vl, Cell *cp)
{
	char *name = cp->nval;

	while (vl) {
		Cell *seen = (Cell *)(vl->narg[0]);

		if (strcmp(name, seen->nval) == 0) {
			SYNTAX("duplicate argument %s", name);
			return;
		}
		vl = vl->nnext;
	}
}