1 /**************************************************************** 2 Copyright (C) Lucent Technologies 1997 3 All Rights Reserved 4 5 Permission to use, copy, modify, and distribute this software and 6 its documentation for any purpose and without fee is hereby 7 granted, provided that the above copyright notice appear in all 8 copies and that both that the copyright notice and this 9 permission notice and warranty disclaimer appear in supporting 10 documentation, and that the name Lucent Technologies or any of 11 its entities not be used in advertising or publicity pertaining 12 to distribution of the software without specific, written prior 13 permission. 14 15 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, 16 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. 17 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY 18 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 19 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER 20 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, 21 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF 22 THIS SOFTWARE. 23 ****************************************************************/ 24 25 #define DEBUG 26 #include <stdio.h> 27 #include <ctype.h> 28 #include <errno.h> 29 #include <wchar.h> 30 #include <wctype.h> 31 #include <fcntl.h> 32 #include <setjmp.h> 33 #include <limits.h> 34 #include <math.h> 35 #include <string.h> 36 #include <stdlib.h> 37 #include <time.h> 38 #include <sys/types.h> 39 #include <sys/wait.h> 40 #include "awk.h" 41 #include "ytab.h" 42 43 static void stdinit(void); 44 static void flush_all(void); 45 46 #if 1 47 #define tempfree(x) do { if (istemp(x)) tfree(x); } while (/*CONSTCOND*/0) 48 #else 49 void tempfree(Cell *p) { 50 if (p->ctype == OCELL && (p->csub < CUNK || p->csub > CFREE)) { 51 WARNING("bad csub %d in Cell %d %s", 52 p->csub, p->ctype, p->sval); 53 } 54 if (istemp(p)) 55 tfree(p); 56 } 57 #endif 58 59 /* do we really need these? */ 60 /* #ifdef _NFILE */ 61 /* #ifndef FOPEN_MAX */ 62 /* #define FOPEN_MAX _NFILE */ 63 /* #endif */ 64 /* #endif */ 65 /* */ 66 /* #ifndef FOPEN_MAX */ 67 /* #define FOPEN_MAX 40 */ /* max number of open files */ 68 /* #endif */ 69 /* */ 70 /* #ifndef RAND_MAX */ 71 /* #define RAND_MAX 32767 */ /* all that ansi guarantees */ 72 /* #endif */ 73 74 jmp_buf env; 75 extern int pairstack[]; 76 extern Awkfloat srand_seed; 77 78 Node *winner = NULL; /* root of parse tree */ 79 Cell *tmps; /* free temporary cells for execution */ 80 81 static Cell truecell ={ OBOOL, BTRUE, 0, 0, 1.0, NUM, NULL, NULL }; 82 Cell *True = &truecell; 83 static Cell falsecell ={ OBOOL, BFALSE, 0, 0, 0.0, NUM, NULL, NULL }; 84 Cell *False = &falsecell; 85 static Cell breakcell ={ OJUMP, JBREAK, 0, 0, 0.0, NUM, NULL, NULL }; 86 Cell *jbreak = &breakcell; 87 static Cell contcell ={ OJUMP, JCONT, 0, 0, 0.0, NUM, NULL, NULL }; 88 Cell *jcont = &contcell; 89 static Cell nextcell ={ OJUMP, JNEXT, 0, 0, 0.0, NUM, NULL, NULL }; 90 Cell *jnext = &nextcell; 91 static Cell nextfilecell ={ OJUMP, JNEXTFILE, 0, 0, 0.0, NUM, NULL, NULL }; 92 Cell *jnextfile = &nextfilecell; 93 static Cell exitcell ={ OJUMP, JEXIT, 0, 0, 0.0, NUM, NULL, NULL }; 94 Cell *jexit = &exitcell; 95 static Cell retcell ={ OJUMP, JRET, 0, 0, 0.0, NUM, NULL, NULL }; 96 Cell *jret = &retcell; 97 static Cell tempcell ={ OCELL, CTEMP, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL, NULL }; 98 99 Node *curnode = NULL; /* the node being executed, for debugging */ 100 101 /* buffer memory management */ 102 int adjbuf(char **pbuf, int *psiz, int minlen, int quantum, char **pbptr, 103 const char *whatrtn) 104 /* pbuf: address of pointer to buffer being managed 105 * psiz: address of buffer size variable 106 * minlen: minimum length of buffer needed 107 * quantum: buffer size quantum 108 * pbptr: address of movable pointer into buffer, or 0 if none 109 * whatrtn: name of the calling routine if failure should cause fatal error 110 * 111 * return 0 for realloc failure, !=0 for success 112 */ 113 { 114 if (minlen > *psiz) { 115 char *tbuf; 116 int rminlen = quantum ? minlen % quantum : 0; 117 int boff = pbptr ? *pbptr - *pbuf : 0; 118 /* round up to next multiple of quantum */ 119 if (rminlen) 120 minlen += quantum - rminlen; 121 tbuf = realloc(*pbuf, minlen); 122 DPRINTF("adjbuf %s: %d %d (pbuf=%p, tbuf=%p)\n", whatrtn, *psiz, minlen, *pbuf, tbuf); 123 if (tbuf == NULL) { 124 if (whatrtn) 125 FATAL("out of memory in %s", whatrtn); 126 return 0; 127 } 128 *pbuf = tbuf; 129 *psiz = minlen; 130 if (pbptr) 131 *pbptr = tbuf + boff; 132 } 133 return 1; 134 } 135 136 void run(Node *a) /* execution of parse tree starts here */ 137 { 138 139 stdinit(); 140 execute(a); 141 closeall(); 142 } 143 144 Cell *execute(Node *u) /* execute a node of the parse tree */ 145 { 146 Cell *(*proc)(Node **, int); 147 Cell *x; 148 Node *a; 149 150 if (u == NULL) 151 return(True); 152 for (a = u; ; a = a->nnext) { 153 curnode = a; 154 if (isvalue(a)) { 155 x = (Cell *) (a->narg[0]); 156 if (isfld(x) && !donefld) 157 fldbld(); 158 else if (isrec(x) && !donerec) 159 recbld(); 160 return(x); 161 } 162 if (notlegal(a->nobj)) /* probably a Cell* but too risky to print */ 163 FATAL("illegal statement"); 164 proc = proctab[a->nobj-FIRSTTOKEN]; 165 x = (*proc)(a->narg, a->nobj); 166 if (isfld(x) && !donefld) 167 fldbld(); 168 else if (isrec(x) && !donerec) 169 recbld(); 170 if (isexpr(a)) 171 return(x); 172 if (isjump(x)) 173 return(x); 174 if (a->nnext == NULL) 175 return(x); 176 tempfree(x); 177 } 178 } 179 180 181 Cell *program(Node **a, int n) /* execute an awk program */ 182 { /* a[0] = BEGIN, a[1] = body, a[2] = END */ 183 Cell *x; 184 185 if (setjmp(env) != 0) 186 goto ex; 187 if (a[0]) { /* BEGIN */ 188 x = execute(a[0]); 189 if (isexit(x)) 190 return(True); 191 if (isjump(x)) 192 FATAL("illegal break, continue, next or nextfile from BEGIN"); 193 tempfree(x); 194 } 195 if (a[1] || a[2]) 196 while (getrec(&record, &recsize, true) > 0) { 197 x = execute(a[1]); 198 if (isexit(x)) 199 break; 200 tempfree(x); 201 } 202 ex: 203 if (setjmp(env) != 0) /* handles exit within END */ 204 goto ex1; 205 if (a[2]) { /* END */ 206 x = execute(a[2]); 207 if (isbreak(x) || isnext(x) || iscont(x)) 208 FATAL("illegal break, continue, next or nextfile from END"); 209 tempfree(x); 210 } 211 ex1: 212 return(True); 213 } 214 215 struct Frame { /* stack frame for awk function calls */ 216 int nargs; /* number of arguments in this call */ 217 Cell *fcncell; /* pointer to Cell for function */ 218 Cell **args; /* pointer to array of arguments after execute */ 219 Cell *retval; /* return value */ 220 }; 221 222 #define NARGS 50 /* max args in a call */ 223 224 struct Frame *frame = NULL; /* base of stack frames; dynamically allocated */ 225 int nframe = 0; /* number of frames allocated */ 226 struct Frame *frp = NULL; /* frame pointer. bottom level unused */ 227 228 Cell *call(Node **a, int n) /* function call. very kludgy and fragile */ 229 { 230 static const Cell newcopycell = { OCELL, CCOPY, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL, NULL }; 231 int i, ncall, ndef; 232 int freed = 0; /* handles potential double freeing when fcn & param share a tempcell */ 233 Node *x; 234 Cell *args[NARGS], *oargs[NARGS]; /* BUG: fixed size arrays */ 235 Cell *y, *z, *fcn; 236 char *s; 237 238 fcn = execute(a[0]); /* the function itself */ 239 s = fcn->nval; 240 if (!isfcn(fcn)) 241 FATAL("calling undefined function %s", s); 242 if (frame == NULL) { 243 frp = frame = calloc(nframe += 100, sizeof(*frame)); 244 if (frame == NULL) 245 FATAL("out of space for stack frames calling %s", s); 246 } 247 for (ncall = 0, x = a[1]; x != NULL; x = x->nnext) /* args in call */ 248 ncall++; 249 ndef = (int) fcn->fval; /* args in defn */ 250 DPRINTF("calling %s, %d args (%d in defn), frp=%d\n", s, ncall, ndef, (int) (frp-frame)); 251 if (ncall > ndef) 252 WARNING("function %s called with %d args, uses only %d", 253 s, ncall, ndef); 254 if (ncall + ndef > NARGS) 255 FATAL("function %s has %d arguments, limit %d", s, ncall+ndef, NARGS); 256 for (i = 0, x = a[1]; x != NULL; i++, x = x->nnext) { /* get call args */ 257 DPRINTF("evaluate args[%d], frp=%d:\n", i, (int) (frp-frame)); 258 y = execute(x); 259 oargs[i] = y; 260 DPRINTF("args[%d]: %s %f <%s>, t=%o\n", 261 i, NN(y->nval), y->fval, isarr(y) ? "(array)" : NN(y->sval), y->tval); 262 if (isfcn(y)) 263 FATAL("can't use function %s as argument in %s", y->nval, s); 264 if (isarr(y)) 265 args[i] = y; /* arrays by ref */ 266 else 267 args[i] = copycell(y); 268 tempfree(y); 269 } 270 for ( ; i < ndef; i++) { /* add null args for ones not provided */ 271 args[i] = gettemp(); 272 *args[i] = newcopycell; 273 } 274 frp++; /* now ok to up frame */ 275 if (frp >= frame + nframe) { 276 int dfp = frp - frame; /* old index */ 277 frame = realloc(frame, (nframe += 100) * sizeof(*frame)); 278 if (frame == NULL) 279 FATAL("out of space for stack frames in %s", s); 280 frp = frame + dfp; 281 } 282 frp->fcncell = fcn; 283 frp->args = args; 284 frp->nargs = ndef; /* number defined with (excess are locals) */ 285 frp->retval = gettemp(); 286 287 DPRINTF("start exec of %s, frp=%d\n", s, (int) (frp-frame)); 288 y = execute((Node *)(fcn->sval)); /* execute body */ 289 DPRINTF("finished exec of %s, frp=%d\n", s, (int) (frp-frame)); 290 291 for (i = 0; i < ndef; i++) { 292 Cell *t = frp->args[i]; 293 if (isarr(t)) { 294 if (t->csub == CCOPY) { 295 if (i >= ncall) { 296 freesymtab(t); 297 t->csub = CTEMP; 298 tempfree(t); 299 } else { 300 oargs[i]->tval = t->tval; 301 oargs[i]->tval &= ~(STR|NUM|DONTFREE); 302 oargs[i]->sval = t->sval; 303 tempfree(t); 304 } 305 } 306 } else if (t != y) { /* kludge to prevent freeing twice */ 307 t->csub = CTEMP; 308 tempfree(t); 309 } else if (t == y && t->csub == CCOPY) { 310 t->csub = CTEMP; 311 tempfree(t); 312 freed = 1; 313 } 314 } 315 tempfree(fcn); 316 if (isexit(y) || isnext(y)) 317 return y; 318 if (freed == 0) { 319 tempfree(y); /* don't free twice! */ 320 } 321 z = frp->retval; /* return value */ 322 DPRINTF("%s returns %g |%s| %o\n", s, getfval(z), getsval(z), z->tval); 323 frp--; 324 return(z); 325 } 326 327 Cell *copycell(Cell *x) /* make a copy of a cell in a temp */ 328 { 329 Cell *y; 330 331 /* copy is not constant or field */ 332 333 y = gettemp(); 334 y->tval = x->tval & ~(CON|FLD|REC); 335 y->csub = CCOPY; /* prevents freeing until call is over */ 336 y->nval = x->nval; /* BUG? */ 337 if (isstr(x) /* || x->ctype == OCELL */) { 338 y->sval = tostring(x->sval); 339 y->tval &= ~DONTFREE; 340 } else 341 y->tval |= DONTFREE; 342 y->fval = x->fval; 343 return y; 344 } 345 346 Cell *arg(Node **a, int n) /* nth argument of a function */ 347 { 348 349 n = ptoi(a[0]); /* argument number, counting from 0 */ 350 DPRINTF("arg(%d), frp->nargs=%d\n", n, frp->nargs); 351 if (n+1 > frp->nargs) 352 FATAL("argument #%d of function %s was not supplied", 353 n+1, frp->fcncell->nval); 354 return frp->args[n]; 355 } 356 357 Cell *jump(Node **a, int n) /* break, continue, next, nextfile, return */ 358 { 359 Cell *y; 360 361 switch (n) { 362 case EXIT: 363 if (a[0] != NULL) { 364 y = execute(a[0]); 365 errorflag = (int) getfval(y); 366 tempfree(y); 367 } 368 longjmp(env, 1); 369 case RETURN: 370 if (a[0] != NULL) { 371 y = execute(a[0]); 372 if ((y->tval & (STR|NUM)) == (STR|NUM)) { 373 setsval(frp->retval, getsval(y)); 374 frp->retval->fval = getfval(y); 375 frp->retval->tval |= NUM; 376 } 377 else if (y->tval & STR) 378 setsval(frp->retval, getsval(y)); 379 else if (y->tval & NUM) 380 setfval(frp->retval, getfval(y)); 381 else /* can't happen */ 382 FATAL("bad type variable %d", y->tval); 383 tempfree(y); 384 } 385 return(jret); 386 case NEXT: 387 return(jnext); 388 case NEXTFILE: 389 nextfile(); 390 return(jnextfile); 391 case BREAK: 392 return(jbreak); 393 case CONTINUE: 394 return(jcont); 395 default: /* can't happen */ 396 FATAL("illegal jump type %d", n); 397 } 398 return 0; /* not reached */ 399 } 400 401 Cell *awkgetline(Node **a, int n) /* get next line from specific input */ 402 { /* a[0] is variable, a[1] is operator, a[2] is filename */ 403 Cell *r, *x; 404 extern Cell **fldtab; 405 FILE *fp; 406 char *buf; 407 int bufsize = recsize; 408 int mode; 409 bool newflag; 410 411 if ((buf = malloc(bufsize)) == NULL) 412 FATAL("out of memory in getline"); 413 414 fflush(stdout); /* in case someone is waiting for a prompt */ 415 r = gettemp(); 416 if (a[1] != NULL) { /* getline < file */ 417 x = execute(a[2]); /* filename */ 418 mode = ptoi(a[1]); 419 if (mode == '|') /* input pipe */ 420 mode = LE; /* arbitrary flag */ 421 fp = openfile(mode, getsval(x), &newflag); 422 tempfree(x); 423 if (fp == NULL) 424 n = -1; 425 else 426 n = readrec(&buf, &bufsize, fp, newflag); 427 if (n <= 0) { 428 ; 429 } else if (a[0] != NULL) { /* getline var <file */ 430 x = execute(a[0]); 431 setsval(x, buf); 432 if (is_number(x->sval)) { 433 x->fval = atof(x->sval); 434 x->tval |= NUM; 435 } 436 tempfree(x); 437 } else { /* getline <file */ 438 setsval(fldtab[0], buf); 439 if (is_number(fldtab[0]->sval)) { 440 fldtab[0]->fval = atof(fldtab[0]->sval); 441 fldtab[0]->tval |= NUM; 442 } 443 } 444 } else { /* bare getline; use current input */ 445 if (a[0] == NULL) /* getline */ 446 n = getrec(&record, &recsize, true); 447 else { /* getline var */ 448 n = getrec(&buf, &bufsize, false); 449 x = execute(a[0]); 450 setsval(x, buf); 451 if (is_number(x->sval)) { 452 x->fval = atof(x->sval); 453 x->tval |= NUM; 454 } 455 tempfree(x); 456 } 457 } 458 setfval(r, (Awkfloat) n); 459 free(buf); 460 return r; 461 } 462 463 Cell *getnf(Node **a, int n) /* get NF */ 464 { 465 if (!donefld) 466 fldbld(); 467 return (Cell *) a[0]; 468 } 469 470 static char * 471 makearraystring(Node *p, const char *func) 472 { 473 char *buf; 474 int bufsz = recsize; 475 size_t blen; 476 477 if ((buf = malloc(bufsz)) == NULL) { 478 FATAL("%s: out of memory", func); 479 } 480 481 blen = 0; 482 buf[blen] = '\0'; 483 484 for (; p; p = p->nnext) { 485 Cell *x = execute(p); /* expr */ 486 char *s = getsval(x); 487 size_t seplen = strlen(getsval(subseploc)); 488 size_t nsub = p->nnext ? seplen : 0; 489 size_t slen = strlen(s); 490 size_t tlen = blen + slen + nsub; 491 492 if (!adjbuf(&buf, &bufsz, tlen + 1, recsize, 0, func)) { 493 FATAL("%s: out of memory %s[%s...]", 494 func, x->nval, buf); 495 } 496 memcpy(buf + blen, s, slen); 497 if (nsub) { 498 memcpy(buf + blen + slen, *SUBSEP, nsub); 499 } 500 buf[tlen] = '\0'; 501 blen = tlen; 502 tempfree(x); 503 } 504 return buf; 505 } 506 507 Cell *array(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */ 508 { 509 Cell *x, *z; 510 char *buf; 511 512 x = execute(a[0]); /* Cell* for symbol table */ 513 buf = makearraystring(a[1], __func__); 514 if (!isarr(x)) { 515 DPRINTF("making %s into an array\n", NN(x->nval)); 516 if (freeable(x)) 517 xfree(x->sval); 518 x->tval &= ~(STR|NUM|DONTFREE); 519 x->tval |= ARR; 520 x->sval = (char *) makesymtab(NSYMTAB); 521 } 522 z = setsymtab(buf, "", 0.0, STR|NUM, (Array *) x->sval); 523 z->ctype = OCELL; 524 z->csub = CVAR; 525 tempfree(x); 526 free(buf); 527 return(z); 528 } 529 530 Cell *awkdelete(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */ 531 { 532 Cell *x; 533 534 x = execute(a[0]); /* Cell* for symbol table */ 535 if (x == symtabloc) { 536 FATAL("cannot delete SYMTAB or its elements"); 537 } 538 if (!isarr(x)) 539 return True; 540 if (a[1] == NULL) { /* delete the elements, not the table */ 541 freesymtab(x); 542 x->tval &= ~STR; 543 x->tval |= ARR; 544 x->sval = (char *) makesymtab(NSYMTAB); 545 } else { 546 char *buf = makearraystring(a[1], __func__); 547 freeelem(x, buf); 548 free(buf); 549 } 550 tempfree(x); 551 return True; 552 } 553 554 Cell *intest(Node **a, int n) /* a[0] is index (list), a[1] is symtab */ 555 { 556 Cell *ap, *k; 557 char *buf; 558 559 ap = execute(a[1]); /* array name */ 560 if (!isarr(ap)) { 561 DPRINTF("making %s into an array\n", ap->nval); 562 if (freeable(ap)) 563 xfree(ap->sval); 564 ap->tval &= ~(STR|NUM|DONTFREE); 565 ap->tval |= ARR; 566 ap->sval = (char *) makesymtab(NSYMTAB); 567 } 568 buf = makearraystring(a[0], __func__); 569 k = lookup(buf, (Array *) ap->sval); 570 tempfree(ap); 571 free(buf); 572 if (k == NULL) 573 return(False); 574 else 575 return(True); 576 } 577 578 579 Cell *matchop(Node **a, int n) /* ~ and match() */ 580 { 581 Cell *x, *y; 582 char *s, *t; 583 int i; 584 fa *pfa; 585 int (*mf)(fa *, const char *) = match, mode = 0; 586 587 if (n == MATCHFCN) { 588 mf = pmatch; 589 mode = 1; 590 } 591 x = execute(a[1]); /* a[1] = target text */ 592 s = getsval(x); 593 if (a[0] == NULL) /* a[1] == 0: already-compiled reg expr */ 594 i = (*mf)((fa *) a[2], s); 595 else { 596 y = execute(a[2]); /* a[2] = regular expr */ 597 t = getsval(y); 598 pfa = makedfa(t, mode); 599 i = (*mf)(pfa, s); 600 tempfree(y); 601 } 602 tempfree(x); 603 if (n == MATCHFCN) { 604 int start = patbeg - s + 1; 605 if (patlen < 0) 606 start = 0; 607 setfval(rstartloc, (Awkfloat) start); 608 setfval(rlengthloc, (Awkfloat) patlen); 609 x = gettemp(); 610 x->tval = NUM; 611 x->fval = start; 612 return x; 613 } else if ((n == MATCH && i == 1) || (n == NOTMATCH && i == 0)) 614 return(True); 615 else 616 return(False); 617 } 618 619 620 Cell *boolop(Node **a, int n) /* a[0] || a[1], a[0] && a[1], !a[0] */ 621 { 622 Cell *x, *y; 623 int i; 624 625 x = execute(a[0]); 626 i = istrue(x); 627 tempfree(x); 628 switch (n) { 629 case BOR: 630 if (i) return(True); 631 y = execute(a[1]); 632 i = istrue(y); 633 tempfree(y); 634 if (i) return(True); 635 else return(False); 636 case AND: 637 if ( !i ) return(False); 638 y = execute(a[1]); 639 i = istrue(y); 640 tempfree(y); 641 if (i) return(True); 642 else return(False); 643 case NOT: 644 if (i) return(False); 645 else return(True); 646 default: /* can't happen */ 647 FATAL("unknown boolean operator %d", n); 648 } 649 return 0; /*NOTREACHED*/ 650 } 651 652 Cell *relop(Node **a, int n) /* a[0 < a[1], etc. */ 653 { 654 int i; 655 Cell *x, *y; 656 Awkfloat j; 657 658 x = execute(a[0]); 659 y = execute(a[1]); 660 if (x->tval&NUM && y->tval&NUM) { 661 j = x->fval - y->fval; 662 i = j<0? -1: (j>0? 1: 0); 663 } else { 664 i = strcmp(getsval(x), getsval(y)); 665 } 666 tempfree(x); 667 tempfree(y); 668 switch (n) { 669 case LT: if (i<0) return(True); 670 else return(False); 671 case LE: if (i<=0) return(True); 672 else return(False); 673 case NE: if (i!=0) return(True); 674 else return(False); 675 case EQ: if (i == 0) return(True); 676 else return(False); 677 case GE: if (i>=0) return(True); 678 else return(False); 679 case GT: if (i>0) return(True); 680 else return(False); 681 default: /* can't happen */ 682 FATAL("unknown relational operator %d", n); 683 } 684 return 0; /*NOTREACHED*/ 685 } 686 687 void tfree(Cell *a) /* free a tempcell */ 688 { 689 if (freeable(a)) { 690 DPRINTF("freeing %s %s %o\n", NN(a->nval), NN(a->sval), a->tval); 691 xfree(a->sval); 692 } 693 if (a == tmps) 694 FATAL("tempcell list is curdled"); 695 a->cnext = tmps; 696 tmps = a; 697 } 698 699 Cell *gettemp(void) /* get a tempcell */ 700 { int i; 701 Cell *x; 702 703 if (!tmps) { 704 tmps = calloc(100, sizeof(*tmps)); 705 if (!tmps) 706 FATAL("out of space for temporaries"); 707 for (i = 1; i < 100; i++) 708 tmps[i-1].cnext = &tmps[i]; 709 tmps[i-1].cnext = NULL; 710 } 711 x = tmps; 712 tmps = x->cnext; 713 *x = tempcell; 714 return(x); 715 } 716 717 Cell *indirect(Node **a, int n) /* $( a[0] ) */ 718 { 719 Awkfloat val; 720 Cell *x; 721 int m; 722 char *s; 723 724 x = execute(a[0]); 725 val = getfval(x); /* freebsd: defend against super large field numbers */ 726 if ((Awkfloat)INT_MAX < val) 727 FATAL("trying to access out of range field %s", x->nval); 728 m = (int) val; 729 if (m == 0 && !is_number(s = getsval(x))) /* suspicion! */ 730 FATAL("illegal field $(%s), name \"%s\"", s, x->nval); 731 /* BUG: can x->nval ever be null??? */ 732 tempfree(x); 733 x = fieldadr(m); 734 x->ctype = OCELL; /* BUG? why are these needed? */ 735 x->csub = CFLD; 736 return(x); 737 } 738 739 Cell *substr(Node **a, int nnn) /* substr(a[0], a[1], a[2]) */ 740 { 741 int k, m, n; 742 char *s; 743 int temp; 744 Cell *x, *y, *z = NULL; 745 746 x = execute(a[0]); 747 y = execute(a[1]); 748 if (a[2] != NULL) 749 z = execute(a[2]); 750 s = getsval(x); 751 k = strlen(s) + 1; 752 if (k <= 1) { 753 tempfree(x); 754 tempfree(y); 755 if (a[2] != NULL) { 756 tempfree(z); 757 } 758 x = gettemp(); 759 setsval(x, ""); 760 return(x); 761 } 762 m = (int) getfval(y); 763 if (m <= 0) 764 m = 1; 765 else if (m > k) 766 m = k; 767 tempfree(y); 768 if (a[2] != NULL) { 769 n = (int) getfval(z); 770 tempfree(z); 771 } else 772 n = k - 1; 773 if (n < 0) 774 n = 0; 775 else if (n > k - m) 776 n = k - m; 777 DPRINTF("substr: m=%d, n=%d, s=%s\n", m, n, s); 778 y = gettemp(); 779 temp = s[n+m-1]; /* with thanks to John Linderman */ 780 s[n+m-1] = '\0'; 781 setsval(y, s + m - 1); 782 s[n+m-1] = temp; 783 tempfree(x); 784 return(y); 785 } 786 787 Cell *sindex(Node **a, int nnn) /* index(a[0], a[1]) */ 788 { 789 Cell *x, *y, *z; 790 char *s1, *s2, *p1, *p2, *q; 791 Awkfloat v = 0.0; 792 793 x = execute(a[0]); 794 s1 = getsval(x); 795 y = execute(a[1]); 796 s2 = getsval(y); 797 798 z = gettemp(); 799 for (p1 = s1; *p1 != '\0'; p1++) { 800 for (q = p1, p2 = s2; *p2 != '\0' && *q == *p2; q++, p2++) 801 continue; 802 if (*p2 == '\0') { 803 v = (Awkfloat) (p1 - s1 + 1); /* origin 1 */ 804 break; 805 } 806 } 807 tempfree(x); 808 tempfree(y); 809 setfval(z, v); 810 return(z); 811 } 812 813 #define MAXNUMSIZE 50 814 815 int format(char **pbuf, int *pbufsize, const char *s, Node *a) /* printf-like conversions */ 816 { 817 char *fmt; 818 char *p, *t; 819 const char *os; 820 Cell *x; 821 int flag = 0, n; 822 int fmtwd; /* format width */ 823 int fmtsz = recsize; 824 char *buf = *pbuf; 825 int bufsize = *pbufsize; 826 #define FMTSZ(a) (fmtsz - ((a) - fmt)) 827 #define BUFSZ(a) (bufsize - ((a) - buf)) 828 829 static bool first = true; 830 static bool have_a_format = false; 831 832 if (first) { 833 char xbuf[100]; 834 835 snprintf(xbuf, sizeof(xbuf), "%a", 42.0); 836 have_a_format = (strcmp(xbuf, "0x1.5p+5") == 0); 837 first = false; 838 } 839 840 os = s; 841 p = buf; 842 if ((fmt = malloc(fmtsz)) == NULL) 843 FATAL("out of memory in format()"); 844 while (*s) { 845 adjbuf(&buf, &bufsize, MAXNUMSIZE+1+p-buf, recsize, &p, "format1"); 846 if (*s != '%') { 847 *p++ = *s++; 848 continue; 849 } 850 if (*(s+1) == '%') { 851 *p++ = '%'; 852 s += 2; 853 continue; 854 } 855 /* have to be real careful in case this is a huge number, eg, %100000d */ 856 fmtwd = atoi(s+1); 857 if (fmtwd < 0) 858 fmtwd = -fmtwd; 859 adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format2"); 860 for (t = fmt; (*t++ = *s) != '\0'; s++) { 861 if (!adjbuf(&fmt, &fmtsz, MAXNUMSIZE+1+t-fmt, recsize, &t, "format3")) 862 FATAL("format item %.30s... ran format() out of memory", os); 863 /* Ignore size specifiers */ 864 if (strchr("hjLlqtz", *s) != NULL) { /* the ansi panoply */ 865 t--; 866 continue; 867 } 868 if (isalpha((uschar)*s)) 869 break; 870 if (*s == '$') { 871 FATAL("'$' not permitted in awk formats"); 872 } 873 if (*s == '*') { 874 if (a == NULL) { 875 FATAL("not enough args in printf(%s)", os); 876 } 877 x = execute(a); 878 a = a->nnext; 879 snprintf(t - 1, FMTSZ(t - 1), 880 "%d", fmtwd=(int) getfval(x)); 881 if (fmtwd < 0) 882 fmtwd = -fmtwd; 883 adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format"); 884 t = fmt + strlen(fmt); 885 tempfree(x); 886 } 887 } 888 *t = '\0'; 889 if (fmtwd < 0) 890 fmtwd = -fmtwd; 891 adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format4"); 892 switch (*s) { 893 case 'a': case 'A': 894 if (have_a_format) 895 flag = *s; 896 else 897 flag = 'f'; 898 break; 899 case 'f': case 'e': case 'g': case 'E': case 'G': 900 flag = 'f'; 901 break; 902 case 'd': case 'i': case 'o': case 'x': case 'X': case 'u': 903 flag = (*s == 'd' || *s == 'i') ? 'd' : 'u'; 904 *(t-1) = 'j'; 905 *t = *s; 906 *++t = '\0'; 907 break; 908 case 's': 909 flag = 's'; 910 break; 911 case 'c': 912 flag = 'c'; 913 break; 914 default: 915 WARNING("weird printf conversion %s", fmt); 916 flag = '?'; 917 break; 918 } 919 if (a == NULL) 920 FATAL("not enough args in printf(%s)", os); 921 x = execute(a); 922 a = a->nnext; 923 n = MAXNUMSIZE; 924 if (fmtwd > n) 925 n = fmtwd; 926 adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format5"); 927 switch (flag) { 928 case '?': snprintf(p, BUFSZ(p), "%s", fmt); /* unknown, so dump it too */ 929 t = getsval(x); 930 n = strlen(t); 931 if (fmtwd > n) 932 n = fmtwd; 933 adjbuf(&buf, &bufsize, 1+strlen(p)+n+p-buf, recsize, &p, "format6"); 934 p += strlen(p); 935 snprintf(p, BUFSZ(p), "%s", t); 936 break; 937 case 'a': 938 case 'A': 939 case 'f': snprintf(p, BUFSZ(p), fmt, getfval(x)); break; 940 case 'd': snprintf(p, BUFSZ(p), fmt, (intmax_t) getfval(x)); break; 941 case 'u': snprintf(p, BUFSZ(p), fmt, (uintmax_t) getfval(x)); break; 942 case 's': 943 t = getsval(x); 944 n = strlen(t); 945 if (fmtwd > n) 946 n = fmtwd; 947 if (!adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format7")) 948 FATAL("huge string/format (%d chars) in printf %.30s... ran format() out of memory", n, t); 949 snprintf(p, BUFSZ(p), fmt, t); 950 break; 951 case 'c': 952 if (isnum(x)) { 953 if ((int)getfval(x)) 954 snprintf(p, BUFSZ(p), fmt, (int) getfval(x)); 955 else { 956 *p++ = '\0'; /* explicit null byte */ 957 *p = '\0'; /* next output will start here */ 958 } 959 } else 960 snprintf(p, BUFSZ(p), fmt, getsval(x)[0]); 961 break; 962 default: 963 FATAL("can't happen: bad conversion %c in format()", flag); 964 } 965 tempfree(x); 966 p += strlen(p); 967 s++; 968 } 969 *p = '\0'; 970 free(fmt); 971 for ( ; a; a = a->nnext) /* evaluate any remaining args */ 972 execute(a); 973 *pbuf = buf; 974 *pbufsize = bufsize; 975 return p - buf; 976 } 977 978 Cell *awksprintf(Node **a, int n) /* sprintf(a[0]) */ 979 { 980 Cell *x; 981 Node *y; 982 char *buf; 983 int bufsz=3*recsize; 984 985 if ((buf = malloc(bufsz)) == NULL) 986 FATAL("out of memory in awksprintf"); 987 y = a[0]->nnext; 988 x = execute(a[0]); 989 if (format(&buf, &bufsz, getsval(x), y) == -1) 990 FATAL("sprintf string %.30s... too long. can't happen.", buf); 991 tempfree(x); 992 x = gettemp(); 993 x->sval = buf; 994 x->tval = STR; 995 return(x); 996 } 997 998 Cell *awkprintf(Node **a, int n) /* printf */ 999 { /* a[0] is list of args, starting with format string */ 1000 /* a[1] is redirection operator, a[2] is redirection file */ 1001 FILE *fp; 1002 Cell *x; 1003 Node *y; 1004 char *buf; 1005 int len; 1006 int bufsz=3*recsize; 1007 1008 if ((buf = malloc(bufsz)) == NULL) 1009 FATAL("out of memory in awkprintf"); 1010 y = a[0]->nnext; 1011 x = execute(a[0]); 1012 if ((len = format(&buf, &bufsz, getsval(x), y)) == -1) 1013 FATAL("printf string %.30s... too long. can't happen.", buf); 1014 tempfree(x); 1015 if (a[1] == NULL) { 1016 /* fputs(buf, stdout); */ 1017 fwrite(buf, len, 1, stdout); 1018 if (ferror(stdout)) 1019 FATAL("write error on stdout"); 1020 } else { 1021 fp = redirect(ptoi(a[1]), a[2]); 1022 /* fputs(buf, fp); */ 1023 fwrite(buf, len, 1, fp); 1024 fflush(fp); 1025 if (ferror(fp)) 1026 FATAL("write error on %s", filename(fp)); 1027 } 1028 free(buf); 1029 return(True); 1030 } 1031 1032 Cell *arith(Node **a, int n) /* a[0] + a[1], etc. also -a[0] */ 1033 { 1034 Awkfloat i, j = 0; 1035 double v; 1036 Cell *x, *y, *z; 1037 1038 x = execute(a[0]); 1039 i = getfval(x); 1040 tempfree(x); 1041 if (n != UMINUS && n != UPLUS) { 1042 y = execute(a[1]); 1043 j = getfval(y); 1044 tempfree(y); 1045 } 1046 z = gettemp(); 1047 switch (n) { 1048 case ADD: 1049 i += j; 1050 break; 1051 case MINUS: 1052 i -= j; 1053 break; 1054 case MULT: 1055 i *= j; 1056 break; 1057 case DIVIDE: 1058 if (j == 0) 1059 FATAL("division by zero"); 1060 i /= j; 1061 break; 1062 case MOD: 1063 if (j == 0) 1064 FATAL("division by zero in mod"); 1065 modf(i/j, &v); 1066 i = i - j * v; 1067 break; 1068 case UMINUS: 1069 i = -i; 1070 break; 1071 case UPLUS: /* handled by getfval(), above */ 1072 break; 1073 case POWER: 1074 if (j >= 0 && modf(j, &v) == 0.0) /* pos integer exponent */ 1075 i = ipow(i, (int) j); 1076 else { 1077 errno = 0; 1078 i = errcheck(pow(i, j), "pow"); 1079 } 1080 break; 1081 default: /* can't happen */ 1082 FATAL("illegal arithmetic operator %d", n); 1083 } 1084 setfval(z, i); 1085 return(z); 1086 } 1087 1088 double ipow(double x, int n) /* x**n. ought to be done by pow, but isn't always */ 1089 { 1090 double v; 1091 1092 if (n <= 0) 1093 return 1; 1094 v = ipow(x, n/2); 1095 if (n % 2 == 0) 1096 return v * v; 1097 else 1098 return x * v * v; 1099 } 1100 1101 Cell *incrdecr(Node **a, int n) /* a[0]++, etc. */ 1102 { 1103 Cell *x, *z; 1104 int k; 1105 Awkfloat xf; 1106 1107 x = execute(a[0]); 1108 xf = getfval(x); 1109 k = (n == PREINCR || n == POSTINCR) ? 1 : -1; 1110 if (n == PREINCR || n == PREDECR) { 1111 setfval(x, xf + k); 1112 return(x); 1113 } 1114 z = gettemp(); 1115 setfval(z, xf); 1116 setfval(x, xf + k); 1117 tempfree(x); 1118 return(z); 1119 } 1120 1121 Cell *assign(Node **a, int n) /* a[0] = a[1], a[0] += a[1], etc. */ 1122 { /* this is subtle; don't muck with it. */ 1123 Cell *x, *y; 1124 Awkfloat xf, yf; 1125 double v; 1126 1127 y = execute(a[1]); 1128 x = execute(a[0]); 1129 if (n == ASSIGN) { /* ordinary assignment */ 1130 if (x == y && !(x->tval & (FLD|REC)) && x != nfloc) 1131 ; /* self-assignment: leave alone unless it's a field or NF */ 1132 else if ((y->tval & (STR|NUM)) == (STR|NUM)) { 1133 setsval(x, getsval(y)); 1134 x->fval = getfval(y); 1135 x->tval |= NUM; 1136 } 1137 else if (isstr(y)) 1138 setsval(x, getsval(y)); 1139 else if (isnum(y)) 1140 setfval(x, getfval(y)); 1141 else 1142 funnyvar(y, "read value of"); 1143 tempfree(y); 1144 return(x); 1145 } 1146 xf = getfval(x); 1147 yf = getfval(y); 1148 switch (n) { 1149 case ADDEQ: 1150 xf += yf; 1151 break; 1152 case SUBEQ: 1153 xf -= yf; 1154 break; 1155 case MULTEQ: 1156 xf *= yf; 1157 break; 1158 case DIVEQ: 1159 if (yf == 0) 1160 FATAL("division by zero in /="); 1161 xf /= yf; 1162 break; 1163 case MODEQ: 1164 if (yf == 0) 1165 FATAL("division by zero in %%="); 1166 modf(xf/yf, &v); 1167 xf = xf - yf * v; 1168 break; 1169 case POWEQ: 1170 if (yf >= 0 && modf(yf, &v) == 0.0) /* pos integer exponent */ 1171 xf = ipow(xf, (int) yf); 1172 else { 1173 errno = 0; 1174 xf = errcheck(pow(xf, yf), "pow"); 1175 } 1176 break; 1177 default: 1178 FATAL("illegal assignment operator %d", n); 1179 break; 1180 } 1181 tempfree(y); 1182 setfval(x, xf); 1183 return(x); 1184 } 1185 1186 Cell *cat(Node **a, int q) /* a[0] cat a[1] */ 1187 { 1188 Cell *x, *y, *z; 1189 int n1, n2; 1190 char *s = NULL; 1191 int ssz = 0; 1192 1193 x = execute(a[0]); 1194 n1 = strlen(getsval(x)); 1195 adjbuf(&s, &ssz, n1, recsize, 0, "cat1"); 1196 memcpy(s, x->sval, n1); 1197 1198 y = execute(a[1]); 1199 n2 = strlen(getsval(y)); 1200 adjbuf(&s, &ssz, n1 + n2 + 1, recsize, 0, "cat2"); 1201 memcpy(s + n1, y->sval, n2); 1202 s[n1 + n2] = '\0'; 1203 1204 tempfree(x); 1205 tempfree(y); 1206 1207 z = gettemp(); 1208 z->sval = s; 1209 z->tval = STR; 1210 1211 return(z); 1212 } 1213 1214 Cell *pastat(Node **a, int n) /* a[0] { a[1] } */ 1215 { 1216 Cell *x; 1217 1218 if (a[0] == NULL) 1219 x = execute(a[1]); 1220 else { 1221 x = execute(a[0]); 1222 if (istrue(x)) { 1223 tempfree(x); 1224 x = execute(a[1]); 1225 } 1226 } 1227 return x; 1228 } 1229 1230 Cell *dopa2(Node **a, int n) /* a[0], a[1] { a[2] } */ 1231 { 1232 Cell *x; 1233 int pair; 1234 1235 pair = ptoi(a[3]); 1236 if (pairstack[pair] == 0) { 1237 x = execute(a[0]); 1238 if (istrue(x)) 1239 pairstack[pair] = 1; 1240 tempfree(x); 1241 } 1242 if (pairstack[pair] == 1) { 1243 x = execute(a[1]); 1244 if (istrue(x)) 1245 pairstack[pair] = 0; 1246 tempfree(x); 1247 x = execute(a[2]); 1248 return(x); 1249 } 1250 return(False); 1251 } 1252 1253 Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */ 1254 { 1255 Cell *x = NULL, *y, *ap; 1256 const char *s, *origs, *t; 1257 const char *fs = NULL; 1258 char *origfs = NULL; 1259 int sep; 1260 char temp, num[50]; 1261 int n, tempstat, arg3type; 1262 1263 y = execute(a[0]); /* source string */ 1264 origs = s = strdup(getsval(y)); 1265 arg3type = ptoi(a[3]); 1266 if (a[2] == NULL) /* fs string */ 1267 fs = getsval(fsloc); 1268 else if (arg3type == STRING) { /* split(str,arr,"string") */ 1269 x = execute(a[2]); 1270 fs = origfs = strdup(getsval(x)); 1271 tempfree(x); 1272 } else if (arg3type == REGEXPR) 1273 fs = "(regexpr)"; /* split(str,arr,/regexpr/) */ 1274 else 1275 FATAL("illegal type of split"); 1276 sep = *fs; 1277 ap = execute(a[1]); /* array name */ 1278 freesymtab(ap); 1279 DPRINTF("split: s=|%s|, a=%s, sep=|%s|\n", s, NN(ap->nval), fs); 1280 ap->tval &= ~STR; 1281 ap->tval |= ARR; 1282 ap->sval = (char *) makesymtab(NSYMTAB); 1283 1284 n = 0; 1285 if (arg3type == REGEXPR && strlen((char*)((fa*)a[2])->restr) == 0) { 1286 /* split(s, a, //); have to arrange that it looks like empty sep */ 1287 arg3type = 0; 1288 fs = ""; 1289 sep = 0; 1290 } 1291 if (*s != '\0' && (strlen(fs) > 1 || arg3type == REGEXPR)) { /* reg expr */ 1292 fa *pfa; 1293 if (arg3type == REGEXPR) { /* it's ready already */ 1294 pfa = (fa *) a[2]; 1295 } else { 1296 pfa = makedfa(fs, 1); 1297 } 1298 if (nematch(pfa,s)) { 1299 tempstat = pfa->initstat; 1300 pfa->initstat = 2; 1301 do { 1302 n++; 1303 snprintf(num, sizeof(num), "%d", n); 1304 temp = *patbeg; 1305 setptr(patbeg, '\0'); 1306 if (is_number(s)) 1307 setsymtab(num, s, atof(s), STR|NUM, (Array *) ap->sval); 1308 else 1309 setsymtab(num, s, 0.0, STR, (Array *) ap->sval); 1310 setptr(patbeg, temp); 1311 s = patbeg + patlen; 1312 if (*(patbeg+patlen-1) == '\0' || *s == '\0') { 1313 n++; 1314 snprintf(num, sizeof(num), "%d", n); 1315 setsymtab(num, "", 0.0, STR, (Array *) ap->sval); 1316 pfa->initstat = tempstat; 1317 goto spdone; 1318 } 1319 } while (nematch(pfa,s)); 1320 pfa->initstat = tempstat; /* bwk: has to be here to reset */ 1321 /* cf gsub and refldbld */ 1322 } 1323 n++; 1324 snprintf(num, sizeof(num), "%d", n); 1325 if (is_number(s)) 1326 setsymtab(num, s, atof(s), STR|NUM, (Array *) ap->sval); 1327 else 1328 setsymtab(num, s, 0.0, STR, (Array *) ap->sval); 1329 spdone: 1330 pfa = NULL; 1331 } else if (sep == ' ') { 1332 for (n = 0; ; ) { 1333 #define ISWS(c) ((c) == ' ' || (c) == '\t' || (c) == '\n') 1334 while (ISWS(*s)) 1335 s++; 1336 if (*s == '\0') 1337 break; 1338 n++; 1339 t = s; 1340 do 1341 s++; 1342 while (*s != '\0' && !ISWS(*s)); 1343 temp = *s; 1344 setptr(s, '\0'); 1345 snprintf(num, sizeof(num), "%d", n); 1346 if (is_number(t)) 1347 setsymtab(num, t, atof(t), STR|NUM, (Array *) ap->sval); 1348 else 1349 setsymtab(num, t, 0.0, STR, (Array *) ap->sval); 1350 setptr(s, temp); 1351 if (*s != '\0') 1352 s++; 1353 } 1354 } else if (sep == 0) { /* new: split(s, a, "") => 1 char/elem */ 1355 for (n = 0; *s != '\0'; s++) { 1356 char buf[2]; 1357 n++; 1358 snprintf(num, sizeof(num), "%d", n); 1359 buf[0] = *s; 1360 buf[1] = '\0'; 1361 if (isdigit((uschar)buf[0])) 1362 setsymtab(num, buf, atof(buf), STR|NUM, (Array *) ap->sval); 1363 else 1364 setsymtab(num, buf, 0.0, STR, (Array *) ap->sval); 1365 } 1366 } else if (*s != '\0') { 1367 for (;;) { 1368 n++; 1369 t = s; 1370 while (*s != sep && *s != '\n' && *s != '\0') 1371 s++; 1372 temp = *s; 1373 setptr(s, '\0'); 1374 snprintf(num, sizeof(num), "%d", n); 1375 if (is_number(t)) 1376 setsymtab(num, t, atof(t), STR|NUM, (Array *) ap->sval); 1377 else 1378 setsymtab(num, t, 0.0, STR, (Array *) ap->sval); 1379 setptr(s, temp); 1380 if (*s++ == '\0') 1381 break; 1382 } 1383 } 1384 tempfree(ap); 1385 tempfree(y); 1386 xfree(origs); 1387 xfree(origfs); 1388 x = gettemp(); 1389 x->tval = NUM; 1390 x->fval = n; 1391 return(x); 1392 } 1393 1394 Cell *condexpr(Node **a, int n) /* a[0] ? a[1] : a[2] */ 1395 { 1396 Cell *x; 1397 1398 x = execute(a[0]); 1399 if (istrue(x)) { 1400 tempfree(x); 1401 x = execute(a[1]); 1402 } else { 1403 tempfree(x); 1404 x = execute(a[2]); 1405 } 1406 return(x); 1407 } 1408 1409 Cell *ifstat(Node **a, int n) /* if (a[0]) a[1]; else a[2] */ 1410 { 1411 Cell *x; 1412 1413 x = execute(a[0]); 1414 if (istrue(x)) { 1415 tempfree(x); 1416 x = execute(a[1]); 1417 } else if (a[2] != NULL) { 1418 tempfree(x); 1419 x = execute(a[2]); 1420 } 1421 return(x); 1422 } 1423 1424 Cell *whilestat(Node **a, int n) /* while (a[0]) a[1] */ 1425 { 1426 Cell *x; 1427 1428 for (;;) { 1429 x = execute(a[0]); 1430 if (!istrue(x)) 1431 return(x); 1432 tempfree(x); 1433 x = execute(a[1]); 1434 if (isbreak(x)) { 1435 x = True; 1436 return(x); 1437 } 1438 if (isnext(x) || isexit(x) || isret(x)) 1439 return(x); 1440 tempfree(x); 1441 } 1442 } 1443 1444 Cell *dostat(Node **a, int n) /* do a[0]; while(a[1]) */ 1445 { 1446 Cell *x; 1447 1448 for (;;) { 1449 x = execute(a[0]); 1450 if (isbreak(x)) 1451 return True; 1452 if (isnext(x) || isexit(x) || isret(x)) 1453 return(x); 1454 tempfree(x); 1455 x = execute(a[1]); 1456 if (!istrue(x)) 1457 return(x); 1458 tempfree(x); 1459 } 1460 } 1461 1462 Cell *forstat(Node **a, int n) /* for (a[0]; a[1]; a[2]) a[3] */ 1463 { 1464 Cell *x; 1465 1466 x = execute(a[0]); 1467 tempfree(x); 1468 for (;;) { 1469 if (a[1]!=NULL) { 1470 x = execute(a[1]); 1471 if (!istrue(x)) return(x); 1472 else tempfree(x); 1473 } 1474 x = execute(a[3]); 1475 if (isbreak(x)) /* turn off break */ 1476 return True; 1477 if (isnext(x) || isexit(x) || isret(x)) 1478 return(x); 1479 tempfree(x); 1480 x = execute(a[2]); 1481 tempfree(x); 1482 } 1483 } 1484 1485 Cell *instat(Node **a, int n) /* for (a[0] in a[1]) a[2] */ 1486 { 1487 Cell *x, *vp, *arrayp, *cp, *ncp; 1488 Array *tp; 1489 int i; 1490 1491 vp = execute(a[0]); 1492 arrayp = execute(a[1]); 1493 if (!isarr(arrayp)) { 1494 return True; 1495 } 1496 tp = (Array *) arrayp->sval; 1497 tempfree(arrayp); 1498 for (i = 0; i < tp->size; i++) { /* this routine knows too much */ 1499 for (cp = tp->tab[i]; cp != NULL; cp = ncp) { 1500 setsval(vp, cp->nval); 1501 ncp = cp->cnext; 1502 x = execute(a[2]); 1503 if (isbreak(x)) { 1504 tempfree(vp); 1505 return True; 1506 } 1507 if (isnext(x) || isexit(x) || isret(x)) { 1508 tempfree(vp); 1509 return(x); 1510 } 1511 tempfree(x); 1512 } 1513 } 1514 return True; 1515 } 1516 1517 static char *nawk_convert(const char *s, int (*fun_c)(int), 1518 wint_t (*fun_wc)(wint_t)) 1519 { 1520 char *buf = NULL; 1521 char *pbuf = NULL; 1522 const char *ps = NULL; 1523 size_t n = 0; 1524 mbstate_t mbs, mbs2; 1525 wchar_t wc; 1526 size_t sz = MB_CUR_MAX; 1527 1528 if (sz == 1) { 1529 buf = tostring(s); 1530 1531 for (pbuf = buf; *pbuf; pbuf++) 1532 *pbuf = fun_c((uschar)*pbuf); 1533 1534 return buf; 1535 } else { 1536 /* upper/lower character may be shorter/longer */ 1537 buf = tostringN(s, strlen(s) * sz + 1); 1538 1539 memset(&mbs, 0, sizeof(mbs)); 1540 memset(&mbs2, 0, sizeof(mbs2)); 1541 1542 ps = s; 1543 pbuf = buf; 1544 while (n = mbrtowc(&wc, ps, sz, &mbs), 1545 n > 0 && n != (size_t)-1 && n != (size_t)-2) 1546 { 1547 ps += n; 1548 1549 n = wcrtomb(pbuf, fun_wc(wc), &mbs2); 1550 if (n == (size_t)-1) 1551 FATAL("illegal wide character %s", s); 1552 1553 pbuf += n; 1554 } 1555 1556 *pbuf = '\0'; 1557 1558 if (n) 1559 FATAL("illegal byte sequence %s", s); 1560 1561 return buf; 1562 } 1563 } 1564 1565 static char *nawk_toupper(const char *s) 1566 { 1567 return nawk_convert(s, toupper, towupper); 1568 } 1569 1570 static char *nawk_tolower(const char *s) 1571 { 1572 return nawk_convert(s, tolower, towlower); 1573 } 1574 1575 Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg list */ 1576 { 1577 Cell *x, *y; 1578 Awkfloat u; 1579 int t; 1580 Awkfloat tmp; 1581 char *buf; 1582 Node *nextarg; 1583 FILE *fp; 1584 int status = 0; 1585 1586 t = ptoi(a[0]); 1587 x = execute(a[1]); 1588 nextarg = a[1]->nnext; 1589 switch (t) { 1590 case FLENGTH: 1591 if (isarr(x)) 1592 u = ((Array *) x->sval)->nelem; /* GROT. should be function*/ 1593 else 1594 u = strlen(getsval(x)); 1595 break; 1596 case FLOG: 1597 errno = 0; 1598 u = errcheck(log(getfval(x)), "log"); 1599 break; 1600 case FINT: 1601 modf(getfval(x), &u); break; 1602 case FEXP: 1603 errno = 0; 1604 u = errcheck(exp(getfval(x)), "exp"); 1605 break; 1606 case FSQRT: 1607 errno = 0; 1608 u = errcheck(sqrt(getfval(x)), "sqrt"); 1609 break; 1610 case FSIN: 1611 u = sin(getfval(x)); break; 1612 case FCOS: 1613 u = cos(getfval(x)); break; 1614 case FATAN: 1615 if (nextarg == NULL) { 1616 WARNING("atan2 requires two arguments; returning 1.0"); 1617 u = 1.0; 1618 } else { 1619 y = execute(a[1]->nnext); 1620 u = atan2(getfval(x), getfval(y)); 1621 tempfree(y); 1622 nextarg = nextarg->nnext; 1623 } 1624 break; 1625 case FSYSTEM: 1626 fflush(stdout); /* in case something is buffered already */ 1627 status = system(getsval(x)); 1628 u = status; 1629 if (status != -1) { 1630 if (WIFEXITED(status)) { 1631 u = WEXITSTATUS(status); 1632 } else if (WIFSIGNALED(status)) { 1633 u = WTERMSIG(status) + 256; 1634 #ifdef WCOREDUMP 1635 if (WCOREDUMP(status)) 1636 u += 256; 1637 #endif 1638 } else /* something else?!? */ 1639 u = 0; 1640 } 1641 break; 1642 case FRAND: 1643 /* random() returns numbers in [0..2^31-1] 1644 * in order to get a number in [0, 1), divide it by 2^31 1645 */ 1646 u = (Awkfloat) random() / (0x7fffffffL + 0x1UL); 1647 break; 1648 case FSRAND: 1649 if (isrec(x)) /* no argument provided */ 1650 u = time((time_t *)0); 1651 else 1652 u = getfval(x); 1653 tmp = u; 1654 srandom((unsigned long) u); 1655 u = srand_seed; 1656 srand_seed = tmp; 1657 break; 1658 case FTOUPPER: 1659 case FTOLOWER: 1660 if (t == FTOUPPER) 1661 buf = nawk_toupper(getsval(x)); 1662 else 1663 buf = nawk_tolower(getsval(x)); 1664 tempfree(x); 1665 x = gettemp(); 1666 setsval(x, buf); 1667 free(buf); 1668 return x; 1669 case FFLUSH: 1670 if (isrec(x) || strlen(getsval(x)) == 0) { 1671 flush_all(); /* fflush() or fflush("") -> all */ 1672 u = 0; 1673 } else if ((fp = openfile(FFLUSH, getsval(x), NULL)) == NULL) 1674 u = EOF; 1675 else 1676 u = fflush(fp); 1677 break; 1678 default: /* can't happen */ 1679 FATAL("illegal function type %d", t); 1680 break; 1681 } 1682 tempfree(x); 1683 x = gettemp(); 1684 setfval(x, u); 1685 if (nextarg != NULL) { 1686 WARNING("warning: function has too many arguments"); 1687 for ( ; nextarg; nextarg = nextarg->nnext) 1688 execute(nextarg); 1689 } 1690 return(x); 1691 } 1692 1693 Cell *printstat(Node **a, int n) /* print a[0] */ 1694 { 1695 Node *x; 1696 Cell *y; 1697 FILE *fp; 1698 1699 if (a[1] == NULL) /* a[1] is redirection operator, a[2] is file */ 1700 fp = stdout; 1701 else 1702 fp = redirect(ptoi(a[1]), a[2]); 1703 for (x = a[0]; x != NULL; x = x->nnext) { 1704 y = execute(x); 1705 fputs(getpssval(y), fp); 1706 tempfree(y); 1707 if (x->nnext == NULL) 1708 fputs(getsval(orsloc), fp); 1709 else 1710 fputs(getsval(ofsloc), fp); 1711 } 1712 if (a[1] != NULL) 1713 fflush(fp); 1714 if (ferror(fp)) 1715 FATAL("write error on %s", filename(fp)); 1716 return(True); 1717 } 1718 1719 Cell *nullproc(Node **a, int n) 1720 { 1721 return 0; 1722 } 1723 1724 1725 FILE *redirect(int a, Node *b) /* set up all i/o redirections */ 1726 { 1727 FILE *fp; 1728 Cell *x; 1729 char *fname; 1730 1731 x = execute(b); 1732 fname = getsval(x); 1733 fp = openfile(a, fname, NULL); 1734 if (fp == NULL) 1735 FATAL("can't open file %s", fname); 1736 tempfree(x); 1737 return fp; 1738 } 1739 1740 struct files { 1741 FILE *fp; 1742 const char *fname; 1743 int mode; /* '|', 'a', 'w' => LE/LT, GT */ 1744 } *files; 1745 1746 size_t nfiles; 1747 1748 static void stdinit(void) /* in case stdin, etc., are not constants */ 1749 { 1750 nfiles = FOPEN_MAX; 1751 files = calloc(nfiles, sizeof(*files)); 1752 if (files == NULL) 1753 FATAL("can't allocate file memory for %zu files", nfiles); 1754 files[0].fp = stdin; 1755 files[0].fname = "/dev/stdin"; 1756 files[0].mode = LT; 1757 files[1].fp = stdout; 1758 files[1].fname = "/dev/stdout"; 1759 files[1].mode = GT; 1760 files[2].fp = stderr; 1761 files[2].fname = "/dev/stderr"; 1762 files[2].mode = GT; 1763 } 1764 1765 FILE *openfile(int a, const char *us, bool *pnewflag) 1766 { 1767 const char *s = us; 1768 size_t i; 1769 int m; 1770 FILE *fp = NULL; 1771 1772 if (*s == '\0') 1773 FATAL("null file name in print or getline"); 1774 for (i = 0; i < nfiles; i++) 1775 if (files[i].fname && strcmp(s, files[i].fname) == 0 && 1776 (a == files[i].mode || (a==APPEND && files[i].mode==GT) || 1777 a == FFLUSH)) { 1778 if (pnewflag) 1779 *pnewflag = false; 1780 return files[i].fp; 1781 } 1782 if (a == FFLUSH) /* didn't find it, so don't create it! */ 1783 return NULL; 1784 1785 for (i = 0; i < nfiles; i++) 1786 if (files[i].fp == NULL) 1787 break; 1788 if (i >= nfiles) { 1789 struct files *nf; 1790 size_t nnf = nfiles + FOPEN_MAX; 1791 nf = realloc(files, nnf * sizeof(*nf)); 1792 if (nf == NULL) 1793 FATAL("cannot grow files for %s and %zu files", s, nnf); 1794 memset(&nf[nfiles], 0, FOPEN_MAX * sizeof(*nf)); 1795 nfiles = nnf; 1796 files = nf; 1797 } 1798 fflush(stdout); /* force a semblance of order */ 1799 m = a; 1800 if (a == GT) { 1801 fp = fopen(s, "w"); 1802 } else if (a == APPEND) { 1803 fp = fopen(s, "a"); 1804 m = GT; /* so can mix > and >> */ 1805 } else if (a == '|') { /* output pipe */ 1806 fp = popen(s, "w"); 1807 } else if (a == LE) { /* input pipe */ 1808 fp = popen(s, "r"); 1809 } else if (a == LT) { /* getline <file */ 1810 fp = strcmp(s, "-") == 0 ? stdin : fopen(s, "r"); /* "-" is stdin */ 1811 } else /* can't happen */ 1812 FATAL("illegal redirection %d", a); 1813 if (fp != NULL) { 1814 files[i].fname = tostring(s); 1815 files[i].fp = fp; 1816 files[i].mode = m; 1817 if (pnewflag) 1818 *pnewflag = true; 1819 if (fp != stdin && fp != stdout && fp != stderr) 1820 (void) fcntl(fileno(fp), F_SETFD, FD_CLOEXEC); 1821 } 1822 return fp; 1823 } 1824 1825 const char *filename(FILE *fp) 1826 { 1827 size_t i; 1828 1829 for (i = 0; i < nfiles; i++) 1830 if (fp == files[i].fp) 1831 return files[i].fname; 1832 return "???"; 1833 } 1834 1835 Cell *closefile(Node **a, int n) 1836 { 1837 Cell *x; 1838 size_t i; 1839 bool stat; 1840 1841 x = execute(a[0]); 1842 getsval(x); 1843 stat = true; 1844 for (i = 0; i < nfiles; i++) { 1845 if (!files[i].fname || strcmp(x->sval, files[i].fname) != 0) 1846 continue; 1847 if (ferror(files[i].fp)) 1848 FATAL("i/o error occurred on %s", files[i].fname); 1849 if (files[i].mode == '|' || files[i].mode == LE) 1850 stat = pclose(files[i].fp) == -1; 1851 else 1852 stat = fclose(files[i].fp) == EOF; 1853 if (stat) 1854 FATAL("i/o error occurred closing %s", files[i].fname); 1855 if (i > 2) /* don't do /dev/std... */ 1856 xfree(files[i].fname); 1857 files[i].fname = NULL; /* watch out for ref thru this */ 1858 files[i].fp = NULL; 1859 } 1860 tempfree(x); 1861 x = gettemp(); 1862 setfval(x, (Awkfloat) (stat ? -1 : 0)); 1863 return(x); 1864 } 1865 1866 void closeall(void) 1867 { 1868 size_t i; 1869 bool stat = false; 1870 1871 for (i = 0; i < nfiles; i++) { 1872 if (! files[i].fp) 1873 continue; 1874 if (ferror(files[i].fp)) 1875 FATAL( "i/o error occurred on %s", files[i].fname ); 1876 if (files[i].mode == '|' || files[i].mode == LE) 1877 stat = pclose(files[i].fp) == -1; 1878 else 1879 stat = fclose(files[i].fp) == EOF; 1880 if (stat) 1881 FATAL( "i/o error occurred while closing %s", files[i].fname ); 1882 } 1883 } 1884 1885 static void flush_all(void) 1886 { 1887 size_t i; 1888 1889 for (i = 0; i < nfiles; i++) 1890 if (files[i].fp) 1891 fflush(files[i].fp); 1892 } 1893 1894 void backsub(char **pb_ptr, const char **sptr_ptr); 1895 1896 Cell *sub(Node **a, int nnn) /* substitute command */ 1897 { 1898 const char *sptr, *q; 1899 Cell *x, *y, *result; 1900 char *t, *buf, *pb; 1901 fa *pfa; 1902 int bufsz = recsize; 1903 1904 if ((buf = malloc(bufsz)) == NULL) 1905 FATAL("out of memory in sub"); 1906 x = execute(a[3]); /* target string */ 1907 t = getsval(x); 1908 if (a[0] == NULL) /* 0 => a[1] is already-compiled regexpr */ 1909 pfa = (fa *) a[1]; /* regular expression */ 1910 else { 1911 y = execute(a[1]); 1912 pfa = makedfa(getsval(y), 1); 1913 tempfree(y); 1914 } 1915 y = execute(a[2]); /* replacement string */ 1916 result = False; 1917 if (pmatch(pfa, t)) { 1918 sptr = t; 1919 adjbuf(&buf, &bufsz, 1+patbeg-sptr, recsize, 0, "sub"); 1920 pb = buf; 1921 while (sptr < patbeg) 1922 *pb++ = *sptr++; 1923 sptr = getsval(y); 1924 while (*sptr != '\0') { 1925 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "sub"); 1926 if (*sptr == '\\') { 1927 backsub(&pb, &sptr); 1928 } else if (*sptr == '&') { 1929 sptr++; 1930 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "sub"); 1931 for (q = patbeg; q < patbeg+patlen; ) 1932 *pb++ = *q++; 1933 } else 1934 *pb++ = *sptr++; 1935 } 1936 *pb = '\0'; 1937 if (pb > buf + bufsz) 1938 FATAL("sub result1 %.30s too big; can't happen", buf); 1939 sptr = patbeg + patlen; 1940 if ((patlen == 0 && *patbeg) || (patlen && *(sptr-1))) { 1941 adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "sub"); 1942 while ((*pb++ = *sptr++) != '\0') 1943 continue; 1944 } 1945 if (pb > buf + bufsz) 1946 FATAL("sub result2 %.30s too big; can't happen", buf); 1947 setsval(x, buf); /* BUG: should be able to avoid copy */ 1948 result = True; 1949 } 1950 tempfree(x); 1951 tempfree(y); 1952 free(buf); 1953 return result; 1954 } 1955 1956 Cell *gsub(Node **a, int nnn) /* global substitute */ 1957 { 1958 Cell *x, *y; 1959 char *rptr, *pb; 1960 const char *q, *t, *sptr; 1961 char *buf; 1962 fa *pfa; 1963 int mflag, tempstat, num; 1964 int bufsz = recsize; 1965 1966 if ((buf = malloc(bufsz)) == NULL) 1967 FATAL("out of memory in gsub"); 1968 mflag = 0; /* if mflag == 0, can replace empty string */ 1969 num = 0; 1970 x = execute(a[3]); /* target string */ 1971 t = getsval(x); 1972 if (a[0] == NULL) /* 0 => a[1] is already-compiled regexpr */ 1973 pfa = (fa *) a[1]; /* regular expression */ 1974 else { 1975 y = execute(a[1]); 1976 pfa = makedfa(getsval(y), 1); 1977 tempfree(y); 1978 } 1979 y = execute(a[2]); /* replacement string */ 1980 if (pmatch(pfa, t)) { 1981 tempstat = pfa->initstat; 1982 pfa->initstat = 2; 1983 pb = buf; 1984 rptr = getsval(y); 1985 do { 1986 if (patlen == 0 && *patbeg != '\0') { /* matched empty string */ 1987 if (mflag == 0) { /* can replace empty */ 1988 num++; 1989 sptr = rptr; 1990 while (*sptr != '\0') { 1991 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gsub"); 1992 if (*sptr == '\\') { 1993 backsub(&pb, &sptr); 1994 } else if (*sptr == '&') { 1995 sptr++; 1996 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gsub"); 1997 for (q = patbeg; q < patbeg+patlen; ) 1998 *pb++ = *q++; 1999 } else 2000 *pb++ = *sptr++; 2001 } 2002 } 2003 if (*t == '\0') /* at end */ 2004 goto done; 2005 adjbuf(&buf, &bufsz, 2+pb-buf, recsize, &pb, "gsub"); 2006 *pb++ = *t++; 2007 if (pb > buf + bufsz) /* BUG: not sure of this test */ 2008 FATAL("gsub result0 %.30s too big; can't happen", buf); 2009 mflag = 0; 2010 } 2011 else { /* matched nonempty string */ 2012 num++; 2013 sptr = t; 2014 adjbuf(&buf, &bufsz, 1+(patbeg-sptr)+pb-buf, recsize, &pb, "gsub"); 2015 while (sptr < patbeg) 2016 *pb++ = *sptr++; 2017 sptr = rptr; 2018 while (*sptr != '\0') { 2019 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gsub"); 2020 if (*sptr == '\\') { 2021 backsub(&pb, &sptr); 2022 } else if (*sptr == '&') { 2023 sptr++; 2024 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gsub"); 2025 for (q = patbeg; q < patbeg+patlen; ) 2026 *pb++ = *q++; 2027 } else 2028 *pb++ = *sptr++; 2029 } 2030 t = patbeg + patlen; 2031 if (patlen == 0 || *t == '\0' || *(t-1) == '\0') 2032 goto done; 2033 if (pb > buf + bufsz) 2034 FATAL("gsub result1 %.30s too big; can't happen", buf); 2035 mflag = 1; 2036 } 2037 } while (pmatch(pfa,t)); 2038 sptr = t; 2039 adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "gsub"); 2040 while ((*pb++ = *sptr++) != '\0') 2041 continue; 2042 done: if (pb < buf + bufsz) 2043 *pb = '\0'; 2044 else if (*(pb-1) != '\0') 2045 FATAL("gsub result2 %.30s truncated; can't happen", buf); 2046 setsval(x, buf); /* BUG: should be able to avoid copy + free */ 2047 pfa->initstat = tempstat; 2048 } 2049 tempfree(x); 2050 tempfree(y); 2051 x = gettemp(); 2052 x->tval = NUM; 2053 x->fval = num; 2054 free(buf); 2055 return(x); 2056 } 2057 2058 void backsub(char **pb_ptr, const char **sptr_ptr) /* handle \\& variations */ 2059 { /* sptr[0] == '\\' */ 2060 char *pb = *pb_ptr; 2061 const char *sptr = *sptr_ptr; 2062 static bool first = true; 2063 static bool do_posix = false; 2064 2065 if (first) { 2066 first = false; 2067 do_posix = (getenv("POSIXLY_CORRECT") != NULL); 2068 } 2069 2070 if (sptr[1] == '\\') { 2071 if (sptr[2] == '\\' && sptr[3] == '&') { /* \\\& -> \& */ 2072 *pb++ = '\\'; 2073 *pb++ = '&'; 2074 sptr += 4; 2075 } else if (sptr[2] == '&') { /* \\& -> \ + matched */ 2076 *pb++ = '\\'; 2077 sptr += 2; 2078 } else if (do_posix) { /* \\x -> \x */ 2079 sptr++; 2080 *pb++ = *sptr++; 2081 } else { /* \\x -> \\x */ 2082 *pb++ = *sptr++; 2083 *pb++ = *sptr++; 2084 } 2085 } else if (sptr[1] == '&') { /* literal & */ 2086 sptr++; 2087 *pb++ = *sptr++; 2088 } else /* literal \ */ 2089 *pb++ = *sptr++; 2090 2091 *pb_ptr = pb; 2092 *sptr_ptr = sptr; 2093 } 2094