1 /* $OpenBSD: run.c,v 1.69 2020/12/09 20:00:11 millert Exp $ */ 2 /**************************************************************** 3 Copyright (C) Lucent Technologies 1997 4 All Rights Reserved 5 6 Permission to use, copy, modify, and distribute this software and 7 its documentation for any purpose and without fee is hereby 8 granted, provided that the above copyright notice appear in all 9 copies and that both that the copyright notice and this 10 permission notice and warranty disclaimer appear in supporting 11 documentation, and that the name Lucent Technologies or any of 12 its entities not be used in advertising or publicity pertaining 13 to distribution of the software without specific, written prior 14 permission. 15 16 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, 17 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. 18 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY 19 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 20 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER 21 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, 22 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF 23 THIS SOFTWARE. 24 ****************************************************************/ 25 26 #define DEBUG 27 #include <stdio.h> 28 #include <ctype.h> 29 #include <errno.h> 30 #include <wchar.h> 31 #include <wctype.h> 32 #include <fcntl.h> 33 #include <setjmp.h> 34 #include <limits.h> 35 #include <math.h> 36 #include <string.h> 37 #include <stdlib.h> 38 #include <time.h> 39 #include <sys/types.h> 40 #include <sys/wait.h> 41 #include "awk.h" 42 #include "awkgram.tab.h" 43 44 static void stdinit(void); 45 static void flush_all(void); 46 47 #if 1 48 #define tempfree(x) do { if (istemp(x)) tfree(x); } while (/*CONSTCOND*/0) 49 #else 50 void tempfree(Cell *p) { 51 if (p->ctype == OCELL && (p->csub < CUNK || p->csub > CFREE)) { 52 WARNING("bad csub %d in Cell %d %s", 53 p->csub, p->ctype, p->sval); 54 } 55 if (istemp(p)) 56 tfree(p); 57 } 58 #endif 59 60 /* do we really need these? */ 61 /* #ifdef _NFILE */ 62 /* #ifndef FOPEN_MAX */ 63 /* #define FOPEN_MAX _NFILE */ 64 /* #endif */ 65 /* #endif */ 66 /* */ 67 /* #ifndef FOPEN_MAX */ 68 /* #define FOPEN_MAX 40 */ /* max number of open files */ 69 /* #endif */ 70 /* */ 71 /* #ifndef RAND_MAX */ 72 /* #define RAND_MAX 32767 */ /* all that ansi guarantees */ 73 /* #endif */ 74 75 jmp_buf env; 76 extern int pairstack[]; 77 extern Awkfloat srand_seed; 78 79 Node *winner = NULL; /* root of parse tree */ 80 Cell *tmps; /* free temporary cells for execution */ 81 82 static Cell truecell ={ OBOOL, BTRUE, 0, 0, 1.0, NUM, NULL, NULL }; 83 Cell *True = &truecell; 84 static Cell falsecell ={ OBOOL, BFALSE, 0, 0, 0.0, NUM, NULL, NULL }; 85 Cell *False = &falsecell; 86 static Cell breakcell ={ OJUMP, JBREAK, 0, 0, 0.0, NUM, NULL, NULL }; 87 Cell *jbreak = &breakcell; 88 static Cell contcell ={ OJUMP, JCONT, 0, 0, 0.0, NUM, NULL, NULL }; 89 Cell *jcont = &contcell; 90 static Cell nextcell ={ OJUMP, JNEXT, 0, 0, 0.0, NUM, NULL, NULL }; 91 Cell *jnext = &nextcell; 92 static Cell nextfilecell ={ OJUMP, JNEXTFILE, 0, 0, 0.0, NUM, NULL, NULL }; 93 Cell *jnextfile = &nextfilecell; 94 static Cell exitcell ={ OJUMP, JEXIT, 0, 0, 0.0, NUM, NULL, NULL }; 95 Cell *jexit = &exitcell; 96 static Cell retcell ={ OJUMP, JRET, 0, 0, 0.0, NUM, NULL, NULL }; 97 Cell *jret = &retcell; 98 static Cell tempcell ={ OCELL, CTEMP, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL, NULL }; 99 100 Node *curnode = NULL; /* the node being executed, for debugging */ 101 102 /* buffer memory management */ 103 int adjbuf(char **pbuf, int *psiz, int minlen, int quantum, char **pbptr, 104 const char *whatrtn) 105 /* pbuf: address of pointer to buffer being managed 106 * psiz: address of buffer size variable 107 * minlen: minimum length of buffer needed 108 * quantum: buffer size quantum 109 * pbptr: address of movable pointer into buffer, or 0 if none 110 * whatrtn: name of the calling routine if failure should cause fatal error 111 * 112 * return 0 for realloc failure, !=0 for success 113 */ 114 { 115 if (minlen > *psiz) { 116 char *tbuf; 117 int rminlen = quantum ? minlen % quantum : 0; 118 int boff = pbptr ? *pbptr - *pbuf : 0; 119 /* round up to next multiple of quantum */ 120 if (rminlen) 121 minlen += quantum - rminlen; 122 tbuf = (char *) realloc(*pbuf, minlen); 123 DPRINTF("adjbuf %s: %d %d (pbuf=%p, tbuf=%p)\n", whatrtn, *psiz, minlen, (void*)*pbuf, (void*)tbuf); 124 if (tbuf == NULL) { 125 if (whatrtn) 126 FATAL("out of memory in %s", whatrtn); 127 return 0; 128 } 129 *pbuf = tbuf; 130 *psiz = minlen; 131 if (pbptr) 132 *pbptr = tbuf + boff; 133 } 134 return 1; 135 } 136 137 void run(Node *a) /* execution of parse tree starts here */ 138 { 139 140 stdinit(); 141 execute(a); 142 closeall(); 143 } 144 145 Cell *execute(Node *u) /* execute a node of the parse tree */ 146 { 147 Cell *(*proc)(Node **, int); 148 Cell *x; 149 Node *a; 150 151 if (u == NULL) 152 return(True); 153 for (a = u; ; a = a->nnext) { 154 curnode = a; 155 if (isvalue(a)) { 156 x = (Cell *) (a->narg[0]); 157 if (isfld(x) && !donefld) 158 fldbld(); 159 else if (isrec(x) && !donerec) 160 recbld(); 161 return(x); 162 } 163 if (notlegal(a->nobj)) /* probably a Cell* but too risky to print */ 164 FATAL("illegal statement"); 165 proc = proctab[a->nobj-FIRSTTOKEN]; 166 x = (*proc)(a->narg, a->nobj); 167 if (isfld(x) && !donefld) 168 fldbld(); 169 else if (isrec(x) && !donerec) 170 recbld(); 171 if (isexpr(a)) 172 return(x); 173 if (isjump(x)) 174 return(x); 175 if (a->nnext == NULL) 176 return(x); 177 tempfree(x); 178 } 179 } 180 181 182 Cell *program(Node **a, int n) /* execute an awk program */ 183 { /* a[0] = BEGIN, a[1] = body, a[2] = END */ 184 Cell *x; 185 186 if (setjmp(env) != 0) 187 goto ex; 188 if (a[0]) { /* BEGIN */ 189 x = execute(a[0]); 190 if (isexit(x)) 191 return(True); 192 if (isjump(x)) 193 FATAL("illegal break, continue, next or nextfile from BEGIN"); 194 tempfree(x); 195 } 196 if (a[1] || a[2]) 197 while (getrec(&record, &recsize, true) > 0) { 198 x = execute(a[1]); 199 if (isexit(x)) 200 break; 201 tempfree(x); 202 } 203 ex: 204 if (setjmp(env) != 0) /* handles exit within END */ 205 goto ex1; 206 if (a[2]) { /* END */ 207 x = execute(a[2]); 208 if (isbreak(x) || isnext(x) || iscont(x)) 209 FATAL("illegal break, continue, next or nextfile from END"); 210 tempfree(x); 211 } 212 ex1: 213 return(True); 214 } 215 216 struct Frame { /* stack frame for awk function calls */ 217 int nargs; /* number of arguments in this call */ 218 Cell *fcncell; /* pointer to Cell for function */ 219 Cell **args; /* pointer to array of arguments after execute */ 220 Cell *retval; /* return value */ 221 }; 222 223 #define NARGS 50 /* max args in a call */ 224 225 struct Frame *frame = NULL; /* base of stack frames; dynamically allocated */ 226 int nframe = 0; /* number of frames allocated */ 227 struct Frame *frp = NULL; /* frame pointer. bottom level unused */ 228 229 Cell *call(Node **a, int n) /* function call. very kludgy and fragile */ 230 { 231 static const Cell newcopycell = { OCELL, CCOPY, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL, NULL }; 232 int i, ncall, ndef; 233 int freed = 0; /* handles potential double freeing when fcn & param share a tempcell */ 234 Node *x; 235 Cell *args[NARGS], *oargs[NARGS]; /* BUG: fixed size arrays */ 236 Cell *y, *z, *fcn; 237 char *s; 238 239 fcn = execute(a[0]); /* the function itself */ 240 s = fcn->nval; 241 if (!isfcn(fcn)) 242 FATAL("calling undefined function %s", s); 243 if (frame == NULL) { 244 frp = frame = (struct Frame *) calloc(nframe += 100, sizeof(*frame)); 245 if (frame == NULL) 246 FATAL("out of space for stack frames calling %s", s); 247 } 248 for (ncall = 0, x = a[1]; x != NULL; x = x->nnext) /* args in call */ 249 ncall++; 250 ndef = (int) fcn->fval; /* args in defn */ 251 DPRINTF("calling %s, %d args (%d in defn), frp=%d\n", s, ncall, ndef, (int) (frp-frame)); 252 if (ncall > ndef) 253 WARNING("function %s called with %d args, uses only %d", 254 s, ncall, ndef); 255 if (ncall + ndef > NARGS) 256 FATAL("function %s has %d arguments, limit %d", s, ncall+ndef, NARGS); 257 for (i = 0, x = a[1]; x != NULL; i++, x = x->nnext) { /* get call args */ 258 DPRINTF("evaluate args[%d], frp=%d:\n", i, (int) (frp-frame)); 259 y = execute(x); 260 oargs[i] = y; 261 DPRINTF("args[%d]: %s %f <%s>, t=%o\n", 262 i, NN(y->nval), y->fval, isarr(y) ? "(array)" : NN(y->sval), y->tval); 263 if (isfcn(y)) 264 FATAL("can't use function %s as argument in %s", y->nval, s); 265 if (isarr(y)) 266 args[i] = y; /* arrays by ref */ 267 else 268 args[i] = copycell(y); 269 tempfree(y); 270 } 271 for ( ; i < ndef; i++) { /* add null args for ones not provided */ 272 args[i] = gettemp(); 273 *args[i] = newcopycell; 274 } 275 frp++; /* now ok to up frame */ 276 if (frp >= frame + nframe) { 277 int dfp = frp - frame; /* old index */ 278 frame = (struct Frame *) reallocarray(frame, (nframe += 100), sizeof(*frame)); 279 if (frame == NULL) 280 FATAL("out of space for stack frames in %s", s); 281 frp = frame + dfp; 282 } 283 frp->fcncell = fcn; 284 frp->args = args; 285 frp->nargs = ndef; /* number defined with (excess are locals) */ 286 frp->retval = gettemp(); 287 288 DPRINTF("start exec of %s, frp=%d\n", s, (int) (frp-frame)); 289 y = execute((Node *)(fcn->sval)); /* execute body */ 290 DPRINTF("finished exec of %s, frp=%d\n", s, (int) (frp-frame)); 291 292 for (i = 0; i < ndef; i++) { 293 Cell *t = frp->args[i]; 294 if (isarr(t)) { 295 if (t->csub == CCOPY) { 296 if (i >= ncall) { 297 freesymtab(t); 298 t->csub = CTEMP; 299 tempfree(t); 300 } else { 301 oargs[i]->tval = t->tval; 302 oargs[i]->tval &= ~(STR|NUM|DONTFREE); 303 oargs[i]->sval = t->sval; 304 tempfree(t); 305 } 306 } 307 } else if (t != y) { /* kludge to prevent freeing twice */ 308 t->csub = CTEMP; 309 tempfree(t); 310 } else if (t == y && t->csub == CCOPY) { 311 t->csub = CTEMP; 312 tempfree(t); 313 freed = 1; 314 } 315 } 316 tempfree(fcn); 317 if (isexit(y) || isnext(y)) 318 return y; 319 if (freed == 0) { 320 tempfree(y); /* don't free twice! */ 321 } 322 z = frp->retval; /* return value */ 323 DPRINTF("%s returns %g |%s| %o\n", s, getfval(z), getsval(z), z->tval); 324 frp--; 325 return(z); 326 } 327 328 Cell *copycell(Cell *x) /* make a copy of a cell in a temp */ 329 { 330 Cell *y; 331 332 /* copy is not constant or field */ 333 334 y = gettemp(); 335 y->tval = x->tval & ~(CON|FLD|REC); 336 y->csub = CCOPY; /* prevents freeing until call is over */ 337 y->nval = x->nval; /* BUG? */ 338 if (isstr(x) /* || x->ctype == OCELL */) { 339 y->sval = tostring(x->sval); 340 y->tval &= ~DONTFREE; 341 } else 342 y->tval |= DONTFREE; 343 y->fval = x->fval; 344 return y; 345 } 346 347 Cell *arg(Node **a, int n) /* nth argument of a function */ 348 { 349 350 n = ptoi(a[0]); /* argument number, counting from 0 */ 351 DPRINTF("arg(%d), frp->nargs=%d\n", n, frp->nargs); 352 if (n+1 > frp->nargs) 353 FATAL("argument #%d of function %s was not supplied", 354 n+1, frp->fcncell->nval); 355 return frp->args[n]; 356 } 357 358 Cell *jump(Node **a, int n) /* break, continue, next, nextfile, return */ 359 { 360 Cell *y; 361 362 switch (n) { 363 case EXIT: 364 if (a[0] != NULL) { 365 y = execute(a[0]); 366 errorflag = (int) getfval(y); 367 tempfree(y); 368 } 369 longjmp(env, 1); 370 case RETURN: 371 if (a[0] != NULL) { 372 y = execute(a[0]); 373 if ((y->tval & (STR|NUM)) == (STR|NUM)) { 374 setsval(frp->retval, getsval(y)); 375 frp->retval->fval = getfval(y); 376 frp->retval->tval |= NUM; 377 } 378 else if (y->tval & STR) 379 setsval(frp->retval, getsval(y)); 380 else if (y->tval & NUM) 381 setfval(frp->retval, getfval(y)); 382 else /* can't happen */ 383 FATAL("bad type variable %d", y->tval); 384 tempfree(y); 385 } 386 return(jret); 387 case NEXT: 388 return(jnext); 389 case NEXTFILE: 390 nextfile(); 391 return(jnextfile); 392 case BREAK: 393 return(jbreak); 394 case CONTINUE: 395 return(jcont); 396 default: /* can't happen */ 397 FATAL("illegal jump type %d", n); 398 } 399 return 0; /* not reached */ 400 } 401 402 Cell *awkgetline(Node **a, int n) /* get next line from specific input */ 403 { /* a[0] is variable, a[1] is operator, a[2] is filename */ 404 Cell *r, *x; 405 extern Cell **fldtab; 406 FILE *fp; 407 char *buf; 408 int bufsize = recsize; 409 int mode; 410 bool newflag; 411 double result; 412 413 if ((buf = (char *) malloc(bufsize)) == NULL) 414 FATAL("out of memory in getline"); 415 416 fflush(stdout); /* in case someone is waiting for a prompt */ 417 r = gettemp(); 418 if (a[1] != NULL) { /* getline < file */ 419 x = execute(a[2]); /* filename */ 420 mode = ptoi(a[1]); 421 if (mode == '|') /* input pipe */ 422 mode = LE; /* arbitrary flag */ 423 fp = openfile(mode, getsval(x), &newflag); 424 tempfree(x); 425 if (fp == NULL) 426 n = -1; 427 else 428 n = readrec(&buf, &bufsize, fp, newflag); 429 if (n <= 0) { 430 ; 431 } else if (a[0] != NULL) { /* getline var <file */ 432 x = execute(a[0]); 433 setsval(x, buf); 434 if (is_number(x->sval, & result)) { 435 x->fval = result; 436 x->tval |= NUM; 437 } 438 tempfree(x); 439 } else { /* getline <file */ 440 setsval(fldtab[0], buf); 441 if (is_number(fldtab[0]->sval, & result)) { 442 fldtab[0]->fval = result; 443 fldtab[0]->tval |= NUM; 444 } 445 } 446 } else { /* bare getline; use current input */ 447 if (a[0] == NULL) /* getline */ 448 n = getrec(&record, &recsize, true); 449 else { /* getline var */ 450 n = getrec(&buf, &bufsize, false); 451 x = execute(a[0]); 452 setsval(x, buf); 453 if (is_number(x->sval, & result)) { 454 x->fval = result; 455 x->tval |= NUM; 456 } 457 tempfree(x); 458 } 459 } 460 setfval(r, (Awkfloat) n); 461 free(buf); 462 return r; 463 } 464 465 Cell *getnf(Node **a, int n) /* get NF */ 466 { 467 if (!donefld) 468 fldbld(); 469 return (Cell *) a[0]; 470 } 471 472 static char * 473 makearraystring(Node *p, const char *func) 474 { 475 char *buf; 476 int bufsz = recsize; 477 size_t blen; 478 479 if ((buf = (char *) malloc(bufsz)) == NULL) { 480 FATAL("%s: out of memory", func); 481 } 482 483 blen = 0; 484 buf[blen] = '\0'; 485 486 for (; p; p = p->nnext) { 487 Cell *x = execute(p); /* expr */ 488 char *s = getsval(x); 489 size_t seplen = strlen(getsval(subseploc)); 490 size_t nsub = p->nnext ? seplen : 0; 491 size_t slen = strlen(s); 492 size_t tlen = blen + slen + nsub; 493 494 if (!adjbuf(&buf, &bufsz, tlen + 1, recsize, 0, func)) { 495 FATAL("%s: out of memory %s[%s...]", 496 func, x->nval, buf); 497 } 498 memcpy(buf + blen, s, slen); 499 if (nsub) { 500 memcpy(buf + blen + slen, *SUBSEP, nsub); 501 } 502 buf[tlen] = '\0'; 503 blen = tlen; 504 tempfree(x); 505 } 506 return buf; 507 } 508 509 Cell *array(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */ 510 { 511 Cell *x, *z; 512 char *buf; 513 514 x = execute(a[0]); /* Cell* for symbol table */ 515 buf = makearraystring(a[1], __func__); 516 if (!isarr(x)) { 517 DPRINTF("making %s into an array\n", NN(x->nval)); 518 if (freeable(x)) 519 xfree(x->sval); 520 x->tval &= ~(STR|NUM|DONTFREE); 521 x->tval |= ARR; 522 x->sval = (char *) makesymtab(NSYMTAB); 523 } 524 z = setsymtab(buf, "", 0.0, STR|NUM, (Array *) x->sval); 525 z->ctype = OCELL; 526 z->csub = CVAR; 527 tempfree(x); 528 free(buf); 529 return(z); 530 } 531 532 Cell *awkdelete(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */ 533 { 534 Cell *x; 535 536 x = execute(a[0]); /* Cell* for symbol table */ 537 if (x == symtabloc) { 538 FATAL("cannot delete SYMTAB or its elements"); 539 } 540 if (!isarr(x)) 541 return True; 542 if (a[1] == NULL) { /* delete the elements, not the table */ 543 freesymtab(x); 544 x->tval &= ~STR; 545 x->tval |= ARR; 546 x->sval = (char *) makesymtab(NSYMTAB); 547 } else { 548 char *buf = makearraystring(a[1], __func__); 549 freeelem(x, buf); 550 free(buf); 551 } 552 tempfree(x); 553 return True; 554 } 555 556 Cell *intest(Node **a, int n) /* a[0] is index (list), a[1] is symtab */ 557 { 558 Cell *ap, *k; 559 char *buf; 560 561 ap = execute(a[1]); /* array name */ 562 if (!isarr(ap)) { 563 DPRINTF("making %s into an array\n", ap->nval); 564 if (freeable(ap)) 565 xfree(ap->sval); 566 ap->tval &= ~(STR|NUM|DONTFREE); 567 ap->tval |= ARR; 568 ap->sval = (char *) makesymtab(NSYMTAB); 569 } 570 buf = makearraystring(a[0], __func__); 571 k = lookup(buf, (Array *) ap->sval); 572 tempfree(ap); 573 free(buf); 574 if (k == NULL) 575 return(False); 576 else 577 return(True); 578 } 579 580 581 Cell *matchop(Node **a, int n) /* ~ and match() */ 582 { 583 Cell *x, *y; 584 char *s, *t; 585 int i; 586 fa *pfa; 587 int (*mf)(fa *, const char *) = match, mode = 0; 588 589 if (n == MATCHFCN) { 590 mf = pmatch; 591 mode = 1; 592 } 593 x = execute(a[1]); /* a[1] = target text */ 594 s = getsval(x); 595 if (a[0] == NULL) /* a[1] == 0: already-compiled reg expr */ 596 i = (*mf)((fa *) a[2], s); 597 else { 598 y = execute(a[2]); /* a[2] = regular expr */ 599 t = getsval(y); 600 pfa = makedfa(t, mode); 601 i = (*mf)(pfa, s); 602 tempfree(y); 603 } 604 tempfree(x); 605 if (n == MATCHFCN) { 606 int start = patbeg - s + 1; 607 if (patlen < 0) 608 start = 0; 609 setfval(rstartloc, (Awkfloat) start); 610 setfval(rlengthloc, (Awkfloat) patlen); 611 x = gettemp(); 612 x->tval = NUM; 613 x->fval = start; 614 return x; 615 } else if ((n == MATCH && i == 1) || (n == NOTMATCH && i == 0)) 616 return(True); 617 else 618 return(False); 619 } 620 621 622 Cell *boolop(Node **a, int n) /* a[0] || a[1], a[0] && a[1], !a[0] */ 623 { 624 Cell *x, *y; 625 int i; 626 627 x = execute(a[0]); 628 i = istrue(x); 629 tempfree(x); 630 switch (n) { 631 case BOR: 632 if (i) return(True); 633 y = execute(a[1]); 634 i = istrue(y); 635 tempfree(y); 636 if (i) return(True); 637 else return(False); 638 case AND: 639 if ( !i ) return(False); 640 y = execute(a[1]); 641 i = istrue(y); 642 tempfree(y); 643 if (i) return(True); 644 else return(False); 645 case NOT: 646 if (i) return(False); 647 else return(True); 648 default: /* can't happen */ 649 FATAL("unknown boolean operator %d", n); 650 } 651 return 0; /*NOTREACHED*/ 652 } 653 654 Cell *relop(Node **a, int n) /* a[0 < a[1], etc. */ 655 { 656 int i; 657 Cell *x, *y; 658 Awkfloat j; 659 660 x = execute(a[0]); 661 y = execute(a[1]); 662 if (x->tval&NUM && y->tval&NUM) { 663 j = x->fval - y->fval; 664 i = j<0? -1: (j>0? 1: 0); 665 } else { 666 i = strcmp(getsval(x), getsval(y)); 667 } 668 tempfree(x); 669 tempfree(y); 670 switch (n) { 671 case LT: if (i<0) return(True); 672 else return(False); 673 case LE: if (i<=0) return(True); 674 else return(False); 675 case NE: if (i!=0) return(True); 676 else return(False); 677 case EQ: if (i == 0) return(True); 678 else return(False); 679 case GE: if (i>=0) return(True); 680 else return(False); 681 case GT: if (i>0) return(True); 682 else return(False); 683 default: /* can't happen */ 684 FATAL("unknown relational operator %d", n); 685 } 686 return 0; /*NOTREACHED*/ 687 } 688 689 void tfree(Cell *a) /* free a tempcell */ 690 { 691 if (freeable(a)) { 692 DPRINTF("freeing %s %s %o\n", NN(a->nval), NN(a->sval), a->tval); 693 xfree(a->sval); 694 } 695 if (a == tmps) 696 FATAL("tempcell list is curdled"); 697 a->cnext = tmps; 698 tmps = a; 699 } 700 701 Cell *gettemp(void) /* get a tempcell */ 702 { int i; 703 Cell *x; 704 705 if (!tmps) { 706 tmps = (Cell *) calloc(100, sizeof(*tmps)); 707 if (!tmps) 708 FATAL("out of space for temporaries"); 709 for (i = 1; i < 100; i++) 710 tmps[i-1].cnext = &tmps[i]; 711 tmps[i-1].cnext = NULL; 712 } 713 x = tmps; 714 tmps = x->cnext; 715 *x = tempcell; 716 return(x); 717 } 718 719 Cell *indirect(Node **a, int n) /* $( a[0] ) */ 720 { 721 Awkfloat val; 722 Cell *x; 723 int m; 724 char *s; 725 726 x = execute(a[0]); 727 val = getfval(x); /* freebsd: defend against super large field numbers */ 728 if ((Awkfloat)INT_MAX < val) 729 FATAL("trying to access out of range field %s", x->nval); 730 m = (int) val; 731 if (m == 0 && !is_number(s = getsval(x), NULL)) /* suspicion! */ 732 FATAL("illegal field $(%s), name \"%s\"", s, x->nval); 733 /* BUG: can x->nval ever be null??? */ 734 tempfree(x); 735 x = fieldadr(m); 736 x->ctype = OCELL; /* BUG? why are these needed? */ 737 x->csub = CFLD; 738 return(x); 739 } 740 741 Cell *substr(Node **a, int nnn) /* substr(a[0], a[1], a[2]) */ 742 { 743 int k, m, n; 744 char *s; 745 int temp; 746 Cell *x, *y, *z = NULL; 747 748 x = execute(a[0]); 749 y = execute(a[1]); 750 if (a[2] != NULL) 751 z = execute(a[2]); 752 s = getsval(x); 753 k = strlen(s) + 1; 754 if (k <= 1) { 755 tempfree(x); 756 tempfree(y); 757 if (a[2] != NULL) { 758 tempfree(z); 759 } 760 x = gettemp(); 761 setsval(x, ""); 762 return(x); 763 } 764 m = (int) getfval(y); 765 if (m <= 0) 766 m = 1; 767 else if (m > k) 768 m = k; 769 tempfree(y); 770 if (a[2] != NULL) { 771 n = (int) getfval(z); 772 tempfree(z); 773 } else 774 n = k - 1; 775 if (n < 0) 776 n = 0; 777 else if (n > k - m) 778 n = k - m; 779 DPRINTF("substr: m=%d, n=%d, s=%s\n", m, n, s); 780 y = gettemp(); 781 temp = s[n+m-1]; /* with thanks to John Linderman */ 782 s[n+m-1] = '\0'; 783 setsval(y, s + m - 1); 784 s[n+m-1] = temp; 785 tempfree(x); 786 return(y); 787 } 788 789 Cell *sindex(Node **a, int nnn) /* index(a[0], a[1]) */ 790 { 791 Cell *x, *y, *z; 792 char *s1, *s2, *p1, *p2, *q; 793 Awkfloat v = 0.0; 794 795 x = execute(a[0]); 796 s1 = getsval(x); 797 y = execute(a[1]); 798 s2 = getsval(y); 799 800 z = gettemp(); 801 for (p1 = s1; *p1 != '\0'; p1++) { 802 for (q = p1, p2 = s2; *p2 != '\0' && *q == *p2; q++, p2++) 803 continue; 804 if (*p2 == '\0') { 805 v = (Awkfloat) (p1 - s1 + 1); /* origin 1 */ 806 break; 807 } 808 } 809 tempfree(x); 810 tempfree(y); 811 setfval(z, v); 812 return(z); 813 } 814 815 #define MAXNUMSIZE 50 816 817 int format(char **pbuf, int *pbufsize, const char *s, Node *a) /* printf-like conversions */ 818 { 819 char *fmt; 820 char *p, *t; 821 const char *os; 822 Cell *x; 823 int flag = 0, n; 824 int fmtwd; /* format width */ 825 int fmtsz = recsize; 826 char *buf = *pbuf; 827 int bufsize = *pbufsize; 828 #define FMTSZ(a) (fmtsz - ((a) - fmt)) 829 #define BUFSZ(a) (bufsize - ((a) - buf)) 830 831 static bool first = true; 832 static bool have_a_format = false; 833 834 if (first) { 835 char xbuf[100]; 836 837 snprintf(xbuf, sizeof(xbuf), "%a", 42.0); 838 have_a_format = (strcmp(xbuf, "0x1.5p+5") == 0); 839 first = false; 840 } 841 842 os = s; 843 p = buf; 844 if ((fmt = (char *) malloc(fmtsz)) == NULL) 845 FATAL("out of memory in format()"); 846 while (*s) { 847 adjbuf(&buf, &bufsize, MAXNUMSIZE+1+p-buf, recsize, &p, "format1"); 848 if (*s != '%') { 849 *p++ = *s++; 850 continue; 851 } 852 if (*(s+1) == '%') { 853 *p++ = '%'; 854 s += 2; 855 continue; 856 } 857 /* have to be real careful in case this is a huge number, eg, %100000d */ 858 fmtwd = atoi(s+1); 859 if (fmtwd < 0) 860 fmtwd = -fmtwd; 861 adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format2"); 862 for (t = fmt; (*t++ = *s) != '\0'; s++) { 863 if (!adjbuf(&fmt, &fmtsz, MAXNUMSIZE+1+t-fmt, recsize, &t, "format3")) 864 FATAL("format item %.30s... ran format() out of memory", os); 865 /* Ignore size specifiers */ 866 if (strchr("hjLlqtz", *s) != NULL) { /* the ansi panoply */ 867 t--; 868 continue; 869 } 870 if (isalpha((uschar)*s)) 871 break; 872 if (*s == '$') { 873 FATAL("'$' not permitted in awk formats"); 874 } 875 if (*s == '*') { 876 if (a == NULL) { 877 FATAL("not enough args in printf(%s)", os); 878 } 879 x = execute(a); 880 a = a->nnext; 881 snprintf(t - 1, FMTSZ(t - 1), 882 "%d", fmtwd=(int) getfval(x)); 883 if (fmtwd < 0) 884 fmtwd = -fmtwd; 885 adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format"); 886 t = fmt + strlen(fmt); 887 tempfree(x); 888 } 889 } 890 *t = '\0'; 891 if (fmtwd < 0) 892 fmtwd = -fmtwd; 893 adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format4"); 894 switch (*s) { 895 case 'a': case 'A': 896 if (have_a_format) 897 flag = *s; 898 else 899 flag = 'f'; 900 break; 901 case 'f': case 'e': case 'g': case 'E': case 'G': 902 flag = 'f'; 903 break; 904 case 'd': case 'i': case 'o': case 'x': case 'X': case 'u': 905 flag = (*s == 'd' || *s == 'i') ? 'd' : 'u'; 906 *(t-1) = 'j'; 907 *t = *s; 908 *++t = '\0'; 909 break; 910 case 's': 911 flag = 's'; 912 break; 913 case 'c': 914 flag = 'c'; 915 break; 916 default: 917 WARNING("weird printf conversion %s", fmt); 918 flag = '?'; 919 break; 920 } 921 if (a == NULL) 922 FATAL("not enough args in printf(%s)", os); 923 x = execute(a); 924 a = a->nnext; 925 n = MAXNUMSIZE; 926 if (fmtwd > n) 927 n = fmtwd; 928 adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format5"); 929 switch (flag) { 930 case '?': snprintf(p, BUFSZ(p), "%s", fmt); /* unknown, so dump it too */ 931 t = getsval(x); 932 n = strlen(t); 933 if (fmtwd > n) 934 n = fmtwd; 935 adjbuf(&buf, &bufsize, 1+strlen(p)+n+p-buf, recsize, &p, "format6"); 936 p += strlen(p); 937 snprintf(p, BUFSZ(p), "%s", t); 938 break; 939 case 'a': 940 case 'A': 941 case 'f': snprintf(p, BUFSZ(p), fmt, getfval(x)); break; 942 case 'd': snprintf(p, BUFSZ(p), fmt, (intmax_t) getfval(x)); break; 943 case 'u': snprintf(p, BUFSZ(p), fmt, (uintmax_t) getfval(x)); break; 944 case 's': 945 t = getsval(x); 946 n = strlen(t); 947 if (fmtwd > n) 948 n = fmtwd; 949 if (!adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format7")) 950 FATAL("huge string/format (%d chars) in printf %.30s... ran format() out of memory", n, t); 951 snprintf(p, BUFSZ(p), fmt, t); 952 break; 953 case 'c': 954 if (isnum(x)) { 955 if ((int)getfval(x)) 956 snprintf(p, BUFSZ(p), fmt, (int) getfval(x)); 957 else { 958 *p++ = '\0'; /* explicit null byte */ 959 *p = '\0'; /* next output will start here */ 960 } 961 } else 962 snprintf(p, BUFSZ(p), fmt, getsval(x)[0]); 963 break; 964 default: 965 FATAL("can't happen: bad conversion %c in format()", flag); 966 } 967 tempfree(x); 968 p += strlen(p); 969 s++; 970 } 971 *p = '\0'; 972 free(fmt); 973 for ( ; a; a = a->nnext) /* evaluate any remaining args */ 974 execute(a); 975 *pbuf = buf; 976 *pbufsize = bufsize; 977 return p - buf; 978 } 979 980 Cell *awksprintf(Node **a, int n) /* sprintf(a[0]) */ 981 { 982 Cell *x; 983 Node *y; 984 char *buf; 985 int bufsz=3*recsize; 986 987 if ((buf = (char *) malloc(bufsz)) == NULL) 988 FATAL("out of memory in awksprintf"); 989 y = a[0]->nnext; 990 x = execute(a[0]); 991 if (format(&buf, &bufsz, getsval(x), y) == -1) 992 FATAL("sprintf string %.30s... too long. can't happen.", buf); 993 tempfree(x); 994 x = gettemp(); 995 x->sval = buf; 996 x->tval = STR; 997 return(x); 998 } 999 1000 Cell *awkprintf(Node **a, int n) /* printf */ 1001 { /* a[0] is list of args, starting with format string */ 1002 /* a[1] is redirection operator, a[2] is redirection file */ 1003 FILE *fp; 1004 Cell *x; 1005 Node *y; 1006 char *buf; 1007 int len; 1008 int bufsz=3*recsize; 1009 1010 if ((buf = (char *) malloc(bufsz)) == NULL) 1011 FATAL("out of memory in awkprintf"); 1012 y = a[0]->nnext; 1013 x = execute(a[0]); 1014 if ((len = format(&buf, &bufsz, getsval(x), y)) == -1) 1015 FATAL("printf string %.30s... too long. can't happen.", buf); 1016 tempfree(x); 1017 if (a[1] == NULL) { 1018 /* fputs(buf, stdout); */ 1019 fwrite(buf, len, 1, stdout); 1020 if (ferror(stdout)) 1021 FATAL("write error on stdout"); 1022 } else { 1023 fp = redirect(ptoi(a[1]), a[2]); 1024 /* fputs(buf, fp); */ 1025 fwrite(buf, len, 1, fp); 1026 fflush(fp); 1027 if (ferror(fp)) 1028 FATAL("write error on %s", filename(fp)); 1029 } 1030 free(buf); 1031 return(True); 1032 } 1033 1034 Cell *arith(Node **a, int n) /* a[0] + a[1], etc. also -a[0] */ 1035 { 1036 Awkfloat i, j = 0; 1037 double v; 1038 Cell *x, *y, *z; 1039 1040 x = execute(a[0]); 1041 i = getfval(x); 1042 tempfree(x); 1043 if (n != UMINUS && n != UPLUS) { 1044 y = execute(a[1]); 1045 j = getfval(y); 1046 tempfree(y); 1047 } 1048 z = gettemp(); 1049 switch (n) { 1050 case ADD: 1051 i += j; 1052 break; 1053 case MINUS: 1054 i -= j; 1055 break; 1056 case MULT: 1057 i *= j; 1058 break; 1059 case DIVIDE: 1060 if (j == 0) 1061 FATAL("division by zero"); 1062 i /= j; 1063 break; 1064 case MOD: 1065 if (j == 0) 1066 FATAL("division by zero in mod"); 1067 modf(i/j, &v); 1068 i = i - j * v; 1069 break; 1070 case UMINUS: 1071 i = -i; 1072 break; 1073 case UPLUS: /* handled by getfval(), above */ 1074 break; 1075 case POWER: 1076 if (j >= 0 && modf(j, &v) == 0.0) /* pos integer exponent */ 1077 i = ipow(i, (int) j); 1078 else { 1079 errno = 0; 1080 i = errcheck(pow(i, j), "pow"); 1081 } 1082 break; 1083 default: /* can't happen */ 1084 FATAL("illegal arithmetic operator %d", n); 1085 } 1086 setfval(z, i); 1087 return(z); 1088 } 1089 1090 double ipow(double x, int n) /* x**n. ought to be done by pow, but isn't always */ 1091 { 1092 double v; 1093 1094 if (n <= 0) 1095 return 1; 1096 v = ipow(x, n/2); 1097 if (n % 2 == 0) 1098 return v * v; 1099 else 1100 return x * v * v; 1101 } 1102 1103 Cell *incrdecr(Node **a, int n) /* a[0]++, etc. */ 1104 { 1105 Cell *x, *z; 1106 int k; 1107 Awkfloat xf; 1108 1109 x = execute(a[0]); 1110 xf = getfval(x); 1111 k = (n == PREINCR || n == POSTINCR) ? 1 : -1; 1112 if (n == PREINCR || n == PREDECR) { 1113 setfval(x, xf + k); 1114 return(x); 1115 } 1116 z = gettemp(); 1117 setfval(z, xf); 1118 setfval(x, xf + k); 1119 tempfree(x); 1120 return(z); 1121 } 1122 1123 Cell *assign(Node **a, int n) /* a[0] = a[1], a[0] += a[1], etc. */ 1124 { /* this is subtle; don't muck with it. */ 1125 Cell *x, *y; 1126 Awkfloat xf, yf; 1127 double v; 1128 1129 y = execute(a[1]); 1130 x = execute(a[0]); 1131 if (n == ASSIGN) { /* ordinary assignment */ 1132 if (x == y && !(x->tval & (FLD|REC)) && x != nfloc) 1133 ; /* self-assignment: leave alone unless it's a field or NF */ 1134 else if ((y->tval & (STR|NUM)) == (STR|NUM)) { 1135 setsval(x, getsval(y)); 1136 x->fval = getfval(y); 1137 x->tval |= NUM; 1138 } 1139 else if (isstr(y)) 1140 setsval(x, getsval(y)); 1141 else if (isnum(y)) 1142 setfval(x, getfval(y)); 1143 else 1144 funnyvar(y, "read value of"); 1145 tempfree(y); 1146 return(x); 1147 } 1148 xf = getfval(x); 1149 yf = getfval(y); 1150 switch (n) { 1151 case ADDEQ: 1152 xf += yf; 1153 break; 1154 case SUBEQ: 1155 xf -= yf; 1156 break; 1157 case MULTEQ: 1158 xf *= yf; 1159 break; 1160 case DIVEQ: 1161 if (yf == 0) 1162 FATAL("division by zero in /="); 1163 xf /= yf; 1164 break; 1165 case MODEQ: 1166 if (yf == 0) 1167 FATAL("division by zero in %%="); 1168 modf(xf/yf, &v); 1169 xf = xf - yf * v; 1170 break; 1171 case POWEQ: 1172 if (yf >= 0 && modf(yf, &v) == 0.0) /* pos integer exponent */ 1173 xf = ipow(xf, (int) yf); 1174 else { 1175 errno = 0; 1176 xf = errcheck(pow(xf, yf), "pow"); 1177 } 1178 break; 1179 default: 1180 FATAL("illegal assignment operator %d", n); 1181 break; 1182 } 1183 tempfree(y); 1184 setfval(x, xf); 1185 return(x); 1186 } 1187 1188 Cell *cat(Node **a, int q) /* a[0] cat a[1] */ 1189 { 1190 Cell *x, *y, *z; 1191 int n1, n2; 1192 char *s = NULL; 1193 int ssz = 0; 1194 1195 x = execute(a[0]); 1196 n1 = strlen(getsval(x)); 1197 adjbuf(&s, &ssz, n1, recsize, 0, "cat1"); 1198 memcpy(s, x->sval, n1); 1199 1200 y = execute(a[1]); 1201 n2 = strlen(getsval(y)); 1202 adjbuf(&s, &ssz, n1 + n2 + 1, recsize, 0, "cat2"); 1203 memcpy(s + n1, y->sval, n2); 1204 s[n1 + n2] = '\0'; 1205 1206 tempfree(x); 1207 tempfree(y); 1208 1209 z = gettemp(); 1210 z->sval = s; 1211 z->tval = STR; 1212 1213 return(z); 1214 } 1215 1216 Cell *pastat(Node **a, int n) /* a[0] { a[1] } */ 1217 { 1218 Cell *x; 1219 1220 if (a[0] == NULL) 1221 x = execute(a[1]); 1222 else { 1223 x = execute(a[0]); 1224 if (istrue(x)) { 1225 tempfree(x); 1226 x = execute(a[1]); 1227 } 1228 } 1229 return x; 1230 } 1231 1232 Cell *dopa2(Node **a, int n) /* a[0], a[1] { a[2] } */ 1233 { 1234 Cell *x; 1235 int pair; 1236 1237 pair = ptoi(a[3]); 1238 if (pairstack[pair] == 0) { 1239 x = execute(a[0]); 1240 if (istrue(x)) 1241 pairstack[pair] = 1; 1242 tempfree(x); 1243 } 1244 if (pairstack[pair] == 1) { 1245 x = execute(a[1]); 1246 if (istrue(x)) 1247 pairstack[pair] = 0; 1248 tempfree(x); 1249 x = execute(a[2]); 1250 return(x); 1251 } 1252 return(False); 1253 } 1254 1255 Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */ 1256 { 1257 Cell *x = NULL, *y, *ap; 1258 const char *s, *origs, *t; 1259 const char *fs = NULL; 1260 char *origfs = NULL; 1261 int sep; 1262 char temp, num[50]; 1263 int n, tempstat, arg3type; 1264 double result; 1265 1266 y = execute(a[0]); /* source string */ 1267 origs = s = strdup(getsval(y)); 1268 if (s == NULL) 1269 FATAL("out of space in split"); 1270 arg3type = ptoi(a[3]); 1271 if (a[2] == NULL) /* fs string */ 1272 fs = getsval(fsloc); 1273 else if (arg3type == STRING) { /* split(str,arr,"string") */ 1274 x = execute(a[2]); 1275 fs = origfs = strdup(getsval(x)); 1276 if (fs == NULL) 1277 FATAL("out of space in split"); 1278 tempfree(x); 1279 } else if (arg3type == REGEXPR) 1280 fs = "(regexpr)"; /* split(str,arr,/regexpr/) */ 1281 else 1282 FATAL("illegal type of split"); 1283 sep = *fs; 1284 ap = execute(a[1]); /* array name */ 1285 freesymtab(ap); 1286 DPRINTF("split: s=|%s|, a=%s, sep=|%s|\n", s, NN(ap->nval), fs); 1287 ap->tval &= ~STR; 1288 ap->tval |= ARR; 1289 ap->sval = (char *) makesymtab(NSYMTAB); 1290 1291 n = 0; 1292 if (arg3type == REGEXPR && strlen((char*)((fa*)a[2])->restr) == 0) { 1293 /* split(s, a, //); have to arrange that it looks like empty sep */ 1294 arg3type = 0; 1295 fs = ""; 1296 sep = 0; 1297 } 1298 if (*s != '\0' && (strlen(fs) > 1 || arg3type == REGEXPR)) { /* reg expr */ 1299 fa *pfa; 1300 if (arg3type == REGEXPR) { /* it's ready already */ 1301 pfa = (fa *) a[2]; 1302 } else { 1303 pfa = makedfa(fs, 1); 1304 } 1305 if (nematch(pfa,s)) { 1306 tempstat = pfa->initstat; 1307 pfa->initstat = 2; 1308 do { 1309 n++; 1310 snprintf(num, sizeof(num), "%d", n); 1311 temp = *patbeg; 1312 setptr(patbeg, '\0'); 1313 if (is_number(s, & result)) 1314 setsymtab(num, s, result, STR|NUM, (Array *) ap->sval); 1315 else 1316 setsymtab(num, s, 0.0, STR, (Array *) ap->sval); 1317 setptr(patbeg, temp); 1318 s = patbeg + patlen; 1319 if (*(patbeg+patlen-1) == '\0' || *s == '\0') { 1320 n++; 1321 snprintf(num, sizeof(num), "%d", n); 1322 setsymtab(num, "", 0.0, STR, (Array *) ap->sval); 1323 pfa->initstat = tempstat; 1324 goto spdone; 1325 } 1326 } while (nematch(pfa,s)); 1327 pfa->initstat = tempstat; /* bwk: has to be here to reset */ 1328 /* cf gsub and refldbld */ 1329 } 1330 n++; 1331 snprintf(num, sizeof(num), "%d", n); 1332 if (is_number(s, & result)) 1333 setsymtab(num, s, result, STR|NUM, (Array *) ap->sval); 1334 else 1335 setsymtab(num, s, 0.0, STR, (Array *) ap->sval); 1336 spdone: 1337 pfa = NULL; 1338 } else if (sep == ' ') { 1339 for (n = 0; ; ) { 1340 #define ISWS(c) ((c) == ' ' || (c) == '\t' || (c) == '\n') 1341 while (ISWS(*s)) 1342 s++; 1343 if (*s == '\0') 1344 break; 1345 n++; 1346 t = s; 1347 do 1348 s++; 1349 while (*s != '\0' && !ISWS(*s)); 1350 temp = *s; 1351 setptr(s, '\0'); 1352 snprintf(num, sizeof(num), "%d", n); 1353 if (is_number(t, & result)) 1354 setsymtab(num, t, result, STR|NUM, (Array *) ap->sval); 1355 else 1356 setsymtab(num, t, 0.0, STR, (Array *) ap->sval); 1357 setptr(s, temp); 1358 if (*s != '\0') 1359 s++; 1360 } 1361 } else if (sep == 0) { /* new: split(s, a, "") => 1 char/elem */ 1362 for (n = 0; *s != '\0'; s++) { 1363 char buf[2]; 1364 n++; 1365 snprintf(num, sizeof(num), "%d", n); 1366 buf[0] = *s; 1367 buf[1] = '\0'; 1368 if (isdigit((uschar)buf[0])) 1369 setsymtab(num, buf, atof(buf), STR|NUM, (Array *) ap->sval); 1370 else 1371 setsymtab(num, buf, 0.0, STR, (Array *) ap->sval); 1372 } 1373 } else if (*s != '\0') { 1374 for (;;) { 1375 n++; 1376 t = s; 1377 while (*s != sep && *s != '\n' && *s != '\0') 1378 s++; 1379 temp = *s; 1380 setptr(s, '\0'); 1381 snprintf(num, sizeof(num), "%d", n); 1382 if (is_number(t, & result)) 1383 setsymtab(num, t, result, STR|NUM, (Array *) ap->sval); 1384 else 1385 setsymtab(num, t, 0.0, STR, (Array *) ap->sval); 1386 setptr(s, temp); 1387 if (*s++ == '\0') 1388 break; 1389 } 1390 } 1391 tempfree(ap); 1392 tempfree(y); 1393 xfree(origs); 1394 xfree(origfs); 1395 x = gettemp(); 1396 x->tval = NUM; 1397 x->fval = n; 1398 return(x); 1399 } 1400 1401 Cell *condexpr(Node **a, int n) /* a[0] ? a[1] : a[2] */ 1402 { 1403 Cell *x; 1404 1405 x = execute(a[0]); 1406 if (istrue(x)) { 1407 tempfree(x); 1408 x = execute(a[1]); 1409 } else { 1410 tempfree(x); 1411 x = execute(a[2]); 1412 } 1413 return(x); 1414 } 1415 1416 Cell *ifstat(Node **a, int n) /* if (a[0]) a[1]; else a[2] */ 1417 { 1418 Cell *x; 1419 1420 x = execute(a[0]); 1421 if (istrue(x)) { 1422 tempfree(x); 1423 x = execute(a[1]); 1424 } else if (a[2] != NULL) { 1425 tempfree(x); 1426 x = execute(a[2]); 1427 } 1428 return(x); 1429 } 1430 1431 Cell *whilestat(Node **a, int n) /* while (a[0]) a[1] */ 1432 { 1433 Cell *x; 1434 1435 for (;;) { 1436 x = execute(a[0]); 1437 if (!istrue(x)) 1438 return(x); 1439 tempfree(x); 1440 x = execute(a[1]); 1441 if (isbreak(x)) { 1442 x = True; 1443 return(x); 1444 } 1445 if (isnext(x) || isexit(x) || isret(x)) 1446 return(x); 1447 tempfree(x); 1448 } 1449 } 1450 1451 Cell *dostat(Node **a, int n) /* do a[0]; while(a[1]) */ 1452 { 1453 Cell *x; 1454 1455 for (;;) { 1456 x = execute(a[0]); 1457 if (isbreak(x)) 1458 return True; 1459 if (isnext(x) || isexit(x) || isret(x)) 1460 return(x); 1461 tempfree(x); 1462 x = execute(a[1]); 1463 if (!istrue(x)) 1464 return(x); 1465 tempfree(x); 1466 } 1467 } 1468 1469 Cell *forstat(Node **a, int n) /* for (a[0]; a[1]; a[2]) a[3] */ 1470 { 1471 Cell *x; 1472 1473 x = execute(a[0]); 1474 tempfree(x); 1475 for (;;) { 1476 if (a[1]!=NULL) { 1477 x = execute(a[1]); 1478 if (!istrue(x)) return(x); 1479 else tempfree(x); 1480 } 1481 x = execute(a[3]); 1482 if (isbreak(x)) /* turn off break */ 1483 return True; 1484 if (isnext(x) || isexit(x) || isret(x)) 1485 return(x); 1486 tempfree(x); 1487 x = execute(a[2]); 1488 tempfree(x); 1489 } 1490 } 1491 1492 Cell *instat(Node **a, int n) /* for (a[0] in a[1]) a[2] */ 1493 { 1494 Cell *x, *vp, *arrayp, *cp, *ncp; 1495 Array *tp; 1496 int i; 1497 1498 vp = execute(a[0]); 1499 arrayp = execute(a[1]); 1500 if (!isarr(arrayp)) { 1501 return True; 1502 } 1503 tp = (Array *) arrayp->sval; 1504 tempfree(arrayp); 1505 for (i = 0; i < tp->size; i++) { /* this routine knows too much */ 1506 for (cp = tp->tab[i]; cp != NULL; cp = ncp) { 1507 setsval(vp, cp->nval); 1508 ncp = cp->cnext; 1509 x = execute(a[2]); 1510 if (isbreak(x)) { 1511 tempfree(vp); 1512 return True; 1513 } 1514 if (isnext(x) || isexit(x) || isret(x)) { 1515 tempfree(vp); 1516 return(x); 1517 } 1518 tempfree(x); 1519 } 1520 } 1521 return True; 1522 } 1523 1524 static char *nawk_convert(const char *s, int (*fun_c)(int), 1525 wint_t (*fun_wc)(wint_t)) 1526 { 1527 char *buf = NULL; 1528 char *pbuf = NULL; 1529 const char *ps = NULL; 1530 size_t n = 0; 1531 wchar_t wc; 1532 size_t sz = MB_CUR_MAX; 1533 1534 if (sz == 1) { 1535 buf = tostring(s); 1536 1537 for (pbuf = buf; *pbuf; pbuf++) 1538 *pbuf = fun_c((uschar)*pbuf); 1539 1540 return buf; 1541 } else { 1542 /* upper/lower character may be shorter/longer */ 1543 buf = tostringN(s, strlen(s) * sz + 1); 1544 1545 (void) mbtowc(NULL, NULL, 0); /* reset internal state */ 1546 /* 1547 * Reset internal state here too. 1548 * Assign result to avoid a compiler warning. (Casting to void 1549 * doesn't work.) 1550 * Increment said variable to avoid a different warning. 1551 */ 1552 int unused = wctomb(NULL, L'\0'); 1553 unused++; 1554 1555 ps = s; 1556 pbuf = buf; 1557 while (n = mbtowc(&wc, ps, sz), 1558 n > 0 && n != (size_t)-1 && n != (size_t)-2) 1559 { 1560 ps += n; 1561 1562 n = wctomb(pbuf, fun_wc(wc)); 1563 if (n == (size_t)-1) 1564 FATAL("illegal wide character %s", s); 1565 1566 pbuf += n; 1567 } 1568 1569 *pbuf = '\0'; 1570 1571 if (n) 1572 FATAL("illegal byte sequence %s", s); 1573 1574 return buf; 1575 } 1576 } 1577 1578 #ifdef __DJGPP__ 1579 static wint_t towupper(wint_t wc) 1580 { 1581 if (wc >= 0 && wc < 256) 1582 return toupper(wc & 0xFF); 1583 1584 return wc; 1585 } 1586 1587 static wint_t towlower(wint_t wc) 1588 { 1589 if (wc >= 0 && wc < 256) 1590 return tolower(wc & 0xFF); 1591 1592 return wc; 1593 } 1594 #endif 1595 1596 static char *nawk_toupper(const char *s) 1597 { 1598 return nawk_convert(s, toupper, towupper); 1599 } 1600 1601 static char *nawk_tolower(const char *s) 1602 { 1603 return nawk_convert(s, tolower, towlower); 1604 } 1605 1606 Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg list */ 1607 { 1608 Cell *x, *y; 1609 Awkfloat u; 1610 int t, sz; 1611 Awkfloat tmp; 1612 char *buf, *fmt; 1613 Node *nextarg; 1614 FILE *fp; 1615 int status = 0; 1616 time_t tv; 1617 struct tm *tm, tmbuf; 1618 1619 t = ptoi(a[0]); 1620 x = execute(a[1]); 1621 nextarg = a[1]->nnext; 1622 switch (t) { 1623 case FLENGTH: 1624 if (isarr(x)) 1625 u = ((Array *) x->sval)->nelem; /* GROT. should be function*/ 1626 else 1627 u = strlen(getsval(x)); 1628 break; 1629 case FLOG: 1630 errno = 0; 1631 u = errcheck(log(getfval(x)), "log"); 1632 break; 1633 case FINT: 1634 modf(getfval(x), &u); break; 1635 case FEXP: 1636 errno = 0; 1637 u = errcheck(exp(getfval(x)), "exp"); 1638 break; 1639 case FSQRT: 1640 errno = 0; 1641 u = errcheck(sqrt(getfval(x)), "sqrt"); 1642 break; 1643 case FSIN: 1644 u = sin(getfval(x)); break; 1645 case FCOS: 1646 u = cos(getfval(x)); break; 1647 case FATAN: 1648 if (nextarg == NULL) { 1649 WARNING("atan2 requires two arguments; returning 1.0"); 1650 u = 1.0; 1651 } else { 1652 y = execute(a[1]->nnext); 1653 u = atan2(getfval(x), getfval(y)); 1654 tempfree(y); 1655 nextarg = nextarg->nnext; 1656 } 1657 break; 1658 case FCOMPL: 1659 u = ~((int)getfval(x)); 1660 break; 1661 case FAND: 1662 if (nextarg == 0) { 1663 WARNING("and requires two arguments; returning 0"); 1664 u = 0; 1665 break; 1666 } 1667 y = execute(a[1]->nnext); 1668 u = ((int)getfval(x)) & ((int)getfval(y)); 1669 tempfree(y); 1670 nextarg = nextarg->nnext; 1671 break; 1672 case FFOR: 1673 if (nextarg == 0) { 1674 WARNING("or requires two arguments; returning 0"); 1675 u = 0; 1676 break; 1677 } 1678 y = execute(a[1]->nnext); 1679 u = ((int)getfval(x)) | ((int)getfval(y)); 1680 tempfree(y); 1681 nextarg = nextarg->nnext; 1682 break; 1683 case FXOR: 1684 if (nextarg == 0) { 1685 WARNING("xor requires two arguments; returning 0"); 1686 u = 0; 1687 break; 1688 } 1689 y = execute(a[1]->nnext); 1690 u = ((int)getfval(x)) ^ ((int)getfval(y)); 1691 tempfree(y); 1692 nextarg = nextarg->nnext; 1693 break; 1694 case FLSHIFT: 1695 if (nextarg == 0) { 1696 WARNING("lshift requires two arguments; returning 0"); 1697 u = 0; 1698 break; 1699 } 1700 y = execute(a[1]->nnext); 1701 u = ((int)getfval(x)) << ((int)getfval(y)); 1702 tempfree(y); 1703 nextarg = nextarg->nnext; 1704 break; 1705 case FRSHIFT: 1706 if (nextarg == 0) { 1707 WARNING("rshift requires two arguments; returning 0"); 1708 u = 0; 1709 break; 1710 } 1711 y = execute(a[1]->nnext); 1712 u = ((int)getfval(x)) >> ((int)getfval(y)); 1713 tempfree(y); 1714 nextarg = nextarg->nnext; 1715 break; 1716 case FSYSTEM: 1717 fflush(stdout); /* in case something is buffered already */ 1718 status = system(getsval(x)); 1719 u = status; 1720 if (status != -1) { 1721 if (WIFEXITED(status)) { 1722 u = WEXITSTATUS(status); 1723 } else if (WIFSIGNALED(status)) { 1724 u = WTERMSIG(status) + 256; 1725 #ifdef WCOREDUMP 1726 if (WCOREDUMP(status)) 1727 u += 256; 1728 #endif 1729 } else /* something else?!? */ 1730 u = 0; 1731 } 1732 break; 1733 case FRAND: 1734 /* random() returns numbers in [0..2^31-1] 1735 * in order to get a number in [0, 1), divide it by 2^31 1736 */ 1737 u = (Awkfloat) random() / (0x7fffffffL + 0x1UL); 1738 break; 1739 case FSRAND: 1740 if (isrec(x)) { /* no argument provided */ 1741 u = time(NULL); 1742 tmp = u; 1743 srandom((unsigned int) u); 1744 } else { 1745 u = getfval(x); 1746 tmp = u; 1747 srandom_deterministic((unsigned int) u); 1748 } 1749 u = srand_seed; 1750 srand_seed = tmp; 1751 break; 1752 case FTOUPPER: 1753 case FTOLOWER: 1754 if (t == FTOUPPER) 1755 buf = nawk_toupper(getsval(x)); 1756 else 1757 buf = nawk_tolower(getsval(x)); 1758 tempfree(x); 1759 x = gettemp(); 1760 setsval(x, buf); 1761 free(buf); 1762 return x; 1763 case FFLUSH: 1764 if (isrec(x) || strlen(getsval(x)) == 0) { 1765 flush_all(); /* fflush() or fflush("") -> all */ 1766 u = 0; 1767 } else if ((fp = openfile(FFLUSH, getsval(x), NULL)) == NULL) 1768 u = EOF; 1769 else 1770 u = fflush(fp); 1771 break; 1772 case FMKTIME: 1773 memset(&tmbuf, 0, sizeof(tmbuf)); 1774 tm = &tmbuf; 1775 t = sscanf(getsval(x), "%d %d %d %d %d %d %d", 1776 &tm->tm_year, &tm->tm_mon, &tm->tm_mday, &tm->tm_hour, 1777 &tm->tm_min, &tm->tm_sec, &tm->tm_isdst); 1778 switch (t) { 1779 case 6: 1780 tm->tm_isdst = -1; /* let mktime figure it out */ 1781 /* FALLTHROUGH */ 1782 case 7: 1783 tm->tm_year -= 1900; 1784 tm->tm_mon--; 1785 u = mktime(tm); 1786 break; 1787 default: 1788 u = -1; 1789 break; 1790 } 1791 break; 1792 case FSYSTIME: 1793 u = time((time_t *) 0); 1794 break; 1795 case FSTRFTIME: 1796 /* strftime([format [,timestamp]]) */ 1797 if (nextarg) { 1798 y = execute(nextarg); 1799 nextarg = nextarg->nnext; 1800 tv = (time_t) getfval(y); 1801 tempfree(y); 1802 } else 1803 tv = time((time_t *) 0); 1804 tm = localtime(&tv); 1805 if (tm == NULL) 1806 FATAL("bad time %ld", (long)tv); 1807 1808 if (isrec(x)) { 1809 /* format argument not provided, use default */ 1810 fmt = tostring("%a %b %d %H:%M:%S %Z %Y"); 1811 } else 1812 fmt = tostring(getsval(x)); 1813 1814 sz = 32; 1815 buf = NULL; 1816 do { 1817 if ((buf = (char *) reallocarray(buf, 2, sz)) == NULL) 1818 FATAL("out of memory in strftime"); 1819 sz *= 2; 1820 } while (strftime(buf, sz, fmt, tm) == 0 && fmt[0] != '\0'); 1821 1822 y = gettemp(); 1823 setsval(y, buf); 1824 free(fmt); 1825 free(buf); 1826 1827 return y; 1828 default: /* can't happen */ 1829 FATAL("illegal function type %d", t); 1830 break; 1831 } 1832 tempfree(x); 1833 x = gettemp(); 1834 setfval(x, u); 1835 if (nextarg != NULL) { 1836 WARNING("warning: function has too many arguments"); 1837 for ( ; nextarg; nextarg = nextarg->nnext) 1838 execute(nextarg); 1839 } 1840 return(x); 1841 } 1842 1843 Cell *printstat(Node **a, int n) /* print a[0] */ 1844 { 1845 Node *x; 1846 Cell *y; 1847 FILE *fp; 1848 1849 if (a[1] == NULL) /* a[1] is redirection operator, a[2] is file */ 1850 fp = stdout; 1851 else 1852 fp = redirect(ptoi(a[1]), a[2]); 1853 for (x = a[0]; x != NULL; x = x->nnext) { 1854 y = execute(x); 1855 fputs(getpssval(y), fp); 1856 tempfree(y); 1857 if (x->nnext == NULL) 1858 fputs(getsval(orsloc), fp); 1859 else 1860 fputs(getsval(ofsloc), fp); 1861 } 1862 if (a[1] != NULL) 1863 fflush(fp); 1864 if (ferror(fp)) 1865 FATAL("write error on %s", filename(fp)); 1866 return(True); 1867 } 1868 1869 Cell *nullproc(Node **a, int n) 1870 { 1871 return 0; 1872 } 1873 1874 1875 FILE *redirect(int a, Node *b) /* set up all i/o redirections */ 1876 { 1877 FILE *fp; 1878 Cell *x; 1879 char *fname; 1880 1881 x = execute(b); 1882 fname = getsval(x); 1883 fp = openfile(a, fname, NULL); 1884 if (fp == NULL) 1885 FATAL("can't open file %s", fname); 1886 tempfree(x); 1887 return fp; 1888 } 1889 1890 struct files { 1891 FILE *fp; 1892 const char *fname; 1893 int mode; /* '|', 'a', 'w' => LE/LT, GT */ 1894 } *files; 1895 1896 size_t nfiles; 1897 1898 static void stdinit(void) /* in case stdin, etc., are not constants */ 1899 { 1900 nfiles = FOPEN_MAX; 1901 files = (struct files *) calloc(nfiles, sizeof(*files)); 1902 if (files == NULL) 1903 FATAL("can't allocate file memory for %zu files", nfiles); 1904 files[0].fp = stdin; 1905 files[0].fname = "/dev/stdin"; 1906 files[0].mode = LT; 1907 files[1].fp = stdout; 1908 files[1].fname = "/dev/stdout"; 1909 files[1].mode = GT; 1910 files[2].fp = stderr; 1911 files[2].fname = "/dev/stderr"; 1912 files[2].mode = GT; 1913 } 1914 1915 FILE *openfile(int a, const char *us, bool *pnewflag) 1916 { 1917 const char *s = us; 1918 size_t i; 1919 int m; 1920 FILE *fp = NULL; 1921 1922 if (*s == '\0') 1923 FATAL("null file name in print or getline"); 1924 for (i = 0; i < nfiles; i++) 1925 if (files[i].fname && strcmp(s, files[i].fname) == 0 && 1926 (a == files[i].mode || (a==APPEND && files[i].mode==GT) || 1927 a == FFLUSH)) { 1928 if (pnewflag) 1929 *pnewflag = false; 1930 return files[i].fp; 1931 } 1932 if (a == FFLUSH) /* didn't find it, so don't create it! */ 1933 return NULL; 1934 1935 for (i = 0; i < nfiles; i++) 1936 if (files[i].fp == NULL) 1937 break; 1938 if (i >= nfiles) { 1939 struct files *nf; 1940 size_t nnf = nfiles + FOPEN_MAX; 1941 nf = (struct files *) reallocarray(files, nnf, sizeof(*nf)); 1942 if (nf == NULL) 1943 FATAL("cannot grow files for %s and %zu files", s, nnf); 1944 memset(&nf[nfiles], 0, FOPEN_MAX * sizeof(*nf)); 1945 nfiles = nnf; 1946 files = nf; 1947 } 1948 fflush(stdout); /* force a semblance of order */ 1949 m = a; 1950 if (a == GT) { 1951 fp = fopen(s, "w"); 1952 } else if (a == APPEND) { 1953 fp = fopen(s, "a"); 1954 m = GT; /* so can mix > and >> */ 1955 } else if (a == '|') { /* output pipe */ 1956 fp = popen(s, "w"); 1957 } else if (a == LE) { /* input pipe */ 1958 fp = popen(s, "r"); 1959 } else if (a == LT) { /* getline <file */ 1960 fp = strcmp(s, "-") == 0 ? stdin : fopen(s, "r"); /* "-" is stdin */ 1961 } else /* can't happen */ 1962 FATAL("illegal redirection %d", a); 1963 if (fp != NULL) { 1964 files[i].fname = tostring(s); 1965 files[i].fp = fp; 1966 files[i].mode = m; 1967 if (pnewflag) 1968 *pnewflag = true; 1969 if (fp != stdin && fp != stdout && fp != stderr) 1970 (void) fcntl(fileno(fp), F_SETFD, FD_CLOEXEC); 1971 } 1972 return fp; 1973 } 1974 1975 const char *filename(FILE *fp) 1976 { 1977 size_t i; 1978 1979 for (i = 0; i < nfiles; i++) 1980 if (fp == files[i].fp) 1981 return files[i].fname; 1982 return "???"; 1983 } 1984 1985 Cell *closefile(Node **a, int n) 1986 { 1987 Cell *x; 1988 size_t i; 1989 bool stat; 1990 1991 x = execute(a[0]); 1992 getsval(x); 1993 stat = true; 1994 for (i = 0; i < nfiles; i++) { 1995 if (!files[i].fname || strcmp(x->sval, files[i].fname) != 0) 1996 continue; 1997 if (ferror(files[i].fp)) 1998 FATAL("i/o error occurred on %s", files[i].fname); 1999 if (files[i].fp == stdin || files[i].fp == stdout || 2000 files[i].fp == stderr) 2001 stat = freopen("/dev/null", "r+", files[i].fp) == NULL; 2002 else if (files[i].mode == '|' || files[i].mode == LE) 2003 stat = pclose(files[i].fp) == -1; 2004 else 2005 stat = fclose(files[i].fp) == EOF; 2006 if (stat) 2007 FATAL("i/o error occurred closing %s", files[i].fname); 2008 if (i > 2) /* don't do /dev/std... */ 2009 xfree(files[i].fname); 2010 files[i].fname = NULL; /* watch out for ref thru this */ 2011 files[i].fp = NULL; 2012 break; 2013 } 2014 tempfree(x); 2015 x = gettemp(); 2016 setfval(x, (Awkfloat) (stat ? -1 : 0)); 2017 return(x); 2018 } 2019 2020 void closeall(void) 2021 { 2022 size_t i; 2023 bool stat = false; 2024 2025 for (i = 0; i < nfiles; i++) { 2026 if (! files[i].fp) 2027 continue; 2028 if (ferror(files[i].fp)) 2029 FATAL( "i/o error occurred on %s", files[i].fname ); 2030 if (files[i].fp == stdin) 2031 continue; 2032 if (files[i].mode == '|' || files[i].mode == LE) 2033 stat = pclose(files[i].fp) == -1; 2034 else if (files[i].fp == stdout || files[i].fp == stderr) 2035 stat = fflush(files[i].fp) == EOF; 2036 else 2037 stat = fclose(files[i].fp) == EOF; 2038 if (stat) 2039 FATAL( "i/o error occurred while closing %s", files[i].fname ); 2040 } 2041 } 2042 2043 static void flush_all(void) 2044 { 2045 size_t i; 2046 2047 for (i = 0; i < nfiles; i++) 2048 if (files[i].fp) 2049 fflush(files[i].fp); 2050 } 2051 2052 void backsub(char **pb_ptr, const char **sptr_ptr); 2053 2054 Cell *sub(Node **a, int nnn) /* substitute command */ 2055 { 2056 const char *sptr, *q; 2057 Cell *x, *y, *result; 2058 char *t, *buf, *pb; 2059 fa *pfa; 2060 int bufsz = recsize; 2061 2062 if ((buf = (char *) malloc(bufsz)) == NULL) 2063 FATAL("out of memory in sub"); 2064 x = execute(a[3]); /* target string */ 2065 t = getsval(x); 2066 if (a[0] == NULL) /* 0 => a[1] is already-compiled regexpr */ 2067 pfa = (fa *) a[1]; /* regular expression */ 2068 else { 2069 y = execute(a[1]); 2070 pfa = makedfa(getsval(y), 1); 2071 tempfree(y); 2072 } 2073 y = execute(a[2]); /* replacement string */ 2074 result = False; 2075 if (pmatch(pfa, t)) { 2076 sptr = t; 2077 adjbuf(&buf, &bufsz, 1+patbeg-sptr, recsize, 0, "sub"); 2078 pb = buf; 2079 while (sptr < patbeg) 2080 *pb++ = *sptr++; 2081 sptr = getsval(y); 2082 while (*sptr != '\0') { 2083 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "sub"); 2084 if (*sptr == '\\') { 2085 backsub(&pb, &sptr); 2086 } else if (*sptr == '&') { 2087 sptr++; 2088 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "sub"); 2089 for (q = patbeg; q < patbeg+patlen; ) 2090 *pb++ = *q++; 2091 } else 2092 *pb++ = *sptr++; 2093 } 2094 *pb = '\0'; 2095 if (pb > buf + bufsz) 2096 FATAL("sub result1 %.30s too big; can't happen", buf); 2097 sptr = patbeg + patlen; 2098 if ((patlen == 0 && *patbeg) || (patlen && *(sptr-1))) { 2099 adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "sub"); 2100 while ((*pb++ = *sptr++) != '\0') 2101 continue; 2102 } 2103 if (pb > buf + bufsz) 2104 FATAL("sub result2 %.30s too big; can't happen", buf); 2105 setsval(x, buf); /* BUG: should be able to avoid copy */ 2106 result = True; 2107 } 2108 tempfree(x); 2109 tempfree(y); 2110 free(buf); 2111 return result; 2112 } 2113 2114 Cell *gsub(Node **a, int nnn) /* global substitute */ 2115 { 2116 Cell *x, *y; 2117 char *rptr, *pb; 2118 const char *q, *t, *sptr; 2119 char *buf; 2120 fa *pfa; 2121 int mflag, tempstat, num; 2122 int bufsz = recsize; 2123 2124 if ((buf = (char *) malloc(bufsz)) == NULL) 2125 FATAL("out of memory in gsub"); 2126 mflag = 0; /* if mflag == 0, can replace empty string */ 2127 num = 0; 2128 x = execute(a[3]); /* target string */ 2129 t = getsval(x); 2130 if (a[0] == NULL) /* 0 => a[1] is already-compiled regexpr */ 2131 pfa = (fa *) a[1]; /* regular expression */ 2132 else { 2133 y = execute(a[1]); 2134 pfa = makedfa(getsval(y), 1); 2135 tempfree(y); 2136 } 2137 y = execute(a[2]); /* replacement string */ 2138 if (pmatch(pfa, t)) { 2139 tempstat = pfa->initstat; 2140 pfa->initstat = 2; 2141 pb = buf; 2142 rptr = getsval(y); 2143 do { 2144 if (patlen == 0 && *patbeg != '\0') { /* matched empty string */ 2145 if (mflag == 0) { /* can replace empty */ 2146 num++; 2147 sptr = rptr; 2148 while (*sptr != '\0') { 2149 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gsub"); 2150 if (*sptr == '\\') { 2151 backsub(&pb, &sptr); 2152 } else if (*sptr == '&') { 2153 sptr++; 2154 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gsub"); 2155 for (q = patbeg; q < patbeg+patlen; ) 2156 *pb++ = *q++; 2157 } else 2158 *pb++ = *sptr++; 2159 } 2160 } 2161 if (*t == '\0') /* at end */ 2162 goto done; 2163 adjbuf(&buf, &bufsz, 2+pb-buf, recsize, &pb, "gsub"); 2164 *pb++ = *t++; 2165 if (pb > buf + bufsz) /* BUG: not sure of this test */ 2166 FATAL("gsub result0 %.30s too big; can't happen", buf); 2167 mflag = 0; 2168 } 2169 else { /* matched nonempty string */ 2170 num++; 2171 sptr = t; 2172 adjbuf(&buf, &bufsz, 1+(patbeg-sptr)+pb-buf, recsize, &pb, "gsub"); 2173 while (sptr < patbeg) 2174 *pb++ = *sptr++; 2175 sptr = rptr; 2176 while (*sptr != '\0') { 2177 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gsub"); 2178 if (*sptr == '\\') { 2179 backsub(&pb, &sptr); 2180 } else if (*sptr == '&') { 2181 sptr++; 2182 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gsub"); 2183 for (q = patbeg; q < patbeg+patlen; ) 2184 *pb++ = *q++; 2185 } else 2186 *pb++ = *sptr++; 2187 } 2188 t = patbeg + patlen; 2189 if (patlen == 0 || *t == '\0' || *(t-1) == '\0') 2190 goto done; 2191 if (pb > buf + bufsz) 2192 FATAL("gsub result1 %.30s too big; can't happen", buf); 2193 mflag = 1; 2194 } 2195 } while (pmatch(pfa,t)); 2196 sptr = t; 2197 adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "gsub"); 2198 while ((*pb++ = *sptr++) != '\0') 2199 continue; 2200 done: if (pb < buf + bufsz) 2201 *pb = '\0'; 2202 else if (*(pb-1) != '\0') 2203 FATAL("gsub result2 %.30s truncated; can't happen", buf); 2204 setsval(x, buf); /* BUG: should be able to avoid copy + free */ 2205 pfa->initstat = tempstat; 2206 } 2207 tempfree(x); 2208 tempfree(y); 2209 x = gettemp(); 2210 x->tval = NUM; 2211 x->fval = num; 2212 free(buf); 2213 return(x); 2214 } 2215 2216 Cell *gensub(Node **a, int nnn) /* global selective substitute */ 2217 /* XXX incomplete - doesn't support backreferences \0 ... \9 */ 2218 { 2219 Cell *x, *y, *res, *h; 2220 char *rptr; 2221 const char *sptr; 2222 char *buf, *pb; 2223 const char *t, *q; 2224 fa *pfa; 2225 int mflag, tempstat, num, whichm; 2226 int bufsz = recsize; 2227 2228 if ((buf = malloc(bufsz)) == NULL) 2229 FATAL("out of memory in gensub"); 2230 mflag = 0; /* if mflag == 0, can replace empty string */ 2231 num = 0; 2232 x = execute(a[4]); /* source string */ 2233 t = getsval(x); 2234 res = copycell(x); /* target string - initially copy of source */ 2235 res->csub = CTEMP; /* result values are temporary */ 2236 if (a[0] == 0) /* 0 => a[1] is already-compiled regexpr */ 2237 pfa = (fa *) a[1]; /* regular expression */ 2238 else { 2239 y = execute(a[1]); 2240 pfa = makedfa(getsval(y), 1); 2241 tempfree(y); 2242 } 2243 y = execute(a[2]); /* replacement string */ 2244 h = execute(a[3]); /* which matches should be replaced */ 2245 sptr = getsval(h); 2246 if (sptr[0] == 'g' || sptr[0] == 'G') 2247 whichm = -1; 2248 else { 2249 /* 2250 * The specified number is index of replacement, starting 2251 * from 1. GNU awk treats index lower than 0 same as 2252 * 1, we do same for compatibility. 2253 */ 2254 whichm = (int) getfval(h) - 1; 2255 if (whichm < 0) 2256 whichm = 0; 2257 } 2258 tempfree(h); 2259 2260 if (pmatch(pfa, t)) { 2261 char *sl; 2262 2263 tempstat = pfa->initstat; 2264 pfa->initstat = 2; 2265 pb = buf; 2266 rptr = getsval(y); 2267 /* 2268 * XXX if there are any backreferences in subst string, 2269 * complain now. 2270 */ 2271 for (sl = rptr; (sl = strchr(sl, '\\')) && sl[1]; sl++) { 2272 if (strchr("0123456789", sl[1])) { 2273 FATAL("gensub doesn't support backreferences (subst \"%s\")", rptr); 2274 } 2275 } 2276 2277 do { 2278 if (whichm >= 0 && whichm != num) { 2279 num++; 2280 adjbuf(&buf, &bufsz, (pb - buf) + (patbeg - t) + patlen, recsize, &pb, "gensub"); 2281 2282 /* copy the part of string up to and including 2283 * match to output buffer */ 2284 while (t < patbeg + patlen) 2285 *pb++ = *t++; 2286 continue; 2287 } 2288 2289 if (patlen == 0 && *patbeg != 0) { /* matched empty string */ 2290 if (mflag == 0) { /* can replace empty */ 2291 num++; 2292 sptr = rptr; 2293 while (*sptr != 0) { 2294 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub"); 2295 if (*sptr == '\\') { 2296 backsub(&pb, &sptr); 2297 } else if (*sptr == '&') { 2298 sptr++; 2299 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub"); 2300 for (q = patbeg; q < patbeg+patlen; ) 2301 *pb++ = *q++; 2302 } else 2303 *pb++ = *sptr++; 2304 } 2305 } 2306 if (*t == 0) /* at end */ 2307 goto done; 2308 adjbuf(&buf, &bufsz, 2+pb-buf, recsize, &pb, "gensub"); 2309 *pb++ = *t++; 2310 if (pb > buf + bufsz) /* BUG: not sure of this test */ 2311 FATAL("gensub result0 %.30s too big; can't happen", buf); 2312 mflag = 0; 2313 } 2314 else { /* matched nonempty string */ 2315 num++; 2316 sptr = t; 2317 adjbuf(&buf, &bufsz, 1+(patbeg-sptr)+pb-buf, recsize, &pb, "gensub"); 2318 while (sptr < patbeg) 2319 *pb++ = *sptr++; 2320 sptr = rptr; 2321 while (*sptr != 0) { 2322 adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub"); 2323 if (*sptr == '\\') { 2324 backsub(&pb, &sptr); 2325 } else if (*sptr == '&') { 2326 sptr++; 2327 adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub"); 2328 for (q = patbeg; q < patbeg+patlen; ) 2329 *pb++ = *q++; 2330 } else 2331 *pb++ = *sptr++; 2332 } 2333 t = patbeg + patlen; 2334 if (patlen == 0 || *t == 0 || *(t-1) == 0) 2335 goto done; 2336 if (pb > buf + bufsz) 2337 FATAL("gensub result1 %.30s too big; can't happen", buf); 2338 mflag = 1; 2339 } 2340 } while (pmatch(pfa,t)); 2341 sptr = t; 2342 adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "gensub"); 2343 while ((*pb++ = *sptr++) != 0) 2344 ; 2345 done: if (pb > buf + bufsz) 2346 FATAL("gensub result2 %.30s too big; can't happen", buf); 2347 *pb = '\0'; 2348 setsval(res, buf); 2349 pfa->initstat = tempstat; 2350 } 2351 tempfree(x); 2352 tempfree(y); 2353 free(buf); 2354 return(res); 2355 } 2356 2357 void backsub(char **pb_ptr, const char **sptr_ptr) /* handle \\& variations */ 2358 { /* sptr[0] == '\\' */ 2359 char *pb = *pb_ptr; 2360 const char *sptr = *sptr_ptr; 2361 2362 if (sptr[1] == '\\') { 2363 if (sptr[2] == '\\' && sptr[3] == '&') { /* \\\& -> \& */ 2364 *pb++ = '\\'; 2365 *pb++ = '&'; 2366 sptr += 4; 2367 } else if (sptr[2] == '&') { /* \\& -> \ + matched */ 2368 *pb++ = '\\'; 2369 sptr += 2; 2370 } else if (do_posix) { /* \\x -> \x */ 2371 sptr++; 2372 *pb++ = *sptr++; 2373 } else { /* \\x -> \\x */ 2374 *pb++ = *sptr++; 2375 *pb++ = *sptr++; 2376 } 2377 } else if (sptr[1] == '&') { /* literal & */ 2378 sptr++; 2379 *pb++ = *sptr++; 2380 } else /* literal \ */ 2381 *pb++ = *sptr++; 2382 2383 *pb_ptr = pb; 2384 *sptr_ptr = sptr; 2385 } 2386