1 /**************************************************************** 2 Copyright (C) Lucent Technologies 1997 3 All Rights Reserved 4 5 Permission to use, copy, modify, and distribute this software and 6 its documentation for any purpose and without fee is hereby 7 granted, provided that the above copyright notice appear in all 8 copies and that both that the copyright notice and this 9 permission notice and warranty disclaimer appear in supporting 10 documentation, and that the name Lucent Technologies or any of 11 its entities not be used in advertising or publicity pertaining 12 to distribution of the software without specific, written prior 13 permission. 14 15 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, 16 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. 17 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY 18 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 19 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER 20 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, 21 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF 22 THIS SOFTWARE. 23 ****************************************************************/ 24 25 #define DEBUG 26 #include <stdio.h> 27 #include <math.h> 28 #include <ctype.h> 29 #include <string.h> 30 #include <stdlib.h> 31 #include "awk.h" 32 #include "ytab.h" 33 34 #define FULLTAB 2 /* rehash when table gets this x full */ 35 #define GROWTAB 4 /* grow table by this factor */ 36 37 Array *symtab; /* main symbol table */ 38 39 char **FS; /* initial field sep */ 40 char **RS; /* initial record sep */ 41 char **OFS; /* output field sep */ 42 char **ORS; /* output record sep */ 43 char **OFMT; /* output format for numbers */ 44 char **CONVFMT; /* format for conversions in getsval */ 45 Awkfloat *NF; /* number of fields in current record */ 46 Awkfloat *NR; /* number of current record */ 47 Awkfloat *FNR; /* number of current record in current file */ 48 char **FILENAME; /* current filename argument */ 49 Awkfloat *ARGC; /* number of arguments from command line */ 50 char **SUBSEP; /* subscript separator for a[i,j,k]; default \034 */ 51 Awkfloat *RSTART; /* start of re matched with ~; origin 1 (!) */ 52 Awkfloat *RLENGTH; /* length of same */ 53 54 Cell *fsloc; /* FS */ 55 Cell *nrloc; /* NR */ 56 Cell *nfloc; /* NF */ 57 Cell *fnrloc; /* FNR */ 58 Cell *ofsloc; /* OFS */ 59 Cell *orsloc; /* ORS */ 60 Cell *rsloc; /* RS */ 61 Array *ARGVtab; /* symbol table containing ARGV[...] */ 62 Array *ENVtab; /* symbol table containing ENVIRON[...] */ 63 Cell *rstartloc; /* RSTART */ 64 Cell *rlengthloc; /* RLENGTH */ 65 Cell *subseploc; /* SUBSEP */ 66 Cell *symtabloc; /* SYMTAB */ 67 68 Cell *nullloc; /* a guaranteed empty cell */ 69 Node *nullnode; /* zero&null, converted into a node for comparisons */ 70 Cell *literal0; 71 72 extern Cell **fldtab; 73 74 static void 75 setfree(Cell *vp) 76 { 77 if (&vp->sval == FS || &vp->sval == RS || 78 &vp->sval == OFS || &vp->sval == ORS || 79 &vp->sval == OFMT || &vp->sval == CONVFMT || 80 &vp->sval == FILENAME || &vp->sval == SUBSEP) 81 vp->tval |= DONTFREE; 82 else 83 vp->tval &= ~DONTFREE; 84 } 85 86 void syminit(void) /* initialize symbol table with builtin vars */ 87 { 88 literal0 = setsymtab("0", "0", 0.0, NUM|STR|CON|DONTFREE, symtab); 89 /* this is used for if(x)... tests: */ 90 nullloc = setsymtab("$zero&null", "", 0.0, NUM|STR|CON|DONTFREE, symtab); 91 nullnode = celltonode(nullloc, CCON); 92 93 fsloc = setsymtab("FS", " ", 0.0, STR|DONTFREE, symtab); 94 FS = &fsloc->sval; 95 rsloc = setsymtab("RS", "\n", 0.0, STR|DONTFREE, symtab); 96 RS = &rsloc->sval; 97 ofsloc = setsymtab("OFS", " ", 0.0, STR|DONTFREE, symtab); 98 OFS = &ofsloc->sval; 99 orsloc = setsymtab("ORS", "\n", 0.0, STR|DONTFREE, symtab); 100 ORS = &orsloc->sval; 101 OFMT = &setsymtab("OFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval; 102 CONVFMT = &setsymtab("CONVFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval; 103 FILENAME = &setsymtab("FILENAME", "", 0.0, STR|DONTFREE, symtab)->sval; 104 nfloc = setsymtab("NF", "", 0.0, NUM, symtab); 105 NF = &nfloc->fval; 106 nrloc = setsymtab("NR", "", 0.0, NUM, symtab); 107 NR = &nrloc->fval; 108 fnrloc = setsymtab("FNR", "", 0.0, NUM, symtab); 109 FNR = &fnrloc->fval; 110 subseploc = setsymtab("SUBSEP", "\034", 0.0, STR|DONTFREE, symtab); 111 SUBSEP = &subseploc->sval; 112 rstartloc = setsymtab("RSTART", "", 0.0, NUM, symtab); 113 RSTART = &rstartloc->fval; 114 rlengthloc = setsymtab("RLENGTH", "", 0.0, NUM, symtab); 115 RLENGTH = &rlengthloc->fval; 116 symtabloc = setsymtab("SYMTAB", "", 0.0, ARR, symtab); 117 free(symtabloc->sval); 118 symtabloc->sval = (char *) symtab; 119 } 120 121 void arginit(int ac, char **av) /* set up ARGV and ARGC */ 122 { 123 Cell *cp; 124 int i; 125 char temp[50]; 126 127 ARGC = &setsymtab("ARGC", "", (Awkfloat) ac, NUM, symtab)->fval; 128 cp = setsymtab("ARGV", "", 0.0, ARR, symtab); 129 ARGVtab = makesymtab(NSYMTAB); /* could be (int) ARGC as well */ 130 free(cp->sval); 131 cp->sval = (char *) ARGVtab; 132 for (i = 0; i < ac; i++) { 133 sprintf(temp, "%d", i); 134 if (is_number(*av)) 135 setsymtab(temp, *av, atof(*av), STR|NUM, ARGVtab); 136 else 137 setsymtab(temp, *av, 0.0, STR, ARGVtab); 138 av++; 139 } 140 } 141 142 void envinit(char **envp) /* set up ENVIRON variable */ 143 { 144 Cell *cp; 145 char *p; 146 147 cp = setsymtab("ENVIRON", "", 0.0, ARR, symtab); 148 ENVtab = makesymtab(NSYMTAB); 149 free(cp->sval); 150 cp->sval = (char *) ENVtab; 151 for ( ; *envp; envp++) { 152 if ((p = strchr(*envp, '=')) == NULL) 153 continue; 154 if( p == *envp ) /* no left hand side name in env string */ 155 continue; 156 *p++ = 0; /* split into two strings at = */ 157 if (is_number(p)) 158 setsymtab(*envp, p, atof(p), STR|NUM, ENVtab); 159 else 160 setsymtab(*envp, p, 0.0, STR, ENVtab); 161 p[-1] = '='; /* restore in case env is passed down to a shell */ 162 } 163 } 164 165 Array *makesymtab(int n) /* make a new symbol table */ 166 { 167 Array *ap; 168 Cell **tp; 169 170 ap = malloc(sizeof(*ap)); 171 tp = calloc(n, sizeof(*tp)); 172 if (ap == NULL || tp == NULL) 173 FATAL("out of space in makesymtab"); 174 ap->nelem = 0; 175 ap->size = n; 176 ap->tab = tp; 177 return(ap); 178 } 179 180 void freesymtab(Cell *ap) /* free a symbol table */ 181 { 182 Cell *cp, *temp; 183 Array *tp; 184 int i; 185 186 if (!isarr(ap)) 187 return; 188 tp = (Array *) ap->sval; 189 if (tp == NULL) 190 return; 191 for (i = 0; i < tp->size; i++) { 192 for (cp = tp->tab[i]; cp != NULL; cp = temp) { 193 xfree(cp->nval); 194 if (freeable(cp)) 195 xfree(cp->sval); 196 temp = cp->cnext; /* avoids freeing then using */ 197 free(cp); 198 tp->nelem--; 199 } 200 tp->tab[i] = NULL; 201 } 202 if (tp->nelem != 0) 203 WARNING("can't happen: inconsistent element count freeing %s", ap->nval); 204 free(tp->tab); 205 free(tp); 206 } 207 208 void freeelem(Cell *ap, const char *s) /* free elem s from ap (i.e., ap["s"] */ 209 { 210 Array *tp; 211 Cell *p, *prev = NULL; 212 int h; 213 214 tp = (Array *) ap->sval; 215 h = hash(s, tp->size); 216 for (p = tp->tab[h]; p != NULL; prev = p, p = p->cnext) 217 if (strcmp(s, p->nval) == 0) { 218 if (prev == NULL) /* 1st one */ 219 tp->tab[h] = p->cnext; 220 else /* middle somewhere */ 221 prev->cnext = p->cnext; 222 if (freeable(p)) 223 xfree(p->sval); 224 free(p->nval); 225 free(p); 226 tp->nelem--; 227 return; 228 } 229 } 230 231 Cell *setsymtab(const char *n, const char *s, Awkfloat f, unsigned t, Array *tp) 232 { 233 int h; 234 Cell *p; 235 236 if (n != NULL && (p = lookup(n, tp)) != NULL) { 237 DPRINTF("setsymtab found %p: n=%s s=\"%s\" f=%g t=%o\n", 238 (void*)p, NN(p->nval), NN(p->sval), p->fval, p->tval); 239 return(p); 240 } 241 p = malloc(sizeof(*p)); 242 if (p == NULL) 243 FATAL("out of space for symbol table at %s", n); 244 p->nval = tostring(n); 245 p->sval = s ? tostring(s) : tostring(""); 246 p->fval = f; 247 p->tval = t; 248 p->csub = CUNK; 249 p->ctype = OCELL; 250 tp->nelem++; 251 if (tp->nelem > FULLTAB * tp->size) 252 rehash(tp); 253 h = hash(n, tp->size); 254 p->cnext = tp->tab[h]; 255 tp->tab[h] = p; 256 DPRINTF("setsymtab set %p: n=%s s=\"%s\" f=%g t=%o\n", 257 (void*)p, p->nval, p->sval, p->fval, p->tval); 258 return(p); 259 } 260 261 int hash(const char *s, int n) /* form hash value for string s */ 262 { 263 unsigned hashval; 264 265 for (hashval = 0; *s != '\0'; s++) 266 hashval = (*s + 31 * hashval); 267 return hashval % n; 268 } 269 270 void rehash(Array *tp) /* rehash items in small table into big one */ 271 { 272 int i, nh, nsz; 273 Cell *cp, *op, **np; 274 275 nsz = GROWTAB * tp->size; 276 np = calloc(nsz, sizeof(*np)); 277 if (np == NULL) /* can't do it, but can keep running. */ 278 return; /* someone else will run out later. */ 279 for (i = 0; i < tp->size; i++) { 280 for (cp = tp->tab[i]; cp; cp = op) { 281 op = cp->cnext; 282 nh = hash(cp->nval, nsz); 283 cp->cnext = np[nh]; 284 np[nh] = cp; 285 } 286 } 287 free(tp->tab); 288 tp->tab = np; 289 tp->size = nsz; 290 } 291 292 Cell *lookup(const char *s, Array *tp) /* look for s in tp */ 293 { 294 Cell *p; 295 int h; 296 297 h = hash(s, tp->size); 298 for (p = tp->tab[h]; p != NULL; p = p->cnext) 299 if (strcmp(s, p->nval) == 0) 300 return(p); /* found it */ 301 return(NULL); /* not found */ 302 } 303 304 Awkfloat setfval(Cell *vp, Awkfloat f) /* set float val of a Cell */ 305 { 306 int fldno; 307 308 f += 0.0; /* normalise negative zero to positive zero */ 309 if ((vp->tval & (NUM | STR)) == 0) 310 funnyvar(vp, "assign to"); 311 if (isfld(vp)) { 312 donerec = false; /* mark $0 invalid */ 313 fldno = atoi(vp->nval); 314 if (fldno > *NF) 315 newfld(fldno); 316 DPRINTF("setting field %d to %g\n", fldno, f); 317 } else if (&vp->fval == NF) { 318 donerec = false; /* mark $0 invalid */ 319 setlastfld(f); 320 DPRINTF("setting NF to %g\n", f); 321 } else if (isrec(vp)) { 322 donefld = false; /* mark $1... invalid */ 323 donerec = true; 324 savefs(); 325 } else if (vp == ofsloc) { 326 if (!donerec) 327 recbld(); 328 } 329 if (freeable(vp)) 330 xfree(vp->sval); /* free any previous string */ 331 vp->tval &= ~(STR|CONVC|CONVO); /* mark string invalid */ 332 vp->fmt = NULL; 333 vp->tval |= NUM; /* mark number ok */ 334 if (f == -0) /* who would have thought this possible? */ 335 f = 0; 336 DPRINTF("setfval %p: %s = %g, t=%o\n", (void*)vp, NN(vp->nval), f, vp->tval); 337 return vp->fval = f; 338 } 339 340 void funnyvar(Cell *vp, const char *rw) 341 { 342 if (isarr(vp)) 343 FATAL("can't %s %s; it's an array name.", rw, vp->nval); 344 if (vp->tval & FCN) 345 FATAL("can't %s %s; it's a function.", rw, vp->nval); 346 WARNING("funny variable %p: n=%s s=\"%s\" f=%g t=%o", 347 (void *)vp, vp->nval, vp->sval, vp->fval, vp->tval); 348 } 349 350 char *setsval(Cell *vp, const char *s) /* set string val of a Cell */ 351 { 352 char *t; 353 int fldno; 354 Awkfloat f; 355 356 DPRINTF("starting setsval %p: %s = \"%s\", t=%o, r,f=%d,%d\n", 357 (void*)vp, NN(vp->nval), s, vp->tval, donerec, donefld); 358 if ((vp->tval & (NUM | STR)) == 0) 359 funnyvar(vp, "assign to"); 360 if (isfld(vp)) { 361 donerec = false; /* mark $0 invalid */ 362 fldno = atoi(vp->nval); 363 if (fldno > *NF) 364 newfld(fldno); 365 DPRINTF("setting field %d to %s (%p)\n", fldno, s, s); 366 } else if (isrec(vp)) { 367 donefld = false; /* mark $1... invalid */ 368 donerec = true; 369 savefs(); 370 } else if (vp == ofsloc) { 371 if (!donerec) 372 recbld(); 373 } 374 t = s ? tostring(s) : tostring(""); /* in case it's self-assign */ 375 if (freeable(vp)) 376 xfree(vp->sval); 377 vp->tval &= ~(NUM|CONVC|CONVO); 378 vp->tval |= STR; 379 vp->fmt = NULL; 380 setfree(vp); 381 DPRINTF("setsval %p: %s = \"%s (%p) \", t=%o r,f=%d,%d\n", 382 (void*)vp, NN(vp->nval), t, t, vp->tval, donerec, donefld); 383 vp->sval = t; 384 if (&vp->fval == NF) { 385 donerec = false; /* mark $0 invalid */ 386 f = getfval(vp); 387 setlastfld(f); 388 DPRINTF("setting NF to %g\n", f); 389 } 390 391 return(vp->sval); 392 } 393 394 Awkfloat getfval(Cell *vp) /* get float val of a Cell */ 395 { 396 if ((vp->tval & (NUM | STR)) == 0) 397 funnyvar(vp, "read value of"); 398 if (isfld(vp) && !donefld) 399 fldbld(); 400 else if (isrec(vp) && !donerec) 401 recbld(); 402 if (!isnum(vp)) { /* not a number */ 403 vp->fval = atof(vp->sval); /* best guess */ 404 if (is_number(vp->sval) && !(vp->tval&CON)) 405 vp->tval |= NUM; /* make NUM only sparingly */ 406 } 407 DPRINTF("getfval %p: %s = %g, t=%o\n", 408 (void*)vp, NN(vp->nval), vp->fval, vp->tval); 409 return(vp->fval); 410 } 411 412 static char *get_str_val(Cell *vp, char **fmt) /* get string val of a Cell */ 413 { 414 char s[256]; 415 double dtemp; 416 417 if ((vp->tval & (NUM | STR)) == 0) 418 funnyvar(vp, "read value of"); 419 if (isfld(vp) && ! donefld) 420 fldbld(); 421 else if (isrec(vp) && ! donerec) 422 recbld(); 423 424 /* 425 * ADR: This is complicated and more fragile than is desirable. 426 * Retrieving a string value for a number associates the string 427 * value with the scalar. Previously, the string value was 428 * sticky, meaning if converted via OFMT that became the value 429 * (even though POSIX wants it to be via CONVFMT). Or if CONVFMT 430 * changed after a string value was retrieved, the original value 431 * was maintained and used. Also not per POSIX. 432 * 433 * We work around this design by adding two additional flags, 434 * CONVC and CONVO, indicating how the string value was 435 * obtained (via CONVFMT or OFMT) and _also_ maintaining a copy 436 * of the pointer to the xFMT format string used for the 437 * conversion. This pointer is only read, **never** dereferenced. 438 * The next time we do a conversion, if it's coming from the same 439 * xFMT as last time, and the pointer value is different, we 440 * know that the xFMT format string changed, and we need to 441 * redo the conversion. If it's the same, we don't have to. 442 * 443 * There are also several cases where we don't do a conversion, 444 * such as for a field (see the checks below). 445 */ 446 447 /* Don't duplicate the code for actually updating the value */ 448 #define update_str_val(vp) \ 449 { \ 450 if (freeable(vp)) \ 451 xfree(vp->sval); \ 452 if (modf(vp->fval, &dtemp) == 0) /* it's integral */ \ 453 snprintf(s, sizeof (s), "%.30g", vp->fval); \ 454 else \ 455 snprintf(s, sizeof (s), *fmt, vp->fval); \ 456 vp->sval = tostring(s); \ 457 vp->tval &= ~DONTFREE; \ 458 vp->tval |= STR; \ 459 } 460 461 if (isstr(vp) == 0) { 462 update_str_val(vp); 463 if (fmt == OFMT) { 464 vp->tval &= ~CONVC; 465 vp->tval |= CONVO; 466 } else { 467 /* CONVFMT */ 468 vp->tval &= ~CONVO; 469 vp->tval |= CONVC; 470 } 471 vp->fmt = *fmt; 472 } else if ((vp->tval & DONTFREE) != 0 || ! isnum(vp) || isfld(vp)) { 473 goto done; 474 } else if (isstr(vp)) { 475 if (fmt == OFMT) { 476 if ((vp->tval & CONVC) != 0 477 || ((vp->tval & CONVO) != 0 && vp->fmt != *fmt)) { 478 update_str_val(vp); 479 vp->tval &= ~CONVC; 480 vp->tval |= CONVO; 481 vp->fmt = *fmt; 482 } 483 } else { 484 /* CONVFMT */ 485 if ((vp->tval & CONVO) != 0 486 || ((vp->tval & CONVC) != 0 && vp->fmt != *fmt)) { 487 update_str_val(vp); 488 vp->tval &= ~CONVO; 489 vp->tval |= CONVC; 490 vp->fmt = *fmt; 491 } 492 } 493 } 494 done: 495 DPRINTF("getsval %p: %s = \"%s (%p)\", t=%o\n", 496 (void*)vp, NN(vp->nval), vp->sval, vp->sval, vp->tval); 497 return(vp->sval); 498 } 499 500 char *getsval(Cell *vp) /* get string val of a Cell */ 501 { 502 return get_str_val(vp, CONVFMT); 503 } 504 505 char *getpssval(Cell *vp) /* get string val of a Cell for print */ 506 { 507 return get_str_val(vp, OFMT); 508 } 509 510 511 char *tostring(const char *s) /* make a copy of string s */ 512 { 513 char *p = strdup(s); 514 if (p == NULL) 515 FATAL("out of space in tostring on %s", s); 516 return(p); 517 } 518 519 char *tostringN(const char *s, size_t n) /* make a copy of string s */ 520 { 521 char *p; 522 523 p = malloc(n); 524 if (p == NULL) 525 FATAL("out of space in tostring on %s", s); 526 strcpy(p, s); 527 return(p); 528 } 529 530 Cell *catstr(Cell *a, Cell *b) /* concatenate a and b */ 531 { 532 Cell *c; 533 char *p; 534 char *sa = getsval(a); 535 char *sb = getsval(b); 536 size_t l = strlen(sa) + strlen(sb) + 1; 537 p = malloc(l); 538 if (p == NULL) 539 FATAL("out of space concatenating %s and %s", sa, sb); 540 snprintf(p, l, "%s%s", sa, sb); 541 542 l++; // add room for ' ' 543 char *newbuf = malloc(l); 544 if (newbuf == NULL) 545 FATAL("out of space concatenating %s and %s", sa, sb); 546 // See string() in lex.c; a string "xx" is stored in the symbol 547 // table as "xx ". 548 snprintf(newbuf, l, "%s ", p); 549 c = setsymtab(newbuf, p, 0.0, CON|STR|DONTFREE, symtab); 550 free(p); 551 free(newbuf); 552 return c; 553 } 554 555 char *qstring(const char *is, int delim) /* collect string up to next delim */ 556 { 557 const char *os = is; 558 int c, n; 559 const uschar *s = (const uschar *) is; 560 uschar *buf, *bp; 561 562 if ((buf = malloc(strlen(is)+3)) == NULL) 563 FATAL( "out of space in qstring(%s)", s); 564 for (bp = buf; (c = *s) != delim; s++) { 565 if (c == '\n') 566 SYNTAX( "newline in string %.20s...", os ); 567 else if (c != '\\') 568 *bp++ = c; 569 else { /* \something */ 570 c = *++s; 571 if (c == 0) { /* \ at end */ 572 *bp++ = '\\'; 573 break; /* for loop */ 574 } 575 switch (c) { 576 case '\\': *bp++ = '\\'; break; 577 case 'n': *bp++ = '\n'; break; 578 case 't': *bp++ = '\t'; break; 579 case 'b': *bp++ = '\b'; break; 580 case 'f': *bp++ = '\f'; break; 581 case 'r': *bp++ = '\r'; break; 582 case 'v': *bp++ = '\v'; break; 583 case 'a': *bp++ = '\a'; break; 584 default: 585 if (!isdigit(c)) { 586 *bp++ = c; 587 break; 588 } 589 n = c - '0'; 590 if (isdigit(s[1])) { 591 n = 8 * n + *++s - '0'; 592 if (isdigit(s[1])) 593 n = 8 * n + *++s - '0'; 594 } 595 *bp++ = n; 596 break; 597 } 598 } 599 } 600 *bp++ = 0; 601 return (char *) buf; 602 } 603 604 const char *flags2str(int flags) 605 { 606 static const struct ftab { 607 const char *name; 608 int value; 609 } flagtab[] = { 610 { "NUM", NUM }, 611 { "STR", STR }, 612 { "DONTFREE", DONTFREE }, 613 { "CON", CON }, 614 { "ARR", ARR }, 615 { "FCN", FCN }, 616 { "FLD", FLD }, 617 { "REC", REC }, 618 { "CONVC", CONVC }, 619 { "CONVO", CONVO }, 620 { NULL, 0 } 621 }; 622 static char buf[100]; 623 int i; 624 char *cp = buf; 625 626 for (i = 0; flagtab[i].name != NULL; i++) { 627 if ((flags & flagtab[i].value) != 0) { 628 if (cp > buf) 629 *cp++ = '|'; 630 strcpy(cp, flagtab[i].name); 631 cp += strlen(cp); 632 } 633 } 634 635 return buf; 636 } 637