1 /* 2 * Copyright (c) 1980 Regents of the University of California. 3 * All rights reserved. The Berkeley Software License Agreement 4 * specifies the terms and conditions for redistribution. 5 */ 6 7 #ifndef lint 8 static char *sccsid = "@(#)dol.c 5.5 (Berkeley) 01/15/88"; 9 #endif 10 11 #include "sh.h" 12 13 /* 14 * C shell 15 */ 16 17 /* 18 * These routines perform variable substitution and quoting via ' and ". 19 * To this point these constructs have been preserved in the divided 20 * input words. Here we expand variables and turn quoting via ' and " into 21 * QUOTE bits on characters (which prevent further interpretation). 22 * If the `:q' modifier was applied during history expansion, then 23 * some QUOTEing may have occurred already, so we dont "trim()" here. 24 */ 25 26 int Dpeekc, Dpeekrd; /* Peeks for DgetC and Dreadc */ 27 char *Dcp, **Dvp; /* Input vector for Dreadc */ 28 29 #define DEOF -1 30 31 #define unDgetC(c) Dpeekc = c 32 33 #define QUOTES (_Q|_Q1|_ESC) /* \ ' " ` */ 34 35 /* 36 * The following variables give the information about the current 37 * $ expansion, recording the current word position, the remaining 38 * words within this expansion, the count of remaining words, and the 39 * information about any : modifier which is being applied. 40 */ 41 char *dolp; /* Remaining chars from this word */ 42 char **dolnxt; /* Further words */ 43 int dolcnt; /* Count of further words */ 44 char dolmod; /* : modifier character */ 45 int dolmcnt; /* :gx -> 10000, else 1 */ 46 47 /* 48 * Fix up the $ expansions and quotations in the 49 * argument list to command t. 50 */ 51 Dfix(t) 52 register struct command *t; 53 { 54 register char **pp; 55 register char *p; 56 57 if (noexec) 58 return; 59 /* Note that t_dcom isn't trimmed thus !...:q's aren't lost */ 60 for (pp = t->t_dcom; p = *pp++;) 61 while (*p) 62 if (cmap(*p++, _DOL|QUOTES)) { /* $, \, ', ", ` */ 63 Dfix2(t->t_dcom); /* found one */ 64 blkfree(t->t_dcom); 65 t->t_dcom = gargv; 66 gargv = 0; 67 return; 68 } 69 } 70 71 /* 72 * $ substitute one word, for i/o redirection 73 */ 74 char * 75 Dfix1(cp) 76 register char *cp; 77 { 78 char *Dv[2]; 79 80 if (noexec) 81 return (0); 82 Dv[0] = cp; Dv[1] = NOSTR; 83 Dfix2(Dv); 84 if (gargc != 1) { 85 setname(cp); 86 bferr("Ambiguous"); 87 } 88 cp = savestr(gargv[0]); 89 blkfree(gargv), gargv = 0; 90 return (cp); 91 } 92 93 /* 94 * Subroutine to do actual fixing after state initialization. 95 */ 96 Dfix2(v) 97 char **v; 98 { 99 char *agargv[GAVSIZ]; 100 101 ginit(agargv); /* Initialize glob's area pointers */ 102 Dvp = v; Dcp = ""; /* Setup input vector for Dreadc */ 103 unDgetC(0); unDredc(0); /* Clear out any old peeks (at error) */ 104 dolp = 0; dolcnt = 0; /* Clear out residual $ expands (...) */ 105 while (Dword()) 106 continue; 107 gargv = copyblk(gargv); 108 } 109 110 /* 111 * Get a word. This routine is analogous to the routine 112 * word() in sh.lex.c for the main lexical input. One difference 113 * here is that we don't get a newline to terminate our expansion. 114 * Rather, DgetC will return a DEOF when we hit the end-of-input. 115 */ 116 Dword() 117 { 118 register int c, c1; 119 char wbuf[BUFSIZ]; 120 register char *wp = wbuf; 121 register int i = BUFSIZ - 4; 122 register bool dolflg; 123 bool sofar = 0; 124 125 loop: 126 c = DgetC(DODOL); 127 switch (c) { 128 129 case DEOF: 130 deof: 131 if (sofar == 0) 132 return (0); 133 /* finish this word and catch the code above the next time */ 134 unDredc(c); 135 /* fall into ... */ 136 137 case '\n': 138 *wp = 0; 139 goto ret; 140 141 case ' ': 142 case '\t': 143 goto loop; 144 145 case '`': 146 /* We preserve ` quotations which are done yet later */ 147 *wp++ = c, --i; 148 case '\'': 149 case '"': 150 /* 151 * Note that DgetC never returns a QUOTES character 152 * from an expansion, so only true input quotes will 153 * get us here or out. 154 */ 155 c1 = c; 156 dolflg = c1 == '"' ? DODOL : 0; 157 for (;;) { 158 c = DgetC(dolflg); 159 if (c == c1) 160 break; 161 if (c == '\n' || c == DEOF) 162 error("Unmatched %c", c1); 163 if ((c & (QUOTE|TRIM)) == ('\n' | QUOTE)) 164 --wp, ++i; 165 if (--i <= 0) 166 goto toochars; 167 switch (c1) { 168 169 case '"': 170 /* 171 * Leave any `s alone for later. 172 * Other chars are all quoted, thus `...` 173 * can tell it was within "...". 174 */ 175 *wp++ = c == '`' ? '`' : c | QUOTE; 176 break; 177 178 case '\'': 179 /* Prevent all further interpretation */ 180 *wp++ = c | QUOTE; 181 break; 182 183 case '`': 184 /* Leave all text alone for later */ 185 *wp++ = c; 186 break; 187 } 188 } 189 if (c1 == '`') 190 *wp++ = '`', --i; 191 goto pack; /* continue the word */ 192 193 case '\\': 194 c = DgetC(0); /* No $ subst! */ 195 if (c == '\n' || c == DEOF) 196 goto loop; 197 c |= QUOTE; 198 break; 199 } 200 unDgetC(c); 201 pack: 202 sofar = 1; 203 /* pack up more characters in this word */ 204 for (;;) { 205 c = DgetC(DODOL); 206 if (c == '\\') { 207 c = DgetC(0); 208 if (c == DEOF) 209 goto deof; 210 if (c == '\n') 211 c = ' '; 212 else 213 c |= QUOTE; 214 } 215 if (c == DEOF) 216 goto deof; 217 if (cmap(c, _SP|_NL|_Q|_Q1)) { /* sp \t\n'"` */ 218 unDgetC(c); 219 if (cmap(c, QUOTES)) 220 goto loop; 221 *wp++ = 0; 222 goto ret; 223 } 224 if (--i <= 0) 225 toochars: 226 error("Word too long"); 227 *wp++ = c; 228 } 229 ret: 230 Gcat("", wbuf); 231 return (1); 232 } 233 234 /* 235 * Get a character, performing $ substitution unless flag is 0. 236 * Any QUOTES character which is returned from a $ expansion is 237 * QUOTEd so that it will not be recognized above. 238 */ 239 DgetC(flag) 240 register int flag; 241 { 242 register int c; 243 244 top: 245 if (c = Dpeekc) { 246 Dpeekc = 0; 247 return (c); 248 } 249 if (lap) { 250 c = *lap++ & (QUOTE|TRIM); 251 if (c == 0) { 252 lap = 0; 253 goto top; 254 } 255 quotspec: 256 if (cmap(c, QUOTES)) 257 return (c | QUOTE); 258 return (c); 259 } 260 if (dolp) { 261 if (c = *dolp++ & (QUOTE|TRIM)) 262 goto quotspec; 263 if (dolcnt > 0) { 264 setDolp(*dolnxt++); 265 --dolcnt; 266 return (' '); 267 } 268 dolp = 0; 269 } 270 if (dolcnt > 0) { 271 setDolp(*dolnxt++); 272 --dolcnt; 273 goto top; 274 } 275 c = Dredc(); 276 if (c == '$' && flag) { 277 Dgetdol(); 278 goto top; 279 } 280 return (c); 281 } 282 283 char *nulvec[] = { 0 }; 284 struct varent nulargv = { nulvec, "argv", 0 }; 285 286 /* 287 * Handle the multitudinous $ expansion forms. 288 * Ugh. 289 */ 290 Dgetdol() 291 { 292 register char *np; 293 register struct varent *vp; 294 char name[20]; 295 int c, sc; 296 int subscr = 0, lwb = 1, upb = 0; 297 bool dimen = 0, bitset = 0; 298 char wbuf[BUFSIZ]; 299 300 dolmod = dolmcnt = 0; 301 c = sc = DgetC(0); 302 if (c == '{') 303 c = DgetC(0); /* sc is { to take } later */ 304 if ((c & TRIM) == '#') 305 dimen++, c = DgetC(0); /* $# takes dimension */ 306 else if (c == '?') 307 bitset++, c = DgetC(0); /* $? tests existence */ 308 switch (c) { 309 310 case '$': 311 if (dimen || bitset) 312 goto syntax; /* No $?$, $#$ */ 313 setDolp(doldol); 314 goto eatbrac; 315 316 case '<'|QUOTE: 317 if (dimen || bitset) 318 goto syntax; /* No $?<, $#< */ 319 for (np = wbuf; read(OLDSTD, np, 1) == 1; np++) { 320 if (np >= &wbuf[BUFSIZ-1]) 321 error("$< line too long"); 322 if (*np <= 0 || *np == '\n') 323 break; 324 } 325 *np = 0; 326 /* 327 * KLUDGE: dolmod is set here because it will 328 * cause setDolp to call domod and thus to copy wbuf. 329 * Otherwise setDolp would use it directly. If we saved 330 * it ourselves, no one would know when to free it. 331 * The actual function of the 'q' causes filename 332 * expansion not to be done on the interpolated value. 333 */ 334 dolmod = 'q'; 335 dolmcnt = 10000; 336 setDolp(wbuf); 337 goto eatbrac; 338 339 case DEOF: 340 case '\n': 341 goto syntax; 342 343 case '*': 344 (void) strcpy(name, "argv"); 345 vp = adrof("argv"); 346 subscr = -1; /* Prevent eating [...] */ 347 break; 348 349 default: 350 np = name; 351 if (digit(c)) { 352 if (dimen) 353 goto syntax; /* No $#1, e.g. */ 354 subscr = 0; 355 do { 356 subscr = subscr * 10 + c - '0'; 357 c = DgetC(0); 358 } while (digit(c)); 359 unDredc(c); 360 if (subscr < 0) 361 goto oob; 362 if (subscr == 0) { 363 if (bitset) { 364 dolp = file ? "1" : "0"; 365 goto eatbrac; 366 } 367 if (file == 0) 368 error("No file for $0"); 369 setDolp(file); 370 goto eatbrac; 371 } 372 if (bitset) 373 goto syntax; 374 vp = adrof("argv"); 375 if (vp == 0) { 376 vp = &nulargv; 377 goto eatmod; 378 } 379 break; 380 } 381 if (!alnum(c)) 382 goto syntax; 383 for (;;) { 384 *np++ = c; 385 c = DgetC(0); 386 if (!alnum(c)) 387 break; 388 if (np >= &name[sizeof name - 2]) 389 syntax: 390 error("Variable syntax"); 391 } 392 *np++ = 0; 393 unDredc(c); 394 vp = adrof(name); 395 } 396 if (bitset) { 397 dolp = (vp || getenv(name)) ? "1" : "0"; 398 goto eatbrac; 399 } 400 if (vp == 0) { 401 np = getenv(name); 402 if (np) { 403 addla(np); 404 goto eatbrac; 405 } 406 udvar(name); 407 /*NOTREACHED*/ 408 } 409 c = DgetC(0); 410 upb = blklen(vp->vec); 411 if (dimen == 0 && subscr == 0 && c == '[') { 412 np = name; 413 for (;;) { 414 c = DgetC(DODOL); /* Allow $ expand within [ ] */ 415 if (c == ']') 416 break; 417 if (c == '\n' || c == DEOF) 418 goto syntax; 419 if (np >= &name[sizeof name - 2]) 420 goto syntax; 421 *np++ = c; 422 } 423 *np = 0, np = name; 424 if (dolp || dolcnt) /* $ exp must end before ] */ 425 goto syntax; 426 if (!*np) 427 goto syntax; 428 if (digit(*np)) { 429 register int i = 0; 430 431 while (digit(*np)) 432 i = i * 10 + *np++ - '0'; 433 if ((i < 0 || i > upb) && !any(*np, "-*")) { 434 oob: 435 setname(vp->v_name); 436 error("Subscript out of range"); 437 } 438 lwb = i; 439 if (!*np) 440 upb = lwb, np = "*"; 441 } 442 if (*np == '*') 443 np++; 444 else if (*np != '-') 445 goto syntax; 446 else { 447 register int i = upb; 448 449 np++; 450 if (digit(*np)) { 451 i = 0; 452 while (digit(*np)) 453 i = i * 10 + *np++ - '0'; 454 if (i < 0 || i > upb) 455 goto oob; 456 } 457 if (i < lwb) 458 upb = lwb - 1; 459 else 460 upb = i; 461 } 462 if (lwb == 0) { 463 if (upb != 0) 464 goto oob; 465 upb = -1; 466 } 467 if (*np) 468 goto syntax; 469 } else { 470 if (subscr > 0) 471 if (subscr > upb) 472 lwb = 1, upb = 0; 473 else 474 lwb = upb = subscr; 475 unDredc(c); 476 } 477 if (dimen) { 478 char *cp = putn(upb - lwb + 1); 479 480 addla(cp); 481 xfree(cp); 482 } else { 483 eatmod: 484 c = DgetC(0); 485 if (c == ':') { 486 c = DgetC(0), dolmcnt = 1; 487 if (c == 'g') 488 c = DgetC(0), dolmcnt = 10000; 489 if (!any(c, "htrqxe")) 490 error("Bad : mod in $"); 491 dolmod = c; 492 if (c == 'q') 493 dolmcnt = 10000; 494 } else 495 unDredc(c); 496 dolnxt = &vp->vec[lwb - 1]; 497 dolcnt = upb - lwb + 1; 498 } 499 eatbrac: 500 if (sc == '{') { 501 c = Dredc(); 502 if (c != '}') 503 goto syntax; 504 } 505 } 506 507 setDolp(cp) 508 register char *cp; 509 { 510 register char *dp; 511 512 if (dolmod == 0 || dolmcnt == 0) { 513 dolp = cp; 514 return; 515 } 516 dp = domod(cp, dolmod); 517 if (dp) { 518 dolmcnt--; 519 addla(dp); 520 xfree(dp); 521 } else 522 addla(cp); 523 dolp = ""; 524 if (err) 525 error(err); 526 } 527 528 unDredc(c) 529 int c; 530 { 531 532 Dpeekrd = c; 533 } 534 535 Dredc() 536 { 537 register int c; 538 539 if (c = Dpeekrd) { 540 Dpeekrd = 0; 541 return (c); 542 } 543 if (Dcp && (c = *Dcp++)) 544 return (c&(QUOTE|TRIM)); 545 if (*Dvp == 0) { 546 Dcp = 0; 547 return (DEOF); 548 } 549 Dcp = *Dvp++; 550 return (' '); 551 } 552 553 Dtestq(c) 554 register int c; 555 { 556 557 if (cmap(c, QUOTES)) 558 gflag = 1; 559 } 560 561 /* 562 * Form a shell temporary file (in unit 0) from the words 563 * of the shell input up to EOF or a line the same as "term". 564 * Unit 0 should have been closed before this call. 565 */ 566 heredoc(term) 567 char *term; 568 { 569 register int c; 570 char *Dv[2]; 571 char obuf[BUFSIZ], lbuf[BUFSIZ], mbuf[BUFSIZ]; 572 int ocnt, lcnt, mcnt; 573 register char *lbp, *obp, *mbp; 574 char **vp; 575 bool quoted; 576 577 if (creat(shtemp, 0600) < 0) 578 Perror(shtemp); 579 (void) close(0); 580 if (open(shtemp, 2) < 0) { 581 int oerrno = errno; 582 583 (void) unlink(shtemp); 584 errno = oerrno; 585 Perror(shtemp); 586 } 587 (void) unlink(shtemp); /* 0 0 inode! */ 588 Dv[0] = term; Dv[1] = NOSTR; gflag = 0; 589 trim(Dv); rscan(Dv, Dtestq); quoted = gflag; 590 ocnt = BUFSIZ; obp = obuf; 591 for (;;) { 592 /* 593 * Read up a line 594 */ 595 lbp = lbuf; lcnt = BUFSIZ - 4; 596 for (;;) { 597 c = readc(1); /* 1 -> Want EOF returns */ 598 if (c < 0 || c == '\n') 599 break; 600 if (c &= TRIM) { 601 *lbp++ = c; 602 if (--lcnt < 0) { 603 setname("<<"); 604 error("Line overflow"); 605 } 606 } 607 } 608 *lbp = 0; 609 610 /* 611 * Check for EOF or compare to terminator -- before expansion 612 */ 613 if (c < 0 || eq(lbuf, term)) { 614 (void) write(0, obuf, BUFSIZ - ocnt); 615 (void) lseek(0, (off_t)0, 0); 616 return; 617 } 618 619 /* 620 * If term was quoted or -n just pass it on 621 */ 622 if (quoted || noexec) { 623 *lbp++ = '\n'; *lbp = 0; 624 for (lbp = lbuf; c = *lbp++;) { 625 *obp++ = c; 626 if (--ocnt == 0) { 627 (void) write(0, obuf, BUFSIZ); 628 obp = obuf; ocnt = BUFSIZ; 629 } 630 } 631 continue; 632 } 633 634 /* 635 * Term wasn't quoted so variable and then command 636 * expand the input line 637 */ 638 Dcp = lbuf; Dvp = Dv + 1; mbp = mbuf; mcnt = BUFSIZ - 4; 639 for (;;) { 640 c = DgetC(DODOL); 641 if (c == DEOF) 642 break; 643 if ((c &= TRIM) == 0) 644 continue; 645 /* \ quotes \ $ ` here */ 646 if (c =='\\') { 647 c = DgetC(0); 648 if (!any(c, "$\\`")) 649 unDgetC(c | QUOTE), c = '\\'; 650 else 651 c |= QUOTE; 652 } 653 *mbp++ = c; 654 if (--mcnt == 0) { 655 setname("<<"); 656 bferr("Line overflow"); 657 } 658 } 659 *mbp++ = 0; 660 661 /* 662 * If any ` in line do command substitution 663 */ 664 mbp = mbuf; 665 if (any('`', mbp)) { 666 /* 667 * 1 arg to dobackp causes substitution to be literal. 668 * Words are broken only at newlines so that all blanks 669 * and tabs are preserved. Blank lines (null words) 670 * are not discarded. 671 */ 672 vp = dobackp(mbuf, 1); 673 } else 674 /* Setup trivial vector similar to return of dobackp */ 675 Dv[0] = mbp, Dv[1] = NOSTR, vp = Dv; 676 677 /* 678 * Resurrect the words from the command substitution 679 * each separated by a newline. Note that the last 680 * newline of a command substitution will have been 681 * discarded, but we put a newline after the last word 682 * because this represents the newline after the last 683 * input line! 684 */ 685 for (; *vp; vp++) { 686 for (mbp = *vp; *mbp; mbp++) { 687 *obp++ = *mbp & TRIM; 688 if (--ocnt == 0) { 689 (void) write(0, obuf, BUFSIZ); 690 obp = obuf; ocnt = BUFSIZ; 691 } 692 } 693 *obp++ = '\n'; 694 if (--ocnt == 0) { 695 (void) write(0, obuf, BUFSIZ); 696 obp = obuf; ocnt = BUFSIZ; 697 } 698 } 699 if (pargv) 700 blkfree(pargv), pargv = 0; 701 } 702 } 703