1 /* $NetBSD: lex.c,v 1.7 1999/10/20 15:49:15 hubertf Exp $ */ 2 3 /* 4 * lexical analysis and source input 5 */ 6 7 #include "sh.h" 8 #include <ctype.h> 9 10 11 /* Structure to keep track of the lexing state and the various pieces of info 12 * needed for each particular state. 13 */ 14 typedef struct lex_state Lex_state; 15 struct lex_state { 16 int ls_state; 17 union { 18 /* $(...) */ 19 struct scsparen_info { 20 int nparen; /* count open parenthesis */ 21 int csstate; /* XXX remove */ 22 #define ls_scsparen ls_info.u_scsparen 23 } u_scsparen; 24 25 /* $((...)) */ 26 struct sasparen_info { 27 int nparen; /* count open parenthesis */ 28 int start; /* marks start of $(( in output str */ 29 #define ls_sasparen ls_info.u_sasparen 30 } u_sasparen; 31 32 /* ((...)) */ 33 struct sletparen_info { 34 int nparen; /* count open parenthesis */ 35 #define ls_sletparen ls_info.u_sletparen 36 } u_sletparen; 37 38 /* `...` */ 39 struct sbquote_info { 40 int indquotes; /* true if in double quotes: "`...`" */ 41 #define ls_sbquote ls_info.u_sbquote 42 } u_sbquote; 43 44 Lex_state *base; /* used to point to next state block */ 45 } ls_info; 46 }; 47 48 typedef struct State_info State_info; 49 struct State_info { 50 Lex_state *base; 51 Lex_state *end; 52 }; 53 54 55 static void readhere ARGS((struct ioword *iop)); 56 static int getsc__ ARGS((void)); 57 static void getsc_line ARGS((Source *s)); 58 static int getsc_bn ARGS((void)); 59 static char *get_brace_var ARGS((XString *wsp, char *wp)); 60 static int arraysub ARGS((char **strp)); 61 static const char *ungetsc ARGS((int c)); 62 static void gethere ARGS((void)); 63 static Lex_state *push_state_ ARGS((State_info *si, Lex_state *old_end)); 64 static Lex_state *pop_state_ ARGS((State_info *si, Lex_state *old_end)); 65 66 static int backslash_skip; 67 static int ignore_backslash_newline; 68 69 /* optimized getsc_bn() */ 70 #define getsc() (*source->str != '\0' && *source->str != '\\' \ 71 && !backslash_skip ? *source->str++ : getsc_bn()) 72 /* optimized getsc__() */ 73 #define getsc_() ((*source->str != '\0') ? *source->str++ : getsc__()) 74 75 #define STATE_BSIZE 32 76 77 #define PUSH_STATE(s) do { \ 78 if (++statep == state_info.end) \ 79 statep = push_state_(&state_info, statep); \ 80 state = statep->ls_state = (s); \ 81 } while (0) 82 83 #define POP_STATE() do { \ 84 if (--statep == state_info.base) \ 85 statep = pop_state_(&state_info, statep); \ 86 state = statep->ls_state; \ 87 } while (0) 88 89 90 91 /* 92 * Lexical analyzer 93 * 94 * tokens are not regular expressions, they are LL(1). 95 * for example, "${var:-${PWD}}", and "$(size $(whence ksh))". 96 * hence the state stack. 97 */ 98 99 int 100 yylex(cf) 101 int cf; 102 { 103 Lex_state states[STATE_BSIZE], *statep; 104 State_info state_info; 105 register int c, state; 106 XString ws; /* expandable output word */ 107 register char *wp; /* output word pointer */ 108 char *sp, *dp; 109 int c2; 110 111 112 Again: 113 states[0].ls_state = -1; 114 states[0].ls_info.base = (Lex_state *) 0; 115 statep = &states[1]; 116 state_info.base = states; 117 state_info.end = &states[STATE_BSIZE]; 118 119 Xinit(ws, wp, 64, ATEMP); 120 121 backslash_skip = 0; 122 ignore_backslash_newline = 0; 123 124 if (cf&ONEWORD) 125 state = SWORD; 126 #ifdef KSH 127 else if (cf&LETEXPR) { 128 *wp++ = OQUOTE; /* enclose arguments in (double) quotes */ 129 state = SLETPAREN; 130 statep->ls_sletparen.nparen = 0; 131 } 132 #endif /* KSH */ 133 else { /* normal lexing */ 134 state = (cf & HEREDELIM) ? SHEREDELIM : SBASE; 135 while ((c = getsc()) == ' ' || c == '\t') 136 ; 137 if (c == '#') { 138 ignore_backslash_newline++; 139 while ((c = getsc()) != '\0' && c != '\n') 140 ; 141 ignore_backslash_newline--; 142 } 143 ungetsc(c); 144 } 145 if (source->flags & SF_ALIAS) { /* trailing ' ' in alias definition */ 146 source->flags &= ~SF_ALIAS; 147 /* In POSIX mode, a trailing space only counts if we are 148 * parsing a simple command 149 */ 150 if (!Flag(FPOSIX) || (cf & CMDWORD)) 151 cf |= ALIAS; 152 } 153 154 /* Initial state: one of SBASE SHEREDELIM SWORD SASPAREN */ 155 statep->ls_state = state; 156 157 /* collect non-special or quoted characters to form word */ 158 while (!((c = getsc()) == 0 159 || ((state == SBASE || state == SHEREDELIM) 160 && ctype(c, C_LEX1)))) 161 { 162 Xcheck(ws, wp); 163 switch (state) { 164 case SBASE: 165 if (c == '[' && (cf & (VARASN|ARRAYVAR))) { 166 *wp = EOS; /* temporary */ 167 if (is_wdvarname(Xstring(ws, wp), FALSE)) 168 { 169 char *p, *tmp; 170 171 if (arraysub(&tmp)) { 172 *wp++ = CHAR; 173 *wp++ = c; 174 for (p = tmp; *p; ) { 175 Xcheck(ws, wp); 176 *wp++ = CHAR; 177 *wp++ = *p++; 178 } 179 afree(tmp, ATEMP); 180 break; 181 } else { 182 Source *s; 183 184 s = pushs(SREREAD, 185 source->areap); 186 s->start = s->str 187 = s->u.freeme = tmp; 188 s->next = source; 189 source = s; 190 } 191 } 192 *wp++ = CHAR; 193 *wp++ = c; 194 break; 195 } 196 /* fall through.. */ 197 Sbase1: /* includes *(...|...) pattern (*+?@!) */ 198 #ifdef KSH 199 if (c == '*' || c == '@' || c == '+' || c == '?' 200 || c == '!') 201 { 202 c2 = getsc(); 203 if (c2 == '(' /*)*/ ) { 204 *wp++ = OPAT; 205 *wp++ = c; 206 PUSH_STATE(SPATTERN); 207 break; 208 } 209 ungetsc(c2); 210 } 211 #endif /* KSH */ 212 /* fall through.. */ 213 Sbase2: /* doesn't include *(...|...) pattern (*+?@!) */ 214 switch (c) { 215 case '\\': 216 c = getsc(); 217 #ifdef OS2 218 if (isalnum(c)) { 219 *wp++ = CHAR, *wp++ = '\\'; 220 *wp++ = CHAR, *wp++ = c; 221 } else 222 #endif 223 if (c) /* trailing \ is lost */ 224 *wp++ = QCHAR, *wp++ = c; 225 break; 226 case '\'': 227 *wp++ = OQUOTE; 228 ignore_backslash_newline++; 229 PUSH_STATE(SSQUOTE); 230 break; 231 case '"': 232 *wp++ = OQUOTE; 233 PUSH_STATE(SDQUOTE); 234 break; 235 default: 236 goto Subst; 237 } 238 break; 239 240 Subst: 241 switch (c) { 242 case '\\': 243 c = getsc(); 244 switch (c) { 245 case '"': case '\\': 246 case '$': case '`': 247 *wp++ = QCHAR, *wp++ = c; 248 break; 249 default: 250 Xcheck(ws, wp); 251 if (c) { /* trailing \ is lost */ 252 *wp++ = CHAR, *wp++ = '\\'; 253 *wp++ = CHAR, *wp++ = c; 254 } 255 break; 256 } 257 break; 258 case '$': 259 c = getsc(); 260 if (c == '(') /*)*/ { 261 c = getsc(); 262 if (c == '(') /*)*/ { 263 PUSH_STATE(SASPAREN); 264 statep->ls_sasparen.nparen = 2; 265 statep->ls_sasparen.start = 266 Xsavepos(ws, wp); 267 *wp++ = EXPRSUB; 268 } else { 269 ungetsc(c); 270 PUSH_STATE(SCSPAREN); 271 statep->ls_scsparen.nparen = 1; 272 statep->ls_scsparen.csstate = 0; 273 *wp++ = COMSUB; 274 } 275 } else if (c == '{') /*}*/ { 276 *wp++ = OSUBST; 277 *wp++ = '{'; /*}*/ 278 wp = get_brace_var(&ws, wp); 279 c = getsc(); 280 /* allow :# and :% (ksh88 compat) */ 281 if (c == ':') { 282 *wp++ = CHAR, *wp++ = c; 283 c = getsc(); 284 } 285 /* If this is a trim operation, 286 * treat (,|,) specially in STBRACE. 287 */ 288 if (c == '#' || c == '%') { 289 ungetsc(c); 290 PUSH_STATE(STBRACE); 291 } else { 292 ungetsc(c); 293 PUSH_STATE(SBRACE); 294 } 295 } else if (ctype(c, C_ALPHA)) { 296 *wp++ = OSUBST; 297 *wp++ = 'X'; 298 do { 299 Xcheck(ws, wp); 300 *wp++ = c; 301 c = getsc(); 302 } while (ctype(c, C_ALPHA|C_DIGIT)); 303 *wp++ = '\0'; 304 *wp++ = CSUBST; 305 *wp++ = 'X'; 306 ungetsc(c); 307 } else if (ctype(c, C_DIGIT|C_VAR1)) { 308 Xcheck(ws, wp); 309 *wp++ = OSUBST; 310 *wp++ = 'X'; 311 *wp++ = c; 312 *wp++ = '\0'; 313 *wp++ = CSUBST; 314 *wp++ = 'X'; 315 } else { 316 *wp++ = CHAR, *wp++ = '$'; 317 ungetsc(c); 318 } 319 break; 320 case '`': 321 PUSH_STATE(SBQUOTE); 322 *wp++ = COMSUB; 323 /* Need to know if we are inside double quotes 324 * since sh/at&t-ksh translate the \" to " in 325 * "`..\"..`". 326 * This is not done in posix mode (section 327 * 3.2.3, Double Quotes: "The backquote shall 328 * retain its special meaning introducing the 329 * other form of command substitution (see 330 * 3.6.3). The portion of the quoted string 331 * from the initial backquote and the 332 * characters up to the next backquote that 333 * is not preceded by a backslash (having 334 * escape characters removed) defines that 335 * command whose output replaces `...` when 336 * the word is expanded." 337 * Section 3.6.3, Command Substitution: 338 * "Within the backquoted style of command 339 * substitution, backslash shall retain its 340 * literal meaning, except when followed by 341 * $ ` \."). 342 */ 343 statep->ls_sbquote.indquotes = 0; 344 if (!Flag(FPOSIX)) { 345 Lex_state *s = statep; 346 Lex_state *base = state_info.base; 347 while (1) { 348 for (; s != base; s--) { 349 if (s->ls_state == SDQUOTE) { 350 statep->ls_sbquote.indquotes = 1; 351 break; 352 } 353 } 354 if (s != base) 355 break; 356 if (!(s = s->ls_info.base)) 357 break; 358 base = s-- - STATE_BSIZE; 359 } 360 } 361 break; 362 default: 363 *wp++ = CHAR, *wp++ = c; 364 } 365 break; 366 367 case SSQUOTE: 368 if (c == '\'') { 369 POP_STATE(); 370 *wp++ = CQUOTE; 371 ignore_backslash_newline--; 372 } else 373 *wp++ = QCHAR, *wp++ = c; 374 break; 375 376 case SDQUOTE: 377 if (c == '"') { 378 POP_STATE(); 379 *wp++ = CQUOTE; 380 } else 381 goto Subst; 382 break; 383 384 case SCSPAREN: /* $( .. ) */ 385 /* todo: deal with $(...) quoting properly 386 * kludge to partly fake quoting inside $(..): doesn't 387 * really work because nested $(..) or ${..} inside 388 * double quotes aren't dealt with. 389 */ 390 switch (statep->ls_scsparen.csstate) { 391 case 0: /* normal */ 392 switch (c) { 393 case '(': 394 statep->ls_scsparen.nparen++; 395 break; 396 case ')': 397 statep->ls_scsparen.nparen--; 398 break; 399 case '\\': 400 statep->ls_scsparen.csstate = 1; 401 break; 402 case '"': 403 statep->ls_scsparen.csstate = 2; 404 break; 405 case '\'': 406 statep->ls_scsparen.csstate = 4; 407 ignore_backslash_newline++; 408 break; 409 } 410 break; 411 412 case 1: /* backslash in normal mode */ 413 case 3: /* backslash in double quotes */ 414 --statep->ls_scsparen.csstate; 415 break; 416 417 case 2: /* double quotes */ 418 if (c == '"') 419 statep->ls_scsparen.csstate = 0; 420 else if (c == '\\') 421 statep->ls_scsparen.csstate = 3; 422 break; 423 424 case 4: /* single quotes */ 425 if (c == '\'') { 426 statep->ls_scsparen.csstate = 0; 427 ignore_backslash_newline--; 428 } 429 break; 430 } 431 if (statep->ls_scsparen.nparen == 0) { 432 POP_STATE(); 433 *wp++ = 0; /* end of COMSUB */ 434 } else 435 *wp++ = c; 436 break; 437 438 case SASPAREN: /* $(( .. )) */ 439 /* todo: deal with $((...); (...)) properly */ 440 /* XXX should nest using existing state machine 441 * (embed "..", $(...), etc.) */ 442 if (c == '(') 443 statep->ls_sasparen.nparen++; 444 else if (c == ')') { 445 statep->ls_sasparen.nparen--; 446 if (statep->ls_sasparen.nparen == 1) { 447 /*(*/ 448 if ((c2 = getsc()) == ')') { 449 POP_STATE(); 450 *wp++ = 0; /* end of EXPRSUB */ 451 break; 452 } else { 453 char *s; 454 455 ungetsc(c2); 456 /* mismatched parenthesis - 457 * assume we were really 458 * parsing a $(..) expression 459 */ 460 s = Xrestpos(ws, wp, 461 statep->ls_sasparen.start); 462 memmove(s + 1, s, wp - s); 463 *s++ = COMSUB; 464 *s = '('; /*)*/ 465 wp++; 466 statep->ls_scsparen.nparen = 1; 467 statep->ls_scsparen.csstate = 0; 468 state = statep->ls_state 469 = SCSPAREN; 470 471 } 472 } 473 } 474 *wp++ = c; 475 break; 476 477 case SBRACE: 478 /*{*/ 479 if (c == '}') { 480 POP_STATE(); 481 *wp++ = CSUBST; 482 *wp++ = /*{*/ '}'; 483 } else 484 goto Sbase1; 485 break; 486 487 case STBRACE: 488 /* Same as SBRACE, except (,|,) treated specially */ 489 /*{*/ 490 if (c == '}') { 491 POP_STATE(); 492 *wp++ = CSUBST; 493 *wp++ = /*{*/ '}'; 494 } else if (c == '|') { 495 *wp++ = SPAT; 496 } else if (c == '(') { 497 *wp++ = OPAT; 498 *wp++ = ' '; /* simile for @ */ 499 PUSH_STATE(SPATTERN); 500 } else 501 goto Sbase1; 502 break; 503 504 case SBQUOTE: 505 if (c == '`') { 506 *wp++ = 0; 507 POP_STATE(); 508 } else if (c == '\\') { 509 switch (c = getsc()) { 510 case '\\': 511 case '$': case '`': 512 *wp++ = c; 513 break; 514 case '"': 515 if (statep->ls_sbquote.indquotes) { 516 *wp++ = c; 517 break; 518 } 519 /* fall through.. */ 520 default: 521 if (c) { /* trailing \ is lost */ 522 *wp++ = '\\'; 523 *wp++ = c; 524 } 525 break; 526 } 527 } else 528 *wp++ = c; 529 break; 530 531 case SWORD: /* ONEWORD */ 532 goto Subst; 533 534 #ifdef KSH 535 case SLETPAREN: /* LETEXPR: (( ... )) */ 536 /*(*/ 537 if (c == ')') { 538 if (statep->ls_sletparen.nparen > 0) 539 --statep->ls_sletparen.nparen; 540 /*(*/ 541 else if ((c2 = getsc()) == ')') { 542 c = 0; 543 *wp++ = CQUOTE; 544 goto Done; 545 } else 546 ungetsc(c2); 547 } else if (c == '(') 548 /* parenthesis inside quotes and backslashes 549 * are lost, but at&t ksh doesn't count them 550 * either 551 */ 552 ++statep->ls_sletparen.nparen; 553 goto Sbase2; 554 #endif /* KSH */ 555 556 case SHEREDELIM: /* <<,<<- delimiter */ 557 /* XXX chuck this state (and the next) - use 558 * the existing states ($ and \`..` should be 559 * stripped of their specialness after the 560 * fact). 561 */ 562 /* here delimiters need a special case since 563 * $ and `..` are not to be treated specially 564 */ 565 if (c == '\\') { 566 c = getsc(); 567 if (c) { /* trailing \ is lost */ 568 *wp++ = QCHAR; 569 *wp++ = c; 570 } 571 } else if (c == '\'') { 572 PUSH_STATE(SSQUOTE); 573 *wp++ = OQUOTE; 574 ignore_backslash_newline++; 575 } else if (c == '"') { 576 state = statep->ls_state = SHEREDQUOTE; 577 *wp++ = OQUOTE; 578 } else { 579 *wp++ = CHAR; 580 *wp++ = c; 581 } 582 break; 583 584 case SHEREDQUOTE: /* " in <<,<<- delimiter */ 585 if (c == '"') { 586 *wp++ = CQUOTE; 587 state = statep->ls_state = SHEREDELIM; 588 } else { 589 if (c == '\\') { 590 switch (c = getsc()) { 591 case '\\': case '"': 592 case '$': case '`': 593 break; 594 default: 595 if (c) { /* trailing \ lost */ 596 *wp++ = CHAR; 597 *wp++ = '\\'; 598 } 599 break; 600 } 601 } 602 *wp++ = CHAR; 603 *wp++ = c; 604 } 605 break; 606 607 case SPATTERN: /* in *(...|...) pattern (*+?@!) */ 608 if ( /*(*/ c == ')') { 609 *wp++ = CPAT; 610 POP_STATE(); 611 } else if (c == '|') { 612 *wp++ = SPAT; 613 } else if (c == '(') { 614 *wp++ = OPAT; 615 *wp++ = ' '; /* simile for @ */ 616 PUSH_STATE(SPATTERN); 617 } else 618 goto Sbase1; 619 break; 620 } 621 } 622 Done: 623 Xcheck(ws, wp); 624 if (statep != &states[1]) 625 /* XXX figure out what is missing */ 626 yyerror("no closing quote\n"); 627 628 /* This done to avoid tests for SHEREDELIM wherever SBASE tested */ 629 if (state == SHEREDELIM) 630 state = SBASE; 631 632 dp = Xstring(ws, wp); 633 if ((c == '<' || c == '>') && state == SBASE 634 && ((c2 = Xlength(ws, wp)) == 0 635 || (c2 == 2 && dp[0] == CHAR && digit(dp[1])))) 636 { 637 struct ioword *iop = 638 (struct ioword *) alloc(sizeof(*iop), ATEMP); 639 640 if (c2 == 2) 641 iop->unit = dp[1] - '0'; 642 else 643 iop->unit = c == '>'; /* 0 for <, 1 for > */ 644 645 c2 = getsc(); 646 /* <<, >>, <> are ok, >< is not */ 647 if (c == c2 || (c == '<' && c2 == '>')) { 648 iop->flag = c == c2 ? 649 (c == '>' ? IOCAT : IOHERE) : IORDWR; 650 if (iop->flag == IOHERE) { 651 if ((c2 = getsc()) == '-') { 652 iop->flag |= IOSKIP; 653 } else { 654 ungetsc(c2); 655 } 656 } 657 } else if (c2 == '&') 658 iop->flag = IODUP | (c == '<' ? IORDUP : 0); 659 else { 660 iop->flag = c == '>' ? IOWRITE : IOREAD; 661 if (c == '>' && c2 == '|') 662 iop->flag |= IOCLOB; 663 else 664 ungetsc(c2); 665 } 666 667 iop->name = (char *) 0; 668 iop->delim = (char *) 0; 669 iop->heredoc = (char *) 0; 670 Xfree(ws, wp); /* free word */ 671 yylval.iop = iop; 672 return REDIR; 673 } 674 675 if (wp == dp && state == SBASE) { 676 Xfree(ws, wp); /* free word */ 677 /* no word, process LEX1 character */ 678 switch (c) { 679 default: 680 return c; 681 682 case '|': 683 case '&': 684 case ';': 685 if ((c2 = getsc()) == c) 686 c = (c == ';') ? BREAK : 687 (c == '|') ? LOGOR : 688 (c == '&') ? LOGAND : 689 YYERRCODE; 690 #ifdef KSH 691 else if (c == '|' && c2 == '&') 692 c = COPROC; 693 #endif /* KSH */ 694 else 695 ungetsc(c2); 696 return c; 697 698 case '\n': 699 gethere(); 700 if (cf & CONTIN) 701 goto Again; 702 return c; 703 704 case '(': /*)*/ 705 #ifdef KSH 706 if ((c2 = getsc()) == '(') /*)*/ 707 /* XXX need to handle ((...); (...)) */ 708 c = MDPAREN; 709 else 710 ungetsc(c2); 711 #endif /* KSH */ 712 return c; 713 /*(*/ 714 case ')': 715 return c; 716 } 717 } 718 719 *wp++ = EOS; /* terminate word */ 720 yylval.cp = Xclose(ws, wp); 721 if (state == SWORD 722 #ifdef KSH 723 || state == SLETPAREN 724 #endif /* KSH */ 725 ) /* ONEWORD? */ 726 return LWORD; 727 ungetsc(c); /* unget terminator */ 728 729 /* copy word to unprefixed string ident */ 730 for (sp = yylval.cp, dp = ident; dp < ident+IDENT && (c = *sp++) == CHAR; ) 731 *dp++ = *sp++; 732 /* Make sure the ident array stays '\0' paded */ 733 memset(dp, 0, (ident+IDENT) - dp + 1); 734 if (c != EOS) 735 *ident = '\0'; /* word is not unquoted */ 736 737 if (*ident != '\0' && (cf&(KEYWORD|ALIAS))) { 738 struct tbl *p; 739 int h = hash(ident); 740 741 /* { */ 742 if ((cf & KEYWORD) && (p = tsearch(&keywords, ident, h)) 743 && (!(cf & ESACONLY) || p->val.i == ESAC || p->val.i == '}')) 744 { 745 afree(yylval.cp, ATEMP); 746 return p->val.i; 747 } 748 if ((cf & ALIAS) && (p = tsearch(&aliases, ident, h)) 749 && (p->flag & ISSET)) 750 { 751 register Source *s; 752 753 for (s = source; s->type == SALIAS; s = s->next) 754 if (s->u.tblp == p) 755 return LWORD; 756 /* push alias expansion */ 757 s = pushs(SALIAS, source->areap); 758 s->start = s->str = p->val.s; 759 s->u.tblp = p; 760 s->next = source; 761 source = s; 762 afree(yylval.cp, ATEMP); 763 goto Again; 764 } 765 } 766 767 return LWORD; 768 } 769 770 static void 771 gethere() 772 { 773 register struct ioword **p; 774 775 for (p = heres; p < herep; p++) 776 readhere(*p); 777 herep = heres; 778 } 779 780 /* 781 * read "<<word" text into temp file 782 */ 783 784 static void 785 readhere(iop) 786 struct ioword *iop; 787 { 788 register int c; 789 char *volatile eof; 790 char *eofp; 791 int skiptabs; 792 XString xs; 793 char *xp; 794 int xpos; 795 796 eof = evalstr(iop->delim, 0); 797 798 if (!(iop->flag & IOEVAL)) 799 ignore_backslash_newline++; 800 801 Xinit(xs, xp, 256, ATEMP); 802 803 for (;;) { 804 eofp = eof; 805 skiptabs = iop->flag & IOSKIP; 806 xpos = Xsavepos(xs, xp); 807 while ((c = getsc()) != 0) { 808 if (skiptabs) { 809 if (c == '\t') 810 continue; 811 skiptabs = 0; 812 } 813 if (c != *eofp) 814 break; 815 Xcheck(xs, xp); 816 Xput(xs, xp, c); 817 eofp++; 818 } 819 /* Allow EOF here so commands with out trailing newlines 820 * will work (eg, ksh -c '...', $(...), etc). 821 */ 822 if (*eofp == '\0' && (c == 0 || c == '\n')) { 823 xp = Xrestpos(xs, xp, xpos); 824 break; 825 } 826 ungetsc(c); 827 while ((c = getsc()) != '\n') { 828 if (c == 0) 829 yyerror("here document `%s' unclosed\n", eof); 830 Xcheck(xs, xp); 831 Xput(xs, xp, c); 832 } 833 Xcheck(xs, xp); 834 Xput(xs, xp, c); 835 } 836 Xput(xs, xp, '\0'); 837 iop->heredoc = Xclose(xs, xp); 838 839 if (!(iop->flag & IOEVAL)) 840 ignore_backslash_newline--; 841 } 842 843 void 844 #ifdef HAVE_PROTOTYPES 845 yyerror(const char *fmt, ...) 846 #else 847 yyerror(fmt, va_alist) 848 const char *fmt; 849 va_dcl 850 #endif 851 { 852 va_list va; 853 854 /* pop aliases and re-reads */ 855 while (source->type == SALIAS || source->type == SREREAD) 856 source = source->next; 857 source->str = null; /* zap pending input */ 858 859 error_prefix(TRUE); 860 SH_VA_START(va, fmt); 861 shf_vfprintf(shl_out, fmt, va); 862 va_end(va); 863 errorf(null); 864 } 865 866 /* 867 * input for yylex with alias expansion 868 */ 869 870 Source * 871 pushs(type, areap) 872 int type; 873 Area *areap; 874 { 875 register Source *s; 876 877 s = (Source *) alloc(sizeof(Source), areap); 878 s->type = type; 879 s->str = null; 880 s->start = NULL; 881 s->line = 0; 882 s->errline = 0; 883 s->file = NULL; 884 s->flags = 0; 885 s->next = NULL; 886 s->areap = areap; 887 if (type == SFILE || type == SSTDIN) { 888 char *dummy; 889 Xinit(s->xs, dummy, 256, s->areap); 890 } else 891 memset(&s->xs, 0, sizeof(s->xs)); 892 return s; 893 } 894 895 static int 896 getsc__() 897 { 898 register Source *s = source; 899 register int c; 900 901 while ((c = *s->str++) == 0) { 902 s->str = NULL; /* return 0 for EOF by default */ 903 switch (s->type) { 904 case SEOF: 905 s->str = null; 906 return 0; 907 908 case SSTDIN: 909 case SFILE: 910 getsc_line(s); 911 break; 912 913 case SWSTR: 914 break; 915 916 case SSTRING: 917 break; 918 919 case SWORDS: 920 s->start = s->str = *s->u.strv++; 921 s->type = SWORDSEP; 922 break; 923 924 case SWORDSEP: 925 if (*s->u.strv == NULL) { 926 s->start = s->str = newline; 927 s->type = SEOF; 928 } else { 929 s->start = s->str = space; 930 s->type = SWORDS; 931 } 932 break; 933 934 case SALIAS: 935 if (s->flags & SF_ALIASEND) { 936 /* pass on an unused SF_ALIAS flag */ 937 source = s->next; 938 source->flags |= s->flags & SF_ALIAS; 939 s = source; 940 } else if (*s->u.tblp->val.s 941 && isspace((unsigned char)strchr(s->u.tblp->val.s, 0)[-1])) 942 { 943 source = s = s->next; /* pop source stack */ 944 /* Note that this alias ended with a space, 945 * enabling alias expansion on the following 946 * word. 947 */ 948 s->flags |= SF_ALIAS; 949 } else { 950 /* At this point, we need to keep the current 951 * alias in the source list so recursive 952 * aliases can be detected and we also need 953 * to return the next character. Do this 954 * by temporarily popping the alias to get 955 * the next character and then put it back 956 * in the source list with the SF_ALIASEND 957 * flag set. 958 */ 959 source = s->next; /* pop source stack */ 960 source->flags |= s->flags & SF_ALIAS; 961 c = getsc__(); 962 if (c) { 963 s->flags |= SF_ALIASEND; 964 s->ugbuf[0] = c; s->ugbuf[1] = '\0'; 965 s->start = s->str = s->ugbuf; 966 s->next = source; 967 source = s; 968 } else { 969 s = source; 970 /* avoid reading eof twice */ 971 s->str = NULL; 972 break; 973 } 974 } 975 continue; 976 977 case SREREAD: 978 if (s->start != s->ugbuf) /* yuck */ 979 afree(s->u.freeme, ATEMP); 980 source = s = s->next; 981 continue; 982 } 983 if (s->str == NULL) { 984 s->type = SEOF; 985 s->start = s->str = null; 986 return '\0'; 987 } 988 if (s->flags & SF_ECHO) { 989 shf_puts(s->str, shl_out); 990 shf_flush(shl_out); 991 } 992 } 993 return c; 994 } 995 996 static void 997 getsc_line(s) 998 Source *s; 999 { 1000 char *xp = Xstring(s->xs, xp); 1001 int interactive = Flag(FTALKING) && s->type == SSTDIN; 1002 int have_tty = interactive && (s->flags & SF_TTY); 1003 1004 /* Done here to ensure nothing odd happens when a timeout occurs */ 1005 XcheckN(s->xs, xp, LINE); 1006 *xp = '\0'; 1007 s->start = s->str = xp; 1008 1009 #ifdef KSH 1010 if (have_tty && ksh_tmout) { 1011 ksh_tmout_state = TMOUT_READING; 1012 alarm(ksh_tmout); 1013 } 1014 #endif /* KSH */ 1015 #ifdef EDIT 1016 if (have_tty && (0 1017 # ifdef VI 1018 || Flag(FVI) 1019 # endif /* VI */ 1020 # ifdef EMACS 1021 || Flag(FEMACS) || Flag(FGMACS) 1022 # endif /* EMACS */ 1023 )) 1024 { 1025 int nread; 1026 1027 nread = x_read(xp, LINE); 1028 if (nread < 0) /* read error */ 1029 nread = 0; 1030 xp[nread] = '\0'; 1031 xp += nread; 1032 } 1033 else 1034 #endif /* EDIT */ 1035 { 1036 if (interactive) { 1037 pprompt(prompt, 0); 1038 } else 1039 s->line++; 1040 1041 while (1) { 1042 char *p = shf_getse(xp, Xnleft(s->xs, xp), s->u.shf); 1043 1044 if (!p && shf_error(s->u.shf) 1045 && shf_errno(s->u.shf) == EINTR) 1046 { 1047 shf_clearerr(s->u.shf); 1048 if (trap) 1049 runtraps(0); 1050 continue; 1051 } 1052 if (!p || (xp = p, xp[-1] == '\n')) 1053 break; 1054 /* double buffer size */ 1055 xp++; /* move past null so doubling works... */ 1056 XcheckN(s->xs, xp, Xlength(s->xs, xp)); 1057 xp--; /* ...and move back again */ 1058 } 1059 /* flush any unwanted input so other programs/builtins 1060 * can read it. Not very optimal, but less error prone 1061 * than flushing else where, dealing with redirections, 1062 * etc.. 1063 * todo: reduce size of shf buffer (~128?) if SSTDIN 1064 */ 1065 if (s->type == SSTDIN) 1066 shf_flush(s->u.shf); 1067 } 1068 /* XXX: temporary kludge to restore source after a 1069 * trap may have been executed. 1070 */ 1071 source = s; 1072 #ifdef KSH 1073 if (have_tty && ksh_tmout) 1074 { 1075 ksh_tmout_state = TMOUT_EXECUTING; 1076 alarm(0); 1077 } 1078 #endif /* KSH */ 1079 s->start = s->str = Xstring(s->xs, xp); 1080 strip_nuls(Xstring(s->xs, xp), Xlength(s->xs, xp)); 1081 /* Note: if input is all nulls, this is not eof */ 1082 if (Xlength(s->xs, xp) == 0) { /* EOF */ 1083 if (s->type == SFILE) 1084 shf_fdclose(s->u.shf); 1085 s->str = NULL; 1086 } else if (interactive) { 1087 #ifdef HISTORY 1088 char *p = Xstring(s->xs, xp); 1089 if (cur_prompt == PS1) 1090 while (*p && ctype(*p, C_IFS) && ctype(*p, C_IFSWS)) 1091 p++; 1092 if (*p) { 1093 # ifdef EASY_HISTORY 1094 if (cur_prompt == PS2) 1095 histappend(Xstring(s->xs, xp), 1); 1096 else 1097 # endif /* EASY_HISTORY */ 1098 { 1099 s->line++; 1100 histsave(s->line, s->str, 1); 1101 } 1102 } 1103 #endif /* HISTORY */ 1104 } 1105 if (interactive) 1106 set_prompt(PS2, (Source *) 0); 1107 } 1108 1109 void 1110 set_prompt(to, s) 1111 int to; 1112 Source *s; 1113 { 1114 cur_prompt = to; 1115 1116 switch (to) { 1117 case PS1: /* command */ 1118 #ifdef KSH 1119 /* Substitute ! and !! here, before substitutions are done 1120 * so ! in expanded variables are not expanded. 1121 * NOTE: this is not what at&t ksh does (it does it after 1122 * substitutions, POSIX doesn't say which is to be done. 1123 */ 1124 { 1125 struct shf *shf; 1126 char *ps1; 1127 Area *saved_atemp; 1128 #ifdef __GNUC__ 1129 (void) &ps1; 1130 #endif 1131 1132 ps1 = str_val(global("PS1")); 1133 shf = shf_sopen((char *) 0, strlen(ps1) * 2, 1134 SHF_WR | SHF_DYNAMIC, (struct shf *) 0); 1135 while (*ps1) { 1136 if (*ps1 != '!' || *++ps1 == '!') 1137 shf_putchar(*ps1++, shf); 1138 else 1139 shf_fprintf(shf, "%d", 1140 s ? s->line + 1 : 0); 1141 } 1142 ps1 = shf_sclose(shf); 1143 saved_atemp = ATEMP; 1144 newenv(E_ERRH); 1145 if (ksh_sigsetjmp(e->jbuf, 0)) { 1146 prompt = safe_prompt; 1147 /* Don't print an error - assume it has already 1148 * been printed. Reason is we may have forked 1149 * to run a command and the child may be 1150 * unwinding its stack through this code as it 1151 * exits. 1152 */ 1153 } else 1154 prompt = str_save(substitute(ps1, 0), 1155 saved_atemp); 1156 quitenv(); 1157 } 1158 #else /* KSH */ 1159 prompt = str_val(global("PS1")); 1160 #endif /* KSH */ 1161 break; 1162 1163 case PS2: /* command continuation */ 1164 prompt = str_val(global("PS2")); 1165 break; 1166 } 1167 } 1168 1169 /* See also related routine, promptlen() in edit.c */ 1170 void 1171 pprompt(cp, ntruncate) 1172 const char *cp; 1173 int ntruncate; 1174 { 1175 #if 0 1176 char nbuf[32]; 1177 int c; 1178 1179 while (*cp != 0) { 1180 if (*cp != '!') 1181 c = *cp++; 1182 else if (*++cp == '!') 1183 c = *cp++; 1184 else { 1185 int len; 1186 char *p; 1187 1188 shf_snprintf(p = nbuf, sizeof(nbuf), "%d", 1189 source->line + 1); 1190 len = strlen(nbuf); 1191 if (ntruncate) { 1192 if (ntruncate >= len) { 1193 ntruncate -= len; 1194 continue; 1195 } 1196 p += ntruncate; 1197 len -= ntruncate; 1198 ntruncate = 0; 1199 } 1200 shf_write(p, len, shl_out); 1201 continue; 1202 } 1203 if (ntruncate) 1204 --ntruncate; 1205 else 1206 shf_putc(c, shl_out); 1207 } 1208 #endif /* 0 */ 1209 shf_puts(cp + ntruncate, shl_out); 1210 shf_flush(shl_out); 1211 } 1212 1213 /* Read the variable part of a ${...} expression (ie, up to but not including 1214 * the :[-+?=#%] or close-brace. 1215 */ 1216 static char * 1217 get_brace_var(wsp, wp) 1218 XString *wsp; 1219 char *wp; 1220 { 1221 enum parse_state { 1222 PS_INITIAL, PS_SAW_HASH, PS_IDENT, 1223 PS_NUMBER, PS_VAR1, PS_END 1224 } 1225 state; 1226 char c; 1227 1228 state = PS_INITIAL; 1229 while (1) { 1230 c = getsc(); 1231 /* State machine to figure out where the variable part ends. */ 1232 switch (state) { 1233 case PS_INITIAL: 1234 if (c == '#') { 1235 state = PS_SAW_HASH; 1236 break; 1237 } 1238 /* fall through.. */ 1239 case PS_SAW_HASH: 1240 if (letter(c)) 1241 state = PS_IDENT; 1242 else if (digit(c)) 1243 state = PS_NUMBER; 1244 else if (ctype(c, C_VAR1)) 1245 state = PS_VAR1; 1246 else 1247 state = PS_END; 1248 break; 1249 case PS_IDENT: 1250 if (!letnum(c)) { 1251 state = PS_END; 1252 if (c == '[') { 1253 char *tmp, *p; 1254 1255 if (!arraysub(&tmp)) 1256 yyerror("missing ]\n"); 1257 *wp++ = c; 1258 for (p = tmp; *p; ) { 1259 Xcheck(*wsp, wp); 1260 *wp++ = *p++; 1261 } 1262 afree(tmp, ATEMP); 1263 c = getsc(); /* the ] */ 1264 } 1265 } 1266 break; 1267 case PS_NUMBER: 1268 if (!digit(c)) 1269 state = PS_END; 1270 break; 1271 case PS_VAR1: 1272 state = PS_END; 1273 break; 1274 case PS_END: /* keep gcc happy */ 1275 break; 1276 } 1277 if (state == PS_END) { 1278 *wp++ = '\0'; /* end of variable part */ 1279 ungetsc(c); 1280 break; 1281 } 1282 Xcheck(*wsp, wp); 1283 *wp++ = c; 1284 } 1285 return wp; 1286 } 1287 1288 /* 1289 * Save an array subscript - returns true if matching bracket found, false 1290 * if eof or newline was found. 1291 * (Returned string double null terminated) 1292 */ 1293 static int 1294 arraysub(strp) 1295 char **strp; 1296 { 1297 XString ws; 1298 char *wp; 1299 char c; 1300 int depth = 1; /* we are just past the initial [ */ 1301 1302 Xinit(ws, wp, 32, ATEMP); 1303 1304 do { 1305 c = getsc(); 1306 Xcheck(ws, wp); 1307 *wp++ = c; 1308 if (c == '[') 1309 depth++; 1310 else if (c == ']') 1311 depth--; 1312 } while (depth > 0 && c && c != '\n'); 1313 1314 *wp++ = '\0'; 1315 *strp = Xclose(ws, wp); 1316 1317 return depth == 0 ? 1 : 0; 1318 } 1319 1320 /* Unget a char: handles case when we are already at the start of the buffer */ 1321 static const char * 1322 ungetsc(c) 1323 int c; 1324 { 1325 if (backslash_skip) 1326 backslash_skip--; 1327 /* Don't unget eof... */ 1328 if (source->str == null && c == '\0') 1329 return source->str; 1330 if (source->str > source->start) 1331 source->str--; 1332 else { 1333 Source *s; 1334 1335 s = pushs(SREREAD, source->areap); 1336 s->ugbuf[0] = c; s->ugbuf[1] = '\0'; 1337 s->start = s->str = s->ugbuf; 1338 s->next = source; 1339 source = s; 1340 } 1341 return source->str; 1342 } 1343 1344 1345 /* Called to get a char that isn't a \newline sequence. */ 1346 static int 1347 getsc_bn ARGS((void)) 1348 { 1349 int c, c2; 1350 1351 if (ignore_backslash_newline) 1352 return getsc_(); 1353 1354 if (backslash_skip == 1) { 1355 backslash_skip = 2; 1356 return getsc_(); 1357 } 1358 1359 backslash_skip = 0; 1360 1361 while (1) { 1362 c = getsc_(); 1363 if (c == '\\') { 1364 if ((c2 = getsc_()) == '\n') 1365 /* ignore the \newline; get the next char... */ 1366 continue; 1367 ungetsc(c2); 1368 backslash_skip = 1; 1369 } 1370 return c; 1371 } 1372 } 1373 1374 static Lex_state * 1375 push_state_(si, old_end) 1376 State_info *si; 1377 Lex_state *old_end; 1378 { 1379 Lex_state *new = alloc(sizeof(Lex_state) * STATE_BSIZE, ATEMP); 1380 1381 new[0].ls_info.base = old_end; 1382 si->base = &new[0]; 1383 si->end = &new[STATE_BSIZE]; 1384 return &new[1]; 1385 } 1386 1387 static Lex_state * 1388 pop_state_(si, old_end) 1389 State_info *si; 1390 Lex_state *old_end; 1391 { 1392 Lex_state *old_base = si->base; 1393 1394 si->base = old_end->ls_info.base - STATE_BSIZE; 1395 si->end = old_end->ls_info.base; 1396 1397 afree(old_base, ATEMP); 1398 1399 return si->base + STATE_BSIZE - 1;; 1400 } 1401