xref: /netbsd/bin/ksh/lex.c (revision bf9ec67e)
1 /*	$NetBSD: lex.c,v 1.7 1999/10/20 15:49:15 hubertf Exp $	*/
2 
3 /*
4  * lexical analysis and source input
5  */
6 
7 #include "sh.h"
8 #include <ctype.h>
9 
10 
/*
 * One slot of the lexer's state stack: the state code together with the
 * bookkeeping that the individual states keep in the ls_info union.
 */
typedef struct lex_state Lex_state;
struct lex_state {
	int ls_state;
	union {
		/* $(...) */
		struct scsparen_info {
			int nparen;	/* number of parentheses still open */
			int csstate;	/* quoting sub-state; XXX remove */
		} u_scsparen;

		/* $((...)) */
		struct sasparen_info {
			int nparen;	/* number of parentheses still open */
			int start;	/* where $(( begins in the output str */
		} u_sasparen;

		/* ((...)) */
		struct sletparen_info {
			int nparen;	/* number of parentheses still open */
		} u_sletparen;

		/* `...` */
		struct sbquote_info {
			int indquotes;	/* true if in double quotes: "`...`" */
		} u_sbquote;

		/* used to point to next state block */
		Lex_state *base;
	} ls_info;
};

/* Shorthands for reaching the per-state members through ls_info */
#define ls_scsparen	ls_info.u_scsparen
#define ls_sasparen	ls_info.u_sasparen
#define ls_sletparen	ls_info.u_sletparen
#define ls_sbquote	ls_info.u_sbquote
47 
48 typedef struct State_info State_info;
49 struct State_info {
50 	Lex_state	*base;
51 	Lex_state	*end;
52 };
53 
54 
/* Forward declarations of the file-local helpers defined further down */
static void	readhere ARGS((struct ioword *iop));
static int	getsc__ ARGS((void));
static void	getsc_line ARGS((Source *s));
static int	getsc_bn ARGS((void));
static char	*get_brace_var ARGS((XString *wsp, char *wp));
static int	arraysub ARGS((char **strp));
static const char *ungetsc ARGS((int c));
static void	gethere ARGS((void));
static Lex_state *push_state_ ARGS((State_info *si, Lex_state *old_end));
static Lex_state *pop_state_ ARGS((State_info *si, Lex_state *old_end));

/* Backslash bookkeeping shared by getsc(), getsc_bn() and ungetsc():
 * set to 1 by getsc_bn() when it returns a backslash that is not part of
 * a \newline pair, 2 once the following character has been returned.
 */
static int backslash_skip;
/* While non-zero, getsc_bn() reads through getsc_() without removing
 * \newline pairs.
 */
static int ignore_backslash_newline;
68 
/* optimized getsc_bn(): take the next character straight from the source
 * buffer unless it is a NUL or a backslash, or backslash_skip is set; in
 * those cases defer to getsc_bn().
 */
#define getsc()		(*source->str != '\0' && *source->str != '\\' \
			 && !backslash_skip ? *source->str++ : getsc_bn())
/* optimized getsc__(): only call getsc__() when the buffer is exhausted */
#define	getsc_()	((*source->str != '\0') ? *source->str++ : getsc__())

/* number of Lex_state slots per state block */
#define STATE_BSIZE	32

/* Enter lexing state s.  Relies on the caller's locals statep, state and
 * state_info; a new block is obtained from push_state_() when the current
 * one is full.
 */
#define PUSH_STATE(s)	do { \
			    if (++statep == state_info.end) \
				statep = push_state_(&state_info, statep); \
			    state = statep->ls_state = (s); \
			} while (0)

/* Return to the previous lexing state.  Relies on the same caller locals
 * as PUSH_STATE(); pop_state_() is called when the step back lands on the
 * first slot of the current block.
 */
#define POP_STATE()	do { \
			    if (--statep == state_info.base) \
				statep = pop_state_(&state_info, statep); \
			    state = statep->ls_state; \
			} while (0)
88 
89 
90 
/*
 * Lexical analyzer
 *
 * tokens are not regular expressions, they are LL(1).
 * for example, "${var:-${PWD}}", and "$(size $(whence ksh))".
 * hence the state stack.
 *
 * cf is a bit mask of lexer flags; the ones tested in this function are
 * ONEWORD, LETEXPR, HEREDELIM, CMDWORD, VARASN, ARRAYVAR, CONTIN, KEYWORD,
 * ESACONLY and ALIAS.
 *
 * Returns the token code: REDIR (yylval.iop set), LWORD (yylval.cp set),
 * the value of a matching entry in the keywords table, one of the operator
 * codes BREAK, LOGOR, LOGAND, COPROC or MDPAREN, or otherwise the
 * terminating character itself.
 */

int
yylex(cf)
	int cf;
{
	Lex_state states[STATE_BSIZE], *statep;
	State_info state_info;
	register int c, state;
	XString ws;		/* expandable output word */
	register char *wp;	/* output word pointer */
	char *sp, *dp;
	int c2;


  Again:
	/* Slot 0 of the on-stack block is a sentinel (state -1, no previous
	 * block); lexing starts in slot 1.
	 */
	states[0].ls_state = -1;
	states[0].ls_info.base = (Lex_state *) 0;
	statep = &states[1];
	state_info.base = states;
	state_info.end = &states[STATE_BSIZE];

	Xinit(ws, wp, 64, ATEMP);

	backslash_skip = 0;
	ignore_backslash_newline = 0;

	if (cf&ONEWORD)
		state = SWORD;
#ifdef KSH
	else if (cf&LETEXPR) {
		*wp++ = OQUOTE;	 /* enclose arguments in (double) quotes */
		state = SLETPAREN;
		statep->ls_sletparen.nparen = 0;
	}
#endif /* KSH */
	else {		/* normal lexing */
		state = (cf & HEREDELIM) ? SHEREDELIM : SBASE;
		/* skip leading blanks, then a # comment up to (but not
		 * including) the newline or end of input
		 */
		while ((c = getsc()) == ' ' || c == '\t')
			;
		if (c == '#') {
			ignore_backslash_newline++;
			while ((c = getsc()) != '\0' && c != '\n')
				;
			ignore_backslash_newline--;
		}
		ungetsc(c);
	}
	if (source->flags & SF_ALIAS) {	/* trailing ' ' in alias definition */
		source->flags &= ~SF_ALIAS;
		/* In POSIX mode, a trailing space only counts if we are
		 * parsing a simple command
		 */
		if (!Flag(FPOSIX) || (cf & CMDWORD))
			cf |= ALIAS;
	}

	/* Initial state: one of SBASE SHEREDELIM SWORD SASPAREN */
	statep->ls_state = state;

	/* collect non-special or quoted characters to form word */
	while (!((c = getsc()) == 0
		 || ((state == SBASE || state == SHEREDELIM)
		     && ctype(c, C_LEX1))))
	{
		Xcheck(ws, wp);
		switch (state) {
		  case SBASE:
			if (c == '[' && (cf & (VARASN|ARRAYVAR))) {
				*wp = EOS; /* temporary */
				if (is_wdvarname(Xstring(ws, wp), FALSE))
				{
					char *p, *tmp;

					if (arraysub(&tmp)) {
						*wp++ = CHAR;
						*wp++ = c;
						for (p = tmp; *p; ) {
							Xcheck(ws, wp);
							*wp++ = CHAR;
							*wp++ = *p++;
						}
						afree(tmp, ATEMP);
						break;
					} else {
						Source *s;

						/* no closing bracket: push
						 * the text read so far back
						 * as a re-read source
						 */
						s = pushs(SREREAD,
							  source->areap);
						s->start = s->str
							= s->u.freeme = tmp;
						s->next = source;
						source = s;
					}
				}
				*wp++ = CHAR;
				*wp++ = c;
				break;
			}
			/* fall through.. */
		  Sbase1:	/* includes *(...|...) pattern (*+?@!) */
#ifdef KSH
			if (c == '*' || c == '@' || c == '+' || c == '?'
			    || c == '!')
			{
				c2 = getsc();
				if (c2 == '(' /*)*/ ) {
					*wp++ = OPAT;
					*wp++ = c;
					PUSH_STATE(SPATTERN);
					break;
				}
				ungetsc(c2);
			}
#endif /* KSH */
			/* fall through.. */
		  Sbase2:	/* doesn't include *(...|...) pattern (*+?@!) */
			switch (c) {
			  case '\\':
				c = getsc();
#ifdef OS2
				if (isalnum(c)) {
					*wp++ = CHAR, *wp++ = '\\';
					*wp++ = CHAR, *wp++ = c;
				} else
#endif
				if (c) /* trailing \ is lost */
					*wp++ = QCHAR, *wp++ = c;
				break;
			  case '\'':
				*wp++ = OQUOTE;
				ignore_backslash_newline++;
				PUSH_STATE(SSQUOTE);
				break;
			  case '"':
				*wp++ = OQUOTE;
				PUSH_STATE(SDQUOTE);
				break;
			  default:
				goto Subst;
			}
			break;

		  Subst:
			switch (c) {
			  case '\\':
				c = getsc();
				switch (c) {
				  case '"': case '\\':
				  case '$': case '`':
					*wp++ = QCHAR, *wp++ = c;
					break;
				  default:
					Xcheck(ws, wp);
					if (c) { /* trailing \ is lost */
						*wp++ = CHAR, *wp++ = '\\';
						*wp++ = CHAR, *wp++ = c;
					}
					break;
				}
				break;
			  case '$':
				c = getsc();
				if (c == '(') /*)*/ {
					c = getsc();
					if (c == '(') /*)*/ {
						PUSH_STATE(SASPAREN);
						statep->ls_sasparen.nparen = 2;
						statep->ls_sasparen.start =
							Xsavepos(ws, wp);
						*wp++ = EXPRSUB;
					} else {
						ungetsc(c);
						PUSH_STATE(SCSPAREN);
						statep->ls_scsparen.nparen = 1;
						statep->ls_scsparen.csstate = 0;
						*wp++ = COMSUB;
					}
				} else if (c == '{') /*}*/ {
					*wp++ = OSUBST;
					*wp++ = '{'; /*}*/
					wp = get_brace_var(&ws, wp);
					c = getsc();
					/* allow :# and :% (ksh88 compat) */
					if (c == ':') {
						*wp++ = CHAR, *wp++ = c;
						c = getsc();
					}
					/* If this is a trim operation,
					 * treat (,|,) specially in STBRACE.
					 */
					if (c == '#' || c == '%') {
						ungetsc(c);
						PUSH_STATE(STBRACE);
					} else {
						ungetsc(c);
						PUSH_STATE(SBRACE);
					}
				} else if (ctype(c, C_ALPHA)) {
					*wp++ = OSUBST;
					*wp++ = 'X';
					do {
						Xcheck(ws, wp);
						*wp++ = c;
						c = getsc();
					} while (ctype(c, C_ALPHA|C_DIGIT));
					*wp++ = '\0';
					*wp++ = CSUBST;
					*wp++ = 'X';
					ungetsc(c);
				} else if (ctype(c, C_DIGIT|C_VAR1)) {
					Xcheck(ws, wp);
					*wp++ = OSUBST;
					*wp++ = 'X';
					*wp++ = c;
					*wp++ = '\0';
					*wp++ = CSUBST;
					*wp++ = 'X';
				} else {
					*wp++ = CHAR, *wp++ = '$';
					ungetsc(c);
				}
				break;
			  case '`':
				PUSH_STATE(SBQUOTE);
				*wp++ = COMSUB;
				/* Need to know if we are inside double quotes
				 * since sh/at&t-ksh translate the \" to " in
				 * "`..\"..`".
				 * This is not done in posix mode (section
				 * 3.2.3, Double Quotes: "The backquote shall
				 * retain its special meaning introducing the
				 * other form of command substitution (see
				 * 3.6.3). The portion of the quoted string
				 * from the initial backquote and the
				 * characters up to the next backquote that
				 * is not preceded by a backslash (having
				 * escape characters removed) defines that
				 * command whose output replaces `...` when
				 * the word is expanded."
				 * Section 3.6.3, Command Substitution:
				 * "Within the backquoted style of command
				 * substitution, backslash shall retain its
				 * literal meaning, except when followed by
				 * $ ` \.").
				 */
				statep->ls_sbquote.indquotes = 0;
				if (!Flag(FPOSIX)) {
					/* walk back down the state stack,
					 * block by block, looking for an
					 * enclosing SDQUOTE state
					 */
					Lex_state *s = statep;
					Lex_state *base = state_info.base;
					while (1) {
						for (; s != base; s--) {
							if (s->ls_state == SDQUOTE) {
								statep->ls_sbquote.indquotes = 1;
								break;
							}
						}
						if (s != base)
							break;
						if (!(s = s->ls_info.base))
							break;
						base = s-- - STATE_BSIZE;
					}
				}
				break;
			  default:
				*wp++ = CHAR, *wp++ = c;
			}
			break;

		  case SSQUOTE:
			if (c == '\'') {
				POP_STATE();
				*wp++ = CQUOTE;
				ignore_backslash_newline--;
			} else
				*wp++ = QCHAR, *wp++ = c;
			break;

		  case SDQUOTE:
			if (c == '"') {
				POP_STATE();
				*wp++ = CQUOTE;
			} else
				goto Subst;
			break;

		  case SCSPAREN: /* $( .. ) */
			/* todo: deal with $(...) quoting properly
			 * kludge to partly fake quoting inside $(..): doesn't
			 * really work because nested $(..) or ${..} inside
			 * double quotes aren't dealt with.
			 */
			switch (statep->ls_scsparen.csstate) {
			  case 0: /* normal */
				switch (c) {
				  case '(':
					statep->ls_scsparen.nparen++;
					break;
				  case ')':
					statep->ls_scsparen.nparen--;
					break;
				  case '\\':
					statep->ls_scsparen.csstate = 1;
					break;
				  case '"':
					statep->ls_scsparen.csstate = 2;
					break;
				  case '\'':
					statep->ls_scsparen.csstate = 4;
					ignore_backslash_newline++;
					break;
				}
				break;

			  case 1: /* backslash in normal mode */
			  case 3: /* backslash in double quotes */
				--statep->ls_scsparen.csstate;
				break;

			  case 2: /* double quotes */
				if (c == '"')
					statep->ls_scsparen.csstate = 0;
				else if (c == '\\')
					statep->ls_scsparen.csstate = 3;
				break;

			  case 4: /* single quotes */
				if (c == '\'') {
					statep->ls_scsparen.csstate = 0;
					ignore_backslash_newline--;
				}
				break;
			}
			if (statep->ls_scsparen.nparen == 0) {
				POP_STATE();
				*wp++ = 0; /* end of COMSUB */
			} else
				*wp++ = c;
			break;

		  case SASPAREN: /* $(( .. )) */
			/* todo: deal with $((...); (...)) properly */
			/* XXX should nest using existing state machine
			 *     (embed "..", $(...), etc.) */
			if (c == '(')
				statep->ls_sasparen.nparen++;
			else if (c == ')') {
				statep->ls_sasparen.nparen--;
				if (statep->ls_sasparen.nparen == 1) {
					/*(*/
					if ((c2 = getsc()) == ')') {
						POP_STATE();
						*wp++ = 0; /* end of EXPRSUB */
						break;
					} else {
						char *s;

						ungetsc(c2);
						/* mismatched parenthesis -
						 * assume we were really
						 * parsing a $(..) expression
						 */
						s = Xrestpos(ws, wp,
						     statep->ls_sasparen.start);
						memmove(s + 1, s, wp - s);
						*s++ = COMSUB;
						*s = '('; /*)*/
						wp++;
						statep->ls_scsparen.nparen = 1;
						statep->ls_scsparen.csstate = 0;
						state = statep->ls_state
							= SCSPAREN;

					}
				}
			}
			*wp++ = c;
			break;

		  case SBRACE:
			/*{*/
			if (c == '}') {
				POP_STATE();
				*wp++ = CSUBST;
				*wp++ = /*{*/ '}';
			} else
				goto Sbase1;
			break;

		  case STBRACE:
			/* Same as SBRACE, except (,|,) treated specially */
			/*{*/
			if (c == '}') {
				POP_STATE();
				*wp++ = CSUBST;
				*wp++ = /*{*/ '}';
			} else if (c == '|') {
				*wp++ = SPAT;
			} else if (c == '(') {
				*wp++ = OPAT;
				*wp++ = ' ';	/* simile for @ */
				PUSH_STATE(SPATTERN);
			} else
				goto Sbase1;
			break;

		  case SBQUOTE:
			if (c == '`') {
				*wp++ = 0;
				POP_STATE();
			} else if (c == '\\') {
				switch (c = getsc()) {
				  case '\\':
				  case '$': case '`':
					*wp++ = c;
					break;
				  case '"':
					if (statep->ls_sbquote.indquotes) {
						*wp++ = c;
						break;
					}
					/* fall through.. */
				  default:
					if (c) { /* trailing \ is lost */
						*wp++ = '\\';
						*wp++ = c;
					}
					break;
				}
			} else
				*wp++ = c;
			break;

		  case SWORD:	/* ONEWORD */
			goto Subst;

#ifdef KSH
		  case SLETPAREN:	/* LETEXPR: (( ... )) */
			/*(*/
			if (c == ')') {
				if (statep->ls_sletparen.nparen > 0)
				    --statep->ls_sletparen.nparen;
				/*(*/
				else if ((c2 = getsc()) == ')') {
					c = 0;
					*wp++ = CQUOTE;
					goto Done;
				} else
					ungetsc(c2);
			} else if (c == '(')
				/* parenthesis inside quotes and backslashes
				 * are lost, but at&t ksh doesn't count them
				 * either
				 */
				++statep->ls_sletparen.nparen;
			goto Sbase2;
#endif /* KSH */

		  case SHEREDELIM:	/* <<,<<- delimiter */
			/* XXX chuck this state (and the next) - use
			 * the existing states ($ and \`..` should be
			 * stripped of their specialness after the
			 * fact).
			 */
			/* here delimiters need a special case since
			 * $ and `..` are not to be treated specially
			 */
			if (c == '\\') {
				c = getsc();
				if (c) { /* trailing \ is lost */
					*wp++ = QCHAR;
					*wp++ = c;
				}
			} else if (c == '\'') {
				PUSH_STATE(SSQUOTE);
				*wp++ = OQUOTE;
				ignore_backslash_newline++;
			} else if (c == '"') {
				state = statep->ls_state = SHEREDQUOTE;
				*wp++ = OQUOTE;
			} else {
				*wp++ = CHAR;
				*wp++ = c;
			}
			break;

		  case SHEREDQUOTE:	/* " in <<,<<- delimiter */
			if (c == '"') {
				*wp++ = CQUOTE;
				state = statep->ls_state = SHEREDELIM;
			} else {
				if (c == '\\') {
					switch (c = getsc()) {
					  case '\\': case '"':
					  case '$': case '`':
						break;
					  default:
						if (c) { /* trailing \ lost */
							*wp++ = CHAR;
							*wp++ = '\\';
						}
						break;
					}
				}
				*wp++ = CHAR;
				*wp++ = c;
			}
			break;

		  case SPATTERN:	/* in *(...|...) pattern (*+?@!) */
			if ( /*(*/ c == ')') {
				*wp++ = CPAT;
				POP_STATE();
			} else if (c == '|') {
				*wp++ = SPAT;
			} else if (c == '(') {
				*wp++ = OPAT;
				*wp++ = ' ';	/* simile for @ */
				PUSH_STATE(SPATTERN);
			} else
				goto Sbase1;
			break;
		}
	}
Done:
	Xcheck(ws, wp);
	/* anything other than the initial slot means a nested state was
	 * still open when the word ended
	 */
	if (statep != &states[1])
		/* XXX figure out what is missing */
		yyerror("no closing quote\n");

	/* This done to avoid tests for SHEREDELIM wherever SBASE tested */
	if (state == SHEREDELIM)
		state = SBASE;

	dp = Xstring(ws, wp);
	/* a redirection operator: < or > with either nothing collected so
	 * far or exactly one unquoted digit (the unit number)
	 */
	if ((c == '<' || c == '>') && state == SBASE
	    && ((c2 = Xlength(ws, wp)) == 0
	        || (c2 == 2 && dp[0] == CHAR && digit(dp[1]))))
	{
		struct ioword *iop =
				(struct ioword *) alloc(sizeof(*iop), ATEMP);

		if (c2 == 2)
			iop->unit = dp[1] - '0';
		else
			iop->unit = c == '>'; /* 0 for <, 1 for > */

		c2 = getsc();
		/* <<, >>, <> are ok, >< is not */
		if (c == c2 || (c == '<' && c2 == '>')) {
			iop->flag = c == c2 ?
				  (c == '>' ? IOCAT : IOHERE) : IORDWR;
			if (iop->flag == IOHERE) {
				if ((c2 = getsc()) == '-') {
					iop->flag |= IOSKIP;
				} else {
					ungetsc(c2);
				}
			}
		} else if (c2 == '&')
			iop->flag = IODUP | (c == '<' ? IORDUP : 0);
		else {
			iop->flag = c == '>' ? IOWRITE : IOREAD;
			if (c == '>' && c2 == '|')
				iop->flag |= IOCLOB;
			else
				ungetsc(c2);
		}

		iop->name = (char *) 0;
		iop->delim = (char *) 0;
		iop->heredoc = (char *) 0;
		Xfree(ws, wp);	/* free word */
		yylval.iop = iop;
		return REDIR;
	}

	if (wp == dp && state == SBASE) {
		Xfree(ws, wp);	/* free word */
		/* no word, process LEX1 character */
		switch (c) {
		  default:
			return c;

		  case '|':
		  case '&':
		  case ';':
			if ((c2 = getsc()) == c)
				c = (c == ';') ? BREAK :
				    (c == '|') ? LOGOR :
				    (c == '&') ? LOGAND :
				    YYERRCODE;
#ifdef KSH
			else if (c == '|' && c2 == '&')
				c = COPROC;
#endif /* KSH */
			else
				ungetsc(c2);
			return c;

		  case '\n':
			/* read any here documents queued on this line */
			gethere();
			if (cf & CONTIN)
				goto Again;
			return c;

		  case '(':  /*)*/
#ifdef KSH
			if ((c2 = getsc()) == '(') /*)*/
				/* XXX need to handle ((...); (...)) */
				c = MDPAREN;
			else
				ungetsc(c2);
#endif /* KSH */
			return c;
		  /*(*/
		  case ')':
			return c;
		}
	}

	*wp++ = EOS;		/* terminate word */
	yylval.cp = Xclose(ws, wp);
	if (state == SWORD
#ifdef KSH
		|| state == SLETPAREN
#endif /* KSH */
		)	/* ONEWORD? */
		return LWORD;
	ungetsc(c);		/* unget terminator */

	/* copy word to unprefixed string ident */
	for (sp = yylval.cp, dp = ident; dp < ident+IDENT && (c = *sp++) == CHAR; )
		*dp++ = *sp++;
	/* Make sure the ident array stays '\0' padded */
	memset(dp, 0, (ident+IDENT) - dp + 1);
	if (c != EOS)
		*ident = '\0';	/* word is not unquoted */

	if (*ident != '\0' && (cf&(KEYWORD|ALIAS))) {
		struct tbl *p;
		int h = hash(ident);

		/* { */
		if ((cf & KEYWORD) && (p = tsearch(&keywords, ident, h))
		    && (!(cf & ESACONLY) || p->val.i == ESAC || p->val.i == '}'))
		{
			afree(yylval.cp, ATEMP);
			return p->val.i;
		}
		if ((cf & ALIAS) && (p = tsearch(&aliases, ident, h))
		    && (p->flag & ISSET))
		{
			register Source *s;

			/* an alias whose expansion is already on the source
			 * stack is returned as a plain word
			 */
			for (s = source; s->type == SALIAS; s = s->next)
				if (s->u.tblp == p)
					return LWORD;
			/* push alias expansion */
			s = pushs(SALIAS, source->areap);
			s->start = s->str = p->val.s;
			s->u.tblp = p;
			s->next = source;
			source = s;
			afree(yylval.cp, ATEMP);
			goto Again;
		}
	}

	return LWORD;
}
769 
770 static void
771 gethere()
772 {
773 	register struct ioword **p;
774 
775 	for (p = heres; p < herep; p++)
776 		readhere(*p);
777 	herep = heres;
778 }
779 
780 /*
781  * read "<<word" text into temp file
782  */
783 
784 static void
785 readhere(iop)
786 	struct ioword *iop;
787 {
788 	register int c;
789 	char *volatile eof;
790 	char *eofp;
791 	int skiptabs;
792 	XString xs;
793 	char *xp;
794 	int xpos;
795 
796 	eof = evalstr(iop->delim, 0);
797 
798 	if (!(iop->flag & IOEVAL))
799 		ignore_backslash_newline++;
800 
801 	Xinit(xs, xp, 256, ATEMP);
802 
803 	for (;;) {
804 		eofp = eof;
805 		skiptabs = iop->flag & IOSKIP;
806 		xpos = Xsavepos(xs, xp);
807 		while ((c = getsc()) != 0) {
808 			if (skiptabs) {
809 				if (c == '\t')
810 					continue;
811 				skiptabs = 0;
812 			}
813 			if (c != *eofp)
814 				break;
815 			Xcheck(xs, xp);
816 			Xput(xs, xp, c);
817 			eofp++;
818 		}
819 		/* Allow EOF here so commands with out trailing newlines
820 		 * will work (eg, ksh -c '...', $(...), etc).
821 		 */
822 		if (*eofp == '\0' && (c == 0 || c == '\n')) {
823 			xp = Xrestpos(xs, xp, xpos);
824 			break;
825 		}
826 		ungetsc(c);
827 		while ((c = getsc()) != '\n') {
828 			if (c == 0)
829 				yyerror("here document `%s' unclosed\n", eof);
830 			Xcheck(xs, xp);
831 			Xput(xs, xp, c);
832 		}
833 		Xcheck(xs, xp);
834 		Xput(xs, xp, c);
835 	}
836 	Xput(xs, xp, '\0');
837 	iop->heredoc = Xclose(xs, xp);
838 
839 	if (!(iop->flag & IOEVAL))
840 		ignore_backslash_newline--;
841 }
842 
843 void
844 #ifdef HAVE_PROTOTYPES
845 yyerror(const char *fmt, ...)
846 #else
847 yyerror(fmt, va_alist)
848 	const char *fmt;
849 	va_dcl
850 #endif
851 {
852 	va_list va;
853 
854 	/* pop aliases and re-reads */
855 	while (source->type == SALIAS || source->type == SREREAD)
856 		source = source->next;
857 	source->str = null;	/* zap pending input */
858 
859 	error_prefix(TRUE);
860 	SH_VA_START(va, fmt);
861 	shf_vfprintf(shl_out, fmt, va);
862 	va_end(va);
863 	errorf(null);
864 }
865 
866 /*
867  * input for yylex with alias expansion
868  */
869 
870 Source *
871 pushs(type, areap)
872 	int type;
873 	Area *areap;
874 {
875 	register Source *s;
876 
877 	s = (Source *) alloc(sizeof(Source), areap);
878 	s->type = type;
879 	s->str = null;
880 	s->start = NULL;
881 	s->line = 0;
882 	s->errline = 0;
883 	s->file = NULL;
884 	s->flags = 0;
885 	s->next = NULL;
886 	s->areap = areap;
887 	if (type == SFILE || type == SSTDIN) {
888 		char *dummy;
889 		Xinit(s->xs, dummy, 256, s->areap);
890 	} else
891 		memset(&s->xs, 0, sizeof(s->xs));
892 	return s;
893 }
894 
/*
 * Slow path behind the getsc_() macro: return the next character of
 * input.  When the current source's buffer is exhausted the source is
 * refilled, switched to its next phase, or popped off the source stack,
 * depending on its type.  Returns 0 at end of input, after turning the
 * source into an SEOF source.
 */
static int
getsc__()
{
	register Source *s = source;
	register int c;

	while ((c = *s->str++) == 0) {
		s->str = NULL;		/* return 0 for EOF by default */
		switch (s->type) {
		  case SEOF:
			s->str = null;
			return 0;

		  case SSTDIN:
		  case SFILE:
			/* fetch another line; leaves s->str NULL on EOF */
			getsc_line(s);
			break;

		  case SWSTR:
			break;

		  case SSTRING:
			break;

		  case SWORDS:
			/* next word of the vector; a separator follows it */
			s->start = s->str = *s->u.strv++;
			s->type = SWORDSEP;
			break;

		  case SWORDSEP:
			/* newline after the last word, space between words */
			if (*s->u.strv == NULL) {
				s->start = s->str = newline;
				s->type = SEOF;
			} else {
				s->start = s->str = space;
				s->type = SWORDS;
			}
			break;

		  case SALIAS:
			if (s->flags & SF_ALIASEND) {
				/* pass on an unused SF_ALIAS flag */
				source = s->next;
				source->flags |= s->flags & SF_ALIAS;
				s = source;
			} else if (*s->u.tblp->val.s
				 && isspace((unsigned char)strchr(s->u.tblp->val.s, 0)[-1]))
			{
				source = s = s->next;	/* pop source stack */
				/* Note that this alias ended with a space,
				 * enabling alias expansion on the following
				 * word.
				 */
				s->flags |= SF_ALIAS;
			} else {
				/* At this point, we need to keep the current
				 * alias in the source list so recursive
				 * aliases can be detected and we also need
				 * to return the next character.  Do this
				 * by temporarily popping the alias to get
				 * the next character and then put it back
				 * in the source list with the SF_ALIASEND
				 * flag set.
				 */
				source = s->next;	/* pop source stack */
				source->flags |= s->flags & SF_ALIAS;
				c = getsc__();
				if (c) {
					s->flags |= SF_ALIASEND;
					s->ugbuf[0] = c; s->ugbuf[1] = '\0';
					s->start = s->str = s->ugbuf;
					s->next = source;
					source = s;
				} else {
					s = source;
					/* avoid reading eof twice */
					s->str = NULL;
					/* leaves the switch; the NULL check
					 * below then reports end of input
					 */
					break;
				}
			}
			/* re-test the loop condition on the new current source */
			continue;

		  case SREREAD:
			/* text that was not pushed back through ugbuf is
			 * owned by the source and is released here
			 */
			if (s->start != s->ugbuf) /* yuck */
				afree(s->u.freeme, ATEMP);
			source = s = s->next;
			continue;
		}
		/* nothing supplied more input: this source is finished */
		if (s->str == NULL) {
			s->type = SEOF;
			s->start = s->str = null;
			return '\0';
		}
		/* echo the newly obtained input when SF_ECHO is set */
		if (s->flags & SF_ECHO) {
			shf_puts(s->str, shl_out);
			shf_flush(shl_out);
		}
	}
	return c;
}
995 
/*
 * Read the next line of input for source s into its buffer s->xs and
 * point s->start and s->str at it.  On end of file s->str is left NULL
 * (and an SFILE source has its descriptor closed).  For an interactive
 * source the prompt is printed or the line editor is used, the line is
 * saved in the history (when HISTORY is defined), and the prompt is
 * switched to PS2 afterwards.
 */
static void
getsc_line(s)
	Source *s;
{
	char *xp = Xstring(s->xs, xp);
	int interactive = Flag(FTALKING) && s->type == SSTDIN;
	int have_tty = interactive && (s->flags & SF_TTY);

	/* Done here to ensure nothing odd happens when a timeout occurs */
	XcheckN(s->xs, xp, LINE);
	*xp = '\0';
	s->start = s->str = xp;

#ifdef KSH
	/* arm the TMOUT alarm while waiting for terminal input */
	if (have_tty && ksh_tmout) {
		ksh_tmout_state = TMOUT_READING;
		alarm(ksh_tmout);
	}
#endif /* KSH */
#ifdef EDIT
	if (have_tty && (0
# ifdef VI
			 || Flag(FVI)
# endif /* VI */
# ifdef EMACS
			 || Flag(FEMACS) || Flag(FGMACS)
# endif /* EMACS */
		))
	{
		int nread;

		nread = x_read(xp, LINE);
		if (nread < 0)	/* read error */
			nread = 0;
		xp[nread] = '\0';
		xp += nread;
	}
	else
#endif /* EDIT */
	{
		if (interactive) {
			pprompt(prompt, 0);
		} else
			s->line++;

		while (1) {
			char *p = shf_getse(xp, Xnleft(s->xs, xp), s->u.shf);

			/* interrupted read: run pending traps and retry */
			if (!p && shf_error(s->u.shf)
			    && shf_errno(s->u.shf) == EINTR)
			{
				shf_clearerr(s->u.shf);
				if (trap)
					runtraps(0);
				continue;
			}
			/* stop on EOF/error or once a whole line is in */
			if (!p || (xp = p, xp[-1] == '\n'))
				break;
			/* double buffer size */
			xp++; /* move past null so doubling works... */
			XcheckN(s->xs, xp, Xlength(s->xs, xp));
			xp--; /* ...and move back again */
		}
		/* flush any unwanted input so other programs/builtins
		 * can read it.  Not very optimal, but less error prone
		 * than flushing else where, dealing with redirections,
		 * etc..
		 * todo: reduce size of shf buffer (~128?) if SSTDIN
		 */
		if (s->type == SSTDIN)
			shf_flush(s->u.shf);
	}
	/* XXX: temporary kludge to restore source after a
	 * trap may have been executed.
	 */
	source = s;
#ifdef KSH
	/* input arrived: cancel the TMOUT alarm */
	if (have_tty && ksh_tmout)
	{
		ksh_tmout_state = TMOUT_EXECUTING;
		alarm(0);
	}
#endif /* KSH */
	s->start = s->str = Xstring(s->xs, xp);
	strip_nuls(Xstring(s->xs, xp), Xlength(s->xs, xp));
	/* Note: if input is all nulls, this is not eof */
	if (Xlength(s->xs, xp) == 0) { /* EOF */
		if (s->type == SFILE)
			shf_fdclose(s->u.shf);
		s->str = NULL;
	} else if (interactive) {
#ifdef HISTORY
		char *p = Xstring(s->xs, xp);
		/* at the PS1 prompt, skip leading IFS white space before
		 * deciding whether there is anything to save
		 */
		if (cur_prompt == PS1)
			while (*p && ctype(*p, C_IFS) && ctype(*p, C_IFSWS))
				p++;
		if (*p) {
# ifdef EASY_HISTORY
			if (cur_prompt == PS2)
				histappend(Xstring(s->xs, xp), 1);
			else
# endif /* EASY_HISTORY */
			{
				s->line++;
				histsave(s->line, s->str, 1);
			}
		}
#endif /* HISTORY */
	}
	if (interactive)
		set_prompt(PS2, (Source *) 0);
}
1108 
/*
 * Select the prompt to use: records `to' (PS1 or PS2) in cur_prompt and
 * sets the global prompt string from the corresponding variable.  For PS1
 * under KSH, "!!" is replaced by "!" and a single "!" by s->line + 1 (or 0
 * when s is null) before the result is passed through substitute().
 * Any other value of `to' only updates cur_prompt.
 */
void
set_prompt(to, s)
	int to;
	Source *s;
{
	cur_prompt = to;

	switch (to) {
	case PS1: /* command */
#ifdef KSH
		/* Substitute ! and !! here, before substitutions are done
		 * so ! in expanded variables are not expanded.
		 * NOTE: this is not what at&t ksh does (it does it after
		 * substitutions, POSIX doesn't say which is to be done.
		 */
		{
			struct shf *shf;
			char *ps1;
			Area *saved_atemp;
#ifdef __GNUC__
			/* NOTE(review): presumably here to stop gcc warning
			 * that ps1 may be clobbered across the sigsetjmp
			 * below - confirm
			 */
			(void) &ps1;
#endif

			ps1 = str_val(global("PS1"));
			shf = shf_sopen((char *) 0, strlen(ps1) * 2,
				SHF_WR | SHF_DYNAMIC, (struct shf *) 0);
			while (*ps1) {
				/* copy ordinary characters and the second !
				 * of "!!"; a lone ! becomes the number
				 */
				if (*ps1 != '!' || *++ps1 == '!')
					shf_putchar(*ps1++, shf);
				else
					shf_fprintf(shf, "%d",
						s ? s->line + 1 : 0);
			}
			ps1 = shf_sclose(shf);
			saved_atemp = ATEMP;
			newenv(E_ERRH);
			if (ksh_sigsetjmp(e->jbuf, 0)) {
				prompt = safe_prompt;
				/* Don't print an error - assume it has already
				 * been printed.  Reason is we may have forked
				 * to run a command and the child may be
				 * unwinding its stack through this code as it
				 * exits.
				 */
			} else
				/* the result is saved in the area that was
				 * ATEMP before newenv()
				 */
				prompt = str_save(substitute(ps1, 0),
						 saved_atemp);
			quitenv();
		}
#else /* KSH */
		prompt = str_val(global("PS1"));
#endif /* KSH */
		break;

	case PS2: /* command continuation */
		prompt = str_val(global("PS2"));
		break;
	}
}
1168 
1169 /* See also related routine, promptlen() in edit.c */
1170 void
1171 pprompt(cp, ntruncate)
1172 	const char *cp;
1173 	int ntruncate;
1174 {
1175 #if 0
1176 	char nbuf[32];
1177 	int c;
1178 
1179 	while (*cp != 0) {
1180 		if (*cp != '!')
1181 			c = *cp++;
1182 		else if (*++cp == '!')
1183 			c = *cp++;
1184 		else {
1185 			int len;
1186 			char *p;
1187 
1188 			shf_snprintf(p = nbuf, sizeof(nbuf), "%d",
1189 				source->line + 1);
1190 			len = strlen(nbuf);
1191 			if (ntruncate) {
1192 				if (ntruncate >= len) {
1193 					ntruncate -= len;
1194 					continue;
1195 				}
1196 				p += ntruncate;
1197 				len -= ntruncate;
1198 				ntruncate = 0;
1199 			}
1200 			shf_write(p, len, shl_out);
1201 			continue;
1202 		}
1203 		if (ntruncate)
1204 			--ntruncate;
1205 		else
1206 			shf_putc(c, shl_out);
1207 	}
1208 #endif /* 0 */
1209 	shf_puts(cp + ntruncate, shl_out);
1210 	shf_flush(shl_out);
1211 }
1212 
1213 /* Read the variable part of a ${...} expression (ie, up to but not including
1214  * the :[-+?=#%] or close-brace.
1215  */
1216 static char *
1217 get_brace_var(wsp, wp)
1218 	XString *wsp;
1219 	char *wp;
1220 {
1221 	enum parse_state {
1222 			   PS_INITIAL, PS_SAW_HASH, PS_IDENT,
1223 			   PS_NUMBER, PS_VAR1, PS_END
1224 			 }
1225 		state;
1226 	char c;
1227 
1228 	state = PS_INITIAL;
1229 	while (1) {
1230 		c = getsc();
1231 		/* State machine to figure out where the variable part ends. */
1232 		switch (state) {
1233 		  case PS_INITIAL:
1234 			if (c == '#') {
1235 				state = PS_SAW_HASH;
1236 				break;
1237 			}
1238 			/* fall through.. */
1239 		  case PS_SAW_HASH:
1240 			if (letter(c))
1241 				state = PS_IDENT;
1242 			else if (digit(c))
1243 				state = PS_NUMBER;
1244 			else if (ctype(c, C_VAR1))
1245 				state = PS_VAR1;
1246 			else
1247 				state = PS_END;
1248 			break;
1249 		  case PS_IDENT:
1250 			if (!letnum(c)) {
1251 				state = PS_END;
1252 				if (c == '[') {
1253 					char *tmp, *p;
1254 
1255 					if (!arraysub(&tmp))
1256 						yyerror("missing ]\n");
1257 					*wp++ = c;
1258 					for (p = tmp; *p; ) {
1259 						Xcheck(*wsp, wp);
1260 						*wp++ = *p++;
1261 					}
1262 					afree(tmp, ATEMP);
1263 					c = getsc(); /* the ] */
1264 				}
1265 			}
1266 			break;
1267 		  case PS_NUMBER:
1268 			if (!digit(c))
1269 				state = PS_END;
1270 			break;
1271 		  case PS_VAR1:
1272 			state = PS_END;
1273 			break;
1274 		  case PS_END: /* keep gcc happy */
1275 			break;
1276 		}
1277 		if (state == PS_END) {
1278 			*wp++ = '\0';	/* end of variable part */
1279 			ungetsc(c);
1280 			break;
1281 		}
1282 		Xcheck(*wsp, wp);
1283 		*wp++ = c;
1284 	}
1285 	return wp;
1286 }
1287 
1288 /*
1289  * Save an array subscript - returns true if matching bracket found, false
1290  * if eof or newline was found.
1291  * (Returned string double null terminated)
1292  */
1293 static int
1294 arraysub(strp)
1295 	char **strp;
1296 {
1297 	XString ws;
1298 	char	*wp;
1299 	char	c;
1300 	int 	depth = 1;	/* we are just past the initial [ */
1301 
1302 	Xinit(ws, wp, 32, ATEMP);
1303 
1304 	do {
1305 		c = getsc();
1306 		Xcheck(ws, wp);
1307 		*wp++ = c;
1308 		if (c == '[')
1309 			depth++;
1310 		else if (c == ']')
1311 			depth--;
1312 	} while (depth > 0 && c && c != '\n');
1313 
1314 	*wp++ = '\0';
1315 	*strp = Xclose(ws, wp);
1316 
1317 	return depth == 0 ? 1 : 0;
1318 }
1319 
1320 /* Unget a char: handles case when we are already at the start of the buffer */
1321 static const char *
1322 ungetsc(c)
1323 	int c;
1324 {
1325 	if (backslash_skip)
1326 		backslash_skip--;
1327 	/* Don't unget eof... */
1328 	if (source->str == null && c == '\0')
1329 		return source->str;
1330 	if (source->str > source->start)
1331 		source->str--;
1332 	else {
1333 		Source *s;
1334 
1335 		s = pushs(SREREAD, source->areap);
1336 		s->ugbuf[0] = c; s->ugbuf[1] = '\0';
1337 		s->start = s->str = s->ugbuf;
1338 		s->next = source;
1339 		source = s;
1340 	}
1341 	return source->str;
1342 }
1343 
1344 
1345 /* Called to get a char that isn't a \newline sequence. */
1346 static int
1347 getsc_bn ARGS((void))
1348 {
1349 	int c, c2;
1350 
1351 	if (ignore_backslash_newline)
1352 		return getsc_();
1353 
1354 	if (backslash_skip == 1) {
1355 		backslash_skip = 2;
1356 		return getsc_();
1357 	}
1358 
1359 	backslash_skip = 0;
1360 
1361 	while (1) {
1362 		c = getsc_();
1363 		if (c == '\\') {
1364 			if ((c2 = getsc_()) == '\n')
1365 				/* ignore the \newline; get the next char... */
1366 				continue;
1367 			ungetsc(c2);
1368 			backslash_skip = 1;
1369 		}
1370 		return c;
1371 	}
1372 }
1373 
1374 static Lex_state *
1375 push_state_(si, old_end)
1376 	State_info *si;
1377 	Lex_state *old_end;
1378 {
1379 	Lex_state	*new = alloc(sizeof(Lex_state) * STATE_BSIZE, ATEMP);
1380 
1381 	new[0].ls_info.base = old_end;
1382 	si->base = &new[0];
1383 	si->end = &new[STATE_BSIZE];
1384 	return &new[1];
1385 }
1386 
1387 static Lex_state *
1388 pop_state_(si, old_end)
1389 	State_info *si;
1390 	Lex_state *old_end;
1391 {
1392 	Lex_state *old_base = si->base;
1393 
1394 	si->base = old_end->ls_info.base - STATE_BSIZE;
1395 	si->end = old_end->ls_info.base;
1396 
1397 	afree(old_base, ATEMP);
1398 
1399 	return si->base + STATE_BSIZE - 1;;
1400 }
1401