xref: /openbsd/bin/ksh/lex.c (revision 55449a4b)
1 /*	$OpenBSD: lex.c,v 1.80 2024/04/28 16:43:15 florian Exp $	*/
2 
3 /*
4  * lexical analysis and source input
5  */
6 
7 #include <ctype.h>
8 #include <errno.h>
9 #include <libgen.h>
10 #include <stdio.h>
11 #include <string.h>
12 #include <unistd.h>
13 
14 #include "sh.h"
15 
16 /*
17  * states while lexing word
18  */
/* Numeric identifiers for the word-lexing states used by yylex(). */
#define SINVALID	(-1)	/* invalid state */
#define SBASE		0	/* outside any lexical construct */
#define SWORD		1	/* implicit quoting for substitute() */
#define SLETPAREN	2	/* inside (( )), implicit quoting */
#define SSQUOTE		3	/* inside '' */
#define SDQUOTE		4	/* inside "" */
#define SBRACE		5	/* inside ${} */
#define SCSPAREN	6	/* inside $() */
#define SBQUOTE		7	/* inside `` */
#define SASPAREN	8	/* inside $(( )) */
#define SHEREDELIM	9	/* parsing the <<, <<- delimiter */
#define SHEREDQUOTE	10	/* parsing " inside a <<, <<- delimiter */
#define SPATTERN	11	/* parsing a *(...|...) pattern (*+?@!) */
#define STBRACE		12	/* parsing ${..[#%]..} */
#define SBRACEQ		13	/* inside "${}" */
34 
35 /* Structure to keep track of the lexing state and the various pieces of info
36  * needed for each particular state.
37  */
/*
 * One entry of the lexer's state stack: the state number plus whatever
 * bookkeeping that particular state needs.  Only one member of ls_info is
 * meaningful at a time.
 */
typedef struct lex_state Lex_state;
struct lex_state {
	int ls_state;			/* one of the S* state values */
	union {
		struct scsparen_info {	/* $(...) */
			int nparen;	/* count of open parentheses */
			int csstate;	/* XXX remove */
		} u_scsparen;

		struct sasparen_info {	/* $((...)) */
			int nparen;	/* count of open parentheses */
			int start;	/* marks start of $(( in output str */
		} u_sasparen;

		struct sletparen_info {	/* ((...)) */
			int nparen;	/* count of open parentheses */
		} u_sletparen;

		struct sbquote_info {	/* `...` */
			int indquotes;	/* true if in double quotes: "`...`" */
		} u_sbquote;

		Lex_state *base;	/* used to point to next state block */
	} ls_info;
};

/* Shorthand accessors for the per-state members of ls_info. */
#define ls_scsparen	ls_info.u_scsparen
#define ls_sasparen	ls_info.u_sasparen
#define ls_sletparen	ls_info.u_sletparen
#define ls_sbquote	ls_info.u_sbquote
71 
72 typedef struct State_info State_info;
73 struct State_info {
74 	Lex_state	*base;
75 	Lex_state	*end;
76 };
77 
78 
79 static void	readhere(struct ioword *);
80 static int	getsc__(void);
81 static void	getsc_line(Source *);
82 static int	getsc_bn(void);
83 static char	*get_brace_var(XString *, char *);
84 static int	arraysub(char **);
85 static const char *ungetsc(int);
86 static void	gethere(void);
87 static Lex_state *push_state_(State_info *, Lex_state *);
88 static Lex_state *pop_state_(State_info *, Lex_state *);
89 static char	*special_prompt_expand(char *);
90 static int	dopprompt(const char *, int, const char **, int);
91 int		promptlen(const char *cp, const char **spp);
92 
93 static int backslash_skip;
94 static int ignore_backslash_newline;
95 
96 Source *source;		/* yyparse/yylex source */
97 YYSTYPE	yylval;		/* result from yylex */
98 struct ioword *heres[HERES], **herep;
99 char	ident[IDENT+1];
100 
101 char   **history;	/* saved commands */
102 char   **histptr;	/* last history item */
103 uint32_t histsize;	/* history size */
104 
105 /* optimized getsc_bn() */
/*
 * Fast path for getsc_bn(): take the next byte straight from the current
 * source unless it is NUL or a backslash, or a backslash skip is pending;
 * in those cases defer to getsc_bn().
 */
#define getsc() \
	((*source->str == '\0' || *source->str == '\\' || backslash_skip) ? \
	    getsc_bn() : *source->str++)

/* Fast path for getsc__(): only call it when the current buffer is used up. */
#define	getsc_() \
	((*source->str == '\0') ? getsc__() : *source->str++)

/* Number of Lex_state entries in one block of the state stack. */
#define STATE_BSIZE	32

/*
 * Enter lexer state (s).  Relies on the caller's locals `statep', `state'
 * and `state_info'; push_state_() is called when the current block is full.
 */
#define PUSH_STATE(s) \
	do { \
		statep++; \
		if (statep == state_info.end) \
			statep = push_state_(&state_info, statep); \
		statep->ls_state = (s); \
		state = statep->ls_state; \
	} while (0)

/*
 * Leave the current lexer state and reload `state' from the entry below;
 * pop_state_() is called when the bottom of the current block is reached.
 */
#define POP_STATE() \
	do { \
		statep--; \
		if (statep == state_info.base) \
			statep = pop_state_(&state_info, statep); \
		state = statep->ls_state; \
	} while (0)
124 
125 
126 
127 /*
128  * Lexical analyzer
129  *
130  * tokens are not regular expressions, they are LL(1).
131  * for example, "${var:-${PWD}}", and "$(size $(whence ksh))".
132  * hence the state stack.
133  */
134 
135 int
yylex(int cf)136 yylex(int cf)
137 {
138 	Lex_state states[STATE_BSIZE], *statep;
139 	State_info state_info;
140 	int c, state;
141 	XString ws;		/* expandable output word */
142 	char *wp;		/* output word pointer */
143 	char *sp, *dp;
144 	int c2;
145 
146 
147   Again:
148 	states[0].ls_state = SINVALID;
149 	states[0].ls_info.base = NULL;
150 	statep = &states[1];
151 	state_info.base = states;
152 	state_info.end = &states[STATE_BSIZE];
153 
154 	Xinit(ws, wp, 64, ATEMP);
155 
156 	backslash_skip = 0;
157 	ignore_backslash_newline = 0;
158 
159 	if (cf&ONEWORD)
160 		state = SWORD;
161 	else if (cf&LETEXPR) {
162 		*wp++ = OQUOTE;	 /* enclose arguments in (double) quotes */
163 		state = SLETPAREN;
164 		statep->ls_sletparen.nparen = 0;
165 	} else {		/* normal lexing */
166 		state = (cf & HEREDELIM) ? SHEREDELIM : SBASE;
167 		while ((c = getsc()) == ' ' || c == '\t')
168 			;
169 		if (c == '#') {
170 			ignore_backslash_newline++;
171 			while ((c = getsc()) != '\0' && c != '\n')
172 				;
173 			ignore_backslash_newline--;
174 		}
175 		ungetsc(c);
176 	}
177 	if (source->flags & SF_ALIAS) {	/* trailing ' ' in alias definition */
178 		source->flags &= ~SF_ALIAS;
179 		/* In POSIX mode, a trailing space only counts if we are
180 		 * parsing a simple command
181 		 */
182 		if (!Flag(FPOSIX) || (cf & CMDWORD))
183 			cf |= ALIAS;
184 	}
185 
186 	/* Initial state: one of SBASE SHEREDELIM SWORD SASPAREN */
187 	statep->ls_state = state;
188 
189 	/* collect non-special or quoted characters to form word */
190 	while (!((c = getsc()) == 0 ||
191 	    ((state == SBASE || state == SHEREDELIM) && ctype(c, C_LEX1)))) {
192 		Xcheck(ws, wp);
193 		switch (state) {
194 		case SBASE:
195 			if (Flag(FCSHHISTORY) && (source->flags & SF_TTY) &&
196 			    c == '!') {
197 				char **replace = NULL;
198 				int get, i;
199 				char match[200] = { 0 }, *str = match;
200 				size_t mlen;
201 
202 				c2 = getsc();
203 				if (c2 == '\0' || c2 == ' ' || c2 == '\t')
204 					;
205 				else if (c2 == '!')
206 					replace = hist_get_newest(0);
207 				else if (isdigit(c2) || c2 == '-' ||
208 				    isalpha(c2)) {
209 					get = !isalpha(c2);
210 
211 					*str++ = c2;
212 					do {
213 						if ((c2 = getsc()) == '\0')
214 							break;
215 						if (c2 == '\t' || c2 == ' ' ||
216 						    c2 == '\n') {
217 							ungetsc(c2);
218 							break;
219 						}
220 						*str++ = c2;
221 					} while (str < &match[sizeof(match)-1]);
222 					*str = '\0';
223 
224 					if (get) {
225 						int h = findhistrel(match);
226 						if (h >= 0)
227 							replace = &history[h];
228 					} else {
229 						int h = findhist(-1, 0, match, true);
230 						if (h >= 0)
231 							replace = &history[h];
232 					}
233 				}
234 
235 				/*
236 				 * XXX ksh history buffer saves un-expanded
237 				 * commands. Until the history buffer code is
238 				 * changed to contain expanded commands, we
239 				 * ignore the bad commands (spinning sucks)
240 				 */
241 				if (replace && **replace == '!')
242 					ungetsc(c2);
243 				else if (replace) {
244 					Source *s;
245 
246 					/* do not strdup replacement via alloc */
247 					s = pushs(SREREAD, source->areap);
248 					s->start = s->str = *replace;
249 					s->next = source;
250 					s->u.freeme = NULL;
251 					source = s;
252 					continue;
253 				} else if (*match != '\0') {
254 					/* restore what followed the '!' */
255 					mlen = strlen(match);
256 					for (i = mlen-1; i >= 0; i--)
257 						ungetsc(match[i]);
258 				} else
259 					ungetsc(c2);
260 			}
261 			if (c == '[' && (cf & (VARASN|ARRAYVAR))) {
262 				*wp = EOS; /* temporary */
263 				if (is_wdvarname(Xstring(ws, wp), false)) {
264 					char *p, *tmp;
265 
266 					if (arraysub(&tmp)) {
267 						*wp++ = CHAR;
268 						*wp++ = c;
269 						for (p = tmp; *p; ) {
270 							Xcheck(ws, wp);
271 							*wp++ = CHAR;
272 							*wp++ = *p++;
273 						}
274 						afree(tmp, ATEMP);
275 						break;
276 					} else {
277 						Source *s;
278 
279 						s = pushs(SREREAD,
280 							  source->areap);
281 						s->start = s->str
282 							= s->u.freeme = tmp;
283 						s->next = source;
284 						source = s;
285 					}
286 				}
287 				*wp++ = CHAR;
288 				*wp++ = c;
289 				break;
290 			}
291 			/* FALLTHROUGH */
292 		  Sbase1:	/* includes *(...|...) pattern (*+?@!) */
293 			if (c == '*' || c == '@' || c == '+' || c == '?' ||
294 			    c == '!') {
295 				c2 = getsc();
296 				if (c2 == '(' /*)*/ ) {
297 					*wp++ = OPAT;
298 					*wp++ = c;
299 					PUSH_STATE(SPATTERN);
300 					break;
301 				}
302 				ungetsc(c2);
303 			}
304 			/* FALLTHROUGH */
305 		  Sbase2:	/* doesn't include *(...|...) pattern (*+?@!) */
306 			switch (c) {
307 			case '\\':
308 				c = getsc();
309 				if (c) /* trailing \ is lost */
310 					*wp++ = QCHAR, *wp++ = c;
311 				break;
312 			case '\'':
313 				if ((cf & HEREDOC) || state == SBRACEQ) {
314 					*wp++ = CHAR, *wp++ = c;
315 					break;
316 				}
317 				*wp++ = OQUOTE;
318 				ignore_backslash_newline++;
319 				PUSH_STATE(SSQUOTE);
320 				break;
321 			case '"':
322 				*wp++ = OQUOTE;
323 				PUSH_STATE(SDQUOTE);
324 				break;
325 			default:
326 				goto Subst;
327 			}
328 			break;
329 
330 		  Subst:
331 			switch (c) {
332 			case '\\':
333 				c = getsc();
334 				switch (c) {
335 				case '\\':
336 				case '$': case '`':
337 					*wp++ = QCHAR, *wp++ = c;
338 					break;
339 				case '"':
340 					if ((cf & HEREDOC) == 0) {
341 						*wp++ = QCHAR, *wp++ = c;
342 						break;
343 					}
344 					/* FALLTHROUGH */
345 				default:
346 					if (cf & UNESCAPE) {
347 						*wp++ = QCHAR, *wp++ = c;
348 						break;
349 					}
350 					Xcheck(ws, wp);
351 					if (c) { /* trailing \ is lost */
352 						*wp++ = CHAR, *wp++ = '\\';
353 						*wp++ = CHAR, *wp++ = c;
354 					}
355 					break;
356 				}
357 				break;
358 			case '$':
359 				c = getsc();
360 				if (c == '(') /*)*/ {
361 					c = getsc();
362 					if (c == '(') /*)*/ {
363 						PUSH_STATE(SASPAREN);
364 						statep->ls_sasparen.nparen = 2;
365 						statep->ls_sasparen.start =
366 						    Xsavepos(ws, wp);
367 						*wp++ = EXPRSUB;
368 					} else {
369 						ungetsc(c);
370 						PUSH_STATE(SCSPAREN);
371 						statep->ls_scsparen.nparen = 1;
372 						statep->ls_scsparen.csstate = 0;
373 						*wp++ = COMSUB;
374 					}
375 				} else if (c == '{') /*}*/ {
376 					*wp++ = OSUBST;
377 					*wp++ = '{'; /*}*/
378 					wp = get_brace_var(&ws, wp);
379 					c = getsc();
380 					/* allow :# and :% (ksh88 compat) */
381 					if (c == ':') {
382 						*wp++ = CHAR, *wp++ = c;
383 						c = getsc();
384 					}
385 					/* If this is a trim operation,
386 					 * treat (,|,) specially in STBRACE.
387 					 */
388 					if (c == '#' || c == '%') {
389 						ungetsc(c);
390 						PUSH_STATE(STBRACE);
391 					} else {
392 						ungetsc(c);
393 						if (state == SDQUOTE ||
394 						    state == SBRACEQ)
395 							PUSH_STATE(SBRACEQ);
396 						else
397 							PUSH_STATE(SBRACE);
398 					}
399 				} else if (ctype(c, C_ALPHA)) {
400 					*wp++ = OSUBST;
401 					*wp++ = 'X';
402 					do {
403 						Xcheck(ws, wp);
404 						*wp++ = c;
405 						c = getsc();
406 					} while (ctype(c, C_ALPHA) || digit(c));
407 					*wp++ = '\0';
408 					*wp++ = CSUBST;
409 					*wp++ = 'X';
410 					ungetsc(c);
411 				} else if (ctype(c, C_VAR1) || digit(c)) {
412 					Xcheck(ws, wp);
413 					*wp++ = OSUBST;
414 					*wp++ = 'X';
415 					*wp++ = c;
416 					*wp++ = '\0';
417 					*wp++ = CSUBST;
418 					*wp++ = 'X';
419 				} else {
420 					*wp++ = CHAR, *wp++ = '$';
421 					ungetsc(c);
422 				}
423 				break;
424 			case '`':
425 				PUSH_STATE(SBQUOTE);
426 				*wp++ = COMSUB;
427 				/* Need to know if we are inside double quotes
428 				 * since sh/at&t-ksh translate the \" to " in
429 				 * "`..\"..`".
430 				 */
431 				statep->ls_sbquote.indquotes = 0;
432 				Lex_state *s = statep;
433 				Lex_state *base = state_info.base;
434 				while (1) {
435 					for (; s != base; s--) {
436 						if (s->ls_state == SDQUOTE) {
437 							statep->ls_sbquote.indquotes = 1;
438 							break;
439 						}
440 					}
441 					if (s != base)
442 						break;
443 					if (!(s = s->ls_info.base))
444 						break;
445 					base = s-- - STATE_BSIZE;
446 				}
447 				break;
448 			default:
449 				*wp++ = CHAR, *wp++ = c;
450 			}
451 			break;
452 
453 		case SSQUOTE:
454 			if (c == '\'') {
455 				POP_STATE();
456 				if (state == SBRACEQ) {
457 					*wp++ = CHAR, *wp++ = c;
458 					break;
459 				}
460 				*wp++ = CQUOTE;
461 				ignore_backslash_newline--;
462 			} else
463 				*wp++ = QCHAR, *wp++ = c;
464 			break;
465 
466 		case SDQUOTE:
467 			if (c == '"') {
468 				POP_STATE();
469 				*wp++ = CQUOTE;
470 			} else
471 				goto Subst;
472 			break;
473 
474 		case SCSPAREN: /* $( .. ) */
475 			/* todo: deal with $(...) quoting properly
476 			 * kludge to partly fake quoting inside $(..): doesn't
477 			 * really work because nested $(..) or ${..} inside
478 			 * double quotes aren't dealt with.
479 			 */
480 			switch (statep->ls_scsparen.csstate) {
481 			case 0: /* normal */
482 				switch (c) {
483 				case '(':
484 					statep->ls_scsparen.nparen++;
485 					break;
486 				case ')':
487 					statep->ls_scsparen.nparen--;
488 					break;
489 				case '\\':
490 					statep->ls_scsparen.csstate = 1;
491 					break;
492 				case '"':
493 					statep->ls_scsparen.csstate = 2;
494 					break;
495 				case '\'':
496 					statep->ls_scsparen.csstate = 4;
497 					ignore_backslash_newline++;
498 					break;
499 				}
500 				break;
501 
502 			case 1: /* backslash in normal mode */
503 			case 3: /* backslash in double quotes */
504 				--statep->ls_scsparen.csstate;
505 				break;
506 
507 			case 2: /* double quotes */
508 				if (c == '"')
509 					statep->ls_scsparen.csstate = 0;
510 				else if (c == '\\')
511 					statep->ls_scsparen.csstate = 3;
512 				break;
513 
514 			case 4: /* single quotes */
515 				if (c == '\'') {
516 					statep->ls_scsparen.csstate = 0;
517 					ignore_backslash_newline--;
518 				}
519 				break;
520 			}
521 			if (statep->ls_scsparen.nparen == 0) {
522 				POP_STATE();
523 				*wp++ = 0; /* end of COMSUB */
524 			} else
525 				*wp++ = c;
526 			break;
527 
528 		case SASPAREN: /* $(( .. )) */
529 			/* todo: deal with $((...); (...)) properly */
530 			/* XXX should nest using existing state machine
531 			 * (embed "..", $(...), etc.) */
532 			if (c == '(')
533 				statep->ls_sasparen.nparen++;
534 			else if (c == ')') {
535 				statep->ls_sasparen.nparen--;
536 				if (statep->ls_sasparen.nparen == 1) {
537 					/*(*/
538 					if ((c2 = getsc()) == ')') {
539 						POP_STATE();
540 						*wp++ = 0; /* end of EXPRSUB */
541 						break;
542 					} else {
543 						char *s;
544 
545 						ungetsc(c2);
546 						/* mismatched parenthesis -
547 						 * assume we were really
548 						 * parsing a $(..) expression
549 						 */
550 						s = Xrestpos(ws, wp,
551 						    statep->ls_sasparen.start);
552 						memmove(s + 1, s, wp - s);
553 						*s++ = COMSUB;
554 						*s = '('; /*)*/
555 						wp++;
556 						statep->ls_scsparen.nparen = 1;
557 						statep->ls_scsparen.csstate = 0;
558 						state = statep->ls_state =
559 						    SCSPAREN;
560 					}
561 				}
562 			}
563 			*wp++ = c;
564 			break;
565 
566 		case SBRACEQ:
567 			/*{*/
568 			if (c == '}') {
569 				POP_STATE();
570 				*wp++ = CSUBST;
571 				*wp++ = /*{*/ '}';
572 			} else
573 				goto Sbase2;
574 			break;
575 
576 		case SBRACE:
577 			/*{*/
578 			if (c == '}') {
579 				POP_STATE();
580 				*wp++ = CSUBST;
581 				*wp++ = /*{*/ '}';
582 			} else
583 				goto Sbase1;
584 			break;
585 
586 		case STBRACE:
587 			/* Same as SBRACE, except (,|,) treated specially */
588 			/*{*/
589 			if (c == '}') {
590 				POP_STATE();
591 				*wp++ = CSUBST;
592 				*wp++ = /*{*/ '}';
593 			} else if (c == '|') {
594 				*wp++ = SPAT;
595 			} else if (c == '(') {
596 				*wp++ = OPAT;
597 				*wp++ = ' ';	/* simile for @ */
598 				PUSH_STATE(SPATTERN);
599 			} else
600 				goto Sbase1;
601 			break;
602 
603 		case SBQUOTE:
604 			if (c == '`') {
605 				*wp++ = 0;
606 				POP_STATE();
607 			} else if (c == '\\') {
608 				switch (c = getsc()) {
609 				case '\\':
610 				case '$': case '`':
611 					*wp++ = c;
612 					break;
613 				case '"':
614 					if (statep->ls_sbquote.indquotes) {
615 						*wp++ = c;
616 						break;
617 					}
618 					/* FALLTHROUGH */
619 				default:
620 					if (c) { /* trailing \ is lost */
621 						*wp++ = '\\';
622 						*wp++ = c;
623 					}
624 					break;
625 				}
626 			} else
627 				*wp++ = c;
628 			break;
629 
630 		case SWORD:	/* ONEWORD */
631 			goto Subst;
632 
633 		case SLETPAREN:	/* LETEXPR: (( ... )) */
634 			/*(*/
635 			if (c == ')') {
636 				if (statep->ls_sletparen.nparen > 0)
637 				    --statep->ls_sletparen.nparen;
638 				/*(*/
639 				else if ((c2 = getsc()) == ')') {
640 					c = 0;
641 					*wp++ = CQUOTE;
642 					goto Done;
643 				} else
644 					ungetsc(c2);
645 			} else if (c == '(')
646 				/* parenthesis inside quotes and backslashes
647 				 * are lost, but at&t ksh doesn't count them
648 				 * either
649 				 */
650 				++statep->ls_sletparen.nparen;
651 			goto Sbase2;
652 
653 		case SHEREDELIM:	/* <<,<<- delimiter */
654 			/* XXX chuck this state (and the next) - use
655 			 * the existing states ($ and \`..` should be
656 			 * stripped of their specialness after the
657 			 * fact).
658 			 */
659 			/* here delimiters need a special case since
660 			 * $ and `..` are not to be treated specially
661 			 */
662 			if (c == '\\') {
663 				c = getsc();
664 				if (c) { /* trailing \ is lost */
665 					*wp++ = QCHAR;
666 					*wp++ = c;
667 				}
668 			} else if (c == '\'') {
669 				PUSH_STATE(SSQUOTE);
670 				*wp++ = OQUOTE;
671 				ignore_backslash_newline++;
672 			} else if (c == '"') {
673 				state = statep->ls_state = SHEREDQUOTE;
674 				*wp++ = OQUOTE;
675 			} else {
676 				*wp++ = CHAR;
677 				*wp++ = c;
678 			}
679 			break;
680 
681 		case SHEREDQUOTE:	/* " in <<,<<- delimiter */
682 			if (c == '"') {
683 				*wp++ = CQUOTE;
684 				state = statep->ls_state = SHEREDELIM;
685 			} else {
686 				if (c == '\\') {
687 					switch (c = getsc()) {
688 					case '\\': case '"':
689 					case '$': case '`':
690 						break;
691 					default:
692 						if (c) { /* trailing \ lost */
693 							*wp++ = CHAR;
694 							*wp++ = '\\';
695 						}
696 						break;
697 					}
698 				}
699 				*wp++ = CHAR;
700 				*wp++ = c;
701 			}
702 			break;
703 
704 		case SPATTERN:	/* in *(...|...) pattern (*+?@!) */
705 			if ( /*(*/ c == ')') {
706 				*wp++ = CPAT;
707 				POP_STATE();
708 			} else if (c == '|') {
709 				*wp++ = SPAT;
710 			} else if (c == '(') {
711 				*wp++ = OPAT;
712 				*wp++ = ' ';	/* simile for @ */
713 				PUSH_STATE(SPATTERN);
714 			} else
715 				goto Sbase1;
716 			break;
717 		}
718 	}
719 Done:
720 	Xcheck(ws, wp);
721 	if (statep != &states[1])
722 		/* XXX figure out what is missing */
723 		yyerror("no closing quote\n");
724 
725 	/* This done to avoid tests for SHEREDELIM wherever SBASE tested */
726 	if (state == SHEREDELIM)
727 		state = SBASE;
728 
729 	dp = Xstring(ws, wp);
730 	if ((c == '<' || c == '>') && state == SBASE &&
731 	    ((c2 = Xlength(ws, wp)) == 0 ||
732 	    (c2 == 2 && dp[0] == CHAR && digit(dp[1])))) {
733 		struct ioword *iop = alloc(sizeof(*iop), ATEMP);
734 
735 		if (c2 == 2)
736 			iop->unit = dp[1] - '0';
737 		else
738 			iop->unit = c == '>'; /* 0 for <, 1 for > */
739 
740 		c2 = getsc();
741 		/* <<, >>, <> are ok, >< is not */
742 		if (c == c2 || (c == '<' && c2 == '>')) {
743 			iop->flag = c == c2 ?
744 			    (c == '>' ? IOCAT : IOHERE) : IORDWR;
745 			if (iop->flag == IOHERE) {
746 				if ((c2 = getsc()) == '-')
747 					iop->flag |= IOSKIP;
748 				else
749 					ungetsc(c2);
750 			}
751 		} else if (c2 == '&')
752 			iop->flag = IODUP | (c == '<' ? IORDUP : 0);
753 		else {
754 			iop->flag = c == '>' ? IOWRITE : IOREAD;
755 			if (c == '>' && c2 == '|')
756 				iop->flag |= IOCLOB;
757 			else
758 				ungetsc(c2);
759 		}
760 
761 		iop->name = NULL;
762 		iop->delim = NULL;
763 		iop->heredoc = NULL;
764 		Xfree(ws, wp);	/* free word */
765 		yylval.iop = iop;
766 		return REDIR;
767 	}
768 
769 	if (wp == dp && state == SBASE) {
770 		Xfree(ws, wp);	/* free word */
771 		/* no word, process LEX1 character */
772 		switch (c) {
773 		default:
774 			return c;
775 
776 		case '|':
777 		case '&':
778 		case ';':
779 			if ((c2 = getsc()) == c)
780 				c = (c == ';') ? BREAK :
781 				    (c == '|') ? LOGOR :
782 				    (c == '&') ? LOGAND :
783 				    YYERRCODE;
784 			else if (c == '|' && c2 == '&')
785 				c = COPROC;
786 			else
787 				ungetsc(c2);
788 			return c;
789 
790 		case '\n':
791 			gethere();
792 			if (cf & CONTIN)
793 				goto Again;
794 			return c;
795 
796 		case '(':  /*)*/
797 			if (!Flag(FSH)) {
798 				if ((c2 = getsc()) == '(') /*)*/
799 					/* XXX need to handle ((...); (...)) */
800 					c = MDPAREN;
801 				else
802 					ungetsc(c2);
803 			}
804 			return c;
805 		  /*(*/
806 		case ')':
807 			return c;
808 		}
809 	}
810 
811 	*wp++ = EOS;		/* terminate word */
812 	yylval.cp = Xclose(ws, wp);
813 	if (state == SWORD || state == SLETPAREN)	/* ONEWORD? */
814 		return LWORD;
815 	ungetsc(c);		/* unget terminator */
816 
817 	/* copy word to unprefixed string ident */
818 	for (sp = yylval.cp, dp = ident; dp < ident+IDENT && (c = *sp++) == CHAR; )
819 		*dp++ = *sp++;
820 	/* Make sure the ident array stays '\0' padded */
821 	memset(dp, 0, (ident+IDENT) - dp + 1);
822 	if (c != EOS)
823 		*ident = '\0';	/* word is not unquoted */
824 
825 	if (*ident != '\0' && (cf&(KEYWORD|ALIAS))) {
826 		struct tbl *p;
827 		int h = hash(ident);
828 
829 		/* { */
830 		if ((cf & KEYWORD) && (p = ktsearch(&keywords, ident, h)) &&
831 		    (!(cf & ESACONLY) || p->val.i == ESAC || p->val.i == '}')) {
832 			afree(yylval.cp, ATEMP);
833 			return p->val.i;
834 		}
835 		if ((cf & ALIAS) && (p = ktsearch(&aliases, ident, h)) &&
836 		    (p->flag & ISSET)) {
837 			Source *s;
838 
839 			for (s = source; s->type == SALIAS; s = s->next)
840 				if (s->u.tblp == p)
841 					return LWORD;
842 			/* push alias expansion */
843 			s = pushs(SALIAS, source->areap);
844 			s->start = s->str = p->val.s;
845 			s->u.tblp = p;
846 			s->next = source;
847 			source = s;
848 			afree(yylval.cp, ATEMP);
849 			goto Again;
850 		}
851 	}
852 
853 	return LWORD;
854 }
855 
856 static void
gethere(void)857 gethere(void)
858 {
859 	struct ioword **p;
860 
861 	for (p = heres; p < herep; p++)
862 		readhere(*p);
863 	herep = heres;
864 }
865 
866 /*
867  * read "<<word" text into temp file
868  */
869 
870 static void
readhere(struct ioword * iop)871 readhere(struct ioword *iop)
872 {
873 	int c;
874 	char *volatile eof;
875 	char *eofp;
876 	int skiptabs;
877 	XString xs;
878 	char *xp;
879 	int xpos;
880 
881 	eof = evalstr(iop->delim, 0);
882 
883 	if (!(iop->flag & IOEVAL))
884 		ignore_backslash_newline++;
885 
886 	Xinit(xs, xp, 256, ATEMP);
887 
888 	for (;;) {
889 		eofp = eof;
890 		skiptabs = iop->flag & IOSKIP;
891 		xpos = Xsavepos(xs, xp);
892 		while ((c = getsc()) != 0) {
893 			if (skiptabs) {
894 				if (c == '\t')
895 					continue;
896 				skiptabs = 0;
897 			}
898 			if (c != *eofp)
899 				break;
900 			Xcheck(xs, xp);
901 			Xput(xs, xp, c);
902 			eofp++;
903 		}
904 		/* Allow EOF here so commands with out trailing newlines
905 		 * will work (eg, ksh -c '...', $(...), etc).
906 		 */
907 		if (*eofp == '\0' && (c == 0 || c == '\n')) {
908 			xp = Xrestpos(xs, xp, xpos);
909 			break;
910 		}
911 		ungetsc(c);
912 		while ((c = getsc()) != '\n') {
913 			if (c == 0)
914 				yyerror("here document `%s' unclosed\n", eof);
915 			Xcheck(xs, xp);
916 			Xput(xs, xp, c);
917 		}
918 		Xcheck(xs, xp);
919 		Xput(xs, xp, c);
920 	}
921 	Xput(xs, xp, '\0');
922 	iop->heredoc = Xclose(xs, xp);
923 
924 	if (!(iop->flag & IOEVAL))
925 		ignore_backslash_newline--;
926 }
927 
928 void
yyerror(const char * fmt,...)929 yyerror(const char *fmt, ...)
930 {
931 	va_list va;
932 
933 	/* pop aliases and re-reads */
934 	while (source->type == SALIAS || source->type == SREREAD)
935 		source = source->next;
936 	source->str = null;	/* zap pending input */
937 
938 	error_prefix(true);
939 	va_start(va, fmt);
940 	shf_vfprintf(shl_out, fmt, va);
941 	va_end(va);
942 	errorf(NULL);
943 }
944 
945 /*
946  * input for yylex with alias expansion
947  */
948 
949 Source *
pushs(int type,Area * areap)950 pushs(int type, Area *areap)
951 {
952 	Source *s;
953 
954 	s = alloc(sizeof(Source), areap);
955 	s->type = type;
956 	s->str = null;
957 	s->start = NULL;
958 	s->line = 0;
959 	s->cmd_offset = 0;
960 	s->errline = 0;
961 	s->file = NULL;
962 	s->flags = 0;
963 	s->next = NULL;
964 	s->areap = areap;
965 	if (type == SFILE || type == SSTDIN) {
966 		char *dummy;
967 		Xinit(s->xs, dummy, 256, s->areap);
968 	} else
969 		memset(&s->xs, 0, sizeof(s->xs));
970 	return s;
971 }
972 
973 static int
getsc__(void)974 getsc__(void)
975 {
976 	Source *s = source;
977 	int c;
978 
979 	while ((c = *s->str++) == 0) {
980 		s->str = NULL;		/* return 0 for EOF by default */
981 		switch (s->type) {
982 		case SEOF:
983 			s->str = null;
984 			return 0;
985 
986 		case SSTDIN:
987 		case SFILE:
988 			getsc_line(s);
989 			break;
990 
991 		case SWSTR:
992 			break;
993 
994 		case SSTRING:
995 			break;
996 
997 		case SWORDS:
998 			s->start = s->str = *s->u.strv++;
999 			s->type = SWORDSEP;
1000 			break;
1001 
1002 		case SWORDSEP:
1003 			if (*s->u.strv == NULL) {
1004 				s->start = s->str = "\n";
1005 				s->type = SEOF;
1006 			} else {
1007 				s->start = s->str = " ";
1008 				s->type = SWORDS;
1009 			}
1010 			break;
1011 
1012 		case SALIAS:
1013 			if (s->flags & SF_ALIASEND) {
1014 				/* pass on an unused SF_ALIAS flag */
1015 				source = s->next;
1016 				source->flags |= s->flags & SF_ALIAS;
1017 				s = source;
1018 			} else if (*s->u.tblp->val.s &&
1019 			    isspace((unsigned char)strchr(s->u.tblp->val.s, 0)[-1])) {
1020 				source = s = s->next;	/* pop source stack */
1021 				/* Note that this alias ended with a space,
1022 				 * enabling alias expansion on the following
1023 				 * word.
1024 				 */
1025 				s->flags |= SF_ALIAS;
1026 			} else {
1027 				/* At this point, we need to keep the current
1028 				 * alias in the source list so recursive
1029 				 * aliases can be detected and we also need
1030 				 * to return the next character.  Do this
1031 				 * by temporarily popping the alias to get
1032 				 * the next character and then put it back
1033 				 * in the source list with the SF_ALIASEND
1034 				 * flag set.
1035 				 */
1036 				source = s->next;	/* pop source stack */
1037 				source->flags |= s->flags & SF_ALIAS;
1038 				c = getsc__();
1039 				if (c) {
1040 					s->flags |= SF_ALIASEND;
1041 					s->ugbuf[0] = c; s->ugbuf[1] = '\0';
1042 					s->start = s->str = s->ugbuf;
1043 					s->next = source;
1044 					source = s;
1045 				} else {
1046 					s = source;
1047 					/* avoid reading eof twice */
1048 					s->str = NULL;
1049 					break;
1050 				}
1051 			}
1052 			continue;
1053 
1054 		case SREREAD:
1055 			if (s->start != s->ugbuf) /* yuck */
1056 				afree(s->u.freeme, ATEMP);
1057 			source = s = s->next;
1058 			continue;
1059 		}
1060 		if (s->str == NULL) {
1061 			s->type = SEOF;
1062 			s->start = s->str = null;
1063 			return '\0';
1064 		}
1065 		if (s->flags & SF_ECHO) {
1066 			shf_puts(s->str, shl_out);
1067 			shf_flush(shl_out);
1068 		}
1069 	}
1070 	return c;
1071 }
1072 
1073 static void
getsc_line(Source * s)1074 getsc_line(Source *s)
1075 {
1076 	char *xp = Xstring(s->xs, xp);
1077 	int interactive = Flag(FTALKING) && s->type == SSTDIN;
1078 	int have_tty = interactive && (s->flags & SF_TTY);
1079 
1080 	/* Done here to ensure nothing odd happens when a timeout occurs */
1081 	XcheckN(s->xs, xp, LINE);
1082 	*xp = '\0';
1083 	s->start = s->str = xp;
1084 
1085 	if (have_tty && ksh_tmout) {
1086 		ksh_tmout_state = TMOUT_READING;
1087 		alarm(ksh_tmout);
1088 	}
1089 	if (have_tty && (0
1090 #ifdef VI
1091 	    || Flag(FVI)
1092 #endif /* VI */
1093 #ifdef EMACS
1094 	    || Flag(FEMACS) || Flag(FGMACS)
1095 #endif /* EMACS */
1096 	    )) {
1097 		int nread;
1098 
1099 		nread = x_read(xp, LINE);
1100 		if (nread < 0)	/* read error */
1101 			nread = 0;
1102 		xp[nread] = '\0';
1103 		xp += nread;
1104 	} else {
1105 		if (interactive) {
1106 			pprompt(prompt, 0);
1107 		} else
1108 			s->line++;
1109 
1110 		while (1) {
1111 			char *p = shf_getse(xp, Xnleft(s->xs, xp), s->u.shf);
1112 
1113 			if (!p && shf_error(s->u.shf) &&
1114 			    s->u.shf->errno_ == EINTR) {
1115 				shf_clearerr(s->u.shf);
1116 				if (trap)
1117 					runtraps(0);
1118 				continue;
1119 			}
1120 			if (!p || (xp = p, xp[-1] == '\n'))
1121 				break;
1122 			/* double buffer size */
1123 			xp++; /* move past null so doubling works... */
1124 			XcheckN(s->xs, xp, Xlength(s->xs, xp));
1125 			xp--; /* ...and move back again */
1126 		}
1127 		/* flush any unwanted input so other programs/builtins
1128 		 * can read it.  Not very optimal, but less error prone
1129 		 * than flushing else where, dealing with redirections,
1130 		 * etc..
1131 		 * todo: reduce size of shf buffer (~128?) if SSTDIN
1132 		 */
1133 		if (s->type == SSTDIN)
1134 			shf_flush(s->u.shf);
1135 	}
1136 	/* XXX: temporary kludge to restore source after a
1137 	 * trap may have been executed.
1138 	 */
1139 	source = s;
1140 	if (have_tty && ksh_tmout) {
1141 		ksh_tmout_state = TMOUT_EXECUTING;
1142 		alarm(0);
1143 	}
1144 	s->start = s->str = Xstring(s->xs, xp);
1145 	strip_nuls(Xstring(s->xs, xp), Xlength(s->xs, xp));
1146 	/* Note: if input is all nulls, this is not eof */
1147 	if (Xlength(s->xs, xp) == 0) { /* EOF */
1148 		if (s->type == SFILE)
1149 			shf_fdclose(s->u.shf);
1150 		s->str = NULL;
1151 	} else if (interactive) {
1152 		char *p = Xstring(s->xs, xp);
1153 		if (cur_prompt == PS1)
1154 			while (*p && ctype(*p, C_IFS) && ctype(*p, C_IFSWS))
1155 				p++;
1156 		if (*p) {
1157 			s->line++;
1158 			histsave(s->line, s->str, 1);
1159 		}
1160 	}
1161 	if (interactive)
1162 		set_prompt(PS2);
1163 }
1164 
/*
 * Rewrite every "\$" in str to "\p", in place, and return str.
 */
static char *
special_prompt_expand(char *str)
{
	char *hit;

	for (hit = strstr(str, "\\$"); hit != NULL; hit = strstr(hit, "\\$"))
		hit[1] = 'p';
	return str;
}
1175 
1176 void
set_prompt(int to)1177 set_prompt(int to)
1178 {
1179 	char *ps1;
1180 	Area *saved_atemp;
1181 
1182 	cur_prompt = to;
1183 
1184 	switch (to) {
1185 	case PS1: /* command */
1186 		ps1 = str_save(str_val(global("PS1")), ATEMP);
1187 		saved_atemp = ATEMP;	/* ps1 is freed by substitute() */
1188 		newenv(E_ERRH);
1189 		if (sigsetjmp(genv->jbuf, 0)) {
1190 			prompt = safe_prompt;
1191 			/* Don't print an error - assume it has already
1192 			 * been printed.  Reason is we may have forked
1193 			 * to run a command and the child may be
1194 			 * unwinding its stack through this code as it
1195 			 * exits.
1196 			 */
1197 		} else {
1198 			/* expand \$ before other substitutions are done */
1199 			char *tmp = special_prompt_expand(ps1);
1200 			prompt = str_save(substitute(tmp, 0), saved_atemp);
1201 		}
1202 		quitenv(NULL);
1203 		break;
1204 	case PS2: /* command continuation */
1205 		prompt = str_val(global("PS2"));
1206 		break;
1207 	}
1208 }
1209 
1210 static int
dopprompt(const char * sp,int ntruncate,const char ** spp,int doprint)1211 dopprompt(const char *sp, int ntruncate, const char **spp, int doprint)
1212 {
1213 	char strbuf[1024], tmpbuf[1024], *p, *str, nbuf[32], delimiter = '\0';
1214 	int len, c, n, totlen = 0, indelimit = 0, counting = 1, delimitthis;
1215 	const char *cp = sp;
1216 	struct tm *tm;
1217 	time_t t;
1218 
1219 	if (*cp && cp[1] == '\r') {
1220 		delimiter = *cp;
1221 		cp += 2;
1222 	}
1223 
1224 	while (*cp != 0) {
1225 		delimitthis = 0;
1226 		if (indelimit && *cp != delimiter)
1227 			;
1228 		else if (*cp == '\n' || *cp == '\r') {
1229 			totlen = 0;
1230 			sp = cp + 1;
1231 		} else if (*cp == '\t') {
1232 			if (counting)
1233 				totlen = (totlen | 7) + 1;
1234 		} else if (*cp == delimiter) {
1235 			indelimit = !indelimit;
1236 			delimitthis = 1;
1237 		}
1238 
1239 		if (*cp == '\\') {
1240 			cp++;
1241 			if (!*cp)
1242 				break;
1243 			/* Expand \h and \$ for both, sh(1) and ksh(1) */
1244 			if (Flag(FSH) && !(*cp == 'h' || *cp == 'p'))
1245 				snprintf(strbuf, sizeof strbuf, "\\%c", *cp);
1246 			else switch (*cp) {
1247 			case 'a':	/* '\' 'a' bell */
1248 				strbuf[0] = '\007';
1249 				strbuf[1] = '\0';
1250 				break;
1251 			case 'd':	/* '\' 'd' Dow Mon DD */
1252 				time(&t);
1253 				tm = localtime(&t);
1254 				if (tm)
1255 					strftime(strbuf, sizeof strbuf,
1256 					    "%a %b %d", tm);
1257 				else
1258 					strbuf[0] = '\0';
1259 				break;
1260 			case 'D': /* '\' 'D' '{' strftime format '}' */
1261 				p = strchr(cp + 2, '}');
1262 				if (cp[1] != '{' || p == NULL) {
1263 					snprintf(strbuf, sizeof strbuf,
1264 					    "\\%c", *cp);
1265 					break;
1266 				}
1267 				strlcpy(tmpbuf, cp + 2, sizeof tmpbuf);
1268 				p = strchr(tmpbuf, '}');
1269 				if (p)
1270 					*p = '\0';
1271 				time(&t);
1272 				tm = localtime(&t);
1273 				if (tm)
1274 					strftime(strbuf, sizeof strbuf, tmpbuf,
1275 					    tm);
1276 				else
1277 					strbuf[0] = '\0';
1278 				cp = strchr(cp + 2, '}');
1279 				break;
1280 			case 'e':	/* '\' 'e' escape */
1281 				strbuf[0] = '\033';
1282 				strbuf[1] = '\0';
1283 				break;
1284 			case 'h':	/* '\' 'h' shortened hostname */
1285 				gethostname(strbuf, sizeof strbuf);
1286 				p = strchr(strbuf, '.');
1287 				if (p)
1288 					*p = '\0';
1289 				break;
1290 			case 'H':	/* '\' 'H' full hostname */
1291 				gethostname(strbuf, sizeof strbuf);
1292 				break;
1293 			case 'j':	/* '\' 'j' number of jobs */
1294 				snprintf(strbuf, sizeof strbuf, "%d",
1295 				    j_njobs());
1296 				break;
1297 			case 'l':	/* '\' 'l' basename of tty */
1298 				p = ttyname(0);
1299 				if (p)
1300 					p = basename(p);
1301 				if (p)
1302 					strlcpy(strbuf, p, sizeof strbuf);
1303 				break;
1304 			case 'n':	/* '\' 'n' newline */
1305 				strbuf[0] = '\n';
1306 				strbuf[1] = '\0';
1307 				totlen = 0;	/* reset for prompt re-print */
1308 				sp = cp + 1;
1309 				break;
1310 			case 'p':	/* '\' '$' $ or # */
1311 				strbuf[0] = ksheuid ? '$' : '#';
1312 				strbuf[1] = '\0';
1313 				break;
1314 			case 'r':	/* '\' 'r' return */
1315 				strbuf[0] = '\r';
1316 				strbuf[1] = '\0';
1317 				totlen = 0;	/* reset for prompt re-print */
1318 				sp = cp + 1;
1319 				break;
1320 			case 's':	/* '\' 's' basename $0 */
1321 				strlcpy(strbuf, kshname, sizeof strbuf);
1322 				break;
1323 			case 't':	/* '\' 't' 24 hour HH:MM:SS */
1324 				time(&t);
1325 				tm = localtime(&t);
1326 				if (tm)
1327 					strftime(strbuf, sizeof strbuf, "%T",
1328 					    tm);
1329 				else
1330 					strbuf[0] = '\0';
1331 				break;
1332 			case 'T':	/* '\' 'T' 12 hour HH:MM:SS */
1333 				time(&t);
1334 				tm = localtime(&t);
1335 				if (tm)
1336 					strftime(strbuf, sizeof strbuf,
1337 					    "%l:%M:%S", tm);
1338 				else
1339 					strbuf[0] = '\0';
1340 				break;
1341 			case '@':	/* '\' '@' 12 hour am/pm format */
1342 				time(&t);
1343 				tm = localtime(&t);
1344 				if (tm)
1345 					strftime(strbuf, sizeof strbuf, "%r",
1346 					    tm);
1347 				else
1348 					strbuf[0] = '\0';
1349 				break;
1350 			case 'A':	/* '\' 'A' 24 hour HH:MM */
1351 				time(&t);
1352 				tm = localtime(&t);
1353 				if (tm)
1354 					strftime(strbuf, sizeof strbuf, "%R",
1355 					    tm);
1356 				else
1357 					strbuf[0] = '\0';
1358 				break;
1359 			case 'u':	/* '\' 'u' username */
1360 				strlcpy(strbuf, username, sizeof strbuf);
1361 				break;
1362 #ifndef SMALL
1363 			case 'v':	/* '\' 'v' version (short) */
1364 				p = strchr(ksh_version, ' ');
1365 				if (p)
1366 					p = strchr(p + 1, ' ');
1367 				if (p) {
1368 					p++;
1369 					strlcpy(strbuf, p, sizeof strbuf);
1370 					p = strchr(strbuf, ' ');
1371 					if (p)
1372 						*p = '\0';
1373 				}
1374 				break;
1375 			case 'V':	/* '\' 'V' version (long) */
1376 				strlcpy(strbuf, ksh_version, sizeof strbuf);
1377 				break;
1378 #endif /* SMALL */
1379 			case 'w':	/* '\' 'w' cwd */
1380 				p = str_val(global("PWD"));
1381 				n = strlen(str_val(global("HOME")));
1382 				if (strcmp(p, "/") == 0) {
1383 					strlcpy(strbuf, p, sizeof strbuf);
1384 				} else if (strcmp(p, str_val(global("HOME"))) == 0) {
1385 					strbuf[0] = '~';
1386 					strbuf[1] = '\0';
1387 				} else if (strncmp(p, str_val(global("HOME")), n)
1388 				    == 0 && p[n] == '/') {
1389 					snprintf(strbuf, sizeof strbuf, "~/%s",
1390 					    str_val(global("PWD")) + n + 1);
1391 				} else
1392 					strlcpy(strbuf, p, sizeof strbuf);
1393 				break;
1394 			case 'W':	/* '\' 'W' basename(cwd) */
1395 				p = str_val(global("PWD"));
1396 				if (strcmp(p, str_val(global("HOME"))) == 0) {
1397 					strbuf[0] = '~';
1398 					strbuf[1] = '\0';
1399 				} else
1400 					strlcpy(strbuf, basename(p), sizeof strbuf);
1401 				break;
1402 			case '!':	/* '\' '!' history line number */
1403 				snprintf(strbuf, sizeof strbuf, "%d",
1404 				    source->line + 1);
1405 				break;
1406 			case '#':	/* '\' '#' command line number */
1407 				snprintf(strbuf, sizeof strbuf, "%d",
1408 				    source->line - source->cmd_offset + 1);
1409 				break;
1410 			case '0':	/* '\' '#' '#' ' #' octal numeric handling */
1411 			case '1':
1412 			case '2':
1413 			case '3':
1414 			case '4':
1415 			case '5':
1416 			case '6':
1417 			case '7':
1418 				if ((cp[1] > '7' || cp[1] < '0') ||
1419 				    (cp[2] > '7' || cp[2] < '0')) {
1420 					snprintf(strbuf, sizeof strbuf,
1421 					    "\\%c", *cp);
1422 					break;
1423 				}
1424 				n = (cp[0] - '0') * 8 * 8 + (cp[1] - '0') * 8 +
1425 				    (cp[2] - '0');
1426 				snprintf(strbuf, sizeof strbuf, "%c", n);
1427 				cp += 2;
1428 				break;
1429 			case '\\':	/* '\' '\' */
1430 				strbuf[0] = '\\';
1431 				strbuf[1] = '\0';
1432 				break;
1433 			case '[': /* '\' '[' .... stop counting */
1434 				strbuf[0] = '\0';
1435 				counting = 0;
1436 				break;
1437 			case ']': /* '\' ']' restart counting */
1438 				strbuf[0] = '\0';
1439 				counting = 1;
1440 				break;
1441 
1442 			default:
1443 				snprintf(strbuf, sizeof strbuf, "\\%c", *cp);
1444 				break;
1445 			}
1446 			cp++;
1447 
1448 			str = strbuf;
1449 			len = strlen(str);
1450 			if (ntruncate) {
1451 				if (ntruncate >= len) {
1452 					ntruncate -= len;
1453 					continue;
1454 				}
1455 				str += ntruncate;
1456 				len -= ntruncate;
1457 				ntruncate = 0;
1458 			}
1459 			if (doprint)
1460 				shf_write(str, len, shl_out);
1461 			if (counting && !indelimit && !delimitthis)
1462 				totlen += len;
1463 			continue;
1464 		} else if (*cp != '!')
1465 			c = *cp++;
1466 		else if (*++cp == '!')
1467 			c = *cp++;
1468 		else {
1469 			shf_snprintf(p = nbuf, sizeof(nbuf), "%d",
1470 			    source->line + 1);
1471 			len = strlen(nbuf);
1472 			if (ntruncate) {
1473 				if (ntruncate >= len) {
1474 					ntruncate -= len;
1475 					continue;
1476 				}
1477 				p += ntruncate;
1478 				len -= ntruncate;
1479 				ntruncate = 0;
1480 			}
1481 			if (doprint)
1482 				shf_write(p, len, shl_out);
1483 			if (counting && !indelimit && !delimitthis)
1484 				totlen += len;
1485 			continue;
1486 		}
1487 		if (counting && ntruncate)
1488 			--ntruncate;
1489 		else if (doprint) {
1490 			shf_putc(c, shl_out);
1491 		}
1492 		if (counting && !indelimit && !delimitthis)
1493 			totlen++;
1494 	}
1495 	if (doprint)
1496 		shf_flush(shl_out);
1497 	if (spp)
1498 		*spp = sp;
1499 	return (totlen);
1500 }
1501 
/*
 * Print the prompt cp, suppressing its first ntruncate characters.
 * The computed length and line-start pointer are not needed here.
 */
void
pprompt(const char *cp, int ntruncate)
{
	(void)dopprompt(cp, ntruncate, NULL, 1);
}
1507 
/*
 * Compute the length of the prompt cp without printing anything.
 * If spp is not NULL it receives the line-start pointer reported by
 * dopprompt().
 */
int
promptlen(const char *cp, const char **spp)
{
	int width;

	width = dopprompt(cp, 0, spp, 0);
	return width;
}
1513 
1514 /* Read the variable part of a ${...} expression (ie, up to but not including
1515  * the :[-+?=#%] or close-brace.
1516  */
1517 static char *
get_brace_var(XString * wsp,char * wp)1518 get_brace_var(XString *wsp, char *wp)
1519 {
1520 	enum parse_state {
1521 			   PS_INITIAL, PS_SAW_HASH, PS_IDENT,
1522 			   PS_NUMBER, PS_VAR1, PS_END
1523 			 }
1524 		state;
1525 	char c;
1526 
1527 	state = PS_INITIAL;
1528 	while (1) {
1529 		c = getsc();
1530 		/* State machine to figure out where the variable part ends. */
1531 		switch (state) {
1532 		case PS_INITIAL:
1533 			if (c == '#') {
1534 				state = PS_SAW_HASH;
1535 				break;
1536 			}
1537 			/* FALLTHROUGH */
1538 		case PS_SAW_HASH:
1539 			if (letter(c))
1540 				state = PS_IDENT;
1541 			else if (digit(c))
1542 				state = PS_NUMBER;
1543 			else if (ctype(c, C_VAR1))
1544 				state = PS_VAR1;
1545 			else
1546 				state = PS_END;
1547 			break;
1548 		case PS_IDENT:
1549 			if (!letnum(c)) {
1550 				state = PS_END;
1551 				if (c == '[') {
1552 					char *tmp, *p;
1553 
1554 					if (!arraysub(&tmp))
1555 						yyerror("missing ]\n");
1556 					*wp++ = c;
1557 					for (p = tmp; *p; ) {
1558 						Xcheck(*wsp, wp);
1559 						*wp++ = *p++;
1560 					}
1561 					afree(tmp, ATEMP);
1562 					c = getsc(); /* the ] */
1563 				}
1564 			}
1565 			break;
1566 		case PS_NUMBER:
1567 			if (!digit(c))
1568 				state = PS_END;
1569 			break;
1570 		case PS_VAR1:
1571 			state = PS_END;
1572 			break;
1573 		case PS_END: /* keep gcc happy */
1574 			break;
1575 		}
1576 		if (state == PS_END) {
1577 			*wp++ = '\0';	/* end of variable part */
1578 			ungetsc(c);
1579 			break;
1580 		}
1581 		Xcheck(*wsp, wp);
1582 		*wp++ = c;
1583 	}
1584 	return wp;
1585 }
1586 
1587 /*
1588  * Save an array subscript - returns true if matching bracket found, false
1589  * if eof or newline was found.
1590  * (Returned string double null terminated)
1591  */
1592 static int
arraysub(char ** strp)1593 arraysub(char **strp)
1594 {
1595 	XString ws;
1596 	char	*wp;
1597 	char	c;
1598 	int	depth = 1;	/* we are just past the initial [ */
1599 
1600 	Xinit(ws, wp, 32, ATEMP);
1601 
1602 	do {
1603 		c = getsc();
1604 		Xcheck(ws, wp);
1605 		*wp++ = c;
1606 		if (c == '[')
1607 			depth++;
1608 		else if (c == ']')
1609 			depth--;
1610 	} while (depth > 0 && c && c != '\n');
1611 
1612 	*wp++ = '\0';
1613 	*strp = Xclose(ws, wp);
1614 
1615 	return depth == 0 ? 1 : 0;
1616 }
1617 
1618 /* Unget a char: handles case when we are already at the start of the buffer */
1619 static const char *
ungetsc(int c)1620 ungetsc(int c)
1621 {
1622 	if (backslash_skip)
1623 		backslash_skip--;
1624 	/* Don't unget eof... */
1625 	if (source->str == null && c == '\0')
1626 		return source->str;
1627 	if (source->str > source->start)
1628 		source->str--;
1629 	else {
1630 		Source *s;
1631 
1632 		s = pushs(SREREAD, source->areap);
1633 		s->ugbuf[0] = c; s->ugbuf[1] = '\0';
1634 		s->start = s->str = s->ugbuf;
1635 		s->next = source;
1636 		source = s;
1637 	}
1638 	return source->str;
1639 }
1640 
1641 
1642 /* Called to get a char that isn't a \newline sequence. */
1643 static int
getsc_bn(void)1644 getsc_bn(void)
1645 {
1646 	int c, c2;
1647 
1648 	if (ignore_backslash_newline)
1649 		return getsc_();
1650 
1651 	if (backslash_skip == 1) {
1652 		backslash_skip = 2;
1653 		return getsc_();
1654 	}
1655 
1656 	backslash_skip = 0;
1657 
1658 	while (1) {
1659 		c = getsc_();
1660 		if (c == '\\') {
1661 			if ((c2 = getsc_()) == '\n')
1662 				/* ignore the \newline; get the next char... */
1663 				continue;
1664 			ungetsc(c2);
1665 			backslash_skip = 1;
1666 		}
1667 		return c;
1668 	}
1669 }
1670 
1671 static Lex_state *
push_state_(State_info * si,Lex_state * old_end)1672 push_state_(State_info *si, Lex_state *old_end)
1673 {
1674 	Lex_state *new = areallocarray(NULL, STATE_BSIZE,
1675 	    sizeof(Lex_state), ATEMP);
1676 
1677 	new[0].ls_info.base = old_end;
1678 	si->base = &new[0];
1679 	si->end = &new[STATE_BSIZE];
1680 	return &new[1];
1681 }
1682 
1683 static Lex_state *
pop_state_(State_info * si,Lex_state * old_end)1684 pop_state_(State_info *si, Lex_state *old_end)
1685 {
1686 	Lex_state *old_base = si->base;
1687 
1688 	si->base = old_end->ls_info.base - STATE_BSIZE;
1689 	si->end = old_end->ls_info.base;
1690 
1691 	afree(old_base, ATEMP);
1692 
1693 	return si->base + STATE_BSIZE - 1;
1694 }
1695