xref: /openbsd/bin/ksh/eval.c (revision 76d0caae)
1 /*	$OpenBSD: eval.c,v 1.66 2020/09/13 15:39:09 tb Exp $	*/
2 
3 /*
4  * Expansion - quoting, separation, substitution, globbing
5  */
6 
7 #include <sys/stat.h>
8 
9 #include <ctype.h>
10 #include <dirent.h>
11 #include <fcntl.h>
12 #include <pwd.h>
13 #include <stdio.h>
14 #include <string.h>
15 #include <unistd.h>
16 
17 #include "sh.h"
18 
19 /*
20  * string expansion
21  *
22  * first pass: quoting, IFS separation, ~, ${}, $() and $(()) substitution.
23  * second pass: alternation ({,}), filename expansion (*?[]).
24  */
25 
26 /* expansion generator state */
27 typedef struct Expand {
28 	/* int  type; */	/* see expand() */
29 	const char *str;	/* string */
30 	union {
31 		const char **strv;/* string[] */
32 		struct shf *shf;/* file */
33 	} u;			/* source */
34 	struct tbl *var;	/* variable in ${var..} */
35 	short	split;		/* split "$@" / call waitlast $() */
36 } Expand;
37 
38 #define	XBASE		0	/* scanning original */
39 #define	XSUB		1	/* expanding ${} string */
40 #define	XARGSEP		2	/* ifs0 between "$*" */
41 #define	XARG		3	/* expanding $*, $@ */
42 #define	XCOM		4	/* expanding $() */
43 #define XNULLSUB	5	/* "$@" when $# is 0 (don't generate word) */
44 #define XSUBMID		6	/* middle of expanding ${} */
45 
46 /* States used for field splitting */
47 #define IFS_WORD	0	/* word has chars (or quotes) */
48 #define IFS_WS		1	/* have seen IFS white-space */
49 #define IFS_NWS		2	/* have seen IFS non-white-space */
50 #define IFS_IWS		3	/* beginning of word, ignore IFS white-space */
51 #define IFS_QUOTE	4	/* beg.w/quote, becomes IFS_WORD unless "$@" */
52 
53 static	int	varsub(Expand *, char *, char *, int *, int *);
54 static	int	comsub(Expand *, char *);
55 static	char   *trimsub(char *, char *, int);
56 static	void	glob(char *, XPtrV *, int);
57 static	void	globit(XString *, char **, char *, XPtrV *, int);
58 static char	*maybe_expand_tilde(char *, XString *, char **, int);
59 static	char   *tilde(char *);
60 static	char   *homedir(char *);
61 static void	alt_expand(XPtrV *, char *, char *, char *, int);
62 
63 static struct tbl *varcpy(struct tbl *);
64 
65 /* compile and expand word */
66 char *
67 substitute(const char *cp, int f)
68 {
69 	struct source *s, *sold;
70 
71 	if (disable_subst)
72 		return str_save(cp, ATEMP);
73 
74 	sold = source;
75 	s = pushs(SWSTR, ATEMP);
76 	s->start = s->str = cp;
77 	source = s;
78 	if (yylex(ONEWORD) != LWORD)
79 		internal_errorf("substitute");
80 	source = sold;
81 	afree(s, ATEMP);
82 	return evalstr(yylval.cp, f);
83 }
84 
85 /*
86  * expand arg-list
87  */
88 char **
89 eval(char **ap, int f)
90 {
91 	XPtrV w;
92 
93 	if (*ap == NULL)
94 		return ap;
95 	XPinit(w, 32);
96 	XPput(w, NULL);		/* space for shell name */
97 	while (*ap != NULL)
98 		expand(*ap++, &w, f);
99 	XPput(w, NULL);
100 	return (char **) XPclose(w) + 1;
101 }
102 
103 /*
104  * expand string
105  */
106 char *
107 evalstr(char *cp, int f)
108 {
109 	XPtrV w;
110 
111 	XPinit(w, 1);
112 	expand(cp, &w, f);
113 	cp = (XPsize(w) == 0) ? null : (char*) *XPptrv(w);
114 	XPfree(w);
115 	return cp;
116 }
117 
118 /*
119  * expand string - return only one component
120  * used from iosetup to expand redirection files
121  */
122 char *
123 evalonestr(char *cp, int f)
124 {
125 	XPtrV w;
126 
127 	XPinit(w, 1);
128 	expand(cp, &w, f);
129 	switch (XPsize(w)) {
130 	case 0:
131 		cp = null;
132 		break;
133 	case 1:
134 		cp = (char*) *XPptrv(w);
135 		break;
136 	default:
137 		cp = evalstr(cp, f&~DOGLOB);
138 		break;
139 	}
140 	XPfree(w);
141 	return cp;
142 }
143 
144 /* for nested substitution: ${var:=$var2} */
145 typedef struct SubType {
146 	short	stype;		/* [=+-?%#] action after expanded word */
147 	short	base;		/* begin position of expanded word */
148 	short	f;		/* saved value of f (DOPAT, etc) */
149 	struct tbl *var;	/* variable for ${var..} */
150 	short	quote;		/* saved value of quote (for ${..[%#]..}) */
151 	struct SubType *prev;	/* old type */
152 	struct SubType *next;	/* poped type (to avoid re-allocating) */
153 } SubType;
154 
155 void
156 expand(char *cp,	/* input word */
157     XPtrV *wp,		/* output words */
158     int f)		/* DO* flags */
159 {
160 	int c = 0;
161 	int type;		/* expansion type */
162 	int quote = 0;		/* quoted */
163 	XString ds;		/* destination string */
164 	char *dp, *sp;		/* dest., source */
165 	int fdo, word;		/* second pass flags; have word */
166 	int doblank;		/* field splitting of parameter/command subst */
167 	Expand x = {
168 		/* expansion variables */
169 		NULL, { NULL }, NULL, 0
170 	};
171 	SubType st_head, *st;
172 	int newlines = 0; /* For trailing newlines in COMSUB */
173 	int saw_eq, tilde_ok;
174 	int make_magic;
175 	size_t len;
176 
177 	if (cp == NULL)
178 		internal_errorf("expand(NULL)");
179 	/* for alias, readonly, set, typeset commands */
180 	if ((f & DOVACHECK) && is_wdvarassign(cp)) {
181 		f &= ~(DOVACHECK|DOBLANK|DOGLOB|DOTILDE);
182 		f |= DOASNTILDE;
183 	}
184 	if (Flag(FNOGLOB))
185 		f &= ~DOGLOB;
186 	if (Flag(FMARKDIRS))
187 		f |= DOMARKDIRS;
188 	if (Flag(FBRACEEXPAND) && (f & DOGLOB))
189 		f |= DOBRACE_;
190 
191 	Xinit(ds, dp, 128, ATEMP);	/* init dest. string */
192 	type = XBASE;
193 	sp = cp;
194 	fdo = 0;
195 	saw_eq = 0;
196 	tilde_ok = (f & (DOTILDE|DOASNTILDE)) ? 1 : 0; /* must be 1/0 */
197 	doblank = 0;
198 	make_magic = 0;
199 	word = (f&DOBLANK) ? IFS_WS : IFS_WORD;
200 
201 	memset(&st_head, 0, sizeof(st_head));
202 	st = &st_head;
203 
204 	while (1) {
205 		Xcheck(ds, dp);
206 
207 		switch (type) {
208 		case XBASE:	/* original prefixed string */
209 			c = *sp++;
210 			switch (c) {
211 			case EOS:
212 				c = 0;
213 				break;
214 			case CHAR:
215 				c = *sp++;
216 				break;
217 			case QCHAR:
218 				quote |= 2; /* temporary quote */
219 				c = *sp++;
220 				break;
221 			case OQUOTE:
222 				switch (word) {
223 				case IFS_QUOTE:
224 					/* """something */
225 					word = IFS_WORD;
226 					break;
227 				case IFS_WORD:
228 					break;
229 				default:
230 					word = IFS_QUOTE;
231 					break;
232 				}
233 				tilde_ok = 0;
234 				quote = 1;
235 				continue;
236 			case CQUOTE:
237 				quote = 0;
238 				continue;
239 			case COMSUB:
240 				tilde_ok = 0;
241 				if (f & DONTRUNCOMMAND) {
242 					word = IFS_WORD;
243 					*dp++ = '$'; *dp++ = '(';
244 					while (*sp != '\0') {
245 						Xcheck(ds, dp);
246 						*dp++ = *sp++;
247 					}
248 					*dp++ = ')';
249 				} else {
250 					type = comsub(&x, sp);
251 					if (type == XCOM && (f&DOBLANK))
252 						doblank++;
253 					sp = strchr(sp, 0) + 1;
254 					newlines = 0;
255 				}
256 				continue;
257 			case EXPRSUB:
258 				word = IFS_WORD;
259 				tilde_ok = 0;
260 				if (f & DONTRUNCOMMAND) {
261 					*dp++ = '$'; *dp++ = '('; *dp++ = '(';
262 					while (*sp != '\0') {
263 						Xcheck(ds, dp);
264 						*dp++ = *sp++;
265 					}
266 					*dp++ = ')'; *dp++ = ')';
267 				} else {
268 					struct tbl v;
269 					char *p;
270 
271 					v.flag = DEFINED|ISSET|INTEGER;
272 					v.type = 10; /* not default */
273 					v.name[0] = '\0';
274 					v_evaluate(&v, substitute(sp, 0),
275 					    KSH_UNWIND_ERROR, true);
276 					sp = strchr(sp, 0) + 1;
277 					for (p = str_val(&v); *p; ) {
278 						Xcheck(ds, dp);
279 						*dp++ = *p++;
280 					}
281 				}
282 				continue;
283 			case OSUBST: /* ${{#}var{:}[=+-?#%]word} */
284 			  /* format is:
285 			   *   OSUBST [{x] plain-variable-part \0
286 			   *     compiled-word-part CSUBST [}x]
287 			   * This is where all syntax checking gets done...
288 			   */
289 			    {
290 				char *varname = ++sp; /* skip the { or x (}) */
291 				int stype;
292 				int slen = 0;
293 
294 				sp = strchr(sp, '\0') + 1; /* skip variable */
295 				type = varsub(&x, varname, sp, &stype, &slen);
296 				if (type < 0) {
297 					char endc;
298 					char *str, *end;
299 
300 					sp = varname - 2; /* restore sp */
301 					end = (char *) wdscan(sp, CSUBST);
302 					/* ({) the } or x is already skipped */
303 					endc = *end;
304 					*end = EOS;
305 					str = snptreef(NULL, 64, "%S", sp);
306 					*end = endc;
307 					errorf("%s: bad substitution", str);
308 				}
309 				if (f&DOBLANK)
310 					doblank++;
311 				tilde_ok = 0;
312 				if (word == IFS_QUOTE && type != XNULLSUB)
313 					word = IFS_WORD;
314 				if (type == XBASE) {	/* expand? */
315 					if (!st->next) {
316 						SubType *newst;
317 
318 						newst = alloc(
319 						    sizeof(SubType), ATEMP);
320 						newst->next = NULL;
321 						newst->prev = st;
322 						st->next = newst;
323 					}
324 					st = st->next;
325 					st->stype = stype;
326 					st->base = Xsavepos(ds, dp);
327 					st->f = f;
328 					st->var = varcpy(x.var);
329 					st->quote = quote;
330 					/* skip qualifier(s) */
331 					if (stype)
332 						sp += slen;
333 					switch (stype & 0x7f) {
334 					case '#':
335 					case '%':
336 						/* ! DOBLANK,DOBRACE_,DOTILDE */
337 						f = DOPAT | (f&DONTRUNCOMMAND) |
338 						    DOTEMP_;
339 						quote = 0;
340 						/* Prepend open pattern (so |
341 						 * in a trim will work as
342 						 * expected)
343 						 */
344 						*dp++ = MAGIC;
345 						*dp++ = '@' + 0x80U;
346 						break;
347 					case '=':
348 						/* Enabling tilde expansion
349 						 * after :'s here is
350 						 * non-standard ksh, but is
351 						 * consistent with rules for
352 						 * other assignments.  Not
353 						 * sure what POSIX thinks of
354 						 * this.
355 						 * Not doing tilde expansion
356 						 * for integer variables is a
357 						 * non-POSIX thing - makes
358 						 * sense though, since ~ is
359 						 * a arithmetic operator.
360 						 */
361 						if (!(x.var->flag & INTEGER))
362 							f |= DOASNTILDE|DOTILDE;
363 						f |= DOTEMP_;
364 						/* These will be done after the
365 						 * value has been assigned.
366 						 */
367 						f &= ~(DOBLANK|DOGLOB|DOBRACE_);
368 						tilde_ok = 1;
369 						break;
370 					case '?':
371 						f &= ~DOBLANK;
372 						f |= DOTEMP_;
373 						/* FALLTHROUGH */
374 					default:
375 						/* '-' '+' '?' */
376 						if (quote)
377 							word = IFS_WORD;
378 						else if (dp == Xstring(ds, dp))
379 							word = IFS_IWS;
380 						/* Enable tilde expansion */
381 						tilde_ok = 1;
382 						f |= DOTILDE;
383 					}
384 				} else
385 					/* skip word */
386 					sp = (char *) wdscan(sp, CSUBST);
387 				continue;
388 			    }
389 			case CSUBST: /* only get here if expanding word */
390 				sp++; /* ({) skip the } or x */
391 				tilde_ok = 0;	/* in case of ${unset:-} */
392 				*dp = '\0';
393 				quote = st->quote;
394 				f = st->f;
395 				if (f&DOBLANK)
396 					doblank--;
397 				switch (st->stype&0x7f) {
398 				case '#':
399 				case '%':
400 					/* Append end-pattern */
401 					*dp++ = MAGIC; *dp++ = ')'; *dp = '\0';
402 					dp = Xrestpos(ds, dp, st->base);
403 					/* Must use st->var since calling
404 					 * global would break things
405 					 * like x[i+=1].
406 					 */
407 					x.str = trimsub(str_val(st->var),
408 						dp, st->stype);
409 					if (x.str[0] != '\0') {
410 						word = IFS_IWS;
411 						type = XSUB;
412 					} else if (quote) {
413 						word = IFS_WORD;
414 						type = XSUB;
415 					} else {
416 						if (dp == Xstring(ds, dp))
417 							word = IFS_IWS;
418 						type = XNULLSUB;
419 					}
420 					if (f&DOBLANK)
421 						doblank++;
422 					st = st->prev;
423 					continue;
424 				case '=':
425 					/* Restore our position and substitute
426 					 * the value of st->var (may not be
427 					 * the assigned value in the presence
428 					 * of integer/right-adj/etc attributes).
429 					 */
430 					dp = Xrestpos(ds, dp, st->base);
431 					/* Must use st->var since calling
432 					 * global would cause with things
433 					 * like x[i+=1] to be evaluated twice.
434 					 */
435 					/* Note: not exported by FEXPORT
436 					 * in at&t ksh.
437 					 */
438 					/* XXX POSIX says readonly is only
439 					 * fatal for special builtins (setstr
440 					 * does readonly check).
441 					 */
442 					len = strlen(dp) + 1;
443 					setstr(st->var,
444 					    debunk(alloc(len, ATEMP),
445 					    dp, len), KSH_UNWIND_ERROR);
446 					x.str = str_val(st->var);
447 					type = XSUB;
448 					if (f&DOBLANK)
449 						doblank++;
450 					st = st->prev;
451 					if (quote || !*x.str)
452 						word = IFS_WORD;
453 					else
454 						word = IFS_IWS;
455 					continue;
456 				case '?':
457 				    {
458 					char *s = Xrestpos(ds, dp, st->base);
459 
460 					errorf("%s: %s", st->var->name,
461 					    dp == s ?
462 					    "parameter null or not set" :
463 					    (debunk(s, s, strlen(s) + 1), s));
464 				    }
465 				}
466 				st = st->prev;
467 				type = XBASE;
468 				continue;
469 
470 			case OPAT: /* open pattern: *(foo|bar) */
471 				/* Next char is the type of pattern */
472 				make_magic = 1;
473 				c = *sp++ + 0x80;
474 				break;
475 
476 			case SPAT: /* pattern separator (|) */
477 				make_magic = 1;
478 				c = '|';
479 				break;
480 
481 			case CPAT: /* close pattern */
482 				make_magic = 1;
483 				c = /*(*/ ')';
484 				break;
485 			}
486 			break;
487 
488 		case XNULLSUB:
489 			/* Special case for "$@" (and "${foo[@]}") - no
490 			 * word is generated if $# is 0 (unless there is
491 			 * other stuff inside the quotes).
492 			 */
493 			type = XBASE;
494 			if (f&DOBLANK) {
495 				doblank--;
496 				if (dp == Xstring(ds, dp) && word != IFS_WORD)
497 					word = IFS_IWS;
498 			}
499 			continue;
500 
501 		case XSUB:
502 		case XSUBMID:
503 			if ((c = *x.str++) == 0) {
504 				type = XBASE;
505 				if (f&DOBLANK)
506 					doblank--;
507 				continue;
508 			}
509 			break;
510 
511 		case XARGSEP:
512 			type = XARG;
513 			quote = 1;
514 		case XARG:
515 			if ((c = *x.str++) == '\0') {
516 				/* force null words to be created so
517 				 * set -- '' 2 ''; foo "$@" will do
518 				 * the right thing
519 				 */
520 				if (quote && x.split)
521 					word = IFS_WORD;
522 				if ((x.str = *x.u.strv++) == NULL) {
523 					type = XBASE;
524 					if (f&DOBLANK)
525 						doblank--;
526 					continue;
527 				}
528 				c = ifs0;
529 				if (c == 0) {
530 					if (quote && !x.split)
531 						continue;
532 					if (!quote && word == IFS_WS)
533 						continue;
534 					/* this is so we don't terminate */
535 					c = ' ';
536 					/* now force-emit a word */
537 					goto emit_word;
538 				}
539 				if (quote && x.split) {
540 					/* terminate word for "$@" */
541 					type = XARGSEP;
542 					quote = 0;
543 				}
544 			}
545 			break;
546 
547 		case XCOM:
548 			if (x.u.shf == NULL)	/* $(< ...) failed, fake EOF */
549 				c = EOF;
550 			else if (newlines) {		/* Spit out saved nl's */
551 				c = '\n';
552 				--newlines;
553 			} else {
554 				while ((c = shf_getc(x.u.shf)) == 0 || c == '\n')
555 				    if (c == '\n')
556 					    newlines++;	/* Save newlines */
557 				if (newlines && c != EOF) {
558 					shf_ungetc(c, x.u.shf);
559 					c = '\n';
560 					--newlines;
561 				}
562 			}
563 			if (c == EOF) {
564 				newlines = 0;
565 				if (x.u.shf != NULL)
566 					shf_close(x.u.shf);
567 				if (x.split)
568 					subst_exstat = waitlast();
569 				else
570 					subst_exstat = (x.u.shf == NULL);
571 				type = XBASE;
572 				if (f&DOBLANK)
573 					doblank--;
574 				continue;
575 			}
576 			break;
577 		}
578 
579 		/* check for end of word or IFS separation */
580 		if (c == 0 || (!quote && (f & DOBLANK) && doblank &&
581 		    !make_magic && ctype(c, C_IFS))) {
582 			/* How words are broken up:
583 			 *		   |       value of c
584 			 *	  word	   |	ws	nws	0
585 			 *	-----------------------------------
586 			 *	IFS_WORD	w/WS	w/NWS	w
587 			 *	IFS_WS		-/WS	w/NWS	-
588 			 *	IFS_NWS		-/NWS	w/NWS	-
589 			 *	IFS_IWS		-/WS	w/NWS	-
590 			 *   (w means generate a word)
591 			 */
592 			if ((word == IFS_WORD) || (word == IFS_QUOTE) || (c &&
593 			    (word == IFS_IWS || word == IFS_NWS) &&
594 			    !ctype(c, C_IFSWS))) {
595  				char *p;
596  emit_word:
597 				*dp++ = '\0';
598 				p = Xclose(ds, dp);
599 				if (fdo & DOBRACE_)
600 					/* also does globbing */
601 					alt_expand(wp, p, p,
602 					    p + Xlength(ds, (dp - 1)),
603 					    fdo | (f & DOMARKDIRS));
604 				else if (fdo & DOGLOB)
605 					glob(p, wp, f & DOMARKDIRS);
606 				else if ((f & DOPAT) || !(fdo & DOMAGIC_))
607 					XPput(*wp, p);
608 				else
609 					XPput(*wp, debunk(p, p, strlen(p) + 1));
610 				fdo = 0;
611 				saw_eq = 0;
612 				tilde_ok = (f & (DOTILDE|DOASNTILDE)) ? 1 : 0;
613 				if (c != 0)
614 					Xinit(ds, dp, 128, ATEMP);
615 			}
616 			if (c == 0)
617 				goto done;
618 			if (word != IFS_NWS)
619 				word = ctype(c, C_IFSWS) ? IFS_WS : IFS_NWS;
620 		} else {
621 			if (type == XSUB) {
622 				if (word == IFS_NWS &&
623 				    Xlength(ds, dp) == 0) {
624 					char *p;
625 
626 					if ((p = strdup("")) == NULL)
627 						internal_errorf("unable "
628 						    "to allocate memory");
629 					XPput(*wp, p);
630 				}
631 				type = XSUBMID;
632 			}
633 
634 			/* age tilde_ok info - ~ code tests second bit */
635 			tilde_ok <<= 1;
636 			/* mark any special second pass chars */
637 			if (!quote)
638 				switch (c) {
639 				case '[':
640 				case '!':
641 				case '-':
642 				case ']':
643 					/* For character classes - doesn't hurt
644 					 * to have magic !,-,]'s outside of
645 					 * [...] expressions.
646 					 */
647 					if (f & (DOPAT | DOGLOB)) {
648 						fdo |= DOMAGIC_;
649 						if (c == '[')
650 							fdo |= f & DOGLOB;
651 						*dp++ = MAGIC;
652 					}
653 					break;
654 				case '*':
655 				case '?':
656 					if (f & (DOPAT | DOGLOB)) {
657 						fdo |= DOMAGIC_ | (f & DOGLOB);
658 						*dp++ = MAGIC;
659 					}
660 					break;
661 				case OBRACE:
662 				case ',':
663 				case CBRACE:
664 					if ((f & DOBRACE_) && (c == OBRACE ||
665 					    (fdo & DOBRACE_))) {
666 						fdo |= DOBRACE_|DOMAGIC_;
667 						*dp++ = MAGIC;
668 					}
669 					break;
670 				case '=':
671 					/* Note first unquoted = for ~ */
672 					if (!(f & DOTEMP_) && !saw_eq) {
673 						saw_eq = 1;
674 						tilde_ok = 1;
675 					}
676 					break;
677 				case ':': /* : */
678 					/* Note unquoted : for ~ */
679 					if (!(f & DOTEMP_) && (f & DOASNTILDE))
680 						tilde_ok = 1;
681 					break;
682 				case '~':
683 					/* tilde_ok is reset whenever
684 					 * any of ' " $( $(( ${ } are seen.
685 					 * Note that tilde_ok must be preserved
686 					 * through the sequence ${A=a=}~
687 					 */
688 					if (type == XBASE &&
689 					    (f & (DOTILDE|DOASNTILDE)) &&
690 					    (tilde_ok & 2)) {
691 						char *p, *dp_x;
692 
693 						dp_x = dp;
694 						p = maybe_expand_tilde(sp,
695 						    &ds, &dp_x,
696 						    f & DOASNTILDE);
697 						if (p) {
698 							if (dp != dp_x)
699 								word = IFS_WORD;
700 							dp = dp_x;
701 							sp = p;
702 							continue;
703 						}
704 					}
705 					break;
706 				}
707 			else
708 				quote &= ~2; /* undo temporary */
709 
710 			if (make_magic) {
711 				make_magic = 0;
712 				fdo |= DOMAGIC_ | (f & DOGLOB);
713 				*dp++ = MAGIC;
714 			} else if (ISMAGIC(c)) {
715 				fdo |= DOMAGIC_;
716 				*dp++ = MAGIC;
717 			}
718 			*dp++ = c; /* save output char */
719 			word = IFS_WORD;
720 		}
721 	}
722 
723 done:
724 	for (st = &st_head; st != NULL; st = st->next) {
725 		if (st->var == NULL || (st->var->flag & RDONLY) == 0)
726 			continue;
727 
728 		afree(st->var, ATEMP);
729 	}
730 }
731 
732 /*
733  * Prepare to generate the string returned by ${} substitution.
734  */
735 static int
736 varsub(Expand *xp, char *sp, char *word,
737     int *stypep,	/* becomes qualifier type */
738     int *slenp)		/* " " len (=, :=, etc.) valid iff *stypep != 0 */
739 {
740 	int c;
741 	int state;	/* next state: XBASE, XARG, XSUB, XNULLSUB */
742 	int stype;	/* substitution type */
743 	int slen;
744 	char *p;
745 	struct tbl *vp;
746 	int zero_ok = 0;
747 
748 	if (sp[0] == '\0')	/* Bad variable name */
749 		return -1;
750 
751 	xp->var = NULL;
752 
753 	/* ${#var}, string length or array size */
754 	if (sp[0] == '#' && (c = sp[1]) != '\0') {
755 		/* Can't have any modifiers for ${#...} */
756 		if (*word != CSUBST)
757 			return -1;
758 		sp++;
759 		/* Check for size of array */
760 		if ((p=strchr(sp,'[')) && (p[1]=='*'||p[1]=='@') && p[2]==']') {
761 			int n = 0;
762 
763 			vp = global(arrayname(sp));
764 			if (vp->flag & (ISSET|ARRAY))
765 				zero_ok = 1;
766 			for (; vp; vp = vp->u.array)
767 				if (vp->flag & ISSET)
768 					n++;
769 			c = n; /* ksh88/ksh93 go for number, not max index */
770 		} else if (c == '*' || c == '@')
771 			c = genv->loc->argc;
772 		else {
773 			p = str_val(global(sp));
774 			zero_ok = p != null;
775 			c = strlen(p);
776 		}
777 		if (Flag(FNOUNSET) && c == 0 && !zero_ok)
778 			errorf("%s: parameter not set", sp);
779 		*stypep = 0; /* unqualified variable/string substitution */
780 		xp->str = str_save(u64ton((uint64_t)c, 10), ATEMP);
781 		return XSUB;
782 	}
783 
784 	/* Check for qualifiers in word part */
785 	stype = 0;
786 	c = word[slen = 0] == CHAR ? word[1] : 0;
787 	if (c == ':') {
788 		slen += 2;
789 		stype = 0x80;
790 		c = word[slen + 0] == CHAR ? word[slen + 1] : 0;
791 	}
792 	if (ctype(c, C_SUBOP1)) {
793 		slen += 2;
794 		stype |= c;
795 	} else if (ctype(c, C_SUBOP2)) { /* Note: ksh88 allows :%, :%%, etc */
796 		slen += 2;
797 		stype = c;
798 		if (word[slen + 0] == CHAR && c == word[slen + 1]) {
799 			stype |= 0x80;
800 			slen += 2;
801 		}
802 	} else if (stype)	/* : is not ok */
803 		return -1;
804 	if (!stype && *word != CSUBST)
805 		return -1;
806 	*stypep = stype;
807 	*slenp = slen;
808 
809 	c = sp[0];
810 	if (c == '*' || c == '@') {
811 		switch (stype & 0x7f) {
812 		case '=':	/* can't assign to a vector */
813 		case '%':	/* can't trim a vector (yet) */
814 		case '#':
815 			return -1;
816 		}
817 		if (genv->loc->argc == 0) {
818 			xp->str = null;
819 			xp->var = global(sp);
820 			state = c == '@' ? XNULLSUB : XSUB;
821 		} else {
822 			xp->u.strv = (const char **) genv->loc->argv + 1;
823 			xp->str = *xp->u.strv++;
824 			xp->split = c == '@'; /* $@ */
825 			state = XARG;
826 		}
827 		zero_ok = 1;	/* exempt "$@" and "$*" from 'set -u' */
828 	} else {
829 		if ((p=strchr(sp,'[')) && (p[1]=='*'||p[1]=='@') && p[2]==']') {
830 			XPtrV wv;
831 
832 			switch (stype & 0x7f) {
833 			case '=':	/* can't assign to a vector */
834 			case '%':	/* can't trim a vector (yet) */
835 			case '#':
836 			case '?':
837 				return -1;
838 			}
839 			XPinit(wv, 32);
840 			vp = global(arrayname(sp));
841 			for (; vp; vp = vp->u.array) {
842 				if (!(vp->flag&ISSET))
843 					continue;
844 				XPput(wv, str_val(vp));
845 			}
846 			if (XPsize(wv) == 0) {
847 				xp->str = null;
848 				state = p[1] == '@' ? XNULLSUB : XSUB;
849 				XPfree(wv);
850 			} else {
851 				XPput(wv, 0);
852 				xp->u.strv = (const char **) XPptrv(wv);
853 				xp->str = *xp->u.strv++;
854 				xp->split = p[1] == '@'; /* ${foo[@]} */
855 				state = XARG;
856 			}
857 		} else {
858 			/* Can't assign things like $! or $1 */
859 			if ((stype & 0x7f) == '=' &&
860 			    (ctype(*sp, C_VAR1) || digit(*sp)))
861 				return -1;
862 			xp->var = global(sp);
863 			xp->str = str_val(xp->var);
864 			state = XSUB;
865 		}
866 	}
867 
868 	c = stype&0x7f;
869 	/* test the compiler's code generator */
870 	if (ctype(c, C_SUBOP2) ||
871 	    (((stype&0x80) ? *xp->str=='\0' : xp->str==null) ? /* undef? */
872 	    c == '=' || c == '-' || c == '?' : c == '+'))
873 		state = XBASE;	/* expand word instead of variable value */
874 	if (Flag(FNOUNSET) && xp->str == null && !zero_ok &&
875 	    (ctype(c, C_SUBOP2) || (state != XBASE && c != '+')))
876 		errorf("%s: parameter not set", sp);
877 	return state;
878 }
879 
880 /*
881  * Run the command in $(...) and read its output.
882  */
883 static int
884 comsub(Expand *xp, char *cp)
885 {
886 	Source *s, *sold;
887 	struct op *t;
888 	struct shf *shf;
889 
890 	s = pushs(SSTRING, ATEMP);
891 	s->start = s->str = cp;
892 	sold = source;
893 	t = compile(s);
894 	afree(s, ATEMP);
895 	source = sold;
896 
897 	if (t == NULL)
898 		return XBASE;
899 
900 	if (t != NULL && t->type == TCOM && /* $(<file) */
901 	    *t->args == NULL && *t->vars == NULL && t->ioact != NULL) {
902 		struct ioword *io = *t->ioact;
903 		char *name;
904 
905 		if ((io->flag&IOTYPE) != IOREAD)
906 			errorf("funny $() command: %s",
907 			    snptreef(NULL, 32, "%R", io));
908 		shf = shf_open(name = evalstr(io->name, DOTILDE), O_RDONLY, 0,
909 			SHF_MAPHI|SHF_CLEXEC);
910 		if (shf == NULL)
911 			warningf(!Flag(FTALKING),
912 			    "%s: cannot open $(<) input", name);
913 		xp->split = 0;	/* no waitlast() */
914 	} else {
915 		int ofd1, pv[2];
916 		openpipe(pv);
917 		shf = shf_fdopen(pv[0], SHF_RD, NULL);
918 		ofd1 = savefd(1);
919 		if (pv[1] != 1) {
920 			ksh_dup2(pv[1], 1, false);
921 			close(pv[1]);
922 		}
923 		execute(t, XFORK|XXCOM|XPIPEO, NULL);
924 		restfd(1, ofd1);
925 		startlast();
926 		xp->split = 1;	/* waitlast() */
927 	}
928 
929 	xp->u.shf = shf;
930 	return XCOM;
931 }
932 
933 /*
934  * perform #pattern and %pattern substitution in ${}
935  */
936 
937 static char *
938 trimsub(char *str, char *pat, int how)
939 {
940 	char *end = strchr(str, 0);
941 	char *p, c;
942 
943 	switch (how&0xff) {	/* UCHAR_MAX maybe? */
944 	case '#':		/* shortest at beginning */
945 		for (p = str; p <= end; p++) {
946 			c = *p; *p = '\0';
947 			if (gmatch(str, pat, false)) {
948 				*p = c;
949 				return p;
950 			}
951 			*p = c;
952 		}
953 		break;
954 	case '#'|0x80:	/* longest match at beginning */
955 		for (p = end; p >= str; p--) {
956 			c = *p; *p = '\0';
957 			if (gmatch(str, pat, false)) {
958 				*p = c;
959 				return p;
960 			}
961 			*p = c;
962 		}
963 		break;
964 	case '%':		/* shortest match at end */
965 		for (p = end; p >= str; p--) {
966 			if (gmatch(p, pat, false))
967 				return str_nsave(str, p - str, ATEMP);
968 		}
969 		break;
970 	case '%'|0x80:	/* longest match at end */
971 		for (p = str; p <= end; p++) {
972 			if (gmatch(p, pat, false))
973 				return str_nsave(str, p - str, ATEMP);
974 		}
975 		break;
976 	}
977 
978 	return str;		/* no match, return string */
979 }
980 
981 /*
982  * glob
983  * Name derived from V6's /etc/glob, the program that expanded filenames.
984  */
985 
986 /* XXX cp not const 'cause slashes are temporarily replaced with nulls... */
987 static void
988 glob(char *cp, XPtrV *wp, int markdirs)
989 {
990 	int oldsize = XPsize(*wp);
991 
992 	if (glob_str(cp, wp, markdirs) == 0)
993 		XPput(*wp, debunk(cp, cp, strlen(cp) + 1));
994 	else
995 		qsortp(XPptrv(*wp) + oldsize, (size_t)(XPsize(*wp) - oldsize),
996 			xstrcmp);
997 }
998 
999 #define GF_NONE		0
1000 #define GF_EXCHECK	BIT(0)		/* do existence check on file */
1001 #define GF_GLOBBED	BIT(1)		/* some globbing has been done */
1002 #define GF_MARKDIR	BIT(2)		/* add trailing / to directories */
1003 
1004 /* Apply file globbing to cp and store the matching files in wp.  Returns
1005  * the number of matches found.
1006  */
1007 int
1008 glob_str(char *cp, XPtrV *wp, int markdirs)
1009 {
1010 	int oldsize = XPsize(*wp);
1011 	XString xs;
1012 	char *xp;
1013 
1014 	Xinit(xs, xp, 256, ATEMP);
1015 	globit(&xs, &xp, cp, wp, markdirs ? GF_MARKDIR : GF_NONE);
1016 	Xfree(xs, xp);
1017 
1018 	return XPsize(*wp) - oldsize;
1019 }
1020 
1021 static void
1022 globit(XString *xs,	/* dest string */
1023     char **xpp,		/* ptr to dest end */
1024     char *sp,		/* source path */
1025     XPtrV *wp,		/* output list */
1026     int check)		/* GF_* flags */
1027 {
1028 	char *np;		/* next source component */
1029 	char *xp = *xpp;
1030 	char *se;
1031 	char odirsep;
1032 
1033 	/* This to allow long expansions to be interrupted */
1034 	intrcheck();
1035 
1036 	if (sp == NULL) {	/* end of source path */
1037 		/* We only need to check if the file exists if a pattern
1038 		 * is followed by a non-pattern (eg, foo*x/bar; no check
1039 		 * is needed for foo* since the match must exist) or if
1040 		 * any patterns were expanded and the markdirs option is set.
1041 		 * Symlinks make things a bit tricky...
1042 		 */
1043 		if ((check & GF_EXCHECK) ||
1044 		    ((check & GF_MARKDIR) && (check & GF_GLOBBED))) {
1045 #define stat_check()	(stat_done ? stat_done : \
1046 			    (stat_done = stat(Xstring(*xs, xp), &statb) == -1 \
1047 				? -1 : 1))
1048 			struct stat lstatb, statb;
1049 			int stat_done = 0;	 /* -1: failed, 1 ok */
1050 
1051 			if (lstat(Xstring(*xs, xp), &lstatb) == -1)
1052 				return;
1053 			/* special case for systems which strip trailing
1054 			 * slashes from regular files (eg, /etc/passwd/).
1055 			 * SunOS 4.1.3 does this...
1056 			 */
1057 			if ((check & GF_EXCHECK) && xp > Xstring(*xs, xp) &&
1058 			    xp[-1] == '/' && !S_ISDIR(lstatb.st_mode) &&
1059 			    (!S_ISLNK(lstatb.st_mode) ||
1060 			    stat_check() < 0 || !S_ISDIR(statb.st_mode)))
1061 				return;
1062 			/* Possibly tack on a trailing / if there isn't already
1063 			 * one and if the file is a directory or a symlink to a
1064 			 * directory
1065 			 */
1066 			if (((check & GF_MARKDIR) && (check & GF_GLOBBED)) &&
1067 			    xp > Xstring(*xs, xp) && xp[-1] != '/' &&
1068 			    (S_ISDIR(lstatb.st_mode) ||
1069 			    (S_ISLNK(lstatb.st_mode) && stat_check() > 0 &&
1070 			    S_ISDIR(statb.st_mode)))) {
1071 				*xp++ = '/';
1072 				*xp = '\0';
1073 			}
1074 		}
1075 		XPput(*wp, str_nsave(Xstring(*xs, xp), Xlength(*xs, xp), ATEMP));
1076 		return;
1077 	}
1078 
1079 	if (xp > Xstring(*xs, xp))
1080 		*xp++ = '/';
1081 	while (*sp == '/') {
1082 		Xcheck(*xs, xp);
1083 		*xp++ = *sp++;
1084 	}
1085 	np = strchr(sp, '/');
1086 	if (np != NULL) {
1087 		se = np;
1088 		odirsep = *np;	/* don't assume '/', can be multiple kinds */
1089 		*np++ = '\0';
1090 	} else {
1091 		odirsep = '\0'; /* keep gcc quiet */
1092 		se = sp + strlen(sp);
1093 	}
1094 
1095 
1096 	/* Check if sp needs globbing - done to avoid pattern checks for strings
1097 	 * containing MAGIC characters, open ['s without the matching close ],
1098 	 * etc. (otherwise opendir() will be called which may fail because the
1099 	 * directory isn't readable - if no globbing is needed, only execute
1100 	 * permission should be required (as per POSIX)).
1101 	 */
1102 	if (!has_globbing(sp, se)) {
1103 		XcheckN(*xs, xp, se - sp + 1);
1104 		debunk(xp, sp, Xnleft(*xs, xp));
1105 		xp += strlen(xp);
1106 		*xpp = xp;
1107 		globit(xs, xpp, np, wp, check);
1108 	} else {
1109 		DIR *dirp;
1110 		struct dirent *d;
1111 		char *name;
1112 		int len;
1113 		int prefix_len;
1114 
1115 		*xp = '\0';
1116 		prefix_len = Xlength(*xs, xp);
1117 		dirp = opendir(prefix_len ? Xstring(*xs, xp) : ".");
1118 		if (dirp == NULL)
1119 			goto Nodir;
1120 		while ((d = readdir(dirp)) != NULL) {
1121 			name = d->d_name;
1122 			if (name[0] == '.' &&
1123 			    (name[1] == 0 || (name[1] == '.' && name[2] == 0)))
1124 				continue; /* always ignore . and .. */
1125 			if ((*name == '.' && *sp != '.') ||
1126 			    !gmatch(name, sp, true))
1127 				continue;
1128 
1129 			len = strlen(d->d_name) + 1;
1130 			XcheckN(*xs, xp, len);
1131 			memcpy(xp, name, len);
1132 			*xpp = xp + len - 1;
1133 			globit(xs, xpp, np, wp,
1134 				(check & GF_MARKDIR) | GF_GLOBBED
1135 				| (np ? GF_EXCHECK : GF_NONE));
1136 			xp = Xstring(*xs, xp) + prefix_len;
1137 		}
1138 		closedir(dirp);
1139 	  Nodir:;
1140 	}
1141 
1142 	if (np != NULL)
1143 		*--np = odirsep;
1144 }
1145 
1146 /* remove MAGIC from string */
1147 char *
1148 debunk(char *dp, const char *sp, size_t dlen)
1149 {
1150 	char *d, *s;
1151 
1152 	if ((s = strchr(sp, MAGIC))) {
1153 		size_t slen = s - sp;
1154 		if (slen >= dlen)
1155 			return dp;
1156 		memcpy(dp, sp, slen);
1157 		for (d = dp + slen; *s && (d < dp + dlen); s++)
1158 			if (!ISMAGIC(*s) || !(*++s & 0x80) ||
1159 			    !strchr("*+?@! ", *s & 0x7f))
1160 				*d++ = *s;
1161 			else {
1162 				/* extended pattern operators: *+?@! */
1163 				if ((*s & 0x7f) != ' ')
1164 					*d++ = *s & 0x7f;
1165 				if (d < dp + dlen)
1166 					*d++ = '(';
1167 			}
1168 		*d = '\0';
1169 	} else if (dp != sp)
1170 		strlcpy(dp, sp, dlen);
1171 	return dp;
1172 }
1173 
1174 /* Check if p is an unquoted name, possibly followed by a / or :.  If so
1175  * puts the expanded version in *dcp,dp and returns a pointer in p just
1176  * past the name, otherwise returns 0.
1177  */
1178 static char *
1179 maybe_expand_tilde(char *p, XString *dsp, char **dpp, int isassign)
1180 {
1181 	XString ts;
1182 	char *dp = *dpp;
1183 	char *tp, *r;
1184 
1185 	Xinit(ts, tp, 16, ATEMP);
1186 	/* : only for DOASNTILDE form */
1187 	while (p[0] == CHAR && p[1] != '/' && (!isassign || p[1] != ':'))
1188 	{
1189 		Xcheck(ts, tp);
1190 		*tp++ = p[1];
1191 		p += 2;
1192 	}
1193 	*tp = '\0';
1194 	r = (p[0] == EOS || p[0] == CHAR || p[0] == CSUBST) ?
1195 	    tilde(Xstring(ts, tp)) : NULL;
1196 	Xfree(ts, tp);
1197 	if (r) {
1198 		while (*r) {
1199 			Xcheck(*dsp, dp);
1200 			if (ISMAGIC(*r))
1201 				*dp++ = MAGIC;
1202 			*dp++ = *r++;
1203 		}
1204 		*dpp = dp;
1205 		r = p;
1206 	}
1207 	return r;
1208 }
1209 
1210 /*
1211  * tilde expansion
1212  *
1213  * based on a version by Arnold Robbins
1214  */
1215 
1216 static char *
1217 tilde(char *cp)
1218 {
1219 	char *dp;
1220 
1221 	if (cp[0] == '\0')
1222 		dp = str_val(global("HOME"));
1223 	else if (cp[0] == '+' && cp[1] == '\0')
1224 		dp = str_val(global("PWD"));
1225 	else if (cp[0] == '-' && cp[1] == '\0')
1226 		dp = str_val(global("OLDPWD"));
1227 	else
1228 		dp = homedir(cp);
1229 	/* If HOME, PWD or OLDPWD are not set, don't expand ~ */
1230 	if (dp == null)
1231 		dp = NULL;
1232 	return dp;
1233 }
1234 
1235 /*
1236  * map userid to user's home directory.
1237  * note that 4.3's getpw adds more than 6K to the shell,
1238  * and the YP version probably adds much more.
1239  * we might consider our own version of getpwnam() to keep the size down.
1240  */
1241 
1242 static char *
1243 homedir(char *name)
1244 {
1245 	struct tbl *ap;
1246 
1247 	ap = ktenter(&homedirs, name, hash(name));
1248 	if (!(ap->flag & ISSET)) {
1249 		struct passwd *pw;
1250 
1251 		pw = getpwnam(name);
1252 		if (pw == NULL)
1253 			return NULL;
1254 		ap->val.s = str_save(pw->pw_dir, APERM);
1255 		ap->flag |= DEFINED|ISSET|ALLOC;
1256 	}
1257 	return ap->val.s;
1258 }
1259 
1260 static void
1261 alt_expand(XPtrV *wp, char *start, char *exp_start, char *end, int fdo)
1262 {
1263 	int count = 0;
1264 	char *brace_start, *brace_end, *comma = NULL;
1265 	char *field_start;
1266 	char *p;
1267 
1268 	/* search for open brace */
1269 	for (p = exp_start; (p = strchr(p, MAGIC)) && p[1] != OBRACE; p += 2)
1270 		;
1271 	brace_start = p;
1272 
1273 	/* find matching close brace, if any */
1274 	if (p) {
1275 		comma = NULL;
1276 		count = 1;
1277 		for (p += 2; *p && count; p++) {
1278 			if (ISMAGIC(*p)) {
1279 				if (*++p == OBRACE)
1280 					count++;
1281 				else if (*p == CBRACE)
1282 					--count;
1283 				else if (*p == ',' && count == 1)
1284 					comma = p;
1285 			}
1286 		}
1287 	}
1288 	/* no valid expansions... */
1289 	if (!p || count != 0) {
1290 		/* Note that given a{{b,c} we do not expand anything (this is
1291 		 * what at&t ksh does.  This may be changed to do the {b,c}
1292 		 * expansion. }
1293 		 */
1294 		if (fdo & DOGLOB)
1295 			glob(start, wp, fdo & DOMARKDIRS);
1296 		else
1297 			XPput(*wp, debunk(start, start, end - start));
1298 		return;
1299 	}
1300 	brace_end = p;
1301 	if (!comma) {
1302 		alt_expand(wp, start, brace_end, end, fdo);
1303 		return;
1304 	}
1305 
1306 	/* expand expression */
1307 	field_start = brace_start + 2;
1308 	count = 1;
1309 	for (p = brace_start + 2; p != brace_end; p++) {
1310 		if (ISMAGIC(*p)) {
1311 			if (*++p == OBRACE)
1312 				count++;
1313 			else if ((*p == CBRACE && --count == 0) ||
1314 			    (*p == ',' && count == 1)) {
1315 				char *new;
1316 				int l1, l2, l3;
1317 
1318 				l1 = brace_start - start;
1319 				l2 = (p - 1) - field_start;
1320 				l3 = end - brace_end;
1321 				new = alloc(l1 + l2 + l3 + 1, ATEMP);
1322 				memcpy(new, start, l1);
1323 				memcpy(new + l1, field_start, l2);
1324 				memcpy(new + l1 + l2, brace_end, l3);
1325 				new[l1 + l2 + l3] = '\0';
1326 				alt_expand(wp, new, new + l1,
1327 				    new + l1 + l2 + l3, fdo);
1328 				field_start = p + 1;
1329 			}
1330 		}
1331 	}
1332 	return;
1333 }
1334 
1335 /*
1336  * Copy the given variable if it's flagged as read-only.
1337  * Such variables have static storage and only one can therefore be referenced
1338  * at a time.
1339  * This is necessary in order to allow variable expansion expressions to refer
1340  * to multiple read-only variables.
1341  */
1342 static struct tbl *
1343 varcpy(struct tbl *vp)
1344 {
1345 	struct tbl *cpy;
1346 
1347 	if (vp == NULL || (vp->flag & RDONLY) == 0)
1348 		return vp;
1349 
1350 	cpy = alloc(sizeof(struct tbl), ATEMP);
1351 	memcpy(cpy, vp, sizeof(struct tbl));
1352 	return cpy;
1353 }
1354