1 /*
2  * subst.c - various substitutions
3  *
4  * This file is part of zsh, the Z shell.
5  *
6  * Copyright (c) 1992-1997 Paul Falstad
7  * All rights reserved.
8  *
9  * Permission is hereby granted, without written agreement and without
10  * license or royalty fees, to use, copy, modify, and distribute this
11  * software and to distribute modified versions of this software for any
12  * purpose, provided that the above copyright notice and the following
13  * two paragraphs appear in all copies of this software.
14  *
15  * In no event shall Paul Falstad or the Zsh Development Group be liable
16  * to any party for direct, indirect, special, incidental, or consequential
17  * damages arising out of the use of this software and its documentation,
18  * even if Paul Falstad and the Zsh Development Group have been advised of
19  * the possibility of such damage.
20  *
21  * Paul Falstad and the Zsh Development Group specifically disclaim any
22  * warranties, including, but not limited to, the implied warranties of
23  * merchantability and fitness for a particular purpose.  The software
24  * provided hereunder is on an "as is" basis, and Paul Falstad and the
25  * Zsh Development Group have no obligation to provide maintenance,
26  * support, updates, enhancements, or modifications.
27  *
28  */
29 
30 #include "zsh.mdh"
31 #include "subst.pro"
32 
33 #define LF_ARRAY	1
34 
35 /**/
36 char nulstring[] = {Nularg, '\0'};
37 
38 /* Check for array assignent with entries like [key]=val.
39  *
40  * Insert Marker node, convert following nodes to list to alternate key
41  * / val form, perform appropriate substitution, and return last
42  * inserted (value) node if found.
43  *
44  * Caller to check errflag.
45  */
46 
47 /**/
48 static LinkNode
keyvalpairelement(LinkList list,LinkNode node)49 keyvalpairelement(LinkList list, LinkNode node)
50 {
51     char *start, *end, *dat;
52 
53     if ((start = (char *)getdata(node)) &&
54 	start[0] == Inbrack &&
55 	(end = strchr(start+1, Outbrack)) &&
56 	/* ..]=value or ]+=Value */
57 	(end[1] == Equals ||
58 	 (end[1] == '+' && end[2] == Equals))) {
59 	static char marker[2] = { Marker, '\0' };
60 	static char marker_plus[3] = { Marker, '+', '\0' };
61 	*end = '\0';
62 
63 	dat = start + 1;
64 	singsub(&dat);
65 	untokenize(dat);
66 	if (end[1] == '+') {
67 	    setdata(node, marker_plus);
68 	    node = insertlinknode(list, node, dat);
69 	    dat = end + 3;
70 	} else {
71 	    setdata(node, marker);
72 	    node = insertlinknode(list, node, dat);
73 	    dat = end + 2;
74 	}
75 	singsub(&dat);
76 	untokenize(dat);
77 	return insertlinknode(list, node, dat);
78     }
79     return NULL;
80 }
81 
82 /* Do substitutions before fork. These are:
83  *  - Process substitution: <(...), >(...), =(...)
84  *  - Parameter substitution
85  *  - Command substitution
86  * Followed by
87  *  - Quote removal
88  *  - Brace expansion
89  *  - Tilde and equals substitution
90  *
91  * "flag"s contains PREFORK_* flags, defined in zsh.h.
92  *
93  * "ret_flags" is used to return PREFORK_* values from nested parameter
94  * substitutions.  It may be NULL in which case PREFORK_SUBEXP must not
95  * appear in flags; any return value from below will be discarded.
96  */
97 
98 /**/
99 mod_export void
prefork(LinkList list,int flags,int * ret_flags)100 prefork(LinkList list, int flags, int *ret_flags)
101 {
102     LinkNode node, insnode, stop = 0;
103     int keep = 0, asssub = (flags & PREFORK_TYPESET) && isset(KSHTYPESET);
104     int ret_flags_local = 0;
105     if (!ret_flags)
106 	ret_flags = &ret_flags_local; /* will be discarded */
107 
108     queue_signals();
109     node = firstnode(list);
110     while (node) {
111 	if ((flags & (PREFORK_SINGLE|PREFORK_ASSIGN)) == PREFORK_ASSIGN &&
112 	    (insnode = keyvalpairelement(list, node))) {
113 	    node = insnode;
114 	    incnode(node);
115 	    *ret_flags |= PREFORK_KEY_VALUE;
116 	    continue;
117 	}
118 	if (errflag) {
119 	    unqueue_signals();
120 	    return;
121 	}
122 	if (isset(SHFILEEXPANSION)) {
123 	    /*
124 	     * Here and below we avoid taking the address
125 	     * of a void * and then pretending it's a char **
126 	     * instead of a void ** by a little inefficiency.
127 	     * This could be avoided with some extra linked list
128 	     * machinery, but that would need quite a lot of work
129 	     * to ensure consistency.  What we really need is
130 	     * templates...
131 	     */
132 	    char *cptr = (char *)getdata(node);
133 	    filesub(&cptr, flags & (PREFORK_TYPESET|PREFORK_ASSIGN));
134 	    /*
135 	     * The assignment is so simple it's not worth
136 	     * testing if cptr changed...
137 	     */
138 	    setdata(node, cptr);
139 	}
140 	else
141 	{
142 	    if (!(node = stringsubst(list, node,
143 				     flags & ~(PREFORK_TYPESET|PREFORK_ASSIGN),
144 				     ret_flags, asssub))) {
145 		unqueue_signals();
146 		return;
147 	    }
148 	}
149 	incnode(node);
150     }
151     if (isset(SHFILEEXPANSION)) {
152 	/*
153 	 * stringsubst() may insert new nodes, so doesn't work
154 	 * well in the same loop as file expansion.
155 	 */
156 	for (node = firstnode(list); node; incnode(node)) {
157 	    if (!(node = stringsubst(list, node,
158 				     flags & ~(PREFORK_TYPESET|PREFORK_ASSIGN),
159 				     ret_flags, asssub))) {
160 		unqueue_signals();
161 		return;
162 	    }
163 	}
164     }
165     for (node = firstnode(list); node; incnode(node)) {
166 	if (node == stop)
167 	    keep = 0;
168 	if (*(char *)getdata(node)) {
169 	    remnulargs(getdata(node));
170 	    if (unset(IGNOREBRACES) && !(flags & PREFORK_SINGLE)) {
171 		if (!keep)
172 		    stop = nextnode(node);
173 		while (hasbraces(getdata(node))) {
174 		    keep = 1;
175 		    xpandbraces(list, &node);
176 		}
177 	    }
178 	    if (unset(SHFILEEXPANSION)) {
179 		char *cptr = (char *)getdata(node);
180 		filesub(&cptr, flags & (PREFORK_TYPESET|PREFORK_ASSIGN));
181 		setdata(node, cptr);
182 	    }
183 	} else if (!(flags & PREFORK_SINGLE) &&
184 		   !(*ret_flags & PREFORK_KEY_VALUE) &&
185 		   !keep)
186 	    uremnode(list, node);
187 	if (errflag) {
188 	    unqueue_signals();
189 	    return;
190 	}
191     }
192     unqueue_signals();
193 }
194 
195 /*
196  * Perform $'...' quoting.  The arguments are
197  *   strstart   The start of the string
198  *   pstrdpos   Initially, *pstrdpos is the position where the $ of the $'
199  *              occurs.  It will be updated to the next character after the
200  *              last ' of the $'...'.
201  * The return value is the entire allocated string from strstart on the heap.
202  * Note the original string may be modified in the process.
203  */
204 /**/
205 static char *
stringsubstquote(char * strstart,char ** pstrdpos)206 stringsubstquote(char *strstart, char **pstrdpos)
207 {
208     int len;
209     char *strdpos = *pstrdpos, *strsub, *strret;
210 
211     strsub = getkeystring(strdpos+2, &len,
212 			  GETKEYS_DOLLARS_QUOTE, NULL);
213     len += 2;			/* measured from strdpos */
214 
215     if (strstart != strdpos) {
216 	*strdpos = '\0';
217 	if (strdpos[len])
218 	    strret = zhtricat(strstart, strsub, strdpos + len);
219 	else
220 	    strret = dyncat(strstart, strsub);
221     } else if (strdpos[len])
222 	strret = dyncat(strsub, strdpos + len);
223     else if (*strsub)
224 	strret = strsub;
225     else {
226 	/* This ensures a $'' doesn't get elided. */
227 	strret = dupstring(nulstring);
228     }
229 
230     *pstrdpos = strret + (strdpos - strstart) + strlen(strsub);
231 
232     return strret;
233 }
234 
235 /**/
236 static LinkNode
stringsubst(LinkList list,LinkNode node,int pf_flags,int * ret_flags,int asssub)237 stringsubst(LinkList list, LinkNode node, int pf_flags, int *ret_flags,
238 	    int asssub)
239 {
240     int qt;
241     char *str3 = (char *)getdata(node);
242     char *str  = str3, c;
243 
244     while (!errflag && (c = *str)) {
245 	if (((c = *str) == Inang || c == OutangProc ||
246 	     (str == str3 && c == Equals))
247 	    && str[1] == Inpar) {
248 	    char *subst, *rest, *snew, *sptr;
249 	    int str3len = str - str3, sublen, restlen;
250 
251 	    if (c == Inang || c == OutangProc)
252 		subst = getproc(str, &rest);	/* <(...) or >(...) */
253 	    else
254 		subst = getoutputfile(str, &rest);	/* =(...) */
255 	    if (errflag)
256 		return NULL;
257 	    if (!subst)
258 		rest = subst = "";
259 
260 	    sublen = strlen(subst);
261 	    restlen = strlen(rest);
262 	    sptr = snew = hcalloc(str3len + sublen + restlen + 1);
263 	    if (str3len) {
264 		memcpy(sptr, str3, str3len);
265 		sptr += str3len;
266 	    }
267 	    if (sublen) {
268 		memcpy(sptr, subst, sublen);
269 		sptr += sublen;
270 	    }
271 	    if (restlen)
272 		memcpy(sptr, rest, restlen);
273 	    sptr[restlen] = '\0';
274 	    str3 = snew;
275 	    str = snew + str3len + sublen;
276 	    setdata(node, str3);
277 	} else
278 	    str++;
279     }
280     str = str3;
281 
282     while (!errflag && (c = *str)) {
283 	if ((qt = c == Qstring) || c == String) {
284 	    if ((c = str[1]) == Inpar || c == Inparmath) {
285 		if (!qt)
286 		    list->list.flags |= LF_ARRAY;
287 		str++;
288 		goto comsub;
289 	    } else if (c == Inbrack) {
290 		/* $[...] */
291 		char *str2 = str;
292 		str2++;
293 		if (skipparens(Inbrack, Outbrack, &str2)) {
294 		    zerr("closing bracket missing");
295 		    return NULL;
296 		}
297 		str2[-1] = *str = '\0';
298 		str = arithsubst(str + 2, &str3, str2);
299 		setdata(node, (void *) str3);
300 		continue;
301 	    } else if (c == Snull) {
302 		str3 = stringsubstquote(str3, &str);
303 		setdata(node, (void *) str3);
304 		continue;
305 	    } else {
306 		/*
307 		 * To avoid setting and unsetting the SHWORDSPLIT
308 		 * option, we pass flags if we need to control it for
309 		 * recursive expansion via multsub()
310 		 * If PREFORK_NOSHWORDSPLIT is set, the option is
311 		 * disregarded; otherwise, use it if set.
312 		 * If PREFORK_SPLIT is set, splitting is forced,
313 		 * regardless of the option
314 		 * If PREFORK_SHWORDSPLIT is already set, or used by the
315 		 * previous two to signal paramsubst(), we'll do
316 		 * sh-style wordsplitting on parameters.
317 		 */
318 		if ((isset(SHWORDSPLIT) &&
319 		     !(pf_flags & PREFORK_NOSHWORDSPLIT)) ||
320 		    (pf_flags & PREFORK_SPLIT))
321 		    pf_flags |= PREFORK_SHWORDSPLIT;
322 		node = paramsubst(
323 		    list, node, &str, qt,
324 		    pf_flags & (PREFORK_SINGLE|PREFORK_SHWORDSPLIT|
325 				PREFORK_SUBEXP), ret_flags);
326 		if (errflag || !node)
327 		    return NULL;
328 		str3 = (char *)getdata(node);
329 		continue;
330 	    }
331 	} else if ((qt = c == Qtick) || (c == Tick ? (list->list.flags |= LF_ARRAY) : 0))
332 	  comsub: {
333 	    LinkList pl;
334 	    char *s, *str2 = str;
335 	    char endchar;
336 	    int l1, l2;
337 
338 	    if (c == Inpar) {
339 		endchar = Outpar;
340 		str[-1] = '\0';
341 #ifdef DEBUG
342 		if (skipparens(Inpar, Outpar, &str))
343 		    dputs("BUG: parse error in command substitution");
344 #else
345 		skipparens(Inpar, Outpar, &str);
346 #endif
347 		str--;
348 	    } else if (c == Inparmath) {
349 		/*
350 		 * Math substitution of the form $((...)).
351 		 * These can be nested, for goodness sake...
352 		 */
353 		int mathpar = 1;
354 		str[-1] = '\0';
355 		while (mathpar && *str) {
356 		    str++;
357 		    if (*str == Outparmath)
358 			mathpar--;
359 		    else if (*str == Inparmath)
360 			mathpar++;
361 		}
362 		if (*str != Outparmath) {
363 		    zerr("failed to find end of math substitution");
364 		    return NULL;
365 		}
366 		str[-1] = '\0';
367 		if (isset(EXECOPT))
368 		    str = arithsubst(str2 + 2, &str3, str+1);
369 		else
370 		    strncpy(str3, str2, 1);
371 		setdata(node, (void *) str3);
372 		continue;
373 	    } else {
374 		endchar = c;
375 		*str = '\0';
376 
377 		while (*++str != endchar) {
378 		    if (!*str) {
379 			zerr("failed to find end of command substitution");
380 			return NULL;
381 		    }
382 		}
383 	    }
384 	    *str++ = '\0';
385 
386 	    /* It is a command substitution, which will be parsed again   *
387 	     * by the lexer, so we untokenize it first, but we cannot use *
388 	     * untokenize() since in the case of `...` some Bnulls should *
389 	     * be left unchanged.  Note that the lexer doesn't tokenize   *
390 	     * the body of a command substitution so if there are some    *
391 	     * tokens here they are from a ${(e)~...} substitution.       */
392 	    for (str = str2; (c = *++str); )
393 		if (itok(c) && c != Nularg &&
394 		    !(endchar != Outpar && c == Bnull &&
395 		      (str[1] == '$' || str[1] == '\\' || str[1] == '`' ||
396 		       (qt && str[1] == '"'))))
397 		    *str = ztokens[c - Pound];
398 	    str++;
399 	    if (!(pl = getoutput(str2 + 1, qt ||
400 				 (pf_flags & PREFORK_SINGLE)))) {
401 		zerr("parse error in command substitution");
402 		return NULL;
403 	    }
404 	    if (endchar == Outpar)
405 		str2--;
406 	    if (!(s = (char *) ugetnode(pl))) {
407 		str = (char *)memmove(str2, str, strlen(str)+1);
408 		continue;
409 	    }
410 	    if (!qt && (pf_flags & PREFORK_SINGLE) && isset(GLOBSUBST))
411 		shtokenize(s);
412 	    l1 = str2 - str3;
413 	    l2 = strlen(s);
414 	    if (nonempty(pl)) {
415 		LinkNode n = lastnode(pl);
416 		str2 = (char *) hcalloc(l1 + l2 + 1);
417 		strcpy(str2, str3);
418 		strcpy(str2 + l1, s);
419 		setdata(node, str2);
420 		insertlinklist(pl, node, list);
421 		s = (char *) getdata(node = n);
422 		l1 = 0;
423 		l2 = strlen(s);
424 	    }
425 	    str2 = (char *) hcalloc(l1 + l2 + strlen(str) + 1);
426 	    if (l1)
427 		strcpy(str2, str3);
428 	    strcpy(str2 + l1, s);
429 	    str = strcpy(str2 + l1 + l2, str);
430 	    str3 = str2;
431 	    setdata(node, str3);
432 	    continue;
433 	} else if (asssub && ((c == '=') || c == Equals) && str != str3) {
434 	    /*
435 	     * We are in a normal argument which looks like an assignment
436 	     * and is to be treated like one, with no word splitting.
437 	     */
438 	    pf_flags |= PREFORK_SINGLE;
439 	}
440 	str++;
441     }
442     return errflag ? NULL : node;
443 }
444 
445 /*
446  * Simplified version of the prefork/singsub processing where
447  * we only do substitutions appropriate to quoting.  Currently
448  * this means only the expansions in $'....'.  This is used
449  * for the end tag for here documents.  As we are not doing
450  * `...` expansions, we just use those for quoting.  However,
451  * they stay in the text.  This is weird, but that's not
452  * my fault.
453  *
454  * The remnulargs() makes this consistent with the other forms
455  * of substitution, indicating that quotes have been fully
456  * processed.
457  *
458  * The fully processed string is returned.
459  */
460 
461 /**/
462 char *
quotesubst(char * str)463 quotesubst(char *str)
464 {
465     char *s = str;
466 
467     while (*s) {
468 	if (*s == String && s[1] == Snull) {
469 	    str = stringsubstquote(str, &s);
470 	} else {
471 	    s++;
472 	}
473     }
474     remnulargs(str);
475     return str;
476 }
477 
478 /* Glob entries of a linked list.
479  *
480  * flags are from PREFORK_*, but only two are handled:
481  * - PREFORK_NO_UNTOK: pass into zglob() a flag saying do not untokenise.
482  * - PREFORK_KEY_VALUE: look out for Marker / Key / Value list triads
483  *   and don't glob them.  The key and value should already have
484  *   been untokenised as they are not subject to further expansion.
485  */
486 
487 /**/
488 mod_export void
globlist(LinkList list,int flags)489 globlist(LinkList list, int flags)
490 {
491     LinkNode node, next;
492 
493     badcshglob = 0;
494     for (node = firstnode(list); !errflag && node; node = next) {
495 	next = nextnode(node);
496 	if ((flags & PREFORK_KEY_VALUE) &&
497 	    *(char *)getdata(node) == Marker) {
498 	    /* Skip key / value pair */
499 	    next = nextnode(nextnode(next));
500 	} else {
501 	    zglob(list, node, (flags & PREFORK_NO_UNTOK) != 0);
502 	}
503     }
504     if (noerrs)
505 	badcshglob = 0;
506     else if (badcshglob == 1)
507 	zerr("no match");
508 }
509 
510 /* perform substitution on a single word */
511 
512 /**/
513 mod_export void
singsub(char ** s)514 singsub(char **s)
515 {
516     local_list1(foo);
517 
518     init_list1(foo, *s);
519 
520     prefork(&foo, PREFORK_SINGLE, NULL);
521     if (errflag)
522 	return;
523     *s = (char *) ugetnode(&foo);
524     DPUTS(nonempty(&foo), "BUG: singsub() produced more than one word!");
525 }
526 
527 /* Perform substitution on a single word, *s. Unlike with singsub(), the
528  * result can be more than one word. If split is non-zero, the string is
529  * first word-split using IFS, but only for non-quoted "whitespace" (as
530  * indicated by Dnull, Snull, Tick, Bnull, Inpar, and Outpar).
531  *
532  * If arg "a" was non-NULL and we got an array as a result of the parsing,
533  * the strings are stored in *a (even for a 1-element array) and *isarr is
534  * set to 1.  Otherwise, *isarr is set to 0, and the result is put into *s,
535  * with any necessary joining of multiple elements using sep (which can be
536  * NULL to use IFS).  The return value is true iff the expansion resulted
537  * in an empty list.
538  *
539  * *ms_flags is set to bits in the enum above as needed.
540  */
541 
542 /**/
543 static int
multsub(char ** s,int pf_flags,char *** a,int * isarr,char * sep,int * ms_flags)544 multsub(char **s, int pf_flags, char ***a, int *isarr, char *sep,
545 	int *ms_flags)
546 {
547     int l;
548     char **r, **p, *x = *s;
549     local_list1(foo);
550 
551     if (pf_flags & PREFORK_SPLIT) {
552 	/*
553 	 * This doesn't handle multibyte characters, but we're
554 	 * looking for whitespace separators which must be ASCII.
555 	 */
556 	for ( ; *x; x += l) {
557 	    char c = (l = *x == Meta) ? x[1] ^ 32 : *x;
558 	    l++;
559 	    if (!iwsep(STOUC(c)))
560 		break;
561 	    *ms_flags |= MULTSUB_WS_AT_START;
562 	}
563     }
564 
565     init_list1(foo, x);
566 
567     if (pf_flags & PREFORK_SPLIT) {
568 	LinkNode n = firstnode(&foo);
569 	int inq = 0, inp = 0;
570 	MB_METACHARINIT();
571 	for ( ; *x; x += l) {
572 	    int rawc = -1;
573 	    convchar_t c;
574 	    if (*x == Dash)
575 		*x = '-';
576 	    if (itok(STOUC(*x))) {
577 		/* token, can't be separator, must be single byte */
578 		rawc = *x;
579 		l = 1;
580 	    } else {
581 		l = MB_METACHARLENCONV(x, &c);
582 		if (!inq && !inp && WC_ZISTYPE(c, ISEP)) {
583 		    *x = '\0';
584 		    for (x += l; *x; x += l) {
585 			if (itok(STOUC(*x))) {
586 			    /* as above */
587 			    rawc = *x;
588 			    l = 1;
589 			    break;
590 			}
591 			l = MB_METACHARLENCONV(x, &c);
592 			if (!WC_ZISTYPE(c, ISEP))
593 			    break;
594 		    }
595 		    if (!*x) {
596 			*ms_flags |= MULTSUB_WS_AT_END;
597 			break;
598 		    }
599 		    insertlinknode(&foo, n, (void *)x), incnode(n);
600 		}
601 	    }
602 	    switch (rawc) {
603 	    case Dnull:  /* " */
604 	    case Snull:  /* ' */
605 	    case Tick:   /* ` (note: no Qtick!) */
606 		/* These always occur in unnested pairs. */
607 		inq = !inq;
608 		break;
609 	    case Inpar:  /* ( */
610 		inp++;
611 		break;
612 	    case Outpar: /* ) */
613 		inp--;
614 		break;
615 	    case Bnull:  /* \ */
616 	    case Bnullkeep:
617 		/* The parser verified the following char's existence. */
618 		x += l;
619 		l = MB_METACHARLEN(x);
620 		break;
621 	    }
622 	}
623     }
624 
625     prefork(&foo, pf_flags, ms_flags);
626     if (errflag) {
627 	if (isarr)
628 	    *isarr = 0;
629 	return 0;
630     }
631 
632     if ((l = countlinknodes(&foo)) > 1 || (foo.list.flags & LF_ARRAY && a)) {
633 	p = r = hcalloc((l + 1) * sizeof(char*));
634 	while (nonempty(&foo))
635 	    *p++ = (char *)ugetnode(&foo);
636 	*p = NULL;
637 	/* We need a way to figure out if a one-item result was a scalar
638 	 * or a single-item array.  The parser will have set LF_ARRAY
639 	 * in the latter case, allowing us to return it as an array to
640 	 * our caller (if they provided for that result). */
641 	if (a && (l > 1 || foo.list.flags & LF_ARRAY)) {
642 	    *a = r;
643 	    *isarr = SCANPM_MATCHMANY;
644 	    return 0;
645 	}
646 	*s = sepjoin(r, sep, 1);
647 	if (isarr)
648 	    *isarr = 0;
649 	return 0;
650     }
651     if (l)
652 	*s = (char *) ugetnode(&foo);
653     else
654 	*s = dupstring("");
655     if (isarr)
656 	*isarr = 0;
657     return !l;
658 }
659 
660 /*
661  * ~, = subs: assign & PREFORK_TYPESET => typeset or magic equals
662  *            assign & PREFORK_ASSIGN => normal assignment
663  */
664 
665 /**/
666 mod_export void
filesub(char ** namptr,int assign)667 filesub(char **namptr, int assign)
668 {
669     char *eql = NULL, *sub = NULL, *str, *ptr;
670     int len;
671 
672     filesubstr(namptr, assign);
673 
674     if (!assign)
675 	return;
676 
677     if (assign & PREFORK_TYPESET) {
678 	if ((*namptr)[1] && (eql = sub = strchr(*namptr + 1, Equals))) {
679 	    str = sub + 1;
680 	    if ((sub[1] == Tilde || sub[1] == Equals) && filesubstr(&str, assign)) {
681 		sub[1] = '\0';
682 		*namptr = dyncat(*namptr, str);
683 	    }
684 	} else
685 	    return;
686     }
687 
688     ptr = *namptr;
689     while ((sub = strchr(ptr, ':'))) {
690 	str = sub + 1;
691 	len = sub - *namptr;
692 	if (sub > eql &&
693 	    (sub[1] == Tilde || sub[1] == Equals) &&
694 	    filesubstr(&str, assign)) {
695 	    sub[1] = '\0';
696 	    *namptr = dyncat(*namptr, str);
697 	}
698 	ptr = *namptr + len + 1;
699     }
700 }
701 
702 #define isend(c) ( !(c) || (c)=='/' || (c)==Inpar || (assign && (c)==':') )
703 #define isend2(c) ( !(c) || (c)==Inpar || (assign && (c)==':') )
704 
705 /*
706  * do =foo substitution, or equivalent.
707  * on entry, str should point to the "foo".
708  * if assign, this is in an assignment
709  * if nomatch, report hard error on failure.
710  * if successful, returns the expansion, else NULL.
711  */
712 
713 /**/
714 char *
equalsubstr(char * str,int assign,int nomatch)715 equalsubstr(char *str, int assign, int nomatch)
716 {
717     char *pp, *cnam, *cmdstr;
718 
719     for (pp = str; !isend2(*pp); pp++)
720 	;
721     cmdstr = dupstrpfx(str, pp-str);
722     untokenize(cmdstr);
723     remnulargs(cmdstr);
724     if (!(cnam = findcmd(cmdstr, 1, 0))) {
725 	if (nomatch)
726 	    zerr("%s not found", cmdstr);
727 	return NULL;
728     }
729     if (*pp)
730 	return dyncat(cnam, pp);
731     else
732 	return cnam;		/* already duplicated */
733 }
734 
735 /**/
736 mod_export int
filesubstr(char ** namptr,int assign)737 filesubstr(char **namptr, int assign)
738 {
739     char *str = *namptr;
740 
741     if (*str == Tilde && str[1] != '=' && str[1] != Equals) {
742 	char *ptr, *tmp, *res, *ptr2;
743 	int val;
744 
745 	if (str[1] == Dash)
746 	    str[1] = '-';
747 	val = zstrtol(str + 1, &ptr, 10);
748 	if (isend(str[1])) {   /* ~ */
749 	    *namptr = dyncat(home ? home : "", str + 1);
750 	    return 1;
751 	} else if (str[1] == '+' && isend(str[2])) {   /* ~+ */
752 	    *namptr = dyncat(pwd, str + 2);
753 	    return 1;
754 	} else if (str[1] == '-' && isend(str[2])) {   /* ~- */
755 	    *namptr = dyncat((tmp = oldpwd) ? tmp : pwd, str + 2);
756 	    return 1;
757 	} else if (str[1] == Inbrack &&
758 		   (ptr2 = strchr(str+2, Outbrack))) {
759 	    char **arr;
760 	    untokenize(tmp = dupstrpfx(str+2, ptr2 - (str+2)));
761 	    remnulargs(tmp);
762 	    arr = subst_string_by_hook("zsh_directory_name", "n", tmp);
763 	    res = arr ? *arr : NULL;
764 	    if (res) {
765 		*namptr = dyncat(res, ptr2+1);
766 		return 1;
767 	    }
768 	    if (isset(NOMATCH))
769 		zerr("no directory expansion: ~[%s]", tmp);
770 	    return 0;
771 	} else if (!inblank(str[1]) && isend(*ptr) &&
772 		   (!idigit(str[1]) || (ptr - str < 4))) {
773 	    char *ds;
774 
775 	    if (val < 0)
776 		val = -val;
777 	    ds = dstackent(str[1], val);
778 	    if (!ds)
779 		return 0;
780 	    *namptr = dyncat(ds, ptr);
781 	    return 1;
782 	} else if ((ptr = itype_end(str+1, IUSER, 0)) != str+1) {   /* ~foo */
783 	    char *untok, *hom;
784 
785 	    if (!isend(*ptr))
786 		return 0;
787 	    untok = dupstring(++str);
788 	    untok[ptr-str] = 0;
789 	    untokenize(untok);
790 
791 	    if (!(hom = getnameddir(untok))) {
792 		if (isset(NOMATCH) && isset(EXECOPT))
793 		    zerr("no such user or named directory: %s", untok);
794 		return 0;
795 	    }
796 	    *namptr = dyncat(hom, ptr);
797 	    return 1;
798 	}
799     } else if (*str == Equals && isset(EQUALS) && str[1]) {   /* =foo */
800 	char *expn = equalsubstr(str+1, assign, isset(NOMATCH));
801 	if (expn) {
802 	    *namptr = expn;
803 	    return 1;
804 	}
805     }
806     return 0;
807 }
808 
809 #undef isend
810 #undef isend2
811 
812 /**/
813 static char *
strcatsub(char ** d,char * pb,char * pe,char * src,int l,char * s,int glbsub,int copied)814 strcatsub(char **d, char *pb, char *pe, char *src, int l, char *s, int glbsub,
815 	  int copied)
816 {
817     char *dest;
818     int pl = pe - pb;
819 
820     if (!pl && (!s || !*s)) {
821 	*d = dest = (copied ? src : dupstring(src));
822 	if (glbsub)
823 	    shtokenize(dest);
824     } else {
825 	*d = dest = hcalloc(pl + l + (s ? strlen(s) : 0) + 1);
826 	strncpy(dest, pb, pl);
827 	dest += pl;
828 	strcpy(dest, src);
829 	if (glbsub)
830 	    shtokenize(dest);
831 	dest += l;
832 	if (s)
833 	    strcpy(dest, s);
834     }
835     return dest;
836 }
837 
838 #ifdef MULTIBYTE_SUPPORT
839 #define WCPADWIDTH(cchar, mw)	wcpadwidth(cchar, mw)
840 
841 /*
842  * Width of character for padding purposes.
843  * 0: all characters count 1.
844  * 1: use width of multibyte character.
845  * 2: non-zero width characters count 1, zero width 0.
846  */
847 static int
wcpadwidth(wchar_t wc,int multi_width)848 wcpadwidth(wchar_t wc, int multi_width)
849 {
850     int width;
851 
852     switch (multi_width)
853     {
854     case 0:
855 	return 1;
856 
857     case 1:
858 	width = WCWIDTH(wc);
859 	if (width >= 0)
860 	    return width;
861 	return 0;
862 
863     default:
864 	return WCWIDTH(wc) > 0 ? 1 : 0;
865     }
866 }
867 
868 #else
869 #define WCPADWIDTH(cchar, mw)	(1)
870 #endif
871 
872 /*
873  * Pad the string str, returning a result from the heap (or str itself,
874  * if it didn't need padding).  If str is too large, it will be truncated.
875  * Calculations are in terms of width if MULTIBYTE is in effect and
876  * multi_width is non-zero, else characters.
877  *
878  * prenum and postnum are the width to which the string needs padding
879  * on the left and right.
880  *
881  * preone and postone are string to insert once only before and after
882  * str.  They will be truncated on the left or right, respectively,
883  * if necessary to fit the width.  Either or both may be NULL in which
884  * case they will not be used.
885  *
886  * premul and postmul are the padding strings to be repeated before
887  * on the left (if prenum is non-zero) and right (if postnum is non-zero).  If
888  * NULL the first character of IFS (typically but not necessarily a space)
889  * will be used.
890  */
891 
892 static char *
dopadding(char * str,int prenum,int postnum,char * preone,char * postone,char * premul,char * postmul,int multi_width)893 dopadding(char *str, int prenum, int postnum, char *preone, char *postone,
894 	  char *premul, char *postmul
895 #ifdef MULTIBYTE_SUPPORT
896 	  , int multi_width
897 #endif
898     )
899 {
900     char *def, *ret, *t, *r;
901     int ls, ls2, lpreone, lpostone, lpremul, lpostmul, lr, f, m, c, cc, cl;
902     convchar_t cchar;
903 
904     MB_METACHARINIT();
905     if (!ifs || *ifs) {
906 	char *tmpifs = ifs ? ifs : DEFAULT_IFS;
907 	def = dupstrpfx(tmpifs, MB_METACHARLEN(tmpifs));
908     } else
909 	def = "";
910     if (preone && !*preone)
911 	preone = def;
912     if (postone && !*postone)
913 	postone = def;
914     if (!premul || !*premul)
915 	premul = def;
916     if (!postmul || !*postmul)
917 	postmul = def;
918 
919     ls = MB_METASTRLEN2(str, multi_width);
920     lpreone = preone ? MB_METASTRLEN2(preone, multi_width) : 0;
921     lpostone = postone ? MB_METASTRLEN2(postone, multi_width) : 0;
922     lpremul = MB_METASTRLEN2(premul, multi_width);
923     lpostmul = MB_METASTRLEN2(postmul, multi_width);
924 
925     if (prenum + postnum == ls)
926 	return str;
927 
928     /*
929      * Try to be careful with allocated lengths.  The following
930      * is a maximum, in case we need the entire repeated string
931      * for each repetition.  We probably don't, but in case the user
932      * has given us something pathological which doesn't convert
933      * easily into a width we'd better be safe.
934      */
935     lr = strlen(str) + strlen(premul) * prenum + strlen(postmul) * postnum;
936     /*
937      * Same logic for preone and postone, except those may be NULL.
938      */
939     if (preone)
940 	lr += strlen(preone);
941     if (postone)
942 	lr += strlen(postone);
943     r = ret = (char *)zhalloc(lr + 1);
944 
945     if (prenum) {
946 	/*
947 	 * Pad on the left.
948 	 */
949 	if (postnum) {
950 	    /*
951 	     * Pad on both right and left.
952 	     * The strategy is to divide the string into two halves.
953 	     * The first half is dealt with by the left hand padding
954 	     * code, the second by the right hand.
955 	     */
956 	    ls2 = ls / 2;
957 
958 	    /* The width left to pad for the first half. */
959 	    f = prenum - ls2;
960 	    if (f <= 0) {
961 		/* First half doesn't fit.  Skip the first -f width. */
962 		f = -f;
963 		MB_METACHARINIT();
964 		while (f > 0) {
965 		    cl = MB_METACHARLENCONV(str, &cchar);
966 		    if (!cl)
967 			break;
968 		    str += cl;
969 		    f -= WCPADWIDTH(cchar, multi_width);
970 		}
971 		/* Now finish the first half. */
972 		for (c = prenum; c > 0; ) {
973 		    cl = MB_METACHARLENCONV(str, &cchar);
974 		    if (!cl)
975 			break;
976 		    while (cl--)
977 			*r++ = *str++;
978 		    c -= WCPADWIDTH(cchar, multi_width);
979 		}
980 	    } else {
981 		if (f <= lpreone) {
982 		    if (preone) {
983 			/*
984 			 * The unrepeated string doesn't fit.
985 			 */
986 			MB_METACHARINIT();
987 			/* The width we need to skip */
988 			f = lpreone - f;
989 			/* So skip. */
990 			for (t = preone; f > 0; ) {
991 			    cl = MB_METACHARLENCONV(t, &cchar);
992 			    if (!cl)
993 				break;
994 			    t += cl;
995 			    f -= WCPADWIDTH(cchar, multi_width);
996 			}
997 			/* Then copy the entire remainder. */
998 			while (*t)
999 			    *r++ = *t++;
1000 		    }
1001 		} else {
1002 		    f -= lpreone;
1003 		    if (lpremul) {
1004 			if ((m = f % lpremul)) {
1005 			    /*
1006 			     * Left over fraction of repeated string.
1007 			     */
1008 			    MB_METACHARINIT();
1009 			    /* Skip this much. */
1010 			    m = lpremul - m;
1011 			    for (t = premul; m > 0; ) {
1012 				cl = MB_METACHARLENCONV(t, &cchar);
1013 				if (!cl)
1014 				    break;
1015 				t += cl;
1016 				m -= WCPADWIDTH(cchar, multi_width);
1017 			    }
1018 			    /* Output the rest. */
1019 			    while (*t)
1020 				*r++ = *t++;
1021 			}
1022 			for (cc = f / lpremul; cc--;) {
1023 			    /* Repeat the repeated string */
1024 			    MB_METACHARINIT();
1025 			    for (c = lpremul, t = premul; c > 0; ) {
1026 				cl = MB_METACHARLENCONV(t, &cchar);
1027 				if (!cl)
1028 				    break;
1029 				while (cl--)
1030 				    *r++ = *t++;
1031 				c -= WCPADWIDTH(cchar, multi_width);
1032 			    }
1033 			}
1034 		    }
1035 		    if (preone) {
1036 			/* Output the full unrepeated string */
1037 			while (*preone)
1038 			    *r++ = *preone++;
1039 		    }
1040 		}
1041 		/* Output the first half width of the original string. */
1042 		for (c = ls2; c > 0; ) {
1043 		    cl = MB_METACHARLENCONV(str, &cchar);
1044 		    if (!cl)
1045 			break;
1046 		    c -= WCPADWIDTH(cchar, multi_width);
1047 		    while (cl--)
1048 			*r++ = *str++;
1049 		}
1050 	    }
1051 	    /* Other half.  In case the string had an odd length... */
1052 	    ls2 = ls - ls2;
1053 	    /* Width that needs padding... */
1054 	    f = postnum - ls2;
1055 	    if (f <= 0) {
1056 		/* ...is negative, truncate original string */
1057 		MB_METACHARINIT();
1058 		for (c = postnum; c > 0; ) {
1059 		    cl = MB_METACHARLENCONV(str, &cchar);
1060 		    if (!cl)
1061 			break;
1062 		    c -= WCPADWIDTH(cchar, multi_width);
1063 		    while (cl--)
1064 			*r++ = *str++;
1065 		}
1066 	    } else {
1067 		/* Rest of original string fits, output it complete */
1068 		while (*str)
1069 		    *r++ = *str++;
1070 		if (f <= lpostone) {
1071 		    if (postone) {
1072 			/* Can't fit unrepeated string, truncate it */
1073 			for (c = f; c > 0; ) {
1074 			    cl = MB_METACHARLENCONV(postone, &cchar);
1075 			    if (!cl)
1076 				break;
1077 			    c -= WCPADWIDTH(cchar, multi_width);
1078 			    while (cl--)
1079 				*r++ = *postone++;
1080 			}
1081 		    }
1082 		} else {
1083 		    if (postone) {
1084 			f -= lpostone;
1085 			/* Output entire unrepeated string */
1086 			while (*postone)
1087 			    *r++ = *postone++;
1088 		    }
1089 		    if (lpostmul) {
1090 			for (cc = f / lpostmul; cc--;) {
1091 			    /* Begin the beguine */
1092 			    for (t = postmul; *t; )
1093 				*r++ = *t++;
1094 			}
1095 			if ((m = f % lpostmul)) {
1096 			    /* Fill leftovers with chunk of repeated string */
1097 			    MB_METACHARINIT();
1098 			    while (m > 0) {
1099 				cl = MB_METACHARLENCONV(postmul, &cchar);
1100 				if (!cl)
1101 				    break;
1102 				m -= WCPADWIDTH(cchar, multi_width);
1103 				while (cl--)
1104 				    *r++ = *postmul++;
1105 			    }
1106 			}
1107 		    }
1108 		}
1109 	    }
1110 	} else {
1111 	    /*
1112 	     * Pad only on the left.
1113 	     */
1114 	    f = prenum - ls;
1115 	    if (f <= 0) {
1116 		/*
1117 		 * Original string is at least as wide as padding.
1118 		 * Truncate original string to width.
1119 		 * Truncate on left, so skip the characters we
1120 		 * don't need.
1121 		 */
1122 		f = -f;
1123 		MB_METACHARINIT();
1124 		while (f > 0) {
1125 		    cl = MB_METACHARLENCONV(str, &cchar);
1126 		    if (!cl)
1127 			break;
1128 		    str += cl;
1129 		    f -= WCPADWIDTH(cchar, multi_width);
1130 		}
1131 		/* Copy the rest of the original string */
1132 		for (c = prenum; c > 0; ) {
1133 		    cl = MB_METACHARLENCONV(str, &cchar);
1134 		    if (!cl)
1135 			break;
1136 		    while (cl--)
1137 			*r++ = *str++;
1138 		    c -= WCPADWIDTH(cchar, multi_width);
1139 		}
1140 	    } else {
1141 		/*
1142 		 * We can fit the entire string...
1143 		 */
1144 		if (f <= lpreone) {
1145 		    if (preone) {
1146 			/*
1147 			 * ...with some fraction of the unrepeated string.
1148 			 */
1149 			/* We need this width of characters. */
1150 			c = f;
1151 			/*
1152 			 * We therefore need to skip this width of
1153 			 * characters.
1154 			 */
1155 			f = lpreone - f;
1156 			MB_METACHARINIT();
1157 			for (t = preone; f > 0; ) {
1158 			    cl = MB_METACHARLENCONV(t, &cchar);
1159 			    if (!cl)
1160 				break;
1161 			    t += cl;
1162 			    f -= WCPADWIDTH(cchar, multi_width);
1163 			}
1164 			/* Copy the rest of preone */
1165 			while (*t)
1166 			    *r++ = *t++;
1167 		    }
1168 		} else {
1169 		    /*
1170 		     * We can fit the whole of preone, needing this width
1171 		     * first
1172 		     */
1173 		    f -= lpreone;
1174 		    if (lpremul) {
1175 			if ((m = f % lpremul)) {
1176 			    /*
1177 			     * Some fraction of the repeated string needed.
1178 			     */
1179 			    /* Need this much... */
1180 			    c = m;
1181 			    /* ...skipping this much first. */
1182 			    m = lpremul - m;
1183 			    MB_METACHARINIT();
1184 			    for (t = premul; m > 0; ) {
1185 				cl = MB_METACHARLENCONV(t, &cchar);
1186 				if (!cl)
1187 				    break;
1188 				t += cl;
1189 				m -= WCPADWIDTH(cchar, multi_width);
1190 			    }
1191 			    /* Now the rest of the repeated string. */
1192 			    while (c > 0) {
1193 				cl = MB_METACHARLENCONV(t, &cchar);
1194 				if (!cl)
1195 				    break;
1196 				while (cl--)
1197 				    *r++ = *t++;
1198 				c -= WCPADWIDTH(cchar, multi_width);
1199 			    }
1200 			}
1201 			for (cc = f / lpremul; cc--;) {
1202 			    /*
1203 			     * Repeat the repeated string.
1204 			     */
1205 			    MB_METACHARINIT();
1206 			    for (c = lpremul, t = premul; c > 0; ) {
1207 				cl = MB_METACHARLENCONV(t, &cchar);
1208 				if (!cl)
1209 				    break;
1210 				while (cl--)
1211 				    *r++ = *t++;
1212 				c -= WCPADWIDTH(cchar, multi_width);
1213 			    }
1214 			}
1215 		    }
1216 		    if (preone) {
1217 			/*
1218 			 * Now the entire unrepeated string.  Don't
1219 			 * count the width, just dump it.  This is
1220 			 * significant if there are special characters
1221 			 * in this string.  It's sort of a historical
1222 			 * accident that this worked, but there's nothing
1223 			 * to stop us just dumping the thing out and assuming
1224 			 * the user knows what they're doing.
1225 			 */
1226 			while (*preone)
1227 			    *r++ = *preone++;
1228 		    }
1229 		}
1230 		/* Now the string being padded */
1231 		while (*str)
1232 		    *r++ = *str++;
1233 	    }
1234 	}
1235     } else if (postnum) {
1236 	/*
1237 	 * Pad on the right.
1238 	 */
1239 	f = postnum - ls;
1240 	MB_METACHARINIT();
1241 	if (f <= 0) {
1242 	    /*
1243 	     * Original string is at least as wide as padding.
1244 	     * Truncate original string to width.
1245 	     */
1246 	    for (c = postnum; c > 0; ) {
1247 		cl = MB_METACHARLENCONV(str, &cchar);
1248 		if (!cl)
1249 		    break;
1250 		while (cl--)
1251 		    *r++ = *str++;
1252 		c -= WCPADWIDTH(cchar, multi_width);
1253 	    }
1254 	} else {
1255 	    /*
1256 	     * There's some space to fill.  First copy the original
1257 	     * string, counting the width.  Make sure we copy the
1258 	     * entire string.
1259 	     */
1260 	    for (c = ls; *str; ) {
1261 		cl = MB_METACHARLENCONV(str, &cchar);
1262 		if (!cl)
1263 		    break;
1264 		while (cl--)
1265 		    *r++ = *str++;
1266 		c -= WCPADWIDTH(cchar, multi_width);
1267 	    }
1268 	    MB_METACHARINIT();
1269 	    if (f <= lpostone) {
1270 		if (postone) {
1271 		    /*
1272 		     * Not enough or only just enough space to fit
1273 		     * the unrepeated string.  Truncate as necessary.
1274 		     */
1275 		    for (c = f; c > 0; ) {
1276 			cl = MB_METACHARLENCONV(postone, &cchar);
1277 			if (!cl)
1278 			    break;
1279 			while (cl--)
1280 			    *r++ = *postone++;
1281 			c -= WCPADWIDTH(cchar, multi_width);
1282 		    }
1283 		}
1284 	    } else {
1285 		if (postone) {
1286 		    f -= lpostone;
1287 		    /* Copy the entire unrepeated string */
1288 		    for (c = lpostone; *postone; ) {
1289 			cl = MB_METACHARLENCONV(postone, &cchar);
1290 			if (!cl)
1291 			    break;
1292 			while (cl--)
1293 			    *r++ = *postone++;
1294 			c -= WCPADWIDTH(cchar, multi_width);
1295 		    }
1296 		}
1297 		if (lpostmul) {
1298 		    /* Repeat the repeated string */
1299 		    for (cc = f / lpostmul; cc--;) {
1300 			MB_METACHARINIT();
1301 			for (c = lpostmul, t = postmul; *t; ) {
1302 			    cl = MB_METACHARLENCONV(t, &cchar);
1303 			    if (!cl)
1304 				break;
1305 			    while (cl--)
1306 				*r++ = *t++;
1307 			    c -= WCPADWIDTH(cchar, multi_width);
1308 			}
1309 		    }
1310 		    /*
1311 		     * See if there's any fraction of the repeated
1312 		     * string needed to fill up the remaining space.
1313 		     */
1314 		    if ((m = f % lpostmul)) {
1315 			MB_METACHARINIT();
1316 			while (m > 0) {
1317 			    cl = MB_METACHARLENCONV(postmul, &cchar);
1318 			    if (!cl)
1319 				break;
1320 			    while (cl--)
1321 				*r++ = *postmul++;
1322 			    m -= WCPADWIDTH(cchar, multi_width);
1323 			}
1324 		    }
1325 		}
1326 	    }
1327 	}
1328     }
1329     *r = '\0';
1330 
1331     return ret;
1332 }
1333 
1334 
1335 /*
1336  * Look for a delimited portion of a string.  The first (possibly
1337  * multibyte) character at s is the delimiter.  Various forms
1338  * of brackets are treated separately, as documented.
1339  *
1340  * Returns a pointer to the final delimiter.  Sets *len to the
1341  * length of the final delimiter; a NULL causes *len to be set
1342  * to zero since we shouldn't advance past it.  (The string is
1343  * tokenized, so a NULL is a real end of string.)
1344  */
1345 
1346 /**/
1347 char *
get_strarg(char * s,int * lenp)1348 get_strarg(char *s, int *lenp)
1349 {
1350     convchar_t del;
1351     int len;
1352     char ctok = 0;
1353 
1354     MB_METACHARINIT();
1355     len = MB_METACHARLENCONV(s, &del);
1356     if (!len) {
1357 	*lenp = 0;
1358 	return s;
1359     }
1360 
1361 #ifdef MULTIBYTE_SUPPORT
1362     if (del == WEOF)
1363 	del = (wint_t)((*s == Meta) ? s[1] ^ 32 : *s);
1364 #endif
1365     s += len;
1366     switch (del) {
1367     case ZWC('('):
1368 	del = ZWC(')');
1369 	break;
1370     case '[':
1371 	del = ZWC(']');
1372 	break;
1373     case '{':
1374 	del = ZWC('}');
1375 	break;
1376     case '<':
1377 	del = ZWC('>');
1378 	break;
1379     case Inpar:
1380 	ctok = Outpar;
1381 	break;
1382     case Inang:
1383 	ctok = Outang;
1384 	break;
1385     case Inbrace:
1386 	ctok = Outbrace;
1387 	break;
1388     case Inbrack:
1389 	ctok = Outbrack;
1390 	break;
1391     }
1392 
1393     if (ctok) {
1394 	/*
1395 	 * Looking for a matching token; we want the literal byte,
1396 	 * not a decoded multibyte character, so search specially.
1397 	 */
1398 	while (*s && *s != ctok)
1399 	    s++;
1400     } else {
1401 	convchar_t del2;
1402 	len = 0;
1403 	while (*s) {
1404 	    len = MB_METACHARLENCONV(s, &del2);
1405 #ifdef MULTIBYTE_SUPPORT
1406 	    if (del2 == WEOF)
1407 		del2 = (wint_t)((*s == Meta) ? s[1] ^ 32 : *s);
1408 #endif
1409 	    if (del == del2)
1410 		break;
1411 	    s += len;
1412 	}
1413     }
1414 
1415     *lenp = len;
1416     return s;
1417 }
1418 
1419 /*
1420  * Get an integer argument; update *s to the end of the
1421  * final delimiter.  *delmatchp is set to the length of the
1422  * matched delimiter if we have matching, delimiters and there was no error in
1423  * the evaluation, else 0.
1424  */
1425 
1426 /**/
1427 static int
get_intarg(char ** s,int * delmatchp)1428 get_intarg(char **s, int *delmatchp)
1429 {
1430     int arglen;
1431     char *t = get_strarg(*s, &arglen);
1432     char *p, sav;
1433     zlong ret;
1434 
1435     *delmatchp = 0;
1436     if (!*t)
1437 	return -1;
1438     sav = *t;
1439     *t = '\0';
1440     p = dupstring(*s + arglen);
1441     *s = t + arglen;
1442     *t = sav;
1443     if (parsestr(&p))
1444 	return -1;
1445     singsub(&p);
1446     if (errflag)
1447 	return -1;
1448     ret = mathevali(p);
1449     if (errflag)
1450 	return -1;
1451     if (ret < 0)
1452 	ret = -ret;
1453     *delmatchp = arglen;
1454     return ret;
1455 }
1456 
1457 /* Parsing for the (e) flag. */
1458 
1459 static int
subst_parse_str(char ** sp,int single,int err)1460 subst_parse_str(char **sp, int single, int err)
1461 {
1462     char *s;
1463 
1464     *sp = s = dupstring(*sp);
1465 
1466     if (!(err ? parsestr(&s) : parsestrnoerr(&s))) {
1467 	*sp = s;
1468 	if (!single) {
1469             int qt = 0;
1470 
1471 	    for (; *s; s++) {
1472 		if (!qt) {
1473 		    if (*s == Qstring)
1474 			*s = String;
1475 		    else if (*s == Qtick)
1476 			*s = Tick;
1477                 }
1478 		if (*s == Dnull)
1479                     qt = !qt;
1480 	    }
1481 	}
1482 	return 0;
1483     }
1484     return 1;
1485 }
1486 
1487 /* Evaluation for (#) flag */
1488 
1489 static char *
substevalchar(char * ptr)1490 substevalchar(char *ptr)
1491 {
1492     zlong ires = mathevali(ptr);
1493     int len = 0;
1494 
1495     if (errflag)
1496 	return NULL;
1497 #ifdef MULTIBYTE_SUPPORT
1498     if (isset(MULTIBYTE) && ires > 127) {
1499 	/* '\\' + 'U' + 8 bytes of character + '\0' */
1500 	char buf[11];
1501 
1502 	/* inefficient: should separate out \U handling from getkeystring */
1503 	sprintf(buf, "\\U%.8x", (unsigned int)ires & 0xFFFFFFFFu);
1504 	ptr = getkeystring(buf, &len, GETKEYS_BINDKEY, NULL);
1505     }
1506     if (len == 0)
1507 #endif
1508     {
1509 	ptr = zhalloc(2);
1510 	len = 1;
1511 	sprintf(ptr, "%c", (int)ires);
1512     }
1513     return metafy(ptr, len, META_USEHEAP);
1514 }
1515 
1516 /*
1517  * Helper function for arguments to parameter flags which
1518  * handles the (p) and (~) flags as escapes and tok_arg respectively.
1519  */
1520 
1521 static char *
untok_and_escape(char * s,int escapes,int tok_arg)1522 untok_and_escape(char *s, int escapes, int tok_arg)
1523 {
1524     int klen;
1525     char *dst = NULL;
1526 
1527     if (escapes && (*s == String || *s == Qstring) && s[1]) {
1528 	char *pstart = s+1, *pend;
1529 	for (pend = pstart; *pend; pend++)
1530 	    if (!iident(*pend))
1531 		break;
1532 	if (!*pend) {
1533 	    dst = dupstring(getsparam(pstart));
1534 	}
1535     }
1536     if (dst == NULL) {
1537 	untokenize(dst = dupstring(s));
1538 	if (escapes) {
1539 	    dst = getkeystring(dst, &klen, GETKEYS_SEP, NULL);
1540 	    dst = metafy(dst, klen, META_HREALLOC);
1541 	}
1542     }
1543     if (tok_arg)
1544 	shtokenize(dst);
1545     return dst;
1546 }
1547 
1548 /*
1549  * See if an argument str looks like a subscript or length following
1550  * a colon and parse it.  It must be followed by a ':' or nothing.
1551  * If this succeeds, expand and return the evaluated expression if
1552  * found, else return NULL.
1553  *
1554  * We assume this is what is meant if the first character is not
1555  * an alphabetic character or '&', which signify modifiers.
1556  *
1557  * Set *endp to point to the next character following.
1558  */
1559 static char *
check_colon_subscript(char * str,char ** endp)1560 check_colon_subscript(char *str, char **endp)
1561 {
1562     int sav;
1563 
1564     /* Could this be a modifier (or empty)? */
1565     if (!*str || ialpha(*str) || *str == '&')
1566 	return NULL;
1567 
1568     *endp = parse_subscript(str, 0, ':');
1569     if (!*endp) {
1570 	/* No trailing colon? */
1571 	*endp = parse_subscript(str, 0, '\0');
1572 	if (!*endp)
1573 	    return NULL;
1574     }
1575     sav = **endp;
1576     **endp = '\0';
1577     str = dupstring(str);
1578     if (parsestr(&str))
1579 	return NULL;
1580     singsub(&str);
1581     remnulargs(str);
1582     untokenize(str);
1583 
1584     **endp = sav;
1585     return str;
1586 }
1587 
1588 /* parameter substitution */
1589 
1590 #define	isstring(c) ((c) == '$' || (char)(c) == String || (char)(c) == Qstring)
1591 #define isbrack(c)  ((c) == '[' || (char)(c) == Inbrack)
1592 
1593 /*
1594  * Given a linked list l with node n, perform parameter substitution
1595  * starting from *str.  Return the node with the substitutuion performed
1596  * or NULL if it failed.
1597  *
1598  * If qt is true, the `$' was quoted.  TODO: why can't we just look
1599  * to see if the first character was String or Qstring?
1600  *
1601  * If ssub is true, we are being called via singsubst(), which means
1602  * the result will be a single word.  TODO: can we generate the
1603  * single word at the end?  TODO: if not, or maybe in any case,
1604  * can we pass down the ssub flag from prefork with the other flags
1605  * instead of pushing it into different arguments?  (How exactly
1606  * to qt and ssub differ?  Are both necessary, if so is there some
1607  * better way of separating the two?)
1608  */
1609 
1610 /**/
1611 static LinkNode
paramsubst(LinkList l,LinkNode n,char ** str,int qt,int pf_flags,int * ret_flags)1612 paramsubst(LinkList l, LinkNode n, char **str, int qt, int pf_flags,
1613 	   int *ret_flags)
1614 {
1615     char *aptr = *str, c, cc;
1616     char *s = aptr, *fstr, *idbeg, *idend, *ostr = (char *) getdata(n);
1617     int colf;			/* != 0 means we found a colon after the name */
1618     /*
1619      * There are far too many flags.  They need to be grouped
1620      * together into some structure which ties them to where they
1621      * came from.
1622      *
1623      * Some flags have a an obscure relationship to their effect which
1624      * depends on incrementing them to particular values in particular
1625      * ways.
1626      */
1627     /*
1628      * Whether the value is an array (in aval) or not (in val).  There's
1629      * a movement from storing the value in the stuff read from the
1630      * parameter (the value v) to storing them in val and aval.
1631      * However, sometimes you find v reappearing temporarily.
1632      *
1633      * The values -1 and 2 are special to isarr.  The value -1 is used
1634      * to force us to keep an empty array.  It's tested in the YUK chunk
1635      * (I mean the one explicitly marked as such).  The value 2
1636      * indicates an array has come from splitting a scalar.  We use
1637      * that to override the usual rule that in double quotes we don't
1638      * remove empty elements (so "${(s.:):-foo::bar}" produces two
1639      * words).  This seems to me to be quite the wrong thing to do,
1640      * but it looks like code may be relying on it.  So we require (@)
1641      * as well before we keep the empty fields (look for assignments
1642      * like "isarr = nojoin ? 1 : 2").
1643      */
1644     int isarr = 0;
1645     /*
1646      * This is just the setting of the option except we need to
1647      * take account of ^ and ^^.
1648      */
1649     int plan9 = isset(RCEXPANDPARAM);
1650     /*
1651      * Likwise, but with ~ and ~~.  Also, we turn it off later
1652      * on if qt is passed down. The value can go to 2 if we
1653      * use ~ to force this on.
1654      */
1655     int globsubst = isset(GLOBSUBST);
1656     /*
1657      * Indicates ${(#)...}.
1658      */
1659     int evalchar = 0;
1660     /*
1661      * Indicates ${#pm}, massaged by whichlen which is set by
1662      * the (c), (w), and (W) flags to indicate how we take the length.
1663      */
1664     int getlen = 0;
1665     int whichlen = 0;
1666     /*
1667      * Indicates ${+pm}: a simple boolean for once.
1668      */
1669     int chkset = 0;
1670     /*
1671      * Indicates we have tried to get a value in v but that was
1672      * unset.  I don't quite understand why (v == NULL) isn't
1673      * good enough, but there are places where we seem to need
1674      * to second guess whether a value is a real value or not.
1675      * See in particular the (colf && !vunset) test below.
1676      */
1677     int vunset = 0;
1678     /*
1679      * Indicates (t) flag, i.e. print out types.  The code for
1680      * this actually isn't too horrifically inbred compared with
1681      * that for (P).
1682      */
1683     int wantt = 0;
1684     /*
1685      * Indicates splitting a string into an array.  There aren't
1686      * actually that many special cases for this --- which may
1687      * be why it doesn't work properly; we split in some cases
1688      * where we shouldn't, in particular on the multsubs for
1689      * handling embedded values for ${...=...} and the like.
1690      */
1691     int spbreak = (pf_flags & PREFORK_SHWORDSPLIT) &&
1692 	!(pf_flags & PREFORK_SINGLE) && !qt;
1693     /* Scalar and array value, see isarr above */
1694     char *val = NULL, **aval = NULL;
1695     /*
1696      * vbuf and v are both used to retrieve parameter values; this
1697      * is a kludge, we pass down vbuf and it may or may not return v.
1698      */
1699     struct value vbuf;
1700     Value v = NULL;
1701     /*
1702      * This expressive name refers to the set of flags which
1703      * is applied to matching for #, %, / and their doubled variants:
1704      * (M), (R), (B), (E), (N), (S).
1705      */
1706     int flags = 0;
1707     /* Value from (I) flag, used for ditto. */
1708     int flnum = 0;
1709     /*
1710      * sortit is to be passed to strmetasort().
1711      * indord is the (a) flag, which for consistency doesn't get
1712      * combined into sortit.
1713      */
1714     int sortit = SORTIT_ANYOLDHOW, indord = 0;
1715     /* (u): straightforward. */
1716     int unique = 0;
1717     /* combination of (L), (U) and (C) flags. */
1718     int casmod = CASMOD_NONE;
1719     /*
1720      * quotemod says we are doing either (q/b) (positive), (Q) (negative)
1721      * or not (0).  quotetype counts the q's for the first case.
1722      * quoterr is simply (X) but gets passed around a lot because the
1723      * combination (eX) needs it.
1724      */
1725     int quotemod = 0, quotetype = QT_NONE, quoteerr = 0;
1726     /*
1727      * Various fairly straightforward modifications, except that as with so
1728      * many flags it's not easy to decide where to put them in the order.
1729      * bit 0: (D) flag.
1730      * bit 1: (V) flag.
1731      */
1732     int mods = 0;
1733     /*
1734      * The (z) flag, nothing to do with SH_WORD_SPLIT which is tied
1735      * spbreak, see above; fairly straightforward in use but cf.
1736      * the comment for mods.
1737      *
1738      * This gets set to one of the LEXFLAGS_* values.
1739      */
1740     int shsplit = 0;
1741     /*
1742      * "ssub" is true when we are called from singsub (via prefork):
1743      * it means that we must join arrays and should not split words.
1744      */
1745     int ssub = (pf_flags & PREFORK_SINGLE);
1746     /*
1747      * The separator from (j) and (s) respectively, or (F) and (f)
1748      * respectively (hardwired to "\n" in that case).  Slightly
1749      * confusingly also used for ${#pm}, thought that's at least
1750      * documented in the manual
1751      */
1752     char *sep = NULL, *spsep = NULL;
1753     /*
1754      * Padding strings.  The left and right padding strings which
1755      * are repeated, then the ones which only occur once, for
1756      * the (l) and (r) flags.
1757      */
1758     char *premul = NULL, *postmul = NULL, *preone = NULL, *postone = NULL;
1759     /* Replacement string for /orig/repl and //orig/repl */
1760     char *replstr = NULL;
1761     /* The numbers for (l) and (r) */
1762     zlong prenum = 0, postnum = 0;
1763 #ifdef MULTIBYTE_SUPPORT
1764     /* The (m) flag: use width of multibyte characters */
1765     int multi_width = 0;
1766 #endif
1767     /*
1768      * Whether the value has been copied.  Optimisation:  if we
1769      * are modifying an expression, we only need to copy it the
1770      * first time, and if we don't modify it we can just use the
1771      * value from the parameter or input.
1772      */
1773     int copied = 0;
1774     /*
1775      * The (A) flag for array assignment, with consequences for
1776      * splitting and joining; (AA) gives arrasg == 2 for associative
1777      * arrays.
1778      */
1779     int arrasg = 0;
1780     /*
1781      * The (e) flag.  As we need to do extra work not quite
1782      * at the end, the effect of this is kludged in several places.
1783      */
1784     int eval = 0;
1785     /*
1786      * The (P) flag.  This interacts a bit obscurely with whether
1787      * or not we are dealing with a sub expression (subexp).
1788      */
1789     int aspar = 0;
1790     /*
1791      * The (%) flag, c.f. mods again.
1792      */
1793     int presc = 0;
1794     /*
1795      * The (g) flag.  Process escape sequences with various GETKEY_ flags.
1796      */
1797     int getkeys = -1;
1798     /*
1799      * The (@) flag; interacts obscurely with qt and isarr.
1800      * This is one of the things that decides whether multsub
1801      * will produce an array, but in an extremely indirect fashion.
1802      */
1803     int nojoin = (pf_flags & PREFORK_SHWORDSPLIT) ? !(ifs && *ifs) && !qt : 0;
1804     /*
1805      * != 0 means ${...}, otherwise $...  What works without braces
1806      * is largely a historical artefact (everything works with braces,
1807      * I sincerely hope).
1808      */
1809     char inbrace = 0;
1810     /*
1811      * Use for the (k) flag.  Goes down into the parameter code,
1812      * sometimes.
1813      */
1814     char hkeys = 0;
1815     /*
1816      * Used for the (v) flag, ditto.  Not quite sure why they're
1817      * separate, but the tradition seems to be that things only
1818      * get combined when that makes the result more obscure rather
1819      * than less.
1820      */
1821     char hvals = 0;
1822     /*
1823      * Whether we had to evaluate a subexpression, i.e. an
1824      * internal ${...} or $(...) or plain $pm.  We almost don't
1825      * need to remember this (which would be neater), but the (P)
1826      * flag means the subexp and !subexp code is obscurely combined,
1827      * and the argument passing to fetchvalue has another kludge.
1828      */
1829     int subexp;
1830     /*
1831      * If we're referring to the positional parameters, then
1832      * e.g ${*:1:1} refers to $1.
1833      * This is for compatibility.
1834      */
1835     int horrible_offset_hack = 0;
1836     /*
1837      * Signal back from multsub: with something like
1838      *   x${:- $foo}
1839      * with word-splitting active we need to split on that leading
1840      * whitespace.  However, if there's no "x" the whitespace is
1841      * simply removed.
1842      */
1843     int ms_flags = 0;
1844     /*
1845      * We need to do an extra fetch to honour the (P) flag.
1846      * Complicated by the use of subexpressions that may have
1847      * nested (P) flags.
1848      */
1849     int fetch_needed;
1850 
1851     *s++ = '\0';
1852     /*
1853      * Nothing to do unless the character following the $ is
1854      * something we recognise.
1855      *
1856      * Shouldn't this be a table or something?  We test for all
1857      * these later on, too.
1858      */
1859     c = *s;
1860     if (itype_end(s, IIDENT, 1) == s && *s != '#' && c != Pound &&
1861 	!IS_DASH(c) &&
1862 	c != '!' && c != '$' && c != String && c != Qstring &&
1863 	c != '?' && c != Quest &&
1864 	c != '*' && c != Star && c != '@' && c != '{' &&
1865 	c != Inbrace && c != '=' && c != Equals && c != Hat &&
1866 	c != '^' && c != '~' && c != Tilde && c != '+') {
1867 	s[-1] = '$';
1868 	*str = s;
1869 	return n;
1870     }
1871     DPUTS(c == '{', "BUG: inbrace == '{' in paramsubst()");
1872     /*
1873      * Extra processing if there is an opening brace: mostly
1874      * flags in parentheses, but also one ksh hack.
1875      */
1876     if (c == Inbrace) {
1877 	inbrace = 1;
1878 	s++;
1879 	/*
1880 	 * In ksh emulation a leading `!' is a special flag working
1881 	 * sort of like our (k).
1882 	 * TODO: this is one of very few cases tied directly to
1883 	 * the emulation mode rather than an option.  Since ksh
1884 	 * doesn't have parameter flags it might be neater to
1885 	 * handle this with the ^, =, ~ stuff, below.
1886 	 */
1887 	if ((c = *s) == '!' && s[1] != Outbrace && EMULATION(EMULATE_KSH)) {
1888 	    hkeys = SCANPM_WANTKEYS;
1889 	    s++;
1890 	} else if (c == '(' || c == Inpar) {
1891 	    char *t, sav;
1892 	    int tt = 0;
1893 	    zlong num;
1894 	    /*
1895 	     * The (p) flag is only remembered within
1896 	     * this block.  It says we do print-style handling
1897 	     * on the values for flags, but only on those.
1898 	     */
1899 	    int escapes = 0;
1900 	    /*
1901 	     * '~' in parentheses caused tokenization of string arg:
1902 	     * similar to (p).
1903 	     */
1904 	    int tok_arg = 0;
1905 
1906 	    for (s++; (c = *s) != ')' && c != Outpar; s++, tt = 0) {
1907 		int arglen;	/* length of modifier argument */
1908 		int dellen;	/* length of matched delimiter, 0 if not */
1909 		char *del0;	/* pointer to initial delimiter */
1910 
1911 		switch (c) {
1912 		case ')':
1913 		case Outpar:
1914 		    /* how can this happen? */
1915 		    break;
1916 		case '~':
1917 		case Tilde:
1918 		    tok_arg = !tok_arg;
1919 		    break;
1920 		case 'A':
1921 		    ++arrasg;
1922 		    break;
1923 		case '@':
1924 		    nojoin = 2;	/* nojoin = 2 means force */
1925 		    break;
1926 		case 'M':
1927 		    flags |= SUB_MATCH;
1928 		    break;
1929 		case 'R':
1930 		    flags |= SUB_REST;
1931 		    break;
1932 		case 'B':
1933 		    flags |= SUB_BIND;
1934 		    break;
1935 		case 'E':
1936 		    flags |= SUB_EIND;
1937 		    break;
1938 		case 'N':
1939 		    flags |= SUB_LEN;
1940 		    break;
1941 		case 'S':
1942 		    flags |= SUB_SUBSTR;
1943 		    break;
1944 		case 'I':
1945 		    s++;
1946 		    flnum = get_intarg(&s, &dellen);
1947 		    if (flnum < 0)
1948 			goto flagerr;
1949 		    s--;
1950 		    break;
1951 
1952 		case 'L':
1953 		    casmod = CASMOD_LOWER;
1954 		    break;
1955 		case 'U':
1956 		    casmod = CASMOD_UPPER;
1957 		    break;
1958 		case 'C':
1959 		    casmod = CASMOD_CAPS;
1960 		    break;
1961 
1962 		case 'o':
1963 		    if (!sortit)
1964 			sortit |= SORTIT_SOMEHOW; /* sort, no modifiers */
1965 		    break;
1966 		case 'O':
1967 		    sortit |= SORTIT_BACKWARDS;
1968 		    break;
1969 		case 'i':
1970 		    sortit |= SORTIT_IGNORING_CASE;
1971 		    break;
1972 		case 'n':
1973 		    sortit |= SORTIT_NUMERICALLY;
1974 		    break;
1975 		case 'a':
1976 		    sortit |= SORTIT_SOMEHOW;
1977 		    indord = 1;
1978 		    break;
1979 
1980 		case 'D':
1981 		    mods |= 1;
1982 		    break;
1983 		case 'V':
1984 		    mods |= 2;
1985 		    break;
1986 
1987 		case 'q':
1988 		    if (quotetype == QT_DOLLARS ||
1989 			quotetype == QT_BACKSLASH_PATTERN)
1990 			goto flagerr;
1991 		    if (IS_DASH(s[1]) || s[1] == '+') {
1992 			if (quotemod)
1993 			    goto flagerr;
1994 			s++;
1995 			quotemod = 1;
1996 			quotetype = (*s == '+') ? QT_QUOTEDZPUTS :
1997 			    QT_SINGLE_OPTIONAL;
1998 		    } else {
1999 			if (quotetype == QT_SINGLE_OPTIONAL) {
2000 			    /* extra q's after '-' not allowed */
2001 			    goto flagerr;
2002 			}
2003 			quotemod++, quotetype++;
2004 		    }
2005 		    break;
2006 		case 'b':
2007 		    if (quotemod || quotetype != QT_NONE)
2008 			goto flagerr;
2009 		    quotemod = 1;
2010 		    quotetype = QT_BACKSLASH_PATTERN;
2011 		    break;
2012 		case 'Q':
2013 		    quotemod--;
2014 		    break;
2015 		case 'X':
2016 		    quoteerr = 1;
2017 		    break;
2018 
2019 		case 'e':
2020 		    eval = 1;
2021 		    break;
2022 		case 'P':
2023 		    aspar = 1;
2024 		    break;
2025 
2026 		case 'c':
2027 		    whichlen = 1;
2028 		    break;
2029 		case 'w':
2030 		    whichlen = 2;
2031 		    break;
2032 		case 'W':
2033 		    whichlen = 3;
2034 		    break;
2035 
2036 		case 'f':
2037 		    spsep = "\n";
2038 		    break;
2039 		case 'F':
2040 		    sep = "\n";
2041 		    break;
2042 
2043 		case '0':
2044 		    spsep = zhalloc(3);
2045 		    spsep[0] = Meta;
2046 		    spsep[1] = '\0' ^ 32;
2047 		    spsep[2] = '\0';
2048 		    break;
2049 
2050 		case 's':
2051 		    tt = 1;
2052 		/* fall through */
2053 		case 'j':
2054 		    t = get_strarg(++s, &arglen);
2055 		    if (*t) {
2056 			sav = *t;
2057 			*t = '\0';
2058 			if (tt)
2059 			    spsep = untok_and_escape(s + arglen,
2060 						     escapes, tok_arg);
2061 			else
2062 			    sep = untok_and_escape(s + arglen,
2063 						   escapes, tok_arg);
2064 			*t = sav;
2065 			s = t + arglen - 1;
2066 		    } else
2067 			goto flagerr;
2068 		    break;
2069 
2070 		case 'l':
2071 		    tt = 1;
2072 		/* fall through */
2073 		case 'r':
2074 		    s++;
2075 		    /* delimiter position */
2076 		    del0 = s;
2077 		    num = get_intarg(&s, &dellen);
2078 		    if (num < 0)
2079 			goto flagerr;
2080 		    if (tt)
2081 			prenum = num;
2082 		    else
2083 			postnum = num;
2084 		    /* must have same delimiter if more arguments */
2085 		    if (!dellen || memcmp(del0, s, dellen)) {
2086 			/* decrement since loop will increment */
2087 			s--;
2088 			break;
2089 		    }
2090 		    t = get_strarg(s, &arglen);
2091 		    if (!*t)
2092 			goto flagerr;
2093 		    sav = *t;
2094 		    *t = '\0';
2095 		    if (tt)
2096 			premul = untok_and_escape(s + arglen, escapes,
2097 						  tok_arg);
2098 		    else
2099 			postmul = untok_and_escape(s + arglen, escapes,
2100 						   tok_arg);
2101 		    *t = sav;
2102 		    sav = *s;
2103 		    s = t + arglen;
2104 		    /* again, continue only if another start delimiter */
2105 		    if (memcmp(del0, s, dellen)) {
2106 			/* decrement since loop will increment */
2107 			s--;
2108 			break;
2109 		    }
2110 		    t = get_strarg(s, &arglen);
2111 		    if (!*t)
2112 			goto flagerr;
2113 		    sav = *t;
2114 		    *t = '\0';
2115 		    if (tt)
2116 			preone = untok_and_escape(s + arglen,
2117 						  escapes, tok_arg);
2118 		    else
2119 			postone = untok_and_escape(s + arglen,
2120 						   escapes, tok_arg);
2121 		    *t = sav;
2122 		    /* -1 since loop will increment */
2123 		    s = t + arglen - 1;
2124 		    break;
2125 
2126 		case 'm':
2127 #ifdef MULTIBYTE_SUPPORT
2128 		    multi_width++;
2129 #endif
2130 		    break;
2131 
2132 		case 'p':
2133 		    escapes = 1;
2134 		    break;
2135 
2136 		case 'k':
2137 		    hkeys = SCANPM_WANTKEYS;
2138 		    break;
2139 		case 'v':
2140 		    hvals = SCANPM_WANTVALS;
2141 		    break;
2142 
2143 		case 't':
2144 		    wantt = 1;
2145 		    break;
2146 
2147 		case '%':
2148 		    presc++;
2149 		    break;
2150 
2151 		case 'g':
2152 		    t = get_strarg(++s, &arglen);
2153 		    if (getkeys < 0)
2154 			getkeys = 0;
2155 		    if (*t) {
2156 			sav = *t;
2157 			*t = 0;
2158 			while (*++s) {
2159 			    switch (*s) {
2160 			    case 'e':
2161 				getkeys |= GETKEY_EMACS;
2162 				break;
2163 			    case 'o':
2164 				getkeys |= GETKEY_OCTAL_ESC;
2165 				break;
2166 			    case 'c':
2167 				getkeys |= GETKEY_CTRL;
2168 				break;
2169 
2170 			    default:
2171 				*t = sav;
2172 				goto flagerr;
2173 			    }
2174 			}
2175 			*t = sav;
2176 			s = t + arglen - 1;
2177 		    } else
2178 			goto flagerr;
2179 		    break;
2180 
2181 		case 'z':
2182 		    shsplit = LEXFLAGS_ACTIVE;
2183 		    break;
2184 
2185 		case 'Z':
2186 		    t = get_strarg(++s, &arglen);
2187 		    if (*t) {
2188 			sav = *t;
2189 			*t = 0;
2190 			while (*++s) {
2191 			    switch (*s) {
2192 			    case 'c':
2193 				/* Parse and keep comments */
2194 				shsplit |= LEXFLAGS_COMMENTS_KEEP;
2195 				break;
2196 
2197 			    case 'C':
2198 				/* Parse and remove comments */
2199 				shsplit |= LEXFLAGS_COMMENTS_STRIP;
2200 				break;
2201 
2202 			    case 'n':
2203 				/* Treat newlines as whitespace */
2204 				shsplit |= LEXFLAGS_NEWLINE;
2205 				break;
2206 
2207 			    default:
2208 				*t = sav;
2209  				goto flagerr;
2210 			    }
2211 			}
2212 			*t = sav;
2213 			s = t + arglen - 1;
2214 		    } else
2215 			goto flagerr;
2216 		    break;
2217 
2218 		case 'u':
2219 		    unique = 1;
2220 		    break;
2221 
2222 		case '#':
2223 		case Pound:
2224 		    evalchar = 1;
2225 		    break;
2226 
2227 		case '_':
2228 		    t = get_strarg(++s, &arglen);
2229 		    if (*t) {
2230 			sav = *t;
2231 			*t = 0;
2232 			while (*++s) {
2233 			    /* Reserved for future use */
2234 			    switch (*s) {
2235 			    default:
2236 				*t = sav;
2237 				goto flagerr;
2238 			    }
2239 			}
2240 			*t = sav;
2241 			s = t + arglen - 1;
2242 		    } else
2243 			goto flagerr;
2244 		    break;
2245 
2246 		default:
2247 		  flagerr:
2248 		    zerr("error in flags");
2249 		    return NULL;
2250 		}
2251 	    }
2252 	    s++;
2253 	}
2254     }
2255 
2256     /*
2257      * premul, postmul specify the padding character to be used
2258      * multiple times with the (l) and (r) flags respectively.
2259      */
2260     if (!premul)
2261 	premul = " ";
2262     if (!postmul)
2263 	postmul = " ";
2264 
2265     /*
2266      * Look for special unparenthesised flags.
2267      * TODO: could make these able to appear inside parentheses, too,
2268      * i.e. ${(^)...} etc.
2269      */
2270     for (;;) {
2271 	if ((c = *s) == '^' || c == Hat) {
2272 	    /* RC_EXPAND_PARAM on or off (doubled )*/
2273 	    if ((c = *++s) == '^' || c == Hat) {
2274 		plan9 = 0;
2275 		s++;
2276 	    } else
2277 		plan9 = 1;
2278 	} else if ((c = *s) == '=' || c == Equals) {
2279 	    /* SH_WORD_SPLIT on or off (doubled). spbreak = 2 means force */
2280 	    if ((c = *++s) == '=' || c == Equals) {
2281 		spbreak = 0;
2282 		if (nojoin < 2)
2283 		    nojoin = 0;
2284 		s++;
2285 	    } else {
2286 		spbreak = 2;
2287 		if (nojoin < 2)
2288 		    nojoin = !(ifs && *ifs);
2289 	    }
2290 	} else if ((c == '#' || c == Pound) &&
2291 		   (inbrace || !isset(POSIXIDENTIFIERS)) &&
2292 		   (itype_end(s+1, IIDENT, 0) != s + 1
2293 		    || (cc = s[1]) == '*' || cc == Star || cc == '@'
2294 		    || cc == '?' || cc == Quest
2295 		    || cc == '$' || cc == String || cc == Qstring
2296 		    /*
2297 		     * Me And My Squiggle:
2298 		     * ${##} is the length of $#, but ${##foo}
2299 		     * is $# with a "foo" removed from the start.
2300 		     * If someone had defined the *@!@! language
2301 		     * properly in the first place we wouldn't
2302 		     * have this nonsense.
2303 		     */
2304 		    || ((cc == '#' || cc == Pound) && s[2] == Outbrace)
2305 		    || IS_DASH(cc)
2306 		    || (cc == ':' && IS_DASH(s[2]))
2307 		    || (isstring(cc) && (s[2] == Inbrace || s[2] == Inpar)))) {
2308 	    getlen = 1 + whichlen, s++;
2309 	    /*
2310 	     * Return the length of the parameter.
2311 	     * getlen can be more than 1 to indicate characters (2),
2312 	     * words ignoring multiple delimiters (3), words taking
2313 	     * account of multiple delimiters.  delimiter is in
2314 	     * spsep, NULL means $IFS.
2315 	     */
2316 	} else if (c == '~' || c == Tilde) {
2317 	    /* GLOB_SUBST (forced) on or off (doubled) */
2318 	    if ((c = *++s) == '~' || c == Tilde) {
2319 		globsubst = 0;
2320 		s++;
2321 	    } else
2322 		globsubst = 2;
2323 	} else if (c == '+') {
2324 	    /*
2325 	     * Return whether indicated parameter is set.
2326 	     * Try to handle this when parameter is named
2327 	     * by (P) (second part of test).
2328 	     */
2329 	    if (itype_end(s+1, IIDENT, 0) != s+1 || (aspar && isstring(s[1]) &&
2330 				 (s[2] == Inbrace || s[2] == Inpar)))
2331 		chkset = 1, s++;
2332 	    else if (!inbrace) {
2333 		/* Special case for `$+' on its own --- leave unmodified */
2334 		*aptr = '$';
2335 		*str = aptr + 1;
2336 		return n;
2337 	    } else {
2338 		zerr("bad substitution");
2339 		return NULL;
2340 	    }
2341 	} else if (inbrace && inull(*s) && *s != Bnull) {
2342 	    /*
2343 	     * Handles things like ${(f)"$(<file)"} by skipping
2344 	     * the double quotes.  We don't need to know what was
2345 	     * actually there; the presence of a String or Qstring
2346 	     * is good enough.
2347 	     */
2348 	    s++;
2349 	} else
2350 	    break;
2351     }
2352     /* Don't activate special pattern characters if inside quotes */
2353     if (qt)
2354 	globsubst = 0;
2355 
2356     /*
2357      * At this point, we usually expect a parameter name.
2358      * However, there may be a nested ${...} or $(...).
2359      * These say that the parameter itself is somewhere inside,
2360      * or that there isn't a parameter and we will get the values
2361      * from a command substitution itself.  In either case,
2362      * the current instance of paramsubst() doesn't fetch a value,
2363      * it just operates on what gets passed up.
2364      * (The first ought to have been {...}, reserving ${...}
2365      * for substituting a value at that point, but it's too late now.)
2366      */
2367     idbeg = s;
2368     if ((subexp = (inbrace && s[-1] && isstring(*s) &&
2369 		   (s[1] == Inbrace || s[1] == Inpar || s[1] == Inparmath)))) {
2370 	int sav;
2371 	int quoted = *s == Qstring;
2372 	int outtok;
2373 
2374 	val = s++;
2375 	switch (*s) {
2376 	case Inbrace:
2377 	    outtok = Outbrace;
2378 	    break;
2379 	case Inpar:
2380 	    outtok = Outpar;
2381 	    break;
2382 	case Inparmath:
2383 	    outtok = Outparmath;
2384 	    break;
2385 	default:
2386 	    /* "Can't Happen" (TM) */
2387 	    DPUTS(1, "Nested substitution: This Can't Happen (TM)");
2388 	    return NULL;
2389 	}
2390 	skipparens(*s, outtok, &s);
2391 	sav = *s;
2392 	*s = 0;
2393 	/*
2394 	 * This handles arrays.  TODO: this is not the most obscure call to
2395 	 * multsub() (see below) but even so it would be nicer to pass down
2396 	 * and back the arrayness more rationally.  In that case, we should
2397 	 * remove the aspar test and extract a value from an array, if
2398 	 * necessary, when we handle (P) lower down.
2399 	 */
2400 	if (multsub(&val, PREFORK_SUBEXP, (aspar ? NULL : &aval), &isarr, NULL,
2401 		    &ms_flags) && quoted) {
2402 	    /* Empty quoted string --- treat as null string, not elided */
2403 	    isarr = -1;
2404 	    aval = (char **) hcalloc(sizeof(char *));
2405 	    aspar = 0;
2406 	} else if (aspar)
2407 	    idbeg = val;
2408 	if (*val == Nularg)
2409 	    ++val;
2410 	*s = sav;
2411 	/*
2412 	 * This tests for the second double quote in an expression
2413 	 * like ${(f)"$(<file)"}, compare above.
2414 	 */
2415 	while (inull(*s))
2416 	    s++;
2417 	if (ms_flags & MULTSUB_PARAM_NAME) {
2418 	    /*
2419 	     * Downbelow has told us this is a parameter name, e.g.
2420 	     * ${${(P)name}...}.  We're going to behave as if
2421 	     * we have exactly that name followed by the rest of
2422 	     * the parameter for subscripting etc.
2423 	     *
2424 	     * See below for where we set the flag in the nested
2425 	     * substitution.
2426 	     */
2427 	    if (isarr) {
2428 		if (aval[0] && aval[1]) {
2429 		    zerr("parameter name reference used with array");
2430 		    return NULL;
2431 		}
2432 		val = aval[0];
2433 		isarr = 0;
2434 	    }
2435 	    s = val ? dyncat(val, s) : dupstring(s);
2436 	    /* Now behave po-faced as if it was always like that... */
2437 	    subexp = 0;
2438 	    /*
2439 	     * If this is a (P) (first test) and at the top level
2440 	     * (second test) we can't rely on the caller fetching
2441 	     * the result from the pending aspar.  So do it below.
2442 	     */
2443 	    fetch_needed = aspar && !(pf_flags & PREFORK_SUBEXP);
2444 	} else
2445 	    fetch_needed = 0; 	/* any initial aspar fetch already done */
2446 	v = (Value) NULL;
2447     } else
2448 	fetch_needed = aspar;	/* aspar fetch still needed */
2449     if (fetch_needed) {
2450 	/*
2451 	 * No subexpression, but in any case the value is going
2452 	 * to give us the name of a parameter on which we do
2453 	 * our remaining processing.  In other words, this
2454 	 * makes ${(P)param} work like ${(P)${param}}.  (Probably
2455 	 * better looked at, this is the basic code for ${(P)param}
2456 	 * and it's been kludged into the subexp code because no
2457 	 * opportunity for a kludge has been neglected.)
2458 	 */
2459 	if ((v = fetchvalue(&vbuf, &s, 1, (qt ? SCANPM_DQUOTED : 0)))) {
2460 	    val = idbeg = getstrvalue(v);
2461 	    subexp = 1;
2462 	} else
2463 	    vunset = 1;
2464     }
2465     if (aspar && (pf_flags & PREFORK_SUBEXP)) {
2466 	/*
2467 	 * This is the inner handling for the case referred to above
2468 	 * where we have something like ${${(P)name}...}.
2469 	 *
2470 	 * Treat this as a normal value here; all transformations on
2471 	 * result are in outer instance.
2472 	 */
2473 	aspar = 0;
2474 	*ret_flags |= MULTSUB_PARAM_NAME;
2475     }
2476     /*
2477      * We need to retrieve a value either if we haven't already
2478      * got it from a subexpression, or if the processing so
2479      * far has just yielded us a parameter name to be processed
2480      * with (P).
2481      */
2482     if (!subexp || aspar) {
2483 	char *ov = val;
2484 	int scanflags = hkeys | hvals;
2485 	if (arrasg)
2486 	    scanflags |= SCANPM_ASSIGNING;
2487 	if (qt)
2488 	    scanflags |= SCANPM_DQUOTED;
2489 	if (chkset)
2490 	    scanflags |= SCANPM_CHECKING;
2491 	/*
2492 	 * Second argument: decide whether to use the subexpression or
2493 	 *   the string next on the line as the parameter name.
2494 	 * Third argument:  decide how processing for brackets
2495 	 *   1 means full processing
2496 	 *   -1 appears to mean something along the lines of
2497 	 *     only handle single digits and don't handle brackets.
2498 	 *     I *think* (but it's really only a guess) that this
2499 	 *     is used by the test below the wantt handling, so
2500 	 *     that in certain cases we handle brackets there.
2501 	 *   0 would apparently mean something like we know we
2502 	 *     should have the name of a scalar and we get cross
2503 	 *     if there's anything present which disagrees with that
2504 	 * but you will search fetchvalue() in vain for comments on this.
2505 	 * Fourth argument gives flags to do with keys, values, quoting,
2506 	 * assigning depending on context and parameter flags.
2507 	 *
2508 	 * This is the last mention of subexp, so presumably this
2509 	 * is what the code which makes sure subexp is set if aspar (the
2510 	 * (P) flag) is set.  I *think* what's going on here is the
2511 	 * second argument is for both input and output: with
2512 	 * subexp, we only want the input effect, whereas normally
2513 	 * we let fetchvalue set the main string pointer s to
2514 	 * the end of the bit it's fetched.
2515 	 */
2516 	if (!(v = fetchvalue(&vbuf, (subexp ? &ov : &s),
2517 			     (wantt ? -1 :
2518 			      ((unset(KSHARRAYS) || inbrace) ? 1 : -1)),
2519 			     scanflags)) ||
2520 	    (v->pm && (v->pm->node.flags & PM_UNSET)) ||
2521 	    (v->flags & VALFLAG_EMPTY))
2522 	    vunset = 1;
2523 
2524 	if (wantt) {
2525 	    /*
2526 	     * Handle the (t) flag: value now becomes the type
2527 	     * information for the parameter.
2528 	     */
2529 	    if (v && v->pm && !(v->pm->node.flags & PM_UNSET)) {
2530 		int f = v->pm->node.flags;
2531 
2532 		switch (PM_TYPE(f)) {
2533 		case PM_SCALAR:  val = "scalar"; break;
2534 		case PM_ARRAY:   val = "array"; break;
2535 		case PM_INTEGER: val = "integer"; break;
2536 		case PM_EFLOAT:
2537 		case PM_FFLOAT:  val = "float"; break;
2538 		case PM_HASHED:  val = "association"; break;
2539 		}
2540 		val = dupstring(val);
2541 		if (v->pm->level)
2542 		    val = dyncat(val, "-local");
2543 		if (f & PM_LEFT)
2544 		    val = dyncat(val, "-left");
2545 		if (f & PM_RIGHT_B)
2546 		    val = dyncat(val, "-right_blanks");
2547 		if (f & PM_RIGHT_Z)
2548 		    val = dyncat(val, "-right_zeros");
2549 		if (f & PM_LOWER)
2550 		    val = dyncat(val, "-lower");
2551 		if (f & PM_UPPER)
2552 		    val = dyncat(val, "-upper");
2553 		if (f & PM_READONLY)
2554 		    val = dyncat(val, "-readonly");
2555 		if (f & PM_TAGGED)
2556 		    val = dyncat(val, "-tag");
2557 		if (f & PM_TIED)
2558 		    val = dyncat(val, "-tied");
2559 		if (f & PM_EXPORTED)
2560 		    val = dyncat(val, "-export");
2561 		if (f & PM_UNIQUE)
2562 		    val = dyncat(val, "-unique");
2563 		if (f & PM_HIDE)
2564 		    val = dyncat(val, "-hide");
2565 		if (f & PM_HIDEVAL)
2566 		    val = dyncat(val, "-hideval");
2567 		if (f & PM_SPECIAL)
2568 		    val = dyncat(val, "-special");
2569 		vunset = 0;
2570 	    } else
2571 		val = dupstring("");
2572 
2573 	    v = NULL;
2574 	    isarr = 0;
2575 	}
2576     }
2577     /*
2578      * We get in here two ways; either we need to convert v into
2579      * the local value system, or we need to get rid of brackets
2580      * even if there isn't a v.
2581      */
2582     while (v || ((inbrace || (unset(KSHARRAYS) && vunset)) && isbrack(*s))) {
2583 	if (!v) {
2584 	    /*
2585 	     * Index applied to non-existent parameter; we may or may
2586 	     * not have a value to index, however.  Create a temporary
2587 	     * empty parameter as a trick, and index on that.  This
2588 	     * usually happens the second time around the loop when
2589 	     * we've used up the original parameter value and want to
2590 	     * apply a subscript to what's left.  However, it's also
2591 	     * possible it's got something to do with some of that murky
2592 	     * passing of -1's as the third argument to fetchvalue() to
2593 	     * inhibit bracket parsing at that stage.
2594 	     */
2595 	    Param pm;
2596 	    char *os = s;
2597 
2598 	    if (!isbrack(*s))
2599 		break;
2600 	    if (vunset) {
2601 		val = dupstring("");
2602 		isarr = 0;
2603 	    }
2604 	    pm = createparam(nulstring, isarr ? PM_ARRAY : PM_SCALAR);
2605 	    DPUTS(!pm, "BUG: parameter not created");
2606 	    if (isarr)
2607 		pm->u.arr = aval;
2608 	    else
2609 		pm->u.str = val;
2610 	    v = (Value) hcalloc(sizeof *v);
2611 	    v->isarr = isarr;
2612 	    v->pm = pm;
2613 	    v->end = -1;
2614 	    if (getindex(&s, v, qt ? SCANPM_DQUOTED : 0) || s == os)
2615 		break;
2616 	}
2617 	/*
2618 	 * This is where we extract a value (we know now we have
2619 	 * one) into the local parameters for a scalar (val) or
2620 	 * array (aval) value.  TODO: move val and aval into
2621 	 * a structure with a discriminator.  Hope we can make
2622 	 * more things array values at this point and dearrayify later.
2623 	 * v->isarr tells us whether the stuff from down below looks
2624 	 * like an array.
2625 	 *
2626 	 * I think we get to discard the existing value of isarr
2627 	 * here because it's already been taken account of, either
2628 	 * in the subexp stuff or immediately above.
2629 	 */
2630 	if ((isarr = v->isarr)) {
2631 	    /*
2632 	     * No way to get here with v->flags & VALFLAG_INV, so
2633 	     * getvaluearr() is called by getarrvalue(); needn't test
2634 	     * PM_HASHED.
2635 	     */
2636 	    if (v->isarr == SCANPM_WANTINDEX) {
2637 		isarr = v->isarr = 0;
2638 		val = dupstring(v->pm->node.nam);
2639 	    } else
2640 		aval = getarrvalue(v);
2641 	} else {
2642 	    /* Value retrieved from parameter/subexpression is scalar */
2643 	    if (v->pm->node.flags & PM_ARRAY) {
2644 		/*
2645 		 * Although the value is a scalar, the parameter
2646 		 * itself is an array.  Presumably this is due to
2647 		 * being quoted, or doing single substitution or something,
2648 		 * TODO: we're about to do some definitely stringy
2649 		 * stuff, so something like this bit is probably
2650 		 * necessary.  However, I'd like to leave any
2651 		 * necessary joining of arrays until this point
2652 		 * to avoid the multsub() horror.
2653 		 */
2654 
2655 		/* arrlen() is expensive, so only compute it if needed. */
2656 		int tmplen = -1;
2657 
2658 		if (v->start < 0) {
2659 		    tmplen = arrlen(v->pm->gsu.a->getfn(v->pm));
2660 		    v->start += tmplen + ((v->flags & VALFLAG_INV) ? 1 : 0);
2661 		}
2662 		if (!(v->flags & VALFLAG_INV))
2663 		    if (v->start < 0 ||
2664 			(tmplen != -1
2665 			 ? v->start >= tmplen
2666 			 : arrlen_le(v->pm->gsu.a->getfn(v->pm), v->start)))
2667 		    vunset = 1;
2668 	    }
2669 	    if (!vunset) {
2670 		/*
2671 		 * There really is a value.  Padding and case
2672 		 * transformations used to be handled here, but
2673 		 * are now handled in getstrvalue() for greater
2674 		 * consistency.  However, we get unexpected effects
2675 		 * if we allow them to applied on every call, so
2676 		 * set the flag that allows them to be substituted.
2677 		 */
2678 		v->flags |= VALFLAG_SUBST;
2679 		val = getstrvalue(v);
2680 	    }
2681 	}
2682 	/* See if this is a reference to the positional parameters. */
2683 	if (v && v->pm && v->pm->gsu.a == &vararray_gsu &&
2684 	    (char ***)v->pm->u.data == &pparams)
2685 	    horrible_offset_hack = 1;
2686 	else
2687 	    horrible_offset_hack = 0;
2688 	/*
2689 	 * Finished with the original parameter and its indices;
2690 	 * carry on looping to see if we need to do more indexing.
2691 	 * This means we final get rid of v in favour of val and
2692 	 * aval.  We could do with somehow encapsulating the bit
2693 	 * where we need v.
2694 	 */
2695 	v = NULL;
2696 	if (!inbrace)
2697 	    break;
2698     }
2699     /*
2700      * We're now past the name or subexpression; the only things
2701      * which can happen now are a closing brace, one of the standard
2702      * parameter postmodifiers, or a history-style colon-modifier.
2703      *
2704      * Again, this duplicates tests for characters we're about to
2705      * examine properly later on.
2706      */
2707     if (inbrace) {
2708 	c = *s;
2709 	if (!IS_DASH(c) &&
2710 	    c != '+' && c != ':' && c != '%'  && c != '/' &&
2711 	    c != '=' && c != Equals &&
2712 	    c != '#' && c != Pound &&
2713 	    c != '?' && c != Quest &&
2714 	    c != '}' && c != Outbrace) {
2715 	    zerr("bad substitution");
2716 	    return NULL;
2717 	}
2718     }
2719     /*
2720      * Join arrays up if we're in quotes and there isn't some
2721      * override such as (@).
2722      * TODO: hmm, if we're called as part of some recursive
2723      * substitution do we want to delay this until we get back to
2724      * the top level?  Or is if there's a qt (i.e. this parameter
2725      * substitution is in quotes) always good enough?  Potentially
2726      * we may be OK by now --- all potential `@'s and subexpressions
2727      * have been handled, including any [@] index which comes up
2728      * by virtue of v->isarr being set to SCANPM_ISVAR_AT which
2729      * is now in isarr.
2730      *
2731      * However, if we are replacing multsub() with something that
2732      * doesn't mangle arrays, we may need to delay this step until after
2733      * the foo:- or foo:= or whatever that causes that.  Note the value
2734      * (string or array) at this point is irrelevant if we are going to
2735      * be doing that.  This would mean // and stuff get applied
2736      * arraywise even if quoted.  That's probably wrong, so maybe
2737      * this just stays.
2738      *
2739      * We do a separate stage of dearrayification in the YUK chunk,
2740      * I think mostly because of the way we make array or scalar
2741      * values appear to the caller.
2742      */
2743     if (isarr) {
2744 	if (nojoin)
2745 	    isarr = -1;
2746 	if (qt && !getlen && isarr > 0) {
2747 	    val = sepjoin(aval, sep, 1);
2748 	    isarr = 0;
2749 	}
2750     }
2751 
2752     idend = s;
2753     if (inbrace) {
2754 	/*
2755 	 * This is to match a closing double quote in case
2756 	 * we didn't have a subexpression, e.g. ${"foo"}.
2757 	 * This form is pointless, but logically it ought to work.
2758 	 */
2759 	while (inull(*s))
2760 	    s++;
2761     }
2762     /*
2763      * We don't yet know whether a `:' introduces a history-style
2764      * colon modifier or qualifies something like ${...:=...}.
2765      * But if we remember the colon here it's easy to check later.
2766      */
2767     if ((colf = *s == ':'))
2768 	s++;
2769 
2770 
2771     /* fstr is to be the text following the substitution.  If we have *
2772      * braces, we look for it here, else we infer it later on.        */
2773     fstr = s;
2774     if (inbrace) {
2775 	int bct;
2776 	for (bct = 1; (c = *fstr); fstr++) {
2777 	    if (c == Inbrace)
2778 		bct++;
2779 	    else if (c == Outbrace && !--bct)
2780 		break;
2781 	}
2782 
2783 	if (bct) {
2784 	noclosebrace:
2785 	    zerr("closing brace expected");
2786 	    return NULL;
2787 	}
2788 	if (c)
2789 	    *fstr++ = '\0';
2790     }
2791 
2792     /* Check for ${..?..} or ${..=..} or one of those. *
2793      * Only works if the name is in braces.            */
2794 
2795     if (inbrace && ((c = *s) == '+' ||
2796 		    IS_DASH(c) ||
2797 		    c == ':' ||	/* i.e. a doubled colon */
2798 		    c == '=' || c == Equals ||
2799 		    c == '%' ||
2800 		    c == '#' || c == Pound ||
2801 		    c == '?' || c == Quest ||
2802 		    c == '/')) {
2803 
2804 	/*
2805 	 * Default index is 1 if no (I) or (I) gave zero.   But
2806 	 * why don't we set the default explicitly at the start
2807 	 * and massage any passed index where we set flnum anyway?
2808 	 */
2809 	if (!flnum)
2810 	    flnum++;
2811 	if (c == '%')
2812 	    flags |= SUB_END;
2813 
2814 	/* Check for ${..%%..} or ${..##..} */
2815 	if ((c == '%' || c == '#' || c == Pound) && c == s[1]) {
2816 	    s++;
2817 	    /* we have %%, not %, or ##, not # */
2818 	    flags |= SUB_LONG;
2819 	}
2820 	s++;
2821 	if (s[-1] == '/') {
2822 	    char *ptr;
2823 	    /*
2824 	     * previous flags are irrelevant, except for (S) which
2825 	     * indicates shortest substring; else look for longest.
2826 	     */
2827 	    flags = (flags & SUB_SUBSTR) ? 0 : SUB_LONG;
2828 	    if ((c = *s) == '/') {
2829 		/* doubled, so replace all occurrences */
2830 		flags |= SUB_GLOBAL;
2831 		c = *++s;
2832 	    }
2833 	    /* Check for anchored substitution */
2834 	    if (c == '#' || c == Pound) {
2835 		/*
2836 		 * anchor at head: this is the `normal' case in
2837 		 * getmatch and we only require the flag if SUB_END
2838 		 * is also present.
2839 		 */
2840 		flags |= SUB_START;
2841 		s++;
2842 	    }
2843 	    if (*s == '%') {
2844 		/* anchor at tail */
2845 		flags |= SUB_END;
2846 		s++;
2847 	    }
2848 	    if (!(flags & (SUB_START|SUB_END))) {
2849 		/* No anchor, so substring */
2850 		flags |= SUB_SUBSTR;
2851 	    }
2852 	    /*
2853 	     * Find the / marking the end of the search pattern.
2854 	     * If there isn't one, we're just going to delete that,
2855 	     * i.e. replace it with an empty string.
2856 	     *
2857 	     * We used to use double backslashes to quote slashes,
2858 	     * but actually that was buggy and using a single backslash
2859 	     * is easier and more obvious.
2860 	     */
2861 	    for (ptr = s; (c = *ptr) && c != '/'; ptr++)
2862 	    {
2863 		if ((c == Bnull || c == Bnullkeep || c == '\\') && ptr[1])
2864 		{
2865 		    if (ptr[1] == '/')
2866 			chuck(ptr);
2867 		    else
2868 			ptr++;
2869 		}
2870 	    }
2871 	    replstr = (*ptr && ptr[1]) ? ptr+1 : "";
2872 	    *ptr = '\0';
2873 	}
2874 
2875 	/* See if this was ${...:-...}, ${...:=...}, etc. */
2876 	if (colf)
2877 	    flags |= SUB_ALL;
2878 	/*
2879 	 * With no special flags, i.e. just a # or % or whatever,
2880 	 * the matched portion is removed and we keep the rest.
2881 	 * We also want the rest when we're doing a substitution.
2882 	 */
2883 	if (!(flags & (SUB_MATCH|SUB_REST|SUB_BIND|SUB_EIND|SUB_LEN)))
2884 	    flags |= SUB_REST;
2885 
2886 	/*
2887 	 * With ":" treat a value as unset if the variable is set but
2888 	 * - (array) contains no elements
2889 	 * - (scalar) contains an empty string
2890 	 */
2891 	if (colf && !vunset) {
2892 	    vunset = (isarr) ? !*aval : !*val || (*val == Nularg && !val[1]);
2893 	    vunset *= -1; /* Record that vunset was originally false */
2894 	}
2895 
2896 	switch (s[-1]) {
2897 	case '+':
2898 	    if (vunset) {
2899 		val = dupstring("");
2900 		copied = 1;
2901 		isarr = 0;
2902 		break;
2903 	    }
2904 	    vunset = 1;
2905 	/* Fall Through! */
2906 	case '-':
2907 	case Dash:
2908 	    if (vunset) {
2909 		int split_flags;
2910 		val = dupstring(s);
2911 		/* If word-splitting is enabled, we ask multsub() to split
2912 		 * the substituted string at unquoted whitespace.  Then, we
2913 		 * turn off spbreak so that no further splitting occurs.
2914 		 * This allows a construct such as ${1+"$@"} to correctly
2915 		 * keep its array splits, and weird constructs such as
2916 		 * ${str+"one two" "3 2 1" foo "$str"} to only be split
2917 		 * at the unquoted spaces. */
2918 		if (spbreak) {
2919 		    split_flags = PREFORK_SHWORDSPLIT;
2920 		    if (!aspar)
2921 			split_flags |= PREFORK_SPLIT;
2922 		} else {
2923 		    /*
2924 		     * It's not good enough not passing the flag to use
2925 		     * SHWORDSPLIT, because when we get to a nested
2926 		     * paramsubst we need to ignore isset(SHWORDSPLIT).
2927 		     */
2928 		    split_flags = PREFORK_NOSHWORDSPLIT;
2929 		}
2930 		multsub(&val, split_flags, (aspar ? NULL : &aval),
2931 			&isarr, NULL, &ms_flags);
2932 		copied = 1;
2933 		spbreak = 0;
2934 		/* Leave globsubst on if forced */
2935 		if (globsubst != 2)
2936 		    globsubst = 0;
2937 	    }
2938 	    break;
2939 	case ':':
2940 	    /* this must be `::=', unconditional assignment */
2941 	    if (*s != '=' && *s != Equals)
2942 		goto noclosebrace;
2943 	    vunset = 1;
2944 	    s++;
2945 	    /* Fall through */
2946 	case '=':
2947 	case Equals:
2948 	    if (vunset) {
2949 		char sav = *idend;
2950 		int l, split_flags;
2951 
2952 		*idend = '\0';
2953 		val = dupstring(s);
2954 		if (spsep || !arrasg) {
2955 		    /* POSIX requires PREFORK_SINGLE semantics here, but
2956 		     * traditional zsh used PREFORK_NOSHWORDSPLIT.  Base
2957 		     * behavior on caller choice of PREFORK_SHWORDSPLIT. */
2958 		    multsub(&val,
2959 			    spbreak ? PREFORK_SINGLE : PREFORK_NOSHWORDSPLIT,
2960 			    NULL, &isarr, NULL, &ms_flags);
2961 		} else {
2962 		    if (spbreak)
2963 			split_flags = PREFORK_SPLIT|PREFORK_SHWORDSPLIT;
2964 		    else
2965 			split_flags = PREFORK_NOSHWORDSPLIT;
2966 		    multsub(&val, split_flags, &aval, &isarr, NULL,
2967 			    &ms_flags);
2968 		    spbreak = 0;
2969 		}
2970 		if (arrasg) {
2971 		    /* This is an array assignment. */
2972 		    char *arr[2], **t, **a, **p;
2973 		    if (spsep || spbreak) {
2974 			aval = sepsplit(val, spsep, 0, 1);
2975 			isarr = nojoin ? 1 : 2;
2976 			l = arrlen(aval);
2977 			if (l && !*(aval[l-1]))
2978 			    l--;
2979 			if (l && !**aval)
2980 			    l--, t = aval + 1;
2981 			else
2982 			    t = aval;
2983 		    } else if (!isarr) {
2984 			if (!*val && arrasg > 1) {
2985 			    arr[0] = NULL;
2986 			    l = 0;
2987 			} else {
2988 			    arr[0] = val;
2989 			    arr[1] = NULL;
2990 			    l = 1;
2991 			}
2992 			t = aval = arr;
2993 		    } else
2994 			l = arrlen(aval), t = aval;
2995 		    p = a = zalloc(sizeof(char *) * (l + 1));
2996 		    while (l--) {
2997 			untokenize(*t);
2998 			*p++ = ztrdup(*t++);
2999 		    }
3000 		    *p++ = NULL;
3001 		    if (arrasg > 1) {
3002 			Param pm = sethparam(idbeg, a);
3003 			if (pm)
3004 			    aval = paramvalarr(pm->gsu.h->getfn(pm), hkeys|hvals);
3005 		    } else
3006 			setaparam(idbeg, a);
3007 		    isarr = 1;
3008 		    arrasg = 0;
3009 		} else {
3010 		    untokenize(val);
3011 		    setsparam(idbeg, ztrdup(val));
3012 		}
3013 		*idend = sav;
3014 		copied = 1;
3015 		if (isarr) {
3016 		    if (nojoin)
3017 			isarr = -1;
3018 		    if (qt && !getlen && isarr > 0 && !spsep && spbreak < 2) {
3019 			val = sepjoin(aval, sep, 1);
3020 			isarr = 0;
3021 		    }
3022 		    sep = spsep = NULL;
3023 		    spbreak = 0;
3024 		}
3025 	    }
3026 	    break;
3027 	case '?':
3028 	case Quest:
3029 	    if (vunset) {
3030                 if (isset(EXECOPT)) {
3031                     *idend = '\0';
3032                     zerr("%s: %s", idbeg, *s ? s : "parameter not set");
3033                     /*
3034                      * In interactive shell we need to return to
3035                      * top-level prompt --- don't clear this error
3036                      * after handling a command as we do with
3037                      * most errors.
3038                      */
3039                     errflag |= ERRFLAG_HARD;
3040                     if (!interact) {
3041                         if (mypid == getpid()) {
3042                             /*
3043                              * paranoia: don't check for jobs, but there
3044                              * shouldn't be any if not interactive.
3045                              */
3046                             stopmsg = 1;
3047                             zexit(1, ZEXIT_NORMAL);
3048                         } else
3049                             _exit(1);
3050                     }
3051                 }
3052 		return NULL;
3053 	    }
3054 	    break;
3055 	case '%':
3056 	case '#':
3057 	case Pound:
3058 	case '/':
3059             /* This once was executed only `if (qt) ...'. But with that
3060              * patterns in a expansion resulting from a ${(e)...} aren't
3061              * tokenized even though this function thinks they are (it thinks
3062              * they are because parse_subst_str() turns Qstring tokens
3063              * into String tokens and for unquoted parameter expansions the
3064              * lexer normally does tokenize patterns inside parameter
3065              * expansions). */
3066             {
3067 		int one = noerrs, oef = errflag, haserr;
3068 
3069 		if (!quoteerr)
3070 		    noerrs = 1;
3071 		haserr = parse_subst_string(s);
3072 		noerrs = one;
3073 		if (!quoteerr) {
3074 		    /* Retain user interrupt error status */
3075 		    errflag = oef | (errflag & ERRFLAG_INT);
3076 		    if (haserr)
3077 			shtokenize(s);
3078 		} else if (haserr || errflag) {
3079 		    zerr("parse error in ${...%c...} substitution", s[-1]);
3080 		    return NULL;
3081 		}
3082 	    }
3083 	    {
3084 #if 0
3085 		/*
3086 		 * This allows # and % to be at the start of
3087 		 * a parameter in the substitution, which is
3088 		 * a bit nasty, and can be done (although
3089 		 * less efficiently) with anchors.
3090 		 */
3091 
3092 		char t = s[-1];
3093 
3094 		singsub(&s);
3095 
3096 		if (t == '/' && (flags & SUB_SUBSTR)) {
3097 		    if ((c = *s) == '#' || c == '%') {
3098 			flags &= ~SUB_SUBSTR;
3099 			if (c == '%')
3100 			    flags |= SUB_END;
3101 			s++;
3102 		    } else if (c == '\\') {
3103 			s++;
3104 		    }
3105 		}
3106 #else
3107 		singsub(&s);
3108 #endif
3109 	    }
3110 
3111 	    /*
3112 	     * Either loop over an array doing replacements or
3113 	     * do the replacement on a string.
3114 	     *
3115 	     * We need an untokenized value for matching.
3116 	     */
3117 	    if (!vunset && isarr) {
3118 		char **ap;
3119 		if (!copied) {
3120 		    aval = arrdup(aval);
3121 		    copied = 1;
3122 		}
3123 		for (ap = aval; *ap; ap++) {
3124 		    untokenize(*ap);
3125 		}
3126 		getmatcharr(&aval, s, flags, flnum, replstr);
3127 	    } else {
3128 		if (vunset) {
3129 		    if (vunset > 0 && unset(UNSET)) {
3130 			*idend = '\0';
3131 			zerr("%s: parameter not set", idbeg);
3132 			return NULL;
3133 		    }
3134 		    val = dupstring("");
3135 		}
3136 		if (!copied) {
3137 		    val = dupstring(val);
3138 		    copied = 1;
3139 		    untokenize(val);
3140 		}
3141 		getmatch(&val, s, flags, flnum, replstr);
3142 	    }
3143 	    break;
3144 	}
3145     } else if (inbrace && (*s == '^' || *s == Hat)) {
3146 	char **zip;
3147 	int shortest = 1;
3148 	++s;
3149 	if (*s == '^' || *s == Hat) {
3150 	    shortest = 0;
3151 	    ++s;
3152 	}
3153 	if (*itype_end(s, IIDENT, 0)) {
3154 	    untokenize(s);
3155 	    zerr("not an identifier: %s", s);
3156 	    return NULL;
3157 	}
3158 	if (vunset) {
3159 	    if (vunset > 0 && unset(UNSET)) {
3160 		*idend = '\0';
3161 		zerr("%s: parameter not set", idbeg);
3162 		return NULL;
3163 	    }
3164 	    val = dupstring("");
3165 	} else {
3166 	    char *sval;
3167 	    zip = getaparam(s);
3168 	    if (!zip) {
3169 		sval = getsparam(s);
3170 		if (sval)
3171 		    zip = hmkarray(sval);
3172 	    }
3173 	    if (!isarr) {
3174 		aval = hmkarray(val);
3175 		isarr = 1;
3176 	    }
3177 	    if (zip) {
3178 		char **out;
3179 		int alen, ziplen, outlen, i = 0;
3180 		alen = arrlen(aval);
3181 		ziplen = arrlen(zip);
3182 		outlen = shortest ^ (alen > ziplen) ? alen : ziplen;
3183 		if (!shortest && (alen == 0 || ziplen == 0)) {
3184 		    if (ziplen)
3185 			aval = arrdup(zip);
3186 		} else {
3187 		    out = zhalloc(sizeof(char *) * (2 * outlen + 1));
3188 		    while (i < outlen) {
3189 			if (copied)
3190 			    out[i*2] = aval[i % alen];
3191 			else
3192 			    out[i*2] = dupstring(aval[i % alen]);
3193 			out[i*2+1] = dupstring(zip[i % ziplen]);
3194 			i++;
3195 		    }
3196 		    out[i*2] = NULL;
3197 		    aval = out;
3198 		    copied = 1;
3199 		}
3200 	    } else {
3201 		if (unset(UNSET)) {
3202 		    zerr("%s: parameter not set", s);
3203 		    return NULL;
3204 		}
3205 		val = dupstring("");
3206 	    }
3207 	}
3208     } else if (inbrace && (*s == '|' || *s == Bar ||
3209 			   *s == '*' || *s == Star)) {
3210 	int intersect = (*s == '*' || *s == Star);
3211 	char **compare, **ap, **apsrc;
3212 	++s;
3213 	if (*itype_end(s, IIDENT, 0)) {
3214 	    untokenize(s);
3215 	    zerr("not an identifier: %s", s);
3216 	    return NULL;
3217 	}
3218 	compare = getaparam(s);
3219 	if (compare) {
3220 	    HashTable ht = newuniqtable(arrlen(compare)+1);
3221 	    int present;
3222 	    for (ap = compare; *ap; ap++)
3223 		(void)addhashnode2(ht, *ap, (HashNode)
3224 				   zhalloc(sizeof(struct hashnode)));
3225 	    if (!vunset && isarr) {
3226 		if (!copied) {
3227 		    aval = arrdup(aval);
3228 		    copied = 1;
3229 		}
3230 		for (ap = apsrc = aval; *apsrc; apsrc++) {
3231 		    untokenize(*apsrc);
3232 		    present = (gethashnode2(ht, *apsrc) != NULL);
3233 		    if (intersect ? present : !present) {
3234 			if (ap != apsrc) {
3235 			    *ap = *apsrc;
3236 			}
3237 			ap++;
3238 		    }
3239 		}
3240 		*ap = NULL;
3241 	    } else {
3242 		if (vunset) {
3243 		    if (vunset > 0 && unset(UNSET)) {
3244 			*idend = '\0';
3245 			zerr("%s: parameter not set", idbeg);
3246 			deletehashtable(ht);
3247 			return NULL;
3248 		    }
3249 		    val = dupstring("");
3250 		} else {
3251 		    present = (gethashnode2(ht, val) != NULL);
3252 		    if (intersect ? !present : present)
3253 			val = dupstring("");
3254 		}
3255 	    }
3256 	    deletehashtable(ht);
3257 	} else if (intersect) {
3258 	    /*
3259 	     * The intersection with nothing is nothing...
3260 	     * Seems a bit pointless complaining that the first
3261 	     * expression is unset here if the second is, too.
3262 	     */
3263 	    if (!vunset) {
3264 		if (isarr) {
3265 		    aval = hmkarray(NULL);
3266 		} else {
3267 		    val = dupstring("");
3268 		}
3269 	    }
3270 	}
3271 	if (vunset) {
3272 	    if (vunset > 0 && unset(UNSET)) {
3273 		*idend = '\0';
3274 		zerr("%s: parameter not set", idbeg);
3275 		return NULL;
3276 	    }
3277 	    val = dupstring("");
3278 	}
3279     } else {			/* no ${...=...} or anything, but possible modifiers. */
3280 	/*
3281 	 * Handler ${+...}.  TODO: strange, why do we handle this only
3282 	 * if there isn't a trailing modifier?  Why don't we do this
3283 	 * e.g. when we handle the ${(t)...} flag?
3284 	 */
3285 	if (chkset) {
3286 	    val = dupstring(vunset ? "0" : "1");
3287 	    isarr = 0;
3288 	} else if (vunset) {
3289 	    if (vunset > 0 && unset(UNSET)) {
3290 		*idend = '\0';
3291 		zerr("%s: parameter not set", idbeg);
3292 		return NULL;
3293 	    }
3294 	    val = dupstring("");
3295 	}
3296 	if (colf && inbrace) {
3297 	    /*
3298 	     * Look for ${PARAM:OFFSET} or ${PARAM:OFFSET:LENGTH}.
3299 	     * This must appear before modifiers.  For compatibility
3300 	     * with bash we perform both standard string substitutions
3301 	     * and math eval.
3302 	     */
3303 	    char *check_offset2;
3304 	    char *check_offset = check_colon_subscript(s, &check_offset2);
3305 	    if (check_offset) {
3306 		zlong offset = mathevali(check_offset);
3307 		zlong length = 0;
3308 		int length_set = 0;
3309 		int offset_hack_argzero = 0;
3310 		if (errflag)
3311 		    return NULL;
3312 		if ((*check_offset2 && *check_offset2 != ':')) {
3313 		    zerr("invalid subscript: %s", check_offset);
3314 		    return NULL;
3315 		}
3316 		if (*check_offset2) {
3317 		    check_offset = check_colon_subscript(check_offset2 + 1,
3318 							 &check_offset2);
3319 		    if (*check_offset2 && *check_offset2 != ':') {
3320 			zerr("invalid length: %s", check_offset);
3321 			return NULL;
3322 		    }
3323 		    if (check_offset) {
3324 			length = mathevali(check_offset);
3325 			length_set = 1;
3326 			if (errflag)
3327 			    return NULL;
3328 		    }
3329 		}
3330 		if (isarr) {
3331 		    int alen, count;
3332 		    char **srcptr, **dstptr, **newarr;
3333 
3334 		    if (horrible_offset_hack) {
3335 			/*
3336 			 * As part of the 'orrible hoffset 'ack,
3337 			 * (what hare you? Han 'orrible hoffset 'ack,
3338 			 * sergeant major), if we are given a ksh/bash/POSIX
3339 			 * style positional parameter array which includes
3340 			 * offset 0, we use $0.
3341 			 */
3342 			if (offset == 0) {
3343 			    offset_hack_argzero = 1;
3344 			} else if (offset > 0) {
3345 			    offset--;
3346 			}
3347 		    }
3348 
3349 		    alen = arrlen(aval);
3350 		    if (offset < 0) {
3351 			offset += alen;
3352 			if (offset < 0)
3353 			    offset = 0;
3354 		    }
3355 		    if (offset_hack_argzero)
3356 			alen++;
3357 		    if (length_set) {
3358 			if (length < 0)
3359 			    length += alen - offset;
3360 			if (length < 0) {
3361 			    zerr("substring expression: %d < %d",
3362 			         (int)(length + offset), (int)offset);
3363 			    return NULL;
3364 			}
3365 		    } else
3366 			length = alen;
3367 		    if (offset > alen)
3368 			offset = alen;
3369 		    if (offset + length > alen)
3370 			length = alen - offset;
3371 		    count = length;
3372 		    srcptr = aval + offset;
3373 		    newarr = dstptr = (char **)
3374 			zhalloc((length+1)*sizeof(char *));
3375 		    if (count && offset_hack_argzero) {
3376 			*dstptr++ = dupstring(argzero);
3377 			count--;
3378 		    }
3379 		    while (count--)
3380 			*dstptr++ = dupstring(*srcptr++);
3381 		    *dstptr = (char *)NULL;
3382 		    aval = newarr;
3383 		} else {
3384 		    char *sptr, *eptr;
3385 		    int given_offset;
3386 		    if (offset < 0) {
3387 			MB_METACHARINIT();
3388 			for (sptr = val; *sptr; ) {
3389 			    sptr += MB_METACHARLEN(sptr);
3390 			    offset++;
3391 			}
3392 			if (offset < 0)
3393 			    offset = 0;
3394 		    }
3395 		    given_offset = offset;
3396 		    MB_METACHARINIT();
3397 		    if (length_set && length < 0)
3398 			length -= offset;
3399 		    for (sptr = val; *sptr && offset; ) {
3400 			sptr += MB_METACHARLEN(sptr);
3401 			offset--;
3402 		    }
3403 		    if (length_set) {
3404 			if (length < 0) {
3405 			    MB_METACHARINIT();
3406 			    for (eptr = val; *eptr; ) {
3407 				eptr += MB_METACHARLEN(eptr);
3408 				length++;
3409 			    }
3410 			    if (length < 0) {
3411 				zerr("substring expression: %d < %d",
3412 				     (int)(length + given_offset),
3413 				     (int)given_offset);
3414 				return NULL;
3415 			    }
3416 			}
3417 			for (eptr = sptr; *eptr && length; ) {
3418 			    eptr += MB_METACHARLEN(eptr);
3419 			    length--;
3420 			}
3421 			val = dupstrpfx(sptr, eptr - sptr);
3422 		    } else {
3423 			val = dupstring(sptr);
3424 		    }
3425 		}
3426 		if (!*check_offset2) {
3427 		    colf = 0;
3428 		} else {
3429 		    s = check_offset2 + 1;
3430 		}
3431 	    }
3432 	}
3433 	if (colf) {
3434 	    /*
3435 	     * History style colon modifiers.  May need to apply
3436 	     * on multiple elements of an array.
3437 	     */
3438 	    s--;
3439 	    if (unset(KSHARRAYS) || inbrace) {
3440 		if (!isarr)
3441 		    modify(&val, &s, inbrace);
3442 		else {
3443 		    char *ss;
3444 		    char **ap = aval;
3445 		    char **pp = aval = (char **) hcalloc(sizeof(char *) *
3446 							 (arrlen(aval) + 1));
3447 
3448 		    while ((*pp = *ap++)) {
3449 			ss = s;
3450 			modify(pp++, &ss, inbrace);
3451 		    }
3452 		    if (pp == aval) {
3453 			char *t = "";
3454 			ss = s;
3455 			modify(&t, &ss, inbrace);
3456 		    }
3457 		    s = ss;
3458 		}
3459 		copied = 1;
3460 		if (inbrace && *s) {
3461 		    if (*s == ':' && !imeta(s[1]))
3462 			zerr("unrecognized modifier `%c'", s[1]);
3463 		    else
3464 			zerr("unrecognized modifier");
3465 		    return NULL;
3466 		}
3467 	    }
3468 	}
3469 	if (!inbrace)
3470 	    fstr = s;
3471     }
3472     if (errflag)
3473 	return NULL;
3474     if (evalchar) {
3475 	int one = noerrs, oef = errflag, haserr = 0;
3476 
3477 	if (!quoteerr)
3478 	    noerrs = 1;
3479 	/*
3480 	 * Evaluate the value numerically and output the result as
3481 	 * a character.
3482 	 */
3483 	if (isarr) {
3484 	    char **aval2, **avptr, **av2ptr;
3485 
3486 	    aval2 = (char **)zhalloc((arrlen(aval)+1)*sizeof(char *));
3487 
3488 	    for (avptr = aval, av2ptr = aval2; *avptr; avptr++, av2ptr++)
3489 	    {
3490 		/* When noerrs = 1, the only error is out-of-memory */
3491 		if (!(*av2ptr = substevalchar(*avptr))) {
3492 		    haserr = 1;
3493 		    break;
3494 		}
3495 	    }
3496 	    *av2ptr = NULL;
3497 	    aval = aval2;
3498 	} else {
3499 	    /* When noerrs = 1, the only error is out-of-memory */
3500 	    if (!(val = substevalchar(val)))
3501 		haserr = 1;
3502 	}
3503 	noerrs = one;
3504 	if (!quoteerr) {
3505 	    /* Retain user interrupt error status */
3506 	    errflag = oef | (errflag & ERRFLAG_INT);
3507 	}
3508 	if (haserr || errflag)
3509 	    return NULL;
3510 	ms_flags = 0;
3511     }
3512     /*
3513      * This handles taking a length with ${#foo} and variations.
3514      * TODO: again. one might naively have thought this had the
3515      * same sort of effect as the ${(t)...} flag and the ${+...}
3516      * test, although in this case we do need the value rather
3517      * the parameter, so maybe it's a bit different.
3518      */
3519     if (getlen) {
3520 	long len = 0;
3521 	char buf[14];
3522 
3523 	if (isarr) {
3524 	    char **ctr;
3525 	    int sl = sep ? MB_METASTRLEN(sep) : 1;
3526 
3527 	    if (getlen == 1)
3528 		for (ctr = aval; *ctr; ctr++, len++);
3529 	    else if (getlen == 2) {
3530 		if (*aval)
3531 		    for (len = -sl, ctr = aval;
3532 			 len += sl + MB_METASTRLEN2(*ctr, multi_width),
3533 			     *++ctr;);
3534 	    }
3535 	    else
3536 		for (ctr = aval;
3537 		     *ctr;
3538 		     len += wordcount(*ctr, spsep, getlen > 3), ctr++);
3539 	} else {
3540 	    if (getlen < 3)
3541 		len = MB_METASTRLEN2(val, multi_width);
3542 	    else
3543 		len = wordcount(val, spsep, getlen > 3);
3544 	}
3545 
3546 	sprintf(buf, "%ld", len);
3547 	val = dupstring(buf);
3548 	isarr = 0;
3549 	ms_flags = 0;
3550     }
3551     /* At this point we make sure that our arrayness has affected the
3552      * arrayness of the linked list.  Then, we can turn our value into
3553      * a scalar for convenience sake without affecting the arrayness
3554      * of the resulting value.  ## This is the YUK chunk. ## */
3555     if (isarr)
3556 	l->list.flags |= LF_ARRAY;
3557     else
3558 	l->list.flags &= ~LF_ARRAY;
3559     if (isarr > 0 && !plan9 && (!aval || !aval[0])) {
3560 	val = dupstring("");
3561 	isarr = 0;
3562     } else if (isarr && aval && aval[0] && !aval[1]) {
3563 	/* treat a one-element array as a scalar for purposes of   *
3564 	 * concatenation with surrounding text (some${param}thing) *
3565 	 * and rc_expand_param handling.  Note: LF_ARRAY (above)   *
3566 	 * propagates the true array type from nested expansions.  */
3567 	val = aval[0];
3568 	isarr = 0;
3569     }
3570     /* This is where we may join arrays together, e.g. (j:,:) sets "sep", and
3571      * (afterward) may split the joined value (e.g. (s:-:) sets "spsep").  One
3572      * exception is that ${name:-word} and ${name:+word} will have already
3573      * done any requested splitting of the word value with quoting preserved.
3574      */
3575     if (ssub || spbreak || spsep || sep) {
3576 	int force_split = !ssub && (spbreak || spsep);
3577 	if (isarr) {
3578 	    /* sep non-null here means F or j flag, force join */
3579 	    if (nojoin == 0 || sep) {
3580 		val = sepjoin(aval, sep, 1);
3581 		isarr = 0;
3582 	    } else if (force_split &&
3583 		       (spsep || nojoin == 2 || (!ifs && isarr < 0))) {
3584 		/* Hack to simulate splitting individual elements:
3585 		 * forced joining as previously determined, or
3586 		 * join on what we later use to forcibly split
3587 		 */
3588 		val = sepjoin(aval, (nojoin == 1 ? NULL : spsep), 1);
3589 		isarr = 0;
3590 	    }
3591 	    if (!isarr)
3592 		ms_flags = 0;
3593 	}
3594 	if (force_split && !isarr) {
3595 	    aval = sepsplit(val, spsep, 0, 1);
3596 	    if (!aval || !aval[0])
3597 		val = dupstring("");
3598 	    else if (!aval[1])
3599 		val = aval[0];
3600 	    else
3601 		isarr = nojoin ? 1 : 2;
3602 	}
3603 	if (isarr)
3604 	    l->list.flags |= LF_ARRAY;
3605 	else
3606 	    l->list.flags &= ~LF_ARRAY;
3607     }
3608     /*
3609      * Perform case modififications.
3610      */
3611     if (casmod != CASMOD_NONE) {
3612 	copied = 1;		/* string is always modified by copy */
3613 	if (isarr) {
3614 	    char **ap, **ap2;
3615 
3616 	    ap = aval;
3617 	    ap2 = aval = (char **) zhalloc(sizeof(char *) * (arrlen(aval)+1));
3618 
3619 	    while (*ap)
3620 		*ap2++ = casemodify(*ap++, casmod);
3621 	    *ap2++ = NULL;
3622 	} else {
3623 	    val = casemodify(val, casmod);
3624 	}
3625     }
3626     /*
3627      * Process echo- and print-style escape sequences.
3628      */
3629     if (getkeys >= 0) {
3630 	int len;
3631 
3632 	copied = 1;		/* string is always copied */
3633 	if (isarr) {
3634 	    char **ap, **ap2;
3635 
3636 	    ap = aval;
3637 	    aval = (char **) zhalloc(sizeof(char *) * (arrlen(aval)+1));
3638 	    for (ap2 = aval; *ap; ap++, ap2++) {
3639 		*ap2 = getkeystring(*ap, &len, getkeys, NULL);
3640 		*ap2 = metafy(*ap2, len, META_USEHEAP);
3641 	    }
3642 	    *ap2++ = NULL;
3643 	} else {
3644 	    val = getkeystring(val, &len, getkeys, NULL);
3645 	    val = metafy(val, len, META_USEHEAP);
3646 	}
3647     }
3648     /*
3649      * Perform prompt-style modifications.
3650      */
3651     if (presc) {
3652 	int ops = opts[PROMPTSUBST], opb = opts[PROMPTBANG];
3653 	int opp = opts[PROMPTPERCENT];
3654 
3655 	if (presc < 2) {
3656 	    opts[PROMPTPERCENT] = 1;
3657 	    opts[PROMPTSUBST] = opts[PROMPTBANG] = 0;
3658 	}
3659 	/*
3660 	 * TODO:  It would be really quite nice to abstract the
3661 	 * isarr and !isarr code into a function which gets
3662 	 * passed a pointer to a function with the effect of
3663 	 * the promptexpand bit.  Then we could use this for
3664 	 * a lot of stuff and bury val/aval/isarr inside a structure
3665 	 * which gets passed to it.
3666 	 */
3667 	if (isarr) {
3668 	    char **ap;
3669 
3670 	    if (!copied)
3671 		aval = arrdup(aval), copied = 1;
3672 	    ap = aval;
3673 	    for (; *ap; ap++) {
3674 		char *tmps;
3675 		untokenize(*ap);
3676 		tmps = promptexpand(*ap, 0, NULL, NULL, NULL);
3677 		*ap = dupstring(tmps);
3678 		free(tmps);
3679 	    }
3680 	} else {
3681 	    char *tmps;
3682 	    if (!copied)
3683 		val = dupstring(val), copied = 1;
3684 	    untokenize(val);
3685 	    tmps = promptexpand(val, 0, NULL, NULL, NULL);
3686 	    val = dupstring(tmps);
3687 	    free(tmps);
3688 	}
3689 	opts[PROMPTSUBST] = ops;
3690 	opts[PROMPTBANG] = opb;
3691 	opts[PROMPTPERCENT] = opp;
3692     }
3693     /*
3694      * One of the possible set of quotes to apply, depending on
3695      * the repetitions of the (q) flag.
3696      */
3697     if (quotemod) {
3698 	int pre = 0, post = 0;
3699 
3700 	if (quotemod > 0) {
3701 	    switch (quotetype)
3702 	    {
3703 	    case QT_DOLLARS:
3704 		/* space for "$" */
3705 		pre = 2;
3706 		post = 1;
3707 		break;
3708 
3709 	    case QT_SINGLE_OPTIONAL:
3710 		/* quotes will be added for us */
3711 	    case QT_BACKSLASH:
3712 	    case QT_BACKSLASH_PATTERN:
3713 		/* no quotes */
3714 		break;
3715 
3716 	    default:
3717 		pre = post = 1;
3718 		break;
3719 	    }
3720 	}
3721 	if (isarr) {
3722 	    char **ap;
3723 
3724 	    if (!copied)
3725 		aval = arrdup(aval), copied = 1;
3726 	    ap = aval;
3727 
3728 	    if (quotemod > 0) {
3729 		if (quotetype == QT_QUOTEDZPUTS) {
3730 		    for (; *ap; ap++)
3731 			*ap = quotedzputs(*ap, NULL);
3732 		} else if (quotetype > QT_BACKSLASH) {
3733 		    int sl;
3734 		    char *tmp;
3735 
3736 		    for (; *ap; ap++) {
3737 			tmp = quotestring(*ap, quotetype);
3738 			sl = strlen(tmp);
3739 			*ap = (char *) zhalloc(pre + sl + post + 1);
3740 			strcpy((*ap) + pre, tmp);
3741 			if (pre)
3742 			    ap[0][pre - 1] = ap[0][pre + sl] =
3743 				(quotetype != QT_DOUBLE ? '\'' : '"');
3744 			ap[0][pre + sl + post] = '\0';
3745 			if (quotetype == QT_DOLLARS)
3746 			  ap[0][0] = '$';
3747 		    }
3748 		} else
3749 		    for (; *ap; ap++)
3750 			*ap = quotestring(*ap, QT_BACKSLASH_SHOWNULL);
3751 	    } else {
3752 		int one = noerrs, oef = errflag, haserr = 0;
3753 
3754 		if (!quoteerr)
3755 		    noerrs = 1;
3756 		for (; *ap; ap++) {
3757 		    haserr |= parse_subst_string(*ap);
3758 		    remnulargs(*ap);
3759 		    untokenize(*ap);
3760 		}
3761 		noerrs = one;
3762 		if (!quoteerr) {
3763 		    /* Retain any user interrupt error status */
3764 		    errflag = oef | (errflag & ERRFLAG_INT);
3765 		}
3766 		else if (haserr || errflag) {
3767 		    zerr("parse error in parameter value");
3768 		    return NULL;
3769 		}
3770 	    }
3771 	} else {
3772 	    if (!copied)
3773 		val = dupstring(val), copied = 1;
3774 	    if (quotemod > 0) {
3775 		if (quotetype == QT_QUOTEDZPUTS) {
3776 		    val = quotedzputs(val, NULL);
3777 		} else if (quotetype > QT_BACKSLASH) {
3778 		    int sl;
3779 		    char *tmp;
3780 		    tmp = quotestring(val, quotetype);
3781 		    sl = strlen(tmp);
3782 		    val = (char *) zhalloc(pre + sl + post + 1);
3783 		    strcpy(val + pre, tmp);
3784 		    if (pre)
3785 			val[pre - 1] = val[pre + sl] =
3786 			    (quotetype != QT_DOUBLE ? '\'' : '"');
3787 		    val[pre + sl + post] = '\0';
3788 		    if (quotetype == QT_DOLLARS)
3789 		      val[0] = '$';
3790 		} else
3791 		    val = quotestring(val, QT_BACKSLASH_SHOWNULL);
3792 	    } else {
3793 		int one = noerrs, oef = errflag, haserr;
3794 
3795 		if (!quoteerr)
3796 		    noerrs = 1;
3797 		haserr = parse_subst_string(val);
3798 		noerrs = one;
3799 		if (!quoteerr) {
3800 		    /* Retain any user interrupt error status */
3801 		    errflag = oef | (errflag & ERRFLAG_INT);
3802 		}
3803 		else if (haserr || errflag) {
3804 		    zerr("parse error in parameter value");
3805 		    return NULL;
3806 		}
3807 		remnulargs(val);
3808 		untokenize(val);
3809 	    }
3810 	}
3811     }
3812     /*
3813      * Transform special characters in the string to make them
3814      * printable, or to show directories, or possibly even both.
3815      */
3816     if (mods) {
3817 	if (isarr) {
3818 	    char **ap;
3819 	    if (!copied)
3820 		aval = arrdup(aval), copied = 1;
3821 	    for (ap = aval; *ap; ap++) {
3822 		if (mods & 1)
3823 		    *ap = substnamedir(*ap);
3824 		if (mods & 2)
3825 		    *ap = nicedupstring(*ap);
3826 	    }
3827 	} else {
3828 	    if (!copied)
3829 		val = dupstring(val), copied = 1;
3830 	    if (mods & 1)
3831 		val = substnamedir(val);
3832 	    if (mods & 2)
3833 		val = nicedupstring(val);
3834 	}
3835     }
3836     /*
3837      * Nothing particularly to do with SH_WORD_SPLIT --- this
3838      * performs lexical splitting on a string as specified by
3839      * the (z) flag.
3840      */
3841     if (shsplit) {
3842 	LinkList list = NULL;
3843 
3844 	if (isarr) {
3845 	    char **ap;
3846 	    for (ap = aval; *ap; ap++) {
3847 		untokenize(*ap);
3848 		list = bufferwords(list, *ap, NULL, shsplit);
3849 	    }
3850 	    isarr = 0;
3851 	} else {
3852 	    untokenize(val);
3853 	    list = bufferwords(NULL, val, NULL, shsplit);
3854 	}
3855 
3856 	if (!list || !firstnode(list))
3857 	    val = dupstring("");
3858 	else if (!nextnode(firstnode(list)))
3859 	    val = getdata(firstnode(list));
3860 	else {
3861 	    aval = hlinklist2array(list, 0);
3862 	    isarr = nojoin ? 1 : 2;
3863 	    l->list.flags |= LF_ARRAY;
3864 	}
3865 	copied = 1;
3866     }
3867     /*
3868      * TODO: hmm.  At this point we have to be on our toes about
3869      * whether we're putting stuff into a line or not, i.e.
3870      * we don't want to do this from a recursive call.
3871      * Rather than passing back flags in a non-trivial way, maybe
3872      * we could decide on the basis of flags passed down to us.
3873      *
3874      * This is the ideal place to do any last-minute conversion from
3875      * array to strings.  However, given all the transformations we've
3876      * already done, probably if it's going to be done it will already
3877      * have been.  (I'd really like to keep everying in aval or
3878      * equivalent and only locally decide if we need to treat it
3879      * as a scalar.)
3880      */
3881 
3882     if (isarr && ssub) {
3883 	/* prefork() wants a scalar, so join no matter what else */
3884 	val = sepjoin(aval, NULL, 1);
3885 	isarr = 0;
3886 	l->list.flags &= ~LF_ARRAY;
3887     }
3888 
3889     /*
3890      * If a multsub result had whitespace at the start and we're
3891      * splitting and there's a previous string, now's the time to do so.
3892      */
3893     if ((ms_flags & MULTSUB_WS_AT_START) && aptr > ostr) {
3894 	insertlinknode(l, n, dupstrpfx(ostr, aptr - ostr)), incnode(n);
3895 	ostr = aptr;
3896     }
3897     /* Likewise at the end */
3898     if ((ms_flags & MULTSUB_WS_AT_END) && *fstr) {
3899 	insertlinknode(l, n, dupstring(fstr)); /* appended, no incnode */
3900 	*fstr = '\0';
3901     }
3902     if (arrasg && !isarr) {
3903 	/*
3904 	 * Caller requested this be forced to an array even if scalar.
3905 	 * Any point in distinguishing arrasg == 2 (assoc array) here?
3906 	 */
3907 	l->list.flags |= LF_ARRAY;
3908 	aval = hmkarray(val);
3909 	isarr = 1;
3910 	DPUTS(!val, "value is NULL in paramsubst, empty array");
3911     }
3912     if (isarr) {
3913 	char *x;
3914 	char *y;
3915 	int xlen;
3916 	int i;
3917 	LinkNode on = n;
3918 
3919 	/* Handle the (u) flag; we need this before the next test */
3920 	if (unique) {
3921 	    if(!copied)
3922 		aval = arrdup(aval);
3923 
3924 	    i = arrlen(aval);
3925 	    if (i > 1)
3926 		zhuniqarray(aval);
3927 	}
3928 	if ((!aval[0] || !aval[1]) && !plan9) {
3929 	    /*
3930 	     * Empty array or single element.  Currently you only
3931 	     * get a single element array at this point from the
3932 	     * unique expansion above. but we can potentially
3933 	     * have other reasons.
3934 	     *
3935 	     * The following test removes the markers
3936 	     * from surrounding double quotes, but I don't know why
3937 	     * that's necessary.
3938 	     */
3939 	    int vallen;
3940 	    if (aptr > (char *) getdata(n) &&
3941 		aptr[-1] == Dnull && *fstr == Dnull)
3942 		*--aptr = '\0', fstr++;
3943 	    vallen = aval[0] ? strlen(aval[0]) : 0;
3944 	    y = (char *) hcalloc((aptr - ostr) + vallen + strlen(fstr) + 1);
3945 	    strcpy(y, ostr);
3946 	    *str = y + (aptr - ostr);
3947 	    if (vallen)
3948 	    {
3949 		strcpy(*str, aval[0]);
3950 		*str += vallen;
3951 	    }
3952 	    strcpy(*str, fstr);
3953 	    setdata(n, y);
3954 	    return n;
3955 	}
3956 	/* Handle (o) and (O) and their variants */
3957 	if (sortit != SORTIT_ANYOLDHOW) {
3958 	    if (!copied)
3959 		aval = arrdup(aval);
3960 	    if (indord) {
3961 		if (sortit & SORTIT_BACKWARDS) {
3962 		    char *copy;
3963 		    char **end = aval + arrlen(aval) - 1, **start = aval;
3964 
3965 		    /* reverse the array */
3966 		    while (start < end) {
3967 			copy = *end;
3968 			*end-- = *start;
3969 			*start++ = copy;
3970 		    }
3971 		}
3972 	    } else {
3973 		/*
3974 		 * HERE: we tested if the last element of the array
3975 		 * was not a NULL string.  Why the last element?
3976 		 * Why didn't we expect NULL strings to work?
3977 		 * Was it just a clumsy way of testing whether there
3978 		 * was enough in the array to sort?
3979 		 */
3980 		strmetasort(aval, sortit, NULL);
3981 	    }
3982 	}
3983 	if (plan9) {
3984 	    /* Handle RC_EXPAND_PARAM */
3985 	    LinkNode tn;
3986 	    local_list1(tl);
3987 
3988 	    *--fstr = Marker;
3989 	    init_list1(tl, fstr);
3990 	    if (!eval && !stringsubst(&tl, firstnode(&tl), ssub, ret_flags, 0))
3991 		return NULL;
3992 	    *str = aptr;
3993 	    tn = firstnode(&tl);
3994 	    while ((x = *aval++)) {
3995 		if (prenum || postnum)
3996 		    x = dopadding(x, prenum, postnum, preone, postone,
3997 				  premul, postmul
3998 #ifdef MULTIBYTE_SUPPORT
3999 				  , multi_width
4000 #endif
4001 			);
4002 		if (eval && subst_parse_str(&x, (qt && !nojoin), quoteerr))
4003 		    return NULL;
4004 		xlen = strlen(x);
4005 		for (tn = firstnode(&tl);
4006 		     tn && *(y = (char *) getdata(tn)) == Marker;
4007 		     incnode(tn)) {
4008 		    strcatsub(&y, ostr, aptr, x, xlen, y + 1, globsubst,
4009 			      copied);
4010 		    if (qt && !*y && isarr != 2)
4011 			y = dupstring(nulstring);
4012 		    if (plan9)
4013 			setdata(n, (void *) y), plan9 = 0;
4014 		    else
4015 			insertlinknode(l, n, (void *) y), incnode(n);
4016 		}
4017 	    }
4018 	    for (; tn; incnode(tn)) {
4019 		y = (char *) getdata(tn);
4020 		if (*y == Marker)
4021 		    continue;
4022 		if (qt && !*y && isarr != 2)
4023 		    y = dupstring(nulstring);
4024 		if (plan9)
4025 		    setdata(n, (void *) y), plan9 = 0;
4026 		else
4027 		    insertlinknode(l, n, (void *) y), incnode(n);
4028 	    }
4029 	    if (plan9) {
4030 		uremnode(l, n);
4031 		return n;
4032 	    }
4033 	} else {
4034 	    /*
4035 	     * Not RC_EXPAND_PARAM: simply join the first and
4036 	     * last values.
4037 	     * TODO: how about removing the restriction that
4038 	     * aval[1] is non-NULL to promote consistency?, or
4039 	     * simply changing the test so that we drop into
4040 	     * the scalar branch, instead of tricking isarr?
4041 	     */
4042 	    x = aval[0];
4043 	    if (prenum || postnum)
4044 		x = dopadding(x, prenum, postnum, preone, postone,
4045 			      premul, postmul
4046 #ifdef MULTIBYTE_SUPPORT
4047 			      , multi_width
4048 #endif
4049 		    );
4050 	    if (eval && subst_parse_str(&x, (qt && !nojoin), quoteerr))
4051 		return NULL;
4052 	    xlen = strlen(x);
4053 	    strcatsub(&y, ostr, aptr, x, xlen, NULL, globsubst, copied);
4054 	    if (qt && !*y && isarr != 2)
4055 		y = dupstring(nulstring);
4056 	    setdata(n, (void *) y);
4057 
4058 	    i = 1;
4059 	    /* aval[1] is non-null here */
4060 	    while (aval[i + 1]) {
4061 		x = aval[i++];
4062 		if (prenum || postnum)
4063 		    x = dopadding(x, prenum, postnum, preone, postone,
4064 				  premul, postmul
4065 #ifdef MULTIBYTE_SUPPORT
4066 				  , multi_width
4067 #endif
4068 			);
4069 		if (eval && subst_parse_str(&x, (qt && !nojoin), quoteerr))
4070 		    return NULL;
4071 		if (qt && !*x && isarr != 2)
4072 		    y = dupstring(nulstring);
4073 		else {
4074 		    y = dupstring(x);
4075 		    if (globsubst)
4076 			shtokenize(y);
4077 		}
4078 		insertlinknode(l, n, (void *) y), incnode(n);
4079 	    }
4080 
4081 	    x = aval[i];
4082 	    if (prenum || postnum)
4083 		x = dopadding(x, prenum, postnum, preone, postone,
4084 			      premul, postmul
4085 #ifdef MULTIBYTE_SUPPORT
4086 			      , multi_width
4087 #endif
4088 		    );
4089 	    if (eval && subst_parse_str(&x, (qt && !nojoin), quoteerr))
4090 		return NULL;
4091 	    xlen = strlen(x);
4092 	    *str = strcatsub(&y, aptr, aptr, x, xlen, fstr, globsubst, copied);
4093 	    if (qt && !*y && isarr != 2)
4094 		y = dupstring(nulstring);
4095 	    insertlinknode(l, n, (void *) y), incnode(n);
4096 	}
4097 	/* This used to omit restoring of *str and instead test
4098 	 *   if (eval)
4099 	 *       n = on;
4100 	 * but that causes strange behavior of history modifiers when
4101 	 * applied across all values of an array.  What is magic about
4102 	 * eval here that *str seemed not to need restoring?
4103 	 */
4104 	*str = getdata(n = on);
4105     } else {
4106 	/*
4107 	 * Scalar value.  Handle last minute transformations
4108 	 * such as left- or right-padding and the (e) flag to
4109 	 * revaluate the result.
4110 	 */
4111 	int xlen;
4112 	char *x;
4113 	char *y;
4114 
4115 	x = val;
4116 	if (!x) {
4117 	    /* Shouldn't have got here with a NULL string. */
4118 	    DPUTS(1, "value is NULL in paramsubst");
4119 	    return NULL;
4120 	}
4121 	if (prenum || postnum)
4122 	    x = dopadding(x, prenum, postnum, preone, postone,
4123 			  premul, postmul
4124 #ifdef MULTIBYTE_SUPPORT
4125 			  , multi_width
4126 #endif
4127 		);
4128 	if (eval && subst_parse_str(&x, (qt && !nojoin), quoteerr))
4129 	    return NULL;
4130 	xlen = strlen(x);
4131 	*str = strcatsub(&y, ostr, aptr, x, xlen, fstr, globsubst, copied);
4132 	if (qt && !*y)
4133 	    y = dupstring(nulstring);
4134 	setdata(n, (void *) y);
4135     }
4136     if (eval)
4137 	*str = (char *) getdata(n);
4138 
4139     return n;
4140 }
4141 
4142 /*
4143  * Arithmetic substitution: `a' is the string to be evaluated, `bptr'
4144  * points to the beginning of the string containing it.  The tail of
4145  * the string is given by `rest'. *bptr is modified with the substituted
4146  * string. The function returns a pointer to the tail in the substituted
4147  * string.
4148  */
4149 
4150 /**/
4151 static char *
arithsubst(char * a,char ** bptr,char * rest)4152 arithsubst(char *a, char **bptr, char *rest)
4153 {
4154     char *s = *bptr, *t;
4155     char buf[BDIGBUFSIZE], *b;
4156     mnumber v;
4157 
4158     singsub(&a);
4159     v = matheval(a);
4160     if ((v.type & MN_FLOAT) && !outputradix)
4161 	b = convfloat_underscore(v.u.d, outputunderscore);
4162     else {
4163 	if (v.type & MN_FLOAT)
4164 	    v.u.l = (zlong) v.u.d;
4165 	b = convbase_underscore(buf, v.u.l, outputradix, outputunderscore);
4166     }
4167     t = *bptr = (char *) hcalloc(strlen(*bptr) + strlen(b) +
4168 				 strlen(rest) + 1);
4169     t--;
4170     while ((*++t = *s++));
4171     t--;
4172     while ((*++t = *b++));
4173     strcat(t, rest);
4174     return t;
4175 }
4176 
4177 /* This function implements colon modifiers.
4178  *
4179  * STR is an in/out parameter.  On entry it is the string (e.g., path)
4180  * to modified.  On return it is the modified path.
4181  *
4182  * PTR is an in/out parameter.  On entry it contains the string of colon
4183  * modifiers.  On return it points past the last recognised modifier.
4184  *
4185  * INBRACE is non-zero if we are in some form of a bracketed or
4186  * parenthesised expression; it is zero for modifiers ocurring
4187  * in an an unbracketed variable substitution.  This means that
4188  * $foo:t222 is treated ias ${foo:t}222 rather than ${foo:t222}
4189  * for backward compatibility.
4190  *
4191  * Example:
4192  *     ENTRY:   *str is "."   *ptr is ":AN"
4193  *     RETURN:  *str is "/home/foobar" (equal to $PWD)   *ptr points to the "N"
4194  */
4195 
4196 /**/
4197 void
modify(char ** str,char ** ptr,int inbrace)4198 modify(char **str, char **ptr, int inbrace)
4199 {
4200     char *ptr1, *ptr2, *ptr3, *lptr, c, *test, *sep, *t, *tt, tc, *e;
4201     char *copy, *all, *tmp, sav, sav1, *ptr1end;
4202     int gbal, wall, rec, al, nl, charlen, dellen;
4203     convchar_t del;
4204 
4205     test = NULL;
4206 
4207     if (**ptr == ':')
4208 	*str = dupstring(*str);
4209 
4210     while (**ptr == ':') {
4211 	int count = 0;
4212 
4213 	lptr = *ptr;
4214 	(*ptr)++;
4215 	wall = gbal = 0;
4216 	rec = 1;
4217 	c = '\0';
4218 	sep = NULL;
4219 
4220 	for (; !c && **ptr;) {
4221 	    switch (**ptr) {
4222             case 'a':
4223             case 'A':
4224 	    case 'c':
4225 	    case 'r':
4226 	    case 'e':
4227 	    case 'l':
4228 	    case 'u':
4229 	    case 'q':
4230 	    case 'Q':
4231 	    case 'P':
4232 		c = **ptr;
4233 		break;
4234 
4235 	    case 'h':
4236 	    case 't':
4237 		c = **ptr;
4238 		if (inbrace && idigit((*ptr)[1])) {
4239 		    do {
4240 			count = 10 * count + ((*ptr)[1] - '0');
4241 			++(*ptr);
4242 		    } while (idigit((*ptr)[1]));
4243 		}
4244 		break;
4245 
4246 	    case 's':
4247 		c = **ptr;
4248 		(*ptr)++;
4249 		ptr1 = *ptr;
4250 		MB_METACHARINIT();
4251 		charlen = MB_METACHARLENCONV(ptr1, &del);
4252 #ifdef MULTIBYTE_SUPPORT
4253 		if (del == WEOF)
4254 		    del = (wint_t)((*ptr1 == Meta) ? ptr1[1] ^ 32 : *ptr1);
4255 #endif
4256 		ptr1 += charlen;
4257 		for (ptr2 = ptr1, charlen = 0; *ptr2; ptr2 += charlen) {
4258 		    convchar_t del2;
4259 		    if ((*ptr2 == Bnull || *ptr2 == '\\') && ptr2[1]) {
4260 			/* in double quotes, the backslash isn't tokenized */
4261 			if (*ptr2 == '\\')
4262 			    *ptr2 = Bnull;
4263 			charlen = 2;
4264 			continue;
4265 		    }
4266 		    charlen = MB_METACHARLENCONV(ptr2, &del2);
4267 #ifdef MULTIBYTE_SUPPORT
4268 		    if (del2 == WEOF)
4269 			del2 = (wint_t)((*ptr2 == Meta) ?
4270 					ptr2[1] ^ 32 : *ptr2);
4271 #endif
4272 		    if (del2 == del)
4273 			break;
4274 		}
4275 		if (!*ptr2) {
4276 		    zerr("bad substitution");
4277 		    return;
4278 		}
4279 		ptr1end = ptr2;
4280 		ptr2 += charlen;
4281 		sav1 = *ptr1end;
4282 		*ptr1end = '\0';
4283 		for (ptr3 = ptr2, charlen = 0; *ptr3; ptr3 += charlen) {
4284 		    convchar_t del3;
4285 		    if ((*ptr3 == Bnull || *ptr3 == '\\') && ptr3[1]) {
4286 			/* in double quotes, the backslash isn't tokenized */
4287 			if (*ptr3 == '\\')
4288 			    *ptr3 = Bnull;
4289 			charlen = 2;
4290 			continue;
4291 		    }
4292 		    charlen = MB_METACHARLENCONV(ptr3, &del3);
4293 #ifdef MULTIBYTE_SUPPORT
4294 		    if (del3 == WEOF)
4295 			del3 = (wint_t)((*ptr3 == Meta) ?
4296 					ptr3[1] ^ 32 : *ptr3);
4297 #endif
4298 		    if (del3 == del)
4299 			break;
4300 		}
4301 		sav = *ptr3;
4302 		*ptr3 = '\0';
4303 		if (*ptr1) {
4304 		    zsfree(hsubl);
4305 		    hsubl = ztrdup(ptr1);
4306  		}
4307 		if (!hsubl) {
4308 		    zerr("no previous substitution");
4309 		    return;
4310 		}
4311 		zsfree(hsubr);
4312 		for (tt = hsubl; *tt; tt++)
4313 		    if (inull(*tt) && *tt != Bnullkeep)
4314 			chuck(tt--);
4315 		if (!isset(HISTSUBSTPATTERN))
4316 		    untokenize(hsubl);
4317 		for (tt = hsubr = ztrdup(ptr2); *tt; tt++) {
4318 		    if (inull(*tt) && *tt != Bnullkeep) {
4319 			if (*tt == Bnull && (tt[1] == '&' || tt[1] == '\\')) {
4320 			    /*
4321 			     * The substitution will treat \& and \\
4322 			     * specially.  We need to leave real \'s
4323 			     * as the first character for this to work.
4324 			     */
4325 			    *tt = '\\';
4326 			} else {
4327 			    chuck(tt--);
4328 			}
4329 		    }
4330 		}
4331 		*ptr1end = sav1;
4332 		*ptr3 = sav;
4333 		*ptr = ptr3 - 1;
4334 		if (*ptr3) {
4335 		    /* Final terminator is optional. */
4336 		    *ptr += charlen;
4337 		}
4338 		break;
4339 
4340 	    case '&':
4341 		c = 's';
4342 		break;
4343 
4344 	    case 'g':
4345 		(*ptr)++;
4346 		gbal = 1;
4347 		break;
4348 
4349 	    case 'w':
4350 		wall = 1;
4351 		(*ptr)++;
4352 		break;
4353 	    case 'W':
4354 		wall = 1;
4355 		(*ptr)++;
4356 		ptr1 = get_strarg(ptr2 = *ptr, &charlen);
4357 		if ((sav = *ptr1))
4358 		    *ptr1 = '\0';
4359 		sep = dupstring(ptr2 + charlen);
4360 		if (sav)
4361 		    *ptr1 = sav;
4362 		*ptr = ptr1 + charlen;
4363 		c = '\0';
4364 		break;
4365 
4366 	    case 'f':
4367 		rec = -1;
4368 		(*ptr)++;
4369 		break;
4370 	    case 'F':
4371 		(*ptr)++;
4372 		rec = get_intarg(ptr, &dellen);
4373 		break;
4374 	    default:
4375 		*ptr = lptr;
4376 		return;
4377 	    }
4378 	}
4379 	(*ptr)++;
4380 	if (!c) {
4381 	    *ptr = lptr;
4382 	    return;
4383 	}
4384 	if (rec < 0)
4385 	    test = dupstring(*str);
4386 
4387 	while (rec--) {
4388 	    if (wall) {
4389 		al = 0;
4390 		all = NULL;
4391 		for (t = e = *str; (tt = findword(&e, sep));) {
4392 		    tc = *e;
4393 		    *e = '\0';
4394 		    if (c != 'l' && c != 'u')
4395 			copy = dupstring(tt);
4396 		    *e = tc;
4397 		    switch (c) {
4398                     case 'a':
4399 			chabspath(&copy);
4400 			break;
4401 		    case 'A':
4402 			chrealpath(&copy);
4403 			break;
4404 		    case 'c':
4405 		    {
4406 			char *copy2 = equalsubstr(copy, 0, 0);
4407 			if (copy2)
4408 			    copy = copy2;
4409 			break;
4410 		    }
4411 		    case 'h':
4412 			remtpath(&copy, count);
4413 			break;
4414 		    case 'r':
4415 			remtext(&copy);
4416 			break;
4417 		    case 'e':
4418 			rembutext(&copy);
4419 			break;
4420 		    case 't':
4421 			remlpaths(&copy, count);
4422 			break;
4423 		    case 'l':
4424 			copy = casemodify(tt, CASMOD_LOWER);
4425 			break;
4426 		    case 'u':
4427 			copy = casemodify(tt, CASMOD_UPPER);
4428 			break;
4429 		    case 's':
4430 			if (hsubl && hsubr)
4431 			    subst(&copy, hsubl, hsubr, gbal);
4432 			break;
4433 		    case 'q':
4434 			copy = quotestring(copy, QT_BACKSLASH_SHOWNULL);
4435 			break;
4436 		    case 'Q':
4437 			{
4438 			    int one = noerrs, oef = errflag;
4439 
4440 			    noerrs = 1;
4441 			    parse_subst_string(copy);
4442 			    noerrs = one;
4443 			    /* Retain any user interrupt error status */
4444 			    errflag = oef | (errflag & ERRFLAG_INT);
4445 			    remnulargs(copy);
4446 			    untokenize(copy);
4447 			}
4448 			break;
4449 		    case 'P':
4450 			if (*copy != '/') {
4451 			    char *here = zgetcwd();
4452 			    if (here[strlen(here)-1] != '/')
4453 				copy = zhtricat(metafy(here, -1, META_HEAPDUP), "/", copy);
4454 			    else
4455 				copy = dyncat(here, copy);
4456 			}
4457 			copy = xsymlink(copy, 1);
4458 			break;
4459 		    }
4460 		    tc = *tt;
4461 		    *tt = '\0';
4462 		    nl = al + strlen(t) + strlen(copy);
4463 		    ptr1 = tmp = (char *)zhalloc(nl + 1);
4464 		    if (all)
4465 			for (ptr2 = all; *ptr2;)
4466 			    *ptr1++ = *ptr2++;
4467 		    for (ptr2 = t; *ptr2;)
4468 			*ptr1++ = *ptr2++;
4469 		    *tt = tc;
4470 		    for (ptr2 = copy; *ptr2;)
4471 			*ptr1++ = *ptr2++;
4472 		    *ptr1 = '\0';
4473 		    al = nl;
4474 		    all = tmp;
4475 		    t = e;
4476 		}
4477 		if (!all)
4478 		    *str = dupstring("");
4479 		else
4480 		    *str = all;
4481 
4482 	    } else {
4483 		switch (c) {
4484 		case 'a':
4485 		    chabspath(str);
4486 		    break;
4487 		case 'A':
4488 		    chrealpath(str);
4489 		    break;
4490 		case 'c':
4491 		{
4492 		    char *copy2 = equalsubstr(*str, 0, 0);
4493 		    if (copy2)
4494 			*str = copy2;
4495 		    break;
4496 		}
4497 		case 'h':
4498 		    remtpath(str, count);
4499 		    break;
4500 		case 'r':
4501 		    remtext(str);
4502 		    break;
4503 		case 'e':
4504 		    rembutext(str);
4505 		    break;
4506 		case 't':
4507 		    remlpaths(str, count);
4508 		    break;
4509 		case 'l':
4510 		    *str = casemodify(*str, CASMOD_LOWER);
4511 		    break;
4512 		case 'u':
4513 		    *str = casemodify(*str, CASMOD_UPPER);
4514 		    break;
4515 		case 's':
4516 		    if (hsubl && hsubr)
4517 			subst(str, hsubl, hsubr, gbal);
4518 		    break;
4519 		case 'q':
4520 		    *str = quotestring(*str, QT_BACKSLASH);
4521 		    break;
4522 		case 'Q':
4523 		    {
4524 			int one = noerrs, oef = errflag;
4525 
4526 			noerrs = 1;
4527 			parse_subst_string(*str);
4528 			noerrs = one;
4529 			/* Retain any user interrupt error status */
4530 			errflag = oef | (errflag & ERRFLAG_INT);
4531 			remnulargs(*str);
4532 			untokenize(*str);
4533 		    }
4534 		    break;
4535 		case 'P':
4536 		    if (**str != '/') {
4537 			char *here = zgetcwd();
4538 			if (here[strlen(here)-1] != '/')
4539 			    *str = zhtricat(metafy(here, -1, META_HEAPDUP), "/", *str);
4540 			else
4541 			    *str = dyncat(here, *str);
4542 		    }
4543 		    *str = xsymlink(*str, 1);
4544 		    break;
4545 		}
4546 	    }
4547 	    if (rec < 0) {
4548 		if (!strcmp(test, *str))
4549 		    rec = 0;
4550 		else
4551 		    test = dupstring(*str);
4552 	    }
4553 	}
4554     }
4555 }
4556 
4557 /* get a directory stack entry */
4558 
4559 /**/
4560 static char *
dstackent(char ch,int val)4561 dstackent(char ch, int val)
4562 {
4563     int backwards;
4564     LinkNode end=(LinkNode)dirstack, n;
4565 
4566     backwards = ch == (isset(PUSHDMINUS) ? '+' : '-');
4567     if(!backwards && !val--)
4568 	return pwd;
4569     if (backwards)
4570 	for (n=lastnode(dirstack); n != end && val; val--, n=prevnode(n));
4571     else
4572 	for (end=NULL, n=firstnode(dirstack); n && val; val--, n=nextnode(n));
4573     if (n == end) {
4574 	if (backwards && !val)
4575 	    return pwd;
4576 	if (isset(NOMATCH))
4577 	    zerr("not enough directory stack entries.");
4578 	return NULL;
4579     }
4580     return (char *)getdata(n);
4581 }
4582