xref: /original-bsd/bin/csh/dol.c (revision a91856c6)
1 /*-
2  * Copyright (c) 1980, 1991 The Regents of the University of California.
3  * All rights reserved.
4  *
5  * %sccs.include.redist.c%
6  */
7 
8 #ifndef lint
9 static char sccsid[] = "@(#)dol.c	5.8 (Berkeley) 04/04/91";
10 #endif /* not lint */
11 
12 #include "sh.h"
13 
14 /*
15  * C shell
16  */
17 
18 /*
19  * These routines perform variable substitution and quoting via ' and ".
20  * To this point these constructs have been preserved in the divided
21  * input words.  Here we expand variables and turn quoting via ' and " into
22  * QUOTE bits on characters (which prevent further interpretation).
23  * If the `:q' modifier was applied during history expansion, then
24  * some QUOTEing may have occurred already, so we don't "trim()" here.
25  */
26 
/*
 * One-character pushback slots.  DgetC hands back Dpeekc before looking
 * anywhere else, and Dredc hands back Dpeekrd before reading the input
 * vector.  Both test the slot for non-zero, so 0 means "empty".
 */
27 int	Dpeekc, Dpeekrd;		/* Peeks for DgetC and Dredc */
28 char	*Dcp, **Dvp;			/* Input vector for Dredc */
29 
/* Returned by Dredc once the input vector Dvp is exhausted */
30 #define	DEOF	-1
31 
/* Push c back so that the next DgetC call returns it */
32 #define	unDgetC(c)	Dpeekc = c
33 
/* Character-class mask handed to cmap() to recognize quoting characters */
34 #define QUOTES		(_Q|_Q1|_ESC)	/* \ ' " ` */
35 
36 /*
37  * The following variables give the information about the current
38  * $ expansion, recording the current word position, the remaining
39  * words within this expansion, the count of remaining words, and the
40  * information about any : modifier which is being applied.
41  */
42 char	*dolp;			/* Remaining chars from this word */
43 char	**dolnxt;		/* Further words */
44 int	dolcnt;			/* Count of further words */
45 char	dolmod;			/* : modifier character */
/* Dgetdol also forces 10000 for :q and for $<; setDolp decrements it */
46 int	dolmcnt;		/* :gx -> 10000, else 1 */
47 
48 /*
49  * Fix up the $ expansions and quotations in the
50  * argument list to command t.
51  */
52 Dfix(t)
53 	register struct command *t;
54 {
55 	register char **pp;
56 	register char *p;
57 
58 	if (noexec)
59 		return;
60 	/* Note that t_dcom isn't trimmed thus !...:q's aren't lost */
61 	for (pp = t->t_dcom; p = *pp++;)
62 		while (*p)
63 			if (cmap(*p++, _DOL|QUOTES)) {	/* $, \, ', ", ` */
64 				Dfix2(t->t_dcom);	/* found one */
65 				blkfree(t->t_dcom);
66 				t->t_dcom = gargv;
67 				gargv = 0;
68 				return;
69 			}
70 }
71 
72 /*
73  * $ substitute one word, for i/o redirection
74  */
75 char *
76 Dfix1(cp)
77 	register char *cp;
78 {
79 	char *Dv[2];
80 
81 	if (noexec)
82 		return (0);
83 	Dv[0] = cp; Dv[1] = NOSTR;
84 	Dfix2(Dv);
85 	if (gargc != 1) {
86 		setname(cp);
87 		bferr("Ambiguous");
88 	}
89 	cp = savestr(gargv[0]);
90 	blkfree(gargv), gargv = 0;
91 	return (cp);
92 }
93 
94 /*
95  * Subroutine to do actual fixing after state initialization.
96  */
97 Dfix2(v)
98 	char **v;
99 {
100 	ginit();			/* Initialize glob's area pointers */
101 	Dvp = v; Dcp = "";		/* Setup input vector for Dreadc */
102 	unDgetC(0); unDredc(0);		/* Clear out any old peeks (at error) */
103 	dolp = 0; dolcnt = 0;		/* Clear out residual $ expands (...) */
104 	while (Dword())
105 		continue;
106 }
107 
108 /*
109  * Get a word.  This routine is analogous to the routine
110  * word() in sh.lex.c for the main lexical input.  One difference
111  * here is that we don't get a newline to terminate our expansion.
112  * Rather, DgetC will return a DEOF when we hit the end-of-input.
113  */
/*
 * Returns 1 after collecting one word into wbuf and passing it to Gcat,
 * or 0 when DgetC yields DEOF before any part of a word has been seen.
 * `i' tracks the room left in wbuf; when the checked stores run it down
 * to zero the "Word too long" error is raised.
 */
114 Dword()
115 {
116 	register int c, c1;
117 	char wbuf[BUFSIZ];
118 	register char *wp = wbuf;
119 	register int i = BUFSIZ - 4;
120 	register bool dolflg;
	/* Becomes 1 once a word has been started, even an empty '' or "" */
121 	bool sofar = 0;
122 
123 loop:
124 	c = DgetC(DODOL);
125 	switch (c) {
126 
127 	case DEOF:
128 deof:
129 		if (sofar == 0)
130 			return (0);
131 		/* finish this word and catch the code above the next time */
		/* (Dredc returns the pushed-back DEOF on the next call) */
132 		unDredc(c);
133 		/* fall into ... */
134 
135 	case '\n':
136 		*wp = 0;
137 		goto ret;
138 
	/* Blanks between words are skipped */
139 	case ' ':
140 	case '\t':
141 		goto loop;
142 
143 	case '`':
144 		/* We preserve ` quotations which are done yet later */
145 		*wp++ = c, --i;
		/* falls through into the common quote handling below */
146 	case '\'':
147 	case '"':
148 		/*
149 		 * Note that DgetC never returns a QUOTES character
150 		 * from an expansion, so only true input quotes will
151 		 * get us here or out.
152 		 */
153 		c1 = c;
		/* $ expansion stays enabled only inside "..." */
154 		dolflg = c1 == '"' ? DODOL : 0;
155 		for (;;) {
156 			c = DgetC(dolflg);
157 			if (c == c1)
158 				break;
159 			if (c == '\n' || c == DEOF)
160 				error("Unmatched %c", c1);
			/*
			 * A QUOTEd newline discards the character stored just
			 * before it -- presumably the backslash that escaped
			 * it; TODO confirm against the lexer.
			 */
161 			if ((c & (QUOTE|TRIM)) == ('\n' | QUOTE))
162 				--wp, ++i;
163 			if (--i <= 0)
164 				goto toochars;
165 			switch (c1) {
166 
167 			case '"':
168 				/*
169 				 * Leave any `s alone for later.
170 				 * Other chars are all quoted, thus `...`
171 				 * can tell it was within "...".
172 				 */
173 				*wp++ = c == '`' ? '`' : c | QUOTE;
174 				break;
175 
176 			case '\'':
177 				/* Prevent all further interpretation */
178 				*wp++ = c | QUOTE;
179 				break;
180 
181 			case '`':
182 				/* Leave all text alone for later */
183 				*wp++ = c;
184 				break;
185 			}
186 		}
		/* Re-emit the closing ` so the pair survives in the word */
187 		if (c1 == '`')
188 			*wp++ = '`', --i;
189 		goto pack;		/* continue the word */
190 
191 	case '\\':
192 		c = DgetC(0);		/* No $ subst! */
		/* An escaped newline (or \ at end of input) before a word is dropped */
193 		if (c == '\n' || c == DEOF)
194 			goto loop;
195 		c |= QUOTE;
196 		break;
197 	}
	/* Ordinary (or freshly QUOTEd) character: let the pack loop re-read it */
198 	unDgetC(c);
199 pack:
200 	sofar = 1;
201 	/* pack up more characters in this word */
202 	for (;;) {
203 		c = DgetC(DODOL);
204 		if (c == '\\') {
205 			c = DgetC(0);
206 			if (c == DEOF)
207 				goto deof;
			/* Escaped newline inside a word is turned into a plain space */
208 			if (c == '\n')
209 				c = ' ';
210 			else
211 				c |= QUOTE;
212 		}
213 		if (c == DEOF)
214 			goto deof;
215 		if (cmap(c, _SP|_NL|_Q|_Q1)) {		/* sp \t\n'"` */
216 			unDgetC(c);
			/* A quote character continues this same word via loop: */
217 			if (cmap(c, QUOTES))
218 				goto loop;
219 			*wp++ = 0;
220 			goto ret;
221 		}
		/* toochars is also the target of the overflow check in the quote loop */
222 		if (--i <= 0)
223 toochars:
224 			error("Word too long");
225 		*wp++ = c;
226 	}
227 ret:
	/* Hand the finished word to Gcat */
228 	Gcat("", wbuf);
229 	return (1);
230 }
231 
232 /*
233  * Get a character, performing $ substitution unless flag is 0.
234  * Any QUOTES character which is returned from a $ expansion is
235  * QUOTEd so that it will not be recognized above.
236  */
237 DgetC(flag)
238 	register int flag;
239 {
240 	register int c;
241 
242 top:
243 	if (c = Dpeekc) {
244 		Dpeekc = 0;
245 		return (c);
246 	}
247 	if (lap) {
248 		c = *lap++ & (QUOTE|TRIM);
249 		if (c == 0) {
250 			lap = 0;
251 			goto top;
252 		}
253 quotspec:
254 		if (cmap(c, QUOTES))
255 			return (c | QUOTE);
256 		return (c);
257 	}
258 	if (dolp) {
259 		if (c = *dolp++ & (QUOTE|TRIM))
260 			goto quotspec;
261 		if (dolcnt > 0) {
262 			setDolp(*dolnxt++);
263 			--dolcnt;
264 			return (' ');
265 		}
266 		dolp = 0;
267 	}
268 	if (dolcnt > 0) {
269 		setDolp(*dolnxt++);
270 		--dolcnt;
271 		goto top;
272 	}
273 	c = Dredc();
274 	if (c == '$' && flag) {
275 		Dgetdol();
276 		goto top;
277 	}
278 	return (c);
279 }
280 
281 char	*nulvec[] = { 0 };
282 struct	varent nulargv = { nulvec, "argv", 0 };
283 
284 /*
285  * Handle the multitudinous $ expansion forms.
286  * Ugh.
287  */
288 Dgetdol()
289 {
290 	register char *np;
291 	register struct varent *vp;
292 	char name[20];
293 	int c, sc;
294 	int subscr = 0, lwb = 1, upb = 0;
295 	bool dimen = 0, bitset = 0;
296 	char wbuf[BUFSIZ];
297 
298 	dolmod = dolmcnt = 0;
299 	c = sc = DgetC(0);
300 	if (c == '{')
301 		c = DgetC(0);		/* sc is { to take } later */
302 	if ((c & TRIM) == '#')
303 		dimen++, c = DgetC(0);		/* $# takes dimension */
304 	else if (c == '?')
305 		bitset++, c = DgetC(0);		/* $? tests existence */
306 	switch (c) {
307 
308 	case '$':
309 		if (dimen || bitset)
310 			goto syntax;		/* No $?$, $#$ */
311 		setDolp(doldol);
312 		goto eatbrac;
313 
314 	case '<'|QUOTE:
315 		if (dimen || bitset)
316 			goto syntax;		/* No $?<, $#< */
317 		for (np = wbuf; read(OLDSTD, np, 1) == 1; np++) {
318 			if (np >= &wbuf[BUFSIZ-1])
319 				error("$< line too long");
320 			if (*np <= 0 || *np == '\n')
321 				break;
322 		}
323 		*np = 0;
324 		/*
325 		 * KLUDGE: dolmod is set here because it will
326 		 * cause setDolp to call domod and thus to copy wbuf.
327 		 * Otherwise setDolp would use it directly. If we saved
328 		 * it ourselves, no one would know when to free it.
329 		 * The actual function of the 'q' causes filename
330 		 * expansion not to be done on the interpolated value.
331 		 */
332 		dolmod = 'q';
333 		dolmcnt = 10000;
334 		setDolp(wbuf);
335 		goto eatbrac;
336 
337 	case DEOF:
338 	case '\n':
339 		goto syntax;
340 
341 	case '*':
342 		(void) strcpy(name, "argv");
343 		vp = adrof("argv");
344 		subscr = -1;			/* Prevent eating [...] */
345 		break;
346 
347 	default:
348 		np = name;
349 		if (digit(c)) {
350 			if (dimen)
351 				goto syntax;	/* No $#1, e.g. */
352 			subscr = 0;
353 			do {
354 				subscr = subscr * 10 + c - '0';
355 				c = DgetC(0);
356 			} while (digit(c));
357 			unDredc(c);
358 			if (subscr < 0)
359 				goto oob;
360 			if (subscr == 0) {
361 				if (bitset) {
362 					dolp = file ? "1" : "0";
363 					goto eatbrac;
364 				}
365 				if (file == 0)
366 					error("No file for $0");
367 				setDolp(file);
368 				goto eatbrac;
369 			}
370 			if (bitset)
371 				goto syntax;
372 			vp = adrof("argv");
373 			if (vp == 0) {
374 				vp = &nulargv;
375 				goto eatmod;
376 			}
377 			break;
378 		}
379 		if (!alnum(c))
380 			goto syntax;
381 		for (;;) {
382 			*np++ = c;
383 			c = DgetC(0);
384 			if (!alnum(c))
385 				break;
386 			if (np >= &name[sizeof name - 2])
387 syntax:
388 				error("Variable syntax");
389 		}
390 		*np++ = 0;
391 		unDredc(c);
392 		vp = adrof(name);
393 	}
394 	if (bitset) {
395 		dolp = (vp || getenv(name)) ? "1" : "0";
396 		goto eatbrac;
397 	}
398 	if (vp == 0) {
399 		np = getenv(name);
400 		if (np) {
401 			addla(np);
402 			goto eatbrac;
403 		}
404 		udvar(name);
405 		/*NOTREACHED*/
406 	}
407 	c = DgetC(0);
408 	upb = blklen(vp->vec);
409 	if (dimen == 0 && subscr == 0 && c == '[') {
410 		np = name;
411 		for (;;) {
412 			c = DgetC(DODOL);	/* Allow $ expand within [ ] */
413 			if (c == ']')
414 				break;
415 			if (c == '\n' || c == DEOF)
416 				goto syntax;
417 			if (np >= &name[sizeof name - 2])
418 				goto syntax;
419 			*np++ = c;
420 		}
421 		*np = 0, np = name;
422 		if (dolp || dolcnt)		/* $ exp must end before ] */
423 			goto syntax;
424 		if (!*np)
425 			goto syntax;
426 		if (digit(*np)) {
427 			register int i = 0;
428 
429 			while (digit(*np))
430 				i = i * 10 + *np++ - '0';
431 			if ((i < 0 || i > upb) && !index("-*", *np)) {
432 oob:
433 				setname(vp->v_name);
434 				error("Subscript out of range");
435 			}
436 			lwb = i;
437 			if (!*np)
438 				upb = lwb, np = "*";
439 		}
440 		if (*np == '*')
441 			np++;
442 		else if (*np != '-')
443 			goto syntax;
444 		else {
445 			register int i = upb;
446 
447 			np++;
448 			if (digit(*np)) {
449 				i = 0;
450 				while (digit(*np))
451 					i = i * 10 + *np++ - '0';
452 				if (i < 0 || i > upb)
453 					goto oob;
454 			}
455 			if (i < lwb)
456 				upb = lwb - 1;
457 			else
458 				upb = i;
459 		}
460 		if (lwb == 0) {
461 			if (upb != 0)
462 				goto oob;
463 			upb = -1;
464 		}
465 		if (*np)
466 			goto syntax;
467 	} else {
468 		if (subscr > 0)
469 			if (subscr > upb)
470 				lwb = 1, upb = 0;
471 			else
472 				lwb = upb = subscr;
473 		unDredc(c);
474 	}
475 	if (dimen) {
476 		char *cp = putn(upb - lwb + 1);
477 
478 		addla(cp);
479 		xfree(cp);
480 	} else {
481 eatmod:
482 		c = DgetC(0);
483 		if (c == ':') {
484 			c = DgetC(0), dolmcnt = 1;
485 			if (c == 'g')
486 				c = DgetC(0), dolmcnt = 10000;
487 			if (!index("htrqxe", c))
488 				error("Bad : mod in $");
489 			dolmod = c;
490 			if (c == 'q')
491 				dolmcnt = 10000;
492 		} else
493 			unDredc(c);
494 		dolnxt = &vp->vec[lwb - 1];
495 		dolcnt = upb - lwb + 1;
496 	}
497 eatbrac:
498 	if (sc == '{') {
499 		c = Dredc();
500 		if (c != '}')
501 			goto syntax;
502 	}
503 }
504 
505 setDolp(cp)
506 	register char *cp;
507 {
508 	register char *dp;
509 
510 	if (dolmod == 0 || dolmcnt == 0) {
511 		dolp = cp;
512 		return;
513 	}
514 	dp = domod(cp, dolmod);
515 	if (dp) {
516 		dolmcnt--;
517 		addla(dp);
518 		xfree(dp);
519 	} else
520 		addla(cp);
521 	dolp = "";
522 	if (err)
523 		error(err);
524 }
525 
526 unDredc(c)
527 	int c;
528 {
529 
530 	Dpeekrd = c;
531 }
532 
533 Dredc()
534 {
535 	register int c;
536 
537 	if (c = Dpeekrd) {
538 		Dpeekrd = 0;
539 		return (c);
540 	}
541 	if (Dcp && (c = *Dcp++))
542 		return (c&(QUOTE|TRIM));
543 	if (*Dvp == 0) {
544 		Dcp = 0;
545 		return (DEOF);
546 	}
547 	Dcp = *Dvp++;
548 	return (' ');
549 }
550 
551 Dtestq(c)
552 	register int c;
553 {
554 
555 	if (cmap(c, QUOTES))
556 		gflag = 1;
557 }
558 
559 /*
560  * Form a shell temporary file (in unit 0) from the words
561  * of the shell input up to EOF or a line the same as "term".
562  * Unit 0 should have been closed before this call.
563  */
564 heredoc(term)
565 	char *term;
566 {
567 	register int c;
568 	char *Dv[2];
569 	char obuf[BUFSIZ], lbuf[BUFSIZ], mbuf[BUFSIZ];
570 	int ocnt, lcnt, mcnt;
571 	register char *lbp, *obp, *mbp;
572 	char **vp;
573 	bool quoted;
574 
575 	if (creat(shtemp, 0600) < 0)
576 		Perror(shtemp);
577 	(void) close(0);
578 	if (open(shtemp, 2) < 0) {
579 		int oerrno = errno;
580 
581 		(void) unlink(shtemp);
582 		errno = oerrno;
583 		Perror(shtemp);
584 	}
585 	(void) unlink(shtemp);			/* 0 0 inode! */
586 	Dv[0] = term; Dv[1] = NOSTR; gflag = 0;
587 	trim(Dv); rscan(Dv, Dtestq); quoted = gflag;
588 	ocnt = BUFSIZ; obp = obuf;
589 	for (;;) {
590 		/*
591 		 * Read up a line
592 		 */
593 		lbp = lbuf; lcnt = BUFSIZ - 4;
594 		for (;;) {
595 			c = readc(1);		/* 1 -> Want EOF returns */
596 			if (c < 0 || c == '\n')
597 				break;
598 			if (c &= TRIM) {
599 				*lbp++ = c;
600 				if (--lcnt < 0) {
601 					setname("<<");
602 					error("Line overflow");
603 				}
604 			}
605 		}
606 		*lbp = 0;
607 
608 		/*
609 		 * Check for EOF or compare to terminator -- before expansion
610 		 */
611 		if (c < 0 || eq(lbuf, term)) {
612 			(void) write(0, obuf, BUFSIZ - ocnt);
613 			(void) lseek(0, (off_t)0, 0);
614 			return;
615 		}
616 
617 		/*
618 		 * If term was quoted or -n just pass it on
619 		 */
620 		if (quoted || noexec) {
621 			*lbp++ = '\n'; *lbp = 0;
622 			for (lbp = lbuf; c = *lbp++;) {
623 				*obp++ = c;
624 				if (--ocnt == 0) {
625 					(void) write(0, obuf, BUFSIZ);
626 					obp = obuf; ocnt = BUFSIZ;
627 				}
628 			}
629 			continue;
630 		}
631 
632 		/*
633 		 * Term wasn't quoted so variable and then command
634 		 * expand the input line
635 		 */
636 		Dcp = lbuf; Dvp = Dv + 1; mbp = mbuf; mcnt = BUFSIZ - 4;
637 		for (;;) {
638 			c = DgetC(DODOL);
639 			if (c == DEOF)
640 				break;
641 			if ((c &= TRIM) == 0)
642 				continue;
643 			/* \ quotes \ $ ` here */
644 			if (c =='\\') {
645 				c = DgetC(0);
646 				if (!index("$\\`", c))
647 					unDgetC(c | QUOTE), c = '\\';
648 				else
649 					c |= QUOTE;
650 			}
651 			*mbp++ = c;
652 			if (--mcnt == 0) {
653 				setname("<<");
654 				bferr("Line overflow");
655 			}
656 		}
657 		*mbp++ = 0;
658 
659 		/*
660 		 * If any ` in line do command substitution
661 		 */
662 		mbp = mbuf;
663 		if (index(mbp, '`')) {
664 			/*
665 			 * 1 arg to dobackp causes substitution to be literal.
666 			 * Words are broken only at newlines so that all blanks
667 			 * and tabs are preserved.  Blank lines (null words)
668 			 * are not discarded.
669 			 */
670 			vp = dobackp(mbuf, 1);
671 		} else
672 			/* Setup trivial vector similar to return of dobackp */
673 			Dv[0] = mbp, Dv[1] = NOSTR, vp = Dv;
674 
675 		/*
676 		 * Resurrect the words from the command substitution
677 		 * each separated by a newline.  Note that the last
678 		 * newline of a command substitution will have been
679 		 * discarded, but we put a newline after the last word
680 		 * because this represents the newline after the last
681 		 * input line!
682 		 */
683 		for (; *vp; vp++) {
684 			for (mbp = *vp; *mbp; mbp++) {
685 				*obp++ = *mbp & TRIM;
686 				if (--ocnt == 0) {
687 					(void) write(0, obuf, BUFSIZ);
688 					obp = obuf; ocnt = BUFSIZ;
689 				}
690 			}
691 			*obp++ = '\n';
692 			if (--ocnt == 0) {
693 				(void) write(0, obuf, BUFSIZ);
694 				obp = obuf; ocnt = BUFSIZ;
695 			}
696 		}
697 		if (pargv)
698 			blkfree(pargv), pargv = 0;
699 	}
700 }
701