xref: /original-bsd/bin/csh/dol.c (revision 95a66346)
1 /*
2  * Copyright (c) 1980 Regents of the University of California.
3  * All rights reserved.  The Berkeley Software License Agreement
4  * specifies the terms and conditions for redistribution.
5  */
6 
7 #ifndef lint
8 static char sccsid[] = "@(#)dol.c	5.7 (Berkeley) 03/14/91";
9 #endif /* not lint */
10 
11 #include "sh.h"
12 
13 /*
14  * C shell
15  */
16 
17 /*
18  * These routines perform variable substitution and quoting via ' and ".
19  * To this point these constructs have been preserved in the divided
20  * input words.  Here we expand variables and turn quoting via ' and " into
21  * QUOTE bits on characters (which prevent further interpretation).
22  * If the `:q' modifier was applied during history expansion, then
23  * some QUOTEing may have occurred already, so we don't "trim()" here.
24  */
25 
/*
 * One-character pushback cells for the two input layers.  A value of
 * 0 means "nothing pushed back": both DgetC and Dredc test their peek
 * cell for non-zero before reading anything else.
 */
int	Dpeekc, Dpeekrd;		/* Peeks for DgetC and Dredc */
char	*Dcp, **Dvp;			/* Input vector for Dredc */

/* End-of-input indication returned by Dredc (and passed on by DgetC) */
#define	DEOF	(-1)

/*
 * Push a character back for the next DgetC.  Fully parenthesized so
 * that it can be used safely inside a larger expression.
 */
#define	unDgetC(c)	(Dpeekc = (c))

#define QUOTES		(_Q|_Q1|_ESC)	/* \ ' " ` */

/*
 * The following variables give the information about the current
 * $ expansion, recording the current word position, the remaining
 * words within this expansion, the count of remaining words, and the
 * information about any : modifier which is being applied.
 */
char	*dolp;			/* Remaining chars from this word */
char	**dolnxt;		/* Further words */
int	dolcnt;			/* Count of further words */
char	dolmod;			/* : modifier character, 0 if none */
int	dolmcnt;		/* :gx -> 10000, else 1; 0 if no modifier */
46 
47 /*
48  * Fix up the $ expansions and quotations in the
49  * argument list to command t.
50  */
51 Dfix(t)
52 	register struct command *t;
53 {
54 	register char **pp;
55 	register char *p;
56 
57 	if (noexec)
58 		return;
59 	/* Note that t_dcom isn't trimmed thus !...:q's aren't lost */
60 	for (pp = t->t_dcom; p = *pp++;)
61 		while (*p)
62 			if (cmap(*p++, _DOL|QUOTES)) {	/* $, \, ', ", ` */
63 				Dfix2(t->t_dcom);	/* found one */
64 				blkfree(t->t_dcom);
65 				t->t_dcom = gargv;
66 				gargv = 0;
67 				return;
68 			}
69 }
70 
71 /*
72  * $ substitute one word, for i/o redirection
73  */
74 char *
75 Dfix1(cp)
76 	register char *cp;
77 {
78 	char *Dv[2];
79 
80 	if (noexec)
81 		return (0);
82 	Dv[0] = cp; Dv[1] = NOSTR;
83 	Dfix2(Dv);
84 	if (gargc != 1) {
85 		setname(cp);
86 		bferr("Ambiguous");
87 	}
88 	cp = savestr(gargv[0]);
89 	blkfree(gargv), gargv = 0;
90 	return (cp);
91 }
92 
93 /*
94  * Subroutine to do actual fixing after state initialization.
95  */
96 Dfix2(v)
97 	char **v;
98 {
99 	ginit();			/* Initialize glob's area pointers */
100 	Dvp = v; Dcp = "";		/* Setup input vector for Dreadc */
101 	unDgetC(0); unDredc(0);		/* Clear out any old peeks (at error) */
102 	dolp = 0; dolcnt = 0;		/* Clear out residual $ expands (...) */
103 	while (Dword())
104 		continue;
105 }
106 
107 /*
108  * Get a word.  This routine is analogous to the routine
109  * word() in sh.lex.c for the main lexical input.  One difference
110  * here is that we don't get a newline to terminate our expansion.
111  * Rather, DgetC will return a DEOF when we hit the end-of-input.
112  */
113 Dword()
114 {
115 	register int c, c1;
116 	char wbuf[BUFSIZ];
117 	register char *wp = wbuf;
118 	register int i = BUFSIZ - 4;
119 	register bool dolflg;
120 	bool sofar = 0;
121 
122 loop:
123 	c = DgetC(DODOL);
124 	switch (c) {
125 
126 	case DEOF:
127 deof:
128 		if (sofar == 0)
129 			return (0);
130 		/* finish this word and catch the code above the next time */
131 		unDredc(c);
132 		/* fall into ... */
133 
134 	case '\n':
135 		*wp = 0;
136 		goto ret;
137 
138 	case ' ':
139 	case '\t':
140 		goto loop;
141 
142 	case '`':
143 		/* We preserve ` quotations which are done yet later */
144 		*wp++ = c, --i;
145 	case '\'':
146 	case '"':
147 		/*
148 		 * Note that DgetC never returns a QUOTES character
149 		 * from an expansion, so only true input quotes will
150 		 * get us here or out.
151 		 */
152 		c1 = c;
153 		dolflg = c1 == '"' ? DODOL : 0;
154 		for (;;) {
155 			c = DgetC(dolflg);
156 			if (c == c1)
157 				break;
158 			if (c == '\n' || c == DEOF)
159 				error("Unmatched %c", c1);
160 			if ((c & (QUOTE|TRIM)) == ('\n' | QUOTE))
161 				--wp, ++i;
162 			if (--i <= 0)
163 				goto toochars;
164 			switch (c1) {
165 
166 			case '"':
167 				/*
168 				 * Leave any `s alone for later.
169 				 * Other chars are all quoted, thus `...`
170 				 * can tell it was within "...".
171 				 */
172 				*wp++ = c == '`' ? '`' : c | QUOTE;
173 				break;
174 
175 			case '\'':
176 				/* Prevent all further interpretation */
177 				*wp++ = c | QUOTE;
178 				break;
179 
180 			case '`':
181 				/* Leave all text alone for later */
182 				*wp++ = c;
183 				break;
184 			}
185 		}
186 		if (c1 == '`')
187 			*wp++ = '`', --i;
188 		goto pack;		/* continue the word */
189 
190 	case '\\':
191 		c = DgetC(0);		/* No $ subst! */
192 		if (c == '\n' || c == DEOF)
193 			goto loop;
194 		c |= QUOTE;
195 		break;
196 	}
197 	unDgetC(c);
198 pack:
199 	sofar = 1;
200 	/* pack up more characters in this word */
201 	for (;;) {
202 		c = DgetC(DODOL);
203 		if (c == '\\') {
204 			c = DgetC(0);
205 			if (c == DEOF)
206 				goto deof;
207 			if (c == '\n')
208 				c = ' ';
209 			else
210 				c |= QUOTE;
211 		}
212 		if (c == DEOF)
213 			goto deof;
214 		if (cmap(c, _SP|_NL|_Q|_Q1)) {		/* sp \t\n'"` */
215 			unDgetC(c);
216 			if (cmap(c, QUOTES))
217 				goto loop;
218 			*wp++ = 0;
219 			goto ret;
220 		}
221 		if (--i <= 0)
222 toochars:
223 			error("Word too long");
224 		*wp++ = c;
225 	}
226 ret:
227 	Gcat("", wbuf);
228 	return (1);
229 }
230 
231 /*
232  * Get a character, performing $ substitution unless flag is 0.
233  * Any QUOTES character which is returned from a $ expansion is
234  * QUOTEd so that it will not be recognized above.
235  */
236 DgetC(flag)
237 	register int flag;
238 {
239 	register int c;
240 
241 top:
242 	if (c = Dpeekc) {
243 		Dpeekc = 0;
244 		return (c);
245 	}
246 	if (lap) {
247 		c = *lap++ & (QUOTE|TRIM);
248 		if (c == 0) {
249 			lap = 0;
250 			goto top;
251 		}
252 quotspec:
253 		if (cmap(c, QUOTES))
254 			return (c | QUOTE);
255 		return (c);
256 	}
257 	if (dolp) {
258 		if (c = *dolp++ & (QUOTE|TRIM))
259 			goto quotspec;
260 		if (dolcnt > 0) {
261 			setDolp(*dolnxt++);
262 			--dolcnt;
263 			return (' ');
264 		}
265 		dolp = 0;
266 	}
267 	if (dolcnt > 0) {
268 		setDolp(*dolnxt++);
269 		--dolcnt;
270 		goto top;
271 	}
272 	c = Dredc();
273 	if (c == '$' && flag) {
274 		Dgetdol();
275 		goto top;
276 	}
277 	return (c);
278 }
279 
280 char	*nulvec[] = { 0 };
281 struct	varent nulargv = { nulvec, "argv", 0 };
282 
283 /*
284  * Handle the multitudinous $ expansion forms.
285  * Ugh.
286  */
287 Dgetdol()
288 {
289 	register char *np;
290 	register struct varent *vp;
291 	char name[20];
292 	int c, sc;
293 	int subscr = 0, lwb = 1, upb = 0;
294 	bool dimen = 0, bitset = 0;
295 	char wbuf[BUFSIZ];
296 
297 	dolmod = dolmcnt = 0;
298 	c = sc = DgetC(0);
299 	if (c == '{')
300 		c = DgetC(0);		/* sc is { to take } later */
301 	if ((c & TRIM) == '#')
302 		dimen++, c = DgetC(0);		/* $# takes dimension */
303 	else if (c == '?')
304 		bitset++, c = DgetC(0);		/* $? tests existence */
305 	switch (c) {
306 
307 	case '$':
308 		if (dimen || bitset)
309 			goto syntax;		/* No $?$, $#$ */
310 		setDolp(doldol);
311 		goto eatbrac;
312 
313 	case '<'|QUOTE:
314 		if (dimen || bitset)
315 			goto syntax;		/* No $?<, $#< */
316 		for (np = wbuf; read(OLDSTD, np, 1) == 1; np++) {
317 			if (np >= &wbuf[BUFSIZ-1])
318 				error("$< line too long");
319 			if (*np <= 0 || *np == '\n')
320 				break;
321 		}
322 		*np = 0;
323 		/*
324 		 * KLUDGE: dolmod is set here because it will
325 		 * cause setDolp to call domod and thus to copy wbuf.
326 		 * Otherwise setDolp would use it directly. If we saved
327 		 * it ourselves, no one would know when to free it.
328 		 * The actual function of the 'q' causes filename
329 		 * expansion not to be done on the interpolated value.
330 		 */
331 		dolmod = 'q';
332 		dolmcnt = 10000;
333 		setDolp(wbuf);
334 		goto eatbrac;
335 
336 	case DEOF:
337 	case '\n':
338 		goto syntax;
339 
340 	case '*':
341 		(void) strcpy(name, "argv");
342 		vp = adrof("argv");
343 		subscr = -1;			/* Prevent eating [...] */
344 		break;
345 
346 	default:
347 		np = name;
348 		if (digit(c)) {
349 			if (dimen)
350 				goto syntax;	/* No $#1, e.g. */
351 			subscr = 0;
352 			do {
353 				subscr = subscr * 10 + c - '0';
354 				c = DgetC(0);
355 			} while (digit(c));
356 			unDredc(c);
357 			if (subscr < 0)
358 				goto oob;
359 			if (subscr == 0) {
360 				if (bitset) {
361 					dolp = file ? "1" : "0";
362 					goto eatbrac;
363 				}
364 				if (file == 0)
365 					error("No file for $0");
366 				setDolp(file);
367 				goto eatbrac;
368 			}
369 			if (bitset)
370 				goto syntax;
371 			vp = adrof("argv");
372 			if (vp == 0) {
373 				vp = &nulargv;
374 				goto eatmod;
375 			}
376 			break;
377 		}
378 		if (!alnum(c))
379 			goto syntax;
380 		for (;;) {
381 			*np++ = c;
382 			c = DgetC(0);
383 			if (!alnum(c))
384 				break;
385 			if (np >= &name[sizeof name - 2])
386 syntax:
387 				error("Variable syntax");
388 		}
389 		*np++ = 0;
390 		unDredc(c);
391 		vp = adrof(name);
392 	}
393 	if (bitset) {
394 		dolp = (vp || getenv(name)) ? "1" : "0";
395 		goto eatbrac;
396 	}
397 	if (vp == 0) {
398 		np = getenv(name);
399 		if (np) {
400 			addla(np);
401 			goto eatbrac;
402 		}
403 		udvar(name);
404 		/*NOTREACHED*/
405 	}
406 	c = DgetC(0);
407 	upb = blklen(vp->vec);
408 	if (dimen == 0 && subscr == 0 && c == '[') {
409 		np = name;
410 		for (;;) {
411 			c = DgetC(DODOL);	/* Allow $ expand within [ ] */
412 			if (c == ']')
413 				break;
414 			if (c == '\n' || c == DEOF)
415 				goto syntax;
416 			if (np >= &name[sizeof name - 2])
417 				goto syntax;
418 			*np++ = c;
419 		}
420 		*np = 0, np = name;
421 		if (dolp || dolcnt)		/* $ exp must end before ] */
422 			goto syntax;
423 		if (!*np)
424 			goto syntax;
425 		if (digit(*np)) {
426 			register int i = 0;
427 
428 			while (digit(*np))
429 				i = i * 10 + *np++ - '0';
430 			if ((i < 0 || i > upb) && !index("-*", *np)) {
431 oob:
432 				setname(vp->v_name);
433 				error("Subscript out of range");
434 			}
435 			lwb = i;
436 			if (!*np)
437 				upb = lwb, np = "*";
438 		}
439 		if (*np == '*')
440 			np++;
441 		else if (*np != '-')
442 			goto syntax;
443 		else {
444 			register int i = upb;
445 
446 			np++;
447 			if (digit(*np)) {
448 				i = 0;
449 				while (digit(*np))
450 					i = i * 10 + *np++ - '0';
451 				if (i < 0 || i > upb)
452 					goto oob;
453 			}
454 			if (i < lwb)
455 				upb = lwb - 1;
456 			else
457 				upb = i;
458 		}
459 		if (lwb == 0) {
460 			if (upb != 0)
461 				goto oob;
462 			upb = -1;
463 		}
464 		if (*np)
465 			goto syntax;
466 	} else {
467 		if (subscr > 0)
468 			if (subscr > upb)
469 				lwb = 1, upb = 0;
470 			else
471 				lwb = upb = subscr;
472 		unDredc(c);
473 	}
474 	if (dimen) {
475 		char *cp = putn(upb - lwb + 1);
476 
477 		addla(cp);
478 		xfree(cp);
479 	} else {
480 eatmod:
481 		c = DgetC(0);
482 		if (c == ':') {
483 			c = DgetC(0), dolmcnt = 1;
484 			if (c == 'g')
485 				c = DgetC(0), dolmcnt = 10000;
486 			if (!index("htrqxe", c))
487 				error("Bad : mod in $");
488 			dolmod = c;
489 			if (c == 'q')
490 				dolmcnt = 10000;
491 		} else
492 			unDredc(c);
493 		dolnxt = &vp->vec[lwb - 1];
494 		dolcnt = upb - lwb + 1;
495 	}
496 eatbrac:
497 	if (sc == '{') {
498 		c = Dredc();
499 		if (c != '}')
500 			goto syntax;
501 	}
502 }
503 
504 setDolp(cp)
505 	register char *cp;
506 {
507 	register char *dp;
508 
509 	if (dolmod == 0 || dolmcnt == 0) {
510 		dolp = cp;
511 		return;
512 	}
513 	dp = domod(cp, dolmod);
514 	if (dp) {
515 		dolmcnt--;
516 		addla(dp);
517 		xfree(dp);
518 	} else
519 		addla(cp);
520 	dolp = "";
521 	if (err)
522 		error(err);
523 }
524 
525 unDredc(c)
526 	int c;
527 {
528 
529 	Dpeekrd = c;
530 }
531 
532 Dredc()
533 {
534 	register int c;
535 
536 	if (c = Dpeekrd) {
537 		Dpeekrd = 0;
538 		return (c);
539 	}
540 	if (Dcp && (c = *Dcp++))
541 		return (c&(QUOTE|TRIM));
542 	if (*Dvp == 0) {
543 		Dcp = 0;
544 		return (DEOF);
545 	}
546 	Dcp = *Dvp++;
547 	return (' ');
548 }
549 
550 Dtestq(c)
551 	register int c;
552 {
553 
554 	if (cmap(c, QUOTES))
555 		gflag = 1;
556 }
557 
558 /*
559  * Form a shell temporary file (in unit 0) from the words
560  * of the shell input up to EOF or a line the same as "term".
561  * Unit 0 should have been closed before this call.
562  */
563 heredoc(term)
564 	char *term;
565 {
566 	register int c;
567 	char *Dv[2];
568 	char obuf[BUFSIZ], lbuf[BUFSIZ], mbuf[BUFSIZ];
569 	int ocnt, lcnt, mcnt;
570 	register char *lbp, *obp, *mbp;
571 	char **vp;
572 	bool quoted;
573 
574 	if (creat(shtemp, 0600) < 0)
575 		Perror(shtemp);
576 	(void) close(0);
577 	if (open(shtemp, 2) < 0) {
578 		int oerrno = errno;
579 
580 		(void) unlink(shtemp);
581 		errno = oerrno;
582 		Perror(shtemp);
583 	}
584 	(void) unlink(shtemp);			/* 0 0 inode! */
585 	Dv[0] = term; Dv[1] = NOSTR; gflag = 0;
586 	trim(Dv); rscan(Dv, Dtestq); quoted = gflag;
587 	ocnt = BUFSIZ; obp = obuf;
588 	for (;;) {
589 		/*
590 		 * Read up a line
591 		 */
592 		lbp = lbuf; lcnt = BUFSIZ - 4;
593 		for (;;) {
594 			c = readc(1);		/* 1 -> Want EOF returns */
595 			if (c < 0 || c == '\n')
596 				break;
597 			if (c &= TRIM) {
598 				*lbp++ = c;
599 				if (--lcnt < 0) {
600 					setname("<<");
601 					error("Line overflow");
602 				}
603 			}
604 		}
605 		*lbp = 0;
606 
607 		/*
608 		 * Check for EOF or compare to terminator -- before expansion
609 		 */
610 		if (c < 0 || eq(lbuf, term)) {
611 			(void) write(0, obuf, BUFSIZ - ocnt);
612 			(void) lseek(0, (off_t)0, 0);
613 			return;
614 		}
615 
616 		/*
617 		 * If term was quoted or -n just pass it on
618 		 */
619 		if (quoted || noexec) {
620 			*lbp++ = '\n'; *lbp = 0;
621 			for (lbp = lbuf; c = *lbp++;) {
622 				*obp++ = c;
623 				if (--ocnt == 0) {
624 					(void) write(0, obuf, BUFSIZ);
625 					obp = obuf; ocnt = BUFSIZ;
626 				}
627 			}
628 			continue;
629 		}
630 
631 		/*
632 		 * Term wasn't quoted so variable and then command
633 		 * expand the input line
634 		 */
635 		Dcp = lbuf; Dvp = Dv + 1; mbp = mbuf; mcnt = BUFSIZ - 4;
636 		for (;;) {
637 			c = DgetC(DODOL);
638 			if (c == DEOF)
639 				break;
640 			if ((c &= TRIM) == 0)
641 				continue;
642 			/* \ quotes \ $ ` here */
643 			if (c =='\\') {
644 				c = DgetC(0);
645 				if (!index("$\\`", c))
646 					unDgetC(c | QUOTE), c = '\\';
647 				else
648 					c |= QUOTE;
649 			}
650 			*mbp++ = c;
651 			if (--mcnt == 0) {
652 				setname("<<");
653 				bferr("Line overflow");
654 			}
655 		}
656 		*mbp++ = 0;
657 
658 		/*
659 		 * If any ` in line do command substitution
660 		 */
661 		mbp = mbuf;
662 		if (index(mbp, '`')) {
663 			/*
664 			 * 1 arg to dobackp causes substitution to be literal.
665 			 * Words are broken only at newlines so that all blanks
666 			 * and tabs are preserved.  Blank lines (null words)
667 			 * are not discarded.
668 			 */
669 			vp = dobackp(mbuf, 1);
670 		} else
671 			/* Setup trivial vector similar to return of dobackp */
672 			Dv[0] = mbp, Dv[1] = NOSTR, vp = Dv;
673 
674 		/*
675 		 * Resurrect the words from the command substitution
676 		 * each separated by a newline.  Note that the last
677 		 * newline of a command substitution will have been
678 		 * discarded, but we put a newline after the last word
679 		 * because this represents the newline after the last
680 		 * input line!
681 		 */
682 		for (; *vp; vp++) {
683 			for (mbp = *vp; *mbp; mbp++) {
684 				*obp++ = *mbp & TRIM;
685 				if (--ocnt == 0) {
686 					(void) write(0, obuf, BUFSIZ);
687 					obp = obuf; ocnt = BUFSIZ;
688 				}
689 			}
690 			*obp++ = '\n';
691 			if (--ocnt == 0) {
692 				(void) write(0, obuf, BUFSIZ);
693 				obp = obuf; ocnt = BUFSIZ;
694 			}
695 		}
696 		if (pargv)
697 			blkfree(pargv), pargv = 0;
698 	}
699 }
700