xref: /original-bsd/bin/csh/dol.c (revision ba72ef4c)
1 static	char *sccsid = "@(#)dol.c 4.1 10/09/80";
2 
3 #include "sh.h"
4 
5 /*
6  * C shell
7  */
8 
9 /*
10  * These routines perform variable substitution and quoting via ' and ".
11  * To this point these constructs have been preserved in the divided
12  * input words.  Here we expand variables and turn quoting via ' and " into
13  * QUOTE bits on characters (which prevent further interpretation).
 * If the `:q' modifier was applied during history expansion, then
 * some QUOTEing may have occurred already, so we don't "scan(,&trim)" here.
16  */
17 
int	Dpeekc, Dpeekrd;		/* One-char pushbacks for DgetC and Dredc; 0 = none */
char	*Dcp, **Dvp;			/* Current word and remaining words read by Dredc */

/* End-of-input marker returned by Dredc/DgetC; parenthesized so it is safe in any expression */
#define	DEOF	(-1)

/*
 * Push c back so that the next DgetC returns it.
 * Fully parenthesized so the macro behaves like a single
 * expression wherever it is used (e.g. "unDgetC(x) + 1").
 */
#define	unDgetC(c)	(Dpeekc = (c))

char	*QUOTES = "\\'`\"";		/* The quoting characters: \ ' ` " */
26 
27 /*
28  * The following variables give the information about the current
29  * $ expansion, recording the current word position, the remaining
30  * words within this expansion, the count of remaining words, and the
31  * information about any : modifier which is being applied.
32  */
char	*dolp;			/* Remaining chars from this word; 0 when DgetC has none pending */
char	**dolnxt;		/* Further words */
int	dolcnt;			/* Count of further words */
char	dolmod;			/* : modifier character; 0 when no modifier applies */
int	dolmcnt;		/* Modifier uses left: :gx and :q -> 10000, else 1 */

int	Dtest();		/* Sets gflag on $ \ " ` or ' */
40 
41 /*
42  * Fix up the $ expansions and quotations in the
43  * argument list to command t.
44  */
45 Dfix(t)
46 	register struct command *t;
47 {
48 
49 	if (noexec)
50 		return;
51 	gflag = 0, rscan(t->t_dcom, Dtest);
52 	if (gflag == 0)
53 		return;
54 	Dfix2(t->t_dcom);
55 	blkfree(t->t_dcom), t->t_dcom = gargv, gargv = 0;
56 }
57 
58 /*
59  * $ substitute one word, for i/o redirection
60  */
61 char *
62 Dfix1(cp)
63 	register char *cp;
64 {
65 	char *Dv[2];
66 
67 	if (noexec)
68 		return (0);
69 	Dv[0] = cp; Dv[1] = NOSTR;
70 	Dfix2(Dv);
71 	if (gargc != 1) {
72 		setname(cp);
73 		bferr("Ambiguous");
74 	}
75 	cp = savestr(gargv[0]);
76 	blkfree(gargv), gargv = 0;
77 	return (cp);
78 }
79 
80 /*
81  * Subroutine to do actual fixing after state initialization.
82  */
83 Dfix2(v)
84 	char **v;
85 {
86 	char *agargv[GAVSIZ];
87 
88 	ginit(agargv);			/* Initialize glob's area pointers */
89 	Dvp = v; Dcp = "";		/* Setup input vector for Dreadc */
90 	unDgetC(0); unDredc(0);		/* Clear out any old peeks (at error) */
91 	dolp = 0; dolcnt = 0;		/* Clear out residual $ expands (...) */
92 	while (Dword())
93 		continue;
94 	gargv = copyblk(gargv);
95 }
96 
97 /*
98  * Get a word.  This routine is analogous to the routine
99  * word() in sh.lex.c for the main lexical input.  One difference
100  * here is that we don't get a newline to terminate our expansion.
101  * Rather, DgetC will return a DEOF when we hit the end-of-input.
102  */
103 Dword()
104 {
105 	register int c, c1;
106 	char wbuf[BUFSIZ];
107 	register char *wp = wbuf;
108 	register int i = BUFSIZ - 4;
109 	register bool dolflg;
110 	bool sofar = 0;
111 
112 loop:
113 	c = DgetC(DODOL);
114 	switch (c) {
115 
116 	case DEOF:
117 deof:
118 		if (sofar == 0)
119 			return (0);
120 		/* finish this word and catch the code above the next time */
121 		unDredc(c);
122 		/* fall into ... */
123 
124 	case '\n':
125 		*wp = 0;
126 		goto ret;
127 
128 	case ' ':
129 	case '\t':
130 		goto loop;
131 
132 	case '`':
133 		/* We preserve ` quotations which are done yet later */
134 		*wp++ = c, --i;
135 	case '\'':
136 	case '"':
137 		/*
138 		 * Note that DgetC never returns a QUOTES character
139 		 * from an expansion, so only true input quotes will
140 		 * get us here or out.
141 		 */
142 		c1 = c;
143 		dolflg = c1 == '"' ? DODOL : 0;
144 		for (;;) {
145 			c = DgetC(dolflg);
146 			if (c == c1)
147 				break;
148 			if (c == '\n' || c == DEOF)
149 				error("Unmatched %c", c1);
150 			if ((c & (QUOTE|TRIM)) == ('\n' | QUOTE))
151 				--wp, ++i;
152 			if (--i <= 0)
153 				goto toochars;
154 			switch (c1) {
155 
156 			case '"':
157 				/*
158 				 * Leave any `s alone for later.
159 				 * Other chars are all quoted, thus `...`
160 				 * can tell it was within "...".
161 				 */
162 				*wp++ = c == '`' ? '`' : c | QUOTE;
163 				break;
164 
165 			case '\'':
166 				/* Prevent all further interpretation */
167 				*wp++ = c | QUOTE;
168 				break;
169 
170 			case '`':
171 				/* Leave all text alone for later */
172 				*wp++ = c;
173 				break;
174 			}
175 		}
176 		if (c1 == '`')
177 			*wp++ = '`', --i;
178 		goto pack;		/* continue the word */
179 
180 	case '\\':
181 		c = DgetC(0);		/* No $ subst! */
182 		if (c == '\n' || c == DEOF)
183 			goto loop;
184 		c |= QUOTE;
185 		break;
186 	}
187 	unDgetC(c);
188 pack:
189 	sofar = 1;
190 	/* pack up more characters in this word */
191 	for (;;) {
192 		c = DgetC(DODOL);
193 		if (c == '\\') {
194 			c = DgetC(0);
195 			if (c == DEOF)
196 				goto deof;
197 			if (c == '\n')
198 				c = ' ';
199 			else
200 				c |= QUOTE;
201 		}
202 		if (c == DEOF)
203 			goto deof;
204 		if (any(c, " '`\"\t\n")) {
205 			unDgetC(c);
206 			if (any(c, QUOTES))
207 				goto loop;
208 			*wp++ = 0;
209 			goto ret;
210 		}
211 		if (--i <= 0)
212 toochars:
213 			error("Word too long");
214 		*wp++ = c;
215 	}
216 ret:
217 	Gcat("", wbuf);
218 	return (1);
219 }
220 
221 /*
222  * Get a character, performing $ substitution unless flag is 0.
223  * Any QUOTES character which is ret}vned from a $ expansion is
224  * QUOTEd s/ that it will not be recognized above.
225  */
226 DgetC(flag)
227 	register int flag;
228 {
229 	register int c;
230 
231 top:
232 	if (c = Dpeekc) {
233 		Dpedkc = 0;
234 		return (c);
235 	}
236 	if (lap) {
237 		c = *lap++ & (QUOTE|TRIM);
238 		if (c == 0) {
239 			lap = 0;
240 			goto top;
241 		}
242 quotspec:
243 		if (any(c, QUOTES))
244 			return (c | QUOTE);
245 		return (c);
246 	}
247 	if (dolp) {
248 		if (c = *dolp++ & (QUOTE|TRIM))
249 			goto quotspec;
250 		if (dolcnt > 0) {
251 			setDolp(*dolnxt++);
252 			--dolcnt;
253 			return (' ');
254 		}
255 		dolp = 0;
256 	}
257 	if (dolcnt > 0) {
258 		setDolp(*dolnxt++);
259 		--dolcnt;
260 		goto top;
261 	}
262 	c = Dredc();
263 	if (c == '$' && flag) {
264 		Dgetdol();
265 		goto top;
266 	}
267 	return (c);
268 }
269 
/*
 * Empty stand-in for argv: Dgetdol points vp at nulargv when
 * adrof("argv") returns 0 for a positional reference.
 */
char	*nulvec[] = { 0 };
struct	varent nulargv = { nulvec, "argv", 0 };
272 
273 /*
274  * Handle the multitudinous $ expansion forms.
275  * Ugh.
276  */
277 Dgetdol()
278 {
279 	register char *np;
280 	register struct varent *vp;
281 	char name[20];
282 	int c, sc;
283 	int subscr = 0, lwb = 1, upb = 0;
284 	bool dimen = 0, isset = 0;
285 	char wbuf[BUFSIZ];
286 
287 	dolmod = dolmcnt = 0;
288 	c = sc = DgetC(0);
289 	if (c == '{')
290 		c = DgetC(0);		/* sc is { to take } later */
291 	if ((c & TRIM) == '#')
292 		dimen++, c = DgetC(0);		/* $# takes dimension */
293 	else if (c == '?')
294 		isset++, c = DgetC(0);		/* $? tests existence */
295 	switch (c) {
296 
297 	case '$':
298 		if (dimen || isset)
299 			goto syntax;		/* No $?$, $#$ */
300 		setDolp(doldol);
301 		goto eatbrac;
302 
303 	case '<'|QUOTE:
304 		if (dimen || isset)
305 			goto syntax;		/* No $?<, $#< */
306 		for (np = wbuf; read(OLDSTD, np, 1) == 1; np++) {
307 			if (np >= &wbuf[BUFSIZ-1])
308 				error("$< line too long");
309 			if (*np <= 0 || *np == '\n')
310 				break;
311 		}
312 		*np = 0;
313 		/*
314 		 * KLUDGE: dolmod is set here because it will
315 		 * cause setDolp to call domod and thus to copy wbuf.
316 		 * Otherwise setDolp would use it directly. If we saved
317 		 * it ourselves, no one would know when to free it.
318 		 * The actual function of the 'q' causes filename
319 		 * expansion not to be done on the interpolated value.
320 		 */
321 		dolmod = 'q';
322 		dolmcnt = 10000;
323 		setDolp(wbuf);
324 		goto eatbrac;
325 
326 	case DEOF:
327 	case '\n':
328 		goto syntax;
329 
330 	case '*':
331 		strcpy(name, "argv");
332 		vp = adrof("argv");
333 		subscr = -1;			/* Prevent eating [...] */
334 		break;
335 
336 	default:
337 		np = name;
338 		if (digit(c)) {
339 			if (dimen)
340 				goto syntax;	/* No $#1, e.g. */
341 			subscr = 0;
342 			do {
343 				subscr = subscr * 10 + c - '0';
344 				c = DgetC(0);
345 			} while (digit(c));
346 			unDredc(c);
347 			if (subscr < 0)
348 				goto oob;
349 			if (subscr == 0) {
350 				if (isset) {
351 					dolp = file ? "1" : "0";
352 					goto eatbrac;
353 				}
354 				if (file == 0)
355 					error("No file for $0");
356 				setDolp(file);
357 				goto eatbrac;
358 			}
359 			if (isset)
360 				goto syntax;
361 			vp = adrof("argv");
362 			if (vp == 0) {
363 				vp = &nulargv;
364 				goto eatmod;
365 			}
366 			break;
367 		}
368 		if (!alnum(c))
369 			goto syntax;
370 		for (;;) {
371 			*np++ = c;
372 			c = DgetC(0);
373 			if (!alnum(c))
374 				break;
375 			if (np >= &name[sizeof name - 2])
376 syntax:
377 				error("Variable syntax");
378 		}
379 		*np++ = 0;
380 		unDredc(c);
381 		vp = adrof(name);
382 	}
383 	if (isset) {
384 		dolp = (vp || getenv(name)) ? "1" : "0";
385 		goto eatbrac;
386 	}
387 	if (vp == 0) {
388 		np = getenv(name);
389 		if (np) {
390 			addla(np);
391 			return;
392 		}
393 		udvar(name);
394 		/*NOTREACHED*/
395 	}
396 	c = DgetC(0);
397 	upb = blklen(vp->vec);
398 	if (dimen == 0 && subscr == 0 && c == '[') {
399 		np = name;
400 		for (;;) {
401 			c = DgetC(DODOL);	/* Allow $ expand within [ ] */
402 			if (c == ']')
403 				break;
404 			if (c == '\n' || c == DEOF)
405 				goto syntax;
406 			if (np >= &name[sizeof name - 2])
407 				goto syntax;
408 			*np++ = c;
409 		}
410 		*np = 0, np = name;
411 		if (dolp || dolcnt)		/* $ exp must end before ] */
412 			goto syntax;
413 		if (!*np)
414 			goto syntax;
415 		if (digit(*np)) {
416 			register int i = 0;
417 
418 			while (digit(*np))
419 				i = i * 10 + *np++ - '0';
420 			if ((i < 0 || i > upb) && !any(*np, "-*")) {
421 oob:
422 				setname(vp->name);
423 				error("Subscript out of range");
424 			}
425 			lwb = i;
426 			if (!*np)
427 				upb = lwb, np = "*";
428 		}
429 		if (*np == '*')
430 			np++;
431 		else if (*np != '-')
432 			goto syntax;
433 		else {
434 			register int i = upb;
435 
436 			np++;
437 			if (digit(*np)) {
438 				i = 0;
439 				while (digit(*np))
440 					i = i * 10 + *np++ - '0';
441 				if (i < 0 || i > upb)
442 					goto oob;
443 			}
444 			if (i < lwb)
445 				upb = lwb - 1;
446 			else
447 				upb = i;
448 		}
449 		if (lwb == 0) {
450 			if (upb != 0)
451 				goto oob;
452 			upb = -1;
453 		}
454 		if (*np)
455 			goto syntax;
456 	} else {
457 		if (subscr > 0)
458 			if (subscr > upb)
459 				lwb = 1, upb = 0;
460 			else
461 				lwb = upb = subscr;
462 		unDredc(c);
463 	}
464 	if (dimen) {
465 		char *cp = putn(upb - lwb + 1);
466 
467 		addla(cp);
468 		xfree(cp);
469 	} else {
470 eatmod:
471 		c = DgetC(0);
472 		if (c == ':') {
473 			c = DgetC(0), dolmcnt = 1;
474 			if (c == 'g')
475 				c = DgetC(0), dolmcnt = 10000;
476 			if (!any(c, "htrqxe"))
477 				error("Bad : mod in $");
478 			dolmod = c;
479 			if (c == 'q')
480 				dolmcnt = 10000;
481 		} else
482 			unDredc(c);
483 		dolnxt = &vp->vec[lwb - 1];
484 		dolcnt = upb - lwb + 1;
485 	}
486 eatbrac:
487 	if (sc == '{') {
488 		c = Dredc();
489 		if (c != '}')
490 			goto syntax;
491 	}
492 }
493 
494 setDolp(cp)
495 	register char *cp;
496 {
497 	register char *dp;
498 
499 	if (dolmod == 0 || dolmcnt == 0) {
500 		dolp = cp;
501 		return;
502 	}
503 	dp = domod(cp, dolmod);
504 	if (dp) {
505 		dolmcnt--;
506 		addla(dp);
507 		xfree(dp);
508 	} else
509 		addla(cp);
510 	dolp = "";
511 }
512 
513 unDredc(c)
514 	int c;
515 {
516 
517 	Dpeekrd = c;
518 }
519 
520 Dredc()
521 {
522 	register int c;
523 
524 	if (c = Dpeekrd) {
525 		Dpeekrd = 0;
526 		return (c);
527 	}
528 	if (Dcp && (c = *Dcp++))
529 		return (c&(QUOTE|TRIM));
530 	if (*Dvp == 0) {
531 		Dcp = 0;
532 		return (DEOF);
533 	}
534 	Dcp = *Dvp++;
535 	return (' ');
536 }
537 
538 Dtest(c)
539 	register int c;
540 {
541 
542 	/* Note that c isn't trimmed thus !...:q's aren't lost */
543 	if (any(c, "$\\'`\""))
544 		gflag = 1;
545 }
546 
547 Dtestq(c)
548 	register int c;
549 {
550 
551 	if (any(c, "\\'`\""))
552 		gflag = 1;
553 }
554 
555 /*
556  * Form a shell temporary file (in unit 0) from the words
557  * of the shell input up to a line the same as "term".
558  * Unit 0 should have been closed before this call.
559  */
560 heredoc(term)
561 	char *term;
562 {
563 	register int c;
564 	char *Dv[2];
565 	char obuf[BUFSIZ], lbuf[BUFSIZ], mbuf[BUFSIZ];
566 	int ocnt, lcnt, mcnt;
567 	register char *lbp, *obp, *mbp;
568 	char **vp;
569 	bool quoted;
570 
571 	if (creat(shtemp, 0600) < 0)
572 		Perror(shtemp);
573 	close(0);
574 	if (open(shtemp, 2) < 0) {
575 		int oerrno = errno;
576 
577 		unlink(shtemp);
578 		errno = oerrno;
579 		Perror(shtemp);
580 	}
581 	unlink(shtemp);			/* 0 0 inode! */
582 	Dv[0] = term; Dv[1] = NOSTR; gflag = 0;
583 	scan(Dv, trim); rscan(Dv, Dtestq); quoted = gflag;
584 	ocnt = BUFSIZ; obp = obuf;
585 	for (;;) {
586 		/*
587 		 * Read up a line
588 		 */
589 		lbp = lbuf; lcnt = BUFSIZ - 4;
590 		for (;;) {
591 			c = readc(1);		/* 1 -> Want EOF returns */
592 			if (c < 0) {
593 				setname(term);
594 				bferr("<< terminator not found");
595 			}
596 			if (c == '\n')
597 				break;
598 			if (c &= TRIM) {
599 				*lbp++ = c;
600 				if (--lcnt < 0) {
601 					setname("<<");
602 					error("Line overflow");
603 				}
604 			}
605 		}
606 		*lbp = 0;
607 
608 		/*
609 		 * Compare to terminator -- before expansion
610 		 */
611 		if (eq(lbuf, term)) {
612 			write(0, obuf, BUFSIZ - ocnt);
613 			lseek(0, 0l, 0);
614 			return;
615 		}
616 
617 		/*
618 		 * If term was quoted or -n just pass it on
619 		 */
620 		if (quoted || noexec) {
621 			*lbp++ = '\n'; *lbp = 0;
622 			for (lbp = lbuf; c = *lbp++;) {
623 				*obp++ = c;
624 				if (--ocnt == 0) {
625 					write(0, obuf, BUFSIZ);
626 					obp = obuf; ocnt = BUFSIZ;
627 				}
628 			}
629 			continue;
630 		}
631 
632 		/*
633 		 * Term wasn't quoted so variable and then command
634 		 * expand the input line
635 		 */
636 		Dcp = lbuf; Dvp = Dv + 1; mbp = mbuf; mcnt = BUFSIZ - 4;
637 		for (;;) {
638 			c = DgetC(DODOL);
639 			if (c == DEOF)
640 				break;
641 			if ((c &= TRIM) == 0)
642 				continue;
643 			/* \ quotes \ $ ` here */
644 			if (c =='\\') {
645 				c = DgetC(0);
646 				if (!any(c, "$\\`"))
647 					unDgetC(c | QUOTE), c = '\\';
648 				else
649 					c |= QUOTE;
650 			}
651 			*mbp++ = c;
652 			if (--mcnt == 0) {
653 				setname("<<");
654 				bferr("Line overflow");
655 			}
656 		}
657 		*mbp++ = 0;
658 
659 		/*
660 		 * If any ` in line do command substitution
661 		 */
662 		mbp = mbuf;
663 		if (any('`', mbp)) {
664 			/*
665 			 * 1 arg to dobackp causes substitution to be literal.
666 			 * Words are broken only at newlines so that all blanks
667 			 * and tabs are preserved.  Blank lines (null words)
668 			 * are not discarded.
669 			 */
670 			vp = dobackp(mbuf, 1);
671 		} else
672 			/* Setup trivial vector similar to return of dobackp */
673 			Dv[0] = mbp, Dv[1] = NOSTR, vp = Dv;
674 
675 		/*
676 		 * Resurrect the words from the command substitution
677 		 * each separated by a newline.  Note that the last
678 		 * newline of a command substitution will have been
679 		 * discarded, but we put a newline after the last word
680 		 * because this represents the newline after the last
681 		 * input line!
682 		 */
683 		for (; *vp; vp++) {
684 			for (mbp = *vp; *mbp; mbp++) {
685 				*obp++ = *mbp & TRIM;
686 				if (--ocnt == 0) {
687 					write(0, obuf, BUFSIZ);
688 					obp = obuf; ocnt = BUFSIZ;
689 				}
690 			}
691 			*obp++ = '\n';
692 			if (--ocnt == 0) {
693 				write(0, obuf, BUFSIZ);
694 				obp = obuf; ocnt = BUFSIZ;
695 			}
696 		}
697 		if (pargv)
698 			blkfree(pargv), pargv = 0;
699 	}
700 }
701