xref: /original-bsd/bin/csh/dol.c (revision 7ecb520c)
1 #ifndef lint
2 static	char *sccsid = "@(#)dol.c	4.5 (Berkeley) 12/13/84";
3 #endif
4 
5 #include "sh.h"
6 #include "sh.char.h"
7 
8 /*
9  * C shell
10  */
11 
12 /*
13  * These routines perform variable substitution and quoting via ' and ".
14  * To this point these constructs have been preserved in the divided
15  * input words.  Here we expand variables and turn quoting via ' and " into
16  * QUOTE bits on characters (which prevent further interpretation).
 * If the `:q' modifier was applied during history expansion, then
 * some QUOTEing may have occurred already, so we don't "trim()" here.
19  */
20 
/*
 * One-character pushback slots.  DgetC looks at Dpeekc before anything
 * else and Dredc looks at Dpeekrd before anything else; in both cases a
 * zero value means the slot is empty.
 */
int	Dpeekc, Dpeekrd;		/* Peeks for DgetC and Dredc */
/*
 * Dcp is the current position in the word being read by Dredc and
 * Dvp points at the next word of the input vector.
 */
char	*Dcp, **Dvp;			/* Input vector for Dredc */

#define	DEOF	-1			/* Returned by Dredc when the vector is used up */

#define	unDgetC(c)	Dpeekc = c	/* Push c back for the next DgetC */

#define QUOTES		(_Q|_Q1|_ESC)	/* \ ' " ` */
29 
/*
 * The following variables give the information about the current
 * $ expansion, recording the current word position, the remaining
 * words within this expansion, the count of remaining words, and the
 * information about any : modifier which is being applied.
 *
 * They are filled in by Dgetdol and setDolp and drained by DgetC.
 */
char	*dolp;			/* Remaining chars from this word */
char	**dolnxt;		/* Further words */
int	dolcnt;			/* Count of further words */
char	dolmod;			/* : modifier character, 0 if none */
int	dolmcnt;		/* Modifier uses left: :gx and :q -> 10000, else 1 */
41 
42 /*
43  * Fix up the $ expansions and quotations in the
44  * argument list to command t.
45  */
46 Dfix(t)
47 	register struct command *t;
48 {
49 	register char **pp;
50 	register char *p;
51 
52 	if (noexec)
53 		return;
54 	/* Note that t_dcom isn't trimmed thus !...:q's aren't lost */
55 	for (pp = t->t_dcom; p = *pp++;)
56 		while (*p)
57 			if (cmap(*p++, _DOL|QUOTES)) {	/* $, \, ', ", ` */
58 				Dfix2(t->t_dcom);	/* found one */
59 				blkfree(t->t_dcom);
60 				t->t_dcom = gargv;
61 				gargv = 0;
62 				return;
63 			}
64 }
65 
66 /*
67  * $ substitute one word, for i/o redirection
68  */
69 char *
70 Dfix1(cp)
71 	register char *cp;
72 {
73 	char *Dv[2];
74 
75 	if (noexec)
76 		return (0);
77 	Dv[0] = cp; Dv[1] = NOSTR;
78 	Dfix2(Dv);
79 	if (gargc != 1) {
80 		setname(cp);
81 		bferr("Ambiguous");
82 	}
83 	cp = savestr(gargv[0]);
84 	blkfree(gargv), gargv = 0;
85 	return (cp);
86 }
87 
88 /*
89  * Subroutine to do actual fixing after state initialization.
90  */
91 Dfix2(v)
92 	char **v;
93 {
94 	char *agargv[GAVSIZ];
95 
96 	ginit(agargv);			/* Initialize glob's area pointers */
97 	Dvp = v; Dcp = "";		/* Setup input vector for Dreadc */
98 	unDgetC(0); unDredc(0);		/* Clear out any old peeks (at error) */
99 	dolp = 0; dolcnt = 0;		/* Clear out residual $ expands (...) */
100 	while (Dword())
101 		continue;
102 	gargv = copyblk(gargv);
103 }
104 
/*
 * Get a word.  This routine is analogous to the routine
 * word() in sh.lex.c for the main lexical input.  One difference
 * here is that we don't get a newline to terminate our expansion.
 * Rather, DgetC will return a DEOF when we hit the end-of-input.
 *
 * The word is assembled in a local buffer and handed to Gcat.
 * Returns 1 after a word has been delivered, and 0 when end-of-input
 * is reached before any part of a word has been seen.
 */
Dword()
{
	register int c, c1;
	char wbuf[BUFSIZ];
	register char *wp = wbuf;	/* next free slot in wbuf */
	register int i = BUFSIZ - 4;	/* room left, with a little slack */
	register bool dolflg;		/* flag passed to DgetC inside quotes */
	bool sofar = 0;			/* set once a word has been started */

loop:
	c = DgetC(DODOL);
	switch (c) {

	case DEOF:
deof:
		if (sofar == 0)
			return (0);
		/* finish this word and catch the code above the next time */
		unDredc(c);
		/* fall into ... */

	case '\n':
		*wp = 0;
		goto ret;

	case ' ':
	case '\t':
		/* Blanks before a word are skipped */
		goto loop;

	case '`':
		/* We preserve ` quotations which are done yet later */
		*wp++ = c, --i;
		/* fall into the common quote handling */
	case '\'':
	case '"':
		/*
		 * Note that DgetC never returns a QUOTES character
		 * from an expansion, so only true input quotes will
		 * get us here or out.
		 */
		c1 = c;
		/* Only "..." allows $ expansion inside the quotes */
		dolflg = c1 == '"' ? DODOL : 0;
		for (;;) {
			c = DgetC(dolflg);
			if (c == c1)
				break;
			if (c == '\n' || c == DEOF)
				error("Unmatched %c", c1);
			/*
			 * A newline carrying the QUOTE bit: back up one
			 * slot so that it overwrites the character stored
			 * just before it (presumably the \ that escaped
			 * it -- confirm against the lexer).
			 */
			if ((c & (QUOTE|TRIM)) == ('\n' | QUOTE))
				--wp, ++i;
			if (--i <= 0)
				goto toochars;
			switch (c1) {

			case '"':
				/*
				 * Leave any `s alone for later.
				 * Other chars are all quoted, thus `...`
				 * can tell it was within "...".
				 */
				*wp++ = c == '`' ? '`' : c | QUOTE;
				break;

			case '\'':
				/* Prevent all further interpretation */
				*wp++ = c | QUOTE;
				break;

			case '`':
				/* Leave all text alone for later */
				*wp++ = c;
				break;
			}
		}
		/* Put back the closing ` to match the opening one stored above */
		if (c1 == '`')
			*wp++ = '`', --i;
		goto pack;		/* continue the word */

	case '\\':
		c = DgetC(0);		/* No $ subst! */
		/* An escaped newline or end-of-input here contributes nothing */
		if (c == '\n' || c == DEOF)
			goto loop;
		c |= QUOTE;
		break;
	}
	/* First character of an ordinary word: let the loop below re-read it */
	unDgetC(c);
pack:
	sofar = 1;
	/* pack up more characters in this word */
	for (;;) {
		c = DgetC(DODOL);
		if (c == '\\') {
			c = DgetC(0);
			if (c == DEOF)
				goto deof;
			/* Escaped newline inside a word becomes a blank */
			if (c == '\n')
				c = ' ';
			else
				c |= QUOTE;
		}
		if (c == DEOF)
			goto deof;
		if (cmap(c, _SP|_NL|_Q|_Q1)) {		/* sp \t\n'"` */
			unDgetC(c);
			/* A quote continues this word via the switch above */
			if (cmap(c, QUOTES))
				goto loop;
			*wp++ = 0;
			goto ret;
		}
		/* toochars is also the target of the quote loop above */
		if (--i <= 0)
toochars:
			error("Word too long");
		*wp++ = c;
	}
ret:
	Gcat("", wbuf);
	return (1);
}
228 
229 /*
230  * Get a character, performing $ substitution unless flag is 0.
231  * Any QUOTES character which is returned from a $ expansion is
232  * QUOTEd so that it will not be recognized above.
233  */
234 DgetC(flag)
235 	register int flag;
236 {
237 	register int c;
238 
239 top:
240 	if (c = Dpeekc) {
241 		Dpeekc = 0;
242 		return (c);
243 	}
244 	if (lap) {
245 		c = *lap++ & (QUOTE|TRIM);
246 		if (c == 0) {
247 			lap = 0;
248 			goto top;
249 		}
250 quotspec:
251 		if (cmap(c, QUOTES))
252 			return (c | QUOTE);
253 		return (c);
254 	}
255 	if (dolp) {
256 		if (c = *dolp++ & (QUOTE|TRIM))
257 			goto quotspec;
258 		if (dolcnt > 0) {
259 			setDolp(*dolnxt++);
260 			--dolcnt;
261 			return (' ');
262 		}
263 		dolp = 0;
264 	}
265 	if (dolcnt > 0) {
266 		setDolp(*dolnxt++);
267 		--dolcnt;
268 		goto top;
269 	}
270 	c = Dredc();
271 	if (c == '$' && flag) {
272 		Dgetdol();
273 		goto top;
274 	}
275 	return (c);
276 }
277 
/*
 * An empty word vector and a stand-in variable entry built on it.
 * Dgetdol points vp at nulargv for a positional parameter ($1, ...)
 * when adrof("argv") finds nothing.
 */
char	*nulvec[] = { 0 };
struct	varent nulargv = { nulvec, "argv", 0 };
280 
281 /*
282  * Handle the multitudinous $ expansion forms.
283  * Ugh.
284  */
285 Dgetdol()
286 {
287 	register char *np;
288 	register struct varent *vp;
289 	char name[20];
290 	int c, sc;
291 	int subscr = 0, lwb = 1, upb = 0;
292 	bool dimen = 0, bitset = 0;
293 	char wbuf[BUFSIZ];
294 
295 	dolmod = dolmcnt = 0;
296 	c = sc = DgetC(0);
297 	if (c == '{')
298 		c = DgetC(0);		/* sc is { to take } later */
299 	if ((c & TRIM) == '#')
300 		dimen++, c = DgetC(0);		/* $# takes dimension */
301 	else if (c == '?')
302 		bitset++, c = DgetC(0);		/* $? tests existence */
303 	switch (c) {
304 
305 	case '$':
306 		if (dimen || bitset)
307 			goto syntax;		/* No $?$, $#$ */
308 		setDolp(doldol);
309 		goto eatbrac;
310 
311 	case '<'|QUOTE:
312 		if (dimen || bitset)
313 			goto syntax;		/* No $?<, $#< */
314 		for (np = wbuf; read(OLDSTD, np, 1) == 1; np++) {
315 			if (np >= &wbuf[BUFSIZ-1])
316 				error("$< line too long");
317 			if (*np <= 0 || *np == '\n')
318 				break;
319 		}
320 		*np = 0;
321 		/*
322 		 * KLUDGE: dolmod is set here because it will
323 		 * cause setDolp to call domod and thus to copy wbuf.
324 		 * Otherwise setDolp would use it directly. If we saved
325 		 * it ourselves, no one would know when to free it.
326 		 * The actual function of the 'q' causes filename
327 		 * expansion not to be done on the interpolated value.
328 		 */
329 		dolmod = 'q';
330 		dolmcnt = 10000;
331 		setDolp(wbuf);
332 		goto eatbrac;
333 
334 	case DEOF:
335 	case '\n':
336 		goto syntax;
337 
338 	case '*':
339 		(void) strcpy(name, "argv");
340 		vp = adrof("argv");
341 		subscr = -1;			/* Prevent eating [...] */
342 		break;
343 
344 	default:
345 		np = name;
346 		if (digit(c)) {
347 			if (dimen)
348 				goto syntax;	/* No $#1, e.g. */
349 			subscr = 0;
350 			do {
351 				subscr = subscr * 10 + c - '0';
352 				c = DgetC(0);
353 			} while (digit(c));
354 			unDredc(c);
355 			if (subscr < 0)
356 				goto oob;
357 			if (subscr == 0) {
358 				if (bitset) {
359 					dolp = file ? "1" : "0";
360 					goto eatbrac;
361 				}
362 				if (file == 0)
363 					error("No file for $0");
364 				setDolp(file);
365 				goto eatbrac;
366 			}
367 			if (bitset)
368 				goto syntax;
369 			vp = adrof("argv");
370 			if (vp == 0) {
371 				vp = &nulargv;
372 				goto eatmod;
373 			}
374 			break;
375 		}
376 		if (!alnum(c))
377 			goto syntax;
378 		for (;;) {
379 			*np++ = c;
380 			c = DgetC(0);
381 			if (!alnum(c))
382 				break;
383 			if (np >= &name[sizeof name - 2])
384 syntax:
385 				error("Variable syntax");
386 		}
387 		*np++ = 0;
388 		unDredc(c);
389 		vp = adrof(name);
390 	}
391 	if (bitset) {
392 		dolp = (vp || getenv(name)) ? "1" : "0";
393 		goto eatbrac;
394 	}
395 	if (vp == 0) {
396 		np = getenv(name);
397 		if (np) {
398 			addla(np);
399 			goto eatbrac;
400 		}
401 		udvar(name);
402 		/*NOTREACHED*/
403 	}
404 	c = DgetC(0);
405 	upb = blklen(vp->vec);
406 	if (dimen == 0 && subscr == 0 && c == '[') {
407 		np = name;
408 		for (;;) {
409 			c = DgetC(DODOL);	/* Allow $ expand within [ ] */
410 			if (c == ']')
411 				break;
412 			if (c == '\n' || c == DEOF)
413 				goto syntax;
414 			if (np >= &name[sizeof name - 2])
415 				goto syntax;
416 			*np++ = c;
417 		}
418 		*np = 0, np = name;
419 		if (dolp || dolcnt)		/* $ exp must end before ] */
420 			goto syntax;
421 		if (!*np)
422 			goto syntax;
423 		if (digit(*np)) {
424 			register int i = 0;
425 
426 			while (digit(*np))
427 				i = i * 10 + *np++ - '0';
428 			if ((i < 0 || i > upb) && !any(*np, "-*")) {
429 oob:
430 				setname(vp->v_name);
431 				error("Subscript out of range");
432 			}
433 			lwb = i;
434 			if (!*np)
435 				upb = lwb, np = "*";
436 		}
437 		if (*np == '*')
438 			np++;
439 		else if (*np != '-')
440 			goto syntax;
441 		else {
442 			register int i = upb;
443 
444 			np++;
445 			if (digit(*np)) {
446 				i = 0;
447 				while (digit(*np))
448 					i = i * 10 + *np++ - '0';
449 				if (i < 0 || i > upb)
450 					goto oob;
451 			}
452 			if (i < lwb)
453 				upb = lwb - 1;
454 			else
455 				upb = i;
456 		}
457 		if (lwb == 0) {
458 			if (upb != 0)
459 				goto oob;
460 			upb = -1;
461 		}
462 		if (*np)
463 			goto syntax;
464 	} else {
465 		if (subscr > 0)
466 			if (subscr > upb)
467 				lwb = 1, upb = 0;
468 			else
469 				lwb = upb = subscr;
470 		unDredc(c);
471 	}
472 	if (dimen) {
473 		char *cp = putn(upb - lwb + 1);
474 
475 		addla(cp);
476 		xfree(cp);
477 	} else {
478 eatmod:
479 		c = DgetC(0);
480 		if (c == ':') {
481 			c = DgetC(0), dolmcnt = 1;
482 			if (c == 'g')
483 				c = DgetC(0), dolmcnt = 10000;
484 			if (!any(c, "htrqxe"))
485 				error("Bad : mod in $");
486 			dolmod = c;
487 			if (c == 'q')
488 				dolmcnt = 10000;
489 		} else
490 			unDredc(c);
491 		dolnxt = &vp->vec[lwb - 1];
492 		dolcnt = upb - lwb + 1;
493 	}
494 eatbrac:
495 	if (sc == '{') {
496 		c = Dredc();
497 		if (c != '}')
498 			goto syntax;
499 	}
500 }
501 
502 setDolp(cp)
503 	register char *cp;
504 {
505 	register char *dp;
506 
507 	if (dolmod == 0 || dolmcnt == 0) {
508 		dolp = cp;
509 		return;
510 	}
511 	dp = domod(cp, dolmod);
512 	if (dp) {
513 		dolmcnt--;
514 		addla(dp);
515 		xfree(dp);
516 	} else
517 		addla(cp);
518 	dolp = "";
519 }
520 
521 unDredc(c)
522 	int c;
523 {
524 
525 	Dpeekrd = c;
526 }
527 
528 Dredc()
529 {
530 	register int c;
531 
532 	if (c = Dpeekrd) {
533 		Dpeekrd = 0;
534 		return (c);
535 	}
536 	if (Dcp && (c = *Dcp++))
537 		return (c&(QUOTE|TRIM));
538 	if (*Dvp == 0) {
539 		Dcp = 0;
540 		return (DEOF);
541 	}
542 	Dcp = *Dvp++;
543 	return (' ');
544 }
545 
546 Dtestq(c)
547 	register int c;
548 {
549 
550 	if (cmap(c, QUOTES))
551 		gflag = 1;
552 }
553 
554 /*
555  * Form a shell temporary file (in unit 0) from the words
556  * of the shell input up to a line the same as "term".
557  * Unit 0 should have been closed before this call.
558  */
559 heredoc(term)
560 	char *term;
561 {
562 	register int c;
563 	char *Dv[2];
564 	char obuf[BUFSIZ], lbuf[BUFSIZ], mbuf[BUFSIZ];
565 	int ocnt, lcnt, mcnt;
566 	register char *lbp, *obp, *mbp;
567 	char **vp;
568 	bool quoted;
569 
570 	if (creat(shtemp, 0600) < 0)
571 		Perror(shtemp);
572 	(void) close(0);
573 	if (open(shtemp, 2) < 0) {
574 		int oerrno = errno;
575 
576 		(void) unlink(shtemp);
577 		errno = oerrno;
578 		Perror(shtemp);
579 	}
580 	(void) unlink(shtemp);			/* 0 0 inode! */
581 	Dv[0] = term; Dv[1] = NOSTR; gflag = 0;
582 	trim(Dv); rscan(Dv, Dtestq); quoted = gflag;
583 	ocnt = BUFSIZ; obp = obuf;
584 	for (;;) {
585 		/*
586 		 * Read up a line
587 		 */
588 		lbp = lbuf; lcnt = BUFSIZ - 4;
589 		for (;;) {
590 			c = readc(1);		/* 1 -> Want EOF returns */
591 			if (c < 0) {
592 				setname(term);
593 				bferr("<< terminator not found");
594 			}
595 			if (c == '\n')
596 				break;
597 			if (c &= TRIM) {
598 				*lbp++ = c;
599 				if (--lcnt < 0) {
600 					setname("<<");
601 					error("Line overflow");
602 				}
603 			}
604 		}
605 		*lbp = 0;
606 
607 		/*
608 		 * Compare to terminator -- before expansion
609 		 */
610 		if (eq(lbuf, term)) {
611 			(void) write(0, obuf, BUFSIZ - ocnt);
612 			(void) lseek(0, (off_t)0, 0);
613 			return;
614 		}
615 
616 		/*
617 		 * If term was quoted or -n just pass it on
618 		 */
619 		if (quoted || noexec) {
620 			*lbp++ = '\n'; *lbp = 0;
621 			for (lbp = lbuf; c = *lbp++;) {
622 				*obp++ = c;
623 				if (--ocnt == 0) {
624 					(void) write(0, obuf, BUFSIZ);
625 					obp = obuf; ocnt = BUFSIZ;
626 				}
627 			}
628 			continue;
629 		}
630 
631 		/*
632 		 * Term wasn't quoted so variable and then command
633 		 * expand the input line
634 		 */
635 		Dcp = lbuf; Dvp = Dv + 1; mbp = mbuf; mcnt = BUFSIZ - 4;
636 		for (;;) {
637 			c = DgetC(DODOL);
638 			if (c == DEOF)
639 				break;
640 			if ((c &= TRIM) == 0)
641 				continue;
642 			/* \ quotes \ $ ` here */
643 			if (c =='\\') {
644 				c = DgetC(0);
645 				if (!any(c, "$\\`"))
646 					unDgetC(c | QUOTE), c = '\\';
647 				else
648 					c |= QUOTE;
649 			}
650 			*mbp++ = c;
651 			if (--mcnt == 0) {
652 				setname("<<");
653 				bferr("Line overflow");
654 			}
655 		}
656 		*mbp++ = 0;
657 
658 		/*
659 		 * If any ` in line do command substitution
660 		 */
661 		mbp = mbuf;
662 		if (any('`', mbp)) {
663 			/*
664 			 * 1 arg to dobackp causes substitution to be literal.
665 			 * Words are broken only at newlines so that all blanks
666 			 * and tabs are preserved.  Blank lines (null words)
667 			 * are not discarded.
668 			 */
669 			vp = dobackp(mbuf, 1);
670 		} else
671 			/* Setup trivial vector similar to return of dobackp */
672 			Dv[0] = mbp, Dv[1] = NOSTR, vp = Dv;
673 
674 		/*
675 		 * Resurrect the words from the command substitution
676 		 * each separated by a newline.  Note that the last
677 		 * newline of a command substitution will have been
678 		 * discarded, but we put a newline after the last word
679 		 * because this represents the newline after the last
680 		 * input line!
681 		 */
682 		for (; *vp; vp++) {
683 			for (mbp = *vp; *mbp; mbp++) {
684 				*obp++ = *mbp & TRIM;
685 				if (--ocnt == 0) {
686 					(void) write(0, obuf, BUFSIZ);
687 					obp = obuf; ocnt = BUFSIZ;
688 				}
689 			}
690 			*obp++ = '\n';
691 			if (--ocnt == 0) {
692 				(void) write(0, obuf, BUFSIZ);
693 				obp = obuf; ocnt = BUFSIZ;
694 			}
695 		}
696 		if (pargv)
697 			blkfree(pargv), pargv = 0;
698 	}
699 }
700