xref: /original-bsd/bin/csh/dol.c (revision 648cab2a)
1 /*
2  * Copyright (c) 1980 Regents of the University of California.
3  * All rights reserved.  The Berkeley Software License Agreement
4  * specifies the terms and conditions for redistribution.
5  */
6 
7 #ifndef lint
8 static char *sccsid = "@(#)dol.c	5.5 (Berkeley) 01/15/88";
9 #endif
10 
11 #include "sh.h"
12 
13 /*
14  * C shell
15  */
16 
17 /*
18  * These routines perform variable substitution and quoting via ' and ".
19  * To this point these constructs have been preserved in the divided
20  * input words.  Here we expand variables and turn quoting via ' and " into
21  * QUOTE bits on characters (which prevent further interpretation).
22  * If the `:q' modifier was applied during history expansion, then
23  * some QUOTEing may have occurred already, so we dont "trim()" here.
24  */
25 
int	Dpeekc, Dpeekrd;		/* Peeks for DgetC and Dredc */
char	*Dcp, **Dvp;			/* Input vector for Dredc */

/* Returned by Dredc/DgetC once the input vector is exhausted */
#define	DEOF	(-1)

/*
 * Push c back so the next DgetC returns it.  Fully parenthesized so
 * the macro can be used safely inside a larger expression.
 */
#define	unDgetC(c)	(Dpeekc = (c))

#define QUOTES		(_Q|_Q1|_ESC)	/* \ ' " ` */
34 
/*
 * State of the $ expansion that DgetC is currently delivering:
 * where we are in the current word, which words are still to come
 * and how many of them, and which : modifier (if any) applies.
 */
char	*dolp;			/* Unread tail of the current word */
char	**dolnxt;		/* Next word of the expansion */
int	dolcnt;			/* Number of words left at dolnxt */
char	dolmod;			/* The : modifier letter, 0 if none */
int	dolmcnt;		/* Modifier budget: 10000 for :gx, else 1 */
46 
47 /*
48  * Fix up the $ expansions and quotations in the
49  * argument list to command t.
50  */
51 Dfix(t)
52 	register struct command *t;
53 {
54 	register char **pp;
55 	register char *p;
56 
57 	if (noexec)
58 		return;
59 	/* Note that t_dcom isn't trimmed thus !...:q's aren't lost */
60 	for (pp = t->t_dcom; p = *pp++;)
61 		while (*p)
62 			if (cmap(*p++, _DOL|QUOTES)) {	/* $, \, ', ", ` */
63 				Dfix2(t->t_dcom);	/* found one */
64 				blkfree(t->t_dcom);
65 				t->t_dcom = gargv;
66 				gargv = 0;
67 				return;
68 			}
69 }
70 
71 /*
72  * $ substitute one word, for i/o redirection
73  */
74 char *
75 Dfix1(cp)
76 	register char *cp;
77 {
78 	char *Dv[2];
79 
80 	if (noexec)
81 		return (0);
82 	Dv[0] = cp; Dv[1] = NOSTR;
83 	Dfix2(Dv);
84 	if (gargc != 1) {
85 		setname(cp);
86 		bferr("Ambiguous");
87 	}
88 	cp = savestr(gargv[0]);
89 	blkfree(gargv), gargv = 0;
90 	return (cp);
91 }
92 
93 /*
94  * Subroutine to do actual fixing after state initialization.
95  */
96 Dfix2(v)
97 	char **v;
98 {
99 	char *agargv[GAVSIZ];
100 
101 	ginit(agargv);			/* Initialize glob's area pointers */
102 	Dvp = v; Dcp = "";		/* Setup input vector for Dreadc */
103 	unDgetC(0); unDredc(0);		/* Clear out any old peeks (at error) */
104 	dolp = 0; dolcnt = 0;		/* Clear out residual $ expands (...) */
105 	while (Dword())
106 		continue;
107 	gargv = copyblk(gargv);
108 }
109 
110 /*
111  * Get a word.  This routine is analogous to the routine
112  * word() in sh.lex.c for the main lexical input.  One difference
113  * here is that we don't get a newline to terminate our expansion.
114  * Rather, DgetC will return a DEOF when we hit the end-of-input.
115  */
116 Dword()
117 {
118 	register int c, c1;
119 	char wbuf[BUFSIZ];
120 	register char *wp = wbuf;
121 	register int i = BUFSIZ - 4;
122 	register bool dolflg;
123 	bool sofar = 0;
124 
125 loop:
126 	c = DgetC(DODOL);
127 	switch (c) {
128 
129 	case DEOF:
130 deof:
131 		if (sofar == 0)
132 			return (0);
133 		/* finish this word and catch the code above the next time */
134 		unDredc(c);
135 		/* fall into ... */
136 
137 	case '\n':
138 		*wp = 0;
139 		goto ret;
140 
141 	case ' ':
142 	case '\t':
143 		goto loop;
144 
145 	case '`':
146 		/* We preserve ` quotations which are done yet later */
147 		*wp++ = c, --i;
148 	case '\'':
149 	case '"':
150 		/*
151 		 * Note that DgetC never returns a QUOTES character
152 		 * from an expansion, so only true input quotes will
153 		 * get us here or out.
154 		 */
155 		c1 = c;
156 		dolflg = c1 == '"' ? DODOL : 0;
157 		for (;;) {
158 			c = DgetC(dolflg);
159 			if (c == c1)
160 				break;
161 			if (c == '\n' || c == DEOF)
162 				error("Unmatched %c", c1);
163 			if ((c & (QUOTE|TRIM)) == ('\n' | QUOTE))
164 				--wp, ++i;
165 			if (--i <= 0)
166 				goto toochars;
167 			switch (c1) {
168 
169 			case '"':
170 				/*
171 				 * Leave any `s alone for later.
172 				 * Other chars are all quoted, thus `...`
173 				 * can tell it was within "...".
174 				 */
175 				*wp++ = c == '`' ? '`' : c | QUOTE;
176 				break;
177 
178 			case '\'':
179 				/* Prevent all further interpretation */
180 				*wp++ = c | QUOTE;
181 				break;
182 
183 			case '`':
184 				/* Leave all text alone for later */
185 				*wp++ = c;
186 				break;
187 			}
188 		}
189 		if (c1 == '`')
190 			*wp++ = '`', --i;
191 		goto pack;		/* continue the word */
192 
193 	case '\\':
194 		c = DgetC(0);		/* No $ subst! */
195 		if (c == '\n' || c == DEOF)
196 			goto loop;
197 		c |= QUOTE;
198 		break;
199 	}
200 	unDgetC(c);
201 pack:
202 	sofar = 1;
203 	/* pack up more characters in this word */
204 	for (;;) {
205 		c = DgetC(DODOL);
206 		if (c == '\\') {
207 			c = DgetC(0);
208 			if (c == DEOF)
209 				goto deof;
210 			if (c == '\n')
211 				c = ' ';
212 			else
213 				c |= QUOTE;
214 		}
215 		if (c == DEOF)
216 			goto deof;
217 		if (cmap(c, _SP|_NL|_Q|_Q1)) {		/* sp \t\n'"` */
218 			unDgetC(c);
219 			if (cmap(c, QUOTES))
220 				goto loop;
221 			*wp++ = 0;
222 			goto ret;
223 		}
224 		if (--i <= 0)
225 toochars:
226 			error("Word too long");
227 		*wp++ = c;
228 	}
229 ret:
230 	Gcat("", wbuf);
231 	return (1);
232 }
233 
234 /*
235  * Get a character, performing $ substitution unless flag is 0.
236  * Any QUOTES character which is returned from a $ expansion is
237  * QUOTEd so that it will not be recognized above.
238  */
239 DgetC(flag)
240 	register int flag;
241 {
242 	register int c;
243 
244 top:
245 	if (c = Dpeekc) {
246 		Dpeekc = 0;
247 		return (c);
248 	}
249 	if (lap) {
250 		c = *lap++ & (QUOTE|TRIM);
251 		if (c == 0) {
252 			lap = 0;
253 			goto top;
254 		}
255 quotspec:
256 		if (cmap(c, QUOTES))
257 			return (c | QUOTE);
258 		return (c);
259 	}
260 	if (dolp) {
261 		if (c = *dolp++ & (QUOTE|TRIM))
262 			goto quotspec;
263 		if (dolcnt > 0) {
264 			setDolp(*dolnxt++);
265 			--dolcnt;
266 			return (' ');
267 		}
268 		dolp = 0;
269 	}
270 	if (dolcnt > 0) {
271 		setDolp(*dolnxt++);
272 		--dolcnt;
273 		goto top;
274 	}
275 	c = Dredc();
276 	if (c == '$' && flag) {
277 		Dgetdol();
278 		goto top;
279 	}
280 	return (c);
281 }
282 
283 char	*nulvec[] = { 0 };
284 struct	varent nulargv = { nulvec, "argv", 0 };
285 
286 /*
287  * Handle the multitudinous $ expansion forms.
288  * Ugh.
289  */
290 Dgetdol()
291 {
292 	register char *np;
293 	register struct varent *vp;
294 	char name[20];
295 	int c, sc;
296 	int subscr = 0, lwb = 1, upb = 0;
297 	bool dimen = 0, bitset = 0;
298 	char wbuf[BUFSIZ];
299 
300 	dolmod = dolmcnt = 0;
301 	c = sc = DgetC(0);
302 	if (c == '{')
303 		c = DgetC(0);		/* sc is { to take } later */
304 	if ((c & TRIM) == '#')
305 		dimen++, c = DgetC(0);		/* $# takes dimension */
306 	else if (c == '?')
307 		bitset++, c = DgetC(0);		/* $? tests existence */
308 	switch (c) {
309 
310 	case '$':
311 		if (dimen || bitset)
312 			goto syntax;		/* No $?$, $#$ */
313 		setDolp(doldol);
314 		goto eatbrac;
315 
316 	case '<'|QUOTE:
317 		if (dimen || bitset)
318 			goto syntax;		/* No $?<, $#< */
319 		for (np = wbuf; read(OLDSTD, np, 1) == 1; np++) {
320 			if (np >= &wbuf[BUFSIZ-1])
321 				error("$< line too long");
322 			if (*np <= 0 || *np == '\n')
323 				break;
324 		}
325 		*np = 0;
326 		/*
327 		 * KLUDGE: dolmod is set here because it will
328 		 * cause setDolp to call domod and thus to copy wbuf.
329 		 * Otherwise setDolp would use it directly. If we saved
330 		 * it ourselves, no one would know when to free it.
331 		 * The actual function of the 'q' causes filename
332 		 * expansion not to be done on the interpolated value.
333 		 */
334 		dolmod = 'q';
335 		dolmcnt = 10000;
336 		setDolp(wbuf);
337 		goto eatbrac;
338 
339 	case DEOF:
340 	case '\n':
341 		goto syntax;
342 
343 	case '*':
344 		(void) strcpy(name, "argv");
345 		vp = adrof("argv");
346 		subscr = -1;			/* Prevent eating [...] */
347 		break;
348 
349 	default:
350 		np = name;
351 		if (digit(c)) {
352 			if (dimen)
353 				goto syntax;	/* No $#1, e.g. */
354 			subscr = 0;
355 			do {
356 				subscr = subscr * 10 + c - '0';
357 				c = DgetC(0);
358 			} while (digit(c));
359 			unDredc(c);
360 			if (subscr < 0)
361 				goto oob;
362 			if (subscr == 0) {
363 				if (bitset) {
364 					dolp = file ? "1" : "0";
365 					goto eatbrac;
366 				}
367 				if (file == 0)
368 					error("No file for $0");
369 				setDolp(file);
370 				goto eatbrac;
371 			}
372 			if (bitset)
373 				goto syntax;
374 			vp = adrof("argv");
375 			if (vp == 0) {
376 				vp = &nulargv;
377 				goto eatmod;
378 			}
379 			break;
380 		}
381 		if (!alnum(c))
382 			goto syntax;
383 		for (;;) {
384 			*np++ = c;
385 			c = DgetC(0);
386 			if (!alnum(c))
387 				break;
388 			if (np >= &name[sizeof name - 2])
389 syntax:
390 				error("Variable syntax");
391 		}
392 		*np++ = 0;
393 		unDredc(c);
394 		vp = adrof(name);
395 	}
396 	if (bitset) {
397 		dolp = (vp || getenv(name)) ? "1" : "0";
398 		goto eatbrac;
399 	}
400 	if (vp == 0) {
401 		np = getenv(name);
402 		if (np) {
403 			addla(np);
404 			goto eatbrac;
405 		}
406 		udvar(name);
407 		/*NOTREACHED*/
408 	}
409 	c = DgetC(0);
410 	upb = blklen(vp->vec);
411 	if (dimen == 0 && subscr == 0 && c == '[') {
412 		np = name;
413 		for (;;) {
414 			c = DgetC(DODOL);	/* Allow $ expand within [ ] */
415 			if (c == ']')
416 				break;
417 			if (c == '\n' || c == DEOF)
418 				goto syntax;
419 			if (np >= &name[sizeof name - 2])
420 				goto syntax;
421 			*np++ = c;
422 		}
423 		*np = 0, np = name;
424 		if (dolp || dolcnt)		/* $ exp must end before ] */
425 			goto syntax;
426 		if (!*np)
427 			goto syntax;
428 		if (digit(*np)) {
429 			register int i = 0;
430 
431 			while (digit(*np))
432 				i = i * 10 + *np++ - '0';
433 			if ((i < 0 || i > upb) && !any(*np, "-*")) {
434 oob:
435 				setname(vp->v_name);
436 				error("Subscript out of range");
437 			}
438 			lwb = i;
439 			if (!*np)
440 				upb = lwb, np = "*";
441 		}
442 		if (*np == '*')
443 			np++;
444 		else if (*np != '-')
445 			goto syntax;
446 		else {
447 			register int i = upb;
448 
449 			np++;
450 			if (digit(*np)) {
451 				i = 0;
452 				while (digit(*np))
453 					i = i * 10 + *np++ - '0';
454 				if (i < 0 || i > upb)
455 					goto oob;
456 			}
457 			if (i < lwb)
458 				upb = lwb - 1;
459 			else
460 				upb = i;
461 		}
462 		if (lwb == 0) {
463 			if (upb != 0)
464 				goto oob;
465 			upb = -1;
466 		}
467 		if (*np)
468 			goto syntax;
469 	} else {
470 		if (subscr > 0)
471 			if (subscr > upb)
472 				lwb = 1, upb = 0;
473 			else
474 				lwb = upb = subscr;
475 		unDredc(c);
476 	}
477 	if (dimen) {
478 		char *cp = putn(upb - lwb + 1);
479 
480 		addla(cp);
481 		xfree(cp);
482 	} else {
483 eatmod:
484 		c = DgetC(0);
485 		if (c == ':') {
486 			c = DgetC(0), dolmcnt = 1;
487 			if (c == 'g')
488 				c = DgetC(0), dolmcnt = 10000;
489 			if (!any(c, "htrqxe"))
490 				error("Bad : mod in $");
491 			dolmod = c;
492 			if (c == 'q')
493 				dolmcnt = 10000;
494 		} else
495 			unDredc(c);
496 		dolnxt = &vp->vec[lwb - 1];
497 		dolcnt = upb - lwb + 1;
498 	}
499 eatbrac:
500 	if (sc == '{') {
501 		c = Dredc();
502 		if (c != '}')
503 			goto syntax;
504 	}
505 }
506 
507 setDolp(cp)
508 	register char *cp;
509 {
510 	register char *dp;
511 
512 	if (dolmod == 0 || dolmcnt == 0) {
513 		dolp = cp;
514 		return;
515 	}
516 	dp = domod(cp, dolmod);
517 	if (dp) {
518 		dolmcnt--;
519 		addla(dp);
520 		xfree(dp);
521 	} else
522 		addla(cp);
523 	dolp = "";
524 	if (err)
525 		error(err);
526 }
527 
528 unDredc(c)
529 	int c;
530 {
531 
532 	Dpeekrd = c;
533 }
534 
535 Dredc()
536 {
537 	register int c;
538 
539 	if (c = Dpeekrd) {
540 		Dpeekrd = 0;
541 		return (c);
542 	}
543 	if (Dcp && (c = *Dcp++))
544 		return (c&(QUOTE|TRIM));
545 	if (*Dvp == 0) {
546 		Dcp = 0;
547 		return (DEOF);
548 	}
549 	Dcp = *Dvp++;
550 	return (' ');
551 }
552 
553 Dtestq(c)
554 	register int c;
555 {
556 
557 	if (cmap(c, QUOTES))
558 		gflag = 1;
559 }
560 
561 /*
562  * Form a shell temporary file (in unit 0) from the words
563  * of the shell input up to EOF or a line the same as "term".
564  * Unit 0 should have been closed before this call.
565  */
566 heredoc(term)
567 	char *term;
568 {
569 	register int c;
570 	char *Dv[2];
571 	char obuf[BUFSIZ], lbuf[BUFSIZ], mbuf[BUFSIZ];
572 	int ocnt, lcnt, mcnt;
573 	register char *lbp, *obp, *mbp;
574 	char **vp;
575 	bool quoted;
576 
577 	if (creat(shtemp, 0600) < 0)
578 		Perror(shtemp);
579 	(void) close(0);
580 	if (open(shtemp, 2) < 0) {
581 		int oerrno = errno;
582 
583 		(void) unlink(shtemp);
584 		errno = oerrno;
585 		Perror(shtemp);
586 	}
587 	(void) unlink(shtemp);			/* 0 0 inode! */
588 	Dv[0] = term; Dv[1] = NOSTR; gflag = 0;
589 	trim(Dv); rscan(Dv, Dtestq); quoted = gflag;
590 	ocnt = BUFSIZ; obp = obuf;
591 	for (;;) {
592 		/*
593 		 * Read up a line
594 		 */
595 		lbp = lbuf; lcnt = BUFSIZ - 4;
596 		for (;;) {
597 			c = readc(1);		/* 1 -> Want EOF returns */
598 			if (c < 0 || c == '\n')
599 				break;
600 			if (c &= TRIM) {
601 				*lbp++ = c;
602 				if (--lcnt < 0) {
603 					setname("<<");
604 					error("Line overflow");
605 				}
606 			}
607 		}
608 		*lbp = 0;
609 
610 		/*
611 		 * Check for EOF or compare to terminator -- before expansion
612 		 */
613 		if (c < 0 || eq(lbuf, term)) {
614 			(void) write(0, obuf, BUFSIZ - ocnt);
615 			(void) lseek(0, (off_t)0, 0);
616 			return;
617 		}
618 
619 		/*
620 		 * If term was quoted or -n just pass it on
621 		 */
622 		if (quoted || noexec) {
623 			*lbp++ = '\n'; *lbp = 0;
624 			for (lbp = lbuf; c = *lbp++;) {
625 				*obp++ = c;
626 				if (--ocnt == 0) {
627 					(void) write(0, obuf, BUFSIZ);
628 					obp = obuf; ocnt = BUFSIZ;
629 				}
630 			}
631 			continue;
632 		}
633 
634 		/*
635 		 * Term wasn't quoted so variable and then command
636 		 * expand the input line
637 		 */
638 		Dcp = lbuf; Dvp = Dv + 1; mbp = mbuf; mcnt = BUFSIZ - 4;
639 		for (;;) {
640 			c = DgetC(DODOL);
641 			if (c == DEOF)
642 				break;
643 			if ((c &= TRIM) == 0)
644 				continue;
645 			/* \ quotes \ $ ` here */
646 			if (c =='\\') {
647 				c = DgetC(0);
648 				if (!any(c, "$\\`"))
649 					unDgetC(c | QUOTE), c = '\\';
650 				else
651 					c |= QUOTE;
652 			}
653 			*mbp++ = c;
654 			if (--mcnt == 0) {
655 				setname("<<");
656 				bferr("Line overflow");
657 			}
658 		}
659 		*mbp++ = 0;
660 
661 		/*
662 		 * If any ` in line do command substitution
663 		 */
664 		mbp = mbuf;
665 		if (any('`', mbp)) {
666 			/*
667 			 * 1 arg to dobackp causes substitution to be literal.
668 			 * Words are broken only at newlines so that all blanks
669 			 * and tabs are preserved.  Blank lines (null words)
670 			 * are not discarded.
671 			 */
672 			vp = dobackp(mbuf, 1);
673 		} else
674 			/* Setup trivial vector similar to return of dobackp */
675 			Dv[0] = mbp, Dv[1] = NOSTR, vp = Dv;
676 
677 		/*
678 		 * Resurrect the words from the command substitution
679 		 * each separated by a newline.  Note that the last
680 		 * newline of a command substitution will have been
681 		 * discarded, but we put a newline after the last word
682 		 * because this represents the newline after the last
683 		 * input line!
684 		 */
685 		for (; *vp; vp++) {
686 			for (mbp = *vp; *mbp; mbp++) {
687 				*obp++ = *mbp & TRIM;
688 				if (--ocnt == 0) {
689 					(void) write(0, obuf, BUFSIZ);
690 					obp = obuf; ocnt = BUFSIZ;
691 				}
692 			}
693 			*obp++ = '\n';
694 			if (--ocnt == 0) {
695 				(void) write(0, obuf, BUFSIZ);
696 				obp = obuf; ocnt = BUFSIZ;
697 			}
698 		}
699 		if (pargv)
700 			blkfree(pargv), pargv = 0;
701 	}
702 }
703