xref: /386bsd/usr/src/bin/sh/parser.c (revision a2142627)
1 /*-
2  * Copyright (c) 1991 The Regents of the University of California.
3  * All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Kenneth Almquist.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *	This product includes software developed by the University of
19  *	California, Berkeley and its contributors.
20  * 4. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  */
36 
37 #ifndef lint
38 static char sccsid[] = "@(#)parser.c	5.3 (Berkeley) 4/12/91";
39 #endif /* not lint */
40 
41 #include "shell.h"
42 #include "parser.h"
43 #include "nodes.h"
44 #include "expand.h"	/* defines rmescapes() */
45 #include "redir.h"	/* defines copyfd() */
46 #include "syntax.h"
47 #include "options.h"
48 #include "input.h"
49 #include "output.h"
50 #include "var.h"
51 #include "error.h"
52 #include "memalloc.h"
53 #include "mystring.h"
54 
55 
56 /*
57  * Shell command parser.
58  */
59 
/*
 * Longest here-document end marker accepted, in characters.  parsefname
 * rejects longer markers and readtoken1 sizes its line buffer from it.
 */
#define EOFMARKLEN 79
61 
62 /* values returned by readtoken */
63 #include "token.def"
64 
65 
66 
/*
 * One pending here document.  The record is allocated when "<<" is
 * scanned, parsefname fills in the end marker and appends the record to
 * heredoclist, and parseheredoc later reads the document text.
 */
struct heredoc {
	struct heredoc *next;	/* next here document in list */
	union node *here;	/* redirection node */
	char *eofmark;		/* string indicating end of input */
	int striptabs;		/* if set, strip leading tabs */
};
73 
74 
75 
76 struct heredoc *heredoclist;	/* list of here documents to read */
77 int parsebackquote;		/* nonzero if we are inside backquotes */
78 int doprompt;			/* if set, prompt the user */
79 int needprompt;			/* true if interactive and at start of line */
80 int lasttoken;			/* last token read */
81 MKINIT int tokpushback;		/* last token pushed back */
82 char *wordtext;			/* text of last word returned by readtoken */
83 int checkkwd;               /* 1 == check for kwds, 2 == also eat newlines */
84 struct nodelist *backquotelist;
85 union node *redirnode;
86 struct heredoc *heredoc;
87 int quoteflag;			/* set if (part of) last token was quoted */
88 int startlinno;			/* line # where last token started */
89 
90 
91 #define GDB_HACK 1 /* avoid local declarations which gdb can't handle */
92 #ifdef GDB_HACK
93 static const char argvars[5] = {CTLVAR, VSNORMAL|VSQUOTE, '@', '=', '\0'};
94 static const char types[] = "}-+?=";
95 #endif
96 
97 
98 STATIC union node *list __P((int));
99 STATIC union node *andor __P((void));
100 STATIC union node *pipeline __P((void));
101 STATIC union node *command __P((void));
102 STATIC union node *simplecmd __P((union node **, union node *));
103 STATIC void parsefname __P((void));
104 STATIC void parseheredoc __P((void));
105 STATIC int readtoken __P((void));
106 STATIC int readtoken1 __P((int, char const *, char *, int));
107 STATIC void attyline __P((void));
108 STATIC int noexpand __P((char *));
109 STATIC void synexpect __P((int));
110 STATIC void synerror __P((char *));
111 
112 #if ATTY
113 STATIC void putprompt __P((char *));
114 #else /* not ATTY */
115 #define putprompt(s)	out2str(s)
116 #endif
117 
118 
119 
120 
121 /*
122  * Read and parse a command.  Returns NEOF on end of file.  (NULL is a
123  * valid parse tree indicating a blank line.)
124  */
125 
126 union node *
parsecmd(interact)127 parsecmd(interact) {
128 	int t;
129 
130 	doprompt = interact;
131 	if (doprompt)
132 		putprompt(ps1val());
133 	needprompt = 0;
134 	if ((t = readtoken()) == TEOF)
135 		return NEOF;
136 	if (t == TNL)
137 		return NULL;
138 	tokpushback++;
139 	return list(1);
140 }
141 
142 
143 STATIC union node *
list(nlflag)144 list(nlflag) {
145 	union node *n1, *n2, *n3;
146 
147 	checkkwd = 2;
148 	if (nlflag == 0 && tokendlist[peektoken()])
149 		return NULL;
150 	n1 = andor();
151 	for (;;) {
152 		switch (readtoken()) {
153 		case TBACKGND:
154 			if (n1->type == NCMD || n1->type == NPIPE) {
155 				n1->ncmd.backgnd = 1;
156 			} else if (n1->type == NREDIR) {
157 				n1->type = NBACKGND;
158 			} else {
159 				n3 = (union node *)stalloc(sizeof (struct nredir));
160 				n3->type = NBACKGND;
161 				n3->nredir.n = n1;
162 				n3->nredir.redirect = NULL;
163 				n1 = n3;
164 			}
165 			goto tsemi;
166 		case TNL:
167 			tokpushback++;
168 			/* fall through */
169 tsemi:	    case TSEMI:
170 			if (readtoken() == TNL) {
171 				parseheredoc();
172 				if (nlflag)
173 					return n1;
174 			} else {
175 				tokpushback++;
176 			}
177 			checkkwd = 2;
178 			if (tokendlist[peektoken()])
179 				return n1;
180 			n2 = andor();
181 			n3 = (union node *)stalloc(sizeof (struct nbinary));
182 			n3->type = NSEMI;
183 			n3->nbinary.ch1 = n1;
184 			n3->nbinary.ch2 = n2;
185 			n1 = n3;
186 			break;
187 		case TEOF:
188 			if (heredoclist)
189 				parseheredoc();
190 			else
191 				pungetc();		/* push back EOF on input */
192 			return n1;
193 		default:
194 			if (nlflag)
195 				synexpect(-1);
196 			tokpushback++;
197 			return n1;
198 		}
199 	}
200 }
201 
202 
203 
204 STATIC union node *
andor()205 andor() {
206 	union node *n1, *n2, *n3;
207 	int t;
208 
209 	n1 = pipeline();
210 	for (;;) {
211 		if ((t = readtoken()) == TAND) {
212 			t = NAND;
213 		} else if (t == TOR) {
214 			t = NOR;
215 		} else {
216 			tokpushback++;
217 			return n1;
218 		}
219 		n2 = pipeline();
220 		n3 = (union node *)stalloc(sizeof (struct nbinary));
221 		n3->type = t;
222 		n3->nbinary.ch1 = n1;
223 		n3->nbinary.ch2 = n2;
224 		n1 = n3;
225 	}
226 }
227 
228 
229 
230 STATIC union node *
pipeline()231 pipeline() {
232 	union node *n1, *pipenode;
233 	struct nodelist *lp, *prev;
234 
235 	n1 = command();
236 	if (readtoken() == TPIPE) {
237 		pipenode = (union node *)stalloc(sizeof (struct npipe));
238 		pipenode->type = NPIPE;
239 		pipenode->npipe.backgnd = 0;
240 		lp = (struct nodelist *)stalloc(sizeof (struct nodelist));
241 		pipenode->npipe.cmdlist = lp;
242 		lp->n = n1;
243 		do {
244 			prev = lp;
245 			lp = (struct nodelist *)stalloc(sizeof (struct nodelist));
246 			lp->n = command();
247 			prev->next = lp;
248 		} while (readtoken() == TPIPE);
249 		lp->next = NULL;
250 		n1 = pipenode;
251 	}
252 	tokpushback++;
253 	return n1;
254 }
255 
256 
257 
258 STATIC union node *
command()259 command() {
260 	union node *n1, *n2;
261 	union node *ap, **app;
262 	union node *cp, **cpp;
263 	union node *redir, **rpp;
264 	int t;
265 
266 	checkkwd = 2;
267 	redir = 0;
268 	rpp = &redir;
269 	/* Check for redirection which may precede command */
270 	while (readtoken() == TREDIR) {
271 		*rpp = n2 = redirnode;
272 		rpp = &n2->nfile.next;
273 		parsefname();
274 	}
275 	tokpushback++;
276 
277 	switch (readtoken()) {
278 	case TIF:
279 		n1 = (union node *)stalloc(sizeof (struct nif));
280 		n1->type = NIF;
281 		n1->nif.test = list(0);
282 		if (readtoken() != TTHEN)
283 			synexpect(TTHEN);
284 		n1->nif.ifpart = list(0);
285 		n2 = n1;
286 		while (readtoken() == TELIF) {
287 			n2->nif.elsepart = (union node *)stalloc(sizeof (struct nif));
288 			n2 = n2->nif.elsepart;
289 			n2->type = NIF;
290 			n2->nif.test = list(0);
291 			if (readtoken() != TTHEN)
292 				synexpect(TTHEN);
293 			n2->nif.ifpart = list(0);
294 		}
295 		if (lasttoken == TELSE)
296 			n2->nif.elsepart = list(0);
297 		else {
298 			n2->nif.elsepart = NULL;
299 			tokpushback++;
300 		}
301 		if (readtoken() != TFI)
302 			synexpect(TFI);
303 		checkkwd = 1;
304 		break;
305 	case TWHILE:
306 	case TUNTIL: {
307 		int got;
308 		n1 = (union node *)stalloc(sizeof (struct nbinary));
309 		n1->type = (lasttoken == TWHILE)? NWHILE : NUNTIL;
310 		n1->nbinary.ch1 = list(0);
311 		if ((got=readtoken()) != TDO) {
312 TRACE(("expecting DO got %s %s\n", tokname[got], got == TWORD ? wordtext : ""));
313 			synexpect(TDO);
314 		}
315 		n1->nbinary.ch2 = list(0);
316 		if (readtoken() != TDONE)
317 			synexpect(TDONE);
318 		checkkwd = 1;
319 		break;
320 	}
321 	case TFOR:
322 		if (readtoken() != TWORD || quoteflag || ! goodname(wordtext))
323 			synerror("Bad for loop variable");
324 		n1 = (union node *)stalloc(sizeof (struct nfor));
325 		n1->type = NFOR;
326 		n1->nfor.var = wordtext;
327 		if (readtoken() == TWORD && ! quoteflag && equal(wordtext, "in")) {
328 			app = ≈
329 			while (readtoken() == TWORD) {
330 				n2 = (union node *)stalloc(sizeof (struct narg));
331 				n2->type = NARG;
332 				n2->narg.text = wordtext;
333 				n2->narg.backquote = backquotelist;
334 				*app = n2;
335 				app = &n2->narg.next;
336 			}
337 			*app = NULL;
338 			n1->nfor.args = ap;
339 			/* A newline or semicolon is required here to end
340 			   the list.  */
341 			if (lasttoken != TNL && lasttoken != TSEMI)
342 				synexpect(-1);
343 		} else {
344 #ifndef GDB_HACK
345 			static const char argvars[5] = {CTLVAR, VSNORMAL|VSQUOTE,
346 								   '@', '=', '\0'};
347 #endif
348 			n2 = (union node *)stalloc(sizeof (struct narg));
349 			n2->type = NARG;
350 			n2->narg.text = (char *)argvars;
351 			n2->narg.backquote = NULL;
352 			n2->narg.next = NULL;
353 			n1->nfor.args = n2;
354 			/* A newline or semicolon is optional here. Anything
355 			   else gets pushed back so we can read it again.  */
356 			if (lasttoken != TNL && lasttoken != TSEMI)
357 				tokpushback++;
358 		}
359 		checkkwd = 2;
360 		if ((t = readtoken()) == TDO)
361 			t = TDONE;
362 		else if (t == TBEGIN)
363 			t = TEND;
364 		else
365 			synexpect(-1);
366 		n1->nfor.body = list(0);
367 		if (readtoken() != t)
368 			synexpect(t);
369 		checkkwd = 1;
370 		break;
371 	case TCASE:
372 		n1 = (union node *)stalloc(sizeof (struct ncase));
373 		n1->type = NCASE;
374 		if (readtoken() != TWORD)
375 			synexpect(TWORD);
376 		n1->ncase.expr = n2 = (union node *)stalloc(sizeof (struct narg));
377 		n2->type = NARG;
378 		n2->narg.text = wordtext;
379 		n2->narg.backquote = backquotelist;
380 		n2->narg.next = NULL;
381 		while (readtoken() == TNL);
382 		if (lasttoken != TWORD || ! equal(wordtext, "in"))
383 			synerror("expecting \"in\"");
384 		cpp = &n1->ncase.cases;
385 		while (checkkwd = 2, readtoken() == TWORD) {
386 			*cpp = cp = (union node *)stalloc(sizeof (struct nclist));
387 			cp->type = NCLIST;
388 			app = &cp->nclist.pattern;
389 			for (;;) {
390 				*app = ap = (union node *)stalloc(sizeof (struct narg));
391 				ap->type = NARG;
392 				ap->narg.text = wordtext;
393 				ap->narg.backquote = backquotelist;
394 				if (readtoken() != TPIPE)
395 					break;
396 				app = &ap->narg.next;
397 				if (readtoken() != TWORD)
398 					synexpect(TWORD);
399 			}
400 			ap->narg.next = NULL;
401 			if (lasttoken != TRP)
402 				synexpect(TRP);
403 			cp->nclist.body = list(0);
404 			if ((t = readtoken()) == TESAC)
405 				tokpushback++;
406 			else if (t != TENDCASE)
407 				synexpect(TENDCASE);
408 			cpp = &cp->nclist.next;
409 		}
410 		*cpp = NULL;
411 		if (lasttoken != TESAC)
412 			synexpect(TESAC);
413 		checkkwd = 1;
414 		break;
415 	case TLP:
416 		n1 = (union node *)stalloc(sizeof (struct nredir));
417 		n1->type = NSUBSHELL;
418 		n1->nredir.n = list(0);
419 		n1->nredir.redirect = NULL;
420 		if (readtoken() != TRP)
421 			synexpect(TRP);
422 		checkkwd = 1;
423 		break;
424 	case TBEGIN:
425 		n1 = list(0);
426 		if (readtoken() != TEND)
427 			synexpect(TEND);
428 		checkkwd = 1;
429 		break;
430 	/* Handle an empty command like other simple commands.  */
431 	case TNL:
432 	case TWORD:
433 		tokpushback++;
434 		return simplecmd(rpp, redir);
435 	default:
436 		synexpect(-1);
437 	}
438 
439 	/* Now check for redirection which may follow command */
440 	while (readtoken() == TREDIR) {
441 		*rpp = n2 = redirnode;
442 		rpp = &n2->nfile.next;
443 		parsefname();
444 	}
445 	tokpushback++;
446 	*rpp = NULL;
447 	if (redir) {
448 		if (n1->type != NSUBSHELL) {
449 			n2 = (union node *)stalloc(sizeof (struct nredir));
450 			n2->type = NREDIR;
451 			n2->nredir.n = n1;
452 			n1 = n2;
453 		}
454 		n1->nredir.redirect = redir;
455 	}
456 	return n1;
457 }
458 
459 
460 STATIC union node *
simplecmd(rpp,redir)461 simplecmd(rpp, redir)
462 	union node **rpp, *redir;
463 	{
464 	union node *args, **app;
465 	union node **orig_rpp = rpp;
466 	union node *n;
467 
468 	/* If we don't have any redirections already, then we must reset
469 	   rpp to be the address of the local redir variable.  */
470 	if (redir == 0)
471 		rpp = &redir;
472 
473 	args = NULL;
474 	app = &args;
475 	/* We save the incoming value, because we need this for shell
476 	   functions.  There can not be a redirect or an argument between
477 	   the function name and the open parenthesis.  */
478 	orig_rpp = rpp;
479 	for (;;) {
480 		if (readtoken() == TWORD) {
481 			n = (union node *)stalloc(sizeof (struct narg));
482 			n->type = NARG;
483 			n->narg.text = wordtext;
484 			n->narg.backquote = backquotelist;
485 			*app = n;
486 			app = &n->narg.next;
487 		} else if (lasttoken == TREDIR) {
488 			*rpp = n = redirnode;
489 			rpp = &n->nfile.next;
490 			parsefname();	/* read name of redirection file */
491 		} else if (lasttoken == TLP && app == &args->narg.next
492 					    && rpp == orig_rpp) {
493 			/* We have a function */
494 			if (readtoken() != TRP)
495 				synexpect(TRP);
496 #ifdef notdef
497 			if (! goodname(n->narg.text))
498 				synerror("Bad function name");
499 #endif
500 			n->type = NDEFUN;
501 			n->narg.next = command();
502 			return n;
503 		} else {
504 			tokpushback++;
505 			break;
506 		}
507 	}
508 	*app = NULL;
509 	*rpp = NULL;
510 	n = (union node *)stalloc(sizeof (struct ncmd));
511 	n->type = NCMD;
512 	n->ncmd.backgnd = 0;
513 	n->ncmd.args = args;
514 	n->ncmd.redirect = redir;
515 	return n;
516 }
517 
518 
519 STATIC void
parsefname()520 parsefname() {
521 	union node *n = redirnode;
522 
523 	if (readtoken() != TWORD)
524 		synexpect(-1);
525 	if (n->type == NHERE) {
526 		struct heredoc *here = heredoc;
527 		struct heredoc *p;
528 		int i;
529 
530 		if (quoteflag == 0)
531 			n->type = NXHERE;
532 		TRACE(("Here document %d\n", n->type));
533 		if (here->striptabs) {
534 			while (*wordtext == '\t')
535 				wordtext++;
536 		}
537 		if (! noexpand(wordtext) || (i = strlen(wordtext)) == 0 || i > EOFMARKLEN)
538 			synerror("Illegal eof marker for << redirection");
539 		rmescapes(wordtext);
540 		here->eofmark = wordtext;
541 		here->next = NULL;
542 		if (heredoclist == NULL)
543 			heredoclist = here;
544 		else {
545 			for (p = heredoclist ; p->next ; p = p->next);
546 			p->next = here;
547 		}
548 	} else if (n->type == NTOFD || n->type == NFROMFD) {
549 		if (is_digit(wordtext[0]))
550 			n->ndup.dupfd = digit_val(wordtext[0]);
551 		else if (wordtext[0] == '-')
552 			n->ndup.dupfd = -1;
553 		else
554 			goto bad;
555 		if (wordtext[1] != '\0') {
556 bad:
557 			synerror("Bad fd number");
558 		}
559 	} else {
560 		n->nfile.fname = (union node *)stalloc(sizeof (struct narg));
561 		n = n->nfile.fname;
562 		n->type = NARG;
563 		n->narg.next = NULL;
564 		n->narg.text = wordtext;
565 		n->narg.backquote = backquotelist;
566 	}
567 }
568 
569 
570 /*
571  * Input any here documents.
572  */
573 
574 STATIC void
parseheredoc()575 parseheredoc() {
576 	struct heredoc *here;
577 	union node *n;
578 
579 	while (heredoclist) {
580 		here = heredoclist;
581 		heredoclist = here->next;
582 		if (needprompt) {
583 			putprompt(ps2val());
584 			needprompt = 0;
585 		}
586 		readtoken1(pgetc(), here->here->type == NHERE? SQSYNTAX : DQSYNTAX,
587 				here->eofmark, here->striptabs);
588 		n = (union node *)stalloc(sizeof (struct narg));
589 		n->narg.type = NARG;
590 		n->narg.next = NULL;
591 		n->narg.text = wordtext;
592 		n->narg.backquote = backquotelist;
593 		here->here->nhere.doc = n;
594 	}
595 }
596 
597 STATIC int
peektoken()598 peektoken() {
599 	int t;
600 
601 	t = readtoken();
602 	tokpushback++;
603 	return (t);
604 }
605 
606 STATIC int xxreadtoken();
607 
608 STATIC int
readtoken()609 readtoken() {
610 	int t;
611 #ifdef DEBUG
612 	int alreadyseen = tokpushback;
613 #endif
614 
615 	t = xxreadtoken();
616 
617 	if (checkkwd) {
618 		/*
619 		 * eat newlines
620 		 */
621 		if (checkkwd == 2) {
622 			checkkwd = 0;
623 			while (t == TNL) {
624 				parseheredoc();
625 				t = xxreadtoken();
626 			}
627 		} else
628 			checkkwd = 0;
629 		/*
630 		 * check for keywords
631 		 */
632 		if (t == TWORD && !quoteflag) {
633 			register char **pp;
634 
635 			for (pp = (char **)parsekwd; *pp; pp++) {
636 				if (**pp == *wordtext && equal(*pp, wordtext)) {
637 					lasttoken = t = (pp - parsekwd) + KWDOFFSET;
638 					TRACE(("keyword %s recognized\n", tokname[t]));
639 					break;
640 				}
641 			}
642 		}
643 	}
644 #ifdef DEBUG
645 	if (!alreadyseen)
646 	    TRACE(("token %s %s\n", tokname[t], t == TWORD ? wordtext : ""));
647 	else
648 	    TRACE(("reread token %s %s\n", tokname[t], t == TWORD ? wordtext : ""));
649 #endif
650 	return (t);
651 }
652 
653 
654 /*
655  * Read the next input token.
656  * If the token is a word, we set backquotelist to the list of cmds in
657  *	backquotes.  We set quoteflag to true if any part of the word was
658  *	quoted.
659  * If the token is TREDIR, then we set redirnode to a structure containing
660  *	the redirection.
661  * In all cases, the variable startlinno is set to the number of the line
662  *	on which the token starts.
663  *
664  * [Change comment:  here documents and internal procedures]
665  * [Readtoken shouldn't have any arguments.  Perhaps we should make the
666  *  word parsing code into a separate routine.  In this case, readtoken
667  *  doesn't need to have any internal procedures, but parseword does.
668  *  We could also make parseoperator in essence the main routine, and
669  *  have parseword (readtoken1?) handle both words and redirection.]
670  */
671 
672 #define RETURN(token)	return lasttoken = token
673 
674 STATIC int
xxreadtoken()675 xxreadtoken() {
676 	register c;
677 
678 	if (tokpushback) {
679 		tokpushback = 0;
680 		return lasttoken;
681 	}
682 	if (needprompt) {
683 		putprompt(ps2val());
684 		needprompt = 0;
685 	}
686 	startlinno = plinno;
687 	for (;;) {	/* until token or start of word found */
688 		c = pgetc_macro();
689 		if (c == ' ' || c == '\t')
690 			continue;		/* quick check for white space first */
691 		switch (c) {
692 		case ' ': case '\t':
693 			continue;
694 		case '#':
695 			while ((c = pgetc()) != '\n' && c != PEOF);
696 			pungetc();
697 			continue;
698 		case '\\':
699 			if (pgetc() == '\n') {
700 				startlinno = ++plinno;
701 				if (doprompt)
702 					putprompt(ps2val());
703 				continue;
704 			}
705 			pungetc();
706 			goto breakloop;
707 		case '\n':
708 			plinno++;
709 			needprompt = doprompt;
710 			RETURN(TNL);
711 		case PEOF:
712 			RETURN(TEOF);
713 		case '&':
714 			if (pgetc() == '&')
715 				RETURN(TAND);
716 			pungetc();
717 			RETURN(TBACKGND);
718 		case '|':
719 			if (pgetc() == '|')
720 				RETURN(TOR);
721 			pungetc();
722 			RETURN(TPIPE);
723 		case ';':
724 			if (pgetc() == ';')
725 				RETURN(TENDCASE);
726 			pungetc();
727 			RETURN(TSEMI);
728 		case '(':
729 			RETURN(TLP);
730 		case ')':
731 			RETURN(TRP);
732 		default:
733 			goto breakloop;
734 		}
735 	}
736 breakloop:
737 	return readtoken1(c, BASESYNTAX, (char *)NULL, 0);
738 #undef RETURN
739 }
740 
741 
742 
743 /*
744  * If eofmark is NULL, read a word or a redirection symbol.  If eofmark
745  * is not NULL, read a here document.  In the latter case, eofmark is the
746  * word which marks the end of the document and striptabs is true if
747  * leading tabs should be stripped from the document.  The argument firstc
748  * is the first character of the input token or document.
749  *
750  * Because C does not have internal subroutines, I have simulated them
751  * using goto's to implement the subroutine linkage.  The following macros
752  * will run code that appears at the end of readtoken1.
753  */
754 
755 #define CHECKEND()	{goto checkend; checkend_return:;}
756 #define PARSEREDIR()	{goto parseredir; parseredir_return:;}
757 #define PARSESUB()	{goto parsesub; parsesub_return:;}
758 #define PARSEBACKQOLD()	{oldstyle = 1; goto parsebackq; parsebackq_oldreturn:;}
759 #define PARSEBACKQNEW()	{oldstyle = 0; goto parsebackq; parsebackq_newreturn:;}
760 
761 STATIC int
readtoken1(firstc,syntax,eofmark,striptabs)762 readtoken1(firstc, syntax, eofmark, striptabs)
763 	int firstc;
764 	char const *syntax;
765 	char *eofmark;
766 	int striptabs;
767 	{
768 	register c = firstc;
769 	register char *out;
770 	int len;
771 	char line[EOFMARKLEN + 1];
772 	struct nodelist *bqlist;
773 	int quotef;
774 	int dblquote;
775 	int varnest;
776 	int oldstyle;
777 
778 	startlinno = plinno;
779 	dblquote = 0;
780 	if (syntax == DQSYNTAX)
781 		dblquote = 1;
782 	quotef = 0;
783 	bqlist = NULL;
784 	varnest = 0;
785 	STARTSTACKSTR(out);
786 	loop: {	/* for each line, until end of word */
787 #if ATTY
788 		if (c == '\034' && doprompt
789 		 && attyset() && ! equal(termval(), "emacs")) {
790 			attyline();
791 			if (syntax == BASESYNTAX)
792 				return readtoken();
793 			c = pgetc();
794 			goto loop;
795 		}
796 #endif
797 		CHECKEND();	/* set c to PEOF if at end of here document */
798 		for (;;) {	/* until end of line or end of word */
799 			CHECKSTRSPACE(3, out);	/* permit 3 calls to USTPUTC */
800 			switch(syntax[c]) {
801 			case CNL:	/* '\n' */
802 				if (syntax == BASESYNTAX)
803 					goto endword;	/* exit outer loop */
804 				USTPUTC(c, out);
805 				plinno++;
806 				if (doprompt) {
807 					putprompt(ps2val());
808 				}
809 				c = pgetc();
810 				goto loop;		/* continue outer loop */
811 			case CWORD:
812 				USTPUTC(c, out);
813 				break;
814 			case CCTL:
815 				if (eofmark == NULL || dblquote)
816 					USTPUTC(CTLESC, out);
817 				USTPUTC(c, out);
818 				break;
819 			case CBACK:	/* backslash */
820 				c = pgetc();
821 				if (c == PEOF) {
822 					USTPUTC('\\', out);
823 					pungetc();
824 				} else if (c == '\n') {
825 					if (doprompt)
826 						putprompt(ps2val());
827 				} else {
828 					if (dblquote && c != '\\' && c != '`' && c != '$'
829 							 && (c != '"' || eofmark != NULL))
830 						USTPUTC('\\', out);
831 					if (SQSYNTAX[c] == CCTL)
832 						USTPUTC(CTLESC, out);
833 					USTPUTC(c, out);
834 					quotef++;
835 				}
836 				break;
837 			case CSQUOTE:
838 				syntax = SQSYNTAX;
839 				break;
840 			case CDQUOTE:
841 				syntax = DQSYNTAX;
842 				dblquote = 1;
843 				break;
844 			case CENDQUOTE:
845 				if (eofmark) {
846 					USTPUTC(c, out);
847 				} else {
848 					syntax = BASESYNTAX;
849 					quotef++;
850 					dblquote = 0;
851 				}
852 				break;
853 			case CVAR:	/* '$' */
854 				PARSESUB();		/* parse substitution */
855 				break;
856 			case CENDVAR:	/* '}' */
857 				if (varnest > 0) {
858 					varnest--;
859 					USTPUTC(CTLENDVAR, out);
860 				} else {
861 					USTPUTC(c, out);
862 				}
863 				break;
864 			case CBQUOTE:	/* '`' */
865 				PARSEBACKQOLD();
866 				break;
867 			case CEOF:
868 				goto endword;		/* exit outer loop */
869 			default:
870 				if (varnest == 0)
871 					goto endword;	/* exit outer loop */
872 				USTPUTC(c, out);
873 			}
874 			c = pgetc_macro();
875 		}
876 	}
877 endword:
878 	if (syntax != BASESYNTAX && ! parsebackquote && eofmark == NULL)
879 		synerror("Unterminated quoted string");
880 	if (varnest != 0) {
881 		startlinno = plinno;
882 		synerror("Missing '}'");
883 	}
884 	USTPUTC('\0', out);
885 	len = out - stackblock();
886 	out = stackblock();
887 	if (eofmark == NULL) {
888 		if ((c == '>' || c == '<')
889 		 && quotef == 0
890 		 && len <= 2
891 		 && (*out == '\0' || is_digit(*out))) {
892 			PARSEREDIR();
893 			return lasttoken = TREDIR;
894 		} else {
895 			pungetc();
896 		}
897 	}
898 	quoteflag = quotef;
899 	backquotelist = bqlist;
900 	grabstackblock(len);
901 	wordtext = out;
902 	return lasttoken = TWORD;
903 /* end of readtoken routine */
904 
905 
906 
907 /*
908  * Check to see whether we are at the end of the here document.  When this
909  * is called, c is set to the first character of the next input line.  If
910  * we are at the end of the here document, this routine sets the c to PEOF.
911  */
912 
913 checkend: {
914 	if (eofmark) {
915 		if (striptabs) {
916 			while (c == '\t')
917 				c = pgetc();
918 		}
919 		if (c == *eofmark) {
920 			if (pfgets(line, sizeof line) != NULL) {
921 				register char *p, *q;
922 
923 				p = line;
924 				for (q = eofmark + 1 ; *q && *p == *q ; p++, q++);
925 				if (*p == '\n' && *q == '\0') {
926 					c = PEOF;
927 					plinno++;
928 					needprompt = doprompt;
929 				} else {
930 					ppushback(line, strlen(line));
931 				}
932 			}
933 		}
934 	}
935 	goto checkend_return;
936 }
937 
938 
939 /*
940  * Parse a redirection operator.  The variable "out" points to a string
941  * specifying the fd to be redirected.  The variable "c" contains the
942  * first character of the redirection operator.
943  */
944 
945 parseredir: {
946 	char fd = *out;
947 	union node *np;
948 
949 	np = (union node *)stalloc(sizeof (struct nfile));
950 	if (c == '>') {
951 		np->nfile.fd = 1;
952 		c = pgetc();
953 		if (c == '>')
954 			np->type = NAPPEND;
955 		else if (c == '&')
956 			np->type = NTOFD;
957 		else {
958 			np->type = NTO;
959 			pungetc();
960 		}
961 	} else {	/* c == '<' */
962 		np->nfile.fd = 0;
963 		c = pgetc();
964 		if (c == '<') {
965 			if (sizeof (struct nfile) != sizeof (struct nhere)) {
966 				np = (union node *)stalloc(sizeof (struct nhere));
967 				np->nfile.fd = 0;
968 			}
969 			np->type = NHERE;
970 			heredoc = (struct heredoc *)stalloc(sizeof (struct heredoc));
971 			heredoc->here = np;
972 			if ((c = pgetc()) == '-') {
973 				heredoc->striptabs = 1;
974 			} else {
975 				heredoc->striptabs = 0;
976 				pungetc();
977 			}
978 		} else if (c == '&')
979 			np->type = NFROMFD;
980 		else {
981 			np->type = NFROM;
982 			pungetc();
983 		}
984 	}
985 	if (fd != '\0')
986 		np->nfile.fd = digit_val(fd);
987 	redirnode = np;
988 	goto parseredir_return;
989 }
990 
991 
992 /*
993  * Parse a substitution.  At this point, we have read the dollar sign
994  * and nothing else.
995  */
996 
997 parsesub: {
998 	int subtype;
999 	int typeloc;
1000 	int flags;
1001 	char *p;
1002 #ifndef GDB_HACK
1003 	static const char types[] = "}-+?=";
1004 #endif
1005 
1006 	c = pgetc();
1007 	if (c != '(' && c != '{' && !is_name(c) && !is_special(c)) {
1008 		USTPUTC('$', out);
1009 		pungetc();
1010 	} else if (c == '(') {	/* $(command) */
1011 		PARSEBACKQNEW();
1012 	} else {
1013 		USTPUTC(CTLVAR, out);
1014 		typeloc = out - stackblock();
1015 		USTPUTC(VSNORMAL, out);
1016 		subtype = VSNORMAL;
1017 		if (c == '{') {
1018 			c = pgetc();
1019 			subtype = 0;
1020 		}
1021 		if (is_name(c)) {
1022 			do {
1023 				STPUTC(c, out);
1024 				c = pgetc();
1025 			} while (is_in_name(c));
1026 		} else {
1027 			if (! is_special(c))
1028 badsub:				synerror("Bad substitution");
1029 			USTPUTC(c, out);
1030 			c = pgetc();
1031 		}
1032 		STPUTC('=', out);
1033 		flags = 0;
1034 		if (subtype == 0) {
1035 			if (c == ':') {
1036 				flags = VSNUL;
1037 				c = pgetc();
1038 			}
1039 			p = strchr(types, c);
1040 			if (p == NULL)
1041 				goto badsub;
1042 			subtype = p - types + VSNORMAL;
1043 		} else {
1044 			pungetc();
1045 		}
1046 		if (dblquote)
1047 			flags |= VSQUOTE;
1048 		*(stackblock() + typeloc) = subtype | flags;
1049 		if (subtype != VSNORMAL)
1050 			varnest++;
1051 	}
1052 	goto parsesub_return;
1053 }
1054 
1055 
1056 /*
1057  * Called to parse command substitutions.  Newstyle is set if the command
1058  * is enclosed inside $(...); nlpp is a pointer to the head of the linked
1059  * list of commands (passed by reference), and savelen is the number of
1060  * characters on the top of the stack which must be preserved.
1061  */
1062 
1063 parsebackq: {
1064 	struct nodelist **nlpp;
1065 	int savepbq;
1066 	union node *n;
1067 	char *volatile str;
1068 	struct jmploc jmploc;
1069 	struct jmploc *volatile savehandler;
1070 	int savelen;
1071 
1072 	savepbq = parsebackquote;
1073 	if (setjmp(jmploc.loc)) {
1074 		if (str)
1075 			ckfree(str);
1076 		parsebackquote = 0;
1077 		handler = savehandler;
1078 		longjmp(handler->loc, 1);
1079 	}
1080 	INTOFF;
1081 	str = NULL;
1082 	savelen = out - stackblock();
1083 	if (savelen > 0) {
1084 		str = ckmalloc(savelen);
1085 		bcopy(stackblock(), str, savelen);
1086 	}
1087 	savehandler = handler;
1088 	handler = &jmploc;
1089 	INTON;
1090 	if (oldstyle) {
1091 		/* We must read until the closing backquote, giving special
1092 		   treatment to some slashes, and then push the string and
1093 		   reread it as input, interpreting it normally.  */
1094 		register char *out;
1095 		register c;
1096 		int savelen;
1097 		char *str;
1098 
1099 		STARTSTACKSTR(out);
1100 		while ((c = pgetc ()) != '`') {
1101 			if (c == '\\') {
1102 				c = pgetc ();
1103 				if (c != '\\' && c != '`' && c != '$'
1104 				    && (!dblquote || c != '"'))
1105 					STPUTC('\\', out);
1106 			}
1107 			STPUTC(c, out);
1108 		}
1109 		STPUTC('\0', out);
1110 		savelen = out - stackblock();
1111 		if (savelen > 0) {
1112 			str = ckmalloc(savelen);
1113 			bcopy(stackblock(), str, savelen);
1114 		}
1115 		setinputstring(str, 1);
1116 	}
1117 	nlpp = &bqlist;
1118 	while (*nlpp)
1119 		nlpp = &(*nlpp)->next;
1120 	*nlpp = (struct nodelist *)stalloc(sizeof (struct nodelist));
1121 	(*nlpp)->next = NULL;
1122 	parsebackquote = oldstyle;
1123 	n = list(0);
1124 	if (!oldstyle && (readtoken() != TRP))
1125 		synexpect(TRP);
1126 	(*nlpp)->n = n;
1127 	/* Start reading from old file again.  */
1128 	if (oldstyle)
1129 		popfile();
1130 	while (stackblocksize() <= savelen)
1131 		growstackblock();
1132 	STARTSTACKSTR(out);
1133 	if (str) {
1134 		bcopy(str, out, savelen);
1135 		STADJUST(savelen, out);
1136 		INTOFF;
1137 		ckfree(str);
1138 		str = NULL;
1139 		INTON;
1140 	}
1141 	parsebackquote = savepbq;
1142 	handler = savehandler;
1143 	USTPUTC(CTLBACKQ + dblquote, out);
1144 	if (oldstyle)
1145 		goto parsebackq_oldreturn;
1146 	else
1147 		goto parsebackq_newreturn;
1148 }
1149 
1150 } /* end of readtoken */
1151 
1152 
1153 
/*
 * Fragment guarded by the mkinit symbol: clears any pushed-back token.
 * NOTE(review): presumably collected by the mkinit tool into the shell's
 * reset routine rather than compiled here -- confirm against the build.
 */
#ifdef mkinit
RESET {
	tokpushback = 0;
}
#endif
1159 
1160 
1161 #if ATTY
1162 /*
1163  * Called to process a command generated by atty.  We execute the line,
1164  * and catch any errors that occur so they don't propagate outside of
1165  * this routine.
1166  */
1167 
1168 STATIC void
attyline()1169 attyline() {
1170 	char line[256];
1171 	struct stackmark smark;
1172 	struct jmploc jmploc;
1173 	struct jmploc *volatile savehandler;
1174 
1175 	if (pfgets(line, sizeof line) == NULL)
1176 		return;				/* "can't happen" */
1177 	if (setjmp(jmploc.loc)) {
1178 		if (exception == EXERROR)
1179 			out2str("\033]D\n");
1180 		handler = savehandler;
1181 		longjmp(handler, 1);
1182 	}
1183 	savehandler = handler;
1184 	handler = &jmploc;
1185 	setstackmark(&smark);
1186 	evalstring(line);
1187 	popstackmark(&smark);
1188 	handler = savehandler;
1189 	doprompt = 1;
1190 }
1191 
1192 
1193 /*
1194  * Output a prompt for atty.  We output the prompt as part of the
1195  * appropriate escape sequence.
1196  */
1197 
1198 STATIC void
putprompt(s)1199 putprompt(s)
1200 	char *s;
1201 	{
1202 	register char *p;
1203 
1204 	if (attyset() && ! equal(termval(), "emacs")) {
1205 		if (strchr(s, '\7'))
1206 			out2c('\7');
1207 		out2str("\033]P1;");
1208 		for (p = s ; *p ; p++) {
1209 			if ((unsigned)(*p - ' ') <= '~' - ' ')
1210 				out2c(*p);
1211 		}
1212 		out2c('\n');
1213 	} else {
1214 		out2str(s);
1215 	}
1216 }
1217 #endif
1218 
1219 
1220 
1221 /*
1222  * Returns true if the text contains nothing to expand (no dollar signs
1223  * or backquotes).
1224  */
1225 
1226 STATIC int
noexpand(text)1227 noexpand(text)
1228 	char *text;
1229 	{
1230 	register char *p;
1231 	register char c;
1232 
1233 	p = text;
1234 	while ((c = *p++) != '\0') {
1235 		if (c == CTLESC)
1236 			p++;
1237 		else if (BASESYNTAX[c] == CCTL)
1238 			return 0;
1239 	}
1240 	return 1;
1241 }
1242 
1243 
/*
 * Return true if the argument is a legal variable name (a letter or
 * underscore followed by zero or more letters, underscores, and digits).
 * The character classes are those of is_name() and is_in_name(); an empty
 * string is rejected because its first character fails is_name().
 */

int
goodname(name)
	char *name;
	{
	register char *p;

	p = name;
	if (! is_name(*p))
		return 0;
	while (*++p) {
		if (! is_in_name(*p))
			return 0;
	}
	return 1;
}
1264 
1265 
1266 /*
1267  * Called when an unexpected token is read during the parse.  The argument
1268  * is the token that is expected, or -1 if more than one type of token can
1269  * occur at this point.
1270  */
1271 
1272 STATIC void
synexpect(token)1273 synexpect(token) {
1274 	char msg[64];
1275 
1276 	if (token >= 0) {
1277 		fmtstr(msg, 64, "%s unexpected (expecting %s)",
1278 			tokname[lasttoken], tokname[token]);
1279 	} else {
1280 		fmtstr(msg, 64, "%s unexpected", tokname[lasttoken]);
1281 	}
1282 	synerror(msg);
1283 }
1284 
1285 
1286 STATIC void
synerror(msg)1287 synerror(msg)
1288 	char *msg;
1289 	{
1290 	if (commandname)
1291 		outfmt(&errout, "%s: %d: ", commandname, startlinno);
1292 	outfmt(&errout, "Syntax error: %s\n", msg);
1293 	error((char *)NULL);
1294 }
1295