1 /*
2  * lex.c - lexical analysis
3  *
4  * This file is part of zsh, the Z shell.
5  *
6  * Copyright (c) 1992-1997 Paul Falstad
7  * All rights reserved.
8  *
9  * Permission is hereby granted, without written agreement and without
10  * license or royalty fees, to use, copy, modify, and distribute this
11  * software and to distribute modified versions of this software for any
12  * purpose, provided that the above copyright notice and the following
13  * two paragraphs appear in all copies of this software.
14  *
15  * In no event shall Paul Falstad or the Zsh Development Group be liable
16  * to any party for direct, indirect, special, incidental, or consequential
17  * damages arising out of the use of this software and its documentation,
18  * even if Paul Falstad and the Zsh Development Group have been advised of
19  * the possibility of such damage.
20  *
21  * Paul Falstad and the Zsh Development Group specifically disclaim any
22  * warranties, including, but not limited to, the implied warranties of
23  * merchantability and fitness for a particular purpose.  The software
24  * provided hereunder is on an "as is" basis, and Paul Falstad and the
25  * Zsh Development Group have no obligation to provide maintenance,
26  * support, updates, enhancements, or modifications.
27  *
28  */
29 
30 #include "zsh.mdh"
31 #include "lex.pro"
32 
33 #define LEX_HEAP_SIZE (32)
34 
35 /* tokens */
36 
37 /**/
38 mod_export char ztokens[] = "#$^*(())$=|{}[]`<>>?~`,-!'\"\\\\";
39 
40 /* parts of the current token */
41 
42 /**/
43 char *zshlextext;
44 /**/
45 mod_export char *tokstr;
46 /**/
47 mod_export enum lextok tok;
48 /**/
49 mod_export int tokfd;
50 
51 /*
52  * Line number at which the first character of a token was found.
53  * We always set this in gettok(), which is always called from
54  * zshlex() unless we have reached an error.  So it is always
55  * valid when parsing.  It is not useful during execution
56  * of the parsed structure.
57  */
58 
59 /**/
60 zlong toklineno;
61 
62 /* lexical analyzer error flag */
63 
64 /**/
65 mod_export int lexstop;
66 
67 /* if != 0, this is the first line of the command */
68 
69 /**/
70 mod_export int isfirstln;
71 
72 /* if != 0, this is the first char of the command (not including white space) */
73 
74 /**/
75 int isfirstch;
76 
77 /* flag that an alias should be expanded after expansion ending in space */
78 
79 /**/
80 int inalmore;
81 
82 /*
83  * Don't do spelling correction.
84  * Bit 1 is only valid for the current word.  It's
85  * set when we detect a lookahead that stops the word from
86  * needing correction.
87  */
88 
89 /**/
90 int nocorrect;
91 
92 /*
93  * TBD: the following exported variables are part of the non-interface
94  * with ZLE for completion.  They are poorly named and the whole
95  * scheme is incredibly brittle.  One piece of robustness is applied:
96  * the variables are only set if LEXFLAGS_ZLE is set.  Improvements
97  * should therefore concentrate on areas with this flag set.
98  *
99  * Cursor position and line length in zle when the line is
100  * metafied for access from the main shell.
101  */
102 
103 /**/
104 mod_export int zlemetacs, zlemetall;
105 
106 /* inwhat says what exactly we are in     *
107  * (its value is one of the IN_* things). */
108 
109 /**/
110 mod_export int inwhat;
111 
112 /* 1 if x added to complete in a blank between words */
113 
114 /**/
115 mod_export int addedx;
116 
117 /* wb and we hold the beginning/end position of the word we are completing. */
118 
119 /**/
120 mod_export int wb, we;
121 
/*
 * wordbeg: set by gettok() to the value of inbufct just after the first
 * character of a token has been read (less one when qbang is set and
 * that character is bangchar).  It is only updated when LEXFLAGS_ZLE is
 * set in lexflags and INP_ALIAS is not set in inbufflags.
 */
122 /**/
123 mod_export int wordbeg;
124 
/*
 * parbegin, parend: values of inbufct recorded by the SETPARBEGIN and
 * SETPAREND macros in this file.  SETPAREND compares both against -1,
 * which presumably means "not set" -- TODO confirm where they are
 * initialised.
 */
125 /**/
126 mod_export int parbegin;
127 
128 /**/
129 mod_export int parend;
130 
131 
132 /* 1 if aliases should not be expanded */
133 
134 /**/
135 mod_export int noaliases;
136 
137 /*
138  * If non-zero, we are parsing a line sent to us by the editor, or some
139  * other string that's not part of standard command input (e.g. eval is
140  * part of normal command input).
141  *
142  * Set of bits from LEXFLAGS_*.
143  *
144  * Note that although it is passed into the lexer as an input, the
145  * lexer can set it to zero after finding the word it's searching for.
146  * This only happens if the line being parsed actually does come from
147  * ZLE, and hence the bit LEXFLAGS_ZLE is set.
148  */
149 
150 /**/
151 mod_export int lexflags;
152 
153 /* don't recognize comments */
154 
155 /**/
156 mod_export int nocomments;
157 
158 /* add raw input characters while parsing command substitution */
159 
160 /**/
161 int lex_add_raw;
162 
163 /* variables associated with the above */
164 
165 static char *tokstr_raw;
166 static struct lexbufstate lexbuf_raw;
167 
168 /* text of punctuation tokens */
169 
170 /**/
171 mod_export char *tokstrings[WHILE + 1] = {
172     NULL,	/* NULLTOK	  0  */
173     ";",	/* SEPER	     */
174     "\\n",	/* NEWLIN	     */
175     ";",	/* SEMI		     */
176     ";;",	/* DSEMI	     */
177     "&",	/* AMPER	  5  */
178     "(",	/* INPAR	     */
179     ")",	/* OUTPAR	     */
180     "||",	/* DBAR		     */
181     "&&",	/* DAMPER	     */
182     ">",	/* OUTANG	  10 */
183     ">|",	/* OUTANGBANG	     */
184     ">>",	/* DOUTANG	     */
185     ">>|",	/* DOUTANGBANG	     */
186     "<",	/* INANG	     */
187     "<>",	/* INOUTANG	  15 */
188     "<<",	/* DINANG	     */
189     "<<-",	/* DINANGDASH	     */
190     "<&",	/* INANGAMP	     */
191     ">&",	/* OUTANGAMP	     */
192     "&>",	/* AMPOUTANG	  20 */
193     "&>|",	/* OUTANGAMPBANG     */
194     ">>&",	/* DOUTANGAMP	     */
195     ">>&|",	/* DOUTANGAMPBANG    */
196     "<<<",	/* TRINANG	     */
197     "|",	/* BAR		  25 */
198     "|&",	/* BARAMP	     */
199     "()",	/* INOUTPAR	     */
200     "((",	/* DINPAR	     */
201     "))",	/* DOUTPAR	     */
202     "&|",	/* AMPERBANG	  30 */
203     ";&",	/* SEMIAMP	     */
204     ";|",	/* SEMIBAR	     */
205 };
206 
207 /* lexical state */
208 
209 static int dbparens;
210 static struct lexbufstate lexbuf = { NULL, 256, 0 };
211 
212 /* save lexical context */
213 
214 /**/
215 void
lex_context_save(struct lex_stack * ls,int toplevel)216 lex_context_save(struct lex_stack *ls, int toplevel)
217 {
218     (void)toplevel;
219 
220     ls->dbparens = dbparens;
221     ls->isfirstln = isfirstln;
222     ls->isfirstch = isfirstch;
223     ls->lexflags = lexflags;
224 
225     ls->tok = tok;
226     ls->tokstr = tokstr;
227     ls->zshlextext = zshlextext;
228     ls->lexbuf = lexbuf;
229     ls->lex_add_raw = lex_add_raw;
230     ls->tokstr_raw = tokstr_raw;
231     ls->lexbuf_raw = lexbuf_raw;
232     ls->lexstop = lexstop;
233     ls->toklineno = toklineno;
234 
235     tokstr = zshlextext = lexbuf.ptr = NULL;
236     lexbuf.siz = 256;
237     tokstr_raw = lexbuf_raw.ptr = NULL;
238     lexbuf_raw.siz = lexbuf_raw.len = lex_add_raw = 0;
239 }
240 
241 /* restore lexical context */
242 
243 /**/
244 mod_export void
lex_context_restore(const struct lex_stack * ls,int toplevel)245 lex_context_restore(const struct lex_stack *ls, int toplevel)
246 {
247     (void)toplevel;
248 
249     dbparens = ls->dbparens;
250     isfirstln = ls->isfirstln;
251     isfirstch = ls->isfirstch;
252     lexflags = ls->lexflags;
253     tok = ls->tok;
254     tokstr = ls->tokstr;
255     zshlextext = ls->zshlextext;
256     lexbuf = ls->lexbuf;
257     lex_add_raw = ls->lex_add_raw;
258     tokstr_raw = ls->tokstr_raw;
259     lexbuf_raw = ls->lexbuf_raw;
260     lexstop = ls->lexstop;
261     toklineno = ls->toklineno;
262 }
263 
264 /**/
265 void
zshlex(void)266 zshlex(void)
267 {
268     if (tok == LEXERR)
269 	return;
270     do {
271 	if (inrepeat_)
272 	    ++inrepeat_;
273 	if (inrepeat_ == 3 && isset(SHORTLOOPS))
274 	    incmdpos = 1;
275 	tok = gettok();
276     } while (tok != ENDINPUT && exalias());
277     nocorrect &= 1;
278     if (tok == NEWLIN || tok == ENDINPUT) {
279 	while (hdocs) {
280 	    struct heredocs *next = hdocs->next;
281 	    char *doc, *munged_term;
282 
283 	    hwbegin(0);
284 	    cmdpush(hdocs->type == REDIR_HEREDOC ? CS_HEREDOC : CS_HEREDOCD);
285 	    munged_term = dupstring(hdocs->str);
286 	    STOPHIST
287 	    doc = gethere(&munged_term, hdocs->type);
288 	    ALLOWHIST
289 	    cmdpop();
290 	    hwend();
291 	    if (!doc) {
292 		zerr("here document too large");
293 		while (hdocs) {
294 		    next = hdocs->next;
295 		    zfree(hdocs, sizeof(struct heredocs));
296 		    hdocs = next;
297 		}
298 		tok = LEXERR;
299 		break;
300 	    }
301 	    setheredoc(hdocs->pc, REDIR_HERESTR, doc, hdocs->str,
302 		       munged_term);
303 	    zfree(hdocs, sizeof(struct heredocs));
304 	    hdocs = next;
305 	}
306     }
307     if (tok != NEWLIN)
308 	isnewlin = 0;
309     else
310 	isnewlin = (inbufct) ? -1 : 1;
311     if (tok == SEMI || (tok == NEWLIN && !(lexflags & LEXFLAGS_NEWLINE)))
312 	tok = SEPER;
313 }
314 
315 /**/
316 mod_export void
ctxtlex(void)317 ctxtlex(void)
318 {
319     static int oldpos;
320 
321     zshlex();
322     switch (tok) {
323     case SEPER:
324     case NEWLIN:
325     case SEMI:
326     case DSEMI:
327     case SEMIAMP:
328     case SEMIBAR:
329     case AMPER:
330     case AMPERBANG:
331     case INPAR:
332     case INBRACE:
333     case DBAR:
334     case DAMPER:
335     case BAR:
336     case BARAMP:
337     case INOUTPAR:
338     case DOLOOP:
339     case THEN:
340     case ELIF:
341     case ELSE:
342     case DOUTBRACK:
343 	incmdpos = 1;
344 	break;
345     case STRING:
346     case TYPESET:
347  /* case ENVSTRING: */
348     case ENVARRAY:
349     case OUTPAR:
350     case CASE:
351     case DINBRACK:
352 	incmdpos = 0;
353 	break;
354 
355     default:
356 	/* nothing to do, keep compiler happy */
357 	break;
358     }
359     if (tok != DINPAR)
360 	infor = tok == FOR ? 2 : 0;
361     if (IS_REDIROP(tok) || tok == FOR || tok == FOREACH || tok == SELECT) {
362 	inredir = 1;
363 	oldpos = incmdpos;
364 	incmdpos = 0;
365     } else if (inredir) {
366 	incmdpos = oldpos;
367 	inredir = 0;
368     }
369 }
370 
/*
 * Actions for the first character of a token, looked up in lexact1[].
 * initlextabs() gives each character of its lx1 string the value of
 * that character's index in the string, so these numbers have to match
 * the positions there; every other character gets LX1_OTHER.
 */
371 #define LX1_BKSLASH 0
372 #define LX1_COMMENT 1
373 #define LX1_NEWLIN 2
374 #define LX1_SEMI 3
375 #define LX1_AMPER 5
376 #define LX1_BAR 6
377 #define LX1_INPAR 7
378 #define LX1_OUTPAR 8
379 #define LX1_INANG 13
380 #define LX1_OUTANG 14
381 #define LX1_OTHER 15
382 
/*
 * Actions looked up in lexact2[] by gettokstr().  As above, the values
 * up to LX2_BANG are the indices of the corresponding characters in the
 * lx2 string in initlextabs().  LX2_OTHER is the default, LX2_META is
 * assigned to the Meta character, and '&' is additionally mapped to
 * LX2_BREAK.
 */
383 #define LX2_BREAK 0
384 #define LX2_OUTPAR 1
385 #define LX2_BAR 2
386 #define LX2_STRING 3
387 #define LX2_INBRACK 4
388 #define LX2_OUTBRACK 5
389 #define LX2_TILDE 6
390 #define LX2_INPAR 7
391 #define LX2_INBRACE 8
392 #define LX2_OUTBRACE 9
393 #define LX2_OUTANG 10
394 #define LX2_INANG 11
395 #define LX2_EQUALS 12
396 #define LX2_BKSLASH 13
397 #define LX2_QUOTE 14
398 #define LX2_DQUOTE 15
399 #define LX2_BQUOTE 16
400 #define LX2_COMMA 17
401 #define LX2_DASH 18
402 #define LX2_BANG 19
403 #define LX2_OTHER 20
404 #define LX2_META 21
405 
/*
 * Per-character tables filled in by initlextabs(): lexact1 and lexact2
 * hold the LX1_ and LX2_ actions, lextok2 holds the character to store
 * for each input character (the character itself unless initlextabs()
 * replaces it with a token).
 */
406 static unsigned char lexact1[256], lexact2[256], lextok2[256];
407 
408 /**/
409 void
initlextabs(void)410 initlextabs(void)
411 {
412     int t0;
413     static char *lx1 = "\\q\n;!&|(){}[]<>";
414     static char *lx2 = ";)|$[]~({}><=\\\'\"`,-!";
415 
416     for (t0 = 0; t0 != 256; t0++) {
417        lexact1[t0] = LX1_OTHER;
418 	lexact2[t0] = LX2_OTHER;
419 	lextok2[t0] = t0;
420     }
421     for (t0 = 0; lx1[t0]; t0++)
422 	lexact1[(int)lx1[t0]] = t0;
423     for (t0 = 0; lx2[t0]; t0++)
424 	lexact2[(int)lx2[t0]] = t0;
425     lexact2['&'] = LX2_BREAK;
426     lexact2[STOUC(Meta)] = LX2_META;
427     lextok2['*'] = Star;
428     lextok2['?'] = Quest;
429     lextok2['{'] = Inbrace;
430     lextok2['['] = Inbrack;
431     lextok2['$'] = String;
432     lextok2['~'] = Tilde;
433     lextok2['#'] = Pound;
434     lextok2['^'] = Hat;
435 }
436 
437 /* initialize lexical state */
438 
439 /**/
440 void
lexinit(void)441 lexinit(void)
442 {
443     nocorrect = dbparens = lexstop = 0;
444     tok = ENDINPUT;
445 }
446 
447 /* add a char to the string buffer */
448 
449 /**/
450 void
add(int c)451 add(int c)
452 {
453     *lexbuf.ptr++ = c;
454     if (lexbuf.siz == ++lexbuf.len) {
455 	int newbsiz = lexbuf.siz * 2;
456 
457 	if (newbsiz > inbufct && inbufct > lexbuf.siz)
458 	    newbsiz = inbufct;
459 
460 	tokstr = (char *)hrealloc(tokstr, lexbuf.siz, newbsiz);
461 	lexbuf.ptr = tokstr + lexbuf.len;
462 	/* len == bsiz, so bptr is at the start of newly allocated memory */
463 	memset(lexbuf.ptr, 0, newbsiz - lexbuf.siz);
464 	lexbuf.siz = newbsiz;
465     }
466 }
467 
/*
 * SETPARBEGIN: when LEXFLAGS_ZLE is set in lexflags, INP_ALIAS is not
 * set in inbufflags, and zlemetacs >= zlemetall + 1 - inbufct, record
 * the current inbufct in parbegin.
 *
 * Note that the macro expands to a bare brace block rather than a
 * do { } while (0) statement.
 */
468 #define SETPARBEGIN {							\
469 	if ((lexflags & LEXFLAGS_ZLE) && !(inbufflags & INP_ALIAS) &&	\
470 	    zlemetacs >= zlemetall+1-inbufct)				\
471 	    parbegin = inbufct;		      \
472     }
/*
 * SETPAREND: under the same two flag conditions, and only if parbegin
 * is not -1 and parend is still -1: reset parbegin to -1 when
 * zlemetacs >= zlemetall + 1 - inbufct, otherwise record the current
 * inbufct in parend.
 */
473 #define SETPAREND {						      \
474 	if ((lexflags & LEXFLAGS_ZLE) && !(inbufflags & INP_ALIAS) && \
475 	    parbegin != -1 && parend == -1) {			      \
476 	    if (zlemetacs >= zlemetall + 1 - inbufct)		      \
477 		parbegin = -1;					      \
478 	    else						      \
479 		parend = inbufct;				      \
480 	}							      \
481     }
482 
483 enum {
484     CMD_OR_MATH_CMD,
485     CMD_OR_MATH_MATH,
486     CMD_OR_MATH_ERR
487 };
488 
489 /*
490  * Return one of the above.  If it couldn't be
491  * parsed as math, but there was no gross error, it's a command.
492  */
493 
494 static int
cmd_or_math(int cs_type)495 cmd_or_math(int cs_type)
496 {
497     int oldlen = lexbuf.len;
498     int c;
499     int oinflags = inbufflags;
500 
501     cmdpush(cs_type);
502     inbufflags |= INP_APPEND;
503     c = dquote_parse(')', 0);
504     if (!(oinflags & INP_APPEND))
505 	inbufflags &= ~INP_APPEND;
506     cmdpop();
507     *lexbuf.ptr = '\0';
508     if (!c) {
509 	/* Successfully parsed, see if it was math */
510 	c = hgetc();
511 	if (c == ')')
512 	    return CMD_OR_MATH_MATH; /* yes */
513 	hungetc(c);
514 	lexstop = 0;
515 	c = ')';
516     } else if (lexstop) {
517 	/* we haven't got anything to unget */
518 	return CMD_OR_MATH_ERR;
519     }
520     /* else unsuccessful: unget the whole thing */
521     hungetc(c);
522     lexstop = 0;
523     while (lexbuf.len > oldlen && !(errflag & ERRFLAG_ERROR)) {
524 	lexbuf.len--;
525 	hungetc(itok(*--lexbuf.ptr) ?
526 		ztokens[*lexbuf.ptr - Pound] : *lexbuf.ptr);
527     }
528     if (errflag)
529 	return CMD_OR_MATH_ERR;
530     hungetc('(');
531     return errflag ? CMD_OR_MATH_ERR : CMD_OR_MATH_CMD;
532 }
533 
534 
535 /*
536  * Parse either a $(( ... )) or a $(...)
537  * Return the same as cmd_or_math().
538  */
539 static int
cmd_or_math_sub(void)540 cmd_or_math_sub(void)
541 {
542     int c = hgetc(), ret;
543 
544     if (c == '(') {
545 	int lexpos = (int)(lexbuf.ptr - tokstr);
546 	add(Inpar);
547 	add('(');
548 	if ((ret = cmd_or_math(CS_MATHSUBST)) == CMD_OR_MATH_MATH) {
549 	    tokstr[lexpos] = Inparmath;
550 	    add(')');
551 	    return CMD_OR_MATH_MATH;
552 	}
553 	if (ret == CMD_OR_MATH_ERR)
554 	    return CMD_OR_MATH_ERR;
555 	lexbuf.ptr -= 2;
556 	lexbuf.len -= 2;
557     } else {
558 	hungetc(c);
559 	lexstop = 0;
560     }
561     return skipcomm() ? CMD_OR_MATH_ERR : CMD_OR_MATH_CMD;
562 }
563 
564 /* Check whether we're looking at valid numeric globbing syntax      *
565  * (/\<[0-9]*-[0-9]*\>/).  Call pointing just after the opening "<". *
566  * Leaves the input in the same place, returning 0 or 1.             */
567 
568 /**/
569 static int
isnumglob(void)570 isnumglob(void)
571 {
572     int c, ec = '-', ret = 0;
573     int tbs = 256, n = 0;
574     char *tbuf = (char *)zalloc(tbs);
575 
576     while(1) {
577 	c = hgetc();
578 	if(lexstop) {
579 	    lexstop = 0;
580 	    break;
581 	}
582 	tbuf[n++] = c;
583 	if(!idigit(c)) {
584 	    if(c != ec)
585 		break;
586 	    if(ec == '>') {
587 		ret = 1;
588 		break;
589 	    }
590 	    ec = '>';
591 	}
592 	if(n == tbs)
593 	    tbuf = (char *)realloc(tbuf, tbs *= 2);
594     }
595     while(n--)
596 	hungetc(tbuf[n]);
597     zfree(tbuf, tbs);
598     return ret;
599 }
600 
/*
 * Read one token from the input and return its type.
 *
 * Leading blanks (as tested by iblank()) are skipped and toklineno is
 * set from lineno.  If the input has stopped, ENDINPUT is returned, or
 * LEXERR if errflag is set.
 *
 * While dbparens is set, the text up to the closing delimiter is read
 * with dquote_parse() into tokstr and DINPAR, DOUTPAR or LEXERR is
 * returned.  Otherwise a single leading digit directly followed by
 * '<', '>' or "&>" is treated as a file descriptor number; it ends up
 * in tokfd when a redirection token is returned.
 *
 * Comments are handled next.  After that the first character selects
 * an action through lexact1[]: separators, parentheses and redirection
 * operators are recognised and returned directly.  Anything that is not
 * returned from the switch is passed to gettokstr() to be read as a
 * string.
 */
601 /**/
602 static enum lextok
gettok(void)603 gettok(void)
604 {
605     int c, d;
606     int peekfd = -1;
607     enum lextok peek;
608 
    /* also the restart point after a backslash-newline (see LX1_BKSLASH) */
609   beginning:
610     tokstr = NULL;
611     while (iblank(c = hgetc()) && !lexstop);
612     toklineno = lineno;
613     if (lexstop)
614 	return (errflag) ? LEXERR : ENDINPUT;
615     isfirstln = 0;
616     if ((lexflags & LEXFLAGS_ZLE) && !(inbufflags & INP_ALIAS))
617 	wordbeg = inbufct - (qbang && c == bangchar);
618     hwbegin(-1-(qbang && c == bangchar));
619     /* word includes the last character read and possibly \ before ! */
620     if (dbparens) {
	/* parse up to ';' when infor is set, otherwise up to ')' */
621 	lexbuf.len = 0;
622 	lexbuf.ptr = tokstr = (char *) hcalloc(lexbuf.siz = LEX_HEAP_SIZE);
623 	hungetc(c);
624 	cmdpush(CS_MATH);
625 	c = dquote_parse(infor ? ';' : ')', 0);
626 	cmdpop();
627 	*lexbuf.ptr = '\0';
628 	if (!c && infor) {
629 	    infor--;
630 	    return DINPAR;
631 	}
	/* a second ')' must follow for the closing "))" */
632 	if (c || (c = hgetc()) != ')') {
633 	    hungetc(c);
634 	    return LEXERR;
635 	}
636 	dbparens = 0;
637 	return DOUTPAR;
638     } else if (idigit(c)) {	/* handle 1< foo */
639 	d = hgetc();
640 	if(d == '&') {
641 	    d = hgetc();
642 	    if(d == '>') {
		/* digit followed by "&>": remember the fd, lex from '&' */
643 		peekfd = c - '0';
644 		hungetc('>');
645 		c = '&';
646 	    } else {
647 		hungetc(d);
648 		lexstop = 0;
649 		hungetc('&');
650 	    }
651 	} else if (d == '>' || d == '<') {
652 	    peekfd = c - '0';
653 	    c = d;
654 	} else {
655 	    hungetc(d);
656 	    lexstop = 0;
657 	}
658     }
659 
660     /* chars in initial position in word */
661 
662     /*
663      * Handle comments.  There are some special cases when this
664      * is not normal command input: lexflags implies we are examining
665      * a line lexically without it being used for normal command input.
666      */
667     if (c == hashchar && !nocomments &&
668 	(isset(INTERACTIVECOMMENTS) ||
669 	 ((!lexflags || (lexflags & LEXFLAGS_COMMENTS)) && !expanding &&
670 	  (!interact || unset(SHINSTDIN) || strin)))) {
671 	/* History is handled here to prevent extra  *
672 	 * newlines being inserted into the history. */
673 
674 	if (lexflags & LEXFLAGS_COMMENTS_KEEP) {
675 	    lexbuf.len = 0;
676 	    lexbuf.ptr = tokstr =
677 		(char *)hcalloc(lexbuf.siz = LEX_HEAP_SIZE);
678 	    add(c);
679 	}
680 	hwabort();
	/* read to the end of the line; keep the text only if asked to */
681 	while ((c = ingetc()) != '\n' && !lexstop) {
682 	    hwaddc(c);
683 	    addtoline(c);
684 	    if (lexflags & LEXFLAGS_COMMENTS_KEEP)
685 		add(c);
686 	}
687 
688 	if (errflag)
689 	    peek = LEXERR;
690 	else {
691 	    if (lexflags & LEXFLAGS_COMMENTS_KEEP) {
		/* the comment is returned as a STRING token */
692 		*lexbuf.ptr = '\0';
693 		if (!lexstop)
694 		    hungetc(c);
695 		peek = STRING;
696 	    } else {
697 		hwend();
698 		hwbegin(0);
699 		hwaddc('\n');
700 		addtoline('\n');
701 		/*
702 		 * If splitting a line and removing comments,
703 		 * we don't want a newline token since it's
704 		 * treated specially.
705 		 */
706 		if ((lexflags & LEXFLAGS_COMMENTS_STRIP) && lexstop)
707 		    peek = ENDINPUT;
708 		else
709 		    peek = NEWLIN;
710 	    }
711 	}
712 	return peek;
713     }
    /* a case that ends in break falls through to gettokstr() below */
714     switch (lexact1[STOUC(c)]) {
715     case LX1_BKSLASH:
716 	d = hgetc();
717 	if (d == '\n')
718 	    goto beginning;
719 	hungetc(d);
720 	lexstop = 0;
721 	break;
722     case LX1_NEWLIN:
723 	return NEWLIN;
724     case LX1_SEMI:
725 	d = hgetc();
726 	if(d == ';')
727 	    return DSEMI;
728 	else if(d == '&')
729 	    return SEMIAMP;
730 	else if (d == '|')
731 	    return SEMIBAR;
732 	hungetc(d);
733 	lexstop = 0;
734 	return SEMI;
735     case LX1_AMPER:
736 	d = hgetc();
737 	if (d == '&')
738 	    return DAMPER;
739 	else if (d == '!' || d == '|')
740 	    return AMPERBANG;
741 	else if (d == '>') {
742 	    tokfd = peekfd;
743 	    d = hgetc();
744 	    if (d == '!' || d == '|')
745 		return OUTANGAMPBANG;
746 	    else if (d == '>') {
747 		d = hgetc();
748 		if (d == '!' || d == '|')
749 		    return DOUTANGAMPBANG;
750 		hungetc(d);
751 		lexstop = 0;
752 		return DOUTANGAMP;
753 	    }
754 	    hungetc(d);
755 	    lexstop = 0;
756 	    return AMPOUTANG;
757 	}
758 	hungetc(d);
759 	lexstop = 0;
760 	return AMPER;
761     case LX1_BAR:
762 	d = hgetc();
763 	if (d == '|' && !incasepat)
764 	    return DBAR;
765 	else if (d == '&')
766 	    return BARAMP;
767 	hungetc(d);
768 	lexstop = 0;
769 	return BAR;
770     case LX1_INPAR:
771 	d = hgetc();
772 	if (d == '(') {
773 	    if (infor) {
774 		dbparens = 1;
775 		return DINPAR;
776 	    }
777 	    if (incmdpos || (isset(SHGLOB) && !isset(KSHGLOB))) {
778 		lexbuf.len = 0;
779 		lexbuf.ptr = tokstr = (char *)
780 		    hcalloc(lexbuf.siz = LEX_HEAP_SIZE);
781 		switch (cmd_or_math(CS_MATH)) {
782 		case CMD_OR_MATH_MATH:
783 		    return DINPAR;
784 
785 		case CMD_OR_MATH_CMD:
786 		    /*
787 		     * Not math, so we don't return the contents
788 		     * as a string in this case.
789 		     */
790 		    tokstr = NULL;
791 		    return INPAR;
792 
793 		case CMD_OR_MATH_ERR:
794 		    /*
795 		     * LEXFLAGS_ACTIVE means we came from bufferwords(),
796 		     * so we treat as an incomplete math expression
797 		     */
798 		    if (lexflags & LEXFLAGS_ACTIVE)
799 			tokstr = dyncat("((", tokstr ? tokstr : "");
800 		    /* fall through */
801 
802 		default:
803 		    return LEXERR;
804 		}
805 	    }
806 	} else if (d == ')')
807 	    return INOUTPAR;
808 	hungetc(d);
809 	lexstop = 0;
810 	if (!(isset(SHGLOB) || incond == 1 || incmdpos))
811 	    break;
812 	return INPAR;
813     case LX1_OUTPAR:
814 	return OUTPAR;
815     case LX1_INANG:
816 	d = hgetc();
817 	if (d == '(') {
818 	    hungetc(d);
819 	    lexstop = 0;
	    /*
	     * Not a redirection after all.  If a digit was taken as a
	     * file descriptor, push the operator character back and
	     * continue the word from that digit.  Also reached by goto
	     * from below and from LX1_OUTANG.
	     */
820 	    unpeekfd:
821 	    if(peekfd != -1) {
822 		hungetc(c);
823 		c = '0' + peekfd;
824 	    }
825 	    break;
826 	}
827 	if (d == '>') {
828 	    peek = INOUTANG;
829 	} else if (d == '<') {
830 	    int e = hgetc();
831 
832 	    if (e == '(') {
		/* "<<(": return one '<' and leave "<(" in the input */
833 		hungetc(e);
834 		hungetc(d);
835 		peek = INANG;
836 	    } else if (e == '<')
837 		peek = TRINANG;
838 	    else if (e == '-')
839 		peek = DINANGDASH;
840 	    else {
841 		hungetc(e);
842 		lexstop = 0;
843 		peek = DINANG;
844 	    }
845 	} else if (d == '&') {
846 	    peek = INANGAMP;
847 	} else {
848 	    hungetc(d);
849 	    if(isnumglob())
850 		goto unpeekfd;
851 	    peek = INANG;
852 	}
853 	tokfd = peekfd;
854 	return peek;
855     case LX1_OUTANG:
856 	d = hgetc();
857 	if (d == '(') {
858 	    hungetc(d);
859 	    goto unpeekfd;
860 	} else if (d == '&') {
861 	    d = hgetc();
862 	    if (d == '!' || d == '|')
863 		peek = OUTANGAMPBANG;
864 	    else {
865 		hungetc(d);
866 		lexstop = 0;
867 		peek = OUTANGAMP;
868 	    }
869 	} else if (d == '!' || d == '|')
870 	    peek = OUTANGBANG;
871 	else if (d == '>') {
872 	    d = hgetc();
873 	    if (d == '&') {
874 		d = hgetc();
875 		if (d == '!' || d == '|')
876 		    peek = DOUTANGAMPBANG;
877 		else {
878 		    hungetc(d);
879 		    lexstop = 0;
880 		    peek = DOUTANGAMP;
881 		}
882 	    } else if (d == '!' || d == '|')
883 		peek = DOUTANGBANG;
884 	    else if (d == '(') {
		/* ">>(": return one '>' and leave ">(" in the input */
885 		hungetc(d);
886 		hungetc('>');
887 		peek = OUTANG;
888 	    } else {
889 		hungetc(d);
890 		lexstop = 0;
891 		peek = DOUTANG;
892 		if (isset(HISTALLOWCLOBBER))
893 		    hwaddc('|');
894 	    }
895 	} else {
896 	    hungetc(d);
897 	    lexstop = 0;
898 	    peek = OUTANG;
899 	    if (!incond && isset(HISTALLOWCLOBBER))
900 		hwaddc('|');
901 	}
902 	tokfd = peekfd;
903 	return peek;
904     }
905 
906     /* we've started a string, now get the *
907      * rest of it, performing tokenization */
908     return gettokstr(c, 0);
909 }
910 
911 /*
912  * Get the remains of a token string.  This has two uses.
913  * When called from gettok(), with sub = 0, we have already identified
914  * any interesting initial character and want to get the rest of
915  * what we now know is a string.  However, the string may still include
916  * metacharacters and potentially substitutions.
917  *
918  * When called from parse_subst_string() with sub = 1, we are not
919  * fully parsing a command line, merely tokenizing a string.
920  * In this case we always add characters to the parsed string
921  * unless there is a parse error.
922  */
923 
924 /**/
925 static enum lextok
gettokstr(int c,int sub)926 gettokstr(int c, int sub)
927 {
928     int bct = 0, pct = 0, brct = 0, seen_brct = 0, fdpar = 0;
929     int intpos = 1, in_brace_param = 0;
930     int inquote, unmatched = 0;
931     enum lextok peek;
932 #ifdef DEBUG
933     int ocmdsp = cmdsp;
934 #endif
935 
936     peek = STRING;
937     if (!sub) {
938 	lexbuf.len = 0;
939 	lexbuf.ptr = tokstr = (char *) hcalloc(lexbuf.siz = LEX_HEAP_SIZE);
940     }
941     for (;;) {
942 	int act;
943 	int e;
944 	int inbl = inblank(c);
945 
946 	if (fdpar && !inbl && c != ')')
947 	    fdpar = 0;
948 
949 	if (inbl && !in_brace_param && !pct)
950 	    act = LX2_BREAK;
951 	else {
952 	    act = lexact2[STOUC(c)];
953 	    c = lextok2[STOUC(c)];
954 	}
955 	switch (act) {
956 	case LX2_BREAK:
957 	    if (!in_brace_param && !sub)
958 		goto brk;
959 	    break;
960 	case LX2_META:
961 	    c = hgetc();
962 #ifdef DEBUG
963 	    if (lexstop) {
964 		fputs("BUG: input terminated by Meta\n", stderr);
965 		fflush(stderr);
966 		goto brk;
967 	    }
968 #endif
969 	    add(Meta);
970 	    break;
971 	case LX2_OUTPAR:
972 	    if (fdpar) {
973 		/* this is a single word `(   )', treat as INOUTPAR */
974 		add(c);
975 		*lexbuf.ptr = '\0';
976 		return INOUTPAR;
977 	    }
978 	    if ((sub || in_brace_param) && isset(SHGLOB))
979 		break;
980 	    if (!in_brace_param && !pct--) {
981 		if (sub) {
982 		    pct = 0;
983 		    break;
984 		} else
985 		    goto brk;
986 	    }
987 	    c = Outpar;
988 	    break;
989 	case LX2_BAR:
990 	    if (!pct && !in_brace_param) {
991 		if (sub)
992 		    break;
993 		else
994 		    goto brk;
995 	    }
996 	    if (unset(SHGLOB) || (!sub && !in_brace_param))
997 		c = Bar;
998 	    break;
999 	case LX2_STRING:
1000 	    e = hgetc();
1001 	    if (e == '[') {
1002 		cmdpush(CS_MATHSUBST);
1003 		add(String);
1004 		add(Inbrack);
1005 		c = dquote_parse(']', sub);
1006 		cmdpop();
1007 		if (c) {
1008 		    peek = LEXERR;
1009 		    goto brk;
1010 		}
1011 		c = Outbrack;
1012 	    } else if (e == '(') {
1013 		add(String);
1014 		switch (cmd_or_math_sub()) {
1015 		case CMD_OR_MATH_CMD:
1016 		    c = Outpar;
1017 		    break;
1018 
1019 		case CMD_OR_MATH_MATH:
1020 		    c = Outparmath;
1021 		    break;
1022 
1023 		default:
1024 		    peek = LEXERR;
1025 		    goto brk;
1026 		}
1027 	    } else {
1028 		if (e == '{') {
1029 		    add(c);
1030 		    c = Inbrace;
1031 		    ++bct;
1032 		    cmdpush(CS_BRACEPAR);
1033 		    if (!in_brace_param) {
1034 			if ((in_brace_param = bct))
1035 			    seen_brct = 0;
1036 		    }
1037 		} else {
1038 		    hungetc(e);
1039 		    lexstop = 0;
1040 		}
1041 	    }
1042 	    break;
1043 	case LX2_INBRACK:
1044 	    if (!in_brace_param) {
1045 		brct++;
1046 		seen_brct = 1;
1047 	    }
1048 	    c = Inbrack;
1049 	    break;
1050 	case LX2_OUTBRACK:
1051 	    if (!in_brace_param)
1052 		brct--;
1053 	    if (brct < 0)
1054 		brct = 0;
1055 	    c = Outbrack;
1056 	    break;
1057 	case LX2_INPAR:
1058 	    if (isset(SHGLOB)) {
1059 		if (sub || in_brace_param)
1060 		    break;
1061 		if (incasepat > 0 && !lexbuf.len)
1062 		    return INPAR;
1063 		if (!isset(KSHGLOB) && lexbuf.len)
1064 		    goto brk;
1065 	    }
1066 	    if (!in_brace_param) {
1067 		if (!sub) {
1068 		    e = hgetc();
1069 		    hungetc(e);
1070 		    lexstop = 0;
1071 		    /* For command words, parentheses are only
1072 		     * special at the start.  But now we're tokenising
1073 		     * the remaining string.  So I don't see what
1074 		     * the old incmdpos test here is for.
1075 		     *   pws 1999/6/8
1076 		     *
1077 		     * Oh, no.
1078 		     *  func1(   )
1079 		     * is a valid function definition in [k]sh.  The best
1080 		     * thing we can do, without really nasty lookahead tricks,
1081 		     * is break if we find a blank after a parenthesis.  At
1082 		     * least this can't happen inside braces or brackets.  We
1083 		     * only allow this with SHGLOB (set for both sh and ksh).
1084 		     *
1085 		     * Things like `print @( |foo)' should still
1086 		     * work, because [k]sh don't allow multiple words
1087 		     * in a function definition, so we only do this
1088 		     * in command position.
1089 		     *   pws 1999/6/14
1090 		     */
1091 		    if (e == ')' || (isset(SHGLOB) && inblank(e) && !bct &&
1092 				     !brct && !intpos && incmdpos)) {
1093 			/*
1094 			 * Either a () token, or a command word with
1095 			 * something suspiciously like a ksh function
1096 			 * definition.
1097 			 * The current word isn't spellcheckable.
1098 			 */
1099 			nocorrect |= 2;
1100 			goto brk;
1101 		    }
1102 		}
1103 		/*
1104 		 * This also handles the [k]sh `foo( )' function definition.
1105 		 * Maintain a variable fdpar, set as long as a single set of
1106 		 * parentheses contains only space.  Then if we get to the
1107 		 * closing parenthesis and it is still set, we can assume we
1108 		 * have a function definition.  Only do this at the start of
1109 		 * the word, since the (...) must be a separate token.
1110 		 */
1111 		if (!pct++ && isset(SHGLOB) && intpos && !bct && !brct)
1112 		    fdpar = 1;
1113 	    }
1114 	    c = Inpar;
1115 	    break;
1116 	case LX2_INBRACE:
1117 	    if (isset(IGNOREBRACES) || sub)
1118 		c = '{';
1119 	    else {
1120 		if (!lexbuf.len && incmdpos) {
1121 		    add('{');
1122 		    *lexbuf.ptr = '\0';
1123 		    return STRING;
1124 		}
1125 		if (in_brace_param) {
1126 		    cmdpush(CS_BRACE);
1127 		}
1128 		bct++;
1129 	    }
1130 	    break;
1131 	case LX2_OUTBRACE:
1132 	    if ((isset(IGNOREBRACES) || sub) && !in_brace_param)
1133 		break;
1134 	    if (!bct)
1135 		break;
1136 	    if (in_brace_param) {
1137 		cmdpop();
1138 	    }
1139 	    if (bct-- == in_brace_param)
1140 		in_brace_param = 0;
1141 	    c = Outbrace;
1142 	    break;
1143 	case LX2_COMMA:
1144 	    if (unset(IGNOREBRACES) && !sub && bct > in_brace_param)
1145 		c = Comma;
1146 	    break;
1147 	case LX2_OUTANG:
1148 	    if (in_brace_param || sub)
1149 		break;
1150 	    e = hgetc();
1151 	    if (e != '(') {
1152 		hungetc(e);
1153 		lexstop = 0;
1154 		goto brk;
1155 	    }
1156 	    add(OutangProc);
1157 	    if (skipcomm()) {
1158 		peek = LEXERR;
1159 		goto brk;
1160 	    }
1161 	    c = Outpar;
1162 	    break;
1163 	case LX2_INANG:
1164 	    if (isset(SHGLOB) && sub)
1165 		break;
1166 	    e = hgetc();
1167 	    if (!(in_brace_param || sub) && e == '(') {
1168 		add(Inang);
1169 		if (skipcomm()) {
1170 		    peek = LEXERR;
1171 		    goto brk;
1172 		}
1173 		c = Outpar;
1174 		break;
1175 	    }
1176 	    hungetc(e);
1177 	    if(isnumglob()) {
1178 		add(Inang);
1179 		while ((c = hgetc()) != '>')
1180 		    add(c);
1181 		c = Outang;
1182 		break;
1183 	    }
1184 	    lexstop = 0;
1185 	    if (in_brace_param || sub)
1186 		break;
1187 	    goto brk;
1188 	case LX2_EQUALS:
1189 	    if (!sub) {
1190 		if (intpos) {
1191 		    e = hgetc();
1192 		    if (e != '(') {
1193 			hungetc(e);
1194 			lexstop = 0;
1195 			c = Equals;
1196 		    } else {
1197 			add(Equals);
1198 			if (skipcomm()) {
1199 			    peek = LEXERR;
1200 			    goto brk;
1201 			}
1202 			c = Outpar;
1203 		    }
1204 		} else if (peek != ENVSTRING &&
1205 			   (incmdpos || intypeset) && !bct && !brct) {
1206 		    char *t = tokstr;
1207 		    if (idigit(*t))
1208 			while (++t < lexbuf.ptr && idigit(*t));
1209 		    else {
1210 			int sav = *lexbuf.ptr;
1211 			*lexbuf.ptr = '\0';
1212 			t = itype_end(t, IIDENT, 0);
1213 			if (t < lexbuf.ptr) {
1214 			    skipparens(Inbrack, Outbrack, &t);
1215 			} else {
1216 			    *lexbuf.ptr = sav;
1217 			}
1218 		    }
1219 		    if (*t == '+')
1220 			t++;
1221 		    if (t == lexbuf.ptr) {
1222 			e = hgetc();
1223 			if (e == '(') {
1224 			    *lexbuf.ptr = '\0';
1225 			    return ENVARRAY;
1226 			}
1227 			hungetc(e);
1228 			lexstop = 0;
1229 			peek = ENVSTRING;
1230 			intpos = 2;
1231 		    } else
1232 			c = Equals;
1233 		} else
1234 		    c = Equals;
1235 	    }
1236 	    break;
1237 	case LX2_BKSLASH:
1238 	    c = hgetc();
1239 	    if (c == '\n') {
1240 		c = hgetc();
1241 		if (!lexstop)
1242 		    continue;
1243 	    } else {
1244 		add(Bnull);
1245 		if (c == STOUC(Meta)) {
1246 		    c = hgetc();
1247 #ifdef DEBUG
1248 		    if (lexstop) {
1249 			fputs("BUG: input terminated by Meta\n", stderr);
1250 			fflush(stderr);
1251 			goto brk;
1252 		    }
1253 #endif
1254 		    add(Meta);
1255 		}
1256 	    }
1257 	    if (lexstop)
1258 		goto brk;
1259 	    break;
1260 	case LX2_QUOTE: {
1261 	    int strquote = (lexbuf.len && lexbuf.ptr[-1] == String);
1262 
1263 	    add(Snull);
1264 	    cmdpush(CS_QUOTE);
1265 	    for (;;) {
1266 		STOPHIST
1267 		while ((c = hgetc()) != '\'' && !lexstop) {
1268 		    if (strquote && c == '\\') {
1269 			c = hgetc();
1270 			if (lexstop)
1271 			    break;
1272 			/*
1273 			 * Mostly we don't need to do anything special
1274 			 * with escape backslashes or closing quotes
1275 			 * inside $'...'; however in completion we
1276 			 * need to be able to strip multiple backslashes
1277 			 * neatly.
1278 			 */
1279 			if (c == '\\' || c == '\'')
1280 			    add(Bnull);
1281 			else
1282 			    add('\\');
1283 		    } else if (!sub && isset(CSHJUNKIEQUOTES) && c == '\n') {
1284 			if (lexbuf.ptr[-1] == '\\')
1285 			    lexbuf.ptr--, lexbuf.len--;
1286 			else
1287 			    break;
1288 		    }
1289 		    add(c);
1290 		}
1291 		ALLOWHIST
1292 		if (c != '\'') {
1293 		    unmatched = '\'';
1294 		    /* Not an error when called from bufferwords() */
1295 		    if (!(lexflags & LEXFLAGS_ACTIVE))
1296 			peek = LEXERR;
1297 		    cmdpop();
1298 		    goto brk;
1299 		}
1300 		e = hgetc();
1301 		if (e != '\'' || unset(RCQUOTES) || strquote)
1302 		    break;
1303 		add(c);
1304 	    }
1305 	    cmdpop();
1306 	    hungetc(e);
1307 	    lexstop = 0;
1308 	    c = Snull;
1309 	    break;
1310 	}
1311 	case LX2_DQUOTE:
1312 	    add(Dnull);
1313 	    cmdpush(CS_DQUOTE);
1314 	    c = dquote_parse('"', sub);
1315 	    cmdpop();
1316 	    if (c) {
1317 		unmatched = '"';
1318 		/* Not an error when called from bufferwords() */
1319 		if (!(lexflags & LEXFLAGS_ACTIVE))
1320 		    peek = LEXERR;
1321 		goto brk;
1322 	    }
1323 	    c = Dnull;
1324 	    break;
1325 	case LX2_BQUOTE:
1326 	    add(Tick);
1327 	    cmdpush(CS_BQUOTE);
1328 	    SETPARBEGIN
1329 	    inquote = 0;
1330 	    while ((c = hgetc()) != '`' && !lexstop) {
1331 		if (c == '\\') {
1332 		    c = hgetc();
1333 		    if (c != '\n') {
1334 			add(c == '`' || c == '\\' || c == '$' ? Bnull : '\\');
1335 			add(c);
1336 		    }
1337 		    else if (!sub && isset(CSHJUNKIEQUOTES))
1338 			add(c);
1339 		} else {
1340 		    if (!sub && isset(CSHJUNKIEQUOTES) && c == '\n') {
1341 			break;
1342 		    }
1343 		    add(c);
1344 		    if (c == '\'') {
1345 			if ((inquote = !inquote))
1346 			    STOPHIST
1347 			else
1348 			    ALLOWHIST
1349 		    }
1350 		}
1351 	    }
1352 	    if (inquote)
1353 		ALLOWHIST
1354 	    cmdpop();
1355 	    if (c != '`') {
1356 		unmatched = '`';
1357 		/* Not an error when called from bufferwords() */
1358 		if (!(lexflags & LEXFLAGS_ACTIVE))
1359 		    peek = LEXERR;
1360 		goto brk;
1361 	    }
1362 	    c = Tick;
1363 	    SETPAREND
1364 	    break;
1365 	case LX2_DASH:
1366 	    /*
1367 	     * - shouldn't be treated as a special character unless
1368 	     * we're in a pattern.  Unfortunately, working out for
1369 	     * sure in complicated expressions whether we're in a
1370 	     * pattern is tricky.  So we'll make it special and
1371 	     * turn it back any time we don't need it special.
1372 	     * This is not ideal as it's a lot of work.
1373 	     */
1374 	    c = Dash;
1375            break;
1376        case LX2_BANG:
1377            /*
1378             * Same logic as Dash, for ! to perform negation in range.
1379             */
1380            if (seen_brct)
1381                c = Bang;
1382            else
1383                c = '!';
1384        }
1385        add(c);
1386        c = hgetc();
1387 	if (intpos)
1388 	    intpos--;
1389 	if (lexstop)
1390 	    break;
1391     }
1392   brk:
1393     if (errflag) {
1394 	if (in_brace_param) {
1395 	    while(bct-- >= in_brace_param)
1396 		cmdpop();
1397 	}
1398 	return LEXERR;
1399     }
1400     hungetc(c);
1401     if (unmatched && !(lexflags & LEXFLAGS_ACTIVE))
1402 	zerr("unmatched %c", unmatched);
1403     if (in_brace_param) {
1404 	while(bct-- >= in_brace_param)
1405 	    cmdpop();
1406 	zerr("closing brace expected");
1407     } else if (unset(IGNOREBRACES) && !sub && lexbuf.len > 1 &&
1408 	       peek == STRING && lexbuf.ptr[-1] == '}' &&
1409 	       lexbuf.ptr[-2] != Bnull) {
1410 	/* hack to get {foo} command syntax work */
1411 	lexbuf.ptr--;
1412 	lexbuf.len--;
1413 	lexstop = 0;
1414 	hungetc('}');
1415     }
1416     *lexbuf.ptr = '\0';
1417     DPUTS(cmdsp != ocmdsp, "BUG: gettok: cmdstack changed.");
1418     return peek;
1419 }
1420 
1421 
/*
 * Parse input as if in double quotes, appending the result to the
 * current token with add().
 *
 * endchar is the end character to expect.  If it is ')' or ']', or if
 * infor is set, the text is treated as a math expression: endchar is
 * only accepted once parentheses and brackets balance, and an
 * unmatched closing ')' or ']' is an error.
 *
 * sub has got something to do with whether we are doing quoted
 * substitution.  As far as this function is concerned, a non-zero sub
 * turns off the CSHJUNKIEQUOTES handling of newlines and, when endchar
 * is ']', makes a backslash before '"' quote it with Bnull.
 *
 * Return non-zero for error (character to unget), else zero.  If input
 * ran out, the result is instead a flag that is non-zero when we were
 * still inside backquotes, endchar is not '\0', or an error had
 * already been found.
 *
 * Any CS_BQUOTE / CS_BRACEPAR entries still pushed on the command stack
 * when the loop ends are popped before returning.
 */

/**/
static int
dquote_parse(char endchar, int sub)
{
    /*
     * pct, brct: count of unclosed '(' and '['.
     * bct: count of unclosed "${".
     * intick: 0 outside `...`, 1 inside, 2 inside '...' within `...`.
     */
    int pct = 0, brct = 0, bct = 0, intick = 0, err = 0;
    int c;
    int math = endchar == ')' || endchar == ']' || infor;
    int zlemath = math && zlemetacs > zlemetall + addedx - inbufct;

    /*
     * Keep reading until endchar is seen outside any ${...}, outside
     * backquotes and (in math mode) with parentheses and brackets
     * balanced, or until the input is exhausted.
     */
    while (((c = hgetc()) != endchar || bct ||
	    (math && ((pct > 0) || (brct > 0))) ||
	    intick) && !lexstop) {
      cont:
	switch (c) {
	case '\\':
	    c = hgetc();
	    if (c != '\n') {
		/*
		 * A backslash before a character that is special here
		 * becomes Bnull; the character itself is added at the
		 * bottom of the loop.
		 */
		if (c == '$' || c == '\\' || (c == '}' && !intick && bct) ||
		    c == endchar || c == '`' ||
		    (endchar == ']' && (c == '[' || c == ']' ||
					c == '(' || c == ')' ||
					c == '{' || c == '}' ||
					(c == '"' && sub))))
		    add(Bnull);
		else {
		    /* lexstop is implicitly handled here */
		    /*
		     * Otherwise keep the backslash literally and run the
		     * following character through the switch again.
		     */
		    add('\\');
		    goto cont;
		}
	    } else if (sub || unset(CSHJUNKIEQUOTES) || endchar != '"')
		/* backslash-newline: drop both characters */
		continue;
	    break;
	case '\n':
	    /* With CSHJUNKIEQUOTES a bare newline ends a "..." with an error */
	    err = !sub && isset(CSHJUNKIEQUOTES) && endchar == '"';
	    break;
	case '$':
	    /* Inside backquotes the '$' is added unchanged */
	    if (intick)
		break;
	    c = hgetc();
	    if (c == '(') {
		/* $(...) or $((...)): cmd_or_math_sub() decides which */
		add(Qstring);
		switch (cmd_or_math_sub()) {
		case CMD_OR_MATH_CMD:
		    c = Outpar;
		    break;

		case CMD_OR_MATH_MATH:
		    c = Outparmath;
		    break;

		default:
		    err = 1;
		    break;
		}
	    } else if (c == '[') {
		/* $[...]: parse the body recursively up to the ']' */
		add(String);
		add(Inbrack);
		cmdpush(CS_MATHSUBST);
		err = dquote_parse(']', sub);
		cmdpop();
		c = Outbrack;
	    } else if (c == '{') {
		/* ${...}: remember the open brace; closed in case '}' */
		add(Qstring);
		c = Inbrace;
		cmdpush(CS_BRACEPAR);
		bct++;
	    } else if (c == '$')
		/* "$$": Qstring followed by a literal '$' added below */
		add(Qstring);
	    else {
		/* Anything else: put it back and just add Qstring */
		hungetc(c);
		lexstop = 0;
		c = Qstring;
	    }
	    break;
	case '}':
	    /* Only closes a ${ opened above, and not inside backquotes */
	    if (intick || !bct)
		break;
	    c = Outbrace;
	    bct--;
	    cmdpop();
	    break;
	case '`':
	    /* Toggle backquote state, undoing any pending STOPHIST first */
	    c = Qtick;
	    if (intick == 2)
		ALLOWHIST
	    if ((intick = !intick)) {
		SETPARBEGIN
		cmdpush(CS_BQUOTE);
	    } else {
		SETPAREND
	        cmdpop();
	    }
	    break;
	case '\'':
	    /*
	     * Single quotes only matter inside backquotes, where they
	     * switch intick between 1 and 2 together with
	     * STOPHIST / ALLOWHIST.
	     */
	    if (!intick)
		break;
	    if (intick == 1)
		intick = 2, STOPHIST
	    else
		intick = 1, ALLOWHIST
	    break;
	/*
	 * Parentheses and brackets are counted except in math mode
	 * inside ${...}.  In math mode a closer with no matching
	 * opener sets err; the counter is decremented regardless.
	 */
	case '(':
	    if (!math || !bct)
		pct++;
	    break;
	case ')':
	    if (!math || !bct)
		err = (!pct-- && math);
	    break;
	case '[':
	    if (!math || !bct)
		brct++;
	    break;
	case ']':
	    if (!math || !bct)
		err = (!brct-- && math);
	    break;
	case '"':
	    /*
	     * Added unchanged inside backquotes, or when we are not
	     * looking for a closing '"' and are outside ${...}.
	     */
	    if (intick || (endchar != '"' && !bct))
		break;
	    if (bct) {
		/* Nested "..." inside ${...}: parse it recursively */
		add(Dnull);
		cmdpush(CS_DQUOTE);
		err = dquote_parse('"', sub);
		cmdpop();
		c = Dnull;
	    } else
		err = 1;
	    break;
	}
	if (err || lexstop)
	    break;
	add(c);
    }
    /* Undo any history suspension and command stack entries left open */
    if (intick == 2)
	ALLOWHIST
    if (intick) {
	cmdpop();
    }
    while (bct--)
	cmdpop();
    if (lexstop)
	/* Ran out of input: collapse the result to a 0/1 flag */
	err = intick || endchar || err;
    else if (err == 1) {
	/*
	 * TODO: as far as I can see, this hack is used in gettokstr()
	 * to hungetc() a character on an error.  However, I don't
	 * understand what that actually gets us, and we can't guarantee
	 * it's a character anyway, because of the previous test.
	 *
	 * We use the same feature in cmd_or_math where we actually do
	 * need to unget if we decide it's really a command substitution.
	 * We try to handle the other case by testing for lexstop.
	 */
	err = c;
    }
    if (zlemath && zlemetacs <= zlemetall + 1 - inbufct)
	inwhat = IN_MATH;
    return err;
}
1589 
1590 /*
1591  * Tokenize a string given in s. Parsing is done as in double
1592  * quotes.  This is usually called before singsub().
1593  *
1594  * parsestr() is noisier, reporting an error if the parse failed.
1595  *
1596  * On entry, *s must point to a string allocated from the stack of
1597  * exactly the right length, i.e. strlen(*s) + 1, as the string
1598  * is used as the lexical token string whose memory management
1599  * demands this.  Usually the input string will therefore be
1600  * the result of an immediately preceding dupstring().
1601  */
1602 
1603 /**/
1604 mod_export int
parsestr(char ** s)1605 parsestr(char **s)
1606 {
1607     int err;
1608 
1609     if ((err = parsestrnoerr(s))) {
1610 	untokenize(*s);
1611 	if (!(errflag & ERRFLAG_INT)) {
1612 	    if (err > 32 && err < 127)
1613 		zerr("parse error near `%c'", err);
1614 	    else
1615 		zerr("parse error");
1616 	    tok = LEXERR;
1617 	}
1618     }
1619     return err;
1620 }
1621 
1622 /**/
1623 mod_export int
parsestrnoerr(char ** s)1624 parsestrnoerr(char **s)
1625 {
1626     int l = strlen(*s), err;
1627 
1628     zcontext_save();
1629     untokenize(*s);
1630     inpush(dupstring_wlen(*s, l), 0, NULL);
1631     strinbeg(0);
1632     lexbuf.len = 0;
1633     lexbuf.ptr = tokstr = *s;
1634     lexbuf.siz = l + 1;
1635     err = dquote_parse('\0', 1);
1636     if (tokstr)
1637 	*s = tokstr;
1638     *lexbuf.ptr = '\0';
1639     strinend();
1640     inpop();
1641     DPUTS(cmdsp, "BUG: parsestr: cmdstack not empty.");
1642     zcontext_restore();
1643     return err;
1644 }
1645 
1646 /*
1647  * Parse a subscript in string s.
1648  * sub is passed down to dquote_parse().
1649  * endchar is the final character.
1650  * Return the next character, or NULL.
1651  */
1652 /**/
1653 mod_export char *
parse_subscript(char * s,int sub,int endchar)1654 parse_subscript(char *s, int sub, int endchar)
1655 {
1656     int l = strlen(s), err, toklen;
1657     char *t;
1658 
1659     if (!*s || *s == endchar)
1660 	return 0;
1661     zcontext_save();
1662     untokenize(t = dupstring_wlen(s, l));
1663     inpush(t, 0, NULL);
1664     strinbeg(0);
1665     /*
1666      * Warning to Future Generations:
1667      *
1668      * This way of passing the subscript through the lexer is brittle.
1669      * Code above this for several layers assumes that when we tokenise
1670      * the input it goes into the same place as the original string.
1671      * However, the lexer may overwrite later bits of the string or
1672      * reallocate it, in particular when expanding aliaes.  To get
1673      * around this, we copy the string and then copy it back.  This is a
1674      * bit more robust but still relies on the underlying assumption of
1675      * length preservation.
1676      */
1677     lexbuf.len = 0;
1678     lexbuf.ptr = tokstr = dupstring_wlen(s, l);
1679     lexbuf.siz = l + 1;
1680     err = dquote_parse(endchar, sub);
1681     toklen = (int)(lexbuf.ptr - tokstr);
1682     DPUTS(toklen > l, "Bad length for parsed subscript");
1683     memcpy(s, tokstr, toklen);
1684     if (err) {
1685 	char *strend = s + toklen;
1686 	err = *strend;
1687 	*strend = '\0';
1688 	untokenize(s);
1689 	*strend = err;
1690 	s = NULL;
1691     } else {
1692 	s += toklen;
1693     }
1694     strinend();
1695     inpop();
1696     DPUTS(cmdsp, "BUG: parse_subscript: cmdstack not empty.");
1697     zcontext_restore();
1698     return s;
1699 }
1700 
1701 /* Tokenize a string given in s. Parsing is done as if s were a normal *
1702  * command-line argument but it may contain separators.  This is used  *
1703  * to parse the right-hand side of ${...%...} substitutions.           */
1704 
1705 /**/
1706 mod_export int
parse_subst_string(char * s)1707 parse_subst_string(char *s)
1708 {
1709     int c, l = strlen(s), err;
1710     char *ptr;
1711     enum lextok ctok;
1712 
1713     if (!*s || !strcmp(s, nulstring))
1714 	return 0;
1715     zcontext_save();
1716     untokenize(s);
1717     inpush(dupstring_wlen(s, l), 0, NULL);
1718     strinbeg(0);
1719     lexbuf.len = 0;
1720     lexbuf.ptr = tokstr = s;
1721     lexbuf.siz = l + 1;
1722     c = hgetc();
1723     ctok = gettokstr(c, 1);
1724     err = errflag;
1725     strinend();
1726     inpop();
1727     DPUTS(cmdsp, "BUG: parse_subst_string: cmdstack not empty.");
1728     zcontext_restore();
1729     /* Keep any interrupt error status */
1730     errflag = err | (errflag & ERRFLAG_INT);
1731     if (ctok == LEXERR) {
1732 	untokenize(s);
1733 	return 1;
1734     }
1735 #ifdef DEBUG
1736     /*
1737      * Historical note: we used to check here for olen (the value of lexbuf.len
1738      * before zcontext_restore()) == l, but that's not necessarily the case if
1739      * we stripped an RCQUOTE.
1740      */
1741     if (ctok != STRING || (errflag && !noerrs)) {
1742 	fprintf(stderr, "Oops. Bug in parse_subst_string: %s\n",
1743 		errflag ? "errflag" : "ctok != STRING");
1744 	fflush(stderr);
1745 	untokenize(s);
1746 	return 1;
1747     }
1748 #endif
1749     /* Check for $'...' quoting.  This needs special handling. */
1750     for (ptr = s; *ptr; )
1751     {
1752 	if (*ptr == String && ptr[1] == Snull)
1753 	{
1754 	    char *t;
1755 	    int len, tlen, diff;
1756 	    t = getkeystring(ptr + 2, &len, GETKEYS_DOLLARS_QUOTE, NULL);
1757 	    len += 2;
1758 	    tlen = strlen(t);
1759 	    diff = len - tlen;
1760 	    /*
1761 	     * Yuk.
1762 	     * parse_subst_string() currently handles strings in-place.
1763 	     * That's not so easy to fix without knowing whether
1764 	     * additional memory should come off the heap or
1765 	     * otherwise.  So we cheat by copying the unquoted string
1766 	     * into place, unless it's too long.  That's not the
1767 	     * normal case, but I'm worried there are pathological
1768 	     * cases with converting metafied multibyte strings.
1769 	     * If someone can prove there aren't I will be very happy.
1770 	     */
1771 	    if (diff < 0) {
1772 		DPUTS(1, "$'...' subst too long: fix get_parse_string()");
1773 		return 1;
1774 	    }
1775 	    memcpy(ptr, t, tlen);
1776 	    ptr += tlen;
1777 	    if (diff > 0) {
1778 		char *dptr = ptr;
1779 		char *sptr = ptr + diff;
1780 		while ((*dptr++ = *sptr++))
1781 		    ;
1782 	    }
1783 	} else
1784 	    ptr++;
1785     }
1786     return 0;
1787 }
1788 
1789 /* Called below to report word positions. */
1790 
1791 /**/
1792 static void
gotword(void)1793 gotword(void)
1794 {
1795     int nwe = zlemetall + 1 - inbufct + (addedx == 2 ? 1 : 0);
1796     if (zlemetacs <= nwe) {
1797 	int nwb = zlemetall - wordbeg + addedx;
1798 	if (zlemetacs >= nwb) {
1799 	    wb = nwb;
1800 	    we = nwe;
1801 	} else {
1802 	    wb = zlemetacs + addedx;
1803 	    if (we < wb)
1804 		we = wb;
1805 	}
1806 	lexflags = 0;
1807     }
1808 }
1809 
1810 /* Check if current lex text matches an alias: 1 if so, else 0 */
1811 
1812 static int
checkalias(void)1813 checkalias(void)
1814 {
1815     Alias an;
1816 
1817     if (!zshlextext)
1818 	return 0;
1819 
1820     if (!noaliases && isset(ALIASESOPT) &&
1821 	(!isset(POSIXALIASES) ||
1822 	 (tok == STRING && !reswdtab->getnode(reswdtab, zshlextext)))) {
1823 	char *suf;
1824 
1825 	an = (Alias) aliastab->getnode(aliastab, zshlextext);
1826 	if (an && !an->inuse &&
1827 	    ((an->node.flags & ALIAS_GLOBAL) ||
1828 	     (incmdpos && tok == STRING) || inalmore)) {
1829 	    if (!lexstop) {
1830 		/*
1831 		 * Tokens that don't require a space after, get one,
1832 		 * because they are treated as if preceded by one.
1833 		 */
1834 		int c = hgetc();
1835 		hungetc(c);
1836 		if (!iblank(c))
1837 		    inpush(" ", INP_ALIAS, 0);
1838 	    }
1839 	    inpush(an->text, INP_ALIAS, an);
1840 	    if (an->text[0] == ' ' && !(an->node.flags & ALIAS_GLOBAL))
1841 		aliasspaceflag = 1;
1842 	    lexstop = 0;
1843 	    return 1;
1844 	}
1845 	if ((suf = strrchr(zshlextext, '.')) && suf[1] &&
1846 	    suf > zshlextext && suf[-1] != Meta &&
1847 	    (an = (Alias)sufaliastab->getnode(sufaliastab, suf+1)) &&
1848 	    !an->inuse && incmdpos) {
1849 	    inpush(dupstring(zshlextext), INP_ALIAS, an);
1850 	    inpush(" ", INP_ALIAS, NULL);
1851 	    inpush(an->text, INP_ALIAS, NULL);
1852 	    lexstop = 0;
1853 	    return 1;
1854 	}
1855     }
1856 
1857     return 0;
1858 }
1859 
1860 /* expand aliases and reserved words */
1861 
1862 /**/
1863 int
exalias(void)1864 exalias(void)
1865 {
1866     Reswd rw;
1867 
1868     hwend();
1869     if (interact && isset(SHINSTDIN) && !strin && incasepat <= 0 &&
1870 	tok == STRING && !nocorrect && !(inbufflags & INP_ALIAS) &&
1871 	(isset(CORRECTALL) || (isset(CORRECT) && incmdpos)))
1872 	spckword(&tokstr, 1, incmdpos, 1);
1873 
1874     if (!tokstr) {
1875 	zshlextext = tokstrings[tok];
1876 
1877 	if (tok == NEWLIN)
1878 	    return 0;
1879 	return checkalias();
1880     } else {
1881 	VARARR(char, copy, (strlen(tokstr) + 1));
1882 
1883 	if (has_token(tokstr)) {
1884 	    char *p, *t;
1885 
1886 	    zshlextext = p = copy;
1887 	    for (t = tokstr;
1888 		 (*p++ = itok(*t) ? ztokens[*t++ - Pound] : *t++););
1889 	} else
1890 	    zshlextext = tokstr;
1891 
1892 	if ((lexflags & LEXFLAGS_ZLE) && !(inbufflags & INP_ALIAS)) {
1893 	    int zp = lexflags;
1894 
1895 	    gotword();
1896 	    if ((zp & LEXFLAGS_ZLE) && !lexflags) {
1897 		if (zshlextext == copy)
1898 		    zshlextext = tokstr;
1899 		return 0;
1900 	    }
1901 	}
1902 
1903 	if (tok == STRING) {
1904 	    /* Check for an alias */
1905 	    if ((zshlextext != copy || !isset(POSIXALIASES)) && checkalias()) {
1906 		if (zshlextext == copy)
1907 		    zshlextext = tokstr;
1908 		return 1;
1909 	    }
1910 
1911 	    /* Then check for a reserved word */
1912 	    if ((incmdpos ||
1913 		 (unset(IGNOREBRACES) && unset(IGNORECLOSEBRACES) &&
1914 		  zshlextext[0] == '}' && !zshlextext[1])) &&
1915 		(rw = (Reswd) reswdtab->getnode(reswdtab, zshlextext))) {
1916 		tok = rw->token;
1917 		inrepeat_ = (tok == REPEAT);
1918 		if (tok == DINBRACK)
1919 		    incond = 1;
1920 	    } else if (incond && !strcmp(zshlextext, "]]")) {
1921 		tok = DOUTBRACK;
1922 		incond = 0;
1923 	    } else if (incond == 1 && zshlextext[0] == '!' && !zshlextext[1])
1924 		tok = BANG;
1925 	}
1926 	inalmore = 0;
1927 	if (zshlextext == copy)
1928 	    zshlextext = tokstr;
1929     }
1930     return 0;
1931 }
1932 
1933 /**/
1934 void
zshlex_raw_add(int c)1935 zshlex_raw_add(int c)
1936 {
1937     if (!lex_add_raw)
1938 	return;
1939 
1940     *lexbuf_raw.ptr++ = c;
1941     if (lexbuf_raw.siz == ++lexbuf_raw.len) {
1942 	int newbsiz = lexbuf_raw.siz * 2;
1943 
1944 	tokstr_raw = (char *)hrealloc(tokstr_raw, lexbuf_raw.siz, newbsiz);
1945 	lexbuf_raw.ptr = tokstr_raw + lexbuf_raw.len;
1946 	memset(lexbuf_raw.ptr, 0, newbsiz - lexbuf_raw.siz);
1947 	lexbuf_raw.siz = newbsiz;
1948     }
1949 }
1950 
1951 /**/
1952 void
zshlex_raw_back(void)1953 zshlex_raw_back(void)
1954 {
1955     if (!lex_add_raw)
1956 	return;
1957     lexbuf_raw.ptr--;
1958     lexbuf_raw.len--;
1959 }
1960 
1961 /**/
1962 int
zshlex_raw_mark(int offset)1963 zshlex_raw_mark(int offset)
1964 {
1965     if (!lex_add_raw)
1966 	return 0;
1967     return lexbuf_raw.len + offset;
1968 }
1969 
1970 /**/
1971 void
zshlex_raw_back_to_mark(int mark)1972 zshlex_raw_back_to_mark(int mark)
1973 {
1974     if (!lex_add_raw)
1975 	return;
1976     lexbuf_raw.ptr = tokstr_raw + mark;
1977     lexbuf_raw.len = mark;
1978 }
1979 
/*
 * Skip (...) for command-style substitutions: $(...), <(...), >(...)
 *
 * In order to ensure we don't stop at closing parentheses with
 * some other syntactic significance, we'll parse the input until
 * we find an unmatched closing parenthesis.  However, we'll throw
 * away the result of the parsing and just keep the string we've built
 * up on the way.
 *
 * Inpar is added to the token string here, followed by the text of the
 * substitution.
 *
 * The return value is lexstop as it stands on exit.  The callers in
 * this file treat a non-zero return as a lexical error.
 *
 * If ZSH_OLD_SKIPCOMM is defined, an older implementation is used
 * instead which does not parse the input but simply counts
 * parentheses, stepping over quoted text, backslash escapes and
 * comments.
 */

/**/
static int
skipcomm(void)
{
#ifdef ZSH_OLD_SKIPCOMM
    /*
     * pct counts unclosed parentheses, starting with the one that
     * introduced the substitution.  start is non-zero when the
     * previous character was blank (or a comment was just skipped),
     * and initially; only then does '#' begin a comment.
     */
    int pct = 1, c, start = 1;

    cmdpush(CS_CMDSUBST);
    SETPARBEGIN
    c = Inpar;
    do {
	int iswhite;
	add(c);
	c = hgetc();
	if (itok(c) || lexstop)
	    break;
	iswhite = inblank(c);
	switch (c) {
	case '(':
	    pct++;
	    break;
	case ')':
	    pct--;
	    break;
	case '\\':
	    /* Keep the backslash; the next character is added unexamined */
	    add(c);
	    c = hgetc();
	    break;
	case '\'': {
	    /* Inside $'...' a backslash protects the next character */
	    int strquote = lexbuf.ptr[-1] == '$';
	    add(c);
	    STOPHIST
	    while ((c = hgetc()) != '\'' && !lexstop) {
		if (c == '\\' && strquote) {
		    add(c);
		    c = hgetc();
		}
		add(c);
	    }
	    ALLOWHIST
	    break;
	}
	case '\"':
	    add(c);
	    while ((c = hgetc()) != '\"' && !lexstop)
		if (c == '\\') {
		    add(c);
		    add(hgetc());
		} else
		    add(c);
	    break;
	case '`':
	    add(c);
	    while ((c = hgetc()) != '`' && !lexstop)
		if (c == '\\')
		    add(c), add(hgetc());
		else
		    add(c);
	    break;
	case '#':
	    /* A comment runs to the end of the line */
	    if (start) {
		add(c);
		while ((c = hgetc()) != '\n' && !lexstop)
		    add(c);
		iswhite = 1;
	    }
	    break;
	}
	start = iswhite;
    }
    while (pct);
    if (!lexstop)
	SETPAREND
    cmdpop();
    return lexstop;
#else
    char *new_tokstr;
    int new_lexstop, new_lex_add_raw;
    /* infor is cleared for the nested parse and put back at the end */
    int save_infor = infor;
    struct lexbufstate new_lexbuf;

    infor = 0;
    cmdpush(CS_CMDSUBST);
    SETPARBEGIN
    add(Inpar);

    new_lex_add_raw = lex_add_raw + 1;
    if (!lex_add_raw) {
	/*
	 * We'll combine the string so far with the input
	 * read in for the command substitution.  To do this
	 * we'll just propagate the current tokstr etc. as the
	 * variables used for adding raw input, and
	 * ensure we swap those for the real tokstr etc. at the end.
	 *
	 * However, we need to save and restore the rest of the
	 * lexical and parse state as we're effectively parsing
	 * an internal string.  Because we're still parsing it from
	 * the original input source (we have to --- we don't know
	 * when to stop inputting it otherwise and can't rely on
	 * the input being recoverable until we've read it) we need
	 * to keep the same history context.
	 */
	new_tokstr = tokstr;
	new_lexbuf = lexbuf;

	/*
	 * If we're expanding an alias at this point, we need the whole
	 * remaining text as part of the string for the command in
	 * parentheses, so don't backtrack.  This is different from the
	 * usual case where the alias is fully within the command, where
	 * we want the unexpanded text so that it will be expanded
	 * again when the command in the parentheses is executed.
	 *
	 * I never wanted to be a software engineer, you know.
	 */
	if (inbufflags & INP_ALIAS)
	    inbufflags |= INP_RAW_KEEP;
	zcontext_save_partial(ZCONTEXT_LEX|ZCONTEXT_PARSE);
	hist_in_word(1);
    } else {
	/*
	 * Set up for nested command substitution, however
	 * we don't actually need the string until we get
	 * back to the top level and recover the lot.
	 * The $() body just appears empty.
	 *
	 * We do need to propagate the raw variables which would
	 * otherwise be cleared, though.
	 */
	new_tokstr = tokstr_raw;
	new_lexbuf = lexbuf_raw;

	zcontext_save_partial(ZCONTEXT_LEX|ZCONTEXT_PARSE);
    }
    /* Install the raw-input state chosen above for the nested parse */
    tokstr_raw = new_tokstr;
    lexbuf_raw = new_lexbuf;
    lex_add_raw = new_lex_add_raw;
    /*
     * Don't do any ZLE specials down here: they're only needed
     * when we return the string from the recursive parse.
     * (TBD: this probably means we should be initialising lexflags
     * more consistently.)
     *
     * Note that in that case we're still using the ZLE line reading
     * function at the history layer --- this is consistent with the
     * intention of maintaining the history and input layers across
     * the recursive parsing.
     *
     * Also turn off LEXFLAGS_NEWLINE because this is already skipping
     * across the entire construct, and parse_event() needs embedded
     * newlines to be "real" when looking for the OUTPAR token.
     */
    lexflags &= ~(LEXFLAGS_ZLE|LEXFLAGS_NEWLINE);
    dbparens = 0;	/* restored by zcontext_restore_partial() */

    /*
     * Parse up to the closing parenthesis.  If that fails, make sure
     * lexstop ends up set so that the failure is reported to the caller.
     */
    if (!parse_event(OUTPAR) || tok != OUTPAR) {
	if (strin) {
	    /*
	     * Get the rest of the string raw since we don't
	     * know where this token ends.
	     */
	    while (!lexstop)
		(void)ingetc();
	} else
	    lexstop = 1;
    }
     /* Outpar lexical token gets added in caller if present */

    /*
     * We're going to keep the full raw input string
     * as the current token string after popping the stack.
     */
    new_tokstr = tokstr_raw;
    new_lexbuf = lexbuf_raw;
    /*
     * We're also going to propagate the lexical state:
     * if we couldn't parse the command substitution we
     * can't continue.
     */
    new_lexstop = lexstop;

    zcontext_restore_partial(ZCONTEXT_LEX|ZCONTEXT_PARSE);

    if (lex_add_raw) {
	/*
	 * Keep going, so retain the raw variables.
	 */
	tokstr_raw = new_tokstr;
	lexbuf_raw = new_lexbuf;
    } else {
	if (!new_lexstop) {
	    /* Ignore the ')' added on input */
	    new_lexbuf.len--;
	    *--new_lexbuf.ptr = '\0';
	}

	/*
	 * Convince the rest of lex.c we were examining a string
	 * all along.
	 */
	tokstr = new_tokstr;
	lexbuf = new_lexbuf;
	lexstop = new_lexstop;
	hist_in_word(0);
    }

    if (!lexstop)
	SETPAREND
    cmdpop();
    infor = save_infor;

    return lexstop;
#endif
}
2205