1 /*
2 * lex.c - lexical analysis
3 *
4 * This file is part of zsh, the Z shell.
5 *
6 * Copyright (c) 1992-1997 Paul Falstad
7 * All rights reserved.
8 *
9 * Permission is hereby granted, without written agreement and without
10 * license or royalty fees, to use, copy, modify, and distribute this
11 * software and to distribute modified versions of this software for any
12 * purpose, provided that the above copyright notice and the following
13 * two paragraphs appear in all copies of this software.
14 *
15 * In no event shall Paul Falstad or the Zsh Development Group be liable
16 * to any party for direct, indirect, special, incidental, or consequential
17 * damages arising out of the use of this software and its documentation,
18 * even if Paul Falstad and the Zsh Development Group have been advised of
19 * the possibility of such damage.
20 *
21 * Paul Falstad and the Zsh Development Group specifically disclaim any
22 * warranties, including, but not limited to, the implied warranties of
23 * merchantability and fitness for a particular purpose. The software
24 * provided hereunder is on an "as is" basis, and Paul Falstad and the
25 * Zsh Development Group have no obligation to provide maintenance,
26 * support, updates, enhancements, or modifications.
27 *
28 */
29
30 #include "zsh.mdh"
31 #include "lex.pro"
32
/*
 * Initial size of the heap buffer allocated for each new token string
 * (see the hcalloc() calls in gettok() and gettokstr()); add() enlarges
 * the buffer when it fills up.
 */
33 #define LEX_HEAP_SIZE (32)
34
35 /* tokens */
/*
 * Literal characters for the tokenised character codes: cmd_or_math()
 * turns a token code back into its character with ztokens[code - Pound].
 */
36
37 /**/
38 mod_export char ztokens[] = "#$^*(())$=|{}[]`<>>?~`,-!'\"\\\\";
39
40 /* parts of the current token */
41
42 /**/
43 char *zshlextext;
/* Text of the current token; gettok() resets it to NULL before reading. */
44 /**/
45 mod_export char *tokstr;
/* Type of the current token, as assigned by zshlex(). */
46 /**/
47 mod_export enum lextok tok;
/*
 * File descriptor digit written directly before a redirection operator
 * (e.g. the 2 in "2>"); gettok() stores -1 when none was given.
 */
48 /**/
49 mod_export int tokfd;
50
51 /*
52 * Line number at which the first character of a token was found.
53 * We always set this in gettok(), which is always called from
54 * zshlex() unless we have reached an error. So it is always
55 * valid when parsing. It is not useful during execution
56 * of the parsed structure.
57 */
58
59 /**/
60 zlong toklineno;
61
62 /* lexical analyzer error flag */
63
64 /**/
65 mod_export int lexstop;
66
67 /* if != 0, this is the first line of the command */
68
69 /**/
70 mod_export int isfirstln;
71
72 /* if != 0, this is the first char of the command (not including white space) */
73
74 /**/
75 int isfirstch;
76
77 /* flag that an alias should be expanded after expansion ending in space */
78
79 /**/
80 int inalmore;
81
82 /*
83 * Don't do spelling correction.
84 * Bit 1 is only valid for the current word. It's
85 * set when we detect a lookahead that stops the word from
86 * needing correction.
87 */
88
89 /**/
90 int nocorrect;
91
92 /*
93 * TBD: the following exported variables are part of the non-interface
94 * with ZLE for completion. They are poorly named and the whole
95 * scheme is incredibly brittle. One piece of robustness is applied:
96 * the variables are only set if LEXFLAGS_ZLE is set. Improvements
97 * should therefore concentrate on areas with this flag set.
98 *
99 * Cursor position and line length in zle when the line is
100 * metafied for access from the main shell.
101 */
102
103 /**/
104 mod_export int zlemetacs, zlemetall;
105
106 /* inwhat says what exactly we are in *
107 * (its value is one of the IN_* things). */
108
109 /**/
110 mod_export int inwhat;
111
112 /* 1 if x added to complete in a blank between words */
113
114 /**/
115 mod_export int addedx;
116
117 /* wb and we hold the beginning/end position of the word we are completing. */
118
119 /**/
120 mod_export int wb, we;
121
/*
 * Recorded by gettok() at the first character of a word, derived from
 * inbufct (less one when the word starts with a quoted bang character).
 * Only updated when LEXFLAGS_ZLE is set and input is not from an alias.
 */
122 /**/
123 mod_export int wordbeg;
124
/*
 * parbegin and parend are assigned from inbufct by the SETPARBEGIN and
 * SETPAREND macros around a backquoted substitution in gettokstr().
 * SETPAREND treats -1 as "not set" for both.
 */
125 /**/
126 mod_export int parbegin;
127
128 /**/
129 mod_export int parend;
130
131
132 /* 1 if aliases should not be expanded */
133
134 /**/
135 mod_export int noaliases;
136
137 /*
138 * If non-zero, we are parsing a line sent to us by the editor, or some
139 * other string that's not part of standard command input (e.g. eval is
140 * part of normal command input).
141 *
142 * Set of bits from LEXFLAGS_*.
143 *
144 * Note that although it is passed into the lexer as an input, the
145 * lexer can set it to zero after finding the word it's searching for.
146 * This only happens if the line being parsed actually does come from
147 * ZLE, and hence the bit LEXFLAGS_ZLE is set.
148 */
149
150 /**/
151 mod_export int lexflags;
152
153 /* don't recognize comments */
154
155 /**/
156 mod_export int nocomments;
157
158 /* add raw input characters while parsing command substitution */
159
160 /**/
161 int lex_add_raw;
162
163 /* variables associated with the above */
164
165 static char *tokstr_raw;
166 static struct lexbufstate lexbuf_raw;
167
168 /* text of punctuation tokens */
/*
 * The entries are laid out in token-number order, as the per-entry
 * comments show.  The array is declared with WHILE + 1 slots but only
 * the entries up to SEMIBAR are initialised here, so the remaining slots
 * are NULL.  The NEWLIN entry is the two characters backslash and 'n',
 * not an actual newline.
 */
169
170 /**/
171 mod_export char *tokstrings[WHILE + 1] = {
172 NULL, /* NULLTOK 0 */
173 ";", /* SEPER */
174 "\\n", /* NEWLIN */
175 ";", /* SEMI */
176 ";;", /* DSEMI */
177 "&", /* AMPER 5 */
178 "(", /* INPAR */
179 ")", /* OUTPAR */
180 "||", /* DBAR */
181 "&&", /* DAMPER */
182 ">", /* OUTANG 10 */
183 ">|", /* OUTANGBANG */
184 ">>", /* DOUTANG */
185 ">>|", /* DOUTANGBANG */
186 "<", /* INANG */
187 "<>", /* INOUTANG 15 */
188 "<<", /* DINANG */
189 "<<-", /* DINANGDASH */
190 "<&", /* INANGAMP */
191 ">&", /* OUTANGAMP */
192 "&>", /* AMPOUTANG 20 */
193 "&>|", /* OUTANGAMPBANG */
194 ">>&", /* DOUTANGAMP */
195 ">>&|", /* DOUTANGAMPBANG */
196 "<<<", /* TRINANG */
197 "|", /* BAR 25 */
198 "|&", /* BARAMP */
199 "()", /* INOUTPAR */
200 "((", /* DINPAR */
201 "))", /* DOUTPAR */
202 "&|", /* AMPERBANG 30 */
203 ";&", /* SEMIAMP */
204 ";|", /* SEMIBAR */
205 };
206
207 /* lexical state */
208
/*
 * dbparens is set by gettok() after it returns DINPAR for a for-loop
 * "((" and cleared when the matching DOUTPAR is returned; while it is
 * set gettok() reads the following input as a math expression.
 */
209 static int dbparens;
/*
 * Buffer state for the token being built: add() writes through ptr,
 * counts characters in len and compares len against siz to decide when
 * to grow.  NOTE(review): the initialiser order looks like
 * { ptr, siz, len } -- confirm against struct lexbufstate.
 */
210 static struct lexbufstate lexbuf = { NULL, 256, 0 };
211
212 /* save lexical context */
213
214 /**/
215 void
lex_context_save(struct lex_stack * ls,int toplevel)216 lex_context_save(struct lex_stack *ls, int toplevel)
217 {
218 (void)toplevel;
219
220 ls->dbparens = dbparens;
221 ls->isfirstln = isfirstln;
222 ls->isfirstch = isfirstch;
223 ls->lexflags = lexflags;
224
225 ls->tok = tok;
226 ls->tokstr = tokstr;
227 ls->zshlextext = zshlextext;
228 ls->lexbuf = lexbuf;
229 ls->lex_add_raw = lex_add_raw;
230 ls->tokstr_raw = tokstr_raw;
231 ls->lexbuf_raw = lexbuf_raw;
232 ls->lexstop = lexstop;
233 ls->toklineno = toklineno;
234
235 tokstr = zshlextext = lexbuf.ptr = NULL;
236 lexbuf.siz = 256;
237 tokstr_raw = lexbuf_raw.ptr = NULL;
238 lexbuf_raw.siz = lexbuf_raw.len = lex_add_raw = 0;
239 }
240
241 /* restore lexical context */
242
243 /**/
244 mod_export void
lex_context_restore(const struct lex_stack * ls,int toplevel)245 lex_context_restore(const struct lex_stack *ls, int toplevel)
246 {
247 (void)toplevel;
248
249 dbparens = ls->dbparens;
250 isfirstln = ls->isfirstln;
251 isfirstch = ls->isfirstch;
252 lexflags = ls->lexflags;
253 tok = ls->tok;
254 tokstr = ls->tokstr;
255 zshlextext = ls->zshlextext;
256 lexbuf = ls->lexbuf;
257 lex_add_raw = ls->lex_add_raw;
258 tokstr_raw = ls->tokstr_raw;
259 lexbuf_raw = ls->lexbuf_raw;
260 lexstop = ls->lexstop;
261 toklineno = ls->toklineno;
262 }
263
264 /**/
265 void
zshlex(void)266 zshlex(void)
267 {
268 if (tok == LEXERR)
269 return;
270 do {
271 if (inrepeat_)
272 ++inrepeat_;
273 if (inrepeat_ == 3 && isset(SHORTLOOPS))
274 incmdpos = 1;
275 tok = gettok();
276 } while (tok != ENDINPUT && exalias());
277 nocorrect &= 1;
278 if (tok == NEWLIN || tok == ENDINPUT) {
279 while (hdocs) {
280 struct heredocs *next = hdocs->next;
281 char *doc, *munged_term;
282
283 hwbegin(0);
284 cmdpush(hdocs->type == REDIR_HEREDOC ? CS_HEREDOC : CS_HEREDOCD);
285 munged_term = dupstring(hdocs->str);
286 STOPHIST
287 doc = gethere(&munged_term, hdocs->type);
288 ALLOWHIST
289 cmdpop();
290 hwend();
291 if (!doc) {
292 zerr("here document too large");
293 while (hdocs) {
294 next = hdocs->next;
295 zfree(hdocs, sizeof(struct heredocs));
296 hdocs = next;
297 }
298 tok = LEXERR;
299 break;
300 }
301 setheredoc(hdocs->pc, REDIR_HERESTR, doc, hdocs->str,
302 munged_term);
303 zfree(hdocs, sizeof(struct heredocs));
304 hdocs = next;
305 }
306 }
307 if (tok != NEWLIN)
308 isnewlin = 0;
309 else
310 isnewlin = (inbufct) ? -1 : 1;
311 if (tok == SEMI || (tok == NEWLIN && !(lexflags & LEXFLAGS_NEWLINE)))
312 tok = SEPER;
313 }
314
315 /**/
316 mod_export void
ctxtlex(void)317 ctxtlex(void)
318 {
319 static int oldpos;
320
321 zshlex();
322 switch (tok) {
323 case SEPER:
324 case NEWLIN:
325 case SEMI:
326 case DSEMI:
327 case SEMIAMP:
328 case SEMIBAR:
329 case AMPER:
330 case AMPERBANG:
331 case INPAR:
332 case INBRACE:
333 case DBAR:
334 case DAMPER:
335 case BAR:
336 case BARAMP:
337 case INOUTPAR:
338 case DOLOOP:
339 case THEN:
340 case ELIF:
341 case ELSE:
342 case DOUTBRACK:
343 incmdpos = 1;
344 break;
345 case STRING:
346 case TYPESET:
347 /* case ENVSTRING: */
348 case ENVARRAY:
349 case OUTPAR:
350 case CASE:
351 case DINBRACK:
352 incmdpos = 0;
353 break;
354
355 default:
356 /* nothing to do, keep compiler happy */
357 break;
358 }
359 if (tok != DINPAR)
360 infor = tok == FOR ? 2 : 0;
361 if (IS_REDIROP(tok) || tok == FOR || tok == FOREACH || tok == SELECT) {
362 inredir = 1;
363 oldpos = incmdpos;
364 incmdpos = 0;
365 } else if (inredir) {
366 incmdpos = oldpos;
367 inredir = 0;
368 }
369 }
370
/*
 * Action codes for the first character of a token, looked up in
 * lexact1[] by gettok().  Apart from LX1_OTHER each value is the index
 * of the corresponding character in the string lx1 used by
 * initlextabs(), so the two must be kept in step.  Index 1 (LX1_COMMENT)
 * is the placeholder 'q' in that string; gettok() recognises comments
 * separately by comparing against hashchar.  Indices with no name here
 * (4 and 9-12) have no case in gettok()'s switch.
 */
371 #define LX1_BKSLASH 0
372 #define LX1_COMMENT 1
373 #define LX1_NEWLIN 2
374 #define LX1_SEMI 3
375 #define LX1_AMPER 5
376 #define LX1_BAR 6
377 #define LX1_INPAR 7
378 #define LX1_OUTPAR 8
379 #define LX1_INANG 13
380 #define LX1_OUTANG 14
381 #define LX1_OTHER 15
382
/*
 * Action codes for characters inside a word, looked up in lexact2[] by
 * gettokstr().  Values 0-19 are indices into the string lx2 used by
 * initlextabs(); LX2_OTHER is the default for every other character and
 * LX2_META is assigned to the Meta character explicitly.
 */
383 #define LX2_BREAK 0
384 #define LX2_OUTPAR 1
385 #define LX2_BAR 2
386 #define LX2_STRING 3
387 #define LX2_INBRACK 4
388 #define LX2_OUTBRACK 5
389 #define LX2_TILDE 6
390 #define LX2_INPAR 7
391 #define LX2_INBRACE 8
392 #define LX2_OUTBRACE 9
393 #define LX2_OUTANG 10
394 #define LX2_INANG 11
395 #define LX2_EQUALS 12
396 #define LX2_BKSLASH 13
397 #define LX2_QUOTE 14
398 #define LX2_DQUOTE 15
399 #define LX2_BQUOTE 16
400 #define LX2_COMMA 17
401 #define LX2_DASH 18
402 #define LX2_BANG 19
403 #define LX2_OTHER 20
404 #define LX2_META 21
405
/*
 * Per-character tables filled in by initlextabs(): lexact1 and lexact2
 * hold the action codes above, lextok2 holds the character (or token
 * code) that gettokstr() substitutes for an input character.
 */
406 static unsigned char lexact1[256], lexact2[256], lextok2[256];
407
408 /**/
409 void
initlextabs(void)410 initlextabs(void)
411 {
412 int t0;
413 static char *lx1 = "\\q\n;!&|(){}[]<>";
414 static char *lx2 = ";)|$[]~({}><=\\\'\"`,-!";
415
416 for (t0 = 0; t0 != 256; t0++) {
417 lexact1[t0] = LX1_OTHER;
418 lexact2[t0] = LX2_OTHER;
419 lextok2[t0] = t0;
420 }
421 for (t0 = 0; lx1[t0]; t0++)
422 lexact1[(int)lx1[t0]] = t0;
423 for (t0 = 0; lx2[t0]; t0++)
424 lexact2[(int)lx2[t0]] = t0;
425 lexact2['&'] = LX2_BREAK;
426 lexact2[STOUC(Meta)] = LX2_META;
427 lextok2['*'] = Star;
428 lextok2['?'] = Quest;
429 lextok2['{'] = Inbrace;
430 lextok2['['] = Inbrack;
431 lextok2['$'] = String;
432 lextok2['~'] = Tilde;
433 lextok2['#'] = Pound;
434 lextok2['^'] = Hat;
435 }
436
437 /* initialize lexical state */
438
439 /**/
440 void
lexinit(void)441 lexinit(void)
442 {
443 nocorrect = dbparens = lexstop = 0;
444 tok = ENDINPUT;
445 }
446
447 /* add a char to the string buffer */
448
449 /**/
450 void
add(int c)451 add(int c)
452 {
453 *lexbuf.ptr++ = c;
454 if (lexbuf.siz == ++lexbuf.len) {
455 int newbsiz = lexbuf.siz * 2;
456
457 if (newbsiz > inbufct && inbufct > lexbuf.siz)
458 newbsiz = inbufct;
459
460 tokstr = (char *)hrealloc(tokstr, lexbuf.siz, newbsiz);
461 lexbuf.ptr = tokstr + lexbuf.len;
462 /* len == bsiz, so bptr is at the start of newly allocated memory */
463 memset(lexbuf.ptr, 0, newbsiz - lexbuf.siz);
464 lexbuf.siz = newbsiz;
465 }
466 }
467
/*
 * Helpers used by gettokstr() around a backquoted substitution.  Both
 * act only when LEXFLAGS_ZLE is set and the input is not coming from an
 * alias, and both expand to a braced block, so they are written without
 * a trailing semicolon at the point of use.
 *
 * SETPARBEGIN records inbufct in parbegin if
 * zlemetacs >= zlemetall + 1 - inbufct.
 *
 * SETPAREND acts only if parbegin is set and parend is still -1: if the
 * same comparison still holds it resets parbegin to -1, otherwise it
 * records inbufct in parend.
 */
468 #define SETPARBEGIN { \
469 if ((lexflags & LEXFLAGS_ZLE) && !(inbufflags & INP_ALIAS) && \
470 zlemetacs >= zlemetall+1-inbufct) \
471 parbegin = inbufct; \
472 }
473 #define SETPAREND { \
474 if ((lexflags & LEXFLAGS_ZLE) && !(inbufflags & INP_ALIAS) && \
475 parbegin != -1 && parend == -1) { \
476 if (zlemetacs >= zlemetall + 1 - inbufct) \
477 parbegin = -1; \
478 else \
479 parend = inbufct; \
480 } \
481 }
482
483 enum {
484 CMD_OR_MATH_CMD,
485 CMD_OR_MATH_MATH,
486 CMD_OR_MATH_ERR
487 };
488
489 /*
490 * Return one of the above. If it couldn't be
491 * parsed as math, but there was no gross error, it's a command.
492 */
493
494 static int
cmd_or_math(int cs_type)495 cmd_or_math(int cs_type)
496 {
497 int oldlen = lexbuf.len;
498 int c;
499 int oinflags = inbufflags;
500
501 cmdpush(cs_type);
502 inbufflags |= INP_APPEND;
503 c = dquote_parse(')', 0);
504 if (!(oinflags & INP_APPEND))
505 inbufflags &= ~INP_APPEND;
506 cmdpop();
507 *lexbuf.ptr = '\0';
508 if (!c) {
509 /* Successfully parsed, see if it was math */
510 c = hgetc();
511 if (c == ')')
512 return CMD_OR_MATH_MATH; /* yes */
513 hungetc(c);
514 lexstop = 0;
515 c = ')';
516 } else if (lexstop) {
517 /* we haven't got anything to unget */
518 return CMD_OR_MATH_ERR;
519 }
520 /* else unsuccessful: unget the whole thing */
521 hungetc(c);
522 lexstop = 0;
523 while (lexbuf.len > oldlen && !(errflag & ERRFLAG_ERROR)) {
524 lexbuf.len--;
525 hungetc(itok(*--lexbuf.ptr) ?
526 ztokens[*lexbuf.ptr - Pound] : *lexbuf.ptr);
527 }
528 if (errflag)
529 return CMD_OR_MATH_ERR;
530 hungetc('(');
531 return errflag ? CMD_OR_MATH_ERR : CMD_OR_MATH_CMD;
532 }
533
534
535 /*
536 * Parse either a $(( ... )) or a $(...)
537 * Return the same as cmd_or_math().
538 */
539 static int
cmd_or_math_sub(void)540 cmd_or_math_sub(void)
541 {
542 int c = hgetc(), ret;
543
544 if (c == '(') {
545 int lexpos = (int)(lexbuf.ptr - tokstr);
546 add(Inpar);
547 add('(');
548 if ((ret = cmd_or_math(CS_MATHSUBST)) == CMD_OR_MATH_MATH) {
549 tokstr[lexpos] = Inparmath;
550 add(')');
551 return CMD_OR_MATH_MATH;
552 }
553 if (ret == CMD_OR_MATH_ERR)
554 return CMD_OR_MATH_ERR;
555 lexbuf.ptr -= 2;
556 lexbuf.len -= 2;
557 } else {
558 hungetc(c);
559 lexstop = 0;
560 }
561 return skipcomm() ? CMD_OR_MATH_ERR : CMD_OR_MATH_CMD;
562 }
563
564 /* Check whether we're looking at valid numeric globbing syntax *
565 * (/\<[0-9]*-[0-9]*\>/). Call pointing just after the opening "<". *
566 * Leaves the input in the same place, returning 0 or 1. */
567
568 /**/
569 static int
isnumglob(void)570 isnumglob(void)
571 {
572 int c, ec = '-', ret = 0;
573 int tbs = 256, n = 0;
574 char *tbuf = (char *)zalloc(tbs);
575
576 while(1) {
577 c = hgetc();
578 if(lexstop) {
579 lexstop = 0;
580 break;
581 }
582 tbuf[n++] = c;
583 if(!idigit(c)) {
584 if(c != ec)
585 break;
586 if(ec == '>') {
587 ret = 1;
588 break;
589 }
590 ec = '>';
591 }
592 if(n == tbs)
593 tbuf = (char *)realloc(tbuf, tbs *= 2);
594 }
595 while(n--)
596 hungetc(tbuf[n]);
597 zfree(tbuf, tbs);
598 return ret;
599 }
600
/*
 * Read one token from the input and return its type.
 *
 * Operators and separators are recognised here from their first one to
 * four characters.  For redirection operators, tokfd is set to the
 * single digit written directly in front of the operator, or -1 if
 * there was none.  Anything that turns out to be the start of a word is
 * passed to gettokstr(), which leaves the text in tokstr.
 *
 * Returns ENDINPUT when the input is exhausted, or LEXERR instead if
 * errflag is set at that point.
 */
601 /**/
602 static enum lextok
gettok(void)603 gettok(void)
604 {
605 int c, d;
606 int peekfd = -1;
607 enum lextok peek;
608
    /* Re-entered when a word position starts with backslash-newline. */
609 beginning:
610 tokstr = NULL;
    /* Skip blanks in front of the token. */
611 while (iblank(c = hgetc()) && !lexstop);
612 toklineno = lineno;
613 if (lexstop)
614 return (errflag) ? LEXERR : ENDINPUT;
615 isfirstln = 0;
616 if ((lexflags & LEXFLAGS_ZLE) && !(inbufflags & INP_ALIAS))
617 wordbeg = inbufct - (qbang && c == bangchar);
618 hwbegin(-1-(qbang && c == bangchar));
619 /* word includes the last character read and possibly \ before ! */
620 if (dbparens) {
    /*
     * Inside (( ... )): read the text as math, up to ')' or, while infor
     * is non-zero, up to ';'.
     */
621 lexbuf.len = 0;
622 lexbuf.ptr = tokstr = (char *) hcalloc(lexbuf.siz = LEX_HEAP_SIZE);
623 hungetc(c);
624 cmdpush(CS_MATH);
625 c = dquote_parse(infor ? ';' : ')', 0);
626 cmdpop();
627 *lexbuf.ptr = '\0';
628 if (!c && infor) {
629 infor--;
630 return DINPAR;
631 }
    /* The expression must be followed by a second ')'. */
632 if (c || (c = hgetc()) != ')') {
633 hungetc(c);
634 return LEXERR;
635 }
636 dbparens = 0;
637 return DOUTPAR;
638 } else if (idigit(c)) { /* handle 1< foo */
    /*
     * A digit directly followed by '<', '>' or "&>" is a file descriptor
     * for that redirection: remember it in peekfd and continue with the
     * operator character in c.  Otherwise put the lookahead back.
     */
639 d = hgetc();
640 if(d == '&') {
641 d = hgetc();
642 if(d == '>') {
643 peekfd = c - '0';
644 hungetc('>');
645 c = '&';
646 } else {
647 hungetc(d);
648 lexstop = 0;
649 hungetc('&');
650 }
651 } else if (d == '>' || d == '<') {
652 peekfd = c - '0';
653 c = d;
654 } else {
655 hungetc(d);
656 lexstop = 0;
657 }
658 }
659
660 /* chars in initial position in word */
661
662 /*
663 * Handle comments. There are some special cases when this
664 * is not normal command input: lexflags implies we are examining
665 * a line lexically without it being used for normal command input.
666 */
667 if (c == hashchar && !nocomments &&
668 (isset(INTERACTIVECOMMENTS) ||
669 ((!lexflags || (lexflags & LEXFLAGS_COMMENTS)) && !expanding &&
670 (!interact || unset(SHINSTDIN) || strin)))) {
671 /* History is handled here to prevent extra *
672 * newlines being inserted into the history. */
673
    /* With LEXFLAGS_COMMENTS_KEEP the comment text is collected in tokstr. */
674 if (lexflags & LEXFLAGS_COMMENTS_KEEP) {
675 lexbuf.len = 0;
676 lexbuf.ptr = tokstr =
677 (char *)hcalloc(lexbuf.siz = LEX_HEAP_SIZE);
678 add(c);
679 }
680 hwabort();
    /* Consume the rest of the line. */
681 while ((c = ingetc()) != '\n' && !lexstop) {
682 hwaddc(c);
683 addtoline(c);
684 if (lexflags & LEXFLAGS_COMMENTS_KEEP)
685 add(c);
686 }
687
688 if (errflag)
689 peek = LEXERR;
690 else {
691 if (lexflags & LEXFLAGS_COMMENTS_KEEP) {
    /* Return the comment as a STRING, leaving the newline unread. */
692 *lexbuf.ptr = '\0';
693 if (!lexstop)
694 hungetc(c);
695 peek = STRING;
696 } else {
697 hwend();
698 hwbegin(0);
699 hwaddc('\n');
700 addtoline('\n');
701 /*
702 * If splitting a line and removing comments,
703 * we don't want a newline token since it's
704 * treated specially.
705 */
706 if ((lexflags & LEXFLAGS_COMMENTS_STRIP) && lexstop)
707 peek = ENDINPUT;
708 else
709 peek = NEWLIN;
710 }
711 }
712 return peek;
713 }
    /*
     * Dispatch on the first character.  A case that ends in "break"
     * has decided the character starts an ordinary word, which is then
     * read by gettokstr() below.
     */
714 switch (lexact1[STOUC(c)]) {
715 case LX1_BKSLASH:
716 d = hgetc();
717 if (d == '\n')
718 goto beginning;
719 hungetc(d);
720 lexstop = 0;
721 break;
722 case LX1_NEWLIN:
723 return NEWLIN;
724 case LX1_SEMI:
725 d = hgetc();
726 if(d == ';')
727 return DSEMI;
728 else if(d == '&')
729 return SEMIAMP;
730 else if (d == '|')
731 return SEMIBAR;
732 hungetc(d);
733 lexstop = 0;
734 return SEMI;
735 case LX1_AMPER:
736 d = hgetc();
737 if (d == '&')
738 return DAMPER;
739 else if (d == '!' || d == '|')
740 return AMPERBANG;
741 else if (d == '>') {
    /* "&>" family: redirections that may carry a leading descriptor. */
742 tokfd = peekfd;
743 d = hgetc();
744 if (d == '!' || d == '|')
745 return OUTANGAMPBANG;
746 else if (d == '>') {
747 d = hgetc();
748 if (d == '!' || d == '|')
749 return DOUTANGAMPBANG;
750 hungetc(d);
751 lexstop = 0;
752 return DOUTANGAMP;
753 }
754 hungetc(d);
755 lexstop = 0;
756 return AMPOUTANG;
757 }
758 hungetc(d);
759 lexstop = 0;
760 return AMPER;
761 case LX1_BAR:
762 d = hgetc();
763 if (d == '|' && !incasepat)
764 return DBAR;
765 else if (d == '&')
766 return BARAMP;
767 hungetc(d);
768 lexstop = 0;
769 return BAR;
770 case LX1_INPAR:
771 d = hgetc();
772 if (d == '(') {
773 if (infor) {
    /* "((" after "for": the following tokens are read as math. */
774 dbparens = 1;
775 return DINPAR;
776 }
777 if (incmdpos || (isset(SHGLOB) && !isset(KSHGLOB))) {
778 lexbuf.len = 0;
779 lexbuf.ptr = tokstr = (char *)
780 hcalloc(lexbuf.siz = LEX_HEAP_SIZE);
781 switch (cmd_or_math(CS_MATH)) {
782 case CMD_OR_MATH_MATH:
783 return DINPAR;
784
785 case CMD_OR_MATH_CMD:
786 /*
787 * Not math, so we don't return the contents
788 * as a string in this case.
789 */
790 tokstr = NULL;
791 return INPAR;
792
793 case CMD_OR_MATH_ERR:
794 /*
795 * LEXFLAGS_ACTIVE means we came from bufferwords(),
796 * so we treat as an incomplete math expression
797 */
798 if (lexflags & LEXFLAGS_ACTIVE)
799 tokstr = dyncat("((", tokstr ? tokstr : "");
800 /* fall through */
801
802 default:
803 return LEXERR;
804 }
805 }
806 } else if (d == ')')
807 return INOUTPAR;
808 hungetc(d);
809 lexstop = 0;
    /* Without SHGLOB, outside [[ ]] and command position, '(' starts a word. */
810 if (!(isset(SHGLOB) || incond == 1 || incmdpos))
811 break;
812 return INPAR;
813 case LX1_OUTPAR:
814 return OUTPAR;
815 case LX1_INANG:
816 d = hgetc();
817 if (d == '(') {
818 hungetc(d);
819 lexstop = 0;
    /*
     * Shared exit for "<(", ">(" and numeric globs: this is a word, not a
     * redirection.  If a digit had been taken as a descriptor, push the
     * operator character back and make the digit the first character of
     * the word again.
     */
820 unpeekfd:
821 if(peekfd != -1) {
822 hungetc(c);
823 c = '0' + peekfd;
824 }
825 break;
826 }
827 if (d == '>') {
828 peek = INOUTANG;
829 } else if (d == '<') {
830 int e = hgetc();
831
832 if (e == '(') {
    /* "<<(" is a plain '<' followed by a word starting "<(". */
833 hungetc(e);
834 hungetc(d);
835 peek = INANG;
836 } else if (e == '<')
837 peek = TRINANG;
838 else if (e == '-')
839 peek = DINANGDASH;
840 else {
841 hungetc(e);
842 lexstop = 0;
843 peek = DINANG;
844 }
845 } else if (d == '&') {
846 peek = INANGAMP;
847 } else {
848 hungetc(d);
849 if(isnumglob())
850 goto unpeekfd;
851 peek = INANG;
852 }
853 tokfd = peekfd;
854 return peek;
855 case LX1_OUTANG:
856 d = hgetc();
857 if (d == '(') {
858 hungetc(d);
859 goto unpeekfd;
860 } else if (d == '&') {
861 d = hgetc();
862 if (d == '!' || d == '|')
863 peek = OUTANGAMPBANG;
864 else {
865 hungetc(d);
866 lexstop = 0;
867 peek = OUTANGAMP;
868 }
869 } else if (d == '!' || d == '|')
870 peek = OUTANGBANG;
871 else if (d == '>') {
872 d = hgetc();
873 if (d == '&') {
874 d = hgetc();
875 if (d == '!' || d == '|')
876 peek = DOUTANGAMPBANG;
877 else {
878 hungetc(d);
879 lexstop = 0;
880 peek = DOUTANGAMP;
881 }
882 } else if (d == '!' || d == '|')
883 peek = DOUTANGBANG;
884 else if (d == '(') {
    /* ">>(" is a plain '>' followed by a word starting ">(". */
885 hungetc(d);
886 hungetc('>');
887 peek = OUTANG;
888 } else {
889 hungetc(d);
890 lexstop = 0;
891 peek = DOUTANG;
    /* With HISTALLOWCLOBBER a '|' is added to the history text. */
892 if (isset(HISTALLOWCLOBBER))
893 hwaddc('|');
894 }
895 } else {
896 hungetc(d);
897 lexstop = 0;
898 peek = OUTANG;
899 if (!incond && isset(HISTALLOWCLOBBER))
900 hwaddc('|');
901 }
902 tokfd = peekfd;
903 return peek;
904 }
905
906 /* we've started a string, now get the *
907 * rest of it, performing tokenization */
908 return gettokstr(c, 0);
909 }
910
911 /*
912 * Get the remains of a token string. This has two uses.
913 * When called from gettok(), with sub = 0, we have already identified
914 * any interesting initial character and want to get the rest of
915 * what we now know is a string. However, the string may still include
916 * metacharacters and potentially substitutions.
917 *
918 * When called from parse_subst_string() with sub = 1, we are not
919 * fully parsing a command line, merely tokenizing a string.
920 * In this case we always add characters to the parsed string
921 * unless there is a parse error.
922 */
923
/*
 * Arguments: c is the first character of the string (already read),
 * sub is non-zero when only tokenising a string rather than lexing a
 * command line.  With sub zero a fresh buffer is allocated for tokstr;
 * with sub non-zero characters are appended to the existing buffer.
 *
 * Returns STRING normally, ENVSTRING or ENVARRAY for assignments,
 * INPAR or INOUTPAR in the function-definition special cases, and
 * LEXERR on a parse error or when errflag is set.
 */
924 /**/
925 static enum lextok
gettokstr(int c,int sub)926 gettokstr(int c, int sub)
927 {
928 int bct = 0, pct = 0, brct = 0, seen_brct = 0, fdpar = 0;
929 int intpos = 1, in_brace_param = 0;
930 int inquote, unmatched = 0;
931 enum lextok peek;
932 #ifdef DEBUG
933 int ocmdsp = cmdsp;
934 #endif
    /*
     * bct, pct, brct: nesting depth of braces, parentheses and brackets.
     * seen_brct:      set once a '[' has been seen outside ${...}.
     * in_brace_param: brace depth at which the current ${ was opened,
     *                 or 0 when not inside ${...}.
     * intpos:         non-zero while the current character counts as
     *                 being at the start of the word.
     * fdpar:          set while a "( )" at the start of the word has so
     *                 far contained only blanks.
     * unmatched:      the quote character left unclosed, if any.
     */
935
936 peek = STRING;
937 if (!sub) {
938 lexbuf.len = 0;
939 lexbuf.ptr = tokstr = (char *) hcalloc(lexbuf.siz = LEX_HEAP_SIZE);
940 }
941 for (;;) {
942 int act;
943 int e;
944 int inbl = inblank(c);
945
946 if (fdpar && !inbl && c != ')')
947 fdpar = 0;
948
    /*
     * A blank ends the word unless we are inside ${...} or parentheses;
     * otherwise look up the action and the token code for c.
     */
949 if (inbl && !in_brace_param && !pct)
950 act = LX2_BREAK;
951 else {
952 act = lexact2[STOUC(c)];
953 c = lextok2[STOUC(c)];
954 }
955 switch (act) {
956 case LX2_BREAK:
957 if (!in_brace_param && !sub)
958 goto brk;
959 break;
960 case LX2_META:
    /* Add the Meta marker now; the character after it is added below. */
961 c = hgetc();
962 #ifdef DEBUG
963 if (lexstop) {
964 fputs("BUG: input terminated by Meta\n", stderr);
965 fflush(stderr);
966 goto brk;
967 }
968 #endif
969 add(Meta);
970 break;
971 case LX2_OUTPAR:
972 if (fdpar) {
973 /* this is a single word `( )', treat as INOUTPAR */
974 add(c);
975 *lexbuf.ptr = '\0';
976 return INOUTPAR;
977 }
978 if ((sub || in_brace_param) && isset(SHGLOB))
979 break;
    /* An unbalanced ')' ends the word (or is kept literally if sub). */
980 if (!in_brace_param && !pct--) {
981 if (sub) {
982 pct = 0;
983 break;
984 } else
985 goto brk;
986 }
987 c = Outpar;
988 break;
989 case LX2_BAR:
990 if (!pct && !in_brace_param) {
991 if (sub)
992 break;
993 else
994 goto brk;
995 }
996 if (unset(SHGLOB) || (!sub && !in_brace_param))
997 c = Bar;
998 break;
999 case LX2_STRING:
    /* '$': the next character selects $[...], $(...) or ${...}. */
1000 e = hgetc();
1001 if (e == '[') {
1002 cmdpush(CS_MATHSUBST);
1003 add(String);
1004 add(Inbrack);
1005 c = dquote_parse(']', sub);
1006 cmdpop();
1007 if (c) {
1008 peek = LEXERR;
1009 goto brk;
1010 }
1011 c = Outbrack;
1012 } else if (e == '(') {
1013 add(String);
1014 switch (cmd_or_math_sub()) {
1015 case CMD_OR_MATH_CMD:
1016 c = Outpar;
1017 break;
1018
1019 case CMD_OR_MATH_MATH:
1020 c = Outparmath;
1021 break;
1022
1023 default:
1024 peek = LEXERR;
1025 goto brk;
1026 }
1027 } else {
1028 if (e == '{') {
1029 add(c);
1030 c = Inbrace;
1031 ++bct;
1032 cmdpush(CS_BRACEPAR);
    /* Remember the depth of the outermost ${ only. */
1033 if (!in_brace_param) {
1034 if ((in_brace_param = bct))
1035 seen_brct = 0;
1036 }
1037 } else {
1038 hungetc(e);
1039 lexstop = 0;
1040 }
1041 }
1042 break;
1043 case LX2_INBRACK:
1044 if (!in_brace_param) {
1045 brct++;
1046 seen_brct = 1;
1047 }
1048 c = Inbrack;
1049 break;
1050 case LX2_OUTBRACK:
1051 if (!in_brace_param)
1052 brct--;
1053 if (brct < 0)
1054 brct = 0;
1055 c = Outbrack;
1056 break;
1057 case LX2_INPAR:
1058 if (isset(SHGLOB)) {
1059 if (sub || in_brace_param)
1060 break;
1061 if (incasepat > 0 && !lexbuf.len)
1062 return INPAR;
1063 if (!isset(KSHGLOB) && lexbuf.len)
1064 goto brk;
1065 }
1066 if (!in_brace_param) {
1067 if (!sub) {
    /* Peek at the character after '(' without consuming it. */
1068 e = hgetc();
1069 hungetc(e);
1070 lexstop = 0;
1071 /* For command words, parentheses are only
1072 * special at the start. But now we're tokenising
1073 * the remaining string. So I don't see what
1074 * the old incmdpos test here is for.
1075 * pws 1999/6/8
1076 *
1077 * Oh, no.
1078 * func1( )
1079 * is a valid function definition in [k]sh. The best
1080 * thing we can do, without really nasty lookahead tricks,
1081 * is break if we find a blank after a parenthesis. At
1082 * least this can't happen inside braces or brackets. We
1083 * only allow this with SHGLOB (set for both sh and ksh).
1084 *
1085 * Things like `print @( |foo)' should still
1086 * work, because [k]sh don't allow multiple words
1087 * in a function definition, so we only do this
1088 * in command position.
1089 * pws 1999/6/14
1090 */
1091 if (e == ')' || (isset(SHGLOB) && inblank(e) && !bct &&
1092 !brct && !intpos && incmdpos)) {
1093 /*
1094 * Either a () token, or a command word with
1095 * something suspiciously like a ksh function
1096 * definition.
1097 * The current word isn't spellcheckable.
1098 */
1099 nocorrect |= 2;
1100 goto brk;
1101 }
1102 }
1103 /*
1104 * This also handles the [k]sh `foo( )' function definition.
1105 * Maintain a variable fdpar, set as long as a single set of
1106 * parentheses contains only space. Then if we get to the
1107 * closing parenthesis and it is still set, we can assume we
1108 * have a function definition. Only do this at the start of
1109 * the word, since the (...) must be a separate token.
1110 */
1111 if (!pct++ && isset(SHGLOB) && intpos && !bct && !brct)
1112 fdpar = 1;
1113 }
1114 c = Inpar;
1115 break;
1116 case LX2_INBRACE:
1117 if (isset(IGNOREBRACES) || sub)
1118 c = '{';
1119 else {
    /* A '{' alone at the start of a command word is returned by itself. */
1120 if (!lexbuf.len && incmdpos) {
1121 add('{');
1122 *lexbuf.ptr = '\0';
1123 return STRING;
1124 }
1125 if (in_brace_param) {
1126 cmdpush(CS_BRACE);
1127 }
1128 bct++;
1129 }
1130 break;
1131 case LX2_OUTBRACE:
1132 if ((isset(IGNOREBRACES) || sub) && !in_brace_param)
1133 break;
1134 if (!bct)
1135 break;
1136 if (in_brace_param) {
1137 cmdpop();
1138 }
    /* Closing the brace level at which ${ was opened ends the parameter. */
1139 if (bct-- == in_brace_param)
1140 in_brace_param = 0;
1141 c = Outbrace;
1142 break;
1143 case LX2_COMMA:
1144 if (unset(IGNOREBRACES) && !sub && bct > in_brace_param)
1145 c = Comma;
1146 break;
1147 case LX2_OUTANG:
1148 if (in_brace_param || sub)
1149 break;
    /* Only ">(" continues the word; any other '>' ends it. */
1150 e = hgetc();
1151 if (e != '(') {
1152 hungetc(e);
1153 lexstop = 0;
1154 goto brk;
1155 }
1156 add(OutangProc);
1157 if (skipcomm()) {
1158 peek = LEXERR;
1159 goto brk;
1160 }
1161 c = Outpar;
1162 break;
1163 case LX2_INANG:
1164 if (isset(SHGLOB) && sub)
1165 break;
1166 e = hgetc();
1167 if (!(in_brace_param || sub) && e == '(') {
1168 add(Inang);
1169 if (skipcomm()) {
1170 peek = LEXERR;
1171 goto brk;
1172 }
1173 c = Outpar;
1174 break;
1175 }
1176 hungetc(e);
    /* isnumglob() has verified a closing '>' exists, so copy up to it. */
1177 if(isnumglob()) {
1178 add(Inang);
1179 while ((c = hgetc()) != '>')
1180 add(c);
1181 c = Outang;
1182 break;
1183 }
1184 lexstop = 0;
1185 if (in_brace_param || sub)
1186 break;
1187 goto brk;
1188 case LX2_EQUALS:
1189 if (!sub) {
1190 if (intpos) {
    /* '=' at the start of a word: "=(" begins a substitution. */
1191 e = hgetc();
1192 if (e != '(') {
1193 hungetc(e);
1194 lexstop = 0;
1195 c = Equals;
1196 } else {
1197 add(Equals);
1198 if (skipcomm()) {
1199 peek = LEXERR;
1200 goto brk;
1201 }
1202 c = Outpar;
1203 }
1204 } else if (peek != ENVSTRING &&
1205 (incmdpos || intypeset) && !bct && !brct) {
    /*
     * Possible assignment: scan what has been collected so far.  It must
     * be all digits, or an identifier optionally followed by a
     * bracketed subscript, with an optional trailing '+'.  If the scan
     * reaches the current end of the buffer this '=' makes the word an
     * assignment.
     */
1206 char *t = tokstr;
1207 if (idigit(*t))
1208 while (++t < lexbuf.ptr && idigit(*t));
1209 else {
1210 int sav = *lexbuf.ptr;
1211 *lexbuf.ptr = '\0';
1212 t = itype_end(t, IIDENT, 0);
1213 if (t < lexbuf.ptr) {
1214 skipparens(Inbrack, Outbrack, &t);
1215 } else {
1216 *lexbuf.ptr = sav;
1217 }
1218 }
1219 if (*t == '+')
1220 t++;
1221 if (t == lexbuf.ptr) {
1222 e = hgetc();
1223 if (e == '(') {
1224 *lexbuf.ptr = '\0';
1225 return ENVARRAY;
1226 }
1227 hungetc(e);
1228 lexstop = 0;
1229 peek = ENVSTRING;
    /*
     * intpos is decremented once after this '=' is added, so setting it
     * to 2 leaves it non-zero for the character that follows the '='.
     */
1230 intpos = 2;
1231 } else
1232 c = Equals;
1233 } else
1234 c = Equals;
1235 }
1236 break;
1237 case LX2_BKSLASH:
1238 c = hgetc();
1239 if (c == '\n') {
    /* Backslash-newline is dropped; carry on with the next character. */
1240 c = hgetc();
1241 if (!lexstop)
1242 continue;
1243 } else {
1244 add(Bnull);
1245 if (c == STOUC(Meta)) {
1246 c = hgetc();
1247 #ifdef DEBUG
1248 if (lexstop) {
1249 fputs("BUG: input terminated by Meta\n", stderr);
1250 fflush(stderr);
1251 goto brk;
1252 }
1253 #endif
1254 add(Meta);
1255 }
1256 }
1257 if (lexstop)
1258 goto brk;
1259 break;
1260 case LX2_QUOTE: {
    /* strquote is set for $'...', i.e. when the quote follows a String token. */
1261 int strquote = (lexbuf.len && lexbuf.ptr[-1] == String);
1262
1263 add(Snull);
1264 cmdpush(CS_QUOTE);
1265 for (;;) {
1266 STOPHIST
1267 while ((c = hgetc()) != '\'' && !lexstop) {
1268 if (strquote && c == '\\') {
1269 c = hgetc();
1270 if (lexstop)
1271 break;
1272 /*
1273 * Mostly we don't need to do anything special
1274 * with escape backslashes or closing quotes
1275 * inside $'...'; however in completion we
1276 * need to be able to strip multiple backslashes
1277 * neatly.
1278 */
1279 if (c == '\\' || c == '\'')
1280 add(Bnull);
1281 else
1282 add('\\');
1283 } else if (!sub && isset(CSHJUNKIEQUOTES) && c == '\n') {
1284 if (lexbuf.ptr[-1] == '\\')
1285 lexbuf.ptr--, lexbuf.len--;
1286 else
1287 break;
1288 }
1289 add(c);
1290 }
1291 ALLOWHIST
1292 if (c != '\'') {
1293 unmatched = '\'';
1294 /* Not an error when called from bufferwords() */
1295 if (!(lexflags & LEXFLAGS_ACTIVE))
1296 peek = LEXERR;
1297 cmdpop();
1298 goto brk;
1299 }
    /* With RCQUOTES, '' inside a plain '...' is a literal quote. */
1300 e = hgetc();
1301 if (e != '\'' || unset(RCQUOTES) || strquote)
1302 break;
1303 add(c);
1304 }
1305 cmdpop();
1306 hungetc(e);
1307 lexstop = 0;
1308 c = Snull;
1309 break;
1310 }
1311 case LX2_DQUOTE:
1312 add(Dnull);
1313 cmdpush(CS_DQUOTE);
1314 c = dquote_parse('"', sub);
1315 cmdpop();
1316 if (c) {
1317 unmatched = '"';
1318 /* Not an error when called from bufferwords() */
1319 if (!(lexflags & LEXFLAGS_ACTIVE))
1320 peek = LEXERR;
1321 goto brk;
1322 }
1323 c = Dnull;
1324 break;
1325 case LX2_BQUOTE:
1326 add(Tick);
1327 cmdpush(CS_BQUOTE);
1328 SETPARBEGIN
1329 inquote = 0;
1330 while ((c = hgetc()) != '`' && !lexstop) {
1331 if (c == '\\') {
1332 c = hgetc();
1333 if (c != '\n') {
1334 add(c == '`' || c == '\\' || c == '$' ? Bnull : '\\');
1335 add(c);
1336 }
1337 else if (!sub && isset(CSHJUNKIEQUOTES))
1338 add(c);
1339 } else {
1340 if (!sub && isset(CSHJUNKIEQUOTES) && c == '\n') {
1341 break;
1342 }
1343 add(c);
    /* Toggle STOPHIST/ALLOWHIST on each single quote inside the backquotes. */
1344 if (c == '\'') {
1345 if ((inquote = !inquote))
1346 STOPHIST
1347 else
1348 ALLOWHIST
1349 }
1350 }
1351 }
1352 if (inquote)
1353 ALLOWHIST
1354 cmdpop();
1355 if (c != '`') {
1356 unmatched = '`';
1357 /* Not an error when called from bufferwords() */
1358 if (!(lexflags & LEXFLAGS_ACTIVE))
1359 peek = LEXERR;
1360 goto brk;
1361 }
1362 c = Tick;
1363 SETPAREND
1364 break;
1365 case LX2_DASH:
1366 /*
1367 * - shouldn't be treated as a special character unless
1368 * we're in a pattern. Unfortunately, working out for
1369 * sure in complicated expressions whether we're in a
1370 * pattern is tricky. So we'll make it special and
1371 * turn it back any time we don't need it special.
1372 * This is not ideal as it's a lot of work.
1373 */
1374 c = Dash;
1375 break;
1376 case LX2_BANG:
1377 /*
1378 * Same logic as Dash, for ! to perform negation in range.
1379 */
1380 if (seen_brct)
1381 c = Bang;
1382 else
1383 c = '!';
1384 }
    /* Store the (possibly tokenised) character and fetch the next one. */
1385 add(c);
1386 c = hgetc();
1387 if (intpos)
1388 intpos--;
1389 if (lexstop)
1390 break;
1391 }
    /* End of word: c holds the character that terminated it. */
1392 brk:
1393 if (errflag) {
    /* Pop the command-stack entries pushed for unclosed ${ braces. */
1394 if (in_brace_param) {
1395 while(bct-- >= in_brace_param)
1396 cmdpop();
1397 }
1398 return LEXERR;
1399 }
1400 hungetc(c);
1401 if (unmatched && !(lexflags & LEXFLAGS_ACTIVE))
1402 zerr("unmatched %c", unmatched);
1403 if (in_brace_param) {
1404 while(bct-- >= in_brace_param)
1405 cmdpop();
1406 zerr("closing brace expected");
1407 } else if (unset(IGNOREBRACES) && !sub && lexbuf.len > 1 &&
1408 peek == STRING && lexbuf.ptr[-1] == '}' &&
1409 lexbuf.ptr[-2] != Bnull) {
1410 /* hack to get {foo} command syntax work */
1411 lexbuf.ptr--;
1412 lexbuf.len--;
1413 lexstop = 0;
1414 hungetc('}');
1415 }
1416 *lexbuf.ptr = '\0';
1417 DPUTS(cmdsp != ocmdsp, "BUG: gettok: cmdstack changed.");
1418 return peek;
1419 }
1420
1421
1422 /*
1423 * Parse input as if in double quotes.
1424 * endchar is the end character to expect.
1425 * sub has got something to do with whether we are doing quoted substitution.
1426 * Return non-zero for error (character to unget), else zero
1427 */
1428
1429 /**/
1430 static int
dquote_parse(char endchar,int sub)1431 dquote_parse(char endchar, int sub)
1432 {
1433 int pct = 0, brct = 0, bct = 0, intick = 0, err = 0;
1434 int c;
1435 int math = endchar == ')' || endchar == ']' || infor;
1436 int zlemath = math && zlemetacs > zlemetall + addedx - inbufct;
1437
1438 while (((c = hgetc()) != endchar || bct ||
1439 (math && ((pct > 0) || (brct > 0))) ||
1440 intick) && !lexstop) {
1441 cont:
1442 switch (c) {
1443 case '\\':
1444 c = hgetc();
1445 if (c != '\n') {
1446 if (c == '$' || c == '\\' || (c == '}' && !intick && bct) ||
1447 c == endchar || c == '`' ||
1448 (endchar == ']' && (c == '[' || c == ']' ||
1449 c == '(' || c == ')' ||
1450 c == '{' || c == '}' ||
1451 (c == '"' && sub))))
1452 add(Bnull);
1453 else {
1454 /* lexstop is implicitly handled here */
1455 add('\\');
1456 goto cont;
1457 }
1458 } else if (sub || unset(CSHJUNKIEQUOTES) || endchar != '"')
1459 continue;
1460 break;
1461 case '\n':
1462 err = !sub && isset(CSHJUNKIEQUOTES) && endchar == '"';
1463 break;
1464 case '$':
1465 if (intick)
1466 break;
1467 c = hgetc();
1468 if (c == '(') {
1469 add(Qstring);
1470 switch (cmd_or_math_sub()) {
1471 case CMD_OR_MATH_CMD:
1472 c = Outpar;
1473 break;
1474
1475 case CMD_OR_MATH_MATH:
1476 c = Outparmath;
1477 break;
1478
1479 default:
1480 err = 1;
1481 break;
1482 }
1483 } else if (c == '[') {
1484 add(String);
1485 add(Inbrack);
1486 cmdpush(CS_MATHSUBST);
1487 err = dquote_parse(']', sub);
1488 cmdpop();
1489 c = Outbrack;
1490 } else if (c == '{') {
1491 add(Qstring);
1492 c = Inbrace;
1493 cmdpush(CS_BRACEPAR);
1494 bct++;
1495 } else if (c == '$')
1496 add(Qstring);
1497 else {
1498 hungetc(c);
1499 lexstop = 0;
1500 c = Qstring;
1501 }
1502 break;
1503 case '}':
1504 if (intick || !bct)
1505 break;
1506 c = Outbrace;
1507 bct--;
1508 cmdpop();
1509 break;
1510 case '`':
1511 c = Qtick;
1512 if (intick == 2)
1513 ALLOWHIST
1514 if ((intick = !intick)) {
1515 SETPARBEGIN
1516 cmdpush(CS_BQUOTE);
1517 } else {
1518 SETPAREND
1519 cmdpop();
1520 }
1521 break;
1522 case '\'':
1523 if (!intick)
1524 break;
1525 if (intick == 1)
1526 intick = 2, STOPHIST
1527 else
1528 intick = 1, ALLOWHIST
1529 break;
1530 case '(':
1531 if (!math || !bct)
1532 pct++;
1533 break;
1534 case ')':
1535 if (!math || !bct)
1536 err = (!pct-- && math);
1537 break;
1538 case '[':
1539 if (!math || !bct)
1540 brct++;
1541 break;
1542 case ']':
1543 if (!math || !bct)
1544 err = (!brct-- && math);
1545 break;
1546 case '"':
1547 if (intick || (endchar != '"' && !bct))
1548 break;
1549 if (bct) {
1550 add(Dnull);
1551 cmdpush(CS_DQUOTE);
1552 err = dquote_parse('"', sub);
1553 cmdpop();
1554 c = Dnull;
1555 } else
1556 err = 1;
1557 break;
1558 }
1559 if (err || lexstop)
1560 break;
1561 add(c);
1562 }
1563 if (intick == 2)
1564 ALLOWHIST
1565 if (intick) {
1566 cmdpop();
1567 }
1568 while (bct--)
1569 cmdpop();
1570 if (lexstop)
1571 err = intick || endchar || err;
1572 else if (err == 1) {
1573 /*
1574 * TODO: as far as I can see, this hack is used in gettokstr()
1575 * to hungetc() a character on an error. However, I don't
1576 * understand what that actually gets us, and we can't guarantee
1577 * it's a character anyway, because of the previous test.
1578 *
1579 * We use the same feature in cmd_or_math where we actually do
1580 * need to unget if we decide it's really a command substitution.
1581 * We try to handle the other case by testing for lexstop.
1582 */
1583 err = c;
1584 }
1585 if (zlemath && zlemetacs <= zlemetall + 1 - inbufct)
1586 inwhat = IN_MATH;
1587 return err;
1588 }
1589
1590 /*
1591 * Tokenize a string given in s. Parsing is done as in double
1592 * quotes. This is usually called before singsub().
1593 *
1594 * parsestr() is noisier, reporting an error if the parse failed.
1595 *
1596 * On entry, *s must point to a string allocated from the stack of
1597 * exactly the right length, i.e. strlen(*s) + 1, as the string
1598 * is used as the lexical token string whose memory management
1599 * demands this. Usually the input string will therefore be
1600 * the result of an immediately preceding dupstring().
1601 */
1602
1603 /**/
1604 mod_export int
parsestr(char ** s)1605 parsestr(char **s)
1606 {
1607 int err;
1608
1609 if ((err = parsestrnoerr(s))) {
1610 untokenize(*s);
1611 if (!(errflag & ERRFLAG_INT)) {
1612 if (err > 32 && err < 127)
1613 zerr("parse error near `%c'", err);
1614 else
1615 zerr("parse error");
1616 tok = LEXERR;
1617 }
1618 }
1619 return err;
1620 }
1621
1622 /**/
1623 mod_export int
parsestrnoerr(char ** s)1624 parsestrnoerr(char **s)
1625 {
1626 int l = strlen(*s), err;
1627
1628 zcontext_save();
1629 untokenize(*s);
1630 inpush(dupstring_wlen(*s, l), 0, NULL);
1631 strinbeg(0);
1632 lexbuf.len = 0;
1633 lexbuf.ptr = tokstr = *s;
1634 lexbuf.siz = l + 1;
1635 err = dquote_parse('\0', 1);
1636 if (tokstr)
1637 *s = tokstr;
1638 *lexbuf.ptr = '\0';
1639 strinend();
1640 inpop();
1641 DPUTS(cmdsp, "BUG: parsestr: cmdstack not empty.");
1642 zcontext_restore();
1643 return err;
1644 }
1645
1646 /*
1647 * Parse a subscript in string s.
1648 * sub is passed down to dquote_parse().
1649 * endchar is the final character.
1650 * Return the next character, or NULL.
1651 */
1652 /**/
1653 mod_export char *
parse_subscript(char * s,int sub,int endchar)1654 parse_subscript(char *s, int sub, int endchar)
1655 {
1656 int l = strlen(s), err, toklen;
1657 char *t;
1658
1659 if (!*s || *s == endchar)
1660 return 0;
1661 zcontext_save();
1662 untokenize(t = dupstring_wlen(s, l));
1663 inpush(t, 0, NULL);
1664 strinbeg(0);
1665 /*
1666 * Warning to Future Generations:
1667 *
1668 * This way of passing the subscript through the lexer is brittle.
1669 * Code above this for several layers assumes that when we tokenise
1670 * the input it goes into the same place as the original string.
1671 * However, the lexer may overwrite later bits of the string or
1672 * reallocate it, in particular when expanding aliaes. To get
1673 * around this, we copy the string and then copy it back. This is a
1674 * bit more robust but still relies on the underlying assumption of
1675 * length preservation.
1676 */
1677 lexbuf.len = 0;
1678 lexbuf.ptr = tokstr = dupstring_wlen(s, l);
1679 lexbuf.siz = l + 1;
1680 err = dquote_parse(endchar, sub);
1681 toklen = (int)(lexbuf.ptr - tokstr);
1682 DPUTS(toklen > l, "Bad length for parsed subscript");
1683 memcpy(s, tokstr, toklen);
1684 if (err) {
1685 char *strend = s + toklen;
1686 err = *strend;
1687 *strend = '\0';
1688 untokenize(s);
1689 *strend = err;
1690 s = NULL;
1691 } else {
1692 s += toklen;
1693 }
1694 strinend();
1695 inpop();
1696 DPUTS(cmdsp, "BUG: parse_subscript: cmdstack not empty.");
1697 zcontext_restore();
1698 return s;
1699 }
1700
1701 /* Tokenize a string given in s. Parsing is done as if s were a normal *
1702 * command-line argument but it may contain separators. This is used *
1703 * to parse the right-hand side of ${...%...} substitutions. */
1704
1705 /**/
1706 mod_export int
parse_subst_string(char * s)1707 parse_subst_string(char *s)
1708 {
1709 int c, l = strlen(s), err;
1710 char *ptr;
1711 enum lextok ctok;
1712
1713 if (!*s || !strcmp(s, nulstring))
1714 return 0;
1715 zcontext_save();
1716 untokenize(s);
1717 inpush(dupstring_wlen(s, l), 0, NULL);
1718 strinbeg(0);
1719 lexbuf.len = 0;
1720 lexbuf.ptr = tokstr = s;
1721 lexbuf.siz = l + 1;
1722 c = hgetc();
1723 ctok = gettokstr(c, 1);
1724 err = errflag;
1725 strinend();
1726 inpop();
1727 DPUTS(cmdsp, "BUG: parse_subst_string: cmdstack not empty.");
1728 zcontext_restore();
1729 /* Keep any interrupt error status */
1730 errflag = err | (errflag & ERRFLAG_INT);
1731 if (ctok == LEXERR) {
1732 untokenize(s);
1733 return 1;
1734 }
1735 #ifdef DEBUG
1736 /*
1737 * Historical note: we used to check here for olen (the value of lexbuf.len
1738 * before zcontext_restore()) == l, but that's not necessarily the case if
1739 * we stripped an RCQUOTE.
1740 */
1741 if (ctok != STRING || (errflag && !noerrs)) {
1742 fprintf(stderr, "Oops. Bug in parse_subst_string: %s\n",
1743 errflag ? "errflag" : "ctok != STRING");
1744 fflush(stderr);
1745 untokenize(s);
1746 return 1;
1747 }
1748 #endif
1749 /* Check for $'...' quoting. This needs special handling. */
1750 for (ptr = s; *ptr; )
1751 {
1752 if (*ptr == String && ptr[1] == Snull)
1753 {
1754 char *t;
1755 int len, tlen, diff;
1756 t = getkeystring(ptr + 2, &len, GETKEYS_DOLLARS_QUOTE, NULL);
1757 len += 2;
1758 tlen = strlen(t);
1759 diff = len - tlen;
1760 /*
1761 * Yuk.
1762 * parse_subst_string() currently handles strings in-place.
1763 * That's not so easy to fix without knowing whether
1764 * additional memory should come off the heap or
1765 * otherwise. So we cheat by copying the unquoted string
1766 * into place, unless it's too long. That's not the
1767 * normal case, but I'm worried there are pathological
1768 * cases with converting metafied multibyte strings.
1769 * If someone can prove there aren't I will be very happy.
1770 */
1771 if (diff < 0) {
1772 DPUTS(1, "$'...' subst too long: fix get_parse_string()");
1773 return 1;
1774 }
1775 memcpy(ptr, t, tlen);
1776 ptr += tlen;
1777 if (diff > 0) {
1778 char *dptr = ptr;
1779 char *sptr = ptr + diff;
1780 while ((*dptr++ = *sptr++))
1781 ;
1782 }
1783 } else
1784 ptr++;
1785 }
1786 return 0;
1787 }
1788
1789 /* Called below to report word positions. */
1790
1791 /**/
1792 static void
gotword(void)1793 gotword(void)
1794 {
1795 int nwe = zlemetall + 1 - inbufct + (addedx == 2 ? 1 : 0);
1796 if (zlemetacs <= nwe) {
1797 int nwb = zlemetall - wordbeg + addedx;
1798 if (zlemetacs >= nwb) {
1799 wb = nwb;
1800 we = nwe;
1801 } else {
1802 wb = zlemetacs + addedx;
1803 if (we < wb)
1804 we = wb;
1805 }
1806 lexflags = 0;
1807 }
1808 }
1809
1810 /* Check if current lex text matches an alias: 1 if so, else 0 */
1811
1812 static int
checkalias(void)1813 checkalias(void)
1814 {
1815 Alias an;
1816
1817 if (!zshlextext)
1818 return 0;
1819
1820 if (!noaliases && isset(ALIASESOPT) &&
1821 (!isset(POSIXALIASES) ||
1822 (tok == STRING && !reswdtab->getnode(reswdtab, zshlextext)))) {
1823 char *suf;
1824
1825 an = (Alias) aliastab->getnode(aliastab, zshlextext);
1826 if (an && !an->inuse &&
1827 ((an->node.flags & ALIAS_GLOBAL) ||
1828 (incmdpos && tok == STRING) || inalmore)) {
1829 if (!lexstop) {
1830 /*
1831 * Tokens that don't require a space after, get one,
1832 * because they are treated as if preceded by one.
1833 */
1834 int c = hgetc();
1835 hungetc(c);
1836 if (!iblank(c))
1837 inpush(" ", INP_ALIAS, 0);
1838 }
1839 inpush(an->text, INP_ALIAS, an);
1840 if (an->text[0] == ' ' && !(an->node.flags & ALIAS_GLOBAL))
1841 aliasspaceflag = 1;
1842 lexstop = 0;
1843 return 1;
1844 }
1845 if ((suf = strrchr(zshlextext, '.')) && suf[1] &&
1846 suf > zshlextext && suf[-1] != Meta &&
1847 (an = (Alias)sufaliastab->getnode(sufaliastab, suf+1)) &&
1848 !an->inuse && incmdpos) {
1849 inpush(dupstring(zshlextext), INP_ALIAS, an);
1850 inpush(" ", INP_ALIAS, NULL);
1851 inpush(an->text, INP_ALIAS, NULL);
1852 lexstop = 0;
1853 return 1;
1854 }
1855 }
1856
1857 return 0;
1858 }
1859
1860 /* expand aliases and reserved words */
1861
1862 /**/
1863 int
exalias(void)1864 exalias(void)
1865 {
1866 Reswd rw;
1867
1868 hwend();
1869 if (interact && isset(SHINSTDIN) && !strin && incasepat <= 0 &&
1870 tok == STRING && !nocorrect && !(inbufflags & INP_ALIAS) &&
1871 (isset(CORRECTALL) || (isset(CORRECT) && incmdpos)))
1872 spckword(&tokstr, 1, incmdpos, 1);
1873
1874 if (!tokstr) {
1875 zshlextext = tokstrings[tok];
1876
1877 if (tok == NEWLIN)
1878 return 0;
1879 return checkalias();
1880 } else {
1881 VARARR(char, copy, (strlen(tokstr) + 1));
1882
1883 if (has_token(tokstr)) {
1884 char *p, *t;
1885
1886 zshlextext = p = copy;
1887 for (t = tokstr;
1888 (*p++ = itok(*t) ? ztokens[*t++ - Pound] : *t++););
1889 } else
1890 zshlextext = tokstr;
1891
1892 if ((lexflags & LEXFLAGS_ZLE) && !(inbufflags & INP_ALIAS)) {
1893 int zp = lexflags;
1894
1895 gotword();
1896 if ((zp & LEXFLAGS_ZLE) && !lexflags) {
1897 if (zshlextext == copy)
1898 zshlextext = tokstr;
1899 return 0;
1900 }
1901 }
1902
1903 if (tok == STRING) {
1904 /* Check for an alias */
1905 if ((zshlextext != copy || !isset(POSIXALIASES)) && checkalias()) {
1906 if (zshlextext == copy)
1907 zshlextext = tokstr;
1908 return 1;
1909 }
1910
1911 /* Then check for a reserved word */
1912 if ((incmdpos ||
1913 (unset(IGNOREBRACES) && unset(IGNORECLOSEBRACES) &&
1914 zshlextext[0] == '}' && !zshlextext[1])) &&
1915 (rw = (Reswd) reswdtab->getnode(reswdtab, zshlextext))) {
1916 tok = rw->token;
1917 inrepeat_ = (tok == REPEAT);
1918 if (tok == DINBRACK)
1919 incond = 1;
1920 } else if (incond && !strcmp(zshlextext, "]]")) {
1921 tok = DOUTBRACK;
1922 incond = 0;
1923 } else if (incond == 1 && zshlextext[0] == '!' && !zshlextext[1])
1924 tok = BANG;
1925 }
1926 inalmore = 0;
1927 if (zshlextext == copy)
1928 zshlextext = tokstr;
1929 }
1930 return 0;
1931 }
1932
1933 /**/
1934 void
zshlex_raw_add(int c)1935 zshlex_raw_add(int c)
1936 {
1937 if (!lex_add_raw)
1938 return;
1939
1940 *lexbuf_raw.ptr++ = c;
1941 if (lexbuf_raw.siz == ++lexbuf_raw.len) {
1942 int newbsiz = lexbuf_raw.siz * 2;
1943
1944 tokstr_raw = (char *)hrealloc(tokstr_raw, lexbuf_raw.siz, newbsiz);
1945 lexbuf_raw.ptr = tokstr_raw + lexbuf_raw.len;
1946 memset(lexbuf_raw.ptr, 0, newbsiz - lexbuf_raw.siz);
1947 lexbuf_raw.siz = newbsiz;
1948 }
1949 }
1950
1951 /**/
1952 void
zshlex_raw_back(void)1953 zshlex_raw_back(void)
1954 {
1955 if (!lex_add_raw)
1956 return;
1957 lexbuf_raw.ptr--;
1958 lexbuf_raw.len--;
1959 }
1960
1961 /**/
1962 int
zshlex_raw_mark(int offset)1963 zshlex_raw_mark(int offset)
1964 {
1965 if (!lex_add_raw)
1966 return 0;
1967 return lexbuf_raw.len + offset;
1968 }
1969
1970 /**/
1971 void
zshlex_raw_back_to_mark(int mark)1972 zshlex_raw_back_to_mark(int mark)
1973 {
1974 if (!lex_add_raw)
1975 return;
1976 lexbuf_raw.ptr = tokstr_raw + mark;
1977 lexbuf_raw.len = mark;
1978 }
1979
1980 /*
1981 * Skip (...) for command-style substitutions: $(...), <(...), >(...)
1982 *
1983 * In order to ensure we don't stop at closing parentheses with
1984 * some other syntactic significance, we'll parse the input until
1985 * we find an unmatched closing parenthesis. However, we'll throw
1986 * away the result of the parsing and just keep the string we've built
1987 * up on the way.
1988 */
1989
1990 /**/
1991 static int
skipcomm(void)1992 skipcomm(void)
1993 {
1994 #ifdef ZSH_OLD_SKIPCOMM
1995 int pct = 1, c, start = 1;
1996
1997 cmdpush(CS_CMDSUBST);
1998 SETPARBEGIN
1999 c = Inpar;
2000 do {
2001 int iswhite;
2002 add(c);
2003 c = hgetc();
2004 if (itok(c) || lexstop)
2005 break;
2006 iswhite = inblank(c);
2007 switch (c) {
2008 case '(':
2009 pct++;
2010 break;
2011 case ')':
2012 pct--;
2013 break;
2014 case '\\':
2015 add(c);
2016 c = hgetc();
2017 break;
2018 case '\'': {
2019 int strquote = lexbuf.ptr[-1] == '$';
2020 add(c);
2021 STOPHIST
2022 while ((c = hgetc()) != '\'' && !lexstop) {
2023 if (c == '\\' && strquote) {
2024 add(c);
2025 c = hgetc();
2026 }
2027 add(c);
2028 }
2029 ALLOWHIST
2030 break;
2031 }
2032 case '\"':
2033 add(c);
2034 while ((c = hgetc()) != '\"' && !lexstop)
2035 if (c == '\\') {
2036 add(c);
2037 add(hgetc());
2038 } else
2039 add(c);
2040 break;
2041 case '`':
2042 add(c);
2043 while ((c = hgetc()) != '`' && !lexstop)
2044 if (c == '\\')
2045 add(c), add(hgetc());
2046 else
2047 add(c);
2048 break;
2049 case '#':
2050 if (start) {
2051 add(c);
2052 while ((c = hgetc()) != '\n' && !lexstop)
2053 add(c);
2054 iswhite = 1;
2055 }
2056 break;
2057 }
2058 start = iswhite;
2059 }
2060 while (pct);
2061 if (!lexstop)
2062 SETPAREND
2063 cmdpop();
2064 return lexstop;
2065 #else
2066 char *new_tokstr;
2067 int new_lexstop, new_lex_add_raw;
2068 int save_infor = infor;
2069 struct lexbufstate new_lexbuf;
2070
2071 infor = 0;
2072 cmdpush(CS_CMDSUBST);
2073 SETPARBEGIN
2074 add(Inpar);
2075
2076 new_lex_add_raw = lex_add_raw + 1;
2077 if (!lex_add_raw) {
2078 /*
2079 * We'll combine the string so far with the input
2080 * read in for the command substitution. To do this
2081 * we'll just propagate the current tokstr etc. as the
2082 * variables used for adding raw input, and
2083 * ensure we swap those for the real tokstr etc. at the end.
2084 *
2085 * However, we need to save and restore the rest of the
2086 * lexical and parse state as we're effectively parsing
2087 * an internal string. Because we're still parsing it from
2088 * the original input source (we have to --- we don't know
2089 * when to stop inputting it otherwise and can't rely on
2090 * the input being recoverable until we've read it) we need
2091 * to keep the same history context.
2092 */
2093 new_tokstr = tokstr;
2094 new_lexbuf = lexbuf;
2095
2096 /*
2097 * If we're expanding an alias at this point, we need the whole
2098 * remaining text as part of the string for the command in
2099 * parentheses, so don't backtrack. This is different from the
2100 * usual case where the alias is fully within the command, where
2101 * we want the unexpanded text so that it will be expanded
2102 * again when the command in the parentheses is executed.
2103 *
2104 * I never wanted to be a software engineer, you know.
2105 */
2106 if (inbufflags & INP_ALIAS)
2107 inbufflags |= INP_RAW_KEEP;
2108 zcontext_save_partial(ZCONTEXT_LEX|ZCONTEXT_PARSE);
2109 hist_in_word(1);
2110 } else {
2111 /*
2112 * Set up for nested command substitution, however
2113 * we don't actually need the string until we get
2114 * back to the top level and recover the lot.
2115 * The $() body just appears empty.
2116 *
2117 * We do need to propagate the raw variables which would
2118 * otherwise by cleared, though.
2119 */
2120 new_tokstr = tokstr_raw;
2121 new_lexbuf = lexbuf_raw;
2122
2123 zcontext_save_partial(ZCONTEXT_LEX|ZCONTEXT_PARSE);
2124 }
2125 tokstr_raw = new_tokstr;
2126 lexbuf_raw = new_lexbuf;
2127 lex_add_raw = new_lex_add_raw;
2128 /*
2129 * Don't do any ZLE specials down here: they're only needed
2130 * when we return the string from the recursive parse.
2131 * (TBD: this probably means we should be initialising lexflags
2132 * more consistently.)
2133 *
2134 * Note that in that case we're still using the ZLE line reading
2135 * function at the history layer --- this is consistent with the
2136 * intention of maintaining the history and input layers across
2137 * the recursive parsing.
2138 *
2139 * Also turn off LEXFLAGS_NEWLINE because this is already skipping
2140 * across the entire construct, and parse_event() needs embedded
2141 * newlines to be "real" when looking for the OUTPAR token.
2142 */
2143 lexflags &= ~(LEXFLAGS_ZLE|LEXFLAGS_NEWLINE);
2144 dbparens = 0; /* restored by zcontext_restore_partial() */
2145
2146 if (!parse_event(OUTPAR) || tok != OUTPAR) {
2147 if (strin) {
2148 /*
2149 * Get the rest of the string raw since we don't
2150 * know where this token ends.
2151 */
2152 while (!lexstop)
2153 (void)ingetc();
2154 } else
2155 lexstop = 1;
2156 }
2157 /* Outpar lexical token gets added in caller if present */
2158
2159 /*
2160 * We're going to keep the full raw input string
2161 * as the current token string after popping the stack.
2162 */
2163 new_tokstr = tokstr_raw;
2164 new_lexbuf = lexbuf_raw;
2165 /*
2166 * We're also going to propagate the lexical state:
2167 * if we couldn't parse the command substitution we
2168 * can't continue.
2169 */
2170 new_lexstop = lexstop;
2171
2172 zcontext_restore_partial(ZCONTEXT_LEX|ZCONTEXT_PARSE);
2173
2174 if (lex_add_raw) {
2175 /*
2176 * Keep going, so retain the raw variables.
2177 */
2178 tokstr_raw = new_tokstr;
2179 lexbuf_raw = new_lexbuf;
2180 } else {
2181 if (!new_lexstop) {
2182 /* Ignore the ')' added on input */
2183 new_lexbuf.len--;
2184 *--new_lexbuf.ptr = '\0';
2185 }
2186
2187 /*
2188 * Convince the rest of lex.c we were examining a string
2189 * all along.
2190 */
2191 tokstr = new_tokstr;
2192 lexbuf = new_lexbuf;
2193 lexstop = new_lexstop;
2194 hist_in_word(0);
2195 }
2196
2197 if (!lexstop)
2198 SETPAREND
2199 cmdpop();
2200 infor = save_infor;
2201
2202 return lexstop;
2203 #endif
2204 }
2205