xref: /reactos/base/shell/cmd/parser.c (revision 8a92b556)
1 /*
2  *  PARSER.C - Command-line Lexical Analyzer/Tokenizer and Parser.
3  */
4 
5 #include "precomp.h"
6 
7 /*
8  * Defines for enabling different Windows' CMD compatibility behaviours.
9  */
10 
11 /* Enable this define for command echoer compatibility */
12 #define MSCMD_ECHO_COMMAND_COMPAT
13 
14 /* Enable this define for parser quirks (see UnParseToken() for more details).
 * When it is defined, the token-reparsing state (bReparseToken and
 * LastCurTokPos) is compiled out, and ParseTokenEx() always rebuilds the
 * current token from the start of the CurrentToken buffer. */
15 #define MSCMD_PARSER_BUGS
16 
17 /* Enable this define for parenthesized blocks parsing quirks */
18 // #define MSCMD_PARENS_PARSE_BUGS
19 
20 /* Enable this define for redirection parsing quirks.
 * When it is defined, ParseRedirection() always fetches the next token as the
 * redirection target (unless the redirection is a "&<digit>" handle
 * duplication), and ParseBlock() tries to interpret every token following the
 * closing parenthesis as a redirection instead of only TOK_REDIRECTION ones. */
21 #define MSCMD_REDIR_PARSE_BUGS
22 
23 /* Enable this define for allowing '&' commands with an empty RHS.
24  * The default behaviour is to just return the LHS instead.
25  * See ParseCommandBinaryOp() for details. */
26 // #define MSCMD_MULTI_EMPTY_RHS
27 
28 
29 /*
30  * Parser debugging support. These flags are global so that their values can be
31  * modified at runtime from a debugger. They correspond to the public Windows'
32  * cmd!fDumpTokens and cmd!fDumpParse booleans.
33  * (Same names are used for compatibility as they are documented online.)
34  */
/* When TRUE, ParseTokenEx() and UnParseToken() print each token they
 * produce / push back to the console. */
35 BOOLEAN fDumpTokens = FALSE;
/* NOTE(review): presumably enables dumping of the parsed command tree;
 * its consumer is not in this part of the file -- confirm. */
36 BOOLEAN fDumpParse  = FALSE;
37 
/* Range of the binary command operators, from lowest to highest.
 * ParseBlock(), ParseIf() and ParseFor() start parsing their sub-commands
 * at C_OP_LOWEST. */
38 #define C_OP_LOWEST C_MULTI
39 #define C_OP_HIGHEST C_PIPE
/* Textual form of the binary command operators. Each entry holds at most two
 * characters plus the terminating NUL.
 * NOTE(review): presumably indexed by (command type - C_OP_LOWEST), i.e. in
 * the order C_MULTI, C_OR, C_AND, C_PIPE -- confirm against COMMAND_TYPE. */
40 static const TCHAR OpString[][3] = { _T("&"), _T("||"), _T("&&"), _T("|") };
41 
/* Textual form of the redirection operators.
 * NOTE(review): presumably indexed by REDIR_MODE (read, write, append) --
 * confirm against the REDIR_MODE declaration. */
42 static const TCHAR RedirString[][3] = { _T("<"), _T(">"), _T(">>") };
43 
/*
 * Names of the IF operators. ParseIf() looks an operator up by comparing the
 * current token (case-insensitively) against IfOperatorString[Cmd->If.Operator],
 * so the order of the entries must match the numeric values of the IF operator
 * identifiers (IF_CMDEXTVERSION, IF_DEFINED, IF_EQU, IF_GEQ, ...), which are
 * declared elsewhere -- TODO confirm the two stay in sync.
 * The unary operators come first (up to IF_MAX_UNARY), followed by the
 * comparison operators (up to IF_MAX_COMPARISON).
 */
44 static const TCHAR* const IfOperatorString[] =
45 {
46     /** Unary operators **/
47 
48     /* Standard */
49     _T("errorlevel"),
50     _T("exist"),
51 
52     /* Extended */
53     _T("cmdextversion"),
54     _T("defined"),
/* Last unary operator; ParseIf() scans the operators [0, IF_MAX_UNARY] first */
55 #define IF_MAX_UNARY IF_DEFINED
56 
57     /** Binary operators **/
58 
59     /* Standard */
60     _T("=="),
61 
62     /* Extended */
63     _T("equ"),
64     _T("neq"),
65     _T("lss"),
66     _T("leq"),
67     _T("gtr"),
68     _T("geq"),
/* Last comparison operator; upper bound of the second scan in ParseIf() */
69 #define IF_MAX_COMPARISON IF_GEQ
70 };
71 
72 static __inline BOOL IsSeparator(TCHAR Char)
73 {
74     return _istspace(Char) || (Char && !!_tcschr(STANDARD_SEPS, Char));
75 }
76 
/* Token categories produced by ParseTokenEx() */
77 typedef enum _TOK_TYPE
78 {
    /* Nothing could be tokenized: end of the input, a newline, or another
     * terminating character that is not one of the operators below */
79     TOK_END,
    /* Plain text: a command name, an argument, a file name... */
80     TOK_NORMAL,
    /* '@', '&', '&&', '|' or '||' */
81     TOK_OPERATOR,
    /* Optional handle digit followed by '<', '>', '<<' or '>>', optionally
     * followed by '&' and a digit */
82     TOK_REDIRECTION,
    /* '(' */
83     TOK_BEGIN_BLOCK,
    /* ')' */
84     TOK_END_BLOCK
85 } TOK_TYPE;
86 
87 /* Scratch buffer for temporary command substitutions / expansions */
88 static TCHAR TempBuf[CMDLINE_LENGTH];
89 
90 /*static*/ BOOL bParseError;
91 static BOOL bLineContinuations;
92 /*static*/ TCHAR ParseLine[CMDLINE_LENGTH];
93 static PTCHAR ParsePos;
94 static PTCHAR OldParsePos;
95 
96 BOOL bIgnoreParserComments = TRUE;
97 BOOL bHandleContinuations  = TRUE;
98 
99 static TCHAR CurrentToken[CMDLINE_LENGTH];
100 static TOK_TYPE CurrentTokenType = TOK_END;
101 #ifndef MSCMD_PARSER_BUGS
102 static BOOL bReparseToken = FALSE;
103 static PTCHAR LastCurTokPos;
104 #endif
105 static INT InsideBlock = 0;
106 
107 static VOID ResetParser(IN PTCHAR Pos)
108 {
109     bParseError = FALSE;
110     ParsePos = Pos;
111     OldParsePos = ParsePos;
112 }
113 
114 /*
115  * This function "refetches" the last parsed token back into the stream
116  * for later reparsing -- since the way of lexing it is context-dependent.
117  * This "feature" is at the root of many obscure CMD parsing quirks,
118  * due to the fact this feature is in opposition with line-continuation.
119  * Indeed, when a stream of characters has a line-continuation, the lexer-
120  * parser will parse the stream up to the end of the line, then will
121  * reset the parser state and position back to the beginning of the line
122  * before accepting the rest of the character stream and continuing
123  * parsing them. This means that all the non-parsed characters before the
124  * line-continuation have been lost. Of course, their parsed form is now
125  * within the current parsed token. However, suppose now we need to
126  * unparse this token for reparsing it a different way later on. If we
127  * somehow pushed the already-parsed current token back into the beginning
128  * of the character stream, besides the complications of moving up the
129  * characters in the stream buffer, we would basically have "new" data
130  * that has been already parsed one way, to be now parsed another way.
131  * If instead we had saved somehow the unparsed form of the token, and
132  * we push back that form into the stream buffer for reparsing, we would
133  * encounter again the line-continuation, that, depending on which
134  * context the token is reparsed, would cause problems:
135  * e.g. in the case of REM command parsing, the parser would stop at the
136  * first line-continuation.
137  *
138  * When MSCMD_PARSER_BUGS is undefined, the UnParseToken() / ParseToken()
139  * cycle keeps the current token in its buffer, but also saves the start
140  * position corresponding to the batch of characters that have been parsed
141  * during the last line-continuation. The next ParseToken() would then
142  * reparse these latest charcters and the result replaces the last part
143  * in the current token.
144  *
145  * For example, a first parsing of
146  *    foo^\n
147  *    bar^\n
148  *    baz
149  * would result in the current token "foobarbaz", with the start position
150  * corresponding to the batch of characters parsed during the last
151  * line-continuation pointing at "baz". The stream buffer only contains "baz" (and following data).
152  * Then UnParseToken() saves this info so that at the next ParseToken(), the "baz"
153  * part of the stream buffer gets reparsed (possibly differently) and the result
154  * would replace the "baz" part in the current token.
155  *
156  * If MSCMD_PARSER_BUGS is defined however, then the behaviour of the Windows' CMD
157  * parser applies: in the example above, the last ParseToken() call would completely
158  * replace the current token "foobarbaz" with the new result of the parsing of "baz".
159  */
160 static VOID UnParseToken(VOID)
161 {
162     ParsePos = OldParsePos;
163 
164     /* Debugging support */
165     if (fDumpTokens)
166         ConOutPrintf(_T("Ungetting: '%s'\n"), ParsePos);
167 
168 #ifndef MSCMD_PARSER_BUGS
169     bReparseToken = TRUE;
170 #endif
171 }
172 
173 static VOID InitParser(VOID)
174 {
175     *CurrentToken = 0;
176     CurrentTokenType = TOK_END;
177     InsideBlock = 0;
178 
179 #ifndef MSCMD_PARSER_BUGS
180     bReparseToken = FALSE;
181     LastCurTokPos = NULL;
182 #endif
183 
184     ResetParser(ParseLine);
185 }
186 
187 static TCHAR ParseChar(VOID)
188 {
189     TCHAR Char;
190 
191     if (bParseError)
192         return 0;
193 
194 restart:
195     /*
196      * Although CRs can be injected into a line via an environment
197      * variable substitution, the parser ignores them - they won't
198      * even separate tokens.
199      */
200     do
201     {
202         Char = *ParsePos++;
203     }
204     while (Char == _T('\r'));
205 
206     if (!Char) --ParsePos;
207     if (!Char && bLineContinuations)
208     {
209         if (!ReadLine(ParseLine, TRUE))
210         {
211             /* ^C pressed, or line was too long */
212             //
213             // FIXME: Distinguish with respect to BATCH end of file !!
214             //
215             bParseError = TRUE;
216         }
217         else
218         {
219             ResetParser(ParseLine);
220             if (*ParsePos)
221                 goto restart;
222         }
223     }
224     return Char;
225 }
226 
227 VOID ParseErrorEx(IN PCTSTR s)
228 {
229     /* Only display the first error we encounter */
230     if (!bParseError)
231         error_syntax(s);
232     bParseError = TRUE;
233 }
234 
235 static __inline VOID ParseError(VOID)
236 {
237     ParseErrorEx(CurrentTokenType != TOK_END ? CurrentToken : NULL);
238 }
239 
240 static TOK_TYPE
241 ParseTokenEx(
242     IN TCHAR PrefixOperator OPTIONAL,
243     IN TCHAR ExtraEnd OPTIONAL,
244     IN PCTSTR Separators OPTIONAL,
245     IN BOOL bHandleContinuations)
246 {
247     TOK_TYPE Type;
248     PTCHAR CurrentTokStart = CurrentToken;
249     PTCHAR Out = CurrentTokStart;
250     TCHAR Char;
251     BOOL bInQuote = FALSE;
252 
253 #ifndef MSCMD_PARSER_BUGS
254     if (bReparseToken)
255     {
256         bReparseToken = FALSE;
257 
258         /*
259          * We will append the part to be reparsed to the old one
260          * (still present in CurrentToken).
261          */
262         CurrentTokStart = LastCurTokPos;
263         Out = CurrentTokStart;
264     }
265     else
266     {
267         LastCurTokPos = CurrentToken;
268     }
269 #endif
270 
271     /* Start with what we have at current ParsePos */
272     OldParsePos = ParsePos;
273 
274     for (Char = ParseChar(); Char && Char != _T('\n'); Char = ParseChar())
275     {
276         bInQuote ^= (Char == _T('"'));
277         if (!bInQuote)
278         {
279             if (Separators != NULL)
280             {
281                 if (_istspace(Char) || !!_tcschr(Separators, Char))
282                 {
283                     /* Skip leading separators */
284                     if (Out == CurrentTokStart)
285                         continue;
286                     break;
287                 }
288             }
289 
290             /* Check for prefix operator */
291             if ((Out == CurrentTokStart) && (Char == PrefixOperator))
292                 break;
293 
294             /*
295              * Check for numbered redirection.
296              *
297              * For this purpose, we check whether this is a number, that is
298              * in first position in the current parsing buffer (remember that
299              * ParsePos points to the next character) or is preceded by a
300              * whitespace-like separator, including standard command operators
301              * (excepting '@' !) and double-quotes.
302              */
303             if ( _istdigit(Char) &&
304                  (ParsePos == &OldParsePos[1]  ||
305                      IsSeparator(ParsePos[-2]) ||
306                      !!_tcschr(_T("()&|\""), ParsePos[-2])) &&
307                  (*ParsePos == _T('<') || *ParsePos == _T('>')) )
308             {
309                 break;
310             }
311 
312             /* Check for other delimiters / operators */
313             if (Char == ExtraEnd)
314                 break;
315             if (InsideBlock && Char == _T(')'))
316                 break;
317             if (_tcschr(_T("&|<>"), Char))
318                 break;
319 
320             if (bHandleContinuations && (Char == _T('^')))
321             {
322                 Char = ParseChar();
323                 /* Eat up a \n, allowing line continuation */
324                 if (Char == _T('\n'))
325                 {
326 #ifndef MSCMD_PARSER_BUGS
327                     LastCurTokPos = Out;
328 #endif
329                     Char = ParseChar();
330                 }
331                 /* Next character is a forced literal */
332 
333                 if (Out == CurrentTokStart)
334                 {
335                     /* Ignore any prefix operator if we don't start a new command block */
336                     if (CurrentTokenType != TOK_BEGIN_BLOCK)
337                         PrefixOperator = 0;
338                 }
339             }
340         }
341         if (Out == &CurrentToken[CMDLINE_LENGTH - 1])
342             break;
343         *Out++ = Char;
344 
345         // PrefixOperator = 0;
346     }
347 
348     /*
349      * We exited the parsing loop. If the current character is the first one
350      * (Out == CurrentTokStart), interpret it as an operator. Otherwise,
351      * terminate the current token (type TOK_NORMAL) and keep the current
352      * character so that it can be refetched as an operator at the next call.
353      */
354 
355     if (Out != CurrentTokStart)
356     {
357         Type = TOK_NORMAL;
358     }
359     /*
360      * Else we have an operator.
361      */
362     else if (Char == _T('@'))
363     {
364         Type = TOK_OPERATOR; // TOK_QUIET / TOK_PREFIX_OPERATOR
365         *Out++ = Char;
366         Char = ParseChar();
367     }
368     else if (Char == _T('('))
369     {
370         Type = TOK_BEGIN_BLOCK;
371         *Out++ = Char;
372         Char = ParseChar();
373     }
374     else if (Char == _T(')'))
375     {
376         Type = TOK_END_BLOCK;
377         *Out++ = Char;
378         Char = ParseChar();
379     }
380     else if (Char == _T('&') || Char == _T('|'))
381     {
382         Type = TOK_OPERATOR;
383         *Out++ = Char;
384         Char = ParseChar();
385         /* Check for '&&' or '||' */
386         if (Char == Out[-1])
387         {
388             *Out++ = Char;
389             Char = ParseChar();
390         }
391     }
392     else if ( _istdigit(Char)  ||
393               (Char == _T('<') || Char == _T('>')) )
394     {
395         Type = TOK_REDIRECTION;
396         if (_istdigit(Char))
397         {
398             *Out++ = Char;
399             Char = ParseChar();
400         }
401         /* By construction (see the while-loop above),
402          * the next character must be a redirection. */
403         ASSERT(Char == _T('<') || Char == _T('>'));
404         *Out++ = Char;
405         Char = ParseChar();
406         if (Char == Out[-1])
407         {
408             /* Strangely, the tokenizer allows << as well as >>... (it
409              * will cause an error when trying to parse it though) */
410             *Out++ = Char;
411             Char = ParseChar();
412         }
413         if (Char == _T('&'))
414         {
415             *Out++ = Char;
416             while (IsSeparator(Char = ParseChar()))
417                 ;
418             if (_istdigit(Char))
419             {
420                 *Out++ = Char;
421                 Char = ParseChar();
422             }
423         }
424     }
425     else
426     {
427         Type = TOK_END;
428         *Out++ = Char;
429     }
430     *Out = _T('\0');
431 
432     /*
433      * Rewind the parsing position, so that the current character can be
434      * refetched later on. However do this only if it is not NULL and if
435      * this is not TOK_END, since we do not want to reparse later the line
436      * termination (we could enter into infinite loops, or, in case of line
437      * continuation, get unwanted "More?" prompts).
438      */
439     if (Char != 0 && Type != TOK_END)
440         --ParsePos;
441 
442     /* Debugging support */
443     if (fDumpTokens)
444         ConOutPrintf(_T("ParseToken: (%d) '%s'\n"), Type, CurrentToken);
445 
446     return (CurrentTokenType = Type);
447 }
448 
449 static __inline INT
450 ParseToken(
451     IN TCHAR ExtraEnd OPTIONAL,
452     IN PCTSTR Separators OPTIONAL)
453 {
454     return ParseTokenEx(0, ExtraEnd, Separators, bHandleContinuations);
455 }
456 
457 
458 static PARSED_COMMAND*
459 AllocCommand(
460     IN COMMAND_TYPE Type,
461     IN PCTSTR CmdHead OPTIONAL,
462     IN PCTSTR CmdTail OPTIONAL)
463 {
464     PARSED_COMMAND* Cmd;
465 
466     switch (Type)
467     {
468     case C_COMMAND:
469     case C_REM:
470     {
471         SIZE_T CmdHeadLen = _tcslen(CmdHead) + 1;
472         SIZE_T CmdTailLen = _tcslen(CmdTail) + 1;
473 
474         Cmd = cmd_alloc(FIELD_OFFSET(PARSED_COMMAND,
475                                      Command.First[CmdHeadLen + CmdTailLen]));
476         if (!Cmd)
477             return NULL;
478 
479         Cmd->Type = Type;
480         Cmd->Next = NULL;
481         Cmd->Subcommands = NULL;
482         Cmd->Redirections = NULL; /* Is assigned by the calling function */
483         memcpy(Cmd->Command.First, CmdHead, CmdHeadLen * sizeof(TCHAR));
484         Cmd->Command.Rest = Cmd->Command.First + CmdHeadLen;
485         memcpy(Cmd->Command.Rest, CmdTail, CmdTailLen * sizeof(TCHAR));
486         return Cmd;
487     }
488 
489     case C_QUIET:
490     case C_BLOCK:
491     case C_MULTI:
492     case C_OR:
493     case C_AND:
494     case C_PIPE:
495     {
496         Cmd = cmd_alloc(sizeof(PARSED_COMMAND));
497         if (!Cmd)
498             return NULL;
499 
500         Cmd->Type = Type;
501         Cmd->Next = NULL;
502         Cmd->Subcommands = NULL;
503         Cmd->Redirections = NULL; /* For C_BLOCK only: is assigned by the calling function */
504         return Cmd;
505     }
506 
507     case C_FOR:
508     case C_IF:
509     {
510         Cmd = cmd_alloc(sizeof(PARSED_COMMAND));
511         if (!Cmd)
512             return NULL;
513 
514         memset(Cmd, 0, sizeof(PARSED_COMMAND));
515         Cmd->Type = Type;
516         return Cmd;
517     }
518 
519     default:
520         ERR("Unknown command type 0x%x\n", Type);
521         ASSERT(FALSE);
522         return NULL;
523     }
524 }
525 
526 VOID
527 FreeCommand(
528     IN OUT PARSED_COMMAND* Cmd)
529 {
530     if (Cmd->Subcommands)
531         FreeCommand(Cmd->Subcommands);
532     if (Cmd->Next)
533         FreeCommand(Cmd->Next);
534     FreeRedirection(Cmd->Redirections);
535     if (Cmd->Type == C_FOR)
536     {
537         cmd_free(Cmd->For.Params);
538         cmd_free(Cmd->For.List);
539     }
540     else if (Cmd->Type == C_IF)
541     {
542         cmd_free(Cmd->If.LeftArg);
543         cmd_free(Cmd->If.RightArg);
544     }
545     cmd_free(Cmd);
546 }
547 
548 
549 /* Parse redirections and append them to the list */
550 static BOOL
551 ParseRedirection(
552     IN OUT REDIRECTION** List)
553 {
554     PTSTR Tok = CurrentToken;
555     REDIRECTION* Redir;
556     REDIR_MODE RedirMode;
557     BYTE Number;
558 
559     if ( !(*Tok == _T('<') || *Tok == _T('>')) &&
560          !(_istdigit(*Tok) &&
561            (Tok[1] == _T('<') || Tok[1] == _T('>')) ) )
562     {
563         ASSERT(CurrentTokenType != TOK_REDIRECTION);
564         return FALSE;
565     }
566     ASSERT((CurrentTokenType == TOK_REDIRECTION) ||
567            (CurrentTokenType == TOK_NORMAL));
568 
569     if (_istdigit(*Tok))
570         Number = *Tok++ - _T('0');
571     else
572         Number = *Tok == _T('<') ? 0 : 1;
573 
574     if (*Tok++ == _T('<'))
575     {
576         RedirMode = REDIR_READ;
577         /* Forbid '<<' */
578         if (*Tok == _T('<'))
579             goto fail;
580     }
581     else
582     {
583         RedirMode = REDIR_WRITE;
584         if (*Tok == _T('>'))
585         {
586             RedirMode = REDIR_APPEND;
587             Tok++;
588         }
589     }
590 
591     if (*Tok == _T('&'))
592     {
593         /* This is a handle redirection: the next character must be one single digit */
594         if (!(_istdigit(Tok[1]) && !Tok[2]))
595             goto fail;
596     }
597     else
598 #ifndef MSCMD_REDIR_PARSE_BUGS
599     if (!*Tok)
600         /* The file name was not part of this token, so it will be the next one */
601 #else
602         /* Get rid of what possibly remains in the token, and retrieve the next one */
603 #endif
604     {
605         if (ParseToken(0, STANDARD_SEPS) != TOK_NORMAL)
606             goto fail;
607         Tok = CurrentToken;
608     }
609 
610     /* If a redirection for this handle number already exists, delete it */
611     while ((Redir = *List))
612     {
613         if (Redir->Number == Number)
614         {
615             *List = Redir->Next;
616             cmd_free(Redir);
617             continue;
618         }
619         List = &Redir->Next;
620     }
621 
622     Redir = cmd_alloc(FIELD_OFFSET(REDIRECTION, Filename[_tcslen(Tok) + 1]));
623     if (!Redir)
624     {
625         WARN("Cannot allocate memory for Redir!\n");
626         goto fail;
627     }
628     Redir->Next = NULL;
629     Redir->OldHandle = INVALID_HANDLE_VALUE;
630     Redir->Number = Number;
631     Redir->Mode = RedirMode;
632     _tcscpy(Redir->Filename, Tok);
633     *List = Redir;
634     return TRUE;
635 
636 fail:
637     ParseError();
638     FreeRedirection(*List);
639     *List = NULL;
640     return FALSE;
641 }
642 
643 static __inline PARSED_COMMAND*
644 ParseCommandOp(
645     IN COMMAND_TYPE OpType);
646 
647 /* Parse a parenthesized block */
648 static PARSED_COMMAND*
649 ParseBlock(
650     IN OUT REDIRECTION** RedirList)
651 {
652     PARSED_COMMAND *Cmd, *Sub, **NextPtr;
653 
654     Cmd = AllocCommand(C_BLOCK, NULL, NULL);
655     if (!Cmd)
656     {
657         WARN("Cannot allocate memory for Cmd!\n");
658         ParseError();
659         return NULL;
660     }
661 
662     /* Read the block contents */
663     NextPtr = &Cmd->Subcommands;
664     ++InsideBlock;
665     while (TRUE)
666     {
667         /*
668          * Windows' CMD compatibility: Strip leading newlines in the block.
669          *
670          * Note that this behaviour is buggy, especially when MSCMD_PARSER_BUGS is defined!
671          * For example:
672          *   (foo^\n
673          *   bar)
674          * would be parsed ultimately as: '(', 'bar', ')' because the "foo^"
675          * part would be discarded due to the UnParseToken() call, since this
676          * function doesn't work across line continuations.
677          */
678         while (ParseToken(0, STANDARD_SEPS) == TOK_END && *CurrentToken == _T('\n'))
679             ;
680         if (*CurrentToken && *CurrentToken != _T('\n'))
681             UnParseToken();
682 
683         /* Break early if we have nothing else to read. We will also fail
684          * due to the fact we haven't encountered any closing parenthesis. */
685         if (!*CurrentToken /* || *CurrentToken == _T('\n') */)
686         {
687             ASSERT(CurrentTokenType == TOK_END);
688             break;
689         }
690 
691         /*
692          * NOTE: Windows' CMD uses a "CRLF" operator when dealing with
693          * newlines in parenthesized blocks, as an alternative to the
694          * '&' command-separation operator.
695          */
696 
697         Sub = ParseCommandOp(C_OP_LOWEST);
698         if (Sub)
699         {
700             *NextPtr = Sub;
701             NextPtr = &Sub->Next;
702         }
703         else if (bParseError)
704         {
705             --InsideBlock;
706             FreeCommand(Cmd);
707             return NULL;
708         }
709 
710         if (CurrentTokenType == TOK_END_BLOCK)
711             break;
712 
713         /* Skip past the \n */
714     }
715     --InsideBlock;
716 
717     /* Fail if the block was not terminated, or if we have
718      * an empty block, i.e. "( )", considered invalid. */
719     if ((CurrentTokenType != TOK_END_BLOCK) || (Cmd->Subcommands == NULL))
720     {
721         ParseError();
722         FreeCommand(Cmd);
723         return NULL;
724     }
725 
726     /* Process any trailing redirections and append them to the list */
727 #ifndef MSCMD_REDIR_PARSE_BUGS
728     while (ParseToken(0, STANDARD_SEPS) == TOK_REDIRECTION)
729     {
730         if (!ParseRedirection(RedirList))
731         {
732             FreeCommand(Cmd);
733             return NULL;
734         }
735     }
736 #else
737     while (ParseToken(0, STANDARD_SEPS) != TOK_END)
738     {
739         if (!ParseRedirection(RedirList))
740         {
741             /* If an actual error happened in ParseRedirection(), bail out */
742             if (bParseError)
743             {
744                 FreeCommand(Cmd);
745                 return NULL;
746             }
747             /* Otherwise it just returned FALSE because the current token
748              * is not a redirection. Unparse the token and refetch it. */
749             break;
750         }
751     }
752 #endif
753     if (CurrentTokenType != TOK_END)
754     {
755         /*
756          * Windows' CMD compatibility: Unparse the current token.
757          *
758          * Note that this behaviour is buggy, especially when MSCMD_PARSER_BUGS is defined!
759          * For example:
760          *   (foo^\n
761          *   bar)
762          * would be parsed ultimately as: '(', 'bar', ')' because the "foo^"
763          * part would be discarded due to the UnParseToken() call, since this
764          * function doesn't work across line continuations.
765          */
766         UnParseToken();
767 
768         /*
769          * Since it is expected that when ParseBlock() returns, the next
770          * token is already fetched, call ParseToken() again to compensate.
771          */
772         ParseToken(0, STANDARD_SEPS);
773     }
774 
775     return Cmd;
776 }
777 
778 /* Parse an IF statement */
779 static PARSED_COMMAND*
780 ParseIf(VOID)
781 {
782     PARSED_COMMAND* Cmd;
783 
784     Cmd = AllocCommand(C_IF, NULL, NULL);
785     if (!Cmd)
786     {
787         WARN("Cannot allocate memory for Cmd!\n");
788         ParseError();
789         return NULL;
790     }
791 
792     if (bEnableExtensions && (_tcsicmp(CurrentToken, _T("/I")) == 0))
793     {
794         Cmd->If.Flags |= IFFLAG_IGNORECASE;
795         ParseToken(0, STANDARD_SEPS);
796     }
797     if (_tcsicmp(CurrentToken, _T("not")) == 0)
798     {
799         Cmd->If.Flags |= IFFLAG_NEGATE;
800         ParseToken(0, STANDARD_SEPS);
801     }
802 
803     if (CurrentTokenType != TOK_NORMAL)
804         goto error;
805 
806     /* Check for unary operators */
807     for (; Cmd->If.Operator <= IF_MAX_UNARY; Cmd->If.Operator++)
808     {
809         /* Skip the extended operators if the extensions are disabled */
810         if (!bEnableExtensions && (Cmd->If.Operator >= IF_CMDEXTVERSION))
811             continue;
812 
813         if (_tcsicmp(CurrentToken, IfOperatorString[Cmd->If.Operator]) == 0)
814         {
815             if (ParseToken(0, STANDARD_SEPS) != TOK_NORMAL)
816                 goto error;
817             Cmd->If.RightArg = cmd_dup(CurrentToken);
818             goto condition_done;
819         }
820     }
821 
822     /* It must be a two-argument (comparison) operator. It could be ==, so
823      * the equals sign can't be treated as whitespace here. */
824     Cmd->If.LeftArg = cmd_dup(CurrentToken);
825     ParseToken(0, _T(",;"));
826 
827     /* The right argument can come immediately after == */
828     if (_tcsnicmp(CurrentToken, _T("=="), 2) == 0 && CurrentToken[2])
829     {
830         Cmd->If.RightArg = cmd_dup(&CurrentToken[2]);
831         goto condition_done;
832     }
833 
834     // Cmd->If.Operator == IF_MAX_UNARY + 1;
835     for (; Cmd->If.Operator <= IF_MAX_COMPARISON; Cmd->If.Operator++)
836     {
837         /* Skip the extended operators if the extensions are disabled */
838         if (!bEnableExtensions && (Cmd->If.Operator >= IF_EQU)) // (Cmd->If.Operator > IF_STRINGEQ)
839             continue;
840 
841         if (_tcsicmp(CurrentToken, IfOperatorString[Cmd->If.Operator]) == 0)
842         {
843             if (ParseToken(0, STANDARD_SEPS) != TOK_NORMAL)
844                 goto error;
845             Cmd->If.RightArg = cmd_dup(CurrentToken);
846             goto condition_done;
847         }
848     }
849     goto error;
850 
851 condition_done:
852     Cmd->Subcommands = ParseCommandOp(C_OP_LOWEST);
853     if (Cmd->Subcommands == NULL)
854         goto error;
855     if (_tcsicmp(CurrentToken, _T("else")) == 0)
856     {
857         Cmd->Subcommands->Next = ParseCommandOp(C_OP_LOWEST);
858         if (Cmd->Subcommands->Next == NULL)
859             goto error;
860     }
861 
862     return Cmd;
863 
864 error:
865     FreeCommand(Cmd);
866     ParseError();
867     return NULL;
868 }
869 
870 /*
871  * Parse a FOR command.
872  * Syntax is: FOR [options] %var IN (list) DO command
873  */
874 static PARSED_COMMAND*
875 ParseFor(VOID)
876 {
877     PARSED_COMMAND* Cmd;
878 
879     /* Use the scratch buffer */
880     PTSTR List = TempBuf;
881     PTCHAR Pos = List;
882 
883     Cmd = AllocCommand(C_FOR, NULL, NULL);
884     if (!Cmd)
885     {
886         WARN("Cannot allocate memory for Cmd!\n");
887         ParseError();
888         return NULL;
889     }
890 
891     /* Skip the extended FOR syntax if extensions are disabled */
892     if (!bEnableExtensions)
893         goto parseForBody;
894 
895     while (TRUE)
896     {
897         if (_tcsicmp(CurrentToken, _T("/D")) == 0)
898         {
899             Cmd->For.Switches |= FOR_DIRS;
900         }
901         else if (_tcsicmp(CurrentToken, _T("/F")) == 0)
902         {
903             Cmd->For.Switches |= FOR_F;
904             if (!Cmd->For.Params)
905             {
906                 ParseToken(0, STANDARD_SEPS);
907                 if (CurrentToken[0] == _T('/') || CurrentToken[0] == _T('%'))
908                     break;
909                 Cmd->For.Params = cmd_dup(CurrentToken);
910             }
911         }
912         else if (_tcsicmp(CurrentToken, _T("/L")) == 0)
913         {
914             Cmd->For.Switches |= FOR_LOOP;
915         }
916         else if (_tcsicmp(CurrentToken, _T("/R")) == 0)
917         {
918             Cmd->For.Switches |= FOR_RECURSIVE;
919             if (!Cmd->For.Params)
920             {
921                 ParseToken(0, STANDARD_SEPS);
922                 if (CurrentToken[0] == _T('/') || CurrentToken[0] == _T('%'))
923                     break;
924                 StripQuotes(CurrentToken);
925                 Cmd->For.Params = cmd_dup(CurrentToken);
926             }
927         }
928         else
929         {
930             break;
931         }
932 
933         ParseToken(0, STANDARD_SEPS);
934     }
935 
936     /* Make sure there aren't two different switches specified
937      * at the same time, unless they're /D and /R */
938     if ((Cmd->For.Switches & (Cmd->For.Switches - 1)) != 0
939         && Cmd->For.Switches != (FOR_DIRS | FOR_RECURSIVE))
940     {
941         goto error;
942     }
943 
944 parseForBody:
945 
946     /* Variable name should be % and just one other character */
947     if (CurrentToken[0] != _T('%') || _tcslen(CurrentToken) != 2)
948         goto error;
949     Cmd->For.Variable = CurrentToken[1];
950 
951     ParseToken(0, STANDARD_SEPS);
952     if (_tcsicmp(CurrentToken, _T("in")) != 0)
953         goto error;
954 
955     if (ParseToken(_T('('), STANDARD_SEPS) != TOK_BEGIN_BLOCK)
956         goto error;
957 
958     while (TRUE)
959     {
960         /* Pretend we're inside a block so the tokenizer will stop on ')' */
961         ++InsideBlock;
962         ParseToken(0, STANDARD_SEPS);
963         --InsideBlock;
964 
965         if (CurrentTokenType == TOK_END_BLOCK)
966             break;
967 
968         /* Skip past the \n */
969         if ((CurrentTokenType == TOK_END) && *CurrentToken == _T('\n'))
970             continue;
971 
972         if (CurrentTokenType != TOK_NORMAL)
973             goto error;
974 
975         if (Pos != List)
976             *Pos++ = _T(' ');
977 
978         if (Pos + _tcslen(CurrentToken) >= &List[CMDLINE_LENGTH])
979             goto error;
980         Pos = _stpcpy(Pos, CurrentToken);
981     }
982     *Pos = _T('\0');
983     Cmd->For.List = cmd_dup(List);
984 
985     ParseToken(0, STANDARD_SEPS);
986     if (_tcsicmp(CurrentToken, _T("do")) != 0)
987         goto error;
988 
989     Cmd->Subcommands = ParseCommandOp(C_OP_LOWEST);
990     if (Cmd->Subcommands == NULL)
991         goto error;
992 
993     return Cmd;
994 
995 error:
996     FreeCommand(Cmd);
997     ParseError();
998     return NULL;
999 }
1000 
1001 /* Parse a REM command */
1002 static PARSED_COMMAND*
1003 ParseRem(VOID)
1004 {
1005     PARSED_COMMAND* Cmd;
1006 
1007     /* The common scratch buffer already contains the name of the command */
1008     PTSTR ParsedLine = TempBuf;
1009 
1010     PTCHAR Pos = ParsedLine + _tcslen(ParsedLine) + 1;
1011     SIZE_T TailOffset = Pos - ParsedLine;
1012 
1013     /* Build a minimal command for REM, so that it can still get through the batch echo unparsing */
1014 
1015     /* Unparse the current token, so as to emulate the REM command parsing
1016      * behaviour of Windows' CMD, that discards everything before the last
1017      * line continuation. */
1018     UnParseToken();
1019 
1020     /*
1021      * Ignore the rest of the line, without any line continuation (but eat the caret).
1022      * We cannot simply set bLineContinuations to TRUE or FALSE, because we want (only
1023      * for the REM command), even when bLineContinuations == FALSE, to get the caret,
1024      * otherwise it would be ignored.
1025      */
1026     while (ParseTokenEx(0, 0, NULL, FALSE) != TOK_END)
1027     {
1028         if (Pos + _tcslen(CurrentToken) >= &ParsedLine[CMDLINE_LENGTH])
1029         {
1030             ParseError();
1031             return NULL;
1032         }
1033         Pos = _stpcpy(Pos, CurrentToken);
1034     }
1035     *Pos = _T('\0');
1036 
1037     Cmd = AllocCommand(C_REM,
1038                        ParsedLine,
1039                        ParsedLine + TailOffset);
1040     if (!Cmd)
1041     {
1042         WARN("Cannot allocate memory for Cmd!\n");
1043         ParseError();
1044         return NULL;
1045     }
1046     return Cmd;
1047 }
1048 
1049 /* Parse a command */
1050 static PARSED_COMMAND*
1051 ParseCommandPart(
1052     IN OUT REDIRECTION** RedirList)
1053 {
1054     PARSED_COMMAND* Cmd;
1055     PARSED_COMMAND* (*Func)(VOID);
1056 
1057     /* Use the scratch buffer */
1058     PTSTR ParsedLine = TempBuf;
1059 
1060     /* We need to copy the current token because it's going to be changed below by the ParseToken() calls */
1061     PTCHAR Pos = _stpcpy(ParsedLine, CurrentToken) + 1;
1062     SIZE_T TailOffset = Pos - ParsedLine;
1063 
1064     /* Check for special forms */
1065     if ((Func = ParseFor, _tcsicmp(ParsedLine, _T("FOR")) == 0) ||
1066         (Func = ParseIf,  _tcsicmp(ParsedLine, _T("IF"))  == 0) ||
1067         (Func = ParseRem, _tcsicmp(ParsedLine, _T("REM")) == 0))
1068     {
1069         PTCHAR pHelp;
1070 
1071         ParseToken(0, STANDARD_SEPS);
1072 
1073         if ((pHelp = _tcsstr(CurrentToken, _T("/?"))) &&
1074             (Func == ParseIf ? (pHelp[2] == _T('/') || pHelp[2] == 0) : TRUE))
1075         {
1076             /* /? was found within the first token */
1077             ParseToken(0, STANDARD_SEPS);
1078         }
1079         else
1080         {
1081             pHelp = NULL;
1082         }
1083         if (pHelp && (CurrentTokenType == TOK_NORMAL))
1084         {
1085             /* We encountered /? first, but is followed
1086              * by another token: that's an error. */
1087             ParseError();
1088             return NULL;
1089         }
1090 
1091         /* Do actual parsing only if no help is present */
1092         if (!pHelp)
1093         {
1094             /* FOR and IF commands cannot have leading redirection, but REM can */
1095             if (*RedirList && ((Func == ParseFor) || (Func == ParseIf)))
1096             {
1097                 /* Display the culprit command and fail */
1098                 ParseErrorEx(ParsedLine);
1099                 return NULL;
1100             }
1101 
1102             return Func();
1103         }
1104 
1105         /* Otherwise, run FOR,IF,REM as regular commands only for help support */
1106         if (Pos + _tcslen(_T("/?")) >= &ParsedLine[CMDLINE_LENGTH])
1107         {
1108             ParseError();
1109             return NULL;
1110         }
1111         Pos = _stpcpy(Pos, _T("/?"));
1112     }
1113     else
1114     {
1115         ParseToken(0, NULL);
1116     }
1117 
1118     /* Now get the tail */
1119     while (CurrentTokenType != TOK_END)
1120     {
1121         if (CurrentTokenType == TOK_NORMAL)
1122         {
1123             if (Pos + _tcslen(CurrentToken) >= &ParsedLine[CMDLINE_LENGTH])
1124             {
1125                 ParseError();
1126                 return NULL;
1127             }
1128             Pos = _stpcpy(Pos, CurrentToken);
1129         }
1130 #ifndef MSCMD_REDIR_PARSE_BUGS
1131         else if (CurrentTokenType == TOK_REDIRECTION)
1132         {
1133             /* Process any trailing redirections and append them to the list */
1134             while (CurrentTokenType == TOK_REDIRECTION)
1135             {
1136                 if (!ParseRedirection(RedirList))
1137                     return NULL;
1138 
1139                 ParseToken(0, STANDARD_SEPS);
1140             }
1141             if (CurrentTokenType == TOK_END)
1142                 break;
1143 
1144             /* Unparse the current token, and reparse it below with no separators */
1145             UnParseToken();
1146         }
1147         else
1148         {
1149             /* There is no need to do a UnParseToken() / ParseToken() cycle */
1150             break;
1151         }
1152 #else
1153         else
1154         {
1155             /* Process any trailing redirections and append them to the list */
1156             BOOL bSuccess = FALSE;
1157 
1158             ASSERT(CurrentTokenType != TOK_END);
1159 
1160             while (CurrentTokenType != TOK_END)
1161             {
1162                 if (!ParseRedirection(RedirList))
1163                 {
1164                     /* If an actual error happened in ParseRedirection(), bail out */
1165                     if (bParseError)
1166                         return NULL;
1167 
1168                     /* Otherwise it just returned FALSE because the current token
1169                      * is not a redirection. Unparse the token and refetch it. */
1170                     break;
1171                 }
1172                 bSuccess = TRUE;
1173 
1174                 ParseToken(0, STANDARD_SEPS);
1175             }
1176             if (CurrentTokenType == TOK_END)
1177                 break;
1178 
1179             /* Unparse the current token, and reparse it below with no separators */
1180             UnParseToken();
1181 
1182             /* If bSuccess == FALSE, we know that it's still the old fetched token, but
1183              * it has been unparsed, so we need to refetch it before quitting the loop. */
1184             if (!bSuccess)
1185             {
1186                 ParseToken(0, NULL);
1187                 break;
1188             }
1189         }
1190 #endif
1191 
1192         ParseToken(0, NULL);
1193     }
1194     *Pos = _T('\0');
1195 
1196     Cmd = AllocCommand(C_COMMAND,
1197                        ParsedLine,
1198                        ParsedLine + TailOffset);
1199     if (!Cmd)
1200     {
1201         WARN("Cannot allocate memory for Cmd!\n");
1202         ParseError();
1203         return NULL;
1204     }
1205     return Cmd;
1206 }
1207 
1208 static PARSED_COMMAND*
1209 ParsePrimary(VOID)
1210 {
1211     PARSED_COMMAND* Cmd = NULL;
1212     REDIRECTION* RedirList = NULL;
1213 
1214     /* In this context, '@' is considered as a separate token */
1215     if ((*CurrentToken == _T('@')) && (CurrentTokenType == TOK_OPERATOR))
1216     {
1217         Cmd = AllocCommand(C_QUIET, NULL, NULL);
1218         if (!Cmd)
1219         {
1220             WARN("Cannot allocate memory for Cmd!\n");
1221             ParseError();
1222             return NULL;
1223         }
1224         /* @ acts like a unary operator with low precedence,
1225          * so call the top-level parser */
1226         Cmd->Subcommands = ParseCommandOp(C_OP_LOWEST);
1227         return Cmd;
1228     }
1229 
1230     /* Process leading redirections and get the head of the command */
1231 #ifndef MSCMD_REDIR_PARSE_BUGS
1232     while (CurrentTokenType == TOK_REDIRECTION)
1233     {
1234         if (!ParseRedirection(&RedirList))
1235             return NULL;
1236 
1237         ParseToken(_T('('), STANDARD_SEPS);
1238     }
1239 #else
1240     {
1241     BOOL bSuccess = FALSE;
1242     while (CurrentTokenType != TOK_END)
1243     {
1244         if (!ParseRedirection(&RedirList))
1245         {
1246             /* If an actual error happened in ParseRedirection(), bail out */
1247             if (bParseError)
1248                 return NULL;
1249 
1250             /* Otherwise it just returned FALSE because
1251              * the current token is not a redirection. */
1252             break;
1253         }
1254         bSuccess = TRUE;
1255 
1256         ParseToken(0, STANDARD_SEPS);
1257     }
1258     if (bSuccess)
1259     {
1260         /* Unparse the current token, and reparse it with support for parenthesis */
1261         if (CurrentTokenType != TOK_END)
1262             UnParseToken();
1263 
1264         ParseToken(_T('('), STANDARD_SEPS);
1265     }
1266     }
1267 #endif
1268 
1269     if (CurrentTokenType == TOK_NORMAL)
1270         Cmd = ParseCommandPart(&RedirList);
1271     else if (CurrentTokenType == TOK_BEGIN_BLOCK)
1272         Cmd = ParseBlock(&RedirList);
1273     else if (CurrentTokenType == TOK_END_BLOCK && !RedirList)
1274         return NULL;
1275 
1276     if (Cmd)
1277     {
1278         /* FOR and IF commands cannot have leading redirection
1279          * (checked by ParseCommandPart(), errors out if so). */
1280         ASSERT(!RedirList || (Cmd->Type != C_FOR && Cmd->Type != C_IF));
1281 
1282         /* Save the redirection list in the command */
1283         Cmd->Redirections = RedirList;
1284 
1285         /* Return the new command */
1286         return Cmd;
1287     }
1288 
1289     ParseError();
1290     FreeRedirection(RedirList);
1291     return NULL;
1292 }
1293 
1294 static PARSED_COMMAND*
1295 ParseCommandBinaryOp(
1296     IN COMMAND_TYPE OpType)
1297 {
1298     PARSED_COMMAND* Cmd;
1299 
1300     if (OpType == C_OP_LOWEST) // i.e. CP_MULTI
1301     {
1302         /* Ignore any parser-level comments */
1303         if (bIgnoreParserComments && (*CurrentToken == _T(':')))
1304         {
1305             /* Ignore the rest of the line, including line continuations */
1306             while (ParseToken(0, NULL) != TOK_END)
1307                 ;
1308 #ifdef MSCMD_PARENS_PARSE_BUGS
1309             /*
1310              * Return NULL in case we are NOT inside a parenthesized block,
1311              * otherwise continue. The effects can be observed as follows:
1312              * within a parenthesized block, every second ':'-prefixed command
1313              * is not ignored, while the first of each "pair" is ignored.
1314              * This first command **MUST NOT** be followed by an empty line,
1315              * otherwise a syntax error is raised.
1316              */
1317             if (InsideBlock == 0)
1318             {
1319 #endif
1320                 return NULL;
1321 #ifdef MSCMD_PARENS_PARSE_BUGS
1322             }
1323             /* Get the next token */
1324             ParseToken(0, NULL);
1325 #endif
1326         }
1327 
1328         /*
1329          * Ignore single closing parenthesis outside of command blocks,
1330          * thus interpreted as a command. This very specific situation
1331          * can happen e.g. while running in batch mode, when jumping to
1332          * a label present inside a command block.
1333          *
1334          * NOTE: If necessary, this condition can be restricted to only
1335          * when a batch context 'bc' is active.
1336          *
1337          * NOTE 2: For further security, Windows checks that we are NOT
1338          * currently inside a parenthesized block, and also, ignores
1339          * explicitly everything (ParseToken() loop) on the same line
1340          * (including line continuations) after this closing parenthesis.
1341          *
1342          * Why doing so? Consider the following batch:
1343          *
1344          *   IF 1==1 (
1345          *   :label
1346          *       echo A
1347          *   ) ^
1348          *   ELSE (
1349          *       echo B
1350          *       exit /b
1351          *   )
1352          *   GOTO :label
1353          *
1354          * First the IF block is executed. Since the condition is trivially
1355          * true, only the first block "echo A" is executed, then execution
1356          * goes after the IF block, that is, at the GOTO. Here, the GOTO
1357          * jumps within the first IF-block, however, the running context now
1358          * is NOT an IF. So parsing and execution will go through each command,
1359          * starting with 'echo A'. But then one gets the ') ^\n ELSE (' part !!
1360          * If we want to make sense of this without bailing out due to
1361          * parsing error, we should ignore this line, **including** the line
1362          * continuation. Hence we need to loop over all the tokens following
1363          * the closing parenthesis, instead of just returning NULL straight ahead.
1364          * Then execution continues with the other commands, 'echo B' and
1365          * 'exit /b' (here to stop the code loop). Execution would also
1366          * continue (if 'exit' was replaced by something else) and encounter
1367          * the lone closing parenthesis ')', that should again be ignored.
1368          *
1369          * Note that this feature has been introduced in Win2k+.
1370          */
1371         if (/** bc && **/ (_tcscmp(CurrentToken, _T(")")) == 0) &&
1372             (CurrentTokenType != TOK_END_BLOCK))
1373         {
1374             ASSERT(InsideBlock == 0);
1375 
1376             /* Ignore the rest of the line, including line continuations */
1377             while (ParseToken(0, NULL) != TOK_END)
1378                 ;
1379             return NULL;
1380         }
1381 
1382 #ifdef MSCMD_PARENS_PARSE_BUGS
1383         /* Check whether we have an empty line only if we are not inside
1384          * a parenthesized block, and return NULL if so, otherwise do not
1385          * do anything; a syntax error will be raised later. */
1386         if (InsideBlock == 0)
1387 #endif
1388         if (!*CurrentToken || *CurrentToken == _T('\n'))
1389         {
1390             ASSERT(CurrentTokenType == TOK_END);
1391             return NULL;
1392         }
1393     }
1394 
1395     if (OpType == C_OP_HIGHEST)
1396         Cmd = ParsePrimary();
1397     else
1398         Cmd = ParseCommandBinaryOp(OpType + 1);
1399 
1400     if (Cmd && !_tcscmp(CurrentToken, OpString[OpType - C_OP_LOWEST]))
1401     {
1402         PARSED_COMMAND* Left = Cmd;
1403         PARSED_COMMAND* Right;
1404 
1405         Right = ParseCommandOp(OpType);
1406         if (!Right)
1407         {
1408             /*
1409              * The '&' operator is allowed to have an empty RHS.
1410              * In this case, we directly return the LHS only.
1411              * Note that Windows' CMD prefers building a '&'
1412              * command with an empty RHS.
1413              */
1414             if (!bParseError && (OpType != C_MULTI))
1415                 ParseError();
1416             if (bParseError)
1417             {
1418                 FreeCommand(Left);
1419                 return NULL;
1420             }
1421 
1422 #ifndef MSCMD_MULTI_EMPTY_RHS
1423             return Left;
1424 #endif
1425         }
1426 
1427         Cmd = AllocCommand(OpType, NULL, NULL);
1428         if (!Cmd)
1429         {
1430             WARN("Cannot allocate memory for Cmd!\n");
1431             ParseError();
1432             FreeCommand(Left);
1433             FreeCommand(Right);
1434             return NULL;
1435         }
1436         Cmd->Subcommands = Left;
1437         Left->Next = Right;
1438 #ifdef MSCMD_MULTI_EMPTY_RHS
1439         if (Right)
1440 #endif
1441         Right->Next = NULL;
1442     }
1443 
1444     return Cmd;
1445 }
1446 static __inline PARSED_COMMAND*
1447 ParseCommandOp(
1448     IN COMMAND_TYPE OpType)
1449 {
1450     /* Start parsing: initialize the first token */
1451 
1452     /* Parse the prefix "quiet" operator '@' as a separate command.
1453      * Thus, @@foo@bar is parsed as: '@', '@', 'foo@bar'. */
1454     ParseTokenEx(_T('@'), _T('('), STANDARD_SEPS, bHandleContinuations);
1455 
1456     return ParseCommandBinaryOp(OpType);
1457 }
1458 
1459 
1460 PARSED_COMMAND*
1461 ParseCommand(
1462     IN PCTSTR Line)
1463 {
1464     PARSED_COMMAND* Cmd;
1465 
1466     if (Line)
1467     {
1468         if (!SubstituteVars(Line, ParseLine, _T('%')))
1469             return NULL;
1470         bLineContinuations = FALSE;
1471     }
1472     else
1473     {
1474         if (!ReadLine(ParseLine, FALSE))
1475             return NULL;
1476         bLineContinuations = TRUE;
1477     }
1478 
1479     InitParser();
1480 
1481     Cmd = ParseCommandOp(C_OP_LOWEST);
1482     if (Cmd)
1483     {
1484         bIgnoreEcho = FALSE;
1485 
1486         if ((CurrentTokenType != TOK_END) &&
1487             (_tcscmp(CurrentToken, _T("\n")) != 0))
1488         {
1489             ParseError();
1490         }
1491         if (bParseError)
1492         {
1493             FreeCommand(Cmd);
1494             return NULL;
1495         }
1496 
1497         /* Debugging support */
1498         if (fDumpParse)
1499             DumpCommand(Cmd, 0);
1500     }
1501     else
1502     {
1503         bIgnoreEcho = TRUE;
1504     }
1505     return Cmd;
1506 }
1507 
1508 
1509 /*
1510  * This function is similar to EchoCommand(), but is used
1511  * for dumping the command tree for debugging purposes.
1512  */
1513 static VOID
1514 DumpRedir(
1515     IN REDIRECTION* Redirections)
1516 {
1517     REDIRECTION* Redir;
1518 
1519     if (Redirections)
1520 #ifndef MSCMD_ECHO_COMMAND_COMPAT
1521         ConOutPuts(_T(" Redir: "));
1522 #else
1523         ConOutPuts(_T("Redir: "));
1524 #endif
1525     for (Redir = Redirections; Redir; Redir = Redir->Next)
1526     {
1527         ConOutPrintf(_T(" %x %s%s"), Redir->Number,
1528                      RedirString[Redir->Mode], Redir->Filename);
1529     }
1530 }
1531 
1532 VOID
1533 DumpCommand(
1534     IN PARSED_COMMAND* Cmd,
1535     IN ULONG SpacePad)
1536 {
1537 /*
1538  * This macro is like DumpCommand(Cmd, Pad);
1539  * but avoids an extra recursive level.
1540  * Note that it can be used ONLY for terminating commands!
1541  */
1542 #define DUMP(Command, Pad) \
1543 do { \
1544     Cmd = (Command); \
1545     SpacePad = (Pad); \
1546     goto dump; \
1547 } while (0)
1548 
1549     PARSED_COMMAND* Sub;
1550 
1551 dump:
1552     if (!Cmd)
1553         return;
1554 
1555     /* Space padding */
1556     ConOutPrintf(_T("%*s"), SpacePad, _T(""));
1557 
1558     switch (Cmd->Type)
1559     {
1560     case C_COMMAND:
1561     case C_REM:
1562     {
1563         /* Generic command name, and Type */
1564 #ifndef MSCMD_ECHO_COMMAND_COMPAT
1565         ConOutPrintf(_T("Cmd: %s  Type: %x"),
1566                      Cmd->Command.First, Cmd->Type);
1567 #else
1568         ConOutPrintf(_T("Cmd: %s  Type: %x "),
1569                      Cmd->Command.First, Cmd->Type);
1570 #endif
1571         /* Arguments */
1572         if (Cmd->Command.Rest && *(Cmd->Command.Rest))
1573 #ifndef MSCMD_ECHO_COMMAND_COMPAT
1574             ConOutPrintf(_T(" Args: `%s'"), Cmd->Command.Rest);
1575 #else
1576             ConOutPrintf(_T("Args: `%s' "), Cmd->Command.Rest);
1577 #endif
1578         /* Redirections */
1579         DumpRedir(Cmd->Redirections);
1580 
1581         ConOutChar(_T('\n'));
1582         return;
1583     }
1584 
1585     case C_QUIET:
1586     {
1587 #ifndef MSCMD_ECHO_COMMAND_COMPAT
1588         ConOutChar(_T('@'));
1589 #else
1590         ConOutPuts(_T("@ "));
1591 #endif
1592         DumpRedir(Cmd->Redirections); // FIXME: Can we have leading redirections??
1593         ConOutChar(_T('\n'));
1594 
1595         /*DumpCommand*/DUMP(Cmd->Subcommands, SpacePad + 2);
1596         return;
1597     }
1598 
1599     case C_BLOCK:
1600     {
1601 #ifndef MSCMD_ECHO_COMMAND_COMPAT
1602         ConOutChar(_T('('));
1603 #else
1604         ConOutPuts(_T("( "));
1605 #endif
1606         DumpRedir(Cmd->Redirections);
1607         ConOutChar(_T('\n'));
1608 
1609         SpacePad += 2;
1610 
1611         for (Sub = Cmd->Subcommands; Sub; Sub = Sub->Next)
1612         {
1613 #if defined(MSCMD_ECHO_COMMAND_COMPAT) && defined(MSCMD_PARSER_BUGS)
1614             /*
1615              * We will emulate Windows' CMD handling of "CRLF" and "&" multi-command
1616              * enumeration within parenthesized command blocks.
1617              */
1618 
1619             if (!Sub->Next)
1620             {
1621                 DumpCommand(Sub, SpacePad);
1622                 continue;
1623             }
1624 
1625             if (Sub->Type != C_MULTI)
1626             {
1627                 ConOutPrintf(_T("%*s"), SpacePad, _T(""));
1628                 ConOutPuts(_T("CRLF \n"));
1629                 DumpCommand(Sub, SpacePad);
1630                 continue;
1631             }
1632 
1633             /* Now, Sub->Type == C_MULTI */
1634 
1635             Cmd = Sub;
1636 
1637             ConOutPrintf(_T("%*s"), SpacePad, _T(""));
1638             ConOutPrintf(_T("%s \n"), OpString[Cmd->Type - C_OP_LOWEST]);
1639             // FIXME: Can we have redirections on these operator-type commands?
1640 
1641             SpacePad += 2;
1642 
1643             Cmd = Cmd->Subcommands;
1644             DumpCommand(Cmd, SpacePad);
1645             ConOutPrintf(_T("%*s"), SpacePad, _T(""));
1646             ConOutPuts(_T("CRLF \n"));
1647             DumpCommand(Cmd->Next, SpacePad);
1648 
1649             // NOTE: Next commands will remain indented.
1650 
1651 #else
1652 
1653             /*
1654              * If this command is followed by another one, first display "CRLF".
1655              * This also emulates the CRLF placement "bug" of Windows' CMD
1656              * for the last two commands.
1657              */
1658             if (Sub->Next)
1659             {
1660                 ConOutPrintf(_T("%*s"), SpacePad, _T(""));
1661 #ifndef MSCMD_ECHO_COMMAND_COMPAT
1662                 ConOutPuts(_T("CRLF\n"));
1663 #else
1664                 ConOutPuts(_T("CRLF \n"));
1665 #endif
1666             }
1667             DumpCommand(Sub, SpacePad);
1668 
1669 #endif // defined(MSCMD_ECHO_COMMAND_COMPAT) && defined(MSCMD_PARSER_BUGS)
1670         }
1671 
1672         return;
1673     }
1674 
1675     case C_MULTI:
1676     case C_OR:
1677     case C_AND:
1678     case C_PIPE:
1679     {
1680 #ifndef MSCMD_ECHO_COMMAND_COMPAT
1681         ConOutPrintf(_T("%s\n"), OpString[Cmd->Type - C_OP_LOWEST]);
1682 #else
1683         ConOutPrintf(_T("%s \n"), OpString[Cmd->Type - C_OP_LOWEST]);
1684 #endif
1685         // FIXME: Can we have redirections on these operator-type commands?
1686 
1687         SpacePad += 2;
1688 
1689         Sub = Cmd->Subcommands;
1690         DumpCommand(Sub, SpacePad);
1691         /*DumpCommand*/DUMP(Sub->Next, SpacePad);
1692         return;
1693     }
1694 
1695     case C_FOR:
1696     {
1697         ConOutPuts(_T("for"));
1698         /* NOTE: FOR cannot have leading redirections */
1699 
1700         if (Cmd->For.Switches & FOR_DIRS)      ConOutPuts(_T(" /D"));
1701         if (Cmd->For.Switches & FOR_F)         ConOutPuts(_T(" /F"));
1702         if (Cmd->For.Switches & FOR_LOOP)      ConOutPuts(_T(" /L"));
1703         if (Cmd->For.Switches & FOR_RECURSIVE) ConOutPuts(_T(" /R"));
1704         if (Cmd->For.Params)
1705             ConOutPrintf(_T(" %s"), Cmd->For.Params);
1706         ConOutPrintf(_T(" %%%c in (%s) do\n"), Cmd->For.Variable, Cmd->For.List);
1707         /*DumpCommand*/DUMP(Cmd->Subcommands, SpacePad + 2);
1708         return;
1709     }
1710 
1711     case C_IF:
1712     {
1713         ConOutPuts(_T("if"));
1714         /* NOTE: IF cannot have leading redirections */
1715 
1716         if (Cmd->If.Flags & IFFLAG_IGNORECASE)
1717             ConOutPuts(_T(" /I"));
1718 
1719         ConOutChar(_T('\n'));
1720 
1721         SpacePad += 2;
1722 
1723         /*
1724          * Show the IF command condition as a command.
1725          * If it is negated, indent the command more.
1726          */
1727         if (Cmd->If.Flags & IFFLAG_NEGATE)
1728         {
1729             ConOutPrintf(_T("%*s"), SpacePad, _T(""));
1730             ConOutPuts(_T("not\n"));
1731             SpacePad += 2;
1732         }
1733 
1734         ConOutPrintf(_T("%*s"), SpacePad, _T(""));
1735 
1736         /*
1737          * Command name:
1738          * - Unary operator: its name is the command name, and its argument is the command argument.
1739          * - Binary operator: its LHS is the command name, its RHS is the command argument.
1740          *
1741          * Type:
1742          * Windows' CMD (Win2k3 / Win7-10) values are as follows:
1743          *   CMDEXTVERSION  Type: 0x32 / 0x34
1744          *   ERRORLEVEL     Type: 0x33 / 0x35
1745          *   DEFINED        Type: 0x34 / 0x36
1746          *   EXIST          Type: 0x35 / 0x37
1747          *   ==             Type: 0x37 / 0x39 (String Comparison)
1748          *
1749          * For the following command:
1750          *   NOT            Type: 0x36 / 0x38
1751          * Windows only prints it without any type / redirection.
1752          *
1753          * For the following command:
1754          *   EQU, NEQ, etc. Type: 0x38 / 0x3a (Generic Comparison)
1755          * Windows displays it as command of unknown type.
1756          */
1757 #ifndef MSCMD_ECHO_COMMAND_COMPAT
1758         ConOutPrintf(_T("Cmd: %s  Type: %x"),
1759                      (Cmd->If.Operator <= IF_MAX_UNARY) ?
1760                         IfOperatorString[Cmd->If.Operator] :
1761                         Cmd->If.LeftArg,
1762                      Cmd->If.Operator);
1763 #else
1764         ConOutPrintf(_T("Cmd: %s  Type: %x "),
1765                      (Cmd->If.Operator <= IF_MAX_UNARY) ?
1766                         IfOperatorString[Cmd->If.Operator] :
1767                         Cmd->If.LeftArg,
1768                      Cmd->If.Operator);
1769 #endif
1770         /* Arguments */
1771 #ifndef MSCMD_ECHO_COMMAND_COMPAT
1772         ConOutPrintf(_T(" Args: `%s'"), Cmd->If.RightArg);
1773 #else
1774         ConOutPrintf(_T("Args: `%s' "), Cmd->If.RightArg);
1775 #endif
1776 
1777         ConOutChar(_T('\n'));
1778 
1779         if (Cmd->If.Flags & IFFLAG_NEGATE)
1780         {
1781             SpacePad -= 2;
1782         }
1783 
1784         Sub = Cmd->Subcommands;
1785         DumpCommand(Sub, SpacePad);
1786         if (Sub->Next)
1787         {
1788             ConOutPrintf(_T("%*s"), SpacePad - 2, _T(""));
1789             ConOutPuts(_T("else\n"));
1790             DumpCommand(Sub->Next, SpacePad);
1791         }
1792         return;
1793     }
1794 
1795     default:
1796         ConOutPrintf(_T("*** Unknown type: %x\n"), Cmd->Type);
1797         break;
1798     }
1799 
1800 #undef DUMP
1801 }
1802 
1803 /*
1804  * Reconstruct a parse tree into text form; used for echoing
1805  * batch file commands and FOR instances.
1806  */
1807 VOID
1808 EchoCommand(
1809     IN PARSED_COMMAND* Cmd)
1810 {
1811     PARSED_COMMAND* Sub;
1812     REDIRECTION* Redir;
1813 
1814     if (!Cmd)
1815         return;
1816 
1817     switch (Cmd->Type)
1818     {
1819     case C_COMMAND:
1820     case C_REM:
1821     {
1822         if (SubstituteForVars(Cmd->Command.First, TempBuf))
1823             ConOutPrintf(_T("%s"), TempBuf);
1824         if (SubstituteForVars(Cmd->Command.Rest, TempBuf))
1825         {
1826             ConOutPrintf(_T("%s"), TempBuf);
1827 #ifdef MSCMD_ECHO_COMMAND_COMPAT
1828             /* NOTE: For Windows compatibility, add a trailing space after printing the command parameter, if present */
1829             if (*TempBuf) ConOutChar(_T(' '));
1830 #endif
1831         }
1832         break;
1833     }
1834 
1835     case C_QUIET:
1836         return;
1837 
1838     case C_BLOCK:
1839     {
1840         BOOLEAN bIsFirstCmdCRLF;
1841 
1842         ConOutChar(_T('('));
1843 
1844         Sub = Cmd->Subcommands;
1845 
1846         bIsFirstCmdCRLF = (Sub && Sub->Next);
1847 
1848 #if defined(MSCMD_ECHO_COMMAND_COMPAT) && defined(MSCMD_PARSER_BUGS)
1849         /*
1850          * We will emulate Windows' CMD handling of "CRLF" and "&" multi-command
1851          * enumeration within parenthesized command blocks.
1852          */
1853         bIsFirstCmdCRLF = bIsFirstCmdCRLF && (Sub->Type != C_MULTI);
1854 #endif
1855 
1856         /*
1857          * Single-command block: display all on one line.
1858          * Multi-command block: display commands on separate lines.
1859          */
1860         if (bIsFirstCmdCRLF)
1861             ConOutChar(_T('\n'));
1862 
1863         for (; Sub; Sub = Sub->Next)
1864         {
1865             EchoCommand(Sub);
1866             if (Sub->Next)
1867 #ifdef MSCMD_ECHO_COMMAND_COMPAT
1868                 ConOutPuts(_T(" \n "));
1869 #else
1870                 ConOutChar(_T('\n'));
1871 #endif
1872         }
1873 
1874         if (bIsFirstCmdCRLF)
1875             ConOutChar(_T('\n'));
1876 
1877 #ifdef MSCMD_ECHO_COMMAND_COMPAT
1878         /* NOTE: For Windows compatibility, add a trailing space after printing the closing parenthesis */
1879         ConOutPuts(_T(") "));
1880 #else
1881         ConOutChar(_T(')'));
1882 #endif
1883         break;
1884     }
1885 
1886     case C_MULTI:
1887     case C_OR:
1888     case C_AND:
1889     case C_PIPE:
1890     {
1891         Sub = Cmd->Subcommands;
1892         EchoCommand(Sub);
1893         ConOutPrintf(_T(" %s "), OpString[Cmd->Type - C_OP_LOWEST]);
1894         EchoCommand(Sub->Next);
1895         break;
1896     }
1897 
1898     case C_FOR:
1899     {
1900         ConOutPuts(_T("for"));
1901         if (Cmd->For.Switches & FOR_DIRS)      ConOutPuts(_T(" /D"));
1902         if (Cmd->For.Switches & FOR_F)         ConOutPuts(_T(" /F"));
1903         if (Cmd->For.Switches & FOR_LOOP)      ConOutPuts(_T(" /L"));
1904         if (Cmd->For.Switches & FOR_RECURSIVE) ConOutPuts(_T(" /R"));
1905         if (Cmd->For.Params)
1906             ConOutPrintf(_T(" %s"), Cmd->For.Params);
1907         if (Cmd->For.List && SubstituteForVars(Cmd->For.List, TempBuf))
1908             ConOutPrintf(_T(" %%%c in (%s) do "), Cmd->For.Variable, TempBuf);
1909         else
1910             ConOutPrintf(_T(" %%%c in (%s) do "), Cmd->For.Variable, Cmd->For.List);
1911         EchoCommand(Cmd->Subcommands);
1912         break;
1913     }
1914 
1915     case C_IF:
1916     {
1917         ConOutPuts(_T("if"));
1918         if (Cmd->If.Flags & IFFLAG_IGNORECASE)
1919             ConOutPuts(_T(" /I"));
1920         if (Cmd->If.Flags & IFFLAG_NEGATE)
1921             ConOutPuts(_T(" not"));
1922         if (Cmd->If.LeftArg && SubstituteForVars(Cmd->If.LeftArg, TempBuf))
1923             ConOutPrintf(_T(" %s"), TempBuf);
1924         ConOutPrintf(_T(" %s"), IfOperatorString[Cmd->If.Operator]);
1925         if (SubstituteForVars(Cmd->If.RightArg, TempBuf))
1926             ConOutPrintf(_T(" %s "), TempBuf);
1927         Sub = Cmd->Subcommands;
1928         EchoCommand(Sub);
1929         if (Sub->Next)
1930         {
1931             ConOutPuts(_T(" else "));
1932             EchoCommand(Sub->Next);
1933         }
1934         break;
1935     }
1936 
1937     default:
1938         ASSERT(FALSE);
1939         break;
1940     }
1941 
1942     for (Redir = Cmd->Redirections; Redir; Redir = Redir->Next)
1943     {
1944         if (SubstituteForVars(Redir->Filename, TempBuf))
1945         {
1946 #ifdef MSCMD_ECHO_COMMAND_COMPAT
1947             ConOutPrintf(_T("%c%s%s "),
1948                          _T('0') + Redir->Number,
1949                          RedirString[Redir->Mode], TempBuf);
1950 #else
1951             ConOutPrintf(_T(" %c%s%s"),
1952                          _T('0') + Redir->Number,
1953                          RedirString[Redir->Mode], TempBuf);
1954 #endif
1955         }
1956     }
1957 }
1958 
1959 /*
1960  * "Unparse" a command into a text form suitable for passing to CMD /C.
1961  * Used for pipes. This is basically the same thing as EchoCommand(),
1962  * but writing into a string instead of to standard output.
1963  */
1964 PTCHAR
1965 UnparseCommand(
1966     IN PARSED_COMMAND* Cmd,
1967     OUT PTCHAR Out,
1968     IN  PTCHAR OutEnd)
1969 {
1970 /*
1971  * Since this function has the annoying requirement that it must avoid
1972  * overflowing the supplied buffer, define some helper macros to make
1973  * this less painful.
1974  */
1975 #define CHAR(Char) \
1976 do { \
1977     if (Out == OutEnd) return NULL; \
1978     *Out++ = Char; \
1979 } while (0)
1980 #define STRING(String) \
1981 do { \
1982     if (Out + _tcslen(String) > OutEnd) return NULL; \
1983     Out = _stpcpy(Out, String); \
1984 } while (0)
1985 #define PRINTF(Format, ...) \
1986 do { \
1987     UINT Len = _sntprintf(Out, OutEnd - Out, Format, __VA_ARGS__); \
1988     if (Len > (UINT)(OutEnd - Out)) return NULL; \
1989     Out += Len; \
1990 } while (0)
1991 #define RECURSE(Subcommand) \
1992 do { \
1993     Out = UnparseCommand(Subcommand, Out, OutEnd); \
1994     if (!Out) return NULL; \
1995 } while (0)
1996 
1997     PARSED_COMMAND* Sub;
1998     REDIRECTION* Redir;
1999 
2000     if (!Cmd)
2001         return Out;
2002 
2003     switch (Cmd->Type)
2004     {
2005     case C_COMMAND:
2006     case C_REM:
2007     {
2008         /* This is fragile since there could be special characters, but
2009          * Windows doesn't bother escaping them, so for compatibility
2010          * we probably shouldn't do it either */
2011         if (!SubstituteForVars(Cmd->Command.First, TempBuf)) return NULL;
2012         STRING(TempBuf);
2013         if (!SubstituteForVars(Cmd->Command.Rest, TempBuf)) return NULL;
2014         STRING(TempBuf);
2015         break;
2016     }
2017 
2018     case C_QUIET:
2019     {
2020         CHAR(_T('@'));
2021         RECURSE(Cmd->Subcommands);
2022         break;
2023     }
2024 
2025     case C_BLOCK:
2026     {
2027         CHAR(_T('('));
2028         for (Sub = Cmd->Subcommands; Sub; Sub = Sub->Next)
2029         {
2030             RECURSE(Sub);
2031             if (Sub->Next)
2032                 CHAR(_T('&'));
2033         }
2034         CHAR(_T(')'));
2035         break;
2036     }
2037 
2038     case C_MULTI:
2039     case C_OR:
2040     case C_AND:
2041     case C_PIPE:
2042     {
2043         Sub = Cmd->Subcommands;
2044         RECURSE(Sub);
2045         PRINTF(_T(" %s "), OpString[Cmd->Type - C_OP_LOWEST]);
2046         RECURSE(Sub->Next);
2047         break;
2048     }
2049 
2050     case C_FOR:
2051     {
2052         STRING(_T("for"));
2053         if (Cmd->For.Switches & FOR_DIRS)      STRING(_T(" /D"));
2054         if (Cmd->For.Switches & FOR_F)         STRING(_T(" /F"));
2055         if (Cmd->For.Switches & FOR_LOOP)      STRING(_T(" /L"));
2056         if (Cmd->For.Switches & FOR_RECURSIVE) STRING(_T(" /R"));
2057         if (Cmd->For.Params)
2058             PRINTF(_T(" %s"), Cmd->For.Params);
2059         if (Cmd->For.List && SubstituteForVars(Cmd->For.List, TempBuf))
2060             PRINTF(_T(" %%%c in (%s) do "), Cmd->For.Variable, TempBuf);
2061         else
2062             PRINTF(_T(" %%%c in (%s) do "), Cmd->For.Variable, Cmd->For.List);
2063         RECURSE(Cmd->Subcommands);
2064         break;
2065     }
2066 
2067     case C_IF:
2068     {
2069         STRING(_T("if"));
2070         if (Cmd->If.Flags & IFFLAG_IGNORECASE)
2071             STRING(_T(" /I"));
2072         if (Cmd->If.Flags & IFFLAG_NEGATE)
2073             STRING(_T(" not"));
2074         if (Cmd->If.LeftArg && SubstituteForVars(Cmd->If.LeftArg, TempBuf))
2075             PRINTF(_T(" %s"), TempBuf);
2076         PRINTF(_T(" %s"), IfOperatorString[Cmd->If.Operator]);
2077         if (!SubstituteForVars(Cmd->If.RightArg, TempBuf)) return NULL;
2078         PRINTF(_T(" %s "), TempBuf);
2079         Sub = Cmd->Subcommands;
2080         RECURSE(Sub);
2081         if (Sub->Next)
2082         {
2083             STRING(_T(" else "));
2084             RECURSE(Sub->Next);
2085         }
2086         break;
2087     }
2088 
2089     default:
2090         ASSERT(FALSE);
2091         break;
2092     }
2093 
2094     for (Redir = Cmd->Redirections; Redir; Redir = Redir->Next)
2095     {
2096         if (!SubstituteForVars(Redir->Filename, TempBuf))
2097             return NULL;
2098         PRINTF(_T(" %c%s%s"), _T('0') + Redir->Number,
2099                RedirString[Redir->Mode], TempBuf);
2100     }
2101     return Out;
2102 
2103 #undef CHAR
2104 #undef STRING
2105 #undef PRINTF
2106 #undef RECURSE
2107 }
2108