1 /*
2 * PARSER.C - Command-line Lexical Analyzer/Tokenizer and Parser.
3 */
4
5 #include "precomp.h"
6
7 /*
8 * Defines for enabling different Windows' CMD compatibility behaviours.
9 */
10
11 /* Enable this define for command echoer compatibility */
12 #define MSCMD_ECHO_COMMAND_COMPAT
13
14 /* Enable this define for parser quirks (see UnParseToken() for more details) */
15 #define MSCMD_PARSER_BUGS
16
17 /* Enable this define for parenthesized blocks parsing quirks */
18 // #define MSCMD_PARENS_PARSE_BUGS
19
20 /* Enable this define for redirection parsing quirks */
21 #define MSCMD_REDIR_PARSE_BUGS
22
23 /* Enable this define for allowing '&' commands with an empty RHS.
24 * The default behaviour is to just return the LHS instead.
25 * See ParseCommandBinaryOp() for details. */
26 // #define MSCMD_MULTI_EMPTY_RHS
27
28
29 /*
30 * Parser debugging support. These flags are global so that their values can be
31 * modified at runtime from a debugger. They correspond to the public Windows'
32 * cmd!fDumpTokens and cmd!fDumpParse booleans.
33 * (Same names are used for compatibility as they are documented online.)
34 */
/* When TRUE, the tokenizer prints every token it fetches (ParseTokenEx())
 * or pushes back (UnParseToken()) to the console. */
BOOLEAN fDumpTokens = FALSE;
/* Counterpart of cmd!fDumpParse. Not read in this part of the file;
 * presumably enables dumping of the parsed command tree -- TODO confirm. */
BOOLEAN fDumpParse = FALSE;
37
/*
 * Bounds of the binary command operator types. C_OP_LOWEST is the level
 * passed to ParseCommandOp() whenever a full command has to be parsed.
 */
#define C_OP_LOWEST C_MULTI
#define C_OP_HIGHEST C_PIPE
/* Textual form of the binary command operators.
 * NOTE(review): presumably indexed by (Type - C_OP_LOWEST) -- confirm against the users. */
static const TCHAR OpString[][3] = { _T("&"), _T("||"), _T("&&"), _T("|") };

/* Textual form of the redirection operators.
 * NOTE(review): presumably indexed by REDIR_MODE -- confirm against the users. */
static const TCHAR RedirString[][3] = { _T("<"), _T(">"), _T(">>") };
43
/*
 * Textual form of the IF operators. ParseIf() indexes this table with the
 * value of Cmd->If.Operator, so the order of the entries has to follow the
 * order of the IF operator values (declared outside of this file section).
 */
static const TCHAR* const IfOperatorString[] =
{
    /** Unary operators **/

    /* Standard */
    _T("errorlevel"),
    _T("exist"),

    /* Extended */
    _T("cmdextversion"),
    _T("defined"),
/* Last unary operator: upper bound of the first lookup loop in ParseIf() */
#define IF_MAX_UNARY IF_DEFINED

    /** Binary operators **/

    /* Standard */
    _T("=="),

    /* Extended */
    _T("equ"),
    _T("neq"),
    _T("lss"),
    _T("leq"),
    _T("gtr"),
    _T("geq"),
/* Last comparison operator: upper bound of the second lookup loop in ParseIf() */
#define IF_MAX_COMPARISON IF_GEQ
};
71
IsSeparator(TCHAR Char)72 static __inline BOOL IsSeparator(TCHAR Char)
73 {
74 return _istspace(Char) || (Char && !!_tcschr(STANDARD_SEPS, Char));
75 }
76
/* Token types returned by ParseTokenEx() and cached in CurrentTokenType */
typedef enum _TOK_TYPE
{
    TOK_END,            /* Nothing more on the line: the scan stopped at the token start on e.g. NUL or '\n' */
    TOK_NORMAL,         /* Regular text (command name, argument, ...) */
    TOK_OPERATOR,       /* '@', '&', '&&', '|' or '||' */
    TOK_REDIRECTION,    /* '<', '>', '>>', optionally with a leading handle digit and a trailing '&digit' */
    TOK_BEGIN_BLOCK,    /* '(' */
    TOK_END_BLOCK       /* ')' */
} TOK_TYPE;
86
/* Scratch buffer for temporary command substitutions / expansions */
static TCHAR TempBuf[CMDLINE_LENGTH];

/* Set once a parse error has been reported (or ReadLine() failed during a
 * line continuation); cleared by ResetParser(). */
/*static*/ BOOL bParseError;
/* When TRUE, ParseChar() reads a new line with ReadLine() once it reaches
 * the end of the current one. */
static BOOL bLineContinuations;
/* The line currently being parsed */
/*static*/ TCHAR ParseLine[CMDLINE_LENGTH];
/* Next character to be fetched by ParseChar() */
static PTCHAR ParsePos;
/* Parsing position at the start of the last fetched token; restored by UnParseToken() */
static PTCHAR OldParsePos;

/* NOTE(review): not used in this part of the file -- presumably tells the
 * parser to discard comment lines; confirm against its users. */
BOOL bIgnoreParserComments = TRUE;
/* Passed by ParseToken() to ParseTokenEx(): enables the handling of the '^' character */
BOOL bHandleContinuations = TRUE;

/* Text and type of the last token fetched by ParseTokenEx() */
static TCHAR CurrentToken[CMDLINE_LENGTH];
static TOK_TYPE CurrentTokenType = TOK_END;
#ifndef MSCMD_PARSER_BUGS
/* Set by UnParseToken(): the next ParseTokenEx() call rewrites CurrentToken
 * starting from LastCurTokPos instead of from its beginning. */
static BOOL bReparseToken = FALSE;
/* Position inside CurrentToken where the text parsed after the last line continuation starts */
static PTCHAR LastCurTokPos;
#endif
/* Nesting depth of parenthesized blocks; when non-zero the tokenizer stops on ')' */
static INT InsideBlock = 0;
106
ResetParser(IN PTCHAR Pos)107 static VOID ResetParser(IN PTCHAR Pos)
108 {
109 bParseError = FALSE;
110 ParsePos = Pos;
111 OldParsePos = ParsePos;
112 }
113
114 /*
115 * This function "refetches" the last parsed token back into the stream
116 * for later reparsing -- since the way of lexing it is context-dependent.
117 * This "feature" is at the root of many obscure CMD parsing quirks,
118 * due to the fact this feature is in opposition with line-continuation.
119 * Indeed, when a stream of characters has a line-continuation, the lexer-
120 * parser will parse the stream up to the end of the line, then will
121 * reset the parser state and position back to the beginning of the line
122 * before accepting the rest of the character stream and continuing
123 * parsing them. This means that all the non-parsed characters before the
124 * line-continuation have been lost. Of course, their parsed form is now
125 * within the current parsed token. However, suppose now we need to
126 * unparse this token for reparsing it a different way later on. If we
127 * somehow pushed the already-parsed current token back into the beginning
128 * of the character stream, besides the complications of moving up the
129 * characters in the stream buffer, we would basically have "new" data
130 * that has been already parsed one way, to be now parsed another way.
131 * If instead we had saved somehow the unparsed form of the token, and
132 * we push back that form into the stream buffer for reparsing, we would
133 * encounter again the line-continuation, that, depending on which
134 * context the token is reparsed, would cause problems:
135 * e.g. in the case of REM command parsing, the parser would stop at the
136 * first line-continuation.
137 *
138 * When MSCMD_PARSER_BUGS is undefined, the UnParseToken() / ParseToken()
139 * cycle keeps the current token in its buffer, but also saves the start
140 * position corresponding to the batch of characters that have been parsed
141 * during the last line-continuation. The next ParseToken() would then
142 * reparse these latest charcters and the result replaces the last part
143 * in the current token.
144 *
145 * For example, a first parsing of
146 * foo^\n
147 * bar^\n
148 * baz
149 * would result in the current token "foobarbaz", where the start position
150 * corresponding to the batch of characters parsed during the last line-continuation
151 * being pointing at "baz". The stream buffer only contains "baz" (and following data).
152 * Then UnParseToken() saves this info so that at the next ParseToken(), the "baz"
153 * part of the stream buffer gets reparsed (possibly differently) and the result
154 * would replace the "baz" part in the current token.
155 *
156 * If MSCMD_PARSER_BUGS is defined however, then the behaviour of the Windows' CMD
157 * parser applies: in the example above, the last ParseToken() call would completely
158 * replace the current token "foobarbaz" with the new result of the parsing of "baz".
159 */
UnParseToken(VOID)160 static VOID UnParseToken(VOID)
161 {
162 ParsePos = OldParsePos;
163
164 /* Debugging support */
165 if (fDumpTokens)
166 ConOutPrintf(_T("Ungetting: '%s'\n"), ParsePos);
167
168 #ifndef MSCMD_PARSER_BUGS
169 bReparseToken = TRUE;
170 #endif
171 }
172
InitParser(VOID)173 static VOID InitParser(VOID)
174 {
175 *CurrentToken = 0;
176 CurrentTokenType = TOK_END;
177 InsideBlock = 0;
178
179 #ifndef MSCMD_PARSER_BUGS
180 bReparseToken = FALSE;
181 LastCurTokPos = NULL;
182 #endif
183
184 ResetParser(ParseLine);
185 }
186
/*
 * Fetches the next character from the parsing stream and advances ParsePos.
 *
 * Carriage returns are skipped. When the NUL terminator is reached, ParsePos
 * is left pointing at it, so that further calls keep on seeing the end of
 * the line. If in addition bLineContinuations is set, a new line is read
 * into ParseLine with ReadLine(): on success the parser is reset to the
 * start of that line and, if it is not empty, fetching restarts from there;
 * on failure bParseError is set.
 *
 * Returns the fetched character, or 0 at the end of the input or when a
 * parse error is pending.
 */
static TCHAR ParseChar(VOID)
{
    TCHAR Char;

    if (bParseError)
        return 0;

restart:
    /*
     * Although CRs can be injected into a line via an environment
     * variable substitution, the parser ignores them - they won't
     * even separate tokens.
     */
    do
    {
        Char = *ParsePos++;
    }
    while (Char == _T('\r'));

    /* Do not move past the NUL terminator */
    if (!Char) --ParsePos;
    if (!Char && bLineContinuations)
    {
        if (!ReadLine(ParseLine, TRUE))
        {
            /* ^C pressed, or line was too long */
            //
            // FIXME: Distinguish with respect to BATCH end of file !!
            //
            bParseError = TRUE;
        }
        else
        {
            /* Note that this also clears bParseError */
            ResetParser(ParseLine);
            /* Fetch the first character of the new line, unless it is empty */
            if (*ParsePos)
                goto restart;
        }
    }
    return Char;
}
226
ParseErrorEx(IN PCTSTR s)227 VOID ParseErrorEx(IN PCTSTR s)
228 {
229 /* Only display the first error we encounter */
230 if (!bParseError)
231 error_syntax(s);
232 bParseError = TRUE;
233 }
234
ParseError(VOID)235 static __inline VOID ParseError(VOID)
236 {
237 ParseErrorEx(CurrentTokenType != TOK_END ? CurrentToken : NULL);
238 }
239
/*
 * Fetches the next token from the parsing stream into CurrentToken and
 * returns its type, which is also stored into CurrentTokenType.
 *
 * PrefixOperator: if non-zero, a character that ends the scan when it is
 *     the very first character of the token, so that it gets classified
 *     by the operator checks that follow the scanning loop (the callers
 *     in this part of the file pass either 0 or '(').
 * ExtraEnd: if non-zero, an additional character that terminates the token.
 * Separators: if non-NULL, whitespace and the characters of this string
 *     are skipped in front of the token, and terminate it. If NULL, no
 *     separator handling is done and these characters are kept in the token.
 * bHandleContinuations: if TRUE, a '^' makes the following character a
 *     literal, and a "^\n" sequence is swallowed (line continuation).
 *     This parameter shadows the global variable of the same name.
 *
 * Outside of double quotes, the token is also terminated by a digit that
 * introduces a numbered redirection, by ')' when inside a block, and by
 * any of the '&', '|', '<', '>' characters. The token is truncated so as
 * to always fit into CurrentToken.
 *
 * The position where the token started is saved in OldParsePos, so that
 * UnParseToken() can push the token back into the stream.
 */
static TOK_TYPE
ParseTokenEx(
    IN TCHAR PrefixOperator OPTIONAL,
    IN TCHAR ExtraEnd OPTIONAL,
    IN PCTSTR Separators OPTIONAL,
    IN BOOL bHandleContinuations)
{
    TOK_TYPE Type;
    PTCHAR CurrentTokStart = CurrentToken;
    PTCHAR Out = CurrentTokStart;
    TCHAR Char;
    BOOL bInQuote = FALSE;

#ifndef MSCMD_PARSER_BUGS
    if (bReparseToken)
    {
        bReparseToken = FALSE;

        /*
         * We will append the part to be reparsed to the old one
         * (still present in CurrentToken).
         */
        CurrentTokStart = LastCurTokPos;
        Out = CurrentTokStart;
    }
    else
    {
        LastCurTokPos = CurrentToken;
    }
#endif

    /* Start with what we have at current ParsePos */
    OldParsePos = ParsePos;

    for (Char = ParseChar(); Char && Char != _T('\n'); Char = ParseChar())
    {
        /* Each double-quote toggles the quoted state; the checks
         * below apply only to unquoted characters. */
        bInQuote ^= (Char == _T('"'));
        if (!bInQuote)
        {
            if (Separators != NULL)
            {
                if (_istspace(Char) || !!_tcschr(Separators, Char))
                {
                    /* Skip leading separators */
                    if (Out == CurrentTokStart)
                        continue;
                    break;
                }
            }

            /* Check for prefix operator */
            if ((Out == CurrentTokStart) && (Char == PrefixOperator))
                break;

            /*
             * Check for numbered redirection.
             *
             * For this purpose, we check whether this is a number, that is
             * in first position in the current parsing buffer (remember that
             * ParsePos points to the next character) or is preceded by a
             * whitespace-like separator, including standard command operators
             * (excepting '@' !) and double-quotes.
             */
            if ( _istdigit(Char) &&
                 (ParsePos == &OldParsePos[1] ||
                  IsSeparator(ParsePos[-2]) ||
                  !!_tcschr(_T("()&|\""), ParsePos[-2])) &&
                 (*ParsePos == _T('<') || *ParsePos == _T('>')) )
            {
                break;
            }

            /* Check for other delimiters / operators */
            if (Char == ExtraEnd)
                break;
            if (InsideBlock && Char == _T(')'))
                break;
            if (_tcschr(_T("&|<>"), Char))
                break;

            if (bHandleContinuations && (Char == _T('^')))
            {
                Char = ParseChar();
                /* Eat up a \n, allowing line continuation */
                if (Char == _T('\n'))
                {
#ifndef MSCMD_PARSER_BUGS
                    /* Remember where the text of the new line starts in the token */
                    LastCurTokPos = Out;
#endif
                    Char = ParseChar();
                }
                /* Next character is a forced literal */

                if (Out == CurrentTokStart)
                {
                    /* Ignore any prefix operator if we don't start a new command block */
                    if (CurrentTokenType != TOK_BEGIN_BLOCK)
                        PrefixOperator = 0;
                }
            }
        }
        /* Truncate the token: always keep room for the NUL terminator */
        if (Out == &CurrentToken[CMDLINE_LENGTH - 1])
            break;
        *Out++ = Char;

        // PrefixOperator = 0;
    }

    /*
     * We exited the parsing loop. If the current character is the first one
     * (Out == CurrentTokStart), interpret it as an operator. Otherwise,
     * terminate the current token (type TOK_NORMAL) and keep the current
     * character so that it can be refetched as an operator at the next call.
     */

    if (Out != CurrentTokStart)
    {
        Type = TOK_NORMAL;
    }
    /*
     * Else we have an operator.
     */
    else if (Char == _T('@'))
    {
        Type = TOK_OPERATOR; // TOK_QUIET / TOK_PREFIX_OPERATOR
        *Out++ = Char;
        Char = ParseChar();
    }
    else if (Char == _T('('))
    {
        Type = TOK_BEGIN_BLOCK;
        *Out++ = Char;
        Char = ParseChar();
    }
    else if (Char == _T(')'))
    {
        Type = TOK_END_BLOCK;
        *Out++ = Char;
        Char = ParseChar();
    }
    else if (Char == _T('&') || Char == _T('|'))
    {
        Type = TOK_OPERATOR;
        *Out++ = Char;
        Char = ParseChar();
        /* Check for '&&' or '||' */
        if (Char == Out[-1])
        {
            *Out++ = Char;
            Char = ParseChar();
        }
    }
    else if ( _istdigit(Char) ||
              (Char == _T('<') || Char == _T('>')) )
    {
        Type = TOK_REDIRECTION;
        /* Optional handle number in front of the redirection */
        if (_istdigit(Char))
        {
            *Out++ = Char;
            Char = ParseChar();
        }
        /* By construction (see the for-loop above),
         * the next character must be a redirection. */
        ASSERT(Char == _T('<') || Char == _T('>'));
        *Out++ = Char;
        Char = ParseChar();
        if (Char == Out[-1])
        {
            /* Strangely, the tokenizer allows << as well as >>... (it
             * will cause an error when trying to parse it though) */
            *Out++ = Char;
            Char = ParseChar();
        }
        /* Handle redirection ("&digit"): separators between
         * the '&' and the digit are skipped. */
        if (Char == _T('&'))
        {
            *Out++ = Char;
            while (IsSeparator(Char = ParseChar()))
                ;
            if (_istdigit(Char))
            {
                *Out++ = Char;
                Char = ParseChar();
            }
        }
    }
    else
    {
        /* The character that stopped the scan (e.g. NUL or '\n') becomes the token text */
        Type = TOK_END;
        *Out++ = Char;
    }
    *Out = _T('\0');

    /*
     * Rewind the parsing position, so that the current character can be
     * refetched later on. However do this only if it is not NULL and if
     * this is not TOK_END, since we do not want to reparse later the line
     * termination (we could enter into infinite loops, or, in case of line
     * continuation, get unwanted "More?" prompts).
     */
    if (Char != 0 && Type != TOK_END)
        --ParsePos;

    /* Debugging support */
    if (fDumpTokens)
        ConOutPrintf(_T("ParseToken: (%d) '%s'\n"), Type, CurrentToken);

    return (CurrentTokenType = Type);
}
448
449 static __inline INT
ParseToken(IN TCHAR ExtraEnd OPTIONAL,IN PCTSTR Separators OPTIONAL)450 ParseToken(
451 IN TCHAR ExtraEnd OPTIONAL,
452 IN PCTSTR Separators OPTIONAL)
453 {
454 return ParseTokenEx(0, ExtraEnd, Separators, bHandleContinuations);
455 }
456
457
458 static PARSED_COMMAND*
AllocCommand(IN COMMAND_TYPE Type,IN PCTSTR CmdHead OPTIONAL,IN PCTSTR CmdTail OPTIONAL)459 AllocCommand(
460 IN COMMAND_TYPE Type,
461 IN PCTSTR CmdHead OPTIONAL,
462 IN PCTSTR CmdTail OPTIONAL)
463 {
464 PARSED_COMMAND* Cmd;
465
466 switch (Type)
467 {
468 case C_COMMAND:
469 case C_REM:
470 {
471 SIZE_T CmdHeadLen = _tcslen(CmdHead) + 1;
472 SIZE_T CmdTailLen = _tcslen(CmdTail) + 1;
473
474 Cmd = cmd_alloc(FIELD_OFFSET(PARSED_COMMAND,
475 Command.First[CmdHeadLen + CmdTailLen]));
476 if (!Cmd)
477 return NULL;
478
479 Cmd->Type = Type;
480 Cmd->Next = NULL;
481 Cmd->Subcommands = NULL;
482 Cmd->Redirections = NULL; /* Is assigned by the calling function */
483 memcpy(Cmd->Command.First, CmdHead, CmdHeadLen * sizeof(TCHAR));
484 Cmd->Command.Rest = Cmd->Command.First + CmdHeadLen;
485 memcpy(Cmd->Command.Rest, CmdTail, CmdTailLen * sizeof(TCHAR));
486 return Cmd;
487 }
488
489 case C_QUIET:
490 case C_BLOCK:
491 case C_MULTI:
492 case C_OR:
493 case C_AND:
494 case C_PIPE:
495 {
496 Cmd = cmd_alloc(sizeof(PARSED_COMMAND));
497 if (!Cmd)
498 return NULL;
499
500 Cmd->Type = Type;
501 Cmd->Next = NULL;
502 Cmd->Subcommands = NULL;
503 Cmd->Redirections = NULL; /* For C_BLOCK only: is assigned by the calling function */
504 return Cmd;
505 }
506
507 case C_FOR:
508 case C_IF:
509 {
510 Cmd = cmd_alloc(sizeof(PARSED_COMMAND));
511 if (!Cmd)
512 return NULL;
513
514 memset(Cmd, 0, sizeof(PARSED_COMMAND));
515 Cmd->Type = Type;
516 return Cmd;
517 }
518
519 default:
520 ERR("Unknown command type 0x%x\n", Type);
521 ASSERT(FALSE);
522 return NULL;
523 }
524 }
525
526 VOID
FreeCommand(IN OUT PARSED_COMMAND * Cmd)527 FreeCommand(
528 IN OUT PARSED_COMMAND* Cmd)
529 {
530 if (Cmd->Subcommands)
531 FreeCommand(Cmd->Subcommands);
532 if (Cmd->Next)
533 FreeCommand(Cmd->Next);
534 FreeRedirection(Cmd->Redirections);
535 if (Cmd->Type == C_FOR)
536 {
537 cmd_free(Cmd->For.Params);
538 cmd_free(Cmd->For.List);
539 }
540 else if (Cmd->Type == C_IF)
541 {
542 cmd_free(Cmd->If.LeftArg);
543 cmd_free(Cmd->If.RightArg);
544 }
545 cmd_free(Cmd);
546 }
547
548
549 /* Parse redirections and append them to the list */
550 static BOOL
ParseRedirection(IN OUT REDIRECTION ** List)551 ParseRedirection(
552 IN OUT REDIRECTION** List)
553 {
554 PTSTR Tok = CurrentToken;
555 REDIRECTION* Redir;
556 REDIR_MODE RedirMode;
557 BYTE Number;
558
559 if ( !(*Tok == _T('<') || *Tok == _T('>')) &&
560 !(_istdigit(*Tok) &&
561 (Tok[1] == _T('<') || Tok[1] == _T('>')) ) )
562 {
563 ASSERT(CurrentTokenType != TOK_REDIRECTION);
564 return FALSE;
565 }
566 ASSERT((CurrentTokenType == TOK_REDIRECTION) ||
567 (CurrentTokenType == TOK_NORMAL));
568
569 if (_istdigit(*Tok))
570 Number = *Tok++ - _T('0');
571 else
572 Number = *Tok == _T('<') ? 0 : 1;
573
574 if (*Tok++ == _T('<'))
575 {
576 RedirMode = REDIR_READ;
577 /* Forbid '<<' */
578 if (*Tok == _T('<'))
579 goto fail;
580 }
581 else
582 {
583 RedirMode = REDIR_WRITE;
584 if (*Tok == _T('>'))
585 {
586 RedirMode = REDIR_APPEND;
587 Tok++;
588 }
589 }
590
591 if (*Tok == _T('&'))
592 {
593 /* This is a handle redirection: the next character must be one single digit */
594 if (!(_istdigit(Tok[1]) && !Tok[2]))
595 goto fail;
596 }
597 else
598 #ifndef MSCMD_REDIR_PARSE_BUGS
599 if (!*Tok)
600 /* The file name was not part of this token, so it will be the next one */
601 #else
602 /* Get rid of what possibly remains in the token, and retrieve the next one */
603 #endif
604 {
605 if (ParseToken(0, STANDARD_SEPS) != TOK_NORMAL)
606 goto fail;
607 Tok = CurrentToken;
608 }
609
610 /* If a redirection for this handle number already exists, delete it */
611 while ((Redir = *List))
612 {
613 if (Redir->Number == Number)
614 {
615 *List = Redir->Next;
616 cmd_free(Redir);
617 continue;
618 }
619 List = &Redir->Next;
620 }
621
622 Redir = cmd_alloc(FIELD_OFFSET(REDIRECTION, Filename[_tcslen(Tok) + 1]));
623 if (!Redir)
624 {
625 WARN("Cannot allocate memory for Redir!\n");
626 goto fail;
627 }
628 Redir->Next = NULL;
629 Redir->OldHandle = INVALID_HANDLE_VALUE;
630 Redir->Number = Number;
631 Redir->Mode = RedirMode;
632 _tcscpy(Redir->Filename, Tok);
633 *List = Redir;
634 return TRUE;
635
636 fail:
637 ParseError();
638 FreeRedirection(*List);
639 *List = NULL;
640 return FALSE;
641 }
642
/*
 * Forward declaration: the block, IF and FOR parsers below call it to
 * parse their subcommands, passing C_OP_LOWEST as the operator type.
 * The definition is not in this part of the file.
 */
static __inline PARSED_COMMAND*
ParseCommandOp(
    IN COMMAND_TYPE OpType);
646
/*
 * Parse a parenthesized block.
 *
 * It is called once the opening parenthesis has been fetched (see
 * ParsePrimary()). The commands of the block are parsed one after the
 * other with ParseCommandOp() and chained through their Next member as
 * the subcommands of a C_BLOCK command, until the closing parenthesis
 * is reached.
 *
 * RedirList: list receiving the redirections that follow the closing
 *     parenthesis.
 *
 * Returns the C_BLOCK command with the next token already fetched, or
 * NULL on failure (allocation failure, parse error in a subcommand or in
 * a redirection, unterminated block or empty block).
 */
static PARSED_COMMAND*
ParseBlock(
    IN OUT REDIRECTION** RedirList)
{
    PARSED_COMMAND *Cmd, *Sub, **NextPtr;

    Cmd = AllocCommand(C_BLOCK, NULL, NULL);
    if (!Cmd)
    {
        WARN("Cannot allocate memory for Cmd!\n");
        ParseError();
        return NULL;
    }

    /* Read the block contents */
    NextPtr = &Cmd->Subcommands;
    ++InsideBlock;
    while (TRUE)
    {
        /*
         * Windows' CMD compatibility: Strip leading newlines in the block.
         *
         * Note that this behaviour is buggy, especially when MSCMD_PARSER_BUGS is defined!
         * For example:
         *   (foo^\n
         *   bar)
         * would be parsed ultimately as: '(', 'bar', ')' because the "foo^"
         * part would be discarded due to the UnParseToken() call, since this
         * function doesn't work across line continuations.
         */
        while (ParseToken(0, STANDARD_SEPS) == TOK_END && *CurrentToken == _T('\n'))
            ;
        /* Push the token that follows the newlines back into the
         * stream: it is refetched by ParseCommandOp() below. */
        if (*CurrentToken && *CurrentToken != _T('\n'))
            UnParseToken();

        /* Break early if we have nothing else to read. We will also fail
         * due to the fact we haven't encountered any closing parenthesis. */
        if (!*CurrentToken /* || *CurrentToken == _T('\n') */)
        {
            ASSERT(CurrentTokenType == TOK_END);
            break;
        }

        /*
         * NOTE: Windows' CMD uses a "CRLF" operator when dealing with
         * newlines in parenthesized blocks, as an alternative to the
         * '&' command-separation operator.
         */

        Sub = ParseCommandOp(C_OP_LOWEST);
        if (Sub)
        {
            /* Append the command at the end of the subcommands chain */
            *NextPtr = Sub;
            NextPtr = &Sub->Next;
        }
        else if (bParseError)
        {
            --InsideBlock;
            FreeCommand(Cmd);
            return NULL;
        }

        if (CurrentTokenType == TOK_END_BLOCK)
            break;

        /* Skip past the \n */
    }
    --InsideBlock;

    /* Fail if the block was not terminated, or if we have
     * an empty block, i.e. "( )", considered invalid. */
    if ((CurrentTokenType != TOK_END_BLOCK) || (Cmd->Subcommands == NULL))
    {
        ParseError();
        FreeCommand(Cmd);
        return NULL;
    }

    /* Process any trailing redirections and append them to the list */
#ifndef MSCMD_REDIR_PARSE_BUGS
    while (ParseToken(0, STANDARD_SEPS) == TOK_REDIRECTION)
    {
        if (!ParseRedirection(RedirList))
        {
            FreeCommand(Cmd);
            return NULL;
        }
    }
#else
    while (ParseToken(0, STANDARD_SEPS) != TOK_END)
    {
        if (!ParseRedirection(RedirList))
        {
            /* If an actual error happened in ParseRedirection(), bail out */
            if (bParseError)
            {
                FreeCommand(Cmd);
                return NULL;
            }
            /* Otherwise it just returned FALSE because the current token
             * is not a redirection. Unparse the token and refetch it. */
            break;
        }
    }
#endif
    if (CurrentTokenType != TOK_END)
    {
        /*
         * Windows' CMD compatibility: Unparse the current token.
         *
         * Note that this behaviour is buggy, especially when MSCMD_PARSER_BUGS is defined!
         * For example:
         *   (foo^\n
         *   bar)
         * would be parsed ultimately as: '(', 'bar', ')' because the "foo^"
         * part would be discarded due to the UnParseToken() call, since this
         * function doesn't work across line continuations.
         */
        UnParseToken();

        /*
         * Since it is expected that when ParseBlock() returns, the next
         * token is already fetched, call ParseToken() again to compensate.
         */
        ParseToken(0, STANDARD_SEPS);
    }

    return Cmd;
}
777
/*
 * Parse an IF statement.
 *
 * On entry the current token is the one that follows the IF keyword (it
 * is fetched by ParseCommandPart()). The accepted forms are, in order:
 * an optional "/I" (only when the extensions are enabled), an optional
 * "not", then either a unary operator followed by its argument, or a
 * left argument, a comparison operator and a right argument, and finally
 * the command to run, optionally followed by "else" and a second command.
 *
 * Returns the C_IF command, whose first subcommand is the command to run
 * and whose second one (if any) is the "else" command, or NULL on failure.
 */
static PARSED_COMMAND*
ParseIf(VOID)
{
    PARSED_COMMAND* Cmd;

    /* The command is zero-initialized: no flags, and the
     * operator starts at the first entry of IfOperatorString. */
    Cmd = AllocCommand(C_IF, NULL, NULL);
    if (!Cmd)
    {
        WARN("Cannot allocate memory for Cmd!\n");
        ParseError();
        return NULL;
    }

    if (bEnableExtensions && (_tcsicmp(CurrentToken, _T("/I")) == 0))
    {
        Cmd->If.Flags |= IFFLAG_IGNORECASE;
        ParseToken(0, STANDARD_SEPS);
    }
    if (_tcsicmp(CurrentToken, _T("not")) == 0)
    {
        Cmd->If.Flags |= IFFLAG_NEGATE;
        ParseToken(0, STANDARD_SEPS);
    }

    if (CurrentTokenType != TOK_NORMAL)
        goto error;

    /* Check for unary operators */
    for (; Cmd->If.Operator <= IF_MAX_UNARY; Cmd->If.Operator++)
    {
        /* Skip the extended operators if the extensions are disabled */
        if (!bEnableExtensions && (Cmd->If.Operator >= IF_CMDEXTVERSION))
            continue;

        if (_tcsicmp(CurrentToken, IfOperatorString[Cmd->If.Operator]) == 0)
        {
            /* The operand of a unary operator is stored as the right argument */
            if (ParseToken(0, STANDARD_SEPS) != TOK_NORMAL)
                goto error;
            Cmd->If.RightArg = cmd_dup(CurrentToken);
            goto condition_done;
        }
    }

    /* It must be a two-argument (comparison) operator. It could be ==, so
     * the equals sign can't be treated as whitespace here. */
    Cmd->If.LeftArg = cmd_dup(CurrentToken);
    ParseToken(0, _T(",;"));

    /* The right argument can come immediately after == */
    /* (The operator is then the one the loop above stopped on, i.e. IF_MAX_UNARY + 1) */
    if (_tcsnicmp(CurrentToken, _T("=="), 2) == 0 && CurrentToken[2])
    {
        Cmd->If.RightArg = cmd_dup(&CurrentToken[2]);
        goto condition_done;
    }

    // Cmd->If.Operator == IF_MAX_UNARY + 1;
    for (; Cmd->If.Operator <= IF_MAX_COMPARISON; Cmd->If.Operator++)
    {
        /* Skip the extended operators if the extensions are disabled */
        if (!bEnableExtensions && (Cmd->If.Operator >= IF_EQU)) // (Cmd->If.Operator > IF_STRINGEQ)
            continue;

        if (_tcsicmp(CurrentToken, IfOperatorString[Cmd->If.Operator]) == 0)
        {
            if (ParseToken(0, STANDARD_SEPS) != TOK_NORMAL)
                goto error;
            Cmd->If.RightArg = cmd_dup(CurrentToken);
            goto condition_done;
        }
    }
    /* No known operator follows the left argument */
    goto error;

condition_done:
    /* Parse the command to run when the condition holds */
    Cmd->Subcommands = ParseCommandOp(C_OP_LOWEST);
    if (Cmd->Subcommands == NULL)
        goto error;
    /* Parse the optional ELSE command, chained after the first one */
    if (_tcsicmp(CurrentToken, _T("else")) == 0)
    {
        Cmd->Subcommands->Next = ParseCommandOp(C_OP_LOWEST);
        if (Cmd->Subcommands->Next == NULL)
            goto error;
    }

    return Cmd;

error:
    /* FreeCommand() also releases the arguments and the subcommands parsed so far */
    FreeCommand(Cmd);
    ParseError();
    return NULL;
}
869
/*
 * Parse a FOR command.
 * Syntax is: FOR [options] %var IN (list) DO command
 *
 * On entry the current token is the one that follows the FOR keyword (it
 * is fetched by ParseCommandPart()). The options (/D, /F [params], /L and
 * /R [params]) are recognized only when the extensions are enabled.
 * The elements of the list are gathered, separated by single spaces, in
 * the common scratch buffer before being duplicated into the command.
 *
 * Returns the C_FOR command, whose subcommand is the command following
 * DO, or NULL on failure.
 */
static PARSED_COMMAND*
ParseFor(VOID)
{
    PARSED_COMMAND* Cmd;

    /* Use the scratch buffer */
    PTSTR List = TempBuf;
    PTCHAR Pos = List;

    /* The command is zero-initialized: no switches and no parameters */
    Cmd = AllocCommand(C_FOR, NULL, NULL);
    if (!Cmd)
    {
        WARN("Cannot allocate memory for Cmd!\n");
        ParseError();
        return NULL;
    }

    /* Skip the extended FOR syntax if extensions are disabled */
    if (!bEnableExtensions)
        goto parseForBody;

    while (TRUE)
    {
        if (_tcsicmp(CurrentToken, _T("/D")) == 0)
        {
            Cmd->For.Switches |= FOR_DIRS;
        }
        else if (_tcsicmp(CurrentToken, _T("/F")) == 0)
        {
            Cmd->For.Switches |= FOR_F;
            if (!Cmd->For.Params)
            {
                /* The next token is the parameters string, unless it
                 * is another switch or the FOR variable: in that case
                 * stop processing the switches. */
                ParseToken(0, STANDARD_SEPS);
                if (CurrentToken[0] == _T('/') || CurrentToken[0] == _T('%'))
                    break;
                Cmd->For.Params = cmd_dup(CurrentToken);
            }
        }
        else if (_tcsicmp(CurrentToken, _T("/L")) == 0)
        {
            Cmd->For.Switches |= FOR_LOOP;
        }
        else if (_tcsicmp(CurrentToken, _T("/R")) == 0)
        {
            Cmd->For.Switches |= FOR_RECURSIVE;
            if (!Cmd->For.Params)
            {
                /* Same as for /F, but the parameter is stored without its quotes */
                ParseToken(0, STANDARD_SEPS);
                if (CurrentToken[0] == _T('/') || CurrentToken[0] == _T('%'))
                    break;
                StripQuotes(CurrentToken);
                Cmd->For.Params = cmd_dup(CurrentToken);
            }
        }
        else
        {
            break;
        }

        ParseToken(0, STANDARD_SEPS);
    }

    /* Make sure there aren't two different switches specified
     * at the same time, unless they're /D and /R */
    /* (x & (x - 1)) is non-zero when more than one bit is set in x */
    if ((Cmd->For.Switches & (Cmd->For.Switches - 1)) != 0
        && Cmd->For.Switches != (FOR_DIRS | FOR_RECURSIVE))
    {
        goto error;
    }

parseForBody:

    /* Variable name should be % and just one other character */
    if (CurrentToken[0] != _T('%') || _tcslen(CurrentToken) != 2)
        goto error;
    Cmd->For.Variable = CurrentToken[1];

    ParseToken(0, STANDARD_SEPS);
    if (_tcsicmp(CurrentToken, _T("in")) != 0)
        goto error;

    /* Here '(' terminates the token, so that it is fetched on its own */
    if (ParseToken(_T('('), STANDARD_SEPS) != TOK_BEGIN_BLOCK)
        goto error;

    while (TRUE)
    {
        /* Pretend we're inside a block so the tokenizer will stop on ')' */
        ++InsideBlock;
        ParseToken(0, STANDARD_SEPS);
        --InsideBlock;

        if (CurrentTokenType == TOK_END_BLOCK)
            break;

        /* Skip past the \n */
        if ((CurrentTokenType == TOK_END) && *CurrentToken == _T('\n'))
            continue;

        if (CurrentTokenType != TOK_NORMAL)
            goto error;

        /* Separate the elements of the list with one space */
        if (Pos != List)
            *Pos++ = _T(' ');

        /* Fail if the element and its NUL terminator do not fit in the buffer */
        if (Pos + _tcslen(CurrentToken) >= &List[CMDLINE_LENGTH])
            goto error;
        Pos = _stpcpy(Pos, CurrentToken);
    }
    *Pos = _T('\0');
    Cmd->For.List = cmd_dup(List);

    ParseToken(0, STANDARD_SEPS);
    if (_tcsicmp(CurrentToken, _T("do")) != 0)
        goto error;

    Cmd->Subcommands = ParseCommandOp(C_OP_LOWEST);
    if (Cmd->Subcommands == NULL)
        goto error;

    return Cmd;

error:
    /* FreeCommand() also releases the parameters, the list and the subcommand parsed so far */
    FreeCommand(Cmd);
    ParseError();
    return NULL;
}
1000
1001 /* Parse a REM command */
1002 static PARSED_COMMAND*
ParseRem(VOID)1003 ParseRem(VOID)
1004 {
1005 PARSED_COMMAND* Cmd;
1006
1007 /* The common scratch buffer already contains the name of the command */
1008 PTSTR ParsedLine = TempBuf;
1009
1010 PTCHAR Pos = ParsedLine + _tcslen(ParsedLine) + 1;
1011 SIZE_T TailOffset = Pos - ParsedLine;
1012
1013 /* Build a minimal command for REM, so that it can still get through the batch echo unparsing */
1014
1015 /* Unparse the current token, so as to emulate the REM command parsing
1016 * behaviour of Windows' CMD, that discards everything before the last
1017 * line continuation. */
1018 UnParseToken();
1019
1020 /*
1021 * Ignore the rest of the line, without any line continuation (but eat the caret).
1022 * We cannot simply set bLineContinuations to TRUE or FALSE, because we want (only
1023 * for the REM command), even when bLineContinuations == FALSE, to get the caret,
1024 * otherwise it would be ignored.
1025 */
1026 while (ParseTokenEx(0, 0, NULL, FALSE) != TOK_END)
1027 {
1028 if (Pos + _tcslen(CurrentToken) >= &ParsedLine[CMDLINE_LENGTH])
1029 {
1030 ParseError();
1031 return NULL;
1032 }
1033 Pos = _stpcpy(Pos, CurrentToken);
1034 }
1035 *Pos = _T('\0');
1036
1037 Cmd = AllocCommand(C_REM,
1038 ParsedLine,
1039 ParsedLine + TailOffset);
1040 if (!Cmd)
1041 {
1042 WARN("Cannot allocate memory for Cmd!\n");
1043 ParseError();
1044 return NULL;
1045 }
1046 return Cmd;
1047 }
1048
/*
 * Parse a command.
 *
 * On entry the current token is the name of the command. The special
 * forms FOR, IF and REM are handed over to their dedicated parsers,
 * unless they are followed by "/?": they are then built as regular
 * commands whose tail is "/?", only for help support.
 * For a regular command, the tail is gathered verbatim (whitespace
 * included) in the common scratch buffer, right after the command name.
 *
 * RedirList: list of the redirections already parsed for this command;
 *     the redirections found in the tail are appended to it.
 *
 * Returns the parsed command, or NULL on failure.
 */
static PARSED_COMMAND*
ParseCommandPart(
    IN OUT REDIRECTION** RedirList)
{
    PARSED_COMMAND* Cmd;
    PARSED_COMMAND* (*Func)(VOID);

    /* Use the scratch buffer */
    PTSTR ParsedLine = TempBuf;

    /* We need to copy the current token because it's going to be changed below by the ParseToken() calls */
    /* The tail starts right after the NUL terminator of the command name */
    PTCHAR Pos = _stpcpy(ParsedLine, CurrentToken) + 1;
    SIZE_T TailOffset = Pos - ParsedLine;

    /* Check for special forms */
    /* (Func is set to the parser of the first keyword that matches) */
    if ((Func = ParseFor, _tcsicmp(ParsedLine, _T("FOR")) == 0) ||
        (Func = ParseIf, _tcsicmp(ParsedLine, _T("IF")) == 0) ||
        (Func = ParseRem, _tcsicmp(ParsedLine, _T("REM")) == 0))
    {
        PTCHAR pHelp;

        ParseToken(0, STANDARD_SEPS);

        /* For IF, "/?" counts only when it is followed by '/' or ends the token */
        if ((pHelp = _tcsstr(CurrentToken, _T("/?"))) &&
            (Func == ParseIf ? (pHelp[2] == _T('/') || pHelp[2] == 0) : TRUE))
        {
            /* /? was found within the first token */
            ParseToken(0, STANDARD_SEPS);
        }
        else
        {
            pHelp = NULL;
        }
        if (pHelp && (CurrentTokenType == TOK_NORMAL))
        {
            /* We encountered /? first, but is followed
             * by another token: that's an error. */
            ParseError();
            return NULL;
        }

        /* Do actual parsing only if no help is present */
        if (!pHelp)
        {
            /* FOR and IF commands cannot have leading redirection, but REM can */
            if (*RedirList && ((Func == ParseFor) || (Func == ParseIf)))
            {
                /* Display the culprit command and fail */
                ParseErrorEx(ParsedLine);
                return NULL;
            }

            return Func();
        }

        /* Otherwise, run FOR,IF,REM as regular commands only for help support */
        if (Pos + _tcslen(_T("/?")) >= &ParsedLine[CMDLINE_LENGTH])
        {
            ParseError();
            return NULL;
        }
        Pos = _stpcpy(Pos, _T("/?"));
    }
    else
    {
        /* No separators: the tail keeps its whitespace */
        ParseToken(0, NULL);
    }

    /* Now get the tail */
    while (CurrentTokenType != TOK_END)
    {
        if (CurrentTokenType == TOK_NORMAL)
        {
            /* Fail if the token and its NUL terminator do not fit in the buffer */
            if (Pos + _tcslen(CurrentToken) >= &ParsedLine[CMDLINE_LENGTH])
            {
                ParseError();
                return NULL;
            }
            Pos = _stpcpy(Pos, CurrentToken);
        }
#ifndef MSCMD_REDIR_PARSE_BUGS
        else if (CurrentTokenType == TOK_REDIRECTION)
        {
            /* Process any trailing redirections and append them to the list */
            while (CurrentTokenType == TOK_REDIRECTION)
            {
                if (!ParseRedirection(RedirList))
                    return NULL;

                ParseToken(0, STANDARD_SEPS);
            }
            if (CurrentTokenType == TOK_END)
                break;

            /* Unparse the current token, and reparse it below with no separators */
            UnParseToken();
        }
        else
        {
            /* There is no need to do a UnParseToken() / ParseToken() cycle */
            break;
        }
#else
        else
        {
            /* Process any trailing redirections and append them to the list */
            BOOL bSuccess = FALSE;

            ASSERT(CurrentTokenType != TOK_END);

            while (CurrentTokenType != TOK_END)
            {
                if (!ParseRedirection(RedirList))
                {
                    /* If an actual error happened in ParseRedirection(), bail out */
                    if (bParseError)
                        return NULL;

                    /* Otherwise it just returned FALSE because the current token
                     * is not a redirection. Unparse the token and refetch it. */
                    break;
                }
                bSuccess = TRUE;

                ParseToken(0, STANDARD_SEPS);
            }
            if (CurrentTokenType == TOK_END)
                break;

            /* Unparse the current token, and reparse it below with no separators */
            UnParseToken();

            /* If bSuccess == FALSE, we know that it's still the old fetched token, but
             * it has been unparsed, so we need to refetch it before quitting the loop. */
            if (!bSuccess)
            {
                ParseToken(0, NULL);
                break;
            }
        }
#endif

        ParseToken(0, NULL);
    }
    *Pos = _T('\0');

    /* The head is the command name, the tail is what has been gathered after it */
    Cmd = AllocCommand(C_COMMAND,
                       ParsedLine,
                       ParsedLine + TailOffset);
    if (!Cmd)
    {
        WARN("Cannot allocate memory for Cmd!\n");
        ParseError();
        return NULL;
    }
    return Cmd;
}
1207
1208 static PARSED_COMMAND*
ParsePrimary(VOID)1209 ParsePrimary(VOID)
1210 {
1211 PARSED_COMMAND* Cmd = NULL;
1212 REDIRECTION* RedirList = NULL;
1213
1214 /* In this context, '@' is considered as a separate token */
1215 if ((*CurrentToken == _T('@')) && (CurrentTokenType == TOK_OPERATOR))
1216 {
1217 Cmd = AllocCommand(C_QUIET, NULL, NULL);
1218 if (!Cmd)
1219 {
1220 WARN("Cannot allocate memory for Cmd!\n");
1221 ParseError();
1222 return NULL;
1223 }
1224 /* @ acts like a unary operator with low precedence,
1225 * so call the top-level parser */
1226 Cmd->Subcommands = ParseCommandOp(C_OP_LOWEST);
1227 return Cmd;
1228 }
1229
1230 /* Process leading redirections and get the head of the command */
1231 #ifndef MSCMD_REDIR_PARSE_BUGS
1232 while (CurrentTokenType == TOK_REDIRECTION)
1233 {
1234 if (!ParseRedirection(&RedirList))
1235 return NULL;
1236
1237 ParseToken(_T('('), STANDARD_SEPS);
1238 }
1239 #else
1240 {
1241 BOOL bSuccess = FALSE;
1242 while (CurrentTokenType != TOK_END)
1243 {
1244 if (!ParseRedirection(&RedirList))
1245 {
1246 /* If an actual error happened in ParseRedirection(), bail out */
1247 if (bParseError)
1248 return NULL;
1249
1250 /* Otherwise it just returned FALSE because
1251 * the current token is not a redirection. */
1252 break;
1253 }
1254 bSuccess = TRUE;
1255
1256 ParseToken(0, STANDARD_SEPS);
1257 }
1258 if (bSuccess)
1259 {
1260 /* Unparse the current token, and reparse it with support for parenthesis */
1261 if (CurrentTokenType != TOK_END)
1262 UnParseToken();
1263
1264 ParseToken(_T('('), STANDARD_SEPS);
1265 }
1266 }
1267 #endif
1268
1269 if (CurrentTokenType == TOK_NORMAL)
1270 Cmd = ParseCommandPart(&RedirList);
1271 else if (CurrentTokenType == TOK_BEGIN_BLOCK)
1272 Cmd = ParseBlock(&RedirList);
1273 else if (CurrentTokenType == TOK_END_BLOCK && !RedirList)
1274 return NULL;
1275
1276 if (Cmd)
1277 {
1278 /* FOR and IF commands cannot have leading redirection
1279 * (checked by ParseCommandPart(), errors out if so). */
1280 ASSERT(!RedirList || (Cmd->Type != C_FOR && Cmd->Type != C_IF));
1281
1282 /* Save the redirection list in the command */
1283 Cmd->Redirections = RedirList;
1284
1285 /* Return the new command */
1286 return Cmd;
1287 }
1288
1289 ParseError();
1290 FreeRedirection(RedirList);
1291 return NULL;
1292 }
1293
/*
 * Parses a command made of binary operators, at the precedence level OpType
 * (from C_OP_LOWEST up to C_OP_HIGHEST; the operator text for each level
 * comes from OpString[]).
 *
 * At the lowest level only, the following lines are recognized first and
 * make the function return NULL: parser-level comments (':'-prefixed, when
 * bIgnoreParserComments is set), a lone closing parenthesis that is not a
 * TOK_END_BLOCK token, and empty lines.
 *
 * The left-hand side is then parsed at the next higher level (ParsePrimary()
 * at the highest one). If the current token is this level's operator, the
 * right-hand side is parsed via ParseCommandOp() at the same level and an
 * operator command is built with Subcommands == LHS and LHS->Next == RHS.
 *
 * Returns the resulting command (the LHS alone if no operator follows),
 * or NULL when there is nothing to parse or on error.
 */
static PARSED_COMMAND*
ParseCommandBinaryOp(
    IN COMMAND_TYPE OpType)
{
    PARSED_COMMAND* Cmd;

    if (OpType == C_OP_LOWEST) // i.e. C_MULTI
    {
        /* Ignore any parser-level comments */
        if (bIgnoreParserComments && (*CurrentToken == _T(':')))
        {
            /* Ignore the rest of the line, including line continuations */
            while (ParseToken(0, NULL) != TOK_END)
                ;
#ifdef MSCMD_PARENS_PARSE_BUGS
            /*
             * Return NULL in case we are NOT inside a parenthesized block,
             * otherwise continue. The effects can be observed as follows:
             * within a parenthesized block, every second ':'-prefixed command
             * is not ignored, while the first of each "pair" is ignored.
             * This first command **MUST NOT** be followed by an empty line,
             * otherwise a syntax error is raised.
             */
            if (InsideBlock == 0)
            {
#endif
                return NULL;
#ifdef MSCMD_PARENS_PARSE_BUGS
            }
            /* Get the next token */
            ParseToken(0, NULL);
#endif
        }

        /*
         * Ignore single closing parenthesis outside of command blocks,
         * thus interpreted as a command. This very specific situation
         * can happen e.g. while running in batch mode, when jumping to
         * a label present inside a command block.
         *
         * NOTE: If necessary, this condition can be restricted to only
         * when a batch context 'bc' is active.
         *
         * NOTE 2: For further security, Windows checks that we are NOT
         * currently inside a parenthesized block, and also, ignores
         * explicitly everything (ParseToken() loop) on the same line
         * (including line continuations) after this closing parenthesis.
         *
         * Why do so? Consider the following batch:
         *
         *   IF 1==1 (
         *   :label
         *       echo A
         *   ) ^
         *   ELSE (
         *       echo B
         *       exit /b
         *   )
         *   GOTO :label
         *
         * First the IF block is executed. Since the condition is trivially
         * true, only the first block "echo A" is executed, then execution
         * goes after the IF block, that is, at the GOTO. Here, the GOTO
         * jumps within the first IF-block, however, the running context now
         * is NOT an IF. So parsing and execution will go through each command,
         * starting with 'echo A'. But then one gets the ') ^\n ELSE (' part !!
         * If we want to make sense of this without bailing out due to
         * parsing error, we should ignore this line, **including** the line
         * continuation. Hence we need to loop over all the tokens following
         * the closing parenthesis, instead of just returning NULL straight ahead.
         * Then execution continues with the other commands, 'echo B' and
         * 'exit /b' (here to stop the code loop). Execution would also
         * continue (if 'exit' was replaced by something else) and encounter
         * the lone closing parenthesis ')', that should again be ignored.
         *
         * Note that this feature has been introduced in Win2k+.
         */
        if (/** bc && **/ (_tcscmp(CurrentToken, _T(")")) == 0) &&
            (CurrentTokenType != TOK_END_BLOCK))
        {
            ASSERT(InsideBlock == 0);

            /* Ignore the rest of the line, including line continuations */
            while (ParseToken(0, NULL) != TOK_END)
                ;
            return NULL;
        }

#ifdef MSCMD_PARENS_PARSE_BUGS
        /* Check whether we have an empty line only if we are not inside
         * a parenthesized block, and return NULL if so, otherwise do not
         * do anything; a syntax error will be raised later. */
        if (InsideBlock == 0)
#endif
        if (!*CurrentToken || *CurrentToken == _T('\n'))
        {
            ASSERT(CurrentTokenType == TOK_END);
            return NULL;
        }
    }

    /* Parse the left-hand side with the next higher-precedence parser;
     * the highest-precedence operator takes primary commands as operands */
    if (OpType == C_OP_HIGHEST)
        Cmd = ParsePrimary();
    else
        Cmd = ParseCommandBinaryOp(OpType + 1);

    /* If the LHS is followed by this level's operator, parse the RHS */
    if (Cmd && !_tcscmp(CurrentToken, OpString[OpType - C_OP_LOWEST]))
    {
        PARSED_COMMAND* Left = Cmd;
        PARSED_COMMAND* Right;

        /* The RHS is parsed at the same level: ParseCommandOp() fetches the
         * next token, then calls back into this function with OpType */
        Right = ParseCommandOp(OpType);
        if (!Right)
        {
            /*
             * The '&' operator is allowed to have an empty RHS.
             * In this case, we directly return the LHS only.
             * Note that Windows' CMD prefers building a '&'
             * command with an empty RHS.
             */
            if (!bParseError && (OpType != C_MULTI))
                ParseError();
            if (bParseError)
            {
                FreeCommand(Left);
                return NULL;
            }

#ifndef MSCMD_MULTI_EMPTY_RHS
            return Left;
#endif
            /* With MSCMD_MULTI_EMPTY_RHS defined, execution continues below
             * with Right == NULL and a '&' command is built regardless */
        }

        Cmd = AllocCommand(OpType, NULL, NULL);
        if (!Cmd)
        {
            WARN("Cannot allocate memory for Cmd!\n");
            ParseError();
            FreeCommand(Left);
            /* NOTE(review): Right can be NULL here when MSCMD_MULTI_EMPTY_RHS
             * is defined; this presumes FreeCommand() accepts NULL - confirm */
            FreeCommand(Right);
            return NULL;
        }

        /* Chain the operands: Subcommands -> Left -> Right -> NULL */
        Cmd->Subcommands = Left;
        Left->Next = Right;
#ifdef MSCMD_MULTI_EMPTY_RHS
        if (Right)
#endif
        Right->Next = NULL;
    }

    return Cmd;
}
/*
 * Entry point for parsing a command at the precedence level OpType:
 * fetches the first token of the command with ParseTokenEx(), then
 * delegates the actual parsing to ParseCommandBinaryOp().
 * Returns whatever ParseCommandBinaryOp() returns.
 */
static __inline PARSED_COMMAND*
ParseCommandOp(
    IN COMMAND_TYPE OpType)
{
    /* Start parsing: initialize the first token */

    /* Parse the prefix "quiet" operator '@' as a separate command.
     * Thus, @@foo@bar is parsed as: '@', '@', 'foo@bar'. */
    /* NOTE(review): '@' and '(' are presumably the extra characters to be
     * recognized as stand-alone tokens at this position - confirm against
     * the definition of ParseTokenEx() */
    ParseTokenEx(_T('@'), _T('('), STANDARD_SEPS, bHandleContinuations);

    return ParseCommandBinaryOp(OpType);
}
1458
1459
1460 PARSED_COMMAND*
ParseCommand(IN PCTSTR Line)1461 ParseCommand(
1462 IN PCTSTR Line)
1463 {
1464 PARSED_COMMAND* Cmd;
1465
1466 if (Line)
1467 {
1468 if (!SubstituteVars(Line, ParseLine, _T('%')))
1469 return NULL;
1470 bLineContinuations = FALSE;
1471 }
1472 else
1473 {
1474 if (!ReadLine(ParseLine, FALSE))
1475 return NULL;
1476 bLineContinuations = TRUE;
1477 }
1478
1479 InitParser();
1480
1481 Cmd = ParseCommandOp(C_OP_LOWEST);
1482 if (Cmd)
1483 {
1484 bIgnoreEcho = FALSE;
1485
1486 if ((CurrentTokenType != TOK_END) &&
1487 (_tcscmp(CurrentToken, _T("\n")) != 0))
1488 {
1489 ParseError();
1490 }
1491 if (bParseError)
1492 {
1493 FreeCommand(Cmd);
1494 return NULL;
1495 }
1496
1497 /* Debugging support */
1498 if (fDumpParse)
1499 DumpCommand(Cmd, 0);
1500 }
1501 else
1502 {
1503 bIgnoreEcho = TRUE;
1504 }
1505 return Cmd;
1506 }
1507
1508
1509 /*
1510 * This function is similar to EchoCommand(), but is used
1511 * for dumping the command tree for debugging purposes.
1512 */
1513 static VOID
DumpRedir(IN REDIRECTION * Redirections)1514 DumpRedir(
1515 IN REDIRECTION* Redirections)
1516 {
1517 REDIRECTION* Redir;
1518
1519 if (Redirections)
1520 #ifndef MSCMD_ECHO_COMMAND_COMPAT
1521 ConOutPuts(_T(" Redir: "));
1522 #else
1523 ConOutPuts(_T("Redir: "));
1524 #endif
1525 for (Redir = Redirections; Redir; Redir = Redir->Next)
1526 {
1527 ConOutPrintf(_T(" %x %s%s"), Redir->Number,
1528 RedirString[Redir->Mode], Redir->Filename);
1529 }
1530 }
1531
/*
 * Dumps the command tree rooted at Cmd to the console, one node per line,
 * for debugging purposes (ParseCommand() calls it when fDumpParse is set).
 *
 * Cmd      - Root of the (sub)tree to dump; nothing is printed if NULL.
 * SpacePad - Number of spaces printed in front of the node's line;
 *            sub-commands are dumped with a larger padding.
 */
VOID
DumpCommand(
    IN PARSED_COMMAND* Cmd,
    IN ULONG SpacePad)
{
/*
 * This macro is like DumpCommand(Cmd, Pad);
 * but avoids an extra recursive level.
 * Note that it can be used ONLY for terminating commands!
 * (It overwrites the Cmd and SpacePad parameters, then restarts
 * the function body from the 'dump' label.)
 */
#define DUMP(Command, Pad) \
do { \
    Cmd = (Command); \
    SpacePad = (Pad); \
    goto dump; \
} while (0)

    PARSED_COMMAND* Sub;

dump:
    if (!Cmd)
        return;

    /* Space padding */
    ConOutPrintf(_T("%*s"), SpacePad, _T(""));

    switch (Cmd->Type)
    {
    case C_COMMAND:
    case C_REM:
    {
        /* Generic command name, and Type */
#ifndef MSCMD_ECHO_COMMAND_COMPAT
        ConOutPrintf(_T("Cmd: %s Type: %x"),
                     Cmd->Command.First, Cmd->Type);
#else
        ConOutPrintf(_T("Cmd: %s Type: %x "),
                     Cmd->Command.First, Cmd->Type);
#endif
        /* Arguments */
        if (Cmd->Command.Rest && *(Cmd->Command.Rest))
#ifndef MSCMD_ECHO_COMMAND_COMPAT
            ConOutPrintf(_T(" Args: `%s'"), Cmd->Command.Rest);
#else
            ConOutPrintf(_T("Args: `%s' "), Cmd->Command.Rest);
#endif
        /* Redirections */
        DumpRedir(Cmd->Redirections);

        ConOutChar(_T('\n'));
        return;
    }

    case C_QUIET:
    {
#ifndef MSCMD_ECHO_COMMAND_COMPAT
        ConOutChar(_T('@'));
#else
        ConOutPuts(_T("@ "));
#endif
        DumpRedir(Cmd->Redirections); // FIXME: Can we have leading redirections??
        ConOutChar(_T('\n'));

        /* Dump the quieted command, indented, without recursing */
        /*DumpCommand*/DUMP(Cmd->Subcommands, SpacePad + 2);
        return;
    }

    case C_BLOCK:
    {
#ifndef MSCMD_ECHO_COMMAND_COMPAT
        ConOutChar(_T('('));
#else
        ConOutPuts(_T("( "));
#endif
        DumpRedir(Cmd->Redirections);
        ConOutChar(_T('\n'));

        SpacePad += 2;

        for (Sub = Cmd->Subcommands; Sub; Sub = Sub->Next)
        {
#if defined(MSCMD_ECHO_COMMAND_COMPAT) && defined(MSCMD_PARSER_BUGS)
            /*
             * We will emulate Windows' CMD handling of "CRLF" and "&" multi-command
             * enumeration within parenthesized command blocks.
             */

            /* The last command of the block is dumped as is */
            if (!Sub->Next)
            {
                DumpCommand(Sub, SpacePad);
                continue;
            }

            /* Any other non-'&' command is preceded by a "CRLF" line */
            if (Sub->Type != C_MULTI)
            {
                ConOutPrintf(_T("%*s"), SpacePad, _T(""));
                ConOutPuts(_T("CRLF \n"));
                DumpCommand(Sub, SpacePad);
                continue;
            }

            /* Now, Sub->Type == C_MULTI */

            /* Cmd is reused as a scratch pointer from here on: the loop only
             * relies on Sub, and the function returns right after the loop */
            Cmd = Sub;

            ConOutPrintf(_T("%*s"), SpacePad, _T(""));
            ConOutPrintf(_T("%s \n"), OpString[Cmd->Type - C_OP_LOWEST]);
            // FIXME: Can we have redirections on these operator-type commands?

            SpacePad += 2;

            /* Dump both operands of the '&', separated by a "CRLF" line */
            Cmd = Cmd->Subcommands;
            DumpCommand(Cmd, SpacePad);
            ConOutPrintf(_T("%*s"), SpacePad, _T(""));
            ConOutPuts(_T("CRLF \n"));
            DumpCommand(Cmd->Next, SpacePad);

            // NOTE: Next commands will remain indented.

#else

            /*
             * If this command is followed by another one, first display "CRLF".
             * This also emulates the CRLF placement "bug" of Windows' CMD
             * for the last two commands.
             */
            if (Sub->Next)
            {
                ConOutPrintf(_T("%*s"), SpacePad, _T(""));
#ifndef MSCMD_ECHO_COMMAND_COMPAT
                ConOutPuts(_T("CRLF\n"));
#else
                ConOutPuts(_T("CRLF \n"));
#endif
            }
            DumpCommand(Sub, SpacePad);

#endif // defined(MSCMD_ECHO_COMMAND_COMPAT) && defined(MSCMD_PARSER_BUGS)
        }

        return;
    }

    case C_MULTI:
    case C_OR:
    case C_AND:
    case C_PIPE:
    {
        /* The operator itself, then its two operands, indented */
#ifndef MSCMD_ECHO_COMMAND_COMPAT
        ConOutPrintf(_T("%s\n"), OpString[Cmd->Type - C_OP_LOWEST]);
#else
        ConOutPrintf(_T("%s \n"), OpString[Cmd->Type - C_OP_LOWEST]);
#endif
        // FIXME: Can we have redirections on these operator-type commands?

        SpacePad += 2;

        Sub = Cmd->Subcommands;
        DumpCommand(Sub, SpacePad);
        /*DumpCommand*/DUMP(Sub->Next, SpacePad);
        return;
    }

    case C_FOR:
    {
        ConOutPuts(_T("for"));
        /* NOTE: FOR cannot have leading redirections */

        if (Cmd->For.Switches & FOR_DIRS) ConOutPuts(_T(" /D"));
        if (Cmd->For.Switches & FOR_F) ConOutPuts(_T(" /F"));
        if (Cmd->For.Switches & FOR_LOOP) ConOutPuts(_T(" /L"));
        if (Cmd->For.Switches & FOR_RECURSIVE) ConOutPuts(_T(" /R"));
        if (Cmd->For.Params)
            ConOutPrintf(_T(" %s"), Cmd->For.Params);
        ConOutPrintf(_T(" %%%c in (%s) do\n"), Cmd->For.Variable, Cmd->For.List);
        /* Dump the loop body, indented, without recursing */
        /*DumpCommand*/DUMP(Cmd->Subcommands, SpacePad + 2);
        return;
    }

    case C_IF:
    {
        ConOutPuts(_T("if"));
        /* NOTE: IF cannot have leading redirections */

        if (Cmd->If.Flags & IFFLAG_IGNORECASE)
            ConOutPuts(_T(" /I"));

        ConOutChar(_T('\n'));

        SpacePad += 2;

        /*
         * Show the IF command condition as a command.
         * If it is negated, indent the command more.
         */
        if (Cmd->If.Flags & IFFLAG_NEGATE)
        {
            ConOutPrintf(_T("%*s"), SpacePad, _T(""));
            ConOutPuts(_T("not\n"));
            SpacePad += 2;
        }

        ConOutPrintf(_T("%*s"), SpacePad, _T(""));

        /*
         * Command name:
         * - Unary operator: its name is the command name, and its argument is the command argument.
         * - Binary operator: its LHS is the command name, its RHS is the command argument.
         *
         * Type:
         * Windows' CMD (Win2k3 / Win7-10) values are as follows:
         *   CMDEXTVERSION  Type: 0x32 / 0x34
         *   ERRORLEVEL     Type: 0x33 / 0x35
         *   DEFINED        Type: 0x34 / 0x36
         *   EXIST          Type: 0x35 / 0x37
         *   ==             Type: 0x37 / 0x39 (String Comparison)
         *
         * For the following command:
         *   NOT            Type: 0x36 / 0x38
         * Windows only prints it without any type / redirection.
         *
         * For the following command:
         *   EQU, NEQ, etc. Type: 0x38 / 0x3a (Generic Comparison)
         * Windows displays it as command of unknown type.
         */
#ifndef MSCMD_ECHO_COMMAND_COMPAT
        ConOutPrintf(_T("Cmd: %s Type: %x"),
                     (Cmd->If.Operator <= IF_MAX_UNARY) ?
                        IfOperatorString[Cmd->If.Operator] :
                        Cmd->If.LeftArg,
                     Cmd->If.Operator);
#else
        ConOutPrintf(_T("Cmd: %s Type: %x "),
                     (Cmd->If.Operator <= IF_MAX_UNARY) ?
                        IfOperatorString[Cmd->If.Operator] :
                        Cmd->If.LeftArg,
                     Cmd->If.Operator);
#endif
        /* Arguments */
#ifndef MSCMD_ECHO_COMMAND_COMPAT
        ConOutPrintf(_T(" Args: `%s'"), Cmd->If.RightArg);
#else
        ConOutPrintf(_T("Args: `%s' "), Cmd->If.RightArg);
#endif

        ConOutChar(_T('\n'));

        /* Undo the extra indentation that was added for the "not" line */
        if (Cmd->If.Flags & IFFLAG_NEGATE)
        {
            SpacePad -= 2;
        }

        /* First sub-command: the command run when the condition holds;
         * the next one, if present, is the "else" command */
        Sub = Cmd->Subcommands;
        DumpCommand(Sub, SpacePad);
        if (Sub->Next)
        {
            ConOutPrintf(_T("%*s"), SpacePad - 2, _T(""));
            ConOutPuts(_T("else\n"));
            DumpCommand(Sub->Next, SpacePad);
        }
        return;
    }

    default:
        ConOutPrintf(_T("*** Unknown type: %x\n"), Cmd->Type);
        break;
    }

#undef DUMP
}
1802
1803 /*
1804 * Reconstruct a parse tree into text form; used for echoing
1805 * batch file commands and FOR instances.
1806 */
1807 VOID
EchoCommand(IN PARSED_COMMAND * Cmd)1808 EchoCommand(
1809 IN PARSED_COMMAND* Cmd)
1810 {
1811 PARSED_COMMAND* Sub;
1812 REDIRECTION* Redir;
1813
1814 if (!Cmd)
1815 return;
1816
1817 switch (Cmd->Type)
1818 {
1819 case C_COMMAND:
1820 case C_REM:
1821 {
1822 if (SubstituteForVars(Cmd->Command.First, TempBuf))
1823 ConOutPrintf(_T("%s"), TempBuf);
1824 if (SubstituteForVars(Cmd->Command.Rest, TempBuf))
1825 {
1826 ConOutPrintf(_T("%s"), TempBuf);
1827 #ifdef MSCMD_ECHO_COMMAND_COMPAT
1828 /* NOTE: For Windows compatibility, add a trailing space after printing the command parameter, if present */
1829 if (*TempBuf) ConOutChar(_T(' '));
1830 #endif
1831 }
1832 break;
1833 }
1834
1835 case C_QUIET:
1836 return;
1837
1838 case C_BLOCK:
1839 {
1840 BOOLEAN bIsFirstCmdCRLF;
1841
1842 ConOutChar(_T('('));
1843
1844 Sub = Cmd->Subcommands;
1845
1846 bIsFirstCmdCRLF = (Sub && Sub->Next);
1847
1848 #if defined(MSCMD_ECHO_COMMAND_COMPAT) && defined(MSCMD_PARSER_BUGS)
1849 /*
1850 * We will emulate Windows' CMD handling of "CRLF" and "&" multi-command
1851 * enumeration within parenthesized command blocks.
1852 */
1853 bIsFirstCmdCRLF = bIsFirstCmdCRLF && (Sub->Type != C_MULTI);
1854 #endif
1855
1856 /*
1857 * Single-command block: display all on one line.
1858 * Multi-command block: display commands on separate lines.
1859 */
1860 if (bIsFirstCmdCRLF)
1861 ConOutChar(_T('\n'));
1862
1863 for (; Sub; Sub = Sub->Next)
1864 {
1865 EchoCommand(Sub);
1866 if (Sub->Next)
1867 #ifdef MSCMD_ECHO_COMMAND_COMPAT
1868 ConOutPuts(_T(" \n "));
1869 #else
1870 ConOutChar(_T('\n'));
1871 #endif
1872 }
1873
1874 if (bIsFirstCmdCRLF)
1875 ConOutChar(_T('\n'));
1876
1877 #ifdef MSCMD_ECHO_COMMAND_COMPAT
1878 /* NOTE: For Windows compatibility, add a trailing space after printing the closing parenthesis */
1879 ConOutPuts(_T(") "));
1880 #else
1881 ConOutChar(_T(')'));
1882 #endif
1883 break;
1884 }
1885
1886 case C_MULTI:
1887 case C_OR:
1888 case C_AND:
1889 case C_PIPE:
1890 {
1891 Sub = Cmd->Subcommands;
1892 EchoCommand(Sub);
1893 ConOutPrintf(_T(" %s "), OpString[Cmd->Type - C_OP_LOWEST]);
1894 EchoCommand(Sub->Next);
1895 break;
1896 }
1897
1898 case C_FOR:
1899 {
1900 ConOutPuts(_T("for"));
1901 if (Cmd->For.Switches & FOR_DIRS) ConOutPuts(_T(" /D"));
1902 if (Cmd->For.Switches & FOR_F) ConOutPuts(_T(" /F"));
1903 if (Cmd->For.Switches & FOR_LOOP) ConOutPuts(_T(" /L"));
1904 if (Cmd->For.Switches & FOR_RECURSIVE) ConOutPuts(_T(" /R"));
1905 if (Cmd->For.Params)
1906 ConOutPrintf(_T(" %s"), Cmd->For.Params);
1907 if (Cmd->For.List && SubstituteForVars(Cmd->For.List, TempBuf))
1908 ConOutPrintf(_T(" %%%c in (%s) do "), Cmd->For.Variable, TempBuf);
1909 else
1910 ConOutPrintf(_T(" %%%c in (%s) do "), Cmd->For.Variable, Cmd->For.List);
1911 EchoCommand(Cmd->Subcommands);
1912 break;
1913 }
1914
1915 case C_IF:
1916 {
1917 ConOutPuts(_T("if"));
1918 if (Cmd->If.Flags & IFFLAG_IGNORECASE)
1919 ConOutPuts(_T(" /I"));
1920 if (Cmd->If.Flags & IFFLAG_NEGATE)
1921 ConOutPuts(_T(" not"));
1922 if (Cmd->If.LeftArg && SubstituteForVars(Cmd->If.LeftArg, TempBuf))
1923 ConOutPrintf(_T(" %s"), TempBuf);
1924 ConOutPrintf(_T(" %s"), IfOperatorString[Cmd->If.Operator]);
1925 if (SubstituteForVars(Cmd->If.RightArg, TempBuf))
1926 ConOutPrintf(_T(" %s "), TempBuf);
1927 Sub = Cmd->Subcommands;
1928 EchoCommand(Sub);
1929 if (Sub->Next)
1930 {
1931 ConOutPuts(_T(" else "));
1932 EchoCommand(Sub->Next);
1933 }
1934 break;
1935 }
1936
1937 default:
1938 ASSERT(FALSE);
1939 break;
1940 }
1941
1942 for (Redir = Cmd->Redirections; Redir; Redir = Redir->Next)
1943 {
1944 if (SubstituteForVars(Redir->Filename, TempBuf))
1945 {
1946 #ifdef MSCMD_ECHO_COMMAND_COMPAT
1947 ConOutPrintf(_T("%c%s%s "),
1948 _T('0') + Redir->Number,
1949 RedirString[Redir->Mode], TempBuf);
1950 #else
1951 ConOutPrintf(_T(" %c%s%s"),
1952 _T('0') + Redir->Number,
1953 RedirString[Redir->Mode], TempBuf);
1954 #endif
1955 }
1956 }
1957 }
1958
1959 /*
1960 * "Unparse" a command into a text form suitable for passing to CMD /C.
1961 * Used for pipes. This is basically the same thing as EchoCommand(),
1962 * but writing into a string instead of to standard output.
1963 */
1964 PTCHAR
UnparseCommand(IN PARSED_COMMAND * Cmd,OUT PTCHAR Out,IN PTCHAR OutEnd)1965 UnparseCommand(
1966 IN PARSED_COMMAND* Cmd,
1967 OUT PTCHAR Out,
1968 IN PTCHAR OutEnd)
1969 {
1970 /*
1971 * Since this function has the annoying requirement that it must avoid
1972 * overflowing the supplied buffer, define some helper macros to make
1973 * this less painful.
1974 */
1975 #define CHAR(Char) \
1976 do { \
1977 if (Out == OutEnd) return NULL; \
1978 *Out++ = Char; \
1979 } while (0)
1980 #define STRING(String) \
1981 do { \
1982 if (Out + _tcslen(String) > OutEnd) return NULL; \
1983 Out = _stpcpy(Out, String); \
1984 } while (0)
1985 #define PRINTF(Format, ...) \
1986 do { \
1987 UINT Len = _sntprintf(Out, OutEnd - Out, Format, __VA_ARGS__); \
1988 if (Len > (UINT)(OutEnd - Out)) return NULL; \
1989 Out += Len; \
1990 } while (0)
1991 #define RECURSE(Subcommand) \
1992 do { \
1993 Out = UnparseCommand(Subcommand, Out, OutEnd); \
1994 if (!Out) return NULL; \
1995 } while (0)
1996
1997 PARSED_COMMAND* Sub;
1998 REDIRECTION* Redir;
1999
2000 if (!Cmd)
2001 return Out;
2002
2003 switch (Cmd->Type)
2004 {
2005 case C_COMMAND:
2006 case C_REM:
2007 {
2008 /* This is fragile since there could be special characters, but
2009 * Windows doesn't bother escaping them, so for compatibility
2010 * we probably shouldn't do it either */
2011 if (!SubstituteForVars(Cmd->Command.First, TempBuf)) return NULL;
2012 STRING(TempBuf);
2013 if (!SubstituteForVars(Cmd->Command.Rest, TempBuf)) return NULL;
2014 STRING(TempBuf);
2015 break;
2016 }
2017
2018 case C_QUIET:
2019 {
2020 CHAR(_T('@'));
2021 RECURSE(Cmd->Subcommands);
2022 break;
2023 }
2024
2025 case C_BLOCK:
2026 {
2027 CHAR(_T('('));
2028 for (Sub = Cmd->Subcommands; Sub; Sub = Sub->Next)
2029 {
2030 RECURSE(Sub);
2031 if (Sub->Next)
2032 CHAR(_T('&'));
2033 }
2034 CHAR(_T(')'));
2035 break;
2036 }
2037
2038 case C_MULTI:
2039 case C_OR:
2040 case C_AND:
2041 case C_PIPE:
2042 {
2043 Sub = Cmd->Subcommands;
2044 RECURSE(Sub);
2045 PRINTF(_T(" %s "), OpString[Cmd->Type - C_OP_LOWEST]);
2046 RECURSE(Sub->Next);
2047 break;
2048 }
2049
2050 case C_FOR:
2051 {
2052 STRING(_T("for"));
2053 if (Cmd->For.Switches & FOR_DIRS) STRING(_T(" /D"));
2054 if (Cmd->For.Switches & FOR_F) STRING(_T(" /F"));
2055 if (Cmd->For.Switches & FOR_LOOP) STRING(_T(" /L"));
2056 if (Cmd->For.Switches & FOR_RECURSIVE) STRING(_T(" /R"));
2057 if (Cmd->For.Params)
2058 PRINTF(_T(" %s"), Cmd->For.Params);
2059 if (Cmd->For.List && SubstituteForVars(Cmd->For.List, TempBuf))
2060 PRINTF(_T(" %%%c in (%s) do "), Cmd->For.Variable, TempBuf);
2061 else
2062 PRINTF(_T(" %%%c in (%s) do "), Cmd->For.Variable, Cmd->For.List);
2063 RECURSE(Cmd->Subcommands);
2064 break;
2065 }
2066
2067 case C_IF:
2068 {
2069 STRING(_T("if"));
2070 if (Cmd->If.Flags & IFFLAG_IGNORECASE)
2071 STRING(_T(" /I"));
2072 if (Cmd->If.Flags & IFFLAG_NEGATE)
2073 STRING(_T(" not"));
2074 if (Cmd->If.LeftArg && SubstituteForVars(Cmd->If.LeftArg, TempBuf))
2075 PRINTF(_T(" %s"), TempBuf);
2076 PRINTF(_T(" %s"), IfOperatorString[Cmd->If.Operator]);
2077 if (!SubstituteForVars(Cmd->If.RightArg, TempBuf)) return NULL;
2078 PRINTF(_T(" %s "), TempBuf);
2079 Sub = Cmd->Subcommands;
2080 RECURSE(Sub);
2081 if (Sub->Next)
2082 {
2083 STRING(_T(" else "));
2084 RECURSE(Sub->Next);
2085 }
2086 break;
2087 }
2088
2089 default:
2090 ASSERT(FALSE);
2091 break;
2092 }
2093
2094 for (Redir = Cmd->Redirections; Redir; Redir = Redir->Next)
2095 {
2096 if (!SubstituteForVars(Redir->Filename, TempBuf))
2097 return NULL;
2098 PRINTF(_T(" %c%s%s"), _T('0') + Redir->Number,
2099 RedirString[Redir->Mode], TempBuf);
2100 }
2101 return Out;
2102
2103 #undef CHAR
2104 #undef STRING
2105 #undef PRINTF
2106 #undef RECURSE
2107 }
2108