1 // Copyright (c) Microsoft. All rights reserved.
2 // Licensed under the MIT license. See LICENSE file in the project root for full license information.
3 
4 using System.Globalization;
5 using System;
6 using System.Diagnostics;
7 
8 using Microsoft.Build.BuildEngine.Shared;
9 
10 namespace Microsoft.Build.BuildEngine
11 {
    /// <summary>
    /// Class:       Scanner
    /// This class does the scanning of the input and returns tokens.
    /// The usage pattern is:
    ///    Scanner s = new Scanner(expression, parserOptions);
    ///    do {
    ///      s.Advance();
    ///    } while (!s.IsNext(Token.TokenType.EndOfInput));
    ///
    ///  After Advance() is called, you can get the current token (s.CurrentToken),
    ///  check its type (s.IsNext()), get the string for it (s.IsNextString()).
    /// </summary>
24     internal sealed class Scanner
25     {
        // The expression text being scanned.
        private string expression;
        // Zero-based index into expression of the next character to examine.
        private int parsePoint;
        // The most recently scanned token; remains null until Advance() first produces one.
        private Token lookahead;
        // Set when a scan error is recorded; Advance() returns false from then on.
        private bool errorState;
        // Position recorded for the most recent scan error; -1 means "not set".
        private int errorPosition;
        // What we found instead of what we were looking for
        private string unexpectedlyFound = null;
        // Flags selecting which constructs (item lists, item metadata) the scanner accepts.
        private ParserOptions options;
        // Name of the resource string describing the scan error; null until an error sets it.
        private string errorResource = null;

        // Shared instances of "hardcoded" token strings. These are only used
        // in error messages.
        private const string comma = ",";
        private const string leftParenthesis = "(";
        private const string rightParenthesis = ")";
        private const string lessThan = "<";
        private const string greaterThan = ">";
        private const string lessThanOrEqualTo = "<=";
        private const string greaterThanOrEqualTo = ">=";
        private const string equalTo = "==";
        private const string notEqualTo = "!=";
        private const string not = "!";
        // Cache for the end-of-input token name; populated on demand by the EndOfInput property.
        private static string endOfInput = null;
49 
50         /// <summary>
51         /// Lazily format resource string to help avoid (in some perf critical cases) even loading
52         /// resources at all.
53         /// </summary>
54         private string EndOfInput
55         {
56             get
57             {
58                 if (endOfInput == null)
59                 {
60                     endOfInput = ResourceUtilities.FormatResourceString("EndOfInputTokenName");
61                 }
62 
63                 return endOfInput;
64             }
65         }
66 
Scanner()67         private Scanner() { }
68         //
69         // Constructor takes the string to parse and the culture.
70         //
Scanner(string expressionToParse, ParserOptions options)71         internal Scanner(string expressionToParse, ParserOptions options)
72         {
73             // We currently have no support (and no scenarios) for disallowing property references
74             // in Conditions.
75             ErrorUtilities.VerifyThrow(0 != (options & ParserOptions.AllowProperties),
76                 "Properties should always be allowed.");
77 
78             this.expression = expressionToParse;
79             this.parsePoint = 0;
80             this.errorState = false;
81             this.errorPosition = -1; // invalid
82             this.options = options;
83         }
84 
85         /// <summary>
86         /// If the lexer errors, it has the best knowledge of the error message to show. For example,
87         /// 'unexpected character' or 'illformed operator'. This method returns the name of the resource
88         /// string that the parser should display.
89         /// </summary>
90         /// <remarks>Intentionally not a property getter to avoid the debugger triggering the Assert dialog</remarks>
91         /// <returns></returns>
GetErrorResource()92         internal string GetErrorResource()
93         {
94             if (errorResource == null)
95             {
96                 // I do not believe this is reachable, but provide a reasonable default.
97                 Debug.Assert(false, "What code path did not set an appropriate error resource? Expression: " + expression);
98                 unexpectedlyFound = EndOfInput;
99                 return "UnexpectedCharacterInCondition";
100             }
101             else
102             {
103                 return errorResource;
104             }
105         }
106 
IsNext( Token.TokenType type )107         internal bool IsNext( Token.TokenType type )
108         {
109             return lookahead.IsToken(type);
110         }
111 
IsNextString()112         internal string IsNextString()
113         {
114             return lookahead.String;
115         }
116 
117         internal Token CurrentToken
118         {
119             get { return lookahead; }
120         }
121 
GetErrorPosition()122         internal int GetErrorPosition()
123         {
124             Debug.Assert(-1 != errorPosition); // We should have set it
125             return errorPosition;
126         }
127 
128         // The string (usually a single character) we found unexpectedly.
129         // We might want to show it in the error message, to help the user spot the error.
130         internal string UnexpectedlyFound
131         {
132             get
133             {
134                 return unexpectedlyFound;
135             }
136         }
137 
138         /// <summary>
139         /// Advance
140         /// returns true on successful advance
141         ///     and false on an erroneous token
142         ///
143         /// Doesn't return error until the bogus input is encountered.
144         /// Advance() returns true even after EndOfInput is encountered.
145         /// </summary>
Advance()146         internal bool Advance()
147         {
148             if (errorState)
149                 return false;
150 
151             if (lookahead != null && lookahead.IsToken(Token.TokenType.EndOfInput))
152                 return true;
153 
154             SkipWhiteSpace();
155 
156             // Update error position after skipping whitespace
157             errorPosition = parsePoint + 1;
158 
159             if (parsePoint >= expression.Length)
160             {
161                 lookahead = new Token(Token.TokenType.EndOfInput, null /* end of input */);
162             }
163             else
164             {
165                 switch (expression[parsePoint])
166                 {
167                     case ',':
168                         lookahead = new Token(Token.TokenType.Comma, comma);
169                         parsePoint++;
170                         break;
171                     case '(':
172                         lookahead = new Token(Token.TokenType.LeftParenthesis, leftParenthesis);
173                         parsePoint++;
174                         break;
175                     case ')':
176                         lookahead = new Token(Token.TokenType.RightParenthesis, rightParenthesis);
177                         parsePoint++;
178                         break;
179                     case '$':
180                         if (!ParseProperty())
181                             return false;
182                         break;
183                     case '%':
184                         // If the caller specified that he DOESN'T want to allow item metadata ...
185                         if ((this.options & ParserOptions.AllowItemMetadata) == 0)
186                         {
187                             errorPosition = this.parsePoint;
188                             errorState = true;
189                             errorResource = "UnexpectedCharacterInCondition";
190                             unexpectedlyFound = "%";
191                             return false;
192                         }
193                         if (!ParseItemMetadata())
194                             return false;
195                         break;
196                     case '@':
197                         int start = this.parsePoint;
198                         // If the caller specified that he DOESN'T want to allow item lists ...
199                         if ((this.options & ParserOptions.AllowItemLists) == 0)
200                         {
201                             if ((parsePoint + 1) < expression.Length && expression[parsePoint + 1] == '(')
202                             {
203                                 errorPosition = start + 1;
204                                 errorState = true;
205                                 errorResource = "ItemListNotAllowedInThisConditional";
206                                 return false;
207                             }
208                         }
209                         if (!ParseItemList())
210                             return false;
211                         break;
212                     case '!':
213                         // negation and not-equal
214                         if ((parsePoint + 1) < expression.Length && expression[parsePoint + 1] == '=')
215                         {
216                             lookahead = new Token(Token.TokenType.NotEqualTo, notEqualTo);
217                             parsePoint += 2;
218                         }
219                         else
220                         {
221                             lookahead = new Token(Token.TokenType.Not, not);
222                             parsePoint++;
223                         }
224                         break;
225                     case '>':
226                         // gt and gte
227                         if ((parsePoint + 1) < expression.Length && expression[parsePoint + 1] == '=')
228                         {
229                             lookahead = new Token(Token.TokenType.GreaterThanOrEqualTo, greaterThanOrEqualTo);
230                             parsePoint += 2;
231                         }
232                         else
233                         {
234                             lookahead = new Token(Token.TokenType.GreaterThan, greaterThan);
235                             parsePoint++;
236                         }
237                         break;
238                     case '<':
239                         // lt and lte
240                         if ((parsePoint + 1) < expression.Length && expression[parsePoint + 1] == '=')
241                         {
242                             lookahead = new Token(Token.TokenType.LessThanOrEqualTo, lessThanOrEqualTo);
243                             parsePoint += 2;
244                         }
245                         else
246                         {
247                             lookahead = new Token(Token.TokenType.LessThan, lessThan);
248                             parsePoint++;
249                         }
250                         break;
251                     case '=':
252                         if ((parsePoint + 1) < expression.Length && expression[parsePoint + 1] == '=')
253                         {
254                             lookahead = new Token(Token.TokenType.EqualTo, equalTo);
255                             parsePoint += 2;
256                         }
257                         else
258                         {
259                             errorPosition = parsePoint + 2; // expression[parsePoint + 1], counting from 1
260                             errorResource = "IllFormedEqualsInCondition";
261                             if ((parsePoint + 1) < expression.Length)
262                             {
263                                 // store the char we found instead
264                                 unexpectedlyFound = Convert.ToString(expression[parsePoint + 1], CultureInfo.InvariantCulture);
265                             }
266                             else
267                             {
268                                 unexpectedlyFound = EndOfInput;
269                             }
270                             parsePoint++;
271                             errorState = true;
272                             return false;
273                         }
274                         break;
275                     case '\'':
276                         if (!ParseQuotedString())
277                             return false;
278                         break;
279                     default:
280                         // Simple strings, function calls, decimal numbers, hex numbers
281                         if (!ParseRemaining())
282                             return false;
283                         break;
284                 }
285             }
286             return true;
287         }
288 
289         /// <summary>
290         /// Parses either the $(propertyname) syntax or the %(metadataname) syntax,
291         /// and returns the parsed string beginning with the '$' or '%', and ending with the
292         /// closing parenthesis.
293         /// </summary>
294         /// <returns></returns>
295         /// <owner>RGoel, DavidLe</owner>
ParsePropertyOrItemMetadata()296         private string ParsePropertyOrItemMetadata()
297         {
298             int start = parsePoint; // set start so that we include "$(" or "%("
299             parsePoint++;
300 
301             if (parsePoint < expression.Length && expression[parsePoint] != '(')
302             {
303                 errorState = true;
304                 errorPosition = start + 1;
305                 errorResource = "IllFormedPropertyOpenParenthesisInCondition";
306                 unexpectedlyFound = Convert.ToString(expression[parsePoint], CultureInfo.InvariantCulture);
307                 return null;
308             }
309 
310             parsePoint = ScanForPropertyExpressionEnd(expression, parsePoint++);
311 
312             // Maybe we need to generate an error for invalid characters in property/metadata name?
313             // For now, just wait and let the property/metadata evaluation handle the error case.
314 
315             if (parsePoint >= expression.Length)
316             {
317                 errorState = true;
318                 errorPosition = start + 1;
319                 errorResource = "IllFormedPropertyCloseParenthesisInCondition";
320                 unexpectedlyFound = EndOfInput;
321                 return null;
322             }
323 
324             parsePoint++;
325             return expression.Substring(start, parsePoint - start);
326         }
327 
328         /// <summary>
329         /// Scan for the end of the property expression
330         /// </summary>
ScanForPropertyExpressionEnd(string expression, int index)331         private static int ScanForPropertyExpressionEnd(string expression, int index)
332         {
333             int nestLevel = 0;
334 
335             while (index < expression.Length)
336             {
337                 if (expression[index] == '(')
338                 {
339                     nestLevel++;
340                 }
341                 else if (expression[index] == ')')
342                 {
343                     nestLevel--;
344                 }
345 
346                 // We have reached the end of the parenthesis nesting
347                 // this should be the end of the property expression
348                 // If it is not then the calling code will determine that
349                 if (nestLevel == 0)
350                 {
351                     return index;
352                 }
353                 else
354                 {
355                     index++;
356                 }
357             }
358 
359             return index;
360         }
361 
362         /// <summary>
363         /// Parses a string of the form $(propertyname).
364         /// </summary>
365         /// <returns></returns>
366         /// <owner>RGoel, DavidLe</owner>
ParseProperty()367         private bool ParseProperty()
368         {
369             string propertyExpression = this.ParsePropertyOrItemMetadata();
370 
371             if (propertyExpression == null)
372             {
373                 return false;
374             }
375             else
376             {
377                 this.lookahead = new Token(Token.TokenType.Property, propertyExpression);
378                 return true;
379             }
380         }
381 
382         /// <summary>
383         /// Parses a string of the form %(itemmetadataname).
384         /// </summary>
385         /// <returns></returns>
386         /// <owner>RGoel</owner>
ParseItemMetadata()387         private bool ParseItemMetadata()
388         {
389             string itemMetadataExpression = this.ParsePropertyOrItemMetadata();
390 
391             if (itemMetadataExpression == null)
392             {
393                 // The ParsePropertyOrItemMetadata method returns the correct error resources
394                 // for parsing properties such as $(propertyname).  At this stage in the Whidbey
395                 // cycle, we're not allowed to add new string resources, so I can't add a new
396                 // resource specific to item metadata, so here, we just change the error to
397                 // the generic "UnexpectedCharacter".
398                 errorResource = "UnexpectedCharacterInCondition";
399                 return false;
400             }
401             else
402             {
403                 this.lookahead = new Token(Token.TokenType.ItemMetadata, itemMetadataExpression);
404                 return true;
405             }
406         }
407 
ParseInternalItemList()408         private bool ParseInternalItemList()
409         {
410             int start = parsePoint;
411             parsePoint++;
412 
413             if (parsePoint < expression.Length && expression[parsePoint] != '(')
414             {
415                 // @ was not followed by (
416                 errorPosition = start + 1;
417                 errorResource = "IllFormedItemListOpenParenthesisInCondition";
418                 // Not useful to set unexpectedlyFound here. The message is going to be detailed enough.
419                 errorState = true;
420                 return false;
421             }
422             parsePoint++;
423             // Maybe we need to generate an error for invalid characters in itemgroup name?
424             // For now, just let item evaluation handle the error.
425             bool fInReplacement = false;
426             while (parsePoint < expression.Length)
427             {
428                 if (expression[parsePoint] == '\'')
429                 {
430                     fInReplacement = !fInReplacement;
431                 }
432                 else if (expression[parsePoint] == ')' && !fInReplacement)
433                 {
434                     break;
435                 }
436                 parsePoint++;
437             }
438             if (parsePoint >= expression.Length)
439             {
440 
441                 errorPosition = start + 1;
442                 if (fInReplacement)
443                 {
444                     // @( ... ' was never followed by a closing quote before the closing parenthesis
445                     errorResource = "IllFormedItemListQuoteInCondition";
446                 }
447                 else
448                 {
449                     // @( was never followed by a )
450                     errorResource = "IllFormedItemListCloseParenthesisInCondition";
451                 }
452                 // Not useful to set unexpectedlyFound here. The message is going to be detailed enough.
453                 errorState = true;
454                 return false;
455             }
456             parsePoint++;
457             return true;
458         }
459 
ParseItemList()460         private bool ParseItemList()
461         {
462             int start = parsePoint;
463             if (!ParseInternalItemList())
464             {
465                 return false;
466             }
467             lookahead = new Token(Token.TokenType.ItemList, expression.Substring(start, parsePoint - start));
468             return true;
469         }
470 
ParseQuotedString()471         private bool ParseQuotedString()
472         {
473             parsePoint++;
474             int start = parsePoint;
475             while (parsePoint < expression.Length && expression[parsePoint] != '\'')
476             {
477                 // Standalone percent-sign must be allowed within a condition because it's
478                 // needed to escape special characters.  However, percent-sign followed
479                 // by open-parenthesis is an indication of an item metadata reference, and
480                 // that is only allowed in certain contexts.
481                 if ((expression[parsePoint] == '%') && ((parsePoint + 1) < expression.Length) && (expression[parsePoint + 1] == '('))
482                 {
483                     // If the caller specified that he DOESN'T want to allow item metadata...
484                     if ((this.options & ParserOptions.AllowItemMetadata) == 0)
485                     {
486                         errorPosition = start + 1;
487                         errorState = true;
488                         errorResource = "UnexpectedCharacterInCondition";
489                         unexpectedlyFound = "%";
490                         return false;
491                     }
492                 }
493                 else if (expression[parsePoint] == '@' && ((parsePoint + 1) < expression.Length) && (expression[parsePoint + 1] == '('))
494                 {
495                     // If the caller specified that he DOESN'T want to allow item lists ...
496                     if ((this.options & ParserOptions.AllowItemLists) == 0)
497                     {
498                         errorPosition = start + 1;
499                         errorState = true;
500                         errorResource = "ItemListNotAllowedInThisConditional";
501                         return false;
502                     }
503 
504                     // Item lists have to be parsed because of the replacement syntax e.g. @(Foo,'_').
505                     // I have to know how to parse those so I can skip over the tic marks.  I don't
506                     // have to do that with other things like propertygroups, hence itemlists are
507                     // treated specially.
508 
509                     ParseInternalItemList();
510                     continue;
511                 }
512                 parsePoint++;
513             }
514             if (parsePoint >= expression.Length)
515             {
516                 // Quoted string wasn't closed
517                 errorState = true;
518                 errorPosition = start; // The message is going to say "expected after position n" so don't add 1 here.
519                 errorResource = "IllFormedQuotedStringInCondition";
520                 // Not useful to set unexpectedlyFound here. By definition it got to the end of the string.
521                 return false;
522             }
523             string originalTokenString = expression.Substring(start, parsePoint - start);
524 
525             lookahead = new Token(Token.TokenType.String, originalTokenString);
526             parsePoint++;
527             return true;
528         }
529 
ParseRemaining()530         private bool ParseRemaining()
531         {
532             int start = parsePoint;
533             if (CharacterUtilities.IsNumberStart(expression[parsePoint])) // numeric
534             {
535                 if (!ParseNumeric(start))
536                     return false;
537             }
538             else if (CharacterUtilities.IsSimpleStringStart(expression[parsePoint])) // simple string (handle 'and' and 'or')
539             {
540                 if (!ParseSimpleStringOrFunction(start))
541                     return false;
542             }
543             else
544             {
545                 // Something that wasn't a number or a letter, like a newline (%0a)
546                 errorState = true;
547                 errorPosition = start + 1;
548                 errorResource = "UnexpectedCharacterInCondition";
549                 unexpectedlyFound = Convert.ToString(expression[parsePoint], CultureInfo.InvariantCulture);
550                 return false;
551             }
552             return true;
553         }
ParseSimpleStringOrFunction( int start )554         private bool ParseSimpleStringOrFunction( int start )
555         {
556             SkipSimpleStringChars();
557             if (0 == string.Compare(expression.Substring(start, parsePoint - start), "and", StringComparison.OrdinalIgnoreCase))
558             {
559                 lookahead = new Token(Token.TokenType.And, expression.Substring(start, parsePoint - start));
560             }
561             else if (0 == string.Compare(expression.Substring(start, parsePoint - start), "or", StringComparison.OrdinalIgnoreCase))
562             {
563                 lookahead = new Token(Token.TokenType.Or, expression.Substring(start, parsePoint - start));
564             }
565             else
566             {
567                 int end = parsePoint;
568                 SkipWhiteSpace();
569                 if (parsePoint < expression.Length && expression[parsePoint] == '(')
570                 {
571                     lookahead = new Token(Token.TokenType.Function, expression.Substring(start, end - start));
572                 }
573                 else
574                 {
575                     string tokenValue = expression.Substring(start, end - start);
576                     lookahead = new Token(Token.TokenType.String, tokenValue);
577                 }
578             }
579             return true;
580         }
ParseNumeric( int start )581         private bool ParseNumeric( int start )
582         {
583             if ((expression.Length-parsePoint) > 2 && expression[parsePoint] == '0' && (expression[parsePoint + 1] == 'x' || expression[parsePoint + 1] == 'X'))
584             {
585                 // Hex number
586                 parsePoint += 2;
587                 SkipHexDigits();
588                 lookahead = new Token(Token.TokenType.Numeric, expression.Substring(start, parsePoint - start));
589             }
590             else if ( CharacterUtilities.IsNumberStart(expression[parsePoint]))
591             {
592                 // Decimal number
593                 if (expression[parsePoint] == '+')
594                 {
595                     parsePoint++;
596                 }
597                 else if (expression[parsePoint] == '-')
598                 {
599                     parsePoint++;
600                 }
601                 SkipDigits();
602                 if (parsePoint < expression.Length && expression[parsePoint] == '.')
603                 {
604                     parsePoint++;
605                 }
606                 if (parsePoint < expression.Length)
607                 {
608                     SkipDigits();
609                 }
610                 // Do we need to error on malformed input like 0.00.00)? or will the conversion handle it?
611                 // For now, let the conversion generate the error.
612                 lookahead = new Token(Token.TokenType.Numeric, expression.Substring(start, parsePoint - start));
613             }
614             else
615             {
616                 // Unreachable
617                 errorState = true;
618                 errorPosition = start + 1;
619                 return false;
620             }
621             return true;
622         }
SkipWhiteSpace()623         private void SkipWhiteSpace()
624         {
625             while (parsePoint < expression.Length && char.IsWhiteSpace(expression[parsePoint]))
626                 parsePoint++;
627             return;
628         }
SkipDigits()629         private void SkipDigits()
630         {
631             while (parsePoint < expression.Length && char.IsDigit(expression[parsePoint]))
632                 parsePoint++;
633             return;
634         }
SkipHexDigits()635         private void SkipHexDigits()
636         {
637             while (parsePoint < expression.Length && CharacterUtilities.IsHexDigit(expression[parsePoint]))
638                 parsePoint++;
639             return;
640         }
SkipSimpleStringChars()641         private void SkipSimpleStringChars()
642         {
643             while (parsePoint < expression.Length && CharacterUtilities.IsSimpleStringChar(expression[parsePoint]))
644                 parsePoint++;
645             return;
646         }
647     }
648 }
649