// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE file in the project root for full license information.

using System.Globalization;
using System;
using System.Diagnostics;

using Microsoft.Build.BuildEngine.Shared;

namespace Microsoft.Build.BuildEngine
{
    /// <summary>
    /// Class:       Scanner
    /// This class does the scanning of the input and returns tokens.
    /// The usage pattern is:
    ///    Scanner s = new Scanner(expression, parserOptions);
    ///    do {
    ///      s.Advance();
    ///    } while (!s.IsNext(Token.TokenType.EndOfInput));
    ///
    /// After Advance() is called, you can get the current token (s.CurrentToken),
    /// check its type (s.IsNext()), get the string for it (s.IsNextString()).
    /// </summary>
    internal sealed class Scanner
    {
        // The text being scanned; assigned once by the constructor.
        private readonly string expression;
        // Zero-based index of the next character of `expression` to examine.
        private int parsePoint;
        // The most recently scanned token (null until the first successful Advance()).
        private Token lookahead;
        // Set once a scanning error occurs; Advance() then keeps returning false.
        private bool errorState;
        // Position reported for the error; -1 means "not set".
        private int errorPosition;
        // What we found instead of what we were looking for
        private string unexpectedlyFound = null;
        // Flags controlling which reference kinds (item lists, item metadata) are accepted.
        private readonly ParserOptions options;
        // Name of the resource string describing the error, or null if none was recorded.
        private string errorResource = null;

        // Shared instances of "hardcoded" token strings. These are only used
        // in error messages.
        private const string comma = ",";
        private const string leftParenthesis = "(";
        private const string rightParenthesis = ")";
        private const string lessThan = "<";
        private const string greaterThan = ">";
        private const string lessThanOrEqualTo = "<=";
        private const string greaterThanOrEqualTo = ">=";
        private const string equalTo = "==";
        private const string notEqualTo = "!=";
        private const string not = "!";
        private static string endOfInput = null;

        /// <summary>
        /// Lazily format resource string to help avoid (in some perf critical cases) even loading
        /// resources at all.
        /// </summary>
        private string EndOfInput
        {
            get
            {
                if (endOfInput == null)
                {
                    endOfInput = ResourceUtilities.FormatResourceString("EndOfInputTokenName");
                }

                return endOfInput;
            }
        }

        /// <summary>
        /// Private so that a Scanner cannot be created without an expression and options.
        /// </summary>
        private Scanner() { }

        /// <summary>
        /// Constructor takes the string to parse and the parser options.
        /// </summary>
        /// <param name="expressionToParse">The expression text to tokenize.</param>
        /// <param name="options">Parser option flags; must include ParserOptions.AllowProperties.</param>
        internal Scanner(string expressionToParse, ParserOptions options)
        {
            // We currently have no support (and no scenarios) for disallowing property references
            // in Conditions.
            ErrorUtilities.VerifyThrow(0 != (options & ParserOptions.AllowProperties),
                "Properties should always be allowed.");

            this.expression = expressionToParse;
            this.parsePoint = 0;
            this.errorState = false;
            this.errorPosition = -1; // invalid
            this.options = options;
        }

        /// <summary>
        /// If the lexer errors, it has the best knowledge of the error message to show. For example,
        /// 'unexpected character' or 'illformed operator'. This method returns the name of the resource
        /// string that the parser should display.
        /// </summary>
        /// <remarks>Intentionally not a property getter to avoid the debugger triggering the Assert dialog</remarks>
        /// <returns>
        /// The recorded error resource name, or "UnexpectedCharacterInCondition" if none was recorded
        /// (in which case UnexpectedlyFound is also set to the end-of-input token name).
        /// </returns>
        internal string GetErrorResource()
        {
            if (errorResource == null)
            {
                // I do not believe this is reachable, but provide a reasonable default.
                Debug.Assert(false, "What code path did not set an appropriate error resource? Expression: " + expression);
                unexpectedlyFound = EndOfInput;
                return "UnexpectedCharacterInCondition";
            }

            return errorResource;
        }

        /// <summary>
        /// Returns the result of asking the current token whether it is of the given type.
        /// </summary>
        internal bool IsNext(Token.TokenType type)
        {
            return lookahead.IsToken(type);
        }

        /// <summary>
        /// Returns the string of the current token.
        /// </summary>
        internal string IsNextString()
        {
            return lookahead.String;
        }

        /// <summary>
        /// The token produced by the most recent successful Advance().
        /// </summary>
        internal Token CurrentToken
        {
            get { return lookahead; }
        }

        /// <summary>
        /// Returns the position recorded for the current error.
        /// </summary>
        internal int GetErrorPosition()
        {
            Debug.Assert(-1 != errorPosition); // We should have set it
            return errorPosition;
        }

        // The string (usually a single character) we found unexpectedly.
        // We might want to show it in the error message, to help the user spot the error.
        internal string UnexpectedlyFound
        {
            get
            {
                return unexpectedlyFound;
            }
        }

        /// <summary>
        /// Advance
        /// returns true on successful advance
        ///     and false on an erroneous token
        ///
        /// Doesn't return error until the bogus input is encountered.
        /// Advance() returns true even after EndOfInput is encountered.
        /// </summary>
        internal bool Advance()
        {
            // Once in the error state, stay there.
            if (errorState)
            {
                return false;
            }

            // End of input is sticky: keep reporting success without scanning further.
            if (lookahead != null && lookahead.IsToken(Token.TokenType.EndOfInput))
            {
                return true;
            }

            SkipWhiteSpace();

            // Update error position after skipping whitespace
            errorPosition = parsePoint + 1;

            if (parsePoint >= expression.Length)
            {
                lookahead = new Token(Token.TokenType.EndOfInput, null /* end of input */);
                return true;
            }

            switch (expression[parsePoint])
            {
                case ',':
                    lookahead = new Token(Token.TokenType.Comma, comma);
                    parsePoint++;
                    break;

                case '(':
                    lookahead = new Token(Token.TokenType.LeftParenthesis, leftParenthesis);
                    parsePoint++;
                    break;

                case ')':
                    lookahead = new Token(Token.TokenType.RightParenthesis, rightParenthesis);
                    parsePoint++;
                    break;

                case '$':
                    if (!ParseProperty())
                    {
                        return false;
                    }
                    break;

                case '%':
                    // If the caller specified that he DOESN'T want to allow item metadata ...
                    if ((this.options & ParserOptions.AllowItemMetadata) == 0)
                    {
                        // NOTE(review): this stores the 0-based index, whereas most other error paths
                        // in this class store index + 1 -- confirm whether that is intentional.
                        errorPosition = this.parsePoint;
                        errorState = true;
                        errorResource = "UnexpectedCharacterInCondition";
                        unexpectedlyFound = "%";
                        return false;
                    }
                    if (!ParseItemMetadata())
                    {
                        return false;
                    }
                    break;

                case '@':
                    int start = this.parsePoint;
                    // If the caller specified that he DOESN'T want to allow item lists ...
                    if ((this.options & ParserOptions.AllowItemLists) == 0)
                    {
                        if ((parsePoint + 1) < expression.Length && expression[parsePoint + 1] == '(')
                        {
                            errorPosition = start + 1;
                            errorState = true;
                            errorResource = "ItemListNotAllowedInThisConditional";
                            return false;
                        }
                    }
                    if (!ParseItemList())
                    {
                        return false;
                    }
                    break;

                case '!':
                    // negation and not-equal
                    if ((parsePoint + 1) < expression.Length && expression[parsePoint + 1] == '=')
                    {
                        lookahead = new Token(Token.TokenType.NotEqualTo, notEqualTo);
                        parsePoint += 2;
                    }
                    else
                    {
                        lookahead = new Token(Token.TokenType.Not, not);
                        parsePoint++;
                    }
                    break;

                case '>':
                    // gt and gte
                    if ((parsePoint + 1) < expression.Length && expression[parsePoint + 1] == '=')
                    {
                        lookahead = new Token(Token.TokenType.GreaterThanOrEqualTo, greaterThanOrEqualTo);
                        parsePoint += 2;
                    }
                    else
                    {
                        lookahead = new Token(Token.TokenType.GreaterThan, greaterThan);
                        parsePoint++;
                    }
                    break;

                case '<':
                    // lt and lte
                    if ((parsePoint + 1) < expression.Length && expression[parsePoint + 1] == '=')
                    {
                        lookahead = new Token(Token.TokenType.LessThanOrEqualTo, lessThanOrEqualTo);
                        parsePoint += 2;
                    }
                    else
                    {
                        lookahead = new Token(Token.TokenType.LessThan, lessThan);
                        parsePoint++;
                    }
                    break;

                case '=':
                    if ((parsePoint + 1) < expression.Length && expression[parsePoint + 1] == '=')
                    {
                        lookahead = new Token(Token.TokenType.EqualTo, equalTo);
                        parsePoint += 2;
                    }
                    else
                    {
                        // A lone '=' is not a valid operator.
                        errorPosition = parsePoint + 2; // expression[parsePoint + 1], counting from 1
                        errorResource = "IllFormedEqualsInCondition";
                        if ((parsePoint + 1) < expression.Length)
                        {
                            // store the char we found instead
                            unexpectedlyFound = Convert.ToString(expression[parsePoint + 1], CultureInfo.InvariantCulture);
                        }
                        else
                        {
                            unexpectedlyFound = EndOfInput;
                        }
                        parsePoint++;
                        errorState = true;
                        return false;
                    }
                    break;

                case '\'':
                    if (!ParseQuotedString())
                    {
                        return false;
                    }
                    break;

                default:
                    // Simple strings, function calls, decimal numbers, hex numbers
                    if (!ParseRemaining())
                    {
                        return false;
                    }
                    break;
            }

            return true;
        }

        /// <summary>
        /// Parses either the $(propertyname) syntax or the %(metadataname) syntax,
        /// and returns the parsed string beginning with the '$' or '%', and ending with the
        /// closing parenthesis.
        /// </summary>
        /// <returns>The parsed text, or null after recording an error.</returns>
        /// <owner>RGoel, DavidLe</owner>
        private string ParsePropertyOrItemMetadata()
        {
            int start = parsePoint; // set start so that we include "$(" or "%("
            parsePoint++;

            if (parsePoint < expression.Length && expression[parsePoint] != '(')
            {
                errorState = true;
                errorPosition = start + 1;
                errorResource = "IllFormedPropertyOpenParenthesisInCondition";
                unexpectedlyFound = Convert.ToString(expression[parsePoint], CultureInfo.InvariantCulture);
                return null;
            }

            // parsePoint is now at the '(' (or at the end of the expression, when '$' or '%' was the
            // last character; the scan then returns immediately and the check below reports it).
            parsePoint = ScanForPropertyExpressionEnd(expression, parsePoint);

            // Maybe we need to generate an error for invalid characters in property/metadata name?
            // For now, just wait and let the property/metadata evaluation handle the error case.

            if (parsePoint >= expression.Length)
            {
                errorState = true;
                errorPosition = start + 1;
                errorResource = "IllFormedPropertyCloseParenthesisInCondition";
                unexpectedlyFound = EndOfInput;
                return null;
            }

            parsePoint++; // step past the closing ')'
            return expression.Substring(start, parsePoint - start);
        }

        /// <summary>
        /// Scan for the end of the property expression
        /// </summary>
        /// <param name="expression">The text to scan.</param>
        /// <param name="index">Index at which to start scanning.</param>
        /// <returns>
        /// The index at which the parenthesis nesting level is back to zero, or
        /// expression.Length if the end of the text is reached first.
        /// </returns>
        private static int ScanForPropertyExpressionEnd(string expression, int index)
        {
            int nestLevel = 0;

            while (index < expression.Length)
            {
                if (expression[index] == '(')
                {
                    nestLevel++;
                }
                else if (expression[index] == ')')
                {
                    nestLevel--;
                }

                // We have reached the end of the parenthesis nesting
                // this should be the end of the property expression
                // If it is not then the calling code will determine that
                if (nestLevel == 0)
                {
                    return index;
                }

                index++;
            }

            return index;
        }

        /// <summary>
        /// Parses a string of the form $(propertyname).
        /// </summary>
        /// <returns>true if a Property token was produced; false if an error was recorded.</returns>
        /// <owner>RGoel, DavidLe</owner>
        private bool ParseProperty()
        {
            string propertyExpression = this.ParsePropertyOrItemMetadata();

            if (propertyExpression == null)
            {
                return false;
            }

            this.lookahead = new Token(Token.TokenType.Property, propertyExpression);
            return true;
        }

        /// <summary>
        /// Parses a string of the form %(itemmetadataname).
        /// </summary>
        /// <returns>true if an ItemMetadata token was produced; false if an error was recorded.</returns>
        /// <owner>RGoel</owner>
        private bool ParseItemMetadata()
        {
            string itemMetadataExpression = this.ParsePropertyOrItemMetadata();

            if (itemMetadataExpression == null)
            {
                // The ParsePropertyOrItemMetadata method returns the correct error resources
                // for parsing properties such as $(propertyname). At this stage in the Whidbey
                // cycle, we're not allowed to add new string resources, so I can't add a new
                // resource specific to item metadata, so here, we just change the error to
                // the generic "UnexpectedCharacter".
                errorResource = "UnexpectedCharacterInCondition";
                return false;
            }

            this.lookahead = new Token(Token.TokenType.ItemMetadata, itemMetadataExpression);
            return true;
        }

        /// <summary>
        /// Advances parsePoint past an item list of the form @(...), starting at the '@'.
        /// A ')' that appears between single quotes does not terminate the list.
        /// Does not create a token.
        /// </summary>
        /// <returns>true if the list was skipped; false if an error was recorded.</returns>
        private bool ParseInternalItemList()
        {
            int start = parsePoint;
            parsePoint++;

            if (parsePoint < expression.Length && expression[parsePoint] != '(')
            {
                // @ was not followed by (
                errorPosition = start + 1;
                errorResource = "IllFormedItemListOpenParenthesisInCondition";
                // Not useful to set unexpectedlyFound here. The message is going to be detailed enough.
                errorState = true;
                return false;
            }
            parsePoint++;

            // Maybe we need to generate an error for invalid characters in itemgroup name?
            // For now, just let item evaluation handle the error.
            bool fInReplacement = false;
            while (parsePoint < expression.Length)
            {
                if (expression[parsePoint] == '\'')
                {
                    fInReplacement = !fInReplacement;
                }
                else if (expression[parsePoint] == ')' && !fInReplacement)
                {
                    break;
                }
                parsePoint++;
            }

            if (parsePoint >= expression.Length)
            {
                errorPosition = start + 1;
                if (fInReplacement)
                {
                    // @( ... ' was never followed by a closing quote before the closing parenthesis
                    errorResource = "IllFormedItemListQuoteInCondition";
                }
                else
                {
                    // @( was never followed by a )
                    errorResource = "IllFormedItemListCloseParenthesisInCondition";
                }
                // Not useful to set unexpectedlyFound here. The message is going to be detailed enough.
                errorState = true;
                return false;
            }

            parsePoint++; // step past the closing ')'
            return true;
        }

        /// <summary>
        /// Parses an item list of the form @(...) and makes it the current token.
        /// </summary>
        /// <returns>true if an ItemList token was produced; false if an error was recorded.</returns>
        private bool ParseItemList()
        {
            int start = parsePoint;
            if (!ParseInternalItemList())
            {
                return false;
            }

            lookahead = new Token(Token.TokenType.ItemList, expression.Substring(start, parsePoint - start));
            return true;
        }

        /// <summary>
        /// Parses a single-quoted string starting at the opening quote and makes its contents
        /// (without the quotes) the current String token.
        /// </summary>
        /// <returns>true if a String token was produced; false if an error was recorded.</returns>
        private bool ParseQuotedString()
        {
            parsePoint++; // step past the opening quote
            int start = parsePoint;
            while (parsePoint < expression.Length && expression[parsePoint] != '\'')
            {
                // Standalone percent-sign must be allowed within a condition because it's
                // needed to escape special characters. However, percent-sign followed
                // by open-parenthesis is an indication of an item metadata reference, and
                // that is only allowed in certain contexts.
                if ((expression[parsePoint] == '%') && ((parsePoint + 1) < expression.Length) && (expression[parsePoint + 1] == '('))
                {
                    // If the caller specified that he DOESN'T want to allow item metadata...
                    if ((this.options & ParserOptions.AllowItemMetadata) == 0)
                    {
                        errorPosition = start + 1;
                        errorState = true;
                        errorResource = "UnexpectedCharacterInCondition";
                        unexpectedlyFound = "%";
                        return false;
                    }
                }
                else if (expression[parsePoint] == '@' && ((parsePoint + 1) < expression.Length) && (expression[parsePoint + 1] == '('))
                {
                    // If the caller specified that he DOESN'T want to allow item lists ...
                    if ((this.options & ParserOptions.AllowItemLists) == 0)
                    {
                        errorPosition = start + 1;
                        errorState = true;
                        errorResource = "ItemListNotAllowedInThisConditional";
                        return false;
                    }

                    // Item lists have to be parsed because of the replacement syntax e.g. @(Foo,'_').
                    // I have to know how to parse those so I can skip over the tic marks. I don't
                    // have to do that with other things like propertygroups, hence itemlists are
                    // treated specially.

                    // The return value is deliberately not checked: because '(' is known to follow
                    // the '@', the only failure is an unterminated list, which leaves parsePoint at
                    // or past the end of the expression, so the unclosed-string check below
                    // reports the error instead.
                    ParseInternalItemList();
                    continue;
                }
                parsePoint++;
            }

            if (parsePoint >= expression.Length)
            {
                // Quoted string wasn't closed
                errorState = true;
                errorPosition = start; // The message is going to say "expected after position n" so don't add 1 here.
                errorResource = "IllFormedQuotedStringInCondition";
                // Not useful to set unexpectedlyFound here. By definition it got to the end of the string.
                return false;
            }

            string originalTokenString = expression.Substring(start, parsePoint - start);

            lookahead = new Token(Token.TokenType.String, originalTokenString);
            parsePoint++; // step past the closing quote
            return true;
        }

        /// <summary>
        /// Parses whatever is not handled by a dedicated case in Advance(): a number, or a simple
        /// string / keyword / function name, as classified by CharacterUtilities.
        /// </summary>
        /// <returns>true if a token was produced; false if an error was recorded.</returns>
        private bool ParseRemaining()
        {
            int start = parsePoint;
            if (CharacterUtilities.IsNumberStart(expression[parsePoint])) // numeric
            {
                return ParseNumeric(start);
            }

            if (CharacterUtilities.IsSimpleStringStart(expression[parsePoint])) // simple string (handle 'and' and 'or')
            {
                return ParseSimpleStringOrFunction(start);
            }

            // Something that wasn't a number or a letter, like a newline (%0a)
            errorState = true;
            errorPosition = start + 1;
            errorResource = "UnexpectedCharacterInCondition";
            unexpectedlyFound = Convert.ToString(expression[parsePoint], CultureInfo.InvariantCulture);
            return false;
        }

        /// <summary>
        /// Parses a run of simple-string characters and classifies it as an And token, an Or token
        /// (both matched case-insensitively), a Function token (when the next non-whitespace
        /// character is '('), or otherwise a String token.
        /// </summary>
        /// <param name="start">Index of the first character of the word.</param>
        /// <returns>Always true.</returns>
        private bool ParseSimpleStringOrFunction(int start)
        {
            SkipSimpleStringChars();

            // Extract the word once instead of re-slicing the expression for every comparison.
            string word = expression.Substring(start, parsePoint - start);

            if (string.Equals(word, "and", StringComparison.OrdinalIgnoreCase))
            {
                lookahead = new Token(Token.TokenType.And, word);
            }
            else if (string.Equals(word, "or", StringComparison.OrdinalIgnoreCase))
            {
                lookahead = new Token(Token.TokenType.Or, word);
            }
            else
            {
                // Whitespace is allowed between a function name and its opening parenthesis.
                // The skipped whitespace is not part of the token text.
                SkipWhiteSpace();
                if (parsePoint < expression.Length && expression[parsePoint] == '(')
                {
                    lookahead = new Token(Token.TokenType.Function, word);
                }
                else
                {
                    lookahead = new Token(Token.TokenType.String, word);
                }
            }

            return true;
        }

        /// <summary>
        /// Parses a hex number (0x / 0X followed by at least one more character) or a decimal
        /// number (optional sign, digits, optional '.', digits) into a Numeric token.
        /// The token text is not validated here; conversion is left to the consumer.
        /// </summary>
        /// <param name="start">Index of the first character of the number.</param>
        /// <returns>true if a Numeric token was produced; false if an error was recorded.</returns>
        private bool ParseNumeric(int start)
        {
            if ((expression.Length - parsePoint) > 2 && expression[parsePoint] == '0' && (expression[parsePoint + 1] == 'x' || expression[parsePoint + 1] == 'X'))
            {
                // Hex number
                parsePoint += 2;
                SkipHexDigits();
                lookahead = new Token(Token.TokenType.Numeric, expression.Substring(start, parsePoint - start));
            }
            else if (CharacterUtilities.IsNumberStart(expression[parsePoint]))
            {
                // Decimal number: optional leading sign
                if (expression[parsePoint] == '+' || expression[parsePoint] == '-')
                {
                    parsePoint++;
                }

                SkipDigits();
                if (parsePoint < expression.Length && expression[parsePoint] == '.')
                {
                    parsePoint++;
                }

                // SkipDigits bounds-checks itself and is a no-op when no digit follows.
                SkipDigits();

                // Do we need to error on malformed input like 0.00.00)? or will the conversion handle it?
                // For now, let the conversion generate the error.
                lookahead = new Token(Token.TokenType.Numeric, expression.Substring(start, parsePoint - start));
            }
            else
            {
                // Unreachable as long as the only caller checks IsNumberStart first. Record a
                // complete error anyway so GetErrorResource() never has to fall back to its default.
                errorState = true;
                errorPosition = start + 1;
                errorResource = "UnexpectedCharacterInCondition";
                unexpectedlyFound = Convert.ToString(expression[parsePoint], CultureInfo.InvariantCulture);
                return false;
            }

            return true;
        }

        /// <summary>
        /// Advances parsePoint past any characters for which char.IsWhiteSpace is true.
        /// </summary>
        private void SkipWhiteSpace()
        {
            while (parsePoint < expression.Length && char.IsWhiteSpace(expression[parsePoint]))
            {
                parsePoint++;
            }
        }

        /// <summary>
        /// Advances parsePoint past any characters for which char.IsDigit is true.
        /// </summary>
        private void SkipDigits()
        {
            while (parsePoint < expression.Length && char.IsDigit(expression[parsePoint]))
            {
                parsePoint++;
            }
        }

        /// <summary>
        /// Advances parsePoint past any characters accepted by CharacterUtilities.IsHexDigit.
        /// </summary>
        private void SkipHexDigits()
        {
            while (parsePoint < expression.Length && CharacterUtilities.IsHexDigit(expression[parsePoint]))
            {
                parsePoint++;
            }
        }

        /// <summary>
        /// Advances parsePoint past any characters accepted by CharacterUtilities.IsSimpleStringChar.
        /// </summary>
        private void SkipSimpleStringChars()
        {
            while (parsePoint < expression.Length && CharacterUtilities.IsSimpleStringChar(expression[parsePoint]))
            {
                parsePoint++;
            }
        }
    }
}