//---------------------------------------------------------------------
// <copyright file="CqlLexerHelper.cs" company="Microsoft">
// Copyright (c) Microsoft Corporation. All rights reserved.
// </copyright>
//
// @owner Microsoft
// @backupOwner Microsoft
//---------------------------------------------------------------------

namespace System.Data.Common.EntitySql
{
    using System;
    using System.Globalization;
    using System.Collections.Generic;
    using System.Text.RegularExpressions;
    using System.Diagnostics;
    using System.Text;
    using System.Data.Entity;

    /// <summary>
    /// Represents eSQL error context: where in the command text an error applies
    /// and what additional information should accompany it.
    /// </summary>
    internal class ErrorContext
    {
        /// <summary>
        /// Represents the position of the error in the input stream.
        /// Initialized to -1.
        /// </summary>
        internal int InputPosition = -1;

        /// <summary>
        /// Represents the additional/contextual information related to the error position/cause.
        /// </summary>
        internal string ErrorContextInfo;

        /// <summary>
        /// Defines how ErrorContextInfo should be interpreted. Defaults to true.
        /// NOTE(review): presumably true means ErrorContextInfo holds a resource identifier
        /// rather than literal text - confirm against the code that formats errors.
        /// </summary>
        internal bool UseContextInfoAsResourceIdentifier = true;

        /// <summary>
        /// Represents a reference to the original command text.
        /// </summary>
        internal string CommandText;
    }

    /// <summary>
    /// Represents Cql scanner and helper functions.
/// </summary>
    internal sealed partial class CqlLexer
    {
        // Comparer handed to the keyword dictionary and the invalid-alias set built in this file,
        // which makes those lookups case-insensitive (ordinal).
        static readonly StringComparer _stringComparer = StringComparer.OrdinalIgnoreCase;

        // Backing fields for the static Internal* lookup properties. The ones whose
        // initialization is visible in this file are filled on first access.
        static Dictionary<string, short> _keywords;
        static HashSet<string> _invalidAliasNames;
        static HashSet<string> _invalidInlineFunctionNames;
        static Dictionary<string, short> _operators;
        static Dictionary<string, short> _punctuators;
        static HashSet<string> _canonicalFunctionNames;

        // Regex instances created on first use by the IsValid*Value helpers from the patterns below.
        static Regex _reDateTimeValue;
        static Regex _reTimeValue;
        static Regex _reDateTimeOffsetValue;

        // yyyy-m[m]-d[d], one or more spaces, h[h]:m[m], optional :s[s] with an optional 1-7 digit fraction.
        private const string _datetimeValueRegularExpression = @"^[0-9]{4}-[0-9]{1,2}-[0-9]{1,2}([ ])+[0-9]{1,2}:[0-9]{1,2}(:[0-9]{1,2}(\.[0-9]{1,7})?)?$";
        // h[h]:m[m], optional :s[s] with an optional 1-7 digit fraction.
        private const string _timeValueRegularExpression = @"^[0-9]{1,2}:[0-9]{1,2}(:[0-9]{1,2}(\.[0-9]{1,7})?)?$";
        // Same as the datetime pattern, followed by optional spaces and a mandatory [+-]h[h]:m[m] offset.
        private const string _datetimeOffsetValueRegularExpression = @"^[0-9]{4}-[0-9]{1,2}-[0-9]{1,2}([ ])+[0-9]{1,2}:[0-9]{1,2}(:[0-9]{1,2}(\.[0-9]{1,7})?)?([ ])*[\+-][0-9]{1,2}:[0-9]{1,2}$";

        // Current input position; advanced by the length of each lexeme (see AdvanceIPos).
        private int _iPos;
        private int _lineNumber;
        ParserOptions _parserOptions;
        // The command text being scanned; also passed along when raising eSQL errors.
        private string _query;
        /// <summary>
        /// set for DOT expressions
        /// </summary>
        private bool _symbolAsIdentifierState = false;
        /// <summary>
        /// set for AS expressions
        /// </summary>
        private bool _symbolAsAliasIdentifierState = false;
        /// <summary>
        /// set for function definitions
        /// </summary>
        private bool _symbolAsInlineFunctionNameState = false;

        /// Defines the set of characters to be interpreted as mandatory line breaks
        /// according to UNICODE 5.0, section 5.8 Newline Guidelines. These are 'mandatory'
        /// line breaks. We do not handle other 'line breaking opportunities' as defined by
        /// UNICODE 5.0 since they are intended for presentation. The mandatory line break
        /// defines breaking opportunities that must not be ignored.
/// For all practical purposes
        /// the interpretation of mandatory breaks determines the end of one line and consequently
        /// the start of the next line of query text.
        /// NOTE that CR and CRLF is treated as a composite 'character' and was obviously and intentionally
        /// omitted in the character set below.
        static readonly Char[] _newLineCharacters = { '\u000A' , // LF - line feed
                                                      '\u0085' , // NEL - next line
                                                      '\u000B' , // VT - vertical tab
                                                      '\u2028' , // LS - line separator
                                                      '\u2029'   // PS - paragraph separator
                                                    };

        /// <summary>
        /// Initializes scanner: stores the query text and parser options and
        /// points the underlying reader (yy_reader) at the query text.
        /// </summary>
        /// <param name="query">input query; must not be null (checked by Debug.Assert only)</param>
        /// <param name="parserOptions">parser options; must not be null (checked by Debug.Assert only)</param>
        internal CqlLexer(string query, ParserOptions parserOptions)
            : this()
        {
            Debug.Assert(query != null, "query must not be null");
            Debug.Assert(parserOptions != null, "parserOptions must not be null");

            _query = query;
            _parserOptions = parserOptions;
            yy_reader = new System.IO.StringReader(_query);
        }

        /// <summary>
        /// Creates a new token whose value is an AST node.
        /// </summary>
        /// <param name="tokenId">tokenid</param>
        /// <param name="tokenvalue">ast node</param>
        /// <returns>a new Token wrapping the given id and node</returns>
        static internal Token NewToken(short tokenId, AST.Node tokenvalue)
        {
            return new Token(tokenId, tokenvalue);
        }

        /// <summary>
        /// Creates a new token representing a terminal.
        /// </summary>
        /// <param name="tokenId">tokenid</param>
        /// <param name="termToken">lexical value</param>
        /// <returns>a new Token wrapping the given id and terminal</returns>
        static internal Token NewToken(short tokenId, TerminalToken termToken)
        {
            return new Token(tokenId, termToken);
        }

        /// <summary>
        /// Represents a token to be used in parser stack.
/// </summary>
        internal class Token
        {
            // Both fields are assigned only by the constructors, so they are readonly.
            private readonly short _tokenId;
            private readonly object _tokenValue;

            /// <summary>
            /// Creates a token carrying an AST node.
            /// </summary>
            /// <param name="tokenId">parser token id</param>
            /// <param name="tokenValue">AST node stored as the token value</param>
            internal Token(short tokenId, AST.Node tokenValue)
            {
                _tokenId = tokenId;
                _tokenValue = tokenValue;
            }

            /// <summary>
            /// Creates a token carrying a terminal (lexeme plus position).
            /// </summary>
            /// <param name="tokenId">parser token id</param>
            /// <param name="terminal">terminal stored as the token value</param>
            internal Token(short tokenId, TerminalToken terminal)
            {
                _tokenId = tokenId;
                _tokenValue = terminal;
            }

            /// <summary>
            /// Parser token id supplied at construction.
            /// </summary>
            internal short TokenId
            {
                get { return _tokenId; }
            }

            /// <summary>
            /// Token payload: either the AST.Node or the TerminalToken supplied at construction.
            /// </summary>
            internal object Value
            {
                get { return _tokenValue; }
            }
        }

        /// <summary>
        /// Represents a terminal token
        /// </summary>
        internal class TerminalToken
        {
            // Assigned only by the constructor, so they are readonly.
            private readonly string _token;
            private readonly int _iPos;

            /// <summary>
            /// Creates a terminal token.
            /// </summary>
            /// <param name="token">lexeme text</param>
            /// <param name="iPos">input position associated with the lexeme</param>
            internal TerminalToken(string token, int iPos)
            {
                _token = token;
                _iPos = iPos;
            }

            /// <summary>
            /// Input position supplied at construction.
            /// </summary>
            internal int IPos
            {
                get { return _iPos; }
            }

            /// <summary>
            /// Lexeme text supplied at construction.
            /// </summary>
            internal string Token
            {
                get { return _token; }
            }
        }

        internal static class yy_translate
        {
            /// <summary>
            /// Normalizes an input character into the reduced alphabet used by the scanner:
            /// new-line characters become '\n', other whitespace/control characters become ' ',
            /// characters below 0x7F pass through unchanged, other letters/symbols/numbers
            /// become 'a', and anything else becomes the marker character '`'.
            /// </summary>
            /// <param name="c">raw input character</param>
            /// <returns>normalized character</returns>
            internal static char translate(char c)
            #region TRANSLATE
            {
                if (Char.IsWhiteSpace(c) || Char.IsControl(c))
                {
                    return IsNewLine(c) ? '\n' : ' ';
                }

                if (c < 0x007F)
                {
                    return c;
                }

                if (Char.IsLetter(c) || Char.IsSymbol(c) || Char.IsNumber(c))
                {
                    return 'a';
                }

                //
                // otherwise pass dummy 'marker' char so as we can continue 'extracting' tokens.
                //
                return '`';
            }
            #endregion
        }


        /// <summary>
        /// Returns current lexeme
        /// </summary>
        internal string YYText
        {
            get { return yytext(); }
        }

        /// <summary>
        /// Returns current input position
        /// </summary>
        internal int IPos
        {
            get { return _iPos; }
        }

        /// <summary>
        /// Advances input position.
/// </summary>
        /// <returns>updated input position</returns>
        internal int AdvanceIPos()
        {
            // Move past the current lexeme.
            _iPos += YYText.Length;
            return _iPos;
        }

        /// <summary>
        /// returns true if given term is a eSQL keyword
        /// (the keyword dictionary is built with a case-insensitive ordinal comparer)
        /// </summary>
        /// <param name="term">candidate keyword</param>
        /// <returns>true when the keyword dictionary contains the term</returns>
        internal static bool IsReservedKeyword(string term)
        {
            return CqlLexer.InternalKeywordDictionary.ContainsKey(term);
        }

        /// <summary>
        /// Map lexical symbol to a keyword or an identifier.
        /// </summary>
        /// <param name="symbol">lexeme</param>
        /// <returns>Token</returns>
        internal Token MapIdentifierOrKeyword(string symbol)
        {
            /*
            The purpose of this method is to separate symbols into keywords and identifiers.
            This separation then leads parser into applying different productions
            to the same eSQL expression. For example if 'key' symbol is mapped to a keyword then
            the expression 'KEY(x)' will satisfy 'keyExpr ::= KEY parenExpr', else if 'key' is mapped
            to an identifier then the expression satisfies
            'methodExpr :: = identifier L_PAREN optAllOrDistinct exprList R_PAREN optWithRelationship'

            Escaped symbols are always assumed to be identifiers.

            For unescaped symbols the naive implementation would check the symbol against
            the collection of keywords and map the symbol to a keyword in case of match,
            otherwise map to an identifier.
            This would result in a strong restriction on unescaped identifiers - they must not
            match keywords.

            In the long run this strategy has a potential of invalidating user queries with addition
            of new keywords to the language. This is an undesired effect and the current implementation
            tries to mitigate it.

            The general mitigation pattern is to separate the collection of keywords and the collection of
            invalid aliases (identifiers), making invalid identifiers a subset of keywords.
            This allows in certain language constructs using unescaped references 'common' identifiers
            that may be defined in the query or in the model (such as Key in Customer.Key).
            Although it adds usability for common cases, it does not solve the general problem:
            select c.id as Key from Customers as c -- works
            select Key from (select c.id from Customers as c) as Key -- does not work for the first occurrence of Key
                                                                     -- it is mapped to a keyword which results in
                                                                     -- invalid syntax
            select [Key] from (select c.id from Customers as c) as Key -- works again

            The first two major places in syntax where restrictions are relaxed:
            1. DOT expressions where a symbol before DOT or after DOT is expected to be an identifier.
            2. AS expressions where a symbol after AS is expected to be an identifier.
            In both places identifiers are checked against the invalid aliases collection instead of
            the keywords collection. If an unescaped identifier appears outside of these two places
            (like the Key in the second query above) it must be escaped or it must not match a keyword.

            The third special case is related to method expressions (function calls). Normally method identifier
            in a method expression must not match a keyword or must be escaped, except the two cases: LEFT and RIGHT.
            LEFT and RIGHT are canonical functions and their usage in a method expression is not ambiguous with
            LEFT OUTER JOIN and RIGHT OUTER JOIN constructs.
            Note that if method identifier is a DOT expression (multipart identifier) such as 'MyNameSpace.Key.Ref(x)'
            then every part of the identifier follows the relaxed check described for DOT expressions (see above).
            This would help with LEFT and RIGHT functions, 'Edm.Left(x)' would work without the third special case,
            but most common use of these function is likely to be without 'Edm.'

            The fourth special case is function names in query inline definition section. These names are checked
            against both
            - the invalid aliases collection and
            - the collection of invalid inline function names.
            The second collection contains certain keywords that are not in the first collection and that may be followed
            by the L_PAREN, which makes them look like method expression. The reason for this stronger restriction is to
            disallow the following kind of ambiguous queries:
            Function Key(c Customer) AS (Key(c))
            select Key(cust) from Customers as cust
            */

            Token token;

            // Handle the escaped identifiers coming from HandleEscapedIdentifiers()
            if (IsEscapedIdentifier(symbol, out token))
            {
                Debug.Assert(token != null, "IsEscapedIdentifier must not return null token");
                return token;
            }

            // Handle keywords
            if (IsKeyword(symbol, out token))
            {
                Debug.Assert(token != null, "IsKeyword must not return null token");
                return token;
            }

            // Handle unescaped identifiers
            return MapUnescapedIdentifier(symbol);
        }

        #region MapIdentifierOrKeyword implementation details
        /// <summary>
        /// Recognizes a bracket-escaped identifier ("[name]") and builds its token.
        /// </summary>
        /// <param name="symbol">lexeme</param>
        /// <param name="identifierToken">
        /// the ESCAPED_IDENTIFIER token (name without the surrounding brackets) when the method
        /// returns true; null when it returns false
        /// </param>
        /// <returns>
        /// true when symbol is longer than one character, starts with '[' and ends with ']';
        /// false when it does not start with '[' or is a single character
        /// </returns>
        /// <remarks>
        /// Raises the eSQL error produced by EntityUtil.EntitySqlError (InvalidEscapedIdentifier)
        /// when symbol starts with '[' but does not end with ']'.
        /// </remarks>
        private bool IsEscapedIdentifier(string symbol, out Token identifierToken)
        {
            // Not an escaped identifier at all.
            if (symbol.Length <= 1 || symbol[0] != '[')
            {
                identifierToken = null;
                return false;
            }

            // Opened with '[' but never closed.
            if (symbol[symbol.Length - 1] != ']')
            {
                throw EntityUtil.EntitySqlError(_query, System.Data.Entity.Strings.InvalidEscapedIdentifier(symbol), _iPos);
            }

            // Strip the surrounding brackets.
            string name = symbol.Substring(1, symbol.Length - 2);
            AST.Identifier id = new AST.Identifier(name, true, _query, _iPos);
            id.ErrCtx.ErrorContextInfo = EntityRes.CtxEscapedIdentifier;
            identifierToken = NewToken(CqlParser.ESCAPED_IDENTIFIER, id);
            return true;
        }

        /// <summary>
        /// Decides whether an unescaped symbol should be treated as a keyword in the current
        /// lexer state and, if so, builds the keyword token.
        /// </summary>
        /// <param name="symbol">lexeme</param>
        /// <param name="terminalToken">the keyword token when the method returns true; otherwise null</param>
        /// <returns>
        /// true when no "symbol as identifier" state is active, the symbol is not followed by '.',
        /// it does not look like a canonical function call, and it is present in the keyword dictionary
        /// </returns>
        private bool IsKeyword(string symbol, out Token terminalToken)
        {
            Char lookAheadChar = GetLookAheadChar();

            // Single dictionary lookup (TryGetValue) instead of ContainsKey followed by the indexer.
            short keywordID;
            if (!IsInSymbolAsIdentifierState(lookAheadChar) &&
                !IsCanonicalFunctionCall(symbol, lookAheadChar) &&
                CqlLexer.InternalKeywordDictionary.TryGetValue(symbol, out keywordID))
            {
                ResetSymbolAsIdentifierState(true);

                if (keywordID == CqlParser.AS)
                {
                    // Treat the symbol following AS keyword as an identifier.
                    // Note that this state will be turned off by a punctuator, so in case of function definitions:
                    // FUNCTION identifier(...) AS (generalExpr)
                    // the generalExpr will not be affected by the state.
                    _symbolAsAliasIdentifierState = true;
                }
                else if (keywordID == CqlParser.FUNCTION)
                {
                    // Treat the symbol following FUNCTION keyword as an identifier.
                    // Inline function names in definition section have stronger restrictions than normal identifiers
                    _symbolAsInlineFunctionNameState = true;
                }

                terminalToken = NewToken(keywordID, new TerminalToken(symbol, _iPos));
                return true;
            }

            terminalToken = null;
            return false;
        }

        /// <summary>
        /// Returns true when current symbol looks like a canonical function name in a function call.
        /// Method only treats canonical functions with names overlapping eSQL keywords.
        /// This check allows calling these canonical functions without escaping their names.
        /// Check lookAheadChar for a left paren to see if looks like a function call, check symbol against the list of
        /// canonical functions with names overlapping keywords.
/// </summary>
        /// <param name="symbol">lexeme</param>
        /// <param name="lookAheadChar">first significant character following the lexeme</param>
        private bool IsCanonicalFunctionCall(string symbol, Char lookAheadChar)
        {
            // Without an opening paren this cannot be a call.
            if (lookAheadChar != '(')
            {
                return false;
            }

            return CqlLexer.InternalCanonicalFunctionNames.Contains(symbol);
        }

        /// <summary>
        /// Builds an IDENTIFIER token for an unescaped symbol, rejecting names found in the
        /// invalid alias collection (and, while an inline function name is expected, names
        /// found in the invalid inline function name collection).
        /// </summary>
        /// <param name="symbol">lexeme</param>
        /// <returns>IDENTIFIER token</returns>
        private Token MapUnescapedIdentifier(string symbol)
        {
            // The checks must run before ResetSymbolAsIdentifierState(...), which clears
            // _symbolAsInlineFunctionNameState.
            bool isRejected = CqlLexer.InternalInvalidAliasNames.Contains(symbol);
            if (_symbolAsInlineFunctionNameState && CqlLexer.InternalInvalidInlineFunctionNames.Contains(symbol))
            {
                isRejected = true;
            }

            ResetSymbolAsIdentifierState(true);

            if (isRejected)
            {
                throw EntityUtil.EntitySqlError(_query, System.Data.Entity.Strings.InvalidAliasName(symbol), _iPos);
            }

            AST.Identifier identifier = new AST.Identifier(symbol, false, _query, _iPos);
            identifier.ErrCtx.ErrorContextInfo = EntityRes.CtxIdentifier;
            return NewToken(CqlParser.IDENTIFIER, identifier);
        }

        /// <summary>
        /// Skip insignificant whitespace to reach the first potentially significant char.
        /// </summary>
        /// <returns>
        /// the first character that is neither whitespace nor a new-line character, or YY_EOF
        /// </returns>
        private Char GetLookAheadChar()
        {
            // NOTE(review): yy_mark_end()/yy_to_mark() presumably save and restore the scanner
            // position so that this peek does not consume input - confirm in the generated lexer.
            yy_mark_end();

            Char candidate;
            do
            {
                candidate = yy_advance();
            }
            while (candidate != YY_EOF && (Char.IsWhiteSpace(candidate) || IsNewLine(candidate)));

            yy_to_mark();
            return candidate;
        }

        /// <summary>
        /// Tells whether the next symbol must be treated as an identifier rather than a keyword.
        /// </summary>
        /// <param name="lookAheadChar">first significant character following the lexeme</param>
        /// <returns>
        /// true when any of the DOT / AS / inline-function-name states is set,
        /// or when the symbol is followed by a DOT
        /// </returns>
        private bool IsInSymbolAsIdentifierState(char lookAheadChar)
        {
            if (_symbolAsIdentifierState || _symbolAsAliasIdentifierState || _symbolAsInlineFunctionNameState)
            {
                return true;
            }

            // treat symbols followed by DOT as identifiers
            return lookAheadChar == '.';
        }

        /// <summary>
        /// Resets "symbol as identifier" state.
/// </summary>
        /// <param name="significant">see function callers for more info</param>
        private void ResetSymbolAsIdentifierState(bool significant)
        {
            _symbolAsIdentifierState = false;

            // Do not reset the following states if going over {NONNEWLINE_SPACE} or {NEWLINE} or {LINE_COMMENT}
            if (significant)
            {
                _symbolAsAliasIdentifierState = false;
                _symbolAsInlineFunctionNameState = false;
            }
        }
        #endregion

        /// <summary>
        /// Maps operator to respective token
        /// </summary>
        /// <param name="oper">operator lexeme</param>
        /// <returns>Token</returns>
        /// <remarks>
        /// Raises the eSQL error produced by EntityUtil.EntitySqlError (InvalidOperatorSymbol)
        /// when the lexeme is not in the operator dictionary.
        /// </remarks>
        internal Token MapOperator(string oper)
        {
            // Single lookup instead of ContainsKey followed by the indexer.
            short operatorId;
            if (!InternalOperatorDictionary.TryGetValue(oper, out operatorId))
            {
                throw EntityUtil.EntitySqlError(_query, System.Data.Entity.Strings.InvalidOperatorSymbol, _iPos);
            }

            return NewToken(operatorId, new TerminalToken(oper, _iPos));
        }

        /// <summary>
        /// Maps punctuator to respective token
        /// </summary>
        /// <param name="punct">punctuator</param>
        /// <returns>Token</returns>
        /// <remarks>
        /// Any valid punctuator clears the "symbol as identifier" states; a DOT then sets
        /// the DOT-expression state so the following symbol is read as an identifier.
        /// Raises the eSQL error produced by EntityUtil.EntitySqlError (InvalidPunctuatorSymbol)
        /// when the lexeme is not in the punctuator dictionary.
        /// </remarks>
        internal Token MapPunctuator(string punct)
        {
            // Single lookup instead of ContainsKey followed by the indexer.
            short punctuatorId;
            if (!InternalPunctuatorDictionary.TryGetValue(punct, out punctuatorId))
            {
                throw EntityUtil.EntitySqlError(_query, System.Data.Entity.Strings.InvalidPunctuatorSymbol, _iPos);
            }

            ResetSymbolAsIdentifierState(true);

            if (punct.Equals(".", StringComparison.OrdinalIgnoreCase))
            {
                _symbolAsIdentifierState = true;
            }

            return NewToken(punctuatorId, new TerminalToken(punct, _iPos));
        }

        /// <summary>
        /// Maps double quoted string to a literal or an identifier
        /// (currently always a string literal).
        /// </summary>
        /// <param name="symbol">double quoted lexeme</param>
        /// <returns>Token</returns>
        internal Token MapDoubleQuotedString(string symbol)
        {
            // If there is a mode that makes eSQL parser to follow the SQL-92 rules regarding quotation mark
            // delimiting identifiers then this method may decide to map to identifiers.
            // In this case identifiers delimited by double quotation marks can be either eSQL reserved keywords
            // or can contain characters not usually allowed by the eSQL syntax rules for identifiers,
            // so identifiers mapped here should be treated as escaped identifiers.
            return NewLiteralToken(symbol, AST.LiteralKind.String);
        }

        /// <summary>
        /// Creates literal token
        /// </summary>
        /// <param name="literal">literal lexeme; must not be null or empty (checked by Debug.Assert only)</param>
        /// <param name="literalKind">literal kind; must not be LiteralKind.Null (checked by Debug.Assert only)</param>
        /// <returns>Literal Token</returns>
        /// <remarks>
        /// For Binary, DateTime, Time, DateTimeOffset and Guid literals the value stored in the token
        /// is the text between the single quotes, and it is validated; a failed validation raises the
        /// eSQL error produced by EntityUtil.EntitySqlError (InvalidLiteralFormat). A String literal
        /// starting with 'N' is re-tagged as UnicodeString. Other kinds keep the lexeme unchanged.
        /// </remarks>
        internal Token NewLiteralToken(string literal, AST.LiteralKind literalKind)
        {
            Debug.Assert(!String.IsNullOrEmpty(literal), "literal must not be null or empty");
            Debug.Assert(literalKind != AST.LiteralKind.Null, "literalKind must not be LiteralKind.Null");

            string literalValue = literal;

            // Set to the type name used in the error message when payload validation fails,
            // so that the error is raised from one place.
            string invalidFormatTypeName = null;

            switch (literalKind)
            {
                case AST.LiteralKind.Binary:
                    literalValue = GetLiteralSingleQuotePayload(literal);
                    if (!IsValidBinaryValue(literalValue))
                    {
                        invalidFormatTypeName = "binary";
                    }
                    break;

                case AST.LiteralKind.String:
                    if ('N' == literal[0])
                    {
                        literalKind = AST.LiteralKind.UnicodeString;
                    }
                    break;

                case AST.LiteralKind.DateTime:
                    literalValue = GetLiteralSingleQuotePayload(literal);
                    if (!IsValidDateTimeValue(literalValue))
                    {
                        invalidFormatTypeName = "datetime";
                    }
                    break;

                case AST.LiteralKind.Time:
                    literalValue = GetLiteralSingleQuotePayload(literal);
                    if (!IsValidTimeValue(literalValue))
                    {
                        invalidFormatTypeName = "time";
                    }
                    break;

                case AST.LiteralKind.DateTimeOffset:
                    literalValue = GetLiteralSingleQuotePayload(literal);
                    if (!IsValidDateTimeOffsetValue(literalValue))
                    {
                        invalidFormatTypeName = "datetimeoffset";
                    }
                    break;

                case AST.LiteralKind.Guid:
                    literalValue = GetLiteralSingleQuotePayload(literal);
                    if (!IsValidGuidValue(literalValue))
                    {
                        invalidFormatTypeName = "guid";
                    }
                    break;
            }

            if (invalidFormatTypeName != null)
            {
                throw EntityUtil.EntitySqlError(_query, System.Data.Entity.Strings.InvalidLiteralFormat(invalidFormatTypeName, literalValue), _iPos);
            }

            return NewToken(CqlParser.LITERAL, new AST.Literal(literalValue, literalKind, _query, _iPos));
        }

        /// <summary>
        /// Creates parameter token
        /// </summary>
        /// <param name="param">param</param>
        /// <returns>Parameter Token</returns>
        internal Token NewParameterToken(string param)
        {
            return NewToken(CqlParser.PARAMETER, new AST.QueryParameter(param, _query, _iPos));
        }

        /// <summary>
        /// handles escaped identifiers
        /// ch will always be translated i.e. normalized.
/// </summary>
        /// <returns>the token produced by MapIdentifierOrKeyword for the bracketed lexeme</returns>
        /// <remarks>
        /// Raises the eSQL error produced by EntityUtil.EntitySqlError
        /// (InvalidEscapedIdentifierUnbalanced) when the end of input is reached first.
        /// </remarks>
        internal Token HandleEscapedIdentifiers()
        {
            char ch;
            for (ch = YYText[0]; ch != YY_EOF; ch = yy_advance())
            {
                if (ch != ']')
                {
                    continue;
                }

                // A ']' either closes the identifier or, when doubled ("]]"), stands for a literal ']'.
                yy_mark_end();
                ch = yy_advance();
                if (ch != ']')
                {
                    yy_to_mark();
                    ResetSymbolAsIdentifierState(true);
                    return MapIdentifierOrKeyword(YYText.Replace("]]", "]"));
                }
            }

            Debug.Assert(ch == YY_EOF, "ch == YY_EOF");
            throw EntityUtil.EntitySqlError(_query, System.Data.Entity.Strings.InvalidEscapedIdentifierUnbalanced(YYText), _iPos);
        }

        /// <summary>
        /// Scans the symbol and reports whether it consists of ASCII characters only.
        /// NOTE(review): as written, the letter/digit/underscore test is only evaluated once a
        /// character at or above 0x80 has been seen, and such a character always fails that test,
        /// so the method returns false exactly when the symbol contains a character at or above
        /// 0x80 and true otherwise (including for ASCII punctuation). Confirm whether callers
        /// rely on this before changing it.
        /// </summary>
        /// <param name="symbol">text to scan</param>
        /// <param name="isIdentifierASCII">false once a character at or above 0x80 has been seen; true otherwise</param>
        /// <returns>see the note above</returns>
        internal static bool IsLetterOrDigitOrUnderscore(string symbol, out bool isIdentifierASCII)
        {
            isIdentifierASCII = true;
            foreach (char current in symbol)
            {
                if (current >= 0x80)
                {
                    isIdentifierASCII = false;
                }

                if (isIdentifierASCII)
                {
                    continue;
                }

                bool isAllowed = IsLetter(current) || IsDigit(current) || current == '_';
                if (!isAllowed)
                {
                    return false;
                }
            }
            return true;
        }

        /// <summary>
        /// Returns true for the ASCII letters A-Z and a-z.
        /// </summary>
        private static bool IsLetter(char c)
        {
            bool isUpper = 'A' <= c && c <= 'Z';
            bool isLower = 'a' <= c && c <= 'z';
            return isUpper || isLower;
        }

        /// <summary>
        /// Returns true for the ASCII digits 0-9.
        /// </summary>
        private static bool IsDigit(char c)
        {
            return '0' <= c && c <= '9';
        }

        /// <summary>
        /// Returns true for ASCII hexadecimal digits: 0-9, a-f and A-F.
        /// </summary>
        private static bool isHexDigit(char c)
        {
            if (IsDigit(c))
            {
                return true;
            }

            return ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F');
        }

        /// <summary>
        /// Returns true if given char is a new line character defined by
        /// UNICODE 5.0, section 5.8 Newline Guidelines.
        /// These are 'mandatory' line breaks. NOTE that CRLF is treated as a
        /// composite 'character' and was intentionally omitted in the character set below.
/// </summary>
        /// <param name="c">character to test</param>
        /// <returns>true when c is one of the characters in _newLineCharacters</returns>
        internal static bool IsNewLine(Char c)
        {
            return Array.IndexOf(_newLineCharacters, c) >= 0;
        }

        /// <summary>
        /// extracts single quoted literal 'payload'. literal MUST BE normalized.
        /// </summary>
        /// <param name="literal">lexeme containing exactly two single quotes</param>
        /// <returns>the text following the first single quote, without the last character of the lexeme</returns>
        /// <remarks>
        /// Raises the eSQL error produced by EntityUtil.EntitySqlError (MalformedSingleQuotePayload)
        /// when the lexeme does not contain exactly two single quotes or the extracted payload
        /// still contains one.
        /// </remarks>
        private static string GetLiteralSingleQuotePayload(string literal)
        {
            int firstQuote = literal.IndexOf('\'');
            int lastQuote = literal.LastIndexOf('\'');
            // Splitting on the quote yields (number of quotes + 1) pieces.
            int pieceCount = literal.Split(new char[] { '\'' }).Length;

            Debug.Assert(-1 != firstQuote, "quoted literal value must have single quotes");
            Debug.Assert(-1 != lastQuote, "quoted literal value must have single quotes");
            Debug.Assert(firstQuote != lastQuote, "quoted literal value must have 2 single quotes");
            Debug.Assert(pieceCount == 3, "quoted literal value must have 2 single quotes");

            // NOTE: this is not a precondition validation. This validation is for security purposes based on the
            // paranoid assumption that all input is evil. we should not see this exception under normal
            // conditions.
            if (pieceCount != 3 || firstQuote == -1 || lastQuote == -1)
            {
                throw EntityUtil.EntitySqlError(System.Data.Entity.Strings.MalformedSingleQuotePayload);
            }

            // Everything after the opening quote, minus the final character of the lexeme.
            string payload = literal.Substring(firstQuote + 1, literal.Length - (firstQuote + 2));

            Debug.Assert(payload.IndexOf('\'') == -1, "quoted literal payload must not have single quotes");
            Debug.Assert(payload.LastIndexOf('\'') == -1, "quoted literal payload must not have single quotes");

            // NOTE: this is not a precondition validation. This validation is for security purposes based on the
            // paranoid assumption that all input is evil. we should not see this exception under normal
            // conditions.
            if (payload.Split(new char[] { '\'' }).Length != 1)
            {
                throw EntityUtil.EntitySqlError(System.Data.Entity.Strings.MalformedSingleQuotePayload);
            }

            return payload;
        }

        /// <summary>
        /// returns true if guid literal value format is valid:
        /// 36 characters, '-' at indexes 8, 13, 18 and 23, hexadecimal digits everywhere else.
        /// </summary>
        /// <param name="guidValue">text between the quotes of the guid literal</param>
        /// <returns>true when the text has the layout described above</returns>
        private static bool IsValidGuidValue(string guidValue)
        {
            const int expectedLength = 36;
            if (guidValue.Length != expectedLength)
            {
                return false;
            }

            for (int position = 0; position < expectedLength; position++)
            {
                char current = guidValue[position];
                bool isDashPosition = position == 8 || position == 13 || position == 18 || position == 23;
                bool isOk = isDashPosition ? current == '-' : isHexDigit(current);
                if (!isOk)
                {
                    return false;
                }
            }

            return true;
        }

        /// <summary>
        /// returns true if binary literal value format is valid:
        /// empty, or made of hexadecimal digits only.
        /// </summary>
        /// <param name="binaryValue">text between the quotes of the binary literal</param>
        /// <returns>true for null/empty text or text consisting solely of hexadecimal digits</returns>
        private static bool IsValidBinaryValue(string binaryValue)
        {
            Debug.Assert(null != binaryValue, "binaryValue must not be null");

            if (String.IsNullOrEmpty(binaryValue))
            {
                return true;
            }

            foreach (char current in binaryValue)
            {
                if (!isHexDigit(current))
                {
                    return false;
                }
            }

            return true;
        }

        /// <summary>
        /// Returns true if datetime literal value format is valid
        /// allowed format is: dddd-d?d-d?d{space}+d?d:d?d(:d?d(.d{1,7})?)?
        /// where d is any decimal digit.
/// </summary>
        /// <param name="datetimeValue">text between the quotes of the datetime literal</param>
        /// <returns>true when the text matches _datetimeValueRegularExpression</returns>
        private static bool IsValidDateTimeValue(string datetimeValue)
        {
            // The Regex is created on first use and kept in the static field.
            Regex matcher = _reDateTimeValue;
            if (matcher == null)
            {
                matcher = new Regex(_datetimeValueRegularExpression, RegexOptions.Singleline | RegexOptions.CultureInvariant);
                _reDateTimeValue = matcher;
            }

            return matcher.IsMatch(datetimeValue);
        }

        /// <summary>
        /// Returns true if time literal value format is valid
        /// allowed format is: d?d:d?d(:d?d(.d{1,7})?)?
        /// where d is any decimal digit.
        /// </summary>
        /// <param name="timeValue">text between the quotes of the time literal</param>
        /// <returns>true when the text matches _timeValueRegularExpression</returns>
        private static bool IsValidTimeValue(string timeValue)
        {
            // The Regex is created on first use and kept in the static field.
            Regex matcher = _reTimeValue;
            if (matcher == null)
            {
                matcher = new Regex(_timeValueRegularExpression, RegexOptions.Singleline | RegexOptions.CultureInvariant);
                _reTimeValue = matcher;
            }

            return matcher.IsMatch(timeValue);
        }

        /// <summary>
        /// Returns true if datetimeoffset literal value format is valid
        /// allowed format is: dddd-d?d-d?d{space}+d?d:d?d(:d?d(.d{1,7})?)?{space}*[+-]d?d:d?d
        /// where d is any decimal digit.
/// </summary>
        /// <param name="datetimeOffsetValue">text between the quotes of the datetimeoffset literal</param>
        /// <returns>true when the text matches _datetimeOffsetValueRegularExpression</returns>
        private static bool IsValidDateTimeOffsetValue(string datetimeOffsetValue)
        {
            // The Regex is created on first use and kept in the static field.
            Regex matcher = _reDateTimeOffsetValue;
            if (matcher == null)
            {
                matcher = new Regex(_datetimeOffsetValueRegularExpression, RegexOptions.Singleline | RegexOptions.CultureInvariant);
                _reDateTimeOffsetValue = matcher;
            }

            return matcher.IsMatch(datetimeOffsetValue);
        }

        /// <summary>
        /// Maps eSQL keywords (compared with _stringComparer) to their parser token ids.
        /// The dictionary is built on first access and then stored in _keywords;
        /// "true" and "false" map to the LITERAL token id.
        /// </summary>
        private static Dictionary<string, short> InternalKeywordDictionary
        {
            get
            {
                if (null == _keywords)
                {
                    #region Initializes eSQL keywords
                    // Built in a local first and published to the static field only once complete.
                    Dictionary<string, short> keywords = new Dictionary<string, short>(60, _stringComparer)
                    {
                        { "all", CqlParser.ALL },
                        { "and", CqlParser.AND },
                        { "anyelement", CqlParser.ANYELEMENT },
                        { "apply", CqlParser.APPLY },
                        { "as", CqlParser.AS },
                        { "asc", CqlParser.ASC },
                        { "between", CqlParser.BETWEEN },
                        { "by", CqlParser.BY },
                        { "case", CqlParser.CASE },
                        { "cast", CqlParser.CAST },
                        { "collate", CqlParser.COLLATE },
                        { "collection", CqlParser.COLLECTION },
                        { "createref", CqlParser.CREATEREF },
                        { "cross", CqlParser.CROSS },
                        { "deref", CqlParser.DEREF },
                        { "desc", CqlParser.DESC },
                        { "distinct", CqlParser.DISTINCT },
                        { "element", CqlParser.ELEMENT },
                        { "else", CqlParser.ELSE },
                        { "end", CqlParser.END },
                        { "escape", CqlParser.ESCAPE },
                        { "except", CqlParser.EXCEPT },
                        { "exists", CqlParser.EXISTS },
                        { "false", CqlParser.LITERAL },
                        { "flatten", CqlParser.FLATTEN },
                        { "from", CqlParser.FROM },
                        { "full", CqlParser.FULL },
                        { "function", CqlParser.FUNCTION },
                        { "group", CqlParser.GROUP },
                        { "grouppartition", CqlParser.GROUPPARTITION },
                        { "having", CqlParser.HAVING },
                        { "in", CqlParser.IN },
                        { "inner", CqlParser.INNER },
                        { "intersect", CqlParser.INTERSECT },
                        { "is", CqlParser.IS },
                        { "join", CqlParser.JOIN },
                        { "key", CqlParser.KEY },
                        { "left", CqlParser.LEFT },
                        { "like", CqlParser.LIKE },
                        { "limit", CqlParser.LIMIT },
                        { "multiset", CqlParser.MULTISET },
                        { "navigate", CqlParser.NAVIGATE },
                        { "not", CqlParser.NOT },
                        { "null", CqlParser.NULL },
                        { "of", CqlParser.OF },
                        { "oftype", CqlParser.OFTYPE },
                        { "on", CqlParser.ON },
                        { "only", CqlParser.ONLY },
                        { "or", CqlParser.OR },
                        { "order", CqlParser.ORDER },
                        { "outer", CqlParser.OUTER },
                        { "overlaps", CqlParser.OVERLAPS },
                        { "ref", CqlParser.REF },
                        { "relationship", CqlParser.RELATIONSHIP },
                        { "right", CqlParser.RIGHT },
                        { "row", CqlParser.ROW },
                        { "select", CqlParser.SELECT },
                        { "set", CqlParser.SET },
                        { "skip", CqlParser.SKIP },
                        { "then", CqlParser.THEN },
                        { "top", CqlParser.TOP },
                        { "treat", CqlParser.TREAT },
                        { "true", CqlParser.LITERAL },
                        { "union", CqlParser.UNION },
                        { "using", CqlParser.USING },
                        { "value", CqlParser.VALUE },
                        { "when", CqlParser.WHEN },
                        { "where", CqlParser.WHERE },
                        { "with", CqlParser.WITH },
                    };
                    _keywords = keywords;
                    #endregion
                }
                return _keywords;
            }

        }

        private static HashSet<string> InternalInvalidAliasNames
        {
            get
            {
                if
(null == _invalidAliasNames) 919 { 920 #region Initializes invalid aliases 921 HashSet<string> invalidAliasName = new HashSet<string>(_stringComparer); 922 invalidAliasName.Add("all"); 923 invalidAliasName.Add("and"); 924 invalidAliasName.Add("apply"); 925 invalidAliasName.Add("as"); 926 invalidAliasName.Add("asc"); 927 invalidAliasName.Add("between"); 928 invalidAliasName.Add("by"); 929 invalidAliasName.Add("case"); 930 invalidAliasName.Add("cast"); 931 invalidAliasName.Add("collate"); 932 invalidAliasName.Add("createref"); 933 invalidAliasName.Add("deref"); 934 invalidAliasName.Add("desc"); 935 invalidAliasName.Add("distinct"); 936 invalidAliasName.Add("element"); 937 invalidAliasName.Add("else"); 938 invalidAliasName.Add("end"); 939 invalidAliasName.Add("escape"); 940 invalidAliasName.Add("except"); 941 invalidAliasName.Add("exists"); 942 invalidAliasName.Add("flatten"); 943 invalidAliasName.Add("from"); 944 invalidAliasName.Add("group"); 945 invalidAliasName.Add("having"); 946 invalidAliasName.Add("in"); 947 invalidAliasName.Add("inner"); 948 invalidAliasName.Add("intersect"); 949 invalidAliasName.Add("is"); 950 invalidAliasName.Add("join"); 951 invalidAliasName.Add("like"); 952 invalidAliasName.Add("multiset"); 953 invalidAliasName.Add("navigate"); 954 invalidAliasName.Add("not"); 955 invalidAliasName.Add("null"); 956 invalidAliasName.Add("of"); 957 invalidAliasName.Add("oftype"); 958 invalidAliasName.Add("on"); 959 invalidAliasName.Add("only"); 960 invalidAliasName.Add("or"); 961 invalidAliasName.Add("overlaps"); 962 invalidAliasName.Add("ref"); 963 invalidAliasName.Add("relationship"); 964 invalidAliasName.Add("select"); 965 invalidAliasName.Add("set"); 966 invalidAliasName.Add("then"); 967 invalidAliasName.Add("treat"); 968 invalidAliasName.Add("union"); 969 invalidAliasName.Add("using"); 970 invalidAliasName.Add("when"); 971 invalidAliasName.Add("where"); 972 invalidAliasName.Add("with"); 973 _invalidAliasNames = invalidAliasName; 974 #endregion 975 } 976 
return _invalidAliasNames; 977 } 978 } 979 980 private static HashSet<string> InternalInvalidInlineFunctionNames 981 { 982 get 983 { 984 if (null == _invalidInlineFunctionNames) 985 { 986 #region Initializes invalid inline function names 987 HashSet<string> invalidInlineFunctionNames = new HashSet<string>(_stringComparer); 988 invalidInlineFunctionNames.Add("anyelement"); 989 invalidInlineFunctionNames.Add("element"); 990 invalidInlineFunctionNames.Add("function"); 991 invalidInlineFunctionNames.Add("grouppartition"); 992 invalidInlineFunctionNames.Add("key"); 993 invalidInlineFunctionNames.Add("ref"); 994 invalidInlineFunctionNames.Add("row"); 995 invalidInlineFunctionNames.Add("skip"); 996 invalidInlineFunctionNames.Add("top"); 997 invalidInlineFunctionNames.Add("value"); 998 _invalidInlineFunctionNames = invalidInlineFunctionNames; 999 #endregion 1000 } 1001 return _invalidInlineFunctionNames; 1002 } 1003 } 1004 1005 private static Dictionary<string, short> InternalOperatorDictionary 1006 { 1007 get 1008 { 1009 if (null == _operators) 1010 { 1011 #region Initializes operator dictionary 1012 Dictionary<string, short> operators = new Dictionary<string, short>(16, _stringComparer); 1013 operators.Add("==", CqlParser.OP_EQ); 1014 operators.Add("!=", CqlParser.OP_NEQ); 1015 operators.Add("<>", CqlParser.OP_NEQ); 1016 operators.Add("<", CqlParser.OP_LT); 1017 operators.Add("<=", CqlParser.OP_LE); 1018 operators.Add(">", CqlParser.OP_GT); 1019 operators.Add(">=", CqlParser.OP_GE); 1020 operators.Add("&&", CqlParser.AND); 1021 operators.Add("||", CqlParser.OR); 1022 operators.Add("!", CqlParser.NOT); 1023 operators.Add("+", CqlParser.PLUS); 1024 operators.Add("-", CqlParser.MINUS); 1025 operators.Add("*", CqlParser.STAR); 1026 operators.Add("/", CqlParser.FSLASH); 1027 operators.Add("%", CqlParser.PERCENT); 1028 _operators = operators; 1029 #endregion 1030 } 1031 return _operators; 1032 } 1033 } 1034 1035 private static Dictionary<string, short> 
InternalPunctuatorDictionary 1036 { 1037 get 1038 { 1039 if (null == _punctuators) 1040 { 1041 #region Initializes punctuators dictionary 1042 Dictionary<string, short> punctuators = new Dictionary<string, short>(16, _stringComparer); 1043 punctuators.Add(",", CqlParser.COMMA); 1044 punctuators.Add(":", CqlParser.COLON); 1045 punctuators.Add(".", CqlParser.DOT); 1046 punctuators.Add("?", CqlParser.QMARK); 1047 punctuators.Add("(", CqlParser.L_PAREN); 1048 punctuators.Add(")", CqlParser.R_PAREN); 1049 punctuators.Add("[", CqlParser.L_BRACE); 1050 punctuators.Add("]", CqlParser.R_BRACE); 1051 punctuators.Add("{", CqlParser.L_CURLY); 1052 punctuators.Add("}", CqlParser.R_CURLY); 1053 punctuators.Add(";", CqlParser.SCOLON); 1054 punctuators.Add("=", CqlParser.EQUAL); 1055 _punctuators = punctuators; 1056 #endregion 1057 } 1058 return _punctuators; 1059 } 1060 } 1061 1062 private static HashSet<string> InternalCanonicalFunctionNames 1063 { 1064 get 1065 { 1066 if (null == _canonicalFunctionNames) 1067 { 1068 HashSet<string> canonicalFunctionNames = new HashSet<string>(_stringComparer); 1069 canonicalFunctionNames.Add("left"); 1070 canonicalFunctionNames.Add("right"); 1071 _canonicalFunctionNames = canonicalFunctionNames; 1072 } 1073 return _canonicalFunctionNames; 1074 } 1075 } 1076 } 1077 } 1078