1 //---------------------------------------------------------------------
2 // <copyright file="CqlLexerHelper.cs" company="Microsoft">
3 //      Copyright (c) Microsoft Corporation.  All rights reserved.
4 // </copyright>
5 //
6 // @owner  Microsoft
7 // @backupOwner Microsoft
8 //---------------------------------------------------------------------
9 
10 namespace System.Data.Common.EntitySql
11 {
12     using System;
13     using System.Globalization;
14     using System.Collections.Generic;
15     using System.Text.RegularExpressions;
16     using System.Diagnostics;
17     using System.Text;
18     using System.Data.Entity;
19 
    /// <summary>
    /// Represents eSQL error context: where in the command text an error occurred and
    /// what additional information describes it.
    /// </summary>
    internal class ErrorContext
    {
        /// <summary>
        /// Represents the position of the error in the input stream.
        /// Initialized to -1.
        /// </summary>
        internal int InputPosition = -1;

        /// <summary>
        /// Represents the additional/contextual information related to the error position/cause.
        /// </summary>
        internal string ErrorContextInfo;

        /// <summary>
        /// Defines how ErrorContextInfo should be interpreted. Initialized to true.
        /// NOTE(review): presumably true means ErrorContextInfo holds a resource identifier
        /// rather than literal text - confirm against the code that formats error messages.
        /// </summary>
        internal bool UseContextInfoAsResourceIdentifier = true;

        /// <summary>
        /// Represents a reference to the original command text.
        /// </summary>
        internal string CommandText;
    }
45 
46     /// <summary>
47     /// Represents Cql scanner and helper functions.
48     /// </summary>
49     internal sealed partial class CqlLexer
50     {
        // Case-insensitive ordinal comparer shared by the lexer.
        static readonly StringComparer _stringComparer = StringComparer.OrdinalIgnoreCase;

        // Static lookup tables and cached regular expressions. They are not initialized here;
        // NOTE(review): presumably they are populated lazily by the Internal* accessors
        // defined elsewhere in this class - confirm.
        static Dictionary<string, short> _keywords;
        static HashSet<string> _invalidAliasNames;
        static HashSet<string> _invalidInlineFunctionNames;
        static Dictionary<string, short> _operators;
        static Dictionary<string, short> _punctuators;
        static HashSet<string> _canonicalFunctionNames;
        static Regex _reDateTimeValue;
        static Regex _reTimeValue;
        static Regex _reDateTimeOffsetValue;

        // Pattern for datetime payloads: yyyy-m[m]-d[d], one or more spaces, h[h]:m[m],
        // optionally followed by :s[s] and an optional fraction of 1 to 7 digits.
        private const string _datetimeValueRegularExpression = @"^[0-9]{4}-[0-9]{1,2}-[0-9]{1,2}([ ])+[0-9]{1,2}:[0-9]{1,2}(:[0-9]{1,2}(\.[0-9]{1,7})?)?$";

        // Pattern for time payloads: h[h]:m[m], optionally followed by :s[s] and an optional
        // fraction of 1 to 7 digits.
        private const string _timeValueRegularExpression = @"^[0-9]{1,2}:[0-9]{1,2}(:[0-9]{1,2}(\.[0-9]{1,7})?)?$";

        // Pattern for datetimeoffset payloads: the datetime pattern followed by optional spaces
        // and a signed h[h]:m[m] offset.
        private const string _datetimeOffsetValueRegularExpression = @"^[0-9]{4}-[0-9]{1,2}-[0-9]{1,2}([ ])+[0-9]{1,2}:[0-9]{1,2}(:[0-9]{1,2}(\.[0-9]{1,7})?)?([ ])*[\+-][0-9]{1,2}:[0-9]{1,2}$";

        // Current input position; advanced by AdvanceIPos() and attached to created tokens.
        private int _iPos;
        // NOTE(review): not referenced in this part of the class; presumably the current line
        // number maintained by the generated scanner - confirm.
        private int _lineNumber;
        // Parser options supplied to the constructor.
        ParserOptions _parserOptions;
        // The query text supplied to the constructor.
        private string _query;
        /// <summary>
        /// set for DOT expressions: turned on by MapPunctuator when a "." is mapped, cleared by
        /// every call to ResetSymbolAsIdentifierState.
        /// </summary>
        private bool _symbolAsIdentifierState = false;
        /// <summary>
        /// set for AS expressions: turned on by IsKeyword when the AS keyword is mapped, cleared by
        /// ResetSymbolAsIdentifierState(true).
        /// </summary>
        private bool _symbolAsAliasIdentifierState = false;
        /// <summary>
        /// set for function definitions: turned on by IsKeyword when the FUNCTION keyword is mapped,
        /// cleared by ResetSymbolAsIdentifierState(true).
        /// </summary>
        private bool _symbolAsInlineFunctionNameState = false;
81 
        /// <summary>
        /// Defines the set of characters to be interpreted as mandatory line breaks
        /// according to UNICODE 5.0, section 5.8 Newline Guidelines. These are 'mandatory'
        /// line breaks. We do not handle other 'line breaking opportunities' as defined by
        /// UNICODE 5.0 since they are intended for presentation. The mandatory line break
        /// defines breaking opportunities that must not be ignored. For all practical purposes
        /// the interpretation of mandatory breaks determines the end of one line and consequently
        /// the start of the next line of query text.
        /// NOTE that CR and CRLF are treated as a composite 'character' and were intentionally
        /// omitted from the character set below.
        /// </summary>
        static readonly Char[] _newLineCharacters = { '\u000A' , // LF - line feed
                                                      '\u0085' , // NEL - next line
                                                      '\u000B' , // VT - vertical tab
                                                      '\u2028' , // LS - line separator
                                                      '\u2029'   // PS - paragraph separator
                                                    };
97 
98         /// <summary>
99         /// Intializes scanner
100         /// </summary>
101         /// <param name="query">input query</param>
102         /// <param name="parserOptions">parser options</param>
CqlLexer(string query, ParserOptions parserOptions)103         internal CqlLexer(string query, ParserOptions parserOptions)
104             : this()
105         {
106             Debug.Assert(query != null, "query must not be null");
107             Debug.Assert(parserOptions != null, "parserOptions must not be null");
108 
109             _query = query;
110             _parserOptions = parserOptions;
111             yy_reader = new System.IO.StringReader(_query);
112         }
113 
114         /// <summary>
115         /// Creates a new token.
116         /// </summary>
117         /// <param name="tokenId">tokenid</param>
118         /// <param name="tokenvalue">ast node</param>
119         /// <returns></returns>
NewToken(short tokenId, AST.Node tokenvalue)120         static internal Token NewToken(short tokenId, AST.Node tokenvalue)
121         {
122             return new Token(tokenId, tokenvalue);
123         }
124 
125         /// <summary>
126         /// Creates a new token representing a terminal.
127         /// </summary>
128         /// <param name="tokenId">tokenid</param>
129         /// <param name="termToken">lexical value</param>
130         /// <returns></returns>
NewToken(short tokenId, TerminalToken termToken)131         static internal Token NewToken(short tokenId, TerminalToken termToken)
132         {
133             return new Token(tokenId, termToken);
134         }
135 
136         /// <summary>
137         /// Represents a token to be used in parser stack.
138         /// </summary>
139         internal class Token
140         {
141             private short _tokenId;
142             private object _tokenValue;
143 
Token(short tokenId, AST.Node tokenValue)144             internal Token(short tokenId, AST.Node tokenValue)
145             {
146                 _tokenId = tokenId;
147                 _tokenValue = tokenValue;
148             }
149 
Token(short tokenId, TerminalToken terminal)150             internal Token(short tokenId, TerminalToken terminal)
151             {
152                 _tokenId = tokenId;
153                 _tokenValue = terminal;
154             }
155 
156             internal short TokenId
157             {
158                 get { return _tokenId; }
159             }
160 
161             internal object Value
162             {
163                 get { return _tokenValue; }
164             }
165         }
166 
167         /// <summary>
168         /// Represents a terminal token
169         /// </summary>
170         internal class TerminalToken
171         {
172             string _token;
173             int _iPos;
174 
TerminalToken(string token, int iPos)175             internal TerminalToken(string token, int iPos)
176             {
177                 _token = token;
178                 _iPos = iPos;
179             }
180 
181             internal int IPos
182             {
183                 get { return _iPos; }
184             }
185 
186             internal string Token
187             {
188                 get { return _token; }
189             }
190         }
191 
192         internal static class yy_translate
193         {
translate(char c)194             internal static char translate(char c)
195             #region TRANSLATE
196             {
197                 if (Char.IsWhiteSpace(c) || Char.IsControl(c))
198                 {
199                     if (IsNewLine(c))
200                     {
201                         return '\n';
202                     }
203                     return ' ';
204                 }
205 
206                 if (c < 0x007F)
207                 {
208                     return c;
209                 }
210 
211                 if (Char.IsLetter(c) || Char.IsSymbol(c) || Char.IsNumber(c))
212                 {
213                     return 'a';
214                 }
215 
216                 //
217                 // otherwise pass dummy 'marker' char so as we can continue 'extracting' tokens.
218                 //
219                 return '`';
220             }
221             #endregion
222         }
223 
224 
225         /// <summary>
226         /// Returns current lexeme
227         /// </summary>
228         internal string YYText
229         {
230             get { return yytext(); }
231         }
232 
233         /// <summary>
234         /// Returns current input position
235         /// </summary>
236         internal int IPos
237         {
238             get { return _iPos; }
239         }
240 
241         /// <summary>
242         /// Advances input position.
243         /// </summary>
244         /// <returns>updated input position</returns>
AdvanceIPos()245         internal int AdvanceIPos()
246         {
247             _iPos += YYText.Length;
248             return _iPos;
249         }
250 
251         /// <summary>
252         /// returns true if given term is a eSQL keyword
253         /// </summary>
254         /// <param name="term"></param>
255         /// <returns></returns>
IsReservedKeyword(string term)256         internal static bool IsReservedKeyword(string term)
257         {
258             return CqlLexer.InternalKeywordDictionary.ContainsKey(term);
259         }
260 
261         /// <summary>
262         /// Map lexical symbol to a keyword or an identifier.
263         /// </summary>
264         /// <param name="symbol">lexeme</param>
265         /// <returns>Token</returns>
MapIdentifierOrKeyword(string symbol)266         internal Token MapIdentifierOrKeyword(string symbol)
267         {
268             /*
269             The purpose of this method is to separate symbols into keywords and identifiers.
270             This separation then leads parser into applying different productions
271             to the same eSQL expression. For example if 'key' symbol is mapped to a keyword then
272             the expression 'KEY(x)' will satisfy 'keyExpr ::= KEY parenExpr', else if 'key' is mapped
273             to an identifier then the expression satisfies
274             'methodExpr :: = identifier L_PAREN optAllOrDistinct exprList R_PAREN optWithRelationship'
275 
276             Escaped symbols are always assumed to be identifiers.
277 
278             For unescaped symbols the naive implementation would check the symbol against
279             the collection of keywords and map the symbol to a keyword in case of match,
280             otherwise map to an identifier.
281             This would result in a strong restriction on unescaped identifiers - they must not
282             match keywords.
283 
284             In the long run this strategy has a potential of invalidating user queries with addition
285             of new keywords to the language. This is an undesired effect and the current implementation
286             tries to mitigate it.
287 
288             The general mitigation pattern is to separate the collection of keywords and the collection of
289             invalid aliases (identifiers), making invalid identifiers a subset of keywords.
290             This allows in certain language constructs using unescaped references 'common' identifiers
291             that may be defined in the query or in the model (such as Key in Customer.Key).
292             Although it adds usability for common cases, it does not solve the general problem:
293             select c.id as Key from Customers as c -- works
294             select Key from (select c.id from Customers as c) as Key -- does not work for the first occurence of Key
295                                                                      -- it is mapped to a keyword which results in
296                                                                      -- invalid syntax
297             select [Key] from (select c.id from Customers as c) as Key -- works again
298 
299             The first two major places in syntax where restrictions are relaxed:
300             1. DOT expressions where a symbol before DOT or after DOT is expected to be an identifier.
301             2. AS expressions where a symbol after AS is expected to be an identifier.
302             In both places identifiers are checked against the invalid aliases collection instead of
303             the keywords collection. If an unescaped identifier appears outside of these two places
304             (like the Key in the second query above) it must be escaped or it must not match a keyword.
305 
306             The third special case is related to method expressions (function calls). Normally method identifier
307             in a method expression must not match a keyword or must be escaped, except the two cases: LEFT and RIGHT.
308             LEFT and RIGHT are canonical functions and their usage in a method expression is not ambiguos with
309             LEFT OUTER JOIN and RIGHT OUT JOIN constructs.
310             Note that if method identifier is a DOT expression (multipart identifier) such as 'MyNameSpace.Key.Ref(x)'
311             then every part of the identifier follows the relaxed check described for DOT expressions (see above).
312             This would help with LEFT and RIGHT functions, 'Edm.Left(x)' would work without the third specialcase,
313             but most common use of these function is likely to be without 'Edm.'
314 
315             The fourth special case is function names in query inline definition section. These names are checked
316             against both
317             - the invalid aliases collection and
318             - the collection invalid inline function names.
319             The second collection contains certain keywords that are not in the first collection and that may be followed
320             by the L_PAREN, which makes them look like method expression. The reason for this stronger restriction is to
321             disallow the following kind of ambiguos queries:
322             Function Key(c Customer) AS (Key(c))
323             select Key(cust) from Customsers as cust
324             */
325 
326             Token token;
327 
328             // Handle the escaped identifiers coming from HandleEscapedIdentifiers()
329             if (IsEscapedIdentifier(symbol, out token))
330             {
331                 Debug.Assert(token != null, "IsEscapedIdentifier must not return null token");
332                 return token;
333             }
334 
335             // Handle keywords
336             if (IsKeyword(symbol, out token))
337             {
338                 Debug.Assert(token != null, "IsKeyword must not return null token");
339                 return token;
340             }
341 
342             // Handle unescaped identifiers
343             return MapUnescapedIdentifier(symbol);
344         }
345 
346         #region MapIdentifierOrKeyword implementation details
IsEscapedIdentifier(string symbol, out Token identifierToken)347         private bool IsEscapedIdentifier(string symbol, out Token identifierToken)
348         {
349             if (symbol.Length > 1 && symbol[0] == '[')
350             {
351                 if (symbol[symbol.Length - 1] == ']')
352                 {
353                     string name = symbol.Substring(1, symbol.Length - 2);
354                     AST.Identifier id = new AST.Identifier(name, true, _query, _iPos);
355                     id.ErrCtx.ErrorContextInfo = EntityRes.CtxEscapedIdentifier;
356                     identifierToken = NewToken(CqlParser.ESCAPED_IDENTIFIER, id);
357                     return true;
358                 }
359                 else
360                 {
361                     throw EntityUtil.EntitySqlError(_query, System.Data.Entity.Strings.InvalidEscapedIdentifier(symbol), _iPos);
362                 }
363             }
364             else
365             {
366                 identifierToken = null;
367                 return false;
368             }
369         }
370 
IsKeyword(string symbol, out Token terminalToken)371         private bool IsKeyword(string symbol, out Token terminalToken)
372         {
373             Char lookAheadChar = GetLookAheadChar();
374 
375             if (!IsInSymbolAsIdentifierState(lookAheadChar) &&
376                 !IsCanonicalFunctionCall(symbol, lookAheadChar) &&
377                 CqlLexer.InternalKeywordDictionary.ContainsKey(symbol))
378             {
379                 ResetSymbolAsIdentifierState(true);
380 
381                 short keywordID = CqlLexer.InternalKeywordDictionary[symbol];
382 
383                 if (keywordID == CqlParser.AS)
384                 {
385                     // Treat the symbol following AS keyword as an identifier.
386                     // Note that this state will be turned off by a punctuator, so in case of function definitions:
387                     // FUNCTION identifier(...) AS (generalExpr)
388                     // the generalExpr will not be affected by the state.
389                     _symbolAsAliasIdentifierState = true;
390                 }
391                 else if (keywordID == CqlParser.FUNCTION)
392                 {
393                     // Treat the symbol following FUNCTION keyword as an identifier.
394                     // Inline function names in definition section have stronger restrictions than normal identifiers
395                     _symbolAsInlineFunctionNameState = true;
396                 }
397 
398                 terminalToken = NewToken(keywordID, new TerminalToken(symbol, _iPos));
399                 return true;
400             }
401             else
402             {
403                 terminalToken = null;
404                 return false;
405             }
406         }
407 
408         /// <summary>
409         /// Returns true when current symbol looks like a caninical function name in a function call.
410         /// Method only treats canonical functions with names ovelapping eSQL keywords.
411         /// This check allows calling these canonical functions without escaping their names.
412         /// Check lookAheadChar for a left paren to see if looks like a function call, check symbol against the list of
413         /// canonical functions with names overlapping keywords.
414         /// </summary>
IsCanonicalFunctionCall(string symbol, Char lookAheadChar)415         private bool IsCanonicalFunctionCall(string symbol, Char lookAheadChar)
416         {
417             return lookAheadChar == '(' && CqlLexer.InternalCanonicalFunctionNames.Contains(symbol);
418         }
419 
MapUnescapedIdentifier(string symbol)420         private Token MapUnescapedIdentifier(string symbol)
421         {
422             // Validate before calling ResetSymbolAsIdentifierState(...) because it will reset _symbolAsInlineFunctionNameState
423             bool invalidIdentifier = CqlLexer.InternalInvalidAliasNames.Contains(symbol);
424             if (_symbolAsInlineFunctionNameState)
425             {
426                 invalidIdentifier |= CqlLexer.InternalInvalidInlineFunctionNames.Contains(symbol);
427             }
428 
429             ResetSymbolAsIdentifierState(true);
430 
431             if (invalidIdentifier)
432             {
433                 throw EntityUtil.EntitySqlError(_query, System.Data.Entity.Strings.InvalidAliasName(symbol), _iPos);
434             }
435             else
436             {
437                 AST.Identifier id = new AST.Identifier(symbol, false, _query, _iPos);
438                 id.ErrCtx.ErrorContextInfo = EntityRes.CtxIdentifier;
439                 return NewToken(CqlParser.IDENTIFIER, id);
440             }
441         }
442 
443         /// <summary>
444         /// Skip insignificant whitespace to reach the first potentially significant char.
445         /// </summary>
GetLookAheadChar()446         private Char GetLookAheadChar()
447         {
448             yy_mark_end();
449             Char lookAheadChar = yy_advance();
450             while (lookAheadChar != YY_EOF && (Char.IsWhiteSpace(lookAheadChar) || IsNewLine(lookAheadChar)))
451             {
452                 lookAheadChar = yy_advance();
453             }
454             yy_to_mark();
455             return lookAheadChar;
456         }
457 
IsInSymbolAsIdentifierState(char lookAheadChar)458         private bool IsInSymbolAsIdentifierState(char lookAheadChar)
459         {
460             return _symbolAsIdentifierState ||
461                    _symbolAsAliasIdentifierState ||
462                    _symbolAsInlineFunctionNameState ||
463                    lookAheadChar == '.' /*treat symbols followed by DOT as identifiers*/;
464         }
465 
466         /// <summary>
467         /// Resets "symbol as identifier" state.
468         /// </summary>
469         /// <param name="significant">see function callers for more info</param>
ResetSymbolAsIdentifierState(bool significant)470         private void ResetSymbolAsIdentifierState(bool significant)
471         {
472             _symbolAsIdentifierState = false;
473 
474             // Do not reset the following states if going over {NONNEWLINE_SPACE} or {NEWLINE} or {LINE_COMMENT}
475             if (significant)
476             {
477                 _symbolAsAliasIdentifierState = false;
478                 _symbolAsInlineFunctionNameState = false;
479             }
480         }
481         #endregion
482 
483         /// <summary>
484         /// Maps operator to respective token
485         /// </summary>
486         /// <param name="oper">operator lexeme</param>
487         /// <returns>Token</returns>
MapOperator(string oper)488         internal Token MapOperator(string oper)
489         {
490             if (InternalOperatorDictionary.ContainsKey(oper))
491             {
492                 return NewToken(InternalOperatorDictionary[oper], new TerminalToken(oper, _iPos));
493             }
494             else
495             {
496                 throw EntityUtil.EntitySqlError(_query, System.Data.Entity.Strings.InvalidOperatorSymbol, _iPos);
497             }
498         }
499 
500         /// <summary>
501         /// Maps punctuator to respective token
502         /// </summary>
503         /// <param name="punct">punctuator</param>
504         /// <returns>Token</returns>
MapPunctuator(string punct)505         internal Token MapPunctuator(string punct)
506         {
507             if (InternalPunctuatorDictionary.ContainsKey(punct))
508             {
509                 ResetSymbolAsIdentifierState(true);
510 
511                 if (punct.Equals(".", StringComparison.OrdinalIgnoreCase))
512                 {
513                     _symbolAsIdentifierState = true;
514                 }
515 
516                 return NewToken(InternalPunctuatorDictionary[punct], new TerminalToken(punct, _iPos));
517             }
518             else
519             {
520                 throw EntityUtil.EntitySqlError(_query, System.Data.Entity.Strings.InvalidPunctuatorSymbol, _iPos);
521             }
522         }
523 
524         /// <summary>
525         /// Maps double quoted string to a literal or an idendifier
526         /// </summary>
527         /// <param name="symbol"></param>
528         /// <returns>Token</returns>
MapDoubleQuotedString(string symbol)529         internal Token MapDoubleQuotedString(string symbol)
530         {
531             // If there is a mode that makes eSQL parser to follow the SQL-92 rules regarding quotation mark
532             // delimiting identifiers then this method may decide to map to identifiers.
533             // In this case identifiers delimited by double quotation marks can be either eSQL reserved keywords
534             // or can contain characters not usually allowed by the eSQL syntax rules for identifiers,
535             // so identifiers mapped here should be treated as escaped identifiers.
536             return NewLiteralToken(symbol, AST.LiteralKind.String);
537         }
538 
539         /// <summary>
540         /// Creates literal token
541         /// </summary>
542         /// <param name="literal">literal</param>
543         /// <param name="literalKind">literal kind</param>
544         /// <returns>Literal Token</returns>
NewLiteralToken(string literal, AST.LiteralKind literalKind)545         internal Token NewLiteralToken(string literal, AST.LiteralKind literalKind)
546         {
547             Debug.Assert(!String.IsNullOrEmpty(literal), "literal must not be null or empty");
548             Debug.Assert(literalKind != AST.LiteralKind.Null, "literalKind must not be LiteralKind.Null");
549 
550             string literalValue = literal;
551             switch (literalKind)
552             {
553                 case AST.LiteralKind.Binary:
554                     literalValue = GetLiteralSingleQuotePayload(literal);
555                     if (!IsValidBinaryValue(literalValue))
556                     {
557                         throw EntityUtil.EntitySqlError(_query, System.Data.Entity.Strings.InvalidLiteralFormat("binary", literalValue), _iPos);
558                     }
559                     break;
560 
561                 case AST.LiteralKind.String:
562                     if ('N' == literal[0])
563                     {
564                         literalKind = AST.LiteralKind.UnicodeString;
565                     }
566                     break;
567 
568                 case AST.LiteralKind.DateTime:
569                     literalValue = GetLiteralSingleQuotePayload(literal);
570                     if (!IsValidDateTimeValue(literalValue))
571                     {
572                         throw EntityUtil.EntitySqlError(_query, System.Data.Entity.Strings.InvalidLiteralFormat("datetime", literalValue), _iPos);
573                     }
574                     break;
575 
576                 case AST.LiteralKind.Time:
577                     literalValue = GetLiteralSingleQuotePayload(literal);
578                     if (!IsValidTimeValue(literalValue))
579                     {
580                         throw EntityUtil.EntitySqlError(_query, System.Data.Entity.Strings.InvalidLiteralFormat("time", literalValue), _iPos);
581                     }
582                     break;
583                 case AST.LiteralKind.DateTimeOffset:
584                     literalValue = GetLiteralSingleQuotePayload(literal);
585                     if (!IsValidDateTimeOffsetValue(literalValue))
586                     {
587                         throw EntityUtil.EntitySqlError(_query, System.Data.Entity.Strings.InvalidLiteralFormat("datetimeoffset", literalValue), _iPos);
588                     }
589                     break;
590 
591                 case AST.LiteralKind.Guid:
592                     literalValue = GetLiteralSingleQuotePayload(literal);
593                     if (!IsValidGuidValue(literalValue))
594                     {
595                         throw EntityUtil.EntitySqlError(_query, System.Data.Entity.Strings.InvalidLiteralFormat("guid", literalValue), _iPos);
596                     }
597                     break;
598             }
599 
600             return NewToken(CqlParser.LITERAL, new AST.Literal(literalValue, literalKind, _query, _iPos));
601         }
602 
603         /// <summary>
604         /// Creates parameter token
605         /// </summary>
606         /// <param name="param">param</param>
607         /// <returns>Parameter Token</returns>
NewParameterToken(string param)608         internal Token NewParameterToken(string param)
609         {
610             return NewToken(CqlParser.PARAMETER, new AST.QueryParameter(param, _query, _iPos));
611         }
612 
        /// <summary>
        /// Handles escaped identifiers: scans forward from the current lexeme until the closing ']'
        /// that is not part of a doubled "]]", then maps the lexeme (with every "]]" collapsed to "]")
        /// through MapIdentifierOrKeyword.
        /// ch will always be translated i.e. normalized.
        /// Throws an eSQL error when the end of input is reached before the closing bracket.
        /// </summary>
        /// <returns>the token produced by MapIdentifierOrKeyword for the escaped identifier</returns>
        internal Token HandleEscapedIdentifiers()
        {
            // Start from the first character of the current lexeme.
            char ch = YYText[0];
            while (ch != YY_EOF)
            {
                if (ch == ']')
                {
                    // Mark the position after this ']' and look one character ahead.
                    // NOTE(review): presumably yy_mark_end records the current position as the end of
                    // the lexeme and yy_to_mark rewinds to it - confirm in the generated scanner.
                    yy_mark_end();
                    ch = yy_advance();
                    if (ch != ']')
                    {
                        // A ']' not followed by another ']' closes the identifier: return to the mark,
                        // reset the "symbol as identifier" states and map the lexeme.
                        yy_to_mark();
                        ResetSymbolAsIdentifierState(true);
                        return MapIdentifierOrKeyword(YYText.Replace("]]", "]"));
                    }
                    // Otherwise the two brackets form a doubled "]]"; keep scanning.
                }
                ch = yy_advance();
            }
            // The input ended without a closing bracket.
            Debug.Assert(ch == YY_EOF, "ch == YY_EOF");
            throw EntityUtil.EntitySqlError(_query, System.Data.Entity.Strings.InvalidEscapedIdentifierUnbalanced(YYText), _iPos);
        }
638 
IsLetterOrDigitOrUnderscore(string symbol, out bool isIdentifierASCII)639         internal static bool IsLetterOrDigitOrUnderscore(string symbol, out bool isIdentifierASCII)
640         {
641             isIdentifierASCII = true;
642             for (int i = 0; i < symbol.Length; i++)
643             {
644                 isIdentifierASCII = isIdentifierASCII && symbol[i] < 0x80;
645                 if (!isIdentifierASCII && !IsLetter(symbol[i]) && !IsDigit(symbol[i]) && (symbol[i] != '_'))
646                 {
647                     return false;
648                 }
649             }
650             return true;
651         }
652 
IsLetter(char c)653         private static bool IsLetter(char c)
654         {
655             return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
656         }
657 
IsDigit(char c)658         private static bool IsDigit(char c)
659         {
660             return (c >= '0' && c <= '9');
661         }
662 
isHexDigit(char c)663         private static bool isHexDigit(char c)
664         {
665             return (IsDigit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'));
666         }
667 
668         /// <summary>
669         /// Returns true if given char is a new line character defined by
670         /// UNICODE 5.0, section 5.8 Newline Guidelines.
671         /// These are 'mandatory' line breaks. NOTE that CRLF is treated as a
672         /// composite 'character' and was intentionaly omitted in the character set bellow.
673         /// </summary>
674         /// <param name="c"></param>
675         /// <returns></returns>
IsNewLine(Char c)676         internal static bool IsNewLine(Char c)
677         {
678             for (int i = 0; i < _newLineCharacters.Length; i++)
679             {
680                 if (c == _newLineCharacters[i])
681                 {
682                     return true;
683                 }
684             }
685             return false;
686         }
687 
688         /// <summary>
689         /// extracts single quoted literal 'payload'. literal MUST BE normalized.
690         /// </summary>
691         /// <param name="literal"></param>
692         /// <returns></returns>
GetLiteralSingleQuotePayload(string literal)693         private static string GetLiteralSingleQuotePayload(string literal)
694         {
695             Debug.Assert(-1 != literal.IndexOf('\''), "quoted literal value must have single quotes");
696             Debug.Assert(-1 != literal.LastIndexOf('\''), "quoted literal value must have single quotes");
697             Debug.Assert(literal.IndexOf('\'') != literal.LastIndexOf('\''), "quoted literal value must have 2 single quotes");
698             Debug.Assert(literal.Split(new char[] { '\'' }).Length == 3, "quoted literal value must have 2 single quotes");
699 
700             // NOTE: this is not a precondition validation. This validation is for security purposes based on the
701             // paranoid assumption that all input is evil. we should not see this exception under normal
702             // conditions.
703             if ((literal.Split(new char[] { '\'' }).Length != 3) || (-1 == literal.IndexOf('\'')) || (-1 == literal.LastIndexOf('\'')))
704             {
705                 throw EntityUtil.EntitySqlError(System.Data.Entity.Strings.MalformedSingleQuotePayload);
706             }
707 
708             int startIndex = literal.IndexOf('\'');
709 
710             string literalPayload = literal.Substring(startIndex + 1, literal.Length - (startIndex + 2));
711 
712             Debug.Assert(literalPayload.IndexOf('\'') == -1, "quoted literal payload must not have single quotes");
713             Debug.Assert(literalPayload.LastIndexOf('\'') == -1, "quoted literal payload must not have single quotes");
714 
715             // NOTE: this is not a precondition validation. This validation is for security purposes based on the
716             // paranoid assumption that all input is evil. we should not see this exception under normal
717             // conditions.
718             if (literalPayload.Split(new char[] { '\'' }).Length != 1)
719             {
720                 throw EntityUtil.EntitySqlError(System.Data.Entity.Strings.MalformedSingleQuotePayload);
721             }
722 
723             return literalPayload;
724         }
725 
726         /// <summary>
727         /// returns true if guid literal value format is valid
728         /// </summary>
729         /// <param name="guidValue"></param>
730         /// <returns></returns>
IsValidGuidValue(string guidValue)731         private static bool IsValidGuidValue(string guidValue)
732         {
733             int startIndex = 0;
734             int endIndex = guidValue.Length - 1;
735             if ((endIndex - startIndex) + 1 != 36)
736             {
737                 return false;
738             }
739 
740             int i = 0;
741             bool bValid = true;
742             while (bValid && i < 36)
743             {
744                 if ((i == 8) || (i == 13) || (i == 18) || (i == 23))
745                 {
746                     bValid = (guidValue[startIndex + i] == '-');
747                 }
748                 else
749                 {
750                     bValid = isHexDigit(guidValue[startIndex + i]);
751                 }
752                 i++;
753             }
754             return bValid;
755         }
756 
757         /// <summary>
758         /// returns true if binary literal value format is valid
759         /// </summary>
760         /// <param name="binaryValue"></param>
761         /// <returns></returns>
IsValidBinaryValue(string binaryValue)762         private static bool IsValidBinaryValue(string binaryValue)
763         {
764             Debug.Assert(null != binaryValue, "binaryValue must not be null");
765 
766             if (String.IsNullOrEmpty(binaryValue))
767             {
768                 return true;
769             }
770 
771             int i = 0;
772             bool bValid = binaryValue.Length > 0;
773             while (bValid && i < binaryValue.Length)
774             {
775                 bValid = isHexDigit(binaryValue[i++]);
776             }
777 
778             return bValid;
779         }
780 
781         /// <summary>
782         /// Returns true if datetime literal value format is valid
783         /// allowed format is: dddd-d?d-d?d{space}+d?d:d?d(:d?d(.d?d?d)?)?
784         /// where d is any decimal digit.
785         /// </summary>
786         /// <param name="datetimeValue"></param>
787         /// <returns></returns>
IsValidDateTimeValue(string datetimeValue)788         private static bool IsValidDateTimeValue(string datetimeValue)
789         {
790             if (null == _reDateTimeValue)
791             {
792                 _reDateTimeValue = new Regex(_datetimeValueRegularExpression, RegexOptions.Singleline | RegexOptions.CultureInvariant);
793             }
794             return _reDateTimeValue.IsMatch(datetimeValue);
795         }
796 
797         /// <summary>
798         /// Returns true if time literal value format is valid
799         /// allowed format is: +d?d:d?d(:d?d(.d?d?d)?)?
800         /// where d is any decimal digit.
801         /// </summary>
802         /// <param name="timeValue"></param>
803         /// <returns></returns>
IsValidTimeValue(string timeValue)804         private static bool IsValidTimeValue(string timeValue)
805         {
806             if (null == _reTimeValue)
807             {
808                 _reTimeValue = new Regex(_timeValueRegularExpression, RegexOptions.Singleline | RegexOptions.CultureInvariant);
809             }
810             return _reTimeValue.IsMatch(timeValue);
811         }
812 
813         /// <summary>
814         /// Returns true if datetimeoffset literal value format is valid
815         /// allowed format is: dddd-d?d-d?d{space}+d?d:d?d(:d?d(.d?d?d)?)?([+-]d?d:d?d)?
816         /// where d is any decimal digit.
817         /// </summary>
818         /// <param name="datetimeOffsetValue"></param>
819         /// <returns></returns>
IsValidDateTimeOffsetValue(string datetimeOffsetValue)820         private static bool IsValidDateTimeOffsetValue(string datetimeOffsetValue)
821         {
822             if (null == _reDateTimeOffsetValue)
823             {
824                 _reDateTimeOffsetValue = new Regex(_datetimeOffsetValueRegularExpression, RegexOptions.Singleline | RegexOptions.CultureInvariant);
825             }
826             return _reDateTimeOffsetValue.IsMatch(datetimeOffsetValue);
827         }
828 
829         private static Dictionary<string, short> InternalKeywordDictionary
830         {
831             get
832             {
833                 if (null == _keywords)
834                 {
835                     #region Initializes eSQL keywords
836                     Dictionary<string, short> keywords = new Dictionary<string, short>(60, _stringComparer);
837                     keywords.Add("all", CqlParser.ALL);
838                     keywords.Add("and", CqlParser.AND);
839                     keywords.Add("anyelement", CqlParser.ANYELEMENT);
840                     keywords.Add("apply", CqlParser.APPLY);
841                     keywords.Add("as", CqlParser.AS);
842                     keywords.Add("asc", CqlParser.ASC);
843                     keywords.Add("between", CqlParser.BETWEEN);
844                     keywords.Add("by", CqlParser.BY);
845                     keywords.Add("case", CqlParser.CASE);
846                     keywords.Add("cast", CqlParser.CAST);
847                     keywords.Add("collate", CqlParser.COLLATE);
848                     keywords.Add("collection", CqlParser.COLLECTION);
849                     keywords.Add("createref", CqlParser.CREATEREF);
850                     keywords.Add("cross", CqlParser.CROSS);
851                     keywords.Add("deref", CqlParser.DEREF);
852                     keywords.Add("desc", CqlParser.DESC);
853                     keywords.Add("distinct", CqlParser.DISTINCT);
854                     keywords.Add("element", CqlParser.ELEMENT);
855                     keywords.Add("else", CqlParser.ELSE);
856                     keywords.Add("end", CqlParser.END);
857                     keywords.Add("escape", CqlParser.ESCAPE);
858                     keywords.Add("except", CqlParser.EXCEPT);
859                     keywords.Add("exists", CqlParser.EXISTS);
860                     keywords.Add("false", CqlParser.LITERAL);
861                     keywords.Add("flatten", CqlParser.FLATTEN);
862                     keywords.Add("from", CqlParser.FROM);
863                     keywords.Add("full", CqlParser.FULL);
864                     keywords.Add("function", CqlParser.FUNCTION);
865                     keywords.Add("group", CqlParser.GROUP);
866                     keywords.Add("grouppartition", CqlParser.GROUPPARTITION);
867                     keywords.Add("having", CqlParser.HAVING);
868                     keywords.Add("in", CqlParser.IN);
869                     keywords.Add("inner", CqlParser.INNER);
870                     keywords.Add("intersect", CqlParser.INTERSECT);
871                     keywords.Add("is", CqlParser.IS);
872                     keywords.Add("join", CqlParser.JOIN);
873                     keywords.Add("key", CqlParser.KEY);
874                     keywords.Add("left", CqlParser.LEFT);
875                     keywords.Add("like", CqlParser.LIKE);
876                     keywords.Add("limit", CqlParser.LIMIT);
877                     keywords.Add("multiset", CqlParser.MULTISET);
878                     keywords.Add("navigate", CqlParser.NAVIGATE);
879                     keywords.Add("not", CqlParser.NOT);
880                     keywords.Add("null", CqlParser.NULL);
881                     keywords.Add("of", CqlParser.OF);
882                     keywords.Add("oftype", CqlParser.OFTYPE);
883                     keywords.Add("on", CqlParser.ON);
884                     keywords.Add("only", CqlParser.ONLY);
885                     keywords.Add("or", CqlParser.OR);
886                     keywords.Add("order", CqlParser.ORDER);
887                     keywords.Add("outer", CqlParser.OUTER);
888                     keywords.Add("overlaps", CqlParser.OVERLAPS);
889                     keywords.Add("ref", CqlParser.REF);
890                     keywords.Add("relationship", CqlParser.RELATIONSHIP);
891                     keywords.Add("right", CqlParser.RIGHT);
892                     keywords.Add("row", CqlParser.ROW);
893                     keywords.Add("select", CqlParser.SELECT);
894                     keywords.Add("set", CqlParser.SET);
895                     keywords.Add("skip", CqlParser.SKIP);
896                     keywords.Add("then", CqlParser.THEN);
897                     keywords.Add("top", CqlParser.TOP);
898                     keywords.Add("treat", CqlParser.TREAT);
899                     keywords.Add("true", CqlParser.LITERAL);
900                     keywords.Add("union", CqlParser.UNION);
901                     keywords.Add("using", CqlParser.USING);
902                     keywords.Add("value", CqlParser.VALUE);
903                     keywords.Add("when", CqlParser.WHEN);
904                     keywords.Add("where", CqlParser.WHERE);
905                     keywords.Add("with", CqlParser.WITH);
906                     _keywords = keywords;
907                     #endregion
908                 }
909                 return _keywords;
910             }
911 
912         }
913 
914         private static HashSet<string> InternalInvalidAliasNames
915         {
916             get
917             {
918                 if (null == _invalidAliasNames)
919                 {
920                     #region Initializes invalid aliases
921                     HashSet<string> invalidAliasName = new HashSet<string>(_stringComparer);
922                     invalidAliasName.Add("all");
923                     invalidAliasName.Add("and");
924                     invalidAliasName.Add("apply");
925                     invalidAliasName.Add("as");
926                     invalidAliasName.Add("asc");
927                     invalidAliasName.Add("between");
928                     invalidAliasName.Add("by");
929                     invalidAliasName.Add("case");
930                     invalidAliasName.Add("cast");
931                     invalidAliasName.Add("collate");
932                     invalidAliasName.Add("createref");
933                     invalidAliasName.Add("deref");
934                     invalidAliasName.Add("desc");
935                     invalidAliasName.Add("distinct");
936                     invalidAliasName.Add("element");
937                     invalidAliasName.Add("else");
938                     invalidAliasName.Add("end");
939                     invalidAliasName.Add("escape");
940                     invalidAliasName.Add("except");
941                     invalidAliasName.Add("exists");
942                     invalidAliasName.Add("flatten");
943                     invalidAliasName.Add("from");
944                     invalidAliasName.Add("group");
945                     invalidAliasName.Add("having");
946                     invalidAliasName.Add("in");
947                     invalidAliasName.Add("inner");
948                     invalidAliasName.Add("intersect");
949                     invalidAliasName.Add("is");
950                     invalidAliasName.Add("join");
951                     invalidAliasName.Add("like");
952                     invalidAliasName.Add("multiset");
953                     invalidAliasName.Add("navigate");
954                     invalidAliasName.Add("not");
955                     invalidAliasName.Add("null");
956                     invalidAliasName.Add("of");
957                     invalidAliasName.Add("oftype");
958                     invalidAliasName.Add("on");
959                     invalidAliasName.Add("only");
960                     invalidAliasName.Add("or");
961                     invalidAliasName.Add("overlaps");
962                     invalidAliasName.Add("ref");
963                     invalidAliasName.Add("relationship");
964                     invalidAliasName.Add("select");
965                     invalidAliasName.Add("set");
966                     invalidAliasName.Add("then");
967                     invalidAliasName.Add("treat");
968                     invalidAliasName.Add("union");
969                     invalidAliasName.Add("using");
970                     invalidAliasName.Add("when");
971                     invalidAliasName.Add("where");
972                     invalidAliasName.Add("with");
973                     _invalidAliasNames = invalidAliasName;
974                     #endregion
975                 }
976                 return _invalidAliasNames;
977             }
978         }
979 
980         private static HashSet<string> InternalInvalidInlineFunctionNames
981         {
982             get
983             {
984                 if (null == _invalidInlineFunctionNames)
985                 {
986                     #region Initializes invalid inline function names
987                     HashSet<string> invalidInlineFunctionNames = new HashSet<string>(_stringComparer);
988                     invalidInlineFunctionNames.Add("anyelement");
989                     invalidInlineFunctionNames.Add("element");
990                     invalidInlineFunctionNames.Add("function");
991                     invalidInlineFunctionNames.Add("grouppartition");
992                     invalidInlineFunctionNames.Add("key");
993                     invalidInlineFunctionNames.Add("ref");
994                     invalidInlineFunctionNames.Add("row");
995                     invalidInlineFunctionNames.Add("skip");
996                     invalidInlineFunctionNames.Add("top");
997                     invalidInlineFunctionNames.Add("value");
998                     _invalidInlineFunctionNames = invalidInlineFunctionNames;
999                     #endregion
1000                 }
1001                 return _invalidInlineFunctionNames;
1002             }
1003         }
1004 
1005         private static Dictionary<string, short> InternalOperatorDictionary
1006         {
1007             get
1008             {
1009                 if (null == _operators)
1010                 {
1011                     #region Initializes operator dictionary
1012                     Dictionary<string, short> operators = new Dictionary<string, short>(16, _stringComparer);
1013                     operators.Add("==", CqlParser.OP_EQ);
1014                     operators.Add("!=", CqlParser.OP_NEQ);
1015                     operators.Add("<>", CqlParser.OP_NEQ);
1016                     operators.Add("<", CqlParser.OP_LT);
1017                     operators.Add("<=", CqlParser.OP_LE);
1018                     operators.Add(">", CqlParser.OP_GT);
1019                     operators.Add(">=", CqlParser.OP_GE);
1020                     operators.Add("&&", CqlParser.AND);
1021                     operators.Add("||", CqlParser.OR);
1022                     operators.Add("!", CqlParser.NOT);
1023                     operators.Add("+", CqlParser.PLUS);
1024                     operators.Add("-", CqlParser.MINUS);
1025                     operators.Add("*", CqlParser.STAR);
1026                     operators.Add("/", CqlParser.FSLASH);
1027                     operators.Add("%", CqlParser.PERCENT);
1028                     _operators = operators;
1029                     #endregion
1030                 }
1031                 return _operators;
1032             }
1033         }
1034 
1035         private static Dictionary<string, short> InternalPunctuatorDictionary
1036         {
1037             get
1038             {
1039                 if (null == _punctuators)
1040                 {
1041                     #region Initializes punctuators dictionary
1042                     Dictionary<string, short> punctuators = new Dictionary<string, short>(16, _stringComparer);
1043                     punctuators.Add(",", CqlParser.COMMA);
1044                     punctuators.Add(":", CqlParser.COLON);
1045                     punctuators.Add(".", CqlParser.DOT);
1046                     punctuators.Add("?", CqlParser.QMARK);
1047                     punctuators.Add("(", CqlParser.L_PAREN);
1048                     punctuators.Add(")", CqlParser.R_PAREN);
1049                     punctuators.Add("[", CqlParser.L_BRACE);
1050                     punctuators.Add("]", CqlParser.R_BRACE);
1051                     punctuators.Add("{", CqlParser.L_CURLY);
1052                     punctuators.Add("}", CqlParser.R_CURLY);
1053                     punctuators.Add(";", CqlParser.SCOLON);
1054                     punctuators.Add("=", CqlParser.EQUAL);
1055                     _punctuators = punctuators;
1056                     #endregion
1057                 }
1058                 return _punctuators;
1059             }
1060         }
1061 
1062         private static HashSet<string> InternalCanonicalFunctionNames
1063         {
1064             get
1065             {
1066                 if (null == _canonicalFunctionNames)
1067                 {
1068                     HashSet<string> canonicalFunctionNames = new HashSet<string>(_stringComparer);
1069                     canonicalFunctionNames.Add("left");
1070                     canonicalFunctionNames.Add("right");
1071                     _canonicalFunctionNames = canonicalFunctionNames;
1072                 }
1073                 return _canonicalFunctionNames;
1074             }
1075         }
1076     }
1077 }
1078