// Copyright (c) Microsoft Corporation. All rights reserved. See License.txt in the project root for license information.

using System.Collections.Generic;
using System.Diagnostics;
using System.Globalization;
using System.Web.Razor.Parser;
using System.Web.Razor.Parser.SyntaxTree;
using System.Web.Razor.Text;
using System.Web.Razor.Tokenizer.Symbols;

namespace System.Web.Razor.Tokenizer
{
    /// <summary>
    /// Tokenizer that splits Visual Basic source text into <see cref="VBSymbol"/> instances.
    /// </summary>
    public class VBTokenizer : Tokenizer<VBSymbol, VBSymbolType>
    {
        // Maps a single operator/punctuation character to its symbol type.
        // The table is only ever read (see Operator()), so the field is readonly.
        private static readonly Dictionary<char, VBSymbolType> _operatorTable = new Dictionary<char, VBSymbolType>()
        {
            { '_', VBSymbolType.LineContinuation },
            { '(', VBSymbolType.LeftParenthesis },
            { ')', VBSymbolType.RightParenthesis },
            { '[', VBSymbolType.LeftBracket },
            { ']', VBSymbolType.RightBracket },
            { '{', VBSymbolType.LeftBrace },
            { '}', VBSymbolType.RightBrace },
            { '!', VBSymbolType.Bang },
            { '#', VBSymbolType.Hash },
            { ',', VBSymbolType.Comma },
            { '.', VBSymbolType.Dot },
            { ':', VBSymbolType.Colon },
            { '?', VBSymbolType.QuestionMark },
            { '&', VBSymbolType.Concatenation },
            { '*', VBSymbolType.Multiply },
            { '+', VBSymbolType.Add },
            { '-', VBSymbolType.Subtract },
            { '/', VBSymbolType.Divide },
            { '\\', VBSymbolType.IntegerDivide },
            { '^', VBSymbolType.Exponentiation },
            { '=', VBSymbolType.Equal },
            { '<', VBSymbolType.LessThan },
            { '>', VBSymbolType.GreaterThan },
            { '$', VBSymbolType.Dollar },
        };

        /// <summary>
        /// Creates a tokenizer over <paramref name="source"/> and places it in the Data state.
        /// </summary>
        /// <param name="source">The text document to tokenize.</param>
        public VBTokenizer(ITextDocument source)
            : base(source)
        {
            CurrentState = Data;
        }

        /// <summary>
        /// Gets the initial state of the tokenizer, which is the Data state.
        /// </summary>
        protected override State StartState
        {
            get { return Data; }
        }

        /// <summary>
        /// Gets the symbol type used for the body of a Razor comment.
        /// </summary>
        public override VBSymbolType RazorCommentType
        {
            get { return VBSymbolType.RazorComment; }
        }

        /// <summary>
        /// Gets the symbol type used for the '@' that opens or closes a Razor comment.
        /// </summary>
        public override VBSymbolType RazorCommentTransitionType
        {
            get { return VBSymbolType.RazorCommentTransition; }
        }

        /// <summary>
        /// Gets the symbol type used for the '*' that opens or closes a Razor comment.
        /// </summary>
        public override VBSymbolType RazorCommentStarType
        {
            get { return VBSymbolType.RazorCommentStar; }
        }

        /// <summary>
        /// Tokenizes <paramref name="content"/> and lazily yields each symbol until the
        /// tokenizer reports no more symbols (NextSymbol returns null).
        /// </summary>
        /// <param name="content">The Visual Basic text to tokenize.</param>
        /// <returns>The sequence of symbols read from <paramref name="content"/>.</returns>
        internal static IEnumerable<VBSymbol> Tokenize(string content)
        {
            using (SeekableTextReader reader = new SeekableTextReader(content))
            {
                VBTokenizer tok = new VBTokenizer(reader);
                VBSymbol sym;
                while ((sym = tok.NextSymbol()) != null)
                {
                    yield return sym;
                }
            }
        }

        /// <summary>
        /// Creates a <see cref="VBSymbol"/> from the supplied location, content, type and errors.
        /// </summary>
        protected override VBSymbol CreateSymbol(SourceLocation start, string content, VBSymbolType type, IEnumerable<RazorError> errors)
        {
            return new VBSymbol(start, content, type, errors);
        }

        // Main state: inspects the current character and either emits a symbol directly
        // or hands off to the routine that lexes the corresponding construct.
        private StateResult Data()
        {
            // We are accepting more characters and whitespace/newlines than the VB Spec defines, to simplify things
            // Since the code must still be compiled by a VB compiler, this will not cause adverse effects.
            if (ParserHelpers.IsNewLine(CurrentCharacter))
            {
                // VB Spec §2.1.1
                // A '\r' immediately followed by '\n' is emitted as a single NewLine symbol.
                bool checkTwoCharNewline = CurrentCharacter == '\r';
                TakeCurrent();
                if (checkTwoCharNewline && CurrentCharacter == '\n')
                {
                    TakeCurrent();
                }
                return Stay(EndSymbol(VBSymbolType.NewLine));
            }
            else if (ParserHelpers.IsWhitespace(CurrentCharacter))
            {
                // VB Spec §2.1.3
                TakeUntil(c => !ParserHelpers.IsWhitespace(c));
                return Stay(EndSymbol(VBSymbolType.WhiteSpace));
            }
            else if (VBHelpers.IsSingleQuote(CurrentCharacter))
            {
                // Single quote starts a comment that runs to the end of the line.
                TakeCurrent();
                return CommentBody();
            }
            else if (IsIdentifierStart())
            {
                return Identifier();
            }
            else if (Char.IsDigit(CurrentCharacter))
            {
                return DecimalLiteral();
            }
            else if (CurrentCharacter == '&')
            {
                // "&H" / "&O" (case-insensitive) introduce hex and octal literals.
                char next = Char.ToLower(Peek(), CultureInfo.InvariantCulture);
                if (next == 'h')
                {
                    return HexLiteral();
                }
                else if (next == 'o')
                {
                    return OctLiteral();
                }
                // Otherwise fall out of the chain so '&' is emitted as an operator below.
            }
            else if (CurrentCharacter == '.' && Char.IsDigit(Peek()))
            {
                // Floating point literal with no integer part, e.g. ".5"
                return FloatingPointLiteralEnd();
            }
            else if (VBHelpers.IsDoubleQuote(CurrentCharacter))
            {
                TakeCurrent();
                return Transition(QuotedLiteral);
            }
            else if (AtDateLiteral())
            {
                return DateLiteral();
            }
            else if (CurrentCharacter == '@')
            {
                TakeCurrent();
                if (CurrentCharacter == '*')
                {
                    return Transition(EndSymbol(VBSymbolType.RazorCommentTransition), AfterRazorCommentTransition);
                }
                else if (CurrentCharacter == '@')
                {
                    // Could be escaped comment transition
                    // Emit the first '@' now; the follow-up state takes the second '@',
                    // emits it as its own Transition symbol and returns to Data.
                    return Transition(EndSymbol(VBSymbolType.Transition), () =>
                    {
                        TakeCurrent();
                        return Transition(EndSymbol(VBSymbolType.Transition), Data);
                    });
                }
                else
                {
                    return Stay(EndSymbol(VBSymbolType.Transition));
                }
            }
            return Stay(EndSymbol(Operator()));
        }

        // Lexes a date literal: the opening '#', everything up to the next '#' or newline,
        // and the closing '#' when one is present.
        private StateResult DateLiteral()
        {
            AssertCurrent('#');
            TakeCurrent();
            TakeUntil(c => c == '#' || ParserHelpers.IsNewLine(c));
            if (CurrentCharacter == '#')
            {
                TakeCurrent();
            }
            return Stay(EndSymbol(VBSymbolType.DateLiteral));
        }

        // Returns true when the current character is '#' and the first non-whitespace
        // character after it is a digit. This is pure lookahead: the source position is
        // restored before returning.
        private bool AtDateLiteral()
        {
            if (CurrentCharacter != '#')
            {
                return false;
            }

            int start = Source.Position;
            try
            {
                MoveNext();
                while (ParserHelpers.IsWhitespace(CurrentCharacter))
                {
                    MoveNext();
                }
                return Char.IsDigit(CurrentCharacter);
            }
            finally
            {
                // Always rewind so the lookahead consumes nothing.
                Source.Position = start;
            }
        }

        // Lexes the remainder of a quoted literal (the opening quote has already been taken).
        // Two consecutive double quotes are an escape and keep the tokenizer in this state.
        // A trailing 'c'/'C' turns the string literal into a character literal.
        private StateResult QuotedLiteral()
        {
            TakeUntil(c => VBHelpers.IsDoubleQuote(c) || ParserHelpers.IsNewLine(c));
            if (VBHelpers.IsDoubleQuote(CurrentCharacter))
            {
                TakeCurrent();
                if (VBHelpers.IsDoubleQuote(CurrentCharacter))
                {
                    // Escape sequence, remain in the string
                    TakeCurrent();
                    return Stay();
                }
            }

            VBSymbolType type = VBSymbolType.StringLiteral;
            if (Char.ToLowerInvariant(CurrentCharacter) == 'c')
            {
                TakeCurrent();
                type = VBSymbolType.CharacterLiteral;
            }
            return Transition(EndSymbol(type), Data);
        }

        // Lexes a literal that starts with decimal digits. If the digits are followed by a
        // float type suffix, '.', or an exponent marker, the literal is completed as a
        // floating point literal; otherwise it is an integer literal with an optional suffix.
        private StateResult DecimalLiteral()
        {
            TakeUntil(c => !Char.IsDigit(c));
            char lower = Char.ToLowerInvariant(CurrentCharacter);
            if (IsFloatTypeSuffix(lower) || lower == '.' || lower == 'e')
            {
                return FloatingPointLiteralEnd();
            }
            else
            {
                TakeIntTypeSuffix();
                return Stay(EndSymbol(VBSymbolType.IntegerLiteral));
            }
        }

        // True when chr (case-insensitive) is one of the float type suffixes F, R or D.
        private static bool IsFloatTypeSuffix(char chr)
        {
            chr = Char.ToLowerInvariant(chr);
            return chr == 'f' || chr == 'r' || chr == 'd';
        }

        // Completes a floating point literal: optional fractional part, optional exponent
        // (with optional sign), and optional float type suffix.
        private StateResult FloatingPointLiteralEnd()
        {
            if (CurrentCharacter == '.')
            {
                TakeCurrent();
                TakeUntil(c => !Char.IsDigit(c));
            }
            if (Char.ToLowerInvariant(CurrentCharacter) == 'e')
            {
                TakeCurrent();
                if (CurrentCharacter == '+' || CurrentCharacter == '-')
                {
                    TakeCurrent();
                }
                TakeUntil(c => !Char.IsDigit(c));
            }
            if (IsFloatTypeSuffix(CurrentCharacter))
            {
                TakeCurrent();
            }
            return Stay(EndSymbol(VBSymbolType.FloatingPointLiteral));
        }

        // Lexes a hexadecimal integer literal: '&', 'H'/'h', hex digits, optional integer suffix.
        private StateResult HexLiteral()
        {
            AssertCurrent('&');
            TakeCurrent();
            Debug.Assert(Char.ToLowerInvariant(CurrentCharacter) == 'h');
            TakeCurrent();
            TakeUntil(c => !ParserHelpers.IsHexDigit(c));
            TakeIntTypeSuffix();
            return Stay(EndSymbol(VBSymbolType.IntegerLiteral));
        }

        // Lexes an octal integer literal: '&', 'O'/'o', octal digits, optional integer suffix.
        private StateResult OctLiteral()
        {
            AssertCurrent('&');
            TakeCurrent();
            Debug.Assert(Char.ToLowerInvariant(CurrentCharacter) == 'o');
            TakeCurrent();
            TakeUntil(c => !VBHelpers.IsOctalDigit(c));
            TakeIntTypeSuffix();
            return Stay(EndSymbol(VBSymbolType.IntegerLiteral));
        }

        // Takes the current character and returns the operator type it maps to,
        // or Unknown when the character is not in the operator table.
        private VBSymbolType Operator()
        {
            char op = CurrentCharacter;
            TakeCurrent();
            VBSymbolType ret;
            if (_operatorTable.TryGetValue(op, out ret))
            {
                return ret;
            }
            return VBSymbolType.Unknown;
        }

        // Takes an optional integer type suffix: an optional 'U' followed by an optional S, I or L.
        private void TakeIntTypeSuffix()
        {
            // Take the "U" in US, UI, UL
            if (Char.ToLowerInvariant(CurrentCharacter) == 'u')
            {
                TakeCurrent(); // Unsigned Prefix
            }

            // Take the S, I or L integer suffix
            if (IsIntegerSuffix(CurrentCharacter))
            {
                TakeCurrent();
            }
        }

        // True when chr (case-insensitive) is one of the integer type suffixes S, I or L.
        private static bool IsIntegerSuffix(char chr)
        {
            chr = Char.ToLowerInvariant(chr);
            return chr == 's' || chr == 'i' || chr == 'l';
        }

        // Takes everything up to (not including) the next newline and emits it as a Comment symbol.
        private StateResult CommentBody()
        {
            TakeUntil(ParserHelpers.IsNewLine);
            return Stay(EndSymbol(VBSymbolType.Comment));
        }

        // Lexes an identifier or keyword. A leading '[' marks an escaped identifier, in which
        // case a closing ']' is also taken when present. The "Rem" keyword starts a comment.
        private StateResult Identifier()
        {
            bool isEscaped = false;
            if (CurrentCharacter == '[')
            {
                TakeCurrent();
                isEscaped = true;
            }
            TakeUntil(c => !ParserHelpers.IsIdentifierPart(c));

            // If we're escaped, take the ']'
            if (isEscaped)
            {
                if (CurrentCharacter == ']')
                {
                    TakeCurrent();
                }
            }

            // Check for Keywords and build the symbol
            // The buffer is not modified below, so materialize its text once.
            string content = Buffer.ToString();
            VBKeyword? keyword = VBKeywordDetector.GetKeyword(content);
            if (keyword == VBKeyword.Rem)
            {
                return CommentBody();
            }

            // Built directly (rather than via EndSymbol) so the Keyword property can be set.
            VBSymbol sym = new VBSymbol(CurrentStart, content, keyword == null ? VBSymbolType.Identifier : VBSymbolType.Keyword)
            {
                Keyword = keyword
            };

            StartSymbol();

            return Stay(sym);
        }

        // Determines whether the current character begins an identifier.
        private bool IsIdentifierStart()
        {
            if (CurrentCharacter == '_')
            {
                // VB Spec §2.2:
                //  If an identifier begins with an underscore, it must contain at least one other valid identifier character to disambiguate it from a line continuation.
                return ParserHelpers.IsIdentifierPart(Peek());
            }
            if (CurrentCharacter == '[')
            {
                // '[' only starts an (escaped) identifier when an identifier character follows.
                return ParserHelpers.IsIdentifierPart(Peek());
            }
            return ParserHelpers.IsIdentifierStart(CurrentCharacter);
        }
    }
}