1 // Copyright (c) Microsoft Corporation. All rights reserved. See License.txt in the project root for license information.
2 
3 using System.Collections.Generic;
4 using System.Diagnostics;
5 using System.Globalization;
6 using System.Web.Razor.Parser;
7 using System.Web.Razor.Parser.SyntaxTree;
8 using System.Web.Razor.Text;
9 using System.Web.Razor.Tokenizer.Symbols;
10 
namespace System.Web.Razor.Tokenizer
{
    /// <summary>
    /// Tokenizer that splits Visual Basic source text into <see cref="VBSymbol"/> instances.
    /// </summary>
    /// <remarks>
    /// The tokenizer deliberately accepts more input than the VB specification allows; the generated
    /// code is still handed to a VB compiler, which is the component that rejects invalid programs.
    /// State-machine plumbing (<c>Stay</c>, <c>Transition</c>, <c>EndSymbol</c>, <c>TakeCurrent</c>,
    /// <c>TakeUntil</c>, <c>Peek</c>, ...) is inherited from the base tokenizer and is not defined here.
    /// </remarks>
    public class VBTokenizer : Tokenizer<VBSymbol, VBSymbolType>
    {
        // Single-character operators and punctuation, keyed by the character itself.
        // The table is never reassigned or mutated after construction, hence readonly.
        private static readonly Dictionary<char, VBSymbolType> _operatorTable = new Dictionary<char, VBSymbolType>()
        {
            { '_', VBSymbolType.LineContinuation },
            { '(', VBSymbolType.LeftParenthesis },
            { ')', VBSymbolType.RightParenthesis },
            { '[', VBSymbolType.LeftBracket },
            { ']', VBSymbolType.RightBracket },
            { '{', VBSymbolType.LeftBrace },
            { '}', VBSymbolType.RightBrace },
            { '!', VBSymbolType.Bang },
            { '#', VBSymbolType.Hash },
            { ',', VBSymbolType.Comma },
            { '.', VBSymbolType.Dot },
            { ':', VBSymbolType.Colon },
            { '?', VBSymbolType.QuestionMark },
            { '&', VBSymbolType.Concatenation },
            { '*', VBSymbolType.Multiply },
            { '+', VBSymbolType.Add },
            { '-', VBSymbolType.Subtract },
            { '/', VBSymbolType.Divide },
            { '\\', VBSymbolType.IntegerDivide },
            { '^', VBSymbolType.Exponentiation },
            { '=', VBSymbolType.Equal },
            { '<', VBSymbolType.LessThan },
            { '>', VBSymbolType.GreaterThan },
            { '$', VBSymbolType.Dollar },
        };

        /// <summary>
        /// Creates a tokenizer over <paramref name="source"/> that begins in the <see cref="Data"/> state.
        /// </summary>
        /// <param name="source">The text document to tokenize; passed through to the base tokenizer.</param>
        public VBTokenizer(ITextDocument source)
            : base(source)
        {
            CurrentState = Data;
        }

        /// <summary>
        /// Gets the initial state of the tokenizer, which is always <see cref="Data"/>.
        /// </summary>
        protected override State StartState
        {
            get { return Data; }
        }

        /// <summary>
        /// Gets the symbol type used for the body of a Razor comment.
        /// </summary>
        public override VBSymbolType RazorCommentType
        {
            get { return VBSymbolType.RazorComment; }
        }

        /// <summary>
        /// Gets the symbol type used for the '@' that opens or closes a Razor comment.
        /// </summary>
        public override VBSymbolType RazorCommentTransitionType
        {
            get { return VBSymbolType.RazorCommentTransition; }
        }

        /// <summary>
        /// Gets the symbol type used for the '*' that opens or closes a Razor comment.
        /// </summary>
        public override VBSymbolType RazorCommentStarType
        {
            get { return VBSymbolType.RazorCommentStar; }
        }

        /// <summary>
        /// Lazily tokenizes <paramref name="content"/>, yielding symbols until the tokenizer returns <c>null</c>.
        /// </summary>
        /// <param name="content">The VB source text to tokenize.</param>
        /// <returns>A deferred sequence of symbols; the underlying reader is disposed when enumeration ends.</returns>
        internal static IEnumerable<VBSymbol> Tokenize(string content)
        {
            using (SeekableTextReader reader = new SeekableTextReader(content))
            {
                VBTokenizer tok = new VBTokenizer(reader);
                VBSymbol sym;
                while ((sym = tok.NextSymbol()) != null)
                {
                    yield return sym;
                }
            }
        }

        /// <summary>
        /// Factory hook that builds a <see cref="VBSymbol"/> from the supplied pieces.
        /// </summary>
        /// <param name="start">Location at which the symbol starts.</param>
        /// <param name="content">Text of the symbol.</param>
        /// <param name="type">Type of the symbol.</param>
        /// <param name="errors">Errors to attach to the symbol.</param>
        /// <returns>The newly created symbol.</returns>
        protected override VBSymbol CreateSymbol(SourceLocation start, string content, VBSymbolType type, IEnumerable<RazorError> errors)
        {
            return new VBSymbol(start, content, type, errors);
        }

        /// <summary>
        /// Main tokenizer state: inspects the current character and dispatches to the matching lexer routine.
        /// </summary>
        /// <returns>The state result carrying the next symbol (if any) and the state to continue in.</returns>
        private StateResult Data()
        {
            // We are accepting more characters and whitespace/newlines than the VB Spec defines, to simplify things
            // Since the code must still be compiled by a VB compiler, this will not cause adverse effects.
            if (ParserHelpers.IsNewLine(CurrentCharacter))
            {
                // VB Spec §2.1.1
                // A "\r\n" pair is emitted as a single NewLine symbol.
                bool checkTwoCharNewline = CurrentCharacter == '\r';
                TakeCurrent();
                if (checkTwoCharNewline && CurrentCharacter == '\n')
                {
                    TakeCurrent();
                }
                return Stay(EndSymbol(VBSymbolType.NewLine));
            }
            else if (ParserHelpers.IsWhitespace(CurrentCharacter))
            {
                // VB Spec §2.1.3
                TakeUntil(c => !ParserHelpers.IsWhitespace(c));
                return Stay(EndSymbol(VBSymbolType.WhiteSpace));
            }
            else if (VBHelpers.IsSingleQuote(CurrentCharacter))
            {
                TakeCurrent();
                return CommentBody();
            }
            else if (IsIdentifierStart())
            {
                return Identifier();
            }
            else if (Char.IsDigit(CurrentCharacter))
            {
                return DecimalLiteral();
            }
            else if (CurrentCharacter == '&')
            {
                // "&H" / "&O" (any case) introduce hexadecimal / octal literals.
                char next = Char.ToLowerInvariant(Peek());
                if (next == 'h')
                {
                    return HexLiteral();
                }
                else if (next == 'o')
                {
                    return OctLiteral();
                }
                // Otherwise fall through to the operator handling at the end of the method.
            }
            else if (CurrentCharacter == '.' && Char.IsDigit(Peek()))
            {
                // Floating point literal with no integer part, e.g. ".5"
                return FloatingPointLiteralEnd();
            }
            else if (VBHelpers.IsDoubleQuote(CurrentCharacter))
            {
                TakeCurrent();
                return Transition(QuotedLiteral);
            }
            else if (AtDateLiteral())
            {
                return DateLiteral();
            }
            else if (CurrentCharacter == '@')
            {
                TakeCurrent();
                if (CurrentCharacter == '*')
                {
                    return Transition(EndSymbol(VBSymbolType.RazorCommentTransition), AfterRazorCommentTransition);
                }
                else if (CurrentCharacter == '@')
                {
                    // Could be escaped comment transition
                    // Emit the first '@' now; the follow-up state emits the second '@' and returns to Data.
                    return Transition(EndSymbol(VBSymbolType.Transition), () =>
                    {
                        TakeCurrent();
                        return Transition(EndSymbol(VBSymbolType.Transition), Data);
                    });
                }
                else
                {
                    return Stay(EndSymbol(VBSymbolType.Transition));
                }
            }
            return Stay(EndSymbol(Operator()));
        }

        /// <summary>
        /// Lexes a date literal that starts at the current '#'.
        /// </summary>
        /// <remarks>
        /// Consumes up to the closing '#' (which is included in the symbol) or stops in front of a newline,
        /// whichever comes first.
        /// </remarks>
        /// <returns>A result that stays in the current state and carries a DateLiteral symbol.</returns>
        private StateResult DateLiteral()
        {
            AssertCurrent('#');
            TakeCurrent();
            TakeUntil(c => c == '#' || ParserHelpers.IsNewLine(c));
            if (CurrentCharacter == '#')
            {
                TakeCurrent();
            }
            return Stay(EndSymbol(VBSymbolType.DateLiteral));
        }

        /// <summary>
        /// Looks ahead to decide whether the current '#' begins a date literal, i.e. whether it is followed
        /// by optional whitespace and then a digit.
        /// </summary>
        /// <returns><c>true</c> if a date literal starts here; otherwise <c>false</c>.</returns>
        private bool AtDateLiteral()
        {
            if (CurrentCharacter != '#')
            {
                return false;
            }
            int start = Source.Position;
            try
            {
                MoveNext();
                while (ParserHelpers.IsWhitespace(CurrentCharacter))
                {
                    MoveNext();
                }
                return Char.IsDigit(CurrentCharacter);
            }
            finally
            {
                // This is pure lookahead: always rewind the source to where we started.
                Source.Position = start;
            }
        }

        /// <summary>
        /// State used while inside a double-quoted literal; entered after the opening quote has been taken.
        /// </summary>
        /// <remarks>
        /// A doubled quote is an escape sequence and keeps the tokenizer in this state. Otherwise the literal
        /// ends and, if immediately followed by 'c' or 'C', is classified as a character literal.
        /// </remarks>
        /// <returns>A result that either stays in this state or emits the literal and returns to <see cref="Data"/>.</returns>
        private StateResult QuotedLiteral()
        {
            TakeUntil(c => VBHelpers.IsDoubleQuote(c) || ParserHelpers.IsNewLine(c));
            if (VBHelpers.IsDoubleQuote(CurrentCharacter))
            {
                TakeCurrent();
                if (VBHelpers.IsDoubleQuote(CurrentCharacter))
                {
                    // Escape sequence, remain in the string
                    TakeCurrent();
                    return Stay();
                }
            }

            VBSymbolType type = VBSymbolType.StringLiteral;
            if (Char.ToLowerInvariant(CurrentCharacter) == 'c')
            {
                TakeCurrent();
                type = VBSymbolType.CharacterLiteral;
            }
            return Transition(EndSymbol(type), Data);
        }

        /// <summary>
        /// Lexes a numeric literal that starts with a decimal digit.
        /// </summary>
        /// <remarks>
        /// After the leading digits, a float type suffix, a '.' or an 'e'/'E' hands over to
        /// <see cref="FloatingPointLiteralEnd"/>; anything else produces an integer literal with an optional
        /// integer type suffix. Note that a '.' selects the floating point path even when no digit follows it.
        /// </remarks>
        /// <returns>A result that stays in the current state and carries the numeric literal symbol.</returns>
        private StateResult DecimalLiteral()
        {
            TakeUntil(c => !Char.IsDigit(c));
            char lower = Char.ToLowerInvariant(CurrentCharacter);
            if (IsFloatTypeSuffix(lower) || lower == '.' || lower == 'e')
            {
                return FloatingPointLiteralEnd();
            }
            else
            {
                TakeIntTypeSuffix();
                return Stay(EndSymbol(VBSymbolType.IntegerLiteral));
            }
        }

        /// <summary>
        /// Determines whether <paramref name="chr"/> is a floating point type suffix: F, R or D in either case.
        /// </summary>
        /// <param name="chr">The character to test.</param>
        /// <returns><c>true</c> if the character is a float type suffix; otherwise <c>false</c>.</returns>
        private static bool IsFloatTypeSuffix(char chr)
        {
            chr = Char.ToLowerInvariant(chr);
            return chr == 'f' || chr == 'r' || chr == 'd';
        }

        /// <summary>
        /// Consumes the optional fraction, exponent and type suffix of a floating point literal.
        /// </summary>
        /// <returns>A result that stays in the current state and carries a FloatingPointLiteral symbol.</returns>
        private StateResult FloatingPointLiteralEnd()
        {
            // Fractional part: '.' followed by any number of digits
            if (CurrentCharacter == '.')
            {
                TakeCurrent();
                TakeUntil(c => !Char.IsDigit(c));
            }
            // Exponent: 'e'/'E', optional sign, then any number of digits
            if (Char.ToLowerInvariant(CurrentCharacter) == 'e')
            {
                TakeCurrent();
                if (CurrentCharacter == '+' || CurrentCharacter == '-')
                {
                    TakeCurrent();
                }
                TakeUntil(c => !Char.IsDigit(c));
            }
            // Type suffix
            if (IsFloatTypeSuffix(CurrentCharacter))
            {
                TakeCurrent();
            }
            return Stay(EndSymbol(VBSymbolType.FloatingPointLiteral));
        }

        /// <summary>
        /// Lexes a hexadecimal integer literal of the form "&amp;H" + hex digits + optional integer type suffix.
        /// </summary>
        /// <returns>A result that stays in the current state and carries an IntegerLiteral symbol.</returns>
        private StateResult HexLiteral()
        {
            AssertCurrent('&');
            TakeCurrent();
            Debug.Assert(Char.ToLowerInvariant(CurrentCharacter) == 'h');
            TakeCurrent();
            TakeUntil(c => !ParserHelpers.IsHexDigit(c));
            TakeIntTypeSuffix();
            return Stay(EndSymbol(VBSymbolType.IntegerLiteral));
        }

        /// <summary>
        /// Lexes an octal integer literal of the form "&amp;O" + octal digits + optional integer type suffix.
        /// </summary>
        /// <returns>A result that stays in the current state and carries an IntegerLiteral symbol.</returns>
        private StateResult OctLiteral()
        {
            AssertCurrent('&');
            TakeCurrent();
            Debug.Assert(Char.ToLowerInvariant(CurrentCharacter) == 'o');
            TakeCurrent();
            TakeUntil(c => !VBHelpers.IsOctalDigit(c));
            TakeIntTypeSuffix();
            return Stay(EndSymbol(VBSymbolType.IntegerLiteral));
        }

        /// <summary>
        /// Consumes the current character and maps it to an operator symbol type.
        /// </summary>
        /// <returns>
        /// The type registered for the character in the operator table, or
        /// <see cref="VBSymbolType.Unknown"/> if the character is not a known operator.
        /// </returns>
        private VBSymbolType Operator()
        {
            char op = CurrentCharacter;
            TakeCurrent();
            VBSymbolType ret;
            if (_operatorTable.TryGetValue(op, out ret))
            {
                return ret;
            }
            return VBSymbolType.Unknown;
        }

        /// <summary>
        /// Consumes an optional integer type suffix: an optional 'U' followed by an optional S, I or L
        /// (all case-insensitive).
        /// </summary>
        /// <remarks>
        /// The 'U' is consumed even when it is not followed by S, I or L.
        /// </remarks>
        private void TakeIntTypeSuffix()
        {
            // Take the "U" in US, UI, UL
            if (Char.ToLowerInvariant(CurrentCharacter) == 'u')
            {
                TakeCurrent(); // Unsigned Prefix
            }

            // Take the S, I or L integer suffix
            if (IsIntegerSuffix(CurrentCharacter))
            {
                TakeCurrent();
            }
        }

        /// <summary>
        /// Determines whether <paramref name="chr"/> is an integer type suffix: S, I or L in either case.
        /// </summary>
        /// <param name="chr">The character to test.</param>
        /// <returns><c>true</c> if the character is an integer type suffix; otherwise <c>false</c>.</returns>
        private static bool IsIntegerSuffix(char chr)
        {
            chr = Char.ToLowerInvariant(chr);
            return chr == 's' || chr == 'i' || chr == 'l';
        }

        /// <summary>
        /// Consumes the remainder of the current line (stopping in front of the newline) as a comment.
        /// </summary>
        /// <returns>A result that stays in the current state and carries a Comment symbol.</returns>
        private StateResult CommentBody()
        {
            TakeUntil(ParserHelpers.IsNewLine);
            return Stay(EndSymbol(VBSymbolType.Comment));
        }

        /// <summary>
        /// Lexes an identifier, an escaped identifier ("[name]") or a keyword.
        /// </summary>
        /// <remarks>
        /// The "Rem" keyword starts a comment, so in that case the rest of the line is folded into a
        /// Comment symbol instead.
        /// </remarks>
        /// <returns>A result that stays in the current state and carries the identifier, keyword or comment symbol.</returns>
        private StateResult Identifier()
        {
            bool isEscaped = false;
            if (CurrentCharacter == '[')
            {
                TakeCurrent();
                isEscaped = true;
            }
            TakeUntil(c => !ParserHelpers.IsIdentifierPart(c));

            // If we're escaped, take the ']'
            if (isEscaped)
            {
                if (CurrentCharacter == ']')
                {
                    TakeCurrent();
                }
            }

            // Check for Keywords and build the symbol
            string content = Buffer.ToString();
            VBKeyword? keyword = VBKeywordDetector.GetKeyword(content);
            if (keyword == VBKeyword.Rem)
            {
                return CommentBody();
            }

            // The symbol is constructed directly (rather than via EndSymbol) so the keyword can be attached.
            VBSymbol sym = new VBSymbol(CurrentStart, content, keyword == null ? VBSymbolType.Identifier : VBSymbolType.Keyword)
            {
                Keyword = keyword
            };

            // NOTE(review): presumably resets the buffer/start position for the next symbol, which
            // EndSymbol would otherwise have done - confirm against the base tokenizer.
            StartSymbol();

            return Stay(sym);
        }

        /// <summary>
        /// Determines whether an identifier (plain or escaped) starts at the current character.
        /// </summary>
        /// <returns><c>true</c> if an identifier starts here; otherwise <c>false</c>.</returns>
        private bool IsIdentifierStart()
        {
            if (CurrentCharacter == '_')
            {
                // VB Spec §2.2:
                //  If an identifier begins with an underscore, it must contain at least one other valid identifier character to disambiguate it from a line continuation.
                return ParserHelpers.IsIdentifierPart(Peek());
            }
            if (CurrentCharacter == '[')
            {
                // Escaped identifier: '[' must be followed directly by an identifier character.
                return ParserHelpers.IsIdentifierPart(Peek());
            }
            return ParserHelpers.IsIdentifierStart(CurrentCharacter);
        }
    }
}
384