1 // Copyright (c) Microsoft Corporation. All rights reserved. See License.txt in the project root for license information.
2 
3 using System.Collections.Generic;
4 using System.Diagnostics;
5 using System.Diagnostics.CodeAnalysis;
6 using System.Globalization;
7 using System.Linq;
8 using System.Text;
9 using System.Web.Razor.Parser;
10 using System.Web.Razor.Parser.SyntaxTree;
11 using System.Web.Razor.Resources;
12 using System.Web.Razor.Text;
13 using System.Web.Razor.Tokenizer.Symbols;
14 
15 namespace System.Web.Razor.Tokenizer
16 {
17     public abstract partial class Tokenizer<TSymbol, TSymbolType> : StateMachine<TSymbol>, ITokenizer
18         where TSymbol : SymbolBase<TSymbolType>
19     {
/// <summary>
/// Initializes the tokenizer over the given document and prepares it to build its first symbol.
/// </summary>
/// <param name="source">The document to read characters from.</param>
/// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
[SuppressMessage("Microsoft.Reliability", "CA2000:Dispose objects before losing scope", Justification = "TextDocumentReader does not require disposal")]
protected Tokenizer(ITextDocument source)
{
    if (source == null)
    {
        throw new ArgumentNullException("source");
    }

    // All three members must be assigned before StartSymbol() runs:
    // it clears Buffer and CurrentErrors and reads the location from Source.
    CurrentErrors = new List<RazorError>();
    Buffer = new StringBuilder();
    Source = new TextDocumentReader(source);

    StartSymbol();
}
32 
/// <summary>Reader over the document that was passed to the constructor.</summary>
public TextDocumentReader Source { get; private set; }

/// <summary>Characters taken so far for the symbol currently being built.</summary>
protected StringBuilder Buffer { get; private set; }

/// <summary>True when peeking at <see cref="Source"/> yields -1, i.e. there is nothing left to read.</summary>
protected bool EndOfFile
{
    get
    {
        int next = Source.Peek();
        return next == -1;
    }
}

/// <summary>Errors collected for the symbol currently being built; cleared by <c>StartSymbol</c>.</summary>
protected IList<RazorError> CurrentErrors { get; private set; }

/// <summary>Symbol type given to the <c>*</c> characters of a Razor comment.</summary>
public abstract TSymbolType RazorCommentStarType { get; }

/// <summary>Symbol type given to the body text of a Razor comment.</summary>
public abstract TSymbolType RazorCommentType { get; }

/// <summary>Symbol type given to the <c>@</c> that follows the closing <c>*</c> of a Razor comment.</summary>
public abstract TSymbolType RazorCommentTransitionType { get; }

/// <summary>True when <see cref="Buffer"/> holds at least one character.</summary>
protected bool HaveContent
{
    get
    {
        int buffered = Buffer.Length;
        return buffered > 0;
    }
}

/// <summary>
/// The character at the current read position, without consuming it.
/// Yields <c>'\0'</c> when the source is at its end.
/// </summary>
protected char CurrentCharacter
{
    get
    {
        int next = Source.Peek();
        if (next == -1)
        {
            return '\0';
        }
        return (char)next;
    }
}

/// <summary>The location currently reported by <see cref="Source"/>.</summary>
protected SourceLocation CurrentLocation
{
    get
    {
        return Source.Location;
    }
}

/// <summary>The location at which the symbol currently being built started.</summary>
protected SourceLocation CurrentStart { get; private set; }
68 
/// <summary>
/// Produces the next symbol from the source.
/// </summary>
/// <returns>
/// The symbol returned by <c>Turn()</c>, or null when the source is already at its end.
/// </returns>
public virtual TSymbol NextSymbol()
{
    // Pre-condition: nothing may be left in the buffer from a previous symbol
    Debug.Assert(Buffer.Length == 0);
    StartSymbol();

    TSymbol result = null;
    if (!EndOfFile)
    {
        result = Turn();

        // Post-condition: whatever was buffered must have been emitted as part of a symbol
        Debug.Assert(Buffer.Length == 0);
    }

    return result;
}
86 
/// <summary>
/// Puts the state machine back into its start state.
/// The buffer, the collected errors and the source position are left as they are.
/// </summary>
public void Reset()
{
    this.CurrentState = this.StartState;
}
91 
/// <summary>
/// Builds the concrete symbol object for a finished run of characters.
/// Called by <c>EndSymbol</c> only when the buffer is non-empty.
/// </summary>
/// <param name="start">Location at which the symbol starts.</param>
/// <param name="content">The buffered text that makes up the symbol.</param>
/// <param name="type">The type to assign to the symbol.</param>
/// <param name="errors">The errors collected while the symbol was being built.</param>
/// <returns>The newly created symbol.</returns>
protected abstract TSymbol CreateSymbol(
    SourceLocation start,
    string content,
    TSymbolType type,
    IEnumerable<RazorError> errors);
93 
/// <summary>
/// Takes the current character and immediately ends the symbol with the given type.
/// </summary>
/// <param name="type">The type to assign to the resulting symbol.</param>
/// <returns>The symbol produced by <c>EndSymbol</c>; null if nothing was buffered.</returns>
protected TSymbol Single(TSymbolType type)
{
    TakeCurrent();
    TSymbol symbol = EndSymbol(type);
    return symbol;
}
99 
/// <summary>
/// Takes characters from the source for as long as they match <paramref name="input"/>,
/// one character at a time.
/// </summary>
/// <remarks>
/// There is no backtracking: characters that matched before a mismatch (or before the
/// end of the source) stay in the buffer and remain consumed.
/// </remarks>
/// <param name="input">The text to match against the source.</param>
/// <param name="caseSensitive">
/// When false, both sides are lower-cased (culture-invariant) before being compared.
/// </param>
/// <returns>True only if every character of <paramref name="input"/> was matched and taken.</returns>
protected bool TakeString(string input, bool caseSensitive)
{
    Func<char, char> charFilter = c => c;
    if (!caseSensitive)
    {
        // Fold case only for case-INsensitive matching, same as Lookahead does
        charFilter = Char.ToLowerInvariant;
    }

    int position = 0;
    while (!EndOfFile && position < input.Length && charFilter(CurrentCharacter) == charFilter(input[position]))
    {
        TakeCurrent();

        // Advance only after a successful match; incrementing inside the loop condition
        // would also count the mismatching character and could report a false match.
        position++;
    }
    return position == input.Length;
}
114 
/// <summary>
/// Begins a fresh symbol: discards buffered text and collected errors and
/// records the current source location as the start of the symbol.
/// </summary>
protected void StartSymbol()
{
    CurrentErrors.Clear();
    Buffer.Clear();
    CurrentStart = CurrentLocation;
}
121 
/// <summary>
/// Ends the symbol being built, using <c>CurrentStart</c> as its start location.
/// </summary>
/// <param name="type">The type to assign to the symbol.</param>
/// <returns>The created symbol, or null if nothing was buffered.</returns>
protected TSymbol EndSymbol(TSymbolType type)
{
    SourceLocation start = CurrentStart;
    return EndSymbol(start, type);
}
126 
/// <summary>
/// Ends the symbol being built and starts a new one.
/// </summary>
/// <param name="start">The start location to give the symbol.</param>
/// <param name="type">The type to assign to the symbol.</param>
/// <returns>
/// The symbol created from the buffered text and a snapshot of the collected errors,
/// or null if the buffer was empty.
/// </returns>
protected TSymbol EndSymbol(SourceLocation start, TSymbolType type)
{
    if (!HaveContent)
    {
        // Nothing buffered: no symbol, but the tokenizer state is still reset
        StartSymbol();
        return null;
    }

    string content = Buffer.ToString();
    RazorError[] errors = CurrentErrors.ToArray();
    TSymbol created = CreateSymbol(start, content, type, errors);

    StartSymbol();
    return created;
}
137 
/// <summary>
/// Re-opens a symbol that was already ended so that further content can be added to it.
/// The symbol's content is put in front of whatever is currently buffered and the
/// current start location is moved back to the symbol's start.
/// </summary>
/// <param name="previous">The symbol to resume; it must end exactly where the current symbol starts.</param>
/// <exception cref="InvalidOperationException">
/// <paramref name="previous"/> does not end at the current start position.
/// </exception>
protected void ResumeSymbol(TSymbol previous)
{
    // Only the symbol that directly precedes the current position can be resumed
    if (CurrentStart.AbsoluteIndex != previous.Start.AbsoluteIndex + previous.Content.Length)
    {
        throw new InvalidOperationException(RazorResources.Tokenizer_CannotResumeSymbolUnlessIsPrevious);
    }

    // The resumed symbol now starts where the previous one did
    CurrentStart = previous.Start;

    // Put the old content in front of anything buffered since
    Buffer.Insert(0, previous.Content);
}
157 
/// <summary>
/// Takes characters until <paramref name="predicate"/> accepts the current character
/// or the source runs out. The accepted character itself is not taken.
/// </summary>
/// <param name="predicate">Test that identifies the character to stop at.</param>
/// <returns>True if a stopping character was found; false if the end of the source was reached.</returns>
protected bool TakeUntil(Func<char, bool> predicate)
{
    while (true)
    {
        if (EndOfFile)
        {
            // Ran out of input before the predicate matched
            return false;
        }
        if (predicate(CurrentCharacter))
        {
            return true;
        }
        TakeCurrent();
    }
}
169 
/// <summary>
/// Builds a predicate that accepts <paramref name="character"/> as well as anything
/// <c>ParserHelpers</c> classifies as whitespace or as a newline.
/// </summary>
/// <param name="character">The specific character the predicate should accept.</param>
/// <returns>The predicate.</returns>
protected Func<char, bool> CharOrWhiteSpace(char character)
{
    return c =>
    {
        if (c == character)
        {
            return true;
        }
        return ParserHelpers.IsWhitespace(c) || ParserHelpers.IsNewLine(c);
    };
}
174 
/// <summary>
/// Appends the current character to the buffer and advances the source by one character.
/// Does nothing when the source is at its end.
/// </summary>
protected void TakeCurrent()
{
    if (!EndOfFile)
    {
        Buffer.Append(CurrentCharacter);
        MoveNext();
    }
}
184 
/// <summary>
/// Advances the source by one character without adding that character to the buffer.
/// </summary>
protected void MoveNext()
{
#if DEBUG
    // DEBUG builds keep a transcript of every character passed over, for the debugger display
    char consumed = CurrentCharacter;
    _read.Append(consumed);
#endif
    Source.Read();
}
192 
/// <summary>
/// Takes <paramref name="expected"/> from the source if, and only if, the source
/// matches it in full at the current position.
/// </summary>
/// <param name="expected">The text to look for.</param>
/// <param name="caseSensitive">Whether the comparison distinguishes case.</param>
/// <returns>True if the text matched and was taken; otherwise false.</returns>
protected bool TakeAll(string expected, bool caseSensitive)
{
    bool matched = Lookahead(expected, true, caseSensitive);
    return matched;
}
197 
/// <summary>
/// Checks whether the source matches <paramref name="expected"/> at the current position,
/// without taking any of it.
/// </summary>
/// <param name="expected">The text to look for.</param>
/// <param name="caseSensitive">Whether the comparison distinguishes case.</param>
/// <returns>True if the text matched; otherwise false.</returns>
protected bool At(string expected, bool caseSensitive)
{
    bool matched = Lookahead(expected, false, caseSensitive);
    return matched;
}
202 
/// <summary>
/// Looks at the character that follows the current one.
/// </summary>
/// <remarks>
/// The read happens inside a lookahead scope that is never accepted, the same way
/// <c>Lookahead</c> rejects a lookahead, so the step forward is not meant to persist.
/// </remarks>
/// <returns>The character after the current one, as reported by <c>CurrentCharacter</c>.</returns>
protected char Peek()
{
    char following;
    using (Source.BeginLookahead())
    {
        MoveNext();
        following = CurrentCharacter;
    }
    return following;
}
211 
/// <summary>
/// State that expects the <c>*</c> opening a Razor comment. It emits that character as a
/// <c>RazorCommentStarType</c> symbol and continues with <c>RazorCommentBody</c>.
/// </summary>
/// <returns>
/// A transition to <c>RazorCommentBody</c> carrying the star symbol, or a transition back to
/// the start state when the current character is not <c>*</c>.
/// </returns>
protected StateResult AfterRazorCommentTransition()
{
    if (CurrentCharacter == '*')
    {
        AssertCurrent('*');
        TakeCurrent();
        return Transition(EndSymbol(RazorCommentStarType), RazorCommentBody);
    }

    // We've been moved since last time we were asked for a symbol... reset the state
    return Transition(StartState);
}
223 
/// <summary>
/// State that reads the body of a Razor comment up to the closing <c>*@</c>.
/// </summary>
/// <remarks>
/// A <c>*</c> that is not followed by <c>@</c> is kept as part of the body. When the closing
/// pair is found, the body (if any), the star and the <c>@</c> are emitted as three separate
/// symbols by a chain of follow-up states.
/// </remarks>
/// <returns>The state result describing the symbol to emit (if any) and the next state.</returns>
protected StateResult RazorCommentBody()
{
    TakeUntil(c => c == '*');
    if (CurrentCharacter != '*')
    {
        // No star found: emit whatever body was collected and go back to the start state
        return Transition(EndSymbol(RazorCommentType), StartState);
    }

    // Step over the star without buffering it yet; where it belongs depends on what follows
    char star = CurrentCharacter;
    SourceLocation start = CurrentLocation;
    MoveNext();

    if (EndOfFile || CurrentCharacter != '@')
    {
        // Just a star inside the comment: it is body text, keep scanning
        Buffer.Append(star);
        return Stay();
    }

    // Last step: emit the '@' that closes the comment
    State emitTransition = () =>
    {
        if (CurrentCharacter != '@')
        {
            // We've been moved since last time we were asked for a symbol... reset the state
            return Transition(StartState);
        }
        TakeCurrent();
        return Transition(EndSymbol(RazorCommentTransitionType), StartState);
    };

    // Middle step: emit the star, positioned where it was actually read
    State emitStar = () =>
    {
        Buffer.Append(star);
        return Transition(EndSymbol(start, RazorCommentStarType), emitTransition);
    };

    // First step: flush the comment body, if there is one
    if (HaveContent)
    {
        return Transition(EndSymbol(RazorCommentType), emitStar);
    }
    return Transition(emitStar);
}
266 
/// <summary>
/// Tests whether the source matches <paramref name="expected"/> at the current position,
/// optionally taking the matched text.
/// </summary>
/// <remarks>
/// The comparison runs inside a lookahead scope. The scope is accepted only when
/// <paramref name="takeIfMatch"/> is true and the whole text matched; in every other case
/// the lookahead is rejected. If a take attempt fails part-way, the buffer is restored to
/// the content it had on entry.
/// </remarks>
/// <param name="expected">The text to look for. An empty string never matches.</param>
/// <param name="takeIfMatch">
/// True to add the matched characters to the buffer and keep the source advanced on success;
/// false to only test.
/// </param>
/// <param name="caseSensitive">
/// When false, both sides are lower-cased (culture-invariant) before being compared.
/// </param>
/// <returns>True if the whole of <paramref name="expected"/> matched; otherwise false.</returns>
private bool Lookahead(string expected, bool takeIfMatch, bool caseSensitive)
{
    Func<char, char> filter = c => c;
    if (!caseSensitive)
    {
        filter = Char.ToLowerInvariant;
    }

    // Cheap rejection on the first character, before a lookahead scope is opened
    if (expected.Length == 0 || filter(CurrentCharacter) != filter(expected[0]))
    {
        return false;
    }

    // Capture the current buffer content in case we have to backtrack
    string oldBuffer = null;
    if (takeIfMatch)
    {
        oldBuffer = Buffer.ToString();
    }

    using (LookaheadToken lookahead = Source.BeginLookahead())
    {
        for (int i = 0; i < expected.Length; i++)
        {
            if (filter(CurrentCharacter) != filter(expected[i]))
            {
                if (takeIfMatch)
                {
                    // Clear the buffer and put the old buffer text back
                    Buffer.Clear();
                    Buffer.Append(oldBuffer);
                }
                // Return without accepting lookahead (thus rejecting it)
                return false;
            }
            if (takeIfMatch)
            {
                TakeCurrent();
            }
            else
            {
                MoveNext();
            }
        }
        if (takeIfMatch)
        {
            lookahead.Accept();
        }
    }
    return true;
}
318 
/// <summary>
/// Debug-only check that the current character is the one the caller assumes it to be.
/// Calls to this method are compiled only when DEBUG is defined.
/// </summary>
/// <param name="current">The character the caller expects at the current position.</param>
[SuppressMessage("Microsoft.Performance", "CA1822:MarkMembersAsStatic", Justification = "This only occurs in Release builds, where this method is empty by design")]
[Conditional("DEBUG")]
internal void AssertCurrent(char current)
{
    Debug.Assert(
        CurrentCharacter == current,
        "CurrentCharacter Assumption violated",
        "Assumed that the current character would be {0}, but it is actually {1}",
        current,
        CurrentCharacter);
}
325 
/// <summary>
/// Explicit <c>ITokenizer</c> implementation: forwards to the strongly-typed
/// <c>NextSymbol()</c> and hands the result back as an <c>ISymbol</c>.
/// </summary>
ISymbol ITokenizer.NextSymbol()
{
    TSymbol next = NextSymbol();
    return (ISymbol)next;
}
330     }
331 
332 #if DEBUG
/// <summary>
/// Debugger support for the tokenizer; this part of the class exists only in DEBUG builds.
/// </summary>
[DebuggerDisplay("{DebugDisplay}")]
public partial class Tokenizer<TSymbol, TSymbolType>
{
    // Transcript of the characters passed over by MoveNext()
    private StringBuilder _read = new StringBuilder();

    /// <summary>
    /// Text shown by the debugger: the characters read so far, the current character
    /// and the text still to come, each in square brackets.
    /// </summary>
    public string DebugDisplay
    {
        get
        {
            string consumed = _read.ToString();
            char current = CurrentCharacter;
            string rest = Remaining;
            return String.Format(CultureInfo.InvariantCulture, "[{0}] [{1}] [{2}]", consumed, current, rest);
        }
    }

    /// <summary>
    /// The text from the current position to the end of the source.
    /// The source is read to its end and then sought back by the number of characters read.
    /// </summary>
    public string Remaining
    {
        get
        {
            string rest = Source.ReadToEnd();
            int length = rest.Length;
            Source.Seek(-length);
            return rest;
        }
    }
}
353 #endif
354 }
355