// Copyright (c) Microsoft Corporation. All rights reserved. See License.txt in the project root for license information.

using System.Collections.Generic;
using System.Diagnostics;
using System.Diagnostics.CodeAnalysis;
using System.Globalization;
using System.Linq;
using System.Text;
using System.Web.Razor.Parser;
using System.Web.Razor.Parser.SyntaxTree;
using System.Web.Razor.Resources;
using System.Web.Razor.Text;
using System.Web.Razor.Tokenizer.Symbols;

namespace System.Web.Razor.Tokenizer
{
    /// <summary>
    /// Base class for tokenizers: reads characters from a <see cref="TextDocumentReader"/>,
    /// accumulates them in <see cref="Buffer"/> and turns the accumulated text into symbols
    /// via <see cref="CreateSymbol"/>.
    /// </summary>
    /// <typeparam name="TSymbol">The symbol type produced by this tokenizer.</typeparam>
    /// <typeparam name="TSymbolType">The type used to classify symbols.</typeparam>
    public abstract partial class Tokenizer<TSymbol, TSymbolType> : StateMachine<TSymbol>, ITokenizer
        where TSymbol : SymbolBase<TSymbolType>
    {
        /// <summary>
        /// Creates a tokenizer that reads from <paramref name="source"/>.
        /// </summary>
        /// <param name="source">The document to tokenize.</param>
        /// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
        [SuppressMessage("Microsoft.Reliability", "CA2000:Dispose objects before losing scope", Justification = "TextDocumentReader does not require disposal")]
        protected Tokenizer(ITextDocument source)
        {
            if (source == null)
            {
                throw new ArgumentNullException("source");
            }
            Source = new TextDocumentReader(source);
            Buffer = new StringBuilder();
            CurrentErrors = new List<RazorError>();
            StartSymbol();
        }

        /// <summary>The reader the tokenizer consumes characters from.</summary>
        public TextDocumentReader Source { get; private set; }

        /// <summary>The text accumulated so far for the symbol currently being built.</summary>
        protected StringBuilder Buffer { get; private set; }

        /// <summary>True when <see cref="Source"/> has no more characters (Peek returns -1).</summary>
        protected bool EndOfFile
        {
            get { return Source.Peek() == -1; }
        }

        /// <summary>The errors collected for the symbol currently being built.</summary>
        protected IList<RazorError> CurrentErrors { get; private set; }

        public abstract TSymbolType RazorCommentStarType { get; }
        public abstract TSymbolType RazorCommentType { get; }
        public abstract TSymbolType RazorCommentTransitionType { get; }

        /// <summary>True when <see cref="Buffer"/> contains at least one character.</summary>
        protected bool HaveContent
        {
            get { return Buffer.Length > 0; }
        }

        /// <summary>
        /// The next unread character of <see cref="Source"/>, or '\0' at end of file.
        /// </summary>
        protected char CurrentCharacter
        {
            get
            {
                int peek = Source.Peek();
                return peek == -1 ? '\0' : (char)peek;
            }
        }

        /// <summary>The current location of <see cref="Source"/>.</summary>
        protected SourceLocation CurrentLocation
        {
            get { return Source.Location; }
        }

        /// <summary>The location at which the symbol currently being built started.</summary>
        protected SourceLocation CurrentStart { get; private set; }

        /// <summary>
        /// Produces the next symbol by running the state machine (<c>Turn</c>).
        /// </summary>
        /// <returns>The next symbol, or null when the source is at end of file.</returns>
        public virtual TSymbol NextSymbol()
        {
            // Pre-Condition: Buffer should be empty at the start of Next()
            Debug.Assert(Buffer.Length == 0);
            StartSymbol();

            if (EndOfFile)
            {
                return null;
            }
            TSymbol sym = Turn();

            // Post-Condition: Buffer should be empty at the end of Next()
            Debug.Assert(Buffer.Length == 0);

            return sym;
        }

        /// <summary>Puts the state machine back into its start state.</summary>
        public void Reset()
        {
            CurrentState = StartState;
        }

        /// <summary>
        /// Creates a symbol instance from the accumulated content.
        /// </summary>
        /// <param name="start">Location at which the symbol starts.</param>
        /// <param name="content">The text of the symbol.</param>
        /// <param name="type">The classification of the symbol.</param>
        /// <param name="errors">The errors collected while building the symbol.</param>
        protected abstract TSymbol CreateSymbol(SourceLocation start, string content, TSymbolType type, IEnumerable<RazorError> errors);

        /// <summary>
        /// Takes the current character and ends the symbol with the given type.
        /// </summary>
        protected TSymbol Single(TSymbolType type)
        {
            TakeCurrent();
            return EndSymbol(type);
        }

        /// <summary>
        /// Takes characters from the source for as long as they match <paramref name="input"/>.
        /// Characters that matched before a mismatch remain taken (there is no backtracking).
        /// </summary>
        /// <param name="input">The text expected at the current position.</param>
        /// <param name="caseSensitive">
        /// True to compare characters exactly; false to compare them after invariant lower-casing.
        /// </param>
        /// <returns>True only if every character of <paramref name="input"/> was matched and taken.</returns>
        protected bool TakeString(string input, bool caseSensitive)
        {
            int position = 0;
            Func<char, char> charFilter = c => c;
            if (!caseSensitive)
            {
                // Same culture-invariant folding that Lookahead uses.
                charFilter = Char.ToLowerInvariant;
            }

            // Advance 'position' only after a successful match, so that a mismatch on the
            // final character is not reported as a complete match.
            while (!EndOfFile && position < input.Length && charFilter(CurrentCharacter) == charFilter(input[position]))
            {
                TakeCurrent();
                position++;
            }
            return position == input.Length;
        }

        /// <summary>
        /// Begins a new symbol: clears the buffer and the error list and records the
        /// current location as the symbol start.
        /// </summary>
        protected void StartSymbol()
        {
            Buffer.Clear();
            CurrentStart = CurrentLocation;
            CurrentErrors.Clear();
        }

        /// <summary>
        /// Ends the current symbol, using <see cref="CurrentStart"/> as its start location.
        /// </summary>
        protected TSymbol EndSymbol(TSymbolType type)
        {
            return EndSymbol(CurrentStart, type);
        }

        /// <summary>
        /// Ends the current symbol and starts a new one.
        /// </summary>
        /// <param name="start">The start location to give the created symbol.</param>
        /// <param name="type">The classification of the symbol.</param>
        /// <returns>The created symbol, or null when the buffer is empty.</returns>
        protected TSymbol EndSymbol(SourceLocation start, TSymbolType type)
        {
            TSymbol sym = null;
            if (HaveContent)
            {
                // Snapshot the errors: StartSymbol() below clears the live list.
                sym = CreateSymbol(start, Buffer.ToString(), type, CurrentErrors.ToArray());
            }
            StartSymbol();
            return sym;
        }

        /// <summary>
        /// Continues building on <paramref name="previous"/>: its content is placed in front of
        /// whatever is currently buffered and the symbol start is moved back to its start.
        /// </summary>
        /// <param name="previous">The symbol that ends exactly where the current symbol starts.</param>
        /// <exception cref="InvalidOperationException">
        /// <paramref name="previous"/> does not end at <see cref="CurrentStart"/>.
        /// </exception>
        protected void ResumeSymbol(TSymbol previous)
        {
            // Verify the symbol can be resumed
            if (previous.Start.AbsoluteIndex + previous.Content.Length != CurrentStart.AbsoluteIndex)
            {
                throw new InvalidOperationException(RazorResources.Tokenizer_CannotResumeSymbolUnlessIsPrevious);
            }

            // Reset the start point
            CurrentStart = previous.Start;

            // Capture the current buffer content
            string newContent = Buffer.ToString();

            // Clear the buffer, then put the old content back and add the new content to the end
            Buffer.Clear();
            Buffer.Append(previous.Content);
            Buffer.Append(newContent);
        }

        /// <summary>
        /// Takes characters until <paramref name="predicate"/> returns true for the current
        /// character or the end of file is reached. The matching character is not taken.
        /// </summary>
        /// <returns>True if the source is not at end of file when the loop stops; otherwise false.</returns>
        protected bool TakeUntil(Func<char, bool> predicate)
        {
            // Take all the characters up to the end character
            while (!EndOfFile && !predicate(CurrentCharacter))
            {
                TakeCurrent();
            }

            // Why did we end?
            return !EndOfFile;
        }

        /// <summary>
        /// Builds a predicate that is true for <paramref name="character"/>, for whitespace
        /// and for new-line characters (as defined by <see cref="ParserHelpers"/>).
        /// </summary>
        protected Func<char, bool> CharOrWhiteSpace(char character)
        {
            return c => c == character || ParserHelpers.IsWhitespace(c) || ParserHelpers.IsNewLine(c);
        }

        /// <summary>
        /// Appends the current character to the buffer and advances the source.
        /// Does nothing at end of file.
        /// </summary>
        protected void TakeCurrent()
        {
            if (EndOfFile)
            {
                return; // No-op
            }
            Buffer.Append(CurrentCharacter);
            MoveNext();
        }

        /// <summary>
        /// Advances the source by one character without adding it to the buffer.
        /// </summary>
        protected void MoveNext()
        {
#if DEBUG
            // Debug-only trace of every character read, shown by DebugDisplay.
            _read.Append(CurrentCharacter);
#endif
            Source.Read();
        }

        /// <summary>
        /// Takes <paramref name="expected"/> if the source is positioned at it; otherwise
        /// leaves the source position and the buffer as they were.
        /// </summary>
        /// <returns>True if the text matched and was taken.</returns>
        protected bool TakeAll(string expected, bool caseSensitive)
        {
            return Lookahead(expected, takeIfMatch: true, caseSensitive: caseSensitive);
        }

        /// <summary>
        /// Checks whether the source is positioned at <paramref name="expected"/> without taking it.
        /// </summary>
        protected bool At(string expected, bool caseSensitive)
        {
            return Lookahead(expected, takeIfMatch: false, caseSensitive: caseSensitive);
        }

        /// <summary>
        /// Returns the character after the current one ('\0' if there is none).
        /// The lookahead token is disposed without being accepted, which is expected to
        /// restore the source position.
        /// </summary>
        protected char Peek()
        {
            using (LookaheadToken lookahead = Source.BeginLookahead())
            {
                MoveNext();
                return CurrentCharacter;
            }
        }

        /// <summary>
        /// State entered after a Razor comment transition: expects '*' and emits it as a
        /// <see cref="RazorCommentStarType"/> symbol, then moves to <see cref="RazorCommentBody"/>.
        /// </summary>
        protected StateResult AfterRazorCommentTransition()
        {
            if (CurrentCharacter != '*')
            {
                // We've been moved since last time we were asked for a symbol... reset the state
                return Transition(StartState);
            }
            AssertCurrent('*');
            TakeCurrent();
            return Transition(EndSymbol(RazorCommentStarType), RazorCommentBody);
        }

        /// <summary>
        /// State that consumes the body of a Razor comment up to the closing "*@" sequence.
        /// A '*' that is not followed by '@' is kept as part of the comment body.
        /// </summary>
        protected StateResult RazorCommentBody()
        {
            TakeUntil(c => c == '*');
            if (CurrentCharacter == '*')
            {
                // Step over the '*' without buffering it yet: whether it belongs to the body
                // or to the terminator depends on the character that follows.
                char star = CurrentCharacter;
                SourceLocation start = CurrentLocation;
                MoveNext();
                if (!EndOfFile && CurrentCharacter == '@')
                {
                    // Terminator found: the next state emits the '*' symbol, the one after it the '@'.
                    State next = () =>
                    {
                        Buffer.Append(star);
                        return Transition(EndSymbol(start, RazorCommentStarType), () =>
                        {
                            if (CurrentCharacter != '@')
                            {
                                // We've been moved since last time we were asked for a symbol... reset the state
                                return Transition(StartState);
                            }
                            TakeCurrent();
                            return Transition(EndSymbol(RazorCommentTransitionType), StartState);
                        });
                    };

                    if (HaveContent)
                    {
                        return Transition(EndSymbol(RazorCommentType), next);
                    }
                    else
                    {
                        return Transition(next);
                    }
                }
                else
                {
                    // Lone '*': it is part of the comment body.
                    Buffer.Append(star);
                    return Stay();
                }
            }
            return Transition(EndSymbol(RazorCommentType), StartState);
        }

        /// <summary>
        /// Tests whether the source is positioned at <paramref name="expected"/>.
        /// </summary>
        /// <param name="expected">The text to look for. An empty string never matches.</param>
        /// <param name="takeIfMatch">
        /// True to take the text (buffer it and keep the advanced position) when it matches.
        /// </param>
        /// <param name="caseSensitive">False to compare after invariant lower-casing.</param>
        /// <returns>True if the whole of <paramref name="expected"/> matched.</returns>
        private bool Lookahead(string expected, bool takeIfMatch, bool caseSensitive)
        {
            Func<char, char> filter = c => c;
            if (!caseSensitive)
            {
                filter = Char.ToLowerInvariant;
            }

            if (expected.Length == 0 || filter(CurrentCharacter) != filter(expected[0]))
            {
                return false;
            }

            // Capture the current buffer content in case we have to backtrack
            string oldBuffer = null;
            if (takeIfMatch)
            {
                oldBuffer = Buffer.ToString();
            }

            using (LookaheadToken lookahead = Source.BeginLookahead())
            {
                for (int i = 0; i < expected.Length; i++)
                {
                    if (filter(CurrentCharacter) != filter(expected[i]))
                    {
                        if (takeIfMatch)
                        {
                            // Clear the buffer and put the old buffer text back
                            Buffer.Clear();
                            Buffer.Append(oldBuffer);
                        }
                        // Return without accepting lookahead (thus rejecting it)
                        return false;
                    }
                    if (takeIfMatch)
                    {
                        TakeCurrent();
                    }
                    else
                    {
                        MoveNext();
                    }
                }
                if (takeIfMatch)
                {
                    lookahead.Accept();
                }
            }
            return true;
        }

        /// <summary>
        /// Debug-only assertion that the current character is <paramref name="current"/>.
        /// </summary>
        [SuppressMessage("Microsoft.Performance", "CA1822:MarkMembersAsStatic", Justification = "This only occurs in Release builds, where this method is empty by design")]
        [Conditional("DEBUG")]
        internal void AssertCurrent(char current)
        {
            Debug.Assert(CurrentCharacter == current, "CurrentCharacter Assumption violated", "Assumed that the current character would be {0}, but it is actually {1}", current, CurrentCharacter);
        }

        ISymbol ITokenizer.NextSymbol()
        {
            return (ISymbol)NextSymbol();
        }
    }

#if DEBUG
    [DebuggerDisplay("{DebugDisplay}")]
    public partial class Tokenizer<TSymbol, TSymbolType>
    {
        // Every character passed to MoveNext(), kept for the debugger display only.
        private StringBuilder _read = new StringBuilder();

        /// <summary>Debugger view: "[read so far] [current character] [remaining text]".</summary>
        public string DebugDisplay
        {
            get { return String.Format(CultureInfo.InvariantCulture, "[{0}] [{1}] [{2}]", _read.ToString(), CurrentCharacter, Remaining); }
        }

        /// <summary>
        /// The unread remainder of the source. Reads to the end and then seeks back by the
        /// same number of characters so the position is left unchanged.
        /// </summary>
        public string Remaining
        {
            get
            {
                string remaining = Source.ReadToEnd();
                Source.Seek(-remaining.Length);
                return remaining;
            }
        }
    }
#endif
}