1/* 2 * Copyright (C) 2009 Google Inc. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions are 6 * met: 7 * 8 * * Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * * Redistributions in binary form must reproduce the above 11 * copyright notice, this list of conditions and the following disclaimer 12 * in the documentation and/or other materials provided with the 13 * distribution. 14 * * Neither the name of Google Inc. nor the names of its 15 * contributors may be used to endorse or promote products derived from 16 * this software without specific prior written permission. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 */ 30 31// Generate js file as follows: 32// 33// re2c -isc WebCore/inspector/front-end/SourceHTMLTokenizer.re2js \ 34// | sed 's|^yy\([^:]*\)*\:|case \1:|' \ 35// | sed 's|[*]cursor[+][+]|this._charAt(cursor++)|' \ 36// | sed 's|[[*][+][+]cursor|this._charAt(++cursor)|' \ 37// | sed 's|[*]cursor|this._charAt(cursor)|' \ 38// | sed 's|yych = \*\([^;]*\)|yych = this._charAt\1|' \ 39// | sed 's|goto case \([^;]*\)|{ gotoCase = \1; continue; }|' \ 40// | sed 's|unsigned\ int|var|' \ 41// | sed 's|var\ yych|case 1: var yych|' 42 43WebInspector.SourceHTMLTokenizer = function() 44{ 45 WebInspector.SourceTokenizer.call(this); 46 47 // The order is determined by the generated code. 48 this._lexConditions = { 49 INITIAL: 0, 50 COMMENT: 1, 51 DOCTYPE: 2, 52 TAG: 3, 53 DSTRING: 4, 54 SSTRING: 5 55 }; 56 this.case_INITIAL = 1000; 57 this.case_COMMENT = 1001; 58 this.case_DOCTYPE = 1002; 59 this.case_TAG = 1003; 60 this.case_DSTRING = 1004; 61 this.case_SSTRING = 1005; 62 63 this._parseConditions = { 64 INITIAL: 0, 65 ATTRIBUTE: 1, 66 ATTRIBUTE_VALUE: 2, 67 LINKIFY: 4, 68 A_NODE: 8, 69 SCRIPT: 16, 70 STYLE: 32 71 }; 72 73 this.initialCondition = { lexCondition: this._lexConditions.INITIAL, parseCondition: this._parseConditions.INITIAL }; 74 this.condition = this.initialCondition; 75} 76 77WebInspector.SourceHTMLTokenizer.prototype = { 78 set line(line) { 79 if (this._internalJavaScriptTokenizer) { 80 var match = /<\/script/i.exec(line); 81 if (match) { 82 this._internalJavaScriptTokenizer.line = line.substring(0, match.index); 83 } else 84 this._internalJavaScriptTokenizer.line = line; 85 } else if (this._internalCSSTokenizer) { 86 var match = /<\/style/i.exec(line); 87 if (match) { 88 this._internalCSSTokenizer.line = line.substring(0, match.index); 89 } else 90 this._internalCSSTokenizer.line = line; 91 } 92 this._line = line; 93 }, 94 95 _isExpectingAttribute: function() 96 { 97 return this._condition.parseCondition & this._parseConditions.ATTRIBUTE; 98 }, 99 100 _isExpectingAttributeValue: function() 101 { 102 return this._condition.parseCondition & this._parseConditions.ATTRIBUTE_VALUE; 103 }, 104 105 _setExpectingAttribute: function() 106 { 107 if (this._isExpectingAttributeValue()) 108 this._condition.parseCondition ^= this._parseConditions.ATTRIBUTE_VALUE; 109 this._condition.parseCondition |= this._parseConditions.ATTRIBUTE; 110 }, 111 112 _setExpectingAttributeValue: function() 113 { 114 if (this._isExpectingAttribute()) 115 this._condition.parseCondition ^= this._parseConditions.ATTRIBUTE; 116 this._condition.parseCondition |= this._parseConditions.ATTRIBUTE_VALUE; 117 }, 118 119 _stringToken: function(cursor, stringEnds) 120 { 121 if (!this._isExpectingAttributeValue()) { 122 this.tokenType = null; 123 return cursor; 124 } 125 this.tokenType = this._attrValueTokenType(); 126 if (stringEnds) 127 this._setExpectingAttribute(); 128 return cursor; 129 }, 130 131 _attrValueTokenType: function() 132 { 133 if (this._condition.parseCondition & this._parseConditions.LINKIFY) { 134 if (this._condition.parseCondition & this._parseConditions.A_NODE) 135 return "html-external-link"; 136 return "html-resource-link"; 137 } 138 return "html-attribute-value"; 139 }, 140 141 scriptStarted: function(cursor) 142 { 143 if (!this._internalJavaScriptTokenizer) { 144 this._internalJavaScriptTokenizer = WebInspector.SourceTokenizer.Registry.getInstance().getTokenizer("text/javascript"); 145 this._condition.internalJavaScriptTokenizerCondition = this._internalJavaScriptTokenizer.initialCondition; 146 } 147 }, 148 149 scriptEnded: function(cursor) 150 { 151 }, 152 153 styleSheetStarted: function(cursor) 154 { 155 if (!this._internalCSSTokenizer) { 156 this._internalCSSTokenizer = WebInspector.SourceTokenizer.Registry.getInstance().getTokenizer("text/css"); 157 this._condition.internalCSSTokenizerCondition = this._internalCSSTokenizer.initialCondition; 158 } 159 }, 160 161 styleSheetEnded: function(cursor) 162 { 163 }, 164 165 nextToken: function(cursor) 166 { 167 if (this._internalJavaScriptTokenizer) { 168 // Re-set line to force </script> detection first. 169 this.line = this._line; 170 if (cursor !== this._internalJavaScriptTokenizer._line.length) { 171 // Tokenizer is stateless, so restore its condition before tokenizing and save it after. 172 this._internalJavaScriptTokenizer.condition = this._condition.internalJavaScriptTokenizerCondition; 173 var result = this._internalJavaScriptTokenizer.nextToken(cursor); 174 this.tokenType = this._internalJavaScriptTokenizer.tokenType; 175 this._condition.internalJavaScriptTokenizerCondition = this._internalJavaScriptTokenizer.condition; 176 return result; 177 } else if (cursor !== this._line.length) 178 delete this._internalJavaScriptTokenizer; 179 } else if (this._internalCSSTokenizer) { 180 // Re-set line to force </style> detection first. 181 this.line = this._line; 182 if (cursor !== this._internalCSSTokenizer._line.length) { 183 // Tokenizer is stateless, so restore its condition before tokenizing and save it after. 184 this._internalCSSTokenizer.condition = this._condition.internalCSSTokenizerCondition; 185 var result = this._internalCSSTokenizer.nextToken(cursor); 186 this.tokenType = this._internalCSSTokenizer.tokenType; 187 this._condition.internalCSSTokenizerCondition = this._internalCSSTokenizer.condition; 188 return result; 189 } else if (cursor !== this._line.length) 190 delete this._internalCSSTokenizer; 191 } 192 193 var cursorOnEnter = cursor; 194 var gotoCase = 1; 195 while (1) { 196 switch (gotoCase) 197 // Following comment is replaced with generated state machine. 198 /*!re2c 199 re2c:define:YYCTYPE = "var"; 200 re2c:define:YYCURSOR = cursor; 201 re2c:define:YYGETCONDITION = "this.getLexCondition"; 202 re2c:define:YYSETCONDITION = "this.setLexCondition"; 203 re2c:condprefix = "case this.case_"; 204 re2c:condenumprefix = "this._lexConditions."; 205 re2c:yyfill:enable = 0; 206 re2c:labelprefix = "case "; 207 re2c:indent:top = 2; 208 re2c:indent:string = " "; 209 210 CommentContent = ([^-\r\n] | ("--" [^>]))*; 211 Comment = "<!--" CommentContent "-->"; 212 CommentStart = "<!--" CommentContent [\r\n]; 213 CommentEnd = CommentContent "-->"; 214 215 DocTypeStart = "<!" [Dd] [Oo] [Cc] [Tt] [Yy] [Pp] [Ee]; 216 DocTypeContent = [^\r\n>]*; 217 218 ScriptStart = "<" [Ss] [Cc] [Rr] [Ii] [Pp] [Tt]; 219 ScriptEnd = "</" [Ss] [Cc] [Rr] [Ii] [Pp] [Tt]; 220 221 StyleStart = "<" [Ss] [Tt] [Yy] [Ll] [Ee]; 222 StyleEnd = "</" [Ss] [Tt] [Yy] [Ll] [Ee]; 223 224 LT = "<" | "</"; 225 GT = ">"; 226 EqualSign = "="; 227 228 DoubleStringContent = [^\r\n\"]*; 229 SingleStringContent = [^\r\n\']*; 230 StringLiteral = "\"" DoubleStringContent "\"" | "'" SingleStringContent "'"; 231 DoubleStringStart = "\"" DoubleStringContent [\r\n]; 232 DoubleStringEnd = DoubleStringContent "\""; 233 SingleStringStart = "'" SingleStringContent [\r\n]; 234 SingleStringEnd = SingleStringContent "'"; 235 236 Identifier = [^ \r\n"'<>\[\]=]+; 237 238 <INITIAL> Comment { this.tokenType = "html-comment"; return cursor; } 239 <INITIAL> CommentStart => COMMENT { this.tokenType = "html-comment"; return cursor; } 240 <COMMENT> CommentContent => COMMENT { this.tokenType = "html-comment"; return cursor; } 241 <COMMENT> CommentEnd => INITIAL { this.tokenType = "html-comment"; return cursor; } 242 243 <INITIAL> DocTypeStart => DOCTYPE { this.tokenType = "html-doctype"; return cursor; } 244 <DOCTYPE> DocTypeContent => DOCTYPE { this.tokenType = "html-doctype"; return cursor; } 245 <DOCTYPE> GT => INITIAL { this.tokenType = "html-doctype"; return cursor; } 246 247 <INITIAL> ScriptStart => TAG 248 { 249 if (this._condition.parseCondition & this._parseConditions.SCRIPT) { 250 // Do not tokenize script tag contents, keep lexer state, even though processing "<". 251 this.setLexCondition(this._lexConditions.INITIAL); 252 this.tokenType = null; 253 return cursor; 254 } 255 this.tokenType = "html-tag"; 256 this._condition.parseCondition = this._parseConditions.SCRIPT; 257 this._setExpectingAttribute(); 258 return cursor; 259 } 260 261 <INITIAL> ScriptEnd => TAG 262 { 263 this.tokenType = "html-tag"; 264 this._condition.parseCondition = this._parseConditions.INITIAL; 265 this.scriptEnded(cursor - 8); 266 return cursor; 267 } 268 269 <INITIAL> StyleStart => TAG 270 { 271 if (this._condition.parseCondition & this._parseConditions.STYLE) { 272 // Do not tokenize style tag contents, keep lexer state, even though processing "<". 273 this.setLexCondition(this._lexConditions.INITIAL); 274 this.tokenType = null; 275 return cursor; 276 } 277 this.tokenType = "html-tag"; 278 this._condition.parseCondition = this._parseConditions.STYLE; 279 this._setExpectingAttribute(); 280 return cursor; 281 } 282 283 <INITIAL> StyleEnd => TAG 284 { 285 this.tokenType = "html-tag"; 286 this._condition.parseCondition = this._parseConditions.INITIAL; 287 this.styleEnded(cursor - 7); 288 return cursor; 289 } 290 291 <INITIAL> LT => TAG 292 { 293 if (this._condition.parseCondition & (this._parseConditions.SCRIPT | this._parseConditions.STYLE)) { 294 // Do not tokenize script and style tag contents, keep lexer state, even though processing "<". 295 this.setLexCondition(this._lexConditions.INITIAL); 296 this.tokenType = null; 297 return cursor; 298 } 299 300 this._condition.parseCondition = this._parseConditions.INITIAL; 301 this.tokenType = "html-tag"; 302 return cursor; 303 } 304 305 <TAG> GT => INITIAL 306 { 307 this.tokenType = "html-tag"; 308 if (this._condition.parseCondition & this._parseConditions.SCRIPT) { 309 this.scriptStarted(cursor); 310 // Do not tokenize script tag contents. 311 return cursor; 312 } 313 314 if (this._condition.parseCondition & this._parseConditions.STYLE) { 315 this.styleSheetStarted(cursor); 316 // Do not tokenize style tag contents. 317 return cursor; 318 } 319 320 this._condition.parseCondition = this._parseConditions.INITIAL; 321 return cursor; 322 } 323 324 <TAG> StringLiteral { return this._stringToken(cursor, true); } 325 <TAG> DoubleStringStart => DSTRING { return this._stringToken(cursor); } 326 <DSTRING> DoubleStringContent => DSTRING { return this._stringToken(cursor); } 327 <DSTRING> DoubleStringEnd => TAG { return this._stringToken(cursor, true); } 328 <TAG> SingleStringStart => SSTRING { return this._stringToken(cursor); } 329 <SSTRING> SingleStringContent => SSTRING { return this._stringToken(cursor); } 330 <SSTRING> SingleStringEnd => TAG { return this._stringToken(cursor, true); } 331 332 <TAG> EqualSign => TAG 333 { 334 if (this._isExpectingAttribute()) 335 this._setExpectingAttributeValue(); 336 this.tokenType = null; 337 return cursor; 338 } 339 340 <TAG> Identifier 341 { 342 if (this._condition.parseCondition === this._parseConditions.SCRIPT || this._condition.parseCondition === this._parseConditions.STYLE) { 343 // Fall through if expecting attributes. 344 this.tokenType = null; 345 return cursor; 346 } 347 348 if (this._condition.parseCondition === this._parseConditions.INITIAL) { 349 this.tokenType = "html-tag"; 350 this._setExpectingAttribute(); 351 var token = this._line.substring(cursorOnEnter, cursor); 352 if (token === "a") 353 this._condition.parseCondition |= this._parseConditions.A_NODE; 354 else if (this._condition.parseCondition & this._parseConditions.A_NODE) 355 this._condition.parseCondition ^= this._parseConditions.A_NODE; 356 } else if (this._isExpectingAttribute()) { 357 var token = this._line.substring(cursorOnEnter, cursor); 358 if (token === "href" || token === "src") 359 this._condition.parseCondition |= this._parseConditions.LINKIFY; 360 else if (this._condition.parseCondition |= this._parseConditions.LINKIFY) 361 this._condition.parseCondition ^= this._parseConditions.LINKIFY; 362 this.tokenType = "html-attribute-name"; 363 } else if (this._isExpectingAttributeValue()) 364 this.tokenType = this._attrValueTokenType(); 365 else 366 this.tokenType = null; 367 return cursor; 368 } 369 <*> [^] { this.tokenType = null; return cursor; } 370 */ 371 } 372 } 373} 374 375WebInspector.SourceHTMLTokenizer.prototype.__proto__ = WebInspector.SourceTokenizer.prototype; 376