1 /* 2 * Copyright (c) 2005-2007 Henri Sivonen 3 * Copyright (c) 2007-2015 Mozilla Foundation 4 * Portions of comments Copyright 2004-2010 Apple Computer, Inc., Mozilla 5 * Foundation, and Opera Software ASA. 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a 8 * copy of this software and associated documentation files (the "Software"), 9 * to deal in the Software without restriction, including without limitation 10 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 11 * and/or sell copies of the Software, and to permit persons to whom the 12 * Software is furnished to do so, subject to the following conditions: 13 * 14 * The above copyright notice and this permission notice shall be included in 15 * all copies or substantial portions of the Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 22 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 23 * DEALINGS IN THE SOFTWARE. 24 */ 25 26 /* 27 * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT. 28 * Please edit Tokenizer.java instead and regenerate. 29 */ 30 31 #ifndef nsHtml5Tokenizer_h 32 #define nsHtml5Tokenizer_h 33 34 #include "nsIAtom.h" 35 #include "nsHtml5AtomTable.h" 36 #include "nsString.h" 37 #include "nsIContent.h" 38 #include "nsTraceRefcnt.h" 39 #include "jArray.h" 40 #include "nsHtml5DocumentMode.h" 41 #include "nsHtml5ArrayCopy.h" 42 #include "nsHtml5NamedCharacters.h" 43 #include "nsHtml5NamedCharactersAccel.h" 44 #include "nsHtml5Atoms.h" 45 #include "nsAHtml5TreeBuilderState.h" 46 #include "nsHtml5Macros.h" 47 #include "nsHtml5Highlighter.h" 48 #include "nsHtml5TokenizerLoopPolicies.h" 49 50 class nsHtml5StreamParser; 51 52 class nsHtml5TreeBuilder; 53 class nsHtml5MetaScanner; 54 class nsHtml5AttributeName; 55 class nsHtml5ElementName; 56 class nsHtml5HtmlAttributes; 57 class nsHtml5UTF16Buffer; 58 class nsHtml5StateSnapshot; 59 class nsHtml5Portability; 60 61 62 class nsHtml5Tokenizer 63 { 64 private: 65 static char16_t LT_GT[]; 66 static char16_t LT_SOLIDUS[]; 67 static char16_t RSQB_RSQB[]; 68 static char16_t REPLACEMENT_CHARACTER[]; 69 static char16_t LF[]; 70 static char16_t CDATA_LSQB[]; 71 static char16_t OCTYPE[]; 72 static char16_t UBLIC[]; 73 static char16_t YSTEM[]; 74 static staticJArray<char16_t,int32_t> TITLE_ARR; 75 static staticJArray<char16_t,int32_t> SCRIPT_ARR; 76 static staticJArray<char16_t,int32_t> STYLE_ARR; 77 static staticJArray<char16_t,int32_t> PLAINTEXT_ARR; 78 static staticJArray<char16_t,int32_t> XMP_ARR; 79 static staticJArray<char16_t,int32_t> TEXTAREA_ARR; 80 static staticJArray<char16_t,int32_t> IFRAME_ARR; 81 static staticJArray<char16_t,int32_t> NOEMBED_ARR; 82 static staticJArray<char16_t,int32_t> NOSCRIPT_ARR; 83 static staticJArray<char16_t,int32_t> NOFRAMES_ARR; 84 protected: 85 nsHtml5TreeBuilder* tokenHandler; 86 nsHtml5StreamParser* encodingDeclarationHandler; 87 bool lastCR; 88 int32_t stateSave; 89 private: 90 int32_t returnStateSave; 91 protected: 92 int32_t index; 93 private: 94 bool forceQuirks; 95 char16_t additional; 96 int32_t entCol; 97 int32_t firstCharKey; 98 int32_t lo; 99 int32_t hi; 100 int32_t candidate; 101 int32_t charRefBufMark; 102 protected: 103 int32_t value; 104 private: 105 bool seenDigits; 106 protected: 107 int32_t cstart; 108 private: 109 nsString* publicId; 110 nsString* systemId; 111 autoJArray<char16_t,int32_t> strBuf; 112 int32_t strBufLen; 113 autoJArray<char16_t,int32_t> charRefBuf; 114 int32_t charRefBufLen; 115 autoJArray<char16_t,int32_t> bmpChar; 116 autoJArray<char16_t,int32_t> astralChar; 117 protected: 118 nsHtml5ElementName* endTagExpectation; 119 private: 120 jArray<char16_t,int32_t> endTagExpectationAsArray; 121 protected: 122 bool endTag; 123 private: 124 nsHtml5ElementName* tagName; 125 protected: 126 nsHtml5AttributeName* attributeName; 127 private: 128 nsIAtom* doctypeName; 129 nsString* publicIdentifier; 130 nsString* systemIdentifier; 131 nsHtml5HtmlAttributes* attributes; 132 bool newAttributesEachTime; 133 bool shouldSuspend; 134 protected: 135 bool confident; 136 private: 137 int32_t line; 138 int32_t attributeLine; 139 nsHtml5AtomTable* interner; 140 bool viewingXmlSource; 141 public: 142 nsHtml5Tokenizer(nsHtml5TreeBuilder* tokenHandler, bool viewingXmlSource); 143 void setInterner(nsHtml5AtomTable* interner); 144 void initLocation(nsString* newPublicId, nsString* newSystemId); 145 bool isViewingXmlSource(); 146 void setStateAndEndTagExpectation(int32_t specialTokenizerState, nsIAtom* endTagExpectation); 147 void setStateAndEndTagExpectation(int32_t specialTokenizerState, nsHtml5ElementName* endTagExpectation); 148 private: 149 void endTagExpectationToArray(); 150 public: 151 void setLineNumber(int32_t line); getLineNumber()152 inline int32_t getLineNumber() 153 { 154 return line; 155 } 156 157 nsHtml5HtmlAttributes* emptyAttributes(); 158 private: appendCharRefBuf(char16_t c)159 inline void appendCharRefBuf(char16_t c) 160 { 161 MOZ_RELEASE_ASSERT(charRefBufLen < charRefBuf.length, "Attempted to overrun charRefBuf!"); 162 charRefBuf[charRefBufLen++] = c; 163 } 164 165 void emitOrAppendCharRefBuf(int32_t returnState); clearStrBufAfterUse()166 inline void clearStrBufAfterUse() 167 { 168 strBufLen = 0; 169 } 170 clearStrBufBeforeUse()171 inline void clearStrBufBeforeUse() 172 { 173 MOZ_ASSERT(!strBufLen, "strBufLen not reset after previous use!"); 174 strBufLen = 0; 175 } 176 clearStrBufAfterOneHyphen()177 inline void clearStrBufAfterOneHyphen() 178 { 179 MOZ_ASSERT(strBufLen == 1, "strBufLen length not one!"); 180 MOZ_ASSERT(strBuf[0] == '-', "strBuf does not start with a hyphen!"); 181 strBufLen = 0; 182 } 183 appendStrBuf(char16_t c)184 inline void appendStrBuf(char16_t c) 185 { 186 MOZ_ASSERT(strBufLen < strBuf.length, "Previous buffer length insufficient."); 187 if (MOZ_UNLIKELY(strBufLen == strBuf.length)) { 188 if (MOZ_UNLIKELY(!EnsureBufferSpace(1))) { 189 MOZ_CRASH("Unable to recover from buffer reallocation failure"); 190 } 191 } 192 strBuf[strBufLen++] = c; 193 } 194 195 protected: 196 nsString* strBufToString(); 197 private: 198 void strBufToDoctypeName(); 199 void emitStrBuf(); appendSecondHyphenToBogusComment()200 inline void appendSecondHyphenToBogusComment() 201 { 202 appendStrBuf('-'); 203 } 204 adjustDoubleHyphenAndAppendToStrBufAndErr(char16_t c)205 inline void adjustDoubleHyphenAndAppendToStrBufAndErr(char16_t c) 206 { 207 errConsecutiveHyphens(); 208 appendStrBuf(c); 209 } 210 211 void appendStrBuf(char16_t* buffer, int32_t offset, int32_t length); appendCharRefBufToStrBuf()212 inline void appendCharRefBufToStrBuf() 213 { 214 appendStrBuf(charRefBuf, 0, charRefBufLen); 215 charRefBufLen = 0; 216 } 217 218 void emitComment(int32_t provisionalHyphens, int32_t pos); 219 protected: 220 void flushChars(char16_t* buf, int32_t pos); 221 private: 222 void strBufToElementNameString(); 223 int32_t emitCurrentTagToken(bool selfClosing, int32_t pos); 224 void attributeNameComplete(); 225 void addAttributeWithoutValue(); 226 void addAttributeWithValue(); 227 public: 228 void start(); 229 bool tokenizeBuffer(nsHtml5UTF16Buffer* buffer); 230 private: 231 template<class P> int32_t stateLoop(int32_t state, char16_t c, int32_t pos, char16_t* buf, bool reconsume, int32_t returnState, int32_t endPos); 232 void initDoctypeFields(); adjustDoubleHyphenAndAppendToStrBufCarriageReturn()233 inline void adjustDoubleHyphenAndAppendToStrBufCarriageReturn() 234 { 235 silentCarriageReturn(); 236 adjustDoubleHyphenAndAppendToStrBufAndErr('\n'); 237 } 238 adjustDoubleHyphenAndAppendToStrBufLineFeed()239 inline void adjustDoubleHyphenAndAppendToStrBufLineFeed() 240 { 241 silentLineFeed(); 242 adjustDoubleHyphenAndAppendToStrBufAndErr('\n'); 243 } 244 appendStrBufLineFeed()245 inline void appendStrBufLineFeed() 246 { 247 silentLineFeed(); 248 appendStrBuf('\n'); 249 } 250 appendStrBufCarriageReturn()251 inline void appendStrBufCarriageReturn() 252 { 253 silentCarriageReturn(); 254 appendStrBuf('\n'); 255 } 256 257 protected: silentCarriageReturn()258 inline void silentCarriageReturn() 259 { 260 ++line; 261 lastCR = true; 262 } 263 silentLineFeed()264 inline void silentLineFeed() 265 { 266 ++line; 267 } 268 269 private: 270 void emitCarriageReturn(char16_t* buf, int32_t pos); 271 void emitReplacementCharacter(char16_t* buf, int32_t pos); 272 void emitPlaintextReplacementCharacter(char16_t* buf, int32_t pos); 273 void setAdditionalAndRememberAmpersandLocation(char16_t add); 274 void bogusDoctype(); 275 void bogusDoctypeWithoutQuirks(); 276 void handleNcrValue(int32_t returnState); 277 public: 278 void eof(); 279 private: 280 void emitDoctypeToken(int32_t pos); 281 protected: checkChar(char16_t * buf,int32_t pos)282 inline char16_t checkChar(char16_t* buf, int32_t pos) 283 { 284 return buf[pos]; 285 } 286 287 public: 288 bool internalEncodingDeclaration(nsString* internalCharset); 289 private: 290 void emitOrAppendTwo(const char16_t* val, int32_t returnState); 291 void emitOrAppendOne(const char16_t* val, int32_t returnState); 292 public: 293 void end(); 294 void requestSuspension(); 295 bool isInDataState(); 296 void resetToDataState(); 297 void loadState(nsHtml5Tokenizer* other); 298 void initializeWithoutStarting(); 299 void setEncodingDeclarationHandler(nsHtml5StreamParser* encodingDeclarationHandler); 300 ~nsHtml5Tokenizer(); 301 static void initializeStatics(); 302 static void releaseStatics(); 303 304 #include "nsHtml5TokenizerHSupplement.h" 305 }; 306 307 #define NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK ~1 308 #define NS_HTML5TOKENIZER_DATA 0 309 #define NS_HTML5TOKENIZER_RCDATA 1 310 #define NS_HTML5TOKENIZER_SCRIPT_DATA 2 311 #define NS_HTML5TOKENIZER_RAWTEXT 3 312 #define NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED 4 313 #define NS_HTML5TOKENIZER_ATTRIBUTE_VALUE_DOUBLE_QUOTED 5 314 #define NS_HTML5TOKENIZER_ATTRIBUTE_VALUE_SINGLE_QUOTED 6 315 #define NS_HTML5TOKENIZER_ATTRIBUTE_VALUE_UNQUOTED 7 316 #define NS_HTML5TOKENIZER_PLAINTEXT 8 317 #define NS_HTML5TOKENIZER_TAG_OPEN 9 318 #define NS_HTML5TOKENIZER_CLOSE_TAG_OPEN 10 319 #define NS_HTML5TOKENIZER_TAG_NAME 11 320 #define NS_HTML5TOKENIZER_BEFORE_ATTRIBUTE_NAME 12 321 #define NS_HTML5TOKENIZER_ATTRIBUTE_NAME 13 322 #define NS_HTML5TOKENIZER_AFTER_ATTRIBUTE_NAME 14 323 #define NS_HTML5TOKENIZER_BEFORE_ATTRIBUTE_VALUE 15 324 #define NS_HTML5TOKENIZER_AFTER_ATTRIBUTE_VALUE_QUOTED 16 325 #define NS_HTML5TOKENIZER_BOGUS_COMMENT 17 326 #define NS_HTML5TOKENIZER_MARKUP_DECLARATION_OPEN 18 327 #define NS_HTML5TOKENIZER_DOCTYPE 19 328 #define NS_HTML5TOKENIZER_BEFORE_DOCTYPE_NAME 20 329 #define NS_HTML5TOKENIZER_DOCTYPE_NAME 21 330 #define NS_HTML5TOKENIZER_AFTER_DOCTYPE_NAME 22 331 #define NS_HTML5TOKENIZER_BEFORE_DOCTYPE_PUBLIC_IDENTIFIER 23 332 #define NS_HTML5TOKENIZER_DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED 24 333 #define NS_HTML5TOKENIZER_DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED 25 334 #define NS_HTML5TOKENIZER_AFTER_DOCTYPE_PUBLIC_IDENTIFIER 26 335 #define NS_HTML5TOKENIZER_BEFORE_DOCTYPE_SYSTEM_IDENTIFIER 27 336 #define NS_HTML5TOKENIZER_DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED 28 337 #define NS_HTML5TOKENIZER_DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED 29 338 #define NS_HTML5TOKENIZER_AFTER_DOCTYPE_SYSTEM_IDENTIFIER 30 339 #define NS_HTML5TOKENIZER_BOGUS_DOCTYPE 31 340 #define NS_HTML5TOKENIZER_COMMENT_START 32 341 #define NS_HTML5TOKENIZER_COMMENT_START_DASH 33 342 #define NS_HTML5TOKENIZER_COMMENT 34 343 #define NS_HTML5TOKENIZER_COMMENT_END_DASH 35 344 #define NS_HTML5TOKENIZER_COMMENT_END 36 345 #define NS_HTML5TOKENIZER_COMMENT_END_BANG 37 346 #define NS_HTML5TOKENIZER_NON_DATA_END_TAG_NAME 38 347 #define NS_HTML5TOKENIZER_MARKUP_DECLARATION_HYPHEN 39 348 #define NS_HTML5TOKENIZER_MARKUP_DECLARATION_OCTYPE 40 349 #define NS_HTML5TOKENIZER_DOCTYPE_UBLIC 41 350 #define NS_HTML5TOKENIZER_DOCTYPE_YSTEM 42 351 #define NS_HTML5TOKENIZER_AFTER_DOCTYPE_PUBLIC_KEYWORD 43 352 #define NS_HTML5TOKENIZER_BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS 44 353 #define NS_HTML5TOKENIZER_AFTER_DOCTYPE_SYSTEM_KEYWORD 45 354 #define NS_HTML5TOKENIZER_CONSUME_CHARACTER_REFERENCE 46 355 #define NS_HTML5TOKENIZER_CONSUME_NCR 47 356 #define NS_HTML5TOKENIZER_CHARACTER_REFERENCE_TAIL 48 357 #define NS_HTML5TOKENIZER_HEX_NCR_LOOP 49 358 #define NS_HTML5TOKENIZER_DECIMAL_NRC_LOOP 50 359 #define NS_HTML5TOKENIZER_HANDLE_NCR_VALUE 51 360 #define NS_HTML5TOKENIZER_HANDLE_NCR_VALUE_RECONSUME 52 361 #define NS_HTML5TOKENIZER_CHARACTER_REFERENCE_HILO_LOOKUP 53 362 #define NS_HTML5TOKENIZER_SELF_CLOSING_START_TAG 54 363 #define NS_HTML5TOKENIZER_CDATA_START 55 364 #define NS_HTML5TOKENIZER_CDATA_SECTION 56 365 #define NS_HTML5TOKENIZER_CDATA_RSQB 57 366 #define NS_HTML5TOKENIZER_CDATA_RSQB_RSQB 58 367 #define NS_HTML5TOKENIZER_SCRIPT_DATA_LESS_THAN_SIGN 59 368 #define NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPE_START 60 369 #define NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPE_START_DASH 61 370 #define NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED_DASH 62 371 #define NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED_DASH_DASH 63 372 #define NS_HTML5TOKENIZER_BOGUS_COMMENT_HYPHEN 64 373 #define NS_HTML5TOKENIZER_RAWTEXT_RCDATA_LESS_THAN_SIGN 65 374 #define NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN 66 375 #define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPE_START 67 376 #define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED 68 377 #define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN 69 378 #define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED_DASH 70 379 #define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH 71 380 #define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPE_END 72 381 #define NS_HTML5TOKENIZER_PROCESSING_INSTRUCTION 73 382 #define NS_HTML5TOKENIZER_PROCESSING_INSTRUCTION_QUESTION_MARK 74 383 #define NS_HTML5TOKENIZER_LEAD_OFFSET (0xD800 - (0x10000 >> 10)) 384 385 386 #endif 387 388