/*
 * Copyright (c) 2005-2007 Henri Sivonen
 * Copyright (c) 2007-2015 Mozilla Foundation
 * Portions of comments Copyright 2004-2010 Apple Computer, Inc., Mozilla
 * Foundation, and Opera Software ASA.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
25 
/*
 * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
 * Please edit Tokenizer.java instead and regenerate.
 */
30 
31 #ifndef nsHtml5Tokenizer_h
32 #define nsHtml5Tokenizer_h
33 
34 #include "nsIAtom.h"
35 #include "nsHtml5AtomTable.h"
36 #include "nsString.h"
37 #include "nsIContent.h"
38 #include "nsTraceRefcnt.h"
39 #include "jArray.h"
40 #include "nsHtml5DocumentMode.h"
41 #include "nsHtml5ArrayCopy.h"
42 #include "nsHtml5NamedCharacters.h"
43 #include "nsHtml5NamedCharactersAccel.h"
44 #include "nsHtml5Atoms.h"
45 #include "nsAHtml5TreeBuilderState.h"
46 #include "nsHtml5Macros.h"
47 #include "nsHtml5Highlighter.h"
48 #include "nsHtml5TokenizerLoopPolicies.h"
49 
// Forward declarations of the parser classes that nsHtml5Tokenizer refers to
// only through pointers or out-of-line code, so their full definitions are not
// needed by this header.
class nsHtml5StreamParser;

class nsHtml5TreeBuilder;
class nsHtml5MetaScanner;
class nsHtml5AttributeName;
class nsHtml5ElementName;
class nsHtml5HtmlAttributes;
class nsHtml5UTF16Buffer;
class nsHtml5StateSnapshot;
class nsHtml5Portability;
60 
61 
62 class nsHtml5Tokenizer
63 {
64   private:
65     static char16_t LT_GT[];
66     static char16_t LT_SOLIDUS[];
67     static char16_t RSQB_RSQB[];
68     static char16_t REPLACEMENT_CHARACTER[];
69     static char16_t LF[];
70     static char16_t CDATA_LSQB[];
71     static char16_t OCTYPE[];
72     static char16_t UBLIC[];
73     static char16_t YSTEM[];
74     static staticJArray<char16_t,int32_t> TITLE_ARR;
75     static staticJArray<char16_t,int32_t> SCRIPT_ARR;
76     static staticJArray<char16_t,int32_t> STYLE_ARR;
77     static staticJArray<char16_t,int32_t> PLAINTEXT_ARR;
78     static staticJArray<char16_t,int32_t> XMP_ARR;
79     static staticJArray<char16_t,int32_t> TEXTAREA_ARR;
80     static staticJArray<char16_t,int32_t> IFRAME_ARR;
81     static staticJArray<char16_t,int32_t> NOEMBED_ARR;
82     static staticJArray<char16_t,int32_t> NOSCRIPT_ARR;
83     static staticJArray<char16_t,int32_t> NOFRAMES_ARR;
84   protected:
85     nsHtml5TreeBuilder* tokenHandler;
86     nsHtml5StreamParser* encodingDeclarationHandler;
87     bool lastCR;
88     int32_t stateSave;
89   private:
90     int32_t returnStateSave;
91   protected:
92     int32_t index;
93   private:
94     bool forceQuirks;
95     char16_t additional;
96     int32_t entCol;
97     int32_t firstCharKey;
98     int32_t lo;
99     int32_t hi;
100     int32_t candidate;
101     int32_t charRefBufMark;
102   protected:
103     int32_t value;
104   private:
105     bool seenDigits;
106   protected:
107     int32_t cstart;
108   private:
109     nsString* publicId;
110     nsString* systemId;
111     autoJArray<char16_t,int32_t> strBuf;
112     int32_t strBufLen;
113     autoJArray<char16_t,int32_t> charRefBuf;
114     int32_t charRefBufLen;
115     autoJArray<char16_t,int32_t> bmpChar;
116     autoJArray<char16_t,int32_t> astralChar;
117   protected:
118     nsHtml5ElementName* endTagExpectation;
119   private:
120     jArray<char16_t,int32_t> endTagExpectationAsArray;
121   protected:
122     bool endTag;
123   private:
124     nsHtml5ElementName* tagName;
125   protected:
126     nsHtml5AttributeName* attributeName;
127   private:
128     nsIAtom* doctypeName;
129     nsString* publicIdentifier;
130     nsString* systemIdentifier;
131     nsHtml5HtmlAttributes* attributes;
132     bool newAttributesEachTime;
133     bool shouldSuspend;
134   protected:
135     bool confident;
136   private:
137     int32_t line;
138     int32_t attributeLine;
139     nsHtml5AtomTable* interner;
140     bool viewingXmlSource;
141   public:
142     nsHtml5Tokenizer(nsHtml5TreeBuilder* tokenHandler, bool viewingXmlSource);
143     void setInterner(nsHtml5AtomTable* interner);
144     void initLocation(nsString* newPublicId, nsString* newSystemId);
145     bool isViewingXmlSource();
146     void setStateAndEndTagExpectation(int32_t specialTokenizerState, nsIAtom* endTagExpectation);
147     void setStateAndEndTagExpectation(int32_t specialTokenizerState, nsHtml5ElementName* endTagExpectation);
148   private:
149     void endTagExpectationToArray();
150   public:
151     void setLineNumber(int32_t line);
getLineNumber()152     inline int32_t getLineNumber()
153     {
154       return line;
155     }
156 
157     nsHtml5HtmlAttributes* emptyAttributes();
158   private:
appendCharRefBuf(char16_t c)159     inline void appendCharRefBuf(char16_t c)
160     {
161       MOZ_RELEASE_ASSERT(charRefBufLen < charRefBuf.length, "Attempted to overrun charRefBuf!");
162       charRefBuf[charRefBufLen++] = c;
163     }
164 
165     void emitOrAppendCharRefBuf(int32_t returnState);
clearStrBufAfterUse()166     inline void clearStrBufAfterUse()
167     {
168       strBufLen = 0;
169     }
170 
clearStrBufBeforeUse()171     inline void clearStrBufBeforeUse()
172     {
173       MOZ_ASSERT(!strBufLen, "strBufLen not reset after previous use!");
174       strBufLen = 0;
175     }
176 
clearStrBufAfterOneHyphen()177     inline void clearStrBufAfterOneHyphen()
178     {
179       MOZ_ASSERT(strBufLen == 1, "strBufLen length not one!");
180       MOZ_ASSERT(strBuf[0] == '-', "strBuf does not start with a hyphen!");
181       strBufLen = 0;
182     }
183 
appendStrBuf(char16_t c)184     inline void appendStrBuf(char16_t c)
185     {
186       MOZ_ASSERT(strBufLen < strBuf.length, "Previous buffer length insufficient.");
187       if (MOZ_UNLIKELY(strBufLen == strBuf.length)) {
188         if (MOZ_UNLIKELY(!EnsureBufferSpace(1))) {
189           MOZ_CRASH("Unable to recover from buffer reallocation failure");
190         }
191       }
192       strBuf[strBufLen++] = c;
193     }
194 
195   protected:
196     nsString* strBufToString();
197   private:
198     void strBufToDoctypeName();
199     void emitStrBuf();
appendSecondHyphenToBogusComment()200     inline void appendSecondHyphenToBogusComment()
201     {
202       appendStrBuf('-');
203     }
204 
adjustDoubleHyphenAndAppendToStrBufAndErr(char16_t c)205     inline void adjustDoubleHyphenAndAppendToStrBufAndErr(char16_t c)
206     {
207       errConsecutiveHyphens();
208       appendStrBuf(c);
209     }
210 
211     void appendStrBuf(char16_t* buffer, int32_t offset, int32_t length);
appendCharRefBufToStrBuf()212     inline void appendCharRefBufToStrBuf()
213     {
214       appendStrBuf(charRefBuf, 0, charRefBufLen);
215       charRefBufLen = 0;
216     }
217 
218     void emitComment(int32_t provisionalHyphens, int32_t pos);
219   protected:
220     void flushChars(char16_t* buf, int32_t pos);
221   private:
222     void strBufToElementNameString();
223     int32_t emitCurrentTagToken(bool selfClosing, int32_t pos);
224     void attributeNameComplete();
225     void addAttributeWithoutValue();
226     void addAttributeWithValue();
227   public:
228     void start();
229     bool tokenizeBuffer(nsHtml5UTF16Buffer* buffer);
230   private:
231     template<class P> int32_t stateLoop(int32_t state, char16_t c, int32_t pos, char16_t* buf, bool reconsume, int32_t returnState, int32_t endPos);
232     void initDoctypeFields();
adjustDoubleHyphenAndAppendToStrBufCarriageReturn()233     inline void adjustDoubleHyphenAndAppendToStrBufCarriageReturn()
234     {
235       silentCarriageReturn();
236       adjustDoubleHyphenAndAppendToStrBufAndErr('\n');
237     }
238 
adjustDoubleHyphenAndAppendToStrBufLineFeed()239     inline void adjustDoubleHyphenAndAppendToStrBufLineFeed()
240     {
241       silentLineFeed();
242       adjustDoubleHyphenAndAppendToStrBufAndErr('\n');
243     }
244 
appendStrBufLineFeed()245     inline void appendStrBufLineFeed()
246     {
247       silentLineFeed();
248       appendStrBuf('\n');
249     }
250 
appendStrBufCarriageReturn()251     inline void appendStrBufCarriageReturn()
252     {
253       silentCarriageReturn();
254       appendStrBuf('\n');
255     }
256 
257   protected:
silentCarriageReturn()258     inline void silentCarriageReturn()
259     {
260       ++line;
261       lastCR = true;
262     }
263 
silentLineFeed()264     inline void silentLineFeed()
265     {
266       ++line;
267     }
268 
269   private:
270     void emitCarriageReturn(char16_t* buf, int32_t pos);
271     void emitReplacementCharacter(char16_t* buf, int32_t pos);
272     void emitPlaintextReplacementCharacter(char16_t* buf, int32_t pos);
273     void setAdditionalAndRememberAmpersandLocation(char16_t add);
274     void bogusDoctype();
275     void bogusDoctypeWithoutQuirks();
276     void handleNcrValue(int32_t returnState);
277   public:
278     void eof();
279   private:
280     void emitDoctypeToken(int32_t pos);
281   protected:
checkChar(char16_t * buf,int32_t pos)282     inline char16_t checkChar(char16_t* buf, int32_t pos)
283     {
284       return buf[pos];
285     }
286 
287   public:
288     bool internalEncodingDeclaration(nsString* internalCharset);
289   private:
290     void emitOrAppendTwo(const char16_t* val, int32_t returnState);
291     void emitOrAppendOne(const char16_t* val, int32_t returnState);
292   public:
293     void end();
294     void requestSuspension();
295     bool isInDataState();
296     void resetToDataState();
297     void loadState(nsHtml5Tokenizer* other);
298     void initializeWithoutStarting();
299     void setEncodingDeclarationHandler(nsHtml5StreamParser* encodingDeclarationHandler);
300     ~nsHtml5Tokenizer();
301     static void initializeStatics();
302     static void releaseStatics();
303 
304 #include "nsHtml5TokenizerHSupplement.h"
305 };
306 
/*
 * Tokenizer state identifiers.
 *
 * Each state has a distinct small integer value; the numbering is part of the
 * interface and must not change.
 *
 * NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK clears bit 0, so DATA (0) and
 * RCDATA (1) both mask to 0 while every other state masks to a non-zero
 * value. The expansion is parenthesized so it stays a single operand wherever
 * the macro is used.
 */
#define NS_HTML5TOKENIZER_DATA_AND_RCDATA_MASK (~1)
#define NS_HTML5TOKENIZER_DATA 0
#define NS_HTML5TOKENIZER_RCDATA 1
#define NS_HTML5TOKENIZER_SCRIPT_DATA 2
#define NS_HTML5TOKENIZER_RAWTEXT 3
#define NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED 4
#define NS_HTML5TOKENIZER_ATTRIBUTE_VALUE_DOUBLE_QUOTED 5
#define NS_HTML5TOKENIZER_ATTRIBUTE_VALUE_SINGLE_QUOTED 6
#define NS_HTML5TOKENIZER_ATTRIBUTE_VALUE_UNQUOTED 7
#define NS_HTML5TOKENIZER_PLAINTEXT 8
#define NS_HTML5TOKENIZER_TAG_OPEN 9
#define NS_HTML5TOKENIZER_CLOSE_TAG_OPEN 10
#define NS_HTML5TOKENIZER_TAG_NAME 11
#define NS_HTML5TOKENIZER_BEFORE_ATTRIBUTE_NAME 12
#define NS_HTML5TOKENIZER_ATTRIBUTE_NAME 13
#define NS_HTML5TOKENIZER_AFTER_ATTRIBUTE_NAME 14
#define NS_HTML5TOKENIZER_BEFORE_ATTRIBUTE_VALUE 15
#define NS_HTML5TOKENIZER_AFTER_ATTRIBUTE_VALUE_QUOTED 16
#define NS_HTML5TOKENIZER_BOGUS_COMMENT 17
#define NS_HTML5TOKENIZER_MARKUP_DECLARATION_OPEN 18
#define NS_HTML5TOKENIZER_DOCTYPE 19
#define NS_HTML5TOKENIZER_BEFORE_DOCTYPE_NAME 20
#define NS_HTML5TOKENIZER_DOCTYPE_NAME 21
#define NS_HTML5TOKENIZER_AFTER_DOCTYPE_NAME 22
#define NS_HTML5TOKENIZER_BEFORE_DOCTYPE_PUBLIC_IDENTIFIER 23
#define NS_HTML5TOKENIZER_DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED 24
#define NS_HTML5TOKENIZER_DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED 25
#define NS_HTML5TOKENIZER_AFTER_DOCTYPE_PUBLIC_IDENTIFIER 26
#define NS_HTML5TOKENIZER_BEFORE_DOCTYPE_SYSTEM_IDENTIFIER 27
#define NS_HTML5TOKENIZER_DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED 28
#define NS_HTML5TOKENIZER_DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED 29
#define NS_HTML5TOKENIZER_AFTER_DOCTYPE_SYSTEM_IDENTIFIER 30
#define NS_HTML5TOKENIZER_BOGUS_DOCTYPE 31
#define NS_HTML5TOKENIZER_COMMENT_START 32
#define NS_HTML5TOKENIZER_COMMENT_START_DASH 33
#define NS_HTML5TOKENIZER_COMMENT 34
#define NS_HTML5TOKENIZER_COMMENT_END_DASH 35
#define NS_HTML5TOKENIZER_COMMENT_END 36
#define NS_HTML5TOKENIZER_COMMENT_END_BANG 37
#define NS_HTML5TOKENIZER_NON_DATA_END_TAG_NAME 38
#define NS_HTML5TOKENIZER_MARKUP_DECLARATION_HYPHEN 39
#define NS_HTML5TOKENIZER_MARKUP_DECLARATION_OCTYPE 40
#define NS_HTML5TOKENIZER_DOCTYPE_UBLIC 41
#define NS_HTML5TOKENIZER_DOCTYPE_YSTEM 42
#define NS_HTML5TOKENIZER_AFTER_DOCTYPE_PUBLIC_KEYWORD 43
#define NS_HTML5TOKENIZER_BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS 44
#define NS_HTML5TOKENIZER_AFTER_DOCTYPE_SYSTEM_KEYWORD 45
#define NS_HTML5TOKENIZER_CONSUME_CHARACTER_REFERENCE 46
#define NS_HTML5TOKENIZER_CONSUME_NCR 47
#define NS_HTML5TOKENIZER_CHARACTER_REFERENCE_TAIL 48
#define NS_HTML5TOKENIZER_HEX_NCR_LOOP 49
/* The "NRC" spelling below is kept as is: the macro name is used by callers. */
#define NS_HTML5TOKENIZER_DECIMAL_NRC_LOOP 50
#define NS_HTML5TOKENIZER_HANDLE_NCR_VALUE 51
#define NS_HTML5TOKENIZER_HANDLE_NCR_VALUE_RECONSUME 52
#define NS_HTML5TOKENIZER_CHARACTER_REFERENCE_HILO_LOOKUP 53
#define NS_HTML5TOKENIZER_SELF_CLOSING_START_TAG 54
#define NS_HTML5TOKENIZER_CDATA_START 55
#define NS_HTML5TOKENIZER_CDATA_SECTION 56
#define NS_HTML5TOKENIZER_CDATA_RSQB 57
#define NS_HTML5TOKENIZER_CDATA_RSQB_RSQB 58
#define NS_HTML5TOKENIZER_SCRIPT_DATA_LESS_THAN_SIGN 59
#define NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPE_START 60
#define NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPE_START_DASH 61
#define NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED_DASH 62
#define NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED_DASH_DASH 63
#define NS_HTML5TOKENIZER_BOGUS_COMMENT_HYPHEN 64
#define NS_HTML5TOKENIZER_RAWTEXT_RCDATA_LESS_THAN_SIGN 65
#define NS_HTML5TOKENIZER_SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN 66
#define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPE_START 67
#define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED 68
#define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN 69
#define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED_DASH 70
#define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH 71
#define NS_HTML5TOKENIZER_SCRIPT_DATA_DOUBLE_ESCAPE_END 72
#define NS_HTML5TOKENIZER_PROCESSING_INSTRUCTION 73
#define NS_HTML5TOKENIZER_PROCESSING_INSTRUCTION_QUESTION_MARK 74
/*
 * Evaluates to 0xD7C0 (0xD800 - 0x40). NOTE(review): presumably the constant
 * added to (code point >> 10) to obtain the UTF-16 lead surrogate of an
 * astral code point -- confirm at the use site.
 */
#define NS_HTML5TOKENIZER_LEAD_OFFSET (0xD800 - (0x10000 >> 10))
384 
385 
386 #endif
387 
388