1 /*
2  * Copyright (C) 2010 Google, Inc. All Rights Reserved.
3  * Copyright (C) 2011 Apple Inc. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
15  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL GOOGLE INC. OR
18  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 #include "config.h"
28 #include "HTMLTreeBuilder.h"
29 
30 #include "Comment.h"
31 #include "DOMWindow.h"
32 #include "DocumentFragment.h"
33 #include "DocumentType.h"
34 #include "Frame.h"
35 #include "HTMLDocument.h"
36 #include "HTMLDocumentParser.h"
37 #include "HTMLElementFactory.h"
38 #include "HTMLFormElement.h"
39 #include "HTMLHtmlElement.h"
40 #include "HTMLNames.h"
41 #include "HTMLParserIdioms.h"
42 #include "HTMLScriptElement.h"
43 #include "HTMLToken.h"
44 #include "HTMLTokenizer.h"
45 #include "LocalizedStrings.h"
46 #include "MathMLNames.h"
47 #include "NotImplemented.h"
48 #include "SVGNames.h"
49 #include "Text.h"
50 #include "XLinkNames.h"
51 #include "XMLNSNames.h"
52 #include "XMLNames.h"
53 #include <wtf/unicode/CharacterNames.h>
54 
55 namespace WebCore {
56 
57 using namespace HTMLNames;
58 
59 static const int uninitializedLineNumberValue = -1;
60 
uninitializedPositionValue1()61 static TextPosition1 uninitializedPositionValue1()
62 {
63     return TextPosition1(WTF::OneBasedNumber::fromOneBasedInt(-1), WTF::OneBasedNumber::base());
64 }
65 
66 namespace {
67 
isHTMLSpaceOrReplacementCharacter(UChar character)68 inline bool isHTMLSpaceOrReplacementCharacter(UChar character)
69 {
70     return isHTMLSpace(character) || character == replacementCharacter;
71 }
72 
isAllWhitespace(const String & string)73 inline bool isAllWhitespace(const String& string)
74 {
75     return string.isAllSpecialCharacters<isHTMLSpace>();
76 }
77 
isAllWhitespaceOrReplacementCharacters(const String & string)78 inline bool isAllWhitespaceOrReplacementCharacters(const String& string)
79 {
80     return string.isAllSpecialCharacters<isHTMLSpaceOrReplacementCharacter>();
81 }
82 
isNumberedHeaderTag(const AtomicString & tagName)83 bool isNumberedHeaderTag(const AtomicString& tagName)
84 {
85     return tagName == h1Tag
86         || tagName == h2Tag
87         || tagName == h3Tag
88         || tagName == h4Tag
89         || tagName == h5Tag
90         || tagName == h6Tag;
91 }
92 
isCaptionColOrColgroupTag(const AtomicString & tagName)93 bool isCaptionColOrColgroupTag(const AtomicString& tagName)
94 {
95     return tagName == captionTag
96         || tagName == colTag
97         || tagName == colgroupTag;
98 }
99 
isTableCellContextTag(const AtomicString & tagName)100 bool isTableCellContextTag(const AtomicString& tagName)
101 {
102     return tagName == thTag || tagName == tdTag;
103 }
104 
isTableBodyContextTag(const AtomicString & tagName)105 bool isTableBodyContextTag(const AtomicString& tagName)
106 {
107     return tagName == tbodyTag
108         || tagName == tfootTag
109         || tagName == theadTag;
110 }
111 
112 // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#special
isSpecialNode(Node * node)113 bool isSpecialNode(Node* node)
114 {
115     if (node->hasTagName(MathMLNames::miTag)
116         || node->hasTagName(MathMLNames::moTag)
117         || node->hasTagName(MathMLNames::mnTag)
118         || node->hasTagName(MathMLNames::msTag)
119         || node->hasTagName(MathMLNames::mtextTag)
120         || node->hasTagName(MathMLNames::annotation_xmlTag)
121         || node->hasTagName(SVGNames::foreignObjectTag)
122         || node->hasTagName(SVGNames::descTag)
123         || node->hasTagName(SVGNames::titleTag))
124         return true;
125     if (node->nodeType() == Node::DOCUMENT_FRAGMENT_NODE)
126         return true;
127     if (!isInHTMLNamespace(node))
128         return false;
129     const AtomicString& tagName = node->localName();
130     return tagName == addressTag
131         || tagName == appletTag
132         || tagName == areaTag
133         || tagName == articleTag
134         || tagName == asideTag
135         || tagName == baseTag
136         || tagName == basefontTag
137         || tagName == bgsoundTag
138         || tagName == blockquoteTag
139         || tagName == bodyTag
140         || tagName == brTag
141         || tagName == buttonTag
142         || tagName == captionTag
143         || tagName == centerTag
144         || tagName == colTag
145         || tagName == colgroupTag
146         || tagName == commandTag
147         || tagName == ddTag
148         || tagName == detailsTag
149         || tagName == dirTag
150         || tagName == divTag
151         || tagName == dlTag
152         || tagName == dtTag
153         || tagName == embedTag
154         || tagName == fieldsetTag
155         || tagName == figcaptionTag
156         || tagName == figureTag
157         || tagName == footerTag
158         || tagName == formTag
159         || tagName == frameTag
160         || tagName == framesetTag
161         || isNumberedHeaderTag(tagName)
162         || tagName == headTag
163         || tagName == headerTag
164         || tagName == hgroupTag
165         || tagName == hrTag
166         || tagName == htmlTag
167         || tagName == iframeTag
168         || tagName == imgTag
169         || tagName == inputTag
170         || tagName == isindexTag
171         || tagName == liTag
172         || tagName == linkTag
173         || tagName == listingTag
174         || tagName == marqueeTag
175         || tagName == menuTag
176         || tagName == metaTag
177         || tagName == navTag
178         || tagName == noembedTag
179         || tagName == noframesTag
180         || tagName == noscriptTag
181         || tagName == objectTag
182         || tagName == olTag
183         || tagName == pTag
184         || tagName == paramTag
185         || tagName == plaintextTag
186         || tagName == preTag
187         || tagName == scriptTag
188         || tagName == sectionTag
189         || tagName == selectTag
190         || tagName == styleTag
191         || tagName == summaryTag
192         || tagName == tableTag
193         || isTableBodyContextTag(tagName)
194         || tagName == tdTag
195         || tagName == textareaTag
196         || tagName == thTag
197         || tagName == titleTag
198         || tagName == trTag
199         || tagName == ulTag
200         || tagName == wbrTag
201         || tagName == xmpTag;
202 }
203 
isNonAnchorNonNobrFormattingTag(const AtomicString & tagName)204 bool isNonAnchorNonNobrFormattingTag(const AtomicString& tagName)
205 {
206     return tagName == bTag
207         || tagName == bigTag
208         || tagName == codeTag
209         || tagName == emTag
210         || tagName == fontTag
211         || tagName == iTag
212         || tagName == sTag
213         || tagName == smallTag
214         || tagName == strikeTag
215         || tagName == strongTag
216         || tagName == ttTag
217         || tagName == uTag;
218 }
219 
isNonAnchorFormattingTag(const AtomicString & tagName)220 bool isNonAnchorFormattingTag(const AtomicString& tagName)
221 {
222     return tagName == nobrTag
223         || isNonAnchorNonNobrFormattingTag(tagName);
224 }
225 
226 // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#formatting
isFormattingTag(const AtomicString & tagName)227 bool isFormattingTag(const AtomicString& tagName)
228 {
229     return tagName == aTag || isNonAnchorFormattingTag(tagName);
230 }
231 
closestFormAncestor(Element * element)232 HTMLFormElement* closestFormAncestor(Element* element)
233 {
234     while (element) {
235         if (element->hasTagName(formTag))
236             return static_cast<HTMLFormElement*>(element);
237         ContainerNode* parent = element->parentNode();
238         if (!parent || !parent->isElementNode())
239             return 0;
240         element = static_cast<Element*>(parent);
241     }
242     return 0;
243 }
244 
245 } // namespace
246 
247 class HTMLTreeBuilder::ExternalCharacterTokenBuffer {
248     WTF_MAKE_NONCOPYABLE(ExternalCharacterTokenBuffer);
249 public:
ExternalCharacterTokenBuffer(AtomicHTMLToken & token)250     explicit ExternalCharacterTokenBuffer(AtomicHTMLToken& token)
251         : m_current(token.characters().data())
252         , m_end(m_current + token.characters().size())
253     {
254         ASSERT(!isEmpty());
255     }
256 
ExternalCharacterTokenBuffer(const String & string)257     explicit ExternalCharacterTokenBuffer(const String& string)
258         : m_current(string.characters())
259         , m_end(m_current + string.length())
260     {
261         ASSERT(!isEmpty());
262     }
263 
~ExternalCharacterTokenBuffer()264     ~ExternalCharacterTokenBuffer()
265     {
266         ASSERT(isEmpty());
267     }
268 
isEmpty() const269     bool isEmpty() const { return m_current == m_end; }
270 
skipLeadingWhitespace()271     void skipLeadingWhitespace()
272     {
273         skipLeading<isHTMLSpace>();
274     }
275 
takeLeadingWhitespace()276     String takeLeadingWhitespace()
277     {
278         return takeLeading<isHTMLSpace>();
279     }
280 
takeLeadingNonWhitespace()281     String takeLeadingNonWhitespace()
282     {
283         return takeLeading<isNotHTMLSpace>();
284     }
285 
takeRemaining()286     String takeRemaining()
287     {
288         ASSERT(!isEmpty());
289         const UChar* start = m_current;
290         m_current = m_end;
291         return String(start, m_current - start);
292     }
293 
giveRemainingTo(Vector<UChar> & recipient)294     void giveRemainingTo(Vector<UChar>& recipient)
295     {
296         recipient.append(m_current, m_end - m_current);
297         m_current = m_end;
298     }
299 
takeRemainingWhitespace()300     String takeRemainingWhitespace()
301     {
302         ASSERT(!isEmpty());
303         Vector<UChar> whitespace;
304         do {
305             UChar cc = *m_current++;
306             if (isHTMLSpace(cc))
307                 whitespace.append(cc);
308         } while (m_current < m_end);
309         // Returning the null string when there aren't any whitespace
310         // characters is slightly cleaner semantically because we don't want
311         // to insert a text node (as opposed to inserting an empty text node).
312         if (whitespace.isEmpty())
313             return String();
314         return String::adopt(whitespace);
315     }
316 
317 private:
318     template<bool characterPredicate(UChar)>
skipLeading()319     void skipLeading()
320     {
321         ASSERT(!isEmpty());
322         while (characterPredicate(*m_current)) {
323             if (++m_current == m_end)
324                 return;
325         }
326     }
327 
328     template<bool characterPredicate(UChar)>
takeLeading()329     String takeLeading()
330     {
331         ASSERT(!isEmpty());
332         const UChar* start = m_current;
333         skipLeading<characterPredicate>();
334         if (start == m_current)
335             return String();
336         return String(start, m_current - start);
337     }
338 
339     const UChar* m_current;
340     const UChar* m_end;
341 };
342 
343 
HTMLTreeBuilder(HTMLDocumentParser * parser,HTMLDocument * document,bool reportErrors,bool usePreHTML5ParserQuirks)344 HTMLTreeBuilder::HTMLTreeBuilder(HTMLDocumentParser* parser, HTMLDocument* document, bool reportErrors, bool usePreHTML5ParserQuirks)
345     : m_framesetOk(true)
346     , m_document(document)
347     , m_tree(document)
348     , m_reportErrors(reportErrors)
349     , m_isPaused(false)
350     , m_insertionMode(InitialMode)
351     , m_originalInsertionMode(InitialMode)
352     , m_parser(parser)
353     , m_scriptToProcessStartPosition(uninitializedPositionValue1())
354     , m_lastScriptElementStartPosition(TextPosition0::belowRangePosition())
355     , m_usePreHTML5ParserQuirks(usePreHTML5ParserQuirks)
356     , m_hasPendingForeignInsertionModeSteps(false)
357 {
358 }
359 
360 // FIXME: Member variables should be grouped into self-initializing structs to
361 // minimize code duplication between these constructors.
HTMLTreeBuilder(HTMLDocumentParser * parser,DocumentFragment * fragment,Element * contextElement,FragmentScriptingPermission scriptingPermission,bool usePreHTML5ParserQuirks)362 HTMLTreeBuilder::HTMLTreeBuilder(HTMLDocumentParser* parser, DocumentFragment* fragment, Element* contextElement, FragmentScriptingPermission scriptingPermission, bool usePreHTML5ParserQuirks)
363     : m_framesetOk(true)
364     , m_fragmentContext(fragment, contextElement, scriptingPermission)
365     , m_document(fragment->document())
366     , m_tree(fragment, scriptingPermission)
367     , m_reportErrors(false) // FIXME: Why not report errors in fragments?
368     , m_isPaused(false)
369     , m_insertionMode(InitialMode)
370     , m_originalInsertionMode(InitialMode)
371     , m_parser(parser)
372     , m_scriptToProcessStartPosition(uninitializedPositionValue1())
373     , m_lastScriptElementStartPosition(TextPosition0::belowRangePosition())
374     , m_usePreHTML5ParserQuirks(usePreHTML5ParserQuirks)
375     , m_hasPendingForeignInsertionModeSteps(false)
376 {
377     if (contextElement) {
378         // Steps 4.2-4.6 of the HTML5 Fragment Case parsing algorithm:
379         // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#fragment-case
380         // For efficiency, we skip step 4.2 ("Let root be a new html element with no attributes")
381         // and instead use the DocumentFragment as a root node.
382         m_tree.openElements()->pushRootNode(fragment);
383         resetInsertionModeAppropriately();
384         m_tree.setForm(closestFormAncestor(contextElement));
385     }
386 }
387 
~HTMLTreeBuilder()388 HTMLTreeBuilder::~HTMLTreeBuilder()
389 {
390 }
391 
detach()392 void HTMLTreeBuilder::detach()
393 {
394     // This call makes little sense in fragment mode, but for consistency
395     // DocumentParser expects detach() to always be called before it's destroyed.
396     m_document = 0;
397     // HTMLConstructionSite might be on the callstack when detach() is called
398     // otherwise we'd just call m_tree.clear() here instead.
399     m_tree.detach();
400 }
401 
FragmentParsingContext()402 HTMLTreeBuilder::FragmentParsingContext::FragmentParsingContext()
403     : m_fragment(0)
404     , m_contextElement(0)
405     , m_scriptingPermission(FragmentScriptingAllowed)
406 {
407 }
408 
FragmentParsingContext(DocumentFragment * fragment,Element * contextElement,FragmentScriptingPermission scriptingPermission)409 HTMLTreeBuilder::FragmentParsingContext::FragmentParsingContext(DocumentFragment* fragment, Element* contextElement, FragmentScriptingPermission scriptingPermission)
410     : m_fragment(fragment)
411     , m_contextElement(contextElement)
412     , m_scriptingPermission(scriptingPermission)
413 {
414     ASSERT(!fragment->hasChildNodes());
415 }
416 
~FragmentParsingContext()417 HTMLTreeBuilder::FragmentParsingContext::~FragmentParsingContext()
418 {
419 }
420 
takeScriptToProcess(TextPosition1 & scriptStartPosition)421 PassRefPtr<Element> HTMLTreeBuilder::takeScriptToProcess(TextPosition1& scriptStartPosition)
422 {
423     // Unpause ourselves, callers may pause us again when processing the script.
424     // The HTML5 spec is written as though scripts are executed inside the tree
425     // builder.  We pause the parser to exit the tree builder, and then resume
426     // before running scripts.
427     m_isPaused = false;
428     scriptStartPosition = m_scriptToProcessStartPosition;
429     m_scriptToProcessStartPosition = uninitializedPositionValue1();
430     return m_scriptToProcess.release();
431 }
432 
constructTreeFromToken(HTMLToken & rawToken)433 void HTMLTreeBuilder::constructTreeFromToken(HTMLToken& rawToken)
434 {
435     AtomicHTMLToken token(rawToken);
436 
437     // We clear the rawToken in case constructTreeFromAtomicToken
438     // synchronously re-enters the parser. We don't clear the token immedately
439     // for Character tokens because the AtomicHTMLToken avoids copying the
440     // characters by keeping a pointer to the underlying buffer in the
441     // HTMLToken. Fortuantely, Character tokens can't cause use to re-enter
442     // the parser.
443     //
444     // FIXME: Top clearing the rawToken once we start running the parser off
445     // the main thread or once we stop allowing synchronous JavaScript
446     // execution from parseMappedAttribute.
447     if (rawToken.type() != HTMLToken::Character)
448         rawToken.clear();
449 
450     constructTreeFromAtomicToken(token);
451 
452     if (!rawToken.isUninitialized()) {
453         ASSERT(rawToken.type() == HTMLToken::Character);
454         rawToken.clear();
455     }
456 }
457 
constructTreeFromAtomicToken(AtomicHTMLToken & token)458 void HTMLTreeBuilder::constructTreeFromAtomicToken(AtomicHTMLToken& token)
459 {
460     processToken(token);
461 
462     // Swallowing U+0000 characters isn't in the HTML5 spec, but turning all
463     // the U+0000 characters into replacement characters has compatibility
464     // problems.
465     m_parser->tokenizer()->setForceNullCharacterReplacement(m_insertionMode == TextMode || m_insertionMode == InForeignContentMode);
466     m_parser->tokenizer()->setShouldAllowCDATA(m_insertionMode == InForeignContentMode && !isInHTMLNamespace(m_tree.currentNode()));
467 }
468 
processToken(AtomicHTMLToken & token)469 void HTMLTreeBuilder::processToken(AtomicHTMLToken& token)
470 {
471     switch (token.type()) {
472     case HTMLToken::Uninitialized:
473         ASSERT_NOT_REACHED();
474         break;
475     case HTMLToken::DOCTYPE:
476         processDoctypeToken(token);
477         break;
478     case HTMLToken::StartTag:
479         processStartTag(token);
480         break;
481     case HTMLToken::EndTag:
482         processEndTag(token);
483         break;
484     case HTMLToken::Comment:
485         processComment(token);
486         return;
487     case HTMLToken::Character:
488         processCharacter(token);
489         break;
490     case HTMLToken::EndOfFile:
491         processEndOfFile(token);
492         break;
493     }
494 }
495 
processDoctypeToken(AtomicHTMLToken & token)496 void HTMLTreeBuilder::processDoctypeToken(AtomicHTMLToken& token)
497 {
498     ASSERT(token.type() == HTMLToken::DOCTYPE);
499     if (m_insertionMode == InitialMode) {
500         m_tree.insertDoctype(token);
501         setInsertionMode(BeforeHTMLMode);
502         return;
503     }
504     if (m_insertionMode == InTableTextMode) {
505         defaultForInTableText();
506         processDoctypeToken(token);
507         return;
508     }
509     parseError(token);
510 }
511 
processFakeStartTag(const QualifiedName & tagName,PassRefPtr<NamedNodeMap> attributes)512 void HTMLTreeBuilder::processFakeStartTag(const QualifiedName& tagName, PassRefPtr<NamedNodeMap> attributes)
513 {
514     // FIXME: We'll need a fancier conversion than just "localName" for SVG/MathML tags.
515     AtomicHTMLToken fakeToken(HTMLToken::StartTag, tagName.localName(), attributes);
516     processStartTag(fakeToken);
517 }
518 
processFakeEndTag(const QualifiedName & tagName)519 void HTMLTreeBuilder::processFakeEndTag(const QualifiedName& tagName)
520 {
521     // FIXME: We'll need a fancier conversion than just "localName" for SVG/MathML tags.
522     AtomicHTMLToken fakeToken(HTMLToken::EndTag, tagName.localName());
523     processEndTag(fakeToken);
524 }
525 
processFakeCharacters(const String & characters)526 void HTMLTreeBuilder::processFakeCharacters(const String& characters)
527 {
528     ASSERT(!characters.isEmpty());
529     ExternalCharacterTokenBuffer buffer(characters);
530     processCharacterBuffer(buffer);
531 }
532 
processFakePEndTagIfPInButtonScope()533 void HTMLTreeBuilder::processFakePEndTagIfPInButtonScope()
534 {
535     if (!m_tree.openElements()->inButtonScope(pTag.localName()))
536         return;
537     AtomicHTMLToken endP(HTMLToken::EndTag, pTag.localName());
538     processEndTag(endP);
539 }
540 
attributesForIsindexInput(AtomicHTMLToken & token)541 PassRefPtr<NamedNodeMap> HTMLTreeBuilder::attributesForIsindexInput(AtomicHTMLToken& token)
542 {
543     RefPtr<NamedNodeMap> attributes = token.takeAtributes();
544     if (!attributes)
545         attributes = NamedNodeMap::create();
546     else {
547         attributes->removeAttribute(nameAttr);
548         attributes->removeAttribute(actionAttr);
549         attributes->removeAttribute(promptAttr);
550     }
551 
552     RefPtr<Attribute> mappedAttribute = Attribute::createMapped(nameAttr, isindexTag.localName());
553     attributes->insertAttribute(mappedAttribute.release(), false);
554     return attributes.release();
555 }
556 
processIsindexStartTagForInBody(AtomicHTMLToken & token)557 void HTMLTreeBuilder::processIsindexStartTagForInBody(AtomicHTMLToken& token)
558 {
559     ASSERT(token.type() == HTMLToken::StartTag);
560     ASSERT(token.name() == isindexTag);
561     parseError(token);
562     if (m_tree.form())
563         return;
564     notImplemented(); // Acknowledge self-closing flag
565     processFakeStartTag(formTag);
566     RefPtr<Attribute> actionAttribute = token.getAttributeItem(actionAttr);
567     if (actionAttribute) {
568         ASSERT(m_tree.currentElement()->hasTagName(formTag));
569         m_tree.currentElement()->setAttribute(actionAttr, actionAttribute->value());
570     }
571     processFakeStartTag(hrTag);
572     processFakeStartTag(labelTag);
573     RefPtr<Attribute> promptAttribute = token.getAttributeItem(promptAttr);
574     if (promptAttribute)
575         processFakeCharacters(promptAttribute->value());
576     else
577         processFakeCharacters(searchableIndexIntroduction());
578     processFakeStartTag(inputTag, attributesForIsindexInput(token));
579     notImplemented(); // This second set of characters may be needed by non-english locales.
580     processFakeEndTag(labelTag);
581     processFakeStartTag(hrTag);
582     processFakeEndTag(formTag);
583 }
584 
585 namespace {
586 
isLi(const ContainerNode * element)587 bool isLi(const ContainerNode* element)
588 {
589     return element->hasTagName(liTag);
590 }
591 
isDdOrDt(const ContainerNode * element)592 bool isDdOrDt(const ContainerNode* element)
593 {
594     return element->hasTagName(ddTag)
595         || element->hasTagName(dtTag);
596 }
597 
598 }
599 
600 template <bool shouldClose(const ContainerNode*)>
processCloseWhenNestedTag(AtomicHTMLToken & token)601 void HTMLTreeBuilder::processCloseWhenNestedTag(AtomicHTMLToken& token)
602 {
603     m_framesetOk = false;
604     HTMLElementStack::ElementRecord* nodeRecord = m_tree.openElements()->topRecord();
605     while (1) {
606         RefPtr<ContainerNode> node = nodeRecord->node();
607         if (shouldClose(node.get())) {
608             ASSERT(node->isElementNode());
609             processFakeEndTag(toElement(node.get())->tagQName());
610             break;
611         }
612         if (isSpecialNode(node.get()) && !node->hasTagName(addressTag) && !node->hasTagName(divTag) && !node->hasTagName(pTag))
613             break;
614         nodeRecord = nodeRecord->next();
615     }
616     processFakePEndTagIfPInButtonScope();
617     m_tree.insertHTMLElement(token);
618 }
619 
620 namespace {
621 
622 typedef HashMap<AtomicString, QualifiedName> PrefixedNameToQualifiedNameMap;
623 
mapLoweredLocalNameToName(PrefixedNameToQualifiedNameMap * map,QualifiedName ** names,size_t length)624 void mapLoweredLocalNameToName(PrefixedNameToQualifiedNameMap* map, QualifiedName** names, size_t length)
625 {
626     for (size_t i = 0; i < length; ++i) {
627         const QualifiedName& name = *names[i];
628         const AtomicString& localName = name.localName();
629         AtomicString loweredLocalName = localName.lower();
630         if (loweredLocalName != localName)
631             map->add(loweredLocalName, name);
632     }
633 }
634 
adjustSVGTagNameCase(AtomicHTMLToken & token)635 void adjustSVGTagNameCase(AtomicHTMLToken& token)
636 {
637     static PrefixedNameToQualifiedNameMap* caseMap = 0;
638     if (!caseMap) {
639         caseMap = new PrefixedNameToQualifiedNameMap;
640         size_t length = 0;
641         QualifiedName** svgTags = SVGNames::getSVGTags(&length);
642         mapLoweredLocalNameToName(caseMap, svgTags, length);
643     }
644 
645     const QualifiedName& casedName = caseMap->get(token.name());
646     if (casedName.localName().isNull())
647         return;
648     token.setName(casedName.localName());
649 }
650 
651 template<QualifiedName** getAttrs(size_t* length)>
adjustAttributes(AtomicHTMLToken & token)652 void adjustAttributes(AtomicHTMLToken& token)
653 {
654     static PrefixedNameToQualifiedNameMap* caseMap = 0;
655     if (!caseMap) {
656         caseMap = new PrefixedNameToQualifiedNameMap;
657         size_t length = 0;
658         QualifiedName** attrs = getAttrs(&length);
659         mapLoweredLocalNameToName(caseMap, attrs, length);
660     }
661 
662     NamedNodeMap* attributes = token.attributes();
663     if (!attributes)
664         return;
665 
666     for (unsigned x = 0; x < attributes->length(); ++x) {
667         Attribute* attribute = attributes->attributeItem(x);
668         const QualifiedName& casedName = caseMap->get(attribute->localName());
669         if (!casedName.localName().isNull())
670             attribute->parserSetName(casedName);
671     }
672 }
673 
adjustSVGAttributes(AtomicHTMLToken & token)674 void adjustSVGAttributes(AtomicHTMLToken& token)
675 {
676     adjustAttributes<SVGNames::getSVGAttrs>(token);
677 }
678 
adjustMathMLAttributes(AtomicHTMLToken & token)679 void adjustMathMLAttributes(AtomicHTMLToken& token)
680 {
681     adjustAttributes<MathMLNames::getMathMLAttrs>(token);
682 }
683 
addNamesWithPrefix(PrefixedNameToQualifiedNameMap * map,const AtomicString & prefix,QualifiedName ** names,size_t length)684 void addNamesWithPrefix(PrefixedNameToQualifiedNameMap* map, const AtomicString& prefix, QualifiedName** names, size_t length)
685 {
686     for (size_t i = 0; i < length; ++i) {
687         QualifiedName* name = names[i];
688         const AtomicString& localName = name->localName();
689         AtomicString prefixColonLocalName(prefix + ":" + localName);
690         QualifiedName nameWithPrefix(prefix, localName, name->namespaceURI());
691         map->add(prefixColonLocalName, nameWithPrefix);
692     }
693 }
694 
adjustForeignAttributes(AtomicHTMLToken & token)695 void adjustForeignAttributes(AtomicHTMLToken& token)
696 {
697     static PrefixedNameToQualifiedNameMap* map = 0;
698     if (!map) {
699         map = new PrefixedNameToQualifiedNameMap;
700         size_t length = 0;
701         QualifiedName** attrs = XLinkNames::getXLinkAttrs(&length);
702         addNamesWithPrefix(map, "xlink", attrs, length);
703 
704         attrs = XMLNames::getXMLAttrs(&length);
705         addNamesWithPrefix(map, "xml", attrs, length);
706 
707         map->add("xmlns", XMLNSNames::xmlnsAttr);
708         map->add("xmlns:xlink", QualifiedName("xmlns", "xlink", XMLNSNames::xmlnsNamespaceURI));
709     }
710 
711     NamedNodeMap* attributes = token.attributes();
712     if (!attributes)
713         return;
714 
715     for (unsigned x = 0; x < attributes->length(); ++x) {
716         Attribute* attribute = attributes->attributeItem(x);
717         const QualifiedName& name = map->get(attribute->localName());
718         if (!name.localName().isNull())
719             attribute->parserSetName(name);
720     }
721 }
722 
723 }
724 
processStartTagForInBody(AtomicHTMLToken & token)725 void HTMLTreeBuilder::processStartTagForInBody(AtomicHTMLToken& token)
726 {
727     ASSERT(token.type() == HTMLToken::StartTag);
728     if (token.name() == htmlTag) {
729         m_tree.insertHTMLHtmlStartTagInBody(token);
730         return;
731     }
732     if (token.name() == baseTag
733         || token.name() == basefontTag
734         || token.name() == bgsoundTag
735         || token.name() == commandTag
736         || token.name() == linkTag
737         || token.name() == metaTag
738         || token.name() == noframesTag
739         || token.name() == scriptTag
740         || token.name() == styleTag
741         || token.name() == titleTag) {
742         bool didProcess = processStartTagForInHead(token);
743         ASSERT_UNUSED(didProcess, didProcess);
744         return;
745     }
746     if (token.name() == bodyTag) {
747         if (!m_tree.openElements()->secondElementIsHTMLBodyElement() || m_tree.openElements()->hasOnlyOneElement()) {
748             ASSERT(isParsingFragment());
749             return;
750         }
751         m_tree.insertHTMLBodyStartTagInBody(token);
752         return;
753     }
754     if (token.name() == framesetTag) {
755         parseError(token);
756         if (!m_tree.openElements()->secondElementIsHTMLBodyElement() || m_tree.openElements()->hasOnlyOneElement()) {
757             ASSERT(isParsingFragment());
758             return;
759         }
760         if (!m_framesetOk)
761             return;
762         ExceptionCode ec = 0;
763         m_tree.openElements()->bodyElement()->remove(ec);
764         ASSERT(!ec);
765         m_tree.openElements()->popUntil(m_tree.openElements()->bodyElement());
766         m_tree.openElements()->popHTMLBodyElement();
767         ASSERT(m_tree.openElements()->top() == m_tree.openElements()->htmlElement());
768         m_tree.insertHTMLElement(token);
769         setInsertionMode(InFramesetMode);
770         return;
771     }
772     if (token.name() == addressTag
773         || token.name() == articleTag
774         || token.name() == asideTag
775         || token.name() == blockquoteTag
776         || token.name() == centerTag
777         || token.name() == detailsTag
778         || token.name() == dirTag
779         || token.name() == divTag
780         || token.name() == dlTag
781         || token.name() == fieldsetTag
782         || token.name() == figcaptionTag
783         || token.name() == figureTag
784         || token.name() == footerTag
785         || token.name() == headerTag
786         || token.name() == hgroupTag
787         || token.name() == menuTag
788         || token.name() == navTag
789         || token.name() == olTag
790         || token.name() == pTag
791         || token.name() == sectionTag
792         || token.name() == summaryTag
793         || token.name() == ulTag) {
794         processFakePEndTagIfPInButtonScope();
795         m_tree.insertHTMLElement(token);
796         return;
797     }
798     if (isNumberedHeaderTag(token.name())) {
799         processFakePEndTagIfPInButtonScope();
800         if (isNumberedHeaderTag(m_tree.currentNode()->localName())) {
801             parseError(token);
802             m_tree.openElements()->pop();
803         }
804         m_tree.insertHTMLElement(token);
805         return;
806     }
807     if (token.name() == preTag || token.name() == listingTag) {
808         processFakePEndTagIfPInButtonScope();
809         m_tree.insertHTMLElement(token);
810         m_parser->tokenizer()->setSkipLeadingNewLineForListing(true);
811         m_framesetOk = false;
812         return;
813     }
814     if (token.name() == formTag) {
815         if (m_tree.form()) {
816             parseError(token);
817             return;
818         }
819         processFakePEndTagIfPInButtonScope();
820         m_tree.insertHTMLFormElement(token);
821         return;
822     }
823     if (token.name() == liTag) {
824         processCloseWhenNestedTag<isLi>(token);
825         return;
826     }
827     if (token.name() == ddTag || token.name() == dtTag) {
828         processCloseWhenNestedTag<isDdOrDt>(token);
829         return;
830     }
831     if (token.name() == plaintextTag) {
832         processFakePEndTagIfPInButtonScope();
833         m_tree.insertHTMLElement(token);
834         m_parser->tokenizer()->setState(HTMLTokenizer::PLAINTEXTState);
835         return;
836     }
837     if (token.name() == buttonTag) {
838         if (m_tree.openElements()->inScope(buttonTag)) {
839             parseError(token);
840             processFakeEndTag(buttonTag);
841             reprocessStartTag(token); // FIXME: Could we just fall through here?
842             return;
843         }
844         m_tree.reconstructTheActiveFormattingElements();
845         m_tree.insertHTMLElement(token);
846         m_framesetOk = false;
847         return;
848     }
849     if (token.name() == aTag) {
850         Element* activeATag = m_tree.activeFormattingElements()->closestElementInScopeWithName(aTag.localName());
851         if (activeATag) {
852             parseError(token);
853             processFakeEndTag(aTag);
854             m_tree.activeFormattingElements()->remove(activeATag);
855             if (m_tree.openElements()->contains(activeATag))
856                 m_tree.openElements()->remove(activeATag);
857         }
858         m_tree.reconstructTheActiveFormattingElements();
859         m_tree.insertFormattingElement(token);
860         return;
861     }
862     if (isNonAnchorNonNobrFormattingTag(token.name())) {
863         m_tree.reconstructTheActiveFormattingElements();
864         m_tree.insertFormattingElement(token);
865         return;
866     }
867     if (token.name() == nobrTag) {
868         m_tree.reconstructTheActiveFormattingElements();
869         if (m_tree.openElements()->inScope(nobrTag)) {
870             parseError(token);
871             processFakeEndTag(nobrTag);
872             m_tree.reconstructTheActiveFormattingElements();
873         }
874         m_tree.insertFormattingElement(token);
875         return;
876     }
877     if (token.name() == appletTag
878         || token.name() == marqueeTag
879         || token.name() == objectTag) {
880         m_tree.reconstructTheActiveFormattingElements();
881         m_tree.insertHTMLElement(token);
882         m_tree.activeFormattingElements()->appendMarker();
883         m_framesetOk = false;
884         return;
885     }
886     if (token.name() == tableTag) {
887         if (!m_document->inQuirksMode() && m_tree.openElements()->inButtonScope(pTag))
888             processFakeEndTag(pTag);
889         m_tree.insertHTMLElement(token);
890         m_framesetOk = false;
891         setInsertionMode(InTableMode);
892         return;
893     }
894     if (token.name() == imageTag) {
895         parseError(token);
896         // Apparently we're not supposed to ask.
897         token.setName(imgTag.localName());
898         prepareToReprocessToken();
899         // Note the fall through to the imgTag handling below!
900     }
901     if (token.name() == areaTag
902         || token.name() == brTag
903         || token.name() == embedTag
904         || token.name() == imgTag
905         || token.name() == keygenTag
906         || token.name() == wbrTag) {
907         m_tree.reconstructTheActiveFormattingElements();
908         m_tree.insertSelfClosingHTMLElement(token);
909         m_framesetOk = false;
910         return;
911     }
912     if (token.name() == inputTag) {
913         RefPtr<Attribute> typeAttribute = token.getAttributeItem(typeAttr);
914         m_tree.reconstructTheActiveFormattingElements();
915         m_tree.insertSelfClosingHTMLElement(token);
916         if (!typeAttribute || !equalIgnoringCase(typeAttribute->value(), "hidden"))
917             m_framesetOk = false;
918         return;
919     }
920     if (token.name() == paramTag
921         || token.name() == sourceTag
922         || token.name() == trackTag) {
923         m_tree.insertSelfClosingHTMLElement(token);
924         return;
925     }
926     if (token.name() == hrTag) {
927         processFakePEndTagIfPInButtonScope();
928         m_tree.insertSelfClosingHTMLElement(token);
929         m_framesetOk = false;
930         return;
931     }
932     if (token.name() == isindexTag) {
933         processIsindexStartTagForInBody(token);
934         return;
935     }
936     if (token.name() == textareaTag) {
937         m_tree.insertHTMLElement(token);
938         m_parser->tokenizer()->setSkipLeadingNewLineForListing(true);
939         m_parser->tokenizer()->setState(HTMLTokenizer::RCDATAState);
940         m_originalInsertionMode = m_insertionMode;
941         m_framesetOk = false;
942         setInsertionMode(TextMode);
943         return;
944     }
945     if (token.name() == xmpTag) {
946         processFakePEndTagIfPInButtonScope();
947         m_tree.reconstructTheActiveFormattingElements();
948         m_framesetOk = false;
949         processGenericRawTextStartTag(token);
950         return;
951     }
952     if (token.name() == iframeTag) {
953         m_framesetOk = false;
954         processGenericRawTextStartTag(token);
955         return;
956     }
957     if (token.name() == noembedTag && pluginsEnabled(m_document->frame())) {
958         processGenericRawTextStartTag(token);
959         return;
960     }
961     if (token.name() == noscriptTag && scriptEnabled(m_document->frame())) {
962         processGenericRawTextStartTag(token);
963         return;
964     }
965     if (token.name() == selectTag) {
966         m_tree.reconstructTheActiveFormattingElements();
967         m_tree.insertHTMLElement(token);
968         m_framesetOk = false;
969         if (m_insertionMode == InTableMode
970              || m_insertionMode == InCaptionMode
971              || m_insertionMode == InColumnGroupMode
972              || m_insertionMode == InTableBodyMode
973              || m_insertionMode == InRowMode
974              || m_insertionMode == InCellMode)
975             setInsertionMode(InSelectInTableMode);
976         else
977             setInsertionMode(InSelectMode);
978         return;
979     }
980     if (token.name() == optgroupTag || token.name() == optionTag) {
981         if (m_tree.openElements()->inScope(optionTag.localName())) {
982             AtomicHTMLToken endOption(HTMLToken::EndTag, optionTag.localName());
983             processEndTag(endOption);
984         }
985         m_tree.reconstructTheActiveFormattingElements();
986         m_tree.insertHTMLElement(token);
987         return;
988     }
989     if (token.name() == rpTag || token.name() == rtTag) {
990         if (m_tree.openElements()->inScope(rubyTag.localName())) {
991             m_tree.generateImpliedEndTags();
992             if (!m_tree.currentNode()->hasTagName(rubyTag)) {
993                 parseError(token);
994                 m_tree.openElements()->popUntil(rubyTag.localName());
995             }
996         }
997         m_tree.insertHTMLElement(token);
998         return;
999     }
1000     if (token.name() == MathMLNames::mathTag.localName()) {
1001         m_tree.reconstructTheActiveFormattingElements();
1002         adjustMathMLAttributes(token);
1003         adjustForeignAttributes(token);
1004         m_tree.insertForeignElement(token, MathMLNames::mathmlNamespaceURI);
1005         if (m_insertionMode != InForeignContentMode && !token.selfClosing())
1006             setInsertionMode(InForeignContentMode);
1007         return;
1008     }
1009     if (token.name() == SVGNames::svgTag.localName()) {
1010         m_tree.reconstructTheActiveFormattingElements();
1011         adjustSVGAttributes(token);
1012         adjustForeignAttributes(token);
1013         m_tree.insertForeignElement(token, SVGNames::svgNamespaceURI);
1014         if (m_insertionMode != InForeignContentMode && !token.selfClosing())
1015             setInsertionMode(InForeignContentMode);
1016         return;
1017     }
1018     if (isCaptionColOrColgroupTag(token.name())
1019         || token.name() == frameTag
1020         || token.name() == headTag
1021         || isTableBodyContextTag(token.name())
1022         || isTableCellContextTag(token.name())
1023         || token.name() == trTag) {
1024         parseError(token);
1025         return;
1026     }
1027     m_tree.reconstructTheActiveFormattingElements();
1028     m_tree.insertHTMLElement(token);
1029 }
1030 
processColgroupEndTagForInColumnGroup()1031 bool HTMLTreeBuilder::processColgroupEndTagForInColumnGroup()
1032 {
1033     if (m_tree.currentNode() == m_tree.openElements()->rootNode()) {
1034         ASSERT(isParsingFragment());
1035         // FIXME: parse error
1036         return false;
1037     }
1038     m_tree.openElements()->pop();
1039     setInsertionMode(InTableMode);
1040     return true;
1041 }
1042 
1043 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#close-the-cell
closeTheCell()1044 void HTMLTreeBuilder::closeTheCell()
1045 {
1046     ASSERT(insertionMode() == InCellMode);
1047     if (m_tree.openElements()->inTableScope(tdTag)) {
1048         ASSERT(!m_tree.openElements()->inTableScope(thTag));
1049         processFakeEndTag(tdTag);
1050         return;
1051     }
1052     ASSERT(m_tree.openElements()->inTableScope(thTag));
1053     processFakeEndTag(thTag);
1054     ASSERT(insertionMode() == InRowMode);
1055 }
1056 
processStartTagForInTable(AtomicHTMLToken & token)1057 void HTMLTreeBuilder::processStartTagForInTable(AtomicHTMLToken& token)
1058 {
1059     ASSERT(token.type() == HTMLToken::StartTag);
1060     if (token.name() == captionTag) {
1061         m_tree.openElements()->popUntilTableScopeMarker();
1062         m_tree.activeFormattingElements()->appendMarker();
1063         m_tree.insertHTMLElement(token);
1064         setInsertionMode(InCaptionMode);
1065         return;
1066     }
1067     if (token.name() == colgroupTag) {
1068         m_tree.openElements()->popUntilTableScopeMarker();
1069         m_tree.insertHTMLElement(token);
1070         setInsertionMode(InColumnGroupMode);
1071         return;
1072     }
1073     if (token.name() == colTag) {
1074         processFakeStartTag(colgroupTag);
1075         ASSERT(InColumnGroupMode);
1076         reprocessStartTag(token);
1077         return;
1078     }
1079     if (isTableBodyContextTag(token.name())) {
1080         m_tree.openElements()->popUntilTableScopeMarker();
1081         m_tree.insertHTMLElement(token);
1082         setInsertionMode(InTableBodyMode);
1083         return;
1084     }
1085     if (isTableCellContextTag(token.name())
1086         || token.name() == trTag) {
1087         processFakeStartTag(tbodyTag);
1088         ASSERT(insertionMode() == InTableBodyMode);
1089         reprocessStartTag(token);
1090         return;
1091     }
1092     if (token.name() == tableTag) {
1093         parseError(token);
1094         if (!processTableEndTagForInTable()) {
1095             ASSERT(isParsingFragment());
1096             return;
1097         }
1098         reprocessStartTag(token);
1099         return;
1100     }
1101     if (token.name() == styleTag || token.name() == scriptTag) {
1102         processStartTagForInHead(token);
1103         return;
1104     }
1105     if (token.name() == inputTag) {
1106         Attribute* typeAttribute = token.getAttributeItem(typeAttr);
1107         if (typeAttribute && equalIgnoringCase(typeAttribute->value(), "hidden")) {
1108             parseError(token);
1109             m_tree.insertSelfClosingHTMLElement(token);
1110             return;
1111         }
1112         // Fall through to "anything else" case.
1113     }
1114     if (token.name() == formTag) {
1115         parseError(token);
1116         if (m_tree.form())
1117             return;
1118         m_tree.insertHTMLFormElement(token, true);
1119         m_tree.openElements()->pop();
1120         return;
1121     }
1122     parseError(token);
1123     HTMLConstructionSite::RedirectToFosterParentGuard redirecter(m_tree);
1124     processStartTagForInBody(token);
1125 }
1126 
1127 namespace {
1128 
shouldProcessForeignContentUsingInBodyInsertionMode(AtomicHTMLToken & token,ContainerNode * currentElement)1129 bool shouldProcessForeignContentUsingInBodyInsertionMode(AtomicHTMLToken& token, ContainerNode* currentElement)
1130 {
1131     ASSERT(token.type() == HTMLToken::StartTag);
1132     if (currentElement->hasTagName(MathMLNames::miTag)
1133         || currentElement->hasTagName(MathMLNames::moTag)
1134         || currentElement->hasTagName(MathMLNames::mnTag)
1135         || currentElement->hasTagName(MathMLNames::msTag)
1136         || currentElement->hasTagName(MathMLNames::mtextTag)) {
1137         return token.name() != MathMLNames::mglyphTag
1138             && token.name() != MathMLNames::malignmarkTag;
1139     }
1140     if (currentElement->hasTagName(MathMLNames::annotation_xmlTag))
1141         return token.name() == SVGNames::svgTag;
1142     if (currentElement->hasTagName(SVGNames::foreignObjectTag)
1143         || currentElement->hasTagName(SVGNames::descTag)
1144         || currentElement->hasTagName(SVGNames::titleTag))
1145         return true;
1146     return isInHTMLNamespace(currentElement);
1147 }
1148 
1149 }
1150 
processStartTag(AtomicHTMLToken & token)1151 void HTMLTreeBuilder::processStartTag(AtomicHTMLToken& token)
1152 {
1153     ASSERT(token.type() == HTMLToken::StartTag);
1154     switch (insertionMode()) {
1155     case InitialMode:
1156         ASSERT(insertionMode() == InitialMode);
1157         defaultForInitial();
1158         // Fall through.
1159     case BeforeHTMLMode:
1160         ASSERT(insertionMode() == BeforeHTMLMode);
1161         if (token.name() == htmlTag) {
1162             m_tree.insertHTMLHtmlStartTagBeforeHTML(token);
1163             setInsertionMode(BeforeHeadMode);
1164             return;
1165         }
1166         defaultForBeforeHTML();
1167         // Fall through.
1168     case BeforeHeadMode:
1169         ASSERT(insertionMode() == BeforeHeadMode);
1170         if (token.name() == htmlTag) {
1171             m_tree.insertHTMLHtmlStartTagInBody(token);
1172             return;
1173         }
1174         if (token.name() == headTag) {
1175             m_tree.insertHTMLHeadElement(token);
1176             setInsertionMode(InHeadMode);
1177             return;
1178         }
1179         defaultForBeforeHead();
1180         // Fall through.
1181     case InHeadMode:
1182         ASSERT(insertionMode() == InHeadMode);
1183         if (processStartTagForInHead(token))
1184             return;
1185         defaultForInHead();
1186         // Fall through.
1187     case AfterHeadMode:
1188         ASSERT(insertionMode() == AfterHeadMode);
1189         if (token.name() == htmlTag) {
1190             m_tree.insertHTMLHtmlStartTagInBody(token);
1191             return;
1192         }
1193         if (token.name() == bodyTag) {
1194             m_framesetOk = false;
1195             m_tree.insertHTMLBodyElement(token);
1196             setInsertionMode(InBodyMode);
1197             return;
1198         }
1199         if (token.name() == framesetTag) {
1200             m_tree.insertHTMLElement(token);
1201             setInsertionMode(InFramesetMode);
1202             return;
1203         }
1204         if (token.name() == baseTag
1205             || token.name() == basefontTag
1206             || token.name() == bgsoundTag
1207             || token.name() == linkTag
1208             || token.name() == metaTag
1209             || token.name() == noframesTag
1210             || token.name() == scriptTag
1211             || token.name() == styleTag
1212             || token.name() == titleTag) {
1213             parseError(token);
1214             ASSERT(m_tree.head());
1215             m_tree.openElements()->pushHTMLHeadElement(m_tree.head());
1216             processStartTagForInHead(token);
1217             m_tree.openElements()->removeHTMLHeadElement(m_tree.head());
1218             return;
1219         }
1220         if (token.name() == headTag) {
1221             parseError(token);
1222             return;
1223         }
1224         defaultForAfterHead();
1225         // Fall through
1226     case InBodyMode:
1227         ASSERT(insertionMode() == InBodyMode);
1228         processStartTagForInBody(token);
1229         break;
1230     case InTableMode:
1231         ASSERT(insertionMode() == InTableMode);
1232         processStartTagForInTable(token);
1233         break;
1234     case InCaptionMode:
1235         ASSERT(insertionMode() == InCaptionMode);
1236         if (isCaptionColOrColgroupTag(token.name())
1237             || isTableBodyContextTag(token.name())
1238             || isTableCellContextTag(token.name())
1239             || token.name() == trTag) {
1240             parseError(token);
1241             if (!processCaptionEndTagForInCaption()) {
1242                 ASSERT(isParsingFragment());
1243                 return;
1244             }
1245             reprocessStartTag(token);
1246             return;
1247         }
1248         processStartTagForInBody(token);
1249         break;
1250     case InColumnGroupMode:
1251         ASSERT(insertionMode() == InColumnGroupMode);
1252         if (token.name() == htmlTag) {
1253             m_tree.insertHTMLHtmlStartTagInBody(token);
1254             return;
1255         }
1256         if (token.name() == colTag) {
1257             m_tree.insertSelfClosingHTMLElement(token);
1258             return;
1259         }
1260         if (!processColgroupEndTagForInColumnGroup()) {
1261             ASSERT(isParsingFragment());
1262             return;
1263         }
1264         reprocessStartTag(token);
1265         break;
1266     case InTableBodyMode:
1267         ASSERT(insertionMode() == InTableBodyMode);
1268         if (token.name() == trTag) {
1269             m_tree.openElements()->popUntilTableBodyScopeMarker(); // How is there ever anything to pop?
1270             m_tree.insertHTMLElement(token);
1271             setInsertionMode(InRowMode);
1272             return;
1273         }
1274         if (isTableCellContextTag(token.name())) {
1275             parseError(token);
1276             processFakeStartTag(trTag);
1277             ASSERT(insertionMode() == InRowMode);
1278             reprocessStartTag(token);
1279             return;
1280         }
1281         if (isCaptionColOrColgroupTag(token.name()) || isTableBodyContextTag(token.name())) {
1282             // FIXME: This is slow.
1283             if (!m_tree.openElements()->inTableScope(tbodyTag.localName()) && !m_tree.openElements()->inTableScope(theadTag.localName()) && !m_tree.openElements()->inTableScope(tfootTag.localName())) {
1284                 ASSERT(isParsingFragment());
1285                 parseError(token);
1286                 return;
1287             }
1288             m_tree.openElements()->popUntilTableBodyScopeMarker();
1289             ASSERT(isTableBodyContextTag(m_tree.currentElement()->localName()));
1290             processFakeEndTag(m_tree.currentElement()->tagQName());
1291             reprocessStartTag(token);
1292             return;
1293         }
1294         processStartTagForInTable(token);
1295         break;
1296     case InRowMode:
1297         ASSERT(insertionMode() == InRowMode);
1298         if (isTableCellContextTag(token.name())) {
1299             m_tree.openElements()->popUntilTableRowScopeMarker();
1300             m_tree.insertHTMLElement(token);
1301             setInsertionMode(InCellMode);
1302             m_tree.activeFormattingElements()->appendMarker();
1303             return;
1304         }
1305         if (token.name() == trTag
1306             || isCaptionColOrColgroupTag(token.name())
1307             || isTableBodyContextTag(token.name())) {
1308             if (!processTrEndTagForInRow()) {
1309                 ASSERT(isParsingFragment());
1310                 return;
1311             }
1312             ASSERT(insertionMode() == InTableBodyMode);
1313             reprocessStartTag(token);
1314             return;
1315         }
1316         processStartTagForInTable(token);
1317         break;
1318     case InCellMode:
1319         ASSERT(insertionMode() == InCellMode);
1320         if (isCaptionColOrColgroupTag(token.name())
1321             || isTableCellContextTag(token.name())
1322             || token.name() == trTag
1323             || isTableBodyContextTag(token.name())) {
1324             // FIXME: This could be more efficient.
1325             if (!m_tree.openElements()->inTableScope(tdTag) && !m_tree.openElements()->inTableScope(thTag)) {
1326                 ASSERT(isParsingFragment());
1327                 parseError(token);
1328                 return;
1329             }
1330             closeTheCell();
1331             reprocessStartTag(token);
1332             return;
1333         }
1334         processStartTagForInBody(token);
1335         break;
1336     case AfterBodyMode:
1337     case AfterAfterBodyMode:
1338         ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
1339         if (token.name() == htmlTag) {
1340             m_tree.insertHTMLHtmlStartTagInBody(token);
1341             return;
1342         }
1343         setInsertionMode(InBodyMode);
1344         reprocessStartTag(token);
1345         break;
1346     case InHeadNoscriptMode:
1347         ASSERT(insertionMode() == InHeadNoscriptMode);
1348         if (token.name() == htmlTag) {
1349             m_tree.insertHTMLHtmlStartTagInBody(token);
1350             return;
1351         }
1352         if (token.name() == basefontTag
1353             || token.name() == bgsoundTag
1354             || token.name() == linkTag
1355             || token.name() == metaTag
1356             || token.name() == noframesTag
1357             || token.name() == styleTag) {
1358             bool didProcess = processStartTagForInHead(token);
1359             ASSERT_UNUSED(didProcess, didProcess);
1360             return;
1361         }
1362         if (token.name() == htmlTag || token.name() == noscriptTag) {
1363             parseError(token);
1364             return;
1365         }
1366         defaultForInHeadNoscript();
1367         processToken(token);
1368         break;
1369     case InFramesetMode:
1370         ASSERT(insertionMode() == InFramesetMode);
1371         if (token.name() == htmlTag) {
1372             m_tree.insertHTMLHtmlStartTagInBody(token);
1373             return;
1374         }
1375         if (token.name() == framesetTag) {
1376             m_tree.insertHTMLElement(token);
1377             return;
1378         }
1379         if (token.name() == frameTag) {
1380             m_tree.insertSelfClosingHTMLElement(token);
1381             return;
1382         }
1383         if (token.name() == noframesTag) {
1384             processStartTagForInHead(token);
1385             return;
1386         }
1387         parseError(token);
1388         break;
1389     case AfterFramesetMode:
1390     case AfterAfterFramesetMode:
1391         ASSERT(insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
1392         if (token.name() == htmlTag) {
1393             m_tree.insertHTMLHtmlStartTagInBody(token);
1394             return;
1395         }
1396         if (token.name() == noframesTag) {
1397             processStartTagForInHead(token);
1398             return;
1399         }
1400         parseError(token);
1401         break;
1402     case InSelectInTableMode:
1403         ASSERT(insertionMode() == InSelectInTableMode);
1404         if (token.name() == captionTag
1405             || token.name() == tableTag
1406             || isTableBodyContextTag(token.name())
1407             || token.name() == trTag
1408             || isTableCellContextTag(token.name())) {
1409             parseError(token);
1410             AtomicHTMLToken endSelect(HTMLToken::EndTag, selectTag.localName());
1411             processEndTag(endSelect);
1412             reprocessStartTag(token);
1413             return;
1414         }
1415         // Fall through
1416     case InSelectMode:
1417         ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode);
1418         if (token.name() == htmlTag) {
1419             m_tree.insertHTMLHtmlStartTagInBody(token);
1420             return;
1421         }
1422         if (token.name() == optionTag) {
1423             if (m_tree.currentNode()->hasTagName(optionTag)) {
1424                 AtomicHTMLToken endOption(HTMLToken::EndTag, optionTag.localName());
1425                 processEndTag(endOption);
1426             }
1427             m_tree.insertHTMLElement(token);
1428             return;
1429         }
1430         if (token.name() == optgroupTag) {
1431             if (m_tree.currentNode()->hasTagName(optionTag)) {
1432                 AtomicHTMLToken endOption(HTMLToken::EndTag, optionTag.localName());
1433                 processEndTag(endOption);
1434             }
1435             if (m_tree.currentNode()->hasTagName(optgroupTag)) {
1436                 AtomicHTMLToken endOptgroup(HTMLToken::EndTag, optgroupTag.localName());
1437                 processEndTag(endOptgroup);
1438             }
1439             m_tree.insertHTMLElement(token);
1440             return;
1441         }
1442         if (token.name() == selectTag) {
1443             parseError(token);
1444             AtomicHTMLToken endSelect(HTMLToken::EndTag, selectTag.localName());
1445             processEndTag(endSelect);
1446             return;
1447         }
1448         if (token.name() == inputTag
1449             || token.name() == keygenTag
1450             || token.name() == textareaTag) {
1451             parseError(token);
1452             if (!m_tree.openElements()->inSelectScope(selectTag)) {
1453                 ASSERT(isParsingFragment());
1454                 return;
1455             }
1456             AtomicHTMLToken endSelect(HTMLToken::EndTag, selectTag.localName());
1457             processEndTag(endSelect);
1458             reprocessStartTag(token);
1459             return;
1460         }
1461         if (token.name() == scriptTag) {
1462             bool didProcess = processStartTagForInHead(token);
1463             ASSERT_UNUSED(didProcess, didProcess);
1464             return;
1465         }
1466         break;
1467     case InTableTextMode:
1468         defaultForInTableText();
1469         processStartTag(token);
1470         break;
1471     case InForeignContentMode: {
1472         if (shouldProcessForeignContentUsingInBodyInsertionMode(token, m_tree.currentNode())) {
1473             processForeignContentUsingInBodyModeAndResetMode(token);
1474             return;
1475         }
1476         if (token.name() == bTag
1477             || token.name() == bigTag
1478             || token.name() == blockquoteTag
1479             || token.name() == bodyTag
1480             || token.name() == brTag
1481             || token.name() == centerTag
1482             || token.name() == codeTag
1483             || token.name() == ddTag
1484             || token.name() == divTag
1485             || token.name() == dlTag
1486             || token.name() == dtTag
1487             || token.name() == emTag
1488             || token.name() == embedTag
1489             || isNumberedHeaderTag(token.name())
1490             || token.name() == headTag
1491             || token.name() == hrTag
1492             || token.name() == iTag
1493             || token.name() == imgTag
1494             || token.name() == liTag
1495             || token.name() == listingTag
1496             || token.name() == menuTag
1497             || token.name() == metaTag
1498             || token.name() == nobrTag
1499             || token.name() == olTag
1500             || token.name() == pTag
1501             || token.name() == preTag
1502             || token.name() == rubyTag
1503             || token.name() == sTag
1504             || token.name() == smallTag
1505             || token.name() == spanTag
1506             || token.name() == strongTag
1507             || token.name() == strikeTag
1508             || token.name() == subTag
1509             || token.name() == supTag
1510             || token.name() == tableTag
1511             || token.name() == ttTag
1512             || token.name() == uTag
1513             || token.name() == ulTag
1514             || token.name() == varTag
1515             || (token.name() == fontTag && (token.getAttributeItem(colorAttr) || token.getAttributeItem(faceAttr) || token.getAttributeItem(sizeAttr)))) {
1516             parseError(token);
1517             m_tree.openElements()->popUntilForeignContentScopeMarker();
1518             resetInsertionModeAppropriately();
1519             reprocessStartTag(token);
1520             return;
1521         }
1522         const AtomicString& currentNamespace = m_tree.currentElement()->namespaceURI();
1523         if (currentNamespace == MathMLNames::mathmlNamespaceURI)
1524             adjustMathMLAttributes(token);
1525         if (currentNamespace == SVGNames::svgNamespaceURI) {
1526             adjustSVGTagNameCase(token);
1527             adjustSVGAttributes(token);
1528         }
1529         adjustForeignAttributes(token);
1530         m_tree.insertForeignElement(token, currentNamespace);
1531         break;
1532     }
1533     case TextMode:
1534         ASSERT_NOT_REACHED();
1535         break;
1536     }
1537 }
1538 
processBodyEndTagForInBody(AtomicHTMLToken & token)1539 bool HTMLTreeBuilder::processBodyEndTagForInBody(AtomicHTMLToken& token)
1540 {
1541     ASSERT(token.type() == HTMLToken::EndTag);
1542     ASSERT(token.name() == bodyTag);
1543     if (!m_tree.openElements()->inScope(bodyTag.localName())) {
1544         parseError(token);
1545         return false;
1546     }
1547     notImplemented(); // Emit a more specific parse error based on stack contents.
1548     setInsertionMode(AfterBodyMode);
1549     return true;
1550 }
1551 
// Handles an end tag that has no dedicated rule in the "in body" mode.
// Walks the open-element stack starting at the top record. If a node whose
// local name matches the token is found, implied end tags are generated and
// the stack is popped until that node has been popped. If a "special" node is
// reached first, the token is a parse error and is ignored.
void HTMLTreeBuilder::processAnyOtherEndTagForInBody(AtomicHTMLToken& token)
{
    ASSERT(token.type() == HTMLToken::EndTag);
    HTMLElementStack::ElementRecord* record = m_tree.openElements()->topRecord();
    // NOTE(review): the loop has no null check on record; it relies on hitting
    // a matching or special node before next() runs out — confirm.
    while (1) {
        // Hold a reference to the node: the stack may be modified below.
        RefPtr<ContainerNode> node = record->node();
        if (node->hasLocalName(token.name())) {
            m_tree.generateImpliedEndTags();
            // FIXME: The ElementRecord pointed to by record might be deleted by
            // the preceding call. Perhaps we should hold a RefPtr so that it
            // stays alive for the duration of record's scope.
            // Clear the pointer so it cannot be used after possible deletion.
            record = 0;
            if (!m_tree.currentNode()->hasLocalName(token.name())) {
                parseError(token);
                // FIXME: This is either a bug in the spec, or a bug in our
                // implementation.  Filed a bug with HTML5:
                // http://www.w3.org/Bugs/Public/show_bug.cgi?id=10080
                // We might have already popped the node for the token in
                // generateImpliedEndTags, just abort.
                if (!m_tree.openElements()->contains(toElement(node.get())))
                    return;
            }
            m_tree.openElements()->popUntilPopped(toElement(node.get()));
            return;
        }
        // A special node blocks the search: report the error and ignore the token.
        if (isSpecialNode(node.get())) {
            parseError(token);
            return;
        }
        record = record->next();
    }
}
1584 
1585 // FIXME: This probably belongs on HTMLElementStack.
furthestBlockForFormattingElement(Element * formattingElement)1586 HTMLElementStack::ElementRecord* HTMLTreeBuilder::furthestBlockForFormattingElement(Element* formattingElement)
1587 {
1588     HTMLElementStack::ElementRecord* furthestBlock = 0;
1589     HTMLElementStack::ElementRecord* record = m_tree.openElements()->topRecord();
1590     for (; record; record = record->next()) {
1591         if (record->element() == formattingElement)
1592             return furthestBlock;
1593         if (isSpecialNode(record->element()))
1594             furthestBlock = record;
1595     }
1596     ASSERT_NOT_REACHED();
1597     return 0;
1598 }
1599 
// http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#parsing-main-inbody
//
// Handles an end tag for a formatting element by re-parenting mis-nested
// content. The numbered comments below (1. through 12) appear to track the
// step numbers of the algorithm described at the URL above.
//
// Each pass of the outer loop looks up the closest active formatting element
// matching |token|'s name, finds the "furthest block" above it on the stack of
// open elements, and then rebuilds the DOM, the stack of open elements and the
// list of active formatting elements around a freshly created copy of the
// formatting element. The function returns early when there is no matching
// formatting element, when it is not on the stack / not in scope, or when
// there is no furthest block.
void HTMLTreeBuilder::callTheAdoptionAgency(AtomicHTMLToken& token)
{
    // The adoption agency algorithm is N^2.  We limit the number of iterations
    // to stop from hanging the whole browser.  This limit is copied from the
    // legacy tree builder and might need to be tweaked in the future.
    static const int adoptionAgencyIterationLimit = 10;

    for (int i = 0; i < adoptionAgencyIterationLimit; ++i) {
        // 1.
        Element* formattingElement = m_tree.activeFormattingElements()->closestElementInScopeWithName(token.name());
        // Bail out if nothing matches, or if the match is on the stack of open
        // elements but not in scope.
        if (!formattingElement || ((m_tree.openElements()->contains(formattingElement)) && !m_tree.openElements()->inScope(formattingElement))) {
            parseError(token);
            notImplemented(); // Check the stack of open elements for a more specific parse error.
            return;
        }
        HTMLElementStack::ElementRecord* formattingElementRecord = m_tree.openElements()->find(formattingElement);
        if (!formattingElementRecord) {
            // In the active formatting list but no longer on the stack: just
            // drop it from the list.
            parseError(token);
            m_tree.activeFormattingElements()->remove(formattingElement);
            return;
        }
        if (formattingElement != m_tree.currentElement())
            parseError(token);
        // 2.
        HTMLElementStack::ElementRecord* furthestBlock = furthestBlockForFormattingElement(formattingElement);
        // 3.
        if (!furthestBlock) {
            // No special node above the formatting element: a plain pop is enough.
            m_tree.openElements()->popUntilPopped(formattingElement);
            m_tree.activeFormattingElements()->remove(formattingElement);
            return;
        }
        // 4.
        ASSERT(furthestBlock->isAbove(formattingElementRecord));
        // The common ancestor is the stack entry immediately below the
        // formatting element. Held in a RefPtr because the stack is mutated below.
        RefPtr<ContainerNode> commonAncestor = formattingElementRecord->next()->node();
        // 5.
        HTMLFormattingElementList::Bookmark bookmark = m_tree.activeFormattingElements()->bookmarkFor(formattingElement);
        // 6.
        HTMLElementStack::ElementRecord* node = furthestBlock;
        HTMLElementStack::ElementRecord* nextNode = node->next();
        HTMLElementStack::ElementRecord* lastNode = furthestBlock;
        // NOTE(review): this loop's |i| shadows the outer loop's |i|; the two
        // counters are independent, each bounded by adoptionAgencyIterationLimit.
        for (int i = 0; i < adoptionAgencyIterationLimit; ++i) {
            // 6.1
            node = nextNode;
            ASSERT(node);
            nextNode = node->next(); // Save node->next() for the next iteration in case node is deleted in 6.2.
            // 6.2
            if (!m_tree.activeFormattingElements()->contains(node->element())) {
                m_tree.openElements()->remove(node->element());
                node = 0;
                continue;
            }
            // 6.3
            if (node == formattingElementRecord)
                break;
            // 6.5
            // Replace the element in both the formatting list entry and the
            // stack record with a newly created element.
            RefPtr<Element> newElement = m_tree.createHTMLElementFromElementRecord(node);
            HTMLFormattingElementList::Entry* nodeEntry = m_tree.activeFormattingElements()->find(node->element());
            nodeEntry->replaceElement(newElement.get());
            node->replaceElement(newElement.release());
            // 6.4 -- Intentionally out of order to handle the case where node
            // was replaced in 6.5.
            // http://www.w3.org/Bugs/Public/show_bug.cgi?id=10096
            if (lastNode == furthestBlock)
                bookmark.moveToAfter(nodeEntry);
            // 6.6
            // Move lastNode's element under node's (new) element, detaching it
            // from its current parent first.
            if (Element* parent = lastNode->element()->parentElement())
                parent->parserRemoveChild(lastNode->element());
            node->element()->parserAddChild(lastNode->element());
            if (lastNode->element()->parentElement()->attached() && !lastNode->element()->attached())
                lastNode->element()->lazyAttach();
            // 6.7
            lastNode = node;
        }
        // 7
        const AtomicString& commonAncestorTag = commonAncestor->localName();
        if (Element* parent = lastNode->element()->parentElement())
            parent->parserRemoveChild(lastNode->element());
        // FIXME: If this moves to HTMLConstructionSite, this check should use
        // causesFosterParenting(tagName) instead.
        if (commonAncestorTag == tableTag
            || commonAncestorTag == trTag
            || isTableBodyContextTag(commonAncestorTag))
            m_tree.fosterParent(lastNode->element());
        else {
            commonAncestor->parserAddChild(lastNode->element());
            ASSERT(lastNode->node()->isElementNode());
            ASSERT(lastNode->element()->parentNode());
            if (lastNode->element()->parentNode()->attached() && !lastNode->element()->attached())
                lastNode->element()->lazyAttach();
        }
        // 8
        RefPtr<Element> newElement = m_tree.createHTMLElementFromElementRecord(formattingElementRecord);
        // 9
        newElement->takeAllChildrenFrom(furthestBlock->element());
        // 10
        Element* furthestBlockElement = furthestBlock->element();
        // FIXME: All this creation / parserAddChild / attach business should
        //        be in HTMLConstructionSite.  My guess is that steps 8--12
        //        should all be in some HTMLConstructionSite function.
        furthestBlockElement->parserAddChild(newElement);
        if (furthestBlockElement->attached() && !newElement->attached()) {
            // Notice that newElement might already be attached if, for example, one of the reparented
            // children is a style element, which attaches itself automatically.
            newElement->attach();
        }
        // 11
        m_tree.activeFormattingElements()->swapTo(formattingElement, newElement.get(), bookmark);
        // 12
        // Swap the old formatting element for the new one on the stack,
        // positioning the new one relative to the furthest block.
        m_tree.openElements()->remove(formattingElement);
        m_tree.openElements()->insertAbove(newElement, furthestBlock);
    }
}
1713 
resetInsertionModeAppropriately()1714 void HTMLTreeBuilder::resetInsertionModeAppropriately()
1715 {
1716     // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#reset-the-insertion-mode-appropriately
1717     bool last = false;
1718     HTMLElementStack::ElementRecord* nodeRecord = m_tree.openElements()->topRecord();
1719     while (1) {
1720         ContainerNode* node = nodeRecord->node();
1721         if (node == m_tree.openElements()->rootNode()) {
1722             ASSERT(isParsingFragment());
1723             last = true;
1724             node = m_fragmentContext.contextElement();
1725         }
1726         if (node->hasTagName(selectTag)) {
1727             ASSERT(isParsingFragment());
1728             return setInsertionMode(InSelectMode);
1729         }
1730         if (node->hasTagName(tdTag) || node->hasTagName(thTag))
1731             return setInsertionMode(InCellMode);
1732         if (node->hasTagName(trTag))
1733             return setInsertionMode(InRowMode);
1734         if (node->hasTagName(tbodyTag) || node->hasTagName(theadTag) || node->hasTagName(tfootTag))
1735             return setInsertionMode(InTableBodyMode);
1736         if (node->hasTagName(captionTag))
1737             return setInsertionMode(InCaptionMode);
1738         if (node->hasTagName(colgroupTag)) {
1739             ASSERT(isParsingFragment());
1740             return setInsertionMode(InColumnGroupMode);
1741         }
1742         if (node->hasTagName(tableTag))
1743             return setInsertionMode(InTableMode);
1744         if (node->hasTagName(headTag)) {
1745             ASSERT(isParsingFragment());
1746             return setInsertionMode(InBodyMode);
1747         }
1748         if (node->hasTagName(bodyTag))
1749             return setInsertionMode(InBodyMode);
1750         if (node->hasTagName(framesetTag)) {
1751             ASSERT(isParsingFragment());
1752             return setInsertionMode(InFramesetMode);
1753         }
1754         if (node->hasTagName(htmlTag)) {
1755             ASSERT(isParsingFragment());
1756             return setInsertionMode(BeforeHeadMode);
1757         }
1758         if (node->namespaceURI() == SVGNames::svgNamespaceURI
1759             || node->namespaceURI() == MathMLNames::mathmlNamespaceURI)
1760             return setInsertionMode(InForeignContentMode);
1761         if (last) {
1762             ASSERT(isParsingFragment());
1763             return setInsertionMode(InBodyMode);
1764         }
1765         nodeRecord = nodeRecord->next();
1766     }
1767 }
1768 
processEndTagForInTableBody(AtomicHTMLToken & token)1769 void HTMLTreeBuilder::processEndTagForInTableBody(AtomicHTMLToken& token)
1770 {
1771     ASSERT(token.type() == HTMLToken::EndTag);
1772     if (isTableBodyContextTag(token.name())) {
1773         if (!m_tree.openElements()->inTableScope(token.name())) {
1774             parseError(token);
1775             return;
1776         }
1777         m_tree.openElements()->popUntilTableBodyScopeMarker();
1778         m_tree.openElements()->pop();
1779         setInsertionMode(InTableMode);
1780         return;
1781     }
1782     if (token.name() == tableTag) {
1783         // FIXME: This is slow.
1784         if (!m_tree.openElements()->inTableScope(tbodyTag.localName()) && !m_tree.openElements()->inTableScope(theadTag.localName()) && !m_tree.openElements()->inTableScope(tfootTag.localName())) {
1785             ASSERT(isParsingFragment());
1786             parseError(token);
1787             return;
1788         }
1789         m_tree.openElements()->popUntilTableBodyScopeMarker();
1790         ASSERT(isTableBodyContextTag(m_tree.currentElement()->localName()));
1791         processFakeEndTag(m_tree.currentElement()->tagQName());
1792         reprocessEndTag(token);
1793         return;
1794     }
1795     if (token.name() == bodyTag
1796         || isCaptionColOrColgroupTag(token.name())
1797         || token.name() == htmlTag
1798         || isTableCellContextTag(token.name())
1799         || token.name() == trTag) {
1800         parseError(token);
1801         return;
1802     }
1803     processEndTagForInTable(token);
1804 }
1805 
processEndTagForInRow(AtomicHTMLToken & token)1806 void HTMLTreeBuilder::processEndTagForInRow(AtomicHTMLToken& token)
1807 {
1808     ASSERT(token.type() == HTMLToken::EndTag);
1809     if (token.name() == trTag) {
1810         processTrEndTagForInRow();
1811         return;
1812     }
1813     if (token.name() == tableTag) {
1814         if (!processTrEndTagForInRow()) {
1815             ASSERT(isParsingFragment());
1816             return;
1817         }
1818         ASSERT(insertionMode() == InTableBodyMode);
1819         reprocessEndTag(token);
1820         return;
1821     }
1822     if (isTableBodyContextTag(token.name())) {
1823         if (!m_tree.openElements()->inTableScope(token.name())) {
1824             parseError(token);
1825             return;
1826         }
1827         processFakeEndTag(trTag);
1828         ASSERT(insertionMode() == InTableBodyMode);
1829         reprocessEndTag(token);
1830         return;
1831     }
1832     if (token.name() == bodyTag
1833         || isCaptionColOrColgroupTag(token.name())
1834         || token.name() == htmlTag
1835         || isTableCellContextTag(token.name())) {
1836         parseError(token);
1837         return;
1838     }
1839     processEndTagForInTable(token);
1840 }
1841 
processEndTagForInCell(AtomicHTMLToken & token)1842 void HTMLTreeBuilder::processEndTagForInCell(AtomicHTMLToken& token)
1843 {
1844     ASSERT(token.type() == HTMLToken::EndTag);
1845     if (isTableCellContextTag(token.name())) {
1846         if (!m_tree.openElements()->inTableScope(token.name())) {
1847             parseError(token);
1848             return;
1849         }
1850         m_tree.generateImpliedEndTags();
1851         if (!m_tree.currentNode()->hasLocalName(token.name()))
1852             parseError(token);
1853         m_tree.openElements()->popUntilPopped(token.name());
1854         m_tree.activeFormattingElements()->clearToLastMarker();
1855         setInsertionMode(InRowMode);
1856         return;
1857     }
1858     if (token.name() == bodyTag
1859         || isCaptionColOrColgroupTag(token.name())
1860         || token.name() == htmlTag) {
1861         parseError(token);
1862         return;
1863     }
1864     if (token.name() == tableTag
1865         || token.name() == trTag
1866         || isTableBodyContextTag(token.name())) {
1867         if (!m_tree.openElements()->inTableScope(token.name())) {
1868             ASSERT(isTableBodyContextTag(token.name()) || isParsingFragment());
1869             parseError(token);
1870             return;
1871         }
1872         closeTheCell();
1873         reprocessEndTag(token);
1874         return;
1875     }
1876     processEndTagForInBody(token);
1877 }
1878 
processEndTagForInBody(AtomicHTMLToken & token)1879 void HTMLTreeBuilder::processEndTagForInBody(AtomicHTMLToken& token)
1880 {
1881     ASSERT(token.type() == HTMLToken::EndTag);
1882     if (token.name() == bodyTag) {
1883         processBodyEndTagForInBody(token);
1884         return;
1885     }
1886     if (token.name() == htmlTag) {
1887         AtomicHTMLToken endBody(HTMLToken::EndTag, bodyTag.localName());
1888         if (processBodyEndTagForInBody(endBody))
1889             reprocessEndTag(token);
1890         return;
1891     }
1892     if (token.name() == addressTag
1893         || token.name() == articleTag
1894         || token.name() == asideTag
1895         || token.name() == blockquoteTag
1896         || token.name() == buttonTag
1897         || token.name() == centerTag
1898         || token.name() == detailsTag
1899         || token.name() == dirTag
1900         || token.name() == divTag
1901         || token.name() == dlTag
1902         || token.name() == fieldsetTag
1903         || token.name() == figcaptionTag
1904         || token.name() == figureTag
1905         || token.name() == footerTag
1906         || token.name() == headerTag
1907         || token.name() == hgroupTag
1908         || token.name() == listingTag
1909         || token.name() == menuTag
1910         || token.name() == navTag
1911         || token.name() == olTag
1912         || token.name() == preTag
1913         || token.name() == sectionTag
1914         || token.name() == summaryTag
1915         || token.name() == ulTag) {
1916         if (!m_tree.openElements()->inScope(token.name())) {
1917             parseError(token);
1918             return;
1919         }
1920         m_tree.generateImpliedEndTags();
1921         if (!m_tree.currentNode()->hasLocalName(token.name()))
1922             parseError(token);
1923         m_tree.openElements()->popUntilPopped(token.name());
1924         return;
1925     }
1926     if (token.name() == formTag) {
1927         RefPtr<Element> node = m_tree.takeForm();
1928         if (!node || !m_tree.openElements()->inScope(node.get())) {
1929             parseError(token);
1930             return;
1931         }
1932         m_tree.generateImpliedEndTags();
1933         if (m_tree.currentElement() != node.get())
1934             parseError(token);
1935         m_tree.openElements()->remove(node.get());
1936     }
1937     if (token.name() == pTag) {
1938         if (!m_tree.openElements()->inButtonScope(token.name())) {
1939             parseError(token);
1940             processFakeStartTag(pTag);
1941             ASSERT(m_tree.openElements()->inScope(token.name()));
1942             reprocessEndTag(token);
1943             return;
1944         }
1945         m_tree.generateImpliedEndTagsWithExclusion(token.name());
1946         if (!m_tree.currentNode()->hasLocalName(token.name()))
1947             parseError(token);
1948         m_tree.openElements()->popUntilPopped(token.name());
1949         return;
1950     }
1951     if (token.name() == liTag) {
1952         if (!m_tree.openElements()->inListItemScope(token.name())) {
1953             parseError(token);
1954             return;
1955         }
1956         m_tree.generateImpliedEndTagsWithExclusion(token.name());
1957         if (!m_tree.currentNode()->hasLocalName(token.name()))
1958             parseError(token);
1959         m_tree.openElements()->popUntilPopped(token.name());
1960         return;
1961     }
1962     if (token.name() == ddTag
1963         || token.name() == dtTag) {
1964         if (!m_tree.openElements()->inScope(token.name())) {
1965             parseError(token);
1966             return;
1967         }
1968         m_tree.generateImpliedEndTagsWithExclusion(token.name());
1969         if (!m_tree.currentNode()->hasLocalName(token.name()))
1970             parseError(token);
1971         m_tree.openElements()->popUntilPopped(token.name());
1972         return;
1973     }
1974     if (isNumberedHeaderTag(token.name())) {
1975         if (!m_tree.openElements()->hasNumberedHeaderElementInScope()) {
1976             parseError(token);
1977             return;
1978         }
1979         m_tree.generateImpliedEndTags();
1980         if (!m_tree.currentNode()->hasLocalName(token.name()))
1981             parseError(token);
1982         m_tree.openElements()->popUntilNumberedHeaderElementPopped();
1983         return;
1984     }
1985     if (isFormattingTag(token.name())) {
1986         callTheAdoptionAgency(token);
1987         return;
1988     }
1989     if (token.name() == appletTag
1990         || token.name() == marqueeTag
1991         || token.name() == objectTag) {
1992         if (!m_tree.openElements()->inScope(token.name())) {
1993             parseError(token);
1994             return;
1995         }
1996         m_tree.generateImpliedEndTags();
1997         if (!m_tree.currentNode()->hasLocalName(token.name()))
1998             parseError(token);
1999         m_tree.openElements()->popUntilPopped(token.name());
2000         m_tree.activeFormattingElements()->clearToLastMarker();
2001         return;
2002     }
2003     if (token.name() == brTag) {
2004         parseError(token);
2005         processFakeStartTag(brTag);
2006         return;
2007     }
2008     processAnyOtherEndTagForInBody(token);
2009 }
2010 
processCaptionEndTagForInCaption()2011 bool HTMLTreeBuilder::processCaptionEndTagForInCaption()
2012 {
2013     if (!m_tree.openElements()->inTableScope(captionTag.localName())) {
2014         ASSERT(isParsingFragment());
2015         // FIXME: parse error
2016         return false;
2017     }
2018     m_tree.generateImpliedEndTags();
2019     // FIXME: parse error if (!m_tree.currentElement()->hasTagName(captionTag))
2020     m_tree.openElements()->popUntilPopped(captionTag.localName());
2021     m_tree.activeFormattingElements()->clearToLastMarker();
2022     setInsertionMode(InTableMode);
2023     return true;
2024 }
2025 
processTrEndTagForInRow()2026 bool HTMLTreeBuilder::processTrEndTagForInRow()
2027 {
2028     if (!m_tree.openElements()->inTableScope(trTag.localName())) {
2029         ASSERT(isParsingFragment());
2030         // FIXME: parse error
2031         return false;
2032     }
2033     m_tree.openElements()->popUntilTableRowScopeMarker();
2034     ASSERT(m_tree.currentElement()->hasTagName(trTag));
2035     m_tree.openElements()->pop();
2036     setInsertionMode(InTableBodyMode);
2037     return true;
2038 }
2039 
processTableEndTagForInTable()2040 bool HTMLTreeBuilder::processTableEndTagForInTable()
2041 {
2042     if (!m_tree.openElements()->inTableScope(tableTag)) {
2043         ASSERT(isParsingFragment());
2044         // FIXME: parse error.
2045         return false;
2046     }
2047     m_tree.openElements()->popUntilPopped(tableTag.localName());
2048     resetInsertionModeAppropriately();
2049     return true;
2050 }
2051 
processEndTagForInTable(AtomicHTMLToken & token)2052 void HTMLTreeBuilder::processEndTagForInTable(AtomicHTMLToken& token)
2053 {
2054     ASSERT(token.type() == HTMLToken::EndTag);
2055     if (token.name() == tableTag) {
2056         processTableEndTagForInTable();
2057         return;
2058     }
2059     if (token.name() == bodyTag
2060         || isCaptionColOrColgroupTag(token.name())
2061         || token.name() == htmlTag
2062         || isTableBodyContextTag(token.name())
2063         || isTableCellContextTag(token.name())
2064         || token.name() == trTag) {
2065         parseError(token);
2066         return;
2067     }
2068     // Is this redirection necessary here?
2069     HTMLConstructionSite::RedirectToFosterParentGuard redirecter(m_tree);
2070     processEndTagForInBody(token);
2071 }
2072 
// Dispatches an end tag token according to the current insertion mode.
//
// Several cases intentionally fall through into the next one (each marked
// "Fall through"). For the InitialMode .. AfterHeadMode chain, each defaultFor*()
// call is expected to advance the insertion mode to the one named by the
// following case label, which the ASSERT at the top of that case checks.
// Modes with larger rule sets delegate to a processEndTagFor*() helper.
void HTMLTreeBuilder::processEndTag(AtomicHTMLToken& token)
{
    ASSERT(token.type() == HTMLToken::EndTag);
    switch (insertionMode()) {
    case InitialMode:
        ASSERT(insertionMode() == InitialMode);
        defaultForInitial();
        // Fall through.
    case BeforeHTMLMode:
        ASSERT(insertionMode() == BeforeHTMLMode);
        // Only </head>, </body>, </html> and </br> get past this point; any
        // other end tag is a parse error and is dropped.
        if (token.name() != headTag && token.name() != bodyTag && token.name() != htmlTag && token.name() != brTag) {
            parseError(token);
            return;
        }
        defaultForBeforeHTML();
        // Fall through.
    case BeforeHeadMode:
        ASSERT(insertionMode() == BeforeHeadMode);
        if (token.name() != headTag && token.name() != bodyTag && token.name() != htmlTag && token.name() != brTag) {
            parseError(token);
            return;
        }
        defaultForBeforeHead();
        // Fall through.
    case InHeadMode:
        ASSERT(insertionMode() == InHeadMode);
        if (token.name() == headTag) {
            m_tree.openElements()->popHTMLHeadElement();
            setInsertionMode(AfterHeadMode);
            return;
        }
        if (token.name() != bodyTag && token.name() != htmlTag && token.name() != brTag) {
            parseError(token);
            return;
        }
        defaultForInHead();
        // Fall through.
    case AfterHeadMode:
        ASSERT(insertionMode() == AfterHeadMode);
        if (token.name() != bodyTag && token.name() != htmlTag && token.name() != brTag) {
            parseError(token);
            return;
        }
        defaultForAfterHead();
        // Fall through
    case InBodyMode:
        ASSERT(insertionMode() == InBodyMode);
        processEndTagForInBody(token);
        break;
    case InTableMode:
        ASSERT(insertionMode() == InTableMode);
        processEndTagForInTable(token);
        break;
    case InCaptionMode:
        ASSERT(insertionMode() == InCaptionMode);
        if (token.name() == captionTag) {
            processCaptionEndTagForInCaption();
            return;
        }
        if (token.name() == tableTag) {
            // </table> inside a caption: close the caption first, then let the
            // new insertion mode handle the </table>.
            parseError(token);
            if (!processCaptionEndTagForInCaption()) {
                ASSERT(isParsingFragment());
                return;
            }
            reprocessEndTag(token);
            return;
        }
        if (token.name() == bodyTag
            || token.name() == colTag
            || token.name() == colgroupTag
            || token.name() == htmlTag
            || isTableBodyContextTag(token.name())
            || isTableCellContextTag(token.name())
            || token.name() == trTag) {
            parseError(token);
            return;
        }
        processEndTagForInBody(token);
        break;
    case InColumnGroupMode:
        ASSERT(insertionMode() == InColumnGroupMode);
        if (token.name() == colgroupTag) {
            processColgroupEndTagForInColumnGroup();
            return;
        }
        if (token.name() == colTag) {
            parseError(token);
            return;
        }
        // Any other end tag: close the colgroup and, if that worked,
        // reprocess the token.
        if (!processColgroupEndTagForInColumnGroup()) {
            ASSERT(isParsingFragment());
            return;
        }
        reprocessEndTag(token);
        break;
    case InRowMode:
        ASSERT(insertionMode() == InRowMode);
        processEndTagForInRow(token);
        break;
    case InCellMode:
        ASSERT(insertionMode() == InCellMode);
        processEndTagForInCell(token);
        break;
    case InTableBodyMode:
        ASSERT(insertionMode() == InTableBodyMode);
        processEndTagForInTableBody(token);
        break;
    case AfterBodyMode:
        ASSERT(insertionMode() == AfterBodyMode);
        if (token.name() == htmlTag) {
            if (isParsingFragment()) {
                parseError(token);
                return;
            }
            setInsertionMode(AfterAfterBodyMode);
            return;
        }
        prepareToReprocessToken();
        // Fall through.
    case AfterAfterBodyMode:
        ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
        // Stray end tag after the body: report it, go back to InBodyMode and
        // process the token again there.
        parseError(token);
        setInsertionMode(InBodyMode);
        reprocessEndTag(token);
        break;
    case InHeadNoscriptMode:
        ASSERT(insertionMode() == InHeadNoscriptMode);
        if (token.name() == noscriptTag) {
            ASSERT(m_tree.currentElement()->hasTagName(noscriptTag));
            m_tree.openElements()->pop();
            ASSERT(m_tree.currentElement()->hasTagName(headTag));
            setInsertionMode(InHeadMode);
            return;
        }
        if (token.name() != brTag) {
            parseError(token);
            return;
        }
        // Only </br> reaches here.
        defaultForInHeadNoscript();
        processToken(token);
        break;
    case TextMode:
        if (token.name() == scriptTag) {
            // Pause ourselves so that parsing stops until the script can be processed by the caller.
            m_isPaused = true;
            ASSERT(m_tree.currentElement()->hasTagName(scriptTag));
            m_scriptToProcess = m_tree.currentElement();
            m_scriptToProcessStartPosition = WTF::toOneBasedTextPosition(m_lastScriptElementStartPosition);
            m_tree.openElements()->pop();
            // Strip the script's children when the fragment context forbids scripting.
            if (isParsingFragment() && m_fragmentContext.scriptingPermission() == FragmentScriptingNotAllowed)
                m_scriptToProcess->removeAllChildren();
            setInsertionMode(m_originalInsertionMode);

            // This token will not have been created by the tokenizer if a
            // self-closing script tag was encountered and pre-HTML5 parser
            // quirks are enabled. We must set the tokenizer's state to
            // DataState explicitly if the tokenizer didn't have a chance to.
            ASSERT(m_parser->tokenizer()->state() == HTMLTokenizer::DataState || m_usePreHTML5ParserQuirks);
            m_parser->tokenizer()->setState(HTMLTokenizer::DataState);
            return;
        }
        // Any non-script end tag in text mode: pop the current element and
        // restore the insertion mode saved in m_originalInsertionMode.
        m_tree.openElements()->pop();
        setInsertionMode(m_originalInsertionMode);
        break;
    case InFramesetMode:
        ASSERT(insertionMode() == InFramesetMode);
        if (token.name() == framesetTag) {
            if (m_tree.currentNode() == m_tree.openElements()->rootNode()) {
                parseError(token);
                return;
            }
            m_tree.openElements()->pop();
            if (!isParsingFragment() && !m_tree.currentElement()->hasTagName(framesetTag))
                setInsertionMode(AfterFramesetMode);
            return;
        }
        // NOTE(review): other end tags are dropped here without a parseError()
        // call, unlike most other modes -- confirm this is intended.
        break;
    case AfterFramesetMode:
        ASSERT(insertionMode() == AfterFramesetMode);
        if (token.name() == htmlTag) {
            setInsertionMode(AfterAfterFramesetMode);
            return;
        }
        // Fall through.
    case AfterAfterFramesetMode:
        ASSERT(insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
        parseError(token);
        break;
    case InSelectInTableMode:
        ASSERT(insertionMode() == InSelectInTableMode);
        if (token.name() == captionTag
            || token.name() == tableTag
            || isTableBodyContextTag(token.name())
            || token.name() == trTag
            || isTableCellContextTag(token.name())) {
            parseError(token);
            // If the named table element is open, close the select with a
            // synthetic </select> and then reprocess the token.
            if (m_tree.openElements()->inTableScope(token.name())) {
                AtomicHTMLToken endSelect(HTMLToken::EndTag, selectTag.localName());
                processEndTag(endSelect);
                reprocessEndTag(token);
            }
            return;
        }
        // Fall through.
    case InSelectMode:
        ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode);
        if (token.name() == optgroupTag) {
            // An open option directly inside an optgroup is closed first.
            if (m_tree.currentNode()->hasTagName(optionTag) && m_tree.oneBelowTop()->hasTagName(optgroupTag))
                processFakeEndTag(optionTag);
            if (m_tree.currentNode()->hasTagName(optgroupTag)) {
                m_tree.openElements()->pop();
                return;
            }
            parseError(token);
            return;
        }
        if (token.name() == optionTag) {
            if (m_tree.currentNode()->hasTagName(optionTag)) {
                m_tree.openElements()->pop();
                return;
            }
            parseError(token);
            return;
        }
        if (token.name() == selectTag) {
            if (!m_tree.openElements()->inSelectScope(token.name())) {
                ASSERT(isParsingFragment());
                parseError(token);
                return;
            }
            m_tree.openElements()->popUntilPopped(selectTag.localName());
            resetInsertionModeAppropriately();
            return;
        }
        // NOTE(review): any other end tag is dropped without a parseError()
        // call -- confirm this is intended.
        break;
    case InTableTextMode:
        // Re-dispatch the same token after defaultForInTableText(); presumably
        // that call leaves InTableTextMode, otherwise this would recurse
        // without end -- TODO confirm.
        defaultForInTableText();
        processEndTag(token);
        break;
    case InForeignContentMode:
        if (token.name() == SVGNames::scriptTag && m_tree.currentNode()->hasTagName(SVGNames::scriptTag)) {
            notImplemented();
            return;
        }
        if (!isInHTMLNamespace(m_tree.currentNode())) {
            // FIXME: This code just wants an Element* iterator, instead of an ElementRecord*
            HTMLElementStack::ElementRecord* nodeRecord = m_tree.openElements()->topRecord();
            if (!nodeRecord->node()->hasLocalName(token.name()))
                parseError(token);
            // Walk down the stack until a node with a matching local name is
            // found (pop through it and stop) or an HTML-namespace node is
            // reached (fall out to the in-body handling below).
            while (1) {
                if (nodeRecord->node()->hasLocalName(token.name())) {
                    m_tree.openElements()->popUntilPopped(nodeRecord->element());
                    resetForeignInsertionMode();
                    return;
                }
                nodeRecord = nodeRecord->next();

                if (isInHTMLNamespace(nodeRecord->node()))
                    break;
            }
        }
        // Any other end tag (also the last two steps of "An end tag, if the current node is not an element in the HTML namespace."
        processForeignContentUsingInBodyModeAndResetMode(token);
        break;
    }
}
2340 
prepareToReprocessToken()2341 void HTMLTreeBuilder::prepareToReprocessToken()
2342 {
2343     if (m_hasPendingForeignInsertionModeSteps) {
2344         resetForeignInsertionMode();
2345         m_hasPendingForeignInsertionModeSteps = false;
2346     }
2347 }
2348 
reprocessStartTag(AtomicHTMLToken & token)2349 void HTMLTreeBuilder::reprocessStartTag(AtomicHTMLToken& token)
2350 {
2351     prepareToReprocessToken();
2352     processStartTag(token);
2353 }
2354 
reprocessEndTag(AtomicHTMLToken & token)2355 void HTMLTreeBuilder::reprocessEndTag(AtomicHTMLToken& token)
2356 {
2357     prepareToReprocessToken();
2358     processEndTag(token);
2359 }
2360 
2361 class HTMLTreeBuilder::FakeInsertionMode {
2362     WTF_MAKE_NONCOPYABLE(FakeInsertionMode);
2363 public:
FakeInsertionMode(HTMLTreeBuilder * treeBuilder,InsertionMode mode)2364     FakeInsertionMode(HTMLTreeBuilder* treeBuilder, InsertionMode mode)
2365         : m_treeBuilder(treeBuilder)
2366         , m_originalMode(treeBuilder->insertionMode())
2367     {
2368         m_treeBuilder->setFakeInsertionMode(mode);
2369     }
2370 
~FakeInsertionMode()2371     ~FakeInsertionMode()
2372     {
2373         if (m_treeBuilder->isFakeInsertionMode())
2374             m_treeBuilder->setInsertionMode(m_originalMode);
2375     }
2376 
2377 private:
2378     HTMLTreeBuilder* m_treeBuilder;
2379     InsertionMode m_originalMode;
2380 };
2381 
processForeignContentUsingInBodyModeAndResetMode(AtomicHTMLToken & token)2382 void HTMLTreeBuilder::processForeignContentUsingInBodyModeAndResetMode(AtomicHTMLToken& token)
2383 {
2384     m_hasPendingForeignInsertionModeSteps = true;
2385     {
2386         FakeInsertionMode fakeMode(this, InBodyMode);
2387         processToken(token);
2388     }
2389     if (m_hasPendingForeignInsertionModeSteps)
2390         resetForeignInsertionMode();
2391 }
2392 
resetForeignInsertionMode()2393 void HTMLTreeBuilder::resetForeignInsertionMode()
2394 {
2395     if (insertionMode() == InForeignContentMode)
2396         resetInsertionModeAppropriately();
2397 }
2398 
processComment(AtomicHTMLToken & token)2399 void HTMLTreeBuilder::processComment(AtomicHTMLToken& token)
2400 {
2401     ASSERT(token.type() == HTMLToken::Comment);
2402     if (m_insertionMode == InitialMode
2403         || m_insertionMode == BeforeHTMLMode
2404         || m_insertionMode == AfterAfterBodyMode
2405         || m_insertionMode == AfterAfterFramesetMode) {
2406         m_tree.insertCommentOnDocument(token);
2407         return;
2408     }
2409     if (m_insertionMode == AfterBodyMode) {
2410         m_tree.insertCommentOnHTMLHtmlElement(token);
2411         return;
2412     }
2413     if (m_insertionMode == InTableTextMode) {
2414         defaultForInTableText();
2415         processComment(token);
2416         return;
2417     }
2418     m_tree.insertComment(token);
2419 }
2420 
processCharacter(AtomicHTMLToken & token)2421 void HTMLTreeBuilder::processCharacter(AtomicHTMLToken& token)
2422 {
2423     ASSERT(token.type() == HTMLToken::Character);
2424     ExternalCharacterTokenBuffer buffer(token);
2425     processCharacterBuffer(buffer);
2426 }
2427 
processCharacterBuffer(ExternalCharacterTokenBuffer & buffer)2428 void HTMLTreeBuilder::processCharacterBuffer(ExternalCharacterTokenBuffer& buffer)
2429 {
2430 ReprocessBuffer:
2431     switch (insertionMode()) {
2432     case InitialMode: {
2433         ASSERT(insertionMode() == InitialMode);
2434         buffer.skipLeadingWhitespace();
2435         if (buffer.isEmpty())
2436             return;
2437         defaultForInitial();
2438         // Fall through.
2439     }
2440     case BeforeHTMLMode: {
2441         ASSERT(insertionMode() == BeforeHTMLMode);
2442         buffer.skipLeadingWhitespace();
2443         if (buffer.isEmpty())
2444             return;
2445         defaultForBeforeHTML();
2446         // Fall through.
2447     }
2448     case BeforeHeadMode: {
2449         ASSERT(insertionMode() == BeforeHeadMode);
2450         buffer.skipLeadingWhitespace();
2451         if (buffer.isEmpty())
2452             return;
2453         defaultForBeforeHead();
2454         // Fall through.
2455     }
2456     case InHeadMode: {
2457         ASSERT(insertionMode() == InHeadMode);
2458         String leadingWhitespace = buffer.takeLeadingWhitespace();
2459         if (!leadingWhitespace.isEmpty())
2460             m_tree.insertTextNode(leadingWhitespace);
2461         if (buffer.isEmpty())
2462             return;
2463         defaultForInHead();
2464         // Fall through.
2465     }
2466     case AfterHeadMode: {
2467         ASSERT(insertionMode() == AfterHeadMode);
2468         String leadingWhitespace = buffer.takeLeadingWhitespace();
2469         if (!leadingWhitespace.isEmpty())
2470             m_tree.insertTextNode(leadingWhitespace);
2471         if (buffer.isEmpty())
2472             return;
2473         defaultForAfterHead();
2474         // Fall through.
2475     }
2476     case InBodyMode:
2477     case InCaptionMode:
2478     case InCellMode: {
2479         ASSERT(insertionMode() == InBodyMode || insertionMode() == InCaptionMode || insertionMode() == InCellMode);
2480         m_tree.reconstructTheActiveFormattingElements();
2481         String characters = buffer.takeRemaining();
2482         m_tree.insertTextNode(characters);
2483         if (m_framesetOk && !isAllWhitespaceOrReplacementCharacters(characters))
2484             m_framesetOk = false;
2485         break;
2486     }
2487     case InTableMode:
2488     case InTableBodyMode:
2489     case InRowMode: {
2490         ASSERT(insertionMode() == InTableMode || insertionMode() == InTableBodyMode || insertionMode() == InRowMode);
2491         ASSERT(m_pendingTableCharacters.isEmpty());
2492         m_originalInsertionMode = m_insertionMode;
2493         setInsertionMode(InTableTextMode);
2494         prepareToReprocessToken();
2495         // Fall through.
2496     }
2497     case InTableTextMode: {
2498         buffer.giveRemainingTo(m_pendingTableCharacters);
2499         break;
2500     }
2501     case InColumnGroupMode: {
2502         ASSERT(insertionMode() == InColumnGroupMode);
2503         String leadingWhitespace = buffer.takeLeadingWhitespace();
2504         if (!leadingWhitespace.isEmpty())
2505             m_tree.insertTextNode(leadingWhitespace);
2506         if (buffer.isEmpty())
2507             return;
2508         if (!processColgroupEndTagForInColumnGroup()) {
2509             ASSERT(isParsingFragment());
2510             // The spec tells us to drop these characters on the floor.
2511             buffer.takeLeadingNonWhitespace();
2512             if (buffer.isEmpty())
2513                 return;
2514         }
2515         prepareToReprocessToken();
2516         goto ReprocessBuffer;
2517     }
2518     case AfterBodyMode:
2519     case AfterAfterBodyMode: {
2520         ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
2521         // FIXME: parse error
2522         setInsertionMode(InBodyMode);
2523         prepareToReprocessToken();
2524         goto ReprocessBuffer;
2525         break;
2526     }
2527     case TextMode: {
2528         ASSERT(insertionMode() == TextMode);
2529         m_tree.insertTextNode(buffer.takeRemaining());
2530         break;
2531     }
2532     case InHeadNoscriptMode: {
2533         ASSERT(insertionMode() == InHeadNoscriptMode);
2534         String leadingWhitespace = buffer.takeLeadingWhitespace();
2535         if (!leadingWhitespace.isEmpty())
2536             m_tree.insertTextNode(leadingWhitespace);
2537         if (buffer.isEmpty())
2538             return;
2539         defaultForInHeadNoscript();
2540         goto ReprocessBuffer;
2541         break;
2542     }
2543     case InFramesetMode:
2544     case AfterFramesetMode: {
2545         ASSERT(insertionMode() == InFramesetMode || insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
2546         String leadingWhitespace = buffer.takeRemainingWhitespace();
2547         if (!leadingWhitespace.isEmpty())
2548             m_tree.insertTextNode(leadingWhitespace);
2549         // FIXME: We should generate a parse error if we skipped over any
2550         // non-whitespace characters.
2551         break;
2552     }
2553     case InSelectInTableMode:
2554     case InSelectMode: {
2555         ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode);
2556         m_tree.insertTextNode(buffer.takeRemaining());
2557         break;
2558     }
2559     case InForeignContentMode: {
2560         ASSERT(insertionMode() == InForeignContentMode);
2561         String characters = buffer.takeRemaining();
2562         m_tree.insertTextNode(characters);
2563         if (m_framesetOk && !isAllWhitespace(characters))
2564             m_framesetOk = false;
2565         break;
2566     }
2567     case AfterAfterFramesetMode: {
2568         String leadingWhitespace = buffer.takeRemainingWhitespace();
2569         if (!leadingWhitespace.isEmpty()) {
2570             m_tree.reconstructTheActiveFormattingElements();
2571             m_tree.insertTextNode(leadingWhitespace);
2572         }
2573         // FIXME: We should generate a parse error if we skipped over any
2574         // non-whitespace characters.
2575         break;
2576     }
2577     }
2578 }
2579 
processEndOfFile(AtomicHTMLToken & token)2580 void HTMLTreeBuilder::processEndOfFile(AtomicHTMLToken& token)
2581 {
2582     ASSERT(token.type() == HTMLToken::EndOfFile);
2583     switch (insertionMode()) {
2584     case InitialMode:
2585         ASSERT(insertionMode() == InitialMode);
2586         defaultForInitial();
2587         // Fall through.
2588     case BeforeHTMLMode:
2589         ASSERT(insertionMode() == BeforeHTMLMode);
2590         defaultForBeforeHTML();
2591         // Fall through.
2592     case BeforeHeadMode:
2593         ASSERT(insertionMode() == BeforeHeadMode);
2594         defaultForBeforeHead();
2595         // Fall through.
2596     case InHeadMode:
2597         ASSERT(insertionMode() == InHeadMode);
2598         defaultForInHead();
2599         // Fall through.
2600     case AfterHeadMode:
2601         ASSERT(insertionMode() == AfterHeadMode);
2602         defaultForAfterHead();
2603         // Fall through
2604     case InBodyMode:
2605     case InCellMode:
2606     case InCaptionMode:
2607     case InRowMode:
2608         ASSERT(insertionMode() == InBodyMode || insertionMode() == InCellMode || insertionMode() == InCaptionMode || insertionMode() == InRowMode);
2609         notImplemented(); // Emit parse error based on what elements are still open.
2610         break;
2611     case AfterBodyMode:
2612     case AfterAfterBodyMode:
2613         ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
2614         break;
2615     case InHeadNoscriptMode:
2616         ASSERT(insertionMode() == InHeadNoscriptMode);
2617         defaultForInHeadNoscript();
2618         processEndOfFile(token);
2619         return;
2620     case AfterFramesetMode:
2621     case AfterAfterFramesetMode:
2622         ASSERT(insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
2623         break;
2624     case InFramesetMode:
2625     case InTableMode:
2626     case InTableBodyMode:
2627     case InSelectInTableMode:
2628     case InSelectMode:
2629         ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode || insertionMode() == InTableMode || insertionMode() == InFramesetMode || insertionMode() == InTableBodyMode);
2630         if (m_tree.currentNode() != m_tree.openElements()->rootNode())
2631             parseError(token);
2632         break;
2633     case InColumnGroupMode:
2634         if (m_tree.currentNode() == m_tree.openElements()->rootNode()) {
2635             ASSERT(isParsingFragment());
2636             return; // FIXME: Should we break here instead of returning?
2637         }
2638         if (!processColgroupEndTagForInColumnGroup()) {
2639             ASSERT(isParsingFragment());
2640             return; // FIXME: Should we break here instead of returning?
2641         }
2642         prepareToReprocessToken();
2643         processEndOfFile(token);
2644         return;
2645     case InForeignContentMode:
2646         setInsertionMode(InBodyMode);
2647         processEndOfFile(token);
2648         return;
2649     case InTableTextMode:
2650         defaultForInTableText();
2651         processEndOfFile(token);
2652         return;
2653     case TextMode:
2654         parseError(token);
2655         if (m_tree.currentNode()->hasTagName(scriptTag))
2656             notImplemented(); // mark the script element as "already started".
2657         m_tree.openElements()->pop();
2658         setInsertionMode(m_originalInsertionMode);
2659         prepareToReprocessToken();
2660         processEndOfFile(token);
2661         return;
2662     }
2663     ASSERT(m_tree.currentNode());
2664     m_tree.openElements()->popAll();
2665 }
2666 
defaultForInitial()2667 void HTMLTreeBuilder::defaultForInitial()
2668 {
2669     notImplemented();
2670     if (!m_fragmentContext.fragment())
2671         m_document->setCompatibilityMode(Document::QuirksMode);
2672     // FIXME: parse error
2673     setInsertionMode(BeforeHTMLMode);
2674     prepareToReprocessToken();
2675 }
2676 
defaultForBeforeHTML()2677 void HTMLTreeBuilder::defaultForBeforeHTML()
2678 {
2679     AtomicHTMLToken startHTML(HTMLToken::StartTag, htmlTag.localName());
2680     m_tree.insertHTMLHtmlStartTagBeforeHTML(startHTML);
2681     setInsertionMode(BeforeHeadMode);
2682     prepareToReprocessToken();
2683 }
2684 
defaultForBeforeHead()2685 void HTMLTreeBuilder::defaultForBeforeHead()
2686 {
2687     AtomicHTMLToken startHead(HTMLToken::StartTag, headTag.localName());
2688     processStartTag(startHead);
2689     prepareToReprocessToken();
2690 }
2691 
defaultForInHead()2692 void HTMLTreeBuilder::defaultForInHead()
2693 {
2694     AtomicHTMLToken endHead(HTMLToken::EndTag, headTag.localName());
2695     processEndTag(endHead);
2696     prepareToReprocessToken();
2697 }
2698 
defaultForInHeadNoscript()2699 void HTMLTreeBuilder::defaultForInHeadNoscript()
2700 {
2701     AtomicHTMLToken endNoscript(HTMLToken::EndTag, noscriptTag.localName());
2702     processEndTag(endNoscript);
2703     prepareToReprocessToken();
2704 }
2705 
defaultForAfterHead()2706 void HTMLTreeBuilder::defaultForAfterHead()
2707 {
2708     AtomicHTMLToken startBody(HTMLToken::StartTag, bodyTag.localName());
2709     processStartTag(startBody);
2710     m_framesetOk = true;
2711     prepareToReprocessToken();
2712 }
2713 
defaultForInTableText()2714 void HTMLTreeBuilder::defaultForInTableText()
2715 {
2716     String characters = String::adopt(m_pendingTableCharacters);
2717     if (!isAllWhitespace(characters)) {
2718         // FIXME: parse error
2719         HTMLConstructionSite::RedirectToFosterParentGuard redirecter(m_tree);
2720         m_tree.reconstructTheActiveFormattingElements();
2721         m_tree.insertTextNode(characters);
2722         m_framesetOk = false;
2723         setInsertionMode(m_originalInsertionMode);
2724         prepareToReprocessToken();
2725         return;
2726     }
2727     m_tree.insertTextNode(characters);
2728     setInsertionMode(m_originalInsertionMode);
2729     prepareToReprocessToken();
2730 }
2731 
processStartTagForInHead(AtomicHTMLToken & token)2732 bool HTMLTreeBuilder::processStartTagForInHead(AtomicHTMLToken& token)
2733 {
2734     ASSERT(token.type() == HTMLToken::StartTag);
2735     if (token.name() == htmlTag) {
2736         m_tree.insertHTMLHtmlStartTagInBody(token);
2737         return true;
2738     }
2739     if (token.name() == baseTag
2740         || token.name() == basefontTag
2741         || token.name() == bgsoundTag
2742         || token.name() == commandTag
2743         || token.name() == linkTag
2744         || token.name() == metaTag) {
2745         m_tree.insertSelfClosingHTMLElement(token);
2746         // Note: The custom processing for the <meta> tag is done in HTMLMetaElement::process().
2747         return true;
2748     }
2749     if (token.name() == titleTag) {
2750         processGenericRCDATAStartTag(token);
2751         return true;
2752     }
2753     if (token.name() == noscriptTag) {
2754         if (scriptEnabled(m_document->frame())) {
2755             processGenericRawTextStartTag(token);
2756             return true;
2757         }
2758         m_tree.insertHTMLElement(token);
2759         setInsertionMode(InHeadNoscriptMode);
2760         return true;
2761     }
2762     if (token.name() == noframesTag || token.name() == styleTag) {
2763         processGenericRawTextStartTag(token);
2764         return true;
2765     }
2766     if (token.name() == scriptTag) {
2767         processScriptStartTag(token);
2768         if (m_usePreHTML5ParserQuirks && token.selfClosing())
2769             processFakeEndTag(scriptTag);
2770         return true;
2771     }
2772     if (token.name() == headTag) {
2773         parseError(token);
2774         return true;
2775     }
2776     return false;
2777 }
2778 
processGenericRCDATAStartTag(AtomicHTMLToken & token)2779 void HTMLTreeBuilder::processGenericRCDATAStartTag(AtomicHTMLToken& token)
2780 {
2781     ASSERT(token.type() == HTMLToken::StartTag);
2782     m_tree.insertHTMLElement(token);
2783     m_parser->tokenizer()->setState(HTMLTokenizer::RCDATAState);
2784     m_originalInsertionMode = m_insertionMode;
2785     setInsertionMode(TextMode);
2786 }
2787 
processGenericRawTextStartTag(AtomicHTMLToken & token)2788 void HTMLTreeBuilder::processGenericRawTextStartTag(AtomicHTMLToken& token)
2789 {
2790     ASSERT(token.type() == HTMLToken::StartTag);
2791     m_tree.insertHTMLElement(token);
2792     m_parser->tokenizer()->setState(HTMLTokenizer::RAWTEXTState);
2793     m_originalInsertionMode = m_insertionMode;
2794     setInsertionMode(TextMode);
2795 }
2796 
processScriptStartTag(AtomicHTMLToken & token)2797 void HTMLTreeBuilder::processScriptStartTag(AtomicHTMLToken& token)
2798 {
2799     ASSERT(token.type() == HTMLToken::StartTag);
2800     m_tree.insertScriptElement(token);
2801     m_parser->tokenizer()->setState(HTMLTokenizer::ScriptDataState);
2802     m_originalInsertionMode = m_insertionMode;
2803 
2804     TextPosition0 position = m_parser->textPosition();
2805 
2806     ASSERT(position.m_line.zeroBasedInt() == m_parser->tokenizer()->lineNumber());
2807 
2808     m_lastScriptElementStartPosition = position;
2809 
2810     setInsertionMode(TextMode);
2811 }
2812 
finished()2813 void HTMLTreeBuilder::finished()
2814 {
2815     if (isParsingFragment())
2816         return;
2817 
2818     ASSERT(m_document);
2819     // Warning, this may detach the parser. Do not do anything else after this.
2820     m_document->finishedParsing();
2821 }
2822 
parseError(AtomicHTMLToken &)2823 void HTMLTreeBuilder::parseError(AtomicHTMLToken&)
2824 {
2825 }
2826 
scriptEnabled(Frame * frame)2827 bool HTMLTreeBuilder::scriptEnabled(Frame* frame)
2828 {
2829     if (!frame)
2830         return false;
2831     return frame->script()->canExecuteScripts(NotAboutToExecuteScript);
2832 }
2833 
pluginsEnabled(Frame * frame)2834 bool HTMLTreeBuilder::pluginsEnabled(Frame* frame)
2835 {
2836     if (!frame)
2837         return false;
2838     return frame->loader()->subframeLoader()->allowPlugins(NotAboutToInstantiatePlugin);
2839 }
2840 
2841 }
2842