1 /*
2 * Copyright (C) 2010 Google, Inc. All Rights Reserved.
3 * Copyright (C) 2011 Apple Inc. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GOOGLE INC. OR
18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27 #include "config.h"
28 #include "HTMLTreeBuilder.h"
29
30 #include "Comment.h"
31 #include "DOMWindow.h"
32 #include "DocumentFragment.h"
33 #include "DocumentType.h"
34 #include "Frame.h"
35 #include "HTMLDocument.h"
36 #include "HTMLDocumentParser.h"
37 #include "HTMLElementFactory.h"
38 #include "HTMLFormElement.h"
39 #include "HTMLHtmlElement.h"
40 #include "HTMLNames.h"
41 #include "HTMLParserIdioms.h"
42 #include "HTMLScriptElement.h"
43 #include "HTMLToken.h"
44 #include "HTMLTokenizer.h"
45 #include "LocalizedStrings.h"
46 #include "MathMLNames.h"
47 #include "NotImplemented.h"
48 #include "SVGNames.h"
49 #include "Text.h"
50 #include "XLinkNames.h"
51 #include "XMLNSNames.h"
52 #include "XMLNames.h"
53 #include <wtf/unicode/CharacterNames.h>
54
55 namespace WebCore {
56
57 using namespace HTMLNames;
58
59 static const int uninitializedLineNumberValue = -1;
60
uninitializedPositionValue1()61 static TextPosition1 uninitializedPositionValue1()
62 {
63 return TextPosition1(WTF::OneBasedNumber::fromOneBasedInt(-1), WTF::OneBasedNumber::base());
64 }
65
66 namespace {
67
isHTMLSpaceOrReplacementCharacter(UChar character)68 inline bool isHTMLSpaceOrReplacementCharacter(UChar character)
69 {
70 return isHTMLSpace(character) || character == replacementCharacter;
71 }
72
isAllWhitespace(const String & string)73 inline bool isAllWhitespace(const String& string)
74 {
75 return string.isAllSpecialCharacters<isHTMLSpace>();
76 }
77
isAllWhitespaceOrReplacementCharacters(const String & string)78 inline bool isAllWhitespaceOrReplacementCharacters(const String& string)
79 {
80 return string.isAllSpecialCharacters<isHTMLSpaceOrReplacementCharacter>();
81 }
82
isNumberedHeaderTag(const AtomicString & tagName)83 bool isNumberedHeaderTag(const AtomicString& tagName)
84 {
85 return tagName == h1Tag
86 || tagName == h2Tag
87 || tagName == h3Tag
88 || tagName == h4Tag
89 || tagName == h5Tag
90 || tagName == h6Tag;
91 }
92
isCaptionColOrColgroupTag(const AtomicString & tagName)93 bool isCaptionColOrColgroupTag(const AtomicString& tagName)
94 {
95 return tagName == captionTag
96 || tagName == colTag
97 || tagName == colgroupTag;
98 }
99
isTableCellContextTag(const AtomicString & tagName)100 bool isTableCellContextTag(const AtomicString& tagName)
101 {
102 return tagName == thTag || tagName == tdTag;
103 }
104
isTableBodyContextTag(const AtomicString & tagName)105 bool isTableBodyContextTag(const AtomicString& tagName)
106 {
107 return tagName == tbodyTag
108 || tagName == tfootTag
109 || tagName == theadTag;
110 }
111
112 // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#special
isSpecialNode(Node * node)113 bool isSpecialNode(Node* node)
114 {
115 if (node->hasTagName(MathMLNames::miTag)
116 || node->hasTagName(MathMLNames::moTag)
117 || node->hasTagName(MathMLNames::mnTag)
118 || node->hasTagName(MathMLNames::msTag)
119 || node->hasTagName(MathMLNames::mtextTag)
120 || node->hasTagName(MathMLNames::annotation_xmlTag)
121 || node->hasTagName(SVGNames::foreignObjectTag)
122 || node->hasTagName(SVGNames::descTag)
123 || node->hasTagName(SVGNames::titleTag))
124 return true;
125 if (node->nodeType() == Node::DOCUMENT_FRAGMENT_NODE)
126 return true;
127 if (!isInHTMLNamespace(node))
128 return false;
129 const AtomicString& tagName = node->localName();
130 return tagName == addressTag
131 || tagName == appletTag
132 || tagName == areaTag
133 || tagName == articleTag
134 || tagName == asideTag
135 || tagName == baseTag
136 || tagName == basefontTag
137 || tagName == bgsoundTag
138 || tagName == blockquoteTag
139 || tagName == bodyTag
140 || tagName == brTag
141 || tagName == buttonTag
142 || tagName == captionTag
143 || tagName == centerTag
144 || tagName == colTag
145 || tagName == colgroupTag
146 || tagName == commandTag
147 || tagName == ddTag
148 || tagName == detailsTag
149 || tagName == dirTag
150 || tagName == divTag
151 || tagName == dlTag
152 || tagName == dtTag
153 || tagName == embedTag
154 || tagName == fieldsetTag
155 || tagName == figcaptionTag
156 || tagName == figureTag
157 || tagName == footerTag
158 || tagName == formTag
159 || tagName == frameTag
160 || tagName == framesetTag
161 || isNumberedHeaderTag(tagName)
162 || tagName == headTag
163 || tagName == headerTag
164 || tagName == hgroupTag
165 || tagName == hrTag
166 || tagName == htmlTag
167 || tagName == iframeTag
168 || tagName == imgTag
169 || tagName == inputTag
170 || tagName == isindexTag
171 || tagName == liTag
172 || tagName == linkTag
173 || tagName == listingTag
174 || tagName == marqueeTag
175 || tagName == menuTag
176 || tagName == metaTag
177 || tagName == navTag
178 || tagName == noembedTag
179 || tagName == noframesTag
180 || tagName == noscriptTag
181 || tagName == objectTag
182 || tagName == olTag
183 || tagName == pTag
184 || tagName == paramTag
185 || tagName == plaintextTag
186 || tagName == preTag
187 || tagName == scriptTag
188 || tagName == sectionTag
189 || tagName == selectTag
190 || tagName == styleTag
191 || tagName == summaryTag
192 || tagName == tableTag
193 || isTableBodyContextTag(tagName)
194 || tagName == tdTag
195 || tagName == textareaTag
196 || tagName == thTag
197 || tagName == titleTag
198 || tagName == trTag
199 || tagName == ulTag
200 || tagName == wbrTag
201 || tagName == xmpTag;
202 }
203
isNonAnchorNonNobrFormattingTag(const AtomicString & tagName)204 bool isNonAnchorNonNobrFormattingTag(const AtomicString& tagName)
205 {
206 return tagName == bTag
207 || tagName == bigTag
208 || tagName == codeTag
209 || tagName == emTag
210 || tagName == fontTag
211 || tagName == iTag
212 || tagName == sTag
213 || tagName == smallTag
214 || tagName == strikeTag
215 || tagName == strongTag
216 || tagName == ttTag
217 || tagName == uTag;
218 }
219
isNonAnchorFormattingTag(const AtomicString & tagName)220 bool isNonAnchorFormattingTag(const AtomicString& tagName)
221 {
222 return tagName == nobrTag
223 || isNonAnchorNonNobrFormattingTag(tagName);
224 }
225
226 // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#formatting
isFormattingTag(const AtomicString & tagName)227 bool isFormattingTag(const AtomicString& tagName)
228 {
229 return tagName == aTag || isNonAnchorFormattingTag(tagName);
230 }
231
closestFormAncestor(Element * element)232 HTMLFormElement* closestFormAncestor(Element* element)
233 {
234 while (element) {
235 if (element->hasTagName(formTag))
236 return static_cast<HTMLFormElement*>(element);
237 ContainerNode* parent = element->parentNode();
238 if (!parent || !parent->isElementNode())
239 return 0;
240 element = static_cast<Element*>(parent);
241 }
242 return 0;
243 }
244
245 } // namespace
246
247 class HTMLTreeBuilder::ExternalCharacterTokenBuffer {
248 WTF_MAKE_NONCOPYABLE(ExternalCharacterTokenBuffer);
249 public:
ExternalCharacterTokenBuffer(AtomicHTMLToken & token)250 explicit ExternalCharacterTokenBuffer(AtomicHTMLToken& token)
251 : m_current(token.characters().data())
252 , m_end(m_current + token.characters().size())
253 {
254 ASSERT(!isEmpty());
255 }
256
ExternalCharacterTokenBuffer(const String & string)257 explicit ExternalCharacterTokenBuffer(const String& string)
258 : m_current(string.characters())
259 , m_end(m_current + string.length())
260 {
261 ASSERT(!isEmpty());
262 }
263
~ExternalCharacterTokenBuffer()264 ~ExternalCharacterTokenBuffer()
265 {
266 ASSERT(isEmpty());
267 }
268
isEmpty() const269 bool isEmpty() const { return m_current == m_end; }
270
skipLeadingWhitespace()271 void skipLeadingWhitespace()
272 {
273 skipLeading<isHTMLSpace>();
274 }
275
takeLeadingWhitespace()276 String takeLeadingWhitespace()
277 {
278 return takeLeading<isHTMLSpace>();
279 }
280
takeLeadingNonWhitespace()281 String takeLeadingNonWhitespace()
282 {
283 return takeLeading<isNotHTMLSpace>();
284 }
285
takeRemaining()286 String takeRemaining()
287 {
288 ASSERT(!isEmpty());
289 const UChar* start = m_current;
290 m_current = m_end;
291 return String(start, m_current - start);
292 }
293
giveRemainingTo(Vector<UChar> & recipient)294 void giveRemainingTo(Vector<UChar>& recipient)
295 {
296 recipient.append(m_current, m_end - m_current);
297 m_current = m_end;
298 }
299
takeRemainingWhitespace()300 String takeRemainingWhitespace()
301 {
302 ASSERT(!isEmpty());
303 Vector<UChar> whitespace;
304 do {
305 UChar cc = *m_current++;
306 if (isHTMLSpace(cc))
307 whitespace.append(cc);
308 } while (m_current < m_end);
309 // Returning the null string when there aren't any whitespace
310 // characters is slightly cleaner semantically because we don't want
311 // to insert a text node (as opposed to inserting an empty text node).
312 if (whitespace.isEmpty())
313 return String();
314 return String::adopt(whitespace);
315 }
316
317 private:
318 template<bool characterPredicate(UChar)>
skipLeading()319 void skipLeading()
320 {
321 ASSERT(!isEmpty());
322 while (characterPredicate(*m_current)) {
323 if (++m_current == m_end)
324 return;
325 }
326 }
327
328 template<bool characterPredicate(UChar)>
takeLeading()329 String takeLeading()
330 {
331 ASSERT(!isEmpty());
332 const UChar* start = m_current;
333 skipLeading<characterPredicate>();
334 if (start == m_current)
335 return String();
336 return String(start, m_current - start);
337 }
338
339 const UChar* m_current;
340 const UChar* m_end;
341 };
342
343
HTMLTreeBuilder(HTMLDocumentParser * parser,HTMLDocument * document,bool reportErrors,bool usePreHTML5ParserQuirks)344 HTMLTreeBuilder::HTMLTreeBuilder(HTMLDocumentParser* parser, HTMLDocument* document, bool reportErrors, bool usePreHTML5ParserQuirks)
345 : m_framesetOk(true)
346 , m_document(document)
347 , m_tree(document)
348 , m_reportErrors(reportErrors)
349 , m_isPaused(false)
350 , m_insertionMode(InitialMode)
351 , m_originalInsertionMode(InitialMode)
352 , m_parser(parser)
353 , m_scriptToProcessStartPosition(uninitializedPositionValue1())
354 , m_lastScriptElementStartPosition(TextPosition0::belowRangePosition())
355 , m_usePreHTML5ParserQuirks(usePreHTML5ParserQuirks)
356 , m_hasPendingForeignInsertionModeSteps(false)
357 {
358 }
359
360 // FIXME: Member variables should be grouped into self-initializing structs to
361 // minimize code duplication between these constructors.
HTMLTreeBuilder(HTMLDocumentParser * parser,DocumentFragment * fragment,Element * contextElement,FragmentScriptingPermission scriptingPermission,bool usePreHTML5ParserQuirks)362 HTMLTreeBuilder::HTMLTreeBuilder(HTMLDocumentParser* parser, DocumentFragment* fragment, Element* contextElement, FragmentScriptingPermission scriptingPermission, bool usePreHTML5ParserQuirks)
363 : m_framesetOk(true)
364 , m_fragmentContext(fragment, contextElement, scriptingPermission)
365 , m_document(fragment->document())
366 , m_tree(fragment, scriptingPermission)
367 , m_reportErrors(false) // FIXME: Why not report errors in fragments?
368 , m_isPaused(false)
369 , m_insertionMode(InitialMode)
370 , m_originalInsertionMode(InitialMode)
371 , m_parser(parser)
372 , m_scriptToProcessStartPosition(uninitializedPositionValue1())
373 , m_lastScriptElementStartPosition(TextPosition0::belowRangePosition())
374 , m_usePreHTML5ParserQuirks(usePreHTML5ParserQuirks)
375 , m_hasPendingForeignInsertionModeSteps(false)
376 {
377 if (contextElement) {
378 // Steps 4.2-4.6 of the HTML5 Fragment Case parsing algorithm:
379 // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#fragment-case
380 // For efficiency, we skip step 4.2 ("Let root be a new html element with no attributes")
381 // and instead use the DocumentFragment as a root node.
382 m_tree.openElements()->pushRootNode(fragment);
383 resetInsertionModeAppropriately();
384 m_tree.setForm(closestFormAncestor(contextElement));
385 }
386 }
387
~HTMLTreeBuilder()388 HTMLTreeBuilder::~HTMLTreeBuilder()
389 {
390 }
391
detach()392 void HTMLTreeBuilder::detach()
393 {
394 // This call makes little sense in fragment mode, but for consistency
395 // DocumentParser expects detach() to always be called before it's destroyed.
396 m_document = 0;
397 // HTMLConstructionSite might be on the callstack when detach() is called
398 // otherwise we'd just call m_tree.clear() here instead.
399 m_tree.detach();
400 }
401
FragmentParsingContext()402 HTMLTreeBuilder::FragmentParsingContext::FragmentParsingContext()
403 : m_fragment(0)
404 , m_contextElement(0)
405 , m_scriptingPermission(FragmentScriptingAllowed)
406 {
407 }
408
FragmentParsingContext(DocumentFragment * fragment,Element * contextElement,FragmentScriptingPermission scriptingPermission)409 HTMLTreeBuilder::FragmentParsingContext::FragmentParsingContext(DocumentFragment* fragment, Element* contextElement, FragmentScriptingPermission scriptingPermission)
410 : m_fragment(fragment)
411 , m_contextElement(contextElement)
412 , m_scriptingPermission(scriptingPermission)
413 {
414 ASSERT(!fragment->hasChildNodes());
415 }
416
~FragmentParsingContext()417 HTMLTreeBuilder::FragmentParsingContext::~FragmentParsingContext()
418 {
419 }
420
takeScriptToProcess(TextPosition1 & scriptStartPosition)421 PassRefPtr<Element> HTMLTreeBuilder::takeScriptToProcess(TextPosition1& scriptStartPosition)
422 {
423 // Unpause ourselves, callers may pause us again when processing the script.
424 // The HTML5 spec is written as though scripts are executed inside the tree
425 // builder. We pause the parser to exit the tree builder, and then resume
426 // before running scripts.
427 m_isPaused = false;
428 scriptStartPosition = m_scriptToProcessStartPosition;
429 m_scriptToProcessStartPosition = uninitializedPositionValue1();
430 return m_scriptToProcess.release();
431 }
432
constructTreeFromToken(HTMLToken & rawToken)433 void HTMLTreeBuilder::constructTreeFromToken(HTMLToken& rawToken)
434 {
435 AtomicHTMLToken token(rawToken);
436
437 // We clear the rawToken in case constructTreeFromAtomicToken
438 // synchronously re-enters the parser. We don't clear the token immedately
439 // for Character tokens because the AtomicHTMLToken avoids copying the
440 // characters by keeping a pointer to the underlying buffer in the
441 // HTMLToken. Fortuantely, Character tokens can't cause use to re-enter
442 // the parser.
443 //
444 // FIXME: Top clearing the rawToken once we start running the parser off
445 // the main thread or once we stop allowing synchronous JavaScript
446 // execution from parseMappedAttribute.
447 if (rawToken.type() != HTMLToken::Character)
448 rawToken.clear();
449
450 constructTreeFromAtomicToken(token);
451
452 if (!rawToken.isUninitialized()) {
453 ASSERT(rawToken.type() == HTMLToken::Character);
454 rawToken.clear();
455 }
456 }
457
constructTreeFromAtomicToken(AtomicHTMLToken & token)458 void HTMLTreeBuilder::constructTreeFromAtomicToken(AtomicHTMLToken& token)
459 {
460 processToken(token);
461
462 // Swallowing U+0000 characters isn't in the HTML5 spec, but turning all
463 // the U+0000 characters into replacement characters has compatibility
464 // problems.
465 m_parser->tokenizer()->setForceNullCharacterReplacement(m_insertionMode == TextMode || m_insertionMode == InForeignContentMode);
466 m_parser->tokenizer()->setShouldAllowCDATA(m_insertionMode == InForeignContentMode && !isInHTMLNamespace(m_tree.currentNode()));
467 }
468
processToken(AtomicHTMLToken & token)469 void HTMLTreeBuilder::processToken(AtomicHTMLToken& token)
470 {
471 switch (token.type()) {
472 case HTMLToken::Uninitialized:
473 ASSERT_NOT_REACHED();
474 break;
475 case HTMLToken::DOCTYPE:
476 processDoctypeToken(token);
477 break;
478 case HTMLToken::StartTag:
479 processStartTag(token);
480 break;
481 case HTMLToken::EndTag:
482 processEndTag(token);
483 break;
484 case HTMLToken::Comment:
485 processComment(token);
486 return;
487 case HTMLToken::Character:
488 processCharacter(token);
489 break;
490 case HTMLToken::EndOfFile:
491 processEndOfFile(token);
492 break;
493 }
494 }
495
processDoctypeToken(AtomicHTMLToken & token)496 void HTMLTreeBuilder::processDoctypeToken(AtomicHTMLToken& token)
497 {
498 ASSERT(token.type() == HTMLToken::DOCTYPE);
499 if (m_insertionMode == InitialMode) {
500 m_tree.insertDoctype(token);
501 setInsertionMode(BeforeHTMLMode);
502 return;
503 }
504 if (m_insertionMode == InTableTextMode) {
505 defaultForInTableText();
506 processDoctypeToken(token);
507 return;
508 }
509 parseError(token);
510 }
511
processFakeStartTag(const QualifiedName & tagName,PassRefPtr<NamedNodeMap> attributes)512 void HTMLTreeBuilder::processFakeStartTag(const QualifiedName& tagName, PassRefPtr<NamedNodeMap> attributes)
513 {
514 // FIXME: We'll need a fancier conversion than just "localName" for SVG/MathML tags.
515 AtomicHTMLToken fakeToken(HTMLToken::StartTag, tagName.localName(), attributes);
516 processStartTag(fakeToken);
517 }
518
processFakeEndTag(const QualifiedName & tagName)519 void HTMLTreeBuilder::processFakeEndTag(const QualifiedName& tagName)
520 {
521 // FIXME: We'll need a fancier conversion than just "localName" for SVG/MathML tags.
522 AtomicHTMLToken fakeToken(HTMLToken::EndTag, tagName.localName());
523 processEndTag(fakeToken);
524 }
525
processFakeCharacters(const String & characters)526 void HTMLTreeBuilder::processFakeCharacters(const String& characters)
527 {
528 ASSERT(!characters.isEmpty());
529 ExternalCharacterTokenBuffer buffer(characters);
530 processCharacterBuffer(buffer);
531 }
532
processFakePEndTagIfPInButtonScope()533 void HTMLTreeBuilder::processFakePEndTagIfPInButtonScope()
534 {
535 if (!m_tree.openElements()->inButtonScope(pTag.localName()))
536 return;
537 AtomicHTMLToken endP(HTMLToken::EndTag, pTag.localName());
538 processEndTag(endP);
539 }
540
attributesForIsindexInput(AtomicHTMLToken & token)541 PassRefPtr<NamedNodeMap> HTMLTreeBuilder::attributesForIsindexInput(AtomicHTMLToken& token)
542 {
543 RefPtr<NamedNodeMap> attributes = token.takeAtributes();
544 if (!attributes)
545 attributes = NamedNodeMap::create();
546 else {
547 attributes->removeAttribute(nameAttr);
548 attributes->removeAttribute(actionAttr);
549 attributes->removeAttribute(promptAttr);
550 }
551
552 RefPtr<Attribute> mappedAttribute = Attribute::createMapped(nameAttr, isindexTag.localName());
553 attributes->insertAttribute(mappedAttribute.release(), false);
554 return attributes.release();
555 }
556
processIsindexStartTagForInBody(AtomicHTMLToken & token)557 void HTMLTreeBuilder::processIsindexStartTagForInBody(AtomicHTMLToken& token)
558 {
559 ASSERT(token.type() == HTMLToken::StartTag);
560 ASSERT(token.name() == isindexTag);
561 parseError(token);
562 if (m_tree.form())
563 return;
564 notImplemented(); // Acknowledge self-closing flag
565 processFakeStartTag(formTag);
566 RefPtr<Attribute> actionAttribute = token.getAttributeItem(actionAttr);
567 if (actionAttribute) {
568 ASSERT(m_tree.currentElement()->hasTagName(formTag));
569 m_tree.currentElement()->setAttribute(actionAttr, actionAttribute->value());
570 }
571 processFakeStartTag(hrTag);
572 processFakeStartTag(labelTag);
573 RefPtr<Attribute> promptAttribute = token.getAttributeItem(promptAttr);
574 if (promptAttribute)
575 processFakeCharacters(promptAttribute->value());
576 else
577 processFakeCharacters(searchableIndexIntroduction());
578 processFakeStartTag(inputTag, attributesForIsindexInput(token));
579 notImplemented(); // This second set of characters may be needed by non-english locales.
580 processFakeEndTag(labelTag);
581 processFakeStartTag(hrTag);
582 processFakeEndTag(formTag);
583 }
584
585 namespace {
586
isLi(const ContainerNode * element)587 bool isLi(const ContainerNode* element)
588 {
589 return element->hasTagName(liTag);
590 }
591
isDdOrDt(const ContainerNode * element)592 bool isDdOrDt(const ContainerNode* element)
593 {
594 return element->hasTagName(ddTag)
595 || element->hasTagName(dtTag);
596 }
597
598 }
599
600 template <bool shouldClose(const ContainerNode*)>
processCloseWhenNestedTag(AtomicHTMLToken & token)601 void HTMLTreeBuilder::processCloseWhenNestedTag(AtomicHTMLToken& token)
602 {
603 m_framesetOk = false;
604 HTMLElementStack::ElementRecord* nodeRecord = m_tree.openElements()->topRecord();
605 while (1) {
606 RefPtr<ContainerNode> node = nodeRecord->node();
607 if (shouldClose(node.get())) {
608 ASSERT(node->isElementNode());
609 processFakeEndTag(toElement(node.get())->tagQName());
610 break;
611 }
612 if (isSpecialNode(node.get()) && !node->hasTagName(addressTag) && !node->hasTagName(divTag) && !node->hasTagName(pTag))
613 break;
614 nodeRecord = nodeRecord->next();
615 }
616 processFakePEndTagIfPInButtonScope();
617 m_tree.insertHTMLElement(token);
618 }
619
620 namespace {
621
622 typedef HashMap<AtomicString, QualifiedName> PrefixedNameToQualifiedNameMap;
623
mapLoweredLocalNameToName(PrefixedNameToQualifiedNameMap * map,QualifiedName ** names,size_t length)624 void mapLoweredLocalNameToName(PrefixedNameToQualifiedNameMap* map, QualifiedName** names, size_t length)
625 {
626 for (size_t i = 0; i < length; ++i) {
627 const QualifiedName& name = *names[i];
628 const AtomicString& localName = name.localName();
629 AtomicString loweredLocalName = localName.lower();
630 if (loweredLocalName != localName)
631 map->add(loweredLocalName, name);
632 }
633 }
634
adjustSVGTagNameCase(AtomicHTMLToken & token)635 void adjustSVGTagNameCase(AtomicHTMLToken& token)
636 {
637 static PrefixedNameToQualifiedNameMap* caseMap = 0;
638 if (!caseMap) {
639 caseMap = new PrefixedNameToQualifiedNameMap;
640 size_t length = 0;
641 QualifiedName** svgTags = SVGNames::getSVGTags(&length);
642 mapLoweredLocalNameToName(caseMap, svgTags, length);
643 }
644
645 const QualifiedName& casedName = caseMap->get(token.name());
646 if (casedName.localName().isNull())
647 return;
648 token.setName(casedName.localName());
649 }
650
651 template<QualifiedName** getAttrs(size_t* length)>
adjustAttributes(AtomicHTMLToken & token)652 void adjustAttributes(AtomicHTMLToken& token)
653 {
654 static PrefixedNameToQualifiedNameMap* caseMap = 0;
655 if (!caseMap) {
656 caseMap = new PrefixedNameToQualifiedNameMap;
657 size_t length = 0;
658 QualifiedName** attrs = getAttrs(&length);
659 mapLoweredLocalNameToName(caseMap, attrs, length);
660 }
661
662 NamedNodeMap* attributes = token.attributes();
663 if (!attributes)
664 return;
665
666 for (unsigned x = 0; x < attributes->length(); ++x) {
667 Attribute* attribute = attributes->attributeItem(x);
668 const QualifiedName& casedName = caseMap->get(attribute->localName());
669 if (!casedName.localName().isNull())
670 attribute->parserSetName(casedName);
671 }
672 }
673
adjustSVGAttributes(AtomicHTMLToken & token)674 void adjustSVGAttributes(AtomicHTMLToken& token)
675 {
676 adjustAttributes<SVGNames::getSVGAttrs>(token);
677 }
678
adjustMathMLAttributes(AtomicHTMLToken & token)679 void adjustMathMLAttributes(AtomicHTMLToken& token)
680 {
681 adjustAttributes<MathMLNames::getMathMLAttrs>(token);
682 }
683
addNamesWithPrefix(PrefixedNameToQualifiedNameMap * map,const AtomicString & prefix,QualifiedName ** names,size_t length)684 void addNamesWithPrefix(PrefixedNameToQualifiedNameMap* map, const AtomicString& prefix, QualifiedName** names, size_t length)
685 {
686 for (size_t i = 0; i < length; ++i) {
687 QualifiedName* name = names[i];
688 const AtomicString& localName = name->localName();
689 AtomicString prefixColonLocalName(prefix + ":" + localName);
690 QualifiedName nameWithPrefix(prefix, localName, name->namespaceURI());
691 map->add(prefixColonLocalName, nameWithPrefix);
692 }
693 }
694
adjustForeignAttributes(AtomicHTMLToken & token)695 void adjustForeignAttributes(AtomicHTMLToken& token)
696 {
697 static PrefixedNameToQualifiedNameMap* map = 0;
698 if (!map) {
699 map = new PrefixedNameToQualifiedNameMap;
700 size_t length = 0;
701 QualifiedName** attrs = XLinkNames::getXLinkAttrs(&length);
702 addNamesWithPrefix(map, "xlink", attrs, length);
703
704 attrs = XMLNames::getXMLAttrs(&length);
705 addNamesWithPrefix(map, "xml", attrs, length);
706
707 map->add("xmlns", XMLNSNames::xmlnsAttr);
708 map->add("xmlns:xlink", QualifiedName("xmlns", "xlink", XMLNSNames::xmlnsNamespaceURI));
709 }
710
711 NamedNodeMap* attributes = token.attributes();
712 if (!attributes)
713 return;
714
715 for (unsigned x = 0; x < attributes->length(); ++x) {
716 Attribute* attribute = attributes->attributeItem(x);
717 const QualifiedName& name = map->get(attribute->localName());
718 if (!name.localName().isNull())
719 attribute->parserSetName(name);
720 }
721 }
722
723 }
724
processStartTagForInBody(AtomicHTMLToken & token)725 void HTMLTreeBuilder::processStartTagForInBody(AtomicHTMLToken& token)
726 {
727 ASSERT(token.type() == HTMLToken::StartTag);
728 if (token.name() == htmlTag) {
729 m_tree.insertHTMLHtmlStartTagInBody(token);
730 return;
731 }
732 if (token.name() == baseTag
733 || token.name() == basefontTag
734 || token.name() == bgsoundTag
735 || token.name() == commandTag
736 || token.name() == linkTag
737 || token.name() == metaTag
738 || token.name() == noframesTag
739 || token.name() == scriptTag
740 || token.name() == styleTag
741 || token.name() == titleTag) {
742 bool didProcess = processStartTagForInHead(token);
743 ASSERT_UNUSED(didProcess, didProcess);
744 return;
745 }
746 if (token.name() == bodyTag) {
747 if (!m_tree.openElements()->secondElementIsHTMLBodyElement() || m_tree.openElements()->hasOnlyOneElement()) {
748 ASSERT(isParsingFragment());
749 return;
750 }
751 m_tree.insertHTMLBodyStartTagInBody(token);
752 return;
753 }
754 if (token.name() == framesetTag) {
755 parseError(token);
756 if (!m_tree.openElements()->secondElementIsHTMLBodyElement() || m_tree.openElements()->hasOnlyOneElement()) {
757 ASSERT(isParsingFragment());
758 return;
759 }
760 if (!m_framesetOk)
761 return;
762 ExceptionCode ec = 0;
763 m_tree.openElements()->bodyElement()->remove(ec);
764 ASSERT(!ec);
765 m_tree.openElements()->popUntil(m_tree.openElements()->bodyElement());
766 m_tree.openElements()->popHTMLBodyElement();
767 ASSERT(m_tree.openElements()->top() == m_tree.openElements()->htmlElement());
768 m_tree.insertHTMLElement(token);
769 setInsertionMode(InFramesetMode);
770 return;
771 }
772 if (token.name() == addressTag
773 || token.name() == articleTag
774 || token.name() == asideTag
775 || token.name() == blockquoteTag
776 || token.name() == centerTag
777 || token.name() == detailsTag
778 || token.name() == dirTag
779 || token.name() == divTag
780 || token.name() == dlTag
781 || token.name() == fieldsetTag
782 || token.name() == figcaptionTag
783 || token.name() == figureTag
784 || token.name() == footerTag
785 || token.name() == headerTag
786 || token.name() == hgroupTag
787 || token.name() == menuTag
788 || token.name() == navTag
789 || token.name() == olTag
790 || token.name() == pTag
791 || token.name() == sectionTag
792 || token.name() == summaryTag
793 || token.name() == ulTag) {
794 processFakePEndTagIfPInButtonScope();
795 m_tree.insertHTMLElement(token);
796 return;
797 }
798 if (isNumberedHeaderTag(token.name())) {
799 processFakePEndTagIfPInButtonScope();
800 if (isNumberedHeaderTag(m_tree.currentNode()->localName())) {
801 parseError(token);
802 m_tree.openElements()->pop();
803 }
804 m_tree.insertHTMLElement(token);
805 return;
806 }
807 if (token.name() == preTag || token.name() == listingTag) {
808 processFakePEndTagIfPInButtonScope();
809 m_tree.insertHTMLElement(token);
810 m_parser->tokenizer()->setSkipLeadingNewLineForListing(true);
811 m_framesetOk = false;
812 return;
813 }
814 if (token.name() == formTag) {
815 if (m_tree.form()) {
816 parseError(token);
817 return;
818 }
819 processFakePEndTagIfPInButtonScope();
820 m_tree.insertHTMLFormElement(token);
821 return;
822 }
823 if (token.name() == liTag) {
824 processCloseWhenNestedTag<isLi>(token);
825 return;
826 }
827 if (token.name() == ddTag || token.name() == dtTag) {
828 processCloseWhenNestedTag<isDdOrDt>(token);
829 return;
830 }
831 if (token.name() == plaintextTag) {
832 processFakePEndTagIfPInButtonScope();
833 m_tree.insertHTMLElement(token);
834 m_parser->tokenizer()->setState(HTMLTokenizer::PLAINTEXTState);
835 return;
836 }
837 if (token.name() == buttonTag) {
838 if (m_tree.openElements()->inScope(buttonTag)) {
839 parseError(token);
840 processFakeEndTag(buttonTag);
841 reprocessStartTag(token); // FIXME: Could we just fall through here?
842 return;
843 }
844 m_tree.reconstructTheActiveFormattingElements();
845 m_tree.insertHTMLElement(token);
846 m_framesetOk = false;
847 return;
848 }
849 if (token.name() == aTag) {
850 Element* activeATag = m_tree.activeFormattingElements()->closestElementInScopeWithName(aTag.localName());
851 if (activeATag) {
852 parseError(token);
853 processFakeEndTag(aTag);
854 m_tree.activeFormattingElements()->remove(activeATag);
855 if (m_tree.openElements()->contains(activeATag))
856 m_tree.openElements()->remove(activeATag);
857 }
858 m_tree.reconstructTheActiveFormattingElements();
859 m_tree.insertFormattingElement(token);
860 return;
861 }
862 if (isNonAnchorNonNobrFormattingTag(token.name())) {
863 m_tree.reconstructTheActiveFormattingElements();
864 m_tree.insertFormattingElement(token);
865 return;
866 }
867 if (token.name() == nobrTag) {
868 m_tree.reconstructTheActiveFormattingElements();
869 if (m_tree.openElements()->inScope(nobrTag)) {
870 parseError(token);
871 processFakeEndTag(nobrTag);
872 m_tree.reconstructTheActiveFormattingElements();
873 }
874 m_tree.insertFormattingElement(token);
875 return;
876 }
877 if (token.name() == appletTag
878 || token.name() == marqueeTag
879 || token.name() == objectTag) {
880 m_tree.reconstructTheActiveFormattingElements();
881 m_tree.insertHTMLElement(token);
882 m_tree.activeFormattingElements()->appendMarker();
883 m_framesetOk = false;
884 return;
885 }
886 if (token.name() == tableTag) {
887 if (!m_document->inQuirksMode() && m_tree.openElements()->inButtonScope(pTag))
888 processFakeEndTag(pTag);
889 m_tree.insertHTMLElement(token);
890 m_framesetOk = false;
891 setInsertionMode(InTableMode);
892 return;
893 }
894 if (token.name() == imageTag) {
895 parseError(token);
896 // Apparently we're not supposed to ask.
897 token.setName(imgTag.localName());
898 prepareToReprocessToken();
899 // Note the fall through to the imgTag handling below!
900 }
901 if (token.name() == areaTag
902 || token.name() == brTag
903 || token.name() == embedTag
904 || token.name() == imgTag
905 || token.name() == keygenTag
906 || token.name() == wbrTag) {
907 m_tree.reconstructTheActiveFormattingElements();
908 m_tree.insertSelfClosingHTMLElement(token);
909 m_framesetOk = false;
910 return;
911 }
912 if (token.name() == inputTag) {
913 RefPtr<Attribute> typeAttribute = token.getAttributeItem(typeAttr);
914 m_tree.reconstructTheActiveFormattingElements();
915 m_tree.insertSelfClosingHTMLElement(token);
916 if (!typeAttribute || !equalIgnoringCase(typeAttribute->value(), "hidden"))
917 m_framesetOk = false;
918 return;
919 }
920 if (token.name() == paramTag
921 || token.name() == sourceTag
922 || token.name() == trackTag) {
923 m_tree.insertSelfClosingHTMLElement(token);
924 return;
925 }
926 if (token.name() == hrTag) {
927 processFakePEndTagIfPInButtonScope();
928 m_tree.insertSelfClosingHTMLElement(token);
929 m_framesetOk = false;
930 return;
931 }
932 if (token.name() == isindexTag) {
933 processIsindexStartTagForInBody(token);
934 return;
935 }
936 if (token.name() == textareaTag) {
937 m_tree.insertHTMLElement(token);
938 m_parser->tokenizer()->setSkipLeadingNewLineForListing(true);
939 m_parser->tokenizer()->setState(HTMLTokenizer::RCDATAState);
940 m_originalInsertionMode = m_insertionMode;
941 m_framesetOk = false;
942 setInsertionMode(TextMode);
943 return;
944 }
945 if (token.name() == xmpTag) {
946 processFakePEndTagIfPInButtonScope();
947 m_tree.reconstructTheActiveFormattingElements();
948 m_framesetOk = false;
949 processGenericRawTextStartTag(token);
950 return;
951 }
952 if (token.name() == iframeTag) {
953 m_framesetOk = false;
954 processGenericRawTextStartTag(token);
955 return;
956 }
957 if (token.name() == noembedTag && pluginsEnabled(m_document->frame())) {
958 processGenericRawTextStartTag(token);
959 return;
960 }
961 if (token.name() == noscriptTag && scriptEnabled(m_document->frame())) {
962 processGenericRawTextStartTag(token);
963 return;
964 }
965 if (token.name() == selectTag) {
966 m_tree.reconstructTheActiveFormattingElements();
967 m_tree.insertHTMLElement(token);
968 m_framesetOk = false;
969 if (m_insertionMode == InTableMode
970 || m_insertionMode == InCaptionMode
971 || m_insertionMode == InColumnGroupMode
972 || m_insertionMode == InTableBodyMode
973 || m_insertionMode == InRowMode
974 || m_insertionMode == InCellMode)
975 setInsertionMode(InSelectInTableMode);
976 else
977 setInsertionMode(InSelectMode);
978 return;
979 }
980 if (token.name() == optgroupTag || token.name() == optionTag) {
981 if (m_tree.openElements()->inScope(optionTag.localName())) {
982 AtomicHTMLToken endOption(HTMLToken::EndTag, optionTag.localName());
983 processEndTag(endOption);
984 }
985 m_tree.reconstructTheActiveFormattingElements();
986 m_tree.insertHTMLElement(token);
987 return;
988 }
989 if (token.name() == rpTag || token.name() == rtTag) {
990 if (m_tree.openElements()->inScope(rubyTag.localName())) {
991 m_tree.generateImpliedEndTags();
992 if (!m_tree.currentNode()->hasTagName(rubyTag)) {
993 parseError(token);
994 m_tree.openElements()->popUntil(rubyTag.localName());
995 }
996 }
997 m_tree.insertHTMLElement(token);
998 return;
999 }
1000 if (token.name() == MathMLNames::mathTag.localName()) {
1001 m_tree.reconstructTheActiveFormattingElements();
1002 adjustMathMLAttributes(token);
1003 adjustForeignAttributes(token);
1004 m_tree.insertForeignElement(token, MathMLNames::mathmlNamespaceURI);
1005 if (m_insertionMode != InForeignContentMode && !token.selfClosing())
1006 setInsertionMode(InForeignContentMode);
1007 return;
1008 }
1009 if (token.name() == SVGNames::svgTag.localName()) {
1010 m_tree.reconstructTheActiveFormattingElements();
1011 adjustSVGAttributes(token);
1012 adjustForeignAttributes(token);
1013 m_tree.insertForeignElement(token, SVGNames::svgNamespaceURI);
1014 if (m_insertionMode != InForeignContentMode && !token.selfClosing())
1015 setInsertionMode(InForeignContentMode);
1016 return;
1017 }
1018 if (isCaptionColOrColgroupTag(token.name())
1019 || token.name() == frameTag
1020 || token.name() == headTag
1021 || isTableBodyContextTag(token.name())
1022 || isTableCellContextTag(token.name())
1023 || token.name() == trTag) {
1024 parseError(token);
1025 return;
1026 }
1027 m_tree.reconstructTheActiveFormattingElements();
1028 m_tree.insertHTMLElement(token);
1029 }
1030
processColgroupEndTagForInColumnGroup()1031 bool HTMLTreeBuilder::processColgroupEndTagForInColumnGroup()
1032 {
1033 if (m_tree.currentNode() == m_tree.openElements()->rootNode()) {
1034 ASSERT(isParsingFragment());
1035 // FIXME: parse error
1036 return false;
1037 }
1038 m_tree.openElements()->pop();
1039 setInsertionMode(InTableMode);
1040 return true;
1041 }
1042
1043 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#close-the-cell
closeTheCell()1044 void HTMLTreeBuilder::closeTheCell()
1045 {
1046 ASSERT(insertionMode() == InCellMode);
1047 if (m_tree.openElements()->inTableScope(tdTag)) {
1048 ASSERT(!m_tree.openElements()->inTableScope(thTag));
1049 processFakeEndTag(tdTag);
1050 return;
1051 }
1052 ASSERT(m_tree.openElements()->inTableScope(thTag));
1053 processFakeEndTag(thTag);
1054 ASSERT(insertionMode() == InRowMode);
1055 }
1056
processStartTagForInTable(AtomicHTMLToken & token)1057 void HTMLTreeBuilder::processStartTagForInTable(AtomicHTMLToken& token)
1058 {
1059 ASSERT(token.type() == HTMLToken::StartTag);
1060 if (token.name() == captionTag) {
1061 m_tree.openElements()->popUntilTableScopeMarker();
1062 m_tree.activeFormattingElements()->appendMarker();
1063 m_tree.insertHTMLElement(token);
1064 setInsertionMode(InCaptionMode);
1065 return;
1066 }
1067 if (token.name() == colgroupTag) {
1068 m_tree.openElements()->popUntilTableScopeMarker();
1069 m_tree.insertHTMLElement(token);
1070 setInsertionMode(InColumnGroupMode);
1071 return;
1072 }
1073 if (token.name() == colTag) {
1074 processFakeStartTag(colgroupTag);
1075 ASSERT(InColumnGroupMode);
1076 reprocessStartTag(token);
1077 return;
1078 }
1079 if (isTableBodyContextTag(token.name())) {
1080 m_tree.openElements()->popUntilTableScopeMarker();
1081 m_tree.insertHTMLElement(token);
1082 setInsertionMode(InTableBodyMode);
1083 return;
1084 }
1085 if (isTableCellContextTag(token.name())
1086 || token.name() == trTag) {
1087 processFakeStartTag(tbodyTag);
1088 ASSERT(insertionMode() == InTableBodyMode);
1089 reprocessStartTag(token);
1090 return;
1091 }
1092 if (token.name() == tableTag) {
1093 parseError(token);
1094 if (!processTableEndTagForInTable()) {
1095 ASSERT(isParsingFragment());
1096 return;
1097 }
1098 reprocessStartTag(token);
1099 return;
1100 }
1101 if (token.name() == styleTag || token.name() == scriptTag) {
1102 processStartTagForInHead(token);
1103 return;
1104 }
1105 if (token.name() == inputTag) {
1106 Attribute* typeAttribute = token.getAttributeItem(typeAttr);
1107 if (typeAttribute && equalIgnoringCase(typeAttribute->value(), "hidden")) {
1108 parseError(token);
1109 m_tree.insertSelfClosingHTMLElement(token);
1110 return;
1111 }
1112 // Fall through to "anything else" case.
1113 }
1114 if (token.name() == formTag) {
1115 parseError(token);
1116 if (m_tree.form())
1117 return;
1118 m_tree.insertHTMLFormElement(token, true);
1119 m_tree.openElements()->pop();
1120 return;
1121 }
1122 parseError(token);
1123 HTMLConstructionSite::RedirectToFosterParentGuard redirecter(m_tree);
1124 processStartTagForInBody(token);
1125 }
1126
1127 namespace {
1128
shouldProcessForeignContentUsingInBodyInsertionMode(AtomicHTMLToken & token,ContainerNode * currentElement)1129 bool shouldProcessForeignContentUsingInBodyInsertionMode(AtomicHTMLToken& token, ContainerNode* currentElement)
1130 {
1131 ASSERT(token.type() == HTMLToken::StartTag);
1132 if (currentElement->hasTagName(MathMLNames::miTag)
1133 || currentElement->hasTagName(MathMLNames::moTag)
1134 || currentElement->hasTagName(MathMLNames::mnTag)
1135 || currentElement->hasTagName(MathMLNames::msTag)
1136 || currentElement->hasTagName(MathMLNames::mtextTag)) {
1137 return token.name() != MathMLNames::mglyphTag
1138 && token.name() != MathMLNames::malignmarkTag;
1139 }
1140 if (currentElement->hasTagName(MathMLNames::annotation_xmlTag))
1141 return token.name() == SVGNames::svgTag;
1142 if (currentElement->hasTagName(SVGNames::foreignObjectTag)
1143 || currentElement->hasTagName(SVGNames::descTag)
1144 || currentElement->hasTagName(SVGNames::titleTag))
1145 return true;
1146 return isInHTMLNamespace(currentElement);
1147 }
1148
1149 }
1150
processStartTag(AtomicHTMLToken & token)1151 void HTMLTreeBuilder::processStartTag(AtomicHTMLToken& token)
1152 {
1153 ASSERT(token.type() == HTMLToken::StartTag);
1154 switch (insertionMode()) {
1155 case InitialMode:
1156 ASSERT(insertionMode() == InitialMode);
1157 defaultForInitial();
1158 // Fall through.
1159 case BeforeHTMLMode:
1160 ASSERT(insertionMode() == BeforeHTMLMode);
1161 if (token.name() == htmlTag) {
1162 m_tree.insertHTMLHtmlStartTagBeforeHTML(token);
1163 setInsertionMode(BeforeHeadMode);
1164 return;
1165 }
1166 defaultForBeforeHTML();
1167 // Fall through.
1168 case BeforeHeadMode:
1169 ASSERT(insertionMode() == BeforeHeadMode);
1170 if (token.name() == htmlTag) {
1171 m_tree.insertHTMLHtmlStartTagInBody(token);
1172 return;
1173 }
1174 if (token.name() == headTag) {
1175 m_tree.insertHTMLHeadElement(token);
1176 setInsertionMode(InHeadMode);
1177 return;
1178 }
1179 defaultForBeforeHead();
1180 // Fall through.
1181 case InHeadMode:
1182 ASSERT(insertionMode() == InHeadMode);
1183 if (processStartTagForInHead(token))
1184 return;
1185 defaultForInHead();
1186 // Fall through.
1187 case AfterHeadMode:
1188 ASSERT(insertionMode() == AfterHeadMode);
1189 if (token.name() == htmlTag) {
1190 m_tree.insertHTMLHtmlStartTagInBody(token);
1191 return;
1192 }
1193 if (token.name() == bodyTag) {
1194 m_framesetOk = false;
1195 m_tree.insertHTMLBodyElement(token);
1196 setInsertionMode(InBodyMode);
1197 return;
1198 }
1199 if (token.name() == framesetTag) {
1200 m_tree.insertHTMLElement(token);
1201 setInsertionMode(InFramesetMode);
1202 return;
1203 }
1204 if (token.name() == baseTag
1205 || token.name() == basefontTag
1206 || token.name() == bgsoundTag
1207 || token.name() == linkTag
1208 || token.name() == metaTag
1209 || token.name() == noframesTag
1210 || token.name() == scriptTag
1211 || token.name() == styleTag
1212 || token.name() == titleTag) {
1213 parseError(token);
1214 ASSERT(m_tree.head());
1215 m_tree.openElements()->pushHTMLHeadElement(m_tree.head());
1216 processStartTagForInHead(token);
1217 m_tree.openElements()->removeHTMLHeadElement(m_tree.head());
1218 return;
1219 }
1220 if (token.name() == headTag) {
1221 parseError(token);
1222 return;
1223 }
1224 defaultForAfterHead();
1225 // Fall through
1226 case InBodyMode:
1227 ASSERT(insertionMode() == InBodyMode);
1228 processStartTagForInBody(token);
1229 break;
1230 case InTableMode:
1231 ASSERT(insertionMode() == InTableMode);
1232 processStartTagForInTable(token);
1233 break;
1234 case InCaptionMode:
1235 ASSERT(insertionMode() == InCaptionMode);
1236 if (isCaptionColOrColgroupTag(token.name())
1237 || isTableBodyContextTag(token.name())
1238 || isTableCellContextTag(token.name())
1239 || token.name() == trTag) {
1240 parseError(token);
1241 if (!processCaptionEndTagForInCaption()) {
1242 ASSERT(isParsingFragment());
1243 return;
1244 }
1245 reprocessStartTag(token);
1246 return;
1247 }
1248 processStartTagForInBody(token);
1249 break;
1250 case InColumnGroupMode:
1251 ASSERT(insertionMode() == InColumnGroupMode);
1252 if (token.name() == htmlTag) {
1253 m_tree.insertHTMLHtmlStartTagInBody(token);
1254 return;
1255 }
1256 if (token.name() == colTag) {
1257 m_tree.insertSelfClosingHTMLElement(token);
1258 return;
1259 }
1260 if (!processColgroupEndTagForInColumnGroup()) {
1261 ASSERT(isParsingFragment());
1262 return;
1263 }
1264 reprocessStartTag(token);
1265 break;
1266 case InTableBodyMode:
1267 ASSERT(insertionMode() == InTableBodyMode);
1268 if (token.name() == trTag) {
1269 m_tree.openElements()->popUntilTableBodyScopeMarker(); // How is there ever anything to pop?
1270 m_tree.insertHTMLElement(token);
1271 setInsertionMode(InRowMode);
1272 return;
1273 }
1274 if (isTableCellContextTag(token.name())) {
1275 parseError(token);
1276 processFakeStartTag(trTag);
1277 ASSERT(insertionMode() == InRowMode);
1278 reprocessStartTag(token);
1279 return;
1280 }
1281 if (isCaptionColOrColgroupTag(token.name()) || isTableBodyContextTag(token.name())) {
1282 // FIXME: This is slow.
1283 if (!m_tree.openElements()->inTableScope(tbodyTag.localName()) && !m_tree.openElements()->inTableScope(theadTag.localName()) && !m_tree.openElements()->inTableScope(tfootTag.localName())) {
1284 ASSERT(isParsingFragment());
1285 parseError(token);
1286 return;
1287 }
1288 m_tree.openElements()->popUntilTableBodyScopeMarker();
1289 ASSERT(isTableBodyContextTag(m_tree.currentElement()->localName()));
1290 processFakeEndTag(m_tree.currentElement()->tagQName());
1291 reprocessStartTag(token);
1292 return;
1293 }
1294 processStartTagForInTable(token);
1295 break;
1296 case InRowMode:
1297 ASSERT(insertionMode() == InRowMode);
1298 if (isTableCellContextTag(token.name())) {
1299 m_tree.openElements()->popUntilTableRowScopeMarker();
1300 m_tree.insertHTMLElement(token);
1301 setInsertionMode(InCellMode);
1302 m_tree.activeFormattingElements()->appendMarker();
1303 return;
1304 }
1305 if (token.name() == trTag
1306 || isCaptionColOrColgroupTag(token.name())
1307 || isTableBodyContextTag(token.name())) {
1308 if (!processTrEndTagForInRow()) {
1309 ASSERT(isParsingFragment());
1310 return;
1311 }
1312 ASSERT(insertionMode() == InTableBodyMode);
1313 reprocessStartTag(token);
1314 return;
1315 }
1316 processStartTagForInTable(token);
1317 break;
1318 case InCellMode:
1319 ASSERT(insertionMode() == InCellMode);
1320 if (isCaptionColOrColgroupTag(token.name())
1321 || isTableCellContextTag(token.name())
1322 || token.name() == trTag
1323 || isTableBodyContextTag(token.name())) {
1324 // FIXME: This could be more efficient.
1325 if (!m_tree.openElements()->inTableScope(tdTag) && !m_tree.openElements()->inTableScope(thTag)) {
1326 ASSERT(isParsingFragment());
1327 parseError(token);
1328 return;
1329 }
1330 closeTheCell();
1331 reprocessStartTag(token);
1332 return;
1333 }
1334 processStartTagForInBody(token);
1335 break;
1336 case AfterBodyMode:
1337 case AfterAfterBodyMode:
1338 ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
1339 if (token.name() == htmlTag) {
1340 m_tree.insertHTMLHtmlStartTagInBody(token);
1341 return;
1342 }
1343 setInsertionMode(InBodyMode);
1344 reprocessStartTag(token);
1345 break;
1346 case InHeadNoscriptMode:
1347 ASSERT(insertionMode() == InHeadNoscriptMode);
1348 if (token.name() == htmlTag) {
1349 m_tree.insertHTMLHtmlStartTagInBody(token);
1350 return;
1351 }
1352 if (token.name() == basefontTag
1353 || token.name() == bgsoundTag
1354 || token.name() == linkTag
1355 || token.name() == metaTag
1356 || token.name() == noframesTag
1357 || token.name() == styleTag) {
1358 bool didProcess = processStartTagForInHead(token);
1359 ASSERT_UNUSED(didProcess, didProcess);
1360 return;
1361 }
1362 if (token.name() == htmlTag || token.name() == noscriptTag) {
1363 parseError(token);
1364 return;
1365 }
1366 defaultForInHeadNoscript();
1367 processToken(token);
1368 break;
1369 case InFramesetMode:
1370 ASSERT(insertionMode() == InFramesetMode);
1371 if (token.name() == htmlTag) {
1372 m_tree.insertHTMLHtmlStartTagInBody(token);
1373 return;
1374 }
1375 if (token.name() == framesetTag) {
1376 m_tree.insertHTMLElement(token);
1377 return;
1378 }
1379 if (token.name() == frameTag) {
1380 m_tree.insertSelfClosingHTMLElement(token);
1381 return;
1382 }
1383 if (token.name() == noframesTag) {
1384 processStartTagForInHead(token);
1385 return;
1386 }
1387 parseError(token);
1388 break;
1389 case AfterFramesetMode:
1390 case AfterAfterFramesetMode:
1391 ASSERT(insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
1392 if (token.name() == htmlTag) {
1393 m_tree.insertHTMLHtmlStartTagInBody(token);
1394 return;
1395 }
1396 if (token.name() == noframesTag) {
1397 processStartTagForInHead(token);
1398 return;
1399 }
1400 parseError(token);
1401 break;
1402 case InSelectInTableMode:
1403 ASSERT(insertionMode() == InSelectInTableMode);
1404 if (token.name() == captionTag
1405 || token.name() == tableTag
1406 || isTableBodyContextTag(token.name())
1407 || token.name() == trTag
1408 || isTableCellContextTag(token.name())) {
1409 parseError(token);
1410 AtomicHTMLToken endSelect(HTMLToken::EndTag, selectTag.localName());
1411 processEndTag(endSelect);
1412 reprocessStartTag(token);
1413 return;
1414 }
1415 // Fall through
1416 case InSelectMode:
1417 ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode);
1418 if (token.name() == htmlTag) {
1419 m_tree.insertHTMLHtmlStartTagInBody(token);
1420 return;
1421 }
1422 if (token.name() == optionTag) {
1423 if (m_tree.currentNode()->hasTagName(optionTag)) {
1424 AtomicHTMLToken endOption(HTMLToken::EndTag, optionTag.localName());
1425 processEndTag(endOption);
1426 }
1427 m_tree.insertHTMLElement(token);
1428 return;
1429 }
1430 if (token.name() == optgroupTag) {
1431 if (m_tree.currentNode()->hasTagName(optionTag)) {
1432 AtomicHTMLToken endOption(HTMLToken::EndTag, optionTag.localName());
1433 processEndTag(endOption);
1434 }
1435 if (m_tree.currentNode()->hasTagName(optgroupTag)) {
1436 AtomicHTMLToken endOptgroup(HTMLToken::EndTag, optgroupTag.localName());
1437 processEndTag(endOptgroup);
1438 }
1439 m_tree.insertHTMLElement(token);
1440 return;
1441 }
1442 if (token.name() == selectTag) {
1443 parseError(token);
1444 AtomicHTMLToken endSelect(HTMLToken::EndTag, selectTag.localName());
1445 processEndTag(endSelect);
1446 return;
1447 }
1448 if (token.name() == inputTag
1449 || token.name() == keygenTag
1450 || token.name() == textareaTag) {
1451 parseError(token);
1452 if (!m_tree.openElements()->inSelectScope(selectTag)) {
1453 ASSERT(isParsingFragment());
1454 return;
1455 }
1456 AtomicHTMLToken endSelect(HTMLToken::EndTag, selectTag.localName());
1457 processEndTag(endSelect);
1458 reprocessStartTag(token);
1459 return;
1460 }
1461 if (token.name() == scriptTag) {
1462 bool didProcess = processStartTagForInHead(token);
1463 ASSERT_UNUSED(didProcess, didProcess);
1464 return;
1465 }
1466 break;
1467 case InTableTextMode:
1468 defaultForInTableText();
1469 processStartTag(token);
1470 break;
1471 case InForeignContentMode: {
1472 if (shouldProcessForeignContentUsingInBodyInsertionMode(token, m_tree.currentNode())) {
1473 processForeignContentUsingInBodyModeAndResetMode(token);
1474 return;
1475 }
1476 if (token.name() == bTag
1477 || token.name() == bigTag
1478 || token.name() == blockquoteTag
1479 || token.name() == bodyTag
1480 || token.name() == brTag
1481 || token.name() == centerTag
1482 || token.name() == codeTag
1483 || token.name() == ddTag
1484 || token.name() == divTag
1485 || token.name() == dlTag
1486 || token.name() == dtTag
1487 || token.name() == emTag
1488 || token.name() == embedTag
1489 || isNumberedHeaderTag(token.name())
1490 || token.name() == headTag
1491 || token.name() == hrTag
1492 || token.name() == iTag
1493 || token.name() == imgTag
1494 || token.name() == liTag
1495 || token.name() == listingTag
1496 || token.name() == menuTag
1497 || token.name() == metaTag
1498 || token.name() == nobrTag
1499 || token.name() == olTag
1500 || token.name() == pTag
1501 || token.name() == preTag
1502 || token.name() == rubyTag
1503 || token.name() == sTag
1504 || token.name() == smallTag
1505 || token.name() == spanTag
1506 || token.name() == strongTag
1507 || token.name() == strikeTag
1508 || token.name() == subTag
1509 || token.name() == supTag
1510 || token.name() == tableTag
1511 || token.name() == ttTag
1512 || token.name() == uTag
1513 || token.name() == ulTag
1514 || token.name() == varTag
1515 || (token.name() == fontTag && (token.getAttributeItem(colorAttr) || token.getAttributeItem(faceAttr) || token.getAttributeItem(sizeAttr)))) {
1516 parseError(token);
1517 m_tree.openElements()->popUntilForeignContentScopeMarker();
1518 resetInsertionModeAppropriately();
1519 reprocessStartTag(token);
1520 return;
1521 }
1522 const AtomicString& currentNamespace = m_tree.currentElement()->namespaceURI();
1523 if (currentNamespace == MathMLNames::mathmlNamespaceURI)
1524 adjustMathMLAttributes(token);
1525 if (currentNamespace == SVGNames::svgNamespaceURI) {
1526 adjustSVGTagNameCase(token);
1527 adjustSVGAttributes(token);
1528 }
1529 adjustForeignAttributes(token);
1530 m_tree.insertForeignElement(token, currentNamespace);
1531 break;
1532 }
1533 case TextMode:
1534 ASSERT_NOT_REACHED();
1535 break;
1536 }
1537 }
1538
processBodyEndTagForInBody(AtomicHTMLToken & token)1539 bool HTMLTreeBuilder::processBodyEndTagForInBody(AtomicHTMLToken& token)
1540 {
1541 ASSERT(token.type() == HTMLToken::EndTag);
1542 ASSERT(token.name() == bodyTag);
1543 if (!m_tree.openElements()->inScope(bodyTag.localName())) {
1544 parseError(token);
1545 return false;
1546 }
1547 notImplemented(); // Emit a more specific parse error based on stack contents.
1548 setInsertionMode(AfterBodyMode);
1549 return true;
1550 }
1551
processAnyOtherEndTagForInBody(AtomicHTMLToken & token)1552 void HTMLTreeBuilder::processAnyOtherEndTagForInBody(AtomicHTMLToken& token)
1553 {
1554 ASSERT(token.type() == HTMLToken::EndTag);
1555 HTMLElementStack::ElementRecord* record = m_tree.openElements()->topRecord();
1556 while (1) {
1557 RefPtr<ContainerNode> node = record->node();
1558 if (node->hasLocalName(token.name())) {
1559 m_tree.generateImpliedEndTags();
1560 // FIXME: The ElementRecord pointed to by record might be deleted by
1561 // the preceding call. Perhaps we should hold a RefPtr so that it
1562 // stays alive for the duration of record's scope.
1563 record = 0;
1564 if (!m_tree.currentNode()->hasLocalName(token.name())) {
1565 parseError(token);
1566 // FIXME: This is either a bug in the spec, or a bug in our
1567 // implementation. Filed a bug with HTML5:
1568 // http://www.w3.org/Bugs/Public/show_bug.cgi?id=10080
1569 // We might have already popped the node for the token in
1570 // generateImpliedEndTags, just abort.
1571 if (!m_tree.openElements()->contains(toElement(node.get())))
1572 return;
1573 }
1574 m_tree.openElements()->popUntilPopped(toElement(node.get()));
1575 return;
1576 }
1577 if (isSpecialNode(node.get())) {
1578 parseError(token);
1579 return;
1580 }
1581 record = record->next();
1582 }
1583 }
1584
1585 // FIXME: This probably belongs on HTMLElementStack.
furthestBlockForFormattingElement(Element * formattingElement)1586 HTMLElementStack::ElementRecord* HTMLTreeBuilder::furthestBlockForFormattingElement(Element* formattingElement)
1587 {
1588 HTMLElementStack::ElementRecord* furthestBlock = 0;
1589 HTMLElementStack::ElementRecord* record = m_tree.openElements()->topRecord();
1590 for (; record; record = record->next()) {
1591 if (record->element() == formattingElement)
1592 return furthestBlock;
1593 if (isSpecialNode(record->element()))
1594 furthestBlock = record;
1595 }
1596 ASSERT_NOT_REACHED();
1597 return 0;
1598 }
1599
1600 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#parsing-main-inbody
callTheAdoptionAgency(AtomicHTMLToken & token)1601 void HTMLTreeBuilder::callTheAdoptionAgency(AtomicHTMLToken& token)
1602 {
1603 // The adoption agency algorithm is N^2. We limit the number of iterations
1604 // to stop from hanging the whole browser. This limit is copied from the
1605 // legacy tree builder and might need to be tweaked in the future.
1606 static const int adoptionAgencyIterationLimit = 10;
1607
1608 for (int i = 0; i < adoptionAgencyIterationLimit; ++i) {
1609 // 1.
1610 Element* formattingElement = m_tree.activeFormattingElements()->closestElementInScopeWithName(token.name());
1611 if (!formattingElement || ((m_tree.openElements()->contains(formattingElement)) && !m_tree.openElements()->inScope(formattingElement))) {
1612 parseError(token);
1613 notImplemented(); // Check the stack of open elements for a more specific parse error.
1614 return;
1615 }
1616 HTMLElementStack::ElementRecord* formattingElementRecord = m_tree.openElements()->find(formattingElement);
1617 if (!formattingElementRecord) {
1618 parseError(token);
1619 m_tree.activeFormattingElements()->remove(formattingElement);
1620 return;
1621 }
1622 if (formattingElement != m_tree.currentElement())
1623 parseError(token);
1624 // 2.
1625 HTMLElementStack::ElementRecord* furthestBlock = furthestBlockForFormattingElement(formattingElement);
1626 // 3.
1627 if (!furthestBlock) {
1628 m_tree.openElements()->popUntilPopped(formattingElement);
1629 m_tree.activeFormattingElements()->remove(formattingElement);
1630 return;
1631 }
1632 // 4.
1633 ASSERT(furthestBlock->isAbove(formattingElementRecord));
1634 RefPtr<ContainerNode> commonAncestor = formattingElementRecord->next()->node();
1635 // 5.
1636 HTMLFormattingElementList::Bookmark bookmark = m_tree.activeFormattingElements()->bookmarkFor(formattingElement);
1637 // 6.
1638 HTMLElementStack::ElementRecord* node = furthestBlock;
1639 HTMLElementStack::ElementRecord* nextNode = node->next();
1640 HTMLElementStack::ElementRecord* lastNode = furthestBlock;
1641 for (int i = 0; i < adoptionAgencyIterationLimit; ++i) {
1642 // 6.1
1643 node = nextNode;
1644 ASSERT(node);
1645 nextNode = node->next(); // Save node->next() for the next iteration in case node is deleted in 6.2.
1646 // 6.2
1647 if (!m_tree.activeFormattingElements()->contains(node->element())) {
1648 m_tree.openElements()->remove(node->element());
1649 node = 0;
1650 continue;
1651 }
1652 // 6.3
1653 if (node == formattingElementRecord)
1654 break;
1655 // 6.5
1656 RefPtr<Element> newElement = m_tree.createHTMLElementFromElementRecord(node);
1657 HTMLFormattingElementList::Entry* nodeEntry = m_tree.activeFormattingElements()->find(node->element());
1658 nodeEntry->replaceElement(newElement.get());
1659 node->replaceElement(newElement.release());
1660 // 6.4 -- Intentionally out of order to handle the case where node
1661 // was replaced in 6.5.
1662 // http://www.w3.org/Bugs/Public/show_bug.cgi?id=10096
1663 if (lastNode == furthestBlock)
1664 bookmark.moveToAfter(nodeEntry);
1665 // 6.6
1666 if (Element* parent = lastNode->element()->parentElement())
1667 parent->parserRemoveChild(lastNode->element());
1668 node->element()->parserAddChild(lastNode->element());
1669 if (lastNode->element()->parentElement()->attached() && !lastNode->element()->attached())
1670 lastNode->element()->lazyAttach();
1671 // 6.7
1672 lastNode = node;
1673 }
1674 // 7
1675 const AtomicString& commonAncestorTag = commonAncestor->localName();
1676 if (Element* parent = lastNode->element()->parentElement())
1677 parent->parserRemoveChild(lastNode->element());
1678 // FIXME: If this moves to HTMLConstructionSite, this check should use
1679 // causesFosterParenting(tagName) instead.
1680 if (commonAncestorTag == tableTag
1681 || commonAncestorTag == trTag
1682 || isTableBodyContextTag(commonAncestorTag))
1683 m_tree.fosterParent(lastNode->element());
1684 else {
1685 commonAncestor->parserAddChild(lastNode->element());
1686 ASSERT(lastNode->node()->isElementNode());
1687 ASSERT(lastNode->element()->parentNode());
1688 if (lastNode->element()->parentNode()->attached() && !lastNode->element()->attached())
1689 lastNode->element()->lazyAttach();
1690 }
1691 // 8
1692 RefPtr<Element> newElement = m_tree.createHTMLElementFromElementRecord(formattingElementRecord);
1693 // 9
1694 newElement->takeAllChildrenFrom(furthestBlock->element());
1695 // 10
1696 Element* furthestBlockElement = furthestBlock->element();
1697 // FIXME: All this creation / parserAddChild / attach business should
1698 // be in HTMLConstructionSite. My guess is that steps 8--12
1699 // should all be in some HTMLConstructionSite function.
1700 furthestBlockElement->parserAddChild(newElement);
1701 if (furthestBlockElement->attached() && !newElement->attached()) {
1702 // Notice that newElement might already be attached if, for example, one of the reparented
1703 // children is a style element, which attaches itself automatically.
1704 newElement->attach();
1705 }
1706 // 11
1707 m_tree.activeFormattingElements()->swapTo(formattingElement, newElement.get(), bookmark);
1708 // 12
1709 m_tree.openElements()->remove(formattingElement);
1710 m_tree.openElements()->insertAbove(newElement, furthestBlock);
1711 }
1712 }
1713
resetInsertionModeAppropriately()1714 void HTMLTreeBuilder::resetInsertionModeAppropriately()
1715 {
1716 // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#reset-the-insertion-mode-appropriately
1717 bool last = false;
1718 HTMLElementStack::ElementRecord* nodeRecord = m_tree.openElements()->topRecord();
1719 while (1) {
1720 ContainerNode* node = nodeRecord->node();
1721 if (node == m_tree.openElements()->rootNode()) {
1722 ASSERT(isParsingFragment());
1723 last = true;
1724 node = m_fragmentContext.contextElement();
1725 }
1726 if (node->hasTagName(selectTag)) {
1727 ASSERT(isParsingFragment());
1728 return setInsertionMode(InSelectMode);
1729 }
1730 if (node->hasTagName(tdTag) || node->hasTagName(thTag))
1731 return setInsertionMode(InCellMode);
1732 if (node->hasTagName(trTag))
1733 return setInsertionMode(InRowMode);
1734 if (node->hasTagName(tbodyTag) || node->hasTagName(theadTag) || node->hasTagName(tfootTag))
1735 return setInsertionMode(InTableBodyMode);
1736 if (node->hasTagName(captionTag))
1737 return setInsertionMode(InCaptionMode);
1738 if (node->hasTagName(colgroupTag)) {
1739 ASSERT(isParsingFragment());
1740 return setInsertionMode(InColumnGroupMode);
1741 }
1742 if (node->hasTagName(tableTag))
1743 return setInsertionMode(InTableMode);
1744 if (node->hasTagName(headTag)) {
1745 ASSERT(isParsingFragment());
1746 return setInsertionMode(InBodyMode);
1747 }
1748 if (node->hasTagName(bodyTag))
1749 return setInsertionMode(InBodyMode);
1750 if (node->hasTagName(framesetTag)) {
1751 ASSERT(isParsingFragment());
1752 return setInsertionMode(InFramesetMode);
1753 }
1754 if (node->hasTagName(htmlTag)) {
1755 ASSERT(isParsingFragment());
1756 return setInsertionMode(BeforeHeadMode);
1757 }
1758 if (node->namespaceURI() == SVGNames::svgNamespaceURI
1759 || node->namespaceURI() == MathMLNames::mathmlNamespaceURI)
1760 return setInsertionMode(InForeignContentMode);
1761 if (last) {
1762 ASSERT(isParsingFragment());
1763 return setInsertionMode(InBodyMode);
1764 }
1765 nodeRecord = nodeRecord->next();
1766 }
1767 }
1768
processEndTagForInTableBody(AtomicHTMLToken & token)1769 void HTMLTreeBuilder::processEndTagForInTableBody(AtomicHTMLToken& token)
1770 {
1771 ASSERT(token.type() == HTMLToken::EndTag);
1772 if (isTableBodyContextTag(token.name())) {
1773 if (!m_tree.openElements()->inTableScope(token.name())) {
1774 parseError(token);
1775 return;
1776 }
1777 m_tree.openElements()->popUntilTableBodyScopeMarker();
1778 m_tree.openElements()->pop();
1779 setInsertionMode(InTableMode);
1780 return;
1781 }
1782 if (token.name() == tableTag) {
1783 // FIXME: This is slow.
1784 if (!m_tree.openElements()->inTableScope(tbodyTag.localName()) && !m_tree.openElements()->inTableScope(theadTag.localName()) && !m_tree.openElements()->inTableScope(tfootTag.localName())) {
1785 ASSERT(isParsingFragment());
1786 parseError(token);
1787 return;
1788 }
1789 m_tree.openElements()->popUntilTableBodyScopeMarker();
1790 ASSERT(isTableBodyContextTag(m_tree.currentElement()->localName()));
1791 processFakeEndTag(m_tree.currentElement()->tagQName());
1792 reprocessEndTag(token);
1793 return;
1794 }
1795 if (token.name() == bodyTag
1796 || isCaptionColOrColgroupTag(token.name())
1797 || token.name() == htmlTag
1798 || isTableCellContextTag(token.name())
1799 || token.name() == trTag) {
1800 parseError(token);
1801 return;
1802 }
1803 processEndTagForInTable(token);
1804 }
1805
processEndTagForInRow(AtomicHTMLToken & token)1806 void HTMLTreeBuilder::processEndTagForInRow(AtomicHTMLToken& token)
1807 {
1808 ASSERT(token.type() == HTMLToken::EndTag);
1809 if (token.name() == trTag) {
1810 processTrEndTagForInRow();
1811 return;
1812 }
1813 if (token.name() == tableTag) {
1814 if (!processTrEndTagForInRow()) {
1815 ASSERT(isParsingFragment());
1816 return;
1817 }
1818 ASSERT(insertionMode() == InTableBodyMode);
1819 reprocessEndTag(token);
1820 return;
1821 }
1822 if (isTableBodyContextTag(token.name())) {
1823 if (!m_tree.openElements()->inTableScope(token.name())) {
1824 parseError(token);
1825 return;
1826 }
1827 processFakeEndTag(trTag);
1828 ASSERT(insertionMode() == InTableBodyMode);
1829 reprocessEndTag(token);
1830 return;
1831 }
1832 if (token.name() == bodyTag
1833 || isCaptionColOrColgroupTag(token.name())
1834 || token.name() == htmlTag
1835 || isTableCellContextTag(token.name())) {
1836 parseError(token);
1837 return;
1838 }
1839 processEndTagForInTable(token);
1840 }
1841
processEndTagForInCell(AtomicHTMLToken & token)1842 void HTMLTreeBuilder::processEndTagForInCell(AtomicHTMLToken& token)
1843 {
1844 ASSERT(token.type() == HTMLToken::EndTag);
1845 if (isTableCellContextTag(token.name())) {
1846 if (!m_tree.openElements()->inTableScope(token.name())) {
1847 parseError(token);
1848 return;
1849 }
1850 m_tree.generateImpliedEndTags();
1851 if (!m_tree.currentNode()->hasLocalName(token.name()))
1852 parseError(token);
1853 m_tree.openElements()->popUntilPopped(token.name());
1854 m_tree.activeFormattingElements()->clearToLastMarker();
1855 setInsertionMode(InRowMode);
1856 return;
1857 }
1858 if (token.name() == bodyTag
1859 || isCaptionColOrColgroupTag(token.name())
1860 || token.name() == htmlTag) {
1861 parseError(token);
1862 return;
1863 }
1864 if (token.name() == tableTag
1865 || token.name() == trTag
1866 || isTableBodyContextTag(token.name())) {
1867 if (!m_tree.openElements()->inTableScope(token.name())) {
1868 ASSERT(isTableBodyContextTag(token.name()) || isParsingFragment());
1869 parseError(token);
1870 return;
1871 }
1872 closeTheCell();
1873 reprocessEndTag(token);
1874 return;
1875 }
1876 processEndTagForInBody(token);
1877 }
1878
processEndTagForInBody(AtomicHTMLToken & token)1879 void HTMLTreeBuilder::processEndTagForInBody(AtomicHTMLToken& token)
1880 {
1881 ASSERT(token.type() == HTMLToken::EndTag);
1882 if (token.name() == bodyTag) {
1883 processBodyEndTagForInBody(token);
1884 return;
1885 }
1886 if (token.name() == htmlTag) {
1887 AtomicHTMLToken endBody(HTMLToken::EndTag, bodyTag.localName());
1888 if (processBodyEndTagForInBody(endBody))
1889 reprocessEndTag(token);
1890 return;
1891 }
1892 if (token.name() == addressTag
1893 || token.name() == articleTag
1894 || token.name() == asideTag
1895 || token.name() == blockquoteTag
1896 || token.name() == buttonTag
1897 || token.name() == centerTag
1898 || token.name() == detailsTag
1899 || token.name() == dirTag
1900 || token.name() == divTag
1901 || token.name() == dlTag
1902 || token.name() == fieldsetTag
1903 || token.name() == figcaptionTag
1904 || token.name() == figureTag
1905 || token.name() == footerTag
1906 || token.name() == headerTag
1907 || token.name() == hgroupTag
1908 || token.name() == listingTag
1909 || token.name() == menuTag
1910 || token.name() == navTag
1911 || token.name() == olTag
1912 || token.name() == preTag
1913 || token.name() == sectionTag
1914 || token.name() == summaryTag
1915 || token.name() == ulTag) {
1916 if (!m_tree.openElements()->inScope(token.name())) {
1917 parseError(token);
1918 return;
1919 }
1920 m_tree.generateImpliedEndTags();
1921 if (!m_tree.currentNode()->hasLocalName(token.name()))
1922 parseError(token);
1923 m_tree.openElements()->popUntilPopped(token.name());
1924 return;
1925 }
1926 if (token.name() == formTag) {
1927 RefPtr<Element> node = m_tree.takeForm();
1928 if (!node || !m_tree.openElements()->inScope(node.get())) {
1929 parseError(token);
1930 return;
1931 }
1932 m_tree.generateImpliedEndTags();
1933 if (m_tree.currentElement() != node.get())
1934 parseError(token);
1935 m_tree.openElements()->remove(node.get());
1936 }
1937 if (token.name() == pTag) {
1938 if (!m_tree.openElements()->inButtonScope(token.name())) {
1939 parseError(token);
1940 processFakeStartTag(pTag);
1941 ASSERT(m_tree.openElements()->inScope(token.name()));
1942 reprocessEndTag(token);
1943 return;
1944 }
1945 m_tree.generateImpliedEndTagsWithExclusion(token.name());
1946 if (!m_tree.currentNode()->hasLocalName(token.name()))
1947 parseError(token);
1948 m_tree.openElements()->popUntilPopped(token.name());
1949 return;
1950 }
1951 if (token.name() == liTag) {
1952 if (!m_tree.openElements()->inListItemScope(token.name())) {
1953 parseError(token);
1954 return;
1955 }
1956 m_tree.generateImpliedEndTagsWithExclusion(token.name());
1957 if (!m_tree.currentNode()->hasLocalName(token.name()))
1958 parseError(token);
1959 m_tree.openElements()->popUntilPopped(token.name());
1960 return;
1961 }
1962 if (token.name() == ddTag
1963 || token.name() == dtTag) {
1964 if (!m_tree.openElements()->inScope(token.name())) {
1965 parseError(token);
1966 return;
1967 }
1968 m_tree.generateImpliedEndTagsWithExclusion(token.name());
1969 if (!m_tree.currentNode()->hasLocalName(token.name()))
1970 parseError(token);
1971 m_tree.openElements()->popUntilPopped(token.name());
1972 return;
1973 }
1974 if (isNumberedHeaderTag(token.name())) {
1975 if (!m_tree.openElements()->hasNumberedHeaderElementInScope()) {
1976 parseError(token);
1977 return;
1978 }
1979 m_tree.generateImpliedEndTags();
1980 if (!m_tree.currentNode()->hasLocalName(token.name()))
1981 parseError(token);
1982 m_tree.openElements()->popUntilNumberedHeaderElementPopped();
1983 return;
1984 }
1985 if (isFormattingTag(token.name())) {
1986 callTheAdoptionAgency(token);
1987 return;
1988 }
1989 if (token.name() == appletTag
1990 || token.name() == marqueeTag
1991 || token.name() == objectTag) {
1992 if (!m_tree.openElements()->inScope(token.name())) {
1993 parseError(token);
1994 return;
1995 }
1996 m_tree.generateImpliedEndTags();
1997 if (!m_tree.currentNode()->hasLocalName(token.name()))
1998 parseError(token);
1999 m_tree.openElements()->popUntilPopped(token.name());
2000 m_tree.activeFormattingElements()->clearToLastMarker();
2001 return;
2002 }
2003 if (token.name() == brTag) {
2004 parseError(token);
2005 processFakeStartTag(brTag);
2006 return;
2007 }
2008 processAnyOtherEndTagForInBody(token);
2009 }
2010
processCaptionEndTagForInCaption()2011 bool HTMLTreeBuilder::processCaptionEndTagForInCaption()
2012 {
2013 if (!m_tree.openElements()->inTableScope(captionTag.localName())) {
2014 ASSERT(isParsingFragment());
2015 // FIXME: parse error
2016 return false;
2017 }
2018 m_tree.generateImpliedEndTags();
2019 // FIXME: parse error if (!m_tree.currentElement()->hasTagName(captionTag))
2020 m_tree.openElements()->popUntilPopped(captionTag.localName());
2021 m_tree.activeFormattingElements()->clearToLastMarker();
2022 setInsertionMode(InTableMode);
2023 return true;
2024 }
2025
processTrEndTagForInRow()2026 bool HTMLTreeBuilder::processTrEndTagForInRow()
2027 {
2028 if (!m_tree.openElements()->inTableScope(trTag.localName())) {
2029 ASSERT(isParsingFragment());
2030 // FIXME: parse error
2031 return false;
2032 }
2033 m_tree.openElements()->popUntilTableRowScopeMarker();
2034 ASSERT(m_tree.currentElement()->hasTagName(trTag));
2035 m_tree.openElements()->pop();
2036 setInsertionMode(InTableBodyMode);
2037 return true;
2038 }
2039
processTableEndTagForInTable()2040 bool HTMLTreeBuilder::processTableEndTagForInTable()
2041 {
2042 if (!m_tree.openElements()->inTableScope(tableTag)) {
2043 ASSERT(isParsingFragment());
2044 // FIXME: parse error.
2045 return false;
2046 }
2047 m_tree.openElements()->popUntilPopped(tableTag.localName());
2048 resetInsertionModeAppropriately();
2049 return true;
2050 }
2051
processEndTagForInTable(AtomicHTMLToken & token)2052 void HTMLTreeBuilder::processEndTagForInTable(AtomicHTMLToken& token)
2053 {
2054 ASSERT(token.type() == HTMLToken::EndTag);
2055 if (token.name() == tableTag) {
2056 processTableEndTagForInTable();
2057 return;
2058 }
2059 if (token.name() == bodyTag
2060 || isCaptionColOrColgroupTag(token.name())
2061 || token.name() == htmlTag
2062 || isTableBodyContextTag(token.name())
2063 || isTableCellContextTag(token.name())
2064 || token.name() == trTag) {
2065 parseError(token);
2066 return;
2067 }
2068 // Is this redirection necessary here?
2069 HTMLConstructionSite::RedirectToFosterParentGuard redirecter(m_tree);
2070 processEndTagForInBody(token);
2071 }
2072
processEndTag(AtomicHTMLToken & token)2073 void HTMLTreeBuilder::processEndTag(AtomicHTMLToken& token)
2074 {
2075 ASSERT(token.type() == HTMLToken::EndTag);
2076 switch (insertionMode()) {
2077 case InitialMode:
2078 ASSERT(insertionMode() == InitialMode);
2079 defaultForInitial();
2080 // Fall through.
2081 case BeforeHTMLMode:
2082 ASSERT(insertionMode() == BeforeHTMLMode);
2083 if (token.name() != headTag && token.name() != bodyTag && token.name() != htmlTag && token.name() != brTag) {
2084 parseError(token);
2085 return;
2086 }
2087 defaultForBeforeHTML();
2088 // Fall through.
2089 case BeforeHeadMode:
2090 ASSERT(insertionMode() == BeforeHeadMode);
2091 if (token.name() != headTag && token.name() != bodyTag && token.name() != htmlTag && token.name() != brTag) {
2092 parseError(token);
2093 return;
2094 }
2095 defaultForBeforeHead();
2096 // Fall through.
2097 case InHeadMode:
2098 ASSERT(insertionMode() == InHeadMode);
2099 if (token.name() == headTag) {
2100 m_tree.openElements()->popHTMLHeadElement();
2101 setInsertionMode(AfterHeadMode);
2102 return;
2103 }
2104 if (token.name() != bodyTag && token.name() != htmlTag && token.name() != brTag) {
2105 parseError(token);
2106 return;
2107 }
2108 defaultForInHead();
2109 // Fall through.
2110 case AfterHeadMode:
2111 ASSERT(insertionMode() == AfterHeadMode);
2112 if (token.name() != bodyTag && token.name() != htmlTag && token.name() != brTag) {
2113 parseError(token);
2114 return;
2115 }
2116 defaultForAfterHead();
2117 // Fall through
2118 case InBodyMode:
2119 ASSERT(insertionMode() == InBodyMode);
2120 processEndTagForInBody(token);
2121 break;
2122 case InTableMode:
2123 ASSERT(insertionMode() == InTableMode);
2124 processEndTagForInTable(token);
2125 break;
2126 case InCaptionMode:
2127 ASSERT(insertionMode() == InCaptionMode);
2128 if (token.name() == captionTag) {
2129 processCaptionEndTagForInCaption();
2130 return;
2131 }
2132 if (token.name() == tableTag) {
2133 parseError(token);
2134 if (!processCaptionEndTagForInCaption()) {
2135 ASSERT(isParsingFragment());
2136 return;
2137 }
2138 reprocessEndTag(token);
2139 return;
2140 }
2141 if (token.name() == bodyTag
2142 || token.name() == colTag
2143 || token.name() == colgroupTag
2144 || token.name() == htmlTag
2145 || isTableBodyContextTag(token.name())
2146 || isTableCellContextTag(token.name())
2147 || token.name() == trTag) {
2148 parseError(token);
2149 return;
2150 }
2151 processEndTagForInBody(token);
2152 break;
2153 case InColumnGroupMode:
2154 ASSERT(insertionMode() == InColumnGroupMode);
2155 if (token.name() == colgroupTag) {
2156 processColgroupEndTagForInColumnGroup();
2157 return;
2158 }
2159 if (token.name() == colTag) {
2160 parseError(token);
2161 return;
2162 }
2163 if (!processColgroupEndTagForInColumnGroup()) {
2164 ASSERT(isParsingFragment());
2165 return;
2166 }
2167 reprocessEndTag(token);
2168 break;
2169 case InRowMode:
2170 ASSERT(insertionMode() == InRowMode);
2171 processEndTagForInRow(token);
2172 break;
2173 case InCellMode:
2174 ASSERT(insertionMode() == InCellMode);
2175 processEndTagForInCell(token);
2176 break;
2177 case InTableBodyMode:
2178 ASSERT(insertionMode() == InTableBodyMode);
2179 processEndTagForInTableBody(token);
2180 break;
2181 case AfterBodyMode:
2182 ASSERT(insertionMode() == AfterBodyMode);
2183 if (token.name() == htmlTag) {
2184 if (isParsingFragment()) {
2185 parseError(token);
2186 return;
2187 }
2188 setInsertionMode(AfterAfterBodyMode);
2189 return;
2190 }
2191 prepareToReprocessToken();
2192 // Fall through.
2193 case AfterAfterBodyMode:
2194 ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
2195 parseError(token);
2196 setInsertionMode(InBodyMode);
2197 reprocessEndTag(token);
2198 break;
2199 case InHeadNoscriptMode:
2200 ASSERT(insertionMode() == InHeadNoscriptMode);
2201 if (token.name() == noscriptTag) {
2202 ASSERT(m_tree.currentElement()->hasTagName(noscriptTag));
2203 m_tree.openElements()->pop();
2204 ASSERT(m_tree.currentElement()->hasTagName(headTag));
2205 setInsertionMode(InHeadMode);
2206 return;
2207 }
2208 if (token.name() != brTag) {
2209 parseError(token);
2210 return;
2211 }
2212 defaultForInHeadNoscript();
2213 processToken(token);
2214 break;
2215 case TextMode:
2216 if (token.name() == scriptTag) {
2217 // Pause ourselves so that parsing stops until the script can be processed by the caller.
2218 m_isPaused = true;
2219 ASSERT(m_tree.currentElement()->hasTagName(scriptTag));
2220 m_scriptToProcess = m_tree.currentElement();
2221 m_scriptToProcessStartPosition = WTF::toOneBasedTextPosition(m_lastScriptElementStartPosition);
2222 m_tree.openElements()->pop();
2223 if (isParsingFragment() && m_fragmentContext.scriptingPermission() == FragmentScriptingNotAllowed)
2224 m_scriptToProcess->removeAllChildren();
2225 setInsertionMode(m_originalInsertionMode);
2226
2227 // This token will not have been created by the tokenizer if a
2228 // self-closing script tag was encountered and pre-HTML5 parser
2229 // quirks are enabled. We must set the tokenizer's state to
2230 // DataState explicitly if the tokenizer didn't have a chance to.
2231 ASSERT(m_parser->tokenizer()->state() == HTMLTokenizer::DataState || m_usePreHTML5ParserQuirks);
2232 m_parser->tokenizer()->setState(HTMLTokenizer::DataState);
2233 return;
2234 }
2235 m_tree.openElements()->pop();
2236 setInsertionMode(m_originalInsertionMode);
2237 break;
2238 case InFramesetMode:
2239 ASSERT(insertionMode() == InFramesetMode);
2240 if (token.name() == framesetTag) {
2241 if (m_tree.currentNode() == m_tree.openElements()->rootNode()) {
2242 parseError(token);
2243 return;
2244 }
2245 m_tree.openElements()->pop();
2246 if (!isParsingFragment() && !m_tree.currentElement()->hasTagName(framesetTag))
2247 setInsertionMode(AfterFramesetMode);
2248 return;
2249 }
2250 break;
2251 case AfterFramesetMode:
2252 ASSERT(insertionMode() == AfterFramesetMode);
2253 if (token.name() == htmlTag) {
2254 setInsertionMode(AfterAfterFramesetMode);
2255 return;
2256 }
2257 // Fall through.
2258 case AfterAfterFramesetMode:
2259 ASSERT(insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
2260 parseError(token);
2261 break;
2262 case InSelectInTableMode:
2263 ASSERT(insertionMode() == InSelectInTableMode);
2264 if (token.name() == captionTag
2265 || token.name() == tableTag
2266 || isTableBodyContextTag(token.name())
2267 || token.name() == trTag
2268 || isTableCellContextTag(token.name())) {
2269 parseError(token);
2270 if (m_tree.openElements()->inTableScope(token.name())) {
2271 AtomicHTMLToken endSelect(HTMLToken::EndTag, selectTag.localName());
2272 processEndTag(endSelect);
2273 reprocessEndTag(token);
2274 }
2275 return;
2276 }
2277 // Fall through.
2278 case InSelectMode:
2279 ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode);
2280 if (token.name() == optgroupTag) {
2281 if (m_tree.currentNode()->hasTagName(optionTag) && m_tree.oneBelowTop()->hasTagName(optgroupTag))
2282 processFakeEndTag(optionTag);
2283 if (m_tree.currentNode()->hasTagName(optgroupTag)) {
2284 m_tree.openElements()->pop();
2285 return;
2286 }
2287 parseError(token);
2288 return;
2289 }
2290 if (token.name() == optionTag) {
2291 if (m_tree.currentNode()->hasTagName(optionTag)) {
2292 m_tree.openElements()->pop();
2293 return;
2294 }
2295 parseError(token);
2296 return;
2297 }
2298 if (token.name() == selectTag) {
2299 if (!m_tree.openElements()->inSelectScope(token.name())) {
2300 ASSERT(isParsingFragment());
2301 parseError(token);
2302 return;
2303 }
2304 m_tree.openElements()->popUntilPopped(selectTag.localName());
2305 resetInsertionModeAppropriately();
2306 return;
2307 }
2308 break;
2309 case InTableTextMode:
2310 defaultForInTableText();
2311 processEndTag(token);
2312 break;
2313 case InForeignContentMode:
2314 if (token.name() == SVGNames::scriptTag && m_tree.currentNode()->hasTagName(SVGNames::scriptTag)) {
2315 notImplemented();
2316 return;
2317 }
2318 if (!isInHTMLNamespace(m_tree.currentNode())) {
2319 // FIXME: This code just wants an Element* iterator, instead of an ElementRecord*
2320 HTMLElementStack::ElementRecord* nodeRecord = m_tree.openElements()->topRecord();
2321 if (!nodeRecord->node()->hasLocalName(token.name()))
2322 parseError(token);
2323 while (1) {
2324 if (nodeRecord->node()->hasLocalName(token.name())) {
2325 m_tree.openElements()->popUntilPopped(nodeRecord->element());
2326 resetForeignInsertionMode();
2327 return;
2328 }
2329 nodeRecord = nodeRecord->next();
2330
2331 if (isInHTMLNamespace(nodeRecord->node()))
2332 break;
2333 }
2334 }
2335 // Any other end tag (also the last two steps of "An end tag, if the current node is not an element in the HTML namespace."
2336 processForeignContentUsingInBodyModeAndResetMode(token);
2337 break;
2338 }
2339 }
2340
prepareToReprocessToken()2341 void HTMLTreeBuilder::prepareToReprocessToken()
2342 {
2343 if (m_hasPendingForeignInsertionModeSteps) {
2344 resetForeignInsertionMode();
2345 m_hasPendingForeignInsertionModeSteps = false;
2346 }
2347 }
2348
reprocessStartTag(AtomicHTMLToken & token)2349 void HTMLTreeBuilder::reprocessStartTag(AtomicHTMLToken& token)
2350 {
2351 prepareToReprocessToken();
2352 processStartTag(token);
2353 }
2354
reprocessEndTag(AtomicHTMLToken & token)2355 void HTMLTreeBuilder::reprocessEndTag(AtomicHTMLToken& token)
2356 {
2357 prepareToReprocessToken();
2358 processEndTag(token);
2359 }
2360
2361 class HTMLTreeBuilder::FakeInsertionMode {
2362 WTF_MAKE_NONCOPYABLE(FakeInsertionMode);
2363 public:
FakeInsertionMode(HTMLTreeBuilder * treeBuilder,InsertionMode mode)2364 FakeInsertionMode(HTMLTreeBuilder* treeBuilder, InsertionMode mode)
2365 : m_treeBuilder(treeBuilder)
2366 , m_originalMode(treeBuilder->insertionMode())
2367 {
2368 m_treeBuilder->setFakeInsertionMode(mode);
2369 }
2370
~FakeInsertionMode()2371 ~FakeInsertionMode()
2372 {
2373 if (m_treeBuilder->isFakeInsertionMode())
2374 m_treeBuilder->setInsertionMode(m_originalMode);
2375 }
2376
2377 private:
2378 HTMLTreeBuilder* m_treeBuilder;
2379 InsertionMode m_originalMode;
2380 };
2381
processForeignContentUsingInBodyModeAndResetMode(AtomicHTMLToken & token)2382 void HTMLTreeBuilder::processForeignContentUsingInBodyModeAndResetMode(AtomicHTMLToken& token)
2383 {
2384 m_hasPendingForeignInsertionModeSteps = true;
2385 {
2386 FakeInsertionMode fakeMode(this, InBodyMode);
2387 processToken(token);
2388 }
2389 if (m_hasPendingForeignInsertionModeSteps)
2390 resetForeignInsertionMode();
2391 }
2392
resetForeignInsertionMode()2393 void HTMLTreeBuilder::resetForeignInsertionMode()
2394 {
2395 if (insertionMode() == InForeignContentMode)
2396 resetInsertionModeAppropriately();
2397 }
2398
processComment(AtomicHTMLToken & token)2399 void HTMLTreeBuilder::processComment(AtomicHTMLToken& token)
2400 {
2401 ASSERT(token.type() == HTMLToken::Comment);
2402 if (m_insertionMode == InitialMode
2403 || m_insertionMode == BeforeHTMLMode
2404 || m_insertionMode == AfterAfterBodyMode
2405 || m_insertionMode == AfterAfterFramesetMode) {
2406 m_tree.insertCommentOnDocument(token);
2407 return;
2408 }
2409 if (m_insertionMode == AfterBodyMode) {
2410 m_tree.insertCommentOnHTMLHtmlElement(token);
2411 return;
2412 }
2413 if (m_insertionMode == InTableTextMode) {
2414 defaultForInTableText();
2415 processComment(token);
2416 return;
2417 }
2418 m_tree.insertComment(token);
2419 }
2420
processCharacter(AtomicHTMLToken & token)2421 void HTMLTreeBuilder::processCharacter(AtomicHTMLToken& token)
2422 {
2423 ASSERT(token.type() == HTMLToken::Character);
2424 ExternalCharacterTokenBuffer buffer(token);
2425 processCharacterBuffer(buffer);
2426 }
2427
processCharacterBuffer(ExternalCharacterTokenBuffer & buffer)2428 void HTMLTreeBuilder::processCharacterBuffer(ExternalCharacterTokenBuffer& buffer)
2429 {
2430 ReprocessBuffer:
2431 switch (insertionMode()) {
2432 case InitialMode: {
2433 ASSERT(insertionMode() == InitialMode);
2434 buffer.skipLeadingWhitespace();
2435 if (buffer.isEmpty())
2436 return;
2437 defaultForInitial();
2438 // Fall through.
2439 }
2440 case BeforeHTMLMode: {
2441 ASSERT(insertionMode() == BeforeHTMLMode);
2442 buffer.skipLeadingWhitespace();
2443 if (buffer.isEmpty())
2444 return;
2445 defaultForBeforeHTML();
2446 // Fall through.
2447 }
2448 case BeforeHeadMode: {
2449 ASSERT(insertionMode() == BeforeHeadMode);
2450 buffer.skipLeadingWhitespace();
2451 if (buffer.isEmpty())
2452 return;
2453 defaultForBeforeHead();
2454 // Fall through.
2455 }
2456 case InHeadMode: {
2457 ASSERT(insertionMode() == InHeadMode);
2458 String leadingWhitespace = buffer.takeLeadingWhitespace();
2459 if (!leadingWhitespace.isEmpty())
2460 m_tree.insertTextNode(leadingWhitespace);
2461 if (buffer.isEmpty())
2462 return;
2463 defaultForInHead();
2464 // Fall through.
2465 }
2466 case AfterHeadMode: {
2467 ASSERT(insertionMode() == AfterHeadMode);
2468 String leadingWhitespace = buffer.takeLeadingWhitespace();
2469 if (!leadingWhitespace.isEmpty())
2470 m_tree.insertTextNode(leadingWhitespace);
2471 if (buffer.isEmpty())
2472 return;
2473 defaultForAfterHead();
2474 // Fall through.
2475 }
2476 case InBodyMode:
2477 case InCaptionMode:
2478 case InCellMode: {
2479 ASSERT(insertionMode() == InBodyMode || insertionMode() == InCaptionMode || insertionMode() == InCellMode);
2480 m_tree.reconstructTheActiveFormattingElements();
2481 String characters = buffer.takeRemaining();
2482 m_tree.insertTextNode(characters);
2483 if (m_framesetOk && !isAllWhitespaceOrReplacementCharacters(characters))
2484 m_framesetOk = false;
2485 break;
2486 }
2487 case InTableMode:
2488 case InTableBodyMode:
2489 case InRowMode: {
2490 ASSERT(insertionMode() == InTableMode || insertionMode() == InTableBodyMode || insertionMode() == InRowMode);
2491 ASSERT(m_pendingTableCharacters.isEmpty());
2492 m_originalInsertionMode = m_insertionMode;
2493 setInsertionMode(InTableTextMode);
2494 prepareToReprocessToken();
2495 // Fall through.
2496 }
2497 case InTableTextMode: {
2498 buffer.giveRemainingTo(m_pendingTableCharacters);
2499 break;
2500 }
2501 case InColumnGroupMode: {
2502 ASSERT(insertionMode() == InColumnGroupMode);
2503 String leadingWhitespace = buffer.takeLeadingWhitespace();
2504 if (!leadingWhitespace.isEmpty())
2505 m_tree.insertTextNode(leadingWhitespace);
2506 if (buffer.isEmpty())
2507 return;
2508 if (!processColgroupEndTagForInColumnGroup()) {
2509 ASSERT(isParsingFragment());
2510 // The spec tells us to drop these characters on the floor.
2511 buffer.takeLeadingNonWhitespace();
2512 if (buffer.isEmpty())
2513 return;
2514 }
2515 prepareToReprocessToken();
2516 goto ReprocessBuffer;
2517 }
2518 case AfterBodyMode:
2519 case AfterAfterBodyMode: {
2520 ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
2521 // FIXME: parse error
2522 setInsertionMode(InBodyMode);
2523 prepareToReprocessToken();
2524 goto ReprocessBuffer;
2525 break;
2526 }
2527 case TextMode: {
2528 ASSERT(insertionMode() == TextMode);
2529 m_tree.insertTextNode(buffer.takeRemaining());
2530 break;
2531 }
2532 case InHeadNoscriptMode: {
2533 ASSERT(insertionMode() == InHeadNoscriptMode);
2534 String leadingWhitespace = buffer.takeLeadingWhitespace();
2535 if (!leadingWhitespace.isEmpty())
2536 m_tree.insertTextNode(leadingWhitespace);
2537 if (buffer.isEmpty())
2538 return;
2539 defaultForInHeadNoscript();
2540 goto ReprocessBuffer;
2541 break;
2542 }
2543 case InFramesetMode:
2544 case AfterFramesetMode: {
2545 ASSERT(insertionMode() == InFramesetMode || insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
2546 String leadingWhitespace = buffer.takeRemainingWhitespace();
2547 if (!leadingWhitespace.isEmpty())
2548 m_tree.insertTextNode(leadingWhitespace);
2549 // FIXME: We should generate a parse error if we skipped over any
2550 // non-whitespace characters.
2551 break;
2552 }
2553 case InSelectInTableMode:
2554 case InSelectMode: {
2555 ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode);
2556 m_tree.insertTextNode(buffer.takeRemaining());
2557 break;
2558 }
2559 case InForeignContentMode: {
2560 ASSERT(insertionMode() == InForeignContentMode);
2561 String characters = buffer.takeRemaining();
2562 m_tree.insertTextNode(characters);
2563 if (m_framesetOk && !isAllWhitespace(characters))
2564 m_framesetOk = false;
2565 break;
2566 }
2567 case AfterAfterFramesetMode: {
2568 String leadingWhitespace = buffer.takeRemainingWhitespace();
2569 if (!leadingWhitespace.isEmpty()) {
2570 m_tree.reconstructTheActiveFormattingElements();
2571 m_tree.insertTextNode(leadingWhitespace);
2572 }
2573 // FIXME: We should generate a parse error if we skipped over any
2574 // non-whitespace characters.
2575 break;
2576 }
2577 }
2578 }
2579
processEndOfFile(AtomicHTMLToken & token)2580 void HTMLTreeBuilder::processEndOfFile(AtomicHTMLToken& token)
2581 {
2582 ASSERT(token.type() == HTMLToken::EndOfFile);
2583 switch (insertionMode()) {
2584 case InitialMode:
2585 ASSERT(insertionMode() == InitialMode);
2586 defaultForInitial();
2587 // Fall through.
2588 case BeforeHTMLMode:
2589 ASSERT(insertionMode() == BeforeHTMLMode);
2590 defaultForBeforeHTML();
2591 // Fall through.
2592 case BeforeHeadMode:
2593 ASSERT(insertionMode() == BeforeHeadMode);
2594 defaultForBeforeHead();
2595 // Fall through.
2596 case InHeadMode:
2597 ASSERT(insertionMode() == InHeadMode);
2598 defaultForInHead();
2599 // Fall through.
2600 case AfterHeadMode:
2601 ASSERT(insertionMode() == AfterHeadMode);
2602 defaultForAfterHead();
2603 // Fall through
2604 case InBodyMode:
2605 case InCellMode:
2606 case InCaptionMode:
2607 case InRowMode:
2608 ASSERT(insertionMode() == InBodyMode || insertionMode() == InCellMode || insertionMode() == InCaptionMode || insertionMode() == InRowMode);
2609 notImplemented(); // Emit parse error based on what elements are still open.
2610 break;
2611 case AfterBodyMode:
2612 case AfterAfterBodyMode:
2613 ASSERT(insertionMode() == AfterBodyMode || insertionMode() == AfterAfterBodyMode);
2614 break;
2615 case InHeadNoscriptMode:
2616 ASSERT(insertionMode() == InHeadNoscriptMode);
2617 defaultForInHeadNoscript();
2618 processEndOfFile(token);
2619 return;
2620 case AfterFramesetMode:
2621 case AfterAfterFramesetMode:
2622 ASSERT(insertionMode() == AfterFramesetMode || insertionMode() == AfterAfterFramesetMode);
2623 break;
2624 case InFramesetMode:
2625 case InTableMode:
2626 case InTableBodyMode:
2627 case InSelectInTableMode:
2628 case InSelectMode:
2629 ASSERT(insertionMode() == InSelectMode || insertionMode() == InSelectInTableMode || insertionMode() == InTableMode || insertionMode() == InFramesetMode || insertionMode() == InTableBodyMode);
2630 if (m_tree.currentNode() != m_tree.openElements()->rootNode())
2631 parseError(token);
2632 break;
2633 case InColumnGroupMode:
2634 if (m_tree.currentNode() == m_tree.openElements()->rootNode()) {
2635 ASSERT(isParsingFragment());
2636 return; // FIXME: Should we break here instead of returning?
2637 }
2638 if (!processColgroupEndTagForInColumnGroup()) {
2639 ASSERT(isParsingFragment());
2640 return; // FIXME: Should we break here instead of returning?
2641 }
2642 prepareToReprocessToken();
2643 processEndOfFile(token);
2644 return;
2645 case InForeignContentMode:
2646 setInsertionMode(InBodyMode);
2647 processEndOfFile(token);
2648 return;
2649 case InTableTextMode:
2650 defaultForInTableText();
2651 processEndOfFile(token);
2652 return;
2653 case TextMode:
2654 parseError(token);
2655 if (m_tree.currentNode()->hasTagName(scriptTag))
2656 notImplemented(); // mark the script element as "already started".
2657 m_tree.openElements()->pop();
2658 setInsertionMode(m_originalInsertionMode);
2659 prepareToReprocessToken();
2660 processEndOfFile(token);
2661 return;
2662 }
2663 ASSERT(m_tree.currentNode());
2664 m_tree.openElements()->popAll();
2665 }
2666
defaultForInitial()2667 void HTMLTreeBuilder::defaultForInitial()
2668 {
2669 notImplemented();
2670 if (!m_fragmentContext.fragment())
2671 m_document->setCompatibilityMode(Document::QuirksMode);
2672 // FIXME: parse error
2673 setInsertionMode(BeforeHTMLMode);
2674 prepareToReprocessToken();
2675 }
2676
defaultForBeforeHTML()2677 void HTMLTreeBuilder::defaultForBeforeHTML()
2678 {
2679 AtomicHTMLToken startHTML(HTMLToken::StartTag, htmlTag.localName());
2680 m_tree.insertHTMLHtmlStartTagBeforeHTML(startHTML);
2681 setInsertionMode(BeforeHeadMode);
2682 prepareToReprocessToken();
2683 }
2684
defaultForBeforeHead()2685 void HTMLTreeBuilder::defaultForBeforeHead()
2686 {
2687 AtomicHTMLToken startHead(HTMLToken::StartTag, headTag.localName());
2688 processStartTag(startHead);
2689 prepareToReprocessToken();
2690 }
2691
defaultForInHead()2692 void HTMLTreeBuilder::defaultForInHead()
2693 {
2694 AtomicHTMLToken endHead(HTMLToken::EndTag, headTag.localName());
2695 processEndTag(endHead);
2696 prepareToReprocessToken();
2697 }
2698
defaultForInHeadNoscript()2699 void HTMLTreeBuilder::defaultForInHeadNoscript()
2700 {
2701 AtomicHTMLToken endNoscript(HTMLToken::EndTag, noscriptTag.localName());
2702 processEndTag(endNoscript);
2703 prepareToReprocessToken();
2704 }
2705
defaultForAfterHead()2706 void HTMLTreeBuilder::defaultForAfterHead()
2707 {
2708 AtomicHTMLToken startBody(HTMLToken::StartTag, bodyTag.localName());
2709 processStartTag(startBody);
2710 m_framesetOk = true;
2711 prepareToReprocessToken();
2712 }
2713
defaultForInTableText()2714 void HTMLTreeBuilder::defaultForInTableText()
2715 {
2716 String characters = String::adopt(m_pendingTableCharacters);
2717 if (!isAllWhitespace(characters)) {
2718 // FIXME: parse error
2719 HTMLConstructionSite::RedirectToFosterParentGuard redirecter(m_tree);
2720 m_tree.reconstructTheActiveFormattingElements();
2721 m_tree.insertTextNode(characters);
2722 m_framesetOk = false;
2723 setInsertionMode(m_originalInsertionMode);
2724 prepareToReprocessToken();
2725 return;
2726 }
2727 m_tree.insertTextNode(characters);
2728 setInsertionMode(m_originalInsertionMode);
2729 prepareToReprocessToken();
2730 }
2731
processStartTagForInHead(AtomicHTMLToken & token)2732 bool HTMLTreeBuilder::processStartTagForInHead(AtomicHTMLToken& token)
2733 {
2734 ASSERT(token.type() == HTMLToken::StartTag);
2735 if (token.name() == htmlTag) {
2736 m_tree.insertHTMLHtmlStartTagInBody(token);
2737 return true;
2738 }
2739 if (token.name() == baseTag
2740 || token.name() == basefontTag
2741 || token.name() == bgsoundTag
2742 || token.name() == commandTag
2743 || token.name() == linkTag
2744 || token.name() == metaTag) {
2745 m_tree.insertSelfClosingHTMLElement(token);
2746 // Note: The custom processing for the <meta> tag is done in HTMLMetaElement::process().
2747 return true;
2748 }
2749 if (token.name() == titleTag) {
2750 processGenericRCDATAStartTag(token);
2751 return true;
2752 }
2753 if (token.name() == noscriptTag) {
2754 if (scriptEnabled(m_document->frame())) {
2755 processGenericRawTextStartTag(token);
2756 return true;
2757 }
2758 m_tree.insertHTMLElement(token);
2759 setInsertionMode(InHeadNoscriptMode);
2760 return true;
2761 }
2762 if (token.name() == noframesTag || token.name() == styleTag) {
2763 processGenericRawTextStartTag(token);
2764 return true;
2765 }
2766 if (token.name() == scriptTag) {
2767 processScriptStartTag(token);
2768 if (m_usePreHTML5ParserQuirks && token.selfClosing())
2769 processFakeEndTag(scriptTag);
2770 return true;
2771 }
2772 if (token.name() == headTag) {
2773 parseError(token);
2774 return true;
2775 }
2776 return false;
2777 }
2778
processGenericRCDATAStartTag(AtomicHTMLToken & token)2779 void HTMLTreeBuilder::processGenericRCDATAStartTag(AtomicHTMLToken& token)
2780 {
2781 ASSERT(token.type() == HTMLToken::StartTag);
2782 m_tree.insertHTMLElement(token);
2783 m_parser->tokenizer()->setState(HTMLTokenizer::RCDATAState);
2784 m_originalInsertionMode = m_insertionMode;
2785 setInsertionMode(TextMode);
2786 }
2787
processGenericRawTextStartTag(AtomicHTMLToken & token)2788 void HTMLTreeBuilder::processGenericRawTextStartTag(AtomicHTMLToken& token)
2789 {
2790 ASSERT(token.type() == HTMLToken::StartTag);
2791 m_tree.insertHTMLElement(token);
2792 m_parser->tokenizer()->setState(HTMLTokenizer::RAWTEXTState);
2793 m_originalInsertionMode = m_insertionMode;
2794 setInsertionMode(TextMode);
2795 }
2796
processScriptStartTag(AtomicHTMLToken & token)2797 void HTMLTreeBuilder::processScriptStartTag(AtomicHTMLToken& token)
2798 {
2799 ASSERT(token.type() == HTMLToken::StartTag);
2800 m_tree.insertScriptElement(token);
2801 m_parser->tokenizer()->setState(HTMLTokenizer::ScriptDataState);
2802 m_originalInsertionMode = m_insertionMode;
2803
2804 TextPosition0 position = m_parser->textPosition();
2805
2806 ASSERT(position.m_line.zeroBasedInt() == m_parser->tokenizer()->lineNumber());
2807
2808 m_lastScriptElementStartPosition = position;
2809
2810 setInsertionMode(TextMode);
2811 }
2812
finished()2813 void HTMLTreeBuilder::finished()
2814 {
2815 if (isParsingFragment())
2816 return;
2817
2818 ASSERT(m_document);
2819 // Warning, this may detach the parser. Do not do anything else after this.
2820 m_document->finishedParsing();
2821 }
2822
parseError(AtomicHTMLToken &)2823 void HTMLTreeBuilder::parseError(AtomicHTMLToken&)
2824 {
2825 }
2826
scriptEnabled(Frame * frame)2827 bool HTMLTreeBuilder::scriptEnabled(Frame* frame)
2828 {
2829 if (!frame)
2830 return false;
2831 return frame->script()->canExecuteScripts(NotAboutToExecuteScript);
2832 }
2833
pluginsEnabled(Frame * frame)2834 bool HTMLTreeBuilder::pluginsEnabled(Frame* frame)
2835 {
2836 if (!frame)
2837 return false;
2838 return frame->loader()->subframeLoader()->allowPlugins(NotAboutToInstantiatePlugin);
2839 }
2840
2841 }
2842