1 /*
2  * Copyright (C) 2010 Google, Inc. All Rights Reserved.
3  * Copyright (C) 2011, 2014 Apple Inc. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
15  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL GOOGLE INC. OR
18  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 #include "third_party/blink/renderer/core/html/parser/html_tree_builder.h"
28 
29 #include <memory>
30 
31 #include "base/macros.h"
32 #include "third_party/blink/renderer/core/dom/document.h"
33 #include "third_party/blink/renderer/core/dom/document_fragment.h"
34 #include "third_party/blink/renderer/core/dom/element_traversal.h"
35 #include "third_party/blink/renderer/core/dom/shadow_root.h"
36 #include "third_party/blink/renderer/core/execution_context/execution_context.h"
37 #include "third_party/blink/renderer/core/frame/web_feature.h"
38 #include "third_party/blink/renderer/core/html/forms/html_form_control_element.h"
39 #include "third_party/blink/renderer/core/html/forms/html_form_element.h"
40 #include "third_party/blink/renderer/core/html/html_template_element.h"
41 #include "third_party/blink/renderer/core/html/parser/atomic_html_token.h"
42 #include "third_party/blink/renderer/core/html/parser/html_document_parser.h"
43 #include "third_party/blink/renderer/core/html/parser/html_parser_idioms.h"
44 #include "third_party/blink/renderer/core/html/parser/html_stack_item.h"
45 #include "third_party/blink/renderer/core/html/parser/html_token.h"
46 #include "third_party/blink/renderer/core/html/parser/html_tokenizer.h"
47 #include "third_party/blink/renderer/core/html_names.h"
48 #include "third_party/blink/renderer/core/inspector/console_message.h"
49 #include "third_party/blink/renderer/core/mathml_names.h"
50 #include "third_party/blink/renderer/core/svg_names.h"
51 #include "third_party/blink/renderer/core/xlink_names.h"
52 #include "third_party/blink/renderer/core/xml_names.h"
53 #include "third_party/blink/renderer/core/xmlns_names.h"
54 #include "third_party/blink/renderer/platform/bindings/exception_state.h"
55 #include "third_party/blink/renderer/platform/bindings/runtime_call_stats.h"
56 #include "third_party/blink/renderer/platform/bindings/v8_per_isolate_data.h"
57 #include "third_party/blink/renderer/platform/heap/heap.h"
58 #include "third_party/blink/renderer/platform/instrumentation/use_counter.h"
59 #include "third_party/blink/renderer/platform/text/platform_locale.h"
60 #include "third_party/blink/renderer/platform/wtf/text/character_names.h"
61 #include "third_party/blink/renderer/platform/wtf/text/character_visitor.h"
62 #include "third_party/blink/renderer/platform/wtf/text/string_buffer.h"
63 
64 namespace blink {
65 
66 namespace {
67 
IsHTMLSpaceOrReplacementCharacter(UChar character)68 inline bool IsHTMLSpaceOrReplacementCharacter(UChar character) {
69   return IsHTMLSpace<UChar>(character) || character == kReplacementCharacter;
70 }
71 }
72 
UninitializedPositionValue1()73 static TextPosition UninitializedPositionValue1() {
74   return TextPosition(OrdinalNumber::FromOneBasedInt(-1),
75                       OrdinalNumber::First());
76 }
77 
IsAllWhitespace(const StringView & string_view)78 static inline bool IsAllWhitespace(const StringView& string_view) {
79   return string_view.IsAllSpecialCharacters<IsHTMLSpace<UChar>>();
80 }
81 
IsAllWhitespaceOrReplacementCharacters(const StringView & string_view)82 static inline bool IsAllWhitespaceOrReplacementCharacters(
83     const StringView& string_view) {
84   return string_view
85       .IsAllSpecialCharacters<IsHTMLSpaceOrReplacementCharacter>();
86 }
87 
IsNumberedHeaderTag(const AtomicString & tag_name)88 static bool IsNumberedHeaderTag(const AtomicString& tag_name) {
89   return tag_name == html_names::kH1Tag || tag_name == html_names::kH2Tag ||
90          tag_name == html_names::kH3Tag || tag_name == html_names::kH4Tag ||
91          tag_name == html_names::kH5Tag || tag_name == html_names::kH6Tag;
92 }
93 
IsCaptionColOrColgroupTag(const AtomicString & tag_name)94 static bool IsCaptionColOrColgroupTag(const AtomicString& tag_name) {
95   return tag_name == html_names::kCaptionTag ||
96          tag_name == html_names::kColTag ||
97          tag_name == html_names::kColgroupTag;
98 }
99 
IsTableCellContextTag(const AtomicString & tag_name)100 static bool IsTableCellContextTag(const AtomicString& tag_name) {
101   return tag_name == html_names::kThTag || tag_name == html_names::kTdTag;
102 }
103 
IsTableBodyContextTag(const AtomicString & tag_name)104 static bool IsTableBodyContextTag(const AtomicString& tag_name) {
105   return tag_name == html_names::kTbodyTag ||
106          tag_name == html_names::kTfootTag || tag_name == html_names::kTheadTag;
107 }
108 
IsNonAnchorNonNobrFormattingTag(const AtomicString & tag_name)109 static bool IsNonAnchorNonNobrFormattingTag(const AtomicString& tag_name) {
110   return tag_name == html_names::kBTag || tag_name == html_names::kBigTag ||
111          tag_name == html_names::kCodeTag || tag_name == html_names::kEmTag ||
112          tag_name == html_names::kFontTag || tag_name == html_names::kITag ||
113          tag_name == html_names::kSTag || tag_name == html_names::kSmallTag ||
114          tag_name == html_names::kStrikeTag ||
115          tag_name == html_names::kStrongTag || tag_name == html_names::kTtTag ||
116          tag_name == html_names::kUTag;
117 }
118 
IsNonAnchorFormattingTag(const AtomicString & tag_name)119 static bool IsNonAnchorFormattingTag(const AtomicString& tag_name) {
120   return tag_name == html_names::kNobrTag ||
121          IsNonAnchorNonNobrFormattingTag(tag_name);
122 }
123 
124 // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#formatting
IsFormattingTag(const AtomicString & tag_name)125 static bool IsFormattingTag(const AtomicString& tag_name) {
126   return tag_name == html_names::kATag || IsNonAnchorFormattingTag(tag_name);
127 }
128 
129 class HTMLTreeBuilder::CharacterTokenBuffer {
130  public:
CharacterTokenBuffer(AtomicHTMLToken * token)131   explicit CharacterTokenBuffer(AtomicHTMLToken* token)
132       : characters_(token->Characters().Impl()),
133         current_(0),
134         end_(token->Characters().length()) {
135     DCHECK(!IsEmpty());
136   }
137 
CharacterTokenBuffer(const String & characters)138   explicit CharacterTokenBuffer(const String& characters)
139       : characters_(characters.Impl()), current_(0), end_(characters.length()) {
140     DCHECK(!IsEmpty());
141   }
142 
~CharacterTokenBuffer()143   ~CharacterTokenBuffer() { DCHECK(IsEmpty()); }
144 
IsEmpty() const145   bool IsEmpty() const { return current_ == end_; }
146 
SkipAtMostOneLeadingNewline()147   void SkipAtMostOneLeadingNewline() {
148     DCHECK(!IsEmpty());
149     if ((*characters_)[current_] == '\n')
150       ++current_;
151   }
152 
SkipLeadingWhitespace()153   void SkipLeadingWhitespace() { SkipLeading<IsHTMLSpace<UChar>>(); }
154 
TakeLeadingWhitespace()155   StringView TakeLeadingWhitespace() {
156     return TakeLeading<IsHTMLSpace<UChar>>();
157   }
158 
SkipLeadingNonWhitespace()159   void SkipLeadingNonWhitespace() { SkipLeading<IsNotHTMLSpace<UChar>>(); }
160 
SkipRemaining()161   void SkipRemaining() { current_ = end_; }
162 
TakeRemaining()163   StringView TakeRemaining() {
164     DCHECK(!IsEmpty());
165     unsigned start = current_;
166     current_ = end_;
167     return StringView(characters_.get(), start, end_ - start);
168   }
169 
GiveRemainingTo(StringBuilder & recipient)170   void GiveRemainingTo(StringBuilder& recipient) {
171     WTF::VisitCharacters(*characters_, [&](const auto* chars, unsigned length) {
172       recipient.Append(chars + current_, end_ - current_);
173     });
174     current_ = end_;
175   }
176 
TakeRemainingWhitespace()177   String TakeRemainingWhitespace() {
178     DCHECK(!IsEmpty());
179     const unsigned start = current_;
180     current_ = end_;  // One way or another, we're taking everything!
181 
182     unsigned length = 0;
183     for (unsigned i = start; i < end_; ++i) {
184       if (IsHTMLSpace<UChar>((*characters_)[i]))
185         ++length;
186     }
187     // Returning the null string when there aren't any whitespace
188     // characters is slightly cleaner semantically because we don't want
189     // to insert a text node (as opposed to inserting an empty text node).
190     if (!length)
191       return String();
192     if (length == start - end_)  // It's all whitespace.
193       return String(characters_->Substring(start, start - end_));
194 
195     // All HTML spaces are ASCII.
196     StringBuffer<LChar> result(length);
197     unsigned j = 0;
198     for (unsigned i = start; i < end_; ++i) {
199       UChar c = (*characters_)[i];
200       if (IsHTMLSpace(c))
201         result[j++] = static_cast<LChar>(c);
202     }
203     DCHECK_EQ(j, length);
204     return String::Adopt(result);
205   }
206 
207  private:
208   template <bool characterPredicate(UChar)>
SkipLeading()209   void SkipLeading() {
210     DCHECK(!IsEmpty());
211     while (characterPredicate((*characters_)[current_])) {
212       if (++current_ == end_)
213         return;
214     }
215   }
216 
217   template <bool characterPredicate(UChar)>
TakeLeading()218   StringView TakeLeading() {
219     DCHECK(!IsEmpty());
220     const unsigned start = current_;
221     SkipLeading<characterPredicate>();
222     return StringView(characters_.get(), start, current_ - start);
223   }
224 
225   scoped_refptr<StringImpl> characters_;
226   unsigned current_;
227   unsigned end_;
228 
229   DISALLOW_COPY_AND_ASSIGN(CharacterTokenBuffer);
230 };
231 
// Document-parsing constructor. tree_ is set up with the parser's reentry
// permit, |document| and |parser_content_policy|. Both the current and the
// original insertion mode start at kInitialMode, frameset_ok_ starts true, and
// the pending-script start position holds the uninitialized sentinel.
HTMLTreeBuilder::HTMLTreeBuilder(HTMLDocumentParser* parser,
                                 Document& document,
                                 ParserContentPolicy parser_content_policy,
                                 const HTMLParserOptions& options,
                                 bool allow_shadow_root)
    : frameset_ok_(true),
      tree_(parser->ReentryPermit(), document, parser_content_policy),
      insertion_mode_(kInitialMode),
      original_insertion_mode_(kInitialMode),
      should_skip_leading_newline_(false),
      allow_shadow_root_(allow_shadow_root),
      parser_(parser),
      script_to_process_start_position_(UninitializedPositionValue1()),
      options_(options) {}
246 
HTMLTreeBuilder(HTMLDocumentParser * parser,DocumentFragment * fragment,Element * context_element,ParserContentPolicy parser_content_policy,const HTMLParserOptions & options,bool allow_shadow_root)247 HTMLTreeBuilder::HTMLTreeBuilder(HTMLDocumentParser* parser,
248                                  DocumentFragment* fragment,
249                                  Element* context_element,
250                                  ParserContentPolicy parser_content_policy,
251                                  const HTMLParserOptions& options,
252                                  bool allow_shadow_root)
253     : HTMLTreeBuilder(parser,
254                       fragment->GetDocument(),
255                       parser_content_policy,
256                       options,
257                       allow_shadow_root) {
258   DCHECK(IsMainThread());
259   DCHECK(context_element);
260   tree_.InitFragmentParsing(fragment, context_element);
261   fragment_context_.Init(fragment, context_element);
262 
263   // Steps 4.2-4.6 of the HTML5 Fragment Case parsing algorithm:
264   // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#fragment-case
265   // For efficiency, we skip step 4.2 ("Let root be a new html element with no
266   // attributes") and instead use the DocumentFragment as a root node.
267   tree_.OpenElements()->PushRootNode(MakeGarbageCollected<HTMLStackItem>(
268       fragment, HTMLStackItem::kItemForDocumentFragmentNode));
269 
270   if (IsA<HTMLTemplateElement>(*context_element))
271     template_insertion_modes_.push_back(kTemplateContentsMode);
272 
273   ResetInsertionModeAppropriately();
274 }
275 
// Defaulted: no teardown beyond what the members' own destructors do.
HTMLTreeBuilder::~HTMLTreeBuilder() = default;
277 
Init(DocumentFragment * fragment,Element * context_element)278 void HTMLTreeBuilder::FragmentParsingContext::Init(DocumentFragment* fragment,
279                                                    Element* context_element) {
280   DCHECK(fragment);
281   DCHECK(!fragment->HasChildren());
282   fragment_ = fragment;
283   context_element_stack_item_ = MakeGarbageCollected<HTMLStackItem>(
284       context_element, HTMLStackItem::kItemForContextElement);
285 }
286 
// Reports the fragment and the context element's stack item to |visitor|.
void HTMLTreeBuilder::FragmentParsingContext::Trace(Visitor* visitor) const {
  visitor->Trace(fragment_);
  visitor->Trace(context_element_stack_item_);
}
291 
// Reports the fragment context, the construction site, the owning parser and
// the pending script element to |visitor|.
void HTMLTreeBuilder::Trace(Visitor* visitor) const {
  visitor->Trace(fragment_context_);
  visitor->Trace(tree_);
  visitor->Trace(parser_);
  visitor->Trace(script_to_process_);
}
298 
// Detaches the construction site (tree_). In DCHECK builds it also clears
// is_attached_.
void HTMLTreeBuilder::Detach() {
#if DCHECK_IS_ON()
  // This call makes little sense in fragment mode, but for consistency
  // DocumentParser expects Detach() to always be called before it's destroyed.
  is_attached_ = false;
#endif
  // HTMLConstructionSite might be on the callstack when Detach() is called;
  // otherwise we'd just call tree_.Clear() here instead.
  tree_.Detach();
}
309 
TakeScriptToProcess(TextPosition & script_start_position)310 Element* HTMLTreeBuilder::TakeScriptToProcess(
311     TextPosition& script_start_position) {
312   DCHECK(script_to_process_);
313   DCHECK(!tree_.HasPendingTasks());
314   // Unpause ourselves, callers may pause us again when processing the script.
315   // The HTML5 spec is written as though scripts are executed inside the tree
316   // builder.  We pause the parser to exit the tree builder, and then resume
317   // before running scripts.
318   script_start_position = script_to_process_start_position_;
319   script_to_process_start_position_ = UninitializedPositionValue1();
320   return script_to_process_.Release();
321 }
322 
ConstructTree(AtomicHTMLToken * token)323 void HTMLTreeBuilder::ConstructTree(AtomicHTMLToken* token) {
324   RUNTIME_CALL_TIMER_SCOPE(V8PerIsolateData::MainThreadIsolate(),
325                            RuntimeCallStats::CounterId::kConstructTree);
326   if (ShouldProcessTokenInForeignContent(token))
327     ProcessTokenInForeignContent(token);
328   else
329     ProcessToken(token);
330 
331   if (parser_->Tokenizer()) {
332     bool in_foreign_content = false;
333     if (!tree_.IsEmpty()) {
334       HTMLStackItem* adjusted_current_node = AdjustedCurrentStackItem();
335       in_foreign_content =
336           !adjusted_current_node->IsInHTMLNamespace() &&
337           !HTMLElementStack::IsHTMLIntegrationPoint(adjusted_current_node) &&
338           !HTMLElementStack::IsMathMLTextIntegrationPoint(
339               adjusted_current_node);
340     }
341 
342     parser_->Tokenizer()->SetForceNullCharacterReplacement(
343         insertion_mode_ == kTextMode || in_foreign_content);
344     parser_->Tokenizer()->SetShouldAllowCDATA(in_foreign_content);
345   }
346 
347   tree_.ExecuteQueuedTasks();
348   // We might be detached now.
349 }
350 
ProcessToken(AtomicHTMLToken * token)351 void HTMLTreeBuilder::ProcessToken(AtomicHTMLToken* token) {
352   if (token->GetType() == HTMLToken::kCharacter) {
353     ProcessCharacter(token);
354     return;
355   }
356 
357   // Any non-character token needs to cause us to flush any pending text
358   // immediately. NOTE: flush() can cause any queued tasks to execute, possibly
359   // re-entering the parser.
360   tree_.Flush(kFlushAlways);
361   should_skip_leading_newline_ = false;
362 
363   switch (token->GetType()) {
364     case HTMLToken::kUninitialized:
365     case HTMLToken::kCharacter:
366       NOTREACHED();
367       break;
368     case HTMLToken::DOCTYPE:
369       ProcessDoctypeToken(token);
370       break;
371     case HTMLToken::kStartTag:
372       ProcessStartTag(token);
373       break;
374     case HTMLToken::kEndTag:
375       ProcessEndTag(token);
376       break;
377     case HTMLToken::kComment:
378       ProcessComment(token);
379       break;
380     case HTMLToken::kEndOfFile:
381       ProcessEndOfFile(token);
382       break;
383   }
384 }
385 
ProcessDoctypeToken(AtomicHTMLToken * token)386 void HTMLTreeBuilder::ProcessDoctypeToken(AtomicHTMLToken* token) {
387   DCHECK_EQ(token->GetType(), HTMLToken::DOCTYPE);
388   if (insertion_mode_ == kInitialMode) {
389     tree_.InsertDoctype(token);
390     SetInsertionMode(kBeforeHTMLMode);
391     return;
392   }
393   if (insertion_mode_ == kInTableTextMode) {
394     DefaultForInTableText();
395     ProcessDoctypeToken(token);
396     return;
397   }
398   ParseError(token);
399 }
400 
ProcessFakeStartTag(const QualifiedName & tag_name,const Vector<Attribute> & attributes)401 void HTMLTreeBuilder::ProcessFakeStartTag(const QualifiedName& tag_name,
402                                           const Vector<Attribute>& attributes) {
403   // FIXME: We'll need a fancier conversion than just "localName" for SVG/MathML
404   // tags.
405   AtomicHTMLToken fake_token(HTMLToken::kStartTag, tag_name.LocalName(),
406                              attributes);
407   ProcessStartTag(&fake_token);
408 }
409 
ProcessFakeEndTag(const AtomicString & tag_name)410 void HTMLTreeBuilder::ProcessFakeEndTag(const AtomicString& tag_name) {
411   AtomicHTMLToken fake_token(HTMLToken::kEndTag, tag_name);
412   ProcessEndTag(&fake_token);
413 }
414 
ProcessFakeEndTag(const QualifiedName & tag_name)415 void HTMLTreeBuilder::ProcessFakeEndTag(const QualifiedName& tag_name) {
416   // FIXME: We'll need a fancier conversion than just "localName" for SVG/MathML
417   // tags.
418   ProcessFakeEndTag(tag_name.LocalName());
419 }
420 
ProcessFakePEndTagIfPInButtonScope()421 void HTMLTreeBuilder::ProcessFakePEndTagIfPInButtonScope() {
422   if (!tree_.OpenElements()->InButtonScope(html_names::kPTag.LocalName()))
423     return;
424   AtomicHTMLToken end_p(HTMLToken::kEndTag, html_names::kPTag.LocalName());
425   ProcessEndTag(&end_p);
426 }
427 
428 namespace {
429 
IsLi(const HTMLStackItem * item)430 bool IsLi(const HTMLStackItem* item) {
431   return item->HasTagName(html_names::kLiTag);
432 }
433 
IsDdOrDt(const HTMLStackItem * item)434 bool IsDdOrDt(const HTMLStackItem* item) {
435   return item->HasTagName(html_names::kDdTag) ||
436          item->HasTagName(html_names::kDtTag);
437 }
438 
439 }  // namespace
440 
441 template <bool shouldClose(const HTMLStackItem*)>
ProcessCloseWhenNestedTag(AtomicHTMLToken * token)442 void HTMLTreeBuilder::ProcessCloseWhenNestedTag(AtomicHTMLToken* token) {
443   frameset_ok_ = false;
444   HTMLElementStack::ElementRecord* node_record =
445       tree_.OpenElements()->TopRecord();
446   while (1) {
447     HTMLStackItem* item = node_record->StackItem();
448     if (shouldClose(item)) {
449       DCHECK(item->IsElementNode());
450       ProcessFakeEndTag(item->LocalName());
451       break;
452     }
453     if (item->IsSpecialNode() && !item->HasTagName(html_names::kAddressTag) &&
454         !item->HasTagName(html_names::kDivTag) &&
455         !item->HasTagName(html_names::kPTag))
456       break;
457     node_record = node_record->Next();
458   }
459   ProcessFakePEndTagIfPInButtonScope();
460   tree_.InsertHTMLElement(token);
461 }
462 
463 typedef HashMap<AtomicString, QualifiedName> PrefixedNameToQualifiedNameMap;
464 
465 template <typename TableQualifiedName>
MapLoweredLocalNameToName(PrefixedNameToQualifiedNameMap * map,const TableQualifiedName * const * names,size_t length)466 static void MapLoweredLocalNameToName(PrefixedNameToQualifiedNameMap* map,
467                                       const TableQualifiedName* const* names,
468                                       size_t length) {
469   for (size_t i = 0; i < length; ++i) {
470     const QualifiedName& name = *names[i];
471     const AtomicString& local_name = name.LocalName();
472     AtomicString lowered_local_name = local_name.LowerASCII();
473     if (lowered_local_name != local_name)
474       map->insert(lowered_local_name, name);
475   }
476 }
477 
478 // "Any other start tag" bullet in
479 // https://html.spec.whatwg.org/C/#parsing-main-inforeign
AdjustSVGTagNameCase(AtomicHTMLToken * token)480 static void AdjustSVGTagNameCase(AtomicHTMLToken* token) {
481   static PrefixedNameToQualifiedNameMap* case_map = nullptr;
482   if (!case_map) {
483     case_map = new PrefixedNameToQualifiedNameMap;
484     std::unique_ptr<const SVGQualifiedName* []> svg_tags = svg_names::GetTags();
485     MapLoweredLocalNameToName(case_map, svg_tags.get(), svg_names::kTagsCount);
486   }
487 
488   const QualifiedName& cased_name = case_map->at(token->GetName());
489   if (cased_name.LocalName().IsNull())
490     return;
491   token->SetName(cased_name.LocalName());
492 }
493 
494 template <std::unique_ptr<const QualifiedName* []> getAttrs(), unsigned length>
AdjustAttributes(AtomicHTMLToken * token)495 static void AdjustAttributes(AtomicHTMLToken* token) {
496   static PrefixedNameToQualifiedNameMap* case_map = nullptr;
497   if (!case_map) {
498     case_map = new PrefixedNameToQualifiedNameMap;
499     std::unique_ptr<const QualifiedName* []> attrs = getAttrs();
500     MapLoweredLocalNameToName(case_map, attrs.get(), length);
501   }
502 
503   for (auto& token_attribute : token->Attributes()) {
504     const QualifiedName& cased_name = case_map->at(token_attribute.LocalName());
505     if (!cased_name.LocalName().IsNull())
506       token_attribute.ParserSetName(cased_name);
507   }
508 }
509 
// https://html.spec.whatwg.org/C/#adjust-svg-attributes
// Runs AdjustAttributes over |token| with the SVG attribute name table.
static void AdjustSVGAttributes(AtomicHTMLToken* token) {
  AdjustAttributes<svg_names::GetAttrs, svg_names::kAttrsCount>(token);
}
514 
// https://html.spec.whatwg.org/C/#adjust-mathml-attributes
// Runs AdjustAttributes over |token| with the MathML attribute name table.
static void AdjustMathMLAttributes(AtomicHTMLToken* token) {
  AdjustAttributes<mathml_names::GetAttrs, mathml_names::kAttrsCount>(token);
}
519 
AddNamesWithPrefix(PrefixedNameToQualifiedNameMap * map,const AtomicString & prefix,const QualifiedName * const * names,size_t length)520 static void AddNamesWithPrefix(PrefixedNameToQualifiedNameMap* map,
521                                const AtomicString& prefix,
522                                const QualifiedName* const* names,
523                                size_t length) {
524   for (size_t i = 0; i < length; ++i) {
525     const QualifiedName* name = names[i];
526     const AtomicString& local_name = name->LocalName();
527     AtomicString prefix_colon_local_name = prefix + ':' + local_name;
528     QualifiedName name_with_prefix(prefix, local_name, name->NamespaceURI());
529     map->insert(prefix_colon_local_name, name_with_prefix);
530   }
531 }
532 
AdjustForeignAttributes(AtomicHTMLToken * token)533 static void AdjustForeignAttributes(AtomicHTMLToken* token) {
534   static PrefixedNameToQualifiedNameMap* map = nullptr;
535   if (!map) {
536     map = new PrefixedNameToQualifiedNameMap;
537 
538     std::unique_ptr<const QualifiedName* []> attrs = xlink_names::GetAttrs();
539     AddNamesWithPrefix(map, g_xlink_atom, attrs.get(),
540                        xlink_names::kAttrsCount);
541 
542     std::unique_ptr<const QualifiedName* []> xml_attrs = xml_names::GetAttrs();
543     AddNamesWithPrefix(map, g_xml_atom, xml_attrs.get(),
544                        xml_names::kAttrsCount);
545 
546     map->insert(WTF::g_xmlns_atom, xmlns_names::kXmlnsAttr);
547     map->insert("xmlns:xlink", QualifiedName(g_xmlns_atom, g_xlink_atom,
548                                              xmlns_names::kNamespaceURI));
549   }
550 
551   for (unsigned i = 0; i < token->Attributes().size(); ++i) {
552     Attribute& token_attribute = token->Attributes().at(i);
553     const QualifiedName& name = map->at(token_attribute.LocalName());
554     if (!name.LocalName().IsNull())
555       token_attribute.ParserSetName(name);
556   }
557 }
558 
ProcessStartTagForInBody(AtomicHTMLToken * token)559 void HTMLTreeBuilder::ProcessStartTagForInBody(AtomicHTMLToken* token) {
560   DCHECK_EQ(token->GetType(), HTMLToken::kStartTag);
561   if (token->GetName() == html_names::kHTMLTag) {
562     ProcessHtmlStartTagForInBody(token);
563     return;
564   }
565   if (token->GetName() == html_names::kBaseTag ||
566       token->GetName() == html_names::kBasefontTag ||
567       token->GetName() == html_names::kBgsoundTag ||
568       token->GetName() == html_names::kCommandTag ||
569       token->GetName() == html_names::kLinkTag ||
570       token->GetName() == html_names::kMetaTag ||
571       token->GetName() == html_names::kNoframesTag ||
572       token->GetName() == html_names::kScriptTag ||
573       token->GetName() == html_names::kStyleTag ||
574       token->GetName() == html_names::kTitleTag ||
575       token->GetName() == html_names::kTemplateTag) {
576     bool did_process = ProcessStartTagForInHead(token);
577     DCHECK(did_process);
578     return;
579   }
580   if (token->GetName() == html_names::kBodyTag) {
581     ParseError(token);
582     if (!tree_.OpenElements()->SecondElementIsHTMLBodyElement() ||
583         tree_.OpenElements()->HasOnlyOneElement() ||
584         tree_.OpenElements()->HasTemplateInHTMLScope()) {
585       DCHECK(IsParsingFragmentOrTemplateContents());
586       return;
587     }
588     frameset_ok_ = false;
589     tree_.InsertHTMLBodyStartTagInBody(token);
590     return;
591   }
592   if (token->GetName() == html_names::kFramesetTag) {
593     ParseError(token);
594     if (!tree_.OpenElements()->SecondElementIsHTMLBodyElement() ||
595         tree_.OpenElements()->HasOnlyOneElement()) {
596       DCHECK(IsParsingFragmentOrTemplateContents());
597       return;
598     }
599     if (!frameset_ok_)
600       return;
601     tree_.OpenElements()->BodyElement()->remove(ASSERT_NO_EXCEPTION);
602     tree_.OpenElements()->PopUntil(tree_.OpenElements()->BodyElement());
603     tree_.OpenElements()->PopHTMLBodyElement();
604 
605     // Note: in the fragment case the root is a DocumentFragment instead of
606     // a proper html element which is a quirk in Blink's implementation.
607     DCHECK(!IsParsingTemplateContents());
608     DCHECK(!IsParsingFragment() ||
609            To<DocumentFragment>(tree_.OpenElements()->TopNode()));
610     DCHECK(IsParsingFragment() ||
611            tree_.OpenElements()->Top() == tree_.OpenElements()->HtmlElement());
612     tree_.InsertHTMLElement(token);
613     SetInsertionMode(kInFramesetMode);
614     return;
615   }
616   if (token->GetName() == html_names::kAddressTag ||
617       token->GetName() == html_names::kArticleTag ||
618       token->GetName() == html_names::kAsideTag ||
619       token->GetName() == html_names::kBlockquoteTag ||
620       token->GetName() == html_names::kCenterTag ||
621       token->GetName() == html_names::kDetailsTag ||
622       token->GetName() == html_names::kDirTag ||
623       token->GetName() == html_names::kDivTag ||
624       token->GetName() == html_names::kDlTag ||
625       token->GetName() == html_names::kFieldsetTag ||
626       token->GetName() == html_names::kFigcaptionTag ||
627       token->GetName() == html_names::kFigureTag ||
628       token->GetName() == html_names::kFooterTag ||
629       token->GetName() == html_names::kHeaderTag ||
630       token->GetName() == html_names::kHgroupTag ||
631       token->GetName() == html_names::kMainTag ||
632       token->GetName() == html_names::kMenuTag ||
633       token->GetName() == html_names::kNavTag ||
634       token->GetName() == html_names::kOlTag ||
635       token->GetName() == html_names::kPTag ||
636       token->GetName() == html_names::kSectionTag ||
637       token->GetName() == html_names::kSummaryTag ||
638       token->GetName() == html_names::kUlTag) {
639     ProcessFakePEndTagIfPInButtonScope();
640     tree_.InsertHTMLElement(token);
641     return;
642   }
643   if (IsNumberedHeaderTag(token->GetName())) {
644     ProcessFakePEndTagIfPInButtonScope();
645     if (tree_.CurrentStackItem()->IsNumberedHeaderElement()) {
646       ParseError(token);
647       tree_.OpenElements()->Pop();
648     }
649     tree_.InsertHTMLElement(token);
650     return;
651   }
652   if (token->GetName() == html_names::kPreTag ||
653       token->GetName() == html_names::kListingTag) {
654     ProcessFakePEndTagIfPInButtonScope();
655     tree_.InsertHTMLElement(token);
656     should_skip_leading_newline_ = true;
657     frameset_ok_ = false;
658     return;
659   }
660   if (token->GetName() == html_names::kFormTag) {
661     if (tree_.IsFormElementPointerNonNull() && !IsParsingTemplateContents()) {
662       ParseError(token);
663       UseCounter::Count(tree_.CurrentNode()->GetDocument(),
664                         WebFeature::kHTMLParseErrorNestedForm);
665       return;
666     }
667     ProcessFakePEndTagIfPInButtonScope();
668     tree_.InsertHTMLFormElement(token);
669     return;
670   }
671   if (token->GetName() == html_names::kLiTag) {
672     ProcessCloseWhenNestedTag<IsLi>(token);
673     return;
674   }
675   if (token->GetName() == html_names::kDdTag ||
676       token->GetName() == html_names::kDtTag) {
677     ProcessCloseWhenNestedTag<IsDdOrDt>(token);
678     return;
679   }
680   if (token->GetName() == html_names::kPlaintextTag) {
681     ProcessFakePEndTagIfPInButtonScope();
682     tree_.InsertHTMLElement(token);
683     if (parser_->Tokenizer())
684       parser_->Tokenizer()->SetState(HTMLTokenizer::kPLAINTEXTState);
685     return;
686   }
687   if (token->GetName() == html_names::kButtonTag) {
688     if (tree_.OpenElements()->InScope(html_names::kButtonTag)) {
689       ParseError(token);
690       ProcessFakeEndTag(html_names::kButtonTag);
691       ProcessStartTag(token);  // FIXME: Could we just fall through here?
692       return;
693     }
694     tree_.ReconstructTheActiveFormattingElements();
695     tree_.InsertHTMLElement(token);
696     frameset_ok_ = false;
697     return;
698   }
699   if (token->GetName() == html_names::kATag) {
700     Element* active_a_tag =
701         tree_.ActiveFormattingElements()->ClosestElementInScopeWithName(
702             html_names::kATag.LocalName());
703     if (active_a_tag) {
704       ParseError(token);
705       ProcessFakeEndTag(html_names::kATag);
706       tree_.ActiveFormattingElements()->Remove(active_a_tag);
707       if (tree_.OpenElements()->Contains(active_a_tag))
708         tree_.OpenElements()->Remove(active_a_tag);
709     }
710     tree_.ReconstructTheActiveFormattingElements();
711     tree_.InsertFormattingElement(token);
712     return;
713   }
714   if (IsNonAnchorNonNobrFormattingTag(token->GetName())) {
715     tree_.ReconstructTheActiveFormattingElements();
716     tree_.InsertFormattingElement(token);
717     return;
718   }
719   if (token->GetName() == html_names::kNobrTag) {
720     tree_.ReconstructTheActiveFormattingElements();
721     if (tree_.OpenElements()->InScope(html_names::kNobrTag)) {
722       ParseError(token);
723       ProcessFakeEndTag(html_names::kNobrTag);
724       tree_.ReconstructTheActiveFormattingElements();
725     }
726     tree_.InsertFormattingElement(token);
727     return;
728   }
729   if (token->GetName() == html_names::kAppletTag ||
730       token->GetName() == html_names::kEmbedTag ||
731       token->GetName() == html_names::kObjectTag) {
732     if (!PluginContentIsAllowed(tree_.GetParserContentPolicy()))
733       return;
734   }
735   if (token->GetName() == html_names::kAppletTag ||
736       token->GetName() == html_names::kMarqueeTag ||
737       token->GetName() == html_names::kObjectTag) {
738     tree_.ReconstructTheActiveFormattingElements();
739     tree_.InsertHTMLElement(token);
740     tree_.ActiveFormattingElements()->AppendMarker();
741     frameset_ok_ = false;
742     return;
743   }
744   if (token->GetName() == html_names::kTableTag) {
745     if (!tree_.InQuirksMode() &&
746         tree_.OpenElements()->InButtonScope(html_names::kPTag))
747       ProcessFakeEndTag(html_names::kPTag);
748     tree_.InsertHTMLElement(token);
749     frameset_ok_ = false;
750     SetInsertionMode(kInTableMode);
751     return;
752   }
753   if (token->GetName() == html_names::kImageTag) {
754     ParseError(token);
755     // Apparently we're not supposed to ask.
756     token->SetName(html_names::kImgTag.LocalName());
757     // Note the fall through to the kImgTag handling below!
758   }
759   if (token->GetName() == html_names::kAreaTag ||
760       token->GetName() == html_names::kBrTag ||
761       token->GetName() == html_names::kEmbedTag ||
762       token->GetName() == html_names::kImgTag ||
763       token->GetName() == html_names::kKeygenTag ||
764       token->GetName() == html_names::kWbrTag) {
765     tree_.ReconstructTheActiveFormattingElements();
766     tree_.InsertSelfClosingHTMLElementDestroyingToken(token);
767     frameset_ok_ = false;
768     return;
769   }
770   if (token->GetName() == html_names::kInputTag) {
771     // Per spec https://html.spec.whatwg.org/C/#parsing-main-inbody,
772     // section "A start tag whose tag name is "input""
773 
774     Attribute* type_attribute = token->GetAttributeItem(html_names::kTypeAttr);
775     bool disable_frameset =
776         !type_attribute ||
777         !EqualIgnoringASCIICase(type_attribute->Value(), "hidden");
778 
779     tree_.ReconstructTheActiveFormattingElements();
780     tree_.InsertSelfClosingHTMLElementDestroyingToken(token);
781 
782     if (disable_frameset)
783       frameset_ok_ = false;
784     return;
785   }
786   if (token->GetName() == html_names::kParamTag ||
787       token->GetName() == html_names::kSourceTag ||
788       token->GetName() == html_names::kTrackTag) {
789     tree_.InsertSelfClosingHTMLElementDestroyingToken(token);
790     return;
791   }
792   if (token->GetName() == html_names::kHrTag) {
793     ProcessFakePEndTagIfPInButtonScope();
794     tree_.InsertSelfClosingHTMLElementDestroyingToken(token);
795     frameset_ok_ = false;
796     return;
797   }
798   if (token->GetName() == html_names::kTextareaTag) {
799     tree_.InsertHTMLElement(token);
800     should_skip_leading_newline_ = true;
801     if (parser_->Tokenizer())
802       parser_->Tokenizer()->SetState(HTMLTokenizer::kRCDATAState);
803     original_insertion_mode_ = insertion_mode_;
804     frameset_ok_ = false;
805     SetInsertionMode(kTextMode);
806     return;
807   }
808   if (token->GetName() == html_names::kXmpTag) {
809     ProcessFakePEndTagIfPInButtonScope();
810     tree_.ReconstructTheActiveFormattingElements();
811     frameset_ok_ = false;
812     ProcessGenericRawTextStartTag(token);
813     return;
814   }
815   if (token->GetName() == html_names::kIFrameTag) {
816     frameset_ok_ = false;
817     ProcessGenericRawTextStartTag(token);
818     return;
819   }
820   if (token->GetName() == html_names::kNoembedTag) {
821     ProcessGenericRawTextStartTag(token);
822     return;
823   }
824   if (token->GetName() == html_names::kNoscriptTag && options_.scripting_flag) {
825     ProcessGenericRawTextStartTag(token);
826     return;
827   }
828   if (token->GetName() == html_names::kSelectTag) {
829     tree_.ReconstructTheActiveFormattingElements();
830     tree_.InsertHTMLElement(token);
831     frameset_ok_ = false;
832     if (insertion_mode_ == kInTableMode || insertion_mode_ == kInCaptionMode ||
833         insertion_mode_ == kInColumnGroupMode ||
834         insertion_mode_ == kInTableBodyMode || insertion_mode_ == kInRowMode ||
835         insertion_mode_ == kInCellMode)
836       SetInsertionMode(kInSelectInTableMode);
837     else
838       SetInsertionMode(kInSelectMode);
839     return;
840   }
841   if (token->GetName() == html_names::kOptgroupTag ||
842       token->GetName() == html_names::kOptionTag) {
843     if (tree_.CurrentStackItem()->HasTagName(html_names::kOptionTag)) {
844       AtomicHTMLToken end_option(HTMLToken::kEndTag,
845                                  html_names::kOptionTag.LocalName());
846       ProcessEndTag(&end_option);
847     }
848     tree_.ReconstructTheActiveFormattingElements();
849     tree_.InsertHTMLElement(token);
850     return;
851   }
852   if (token->GetName() == html_names::kRbTag ||
853       token->GetName() == html_names::kRTCTag) {
854     if (tree_.OpenElements()->InScope(html_names::kRubyTag.LocalName())) {
855       tree_.GenerateImpliedEndTags();
856       if (!tree_.CurrentStackItem()->HasTagName(html_names::kRubyTag))
857         ParseError(token);
858     }
859     tree_.InsertHTMLElement(token);
860     return;
861   }
862   if (token->GetName() == html_names::kRtTag ||
863       token->GetName() == html_names::kRpTag) {
864     if (tree_.OpenElements()->InScope(html_names::kRubyTag.LocalName())) {
865       tree_.GenerateImpliedEndTagsWithExclusion(
866           html_names::kRTCTag.LocalName());
867       if (!tree_.CurrentStackItem()->HasTagName(html_names::kRubyTag) &&
868           !tree_.CurrentStackItem()->HasTagName(html_names::kRTCTag))
869         ParseError(token);
870     }
871     tree_.InsertHTMLElement(token);
872     return;
873   }
874   if (token->GetName() == mathml_names::kMathTag.LocalName()) {
875     tree_.ReconstructTheActiveFormattingElements();
876     AdjustMathMLAttributes(token);
877     AdjustForeignAttributes(token);
878     tree_.InsertForeignElement(token, mathml_names::kNamespaceURI);
879     return;
880   }
881   if (token->GetName() == svg_names::kSVGTag.LocalName()) {
882     tree_.ReconstructTheActiveFormattingElements();
883     AdjustSVGAttributes(token);
884     AdjustForeignAttributes(token);
885     tree_.InsertForeignElement(token, svg_names::kNamespaceURI);
886     return;
887   }
888   if (IsCaptionColOrColgroupTag(token->GetName()) ||
889       token->GetName() == html_names::kFrameTag ||
890       token->GetName() == html_names::kHeadTag ||
891       IsTableBodyContextTag(token->GetName()) ||
892       IsTableCellContextTag(token->GetName()) ||
893       token->GetName() == html_names::kTrTag) {
894     ParseError(token);
895     return;
896   }
897   tree_.ReconstructTheActiveFormattingElements();
898   tree_.InsertHTMLElement(token);
899 }
900 
ProcessTemplateStartTag(AtomicHTMLToken * token)901 void HTMLTreeBuilder::ProcessTemplateStartTag(AtomicHTMLToken* token) {
902   tree_.ActiveFormattingElements()->AppendMarker();
903 
904   DeclarativeShadowRootType declarative_shadow_root_type(
905       DeclarativeShadowRootType::kNone);
906   if (RuntimeEnabledFeatures::DeclarativeShadowDOMEnabled(
907           tree_.CurrentNode()->GetExecutionContext()) &&
908       allow_shadow_root_) {
909     if (Attribute* type_attribute =
910             token->GetAttributeItem(html_names::kShadowrootAttr)) {
911       String shadow_mode = type_attribute->Value();
912       if (EqualIgnoringASCIICase(shadow_mode, "open")) {
913         declarative_shadow_root_type = DeclarativeShadowRootType::kOpen;
914       } else if (EqualIgnoringASCIICase(shadow_mode, "closed")) {
915         declarative_shadow_root_type = DeclarativeShadowRootType::kClosed;
916       } else {
917         tree_.OwnerDocumentForCurrentNode().AddConsoleMessage(
918             MakeGarbageCollected<ConsoleMessage>(
919                 mojom::blink::ConsoleMessageSource::kOther,
920                 mojom::blink::ConsoleMessageLevel::kWarning,
921                 "Invalid declarative shadowroot attribute value \"" +
922                     shadow_mode +
923                     "\". Valid values include \"open\" and \"closed\"."));
924       }
925     }
926   }
927   tree_.InsertHTMLTemplateElement(token, declarative_shadow_root_type);
928   frameset_ok_ = false;
929   template_insertion_modes_.push_back(kTemplateContentsMode);
930   SetInsertionMode(kTemplateContentsMode);
931 }
932 
ProcessTemplateEndTag(AtomicHTMLToken * token)933 bool HTMLTreeBuilder::ProcessTemplateEndTag(AtomicHTMLToken* token) {
934   DCHECK_EQ(token->GetName(), html_names::kTemplateTag.LocalName());
935   if (!tree_.OpenElements()->HasTemplateInHTMLScope()) {
936     DCHECK(template_insertion_modes_.IsEmpty() ||
937            (template_insertion_modes_.size() == 1 &&
938             IsA<HTMLTemplateElement>(fragment_context_.ContextElement())));
939     ParseError(token);
940     return false;
941   }
942   tree_.GenerateImpliedEndTags();
943   if (!tree_.CurrentStackItem()->HasTagName(html_names::kTemplateTag))
944     ParseError(token);
945   tree_.OpenElements()->PopUntil(html_names::kTemplateTag.LocalName());
946   HTMLStackItem* template_stack_item =
947       tree_.OpenElements()->TopRecord()->StackItem();
948   tree_.OpenElements()->Pop();
949   HTMLStackItem* shadow_host_stack_item =
950       tree_.OpenElements()->TopRecord()->StackItem();
951   tree_.ActiveFormattingElements()->ClearToLastMarker();
952   template_insertion_modes_.pop_back();
953   ResetInsertionModeAppropriately();
954   if (RuntimeEnabledFeatures::DeclarativeShadowDOMEnabled(
955           shadow_host_stack_item->GetNode()->GetExecutionContext()) &&
956       template_stack_item) {
957     DCHECK(template_stack_item->IsElementNode());
958     HTMLTemplateElement* template_element =
959         DynamicTo<HTMLTemplateElement>(template_stack_item->GetElement());
960     // 9. If the start tag for the declarative template element did not have an
961     // attribute with the name "shadowroot" whose value was an ASCII
962     // case-insensitive match for the strings "open" or "closed", then stop this
963     // algorithm.
964     if (template_element->IsDeclarativeShadowRoot()) {
965       if (shadow_host_stack_item->GetNode() ==
966           tree_.OpenElements()->RootNode()) {
967         // 10. If the adjusted current node is the topmost element in the stack
968         // of open elements, then stop this algorithm.
969         template_element->SetDeclarativeShadowRootType(
970             DeclarativeShadowRootType::kNone);
971       } else {
972         DCHECK(shadow_host_stack_item);
973         DCHECK(shadow_host_stack_item->IsElementNode());
974         bool delegates_focus = template_stack_item->GetAttributeItem(
975             html_names::kShadowrootdelegatesfocusAttr);
976         // TODO(crbug.com/1063157): Add an attribute for imperative slot
977         // assignment.
978         bool manual_slotting = false;
979         shadow_host_stack_item->GetElement()->AttachDeclarativeShadowRoot(
980             template_element,
981             template_element->GetDeclarativeShadowRootType() ==
982                     DeclarativeShadowRootType::kOpen
983                 ? ShadowRootType::kOpen
984                 : ShadowRootType::kClosed,
985             delegates_focus ? FocusDelegation::kDelegateFocus
986                             : FocusDelegation::kNone,
987             manual_slotting ? SlotAssignmentMode::kManual
988                             : SlotAssignmentMode::kAuto);
989       }
990     }
991   }
992   return true;
993 }
994 
ProcessEndOfFileForInTemplateContents(AtomicHTMLToken * token)995 bool HTMLTreeBuilder::ProcessEndOfFileForInTemplateContents(
996     AtomicHTMLToken* token) {
997   AtomicHTMLToken end_template(HTMLToken::kEndTag,
998                                html_names::kTemplateTag.LocalName());
999   if (!ProcessTemplateEndTag(&end_template))
1000     return false;
1001 
1002   ProcessEndOfFile(token);
1003   return true;
1004 }
1005 
ProcessColgroupEndTagForInColumnGroup()1006 bool HTMLTreeBuilder::ProcessColgroupEndTagForInColumnGroup() {
1007   if (tree_.CurrentIsRootNode() ||
1008       IsA<HTMLTemplateElement>(*tree_.CurrentNode())) {
1009     DCHECK(IsParsingFragmentOrTemplateContents());
1010     // FIXME: parse error
1011     return false;
1012   }
1013   tree_.OpenElements()->Pop();
1014   SetInsertionMode(kInTableMode);
1015   return true;
1016 }
1017 
1018 // http://www.whatwg.org/specs/web-apps/current-work/#adjusted-current-node
AdjustedCurrentStackItem() const1019 HTMLStackItem* HTMLTreeBuilder::AdjustedCurrentStackItem() const {
1020   DCHECK(!tree_.IsEmpty());
1021   if (IsParsingFragment() && tree_.OpenElements()->HasOnlyOneElement())
1022     return fragment_context_.ContextElementStackItem();
1023 
1024   return tree_.CurrentStackItem();
1025 }
1026 
1027 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#close-the-cell
CloseTheCell()1028 void HTMLTreeBuilder::CloseTheCell() {
1029   DCHECK_EQ(GetInsertionMode(), kInCellMode);
1030   if (tree_.OpenElements()->InTableScope(html_names::kTdTag)) {
1031     DCHECK(!tree_.OpenElements()->InTableScope(html_names::kThTag));
1032     ProcessFakeEndTag(html_names::kTdTag);
1033     return;
1034   }
1035   DCHECK(tree_.OpenElements()->InTableScope(html_names::kThTag));
1036   ProcessFakeEndTag(html_names::kThTag);
1037   DCHECK_EQ(GetInsertionMode(), kInRowMode);
1038 }
1039 
ProcessStartTagForInTable(AtomicHTMLToken * token)1040 void HTMLTreeBuilder::ProcessStartTagForInTable(AtomicHTMLToken* token) {
1041   DCHECK_EQ(token->GetType(), HTMLToken::kStartTag);
1042   if (token->GetName() == html_names::kCaptionTag) {
1043     tree_.OpenElements()->PopUntilTableScopeMarker();
1044     tree_.ActiveFormattingElements()->AppendMarker();
1045     tree_.InsertHTMLElement(token);
1046     SetInsertionMode(kInCaptionMode);
1047     return;
1048   }
1049   if (token->GetName() == html_names::kColgroupTag) {
1050     tree_.OpenElements()->PopUntilTableScopeMarker();
1051     tree_.InsertHTMLElement(token);
1052     SetInsertionMode(kInColumnGroupMode);
1053     return;
1054   }
1055   if (token->GetName() == html_names::kColTag) {
1056     ProcessFakeStartTag(html_names::kColgroupTag);
1057     DCHECK(kInColumnGroupMode);
1058     ProcessStartTag(token);
1059     return;
1060   }
1061   if (IsTableBodyContextTag(token->GetName())) {
1062     tree_.OpenElements()->PopUntilTableScopeMarker();
1063     tree_.InsertHTMLElement(token);
1064     SetInsertionMode(kInTableBodyMode);
1065     return;
1066   }
1067   if (IsTableCellContextTag(token->GetName()) ||
1068       token->GetName() == html_names::kTrTag) {
1069     ProcessFakeStartTag(html_names::kTbodyTag);
1070     DCHECK_EQ(GetInsertionMode(), kInTableBodyMode);
1071     ProcessStartTag(token);
1072     return;
1073   }
1074   if (token->GetName() == html_names::kTableTag) {
1075     ParseError(token);
1076     if (!ProcessTableEndTagForInTable()) {
1077       DCHECK(IsParsingFragmentOrTemplateContents());
1078       return;
1079     }
1080     ProcessStartTag(token);
1081     return;
1082   }
1083   if (token->GetName() == html_names::kStyleTag ||
1084       token->GetName() == html_names::kScriptTag) {
1085     ProcessStartTagForInHead(token);
1086     return;
1087   }
1088   if (token->GetName() == html_names::kInputTag) {
1089     Attribute* type_attribute = token->GetAttributeItem(html_names::kTypeAttr);
1090     if (type_attribute &&
1091         EqualIgnoringASCIICase(type_attribute->Value(), "hidden")) {
1092       ParseError(token);
1093       tree_.InsertSelfClosingHTMLElementDestroyingToken(token);
1094       return;
1095     }
1096     // Fall through to "anything else" case.
1097   }
1098   if (token->GetName() == html_names::kFormTag) {
1099     ParseError(token);
1100     if (tree_.IsFormElementPointerNonNull() && !IsParsingTemplateContents())
1101       return;
1102     tree_.InsertHTMLFormElement(token, true);
1103     tree_.OpenElements()->Pop();
1104     return;
1105   }
1106   if (token->GetName() == html_names::kTemplateTag) {
1107     ProcessTemplateStartTag(token);
1108     return;
1109   }
1110   ParseError(token);
1111   HTMLConstructionSite::RedirectToFosterParentGuard redirecter(tree_);
1112   ProcessStartTagForInBody(token);
1113 }
1114 
ProcessStartTag(AtomicHTMLToken * token)1115 void HTMLTreeBuilder::ProcessStartTag(AtomicHTMLToken* token) {
1116   DCHECK_EQ(token->GetType(), HTMLToken::kStartTag);
1117   switch (GetInsertionMode()) {
1118     case kInitialMode:
1119       DCHECK_EQ(GetInsertionMode(), kInitialMode);
1120       DefaultForInitial();
1121       FALLTHROUGH;
1122     case kBeforeHTMLMode:
1123       DCHECK_EQ(GetInsertionMode(), kBeforeHTMLMode);
1124       if (token->GetName() == html_names::kHTMLTag) {
1125         tree_.InsertHTMLHtmlStartTagBeforeHTML(token);
1126         SetInsertionMode(kBeforeHeadMode);
1127         return;
1128       }
1129       DefaultForBeforeHTML();
1130       FALLTHROUGH;
1131     case kBeforeHeadMode:
1132       DCHECK_EQ(GetInsertionMode(), kBeforeHeadMode);
1133       if (token->GetName() == html_names::kHTMLTag) {
1134         ProcessHtmlStartTagForInBody(token);
1135         return;
1136       }
1137       if (token->GetName() == html_names::kHeadTag) {
1138         tree_.InsertHTMLHeadElement(token);
1139         SetInsertionMode(kInHeadMode);
1140         return;
1141       }
1142       DefaultForBeforeHead();
1143       FALLTHROUGH;
1144     case kInHeadMode:
1145       DCHECK_EQ(GetInsertionMode(), kInHeadMode);
1146       if (ProcessStartTagForInHead(token))
1147         return;
1148       DefaultForInHead();
1149       FALLTHROUGH;
1150     case kAfterHeadMode:
1151       DCHECK_EQ(GetInsertionMode(), kAfterHeadMode);
1152       if (token->GetName() == html_names::kHTMLTag) {
1153         ProcessHtmlStartTagForInBody(token);
1154         return;
1155       }
1156       if (token->GetName() == html_names::kBodyTag) {
1157         frameset_ok_ = false;
1158         tree_.InsertHTMLBodyElement(token);
1159         SetInsertionMode(kInBodyMode);
1160         return;
1161       }
1162       if (token->GetName() == html_names::kFramesetTag) {
1163         tree_.InsertHTMLElement(token);
1164         SetInsertionMode(kInFramesetMode);
1165         return;
1166       }
1167       if (token->GetName() == html_names::kBaseTag ||
1168           token->GetName() == html_names::kBasefontTag ||
1169           token->GetName() == html_names::kBgsoundTag ||
1170           token->GetName() == html_names::kLinkTag ||
1171           token->GetName() == html_names::kMetaTag ||
1172           token->GetName() == html_names::kNoframesTag ||
1173           token->GetName() == html_names::kScriptTag ||
1174           token->GetName() == html_names::kStyleTag ||
1175           token->GetName() == html_names::kTemplateTag ||
1176           token->GetName() == html_names::kTitleTag) {
1177         ParseError(token);
1178         DCHECK(tree_.Head());
1179         tree_.OpenElements()->PushHTMLHeadElement(tree_.HeadStackItem());
1180         ProcessStartTagForInHead(token);
1181         tree_.OpenElements()->RemoveHTMLHeadElement(tree_.Head());
1182         return;
1183       }
1184       if (token->GetName() == html_names::kHeadTag) {
1185         ParseError(token);
1186         return;
1187       }
1188       DefaultForAfterHead();
1189       FALLTHROUGH;
1190     case kInBodyMode:
1191       DCHECK_EQ(GetInsertionMode(), kInBodyMode);
1192       ProcessStartTagForInBody(token);
1193       break;
1194     case kInTableMode:
1195       DCHECK_EQ(GetInsertionMode(), kInTableMode);
1196       ProcessStartTagForInTable(token);
1197       break;
1198     case kInCaptionMode:
1199       DCHECK_EQ(GetInsertionMode(), kInCaptionMode);
1200       if (IsCaptionColOrColgroupTag(token->GetName()) ||
1201           IsTableBodyContextTag(token->GetName()) ||
1202           IsTableCellContextTag(token->GetName()) ||
1203           token->GetName() == html_names::kTrTag) {
1204         ParseError(token);
1205         if (!ProcessCaptionEndTagForInCaption()) {
1206           DCHECK(IsParsingFragment());
1207           return;
1208         }
1209         ProcessStartTag(token);
1210         return;
1211       }
1212       ProcessStartTagForInBody(token);
1213       break;
1214     case kInColumnGroupMode:
1215       DCHECK_EQ(GetInsertionMode(), kInColumnGroupMode);
1216       if (token->GetName() == html_names::kHTMLTag) {
1217         ProcessHtmlStartTagForInBody(token);
1218         return;
1219       }
1220       if (token->GetName() == html_names::kColTag) {
1221         tree_.InsertSelfClosingHTMLElementDestroyingToken(token);
1222         return;
1223       }
1224       if (token->GetName() == html_names::kTemplateTag) {
1225         ProcessTemplateStartTag(token);
1226         return;
1227       }
1228       if (!ProcessColgroupEndTagForInColumnGroup()) {
1229         DCHECK(IsParsingFragmentOrTemplateContents());
1230         return;
1231       }
1232       ProcessStartTag(token);
1233       break;
1234     case kInTableBodyMode:
1235       DCHECK_EQ(GetInsertionMode(), kInTableBodyMode);
1236       if (token->GetName() == html_names::kTrTag) {
1237         // How is there ever anything to pop?
1238         tree_.OpenElements()->PopUntilTableBodyScopeMarker();
1239         tree_.InsertHTMLElement(token);
1240         SetInsertionMode(kInRowMode);
1241         return;
1242       }
1243       if (IsTableCellContextTag(token->GetName())) {
1244         ParseError(token);
1245         ProcessFakeStartTag(html_names::kTrTag);
1246         DCHECK_EQ(GetInsertionMode(), kInRowMode);
1247         ProcessStartTag(token);
1248         return;
1249       }
1250       if (IsCaptionColOrColgroupTag(token->GetName()) ||
1251           IsTableBodyContextTag(token->GetName())) {
1252         // FIXME: This is slow.
1253         if (!tree_.OpenElements()->InTableScope(html_names::kTbodyTag) &&
1254             !tree_.OpenElements()->InTableScope(html_names::kTheadTag) &&
1255             !tree_.OpenElements()->InTableScope(html_names::kTfootTag)) {
1256           DCHECK(IsParsingFragmentOrTemplateContents());
1257           ParseError(token);
1258           return;
1259         }
1260         tree_.OpenElements()->PopUntilTableBodyScopeMarker();
1261         DCHECK(IsTableBodyContextTag(tree_.CurrentStackItem()->LocalName()));
1262         ProcessFakeEndTag(tree_.CurrentStackItem()->LocalName());
1263         ProcessStartTag(token);
1264         return;
1265       }
1266       ProcessStartTagForInTable(token);
1267       break;
1268     case kInRowMode:
1269       DCHECK_EQ(GetInsertionMode(), kInRowMode);
1270       if (IsTableCellContextTag(token->GetName())) {
1271         tree_.OpenElements()->PopUntilTableRowScopeMarker();
1272         tree_.InsertHTMLElement(token);
1273         SetInsertionMode(kInCellMode);
1274         tree_.ActiveFormattingElements()->AppendMarker();
1275         return;
1276       }
1277       if (token->GetName() == html_names::kTrTag ||
1278           IsCaptionColOrColgroupTag(token->GetName()) ||
1279           IsTableBodyContextTag(token->GetName())) {
1280         if (!ProcessTrEndTagForInRow()) {
1281           DCHECK(IsParsingFragmentOrTemplateContents());
1282           return;
1283         }
1284         DCHECK_EQ(GetInsertionMode(), kInTableBodyMode);
1285         ProcessStartTag(token);
1286         return;
1287       }
1288       ProcessStartTagForInTable(token);
1289       break;
1290     case kInCellMode:
1291       DCHECK_EQ(GetInsertionMode(), kInCellMode);
1292       if (IsCaptionColOrColgroupTag(token->GetName()) ||
1293           IsTableCellContextTag(token->GetName()) ||
1294           token->GetName() == html_names::kTrTag ||
1295           IsTableBodyContextTag(token->GetName())) {
1296         // FIXME: This could be more efficient.
1297         if (!tree_.OpenElements()->InTableScope(html_names::kTdTag) &&
1298             !tree_.OpenElements()->InTableScope(html_names::kThTag)) {
1299           DCHECK(IsParsingFragment());
1300           ParseError(token);
1301           return;
1302         }
1303         CloseTheCell();
1304         ProcessStartTag(token);
1305         return;
1306       }
1307       ProcessStartTagForInBody(token);
1308       break;
1309     case kAfterBodyMode:
1310     case kAfterAfterBodyMode:
1311       DCHECK(GetInsertionMode() == kAfterBodyMode ||
1312              GetInsertionMode() == kAfterAfterBodyMode);
1313       if (token->GetName() == html_names::kHTMLTag) {
1314         ProcessHtmlStartTagForInBody(token);
1315         return;
1316       }
1317       SetInsertionMode(kInBodyMode);
1318       ProcessStartTag(token);
1319       break;
1320     case kInHeadNoscriptMode:
1321       DCHECK_EQ(GetInsertionMode(), kInHeadNoscriptMode);
1322       if (token->GetName() == html_names::kHTMLTag) {
1323         ProcessHtmlStartTagForInBody(token);
1324         return;
1325       }
1326       if (token->GetName() == html_names::kBasefontTag ||
1327           token->GetName() == html_names::kBgsoundTag ||
1328           token->GetName() == html_names::kLinkTag ||
1329           token->GetName() == html_names::kMetaTag ||
1330           token->GetName() == html_names::kNoframesTag ||
1331           token->GetName() == html_names::kStyleTag) {
1332         bool did_process = ProcessStartTagForInHead(token);
1333         DCHECK(did_process);
1334         return;
1335       }
1336       if (token->GetName() == html_names::kHTMLTag ||
1337           token->GetName() == html_names::kNoscriptTag) {
1338         ParseError(token);
1339         return;
1340       }
1341       DefaultForInHeadNoscript();
1342       ProcessToken(token);
1343       break;
1344     case kInFramesetMode:
1345       DCHECK_EQ(GetInsertionMode(), kInFramesetMode);
1346       if (token->GetName() == html_names::kHTMLTag) {
1347         ProcessHtmlStartTagForInBody(token);
1348         return;
1349       }
1350       if (token->GetName() == html_names::kFramesetTag) {
1351         tree_.InsertHTMLElement(token);
1352         return;
1353       }
1354       if (token->GetName() == html_names::kFrameTag) {
1355         tree_.InsertSelfClosingHTMLElementDestroyingToken(token);
1356         return;
1357       }
1358       if (token->GetName() == html_names::kNoframesTag) {
1359         ProcessStartTagForInHead(token);
1360         return;
1361       }
1362       ParseError(token);
1363       break;
1364     case kAfterFramesetMode:
1365     case kAfterAfterFramesetMode:
1366       DCHECK(GetInsertionMode() == kAfterFramesetMode ||
1367              GetInsertionMode() == kAfterAfterFramesetMode);
1368       if (token->GetName() == html_names::kHTMLTag) {
1369         ProcessHtmlStartTagForInBody(token);
1370         return;
1371       }
1372       if (token->GetName() == html_names::kNoframesTag) {
1373         ProcessStartTagForInHead(token);
1374         return;
1375       }
1376       ParseError(token);
1377       break;
1378     case kInSelectInTableMode:
1379       DCHECK_EQ(GetInsertionMode(), kInSelectInTableMode);
1380       if (token->GetName() == html_names::kCaptionTag ||
1381           token->GetName() == html_names::kTableTag ||
1382           IsTableBodyContextTag(token->GetName()) ||
1383           token->GetName() == html_names::kTrTag ||
1384           IsTableCellContextTag(token->GetName())) {
1385         ParseError(token);
1386         AtomicHTMLToken end_select(HTMLToken::kEndTag,
1387                                    html_names::kSelectTag.LocalName());
1388         ProcessEndTag(&end_select);
1389         ProcessStartTag(token);
1390         return;
1391       }
1392       FALLTHROUGH;
1393     case kInSelectMode:
1394       DCHECK(GetInsertionMode() == kInSelectMode ||
1395              GetInsertionMode() == kInSelectInTableMode);
1396       if (token->GetName() == html_names::kHTMLTag) {
1397         ProcessHtmlStartTagForInBody(token);
1398         return;
1399       }
1400       if (token->GetName() == html_names::kOptionTag) {
1401         if (tree_.CurrentStackItem()->HasTagName(html_names::kOptionTag)) {
1402           AtomicHTMLToken end_option(HTMLToken::kEndTag,
1403                                      html_names::kOptionTag.LocalName());
1404           ProcessEndTag(&end_option);
1405         }
1406         tree_.InsertHTMLElement(token);
1407         return;
1408       }
1409       if (token->GetName() == html_names::kOptgroupTag) {
1410         if (tree_.CurrentStackItem()->HasTagName(html_names::kOptionTag)) {
1411           AtomicHTMLToken end_option(HTMLToken::kEndTag,
1412                                      html_names::kOptionTag.LocalName());
1413           ProcessEndTag(&end_option);
1414         }
1415         if (tree_.CurrentStackItem()->HasTagName(html_names::kOptgroupTag)) {
1416           AtomicHTMLToken end_optgroup(HTMLToken::kEndTag,
1417                                        html_names::kOptgroupTag.LocalName());
1418           ProcessEndTag(&end_optgroup);
1419         }
1420         tree_.InsertHTMLElement(token);
1421         return;
1422       }
1423       if (token->GetName() == html_names::kSelectTag) {
1424         ParseError(token);
1425         AtomicHTMLToken end_select(HTMLToken::kEndTag,
1426                                    html_names::kSelectTag.LocalName());
1427         ProcessEndTag(&end_select);
1428         return;
1429       }
1430       if (token->GetName() == html_names::kInputTag ||
1431           token->GetName() == html_names::kKeygenTag ||
1432           token->GetName() == html_names::kTextareaTag) {
1433         ParseError(token);
1434         if (!tree_.OpenElements()->InSelectScope(html_names::kSelectTag)) {
1435           DCHECK(IsParsingFragment());
1436           return;
1437         }
1438         AtomicHTMLToken end_select(HTMLToken::kEndTag,
1439                                    html_names::kSelectTag.LocalName());
1440         ProcessEndTag(&end_select);
1441         ProcessStartTag(token);
1442         return;
1443       }
1444       if (token->GetName() == html_names::kScriptTag) {
1445         bool did_process = ProcessStartTagForInHead(token);
1446         DCHECK(did_process);
1447         return;
1448       }
1449       if (token->GetName() == html_names::kTemplateTag) {
1450         ProcessTemplateStartTag(token);
1451         return;
1452       }
1453       break;
1454     case kInTableTextMode:
1455       DefaultForInTableText();
1456       ProcessStartTag(token);
1457       break;
1458     case kTextMode:
1459       NOTREACHED();
1460       break;
1461     case kTemplateContentsMode:
1462       if (token->GetName() == html_names::kTemplateTag) {
1463         ProcessTemplateStartTag(token);
1464         return;
1465       }
1466 
1467       if (token->GetName() == html_names::kLinkTag ||
1468           token->GetName() == html_names::kScriptTag ||
1469           token->GetName() == html_names::kStyleTag ||
1470           token->GetName() == html_names::kMetaTag) {
1471         ProcessStartTagForInHead(token);
1472         return;
1473       }
1474 
1475       InsertionMode insertion_mode = kTemplateContentsMode;
1476       if (token->GetName() == html_names::kColTag)
1477         insertion_mode = kInColumnGroupMode;
1478       else if (IsCaptionColOrColgroupTag(token->GetName()) ||
1479                IsTableBodyContextTag(token->GetName()))
1480         insertion_mode = kInTableMode;
1481       else if (token->GetName() == html_names::kTrTag)
1482         insertion_mode = kInTableBodyMode;
1483       else if (IsTableCellContextTag(token->GetName()))
1484         insertion_mode = kInRowMode;
1485       else
1486         insertion_mode = kInBodyMode;
1487 
1488       DCHECK_NE(insertion_mode, kTemplateContentsMode);
1489       DCHECK_EQ(template_insertion_modes_.back(), kTemplateContentsMode);
1490       template_insertion_modes_.back() = insertion_mode;
1491       SetInsertionMode(insertion_mode);
1492 
1493       ProcessStartTag(token);
1494       break;
1495   }
1496 }
1497 
ProcessHtmlStartTagForInBody(AtomicHTMLToken * token)1498 void HTMLTreeBuilder::ProcessHtmlStartTagForInBody(AtomicHTMLToken* token) {
1499   ParseError(token);
1500   if (tree_.OpenElements()->HasTemplateInHTMLScope()) {
1501     DCHECK(IsParsingTemplateContents());
1502     return;
1503   }
1504   tree_.InsertHTMLHtmlStartTagInBody(token);
1505 }
1506 
ProcessBodyEndTagForInBody(AtomicHTMLToken * token)1507 bool HTMLTreeBuilder::ProcessBodyEndTagForInBody(AtomicHTMLToken* token) {
1508   DCHECK_EQ(token->GetType(), HTMLToken::kEndTag);
1509   DCHECK(token->GetName() == html_names::kBodyTag);
1510   if (!tree_.OpenElements()->InScope(html_names::kBodyTag.LocalName())) {
1511     ParseError(token);
1512     return false;
1513   }
1514   // Emit a more specific parse error based on stack contents.
1515   DVLOG(1) << "Not implmeneted.";
1516   SetInsertionMode(kAfterBodyMode);
1517   return true;
1518 }
1519 
ProcessAnyOtherEndTagForInBody(AtomicHTMLToken * token)1520 void HTMLTreeBuilder::ProcessAnyOtherEndTagForInBody(AtomicHTMLToken* token) {
1521   DCHECK_EQ(token->GetType(), HTMLToken::kEndTag);
1522   HTMLElementStack::ElementRecord* record = tree_.OpenElements()->TopRecord();
1523   while (1) {
1524     HTMLStackItem* item = record->StackItem();
1525     if (item->MatchesHTMLTag(token->GetName())) {
1526       tree_.GenerateImpliedEndTagsWithExclusion(token->GetName());
1527       if (!tree_.CurrentStackItem()->MatchesHTMLTag(token->GetName()))
1528         ParseError(token);
1529       tree_.OpenElements()->PopUntilPopped(item->GetElement());
1530       return;
1531     }
1532     if (item->IsSpecialNode()) {
1533       ParseError(token);
1534       return;
1535     }
1536     record = record->Next();
1537   }
1538 }
1539 
// https://html.spec.whatwg.org/multipage/parsing.html#adoption-agency-algorithm
CallTheAdoptionAgency(AtomicHTMLToken * token)1541 void HTMLTreeBuilder::CallTheAdoptionAgency(AtomicHTMLToken* token) {
1542   // The adoption agency algorithm is N^2. We limit the number of iterations
1543   // to stop from hanging the whole browser. This limit is specified in the
1544   // adoption agency algorithm:
1545   // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#parsing-main-inbody
1546   static const int kOuterIterationLimit = 8;
1547   static const int kInnerIterationLimit = 3;
1548 
1549   // 1, 2, 3 and 16 are covered by the for() loop.
1550   for (int i = 0; i < kOuterIterationLimit; ++i) {
1551     // 4.
1552     Element* formatting_element =
1553         tree_.ActiveFormattingElements()->ClosestElementInScopeWithName(
1554             token->GetName());
1555     // 4.a
1556     if (!formatting_element)
1557       return ProcessAnyOtherEndTagForInBody(token);
1558     // 4.c
1559     if ((tree_.OpenElements()->Contains(formatting_element)) &&
1560         !tree_.OpenElements()->InScope(formatting_element)) {
1561       ParseError(token);
1562       // Check the stack of open elements for a more specific parse error.
1563       DVLOG(1) << "Not implemented.";
1564       return;
1565     }
1566     // 4.b
1567     HTMLElementStack::ElementRecord* formatting_element_record =
1568         tree_.OpenElements()->Find(formatting_element);
1569     if (!formatting_element_record) {
1570       ParseError(token);
1571       tree_.ActiveFormattingElements()->Remove(formatting_element);
1572       return;
1573     }
1574     // 4.d
1575     if (formatting_element != tree_.CurrentElement())
1576       ParseError(token);
1577     // 5.
1578     HTMLElementStack::ElementRecord* furthest_block =
1579         tree_.OpenElements()->FurthestBlockForFormattingElement(
1580             formatting_element);
1581     // 6.
1582     if (!furthest_block) {
1583       tree_.OpenElements()->PopUntilPopped(formatting_element);
1584       tree_.ActiveFormattingElements()->Remove(formatting_element);
1585       return;
1586     }
1587     // 7.
1588     DCHECK(furthest_block->IsAbove(formatting_element_record));
1589     HTMLStackItem* common_ancestor =
1590         formatting_element_record->Next()->StackItem();
1591     // 8.
1592     HTMLFormattingElementList::Bookmark bookmark =
1593         tree_.ActiveFormattingElements()->BookmarkFor(formatting_element);
1594     // 9.
1595     HTMLElementStack::ElementRecord* node = furthest_block;
1596     HTMLElementStack::ElementRecord* next_node = node->Next();
1597     HTMLElementStack::ElementRecord* last_node = furthest_block;
1598     // 9.1, 9.2, 9.3 and 9.11 are covered by the for() loop.
1599     for (int j = 0; j < kInnerIterationLimit; ++j) {
1600       // 9.4
1601       node = next_node;
1602       DCHECK(node);
1603       // Save node->next() for the next iteration in case node is deleted in
1604       // 9.5.
1605       next_node = node->Next();
1606       // 9.5
1607       if (!tree_.ActiveFormattingElements()->Contains(node->GetElement())) {
1608         tree_.OpenElements()->Remove(node->GetElement());
1609         node = nullptr;
1610         continue;
1611       }
1612       // 9.6
1613       if (node == formatting_element_record)
1614         break;
1615       // 9.7
1616       HTMLStackItem* new_item =
1617           tree_.CreateElementFromSavedToken(node->StackItem());
1618 
1619       HTMLFormattingElementList::Entry* node_entry =
1620           tree_.ActiveFormattingElements()->Find(node->GetElement());
1621       node_entry->ReplaceElement(new_item);
1622       node->ReplaceElement(new_item);
1623 
1624       // 9.8
1625       if (last_node == furthest_block)
1626         bookmark.MoveToAfter(node_entry);
1627       // 9.9
1628       tree_.Reparent(node, last_node);
1629       // 9.10
1630       last_node = node;
1631     }
1632     // 10.
1633     tree_.InsertAlreadyParsedChild(common_ancestor, last_node);
1634     // 11.
1635     HTMLStackItem* new_item = tree_.CreateElementFromSavedToken(
1636         formatting_element_record->StackItem());
1637     // 12.
1638     tree_.TakeAllChildren(new_item, furthest_block);
1639     // 13.
1640     tree_.Reparent(furthest_block, new_item);
1641     // 14.
1642     tree_.ActiveFormattingElements()->SwapTo(formatting_element, new_item,
1643                                              bookmark);
1644     // 15.
1645     tree_.OpenElements()->Remove(formatting_element);
1646     tree_.OpenElements()->InsertAbove(new_item, furthest_block);
1647   }
1648 }
1649 
ResetInsertionModeAppropriately()1650 void HTMLTreeBuilder::ResetInsertionModeAppropriately() {
1651   // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#reset-the-insertion-mode-appropriately
1652   bool last = false;
1653   HTMLElementStack::ElementRecord* node_record =
1654       tree_.OpenElements()->TopRecord();
1655   while (1) {
1656     HTMLStackItem* item = node_record->StackItem();
1657     if (item->GetNode() == tree_.OpenElements()->RootNode()) {
1658       last = true;
1659       if (IsParsingFragment())
1660         item = fragment_context_.ContextElementStackItem();
1661     }
1662     if (item->HasTagName(html_names::kTemplateTag))
1663       return SetInsertionMode(template_insertion_modes_.back());
1664     if (item->HasTagName(html_names::kSelectTag)) {
1665       if (!last) {
1666         while (item->GetNode() != tree_.OpenElements()->RootNode() &&
1667                !item->HasTagName(html_names::kTemplateTag)) {
1668           node_record = node_record->Next();
1669           item = node_record->StackItem();
1670           if (item->HasTagName(html_names::kTableTag))
1671             return SetInsertionMode(kInSelectInTableMode);
1672         }
1673       }
1674       return SetInsertionMode(kInSelectMode);
1675     }
1676     if (item->HasTagName(html_names::kTdTag) ||
1677         item->HasTagName(html_names::kThTag))
1678       return SetInsertionMode(kInCellMode);
1679     if (item->HasTagName(html_names::kTrTag))
1680       return SetInsertionMode(kInRowMode);
1681     if (item->HasTagName(html_names::kTbodyTag) ||
1682         item->HasTagName(html_names::kTheadTag) ||
1683         item->HasTagName(html_names::kTfootTag))
1684       return SetInsertionMode(kInTableBodyMode);
1685     if (item->HasTagName(html_names::kCaptionTag))
1686       return SetInsertionMode(kInCaptionMode);
1687     if (item->HasTagName(html_names::kColgroupTag)) {
1688       return SetInsertionMode(kInColumnGroupMode);
1689     }
1690     if (item->HasTagName(html_names::kTableTag))
1691       return SetInsertionMode(kInTableMode);
1692     if (item->HasTagName(html_names::kHeadTag)) {
1693       if (!fragment_context_.Fragment() ||
1694           fragment_context_.ContextElement() != item->GetNode())
1695         return SetInsertionMode(kInHeadMode);
1696       return SetInsertionMode(kInBodyMode);
1697     }
1698     if (item->HasTagName(html_names::kBodyTag))
1699       return SetInsertionMode(kInBodyMode);
1700     if (item->HasTagName(html_names::kFramesetTag)) {
1701       return SetInsertionMode(kInFramesetMode);
1702     }
1703     if (item->HasTagName(html_names::kHTMLTag)) {
1704       if (tree_.HeadStackItem())
1705         return SetInsertionMode(kAfterHeadMode);
1706 
1707       DCHECK(IsParsingFragment());
1708       return SetInsertionMode(kBeforeHeadMode);
1709     }
1710     if (last) {
1711       DCHECK(IsParsingFragment());
1712       return SetInsertionMode(kInBodyMode);
1713     }
1714     node_record = node_record->Next();
1715   }
1716 }
1717 
ProcessEndTagForInTableBody(AtomicHTMLToken * token)1718 void HTMLTreeBuilder::ProcessEndTagForInTableBody(AtomicHTMLToken* token) {
1719   DCHECK_EQ(token->GetType(), HTMLToken::kEndTag);
1720   if (IsTableBodyContextTag(token->GetName())) {
1721     if (!tree_.OpenElements()->InTableScope(token->GetName())) {
1722       ParseError(token);
1723       return;
1724     }
1725     tree_.OpenElements()->PopUntilTableBodyScopeMarker();
1726     tree_.OpenElements()->Pop();
1727     SetInsertionMode(kInTableMode);
1728     return;
1729   }
1730   if (token->GetName() == html_names::kTableTag) {
1731     // FIXME: This is slow.
1732     if (!tree_.OpenElements()->InTableScope(html_names::kTbodyTag) &&
1733         !tree_.OpenElements()->InTableScope(html_names::kTheadTag) &&
1734         !tree_.OpenElements()->InTableScope(html_names::kTfootTag)) {
1735       DCHECK(IsParsingFragmentOrTemplateContents());
1736       ParseError(token);
1737       return;
1738     }
1739     tree_.OpenElements()->PopUntilTableBodyScopeMarker();
1740     DCHECK(IsTableBodyContextTag(tree_.CurrentStackItem()->LocalName()));
1741     ProcessFakeEndTag(tree_.CurrentStackItem()->LocalName());
1742     ProcessEndTag(token);
1743     return;
1744   }
1745   if (token->GetName() == html_names::kBodyTag ||
1746       IsCaptionColOrColgroupTag(token->GetName()) ||
1747       token->GetName() == html_names::kHTMLTag ||
1748       IsTableCellContextTag(token->GetName()) ||
1749       token->GetName() == html_names::kTrTag) {
1750     ParseError(token);
1751     return;
1752   }
1753   ProcessEndTagForInTable(token);
1754 }
1755 
ProcessEndTagForInRow(AtomicHTMLToken * token)1756 void HTMLTreeBuilder::ProcessEndTagForInRow(AtomicHTMLToken* token) {
1757   DCHECK_EQ(token->GetType(), HTMLToken::kEndTag);
1758   if (token->GetName() == html_names::kTrTag) {
1759     ProcessTrEndTagForInRow();
1760     return;
1761   }
1762   if (token->GetName() == html_names::kTableTag) {
1763     if (!ProcessTrEndTagForInRow()) {
1764       DCHECK(IsParsingFragmentOrTemplateContents());
1765       return;
1766     }
1767     DCHECK_EQ(GetInsertionMode(), kInTableBodyMode);
1768     ProcessEndTag(token);
1769     return;
1770   }
1771   if (IsTableBodyContextTag(token->GetName())) {
1772     if (!tree_.OpenElements()->InTableScope(token->GetName())) {
1773       ParseError(token);
1774       return;
1775     }
1776     ProcessFakeEndTag(html_names::kTrTag);
1777     DCHECK_EQ(GetInsertionMode(), kInTableBodyMode);
1778     ProcessEndTag(token);
1779     return;
1780   }
1781   if (token->GetName() == html_names::kBodyTag ||
1782       IsCaptionColOrColgroupTag(token->GetName()) ||
1783       token->GetName() == html_names::kHTMLTag ||
1784       IsTableCellContextTag(token->GetName())) {
1785     ParseError(token);
1786     return;
1787   }
1788   ProcessEndTagForInTable(token);
1789 }
1790 
ProcessEndTagForInCell(AtomicHTMLToken * token)1791 void HTMLTreeBuilder::ProcessEndTagForInCell(AtomicHTMLToken* token) {
1792   DCHECK_EQ(token->GetType(), HTMLToken::kEndTag);
1793   if (IsTableCellContextTag(token->GetName())) {
1794     if (!tree_.OpenElements()->InTableScope(token->GetName())) {
1795       ParseError(token);
1796       return;
1797     }
1798     tree_.GenerateImpliedEndTags();
1799     if (!tree_.CurrentStackItem()->MatchesHTMLTag(token->GetName()))
1800       ParseError(token);
1801     tree_.OpenElements()->PopUntilPopped(token->GetName());
1802     tree_.ActiveFormattingElements()->ClearToLastMarker();
1803     SetInsertionMode(kInRowMode);
1804     return;
1805   }
1806   if (token->GetName() == html_names::kBodyTag ||
1807       IsCaptionColOrColgroupTag(token->GetName()) ||
1808       token->GetName() == html_names::kHTMLTag) {
1809     ParseError(token);
1810     return;
1811   }
1812   if (token->GetName() == html_names::kTableTag ||
1813       token->GetName() == html_names::kTrTag ||
1814       IsTableBodyContextTag(token->GetName())) {
1815     if (!tree_.OpenElements()->InTableScope(token->GetName())) {
1816       DCHECK(IsTableBodyContextTag(token->GetName()) ||
1817              tree_.OpenElements()->InTableScope(html_names::kTemplateTag) ||
1818              IsParsingFragment());
1819       ParseError(token);
1820       return;
1821     }
1822     CloseTheCell();
1823     ProcessEndTag(token);
1824     return;
1825   }
1826   ProcessEndTagForInBody(token);
1827 }
1828 
ProcessEndTagForInBody(AtomicHTMLToken * token)1829 void HTMLTreeBuilder::ProcessEndTagForInBody(AtomicHTMLToken* token) {
1830   DCHECK_EQ(token->GetType(), HTMLToken::kEndTag);
1831   if (token->GetName() == html_names::kBodyTag) {
1832     ProcessBodyEndTagForInBody(token);
1833     return;
1834   }
1835   if (token->GetName() == html_names::kHTMLTag) {
1836     AtomicHTMLToken end_body(HTMLToken::kEndTag,
1837                              html_names::kBodyTag.LocalName());
1838     if (ProcessBodyEndTagForInBody(&end_body))
1839       ProcessEndTag(token);
1840     return;
1841   }
1842   if (token->GetName() == html_names::kAddressTag ||
1843       token->GetName() == html_names::kArticleTag ||
1844       token->GetName() == html_names::kAsideTag ||
1845       token->GetName() == html_names::kBlockquoteTag ||
1846       token->GetName() == html_names::kButtonTag ||
1847       token->GetName() == html_names::kCenterTag ||
1848       token->GetName() == html_names::kDetailsTag ||
1849       token->GetName() == html_names::kDirTag ||
1850       token->GetName() == html_names::kDivTag ||
1851       token->GetName() == html_names::kDlTag ||
1852       token->GetName() == html_names::kFieldsetTag ||
1853       token->GetName() == html_names::kFigcaptionTag ||
1854       token->GetName() == html_names::kFigureTag ||
1855       token->GetName() == html_names::kFooterTag ||
1856       token->GetName() == html_names::kHeaderTag ||
1857       token->GetName() == html_names::kHgroupTag ||
1858       token->GetName() == html_names::kListingTag ||
1859       token->GetName() == html_names::kMainTag ||
1860       token->GetName() == html_names::kMenuTag ||
1861       token->GetName() == html_names::kNavTag ||
1862       token->GetName() == html_names::kOlTag ||
1863       token->GetName() == html_names::kPreTag ||
1864       token->GetName() == html_names::kSectionTag ||
1865       token->GetName() == html_names::kSummaryTag ||
1866       token->GetName() == html_names::kUlTag) {
1867     if (!tree_.OpenElements()->InScope(token->GetName())) {
1868       ParseError(token);
1869       return;
1870     }
1871     tree_.GenerateImpliedEndTags();
1872     if (!tree_.CurrentStackItem()->MatchesHTMLTag(token->GetName()))
1873       ParseError(token);
1874     tree_.OpenElements()->PopUntilPopped(token->GetName());
1875     return;
1876   }
1877   if (token->GetName() == html_names::kFormTag &&
1878       !IsParsingTemplateContents()) {
1879     Element* node = tree_.TakeForm();
1880     if (!node || !tree_.OpenElements()->InScope(node)) {
1881       ParseError(token);
1882       return;
1883     }
1884     tree_.GenerateImpliedEndTags();
1885     if (tree_.CurrentElement() != node)
1886       ParseError(token);
1887     tree_.OpenElements()->Remove(node);
1888   }
1889   if (token->GetName() == html_names::kPTag) {
1890     if (!tree_.OpenElements()->InButtonScope(token->GetName())) {
1891       ParseError(token);
1892       ProcessFakeStartTag(html_names::kPTag);
1893       DCHECK(tree_.OpenElements()->InScope(token->GetName()));
1894       ProcessEndTag(token);
1895       return;
1896     }
1897     tree_.GenerateImpliedEndTagsWithExclusion(token->GetName());
1898     if (!tree_.CurrentStackItem()->MatchesHTMLTag(token->GetName()))
1899       ParseError(token);
1900     tree_.OpenElements()->PopUntilPopped(token->GetName());
1901     return;
1902   }
1903   if (token->GetName() == html_names::kLiTag) {
1904     if (!tree_.OpenElements()->InListItemScope(token->GetName())) {
1905       ParseError(token);
1906       return;
1907     }
1908     tree_.GenerateImpliedEndTagsWithExclusion(token->GetName());
1909     if (!tree_.CurrentStackItem()->MatchesHTMLTag(token->GetName()))
1910       ParseError(token);
1911     tree_.OpenElements()->PopUntilPopped(token->GetName());
1912     return;
1913   }
1914   if (token->GetName() == html_names::kDdTag ||
1915       token->GetName() == html_names::kDtTag) {
1916     if (!tree_.OpenElements()->InScope(token->GetName())) {
1917       ParseError(token);
1918       return;
1919     }
1920     tree_.GenerateImpliedEndTagsWithExclusion(token->GetName());
1921     if (!tree_.CurrentStackItem()->MatchesHTMLTag(token->GetName()))
1922       ParseError(token);
1923     tree_.OpenElements()->PopUntilPopped(token->GetName());
1924     return;
1925   }
1926   if (IsNumberedHeaderTag(token->GetName())) {
1927     if (!tree_.OpenElements()->HasNumberedHeaderElementInScope()) {
1928       ParseError(token);
1929       return;
1930     }
1931     tree_.GenerateImpliedEndTags();
1932     if (!tree_.CurrentStackItem()->MatchesHTMLTag(token->GetName()))
1933       ParseError(token);
1934     tree_.OpenElements()->PopUntilNumberedHeaderElementPopped();
1935     return;
1936   }
1937   if (IsFormattingTag(token->GetName())) {
1938     CallTheAdoptionAgency(token);
1939     return;
1940   }
1941   if (token->GetName() == html_names::kAppletTag ||
1942       token->GetName() == html_names::kMarqueeTag ||
1943       token->GetName() == html_names::kObjectTag) {
1944     if (!tree_.OpenElements()->InScope(token->GetName())) {
1945       ParseError(token);
1946       return;
1947     }
1948     tree_.GenerateImpliedEndTags();
1949     if (!tree_.CurrentStackItem()->MatchesHTMLTag(token->GetName()))
1950       ParseError(token);
1951     tree_.OpenElements()->PopUntilPopped(token->GetName());
1952     tree_.ActiveFormattingElements()->ClearToLastMarker();
1953     return;
1954   }
1955   if (token->GetName() == html_names::kBrTag) {
1956     ParseError(token);
1957     ProcessFakeStartTag(html_names::kBrTag);
1958     return;
1959   }
1960   if (token->GetName() == html_names::kTemplateTag) {
1961     ProcessTemplateEndTag(token);
1962     return;
1963   }
1964   ProcessAnyOtherEndTagForInBody(token);
1965 }
1966 
ProcessCaptionEndTagForInCaption()1967 bool HTMLTreeBuilder::ProcessCaptionEndTagForInCaption() {
1968   if (!tree_.OpenElements()->InTableScope(
1969           html_names::kCaptionTag.LocalName())) {
1970     DCHECK(IsParsingFragment());
1971     // FIXME: parse error
1972     return false;
1973   }
1974   tree_.GenerateImpliedEndTags();
1975   // FIXME: parse error if
1976   // (!tree_.CurrentStackItem()->HasTagName(html_names::kCaptionTag))
1977   tree_.OpenElements()->PopUntilPopped(html_names::kCaptionTag.LocalName());
1978   tree_.ActiveFormattingElements()->ClearToLastMarker();
1979   SetInsertionMode(kInTableMode);
1980   return true;
1981 }
1982 
ProcessTrEndTagForInRow()1983 bool HTMLTreeBuilder::ProcessTrEndTagForInRow() {
1984   if (!tree_.OpenElements()->InTableScope(html_names::kTrTag)) {
1985     DCHECK(IsParsingFragmentOrTemplateContents());
1986     // FIXME: parse error
1987     return false;
1988   }
1989   tree_.OpenElements()->PopUntilTableRowScopeMarker();
1990   DCHECK(tree_.CurrentStackItem()->HasTagName(html_names::kTrTag));
1991   tree_.OpenElements()->Pop();
1992   SetInsertionMode(kInTableBodyMode);
1993   return true;
1994 }
1995 
ProcessTableEndTagForInTable()1996 bool HTMLTreeBuilder::ProcessTableEndTagForInTable() {
1997   if (!tree_.OpenElements()->InTableScope(html_names::kTableTag)) {
1998     DCHECK(IsParsingFragmentOrTemplateContents());
1999     // FIXME: parse error.
2000     return false;
2001   }
2002   tree_.OpenElements()->PopUntilPopped(html_names::kTableTag.LocalName());
2003   ResetInsertionModeAppropriately();
2004   return true;
2005 }
2006 
ProcessEndTagForInTable(AtomicHTMLToken * token)2007 void HTMLTreeBuilder::ProcessEndTagForInTable(AtomicHTMLToken* token) {
2008   DCHECK_EQ(token->GetType(), HTMLToken::kEndTag);
2009   if (token->GetName() == html_names::kTableTag) {
2010     ProcessTableEndTagForInTable();
2011     return;
2012   }
2013   if (token->GetName() == html_names::kBodyTag ||
2014       IsCaptionColOrColgroupTag(token->GetName()) ||
2015       token->GetName() == html_names::kHTMLTag ||
2016       IsTableBodyContextTag(token->GetName()) ||
2017       IsTableCellContextTag(token->GetName()) ||
2018       token->GetName() == html_names::kTrTag) {
2019     ParseError(token);
2020     return;
2021   }
2022   ParseError(token);
2023   // Is this redirection necessary here?
2024   HTMLConstructionSite::RedirectToFosterParentGuard redirecter(tree_);
2025   ProcessEndTagForInBody(token);
2026 }
2027 
ProcessEndTag(AtomicHTMLToken * token)2028 void HTMLTreeBuilder::ProcessEndTag(AtomicHTMLToken* token) {
2029   DCHECK_EQ(token->GetType(), HTMLToken::kEndTag);
2030   switch (GetInsertionMode()) {
2031     case kInitialMode:
2032       DCHECK_EQ(GetInsertionMode(), kInitialMode);
2033       DefaultForInitial();
2034       FALLTHROUGH;
2035     case kBeforeHTMLMode:
2036       DCHECK_EQ(GetInsertionMode(), kBeforeHTMLMode);
2037       if (token->GetName() != html_names::kHeadTag &&
2038           token->GetName() != html_names::kBodyTag &&
2039           token->GetName() != html_names::kHTMLTag &&
2040           token->GetName() != html_names::kBrTag) {
2041         ParseError(token);
2042         return;
2043       }
2044       DefaultForBeforeHTML();
2045       FALLTHROUGH;
2046     case kBeforeHeadMode:
2047       DCHECK_EQ(GetInsertionMode(), kBeforeHeadMode);
2048       if (token->GetName() != html_names::kHeadTag &&
2049           token->GetName() != html_names::kBodyTag &&
2050           token->GetName() != html_names::kHTMLTag &&
2051           token->GetName() != html_names::kBrTag) {
2052         ParseError(token);
2053         return;
2054       }
2055       DefaultForBeforeHead();
2056       FALLTHROUGH;
2057     case kInHeadMode:
2058       DCHECK_EQ(GetInsertionMode(), kInHeadMode);
2059       // FIXME: This case should be broken out into processEndTagForInHead,
2060       // because other end tag cases now refer to it ("process the token for
2061       // using the rules of the "in head" insertion mode"). but because the
2062       // logic falls through to AfterHeadMode, that gets a little messy.
2063       if (token->GetName() == html_names::kTemplateTag) {
2064         ProcessTemplateEndTag(token);
2065         return;
2066       }
2067       if (token->GetName() == html_names::kHeadTag) {
2068         tree_.OpenElements()->PopHTMLHeadElement();
2069         SetInsertionMode(kAfterHeadMode);
2070         return;
2071       }
2072       if (token->GetName() != html_names::kBodyTag &&
2073           token->GetName() != html_names::kHTMLTag &&
2074           token->GetName() != html_names::kBrTag) {
2075         ParseError(token);
2076         return;
2077       }
2078       DefaultForInHead();
2079       FALLTHROUGH;
2080     case kAfterHeadMode:
2081       DCHECK_EQ(GetInsertionMode(), kAfterHeadMode);
2082       if (token->GetName() != html_names::kBodyTag &&
2083           token->GetName() != html_names::kHTMLTag &&
2084           token->GetName() != html_names::kBrTag) {
2085         ParseError(token);
2086         return;
2087       }
2088       DefaultForAfterHead();
2089       FALLTHROUGH;
2090     case kInBodyMode:
2091       DCHECK_EQ(GetInsertionMode(), kInBodyMode);
2092       ProcessEndTagForInBody(token);
2093       break;
2094     case kInTableMode:
2095       DCHECK_EQ(GetInsertionMode(), kInTableMode);
2096       ProcessEndTagForInTable(token);
2097       break;
2098     case kInCaptionMode:
2099       DCHECK_EQ(GetInsertionMode(), kInCaptionMode);
2100       if (token->GetName() == html_names::kCaptionTag) {
2101         ProcessCaptionEndTagForInCaption();
2102         return;
2103       }
2104       if (token->GetName() == html_names::kTableTag) {
2105         ParseError(token);
2106         if (!ProcessCaptionEndTagForInCaption()) {
2107           DCHECK(IsParsingFragment());
2108           return;
2109         }
2110         ProcessEndTag(token);
2111         return;
2112       }
2113       if (token->GetName() == html_names::kBodyTag ||
2114           token->GetName() == html_names::kColTag ||
2115           token->GetName() == html_names::kColgroupTag ||
2116           token->GetName() == html_names::kHTMLTag ||
2117           IsTableBodyContextTag(token->GetName()) ||
2118           IsTableCellContextTag(token->GetName()) ||
2119           token->GetName() == html_names::kTrTag) {
2120         ParseError(token);
2121         return;
2122       }
2123       ProcessEndTagForInBody(token);
2124       break;
2125     case kInColumnGroupMode:
2126       DCHECK_EQ(GetInsertionMode(), kInColumnGroupMode);
2127       if (token->GetName() == html_names::kColgroupTag) {
2128         ProcessColgroupEndTagForInColumnGroup();
2129         return;
2130       }
2131       if (token->GetName() == html_names::kColTag) {
2132         ParseError(token);
2133         return;
2134       }
2135       if (token->GetName() == html_names::kTemplateTag) {
2136         ProcessTemplateEndTag(token);
2137         return;
2138       }
2139       if (!ProcessColgroupEndTagForInColumnGroup()) {
2140         DCHECK(IsParsingFragmentOrTemplateContents());
2141         return;
2142       }
2143       ProcessEndTag(token);
2144       break;
2145     case kInRowMode:
2146       DCHECK_EQ(GetInsertionMode(), kInRowMode);
2147       ProcessEndTagForInRow(token);
2148       break;
2149     case kInCellMode:
2150       DCHECK_EQ(GetInsertionMode(), kInCellMode);
2151       ProcessEndTagForInCell(token);
2152       break;
2153     case kInTableBodyMode:
2154       DCHECK_EQ(GetInsertionMode(), kInTableBodyMode);
2155       ProcessEndTagForInTableBody(token);
2156       break;
2157     case kAfterBodyMode:
2158       DCHECK_EQ(GetInsertionMode(), kAfterBodyMode);
2159       if (token->GetName() == html_names::kHTMLTag) {
2160         if (IsParsingFragment()) {
2161           ParseError(token);
2162           return;
2163         }
2164         SetInsertionMode(kAfterAfterBodyMode);
2165         return;
2166       }
2167       FALLTHROUGH;
2168     case kAfterAfterBodyMode:
2169       DCHECK(GetInsertionMode() == kAfterBodyMode ||
2170              GetInsertionMode() == kAfterAfterBodyMode);
2171       ParseError(token);
2172       SetInsertionMode(kInBodyMode);
2173       ProcessEndTag(token);
2174       break;
2175     case kInHeadNoscriptMode:
2176       DCHECK_EQ(GetInsertionMode(), kInHeadNoscriptMode);
2177       if (token->GetName() == html_names::kNoscriptTag) {
2178         DCHECK(tree_.CurrentStackItem()->HasTagName(html_names::kNoscriptTag));
2179         tree_.OpenElements()->Pop();
2180         DCHECK(tree_.CurrentStackItem()->HasTagName(html_names::kHeadTag));
2181         SetInsertionMode(kInHeadMode);
2182         return;
2183       }
2184       if (token->GetName() != html_names::kBrTag) {
2185         ParseError(token);
2186         return;
2187       }
2188       DefaultForInHeadNoscript();
2189       ProcessToken(token);
2190       break;
2191     case kTextMode:
2192       if (token->GetName() == html_names::kScriptTag &&
2193           tree_.CurrentStackItem()->HasTagName(html_names::kScriptTag)) {
2194         // Pause ourselves so that parsing stops until the script can be
2195         // processed by the caller.
2196         if (ScriptingContentIsAllowed(tree_.GetParserContentPolicy()))
2197           script_to_process_ = tree_.CurrentElement();
2198         tree_.OpenElements()->Pop();
2199         SetInsertionMode(original_insertion_mode_);
2200 
2201         if (parser_->Tokenizer()) {
2202           // We must set the tokenizer's state to DataState explicitly if the
2203           // tokenizer didn't have a chance to.
2204           parser_->Tokenizer()->SetState(HTMLTokenizer::kDataState);
2205         }
2206         return;
2207       }
2208       tree_.OpenElements()->Pop();
2209       SetInsertionMode(original_insertion_mode_);
2210       break;
2211     case kInFramesetMode:
2212       DCHECK_EQ(GetInsertionMode(), kInFramesetMode);
2213       if (token->GetName() == html_names::kFramesetTag) {
2214         bool ignore_frameset_for_fragment_parsing = tree_.CurrentIsRootNode();
2215         ignore_frameset_for_fragment_parsing =
2216             ignore_frameset_for_fragment_parsing ||
2217             tree_.OpenElements()->HasTemplateInHTMLScope();
2218         if (ignore_frameset_for_fragment_parsing) {
2219           DCHECK(IsParsingFragmentOrTemplateContents());
2220           ParseError(token);
2221           return;
2222         }
2223         tree_.OpenElements()->Pop();
2224         if (!IsParsingFragment() &&
2225             !tree_.CurrentStackItem()->HasTagName(html_names::kFramesetTag))
2226           SetInsertionMode(kAfterFramesetMode);
2227         return;
2228       }
2229       break;
2230     case kAfterFramesetMode:
2231       DCHECK_EQ(GetInsertionMode(), kAfterFramesetMode);
2232       if (token->GetName() == html_names::kHTMLTag) {
2233         SetInsertionMode(kAfterAfterFramesetMode);
2234         return;
2235       }
2236       FALLTHROUGH;
2237     case kAfterAfterFramesetMode:
2238       DCHECK(GetInsertionMode() == kAfterFramesetMode ||
2239              GetInsertionMode() == kAfterAfterFramesetMode);
2240       ParseError(token);
2241       break;
2242     case kInSelectInTableMode:
2243       DCHECK(GetInsertionMode() == kInSelectInTableMode);
2244       if (token->GetName() == html_names::kCaptionTag ||
2245           token->GetName() == html_names::kTableTag ||
2246           IsTableBodyContextTag(token->GetName()) ||
2247           token->GetName() == html_names::kTrTag ||
2248           IsTableCellContextTag(token->GetName())) {
2249         ParseError(token);
2250         if (tree_.OpenElements()->InTableScope(token->GetName())) {
2251           AtomicHTMLToken end_select(HTMLToken::kEndTag,
2252                                      html_names::kSelectTag.LocalName());
2253           ProcessEndTag(&end_select);
2254           ProcessEndTag(token);
2255         }
2256         return;
2257       }
2258       FALLTHROUGH;
2259     case kInSelectMode:
2260       DCHECK(GetInsertionMode() == kInSelectMode ||
2261              GetInsertionMode() == kInSelectInTableMode);
2262       if (token->GetName() == html_names::kOptgroupTag) {
2263         if (tree_.CurrentStackItem()->HasTagName(html_names::kOptionTag) &&
2264             tree_.OneBelowTop() &&
2265             tree_.OneBelowTop()->HasTagName(html_names::kOptgroupTag))
2266           ProcessFakeEndTag(html_names::kOptionTag);
2267         if (tree_.CurrentStackItem()->HasTagName(html_names::kOptgroupTag)) {
2268           tree_.OpenElements()->Pop();
2269           return;
2270         }
2271         ParseError(token);
2272         return;
2273       }
2274       if (token->GetName() == html_names::kOptionTag) {
2275         if (tree_.CurrentStackItem()->HasTagName(html_names::kOptionTag)) {
2276           tree_.OpenElements()->Pop();
2277           return;
2278         }
2279         ParseError(token);
2280         return;
2281       }
2282       if (token->GetName() == html_names::kSelectTag) {
2283         if (!tree_.OpenElements()->InSelectScope(token->GetName())) {
2284           DCHECK(IsParsingFragment());
2285           ParseError(token);
2286           return;
2287         }
2288         tree_.OpenElements()->PopUntilPopped(
2289             html_names::kSelectTag.LocalName());
2290         ResetInsertionModeAppropriately();
2291         return;
2292       }
2293       if (token->GetName() == html_names::kTemplateTag) {
2294         ProcessTemplateEndTag(token);
2295         return;
2296       }
2297       break;
2298     case kInTableTextMode:
2299       DefaultForInTableText();
2300       ProcessEndTag(token);
2301       break;
2302     case kTemplateContentsMode:
2303       if (token->GetName() == html_names::kTemplateTag) {
2304         ProcessTemplateEndTag(token);
2305         return;
2306       }
2307       break;
2308   }
2309 }
2310 
ProcessComment(AtomicHTMLToken * token)2311 void HTMLTreeBuilder::ProcessComment(AtomicHTMLToken* token) {
2312   DCHECK_EQ(token->GetType(), HTMLToken::kComment);
2313   if (insertion_mode_ == kInitialMode || insertion_mode_ == kBeforeHTMLMode ||
2314       insertion_mode_ == kAfterAfterBodyMode ||
2315       insertion_mode_ == kAfterAfterFramesetMode) {
2316     tree_.InsertCommentOnDocument(token);
2317     return;
2318   }
2319   if (insertion_mode_ == kAfterBodyMode) {
2320     tree_.InsertCommentOnHTMLHtmlElement(token);
2321     return;
2322   }
2323   if (insertion_mode_ == kInTableTextMode) {
2324     DefaultForInTableText();
2325     ProcessComment(token);
2326     return;
2327   }
2328   tree_.InsertComment(token);
2329 }
2330 
ProcessCharacter(AtomicHTMLToken * token)2331 void HTMLTreeBuilder::ProcessCharacter(AtomicHTMLToken* token) {
2332   DCHECK_EQ(token->GetType(), HTMLToken::kCharacter);
2333   CharacterTokenBuffer buffer(token);
2334   ProcessCharacterBuffer(buffer);
2335 }
2336 
// Consumes the characters in |buffer| according to the current insertion
// mode.
//
// The early modes (kInitialMode .. kAfterHeadMode) each deal with leading
// whitespace, run their Default*() action and then fall through to the next
// case, so a single call can walk through several modes. Cases that change
// the insertion mode and need to start over jump back to the ReprocessBuffer
// label instead of falling through.
void HTMLTreeBuilder::ProcessCharacterBuffer(CharacterTokenBuffer& buffer) {
ReprocessBuffer:
  // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#parsing-main-inbody
  // Note that this logic is different than the generic \r\n collapsing
  // handled in the input stream preprocessor. This logic is here as an
  // "authoring convenience" so folks can write:
  //
  // <pre>
  // lorem ipsum
  // lorem ipsum
  // </pre>
  //
  // without getting an extra newline at the start of their <pre> element.
  if (should_skip_leading_newline_) {
    should_skip_leading_newline_ = false;
    buffer.SkipAtMostOneLeadingNewline();
    if (buffer.IsEmpty())
      return;
  }

  switch (GetInsertionMode()) {
    case kInitialMode: {
      DCHECK_EQ(GetInsertionMode(), kInitialMode);
      // Leading whitespace is discarded in this mode; anything else triggers
      // the default action and continues in the next case.
      buffer.SkipLeadingWhitespace();
      if (buffer.IsEmpty())
        return;
      DefaultForInitial();
      FALLTHROUGH;
    }
    case kBeforeHTMLMode: {
      DCHECK_EQ(GetInsertionMode(), kBeforeHTMLMode);
      buffer.SkipLeadingWhitespace();
      if (buffer.IsEmpty())
        return;
      DefaultForBeforeHTML();
      // If the parser is stopped at this point, drop whatever is left in the
      // buffer rather than continuing into the following modes.
      if (parser_->IsStopped()) {
        buffer.SkipRemaining();
        return;
      }
      FALLTHROUGH;
    }
    case kBeforeHeadMode: {
      DCHECK_EQ(GetInsertionMode(), kBeforeHeadMode);
      buffer.SkipLeadingWhitespace();
      if (buffer.IsEmpty())
        return;
      DefaultForBeforeHead();
      FALLTHROUGH;
    }
    case kInHeadMode: {
      DCHECK_EQ(GetInsertionMode(), kInHeadMode);
      // From here on leading whitespace is kept: it is inserted as its own
      // text node before the default action runs.
      StringView leading_whitespace = buffer.TakeLeadingWhitespace();
      if (!leading_whitespace.IsEmpty())
        tree_.InsertTextNode(leading_whitespace, kAllWhitespace);
      if (buffer.IsEmpty())
        return;
      DefaultForInHead();
      FALLTHROUGH;
    }
    case kAfterHeadMode: {
      DCHECK_EQ(GetInsertionMode(), kAfterHeadMode);
      StringView leading_whitespace = buffer.TakeLeadingWhitespace();
      if (!leading_whitespace.IsEmpty())
        tree_.InsertTextNode(leading_whitespace, kAllWhitespace);
      if (buffer.IsEmpty())
        return;
      DefaultForAfterHead();
      FALLTHROUGH;
    }
    case kInBodyMode:
    case kInCaptionMode:
    case kTemplateContentsMode:
    case kInCellMode: {
      DCHECK(GetInsertionMode() == kInBodyMode ||
             GetInsertionMode() == kInCaptionMode ||
             GetInsertionMode() == kInCellMode ||
             GetInsertionMode() == kTemplateContentsMode);
      ProcessCharacterBufferForInBody(buffer);
      break;
    }
    case kInTableMode:
    case kInTableBodyMode:
    case kInRowMode: {
      DCHECK(GetInsertionMode() == kInTableMode ||
             GetInsertionMode() == kInTableBodyMode ||
             GetInsertionMode() == kInRowMode);
      DCHECK(pending_table_characters_.IsEmpty());
      // When the current node is a table, tbody, tfoot, thead or tr element,
      // remember the current mode and switch to kInTableTextMode so the
      // characters get buffered. Otherwise they are processed with the
      // in-body rules while a RedirectToFosterParentGuard is in scope.
      if (tree_.CurrentStackItem()->IsElementNode() &&
          (tree_.CurrentStackItem()->HasTagName(html_names::kTableTag) ||
           tree_.CurrentStackItem()->HasTagName(html_names::kTbodyTag) ||
           tree_.CurrentStackItem()->HasTagName(html_names::kTfootTag) ||
           tree_.CurrentStackItem()->HasTagName(html_names::kTheadTag) ||
           tree_.CurrentStackItem()->HasTagName(html_names::kTrTag))) {
        original_insertion_mode_ = insertion_mode_;
        SetInsertionMode(kInTableTextMode);
        // Note that we fall through to the InTableTextMode case below.
      } else {
        HTMLConstructionSite::RedirectToFosterParentGuard redirecter(tree_);
        ProcessCharacterBufferForInBody(buffer);
        break;
      }
      FALLTHROUGH;
    }
    case kInTableTextMode: {
      // Accumulate the characters; DefaultForInTableText() is what later
      // turns pending_table_characters_ into a text node.
      buffer.GiveRemainingTo(pending_table_characters_);
      break;
    }
    case kInColumnGroupMode: {
      DCHECK_EQ(GetInsertionMode(), kInColumnGroupMode);
      StringView leading_whitespace = buffer.TakeLeadingWhitespace();
      if (!leading_whitespace.IsEmpty())
        tree_.InsertTextNode(leading_whitespace, kAllWhitespace);
      if (buffer.IsEmpty())
        return;
      if (!ProcessColgroupEndTagForInColumnGroup()) {
        DCHECK(IsParsingFragmentOrTemplateContents());
        // The spec tells us to drop these characters on the floor.
        buffer.SkipLeadingNonWhitespace();
        if (buffer.IsEmpty())
          return;
      }
      // Run the remaining characters through the switch again.
      goto ReprocessBuffer;
    }
    case kAfterBodyMode:
    case kAfterAfterBodyMode: {
      DCHECK(GetInsertionMode() == kAfterBodyMode ||
             GetInsertionMode() == kAfterAfterBodyMode);
      // FIXME: parse error
      SetInsertionMode(kInBodyMode);
      goto ReprocessBuffer;
    }
    case kTextMode: {
      DCHECK_EQ(GetInsertionMode(), kTextMode);
      tree_.InsertTextNode(buffer.TakeRemaining());
      break;
    }
    case kInHeadNoscriptMode: {
      DCHECK_EQ(GetInsertionMode(), kInHeadNoscriptMode);
      StringView leading_whitespace = buffer.TakeLeadingWhitespace();
      if (!leading_whitespace.IsEmpty())
        tree_.InsertTextNode(leading_whitespace, kAllWhitespace);
      if (buffer.IsEmpty())
        return;
      DefaultForInHeadNoscript();
      goto ReprocessBuffer;
    }
    case kInFramesetMode:
    case kAfterFramesetMode: {
      DCHECK(GetInsertionMode() == kInFramesetMode ||
             GetInsertionMode() == kAfterFramesetMode ||
             GetInsertionMode() == kAfterAfterFramesetMode);
      // Only the whitespace taken from the buffer is inserted here.
      String leading_whitespace = buffer.TakeRemainingWhitespace();
      if (!leading_whitespace.IsEmpty())
        tree_.InsertTextNode(leading_whitespace, kAllWhitespace);
      // FIXME: We should generate a parse error if we skipped over any
      // non-whitespace characters.
      break;
    }
    case kInSelectInTableMode:
    case kInSelectMode: {
      DCHECK(GetInsertionMode() == kInSelectMode ||
             GetInsertionMode() == kInSelectInTableMode);
      tree_.InsertTextNode(buffer.TakeRemaining());
      break;
    }
    case kAfterAfterFramesetMode: {
      // Same as the frameset cases above, except that the active formatting
      // elements are reconstructed before the whitespace is inserted.
      String leading_whitespace = buffer.TakeRemainingWhitespace();
      if (!leading_whitespace.IsEmpty()) {
        tree_.ReconstructTheActiveFormattingElements();
        tree_.InsertTextNode(leading_whitespace, kAllWhitespace);
      }
      // FIXME: We should generate a parse error if we skipped over any
      // non-whitespace characters.
      break;
    }
  }
}
2514 
ProcessCharacterBufferForInBody(CharacterTokenBuffer & buffer)2515 void HTMLTreeBuilder::ProcessCharacterBufferForInBody(
2516     CharacterTokenBuffer& buffer) {
2517   tree_.ReconstructTheActiveFormattingElements();
2518   StringView characters = buffer.TakeRemaining();
2519   tree_.InsertTextNode(characters);
2520   if (frameset_ok_ && !IsAllWhitespaceOrReplacementCharacters(characters))
2521     frameset_ok_ = false;
2522 }
2523 
// Handles the end-of-file token for the current insertion mode.
//
// The modes before the body (kInitialMode .. kAfterHeadMode) run their
// Default*() action and fall through to the next case. Transient modes
// (kInHeadNoscriptMode, kInTableTextMode, kTextMode) first leave that mode
// and then call this function again. Unless a case returns early, the
// function finishes by calling tree_.ProcessEndOfFile().
void HTMLTreeBuilder::ProcessEndOfFile(AtomicHTMLToken* token) {
  DCHECK_EQ(token->GetType(), HTMLToken::kEndOfFile);
  switch (GetInsertionMode()) {
    case kInitialMode:
      DCHECK_EQ(GetInsertionMode(), kInitialMode);
      DefaultForInitial();
      FALLTHROUGH;
    case kBeforeHTMLMode:
      DCHECK_EQ(GetInsertionMode(), kBeforeHTMLMode);
      DefaultForBeforeHTML();
      FALLTHROUGH;
    case kBeforeHeadMode:
      DCHECK_EQ(GetInsertionMode(), kBeforeHeadMode);
      DefaultForBeforeHead();
      FALLTHROUGH;
    case kInHeadMode:
      DCHECK_EQ(GetInsertionMode(), kInHeadMode);
      DefaultForInHead();
      FALLTHROUGH;
    case kAfterHeadMode:
      DCHECK_EQ(GetInsertionMode(), kAfterHeadMode);
      DefaultForAfterHead();
      FALLTHROUGH;
    case kInBodyMode:
    case kInCellMode:
    case kInCaptionMode:
    case kInRowMode:
      DCHECK(GetInsertionMode() == kInBodyMode ||
             GetInsertionMode() == kInCellMode ||
             GetInsertionMode() == kInCaptionMode ||
             GetInsertionMode() == kInRowMode ||
             GetInsertionMode() == kTemplateContentsMode);
      // Emit parse error based on what elements are still open.
      DVLOG(1) << "Not implemented.";
      // When template insertion modes are still recorded, the template
      // handler gets the token first; a true result means nothing more is
      // done here.
      if (!template_insertion_modes_.IsEmpty() &&
          ProcessEndOfFileForInTemplateContents(token))
        return;
      break;
    case kAfterBodyMode:
    case kAfterAfterBodyMode:
      DCHECK(GetInsertionMode() == kAfterBodyMode ||
             GetInsertionMode() == kAfterAfterBodyMode);
      break;
    case kInHeadNoscriptMode:
      DCHECK_EQ(GetInsertionMode(), kInHeadNoscriptMode);
      // Process a synthesized </noscript>, then handle EOF again.
      DefaultForInHeadNoscript();
      ProcessEndOfFile(token);
      return;
    case kAfterFramesetMode:
    case kAfterAfterFramesetMode:
      DCHECK(GetInsertionMode() == kAfterFramesetMode ||
             GetInsertionMode() == kAfterAfterFramesetMode);
      break;
    case kInColumnGroupMode:
      if (tree_.CurrentIsRootNode()) {
        DCHECK(IsParsingFragment());
        return;  // FIXME: Should we break here instead of returning?
      }
      DCHECK(tree_.CurrentNode()->HasTagName(html_names::kColgroupTag) ||
             IsA<HTMLTemplateElement>(tree_.CurrentNode()));
      ProcessColgroupEndTagForInColumnGroup();
      FALLTHROUGH;
    case kInFramesetMode:
    case kInTableMode:
    case kInTableBodyMode:
    case kInSelectInTableMode:
    case kInSelectMode:
      DCHECK(GetInsertionMode() == kInSelectMode ||
             GetInsertionMode() == kInSelectInTableMode ||
             GetInsertionMode() == kInTableMode ||
             GetInsertionMode() == kInFramesetMode ||
             GetInsertionMode() == kInTableBodyMode ||
             GetInsertionMode() == kInColumnGroupMode);
      // Reaching EOF with a current node other than the root of the open
      // element stack is reported as a parse error.
      if (tree_.CurrentNode() != tree_.OpenElements()->RootNode())
        ParseError(token);
      if (!template_insertion_modes_.IsEmpty() &&
          ProcessEndOfFileForInTemplateContents(token))
        return;
      break;
    case kInTableTextMode:
      // Flush the pending table characters, which also restores the saved
      // insertion mode, then handle EOF again.
      DefaultForInTableText();
      ProcessEndOfFile(token);
      return;
    case kTextMode: {
      ParseError(token);
      if (tree_.CurrentStackItem()->HasTagName(html_names::kScriptTag)) {
        // Mark the script element as "already started".
        DVLOG(1) << "Not implemented.";
      }
      // A <textarea> that is still open at EOF is flagged via
      // SetBlocksFormSubmission(true) before it is popped.
      Element* el = tree_.OpenElements()->Top();
      if (IsA<HTMLTextAreaElement>(el))
        To<HTMLFormControlElement>(el)->SetBlocksFormSubmission(true);
      tree_.OpenElements()->Pop();
      DCHECK_NE(original_insertion_mode_, kTextMode);
      // Return to the mode that was active before text mode and handle EOF
      // again there.
      SetInsertionMode(original_insertion_mode_);
      ProcessEndOfFile(token);
      return;
    }
    case kTemplateContentsMode:
      if (ProcessEndOfFileForInTemplateContents(token))
        return;
      break;
  }
  tree_.ProcessEndOfFile();
}
2629 
// Default action for kInitialMode: applies the default compatibility mode to
// the tree and advances to kBeforeHTMLMode.
void HTMLTreeBuilder::DefaultForInitial() {
  DVLOG(1) << "Not implemented.";
  tree_.SetDefaultCompatibilityMode();
  // FIXME: parse error
  SetInsertionMode(kBeforeHTMLMode);
}
2636 
DefaultForBeforeHTML()2637 void HTMLTreeBuilder::DefaultForBeforeHTML() {
2638   AtomicHTMLToken start_html(HTMLToken::kStartTag,
2639                              html_names::kHTMLTag.LocalName());
2640   tree_.InsertHTMLHtmlStartTagBeforeHTML(&start_html);
2641   SetInsertionMode(kBeforeHeadMode);
2642 }
2643 
DefaultForBeforeHead()2644 void HTMLTreeBuilder::DefaultForBeforeHead() {
2645   AtomicHTMLToken start_head(HTMLToken::kStartTag,
2646                              html_names::kHeadTag.LocalName());
2647   ProcessStartTag(&start_head);
2648 }
2649 
DefaultForInHead()2650 void HTMLTreeBuilder::DefaultForInHead() {
2651   AtomicHTMLToken end_head(HTMLToken::kEndTag,
2652                            html_names::kHeadTag.LocalName());
2653   ProcessEndTag(&end_head);
2654 }
2655 
DefaultForInHeadNoscript()2656 void HTMLTreeBuilder::DefaultForInHeadNoscript() {
2657   AtomicHTMLToken end_noscript(HTMLToken::kEndTag,
2658                                html_names::kNoscriptTag.LocalName());
2659   ProcessEndTag(&end_noscript);
2660 }
2661 
DefaultForAfterHead()2662 void HTMLTreeBuilder::DefaultForAfterHead() {
2663   AtomicHTMLToken start_body(HTMLToken::kStartTag,
2664                              html_names::kBodyTag.LocalName());
2665   ProcessStartTag(&start_body);
2666   frameset_ok_ = true;
2667 }
2668 
DefaultForInTableText()2669 void HTMLTreeBuilder::DefaultForInTableText() {
2670   String characters = pending_table_characters_.ToString();
2671   pending_table_characters_.Clear();
2672   if (!IsAllWhitespace(characters)) {
2673     // FIXME: parse error
2674     HTMLConstructionSite::RedirectToFosterParentGuard redirecter(tree_);
2675     tree_.ReconstructTheActiveFormattingElements();
2676     tree_.InsertTextNode(characters, kNotAllWhitespace);
2677     frameset_ok_ = false;
2678     SetInsertionMode(original_insertion_mode_);
2679     return;
2680   }
2681   tree_.InsertTextNode(characters);
2682   SetInsertionMode(original_insertion_mode_);
2683 }
2684 
ProcessStartTagForInHead(AtomicHTMLToken * token)2685 bool HTMLTreeBuilder::ProcessStartTagForInHead(AtomicHTMLToken* token) {
2686   DCHECK_EQ(token->GetType(), HTMLToken::kStartTag);
2687   if (token->GetName() == html_names::kHTMLTag) {
2688     ProcessHtmlStartTagForInBody(token);
2689     return true;
2690   }
2691   if (token->GetName() == html_names::kBaseTag ||
2692       token->GetName() == html_names::kBasefontTag ||
2693       token->GetName() == html_names::kBgsoundTag ||
2694       token->GetName() == html_names::kCommandTag ||
2695       token->GetName() == html_names::kLinkTag ||
2696       token->GetName() == html_names::kMetaTag) {
2697     tree_.InsertSelfClosingHTMLElementDestroyingToken(token);
2698     // Note: The custom processing for the <meta> tag is done in
2699     // HTMLMetaElement::process().
2700     return true;
2701   }
2702   if (token->GetName() == html_names::kTitleTag) {
2703     ProcessGenericRCDATAStartTag(token);
2704     return true;
2705   }
2706   if (token->GetName() == html_names::kNoscriptTag) {
2707     if (options_.scripting_flag) {
2708       ProcessGenericRawTextStartTag(token);
2709       return true;
2710     }
2711     tree_.InsertHTMLElement(token);
2712     SetInsertionMode(kInHeadNoscriptMode);
2713     return true;
2714   }
2715   if (token->GetName() == html_names::kNoframesTag ||
2716       token->GetName() == html_names::kStyleTag) {
2717     ProcessGenericRawTextStartTag(token);
2718     return true;
2719   }
2720   if (token->GetName() == html_names::kScriptTag) {
2721     ProcessScriptStartTag(token);
2722     return true;
2723   }
2724   if (token->GetName() == html_names::kTemplateTag) {
2725     ProcessTemplateStartTag(token);
2726     return true;
2727   }
2728   if (token->GetName() == html_names::kHeadTag) {
2729     ParseError(token);
2730     return true;
2731   }
2732   return false;
2733 }
2734 
ProcessGenericRCDATAStartTag(AtomicHTMLToken * token)2735 void HTMLTreeBuilder::ProcessGenericRCDATAStartTag(AtomicHTMLToken* token) {
2736   DCHECK_EQ(token->GetType(), HTMLToken::kStartTag);
2737   tree_.InsertHTMLElement(token);
2738   if (parser_->Tokenizer())
2739     parser_->Tokenizer()->SetState(HTMLTokenizer::kRCDATAState);
2740   original_insertion_mode_ = insertion_mode_;
2741   SetInsertionMode(kTextMode);
2742 }
2743 
ProcessGenericRawTextStartTag(AtomicHTMLToken * token)2744 void HTMLTreeBuilder::ProcessGenericRawTextStartTag(AtomicHTMLToken* token) {
2745   DCHECK_EQ(token->GetType(), HTMLToken::kStartTag);
2746   tree_.InsertHTMLElement(token);
2747   if (parser_->Tokenizer())
2748     parser_->Tokenizer()->SetState(HTMLTokenizer::kRAWTEXTState);
2749   original_insertion_mode_ = insertion_mode_;
2750   SetInsertionMode(kTextMode);
2751 }
2752 
ProcessScriptStartTag(AtomicHTMLToken * token)2753 void HTMLTreeBuilder::ProcessScriptStartTag(AtomicHTMLToken* token) {
2754   DCHECK_EQ(token->GetType(), HTMLToken::kStartTag);
2755   tree_.InsertScriptElement(token);
2756   if (parser_->Tokenizer())
2757     parser_->Tokenizer()->SetState(HTMLTokenizer::kScriptDataState);
2758   original_insertion_mode_ = insertion_mode_;
2759 
2760   TextPosition position = parser_->GetTextPosition();
2761 
2762   script_to_process_start_position_ = position;
2763 
2764   SetInsertionMode(kTextMode);
2765 }
2766 
2767 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#tree-construction
ShouldProcessTokenInForeignContent(AtomicHTMLToken * token)2768 bool HTMLTreeBuilder::ShouldProcessTokenInForeignContent(
2769     AtomicHTMLToken* token) {
2770   if (tree_.IsEmpty())
2771     return false;
2772   HTMLStackItem* adjusted_current_node = AdjustedCurrentStackItem();
2773 
2774   if (adjusted_current_node->IsInHTMLNamespace())
2775     return false;
2776   if (HTMLElementStack::IsMathMLTextIntegrationPoint(adjusted_current_node)) {
2777     if (token->GetType() == HTMLToken::kStartTag &&
2778         token->GetName() != mathml_names::kMglyphTag &&
2779         token->GetName() != mathml_names::kMalignmarkTag)
2780       return false;
2781     if (token->GetType() == HTMLToken::kCharacter)
2782       return false;
2783   }
2784   if (adjusted_current_node->HasTagName(mathml_names::kAnnotationXmlTag) &&
2785       token->GetType() == HTMLToken::kStartTag &&
2786       token->GetName() == svg_names::kSVGTag)
2787     return false;
2788   if (HTMLElementStack::IsHTMLIntegrationPoint(adjusted_current_node)) {
2789     if (token->GetType() == HTMLToken::kStartTag)
2790       return false;
2791     if (token->GetType() == HTMLToken::kCharacter)
2792       return false;
2793   }
2794   if (token->GetType() == HTMLToken::kEndOfFile)
2795     return false;
2796   return true;
2797 }
2798 
// Processes |token| with the foreign-content rules.
//
// Character tokens are inserted as text and may clear frameset_ok_. For
// every other token type the tree is flushed first and the token is
// dispatched on its type. Character and end-of-file tokens are not expected
// to reach the switch (NOTREACHED).
void HTMLTreeBuilder::ProcessTokenInForeignContent(AtomicHTMLToken* token) {
  if (token->GetType() == HTMLToken::kCharacter) {
    const String& characters = token->Characters();
    tree_.InsertTextNode(characters);
    if (frameset_ok_ && !IsAllWhitespaceOrReplacementCharacters(characters))
      frameset_ok_ = false;
    return;
  }

  tree_.Flush(kFlushAlways);
  HTMLStackItem* adjusted_current_node = AdjustedCurrentStackItem();

  switch (token->GetType()) {
    case HTMLToken::kUninitialized:
      NOTREACHED();
      break;
    case HTMLToken::DOCTYPE:
      ParseError(token);
      break;
    case HTMLToken::kStartTag: {
      // The HTML start tags listed here (and <font> carrying a color, face
      // or size attribute) are a parse error in foreign content: the open
      // element stack is popped via PopUntilForeignContentScopeMarker() and
      // the token is then processed with the regular start-tag rules.
      if (token->GetName() == html_names::kBTag ||
          token->GetName() == html_names::kBigTag ||
          token->GetName() == html_names::kBlockquoteTag ||
          token->GetName() == html_names::kBodyTag ||
          token->GetName() == html_names::kBrTag ||
          token->GetName() == html_names::kCenterTag ||
          token->GetName() == html_names::kCodeTag ||
          token->GetName() == html_names::kDdTag ||
          token->GetName() == html_names::kDivTag ||
          token->GetName() == html_names::kDlTag ||
          token->GetName() == html_names::kDtTag ||
          token->GetName() == html_names::kEmTag ||
          token->GetName() == html_names::kEmbedTag ||
          IsNumberedHeaderTag(token->GetName()) ||
          token->GetName() == html_names::kHeadTag ||
          token->GetName() == html_names::kHrTag ||
          token->GetName() == html_names::kITag ||
          token->GetName() == html_names::kImgTag ||
          token->GetName() == html_names::kLiTag ||
          token->GetName() == html_names::kListingTag ||
          token->GetName() == html_names::kMenuTag ||
          token->GetName() == html_names::kMetaTag ||
          token->GetName() == html_names::kNobrTag ||
          token->GetName() == html_names::kOlTag ||
          token->GetName() == html_names::kPTag ||
          token->GetName() == html_names::kPreTag ||
          token->GetName() == html_names::kRubyTag ||
          token->GetName() == html_names::kSTag ||
          token->GetName() == html_names::kSmallTag ||
          token->GetName() == html_names::kSpanTag ||
          token->GetName() == html_names::kStrongTag ||
          token->GetName() == html_names::kStrikeTag ||
          token->GetName() == html_names::kSubTag ||
          token->GetName() == html_names::kSupTag ||
          token->GetName() == html_names::kTableTag ||
          token->GetName() == html_names::kTtTag ||
          token->GetName() == html_names::kUTag ||
          token->GetName() == html_names::kUlTag ||
          token->GetName() == html_names::kVarTag ||
          (token->GetName() == html_names::kFontTag &&
           (token->GetAttributeItem(html_names::kColorAttr) ||
            token->GetAttributeItem(html_names::kFaceAttr) ||
            token->GetAttributeItem(html_names::kSizeAttr)))) {
        ParseError(token);
        tree_.OpenElements()->PopUntilForeignContentScopeMarker();
        ProcessStartTag(token);
        return;
      }
      // Remember where a <script> start tag was seen.
      if (token->GetName() == html_names::kScriptTag)
        script_to_process_start_position_ = parser_->GetTextPosition();
      // Apply the adjustments that match the adjusted current node's
      // namespace, then insert the element in that namespace.
      const AtomicString& current_namespace =
          adjusted_current_node->NamespaceURI();
      if (current_namespace == mathml_names::kNamespaceURI)
        AdjustMathMLAttributes(token);
      if (current_namespace == svg_names::kNamespaceURI) {
        AdjustSVGTagNameCase(token);
        AdjustSVGAttributes(token);
      }
      AdjustForeignAttributes(token);
      tree_.InsertForeignElement(token, current_namespace);
      break;
    }
    case HTMLToken::kEndTag: {
      if (adjusted_current_node->NamespaceURI() == svg_names::kNamespaceURI)
        AdjustSVGTagNameCase(token);

      // Closing an SVG <script>: record it as the script to process (when
      // scripting content is allowed) and pop it.
      if (token->GetName() == svg_names::kScriptTag &&
          tree_.CurrentStackItem()->HasTagName(svg_names::kScriptTag)) {
        if (ScriptingContentIsAllowed(tree_.GetParserContentPolicy()))
          script_to_process_ = tree_.CurrentElement();
        tree_.OpenElements()->Pop();
        return;
      }
      // </br> and </p> are a parse error here; the stack is popped via
      // PopUntilForeignContentScopeMarker() and the token is processed with
      // the regular end-tag rules.
      if (token->GetName() == html_names::kBrTag ||
          token->GetName() == html_names::kPTag) {
        ParseError(token);
        tree_.OpenElements()->PopUntilForeignContentScopeMarker();
        ProcessEndTag(token);
        return;
      }
      if (!tree_.CurrentStackItem()->IsInHTMLNamespace()) {
        // FIXME: This code just wants an Element* iterator, instead of an
        // ElementRecord*
        HTMLElementStack::ElementRecord* node_record =
            tree_.OpenElements()->TopRecord();
        // A mismatch with the top of the stack is a parse error, but the
        // search below still runs.
        if (!node_record->StackItem()->HasLocalName(token->GetName()))
          ParseError(token);
        // Walk the stack records from the top: pop through the first record
        // whose local name matches, or stop at the first record that is in
        // the HTML namespace.
        while (1) {
          if (node_record->StackItem()->HasLocalName(token->GetName())) {
            tree_.OpenElements()->PopUntilPopped(node_record->GetElement());
            return;
          }
          node_record = node_record->Next();

          if (node_record->StackItem()->IsInHTMLNamespace())
            break;
        }
      }
      // Otherwise, process the token according to the rules given in the
      // section corresponding to the current insertion mode in HTML content.
      ProcessEndTag(token);
      break;
    }
    case HTMLToken::kComment:
      tree_.InsertComment(token);
      break;
    case HTMLToken::kCharacter:
    case HTMLToken::kEndOfFile:
      NOTREACHED();
      break;
  }
}
2931 
Finished()2932 void HTMLTreeBuilder::Finished() {
2933   if (IsParsingFragment())
2934     return;
2935 
2936   DCHECK(template_insertion_modes_.IsEmpty());
2937 #if DCHECK_IS_ON()
2938   DCHECK(is_attached_);
2939 #endif
2940   // Warning, this may detach the parser. Do not do anything else after this.
2941   tree_.FinishedParsing();
2942 }
2943 
// Hook called wherever the tree builder detects a parse error. The body is
// empty, so the token is ignored and nothing is reported.
void HTMLTreeBuilder::ParseError(AtomicHTMLToken*) {}
2945 
2946 #ifndef NDEBUG
ToString(HTMLTreeBuilder::InsertionMode mode)2947 const char* HTMLTreeBuilder::ToString(HTMLTreeBuilder::InsertionMode mode) {
2948   switch (mode) {
2949 #define DEFINE_STRINGIFY(mode) \
2950   case mode:                   \
2951     return #mode;
2952     DEFINE_STRINGIFY(kInitialMode)
2953     DEFINE_STRINGIFY(kBeforeHTMLMode)
2954     DEFINE_STRINGIFY(kBeforeHeadMode)
2955     DEFINE_STRINGIFY(kInHeadMode)
2956     DEFINE_STRINGIFY(kInHeadNoscriptMode)
2957     DEFINE_STRINGIFY(kAfterHeadMode)
2958     DEFINE_STRINGIFY(kTemplateContentsMode)
2959     DEFINE_STRINGIFY(kInBodyMode)
2960     DEFINE_STRINGIFY(kTextMode)
2961     DEFINE_STRINGIFY(kInTableMode)
2962     DEFINE_STRINGIFY(kInTableTextMode)
2963     DEFINE_STRINGIFY(kInCaptionMode)
2964     DEFINE_STRINGIFY(kInColumnGroupMode)
2965     DEFINE_STRINGIFY(kInTableBodyMode)
2966     DEFINE_STRINGIFY(kInRowMode)
2967     DEFINE_STRINGIFY(kInCellMode)
2968     DEFINE_STRINGIFY(kInSelectMode)
2969     DEFINE_STRINGIFY(kInSelectInTableMode)
2970     DEFINE_STRINGIFY(kAfterBodyMode)
2971     DEFINE_STRINGIFY(kInFramesetMode)
2972     DEFINE_STRINGIFY(kAfterFramesetMode)
2973     DEFINE_STRINGIFY(kAfterAfterBodyMode)
2974     DEFINE_STRINGIFY(kAfterAfterFramesetMode)
2975 #undef DEFINE_STRINGIFY
2976   }
2977   return "<unknown>";
2978 }
2979 #endif
2980 
2981 }  // namespace blink
2982