1 /*
2 * Copyright (C) 2010 Google, Inc. All Rights Reserved.
3 * Copyright (C) 2011, 2014 Apple Inc. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GOOGLE INC. OR
18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27 #include "third_party/blink/renderer/core/html/parser/html_tree_builder.h"
28
29 #include <memory>
30
31 #include "base/macros.h"
32 #include "third_party/blink/renderer/core/dom/document.h"
33 #include "third_party/blink/renderer/core/dom/document_fragment.h"
34 #include "third_party/blink/renderer/core/dom/element_traversal.h"
35 #include "third_party/blink/renderer/core/dom/shadow_root.h"
36 #include "third_party/blink/renderer/core/execution_context/execution_context.h"
37 #include "third_party/blink/renderer/core/frame/web_feature.h"
38 #include "third_party/blink/renderer/core/html/forms/html_form_control_element.h"
39 #include "third_party/blink/renderer/core/html/forms/html_form_element.h"
40 #include "third_party/blink/renderer/core/html/html_template_element.h"
41 #include "third_party/blink/renderer/core/html/parser/atomic_html_token.h"
42 #include "third_party/blink/renderer/core/html/parser/html_document_parser.h"
43 #include "third_party/blink/renderer/core/html/parser/html_parser_idioms.h"
44 #include "third_party/blink/renderer/core/html/parser/html_stack_item.h"
45 #include "third_party/blink/renderer/core/html/parser/html_token.h"
46 #include "third_party/blink/renderer/core/html/parser/html_tokenizer.h"
47 #include "third_party/blink/renderer/core/html_names.h"
48 #include "third_party/blink/renderer/core/inspector/console_message.h"
49 #include "third_party/blink/renderer/core/mathml_names.h"
50 #include "third_party/blink/renderer/core/svg_names.h"
51 #include "third_party/blink/renderer/core/xlink_names.h"
52 #include "third_party/blink/renderer/core/xml_names.h"
53 #include "third_party/blink/renderer/core/xmlns_names.h"
54 #include "third_party/blink/renderer/platform/bindings/exception_state.h"
55 #include "third_party/blink/renderer/platform/bindings/runtime_call_stats.h"
56 #include "third_party/blink/renderer/platform/bindings/v8_per_isolate_data.h"
57 #include "third_party/blink/renderer/platform/heap/heap.h"
58 #include "third_party/blink/renderer/platform/instrumentation/use_counter.h"
59 #include "third_party/blink/renderer/platform/text/platform_locale.h"
60 #include "third_party/blink/renderer/platform/wtf/text/character_names.h"
61 #include "third_party/blink/renderer/platform/wtf/text/character_visitor.h"
62 #include "third_party/blink/renderer/platform/wtf/text/string_buffer.h"
63
64 namespace blink {
65
66 namespace {
67
IsHTMLSpaceOrReplacementCharacter(UChar character)68 inline bool IsHTMLSpaceOrReplacementCharacter(UChar character) {
69 return IsHTMLSpace<UChar>(character) || character == kReplacementCharacter;
70 }
71 }
72
UninitializedPositionValue1()73 static TextPosition UninitializedPositionValue1() {
74 return TextPosition(OrdinalNumber::FromOneBasedInt(-1),
75 OrdinalNumber::First());
76 }
77
IsAllWhitespace(const StringView & string_view)78 static inline bool IsAllWhitespace(const StringView& string_view) {
79 return string_view.IsAllSpecialCharacters<IsHTMLSpace<UChar>>();
80 }
81
IsAllWhitespaceOrReplacementCharacters(const StringView & string_view)82 static inline bool IsAllWhitespaceOrReplacementCharacters(
83 const StringView& string_view) {
84 return string_view
85 .IsAllSpecialCharacters<IsHTMLSpaceOrReplacementCharacter>();
86 }
87
IsNumberedHeaderTag(const AtomicString & tag_name)88 static bool IsNumberedHeaderTag(const AtomicString& tag_name) {
89 return tag_name == html_names::kH1Tag || tag_name == html_names::kH2Tag ||
90 tag_name == html_names::kH3Tag || tag_name == html_names::kH4Tag ||
91 tag_name == html_names::kH5Tag || tag_name == html_names::kH6Tag;
92 }
93
IsCaptionColOrColgroupTag(const AtomicString & tag_name)94 static bool IsCaptionColOrColgroupTag(const AtomicString& tag_name) {
95 return tag_name == html_names::kCaptionTag ||
96 tag_name == html_names::kColTag ||
97 tag_name == html_names::kColgroupTag;
98 }
99
IsTableCellContextTag(const AtomicString & tag_name)100 static bool IsTableCellContextTag(const AtomicString& tag_name) {
101 return tag_name == html_names::kThTag || tag_name == html_names::kTdTag;
102 }
103
IsTableBodyContextTag(const AtomicString & tag_name)104 static bool IsTableBodyContextTag(const AtomicString& tag_name) {
105 return tag_name == html_names::kTbodyTag ||
106 tag_name == html_names::kTfootTag || tag_name == html_names::kTheadTag;
107 }
108
IsNonAnchorNonNobrFormattingTag(const AtomicString & tag_name)109 static bool IsNonAnchorNonNobrFormattingTag(const AtomicString& tag_name) {
110 return tag_name == html_names::kBTag || tag_name == html_names::kBigTag ||
111 tag_name == html_names::kCodeTag || tag_name == html_names::kEmTag ||
112 tag_name == html_names::kFontTag || tag_name == html_names::kITag ||
113 tag_name == html_names::kSTag || tag_name == html_names::kSmallTag ||
114 tag_name == html_names::kStrikeTag ||
115 tag_name == html_names::kStrongTag || tag_name == html_names::kTtTag ||
116 tag_name == html_names::kUTag;
117 }
118
IsNonAnchorFormattingTag(const AtomicString & tag_name)119 static bool IsNonAnchorFormattingTag(const AtomicString& tag_name) {
120 return tag_name == html_names::kNobrTag ||
121 IsNonAnchorNonNobrFormattingTag(tag_name);
122 }
123
124 // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#formatting
IsFormattingTag(const AtomicString & tag_name)125 static bool IsFormattingTag(const AtomicString& tag_name) {
126 return tag_name == html_names::kATag || IsNonAnchorFormattingTag(tag_name);
127 }
128
129 class HTMLTreeBuilder::CharacterTokenBuffer {
130 public:
CharacterTokenBuffer(AtomicHTMLToken * token)131 explicit CharacterTokenBuffer(AtomicHTMLToken* token)
132 : characters_(token->Characters().Impl()),
133 current_(0),
134 end_(token->Characters().length()) {
135 DCHECK(!IsEmpty());
136 }
137
CharacterTokenBuffer(const String & characters)138 explicit CharacterTokenBuffer(const String& characters)
139 : characters_(characters.Impl()), current_(0), end_(characters.length()) {
140 DCHECK(!IsEmpty());
141 }
142
~CharacterTokenBuffer()143 ~CharacterTokenBuffer() { DCHECK(IsEmpty()); }
144
IsEmpty() const145 bool IsEmpty() const { return current_ == end_; }
146
SkipAtMostOneLeadingNewline()147 void SkipAtMostOneLeadingNewline() {
148 DCHECK(!IsEmpty());
149 if ((*characters_)[current_] == '\n')
150 ++current_;
151 }
152
SkipLeadingWhitespace()153 void SkipLeadingWhitespace() { SkipLeading<IsHTMLSpace<UChar>>(); }
154
TakeLeadingWhitespace()155 StringView TakeLeadingWhitespace() {
156 return TakeLeading<IsHTMLSpace<UChar>>();
157 }
158
SkipLeadingNonWhitespace()159 void SkipLeadingNonWhitespace() { SkipLeading<IsNotHTMLSpace<UChar>>(); }
160
SkipRemaining()161 void SkipRemaining() { current_ = end_; }
162
TakeRemaining()163 StringView TakeRemaining() {
164 DCHECK(!IsEmpty());
165 unsigned start = current_;
166 current_ = end_;
167 return StringView(characters_.get(), start, end_ - start);
168 }
169
GiveRemainingTo(StringBuilder & recipient)170 void GiveRemainingTo(StringBuilder& recipient) {
171 WTF::VisitCharacters(*characters_, [&](const auto* chars, unsigned length) {
172 recipient.Append(chars + current_, end_ - current_);
173 });
174 current_ = end_;
175 }
176
TakeRemainingWhitespace()177 String TakeRemainingWhitespace() {
178 DCHECK(!IsEmpty());
179 const unsigned start = current_;
180 current_ = end_; // One way or another, we're taking everything!
181
182 unsigned length = 0;
183 for (unsigned i = start; i < end_; ++i) {
184 if (IsHTMLSpace<UChar>((*characters_)[i]))
185 ++length;
186 }
187 // Returning the null string when there aren't any whitespace
188 // characters is slightly cleaner semantically because we don't want
189 // to insert a text node (as opposed to inserting an empty text node).
190 if (!length)
191 return String();
192 if (length == start - end_) // It's all whitespace.
193 return String(characters_->Substring(start, start - end_));
194
195 // All HTML spaces are ASCII.
196 StringBuffer<LChar> result(length);
197 unsigned j = 0;
198 for (unsigned i = start; i < end_; ++i) {
199 UChar c = (*characters_)[i];
200 if (IsHTMLSpace(c))
201 result[j++] = static_cast<LChar>(c);
202 }
203 DCHECK_EQ(j, length);
204 return String::Adopt(result);
205 }
206
207 private:
208 template <bool characterPredicate(UChar)>
SkipLeading()209 void SkipLeading() {
210 DCHECK(!IsEmpty());
211 while (characterPredicate((*characters_)[current_])) {
212 if (++current_ == end_)
213 return;
214 }
215 }
216
217 template <bool characterPredicate(UChar)>
TakeLeading()218 StringView TakeLeading() {
219 DCHECK(!IsEmpty());
220 const unsigned start = current_;
221 SkipLeading<characterPredicate>();
222 return StringView(characters_.get(), start, current_ - start);
223 }
224
225 scoped_refptr<StringImpl> characters_;
226 unsigned current_;
227 unsigned end_;
228
229 DISALLOW_COPY_AND_ASSIGN(CharacterTokenBuffer);
230 };
231
// Constructs a tree builder that parses into |document|.
// The builder starts in kInitialMode with frameset_ok_ set, no pending
// script position (UninitializedPositionValue1()), and a construction site
// (tree_) created from the parser's reentry permit, |document| and
// |parser_content_policy|. |options| and |allow_shadow_root| are stored as
// given.
HTMLTreeBuilder::HTMLTreeBuilder(HTMLDocumentParser* parser,
                                 Document& document,
                                 ParserContentPolicy parser_content_policy,
                                 const HTMLParserOptions& options,
                                 bool allow_shadow_root)
    : frameset_ok_(true),
      tree_(parser->ReentryPermit(), document, parser_content_policy),
      insertion_mode_(kInitialMode),
      original_insertion_mode_(kInitialMode),
      should_skip_leading_newline_(false),
      allow_shadow_root_(allow_shadow_root),
      parser_(parser),
      script_to_process_start_position_(UninitializedPositionValue1()),
      options_(options) {}
246
HTMLTreeBuilder(HTMLDocumentParser * parser,DocumentFragment * fragment,Element * context_element,ParserContentPolicy parser_content_policy,const HTMLParserOptions & options,bool allow_shadow_root)247 HTMLTreeBuilder::HTMLTreeBuilder(HTMLDocumentParser* parser,
248 DocumentFragment* fragment,
249 Element* context_element,
250 ParserContentPolicy parser_content_policy,
251 const HTMLParserOptions& options,
252 bool allow_shadow_root)
253 : HTMLTreeBuilder(parser,
254 fragment->GetDocument(),
255 parser_content_policy,
256 options,
257 allow_shadow_root) {
258 DCHECK(IsMainThread());
259 DCHECK(context_element);
260 tree_.InitFragmentParsing(fragment, context_element);
261 fragment_context_.Init(fragment, context_element);
262
263 // Steps 4.2-4.6 of the HTML5 Fragment Case parsing algorithm:
264 // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#fragment-case
265 // For efficiency, we skip step 4.2 ("Let root be a new html element with no
266 // attributes") and instead use the DocumentFragment as a root node.
267 tree_.OpenElements()->PushRootNode(MakeGarbageCollected<HTMLStackItem>(
268 fragment, HTMLStackItem::kItemForDocumentFragmentNode));
269
270 if (IsA<HTMLTemplateElement>(*context_element))
271 template_insertion_modes_.push_back(kTemplateContentsMode);
272
273 ResetInsertionModeAppropriately();
274 }
275
// Compiler-generated destructor; no explicit teardown is performed here.
HTMLTreeBuilder::~HTMLTreeBuilder() = default;
277
Init(DocumentFragment * fragment,Element * context_element)278 void HTMLTreeBuilder::FragmentParsingContext::Init(DocumentFragment* fragment,
279 Element* context_element) {
280 DCHECK(fragment);
281 DCHECK(!fragment->HasChildren());
282 fragment_ = fragment;
283 context_element_stack_item_ = MakeGarbageCollected<HTMLStackItem>(
284 context_element, HTMLStackItem::kItemForContextElement);
285 }
286
// Reports the fragment and the context element's stack item to |visitor|.
void HTMLTreeBuilder::FragmentParsingContext::Trace(Visitor* visitor) const {
  visitor->Trace(fragment_);
  visitor->Trace(context_element_stack_item_);
}
291
// Reports the fragment context, the construction site (tree_), the owning
// parser and the pending script element to |visitor|.
void HTMLTreeBuilder::Trace(Visitor* visitor) const {
  visitor->Trace(fragment_context_);
  visitor->Trace(tree_);
  visitor->Trace(parser_);
  visitor->Trace(script_to_process_);
}
298
// Detaches the construction site (tree_). In DCHECK-enabled builds this also
// clears the is_attached_ flag.
void HTMLTreeBuilder::Detach() {
#if DCHECK_IS_ON()
  // This call makes little sense in fragment mode, but for consistency
  // DocumentParser expects Detach() to always be called before it's destroyed.
  is_attached_ = false;
#endif
  // HTMLConstructionSite might be on the callstack when Detach() is called
  // otherwise we'd just call tree_.Clear() here instead.
  tree_.Detach();
}
309
TakeScriptToProcess(TextPosition & script_start_position)310 Element* HTMLTreeBuilder::TakeScriptToProcess(
311 TextPosition& script_start_position) {
312 DCHECK(script_to_process_);
313 DCHECK(!tree_.HasPendingTasks());
314 // Unpause ourselves, callers may pause us again when processing the script.
315 // The HTML5 spec is written as though scripts are executed inside the tree
316 // builder. We pause the parser to exit the tree builder, and then resume
317 // before running scripts.
318 script_start_position = script_to_process_start_position_;
319 script_to_process_start_position_ = UninitializedPositionValue1();
320 return script_to_process_.Release();
321 }
322
ConstructTree(AtomicHTMLToken * token)323 void HTMLTreeBuilder::ConstructTree(AtomicHTMLToken* token) {
324 RUNTIME_CALL_TIMER_SCOPE(V8PerIsolateData::MainThreadIsolate(),
325 RuntimeCallStats::CounterId::kConstructTree);
326 if (ShouldProcessTokenInForeignContent(token))
327 ProcessTokenInForeignContent(token);
328 else
329 ProcessToken(token);
330
331 if (parser_->Tokenizer()) {
332 bool in_foreign_content = false;
333 if (!tree_.IsEmpty()) {
334 HTMLStackItem* adjusted_current_node = AdjustedCurrentStackItem();
335 in_foreign_content =
336 !adjusted_current_node->IsInHTMLNamespace() &&
337 !HTMLElementStack::IsHTMLIntegrationPoint(adjusted_current_node) &&
338 !HTMLElementStack::IsMathMLTextIntegrationPoint(
339 adjusted_current_node);
340 }
341
342 parser_->Tokenizer()->SetForceNullCharacterReplacement(
343 insertion_mode_ == kTextMode || in_foreign_content);
344 parser_->Tokenizer()->SetShouldAllowCDATA(in_foreign_content);
345 }
346
347 tree_.ExecuteQueuedTasks();
348 // We might be detached now.
349 }
350
ProcessToken(AtomicHTMLToken * token)351 void HTMLTreeBuilder::ProcessToken(AtomicHTMLToken* token) {
352 if (token->GetType() == HTMLToken::kCharacter) {
353 ProcessCharacter(token);
354 return;
355 }
356
357 // Any non-character token needs to cause us to flush any pending text
358 // immediately. NOTE: flush() can cause any queued tasks to execute, possibly
359 // re-entering the parser.
360 tree_.Flush(kFlushAlways);
361 should_skip_leading_newline_ = false;
362
363 switch (token->GetType()) {
364 case HTMLToken::kUninitialized:
365 case HTMLToken::kCharacter:
366 NOTREACHED();
367 break;
368 case HTMLToken::DOCTYPE:
369 ProcessDoctypeToken(token);
370 break;
371 case HTMLToken::kStartTag:
372 ProcessStartTag(token);
373 break;
374 case HTMLToken::kEndTag:
375 ProcessEndTag(token);
376 break;
377 case HTMLToken::kComment:
378 ProcessComment(token);
379 break;
380 case HTMLToken::kEndOfFile:
381 ProcessEndOfFile(token);
382 break;
383 }
384 }
385
ProcessDoctypeToken(AtomicHTMLToken * token)386 void HTMLTreeBuilder::ProcessDoctypeToken(AtomicHTMLToken* token) {
387 DCHECK_EQ(token->GetType(), HTMLToken::DOCTYPE);
388 if (insertion_mode_ == kInitialMode) {
389 tree_.InsertDoctype(token);
390 SetInsertionMode(kBeforeHTMLMode);
391 return;
392 }
393 if (insertion_mode_ == kInTableTextMode) {
394 DefaultForInTableText();
395 ProcessDoctypeToken(token);
396 return;
397 }
398 ParseError(token);
399 }
400
ProcessFakeStartTag(const QualifiedName & tag_name,const Vector<Attribute> & attributes)401 void HTMLTreeBuilder::ProcessFakeStartTag(const QualifiedName& tag_name,
402 const Vector<Attribute>& attributes) {
403 // FIXME: We'll need a fancier conversion than just "localName" for SVG/MathML
404 // tags.
405 AtomicHTMLToken fake_token(HTMLToken::kStartTag, tag_name.LocalName(),
406 attributes);
407 ProcessStartTag(&fake_token);
408 }
409
ProcessFakeEndTag(const AtomicString & tag_name)410 void HTMLTreeBuilder::ProcessFakeEndTag(const AtomicString& tag_name) {
411 AtomicHTMLToken fake_token(HTMLToken::kEndTag, tag_name);
412 ProcessEndTag(&fake_token);
413 }
414
ProcessFakeEndTag(const QualifiedName & tag_name)415 void HTMLTreeBuilder::ProcessFakeEndTag(const QualifiedName& tag_name) {
416 // FIXME: We'll need a fancier conversion than just "localName" for SVG/MathML
417 // tags.
418 ProcessFakeEndTag(tag_name.LocalName());
419 }
420
ProcessFakePEndTagIfPInButtonScope()421 void HTMLTreeBuilder::ProcessFakePEndTagIfPInButtonScope() {
422 if (!tree_.OpenElements()->InButtonScope(html_names::kPTag.LocalName()))
423 return;
424 AtomicHTMLToken end_p(HTMLToken::kEndTag, html_names::kPTag.LocalName());
425 ProcessEndTag(&end_p);
426 }
427
428 namespace {
429
IsLi(const HTMLStackItem * item)430 bool IsLi(const HTMLStackItem* item) {
431 return item->HasTagName(html_names::kLiTag);
432 }
433
IsDdOrDt(const HTMLStackItem * item)434 bool IsDdOrDt(const HTMLStackItem* item) {
435 return item->HasTagName(html_names::kDdTag) ||
436 item->HasTagName(html_names::kDtTag);
437 }
438
439 } // namespace
440
441 template <bool shouldClose(const HTMLStackItem*)>
ProcessCloseWhenNestedTag(AtomicHTMLToken * token)442 void HTMLTreeBuilder::ProcessCloseWhenNestedTag(AtomicHTMLToken* token) {
443 frameset_ok_ = false;
444 HTMLElementStack::ElementRecord* node_record =
445 tree_.OpenElements()->TopRecord();
446 while (1) {
447 HTMLStackItem* item = node_record->StackItem();
448 if (shouldClose(item)) {
449 DCHECK(item->IsElementNode());
450 ProcessFakeEndTag(item->LocalName());
451 break;
452 }
453 if (item->IsSpecialNode() && !item->HasTagName(html_names::kAddressTag) &&
454 !item->HasTagName(html_names::kDivTag) &&
455 !item->HasTagName(html_names::kPTag))
456 break;
457 node_record = node_record->Next();
458 }
459 ProcessFakePEndTagIfPInButtonScope();
460 tree_.InsertHTMLElement(token);
461 }
462
463 typedef HashMap<AtomicString, QualifiedName> PrefixedNameToQualifiedNameMap;
464
465 template <typename TableQualifiedName>
MapLoweredLocalNameToName(PrefixedNameToQualifiedNameMap * map,const TableQualifiedName * const * names,size_t length)466 static void MapLoweredLocalNameToName(PrefixedNameToQualifiedNameMap* map,
467 const TableQualifiedName* const* names,
468 size_t length) {
469 for (size_t i = 0; i < length; ++i) {
470 const QualifiedName& name = *names[i];
471 const AtomicString& local_name = name.LocalName();
472 AtomicString lowered_local_name = local_name.LowerASCII();
473 if (lowered_local_name != local_name)
474 map->insert(lowered_local_name, name);
475 }
476 }
477
478 // "Any other start tag" bullet in
479 // https://html.spec.whatwg.org/C/#parsing-main-inforeign
AdjustSVGTagNameCase(AtomicHTMLToken * token)480 static void AdjustSVGTagNameCase(AtomicHTMLToken* token) {
481 static PrefixedNameToQualifiedNameMap* case_map = nullptr;
482 if (!case_map) {
483 case_map = new PrefixedNameToQualifiedNameMap;
484 std::unique_ptr<const SVGQualifiedName* []> svg_tags = svg_names::GetTags();
485 MapLoweredLocalNameToName(case_map, svg_tags.get(), svg_names::kTagsCount);
486 }
487
488 const QualifiedName& cased_name = case_map->at(token->GetName());
489 if (cased_name.LocalName().IsNull())
490 return;
491 token->SetName(cased_name.LocalName());
492 }
493
494 template <std::unique_ptr<const QualifiedName* []> getAttrs(), unsigned length>
AdjustAttributes(AtomicHTMLToken * token)495 static void AdjustAttributes(AtomicHTMLToken* token) {
496 static PrefixedNameToQualifiedNameMap* case_map = nullptr;
497 if (!case_map) {
498 case_map = new PrefixedNameToQualifiedNameMap;
499 std::unique_ptr<const QualifiedName* []> attrs = getAttrs();
500 MapLoweredLocalNameToName(case_map, attrs.get(), length);
501 }
502
503 for (auto& token_attribute : token->Attributes()) {
504 const QualifiedName& cased_name = case_map->at(token_attribute.LocalName());
505 if (!cased_name.LocalName().IsNull())
506 token_attribute.ParserSetName(cased_name);
507 }
508 }
509
510 // https://html.spec.whatwg.org/C/#adjust-svg-attributes
static void AdjustSVGAttributes(AtomicHTMLToken* token) {
  // Case-adjust |token|'s attributes against the SVG attribute name table.
  AdjustAttributes<svg_names::GetAttrs, svg_names::kAttrsCount>(token);
}
514
515 // https://html.spec.whatwg.org/C/#adjust-mathml-attributes
static void AdjustMathMLAttributes(AtomicHTMLToken* token) {
  // Case-adjust |token|'s attributes against the MathML attribute name table.
  AdjustAttributes<mathml_names::GetAttrs, mathml_names::kAttrsCount>(token);
}
519
AddNamesWithPrefix(PrefixedNameToQualifiedNameMap * map,const AtomicString & prefix,const QualifiedName * const * names,size_t length)520 static void AddNamesWithPrefix(PrefixedNameToQualifiedNameMap* map,
521 const AtomicString& prefix,
522 const QualifiedName* const* names,
523 size_t length) {
524 for (size_t i = 0; i < length; ++i) {
525 const QualifiedName* name = names[i];
526 const AtomicString& local_name = name->LocalName();
527 AtomicString prefix_colon_local_name = prefix + ':' + local_name;
528 QualifiedName name_with_prefix(prefix, local_name, name->NamespaceURI());
529 map->insert(prefix_colon_local_name, name_with_prefix);
530 }
531 }
532
AdjustForeignAttributes(AtomicHTMLToken * token)533 static void AdjustForeignAttributes(AtomicHTMLToken* token) {
534 static PrefixedNameToQualifiedNameMap* map = nullptr;
535 if (!map) {
536 map = new PrefixedNameToQualifiedNameMap;
537
538 std::unique_ptr<const QualifiedName* []> attrs = xlink_names::GetAttrs();
539 AddNamesWithPrefix(map, g_xlink_atom, attrs.get(),
540 xlink_names::kAttrsCount);
541
542 std::unique_ptr<const QualifiedName* []> xml_attrs = xml_names::GetAttrs();
543 AddNamesWithPrefix(map, g_xml_atom, xml_attrs.get(),
544 xml_names::kAttrsCount);
545
546 map->insert(WTF::g_xmlns_atom, xmlns_names::kXmlnsAttr);
547 map->insert("xmlns:xlink", QualifiedName(g_xmlns_atom, g_xlink_atom,
548 xmlns_names::kNamespaceURI));
549 }
550
551 for (unsigned i = 0; i < token->Attributes().size(); ++i) {
552 Attribute& token_attribute = token->Attributes().at(i);
553 const QualifiedName& name = map->at(token_attribute.LocalName());
554 if (!name.LocalName().IsNull())
555 token_attribute.ParserSetName(name);
556 }
557 }
558
ProcessStartTagForInBody(AtomicHTMLToken * token)559 void HTMLTreeBuilder::ProcessStartTagForInBody(AtomicHTMLToken* token) {
560 DCHECK_EQ(token->GetType(), HTMLToken::kStartTag);
561 if (token->GetName() == html_names::kHTMLTag) {
562 ProcessHtmlStartTagForInBody(token);
563 return;
564 }
565 if (token->GetName() == html_names::kBaseTag ||
566 token->GetName() == html_names::kBasefontTag ||
567 token->GetName() == html_names::kBgsoundTag ||
568 token->GetName() == html_names::kCommandTag ||
569 token->GetName() == html_names::kLinkTag ||
570 token->GetName() == html_names::kMetaTag ||
571 token->GetName() == html_names::kNoframesTag ||
572 token->GetName() == html_names::kScriptTag ||
573 token->GetName() == html_names::kStyleTag ||
574 token->GetName() == html_names::kTitleTag ||
575 token->GetName() == html_names::kTemplateTag) {
576 bool did_process = ProcessStartTagForInHead(token);
577 DCHECK(did_process);
578 return;
579 }
580 if (token->GetName() == html_names::kBodyTag) {
581 ParseError(token);
582 if (!tree_.OpenElements()->SecondElementIsHTMLBodyElement() ||
583 tree_.OpenElements()->HasOnlyOneElement() ||
584 tree_.OpenElements()->HasTemplateInHTMLScope()) {
585 DCHECK(IsParsingFragmentOrTemplateContents());
586 return;
587 }
588 frameset_ok_ = false;
589 tree_.InsertHTMLBodyStartTagInBody(token);
590 return;
591 }
592 if (token->GetName() == html_names::kFramesetTag) {
593 ParseError(token);
594 if (!tree_.OpenElements()->SecondElementIsHTMLBodyElement() ||
595 tree_.OpenElements()->HasOnlyOneElement()) {
596 DCHECK(IsParsingFragmentOrTemplateContents());
597 return;
598 }
599 if (!frameset_ok_)
600 return;
601 tree_.OpenElements()->BodyElement()->remove(ASSERT_NO_EXCEPTION);
602 tree_.OpenElements()->PopUntil(tree_.OpenElements()->BodyElement());
603 tree_.OpenElements()->PopHTMLBodyElement();
604
605 // Note: in the fragment case the root is a DocumentFragment instead of
606 // a proper html element which is a quirk in Blink's implementation.
607 DCHECK(!IsParsingTemplateContents());
608 DCHECK(!IsParsingFragment() ||
609 To<DocumentFragment>(tree_.OpenElements()->TopNode()));
610 DCHECK(IsParsingFragment() ||
611 tree_.OpenElements()->Top() == tree_.OpenElements()->HtmlElement());
612 tree_.InsertHTMLElement(token);
613 SetInsertionMode(kInFramesetMode);
614 return;
615 }
616 if (token->GetName() == html_names::kAddressTag ||
617 token->GetName() == html_names::kArticleTag ||
618 token->GetName() == html_names::kAsideTag ||
619 token->GetName() == html_names::kBlockquoteTag ||
620 token->GetName() == html_names::kCenterTag ||
621 token->GetName() == html_names::kDetailsTag ||
622 token->GetName() == html_names::kDirTag ||
623 token->GetName() == html_names::kDivTag ||
624 token->GetName() == html_names::kDlTag ||
625 token->GetName() == html_names::kFieldsetTag ||
626 token->GetName() == html_names::kFigcaptionTag ||
627 token->GetName() == html_names::kFigureTag ||
628 token->GetName() == html_names::kFooterTag ||
629 token->GetName() == html_names::kHeaderTag ||
630 token->GetName() == html_names::kHgroupTag ||
631 token->GetName() == html_names::kMainTag ||
632 token->GetName() == html_names::kMenuTag ||
633 token->GetName() == html_names::kNavTag ||
634 token->GetName() == html_names::kOlTag ||
635 token->GetName() == html_names::kPTag ||
636 token->GetName() == html_names::kSectionTag ||
637 token->GetName() == html_names::kSummaryTag ||
638 token->GetName() == html_names::kUlTag) {
639 ProcessFakePEndTagIfPInButtonScope();
640 tree_.InsertHTMLElement(token);
641 return;
642 }
643 if (IsNumberedHeaderTag(token->GetName())) {
644 ProcessFakePEndTagIfPInButtonScope();
645 if (tree_.CurrentStackItem()->IsNumberedHeaderElement()) {
646 ParseError(token);
647 tree_.OpenElements()->Pop();
648 }
649 tree_.InsertHTMLElement(token);
650 return;
651 }
652 if (token->GetName() == html_names::kPreTag ||
653 token->GetName() == html_names::kListingTag) {
654 ProcessFakePEndTagIfPInButtonScope();
655 tree_.InsertHTMLElement(token);
656 should_skip_leading_newline_ = true;
657 frameset_ok_ = false;
658 return;
659 }
660 if (token->GetName() == html_names::kFormTag) {
661 if (tree_.IsFormElementPointerNonNull() && !IsParsingTemplateContents()) {
662 ParseError(token);
663 UseCounter::Count(tree_.CurrentNode()->GetDocument(),
664 WebFeature::kHTMLParseErrorNestedForm);
665 return;
666 }
667 ProcessFakePEndTagIfPInButtonScope();
668 tree_.InsertHTMLFormElement(token);
669 return;
670 }
671 if (token->GetName() == html_names::kLiTag) {
672 ProcessCloseWhenNestedTag<IsLi>(token);
673 return;
674 }
675 if (token->GetName() == html_names::kDdTag ||
676 token->GetName() == html_names::kDtTag) {
677 ProcessCloseWhenNestedTag<IsDdOrDt>(token);
678 return;
679 }
680 if (token->GetName() == html_names::kPlaintextTag) {
681 ProcessFakePEndTagIfPInButtonScope();
682 tree_.InsertHTMLElement(token);
683 if (parser_->Tokenizer())
684 parser_->Tokenizer()->SetState(HTMLTokenizer::kPLAINTEXTState);
685 return;
686 }
687 if (token->GetName() == html_names::kButtonTag) {
688 if (tree_.OpenElements()->InScope(html_names::kButtonTag)) {
689 ParseError(token);
690 ProcessFakeEndTag(html_names::kButtonTag);
691 ProcessStartTag(token); // FIXME: Could we just fall through here?
692 return;
693 }
694 tree_.ReconstructTheActiveFormattingElements();
695 tree_.InsertHTMLElement(token);
696 frameset_ok_ = false;
697 return;
698 }
699 if (token->GetName() == html_names::kATag) {
700 Element* active_a_tag =
701 tree_.ActiveFormattingElements()->ClosestElementInScopeWithName(
702 html_names::kATag.LocalName());
703 if (active_a_tag) {
704 ParseError(token);
705 ProcessFakeEndTag(html_names::kATag);
706 tree_.ActiveFormattingElements()->Remove(active_a_tag);
707 if (tree_.OpenElements()->Contains(active_a_tag))
708 tree_.OpenElements()->Remove(active_a_tag);
709 }
710 tree_.ReconstructTheActiveFormattingElements();
711 tree_.InsertFormattingElement(token);
712 return;
713 }
714 if (IsNonAnchorNonNobrFormattingTag(token->GetName())) {
715 tree_.ReconstructTheActiveFormattingElements();
716 tree_.InsertFormattingElement(token);
717 return;
718 }
719 if (token->GetName() == html_names::kNobrTag) {
720 tree_.ReconstructTheActiveFormattingElements();
721 if (tree_.OpenElements()->InScope(html_names::kNobrTag)) {
722 ParseError(token);
723 ProcessFakeEndTag(html_names::kNobrTag);
724 tree_.ReconstructTheActiveFormattingElements();
725 }
726 tree_.InsertFormattingElement(token);
727 return;
728 }
729 if (token->GetName() == html_names::kAppletTag ||
730 token->GetName() == html_names::kEmbedTag ||
731 token->GetName() == html_names::kObjectTag) {
732 if (!PluginContentIsAllowed(tree_.GetParserContentPolicy()))
733 return;
734 }
735 if (token->GetName() == html_names::kAppletTag ||
736 token->GetName() == html_names::kMarqueeTag ||
737 token->GetName() == html_names::kObjectTag) {
738 tree_.ReconstructTheActiveFormattingElements();
739 tree_.InsertHTMLElement(token);
740 tree_.ActiveFormattingElements()->AppendMarker();
741 frameset_ok_ = false;
742 return;
743 }
744 if (token->GetName() == html_names::kTableTag) {
745 if (!tree_.InQuirksMode() &&
746 tree_.OpenElements()->InButtonScope(html_names::kPTag))
747 ProcessFakeEndTag(html_names::kPTag);
748 tree_.InsertHTMLElement(token);
749 frameset_ok_ = false;
750 SetInsertionMode(kInTableMode);
751 return;
752 }
753 if (token->GetName() == html_names::kImageTag) {
754 ParseError(token);
755 // Apparently we're not supposed to ask.
756 token->SetName(html_names::kImgTag.LocalName());
757 // Note the fall through to the kImgTag handling below!
758 }
759 if (token->GetName() == html_names::kAreaTag ||
760 token->GetName() == html_names::kBrTag ||
761 token->GetName() == html_names::kEmbedTag ||
762 token->GetName() == html_names::kImgTag ||
763 token->GetName() == html_names::kKeygenTag ||
764 token->GetName() == html_names::kWbrTag) {
765 tree_.ReconstructTheActiveFormattingElements();
766 tree_.InsertSelfClosingHTMLElementDestroyingToken(token);
767 frameset_ok_ = false;
768 return;
769 }
770 if (token->GetName() == html_names::kInputTag) {
771 // Per spec https://html.spec.whatwg.org/C/#parsing-main-inbody,
772 // section "A start tag whose tag name is "input""
773
774 Attribute* type_attribute = token->GetAttributeItem(html_names::kTypeAttr);
775 bool disable_frameset =
776 !type_attribute ||
777 !EqualIgnoringASCIICase(type_attribute->Value(), "hidden");
778
779 tree_.ReconstructTheActiveFormattingElements();
780 tree_.InsertSelfClosingHTMLElementDestroyingToken(token);
781
782 if (disable_frameset)
783 frameset_ok_ = false;
784 return;
785 }
786 if (token->GetName() == html_names::kParamTag ||
787 token->GetName() == html_names::kSourceTag ||
788 token->GetName() == html_names::kTrackTag) {
789 tree_.InsertSelfClosingHTMLElementDestroyingToken(token);
790 return;
791 }
792 if (token->GetName() == html_names::kHrTag) {
793 ProcessFakePEndTagIfPInButtonScope();
794 tree_.InsertSelfClosingHTMLElementDestroyingToken(token);
795 frameset_ok_ = false;
796 return;
797 }
798 if (token->GetName() == html_names::kTextareaTag) {
799 tree_.InsertHTMLElement(token);
800 should_skip_leading_newline_ = true;
801 if (parser_->Tokenizer())
802 parser_->Tokenizer()->SetState(HTMLTokenizer::kRCDATAState);
803 original_insertion_mode_ = insertion_mode_;
804 frameset_ok_ = false;
805 SetInsertionMode(kTextMode);
806 return;
807 }
808 if (token->GetName() == html_names::kXmpTag) {
809 ProcessFakePEndTagIfPInButtonScope();
810 tree_.ReconstructTheActiveFormattingElements();
811 frameset_ok_ = false;
812 ProcessGenericRawTextStartTag(token);
813 return;
814 }
815 if (token->GetName() == html_names::kIFrameTag) {
816 frameset_ok_ = false;
817 ProcessGenericRawTextStartTag(token);
818 return;
819 }
820 if (token->GetName() == html_names::kNoembedTag) {
821 ProcessGenericRawTextStartTag(token);
822 return;
823 }
824 if (token->GetName() == html_names::kNoscriptTag && options_.scripting_flag) {
825 ProcessGenericRawTextStartTag(token);
826 return;
827 }
828 if (token->GetName() == html_names::kSelectTag) {
829 tree_.ReconstructTheActiveFormattingElements();
830 tree_.InsertHTMLElement(token);
831 frameset_ok_ = false;
832 if (insertion_mode_ == kInTableMode || insertion_mode_ == kInCaptionMode ||
833 insertion_mode_ == kInColumnGroupMode ||
834 insertion_mode_ == kInTableBodyMode || insertion_mode_ == kInRowMode ||
835 insertion_mode_ == kInCellMode)
836 SetInsertionMode(kInSelectInTableMode);
837 else
838 SetInsertionMode(kInSelectMode);
839 return;
840 }
841 if (token->GetName() == html_names::kOptgroupTag ||
842 token->GetName() == html_names::kOptionTag) {
843 if (tree_.CurrentStackItem()->HasTagName(html_names::kOptionTag)) {
844 AtomicHTMLToken end_option(HTMLToken::kEndTag,
845 html_names::kOptionTag.LocalName());
846 ProcessEndTag(&end_option);
847 }
848 tree_.ReconstructTheActiveFormattingElements();
849 tree_.InsertHTMLElement(token);
850 return;
851 }
852 if (token->GetName() == html_names::kRbTag ||
853 token->GetName() == html_names::kRTCTag) {
854 if (tree_.OpenElements()->InScope(html_names::kRubyTag.LocalName())) {
855 tree_.GenerateImpliedEndTags();
856 if (!tree_.CurrentStackItem()->HasTagName(html_names::kRubyTag))
857 ParseError(token);
858 }
859 tree_.InsertHTMLElement(token);
860 return;
861 }
862 if (token->GetName() == html_names::kRtTag ||
863 token->GetName() == html_names::kRpTag) {
864 if (tree_.OpenElements()->InScope(html_names::kRubyTag.LocalName())) {
865 tree_.GenerateImpliedEndTagsWithExclusion(
866 html_names::kRTCTag.LocalName());
867 if (!tree_.CurrentStackItem()->HasTagName(html_names::kRubyTag) &&
868 !tree_.CurrentStackItem()->HasTagName(html_names::kRTCTag))
869 ParseError(token);
870 }
871 tree_.InsertHTMLElement(token);
872 return;
873 }
874 if (token->GetName() == mathml_names::kMathTag.LocalName()) {
875 tree_.ReconstructTheActiveFormattingElements();
876 AdjustMathMLAttributes(token);
877 AdjustForeignAttributes(token);
878 tree_.InsertForeignElement(token, mathml_names::kNamespaceURI);
879 return;
880 }
881 if (token->GetName() == svg_names::kSVGTag.LocalName()) {
882 tree_.ReconstructTheActiveFormattingElements();
883 AdjustSVGAttributes(token);
884 AdjustForeignAttributes(token);
885 tree_.InsertForeignElement(token, svg_names::kNamespaceURI);
886 return;
887 }
888 if (IsCaptionColOrColgroupTag(token->GetName()) ||
889 token->GetName() == html_names::kFrameTag ||
890 token->GetName() == html_names::kHeadTag ||
891 IsTableBodyContextTag(token->GetName()) ||
892 IsTableCellContextTag(token->GetName()) ||
893 token->GetName() == html_names::kTrTag) {
894 ParseError(token);
895 return;
896 }
897 tree_.ReconstructTheActiveFormattingElements();
898 tree_.InsertHTMLElement(token);
899 }
900
ProcessTemplateStartTag(AtomicHTMLToken * token)901 void HTMLTreeBuilder::ProcessTemplateStartTag(AtomicHTMLToken* token) {
902 tree_.ActiveFormattingElements()->AppendMarker();
903
904 DeclarativeShadowRootType declarative_shadow_root_type(
905 DeclarativeShadowRootType::kNone);
906 if (RuntimeEnabledFeatures::DeclarativeShadowDOMEnabled(
907 tree_.CurrentNode()->GetExecutionContext()) &&
908 allow_shadow_root_) {
909 if (Attribute* type_attribute =
910 token->GetAttributeItem(html_names::kShadowrootAttr)) {
911 String shadow_mode = type_attribute->Value();
912 if (EqualIgnoringASCIICase(shadow_mode, "open")) {
913 declarative_shadow_root_type = DeclarativeShadowRootType::kOpen;
914 } else if (EqualIgnoringASCIICase(shadow_mode, "closed")) {
915 declarative_shadow_root_type = DeclarativeShadowRootType::kClosed;
916 } else {
917 tree_.OwnerDocumentForCurrentNode().AddConsoleMessage(
918 MakeGarbageCollected<ConsoleMessage>(
919 mojom::blink::ConsoleMessageSource::kOther,
920 mojom::blink::ConsoleMessageLevel::kWarning,
921 "Invalid declarative shadowroot attribute value \"" +
922 shadow_mode +
923 "\". Valid values include \"open\" and \"closed\"."));
924 }
925 }
926 }
927 tree_.InsertHTMLTemplateElement(token, declarative_shadow_root_type);
928 frameset_ok_ = false;
929 template_insertion_modes_.push_back(kTemplateContentsMode);
930 SetInsertionMode(kTemplateContentsMode);
931 }
932
ProcessTemplateEndTag(AtomicHTMLToken * token)933 bool HTMLTreeBuilder::ProcessTemplateEndTag(AtomicHTMLToken* token) {
934 DCHECK_EQ(token->GetName(), html_names::kTemplateTag.LocalName());
935 if (!tree_.OpenElements()->HasTemplateInHTMLScope()) {
936 DCHECK(template_insertion_modes_.IsEmpty() ||
937 (template_insertion_modes_.size() == 1 &&
938 IsA<HTMLTemplateElement>(fragment_context_.ContextElement())));
939 ParseError(token);
940 return false;
941 }
942 tree_.GenerateImpliedEndTags();
943 if (!tree_.CurrentStackItem()->HasTagName(html_names::kTemplateTag))
944 ParseError(token);
945 tree_.OpenElements()->PopUntil(html_names::kTemplateTag.LocalName());
946 HTMLStackItem* template_stack_item =
947 tree_.OpenElements()->TopRecord()->StackItem();
948 tree_.OpenElements()->Pop();
949 HTMLStackItem* shadow_host_stack_item =
950 tree_.OpenElements()->TopRecord()->StackItem();
951 tree_.ActiveFormattingElements()->ClearToLastMarker();
952 template_insertion_modes_.pop_back();
953 ResetInsertionModeAppropriately();
954 if (RuntimeEnabledFeatures::DeclarativeShadowDOMEnabled(
955 shadow_host_stack_item->GetNode()->GetExecutionContext()) &&
956 template_stack_item) {
957 DCHECK(template_stack_item->IsElementNode());
958 HTMLTemplateElement* template_element =
959 DynamicTo<HTMLTemplateElement>(template_stack_item->GetElement());
960 // 9. If the start tag for the declarative template element did not have an
961 // attribute with the name "shadowroot" whose value was an ASCII
962 // case-insensitive match for the strings "open" or "closed", then stop this
963 // algorithm.
964 if (template_element->IsDeclarativeShadowRoot()) {
965 if (shadow_host_stack_item->GetNode() ==
966 tree_.OpenElements()->RootNode()) {
967 // 10. If the adjusted current node is the topmost element in the stack
968 // of open elements, then stop this algorithm.
969 template_element->SetDeclarativeShadowRootType(
970 DeclarativeShadowRootType::kNone);
971 } else {
972 DCHECK(shadow_host_stack_item);
973 DCHECK(shadow_host_stack_item->IsElementNode());
974 bool delegates_focus = template_stack_item->GetAttributeItem(
975 html_names::kShadowrootdelegatesfocusAttr);
976 // TODO(crbug.com/1063157): Add an attribute for imperative slot
977 // assignment.
978 bool manual_slotting = false;
979 shadow_host_stack_item->GetElement()->AttachDeclarativeShadowRoot(
980 template_element,
981 template_element->GetDeclarativeShadowRootType() ==
982 DeclarativeShadowRootType::kOpen
983 ? ShadowRootType::kOpen
984 : ShadowRootType::kClosed,
985 delegates_focus ? FocusDelegation::kDelegateFocus
986 : FocusDelegation::kNone,
987 manual_slotting ? SlotAssignmentMode::kManual
988 : SlotAssignmentMode::kAuto);
989 }
990 }
991 }
992 return true;
993 }
994
ProcessEndOfFileForInTemplateContents(AtomicHTMLToken * token)995 bool HTMLTreeBuilder::ProcessEndOfFileForInTemplateContents(
996 AtomicHTMLToken* token) {
997 AtomicHTMLToken end_template(HTMLToken::kEndTag,
998 html_names::kTemplateTag.LocalName());
999 if (!ProcessTemplateEndTag(&end_template))
1000 return false;
1001
1002 ProcessEndOfFile(token);
1003 return true;
1004 }
1005
ProcessColgroupEndTagForInColumnGroup()1006 bool HTMLTreeBuilder::ProcessColgroupEndTagForInColumnGroup() {
1007 if (tree_.CurrentIsRootNode() ||
1008 IsA<HTMLTemplateElement>(*tree_.CurrentNode())) {
1009 DCHECK(IsParsingFragmentOrTemplateContents());
1010 // FIXME: parse error
1011 return false;
1012 }
1013 tree_.OpenElements()->Pop();
1014 SetInsertionMode(kInTableMode);
1015 return true;
1016 }
1017
1018 // http://www.whatwg.org/specs/web-apps/current-work/#adjusted-current-node
AdjustedCurrentStackItem() const1019 HTMLStackItem* HTMLTreeBuilder::AdjustedCurrentStackItem() const {
1020 DCHECK(!tree_.IsEmpty());
1021 if (IsParsingFragment() && tree_.OpenElements()->HasOnlyOneElement())
1022 return fragment_context_.ContextElementStackItem();
1023
1024 return tree_.CurrentStackItem();
1025 }
1026
1027 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#close-the-cell
CloseTheCell()1028 void HTMLTreeBuilder::CloseTheCell() {
1029 DCHECK_EQ(GetInsertionMode(), kInCellMode);
1030 if (tree_.OpenElements()->InTableScope(html_names::kTdTag)) {
1031 DCHECK(!tree_.OpenElements()->InTableScope(html_names::kThTag));
1032 ProcessFakeEndTag(html_names::kTdTag);
1033 return;
1034 }
1035 DCHECK(tree_.OpenElements()->InTableScope(html_names::kThTag));
1036 ProcessFakeEndTag(html_names::kThTag);
1037 DCHECK_EQ(GetInsertionMode(), kInRowMode);
1038 }
1039
ProcessStartTagForInTable(AtomicHTMLToken * token)1040 void HTMLTreeBuilder::ProcessStartTagForInTable(AtomicHTMLToken* token) {
1041 DCHECK_EQ(token->GetType(), HTMLToken::kStartTag);
1042 if (token->GetName() == html_names::kCaptionTag) {
1043 tree_.OpenElements()->PopUntilTableScopeMarker();
1044 tree_.ActiveFormattingElements()->AppendMarker();
1045 tree_.InsertHTMLElement(token);
1046 SetInsertionMode(kInCaptionMode);
1047 return;
1048 }
1049 if (token->GetName() == html_names::kColgroupTag) {
1050 tree_.OpenElements()->PopUntilTableScopeMarker();
1051 tree_.InsertHTMLElement(token);
1052 SetInsertionMode(kInColumnGroupMode);
1053 return;
1054 }
1055 if (token->GetName() == html_names::kColTag) {
1056 ProcessFakeStartTag(html_names::kColgroupTag);
1057 DCHECK(kInColumnGroupMode);
1058 ProcessStartTag(token);
1059 return;
1060 }
1061 if (IsTableBodyContextTag(token->GetName())) {
1062 tree_.OpenElements()->PopUntilTableScopeMarker();
1063 tree_.InsertHTMLElement(token);
1064 SetInsertionMode(kInTableBodyMode);
1065 return;
1066 }
1067 if (IsTableCellContextTag(token->GetName()) ||
1068 token->GetName() == html_names::kTrTag) {
1069 ProcessFakeStartTag(html_names::kTbodyTag);
1070 DCHECK_EQ(GetInsertionMode(), kInTableBodyMode);
1071 ProcessStartTag(token);
1072 return;
1073 }
1074 if (token->GetName() == html_names::kTableTag) {
1075 ParseError(token);
1076 if (!ProcessTableEndTagForInTable()) {
1077 DCHECK(IsParsingFragmentOrTemplateContents());
1078 return;
1079 }
1080 ProcessStartTag(token);
1081 return;
1082 }
1083 if (token->GetName() == html_names::kStyleTag ||
1084 token->GetName() == html_names::kScriptTag) {
1085 ProcessStartTagForInHead(token);
1086 return;
1087 }
1088 if (token->GetName() == html_names::kInputTag) {
1089 Attribute* type_attribute = token->GetAttributeItem(html_names::kTypeAttr);
1090 if (type_attribute &&
1091 EqualIgnoringASCIICase(type_attribute->Value(), "hidden")) {
1092 ParseError(token);
1093 tree_.InsertSelfClosingHTMLElementDestroyingToken(token);
1094 return;
1095 }
1096 // Fall through to "anything else" case.
1097 }
1098 if (token->GetName() == html_names::kFormTag) {
1099 ParseError(token);
1100 if (tree_.IsFormElementPointerNonNull() && !IsParsingTemplateContents())
1101 return;
1102 tree_.InsertHTMLFormElement(token, true);
1103 tree_.OpenElements()->Pop();
1104 return;
1105 }
1106 if (token->GetName() == html_names::kTemplateTag) {
1107 ProcessTemplateStartTag(token);
1108 return;
1109 }
1110 ParseError(token);
1111 HTMLConstructionSite::RedirectToFosterParentGuard redirecter(tree_);
1112 ProcessStartTagForInBody(token);
1113 }
1114
ProcessStartTag(AtomicHTMLToken * token)1115 void HTMLTreeBuilder::ProcessStartTag(AtomicHTMLToken* token) {
1116 DCHECK_EQ(token->GetType(), HTMLToken::kStartTag);
1117 switch (GetInsertionMode()) {
1118 case kInitialMode:
1119 DCHECK_EQ(GetInsertionMode(), kInitialMode);
1120 DefaultForInitial();
1121 FALLTHROUGH;
1122 case kBeforeHTMLMode:
1123 DCHECK_EQ(GetInsertionMode(), kBeforeHTMLMode);
1124 if (token->GetName() == html_names::kHTMLTag) {
1125 tree_.InsertHTMLHtmlStartTagBeforeHTML(token);
1126 SetInsertionMode(kBeforeHeadMode);
1127 return;
1128 }
1129 DefaultForBeforeHTML();
1130 FALLTHROUGH;
1131 case kBeforeHeadMode:
1132 DCHECK_EQ(GetInsertionMode(), kBeforeHeadMode);
1133 if (token->GetName() == html_names::kHTMLTag) {
1134 ProcessHtmlStartTagForInBody(token);
1135 return;
1136 }
1137 if (token->GetName() == html_names::kHeadTag) {
1138 tree_.InsertHTMLHeadElement(token);
1139 SetInsertionMode(kInHeadMode);
1140 return;
1141 }
1142 DefaultForBeforeHead();
1143 FALLTHROUGH;
1144 case kInHeadMode:
1145 DCHECK_EQ(GetInsertionMode(), kInHeadMode);
1146 if (ProcessStartTagForInHead(token))
1147 return;
1148 DefaultForInHead();
1149 FALLTHROUGH;
1150 case kAfterHeadMode:
1151 DCHECK_EQ(GetInsertionMode(), kAfterHeadMode);
1152 if (token->GetName() == html_names::kHTMLTag) {
1153 ProcessHtmlStartTagForInBody(token);
1154 return;
1155 }
1156 if (token->GetName() == html_names::kBodyTag) {
1157 frameset_ok_ = false;
1158 tree_.InsertHTMLBodyElement(token);
1159 SetInsertionMode(kInBodyMode);
1160 return;
1161 }
1162 if (token->GetName() == html_names::kFramesetTag) {
1163 tree_.InsertHTMLElement(token);
1164 SetInsertionMode(kInFramesetMode);
1165 return;
1166 }
1167 if (token->GetName() == html_names::kBaseTag ||
1168 token->GetName() == html_names::kBasefontTag ||
1169 token->GetName() == html_names::kBgsoundTag ||
1170 token->GetName() == html_names::kLinkTag ||
1171 token->GetName() == html_names::kMetaTag ||
1172 token->GetName() == html_names::kNoframesTag ||
1173 token->GetName() == html_names::kScriptTag ||
1174 token->GetName() == html_names::kStyleTag ||
1175 token->GetName() == html_names::kTemplateTag ||
1176 token->GetName() == html_names::kTitleTag) {
1177 ParseError(token);
1178 DCHECK(tree_.Head());
1179 tree_.OpenElements()->PushHTMLHeadElement(tree_.HeadStackItem());
1180 ProcessStartTagForInHead(token);
1181 tree_.OpenElements()->RemoveHTMLHeadElement(tree_.Head());
1182 return;
1183 }
1184 if (token->GetName() == html_names::kHeadTag) {
1185 ParseError(token);
1186 return;
1187 }
1188 DefaultForAfterHead();
1189 FALLTHROUGH;
1190 case kInBodyMode:
1191 DCHECK_EQ(GetInsertionMode(), kInBodyMode);
1192 ProcessStartTagForInBody(token);
1193 break;
1194 case kInTableMode:
1195 DCHECK_EQ(GetInsertionMode(), kInTableMode);
1196 ProcessStartTagForInTable(token);
1197 break;
1198 case kInCaptionMode:
1199 DCHECK_EQ(GetInsertionMode(), kInCaptionMode);
1200 if (IsCaptionColOrColgroupTag(token->GetName()) ||
1201 IsTableBodyContextTag(token->GetName()) ||
1202 IsTableCellContextTag(token->GetName()) ||
1203 token->GetName() == html_names::kTrTag) {
1204 ParseError(token);
1205 if (!ProcessCaptionEndTagForInCaption()) {
1206 DCHECK(IsParsingFragment());
1207 return;
1208 }
1209 ProcessStartTag(token);
1210 return;
1211 }
1212 ProcessStartTagForInBody(token);
1213 break;
1214 case kInColumnGroupMode:
1215 DCHECK_EQ(GetInsertionMode(), kInColumnGroupMode);
1216 if (token->GetName() == html_names::kHTMLTag) {
1217 ProcessHtmlStartTagForInBody(token);
1218 return;
1219 }
1220 if (token->GetName() == html_names::kColTag) {
1221 tree_.InsertSelfClosingHTMLElementDestroyingToken(token);
1222 return;
1223 }
1224 if (token->GetName() == html_names::kTemplateTag) {
1225 ProcessTemplateStartTag(token);
1226 return;
1227 }
1228 if (!ProcessColgroupEndTagForInColumnGroup()) {
1229 DCHECK(IsParsingFragmentOrTemplateContents());
1230 return;
1231 }
1232 ProcessStartTag(token);
1233 break;
1234 case kInTableBodyMode:
1235 DCHECK_EQ(GetInsertionMode(), kInTableBodyMode);
1236 if (token->GetName() == html_names::kTrTag) {
1237 // How is there ever anything to pop?
1238 tree_.OpenElements()->PopUntilTableBodyScopeMarker();
1239 tree_.InsertHTMLElement(token);
1240 SetInsertionMode(kInRowMode);
1241 return;
1242 }
1243 if (IsTableCellContextTag(token->GetName())) {
1244 ParseError(token);
1245 ProcessFakeStartTag(html_names::kTrTag);
1246 DCHECK_EQ(GetInsertionMode(), kInRowMode);
1247 ProcessStartTag(token);
1248 return;
1249 }
1250 if (IsCaptionColOrColgroupTag(token->GetName()) ||
1251 IsTableBodyContextTag(token->GetName())) {
1252 // FIXME: This is slow.
1253 if (!tree_.OpenElements()->InTableScope(html_names::kTbodyTag) &&
1254 !tree_.OpenElements()->InTableScope(html_names::kTheadTag) &&
1255 !tree_.OpenElements()->InTableScope(html_names::kTfootTag)) {
1256 DCHECK(IsParsingFragmentOrTemplateContents());
1257 ParseError(token);
1258 return;
1259 }
1260 tree_.OpenElements()->PopUntilTableBodyScopeMarker();
1261 DCHECK(IsTableBodyContextTag(tree_.CurrentStackItem()->LocalName()));
1262 ProcessFakeEndTag(tree_.CurrentStackItem()->LocalName());
1263 ProcessStartTag(token);
1264 return;
1265 }
1266 ProcessStartTagForInTable(token);
1267 break;
1268 case kInRowMode:
1269 DCHECK_EQ(GetInsertionMode(), kInRowMode);
1270 if (IsTableCellContextTag(token->GetName())) {
1271 tree_.OpenElements()->PopUntilTableRowScopeMarker();
1272 tree_.InsertHTMLElement(token);
1273 SetInsertionMode(kInCellMode);
1274 tree_.ActiveFormattingElements()->AppendMarker();
1275 return;
1276 }
1277 if (token->GetName() == html_names::kTrTag ||
1278 IsCaptionColOrColgroupTag(token->GetName()) ||
1279 IsTableBodyContextTag(token->GetName())) {
1280 if (!ProcessTrEndTagForInRow()) {
1281 DCHECK(IsParsingFragmentOrTemplateContents());
1282 return;
1283 }
1284 DCHECK_EQ(GetInsertionMode(), kInTableBodyMode);
1285 ProcessStartTag(token);
1286 return;
1287 }
1288 ProcessStartTagForInTable(token);
1289 break;
1290 case kInCellMode:
1291 DCHECK_EQ(GetInsertionMode(), kInCellMode);
1292 if (IsCaptionColOrColgroupTag(token->GetName()) ||
1293 IsTableCellContextTag(token->GetName()) ||
1294 token->GetName() == html_names::kTrTag ||
1295 IsTableBodyContextTag(token->GetName())) {
1296 // FIXME: This could be more efficient.
1297 if (!tree_.OpenElements()->InTableScope(html_names::kTdTag) &&
1298 !tree_.OpenElements()->InTableScope(html_names::kThTag)) {
1299 DCHECK(IsParsingFragment());
1300 ParseError(token);
1301 return;
1302 }
1303 CloseTheCell();
1304 ProcessStartTag(token);
1305 return;
1306 }
1307 ProcessStartTagForInBody(token);
1308 break;
1309 case kAfterBodyMode:
1310 case kAfterAfterBodyMode:
1311 DCHECK(GetInsertionMode() == kAfterBodyMode ||
1312 GetInsertionMode() == kAfterAfterBodyMode);
1313 if (token->GetName() == html_names::kHTMLTag) {
1314 ProcessHtmlStartTagForInBody(token);
1315 return;
1316 }
1317 SetInsertionMode(kInBodyMode);
1318 ProcessStartTag(token);
1319 break;
1320 case kInHeadNoscriptMode:
1321 DCHECK_EQ(GetInsertionMode(), kInHeadNoscriptMode);
1322 if (token->GetName() == html_names::kHTMLTag) {
1323 ProcessHtmlStartTagForInBody(token);
1324 return;
1325 }
1326 if (token->GetName() == html_names::kBasefontTag ||
1327 token->GetName() == html_names::kBgsoundTag ||
1328 token->GetName() == html_names::kLinkTag ||
1329 token->GetName() == html_names::kMetaTag ||
1330 token->GetName() == html_names::kNoframesTag ||
1331 token->GetName() == html_names::kStyleTag) {
1332 bool did_process = ProcessStartTagForInHead(token);
1333 DCHECK(did_process);
1334 return;
1335 }
1336 if (token->GetName() == html_names::kHTMLTag ||
1337 token->GetName() == html_names::kNoscriptTag) {
1338 ParseError(token);
1339 return;
1340 }
1341 DefaultForInHeadNoscript();
1342 ProcessToken(token);
1343 break;
1344 case kInFramesetMode:
1345 DCHECK_EQ(GetInsertionMode(), kInFramesetMode);
1346 if (token->GetName() == html_names::kHTMLTag) {
1347 ProcessHtmlStartTagForInBody(token);
1348 return;
1349 }
1350 if (token->GetName() == html_names::kFramesetTag) {
1351 tree_.InsertHTMLElement(token);
1352 return;
1353 }
1354 if (token->GetName() == html_names::kFrameTag) {
1355 tree_.InsertSelfClosingHTMLElementDestroyingToken(token);
1356 return;
1357 }
1358 if (token->GetName() == html_names::kNoframesTag) {
1359 ProcessStartTagForInHead(token);
1360 return;
1361 }
1362 ParseError(token);
1363 break;
1364 case kAfterFramesetMode:
1365 case kAfterAfterFramesetMode:
1366 DCHECK(GetInsertionMode() == kAfterFramesetMode ||
1367 GetInsertionMode() == kAfterAfterFramesetMode);
1368 if (token->GetName() == html_names::kHTMLTag) {
1369 ProcessHtmlStartTagForInBody(token);
1370 return;
1371 }
1372 if (token->GetName() == html_names::kNoframesTag) {
1373 ProcessStartTagForInHead(token);
1374 return;
1375 }
1376 ParseError(token);
1377 break;
1378 case kInSelectInTableMode:
1379 DCHECK_EQ(GetInsertionMode(), kInSelectInTableMode);
1380 if (token->GetName() == html_names::kCaptionTag ||
1381 token->GetName() == html_names::kTableTag ||
1382 IsTableBodyContextTag(token->GetName()) ||
1383 token->GetName() == html_names::kTrTag ||
1384 IsTableCellContextTag(token->GetName())) {
1385 ParseError(token);
1386 AtomicHTMLToken end_select(HTMLToken::kEndTag,
1387 html_names::kSelectTag.LocalName());
1388 ProcessEndTag(&end_select);
1389 ProcessStartTag(token);
1390 return;
1391 }
1392 FALLTHROUGH;
1393 case kInSelectMode:
1394 DCHECK(GetInsertionMode() == kInSelectMode ||
1395 GetInsertionMode() == kInSelectInTableMode);
1396 if (token->GetName() == html_names::kHTMLTag) {
1397 ProcessHtmlStartTagForInBody(token);
1398 return;
1399 }
1400 if (token->GetName() == html_names::kOptionTag) {
1401 if (tree_.CurrentStackItem()->HasTagName(html_names::kOptionTag)) {
1402 AtomicHTMLToken end_option(HTMLToken::kEndTag,
1403 html_names::kOptionTag.LocalName());
1404 ProcessEndTag(&end_option);
1405 }
1406 tree_.InsertHTMLElement(token);
1407 return;
1408 }
1409 if (token->GetName() == html_names::kOptgroupTag) {
1410 if (tree_.CurrentStackItem()->HasTagName(html_names::kOptionTag)) {
1411 AtomicHTMLToken end_option(HTMLToken::kEndTag,
1412 html_names::kOptionTag.LocalName());
1413 ProcessEndTag(&end_option);
1414 }
1415 if (tree_.CurrentStackItem()->HasTagName(html_names::kOptgroupTag)) {
1416 AtomicHTMLToken end_optgroup(HTMLToken::kEndTag,
1417 html_names::kOptgroupTag.LocalName());
1418 ProcessEndTag(&end_optgroup);
1419 }
1420 tree_.InsertHTMLElement(token);
1421 return;
1422 }
1423 if (token->GetName() == html_names::kSelectTag) {
1424 ParseError(token);
1425 AtomicHTMLToken end_select(HTMLToken::kEndTag,
1426 html_names::kSelectTag.LocalName());
1427 ProcessEndTag(&end_select);
1428 return;
1429 }
1430 if (token->GetName() == html_names::kInputTag ||
1431 token->GetName() == html_names::kKeygenTag ||
1432 token->GetName() == html_names::kTextareaTag) {
1433 ParseError(token);
1434 if (!tree_.OpenElements()->InSelectScope(html_names::kSelectTag)) {
1435 DCHECK(IsParsingFragment());
1436 return;
1437 }
1438 AtomicHTMLToken end_select(HTMLToken::kEndTag,
1439 html_names::kSelectTag.LocalName());
1440 ProcessEndTag(&end_select);
1441 ProcessStartTag(token);
1442 return;
1443 }
1444 if (token->GetName() == html_names::kScriptTag) {
1445 bool did_process = ProcessStartTagForInHead(token);
1446 DCHECK(did_process);
1447 return;
1448 }
1449 if (token->GetName() == html_names::kTemplateTag) {
1450 ProcessTemplateStartTag(token);
1451 return;
1452 }
1453 break;
1454 case kInTableTextMode:
1455 DefaultForInTableText();
1456 ProcessStartTag(token);
1457 break;
1458 case kTextMode:
1459 NOTREACHED();
1460 break;
1461 case kTemplateContentsMode:
1462 if (token->GetName() == html_names::kTemplateTag) {
1463 ProcessTemplateStartTag(token);
1464 return;
1465 }
1466
1467 if (token->GetName() == html_names::kLinkTag ||
1468 token->GetName() == html_names::kScriptTag ||
1469 token->GetName() == html_names::kStyleTag ||
1470 token->GetName() == html_names::kMetaTag) {
1471 ProcessStartTagForInHead(token);
1472 return;
1473 }
1474
1475 InsertionMode insertion_mode = kTemplateContentsMode;
1476 if (token->GetName() == html_names::kColTag)
1477 insertion_mode = kInColumnGroupMode;
1478 else if (IsCaptionColOrColgroupTag(token->GetName()) ||
1479 IsTableBodyContextTag(token->GetName()))
1480 insertion_mode = kInTableMode;
1481 else if (token->GetName() == html_names::kTrTag)
1482 insertion_mode = kInTableBodyMode;
1483 else if (IsTableCellContextTag(token->GetName()))
1484 insertion_mode = kInRowMode;
1485 else
1486 insertion_mode = kInBodyMode;
1487
1488 DCHECK_NE(insertion_mode, kTemplateContentsMode);
1489 DCHECK_EQ(template_insertion_modes_.back(), kTemplateContentsMode);
1490 template_insertion_modes_.back() = insertion_mode;
1491 SetInsertionMode(insertion_mode);
1492
1493 ProcessStartTag(token);
1494 break;
1495 }
1496 }
1497
ProcessHtmlStartTagForInBody(AtomicHTMLToken * token)1498 void HTMLTreeBuilder::ProcessHtmlStartTagForInBody(AtomicHTMLToken* token) {
1499 ParseError(token);
1500 if (tree_.OpenElements()->HasTemplateInHTMLScope()) {
1501 DCHECK(IsParsingTemplateContents());
1502 return;
1503 }
1504 tree_.InsertHTMLHtmlStartTagInBody(token);
1505 }
1506
ProcessBodyEndTagForInBody(AtomicHTMLToken * token)1507 bool HTMLTreeBuilder::ProcessBodyEndTagForInBody(AtomicHTMLToken* token) {
1508 DCHECK_EQ(token->GetType(), HTMLToken::kEndTag);
1509 DCHECK(token->GetName() == html_names::kBodyTag);
1510 if (!tree_.OpenElements()->InScope(html_names::kBodyTag.LocalName())) {
1511 ParseError(token);
1512 return false;
1513 }
1514 // Emit a more specific parse error based on stack contents.
1515 DVLOG(1) << "Not implmeneted.";
1516 SetInsertionMode(kAfterBodyMode);
1517 return true;
1518 }
1519
ProcessAnyOtherEndTagForInBody(AtomicHTMLToken * token)1520 void HTMLTreeBuilder::ProcessAnyOtherEndTagForInBody(AtomicHTMLToken* token) {
1521 DCHECK_EQ(token->GetType(), HTMLToken::kEndTag);
1522 HTMLElementStack::ElementRecord* record = tree_.OpenElements()->TopRecord();
1523 while (1) {
1524 HTMLStackItem* item = record->StackItem();
1525 if (item->MatchesHTMLTag(token->GetName())) {
1526 tree_.GenerateImpliedEndTagsWithExclusion(token->GetName());
1527 if (!tree_.CurrentStackItem()->MatchesHTMLTag(token->GetName()))
1528 ParseError(token);
1529 tree_.OpenElements()->PopUntilPopped(item->GetElement());
1530 return;
1531 }
1532 if (item->IsSpecialNode()) {
1533 ParseError(token);
1534 return;
1535 }
1536 record = record->Next();
1537 }
1538 }
1539
// http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#parsing-main-inbody
//
// Runs the adoption agency algorithm for the end tag |token|. The numbered
// comments below refer to the steps of that algorithm. Each outer iteration
// looks up the closest active formatting element with the token's name and
// either finishes early (no such element, element not in scope or not on the
// stack, or no furthest block) or restructures the tree around the furthest
// block and repeats, up to kOuterIterationLimit times.
void HTMLTreeBuilder::CallTheAdoptionAgency(AtomicHTMLToken* token) {
  // The adoption agency algorithm is N^2. We limit the number of iterations
  // to stop from hanging the whole browser. This limit is specified in the
  // adoption agency algorithm:
  // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#parsing-main-inbody
  static const int kOuterIterationLimit = 8;
  static const int kInnerIterationLimit = 3;

  // 1, 2, 3 and 16 are covered by the for() loop.
  for (int i = 0; i < kOuterIterationLimit; ++i) {
    // 4.
    Element* formatting_element =
        tree_.ActiveFormattingElements()->ClosestElementInScopeWithName(
            token->GetName());
    // 4.a
    // No matching formatting element: treat the token as any other end tag.
    if (!formatting_element)
      return ProcessAnyOtherEndTagForInBody(token);
    // 4.c
    // The element is on the stack of open elements but not in scope: parse
    // error, ignore the token.
    if ((tree_.OpenElements()->Contains(formatting_element)) &&
        !tree_.OpenElements()->InScope(formatting_element)) {
      ParseError(token);
      // Check the stack of open elements for a more specific parse error.
      DVLOG(1) << "Not implemented.";
      return;
    }
    // 4.b
    // The element is not on the stack of open elements at all: parse error,
    // and drop it from the active formatting elements.
    HTMLElementStack::ElementRecord* formatting_element_record =
        tree_.OpenElements()->Find(formatting_element);
    if (!formatting_element_record) {
      ParseError(token);
      tree_.ActiveFormattingElements()->Remove(formatting_element);
      return;
    }
    // 4.d
    // Parse error only; processing continues.
    if (formatting_element != tree_.CurrentElement())
      ParseError(token);
    // 5.
    HTMLElementStack::ElementRecord* furthest_block =
        tree_.OpenElements()->FurthestBlockForFormattingElement(
            formatting_element);
    // 6.
    // No furthest block: pop through the formatting element, remove it from
    // the active formatting elements, and finish.
    if (!furthest_block) {
      tree_.OpenElements()->PopUntilPopped(formatting_element);
      tree_.ActiveFormattingElements()->Remove(formatting_element);
      return;
    }
    // 7.
    // The common ancestor is the stack entry that follows the formatting
    // element's record.
    DCHECK(furthest_block->IsAbove(formatting_element_record));
    HTMLStackItem* common_ancestor =
        formatting_element_record->Next()->StackItem();
    // 8.
    HTMLFormattingElementList::Bookmark bookmark =
        tree_.ActiveFormattingElements()->BookmarkFor(formatting_element);
    // 9.
    HTMLElementStack::ElementRecord* node = furthest_block;
    HTMLElementStack::ElementRecord* next_node = node->Next();
    HTMLElementStack::ElementRecord* last_node = furthest_block;
    // 9.1, 9.2, 9.3 and 9.11 are covered by the for() loop.
    for (int j = 0; j < kInnerIterationLimit; ++j) {
      // 9.4
      node = next_node;
      DCHECK(node);
      // Save node->next() for the next iteration in case node is deleted in
      // 9.5.
      next_node = node->Next();
      // 9.5
      // Nodes that are not active formatting elements are removed from the
      // stack of open elements and skipped.
      if (!tree_.ActiveFormattingElements()->Contains(node->GetElement())) {
        tree_.OpenElements()->Remove(node->GetElement());
        node = nullptr;
        continue;
      }
      // 9.6
      if (node == formatting_element_record)
        break;
      // 9.7
      // Create a replacement element from the node's saved token and swap it
      // into both the active formatting list entry and the stack record.
      HTMLStackItem* new_item =
          tree_.CreateElementFromSavedToken(node->StackItem());

      HTMLFormattingElementList::Entry* node_entry =
          tree_.ActiveFormattingElements()->Find(node->GetElement());
      node_entry->ReplaceElement(new_item);
      node->ReplaceElement(new_item);

      // 9.8
      // Only on the first replaced node (while last_node is still the
      // furthest block) is the bookmark moved.
      if (last_node == furthest_block)
        bookmark.MoveToAfter(node_entry);
      // 9.9
      tree_.Reparent(node, last_node);
      // 9.10
      last_node = node;
    }
    // 10.
    tree_.InsertAlreadyParsedChild(common_ancestor, last_node);
    // 11.
    // This new_item is a fresh element created from the formatting element's
    // saved token; it is distinct from the loop-local new_item above.
    HTMLStackItem* new_item = tree_.CreateElementFromSavedToken(
        formatting_element_record->StackItem());
    // 12.
    tree_.TakeAllChildren(new_item, furthest_block);
    // 13.
    tree_.Reparent(furthest_block, new_item);
    // 14.
    tree_.ActiveFormattingElements()->SwapTo(formatting_element, new_item,
                                             bookmark);
    // 15.
    tree_.OpenElements()->Remove(formatting_element);
    tree_.OpenElements()->InsertAbove(new_item, furthest_block);
  }
}
1649
ResetInsertionModeAppropriately()1650 void HTMLTreeBuilder::ResetInsertionModeAppropriately() {
1651 // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#reset-the-insertion-mode-appropriately
1652 bool last = false;
1653 HTMLElementStack::ElementRecord* node_record =
1654 tree_.OpenElements()->TopRecord();
1655 while (1) {
1656 HTMLStackItem* item = node_record->StackItem();
1657 if (item->GetNode() == tree_.OpenElements()->RootNode()) {
1658 last = true;
1659 if (IsParsingFragment())
1660 item = fragment_context_.ContextElementStackItem();
1661 }
1662 if (item->HasTagName(html_names::kTemplateTag))
1663 return SetInsertionMode(template_insertion_modes_.back());
1664 if (item->HasTagName(html_names::kSelectTag)) {
1665 if (!last) {
1666 while (item->GetNode() != tree_.OpenElements()->RootNode() &&
1667 !item->HasTagName(html_names::kTemplateTag)) {
1668 node_record = node_record->Next();
1669 item = node_record->StackItem();
1670 if (item->HasTagName(html_names::kTableTag))
1671 return SetInsertionMode(kInSelectInTableMode);
1672 }
1673 }
1674 return SetInsertionMode(kInSelectMode);
1675 }
1676 if (item->HasTagName(html_names::kTdTag) ||
1677 item->HasTagName(html_names::kThTag))
1678 return SetInsertionMode(kInCellMode);
1679 if (item->HasTagName(html_names::kTrTag))
1680 return SetInsertionMode(kInRowMode);
1681 if (item->HasTagName(html_names::kTbodyTag) ||
1682 item->HasTagName(html_names::kTheadTag) ||
1683 item->HasTagName(html_names::kTfootTag))
1684 return SetInsertionMode(kInTableBodyMode);
1685 if (item->HasTagName(html_names::kCaptionTag))
1686 return SetInsertionMode(kInCaptionMode);
1687 if (item->HasTagName(html_names::kColgroupTag)) {
1688 return SetInsertionMode(kInColumnGroupMode);
1689 }
1690 if (item->HasTagName(html_names::kTableTag))
1691 return SetInsertionMode(kInTableMode);
1692 if (item->HasTagName(html_names::kHeadTag)) {
1693 if (!fragment_context_.Fragment() ||
1694 fragment_context_.ContextElement() != item->GetNode())
1695 return SetInsertionMode(kInHeadMode);
1696 return SetInsertionMode(kInBodyMode);
1697 }
1698 if (item->HasTagName(html_names::kBodyTag))
1699 return SetInsertionMode(kInBodyMode);
1700 if (item->HasTagName(html_names::kFramesetTag)) {
1701 return SetInsertionMode(kInFramesetMode);
1702 }
1703 if (item->HasTagName(html_names::kHTMLTag)) {
1704 if (tree_.HeadStackItem())
1705 return SetInsertionMode(kAfterHeadMode);
1706
1707 DCHECK(IsParsingFragment());
1708 return SetInsertionMode(kBeforeHeadMode);
1709 }
1710 if (last) {
1711 DCHECK(IsParsingFragment());
1712 return SetInsertionMode(kInBodyMode);
1713 }
1714 node_record = node_record->Next();
1715 }
1716 }
1717
ProcessEndTagForInTableBody(AtomicHTMLToken * token)1718 void HTMLTreeBuilder::ProcessEndTagForInTableBody(AtomicHTMLToken* token) {
1719 DCHECK_EQ(token->GetType(), HTMLToken::kEndTag);
1720 if (IsTableBodyContextTag(token->GetName())) {
1721 if (!tree_.OpenElements()->InTableScope(token->GetName())) {
1722 ParseError(token);
1723 return;
1724 }
1725 tree_.OpenElements()->PopUntilTableBodyScopeMarker();
1726 tree_.OpenElements()->Pop();
1727 SetInsertionMode(kInTableMode);
1728 return;
1729 }
1730 if (token->GetName() == html_names::kTableTag) {
1731 // FIXME: This is slow.
1732 if (!tree_.OpenElements()->InTableScope(html_names::kTbodyTag) &&
1733 !tree_.OpenElements()->InTableScope(html_names::kTheadTag) &&
1734 !tree_.OpenElements()->InTableScope(html_names::kTfootTag)) {
1735 DCHECK(IsParsingFragmentOrTemplateContents());
1736 ParseError(token);
1737 return;
1738 }
1739 tree_.OpenElements()->PopUntilTableBodyScopeMarker();
1740 DCHECK(IsTableBodyContextTag(tree_.CurrentStackItem()->LocalName()));
1741 ProcessFakeEndTag(tree_.CurrentStackItem()->LocalName());
1742 ProcessEndTag(token);
1743 return;
1744 }
1745 if (token->GetName() == html_names::kBodyTag ||
1746 IsCaptionColOrColgroupTag(token->GetName()) ||
1747 token->GetName() == html_names::kHTMLTag ||
1748 IsTableCellContextTag(token->GetName()) ||
1749 token->GetName() == html_names::kTrTag) {
1750 ParseError(token);
1751 return;
1752 }
1753 ProcessEndTagForInTable(token);
1754 }
1755
ProcessEndTagForInRow(AtomicHTMLToken * token)1756 void HTMLTreeBuilder::ProcessEndTagForInRow(AtomicHTMLToken* token) {
1757 DCHECK_EQ(token->GetType(), HTMLToken::kEndTag);
1758 if (token->GetName() == html_names::kTrTag) {
1759 ProcessTrEndTagForInRow();
1760 return;
1761 }
1762 if (token->GetName() == html_names::kTableTag) {
1763 if (!ProcessTrEndTagForInRow()) {
1764 DCHECK(IsParsingFragmentOrTemplateContents());
1765 return;
1766 }
1767 DCHECK_EQ(GetInsertionMode(), kInTableBodyMode);
1768 ProcessEndTag(token);
1769 return;
1770 }
1771 if (IsTableBodyContextTag(token->GetName())) {
1772 if (!tree_.OpenElements()->InTableScope(token->GetName())) {
1773 ParseError(token);
1774 return;
1775 }
1776 ProcessFakeEndTag(html_names::kTrTag);
1777 DCHECK_EQ(GetInsertionMode(), kInTableBodyMode);
1778 ProcessEndTag(token);
1779 return;
1780 }
1781 if (token->GetName() == html_names::kBodyTag ||
1782 IsCaptionColOrColgroupTag(token->GetName()) ||
1783 token->GetName() == html_names::kHTMLTag ||
1784 IsTableCellContextTag(token->GetName())) {
1785 ParseError(token);
1786 return;
1787 }
1788 ProcessEndTagForInTable(token);
1789 }
1790
ProcessEndTagForInCell(AtomicHTMLToken * token)1791 void HTMLTreeBuilder::ProcessEndTagForInCell(AtomicHTMLToken* token) {
1792 DCHECK_EQ(token->GetType(), HTMLToken::kEndTag);
1793 if (IsTableCellContextTag(token->GetName())) {
1794 if (!tree_.OpenElements()->InTableScope(token->GetName())) {
1795 ParseError(token);
1796 return;
1797 }
1798 tree_.GenerateImpliedEndTags();
1799 if (!tree_.CurrentStackItem()->MatchesHTMLTag(token->GetName()))
1800 ParseError(token);
1801 tree_.OpenElements()->PopUntilPopped(token->GetName());
1802 tree_.ActiveFormattingElements()->ClearToLastMarker();
1803 SetInsertionMode(kInRowMode);
1804 return;
1805 }
1806 if (token->GetName() == html_names::kBodyTag ||
1807 IsCaptionColOrColgroupTag(token->GetName()) ||
1808 token->GetName() == html_names::kHTMLTag) {
1809 ParseError(token);
1810 return;
1811 }
1812 if (token->GetName() == html_names::kTableTag ||
1813 token->GetName() == html_names::kTrTag ||
1814 IsTableBodyContextTag(token->GetName())) {
1815 if (!tree_.OpenElements()->InTableScope(token->GetName())) {
1816 DCHECK(IsTableBodyContextTag(token->GetName()) ||
1817 tree_.OpenElements()->InTableScope(html_names::kTemplateTag) ||
1818 IsParsingFragment());
1819 ParseError(token);
1820 return;
1821 }
1822 CloseTheCell();
1823 ProcessEndTag(token);
1824 return;
1825 }
1826 ProcessEndTagForInBody(token);
1827 }
1828
ProcessEndTagForInBody(AtomicHTMLToken * token)1829 void HTMLTreeBuilder::ProcessEndTagForInBody(AtomicHTMLToken* token) {
1830 DCHECK_EQ(token->GetType(), HTMLToken::kEndTag);
1831 if (token->GetName() == html_names::kBodyTag) {
1832 ProcessBodyEndTagForInBody(token);
1833 return;
1834 }
1835 if (token->GetName() == html_names::kHTMLTag) {
1836 AtomicHTMLToken end_body(HTMLToken::kEndTag,
1837 html_names::kBodyTag.LocalName());
1838 if (ProcessBodyEndTagForInBody(&end_body))
1839 ProcessEndTag(token);
1840 return;
1841 }
1842 if (token->GetName() == html_names::kAddressTag ||
1843 token->GetName() == html_names::kArticleTag ||
1844 token->GetName() == html_names::kAsideTag ||
1845 token->GetName() == html_names::kBlockquoteTag ||
1846 token->GetName() == html_names::kButtonTag ||
1847 token->GetName() == html_names::kCenterTag ||
1848 token->GetName() == html_names::kDetailsTag ||
1849 token->GetName() == html_names::kDirTag ||
1850 token->GetName() == html_names::kDivTag ||
1851 token->GetName() == html_names::kDlTag ||
1852 token->GetName() == html_names::kFieldsetTag ||
1853 token->GetName() == html_names::kFigcaptionTag ||
1854 token->GetName() == html_names::kFigureTag ||
1855 token->GetName() == html_names::kFooterTag ||
1856 token->GetName() == html_names::kHeaderTag ||
1857 token->GetName() == html_names::kHgroupTag ||
1858 token->GetName() == html_names::kListingTag ||
1859 token->GetName() == html_names::kMainTag ||
1860 token->GetName() == html_names::kMenuTag ||
1861 token->GetName() == html_names::kNavTag ||
1862 token->GetName() == html_names::kOlTag ||
1863 token->GetName() == html_names::kPreTag ||
1864 token->GetName() == html_names::kSectionTag ||
1865 token->GetName() == html_names::kSummaryTag ||
1866 token->GetName() == html_names::kUlTag) {
1867 if (!tree_.OpenElements()->InScope(token->GetName())) {
1868 ParseError(token);
1869 return;
1870 }
1871 tree_.GenerateImpliedEndTags();
1872 if (!tree_.CurrentStackItem()->MatchesHTMLTag(token->GetName()))
1873 ParseError(token);
1874 tree_.OpenElements()->PopUntilPopped(token->GetName());
1875 return;
1876 }
1877 if (token->GetName() == html_names::kFormTag &&
1878 !IsParsingTemplateContents()) {
1879 Element* node = tree_.TakeForm();
1880 if (!node || !tree_.OpenElements()->InScope(node)) {
1881 ParseError(token);
1882 return;
1883 }
1884 tree_.GenerateImpliedEndTags();
1885 if (tree_.CurrentElement() != node)
1886 ParseError(token);
1887 tree_.OpenElements()->Remove(node);
1888 }
1889 if (token->GetName() == html_names::kPTag) {
1890 if (!tree_.OpenElements()->InButtonScope(token->GetName())) {
1891 ParseError(token);
1892 ProcessFakeStartTag(html_names::kPTag);
1893 DCHECK(tree_.OpenElements()->InScope(token->GetName()));
1894 ProcessEndTag(token);
1895 return;
1896 }
1897 tree_.GenerateImpliedEndTagsWithExclusion(token->GetName());
1898 if (!tree_.CurrentStackItem()->MatchesHTMLTag(token->GetName()))
1899 ParseError(token);
1900 tree_.OpenElements()->PopUntilPopped(token->GetName());
1901 return;
1902 }
1903 if (token->GetName() == html_names::kLiTag) {
1904 if (!tree_.OpenElements()->InListItemScope(token->GetName())) {
1905 ParseError(token);
1906 return;
1907 }
1908 tree_.GenerateImpliedEndTagsWithExclusion(token->GetName());
1909 if (!tree_.CurrentStackItem()->MatchesHTMLTag(token->GetName()))
1910 ParseError(token);
1911 tree_.OpenElements()->PopUntilPopped(token->GetName());
1912 return;
1913 }
1914 if (token->GetName() == html_names::kDdTag ||
1915 token->GetName() == html_names::kDtTag) {
1916 if (!tree_.OpenElements()->InScope(token->GetName())) {
1917 ParseError(token);
1918 return;
1919 }
1920 tree_.GenerateImpliedEndTagsWithExclusion(token->GetName());
1921 if (!tree_.CurrentStackItem()->MatchesHTMLTag(token->GetName()))
1922 ParseError(token);
1923 tree_.OpenElements()->PopUntilPopped(token->GetName());
1924 return;
1925 }
1926 if (IsNumberedHeaderTag(token->GetName())) {
1927 if (!tree_.OpenElements()->HasNumberedHeaderElementInScope()) {
1928 ParseError(token);
1929 return;
1930 }
1931 tree_.GenerateImpliedEndTags();
1932 if (!tree_.CurrentStackItem()->MatchesHTMLTag(token->GetName()))
1933 ParseError(token);
1934 tree_.OpenElements()->PopUntilNumberedHeaderElementPopped();
1935 return;
1936 }
1937 if (IsFormattingTag(token->GetName())) {
1938 CallTheAdoptionAgency(token);
1939 return;
1940 }
1941 if (token->GetName() == html_names::kAppletTag ||
1942 token->GetName() == html_names::kMarqueeTag ||
1943 token->GetName() == html_names::kObjectTag) {
1944 if (!tree_.OpenElements()->InScope(token->GetName())) {
1945 ParseError(token);
1946 return;
1947 }
1948 tree_.GenerateImpliedEndTags();
1949 if (!tree_.CurrentStackItem()->MatchesHTMLTag(token->GetName()))
1950 ParseError(token);
1951 tree_.OpenElements()->PopUntilPopped(token->GetName());
1952 tree_.ActiveFormattingElements()->ClearToLastMarker();
1953 return;
1954 }
1955 if (token->GetName() == html_names::kBrTag) {
1956 ParseError(token);
1957 ProcessFakeStartTag(html_names::kBrTag);
1958 return;
1959 }
1960 if (token->GetName() == html_names::kTemplateTag) {
1961 ProcessTemplateEndTag(token);
1962 return;
1963 }
1964 ProcessAnyOtherEndTagForInBody(token);
1965 }
1966
ProcessCaptionEndTagForInCaption()1967 bool HTMLTreeBuilder::ProcessCaptionEndTagForInCaption() {
1968 if (!tree_.OpenElements()->InTableScope(
1969 html_names::kCaptionTag.LocalName())) {
1970 DCHECK(IsParsingFragment());
1971 // FIXME: parse error
1972 return false;
1973 }
1974 tree_.GenerateImpliedEndTags();
1975 // FIXME: parse error if
1976 // (!tree_.CurrentStackItem()->HasTagName(html_names::kCaptionTag))
1977 tree_.OpenElements()->PopUntilPopped(html_names::kCaptionTag.LocalName());
1978 tree_.ActiveFormattingElements()->ClearToLastMarker();
1979 SetInsertionMode(kInTableMode);
1980 return true;
1981 }
1982
ProcessTrEndTagForInRow()1983 bool HTMLTreeBuilder::ProcessTrEndTagForInRow() {
1984 if (!tree_.OpenElements()->InTableScope(html_names::kTrTag)) {
1985 DCHECK(IsParsingFragmentOrTemplateContents());
1986 // FIXME: parse error
1987 return false;
1988 }
1989 tree_.OpenElements()->PopUntilTableRowScopeMarker();
1990 DCHECK(tree_.CurrentStackItem()->HasTagName(html_names::kTrTag));
1991 tree_.OpenElements()->Pop();
1992 SetInsertionMode(kInTableBodyMode);
1993 return true;
1994 }
1995
ProcessTableEndTagForInTable()1996 bool HTMLTreeBuilder::ProcessTableEndTagForInTable() {
1997 if (!tree_.OpenElements()->InTableScope(html_names::kTableTag)) {
1998 DCHECK(IsParsingFragmentOrTemplateContents());
1999 // FIXME: parse error.
2000 return false;
2001 }
2002 tree_.OpenElements()->PopUntilPopped(html_names::kTableTag.LocalName());
2003 ResetInsertionModeAppropriately();
2004 return true;
2005 }
2006
ProcessEndTagForInTable(AtomicHTMLToken * token)2007 void HTMLTreeBuilder::ProcessEndTagForInTable(AtomicHTMLToken* token) {
2008 DCHECK_EQ(token->GetType(), HTMLToken::kEndTag);
2009 if (token->GetName() == html_names::kTableTag) {
2010 ProcessTableEndTagForInTable();
2011 return;
2012 }
2013 if (token->GetName() == html_names::kBodyTag ||
2014 IsCaptionColOrColgroupTag(token->GetName()) ||
2015 token->GetName() == html_names::kHTMLTag ||
2016 IsTableBodyContextTag(token->GetName()) ||
2017 IsTableCellContextTag(token->GetName()) ||
2018 token->GetName() == html_names::kTrTag) {
2019 ParseError(token);
2020 return;
2021 }
2022 ParseError(token);
2023 // Is this redirection necessary here?
2024 HTMLConstructionSite::RedirectToFosterParentGuard redirecter(tree_);
2025 ProcessEndTagForInBody(token);
2026 }
2027
// Dispatches an end tag token according to the current insertion mode.
//
// Several cases deliberately fall through: after a DefaultFor*() call has
// advanced the insertion mode (as asserted by the DCHECK at the top of the
// following case), the same token is handled by the next case. Other cases
// change the mode and then call ProcessEndTag() again to reprocess the token.
void HTMLTreeBuilder::ProcessEndTag(AtomicHTMLToken* token) {
  DCHECK_EQ(token->GetType(), HTMLToken::kEndTag);
  switch (GetInsertionMode()) {
    case kInitialMode:
      DCHECK_EQ(GetInsertionMode(), kInitialMode);
      DefaultForInitial();
      FALLTHROUGH;
    case kBeforeHTMLMode:
      DCHECK_EQ(GetInsertionMode(), kBeforeHTMLMode);
      // Only </head>, </body>, </html> and </br> are acted upon here; any
      // other end tag is a parse error and is dropped.
      if (token->GetName() != html_names::kHeadTag &&
          token->GetName() != html_names::kBodyTag &&
          token->GetName() != html_names::kHTMLTag &&
          token->GetName() != html_names::kBrTag) {
        ParseError(token);
        return;
      }
      DefaultForBeforeHTML();
      FALLTHROUGH;
    case kBeforeHeadMode:
      DCHECK_EQ(GetInsertionMode(), kBeforeHeadMode);
      // Same filter as in kBeforeHTMLMode.
      if (token->GetName() != html_names::kHeadTag &&
          token->GetName() != html_names::kBodyTag &&
          token->GetName() != html_names::kHTMLTag &&
          token->GetName() != html_names::kBrTag) {
        ParseError(token);
        return;
      }
      DefaultForBeforeHead();
      FALLTHROUGH;
    case kInHeadMode:
      DCHECK_EQ(GetInsertionMode(), kInHeadMode);
      // FIXME: This case should be broken out into processEndTagForInHead,
      // because other end tag cases now refer to it ("process the token for
      // using the rules of the "in head" insertion mode"). but because the
      // logic falls through to AfterHeadMode, that gets a little messy.
      if (token->GetName() == html_names::kTemplateTag) {
        ProcessTemplateEndTag(token);
        return;
      }
      if (token->GetName() == html_names::kHeadTag) {
        tree_.OpenElements()->PopHTMLHeadElement();
        SetInsertionMode(kAfterHeadMode);
        return;
      }
      // Anything other than </body>, </html> or </br> is dropped with a
      // parse error.
      if (token->GetName() != html_names::kBodyTag &&
          token->GetName() != html_names::kHTMLTag &&
          token->GetName() != html_names::kBrTag) {
        ParseError(token);
        return;
      }
      DefaultForInHead();
      FALLTHROUGH;
    case kAfterHeadMode:
      DCHECK_EQ(GetInsertionMode(), kAfterHeadMode);
      if (token->GetName() != html_names::kBodyTag &&
          token->GetName() != html_names::kHTMLTag &&
          token->GetName() != html_names::kBrTag) {
        ParseError(token);
        return;
      }
      DefaultForAfterHead();
      FALLTHROUGH;
    case kInBodyMode:
      DCHECK_EQ(GetInsertionMode(), kInBodyMode);
      ProcessEndTagForInBody(token);
      break;
    case kInTableMode:
      DCHECK_EQ(GetInsertionMode(), kInTableMode);
      ProcessEndTagForInTable(token);
      break;
    case kInCaptionMode:
      DCHECK_EQ(GetInsertionMode(), kInCaptionMode);
      if (token->GetName() == html_names::kCaptionTag) {
        ProcessCaptionEndTagForInCaption();
        return;
      }
      if (token->GetName() == html_names::kTableTag) {
        // </table> inside a caption: close the caption first, then reprocess
        // the token in the resulting mode.
        ParseError(token);
        if (!ProcessCaptionEndTagForInCaption()) {
          DCHECK(IsParsingFragment());
          return;
        }
        ProcessEndTag(token);
        return;
      }
      // These end tags are dropped with a parse error.
      if (token->GetName() == html_names::kBodyTag ||
          token->GetName() == html_names::kColTag ||
          token->GetName() == html_names::kColgroupTag ||
          token->GetName() == html_names::kHTMLTag ||
          IsTableBodyContextTag(token->GetName()) ||
          IsTableCellContextTag(token->GetName()) ||
          token->GetName() == html_names::kTrTag) {
        ParseError(token);
        return;
      }
      ProcessEndTagForInBody(token);
      break;
    case kInColumnGroupMode:
      DCHECK_EQ(GetInsertionMode(), kInColumnGroupMode);
      if (token->GetName() == html_names::kColgroupTag) {
        ProcessColgroupEndTagForInColumnGroup();
        return;
      }
      if (token->GetName() == html_names::kColTag) {
        ParseError(token);
        return;
      }
      if (token->GetName() == html_names::kTemplateTag) {
        ProcessTemplateEndTag(token);
        return;
      }
      // Any other end tag: act as if </colgroup> had been seen and, if that
      // succeeded, reprocess the token.
      if (!ProcessColgroupEndTagForInColumnGroup()) {
        DCHECK(IsParsingFragmentOrTemplateContents());
        return;
      }
      ProcessEndTag(token);
      break;
    case kInRowMode:
      DCHECK_EQ(GetInsertionMode(), kInRowMode);
      ProcessEndTagForInRow(token);
      break;
    case kInCellMode:
      DCHECK_EQ(GetInsertionMode(), kInCellMode);
      ProcessEndTagForInCell(token);
      break;
    case kInTableBodyMode:
      DCHECK_EQ(GetInsertionMode(), kInTableBodyMode);
      ProcessEndTagForInTableBody(token);
      break;
    case kAfterBodyMode:
      DCHECK_EQ(GetInsertionMode(), kAfterBodyMode);
      if (token->GetName() == html_names::kHTMLTag) {
        if (IsParsingFragment()) {
          ParseError(token);
          return;
        }
        SetInsertionMode(kAfterAfterBodyMode);
        return;
      }
      FALLTHROUGH;
    case kAfterAfterBodyMode:
      DCHECK(GetInsertionMode() == kAfterBodyMode ||
             GetInsertionMode() == kAfterAfterBodyMode);
      // Parse error: switch back to "in body" and reprocess the token there.
      ParseError(token);
      SetInsertionMode(kInBodyMode);
      ProcessEndTag(token);
      break;
    case kInHeadNoscriptMode:
      DCHECK_EQ(GetInsertionMode(), kInHeadNoscriptMode);
      if (token->GetName() == html_names::kNoscriptTag) {
        DCHECK(tree_.CurrentStackItem()->HasTagName(html_names::kNoscriptTag));
        tree_.OpenElements()->Pop();
        DCHECK(tree_.CurrentStackItem()->HasTagName(html_names::kHeadTag));
        SetInsertionMode(kInHeadMode);
        return;
      }
      // Only </br> gets past this point; every other end tag is dropped.
      if (token->GetName() != html_names::kBrTag) {
        ParseError(token);
        return;
      }
      DefaultForInHeadNoscript();
      ProcessToken(token);
      break;
    case kTextMode:
      if (token->GetName() == html_names::kScriptTag &&
          tree_.CurrentStackItem()->HasTagName(html_names::kScriptTag)) {
        // Pause ourselves so that parsing stops until the script can be
        // processed by the caller.
        if (ScriptingContentIsAllowed(tree_.GetParserContentPolicy()))
          script_to_process_ = tree_.CurrentElement();
        tree_.OpenElements()->Pop();
        SetInsertionMode(original_insertion_mode_);

        if (parser_->Tokenizer()) {
          // We must set the tokenizer's state to DataState explicitly if the
          // tokenizer didn't have a chance to.
          parser_->Tokenizer()->SetState(HTMLTokenizer::kDataState);
        }
        return;
      }
      // Any other end tag in text mode: pop the current element and restore
      // the insertion mode saved in original_insertion_mode_.
      tree_.OpenElements()->Pop();
      SetInsertionMode(original_insertion_mode_);
      break;
    case kInFramesetMode:
      DCHECK_EQ(GetInsertionMode(), kInFramesetMode);
      if (token->GetName() == html_names::kFramesetTag) {
        // </frameset> is ignored when the current node is the root node or
        // when a template is in HTML scope.
        bool ignore_frameset_for_fragment_parsing = tree_.CurrentIsRootNode();
        ignore_frameset_for_fragment_parsing =
            ignore_frameset_for_fragment_parsing ||
            tree_.OpenElements()->HasTemplateInHTMLScope();
        if (ignore_frameset_for_fragment_parsing) {
          DCHECK(IsParsingFragmentOrTemplateContents());
          ParseError(token);
          return;
        }
        tree_.OpenElements()->Pop();
        // Leave frameset mode only once the outermost frameset was closed
        // (and we are not parsing a fragment).
        if (!IsParsingFragment() &&
            !tree_.CurrentStackItem()->HasTagName(html_names::kFramesetTag))
          SetInsertionMode(kAfterFramesetMode);
        return;
      }
      // Other end tags are silently dropped in this mode.
      break;
    case kAfterFramesetMode:
      DCHECK_EQ(GetInsertionMode(), kAfterFramesetMode);
      if (token->GetName() == html_names::kHTMLTag) {
        SetInsertionMode(kAfterAfterFramesetMode);
        return;
      }
      FALLTHROUGH;
    case kAfterAfterFramesetMode:
      DCHECK(GetInsertionMode() == kAfterFramesetMode ||
             GetInsertionMode() == kAfterAfterFramesetMode);
      ParseError(token);
      break;
    case kInSelectInTableMode:
      DCHECK(GetInsertionMode() == kInSelectInTableMode);
      if (token->GetName() == html_names::kCaptionTag ||
          token->GetName() == html_names::kTableTag ||
          IsTableBodyContextTag(token->GetName()) ||
          token->GetName() == html_names::kTrTag ||
          IsTableCellContextTag(token->GetName())) {
        // A table-structure end tag: if the matching element is open, close
        // the select with a synthesized </select> and reprocess the token.
        ParseError(token);
        if (tree_.OpenElements()->InTableScope(token->GetName())) {
          AtomicHTMLToken end_select(HTMLToken::kEndTag,
                                     html_names::kSelectTag.LocalName());
          ProcessEndTag(&end_select);
          ProcessEndTag(token);
        }
        return;
      }
      FALLTHROUGH;
    case kInSelectMode:
      DCHECK(GetInsertionMode() == kInSelectMode ||
             GetInsertionMode() == kInSelectInTableMode);
      if (token->GetName() == html_names::kOptgroupTag) {
        // An <option> directly inside an <optgroup> is closed implicitly
        // before the optgroup itself.
        if (tree_.CurrentStackItem()->HasTagName(html_names::kOptionTag) &&
            tree_.OneBelowTop() &&
            tree_.OneBelowTop()->HasTagName(html_names::kOptgroupTag))
          ProcessFakeEndTag(html_names::kOptionTag);
        if (tree_.CurrentStackItem()->HasTagName(html_names::kOptgroupTag)) {
          tree_.OpenElements()->Pop();
          return;
        }
        ParseError(token);
        return;
      }
      if (token->GetName() == html_names::kOptionTag) {
        if (tree_.CurrentStackItem()->HasTagName(html_names::kOptionTag)) {
          tree_.OpenElements()->Pop();
          return;
        }
        ParseError(token);
        return;
      }
      if (token->GetName() == html_names::kSelectTag) {
        if (!tree_.OpenElements()->InSelectScope(token->GetName())) {
          DCHECK(IsParsingFragment());
          ParseError(token);
          return;
        }
        tree_.OpenElements()->PopUntilPopped(
            html_names::kSelectTag.LocalName());
        ResetInsertionModeAppropriately();
        return;
      }
      if (token->GetName() == html_names::kTemplateTag) {
        ProcessTemplateEndTag(token);
        return;
      }
      // Other end tags are silently dropped in this mode.
      break;
    case kInTableTextMode:
      // Leave table text mode via DefaultForInTableText(), then reprocess
      // the token.
      DefaultForInTableText();
      ProcessEndTag(token);
      break;
    case kTemplateContentsMode:
      // Only </template> is handled; other end tags are dropped.
      if (token->GetName() == html_names::kTemplateTag) {
        ProcessTemplateEndTag(token);
        return;
      }
      break;
  }
}
2310
ProcessComment(AtomicHTMLToken * token)2311 void HTMLTreeBuilder::ProcessComment(AtomicHTMLToken* token) {
2312 DCHECK_EQ(token->GetType(), HTMLToken::kComment);
2313 if (insertion_mode_ == kInitialMode || insertion_mode_ == kBeforeHTMLMode ||
2314 insertion_mode_ == kAfterAfterBodyMode ||
2315 insertion_mode_ == kAfterAfterFramesetMode) {
2316 tree_.InsertCommentOnDocument(token);
2317 return;
2318 }
2319 if (insertion_mode_ == kAfterBodyMode) {
2320 tree_.InsertCommentOnHTMLHtmlElement(token);
2321 return;
2322 }
2323 if (insertion_mode_ == kInTableTextMode) {
2324 DefaultForInTableText();
2325 ProcessComment(token);
2326 return;
2327 }
2328 tree_.InsertComment(token);
2329 }
2330
ProcessCharacter(AtomicHTMLToken * token)2331 void HTMLTreeBuilder::ProcessCharacter(AtomicHTMLToken* token) {
2332 DCHECK_EQ(token->GetType(), HTMLToken::kCharacter);
2333 CharacterTokenBuffer buffer(token);
2334 ProcessCharacterBuffer(buffer);
2335 }
2336
// Consumes the characters in |buffer| according to the current insertion
// mode.
//
// A case may consume only part of the buffer (typically leading whitespace),
// change the insertion mode, and then either fall through to the next case or
// jump back to ReprocessBuffer so that the remaining characters are handled
// in the new mode.
void HTMLTreeBuilder::ProcessCharacterBuffer(CharacterTokenBuffer& buffer) {
ReprocessBuffer:
  // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#parsing-main-inbody
  // Note that this logic is different than the generic \r\n collapsing
  // handled in the input stream preprocessor. This logic is here as an
  // "authoring convenience" so folks can write:
  //
  // <pre>
  // lorem ipsum
  // lorem ipsum
  // </pre>
  //
  // without getting an extra newline at the start of their <pre> element.
  if (should_skip_leading_newline_) {
    should_skip_leading_newline_ = false;
    buffer.SkipAtMostOneLeadingNewline();
    if (buffer.IsEmpty())
      return;
  }

  switch (GetInsertionMode()) {
    case kInitialMode: {
      DCHECK_EQ(GetInsertionMode(), kInitialMode);
      // Leading whitespace is discarded in this mode.
      buffer.SkipLeadingWhitespace();
      if (buffer.IsEmpty())
        return;
      DefaultForInitial();
      FALLTHROUGH;
    }
    case kBeforeHTMLMode: {
      DCHECK_EQ(GetInsertionMode(), kBeforeHTMLMode);
      buffer.SkipLeadingWhitespace();
      if (buffer.IsEmpty())
        return;
      DefaultForBeforeHTML();
      // If the parser is stopped after DefaultForBeforeHTML(), discard the
      // rest of the buffer instead of inserting it.
      if (parser_->IsStopped()) {
        buffer.SkipRemaining();
        return;
      }
      FALLTHROUGH;
    }
    case kBeforeHeadMode: {
      DCHECK_EQ(GetInsertionMode(), kBeforeHeadMode);
      buffer.SkipLeadingWhitespace();
      if (buffer.IsEmpty())
        return;
      DefaultForBeforeHead();
      FALLTHROUGH;
    }
    case kInHeadMode: {
      DCHECK_EQ(GetInsertionMode(), kInHeadMode);
      // From here on leading whitespace is kept as a text node rather than
      // dropped.
      StringView leading_whitespace = buffer.TakeLeadingWhitespace();
      if (!leading_whitespace.IsEmpty())
        tree_.InsertTextNode(leading_whitespace, kAllWhitespace);
      if (buffer.IsEmpty())
        return;
      DefaultForInHead();
      FALLTHROUGH;
    }
    case kAfterHeadMode: {
      DCHECK_EQ(GetInsertionMode(), kAfterHeadMode);
      StringView leading_whitespace = buffer.TakeLeadingWhitespace();
      if (!leading_whitespace.IsEmpty())
        tree_.InsertTextNode(leading_whitespace, kAllWhitespace);
      if (buffer.IsEmpty())
        return;
      DefaultForAfterHead();
      FALLTHROUGH;
    }
    case kInBodyMode:
    case kInCaptionMode:
    case kTemplateContentsMode:
    case kInCellMode: {
      DCHECK(GetInsertionMode() == kInBodyMode ||
             GetInsertionMode() == kInCaptionMode ||
             GetInsertionMode() == kInCellMode ||
             GetInsertionMode() == kTemplateContentsMode);
      ProcessCharacterBufferForInBody(buffer);
      break;
    }
    case kInTableMode:
    case kInTableBodyMode:
    case kInRowMode: {
      DCHECK(GetInsertionMode() == kInTableMode ||
             GetInsertionMode() == kInTableBodyMode ||
             GetInsertionMode() == kInRowMode);
      DCHECK(pending_table_characters_.IsEmpty());
      // When the current node is a table, table section or row element, the
      // characters are collected as pending table characters; otherwise they
      // are processed with the in-body rules under foster parenting.
      if (tree_.CurrentStackItem()->IsElementNode() &&
          (tree_.CurrentStackItem()->HasTagName(html_names::kTableTag) ||
           tree_.CurrentStackItem()->HasTagName(html_names::kTbodyTag) ||
           tree_.CurrentStackItem()->HasTagName(html_names::kTfootTag) ||
           tree_.CurrentStackItem()->HasTagName(html_names::kTheadTag) ||
           tree_.CurrentStackItem()->HasTagName(html_names::kTrTag))) {
        original_insertion_mode_ = insertion_mode_;
        SetInsertionMode(kInTableTextMode);
        // Note that we fall through to the InTableTextMode case below.
      } else {
        HTMLConstructionSite::RedirectToFosterParentGuard redirecter(tree_);
        ProcessCharacterBufferForInBody(buffer);
        break;
      }
      FALLTHROUGH;
    }
    case kInTableTextMode: {
      // Accumulate the characters in pending_table_characters_.
      buffer.GiveRemainingTo(pending_table_characters_);
      break;
    }
    case kInColumnGroupMode: {
      DCHECK_EQ(GetInsertionMode(), kInColumnGroupMode);
      StringView leading_whitespace = buffer.TakeLeadingWhitespace();
      if (!leading_whitespace.IsEmpty())
        tree_.InsertTextNode(leading_whitespace, kAllWhitespace);
      if (buffer.IsEmpty())
        return;
      // Non-whitespace: act as if </colgroup> had been seen and reprocess
      // whatever is left of the buffer.
      if (!ProcessColgroupEndTagForInColumnGroup()) {
        DCHECK(IsParsingFragmentOrTemplateContents());
        // The spec tells us to drop these characters on the floor.
        buffer.SkipLeadingNonWhitespace();
        if (buffer.IsEmpty())
          return;
      }
      goto ReprocessBuffer;
    }
    case kAfterBodyMode:
    case kAfterAfterBodyMode: {
      DCHECK(GetInsertionMode() == kAfterBodyMode ||
             GetInsertionMode() == kAfterAfterBodyMode);
      // FIXME: parse error
      SetInsertionMode(kInBodyMode);
      goto ReprocessBuffer;
    }
    case kTextMode: {
      DCHECK_EQ(GetInsertionMode(), kTextMode);
      tree_.InsertTextNode(buffer.TakeRemaining());
      break;
    }
    case kInHeadNoscriptMode: {
      DCHECK_EQ(GetInsertionMode(), kInHeadNoscriptMode);
      StringView leading_whitespace = buffer.TakeLeadingWhitespace();
      if (!leading_whitespace.IsEmpty())
        tree_.InsertTextNode(leading_whitespace, kAllWhitespace);
      if (buffer.IsEmpty())
        return;
      DefaultForInHeadNoscript();
      goto ReprocessBuffer;
    }
    case kInFramesetMode:
    case kAfterFramesetMode: {
      // NOTE(review): this DCHECK also admits kAfterAfterFramesetMode, although
      // that mode has its own case label further down.
      DCHECK(GetInsertionMode() == kInFramesetMode ||
             GetInsertionMode() == kAfterFramesetMode ||
             GetInsertionMode() == kAfterAfterFramesetMode);
      // Only the text returned by TakeRemainingWhitespace() is inserted.
      String leading_whitespace = buffer.TakeRemainingWhitespace();
      if (!leading_whitespace.IsEmpty())
        tree_.InsertTextNode(leading_whitespace, kAllWhitespace);
      // FIXME: We should generate a parse error if we skipped over any
      // non-whitespace characters.
      break;
    }
    case kInSelectInTableMode:
    case kInSelectMode: {
      DCHECK(GetInsertionMode() == kInSelectMode ||
             GetInsertionMode() == kInSelectInTableMode);
      tree_.InsertTextNode(buffer.TakeRemaining());
      break;
    }
    case kAfterAfterFramesetMode: {
      // Same as the frameset modes above, except that the active formatting
      // elements are reconstructed before the whitespace is inserted.
      String leading_whitespace = buffer.TakeRemainingWhitespace();
      if (!leading_whitespace.IsEmpty()) {
        tree_.ReconstructTheActiveFormattingElements();
        tree_.InsertTextNode(leading_whitespace, kAllWhitespace);
      }
      // FIXME: We should generate a parse error if we skipped over any
      // non-whitespace characters.
      break;
    }
  }
}
2514
ProcessCharacterBufferForInBody(CharacterTokenBuffer & buffer)2515 void HTMLTreeBuilder::ProcessCharacterBufferForInBody(
2516 CharacterTokenBuffer& buffer) {
2517 tree_.ReconstructTheActiveFormattingElements();
2518 StringView characters = buffer.TakeRemaining();
2519 tree_.InsertTextNode(characters);
2520 if (frameset_ok_ && !IsAllWhitespaceOrReplacementCharacters(characters))
2521 frameset_ok_ = false;
2522 }
2523
ProcessEndOfFile(AtomicHTMLToken * token)2524 void HTMLTreeBuilder::ProcessEndOfFile(AtomicHTMLToken* token) {
2525 DCHECK_EQ(token->GetType(), HTMLToken::kEndOfFile);
2526 switch (GetInsertionMode()) {
2527 case kInitialMode:
2528 DCHECK_EQ(GetInsertionMode(), kInitialMode);
2529 DefaultForInitial();
2530 FALLTHROUGH;
2531 case kBeforeHTMLMode:
2532 DCHECK_EQ(GetInsertionMode(), kBeforeHTMLMode);
2533 DefaultForBeforeHTML();
2534 FALLTHROUGH;
2535 case kBeforeHeadMode:
2536 DCHECK_EQ(GetInsertionMode(), kBeforeHeadMode);
2537 DefaultForBeforeHead();
2538 FALLTHROUGH;
2539 case kInHeadMode:
2540 DCHECK_EQ(GetInsertionMode(), kInHeadMode);
2541 DefaultForInHead();
2542 FALLTHROUGH;
2543 case kAfterHeadMode:
2544 DCHECK_EQ(GetInsertionMode(), kAfterHeadMode);
2545 DefaultForAfterHead();
2546 FALLTHROUGH;
2547 case kInBodyMode:
2548 case kInCellMode:
2549 case kInCaptionMode:
2550 case kInRowMode:
2551 DCHECK(GetInsertionMode() == kInBodyMode ||
2552 GetInsertionMode() == kInCellMode ||
2553 GetInsertionMode() == kInCaptionMode ||
2554 GetInsertionMode() == kInRowMode ||
2555 GetInsertionMode() == kTemplateContentsMode);
2556 // Emit parse error based on what elements are still open.
2557 DVLOG(1) << "Not implemented.";
2558 if (!template_insertion_modes_.IsEmpty() &&
2559 ProcessEndOfFileForInTemplateContents(token))
2560 return;
2561 break;
2562 case kAfterBodyMode:
2563 case kAfterAfterBodyMode:
2564 DCHECK(GetInsertionMode() == kAfterBodyMode ||
2565 GetInsertionMode() == kAfterAfterBodyMode);
2566 break;
2567 case kInHeadNoscriptMode:
2568 DCHECK_EQ(GetInsertionMode(), kInHeadNoscriptMode);
2569 DefaultForInHeadNoscript();
2570 ProcessEndOfFile(token);
2571 return;
2572 case kAfterFramesetMode:
2573 case kAfterAfterFramesetMode:
2574 DCHECK(GetInsertionMode() == kAfterFramesetMode ||
2575 GetInsertionMode() == kAfterAfterFramesetMode);
2576 break;
2577 case kInColumnGroupMode:
2578 if (tree_.CurrentIsRootNode()) {
2579 DCHECK(IsParsingFragment());
2580 return; // FIXME: Should we break here instead of returning?
2581 }
2582 DCHECK(tree_.CurrentNode()->HasTagName(html_names::kColgroupTag) ||
2583 IsA<HTMLTemplateElement>(tree_.CurrentNode()));
2584 ProcessColgroupEndTagForInColumnGroup();
2585 FALLTHROUGH;
2586 case kInFramesetMode:
2587 case kInTableMode:
2588 case kInTableBodyMode:
2589 case kInSelectInTableMode:
2590 case kInSelectMode:
2591 DCHECK(GetInsertionMode() == kInSelectMode ||
2592 GetInsertionMode() == kInSelectInTableMode ||
2593 GetInsertionMode() == kInTableMode ||
2594 GetInsertionMode() == kInFramesetMode ||
2595 GetInsertionMode() == kInTableBodyMode ||
2596 GetInsertionMode() == kInColumnGroupMode);
2597 if (tree_.CurrentNode() != tree_.OpenElements()->RootNode())
2598 ParseError(token);
2599 if (!template_insertion_modes_.IsEmpty() &&
2600 ProcessEndOfFileForInTemplateContents(token))
2601 return;
2602 break;
2603 case kInTableTextMode:
2604 DefaultForInTableText();
2605 ProcessEndOfFile(token);
2606 return;
2607 case kTextMode: {
2608 ParseError(token);
2609 if (tree_.CurrentStackItem()->HasTagName(html_names::kScriptTag)) {
2610 // Mark the script element as "already started".
2611 DVLOG(1) << "Not implemented.";
2612 }
2613 Element* el = tree_.OpenElements()->Top();
2614 if (IsA<HTMLTextAreaElement>(el))
2615 To<HTMLFormControlElement>(el)->SetBlocksFormSubmission(true);
2616 tree_.OpenElements()->Pop();
2617 DCHECK_NE(original_insertion_mode_, kTextMode);
2618 SetInsertionMode(original_insertion_mode_);
2619 ProcessEndOfFile(token);
2620 return;
2621 }
2622 case kTemplateContentsMode:
2623 if (ProcessEndOfFileForInTemplateContents(token))
2624 return;
2625 break;
2626 }
2627 tree_.ProcessEndOfFile();
2628 }
2629
DefaultForInitial()2630 void HTMLTreeBuilder::DefaultForInitial() {
2631 DVLOG(1) << "Not implemented.";
2632 tree_.SetDefaultCompatibilityMode();
2633 // FIXME: parse error
2634 SetInsertionMode(kBeforeHTMLMode);
2635 }
2636
DefaultForBeforeHTML()2637 void HTMLTreeBuilder::DefaultForBeforeHTML() {
2638 AtomicHTMLToken start_html(HTMLToken::kStartTag,
2639 html_names::kHTMLTag.LocalName());
2640 tree_.InsertHTMLHtmlStartTagBeforeHTML(&start_html);
2641 SetInsertionMode(kBeforeHeadMode);
2642 }
2643
DefaultForBeforeHead()2644 void HTMLTreeBuilder::DefaultForBeforeHead() {
2645 AtomicHTMLToken start_head(HTMLToken::kStartTag,
2646 html_names::kHeadTag.LocalName());
2647 ProcessStartTag(&start_head);
2648 }
2649
DefaultForInHead()2650 void HTMLTreeBuilder::DefaultForInHead() {
2651 AtomicHTMLToken end_head(HTMLToken::kEndTag,
2652 html_names::kHeadTag.LocalName());
2653 ProcessEndTag(&end_head);
2654 }
2655
DefaultForInHeadNoscript()2656 void HTMLTreeBuilder::DefaultForInHeadNoscript() {
2657 AtomicHTMLToken end_noscript(HTMLToken::kEndTag,
2658 html_names::kNoscriptTag.LocalName());
2659 ProcessEndTag(&end_noscript);
2660 }
2661
DefaultForAfterHead()2662 void HTMLTreeBuilder::DefaultForAfterHead() {
2663 AtomicHTMLToken start_body(HTMLToken::kStartTag,
2664 html_names::kBodyTag.LocalName());
2665 ProcessStartTag(&start_body);
2666 frameset_ok_ = true;
2667 }
2668
DefaultForInTableText()2669 void HTMLTreeBuilder::DefaultForInTableText() {
2670 String characters = pending_table_characters_.ToString();
2671 pending_table_characters_.Clear();
2672 if (!IsAllWhitespace(characters)) {
2673 // FIXME: parse error
2674 HTMLConstructionSite::RedirectToFosterParentGuard redirecter(tree_);
2675 tree_.ReconstructTheActiveFormattingElements();
2676 tree_.InsertTextNode(characters, kNotAllWhitespace);
2677 frameset_ok_ = false;
2678 SetInsertionMode(original_insertion_mode_);
2679 return;
2680 }
2681 tree_.InsertTextNode(characters);
2682 SetInsertionMode(original_insertion_mode_);
2683 }
2684
ProcessStartTagForInHead(AtomicHTMLToken * token)2685 bool HTMLTreeBuilder::ProcessStartTagForInHead(AtomicHTMLToken* token) {
2686 DCHECK_EQ(token->GetType(), HTMLToken::kStartTag);
2687 if (token->GetName() == html_names::kHTMLTag) {
2688 ProcessHtmlStartTagForInBody(token);
2689 return true;
2690 }
2691 if (token->GetName() == html_names::kBaseTag ||
2692 token->GetName() == html_names::kBasefontTag ||
2693 token->GetName() == html_names::kBgsoundTag ||
2694 token->GetName() == html_names::kCommandTag ||
2695 token->GetName() == html_names::kLinkTag ||
2696 token->GetName() == html_names::kMetaTag) {
2697 tree_.InsertSelfClosingHTMLElementDestroyingToken(token);
2698 // Note: The custom processing for the <meta> tag is done in
2699 // HTMLMetaElement::process().
2700 return true;
2701 }
2702 if (token->GetName() == html_names::kTitleTag) {
2703 ProcessGenericRCDATAStartTag(token);
2704 return true;
2705 }
2706 if (token->GetName() == html_names::kNoscriptTag) {
2707 if (options_.scripting_flag) {
2708 ProcessGenericRawTextStartTag(token);
2709 return true;
2710 }
2711 tree_.InsertHTMLElement(token);
2712 SetInsertionMode(kInHeadNoscriptMode);
2713 return true;
2714 }
2715 if (token->GetName() == html_names::kNoframesTag ||
2716 token->GetName() == html_names::kStyleTag) {
2717 ProcessGenericRawTextStartTag(token);
2718 return true;
2719 }
2720 if (token->GetName() == html_names::kScriptTag) {
2721 ProcessScriptStartTag(token);
2722 return true;
2723 }
2724 if (token->GetName() == html_names::kTemplateTag) {
2725 ProcessTemplateStartTag(token);
2726 return true;
2727 }
2728 if (token->GetName() == html_names::kHeadTag) {
2729 ParseError(token);
2730 return true;
2731 }
2732 return false;
2733 }
2734
ProcessGenericRCDATAStartTag(AtomicHTMLToken * token)2735 void HTMLTreeBuilder::ProcessGenericRCDATAStartTag(AtomicHTMLToken* token) {
2736 DCHECK_EQ(token->GetType(), HTMLToken::kStartTag);
2737 tree_.InsertHTMLElement(token);
2738 if (parser_->Tokenizer())
2739 parser_->Tokenizer()->SetState(HTMLTokenizer::kRCDATAState);
2740 original_insertion_mode_ = insertion_mode_;
2741 SetInsertionMode(kTextMode);
2742 }
2743
ProcessGenericRawTextStartTag(AtomicHTMLToken * token)2744 void HTMLTreeBuilder::ProcessGenericRawTextStartTag(AtomicHTMLToken* token) {
2745 DCHECK_EQ(token->GetType(), HTMLToken::kStartTag);
2746 tree_.InsertHTMLElement(token);
2747 if (parser_->Tokenizer())
2748 parser_->Tokenizer()->SetState(HTMLTokenizer::kRAWTEXTState);
2749 original_insertion_mode_ = insertion_mode_;
2750 SetInsertionMode(kTextMode);
2751 }
2752
ProcessScriptStartTag(AtomicHTMLToken * token)2753 void HTMLTreeBuilder::ProcessScriptStartTag(AtomicHTMLToken* token) {
2754 DCHECK_EQ(token->GetType(), HTMLToken::kStartTag);
2755 tree_.InsertScriptElement(token);
2756 if (parser_->Tokenizer())
2757 parser_->Tokenizer()->SetState(HTMLTokenizer::kScriptDataState);
2758 original_insertion_mode_ = insertion_mode_;
2759
2760 TextPosition position = parser_->GetTextPosition();
2761
2762 script_to_process_start_position_ = position;
2763
2764 SetInsertionMode(kTextMode);
2765 }
2766
2767 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#tree-construction
ShouldProcessTokenInForeignContent(AtomicHTMLToken * token)2768 bool HTMLTreeBuilder::ShouldProcessTokenInForeignContent(
2769 AtomicHTMLToken* token) {
2770 if (tree_.IsEmpty())
2771 return false;
2772 HTMLStackItem* adjusted_current_node = AdjustedCurrentStackItem();
2773
2774 if (adjusted_current_node->IsInHTMLNamespace())
2775 return false;
2776 if (HTMLElementStack::IsMathMLTextIntegrationPoint(adjusted_current_node)) {
2777 if (token->GetType() == HTMLToken::kStartTag &&
2778 token->GetName() != mathml_names::kMglyphTag &&
2779 token->GetName() != mathml_names::kMalignmarkTag)
2780 return false;
2781 if (token->GetType() == HTMLToken::kCharacter)
2782 return false;
2783 }
2784 if (adjusted_current_node->HasTagName(mathml_names::kAnnotationXmlTag) &&
2785 token->GetType() == HTMLToken::kStartTag &&
2786 token->GetName() == svg_names::kSVGTag)
2787 return false;
2788 if (HTMLElementStack::IsHTMLIntegrationPoint(adjusted_current_node)) {
2789 if (token->GetType() == HTMLToken::kStartTag)
2790 return false;
2791 if (token->GetType() == HTMLToken::kCharacter)
2792 return false;
2793 }
2794 if (token->GetType() == HTMLToken::kEndOfFile)
2795 return false;
2796 return true;
2797 }
2798
ProcessTokenInForeignContent(AtomicHTMLToken * token)2799 void HTMLTreeBuilder::ProcessTokenInForeignContent(AtomicHTMLToken* token) {
2800 if (token->GetType() == HTMLToken::kCharacter) {
2801 const String& characters = token->Characters();
2802 tree_.InsertTextNode(characters);
2803 if (frameset_ok_ && !IsAllWhitespaceOrReplacementCharacters(characters))
2804 frameset_ok_ = false;
2805 return;
2806 }
2807
2808 tree_.Flush(kFlushAlways);
2809 HTMLStackItem* adjusted_current_node = AdjustedCurrentStackItem();
2810
2811 switch (token->GetType()) {
2812 case HTMLToken::kUninitialized:
2813 NOTREACHED();
2814 break;
2815 case HTMLToken::DOCTYPE:
2816 ParseError(token);
2817 break;
2818 case HTMLToken::kStartTag: {
2819 if (token->GetName() == html_names::kBTag ||
2820 token->GetName() == html_names::kBigTag ||
2821 token->GetName() == html_names::kBlockquoteTag ||
2822 token->GetName() == html_names::kBodyTag ||
2823 token->GetName() == html_names::kBrTag ||
2824 token->GetName() == html_names::kCenterTag ||
2825 token->GetName() == html_names::kCodeTag ||
2826 token->GetName() == html_names::kDdTag ||
2827 token->GetName() == html_names::kDivTag ||
2828 token->GetName() == html_names::kDlTag ||
2829 token->GetName() == html_names::kDtTag ||
2830 token->GetName() == html_names::kEmTag ||
2831 token->GetName() == html_names::kEmbedTag ||
2832 IsNumberedHeaderTag(token->GetName()) ||
2833 token->GetName() == html_names::kHeadTag ||
2834 token->GetName() == html_names::kHrTag ||
2835 token->GetName() == html_names::kITag ||
2836 token->GetName() == html_names::kImgTag ||
2837 token->GetName() == html_names::kLiTag ||
2838 token->GetName() == html_names::kListingTag ||
2839 token->GetName() == html_names::kMenuTag ||
2840 token->GetName() == html_names::kMetaTag ||
2841 token->GetName() == html_names::kNobrTag ||
2842 token->GetName() == html_names::kOlTag ||
2843 token->GetName() == html_names::kPTag ||
2844 token->GetName() == html_names::kPreTag ||
2845 token->GetName() == html_names::kRubyTag ||
2846 token->GetName() == html_names::kSTag ||
2847 token->GetName() == html_names::kSmallTag ||
2848 token->GetName() == html_names::kSpanTag ||
2849 token->GetName() == html_names::kStrongTag ||
2850 token->GetName() == html_names::kStrikeTag ||
2851 token->GetName() == html_names::kSubTag ||
2852 token->GetName() == html_names::kSupTag ||
2853 token->GetName() == html_names::kTableTag ||
2854 token->GetName() == html_names::kTtTag ||
2855 token->GetName() == html_names::kUTag ||
2856 token->GetName() == html_names::kUlTag ||
2857 token->GetName() == html_names::kVarTag ||
2858 (token->GetName() == html_names::kFontTag &&
2859 (token->GetAttributeItem(html_names::kColorAttr) ||
2860 token->GetAttributeItem(html_names::kFaceAttr) ||
2861 token->GetAttributeItem(html_names::kSizeAttr)))) {
2862 ParseError(token);
2863 tree_.OpenElements()->PopUntilForeignContentScopeMarker();
2864 ProcessStartTag(token);
2865 return;
2866 }
2867 if (token->GetName() == html_names::kScriptTag)
2868 script_to_process_start_position_ = parser_->GetTextPosition();
2869 const AtomicString& current_namespace =
2870 adjusted_current_node->NamespaceURI();
2871 if (current_namespace == mathml_names::kNamespaceURI)
2872 AdjustMathMLAttributes(token);
2873 if (current_namespace == svg_names::kNamespaceURI) {
2874 AdjustSVGTagNameCase(token);
2875 AdjustSVGAttributes(token);
2876 }
2877 AdjustForeignAttributes(token);
2878 tree_.InsertForeignElement(token, current_namespace);
2879 break;
2880 }
2881 case HTMLToken::kEndTag: {
2882 if (adjusted_current_node->NamespaceURI() == svg_names::kNamespaceURI)
2883 AdjustSVGTagNameCase(token);
2884
2885 if (token->GetName() == svg_names::kScriptTag &&
2886 tree_.CurrentStackItem()->HasTagName(svg_names::kScriptTag)) {
2887 if (ScriptingContentIsAllowed(tree_.GetParserContentPolicy()))
2888 script_to_process_ = tree_.CurrentElement();
2889 tree_.OpenElements()->Pop();
2890 return;
2891 }
2892 if (token->GetName() == html_names::kBrTag ||
2893 token->GetName() == html_names::kPTag) {
2894 ParseError(token);
2895 tree_.OpenElements()->PopUntilForeignContentScopeMarker();
2896 ProcessEndTag(token);
2897 return;
2898 }
2899 if (!tree_.CurrentStackItem()->IsInHTMLNamespace()) {
2900 // FIXME: This code just wants an Element* iterator, instead of an
2901 // ElementRecord*
2902 HTMLElementStack::ElementRecord* node_record =
2903 tree_.OpenElements()->TopRecord();
2904 if (!node_record->StackItem()->HasLocalName(token->GetName()))
2905 ParseError(token);
2906 while (1) {
2907 if (node_record->StackItem()->HasLocalName(token->GetName())) {
2908 tree_.OpenElements()->PopUntilPopped(node_record->GetElement());
2909 return;
2910 }
2911 node_record = node_record->Next();
2912
2913 if (node_record->StackItem()->IsInHTMLNamespace())
2914 break;
2915 }
2916 }
2917 // Otherwise, process the token according to the rules given in the
2918 // section corresponding to the current insertion mode in HTML content.
2919 ProcessEndTag(token);
2920 break;
2921 }
2922 case HTMLToken::kComment:
2923 tree_.InsertComment(token);
2924 break;
2925 case HTMLToken::kCharacter:
2926 case HTMLToken::kEndOfFile:
2927 NOTREACHED();
2928 break;
2929 }
2930 }
2931
Finished()2932 void HTMLTreeBuilder::Finished() {
2933 if (IsParsingFragment())
2934 return;
2935
2936 DCHECK(template_insertion_modes_.IsEmpty());
2937 #if DCHECK_IS_ON()
2938 DCHECK(is_attached_);
2939 #endif
2940 // Warning, this may detach the parser. Do not do anything else after this.
2941 tree_.FinishedParsing();
2942 }
2943
ParseError(AtomicHTMLToken *)2944 void HTMLTreeBuilder::ParseError(AtomicHTMLToken*) {}
2945
2946 #ifndef NDEBUG
ToString(HTMLTreeBuilder::InsertionMode mode)2947 const char* HTMLTreeBuilder::ToString(HTMLTreeBuilder::InsertionMode mode) {
2948 switch (mode) {
2949 #define DEFINE_STRINGIFY(mode) \
2950 case mode: \
2951 return #mode;
2952 DEFINE_STRINGIFY(kInitialMode)
2953 DEFINE_STRINGIFY(kBeforeHTMLMode)
2954 DEFINE_STRINGIFY(kBeforeHeadMode)
2955 DEFINE_STRINGIFY(kInHeadMode)
2956 DEFINE_STRINGIFY(kInHeadNoscriptMode)
2957 DEFINE_STRINGIFY(kAfterHeadMode)
2958 DEFINE_STRINGIFY(kTemplateContentsMode)
2959 DEFINE_STRINGIFY(kInBodyMode)
2960 DEFINE_STRINGIFY(kTextMode)
2961 DEFINE_STRINGIFY(kInTableMode)
2962 DEFINE_STRINGIFY(kInTableTextMode)
2963 DEFINE_STRINGIFY(kInCaptionMode)
2964 DEFINE_STRINGIFY(kInColumnGroupMode)
2965 DEFINE_STRINGIFY(kInTableBodyMode)
2966 DEFINE_STRINGIFY(kInRowMode)
2967 DEFINE_STRINGIFY(kInCellMode)
2968 DEFINE_STRINGIFY(kInSelectMode)
2969 DEFINE_STRINGIFY(kInSelectInTableMode)
2970 DEFINE_STRINGIFY(kAfterBodyMode)
2971 DEFINE_STRINGIFY(kInFramesetMode)
2972 DEFINE_STRINGIFY(kAfterFramesetMode)
2973 DEFINE_STRINGIFY(kAfterAfterBodyMode)
2974 DEFINE_STRINGIFY(kAfterAfterFramesetMode)
2975 #undef DEFINE_STRINGIFY
2976 }
2977 return "<unknown>";
2978 }
2979 #endif
2980
2981 } // namespace blink
2982