1 /*
2  * Copyright (C) 2010 Google, Inc. All Rights Reserved.
3  * Copyright (C) 2011 Apple Inc. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
15  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL GOOGLE INC. OR
18  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 #include "third_party/blink/renderer/core/html/parser/html_construction_site.h"
28 
29 #include <limits>
30 #include "third_party/blink/renderer/core/dom/comment.h"
31 #include "third_party/blink/renderer/core/dom/document_fragment.h"
32 #include "third_party/blink/renderer/core/dom/document_type.h"
33 #include "third_party/blink/renderer/core/dom/element.h"
34 #include "third_party/blink/renderer/core/dom/element_traversal.h"
35 #include "third_party/blink/renderer/core/dom/node.h"
36 #include "third_party/blink/renderer/core/dom/template_content_document_fragment.h"
37 #include "third_party/blink/renderer/core/dom/text.h"
38 #include "third_party/blink/renderer/core/dom/throw_on_dynamic_markup_insertion_count_incrementer.h"
39 #include "third_party/blink/renderer/core/frame/local_dom_window.h"
40 #include "third_party/blink/renderer/core/frame/local_frame.h"
41 #include "third_party/blink/renderer/core/frame/local_frame_client.h"
42 #include "third_party/blink/renderer/core/html/custom/ce_reactions_scope.h"
43 #include "third_party/blink/renderer/core/html/custom/custom_element.h"
44 #include "third_party/blink/renderer/core/html/custom/custom_element_definition.h"
45 #include "third_party/blink/renderer/core/html/custom/custom_element_descriptor.h"
46 #include "third_party/blink/renderer/core/html/custom/custom_element_registry.h"
47 #include "third_party/blink/renderer/core/html/forms/form_associated.h"
48 #include "third_party/blink/renderer/core/html/forms/html_form_element.h"
49 #include "third_party/blink/renderer/core/html/html_html_element.h"
50 #include "third_party/blink/renderer/core/html/html_plugin_element.h"
51 #include "third_party/blink/renderer/core/html/html_script_element.h"
52 #include "third_party/blink/renderer/core/html/html_style_element.h"
53 #include "third_party/blink/renderer/core/html/html_template_element.h"
54 #include "third_party/blink/renderer/core/html/parser/atomic_html_token.h"
55 #include "third_party/blink/renderer/core/html/parser/html_parser_idioms.h"
56 #include "third_party/blink/renderer/core/html/parser/html_parser_reentry_permit.h"
57 #include "third_party/blink/renderer/core/html/parser/html_stack_item.h"
58 #include "third_party/blink/renderer/core/html/parser/html_token.h"
59 #include "third_party/blink/renderer/core/html_element_factory.h"
60 #include "third_party/blink/renderer/core/html_names.h"
61 #include "third_party/blink/renderer/core/loader/frame_loader.h"
62 #include "third_party/blink/renderer/core/script/ignore_destructive_write_count_incrementer.h"
63 #include "third_party/blink/renderer/core/svg/svg_script_element.h"
64 #include "third_party/blink/renderer/platform/bindings/microtask.h"
65 #include "third_party/blink/renderer/platform/bindings/v8_per_isolate_data.h"
66 #include "third_party/blink/renderer/platform/heap/heap.h"
67 #include "third_party/blink/renderer/platform/instrumentation/use_counter.h"
68 #include "third_party/blink/renderer/platform/text/text_break_iterator.h"
69 
70 namespace blink {
71 
// Open-element stack depth beyond which AttachLater() stops nesting new nodes
// under the requested parent and attaches them to the parent's parent instead.
static constexpr unsigned kMaximumHTMLParserDOMTreeDepth = 512;
73 
SetAttributes(Element * element,AtomicHTMLToken * token,ParserContentPolicy parser_content_policy)74 static inline void SetAttributes(Element* element,
75                                  AtomicHTMLToken* token,
76                                  ParserContentPolicy parser_content_policy) {
77   if (!ScriptingContentIsAllowed(parser_content_policy))
78     element->StripScriptingAttributes(token->Attributes());
79   element->ParserSetAttributes(token->Attributes());
80   if (token->HasDuplicateAttribute()) {
81     UseCounter::Count(element->GetDocument(), WebFeature::kDuplicatedAttribute);
82     element->SetHasDuplicateAttributes();
83   }
84 }
85 
HasImpliedEndTag(const HTMLStackItem * item)86 static bool HasImpliedEndTag(const HTMLStackItem* item) {
87   return item->HasTagName(html_names::kDdTag) ||
88          item->HasTagName(html_names::kDtTag) ||
89          item->HasTagName(html_names::kLiTag) ||
90          item->HasTagName(html_names::kOptionTag) ||
91          item->HasTagName(html_names::kOptgroupTag) ||
92          item->HasTagName(html_names::kPTag) ||
93          item->HasTagName(html_names::kRbTag) ||
94          item->HasTagName(html_names::kRpTag) ||
95          item->HasTagName(html_names::kRtTag) ||
96          item->HasTagName(html_names::kRTCTag);
97 }
98 
ShouldUseLengthLimit(const ContainerNode & node)99 static bool ShouldUseLengthLimit(const ContainerNode& node) {
100   return !IsA<HTMLScriptElement>(node) && !IsA<HTMLStyleElement>(node) &&
101          !IsA<SVGScriptElement>(node);
102 }
103 
TextLengthLimitForContainer(const ContainerNode & node)104 static unsigned TextLengthLimitForContainer(const ContainerNode& node) {
105   return ShouldUseLengthLimit(node) ? Text::kDefaultLengthLimit
106                                     : std::numeric_limits<unsigned>::max();
107 }
108 
IsAllWhitespace(const String & string)109 static inline bool IsAllWhitespace(const String& string) {
110   return string.IsAllSpecialCharacters<IsHTMLSpace<UChar>>();
111 }
112 
Insert(HTMLConstructionSiteTask & task)113 static inline void Insert(HTMLConstructionSiteTask& task) {
114   if (auto* template_element = DynamicTo<HTMLTemplateElement>(*task.parent))
115     task.parent = template_element->content();
116 
117   // https://html.spec.whatwg.org/C/#insert-a-foreign-element
118   // 3.1, (3) Push (pop) an element queue
119   CEReactionsScope reactions;
120   if (task.next_child)
121     task.parent->ParserInsertBefore(task.child.Get(), *task.next_child);
122   else
123     task.parent->ParserAppendChild(task.child.Get());
124 }
125 
ExecuteInsertTask(HTMLConstructionSiteTask & task)126 static inline void ExecuteInsertTask(HTMLConstructionSiteTask& task) {
127   DCHECK_EQ(task.operation, HTMLConstructionSiteTask::kInsert);
128 
129   Insert(task);
130   if (auto* child = DynamicTo<Element>(task.child.Get())) {
131     child->BeginParsingChildren();
132     if (task.self_closing)
133       child->FinishParsingChildren();
134   }
135 }
136 
ExecuteInsertTextTask(HTMLConstructionSiteTask & task)137 static inline void ExecuteInsertTextTask(HTMLConstructionSiteTask& task) {
138   DCHECK_EQ(task.operation, HTMLConstructionSiteTask::kInsertText);
139 
140   // Merge text nodes into previous ones if possible:
141   // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#insert-a-character
142   auto* new_text = To<Text>(task.child.Get());
143   Node* previous_child = task.next_child ? task.next_child->previousSibling()
144                                          : task.parent->lastChild();
145   if (auto* previous_text = DynamicTo<Text>(previous_child)) {
146     unsigned length_limit = TextLengthLimitForContainer(*task.parent);
147     if (previous_text->length() + new_text->length() < length_limit) {
148       previous_text->ParserAppendData(new_text->data());
149       return;
150     }
151   }
152 
153   Insert(task);
154 }
155 
ExecuteReparentTask(HTMLConstructionSiteTask & task)156 static inline void ExecuteReparentTask(HTMLConstructionSiteTask& task) {
157   DCHECK_EQ(task.operation, HTMLConstructionSiteTask::kReparent);
158 
159   task.parent->ParserAppendChild(task.child);
160 }
161 
ExecuteInsertAlreadyParsedChildTask(HTMLConstructionSiteTask & task)162 static inline void ExecuteInsertAlreadyParsedChildTask(
163     HTMLConstructionSiteTask& task) {
164   DCHECK_EQ(task.operation,
165             HTMLConstructionSiteTask::kInsertAlreadyParsedChild);
166 
167   Insert(task);
168 }
169 
ExecuteTakeAllChildrenTask(HTMLConstructionSiteTask & task)170 static inline void ExecuteTakeAllChildrenTask(HTMLConstructionSiteTask& task) {
171   DCHECK_EQ(task.operation, HTMLConstructionSiteTask::kTakeAllChildren);
172 
173   task.parent->ParserTakeAllChildrenFrom(*task.OldParent());
174 }
175 
ExecuteTask(HTMLConstructionSiteTask & task)176 void HTMLConstructionSite::ExecuteTask(HTMLConstructionSiteTask& task) {
177   DCHECK(task_queue_.IsEmpty());
178   if (task.operation == HTMLConstructionSiteTask::kInsert)
179     return ExecuteInsertTask(task);
180 
181   if (task.operation == HTMLConstructionSiteTask::kInsertText)
182     return ExecuteInsertTextTask(task);
183 
184   // All the cases below this point are only used by the adoption agency.
185 
186   if (task.operation == HTMLConstructionSiteTask::kInsertAlreadyParsedChild)
187     return ExecuteInsertAlreadyParsedChildTask(task);
188 
189   if (task.operation == HTMLConstructionSiteTask::kReparent)
190     return ExecuteReparentTask(task);
191 
192   if (task.operation == HTMLConstructionSiteTask::kTakeAllChildren)
193     return ExecuteTakeAllChildrenTask(task);
194 
195   NOTREACHED();
196 }
197 
198 // This is only needed for TextDocuments where we might have text nodes
199 // approaching the default length limit (~64k) and we don't want to break a text
200 // node in the middle of a combining character.
FindBreakIndexBetween(const StringBuilder & string,unsigned current_position,unsigned proposed_break_index)201 static unsigned FindBreakIndexBetween(const StringBuilder& string,
202                                       unsigned current_position,
203                                       unsigned proposed_break_index) {
204   DCHECK_LT(current_position, proposed_break_index);
205   DCHECK_LE(proposed_break_index, string.length());
206   // The end of the string is always a valid break.
207   if (proposed_break_index == string.length())
208     return proposed_break_index;
209 
210   // Latin-1 does not have breakable boundaries. If we ever moved to a different
211   // 8-bit encoding this could be wrong.
212   if (string.Is8Bit())
213     return proposed_break_index;
214 
215   const UChar* break_search_characters =
216       string.Characters16() + current_position;
217   // We need at least two characters look-ahead to account for UTF-16
218   // surrogates, but can't search off the end of the buffer!
219   unsigned break_search_length =
220       std::min(proposed_break_index - current_position + 2,
221                string.length() - current_position);
222   NonSharedCharacterBreakIterator it(break_search_characters,
223                                      break_search_length);
224 
225   if (it.IsBreak(proposed_break_index - current_position))
226     return proposed_break_index;
227 
228   int adjusted_break_index_in_substring =
229       it.Preceding(proposed_break_index - current_position);
230   if (adjusted_break_index_in_substring > 0)
231     return current_position + adjusted_break_index_in_substring;
232   // We failed to find a breakable point, let the caller figure out what to do.
233   return 0;
234 }
235 
// Strings composed entirely of whitespace are likely to be repeated, so they
// are routed through AtomicString to share a single string for each. The
// string is treated as all-whitespace when |whitespace_mode| says so, or when
// the mode is unknown and a scan of |string| confirms it. Any other string is
// returned unchanged.
static String AtomizeIfAllWhitespace(const String& string,
                                     WhitespaceMode whitespace_mode) {
  bool is_all_whitespace = (whitespace_mode == kAllWhitespace);
  if (!is_all_whitespace && whitespace_mode == kWhitespaceUnknown)
    is_all_whitespace = IsAllWhitespace(string);

  if (!is_all_whitespace)
    return string;
  return AtomicString(string).GetString();
}
245 
FlushPendingText(FlushMode mode)246 void HTMLConstructionSite::FlushPendingText(FlushMode mode) {
247   if (pending_text_.IsEmpty())
248     return;
249 
250   if (mode == kFlushIfAtTextLimit &&
251       !ShouldUseLengthLimit(*pending_text_.parent))
252     return;
253 
254   PendingText pending_text;
255   // Hold onto the current pending text on the stack so that queueTask doesn't
256   // recurse infinitely.
257   pending_text_.Swap(pending_text);
258   DCHECK(pending_text_.IsEmpty());
259 
260   // Splitting text nodes into smaller chunks contradicts HTML5 spec, but is
261   // necessary for performance, see:
262   // https://bugs.webkit.org/show_bug.cgi?id=55898
263   unsigned length_limit = TextLengthLimitForContainer(*pending_text.parent);
264 
265   unsigned current_position = 0;
266   const StringBuilder& string = pending_text.string_builder;
267   while (current_position < string.length()) {
268     unsigned proposed_break_index =
269         std::min(current_position + length_limit, string.length());
270     unsigned break_index =
271         FindBreakIndexBetween(string, current_position, proposed_break_index);
272     DCHECK_LE(break_index, string.length());
273     String substring =
274         string.Substring(current_position, break_index - current_position);
275     substring = AtomizeIfAllWhitespace(substring, pending_text.whitespace_mode);
276 
277     HTMLConstructionSiteTask task(HTMLConstructionSiteTask::kInsertText);
278     task.parent = pending_text.parent;
279     task.next_child = pending_text.next_child;
280     task.child = Text::Create(task.parent->GetDocument(), substring);
281     QueueTask(task);
282 
283     DCHECK_GT(break_index, current_position);
284     DCHECK_EQ(break_index - current_position, substring.length());
285     DCHECK_EQ(To<Text>(task.child.Get())->length(), substring.length());
286     current_position = break_index;
287   }
288 }
289 
QueueTask(const HTMLConstructionSiteTask & task)290 void HTMLConstructionSite::QueueTask(const HTMLConstructionSiteTask& task) {
291   FlushPendingText(kFlushAlways);
292   DCHECK(pending_text_.IsEmpty());
293   task_queue_.push_back(task);
294 }
295 
AttachLater(ContainerNode * parent,Node * child,bool self_closing)296 void HTMLConstructionSite::AttachLater(ContainerNode* parent,
297                                        Node* child,
298                                        bool self_closing) {
299   auto* element = DynamicTo<Element>(child);
300   DCHECK(ScriptingContentIsAllowed(parser_content_policy_) || !element ||
301          !element->IsScriptElement());
302   DCHECK(PluginContentIsAllowed(parser_content_policy_) ||
303          !IsA<HTMLPlugInElement>(child));
304 
305   HTMLConstructionSiteTask task(HTMLConstructionSiteTask::kInsert);
306   task.parent = parent;
307   task.child = child;
308   task.self_closing = self_closing;
309 
310   if (ShouldFosterParent()) {
311     FosterParent(task.child);
312     return;
313   }
314 
315   // Add as a sibling of the parent if we have reached the maximum depth
316   // allowed.
317   if (open_elements_.StackDepth() > kMaximumHTMLParserDOMTreeDepth &&
318       task.parent->parentNode())
319     task.parent = task.parent->parentNode();
320 
321   DCHECK(task.parent);
322   QueueTask(task);
323 }
324 
ExecuteQueuedTasks()325 void HTMLConstructionSite::ExecuteQueuedTasks() {
326   // This has no affect on pendingText, and we may have pendingText remaining
327   // after executing all other queued tasks.
328   const size_t size = task_queue_.size();
329   if (!size)
330     return;
331 
332   // Fast path for when |size| is 1, which is the common case
333   if (size == 1) {
334     HTMLConstructionSiteTask task = task_queue_.front();
335     task_queue_.pop_back();
336     ExecuteTask(task);
337     return;
338   }
339 
340   // Copy the task queue into a local variable in case executeTask re-enters the
341   // parser.
342   TaskQueue queue;
343   queue.swap(task_queue_);
344 
345   for (auto& task : queue)
346     ExecuteTask(task);
347 
348   // We might be detached now.
349 }
350 
HTMLConstructionSite(HTMLParserReentryPermit * reentry_permit,Document & document,ParserContentPolicy parser_content_policy)351 HTMLConstructionSite::HTMLConstructionSite(
352     HTMLParserReentryPermit* reentry_permit,
353     Document& document,
354     ParserContentPolicy parser_content_policy)
355     : reentry_permit_(reentry_permit),
356       document_(&document),
357       attachment_root_(document),
358       parser_content_policy_(parser_content_policy),
359       is_parsing_fragment_(false),
360       redirect_attach_to_foster_parent_(false),
361       in_quirks_mode_(document.InQuirksMode()) {
362   DCHECK(document_->IsHTMLDocument() || document_->IsXHTMLDocument());
363 }
364 
InitFragmentParsing(DocumentFragment * fragment,Element * context_element)365 void HTMLConstructionSite::InitFragmentParsing(DocumentFragment* fragment,
366                                                Element* context_element) {
367   DCHECK(context_element);
368   DCHECK_EQ(document_, &fragment->GetDocument());
369   DCHECK_EQ(in_quirks_mode_, fragment->GetDocument().InQuirksMode());
370   DCHECK(!is_parsing_fragment_);
371   DCHECK(!form_);
372 
373   attachment_root_ = fragment;
374   is_parsing_fragment_ = true;
375 
376   if (!context_element->GetDocument().IsTemplateDocument())
377     form_ = Traversal<HTMLFormElement>::FirstAncestorOrSelf(*context_element);
378 }
379 
~HTMLConstructionSite()380 HTMLConstructionSite::~HTMLConstructionSite() {
381   // Depending on why we're being destroyed it might be OK to forget queued
382   // tasks, but currently we don't expect to.
383   DCHECK(task_queue_.IsEmpty());
384   // Currently we assume that text will never be the last token in the document
385   // and that we'll always queue some additional task to cause it to flush.
386   DCHECK(pending_text_.IsEmpty());
387 }
388 
Trace(Visitor * visitor)389 void HTMLConstructionSite::Trace(Visitor* visitor) {
390   visitor->Trace(document_);
391   visitor->Trace(attachment_root_);
392   visitor->Trace(head_);
393   visitor->Trace(form_);
394   visitor->Trace(open_elements_);
395   visitor->Trace(active_formatting_elements_);
396   visitor->Trace(task_queue_);
397   visitor->Trace(pending_text_);
398 }
399 
Detach()400 void HTMLConstructionSite::Detach() {
401   // FIXME: We'd like to ASSERT here that we're canceling and not just
402   // discarding text that really should have made it into the DOM earlier, but
403   // there doesn't seem to be a nice way to do that.
404   pending_text_.Discard();
405   document_ = nullptr;
406   attachment_root_ = nullptr;
407 }
408 
TakeForm()409 HTMLFormElement* HTMLConstructionSite::TakeForm() {
410   return form_.Release();
411 }
412 
InsertHTMLHtmlStartTagBeforeHTML(AtomicHTMLToken * token)413 void HTMLConstructionSite::InsertHTMLHtmlStartTagBeforeHTML(
414     AtomicHTMLToken* token) {
415   DCHECK(document_);
416   HTMLHtmlElement* element;
417   if (const auto* is_attribute = token->GetAttributeItem(html_names::kIsAttr)) {
418     element = To<HTMLHtmlElement>(document_->CreateElement(
419         html_names::kHTMLTag, GetCreateElementFlags(), is_attribute->Value()));
420   } else {
421     element = MakeGarbageCollected<HTMLHtmlElement>(*document_);
422   }
423   SetAttributes(element, token, parser_content_policy_);
424   AttachLater(attachment_root_, element);
425   open_elements_.PushHTMLHtmlElement(
426       MakeGarbageCollected<HTMLStackItem>(element, token));
427 
428   ExecuteQueuedTasks();
429   element->InsertedByParser();
430 }
431 
MergeAttributesFromTokenIntoElement(AtomicHTMLToken * token,Element * element)432 void HTMLConstructionSite::MergeAttributesFromTokenIntoElement(
433     AtomicHTMLToken* token,
434     Element* element) {
435   if (token->Attributes().IsEmpty())
436     return;
437 
438   for (const auto& token_attribute : token->Attributes()) {
439     if (element->AttributesWithoutUpdate().FindIndex(
440             token_attribute.GetName()) == kNotFound)
441       element->setAttribute(token_attribute.GetName(), token_attribute.Value());
442   }
443 }
444 
InsertHTMLHtmlStartTagInBody(AtomicHTMLToken * token)445 void HTMLConstructionSite::InsertHTMLHtmlStartTagInBody(
446     AtomicHTMLToken* token) {
447   // Fragments do not have a root HTML element, so any additional HTML elements
448   // encountered during fragment parsing should be ignored.
449   if (is_parsing_fragment_)
450     return;
451 
452   MergeAttributesFromTokenIntoElement(token, open_elements_.HtmlElement());
453 }
454 
InsertHTMLBodyStartTagInBody(AtomicHTMLToken * token)455 void HTMLConstructionSite::InsertHTMLBodyStartTagInBody(
456     AtomicHTMLToken* token) {
457   MergeAttributesFromTokenIntoElement(token, open_elements_.BodyElement());
458 }
459 
SetDefaultCompatibilityMode()460 void HTMLConstructionSite::SetDefaultCompatibilityMode() {
461   if (is_parsing_fragment_)
462     return;
463   SetCompatibilityMode(Document::kQuirksMode);
464 }
465 
SetCompatibilityMode(Document::CompatibilityMode mode)466 void HTMLConstructionSite::SetCompatibilityMode(
467     Document::CompatibilityMode mode) {
468   in_quirks_mode_ = (mode == Document::kQuirksMode);
469   document_->SetCompatibilityMode(mode);
470 }
471 
SetCompatibilityModeFromDoctype(const String & name,const String & public_id,const String & system_id)472 void HTMLConstructionSite::SetCompatibilityModeFromDoctype(
473     const String& name,
474     const String& public_id,
475     const String& system_id) {
476   // There are three possible compatibility modes:
477   // Quirks - quirks mode emulates WinIE and NS4. CSS parsing is also relaxed in
478   // this mode, e.g., unit types can be omitted from numbers.
479   // Limited Quirks - This mode is identical to no-quirks mode except for its
480   // treatment of line-height in the inline box model.
481   // No Quirks - no quirks apply. Web pages will obey the specifications to the
482   // letter.
483 
484   // Check for Quirks Mode.
485   if (name != "html" ||
486       public_id.StartsWithIgnoringASCIICase(
487           "+//Silmaril//dtd html Pro v0r11 19970101//") ||
488       public_id.StartsWithIgnoringASCIICase(
489           "-//AdvaSoft Ltd//DTD HTML 3.0 asWedit + extensions//") ||
490       public_id.StartsWithIgnoringASCIICase(
491           "-//AS//DTD HTML 3.0 asWedit + extensions//") ||
492       public_id.StartsWithIgnoringASCIICase(
493           "-//IETF//DTD HTML 2.0 Level 1//") ||
494       public_id.StartsWithIgnoringASCIICase(
495           "-//IETF//DTD HTML 2.0 Level 2//") ||
496       public_id.StartsWithIgnoringASCIICase(
497           "-//IETF//DTD HTML 2.0 Strict Level 1//") ||
498       public_id.StartsWithIgnoringASCIICase(
499           "-//IETF//DTD HTML 2.0 Strict Level 2//") ||
500       public_id.StartsWithIgnoringASCIICase("-//IETF//DTD HTML 2.0 Strict//") ||
501       public_id.StartsWithIgnoringASCIICase("-//IETF//DTD HTML 2.0//") ||
502       public_id.StartsWithIgnoringASCIICase("-//IETF//DTD HTML 2.1E//") ||
503       public_id.StartsWithIgnoringASCIICase("-//IETF//DTD HTML 3.0//") ||
504       public_id.StartsWithIgnoringASCIICase("-//IETF//DTD HTML 3.2 Final//") ||
505       public_id.StartsWithIgnoringASCIICase("-//IETF//DTD HTML 3.2//") ||
506       public_id.StartsWithIgnoringASCIICase("-//IETF//DTD HTML 3//") ||
507       public_id.StartsWithIgnoringASCIICase("-//IETF//DTD HTML Level 0//") ||
508       public_id.StartsWithIgnoringASCIICase("-//IETF//DTD HTML Level 1//") ||
509       public_id.StartsWithIgnoringASCIICase("-//IETF//DTD HTML Level 2//") ||
510       public_id.StartsWithIgnoringASCIICase("-//IETF//DTD HTML Level 3//") ||
511       public_id.StartsWithIgnoringASCIICase(
512           "-//IETF//DTD HTML Strict Level 0//") ||
513       public_id.StartsWithIgnoringASCIICase(
514           "-//IETF//DTD HTML Strict Level 1//") ||
515       public_id.StartsWithIgnoringASCIICase(
516           "-//IETF//DTD HTML Strict Level 2//") ||
517       public_id.StartsWithIgnoringASCIICase(
518           "-//IETF//DTD HTML Strict Level 3//") ||
519       public_id.StartsWithIgnoringASCIICase("-//IETF//DTD HTML Strict//") ||
520       public_id.StartsWithIgnoringASCIICase("-//IETF//DTD HTML//") ||
521       public_id.StartsWithIgnoringASCIICase(
522           "-//Metrius//DTD Metrius Presentational//") ||
523       public_id.StartsWithIgnoringASCIICase(
524           "-//Microsoft//DTD Internet Explorer 2.0 HTML Strict//") ||
525       public_id.StartsWithIgnoringASCIICase(
526           "-//Microsoft//DTD Internet Explorer 2.0 HTML//") ||
527       public_id.StartsWithIgnoringASCIICase(
528           "-//Microsoft//DTD Internet Explorer 2.0 Tables//") ||
529       public_id.StartsWithIgnoringASCIICase(
530           "-//Microsoft//DTD Internet Explorer 3.0 HTML Strict//") ||
531       public_id.StartsWithIgnoringASCIICase(
532           "-//Microsoft//DTD Internet Explorer 3.0 HTML//") ||
533       public_id.StartsWithIgnoringASCIICase(
534           "-//Microsoft//DTD Internet Explorer 3.0 Tables//") ||
535       public_id.StartsWithIgnoringASCIICase(
536           "-//Netscape Comm. Corp.//DTD HTML//") ||
537       public_id.StartsWithIgnoringASCIICase(
538           "-//Netscape Comm. Corp.//DTD Strict HTML//") ||
539       public_id.StartsWithIgnoringASCIICase(
540           "-//O'Reilly and Associates//DTD HTML 2.0//") ||
541       public_id.StartsWithIgnoringASCIICase(
542           "-//O'Reilly and Associates//DTD HTML Extended 1.0//") ||
543       public_id.StartsWithIgnoringASCIICase(
544           "-//O'Reilly and Associates//DTD HTML Extended Relaxed 1.0//") ||
545       public_id.StartsWithIgnoringASCIICase(
546           "-//SoftQuad Software//DTD HoTMetaL PRO "
547           "6.0::19990601::extensions to HTML 4.0//") ||
548       public_id.StartsWithIgnoringASCIICase(
549           "-//SoftQuad//DTD HoTMetaL PRO "
550           "4.0::19971010::extensions to HTML 4.0//") ||
551       public_id.StartsWithIgnoringASCIICase(
552           "-//Spyglass//DTD HTML 2.0 Extended//") ||
553       public_id.StartsWithIgnoringASCIICase(
554           "-//SQ//DTD HTML 2.0 HoTMetaL + extensions//") ||
555       public_id.StartsWithIgnoringASCIICase(
556           "-//Sun Microsystems Corp.//DTD HotJava HTML//") ||
557       public_id.StartsWithIgnoringASCIICase(
558           "-//Sun Microsystems Corp.//DTD HotJava Strict HTML//") ||
559       public_id.StartsWithIgnoringASCIICase(
560           "-//W3C//DTD HTML 3 1995-03-24//") ||
561       public_id.StartsWithIgnoringASCIICase("-//W3C//DTD HTML 3.2 Draft//") ||
562       public_id.StartsWithIgnoringASCIICase("-//W3C//DTD HTML 3.2 Final//") ||
563       public_id.StartsWithIgnoringASCIICase("-//W3C//DTD HTML 3.2//") ||
564       public_id.StartsWithIgnoringASCIICase("-//W3C//DTD HTML 3.2S Draft//") ||
565       public_id.StartsWithIgnoringASCIICase(
566           "-//W3C//DTD HTML 4.0 Frameset//") ||
567       public_id.StartsWithIgnoringASCIICase(
568           "-//W3C//DTD HTML 4.0 Transitional//") ||
569       public_id.StartsWithIgnoringASCIICase(
570           "-//W3C//DTD HTML Experimental 19960712//") ||
571       public_id.StartsWithIgnoringASCIICase(
572           "-//W3C//DTD HTML Experimental 970421//") ||
573       public_id.StartsWithIgnoringASCIICase("-//W3C//DTD W3 HTML//") ||
574       public_id.StartsWithIgnoringASCIICase("-//W3O//DTD W3 HTML 3.0//") ||
575       EqualIgnoringASCIICase(public_id,
576                              "-//W3O//DTD W3 HTML Strict 3.0//EN//") ||
577       public_id.StartsWithIgnoringASCIICase(
578           "-//WebTechs//DTD Mozilla HTML 2.0//") ||
579       public_id.StartsWithIgnoringASCIICase(
580           "-//WebTechs//DTD Mozilla HTML//") ||
581       EqualIgnoringASCIICase(public_id, "-/W3C/DTD HTML 4.0 Transitional/EN") ||
582       EqualIgnoringASCIICase(public_id, "HTML") ||
583       EqualIgnoringASCIICase(
584           system_id,
585           "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd") ||
586       (system_id.IsEmpty() && public_id.StartsWithIgnoringASCIICase(
587                                   "-//W3C//DTD HTML 4.01 Frameset//")) ||
588       (system_id.IsEmpty() && public_id.StartsWithIgnoringASCIICase(
589                                   "-//W3C//DTD HTML 4.01 Transitional//"))) {
590     SetCompatibilityMode(Document::kQuirksMode);
591     return;
592   }
593 
594   // Check for Limited Quirks Mode.
595   if (public_id.StartsWithIgnoringASCIICase(
596           "-//W3C//DTD XHTML 1.0 Frameset//") ||
597       public_id.StartsWithIgnoringASCIICase(
598           "-//W3C//DTD XHTML 1.0 Transitional//") ||
599       (!system_id.IsEmpty() && public_id.StartsWithIgnoringASCIICase(
600                                    "-//W3C//DTD HTML 4.01 Frameset//")) ||
601       (!system_id.IsEmpty() && public_id.StartsWithIgnoringASCIICase(
602                                    "-//W3C//DTD HTML 4.01 Transitional//"))) {
603     SetCompatibilityMode(Document::kLimitedQuirksMode);
604     return;
605   }
606 
607   // Otherwise we are No Quirks Mode.
608   SetCompatibilityMode(Document::kNoQuirksMode);
609 }
610 
ProcessEndOfFile()611 void HTMLConstructionSite::ProcessEndOfFile() {
612   DCHECK(CurrentNode());
613   Flush(kFlushAlways);
614   OpenElements()->PopAll();
615 }
616 
FinishedParsing()617 void HTMLConstructionSite::FinishedParsing() {
618   // We shouldn't have any queued tasks but we might have pending text which we
619   // need to promote to tasks and execute.
620   DCHECK(task_queue_.IsEmpty());
621   Flush(kFlushAlways);
622   document_->FinishedParsing();
623 }
624 
InsertDoctype(AtomicHTMLToken * token)625 void HTMLConstructionSite::InsertDoctype(AtomicHTMLToken* token) {
626   DCHECK_EQ(token->GetType(), HTMLToken::DOCTYPE);
627 
628   const String& public_id =
629       StringImpl::Create8BitIfPossible(token->PublicIdentifier());
630   const String& system_id =
631       StringImpl::Create8BitIfPossible(token->SystemIdentifier());
632   auto* doctype = MakeGarbageCollected<DocumentType>(
633       document_, token->GetName(), public_id, system_id);
634   AttachLater(attachment_root_, doctype);
635 
636   // DOCTYPE nodes are only processed when parsing fragments w/o
637   // contextElements, which never occurs.  However, if we ever chose to support
638   // such, this code is subtly wrong, because context-less fragments can
639   // determine their own quirks mode, and thus change parsing rules (like <p>
640   // inside <table>).  For now we ASSERT that we never hit this code in a
641   // fragment, as changing the owning document's compatibility mode would be
642   // wrong.
643   DCHECK(!is_parsing_fragment_);
644   if (is_parsing_fragment_)
645     return;
646 
647   if (token->ForceQuirks())
648     SetCompatibilityMode(Document::kQuirksMode);
649   else {
650     SetCompatibilityModeFromDoctype(token->GetName(), public_id, system_id);
651   }
652 }
653 
InsertComment(AtomicHTMLToken * token)654 void HTMLConstructionSite::InsertComment(AtomicHTMLToken* token) {
655   DCHECK_EQ(token->GetType(), HTMLToken::kComment);
656   AttachLater(CurrentNode(),
657               Comment::Create(OwnerDocumentForCurrentNode(), token->Comment()));
658 }
659 
InsertCommentOnDocument(AtomicHTMLToken * token)660 void HTMLConstructionSite::InsertCommentOnDocument(AtomicHTMLToken* token) {
661   DCHECK_EQ(token->GetType(), HTMLToken::kComment);
662   DCHECK(document_);
663   AttachLater(attachment_root_, Comment::Create(*document_, token->Comment()));
664 }
665 
InsertCommentOnHTMLHtmlElement(AtomicHTMLToken * token)666 void HTMLConstructionSite::InsertCommentOnHTMLHtmlElement(
667     AtomicHTMLToken* token) {
668   DCHECK_EQ(token->GetType(), HTMLToken::kComment);
669   ContainerNode* parent = open_elements_.RootNode();
670   AttachLater(parent, Comment::Create(parent->GetDocument(), token->Comment()));
671 }
672 
InsertHTMLHeadElement(AtomicHTMLToken * token)673 void HTMLConstructionSite::InsertHTMLHeadElement(AtomicHTMLToken* token) {
674   DCHECK(!ShouldFosterParent());
675   head_ = MakeGarbageCollected<HTMLStackItem>(
676       CreateElement(token, html_names::xhtmlNamespaceURI), token);
677   AttachLater(CurrentNode(), head_->GetElement());
678   open_elements_.PushHTMLHeadElement(head_);
679 }
680 
InsertHTMLBodyElement(AtomicHTMLToken * token)681 void HTMLConstructionSite::InsertHTMLBodyElement(AtomicHTMLToken* token) {
682   DCHECK(!ShouldFosterParent());
683   Element* body = CreateElement(token, html_names::xhtmlNamespaceURI);
684   AttachLater(CurrentNode(), body);
685   open_elements_.PushHTMLBodyElement(
686       MakeGarbageCollected<HTMLStackItem>(body, token));
687   if (document_)
688     document_->WillInsertBody();
689 }
690 
InsertHTMLFormElement(AtomicHTMLToken * token,bool is_demoted)691 void HTMLConstructionSite::InsertHTMLFormElement(AtomicHTMLToken* token,
692                                                  bool is_demoted) {
693   auto* form_element =
694       To<HTMLFormElement>(CreateElement(token, html_names::xhtmlNamespaceURI));
695   if (!OpenElements()->HasTemplateInHTMLScope())
696     form_ = form_element;
697   if (is_demoted) {
698     UseCounter::Count(OwnerDocumentForCurrentNode(),
699                       WebFeature::kDemotedFormElement);
700   }
701   AttachLater(CurrentNode(), form_element);
702   open_elements_.Push(MakeGarbageCollected<HTMLStackItem>(form_element, token));
703 }
704 
InsertHTMLElement(AtomicHTMLToken * token)705 void HTMLConstructionSite::InsertHTMLElement(AtomicHTMLToken* token) {
706   Element* element = CreateElement(token, html_names::xhtmlNamespaceURI);
707   AttachLater(CurrentNode(), element);
708   open_elements_.Push(MakeGarbageCollected<HTMLStackItem>(element, token));
709 }
710 
InsertSelfClosingHTMLElementDestroyingToken(AtomicHTMLToken * token)711 void HTMLConstructionSite::InsertSelfClosingHTMLElementDestroyingToken(
712     AtomicHTMLToken* token) {
713   DCHECK_EQ(token->GetType(), HTMLToken::kStartTag);
714   // Normally HTMLElementStack is responsible for calling finishParsingChildren,
715   // but self-closing elements are never in the element stack so the stack
716   // doesn't get a chance to tell them that we're done parsing their children.
717   AttachLater(CurrentNode(),
718               CreateElement(token, html_names::xhtmlNamespaceURI), true);
719   // FIXME: Do we want to acknowledge the token's self-closing flag?
720   // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#acknowledge-self-closing-flag
721 }
722 
InsertFormattingElement(AtomicHTMLToken * token)723 void HTMLConstructionSite::InsertFormattingElement(AtomicHTMLToken* token) {
724   // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#the-stack-of-open-elements
725   // Possible active formatting elements include:
726   // a, b, big, code, em, font, i, nobr, s, small, strike, strong, tt, and u.
727   InsertHTMLElement(token);
728   active_formatting_elements_.Append(CurrentElementRecord()->StackItem());
729 }
730 
InsertScriptElement(AtomicHTMLToken * token)731 void HTMLConstructionSite::InsertScriptElement(AtomicHTMLToken* token) {
732   CreateElementFlags flags;
733   flags
734       // http://www.whatwg.org/specs/web-apps/current-work/multipage/scripting-1.html#already-started
735       // http://html5.org/specs/dom-parsing.html#dom-range-createcontextualfragment
736       // For createContextualFragment, the specifications say to mark it
737       // parser-inserted and already-started and later unmark them. However, we
738       // short circuit that logic to avoid the subtree traversal to find script
739       // elements since scripts can never see those flags or effects thereof.
740       .SetCreatedByParser(parser_content_policy_ !=
741                           kAllowScriptingContentAndDoNotMarkAlreadyStarted)
742       .SetAlreadyStarted(is_parsing_fragment_ && flags.IsCreatedByParser());
743   HTMLScriptElement* element = nullptr;
744   if (const auto* is_attribute = token->GetAttributeItem(html_names::kIsAttr)) {
745     element = To<HTMLScriptElement>(OwnerDocumentForCurrentNode().CreateElement(
746         html_names::kScriptTag, flags, is_attribute->Value()));
747   } else {
748     element = MakeGarbageCollected<HTMLScriptElement>(
749         OwnerDocumentForCurrentNode(), flags);
750   }
751   SetAttributes(element, token, parser_content_policy_);
752   if (ScriptingContentIsAllowed(parser_content_policy_))
753     AttachLater(CurrentNode(), element);
754   open_elements_.Push(MakeGarbageCollected<HTMLStackItem>(element, token));
755 }
756 
InsertForeignElement(AtomicHTMLToken * token,const AtomicString & namespace_uri)757 void HTMLConstructionSite::InsertForeignElement(
758     AtomicHTMLToken* token,
759     const AtomicString& namespace_uri) {
760   DCHECK_EQ(token->GetType(), HTMLToken::kStartTag);
761   // parseError when xmlns or xmlns:xlink are wrong.
762   DVLOG(1) << "Not implemented.";
763 
764   Element* element = CreateElement(token, namespace_uri);
765   if (ScriptingContentIsAllowed(parser_content_policy_) ||
766       !element->IsScriptElement()) {
767     AttachLater(CurrentNode(), element, token->SelfClosing());
768   }
769   if (!token->SelfClosing()) {
770     open_elements_.Push(
771         MakeGarbageCollected<HTMLStackItem>(element, token, namespace_uri));
772   }
773 }
774 
InsertTextNode(const StringView & string,WhitespaceMode whitespace_mode)775 void HTMLConstructionSite::InsertTextNode(const StringView& string,
776                                           WhitespaceMode whitespace_mode) {
777   HTMLConstructionSiteTask dummy_task(HTMLConstructionSiteTask::kInsert);
778   dummy_task.parent = CurrentNode();
779 
780   if (ShouldFosterParent())
781     FindFosterSite(dummy_task);
782 
783   // FIXME: This probably doesn't need to be done both here and in insert(Task).
784   if (auto* template_element =
785           DynamicTo<HTMLTemplateElement>(*dummy_task.parent))
786     dummy_task.parent = template_element->content();
787 
788   // Unclear when parent != case occurs. Somehow we insert text into two
789   // separate nodes while processing the same Token. The nextChild !=
790   // dummy.nextChild case occurs whenever foster parenting happened and we hit a
791   // new text node "<table>a</table>b" In either case we have to flush the
792   // pending text into the task queue before making more.
793   if (!pending_text_.IsEmpty() &&
794       (pending_text_.parent != dummy_task.parent ||
795        pending_text_.next_child != dummy_task.next_child))
796     FlushPendingText(kFlushAlways);
797   pending_text_.Append(dummy_task.parent, dummy_task.next_child, string,
798                        whitespace_mode);
799 }
800 
Reparent(HTMLElementStack::ElementRecord * new_parent,HTMLElementStack::ElementRecord * child)801 void HTMLConstructionSite::Reparent(HTMLElementStack::ElementRecord* new_parent,
802                                     HTMLElementStack::ElementRecord* child) {
803   HTMLConstructionSiteTask task(HTMLConstructionSiteTask::kReparent);
804   task.parent = new_parent->GetNode();
805   task.child = child->GetNode();
806   QueueTask(task);
807 }
808 
Reparent(HTMLElementStack::ElementRecord * new_parent,HTMLStackItem * child)809 void HTMLConstructionSite::Reparent(HTMLElementStack::ElementRecord* new_parent,
810                                     HTMLStackItem* child) {
811   HTMLConstructionSiteTask task(HTMLConstructionSiteTask::kReparent);
812   task.parent = new_parent->GetNode();
813   task.child = child->GetNode();
814   QueueTask(task);
815 }
816 
InsertAlreadyParsedChild(HTMLStackItem * new_parent,HTMLElementStack::ElementRecord * child)817 void HTMLConstructionSite::InsertAlreadyParsedChild(
818     HTMLStackItem* new_parent,
819     HTMLElementStack::ElementRecord* child) {
820   if (new_parent->CausesFosterParenting()) {
821     FosterParent(child->GetNode());
822     return;
823   }
824 
825   HTMLConstructionSiteTask task(
826       HTMLConstructionSiteTask::kInsertAlreadyParsedChild);
827   task.parent = new_parent->GetNode();
828   task.child = child->GetNode();
829   QueueTask(task);
830 }
831 
TakeAllChildren(HTMLStackItem * new_parent,HTMLElementStack::ElementRecord * old_parent)832 void HTMLConstructionSite::TakeAllChildren(
833     HTMLStackItem* new_parent,
834     HTMLElementStack::ElementRecord* old_parent) {
835   HTMLConstructionSiteTask task(HTMLConstructionSiteTask::kTakeAllChildren);
836   task.parent = new_parent->GetNode();
837   task.child = old_parent->GetNode();
838   QueueTask(task);
839 }
840 
GetCreateElementFlags() const841 CreateElementFlags HTMLConstructionSite::GetCreateElementFlags() const {
842   return is_parsing_fragment_ ? CreateElementFlags::ByFragmentParser()
843                               : CreateElementFlags::ByParser();
844 }
845 
OwnerDocumentForCurrentNode()846 Document& HTMLConstructionSite::OwnerDocumentForCurrentNode() {
847   if (auto* template_element = DynamicTo<HTMLTemplateElement>(*CurrentNode()))
848     return template_element->content()->GetDocument();
849   return CurrentNode()->GetDocument();
850 }
851 
852 // "look up a custom element definition" for a token
853 // https://html.spec.whatwg.org/C/#look-up-a-custom-element-definition
LookUpCustomElementDefinition(Document & document,const QualifiedName & tag_name,const AtomicString & is)854 CustomElementDefinition* HTMLConstructionSite::LookUpCustomElementDefinition(
855     Document& document,
856     const QualifiedName& tag_name,
857     const AtomicString& is) {
858   // "1. If namespace is not the HTML namespace, return null."
859   if (tag_name.NamespaceURI() != html_names::xhtmlNamespaceURI)
860     return nullptr;
861 
862   // "2. If document does not have a browsing context, return null."
863   LocalDOMWindow* window = document.ExecutingWindow();
864   if (!window)
865     return nullptr;
866 
867   // "3. Let registry be document's browsing context's Window's
868   // CustomElementRegistry object."
869   CustomElementRegistry* registry = window->MaybeCustomElements();
870   if (!registry)
871     return nullptr;
872 
873   const AtomicString& local_name = tag_name.LocalName();
874   const AtomicString& name = !is.IsNull() ? is : local_name;
875   CustomElementDescriptor descriptor(name, local_name);
876 
877   // 4.-6.
878   return registry->DefinitionFor(descriptor);
879 }
880 
881 // "create an element for a token"
882 // https://html.spec.whatwg.org/C/#create-an-element-for-the-token
CreateElement(AtomicHTMLToken * token,const AtomicString & namespace_uri)883 Element* HTMLConstructionSite::CreateElement(
884     AtomicHTMLToken* token,
885     const AtomicString& namespace_uri) {
886   // "1. Let document be intended parent's node document."
887   Document& document = OwnerDocumentForCurrentNode();
888 
889   // "2. Let local name be the tag name of the token."
890   QualifiedName tag_name(g_null_atom, token->GetName(), namespace_uri);
891   // "3. Let is be the value of the "is" attribute in the given token ..." etc.
892   const Attribute* is_attribute = token->GetAttributeItem(html_names::kIsAttr);
893   const AtomicString& is = is_attribute ? is_attribute->Value() : g_null_atom;
894   // "4. Let definition be the result of looking up a custom element ..." etc.
895   auto* definition = LookUpCustomElementDefinition(document, tag_name, is);
896   // "5. If definition is non-null and the parser was not originally created
897   // for the HTML fragment parsing algorithm, then let will execute script
898   // be true."
899   bool will_execute_script = definition && !is_parsing_fragment_;
900 
901   Element* element;
902 
903   if (will_execute_script) {
904     // "6.1 Increment the document's throw-on-dynamic-insertion counter."
905     ThrowOnDynamicMarkupInsertionCountIncrementer
906         throw_on_dynamic_markup_insertions(&document);
907 
908     // "6.2 If the JavaScript execution context stack is empty,
909     // then perform a microtask checkpoint."
910 
911     // TODO(dominicc): This is the way the Blink HTML parser performs
912     // checkpoints, but note the spec is different--it talks about the
913     // JavaScript stack, not the script nesting level.
914     if (0u == reentry_permit_->ScriptNestingLevel())
915       Microtask::PerformCheckpoint(V8PerIsolateData::MainThreadIsolate());
916 
917     // "6.3 Push a new element queue onto the custom element
918     // reactions stack."
919     CEReactionsScope reactions;
920 
921     // "7. Let element be the result of creating an element given document,
922     // localName, given namespace, null, and is. If will execute script is true,
923     // set the synchronous custom elements flag; otherwise, leave it unset."
924     // TODO(crbug.com/1080673): We clear the CreatedbyParser flag here, so that
925     // elements get fully constructed. Some elements (e.g. HTMLInputElement)
926     // only partially construct themselves when created by the parser, but since
927     // this is a custom element, we need a fully-constructed element here.
928     element = definition->CreateElement(
929         document, tag_name, GetCreateElementFlags().SetCreatedByParser(false));
930 
931     // "8. Append each attribute in the given token to element." We don't use
932     // setAttributes here because the custom element constructor may have
933     // manipulated attributes.
934     for (const auto& attribute : token->Attributes())
935       element->setAttribute(attribute.GetName(), attribute.Value());
936 
937     // "9. If will execute script is true, then ..." etc. The CEReactionsScope
938     // and ThrowOnDynamicMarkupInsertionCountIncrementer destructors implement
939     // steps 9.1-3.
940   } else {
941     if (definition) {
942       DCHECK(GetCreateElementFlags().IsAsyncCustomElements());
943       element = definition->CreateElement(document, tag_name,
944                                           GetCreateElementFlags());
945     } else {
946       element = CustomElement::CreateUncustomizedOrUndefinedElement(
947           document, tag_name, GetCreateElementFlags(), is);
948     }
949     // Definition for the created element does not exist here and it cannot be
950     // custom or failed.
951     DCHECK_NE(element->GetCustomElementState(), CustomElementState::kCustom);
952     DCHECK_NE(element->GetCustomElementState(), CustomElementState::kFailed);
953 
954     // TODO(dominicc): Move these steps so they happen for custom
955     // elements as well as built-in elements when customized built in
956     // elements are implemented for resettable, listed elements.
957 
958     // 10. If element has an xmlns attribute in the XMLNS namespace
959     // whose value is not exactly the same as the element's namespace,
960     // that is a parse error. Similarly, if element has an xmlns:xlink
961     // attribute in the XMLNS namespace whose value is not the XLink
962     // Namespace, that is a parse error.
963 
964     // TODO(dominicc): Implement step 10 when the HTML parser does
965     // something useful with parse errors.
966 
967     // 11. If element is a resettable element, invoke its reset
968     // algorithm. (This initializes the element's value and
969     // checkedness based on the element's attributes.)
970     // TODO(dominicc): Implement step 11, resettable elements.
971 
972     // 12. If element is a form-associated element, and the form
973     // element pointer is not null, and there is no template element
974     // on the stack of open elements, ...
975     auto* html_element = DynamicTo<HTMLElement>(element);
976     FormAssociated* form_associated_element =
977         html_element ? html_element->ToFormAssociatedOrNull() : nullptr;
978     if (form_associated_element && document.GetFrame() && form_.Get()) {
979       // ... and element is either not listed or doesn't have a form
980       // attribute, and the intended parent is in the same tree as the
981       // element pointed to by the form element pointer, associate
982       // element with the form element pointed to by the form element
983       // pointer, and suppress the running of the reset the form owner
984       // algorithm when the parser subsequently attempts to insert the
985       // element.
986 
987       // TODO(dominicc): There are many differences to the spec here;
988       // some of them are observable:
989       //
990       // - The HTML spec tracks whether there is a template element on
991       //   the stack both for manipulating the form element pointer
992       //   and using it here.
993       // - FormAssociated::AssociateWith implementations don't do the
994       //   "same tree" check; for example
995       //   HTMLImageElement::AssociateWith just checks whether the form
996       //   is in *a* tree. This check should be done here consistently.
997       // - ListedElement is a mixin; add IsListedElement and skip
998       //   setting the form for listed attributes with form=. Instead
999       //   we set attributes (step 8) out of order, after this step,
1000       //   to reset the form association.
1001       form_associated_element->AssociateWith(form_.Get());
1002     }
1003     // "8. Append each attribute in the given token to element."
1004     SetAttributes(element, token, parser_content_policy_);
1005   }
1006 
1007   return element;
1008 }
1009 
CreateElementFromSavedToken(HTMLStackItem * item)1010 HTMLStackItem* HTMLConstructionSite::CreateElementFromSavedToken(
1011     HTMLStackItem* item) {
1012   Element* element;
1013   // NOTE: Moving from item -> token -> item copies the Attribute vector twice!
1014   AtomicHTMLToken fake_token(HTMLToken::kStartTag, item->LocalName(),
1015                              item->Attributes());
1016   element = CreateElement(&fake_token, item->NamespaceURI());
1017   return MakeGarbageCollected<HTMLStackItem>(element, &fake_token,
1018                                              item->NamespaceURI());
1019 }
1020 
IndexOfFirstUnopenFormattingElement(unsigned & first_unopen_element_index) const1021 bool HTMLConstructionSite::IndexOfFirstUnopenFormattingElement(
1022     unsigned& first_unopen_element_index) const {
1023   if (active_formatting_elements_.IsEmpty())
1024     return false;
1025   unsigned index = active_formatting_elements_.size();
1026   do {
1027     --index;
1028     const HTMLFormattingElementList::Entry& entry =
1029         active_formatting_elements_.at(index);
1030     if (entry.IsMarker() || open_elements_.Contains(entry.GetElement())) {
1031       first_unopen_element_index = index + 1;
1032       return first_unopen_element_index < active_formatting_elements_.size();
1033     }
1034   } while (index);
1035   first_unopen_element_index = index;
1036   return true;
1037 }
1038 
ReconstructTheActiveFormattingElements()1039 void HTMLConstructionSite::ReconstructTheActiveFormattingElements() {
1040   unsigned first_unopen_element_index;
1041   if (!IndexOfFirstUnopenFormattingElement(first_unopen_element_index))
1042     return;
1043 
1044   unsigned unopen_entry_index = first_unopen_element_index;
1045   DCHECK_LT(unopen_entry_index, active_formatting_elements_.size());
1046   for (; unopen_entry_index < active_formatting_elements_.size();
1047        ++unopen_entry_index) {
1048     HTMLFormattingElementList::Entry& unopened_entry =
1049         active_formatting_elements_.at(unopen_entry_index);
1050     HTMLStackItem* reconstructed =
1051         CreateElementFromSavedToken(unopened_entry.StackItem());
1052     AttachLater(CurrentNode(), reconstructed->GetNode());
1053     open_elements_.Push(reconstructed);
1054     unopened_entry.ReplaceElement(reconstructed);
1055   }
1056 }
1057 
GenerateImpliedEndTagsWithExclusion(const AtomicString & tag_name)1058 void HTMLConstructionSite::GenerateImpliedEndTagsWithExclusion(
1059     const AtomicString& tag_name) {
1060   while (HasImpliedEndTag(CurrentStackItem()) &&
1061          !CurrentStackItem()->MatchesHTMLTag(tag_name))
1062     open_elements_.Pop();
1063 }
1064 
GenerateImpliedEndTags()1065 void HTMLConstructionSite::GenerateImpliedEndTags() {
1066   while (HasImpliedEndTag(CurrentStackItem()))
1067     open_elements_.Pop();
1068 }
1069 
InQuirksMode()1070 bool HTMLConstructionSite::InQuirksMode() {
1071   return in_quirks_mode_;
1072 }
1073 
1074 // Adjusts |task| to match the "adjusted insertion location" determined by the
1075 // foster parenting algorithm, laid out as the substeps of step 2 of
1076 // https://html.spec.whatwg.org/C/#appropriate-place-for-inserting-a-node
FindFosterSite(HTMLConstructionSiteTask & task)1077 void HTMLConstructionSite::FindFosterSite(HTMLConstructionSiteTask& task) {
1078   // 2.1
1079   HTMLElementStack::ElementRecord* last_template =
1080       open_elements_.Topmost(html_names::kTemplateTag.LocalName());
1081 
1082   // 2.2
1083   HTMLElementStack::ElementRecord* last_table =
1084       open_elements_.Topmost(html_names::kTableTag.LocalName());
1085 
1086   // 2.3
1087   if (last_template && (!last_table || last_template->IsAbove(last_table))) {
1088     task.parent = last_template->GetElement();
1089     return;
1090   }
1091 
1092   // 2.4
1093   if (!last_table) {
1094     // Fragment case
1095     task.parent = open_elements_.RootNode();  // DocumentFragment
1096     return;
1097   }
1098 
1099   // 2.5
1100   if (ContainerNode* parent = last_table->GetElement()->parentNode()) {
1101     task.parent = parent;
1102     task.next_child = last_table->GetElement();
1103     return;
1104   }
1105 
1106   // 2.6, 2.7
1107   task.parent = last_table->Next()->GetElement();
1108 }
1109 
ShouldFosterParent() const1110 bool HTMLConstructionSite::ShouldFosterParent() const {
1111   return redirect_attach_to_foster_parent_ &&
1112          CurrentStackItem()->IsElementNode() &&
1113          CurrentStackItem()->CausesFosterParenting();
1114 }
1115 
FosterParent(Node * node)1116 void HTMLConstructionSite::FosterParent(Node* node) {
1117   HTMLConstructionSiteTask task(HTMLConstructionSiteTask::kInsert);
1118   FindFosterSite(task);
1119   task.child = node;
1120   DCHECK(task.parent);
1121   QueueTask(task);
1122 }
1123 
Trace(Visitor * visitor)1124 void HTMLConstructionSite::PendingText::Trace(Visitor* visitor) {
1125   visitor->Trace(parent);
1126   visitor->Trace(next_child);
1127 }
1128 
1129 }  // namespace blink
1130