1 /*
2 * Copyright (C) 2010 Google, Inc. All Rights Reserved.
3 * Copyright (C) 2011 Apple Inc. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GOOGLE INC. OR
18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27 #include "third_party/blink/renderer/core/html/parser/html_construction_site.h"
28
29 #include <limits>
30 #include "third_party/blink/renderer/core/dom/comment.h"
31 #include "third_party/blink/renderer/core/dom/document_fragment.h"
32 #include "third_party/blink/renderer/core/dom/document_type.h"
33 #include "third_party/blink/renderer/core/dom/element.h"
34 #include "third_party/blink/renderer/core/dom/element_traversal.h"
35 #include "third_party/blink/renderer/core/dom/node.h"
36 #include "third_party/blink/renderer/core/dom/template_content_document_fragment.h"
37 #include "third_party/blink/renderer/core/dom/text.h"
38 #include "third_party/blink/renderer/core/dom/throw_on_dynamic_markup_insertion_count_incrementer.h"
39 #include "third_party/blink/renderer/core/frame/local_dom_window.h"
40 #include "third_party/blink/renderer/core/frame/local_frame.h"
41 #include "third_party/blink/renderer/core/frame/local_frame_client.h"
42 #include "third_party/blink/renderer/core/html/custom/ce_reactions_scope.h"
43 #include "third_party/blink/renderer/core/html/custom/custom_element.h"
44 #include "third_party/blink/renderer/core/html/custom/custom_element_definition.h"
45 #include "third_party/blink/renderer/core/html/custom/custom_element_descriptor.h"
46 #include "third_party/blink/renderer/core/html/custom/custom_element_registry.h"
47 #include "third_party/blink/renderer/core/html/forms/form_associated.h"
48 #include "third_party/blink/renderer/core/html/forms/html_form_element.h"
49 #include "third_party/blink/renderer/core/html/html_html_element.h"
50 #include "third_party/blink/renderer/core/html/html_plugin_element.h"
51 #include "third_party/blink/renderer/core/html/html_script_element.h"
52 #include "third_party/blink/renderer/core/html/html_style_element.h"
53 #include "third_party/blink/renderer/core/html/html_template_element.h"
54 #include "third_party/blink/renderer/core/html/parser/atomic_html_token.h"
55 #include "third_party/blink/renderer/core/html/parser/html_parser_idioms.h"
56 #include "third_party/blink/renderer/core/html/parser/html_parser_reentry_permit.h"
57 #include "third_party/blink/renderer/core/html/parser/html_stack_item.h"
58 #include "third_party/blink/renderer/core/html/parser/html_token.h"
59 #include "third_party/blink/renderer/core/html_element_factory.h"
60 #include "third_party/blink/renderer/core/html_names.h"
61 #include "third_party/blink/renderer/core/loader/frame_loader.h"
62 #include "third_party/blink/renderer/core/script/ignore_destructive_write_count_incrementer.h"
63 #include "third_party/blink/renderer/core/svg/svg_script_element.h"
64 #include "third_party/blink/renderer/platform/bindings/microtask.h"
65 #include "third_party/blink/renderer/platform/bindings/v8_per_isolate_data.h"
66 #include "third_party/blink/renderer/platform/heap/heap.h"
67 #include "third_party/blink/renderer/platform/instrumentation/use_counter.h"
68 #include "third_party/blink/renderer/platform/text/text_break_iterator.h"
69
70 namespace blink {
71
// Depth threshold for the stack of open elements. AttachLater() compares the
// stack depth against this value and, once it is exceeded, attaches new nodes
// to the parent's parent (when there is one) instead of nesting them deeper.
static constexpr unsigned kMaximumHTMLParserDOMTreeDepth = 512;
73
SetAttributes(Element * element,AtomicHTMLToken * token,ParserContentPolicy parser_content_policy)74 static inline void SetAttributes(Element* element,
75 AtomicHTMLToken* token,
76 ParserContentPolicy parser_content_policy) {
77 if (!ScriptingContentIsAllowed(parser_content_policy))
78 element->StripScriptingAttributes(token->Attributes());
79 element->ParserSetAttributes(token->Attributes());
80 if (token->HasDuplicateAttribute()) {
81 UseCounter::Count(element->GetDocument(), WebFeature::kDuplicatedAttribute);
82 element->SetHasDuplicateAttributes();
83 }
84 }
85
HasImpliedEndTag(const HTMLStackItem * item)86 static bool HasImpliedEndTag(const HTMLStackItem* item) {
87 return item->HasTagName(html_names::kDdTag) ||
88 item->HasTagName(html_names::kDtTag) ||
89 item->HasTagName(html_names::kLiTag) ||
90 item->HasTagName(html_names::kOptionTag) ||
91 item->HasTagName(html_names::kOptgroupTag) ||
92 item->HasTagName(html_names::kPTag) ||
93 item->HasTagName(html_names::kRbTag) ||
94 item->HasTagName(html_names::kRpTag) ||
95 item->HasTagName(html_names::kRtTag) ||
96 item->HasTagName(html_names::kRTCTag);
97 }
98
ShouldUseLengthLimit(const ContainerNode & node)99 static bool ShouldUseLengthLimit(const ContainerNode& node) {
100 return !IsA<HTMLScriptElement>(node) && !IsA<HTMLStyleElement>(node) &&
101 !IsA<SVGScriptElement>(node);
102 }
103
TextLengthLimitForContainer(const ContainerNode & node)104 static unsigned TextLengthLimitForContainer(const ContainerNode& node) {
105 return ShouldUseLengthLimit(node) ? Text::kDefaultLengthLimit
106 : std::numeric_limits<unsigned>::max();
107 }
108
IsAllWhitespace(const String & string)109 static inline bool IsAllWhitespace(const String& string) {
110 return string.IsAllSpecialCharacters<IsHTMLSpace<UChar>>();
111 }
112
Insert(HTMLConstructionSiteTask & task)113 static inline void Insert(HTMLConstructionSiteTask& task) {
114 if (auto* template_element = DynamicTo<HTMLTemplateElement>(*task.parent))
115 task.parent = template_element->content();
116
117 // https://html.spec.whatwg.org/C/#insert-a-foreign-element
118 // 3.1, (3) Push (pop) an element queue
119 CEReactionsScope reactions;
120 if (task.next_child)
121 task.parent->ParserInsertBefore(task.child.Get(), *task.next_child);
122 else
123 task.parent->ParserAppendChild(task.child.Get());
124 }
125
ExecuteInsertTask(HTMLConstructionSiteTask & task)126 static inline void ExecuteInsertTask(HTMLConstructionSiteTask& task) {
127 DCHECK_EQ(task.operation, HTMLConstructionSiteTask::kInsert);
128
129 Insert(task);
130 if (auto* child = DynamicTo<Element>(task.child.Get())) {
131 child->BeginParsingChildren();
132 if (task.self_closing)
133 child->FinishParsingChildren();
134 }
135 }
136
ExecuteInsertTextTask(HTMLConstructionSiteTask & task)137 static inline void ExecuteInsertTextTask(HTMLConstructionSiteTask& task) {
138 DCHECK_EQ(task.operation, HTMLConstructionSiteTask::kInsertText);
139
140 // Merge text nodes into previous ones if possible:
141 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#insert-a-character
142 auto* new_text = To<Text>(task.child.Get());
143 Node* previous_child = task.next_child ? task.next_child->previousSibling()
144 : task.parent->lastChild();
145 if (auto* previous_text = DynamicTo<Text>(previous_child)) {
146 unsigned length_limit = TextLengthLimitForContainer(*task.parent);
147 if (previous_text->length() + new_text->length() < length_limit) {
148 previous_text->ParserAppendData(new_text->data());
149 return;
150 }
151 }
152
153 Insert(task);
154 }
155
ExecuteReparentTask(HTMLConstructionSiteTask & task)156 static inline void ExecuteReparentTask(HTMLConstructionSiteTask& task) {
157 DCHECK_EQ(task.operation, HTMLConstructionSiteTask::kReparent);
158
159 task.parent->ParserAppendChild(task.child);
160 }
161
ExecuteInsertAlreadyParsedChildTask(HTMLConstructionSiteTask & task)162 static inline void ExecuteInsertAlreadyParsedChildTask(
163 HTMLConstructionSiteTask& task) {
164 DCHECK_EQ(task.operation,
165 HTMLConstructionSiteTask::kInsertAlreadyParsedChild);
166
167 Insert(task);
168 }
169
ExecuteTakeAllChildrenTask(HTMLConstructionSiteTask & task)170 static inline void ExecuteTakeAllChildrenTask(HTMLConstructionSiteTask& task) {
171 DCHECK_EQ(task.operation, HTMLConstructionSiteTask::kTakeAllChildren);
172
173 task.parent->ParserTakeAllChildrenFrom(*task.OldParent());
174 }
175
ExecuteTask(HTMLConstructionSiteTask & task)176 void HTMLConstructionSite::ExecuteTask(HTMLConstructionSiteTask& task) {
177 DCHECK(task_queue_.IsEmpty());
178 if (task.operation == HTMLConstructionSiteTask::kInsert)
179 return ExecuteInsertTask(task);
180
181 if (task.operation == HTMLConstructionSiteTask::kInsertText)
182 return ExecuteInsertTextTask(task);
183
184 // All the cases below this point are only used by the adoption agency.
185
186 if (task.operation == HTMLConstructionSiteTask::kInsertAlreadyParsedChild)
187 return ExecuteInsertAlreadyParsedChildTask(task);
188
189 if (task.operation == HTMLConstructionSiteTask::kReparent)
190 return ExecuteReparentTask(task);
191
192 if (task.operation == HTMLConstructionSiteTask::kTakeAllChildren)
193 return ExecuteTakeAllChildrenTask(task);
194
195 NOTREACHED();
196 }
197
198 // This is only needed for TextDocuments where we might have text nodes
199 // approaching the default length limit (~64k) and we don't want to break a text
200 // node in the middle of a combining character.
FindBreakIndexBetween(const StringBuilder & string,unsigned current_position,unsigned proposed_break_index)201 static unsigned FindBreakIndexBetween(const StringBuilder& string,
202 unsigned current_position,
203 unsigned proposed_break_index) {
204 DCHECK_LT(current_position, proposed_break_index);
205 DCHECK_LE(proposed_break_index, string.length());
206 // The end of the string is always a valid break.
207 if (proposed_break_index == string.length())
208 return proposed_break_index;
209
210 // Latin-1 does not have breakable boundaries. If we ever moved to a different
211 // 8-bit encoding this could be wrong.
212 if (string.Is8Bit())
213 return proposed_break_index;
214
215 const UChar* break_search_characters =
216 string.Characters16() + current_position;
217 // We need at least two characters look-ahead to account for UTF-16
218 // surrogates, but can't search off the end of the buffer!
219 unsigned break_search_length =
220 std::min(proposed_break_index - current_position + 2,
221 string.length() - current_position);
222 NonSharedCharacterBreakIterator it(break_search_characters,
223 break_search_length);
224
225 if (it.IsBreak(proposed_break_index - current_position))
226 return proposed_break_index;
227
228 int adjusted_break_index_in_substring =
229 it.Preceding(proposed_break_index - current_position);
230 if (adjusted_break_index_in_substring > 0)
231 return current_position + adjusted_break_index_in_substring;
232 // We failed to find a breakable point, let the caller figure out what to do.
233 return 0;
234 }
235
// Returns |string| routed through AtomicString when it is known
// (kAllWhitespace) or found (kWhitespaceUnknown plus a scan) to consist only of
// whitespace, so that repeated whitespace runs share one string. Any other
// string is returned unchanged.
static String AtomizeIfAllWhitespace(const String& string,
                                     WhitespaceMode whitespace_mode) {
  bool all_whitespace = whitespace_mode == kAllWhitespace;
  if (!all_whitespace && whitespace_mode == kWhitespaceUnknown)
    all_whitespace = IsAllWhitespace(string);

  if (!all_whitespace)
    return string;
  return AtomicString(string).GetString();
}
245
FlushPendingText(FlushMode mode)246 void HTMLConstructionSite::FlushPendingText(FlushMode mode) {
247 if (pending_text_.IsEmpty())
248 return;
249
250 if (mode == kFlushIfAtTextLimit &&
251 !ShouldUseLengthLimit(*pending_text_.parent))
252 return;
253
254 PendingText pending_text;
255 // Hold onto the current pending text on the stack so that queueTask doesn't
256 // recurse infinitely.
257 pending_text_.Swap(pending_text);
258 DCHECK(pending_text_.IsEmpty());
259
260 // Splitting text nodes into smaller chunks contradicts HTML5 spec, but is
261 // necessary for performance, see:
262 // https://bugs.webkit.org/show_bug.cgi?id=55898
263 unsigned length_limit = TextLengthLimitForContainer(*pending_text.parent);
264
265 unsigned current_position = 0;
266 const StringBuilder& string = pending_text.string_builder;
267 while (current_position < string.length()) {
268 unsigned proposed_break_index =
269 std::min(current_position + length_limit, string.length());
270 unsigned break_index =
271 FindBreakIndexBetween(string, current_position, proposed_break_index);
272 DCHECK_LE(break_index, string.length());
273 String substring =
274 string.Substring(current_position, break_index - current_position);
275 substring = AtomizeIfAllWhitespace(substring, pending_text.whitespace_mode);
276
277 HTMLConstructionSiteTask task(HTMLConstructionSiteTask::kInsertText);
278 task.parent = pending_text.parent;
279 task.next_child = pending_text.next_child;
280 task.child = Text::Create(task.parent->GetDocument(), substring);
281 QueueTask(task);
282
283 DCHECK_GT(break_index, current_position);
284 DCHECK_EQ(break_index - current_position, substring.length());
285 DCHECK_EQ(To<Text>(task.child.Get())->length(), substring.length());
286 current_position = break_index;
287 }
288 }
289
QueueTask(const HTMLConstructionSiteTask & task)290 void HTMLConstructionSite::QueueTask(const HTMLConstructionSiteTask& task) {
291 FlushPendingText(kFlushAlways);
292 DCHECK(pending_text_.IsEmpty());
293 task_queue_.push_back(task);
294 }
295
AttachLater(ContainerNode * parent,Node * child,bool self_closing)296 void HTMLConstructionSite::AttachLater(ContainerNode* parent,
297 Node* child,
298 bool self_closing) {
299 auto* element = DynamicTo<Element>(child);
300 DCHECK(ScriptingContentIsAllowed(parser_content_policy_) || !element ||
301 !element->IsScriptElement());
302 DCHECK(PluginContentIsAllowed(parser_content_policy_) ||
303 !IsA<HTMLPlugInElement>(child));
304
305 HTMLConstructionSiteTask task(HTMLConstructionSiteTask::kInsert);
306 task.parent = parent;
307 task.child = child;
308 task.self_closing = self_closing;
309
310 if (ShouldFosterParent()) {
311 FosterParent(task.child);
312 return;
313 }
314
315 // Add as a sibling of the parent if we have reached the maximum depth
316 // allowed.
317 if (open_elements_.StackDepth() > kMaximumHTMLParserDOMTreeDepth &&
318 task.parent->parentNode())
319 task.parent = task.parent->parentNode();
320
321 DCHECK(task.parent);
322 QueueTask(task);
323 }
324
ExecuteQueuedTasks()325 void HTMLConstructionSite::ExecuteQueuedTasks() {
326 // This has no affect on pendingText, and we may have pendingText remaining
327 // after executing all other queued tasks.
328 const size_t size = task_queue_.size();
329 if (!size)
330 return;
331
332 // Fast path for when |size| is 1, which is the common case
333 if (size == 1) {
334 HTMLConstructionSiteTask task = task_queue_.front();
335 task_queue_.pop_back();
336 ExecuteTask(task);
337 return;
338 }
339
340 // Copy the task queue into a local variable in case executeTask re-enters the
341 // parser.
342 TaskQueue queue;
343 queue.swap(task_queue_);
344
345 for (auto& task : queue)
346 ExecuteTask(task);
347
348 // We might be detached now.
349 }
350
HTMLConstructionSite(HTMLParserReentryPermit * reentry_permit,Document & document,ParserContentPolicy parser_content_policy)351 HTMLConstructionSite::HTMLConstructionSite(
352 HTMLParserReentryPermit* reentry_permit,
353 Document& document,
354 ParserContentPolicy parser_content_policy)
355 : reentry_permit_(reentry_permit),
356 document_(&document),
357 attachment_root_(document),
358 parser_content_policy_(parser_content_policy),
359 is_parsing_fragment_(false),
360 redirect_attach_to_foster_parent_(false),
361 in_quirks_mode_(document.InQuirksMode()) {
362 DCHECK(document_->IsHTMLDocument() || document_->IsXHTMLDocument());
363 }
364
InitFragmentParsing(DocumentFragment * fragment,Element * context_element)365 void HTMLConstructionSite::InitFragmentParsing(DocumentFragment* fragment,
366 Element* context_element) {
367 DCHECK(context_element);
368 DCHECK_EQ(document_, &fragment->GetDocument());
369 DCHECK_EQ(in_quirks_mode_, fragment->GetDocument().InQuirksMode());
370 DCHECK(!is_parsing_fragment_);
371 DCHECK(!form_);
372
373 attachment_root_ = fragment;
374 is_parsing_fragment_ = true;
375
376 if (!context_element->GetDocument().IsTemplateDocument())
377 form_ = Traversal<HTMLFormElement>::FirstAncestorOrSelf(*context_element);
378 }
379
~HTMLConstructionSite()380 HTMLConstructionSite::~HTMLConstructionSite() {
381 // Depending on why we're being destroyed it might be OK to forget queued
382 // tasks, but currently we don't expect to.
383 DCHECK(task_queue_.IsEmpty());
384 // Currently we assume that text will never be the last token in the document
385 // and that we'll always queue some additional task to cause it to flush.
386 DCHECK(pending_text_.IsEmpty());
387 }
388
Trace(Visitor * visitor)389 void HTMLConstructionSite::Trace(Visitor* visitor) {
390 visitor->Trace(document_);
391 visitor->Trace(attachment_root_);
392 visitor->Trace(head_);
393 visitor->Trace(form_);
394 visitor->Trace(open_elements_);
395 visitor->Trace(active_formatting_elements_);
396 visitor->Trace(task_queue_);
397 visitor->Trace(pending_text_);
398 }
399
Detach()400 void HTMLConstructionSite::Detach() {
401 // FIXME: We'd like to ASSERT here that we're canceling and not just
402 // discarding text that really should have made it into the DOM earlier, but
403 // there doesn't seem to be a nice way to do that.
404 pending_text_.Discard();
405 document_ = nullptr;
406 attachment_root_ = nullptr;
407 }
408
TakeForm()409 HTMLFormElement* HTMLConstructionSite::TakeForm() {
410 return form_.Release();
411 }
412
InsertHTMLHtmlStartTagBeforeHTML(AtomicHTMLToken * token)413 void HTMLConstructionSite::InsertHTMLHtmlStartTagBeforeHTML(
414 AtomicHTMLToken* token) {
415 DCHECK(document_);
416 HTMLHtmlElement* element;
417 if (const auto* is_attribute = token->GetAttributeItem(html_names::kIsAttr)) {
418 element = To<HTMLHtmlElement>(document_->CreateElement(
419 html_names::kHTMLTag, GetCreateElementFlags(), is_attribute->Value()));
420 } else {
421 element = MakeGarbageCollected<HTMLHtmlElement>(*document_);
422 }
423 SetAttributes(element, token, parser_content_policy_);
424 AttachLater(attachment_root_, element);
425 open_elements_.PushHTMLHtmlElement(
426 MakeGarbageCollected<HTMLStackItem>(element, token));
427
428 ExecuteQueuedTasks();
429 element->InsertedByParser();
430 }
431
MergeAttributesFromTokenIntoElement(AtomicHTMLToken * token,Element * element)432 void HTMLConstructionSite::MergeAttributesFromTokenIntoElement(
433 AtomicHTMLToken* token,
434 Element* element) {
435 if (token->Attributes().IsEmpty())
436 return;
437
438 for (const auto& token_attribute : token->Attributes()) {
439 if (element->AttributesWithoutUpdate().FindIndex(
440 token_attribute.GetName()) == kNotFound)
441 element->setAttribute(token_attribute.GetName(), token_attribute.Value());
442 }
443 }
444
InsertHTMLHtmlStartTagInBody(AtomicHTMLToken * token)445 void HTMLConstructionSite::InsertHTMLHtmlStartTagInBody(
446 AtomicHTMLToken* token) {
447 // Fragments do not have a root HTML element, so any additional HTML elements
448 // encountered during fragment parsing should be ignored.
449 if (is_parsing_fragment_)
450 return;
451
452 MergeAttributesFromTokenIntoElement(token, open_elements_.HtmlElement());
453 }
454
InsertHTMLBodyStartTagInBody(AtomicHTMLToken * token)455 void HTMLConstructionSite::InsertHTMLBodyStartTagInBody(
456 AtomicHTMLToken* token) {
457 MergeAttributesFromTokenIntoElement(token, open_elements_.BodyElement());
458 }
459
SetDefaultCompatibilityMode()460 void HTMLConstructionSite::SetDefaultCompatibilityMode() {
461 if (is_parsing_fragment_)
462 return;
463 SetCompatibilityMode(Document::kQuirksMode);
464 }
465
SetCompatibilityMode(Document::CompatibilityMode mode)466 void HTMLConstructionSite::SetCompatibilityMode(
467 Document::CompatibilityMode mode) {
468 in_quirks_mode_ = (mode == Document::kQuirksMode);
469 document_->SetCompatibilityMode(mode);
470 }
471
SetCompatibilityModeFromDoctype(const String & name,const String & public_id,const String & system_id)472 void HTMLConstructionSite::SetCompatibilityModeFromDoctype(
473 const String& name,
474 const String& public_id,
475 const String& system_id) {
476 // There are three possible compatibility modes:
477 // Quirks - quirks mode emulates WinIE and NS4. CSS parsing is also relaxed in
478 // this mode, e.g., unit types can be omitted from numbers.
479 // Limited Quirks - This mode is identical to no-quirks mode except for its
480 // treatment of line-height in the inline box model.
481 // No Quirks - no quirks apply. Web pages will obey the specifications to the
482 // letter.
483
484 // Check for Quirks Mode.
485 if (name != "html" ||
486 public_id.StartsWithIgnoringASCIICase(
487 "+//Silmaril//dtd html Pro v0r11 19970101//") ||
488 public_id.StartsWithIgnoringASCIICase(
489 "-//AdvaSoft Ltd//DTD HTML 3.0 asWedit + extensions//") ||
490 public_id.StartsWithIgnoringASCIICase(
491 "-//AS//DTD HTML 3.0 asWedit + extensions//") ||
492 public_id.StartsWithIgnoringASCIICase(
493 "-//IETF//DTD HTML 2.0 Level 1//") ||
494 public_id.StartsWithIgnoringASCIICase(
495 "-//IETF//DTD HTML 2.0 Level 2//") ||
496 public_id.StartsWithIgnoringASCIICase(
497 "-//IETF//DTD HTML 2.0 Strict Level 1//") ||
498 public_id.StartsWithIgnoringASCIICase(
499 "-//IETF//DTD HTML 2.0 Strict Level 2//") ||
500 public_id.StartsWithIgnoringASCIICase("-//IETF//DTD HTML 2.0 Strict//") ||
501 public_id.StartsWithIgnoringASCIICase("-//IETF//DTD HTML 2.0//") ||
502 public_id.StartsWithIgnoringASCIICase("-//IETF//DTD HTML 2.1E//") ||
503 public_id.StartsWithIgnoringASCIICase("-//IETF//DTD HTML 3.0//") ||
504 public_id.StartsWithIgnoringASCIICase("-//IETF//DTD HTML 3.2 Final//") ||
505 public_id.StartsWithIgnoringASCIICase("-//IETF//DTD HTML 3.2//") ||
506 public_id.StartsWithIgnoringASCIICase("-//IETF//DTD HTML 3//") ||
507 public_id.StartsWithIgnoringASCIICase("-//IETF//DTD HTML Level 0//") ||
508 public_id.StartsWithIgnoringASCIICase("-//IETF//DTD HTML Level 1//") ||
509 public_id.StartsWithIgnoringASCIICase("-//IETF//DTD HTML Level 2//") ||
510 public_id.StartsWithIgnoringASCIICase("-//IETF//DTD HTML Level 3//") ||
511 public_id.StartsWithIgnoringASCIICase(
512 "-//IETF//DTD HTML Strict Level 0//") ||
513 public_id.StartsWithIgnoringASCIICase(
514 "-//IETF//DTD HTML Strict Level 1//") ||
515 public_id.StartsWithIgnoringASCIICase(
516 "-//IETF//DTD HTML Strict Level 2//") ||
517 public_id.StartsWithIgnoringASCIICase(
518 "-//IETF//DTD HTML Strict Level 3//") ||
519 public_id.StartsWithIgnoringASCIICase("-//IETF//DTD HTML Strict//") ||
520 public_id.StartsWithIgnoringASCIICase("-//IETF//DTD HTML//") ||
521 public_id.StartsWithIgnoringASCIICase(
522 "-//Metrius//DTD Metrius Presentational//") ||
523 public_id.StartsWithIgnoringASCIICase(
524 "-//Microsoft//DTD Internet Explorer 2.0 HTML Strict//") ||
525 public_id.StartsWithIgnoringASCIICase(
526 "-//Microsoft//DTD Internet Explorer 2.0 HTML//") ||
527 public_id.StartsWithIgnoringASCIICase(
528 "-//Microsoft//DTD Internet Explorer 2.0 Tables//") ||
529 public_id.StartsWithIgnoringASCIICase(
530 "-//Microsoft//DTD Internet Explorer 3.0 HTML Strict//") ||
531 public_id.StartsWithIgnoringASCIICase(
532 "-//Microsoft//DTD Internet Explorer 3.0 HTML//") ||
533 public_id.StartsWithIgnoringASCIICase(
534 "-//Microsoft//DTD Internet Explorer 3.0 Tables//") ||
535 public_id.StartsWithIgnoringASCIICase(
536 "-//Netscape Comm. Corp.//DTD HTML//") ||
537 public_id.StartsWithIgnoringASCIICase(
538 "-//Netscape Comm. Corp.//DTD Strict HTML//") ||
539 public_id.StartsWithIgnoringASCIICase(
540 "-//O'Reilly and Associates//DTD HTML 2.0//") ||
541 public_id.StartsWithIgnoringASCIICase(
542 "-//O'Reilly and Associates//DTD HTML Extended 1.0//") ||
543 public_id.StartsWithIgnoringASCIICase(
544 "-//O'Reilly and Associates//DTD HTML Extended Relaxed 1.0//") ||
545 public_id.StartsWithIgnoringASCIICase(
546 "-//SoftQuad Software//DTD HoTMetaL PRO "
547 "6.0::19990601::extensions to HTML 4.0//") ||
548 public_id.StartsWithIgnoringASCIICase(
549 "-//SoftQuad//DTD HoTMetaL PRO "
550 "4.0::19971010::extensions to HTML 4.0//") ||
551 public_id.StartsWithIgnoringASCIICase(
552 "-//Spyglass//DTD HTML 2.0 Extended//") ||
553 public_id.StartsWithIgnoringASCIICase(
554 "-//SQ//DTD HTML 2.0 HoTMetaL + extensions//") ||
555 public_id.StartsWithIgnoringASCIICase(
556 "-//Sun Microsystems Corp.//DTD HotJava HTML//") ||
557 public_id.StartsWithIgnoringASCIICase(
558 "-//Sun Microsystems Corp.//DTD HotJava Strict HTML//") ||
559 public_id.StartsWithIgnoringASCIICase(
560 "-//W3C//DTD HTML 3 1995-03-24//") ||
561 public_id.StartsWithIgnoringASCIICase("-//W3C//DTD HTML 3.2 Draft//") ||
562 public_id.StartsWithIgnoringASCIICase("-//W3C//DTD HTML 3.2 Final//") ||
563 public_id.StartsWithIgnoringASCIICase("-//W3C//DTD HTML 3.2//") ||
564 public_id.StartsWithIgnoringASCIICase("-//W3C//DTD HTML 3.2S Draft//") ||
565 public_id.StartsWithIgnoringASCIICase(
566 "-//W3C//DTD HTML 4.0 Frameset//") ||
567 public_id.StartsWithIgnoringASCIICase(
568 "-//W3C//DTD HTML 4.0 Transitional//") ||
569 public_id.StartsWithIgnoringASCIICase(
570 "-//W3C//DTD HTML Experimental 19960712//") ||
571 public_id.StartsWithIgnoringASCIICase(
572 "-//W3C//DTD HTML Experimental 970421//") ||
573 public_id.StartsWithIgnoringASCIICase("-//W3C//DTD W3 HTML//") ||
574 public_id.StartsWithIgnoringASCIICase("-//W3O//DTD W3 HTML 3.0//") ||
575 EqualIgnoringASCIICase(public_id,
576 "-//W3O//DTD W3 HTML Strict 3.0//EN//") ||
577 public_id.StartsWithIgnoringASCIICase(
578 "-//WebTechs//DTD Mozilla HTML 2.0//") ||
579 public_id.StartsWithIgnoringASCIICase(
580 "-//WebTechs//DTD Mozilla HTML//") ||
581 EqualIgnoringASCIICase(public_id, "-/W3C/DTD HTML 4.0 Transitional/EN") ||
582 EqualIgnoringASCIICase(public_id, "HTML") ||
583 EqualIgnoringASCIICase(
584 system_id,
585 "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd") ||
586 (system_id.IsEmpty() && public_id.StartsWithIgnoringASCIICase(
587 "-//W3C//DTD HTML 4.01 Frameset//")) ||
588 (system_id.IsEmpty() && public_id.StartsWithIgnoringASCIICase(
589 "-//W3C//DTD HTML 4.01 Transitional//"))) {
590 SetCompatibilityMode(Document::kQuirksMode);
591 return;
592 }
593
594 // Check for Limited Quirks Mode.
595 if (public_id.StartsWithIgnoringASCIICase(
596 "-//W3C//DTD XHTML 1.0 Frameset//") ||
597 public_id.StartsWithIgnoringASCIICase(
598 "-//W3C//DTD XHTML 1.0 Transitional//") ||
599 (!system_id.IsEmpty() && public_id.StartsWithIgnoringASCIICase(
600 "-//W3C//DTD HTML 4.01 Frameset//")) ||
601 (!system_id.IsEmpty() && public_id.StartsWithIgnoringASCIICase(
602 "-//W3C//DTD HTML 4.01 Transitional//"))) {
603 SetCompatibilityMode(Document::kLimitedQuirksMode);
604 return;
605 }
606
607 // Otherwise we are No Quirks Mode.
608 SetCompatibilityMode(Document::kNoQuirksMode);
609 }
610
ProcessEndOfFile()611 void HTMLConstructionSite::ProcessEndOfFile() {
612 DCHECK(CurrentNode());
613 Flush(kFlushAlways);
614 OpenElements()->PopAll();
615 }
616
FinishedParsing()617 void HTMLConstructionSite::FinishedParsing() {
618 // We shouldn't have any queued tasks but we might have pending text which we
619 // need to promote to tasks and execute.
620 DCHECK(task_queue_.IsEmpty());
621 Flush(kFlushAlways);
622 document_->FinishedParsing();
623 }
624
InsertDoctype(AtomicHTMLToken * token)625 void HTMLConstructionSite::InsertDoctype(AtomicHTMLToken* token) {
626 DCHECK_EQ(token->GetType(), HTMLToken::DOCTYPE);
627
628 const String& public_id =
629 StringImpl::Create8BitIfPossible(token->PublicIdentifier());
630 const String& system_id =
631 StringImpl::Create8BitIfPossible(token->SystemIdentifier());
632 auto* doctype = MakeGarbageCollected<DocumentType>(
633 document_, token->GetName(), public_id, system_id);
634 AttachLater(attachment_root_, doctype);
635
636 // DOCTYPE nodes are only processed when parsing fragments w/o
637 // contextElements, which never occurs. However, if we ever chose to support
638 // such, this code is subtly wrong, because context-less fragments can
639 // determine their own quirks mode, and thus change parsing rules (like <p>
640 // inside <table>). For now we ASSERT that we never hit this code in a
641 // fragment, as changing the owning document's compatibility mode would be
642 // wrong.
643 DCHECK(!is_parsing_fragment_);
644 if (is_parsing_fragment_)
645 return;
646
647 if (token->ForceQuirks())
648 SetCompatibilityMode(Document::kQuirksMode);
649 else {
650 SetCompatibilityModeFromDoctype(token->GetName(), public_id, system_id);
651 }
652 }
653
InsertComment(AtomicHTMLToken * token)654 void HTMLConstructionSite::InsertComment(AtomicHTMLToken* token) {
655 DCHECK_EQ(token->GetType(), HTMLToken::kComment);
656 AttachLater(CurrentNode(),
657 Comment::Create(OwnerDocumentForCurrentNode(), token->Comment()));
658 }
659
InsertCommentOnDocument(AtomicHTMLToken * token)660 void HTMLConstructionSite::InsertCommentOnDocument(AtomicHTMLToken* token) {
661 DCHECK_EQ(token->GetType(), HTMLToken::kComment);
662 DCHECK(document_);
663 AttachLater(attachment_root_, Comment::Create(*document_, token->Comment()));
664 }
665
InsertCommentOnHTMLHtmlElement(AtomicHTMLToken * token)666 void HTMLConstructionSite::InsertCommentOnHTMLHtmlElement(
667 AtomicHTMLToken* token) {
668 DCHECK_EQ(token->GetType(), HTMLToken::kComment);
669 ContainerNode* parent = open_elements_.RootNode();
670 AttachLater(parent, Comment::Create(parent->GetDocument(), token->Comment()));
671 }
672
InsertHTMLHeadElement(AtomicHTMLToken * token)673 void HTMLConstructionSite::InsertHTMLHeadElement(AtomicHTMLToken* token) {
674 DCHECK(!ShouldFosterParent());
675 head_ = MakeGarbageCollected<HTMLStackItem>(
676 CreateElement(token, html_names::xhtmlNamespaceURI), token);
677 AttachLater(CurrentNode(), head_->GetElement());
678 open_elements_.PushHTMLHeadElement(head_);
679 }
680
InsertHTMLBodyElement(AtomicHTMLToken * token)681 void HTMLConstructionSite::InsertHTMLBodyElement(AtomicHTMLToken* token) {
682 DCHECK(!ShouldFosterParent());
683 Element* body = CreateElement(token, html_names::xhtmlNamespaceURI);
684 AttachLater(CurrentNode(), body);
685 open_elements_.PushHTMLBodyElement(
686 MakeGarbageCollected<HTMLStackItem>(body, token));
687 if (document_)
688 document_->WillInsertBody();
689 }
690
InsertHTMLFormElement(AtomicHTMLToken * token,bool is_demoted)691 void HTMLConstructionSite::InsertHTMLFormElement(AtomicHTMLToken* token,
692 bool is_demoted) {
693 auto* form_element =
694 To<HTMLFormElement>(CreateElement(token, html_names::xhtmlNamespaceURI));
695 if (!OpenElements()->HasTemplateInHTMLScope())
696 form_ = form_element;
697 if (is_demoted) {
698 UseCounter::Count(OwnerDocumentForCurrentNode(),
699 WebFeature::kDemotedFormElement);
700 }
701 AttachLater(CurrentNode(), form_element);
702 open_elements_.Push(MakeGarbageCollected<HTMLStackItem>(form_element, token));
703 }
704
InsertHTMLElement(AtomicHTMLToken * token)705 void HTMLConstructionSite::InsertHTMLElement(AtomicHTMLToken* token) {
706 Element* element = CreateElement(token, html_names::xhtmlNamespaceURI);
707 AttachLater(CurrentNode(), element);
708 open_elements_.Push(MakeGarbageCollected<HTMLStackItem>(element, token));
709 }
710
InsertSelfClosingHTMLElementDestroyingToken(AtomicHTMLToken * token)711 void HTMLConstructionSite::InsertSelfClosingHTMLElementDestroyingToken(
712 AtomicHTMLToken* token) {
713 DCHECK_EQ(token->GetType(), HTMLToken::kStartTag);
714 // Normally HTMLElementStack is responsible for calling finishParsingChildren,
715 // but self-closing elements are never in the element stack so the stack
716 // doesn't get a chance to tell them that we're done parsing their children.
717 AttachLater(CurrentNode(),
718 CreateElement(token, html_names::xhtmlNamespaceURI), true);
719 // FIXME: Do we want to acknowledge the token's self-closing flag?
720 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#acknowledge-self-closing-flag
721 }
722
InsertFormattingElement(AtomicHTMLToken * token)723 void HTMLConstructionSite::InsertFormattingElement(AtomicHTMLToken* token) {
724 // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#the-stack-of-open-elements
725 // Possible active formatting elements include:
726 // a, b, big, code, em, font, i, nobr, s, small, strike, strong, tt, and u.
727 InsertHTMLElement(token);
728 active_formatting_elements_.Append(CurrentElementRecord()->StackItem());
729 }
730
InsertScriptElement(AtomicHTMLToken * token)731 void HTMLConstructionSite::InsertScriptElement(AtomicHTMLToken* token) {
732 CreateElementFlags flags;
733 flags
734 // http://www.whatwg.org/specs/web-apps/current-work/multipage/scripting-1.html#already-started
735 // http://html5.org/specs/dom-parsing.html#dom-range-createcontextualfragment
736 // For createContextualFragment, the specifications say to mark it
737 // parser-inserted and already-started and later unmark them. However, we
738 // short circuit that logic to avoid the subtree traversal to find script
739 // elements since scripts can never see those flags or effects thereof.
740 .SetCreatedByParser(parser_content_policy_ !=
741 kAllowScriptingContentAndDoNotMarkAlreadyStarted)
742 .SetAlreadyStarted(is_parsing_fragment_ && flags.IsCreatedByParser());
743 HTMLScriptElement* element = nullptr;
744 if (const auto* is_attribute = token->GetAttributeItem(html_names::kIsAttr)) {
745 element = To<HTMLScriptElement>(OwnerDocumentForCurrentNode().CreateElement(
746 html_names::kScriptTag, flags, is_attribute->Value()));
747 } else {
748 element = MakeGarbageCollected<HTMLScriptElement>(
749 OwnerDocumentForCurrentNode(), flags);
750 }
751 SetAttributes(element, token, parser_content_policy_);
752 if (ScriptingContentIsAllowed(parser_content_policy_))
753 AttachLater(CurrentNode(), element);
754 open_elements_.Push(MakeGarbageCollected<HTMLStackItem>(element, token));
755 }
756
InsertForeignElement(AtomicHTMLToken * token,const AtomicString & namespace_uri)757 void HTMLConstructionSite::InsertForeignElement(
758 AtomicHTMLToken* token,
759 const AtomicString& namespace_uri) {
760 DCHECK_EQ(token->GetType(), HTMLToken::kStartTag);
761 // parseError when xmlns or xmlns:xlink are wrong.
762 DVLOG(1) << "Not implemented.";
763
764 Element* element = CreateElement(token, namespace_uri);
765 if (ScriptingContentIsAllowed(parser_content_policy_) ||
766 !element->IsScriptElement()) {
767 AttachLater(CurrentNode(), element, token->SelfClosing());
768 }
769 if (!token->SelfClosing()) {
770 open_elements_.Push(
771 MakeGarbageCollected<HTMLStackItem>(element, token, namespace_uri));
772 }
773 }
774
InsertTextNode(const StringView & string,WhitespaceMode whitespace_mode)775 void HTMLConstructionSite::InsertTextNode(const StringView& string,
776 WhitespaceMode whitespace_mode) {
777 HTMLConstructionSiteTask dummy_task(HTMLConstructionSiteTask::kInsert);
778 dummy_task.parent = CurrentNode();
779
780 if (ShouldFosterParent())
781 FindFosterSite(dummy_task);
782
783 // FIXME: This probably doesn't need to be done both here and in insert(Task).
784 if (auto* template_element =
785 DynamicTo<HTMLTemplateElement>(*dummy_task.parent))
786 dummy_task.parent = template_element->content();
787
788 // Unclear when parent != case occurs. Somehow we insert text into two
789 // separate nodes while processing the same Token. The nextChild !=
790 // dummy.nextChild case occurs whenever foster parenting happened and we hit a
791 // new text node "<table>a</table>b" In either case we have to flush the
792 // pending text into the task queue before making more.
793 if (!pending_text_.IsEmpty() &&
794 (pending_text_.parent != dummy_task.parent ||
795 pending_text_.next_child != dummy_task.next_child))
796 FlushPendingText(kFlushAlways);
797 pending_text_.Append(dummy_task.parent, dummy_task.next_child, string,
798 whitespace_mode);
799 }
800
Reparent(HTMLElementStack::ElementRecord * new_parent,HTMLElementStack::ElementRecord * child)801 void HTMLConstructionSite::Reparent(HTMLElementStack::ElementRecord* new_parent,
802 HTMLElementStack::ElementRecord* child) {
803 HTMLConstructionSiteTask task(HTMLConstructionSiteTask::kReparent);
804 task.parent = new_parent->GetNode();
805 task.child = child->GetNode();
806 QueueTask(task);
807 }
808
Reparent(HTMLElementStack::ElementRecord * new_parent,HTMLStackItem * child)809 void HTMLConstructionSite::Reparent(HTMLElementStack::ElementRecord* new_parent,
810 HTMLStackItem* child) {
811 HTMLConstructionSiteTask task(HTMLConstructionSiteTask::kReparent);
812 task.parent = new_parent->GetNode();
813 task.child = child->GetNode();
814 QueueTask(task);
815 }
816
InsertAlreadyParsedChild(HTMLStackItem * new_parent,HTMLElementStack::ElementRecord * child)817 void HTMLConstructionSite::InsertAlreadyParsedChild(
818 HTMLStackItem* new_parent,
819 HTMLElementStack::ElementRecord* child) {
820 if (new_parent->CausesFosterParenting()) {
821 FosterParent(child->GetNode());
822 return;
823 }
824
825 HTMLConstructionSiteTask task(
826 HTMLConstructionSiteTask::kInsertAlreadyParsedChild);
827 task.parent = new_parent->GetNode();
828 task.child = child->GetNode();
829 QueueTask(task);
830 }
831
TakeAllChildren(HTMLStackItem * new_parent,HTMLElementStack::ElementRecord * old_parent)832 void HTMLConstructionSite::TakeAllChildren(
833 HTMLStackItem* new_parent,
834 HTMLElementStack::ElementRecord* old_parent) {
835 HTMLConstructionSiteTask task(HTMLConstructionSiteTask::kTakeAllChildren);
836 task.parent = new_parent->GetNode();
837 task.child = old_parent->GetNode();
838 QueueTask(task);
839 }
840
GetCreateElementFlags() const841 CreateElementFlags HTMLConstructionSite::GetCreateElementFlags() const {
842 return is_parsing_fragment_ ? CreateElementFlags::ByFragmentParser()
843 : CreateElementFlags::ByParser();
844 }
845
OwnerDocumentForCurrentNode()846 Document& HTMLConstructionSite::OwnerDocumentForCurrentNode() {
847 if (auto* template_element = DynamicTo<HTMLTemplateElement>(*CurrentNode()))
848 return template_element->content()->GetDocument();
849 return CurrentNode()->GetDocument();
850 }
851
852 // "look up a custom element definition" for a token
853 // https://html.spec.whatwg.org/C/#look-up-a-custom-element-definition
LookUpCustomElementDefinition(Document & document,const QualifiedName & tag_name,const AtomicString & is)854 CustomElementDefinition* HTMLConstructionSite::LookUpCustomElementDefinition(
855 Document& document,
856 const QualifiedName& tag_name,
857 const AtomicString& is) {
858 // "1. If namespace is not the HTML namespace, return null."
859 if (tag_name.NamespaceURI() != html_names::xhtmlNamespaceURI)
860 return nullptr;
861
862 // "2. If document does not have a browsing context, return null."
863 LocalDOMWindow* window = document.ExecutingWindow();
864 if (!window)
865 return nullptr;
866
867 // "3. Let registry be document's browsing context's Window's
868 // CustomElementRegistry object."
869 CustomElementRegistry* registry = window->MaybeCustomElements();
870 if (!registry)
871 return nullptr;
872
873 const AtomicString& local_name = tag_name.LocalName();
874 const AtomicString& name = !is.IsNull() ? is : local_name;
875 CustomElementDescriptor descriptor(name, local_name);
876
877 // 4.-6.
878 return registry->DefinitionFor(descriptor);
879 }
880
881 // "create an element for a token"
882 // https://html.spec.whatwg.org/C/#create-an-element-for-the-token
CreateElement(AtomicHTMLToken * token,const AtomicString & namespace_uri)883 Element* HTMLConstructionSite::CreateElement(
884 AtomicHTMLToken* token,
885 const AtomicString& namespace_uri) {
886 // "1. Let document be intended parent's node document."
887 Document& document = OwnerDocumentForCurrentNode();
888
889 // "2. Let local name be the tag name of the token."
890 QualifiedName tag_name(g_null_atom, token->GetName(), namespace_uri);
891 // "3. Let is be the value of the "is" attribute in the given token ..." etc.
892 const Attribute* is_attribute = token->GetAttributeItem(html_names::kIsAttr);
893 const AtomicString& is = is_attribute ? is_attribute->Value() : g_null_atom;
894 // "4. Let definition be the result of looking up a custom element ..." etc.
895 auto* definition = LookUpCustomElementDefinition(document, tag_name, is);
896 // "5. If definition is non-null and the parser was not originally created
897 // for the HTML fragment parsing algorithm, then let will execute script
898 // be true."
899 bool will_execute_script = definition && !is_parsing_fragment_;
900
901 Element* element;
902
903 if (will_execute_script) {
904 // "6.1 Increment the document's throw-on-dynamic-insertion counter."
905 ThrowOnDynamicMarkupInsertionCountIncrementer
906 throw_on_dynamic_markup_insertions(&document);
907
908 // "6.2 If the JavaScript execution context stack is empty,
909 // then perform a microtask checkpoint."
910
911 // TODO(dominicc): This is the way the Blink HTML parser performs
912 // checkpoints, but note the spec is different--it talks about the
913 // JavaScript stack, not the script nesting level.
914 if (0u == reentry_permit_->ScriptNestingLevel())
915 Microtask::PerformCheckpoint(V8PerIsolateData::MainThreadIsolate());
916
917 // "6.3 Push a new element queue onto the custom element
918 // reactions stack."
919 CEReactionsScope reactions;
920
921 // "7. Let element be the result of creating an element given document,
922 // localName, given namespace, null, and is. If will execute script is true,
923 // set the synchronous custom elements flag; otherwise, leave it unset."
924 // TODO(crbug.com/1080673): We clear the CreatedbyParser flag here, so that
925 // elements get fully constructed. Some elements (e.g. HTMLInputElement)
926 // only partially construct themselves when created by the parser, but since
927 // this is a custom element, we need a fully-constructed element here.
928 element = definition->CreateElement(
929 document, tag_name, GetCreateElementFlags().SetCreatedByParser(false));
930
931 // "8. Append each attribute in the given token to element." We don't use
932 // setAttributes here because the custom element constructor may have
933 // manipulated attributes.
934 for (const auto& attribute : token->Attributes())
935 element->setAttribute(attribute.GetName(), attribute.Value());
936
937 // "9. If will execute script is true, then ..." etc. The CEReactionsScope
938 // and ThrowOnDynamicMarkupInsertionCountIncrementer destructors implement
939 // steps 9.1-3.
940 } else {
941 if (definition) {
942 DCHECK(GetCreateElementFlags().IsAsyncCustomElements());
943 element = definition->CreateElement(document, tag_name,
944 GetCreateElementFlags());
945 } else {
946 element = CustomElement::CreateUncustomizedOrUndefinedElement(
947 document, tag_name, GetCreateElementFlags(), is);
948 }
949 // Definition for the created element does not exist here and it cannot be
950 // custom or failed.
951 DCHECK_NE(element->GetCustomElementState(), CustomElementState::kCustom);
952 DCHECK_NE(element->GetCustomElementState(), CustomElementState::kFailed);
953
954 // TODO(dominicc): Move these steps so they happen for custom
955 // elements as well as built-in elements when customized built in
956 // elements are implemented for resettable, listed elements.
957
958 // 10. If element has an xmlns attribute in the XMLNS namespace
959 // whose value is not exactly the same as the element's namespace,
960 // that is a parse error. Similarly, if element has an xmlns:xlink
961 // attribute in the XMLNS namespace whose value is not the XLink
962 // Namespace, that is a parse error.
963
964 // TODO(dominicc): Implement step 10 when the HTML parser does
965 // something useful with parse errors.
966
967 // 11. If element is a resettable element, invoke its reset
968 // algorithm. (This initializes the element's value and
969 // checkedness based on the element's attributes.)
970 // TODO(dominicc): Implement step 11, resettable elements.
971
972 // 12. If element is a form-associated element, and the form
973 // element pointer is not null, and there is no template element
974 // on the stack of open elements, ...
975 auto* html_element = DynamicTo<HTMLElement>(element);
976 FormAssociated* form_associated_element =
977 html_element ? html_element->ToFormAssociatedOrNull() : nullptr;
978 if (form_associated_element && document.GetFrame() && form_.Get()) {
979 // ... and element is either not listed or doesn't have a form
980 // attribute, and the intended parent is in the same tree as the
981 // element pointed to by the form element pointer, associate
982 // element with the form element pointed to by the form element
983 // pointer, and suppress the running of the reset the form owner
984 // algorithm when the parser subsequently attempts to insert the
985 // element.
986
987 // TODO(dominicc): There are many differences to the spec here;
988 // some of them are observable:
989 //
990 // - The HTML spec tracks whether there is a template element on
991 // the stack both for manipulating the form element pointer
992 // and using it here.
993 // - FormAssociated::AssociateWith implementations don't do the
994 // "same tree" check; for example
995 // HTMLImageElement::AssociateWith just checks whether the form
996 // is in *a* tree. This check should be done here consistently.
997 // - ListedElement is a mixin; add IsListedElement and skip
998 // setting the form for listed attributes with form=. Instead
999 // we set attributes (step 8) out of order, after this step,
1000 // to reset the form association.
1001 form_associated_element->AssociateWith(form_.Get());
1002 }
1003 // "8. Append each attribute in the given token to element."
1004 SetAttributes(element, token, parser_content_policy_);
1005 }
1006
1007 return element;
1008 }
1009
CreateElementFromSavedToken(HTMLStackItem * item)1010 HTMLStackItem* HTMLConstructionSite::CreateElementFromSavedToken(
1011 HTMLStackItem* item) {
1012 Element* element;
1013 // NOTE: Moving from item -> token -> item copies the Attribute vector twice!
1014 AtomicHTMLToken fake_token(HTMLToken::kStartTag, item->LocalName(),
1015 item->Attributes());
1016 element = CreateElement(&fake_token, item->NamespaceURI());
1017 return MakeGarbageCollected<HTMLStackItem>(element, &fake_token,
1018 item->NamespaceURI());
1019 }
1020
IndexOfFirstUnopenFormattingElement(unsigned & first_unopen_element_index) const1021 bool HTMLConstructionSite::IndexOfFirstUnopenFormattingElement(
1022 unsigned& first_unopen_element_index) const {
1023 if (active_formatting_elements_.IsEmpty())
1024 return false;
1025 unsigned index = active_formatting_elements_.size();
1026 do {
1027 --index;
1028 const HTMLFormattingElementList::Entry& entry =
1029 active_formatting_elements_.at(index);
1030 if (entry.IsMarker() || open_elements_.Contains(entry.GetElement())) {
1031 first_unopen_element_index = index + 1;
1032 return first_unopen_element_index < active_formatting_elements_.size();
1033 }
1034 } while (index);
1035 first_unopen_element_index = index;
1036 return true;
1037 }
1038
ReconstructTheActiveFormattingElements()1039 void HTMLConstructionSite::ReconstructTheActiveFormattingElements() {
1040 unsigned first_unopen_element_index;
1041 if (!IndexOfFirstUnopenFormattingElement(first_unopen_element_index))
1042 return;
1043
1044 unsigned unopen_entry_index = first_unopen_element_index;
1045 DCHECK_LT(unopen_entry_index, active_formatting_elements_.size());
1046 for (; unopen_entry_index < active_formatting_elements_.size();
1047 ++unopen_entry_index) {
1048 HTMLFormattingElementList::Entry& unopened_entry =
1049 active_formatting_elements_.at(unopen_entry_index);
1050 HTMLStackItem* reconstructed =
1051 CreateElementFromSavedToken(unopened_entry.StackItem());
1052 AttachLater(CurrentNode(), reconstructed->GetNode());
1053 open_elements_.Push(reconstructed);
1054 unopened_entry.ReplaceElement(reconstructed);
1055 }
1056 }
1057
GenerateImpliedEndTagsWithExclusion(const AtomicString & tag_name)1058 void HTMLConstructionSite::GenerateImpliedEndTagsWithExclusion(
1059 const AtomicString& tag_name) {
1060 while (HasImpliedEndTag(CurrentStackItem()) &&
1061 !CurrentStackItem()->MatchesHTMLTag(tag_name))
1062 open_elements_.Pop();
1063 }
1064
GenerateImpliedEndTags()1065 void HTMLConstructionSite::GenerateImpliedEndTags() {
1066 while (HasImpliedEndTag(CurrentStackItem()))
1067 open_elements_.Pop();
1068 }
1069
InQuirksMode()1070 bool HTMLConstructionSite::InQuirksMode() {
1071 return in_quirks_mode_;
1072 }
1073
1074 // Adjusts |task| to match the "adjusted insertion location" determined by the
1075 // foster parenting algorithm, laid out as the substeps of step 2 of
1076 // https://html.spec.whatwg.org/C/#appropriate-place-for-inserting-a-node
FindFosterSite(HTMLConstructionSiteTask & task)1077 void HTMLConstructionSite::FindFosterSite(HTMLConstructionSiteTask& task) {
1078 // 2.1
1079 HTMLElementStack::ElementRecord* last_template =
1080 open_elements_.Topmost(html_names::kTemplateTag.LocalName());
1081
1082 // 2.2
1083 HTMLElementStack::ElementRecord* last_table =
1084 open_elements_.Topmost(html_names::kTableTag.LocalName());
1085
1086 // 2.3
1087 if (last_template && (!last_table || last_template->IsAbove(last_table))) {
1088 task.parent = last_template->GetElement();
1089 return;
1090 }
1091
1092 // 2.4
1093 if (!last_table) {
1094 // Fragment case
1095 task.parent = open_elements_.RootNode(); // DocumentFragment
1096 return;
1097 }
1098
1099 // 2.5
1100 if (ContainerNode* parent = last_table->GetElement()->parentNode()) {
1101 task.parent = parent;
1102 task.next_child = last_table->GetElement();
1103 return;
1104 }
1105
1106 // 2.6, 2.7
1107 task.parent = last_table->Next()->GetElement();
1108 }
1109
ShouldFosterParent() const1110 bool HTMLConstructionSite::ShouldFosterParent() const {
1111 return redirect_attach_to_foster_parent_ &&
1112 CurrentStackItem()->IsElementNode() &&
1113 CurrentStackItem()->CausesFosterParenting();
1114 }
1115
FosterParent(Node * node)1116 void HTMLConstructionSite::FosterParent(Node* node) {
1117 HTMLConstructionSiteTask task(HTMLConstructionSiteTask::kInsert);
1118 FindFosterSite(task);
1119 task.child = node;
1120 DCHECK(task.parent);
1121 QueueTask(task);
1122 }
1123
Trace(Visitor * visitor)1124 void HTMLConstructionSite::PendingText::Trace(Visitor* visitor) {
1125 visitor->Trace(parent);
1126 visitor->Trace(next_child);
1127 }
1128
1129 } // namespace blink
1130