1 /*
2 * Copyright (C) 2010 Google, Inc. All Rights Reserved.
3 * Copyright (C) 2011 Apple Inc. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GOOGLE INC. OR
18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27 #ifndef THIRD_PARTY_BLINK_RENDERER_CORE_HTML_PARSER_HTML_CONSTRUCTION_SITE_H_
28 #define THIRD_PARTY_BLINK_RENDERER_CORE_HTML_PARSER_HTML_CONSTRUCTION_SITE_H_
29
#include <algorithm>
#include <utility>

#include "base/macros.h"
#include "third_party/blink/renderer/core/dom/document.h"
#include "third_party/blink/renderer/core/dom/parser_content_policy.h"
#include "third_party/blink/renderer/core/html/parser/html_element_stack.h"
#include "third_party/blink/renderer/core/html/parser/html_formatting_element_list.h"
#include "third_party/blink/renderer/platform/heap/handle.h"
#include "third_party/blink/renderer/platform/wtf/text/string_builder.h"
#include "third_party/blink/renderer/platform/wtf/vector.h"
38
39 namespace blink {
40
41 struct HTMLConstructionSiteTask {
42 DISALLOW_NEW();
43
44 public:
45 enum Operation {
46 kInsert,
47 kInsertText, // Handles possible merging of text nodes.
48 kInsertAlreadyParsedChild, // Insert w/o calling begin/end parsing.
49 kReparent,
50 kTakeAllChildren,
51 };
52
HTMLConstructionSiteTaskHTMLConstructionSiteTask53 explicit HTMLConstructionSiteTask(Operation op)
54 : operation(op), self_closing(false) {}
55
TraceHTMLConstructionSiteTask56 void Trace(Visitor* visitor) {
57 visitor->Trace(parent);
58 visitor->Trace(next_child);
59 visitor->Trace(child);
60 }
61
OldParentHTMLConstructionSiteTask62 ContainerNode* OldParent() {
63 // It's sort of ugly, but we store the |oldParent| in the |child| field of
64 // the task so that we don't bloat the HTMLConstructionSiteTask object in
65 // the common case of the Insert operation.
66 return To<ContainerNode>(child.Get());
67 }
68
69 Operation operation;
70 Member<ContainerNode> parent;
71 Member<Node> next_child;
72 Member<Node> child;
73 bool self_closing;
74 };
75
76 } // namespace blink
77
// Opts HTMLConstructionSiteTask into the WTF vector trait named by the macro
// (move / init / compare via raw memory functions). The invocation sits
// between two separate `namespace blink` blocks, i.e. at global scope, and
// refers to the type by its fully qualified name.
WTF_ALLOW_MOVE_INIT_AND_COMPARE_WITH_MEM_FUNCTIONS(
    blink::HTMLConstructionSiteTask)
80
81 namespace blink {
82
// Classifies the whitespace content of a run of text.
//
// Note: The numeric order is intentional. When two strings are concatenated,
// the whitespace mode of the result is ws = min(ws1, ws2): "unknown" absorbs
// everything, and "not all whitespace" wins over "all whitespace".
enum WhitespaceMode {
  kWhitespaceUnknown = 0,
  kNotAllWhitespace = 1,
  kAllWhitespace = 2,
};
90
// Selects how much pending work a flush must complete.
enum FlushMode {
  // Flush pending text. Flush queued tasks.
  kFlushAlways = 0,

  // Flush pending text if node has length limit. Flush queued tasks.
  // With this mode, pending work may still remain after a flush.
  kFlushIfAtTextLimit = 1,
};
98
99 class AtomicHTMLToken;
100 class CustomElementDefinition;
101 class Document;
102 class Element;
103 class HTMLFormElement;
104 class HTMLParserReentryPermit;
105
106 class HTMLConstructionSite final {
107 DISALLOW_NEW();
108
109 public:
110 HTMLConstructionSite(HTMLParserReentryPermit*,
111 Document&,
112 ParserContentPolicy);
113 ~HTMLConstructionSite();
114 void Trace(Visitor*);
115
116 void InitFragmentParsing(DocumentFragment*, Element* context_element);
117
118 void Detach();
119
120 // executeQueuedTasks empties the queue but does not flush pending text.
121 // NOTE: Possible reentrancy via JavaScript execution.
122 void ExecuteQueuedTasks();
123
124 // flushPendingText turns pending text into queued Text insertions, but does
125 // not execute them.
126 void FlushPendingText(FlushMode);
127
128 // Called before every token in HTMLTreeBuilder::processToken, thus inlined:
129 void Flush(FlushMode mode) {
130 if (!HasPendingTasks())
131 return;
132 FlushPendingText(mode);
133 // NOTE: Possible reentrancy via JavaScript execution.
134 ExecuteQueuedTasks();
135 DCHECK(mode == kFlushIfAtTextLimit || !HasPendingTasks());
136 }
137
138 bool HasPendingTasks() {
139 return !pending_text_.IsEmpty() || !task_queue_.IsEmpty();
140 }
141
142 void SetDefaultCompatibilityMode();
143 void ProcessEndOfFile();
144 void FinishedParsing();
145
146 void InsertDoctype(AtomicHTMLToken*);
147 void InsertComment(AtomicHTMLToken*);
148 void InsertCommentOnDocument(AtomicHTMLToken*);
149 void InsertCommentOnHTMLHtmlElement(AtomicHTMLToken*);
150 void InsertHTMLElement(AtomicHTMLToken*);
151 void InsertSelfClosingHTMLElementDestroyingToken(AtomicHTMLToken*);
152 void InsertFormattingElement(AtomicHTMLToken*);
153 void InsertHTMLHeadElement(AtomicHTMLToken*);
154 void InsertHTMLBodyElement(AtomicHTMLToken*);
155 void InsertHTMLFormElement(AtomicHTMLToken*, bool is_demoted = false);
156 void InsertScriptElement(AtomicHTMLToken*);
157 void InsertTextNode(const StringView&, WhitespaceMode = kWhitespaceUnknown);
158 void InsertForeignElement(AtomicHTMLToken*,
159 const AtomicString& namespace_uri);
160
161 void InsertHTMLHtmlStartTagBeforeHTML(AtomicHTMLToken*);
162 void InsertHTMLHtmlStartTagInBody(AtomicHTMLToken*);
163 void InsertHTMLBodyStartTagInBody(AtomicHTMLToken*);
164
165 void Reparent(HTMLElementStack::ElementRecord* new_parent,
166 HTMLElementStack::ElementRecord* child);
167 void Reparent(HTMLElementStack::ElementRecord* new_parent,
168 HTMLStackItem* child);
169 // insertAlreadyParsedChild assumes that |child| has already been parsed
170 // (i.e., we're just moving it around in the tree rather than parsing it for
171 // the first time). That means this function doesn't call beginParsingChildren
172 // / finishParsingChildren.
173 void InsertAlreadyParsedChild(HTMLStackItem* new_parent,
174 HTMLElementStack::ElementRecord* child);
175 void TakeAllChildren(HTMLStackItem* new_parent,
176 HTMLElementStack::ElementRecord* old_parent);
177
178 HTMLStackItem* CreateElementFromSavedToken(HTMLStackItem*);
179
180 bool ShouldFosterParent() const;
181 void FosterParent(Node*);
182
183 bool IndexOfFirstUnopenFormattingElement(
184 unsigned& first_unopen_element_index) const;
185 void ReconstructTheActiveFormattingElements();
186
187 void GenerateImpliedEndTags();
188 void GenerateImpliedEndTagsWithExclusion(const AtomicString& tag_name);
189
190 bool InQuirksMode();
191
192 bool IsEmpty() const { return !open_elements_.StackDepth(); }
193 HTMLElementStack::ElementRecord* CurrentElementRecord() const {
194 return open_elements_.TopRecord();
195 }
196 Element* CurrentElement() const { return open_elements_.Top(); }
197 ContainerNode* CurrentNode() const { return open_elements_.TopNode(); }
198 HTMLStackItem* CurrentStackItem() const {
199 return open_elements_.TopStackItem();
200 }
201 HTMLStackItem* OneBelowTop() const { return open_elements_.OneBelowTop(); }
202 Document& OwnerDocumentForCurrentNode();
203 HTMLElementStack* OpenElements() const { return &open_elements_; }
204 HTMLFormattingElementList* ActiveFormattingElements() const {
205 return &active_formatting_elements_;
206 }
207 bool CurrentIsRootNode() {
208 return open_elements_.TopNode() == open_elements_.RootNode();
209 }
210
211 Element* Head() const { return head_->GetElement(); }
212 HTMLStackItem* HeadStackItem() const { return head_.Get(); }
213
214 bool IsFormElementPointerNonNull() const { return form_; }
215 HTMLFormElement* TakeForm();
216
217 ParserContentPolicy GetParserContentPolicy() {
218 return parser_content_policy_;
219 }
220
221 class RedirectToFosterParentGuard {
222 STACK_ALLOCATED();
223 DISALLOW_COPY_AND_ASSIGN(RedirectToFosterParentGuard);
224
225 public:
226 RedirectToFosterParentGuard(HTMLConstructionSite& tree)
227 : tree_(tree),
228 was_redirecting_before_(tree.redirect_attach_to_foster_parent_) {
229 tree_.redirect_attach_to_foster_parent_ = true;
230 }
231
232 ~RedirectToFosterParentGuard() {
233 tree_.redirect_attach_to_foster_parent_ = was_redirecting_before_;
234 }
235
236 private:
237 HTMLConstructionSite& tree_;
238 bool was_redirecting_before_;
239 };
240
241 private:
242 // In the common case, this queue will have only one task because most tokens
243 // produce only one DOM mutation.
244 typedef HeapVector<HTMLConstructionSiteTask, 1> TaskQueue;
245
246 void SetCompatibilityMode(Document::CompatibilityMode);
247 void SetCompatibilityModeFromDoctype(const String& name,
248 const String& public_id,
249 const String& system_id);
250
251 void AttachLater(ContainerNode* parent,
252 Node* child,
253 bool self_closing = false);
254
255 void FindFosterSite(HTMLConstructionSiteTask&);
256
257 CreateElementFlags GetCreateElementFlags() const;
258 Element* CreateElement(AtomicHTMLToken*, const AtomicString& namespace_uri);
259
260 void MergeAttributesFromTokenIntoElement(AtomicHTMLToken*, Element*);
261
262 void ExecuteTask(HTMLConstructionSiteTask&);
263 void QueueTask(const HTMLConstructionSiteTask&);
264
265 CustomElementDefinition* LookUpCustomElementDefinition(
266 Document&,
267 const QualifiedName&,
268 const AtomicString& is);
269
270 HTMLParserReentryPermit* reentry_permit_;
271 Member<Document> document_;
272
273 // This is the root ContainerNode to which the parser attaches all newly
274 // constructed nodes. It points to a DocumentFragment when parsing fragments
275 // and a Document in all other cases.
276 Member<ContainerNode> attachment_root_;
277
278 // https://html.spec.whatwg.org/C/#head-element-pointer
279 Member<HTMLStackItem> head_;
280 // https://html.spec.whatwg.org/C/#form-element-pointer
281 Member<HTMLFormElement> form_;
282 mutable HTMLElementStack open_elements_;
283 mutable HTMLFormattingElementList active_formatting_elements_;
284
285 TaskQueue task_queue_;
286
287 class PendingText final {
288 DISALLOW_NEW();
289
290 public:
291 PendingText() : whitespace_mode(kWhitespaceUnknown) {}
292
293 void Append(ContainerNode* new_parent,
294 Node* new_next_child,
295 const StringView& new_string,
296 WhitespaceMode new_whitespace_mode) {
297 DCHECK(!parent || parent == new_parent);
298 parent = new_parent;
299 DCHECK(!next_child || next_child == new_next_child);
300 next_child = new_next_child;
301 string_builder.Append(new_string);
302 whitespace_mode = std::min(whitespace_mode, new_whitespace_mode);
303 }
304
305 void Swap(PendingText& other) {
306 std::swap(whitespace_mode, other.whitespace_mode);
307 parent.Swap(other.parent);
308 next_child.Swap(other.next_child);
309 string_builder.Swap(other.string_builder);
310 }
311
312 void Discard() {
313 PendingText discarded_text;
314 Swap(discarded_text);
315 }
316
317 bool IsEmpty() {
318 // When the stringbuilder is empty, the parent and whitespace should also
319 // be "empty".
320 DCHECK_EQ(string_builder.IsEmpty(), !parent);
321 DCHECK(!string_builder.IsEmpty() || !next_child);
322 DCHECK(!string_builder.IsEmpty() ||
323 (whitespace_mode == kWhitespaceUnknown));
324 return string_builder.IsEmpty();
325 }
326
327 void Trace(Visitor*);
328
329 Member<ContainerNode> parent;
330 Member<Node> next_child;
331 StringBuilder string_builder;
332 WhitespaceMode whitespace_mode;
333 };
334
335 PendingText pending_text_;
336
337 ParserContentPolicy parser_content_policy_;
338 bool is_parsing_fragment_;
339
340 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#parsing-main-intable
341 // In the "in table" insertion mode, we sometimes get into a state where
342 // "whenever a node would be inserted into the current node, it must instead
343 // be foster parented." This flag tracks whether we're in that state.
344 bool redirect_attach_to_foster_parent_;
345
346 bool in_quirks_mode_;
347
348 DISALLOW_COPY_AND_ASSIGN(HTMLConstructionSite);
349 };
350
351 } // namespace blink
352
353 #endif // THIRD_PARTY_BLINK_RENDERER_CORE_HTML_PARSER_HTML_CONSTRUCTION_SITE_H_
354