1 /*
2  * Copyright (C) 2010 Google, Inc. All Rights Reserved.
3  * Copyright (C) 2011 Apple Inc. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
15  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL GOOGLE INC. OR
18  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 #ifndef THIRD_PARTY_BLINK_RENDERER_CORE_HTML_PARSER_HTML_CONSTRUCTION_SITE_H_
28 #define THIRD_PARTY_BLINK_RENDERER_CORE_HTML_PARSER_HTML_CONSTRUCTION_SITE_H_
29 
30 #include "base/macros.h"
31 #include "third_party/blink/renderer/core/dom/document.h"
32 #include "third_party/blink/renderer/core/dom/parser_content_policy.h"
33 #include "third_party/blink/renderer/core/html/parser/html_element_stack.h"
34 #include "third_party/blink/renderer/core/html/parser/html_formatting_element_list.h"
35 #include "third_party/blink/renderer/platform/heap/handle.h"
36 #include "third_party/blink/renderer/platform/wtf/text/string_builder.h"
37 #include "third_party/blink/renderer/platform/wtf/vector.h"
38 
39 namespace blink {
40 
41 struct HTMLConstructionSiteTask {
42   DISALLOW_NEW();
43 
44  public:
45   enum Operation {
46     kInsert,
47     kInsertText,                // Handles possible merging of text nodes.
48     kInsertAlreadyParsedChild,  // Insert w/o calling begin/end parsing.
49     kReparent,
50     kTakeAllChildren,
51   };
52 
HTMLConstructionSiteTaskHTMLConstructionSiteTask53   explicit HTMLConstructionSiteTask(Operation op)
54       : operation(op), self_closing(false) {}
55 
TraceHTMLConstructionSiteTask56   void Trace(Visitor* visitor) {
57     visitor->Trace(parent);
58     visitor->Trace(next_child);
59     visitor->Trace(child);
60   }
61 
OldParentHTMLConstructionSiteTask62   ContainerNode* OldParent() {
63     // It's sort of ugly, but we store the |oldParent| in the |child| field of
64     // the task so that we don't bloat the HTMLConstructionSiteTask object in
65     // the common case of the Insert operation.
66     return To<ContainerNode>(child.Get());
67   }
68 
69   Operation operation;
70   Member<ContainerNode> parent;
71   Member<Node> next_child;
72   Member<Node> child;
73   bool self_closing;
74 };
75 
76 }  // namespace blink
77 
// Registers HTMLConstructionSiteTask with WTF so that, as the macro name
// indicates, containers may move, initialize and compare it using raw memory
// functions. The blink namespace is closed above and reopened below, so the
// macro is expanded at global scope.
// NOTE(review): the exact traits this enables are defined in the WTF headers;
// confirm there before adding members with non-trivial semantics.
WTF_ALLOW_MOVE_INIT_AND_COMPARE_WITH_MEM_FUNCTIONS(
    blink::HTMLConstructionSiteTask)
80 
81 namespace blink {
82 
// Whitespace classification of a run of text.
//
// Note: The numeric order is intentional. When strings are concatenated, the
// whitespace mode of the result is computed as ws = min(ws1, ws2) (see
// PendingText::Append), so do not reorder or renumber these values.
enum WhitespaceMode {
  kWhitespaceUnknown = 0,
  kNotAllWhitespace = 1,
  kAllWhitespace = 2,
};
90 
// Selects how much pending work a flush pushes through. Queued tasks are
// flushed in both modes; the modes differ only in how pending text is treated.
enum FlushMode {
  // Flush pending text. Flush queued tasks.
  kFlushAlways = 0,

  // Flush pending text if node has length limit. Flush queued tasks.
  kFlushIfAtTextLimit = 1,
};
98 
99 class AtomicHTMLToken;
100 class CustomElementDefinition;
101 class Document;
102 class Element;
103 class HTMLFormElement;
104 class HTMLParserReentryPermit;
105 
106 class HTMLConstructionSite final {
107   DISALLOW_NEW();
108 
109  public:
110   HTMLConstructionSite(HTMLParserReentryPermit*,
111                        Document&,
112                        ParserContentPolicy);
113   ~HTMLConstructionSite();
114   void Trace(Visitor*);
115 
116   void InitFragmentParsing(DocumentFragment*, Element* context_element);
117 
118   void Detach();
119 
120   // executeQueuedTasks empties the queue but does not flush pending text.
121   // NOTE: Possible reentrancy via JavaScript execution.
122   void ExecuteQueuedTasks();
123 
124   // flushPendingText turns pending text into queued Text insertions, but does
125   // not execute them.
126   void FlushPendingText(FlushMode);
127 
128   // Called before every token in HTMLTreeBuilder::processToken, thus inlined:
129   void Flush(FlushMode mode) {
130     if (!HasPendingTasks())
131       return;
132     FlushPendingText(mode);
133     // NOTE: Possible reentrancy via JavaScript execution.
134     ExecuteQueuedTasks();
135     DCHECK(mode == kFlushIfAtTextLimit || !HasPendingTasks());
136   }
137 
138   bool HasPendingTasks() {
139     return !pending_text_.IsEmpty() || !task_queue_.IsEmpty();
140   }
141 
142   void SetDefaultCompatibilityMode();
143   void ProcessEndOfFile();
144   void FinishedParsing();
145 
146   void InsertDoctype(AtomicHTMLToken*);
147   void InsertComment(AtomicHTMLToken*);
148   void InsertCommentOnDocument(AtomicHTMLToken*);
149   void InsertCommentOnHTMLHtmlElement(AtomicHTMLToken*);
150   void InsertHTMLElement(AtomicHTMLToken*);
151   void InsertSelfClosingHTMLElementDestroyingToken(AtomicHTMLToken*);
152   void InsertFormattingElement(AtomicHTMLToken*);
153   void InsertHTMLHeadElement(AtomicHTMLToken*);
154   void InsertHTMLBodyElement(AtomicHTMLToken*);
155   void InsertHTMLFormElement(AtomicHTMLToken*, bool is_demoted = false);
156   void InsertScriptElement(AtomicHTMLToken*);
157   void InsertTextNode(const StringView&, WhitespaceMode = kWhitespaceUnknown);
158   void InsertForeignElement(AtomicHTMLToken*,
159                             const AtomicString& namespace_uri);
160 
161   void InsertHTMLHtmlStartTagBeforeHTML(AtomicHTMLToken*);
162   void InsertHTMLHtmlStartTagInBody(AtomicHTMLToken*);
163   void InsertHTMLBodyStartTagInBody(AtomicHTMLToken*);
164 
165   void Reparent(HTMLElementStack::ElementRecord* new_parent,
166                 HTMLElementStack::ElementRecord* child);
167   void Reparent(HTMLElementStack::ElementRecord* new_parent,
168                 HTMLStackItem* child);
169   // insertAlreadyParsedChild assumes that |child| has already been parsed
170   // (i.e., we're just moving it around in the tree rather than parsing it for
171   // the first time). That means this function doesn't call beginParsingChildren
172   // / finishParsingChildren.
173   void InsertAlreadyParsedChild(HTMLStackItem* new_parent,
174                                 HTMLElementStack::ElementRecord* child);
175   void TakeAllChildren(HTMLStackItem* new_parent,
176                        HTMLElementStack::ElementRecord* old_parent);
177 
178   HTMLStackItem* CreateElementFromSavedToken(HTMLStackItem*);
179 
180   bool ShouldFosterParent() const;
181   void FosterParent(Node*);
182 
183   bool IndexOfFirstUnopenFormattingElement(
184       unsigned& first_unopen_element_index) const;
185   void ReconstructTheActiveFormattingElements();
186 
187   void GenerateImpliedEndTags();
188   void GenerateImpliedEndTagsWithExclusion(const AtomicString& tag_name);
189 
190   bool InQuirksMode();
191 
192   bool IsEmpty() const { return !open_elements_.StackDepth(); }
193   HTMLElementStack::ElementRecord* CurrentElementRecord() const {
194     return open_elements_.TopRecord();
195   }
196   Element* CurrentElement() const { return open_elements_.Top(); }
197   ContainerNode* CurrentNode() const { return open_elements_.TopNode(); }
198   HTMLStackItem* CurrentStackItem() const {
199     return open_elements_.TopStackItem();
200   }
201   HTMLStackItem* OneBelowTop() const { return open_elements_.OneBelowTop(); }
202   Document& OwnerDocumentForCurrentNode();
203   HTMLElementStack* OpenElements() const { return &open_elements_; }
204   HTMLFormattingElementList* ActiveFormattingElements() const {
205     return &active_formatting_elements_;
206   }
207   bool CurrentIsRootNode() {
208     return open_elements_.TopNode() == open_elements_.RootNode();
209   }
210 
211   Element* Head() const { return head_->GetElement(); }
212   HTMLStackItem* HeadStackItem() const { return head_.Get(); }
213 
214   bool IsFormElementPointerNonNull() const { return form_; }
215   HTMLFormElement* TakeForm();
216 
217   ParserContentPolicy GetParserContentPolicy() {
218     return parser_content_policy_;
219   }
220 
221   class RedirectToFosterParentGuard {
222     STACK_ALLOCATED();
223     DISALLOW_COPY_AND_ASSIGN(RedirectToFosterParentGuard);
224 
225    public:
226     RedirectToFosterParentGuard(HTMLConstructionSite& tree)
227         : tree_(tree),
228           was_redirecting_before_(tree.redirect_attach_to_foster_parent_) {
229       tree_.redirect_attach_to_foster_parent_ = true;
230     }
231 
232     ~RedirectToFosterParentGuard() {
233       tree_.redirect_attach_to_foster_parent_ = was_redirecting_before_;
234     }
235 
236    private:
237     HTMLConstructionSite& tree_;
238     bool was_redirecting_before_;
239   };
240 
241  private:
242   // In the common case, this queue will have only one task because most tokens
243   // produce only one DOM mutation.
244   typedef HeapVector<HTMLConstructionSiteTask, 1> TaskQueue;
245 
246   void SetCompatibilityMode(Document::CompatibilityMode);
247   void SetCompatibilityModeFromDoctype(const String& name,
248                                        const String& public_id,
249                                        const String& system_id);
250 
251   void AttachLater(ContainerNode* parent,
252                    Node* child,
253                    bool self_closing = false);
254 
255   void FindFosterSite(HTMLConstructionSiteTask&);
256 
257   CreateElementFlags GetCreateElementFlags() const;
258   Element* CreateElement(AtomicHTMLToken*, const AtomicString& namespace_uri);
259 
260   void MergeAttributesFromTokenIntoElement(AtomicHTMLToken*, Element*);
261 
262   void ExecuteTask(HTMLConstructionSiteTask&);
263   void QueueTask(const HTMLConstructionSiteTask&);
264 
265   CustomElementDefinition* LookUpCustomElementDefinition(
266       Document&,
267       const QualifiedName&,
268       const AtomicString& is);
269 
270   HTMLParserReentryPermit* reentry_permit_;
271   Member<Document> document_;
272 
273   // This is the root ContainerNode to which the parser attaches all newly
274   // constructed nodes. It points to a DocumentFragment when parsing fragments
275   // and a Document in all other cases.
276   Member<ContainerNode> attachment_root_;
277 
278   // https://html.spec.whatwg.org/C/#head-element-pointer
279   Member<HTMLStackItem> head_;
280   // https://html.spec.whatwg.org/C/#form-element-pointer
281   Member<HTMLFormElement> form_;
282   mutable HTMLElementStack open_elements_;
283   mutable HTMLFormattingElementList active_formatting_elements_;
284 
285   TaskQueue task_queue_;
286 
287   class PendingText final {
288     DISALLOW_NEW();
289 
290    public:
291     PendingText() : whitespace_mode(kWhitespaceUnknown) {}
292 
293     void Append(ContainerNode* new_parent,
294                 Node* new_next_child,
295                 const StringView& new_string,
296                 WhitespaceMode new_whitespace_mode) {
297       DCHECK(!parent || parent == new_parent);
298       parent = new_parent;
299       DCHECK(!next_child || next_child == new_next_child);
300       next_child = new_next_child;
301       string_builder.Append(new_string);
302       whitespace_mode = std::min(whitespace_mode, new_whitespace_mode);
303     }
304 
305     void Swap(PendingText& other) {
306       std::swap(whitespace_mode, other.whitespace_mode);
307       parent.Swap(other.parent);
308       next_child.Swap(other.next_child);
309       string_builder.Swap(other.string_builder);
310     }
311 
312     void Discard() {
313       PendingText discarded_text;
314       Swap(discarded_text);
315     }
316 
317     bool IsEmpty() {
318       // When the stringbuilder is empty, the parent and whitespace should also
319       // be "empty".
320       DCHECK_EQ(string_builder.IsEmpty(), !parent);
321       DCHECK(!string_builder.IsEmpty() || !next_child);
322       DCHECK(!string_builder.IsEmpty() ||
323              (whitespace_mode == kWhitespaceUnknown));
324       return string_builder.IsEmpty();
325     }
326 
327     void Trace(Visitor*);
328 
329     Member<ContainerNode> parent;
330     Member<Node> next_child;
331     StringBuilder string_builder;
332     WhitespaceMode whitespace_mode;
333   };
334 
335   PendingText pending_text_;
336 
337   ParserContentPolicy parser_content_policy_;
338   bool is_parsing_fragment_;
339 
340   // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#parsing-main-intable
341   // In the "in table" insertion mode, we sometimes get into a state where
342   // "whenever a node would be inserted into the current node, it must instead
343   // be foster parented."  This flag tracks whether we're in that state.
344   bool redirect_attach_to_foster_parent_;
345 
346   bool in_quirks_mode_;
347 
348   DISALLOW_COPY_AND_ASSIGN(HTMLConstructionSite);
349 };
350 
351 }  // namespace blink
352 
353 #endif  // THIRD_PARTY_BLINK_RENDERER_CORE_HTML_PARSER_HTML_CONSTRUCTION_SITE_H_
354