1 /*
2  * Copyright (C) 2010 Google, Inc. All Rights Reserved.
3  * Copyright (C) 2011 Apple Inc. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
15  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL GOOGLE INC. OR
18  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 #include "config.h"
28 #include "HTMLTreeBuilder.h"
29 
30 #include "Comment.h"
31 #include "DocumentFragment.h"
32 #include "DocumentType.h"
33 #include "Element.h"
34 #include "Frame.h"
35 #include "HTMLDocument.h"
36 #include "HTMLElementFactory.h"
37 #include "HTMLFormElement.h"
38 #include "HTMLHtmlElement.h"
39 #include "HTMLNames.h"
40 #include "HTMLScriptElement.h"
41 #include "HTMLToken.h"
42 #include "HTMLTokenizer.h"
43 #include "LocalizedStrings.h"
44 #if ENABLE(MATHML)
45 #include "MathMLNames.h"
46 #endif
47 #include "NotImplemented.h"
48 #if ENABLE(SVG)
49 #include "SVGNames.h"
50 #endif
51 #include "Settings.h"
52 #include "Text.h"
53 #include <wtf/UnusedParam.h>
54 
55 namespace WebCore {
56 
57 using namespace HTMLNames;
58 
59 namespace {
60 
hasImpliedEndTag(ContainerNode * node)61 bool hasImpliedEndTag(ContainerNode* node)
62 {
63     return node->hasTagName(ddTag)
64         || node->hasTagName(dtTag)
65         || node->hasTagName(liTag)
66         || node->hasTagName(optionTag)
67         || node->hasTagName(optgroupTag)
68         || node->hasTagName(pTag)
69         || node->hasTagName(rpTag)
70         || node->hasTagName(rtTag);
71 }
72 
causesFosterParenting(const QualifiedName & tagName)73 bool causesFosterParenting(const QualifiedName& tagName)
74 {
75     return tagName == tableTag
76         || tagName == tbodyTag
77         || tagName == tfootTag
78         || tagName == theadTag
79         || tagName == trTag;
80 }
81 
82 } // namespace
83 
84 template<typename ChildType>
attach(ContainerNode * rawParent,PassRefPtr<ChildType> prpChild)85 PassRefPtr<ChildType> HTMLConstructionSite::attach(ContainerNode* rawParent, PassRefPtr<ChildType> prpChild)
86 {
87     RefPtr<ChildType> child = prpChild;
88     RefPtr<ContainerNode> parent = rawParent;
89 
90     // FIXME: It's confusing that HTMLConstructionSite::attach does the magic
91     // redirection to the foster parent but HTMLConstructionSite::attachAtSite
92     // doesn't. It feels like we're missing a concept somehow.
93     if (shouldFosterParent()) {
94         fosterParent(child.get());
95         ASSERT(child->attached() || !child->parentNode() || !child->parentNode()->attached());
96         return child.release();
97     }
98 
99     parent->parserAddChild(child);
100 
101     // An event handler (DOM Mutation, beforeload, et al.) could have removed
102     // the child, in which case we shouldn't try attaching it.
103     if (!child->parentNode())
104         return child.release();
105 
106     if (parent->attached() && !child->attached())
107         child->attach();
108     return child.release();
109 }
110 
attachAtSite(const AttachmentSite & site,PassRefPtr<Node> prpChild)111 void HTMLConstructionSite::attachAtSite(const AttachmentSite& site, PassRefPtr<Node> prpChild)
112 {
113     // FIXME: It's unfortunate that we need to hold a reference to child
114     // here to call attach().  We should investigate whether we can rely on
115     // |site.parent| to hold a ref at this point.
116     RefPtr<Node> child = prpChild;
117 
118     if (site.nextChild)
119         site.parent->parserInsertBefore(child, site.nextChild);
120     else
121         site.parent->parserAddChild(child);
122 
123     // JavaScript run from beforeload (or DOM Mutation or event handlers)
124     // might have removed the child, in which case we should not attach it.
125     if (child->parentNode() && site.parent->attached() && !child->attached())
126         child->attach();
127 }
128 
HTMLConstructionSite(Document * document)129 HTMLConstructionSite::HTMLConstructionSite(Document* document)
130     : m_document(document)
131     , m_attachmentRoot(document)
132     , m_fragmentScriptingPermission(FragmentScriptingAllowed)
133     , m_isParsingFragment(false)
134     , m_redirectAttachToFosterParent(false)
135 {
136 }
137 
HTMLConstructionSite(DocumentFragment * fragment,FragmentScriptingPermission scriptingPermission)138 HTMLConstructionSite::HTMLConstructionSite(DocumentFragment* fragment, FragmentScriptingPermission scriptingPermission)
139     : m_document(fragment->document())
140     , m_attachmentRoot(fragment)
141     , m_fragmentScriptingPermission(scriptingPermission)
142     , m_isParsingFragment(true)
143     , m_redirectAttachToFosterParent(false)
144 {
145 }
146 
~HTMLConstructionSite()147 HTMLConstructionSite::~HTMLConstructionSite()
148 {
149 }
150 
detach()151 void HTMLConstructionSite::detach()
152 {
153     m_document = 0;
154     m_attachmentRoot = 0;
155 }
156 
setForm(HTMLFormElement * form)157 void HTMLConstructionSite::setForm(HTMLFormElement* form)
158 {
159     // This method should only be needed for HTMLTreeBuilder in the fragment case.
160     ASSERT(!m_form);
161     m_form = form;
162 }
163 
takeForm()164 PassRefPtr<HTMLFormElement> HTMLConstructionSite::takeForm()
165 {
166     return m_form.release();
167 }
168 
dispatchDocumentElementAvailableIfNeeded()169 void HTMLConstructionSite::dispatchDocumentElementAvailableIfNeeded()
170 {
171     ASSERT(m_document);
172     if (m_document->frame() && !m_isParsingFragment)
173         m_document->frame()->loader()->dispatchDocumentElementAvailable();
174 }
175 
insertHTMLHtmlStartTagBeforeHTML(AtomicHTMLToken & token)176 void HTMLConstructionSite::insertHTMLHtmlStartTagBeforeHTML(AtomicHTMLToken& token)
177 {
178     RefPtr<HTMLHtmlElement> element = HTMLHtmlElement::create(m_document);
179     element->setAttributeMap(token.takeAtributes(), m_fragmentScriptingPermission);
180     m_openElements.pushHTMLHtmlElement(attach<Element>(m_attachmentRoot, element.get()));
181 #if ENABLE(OFFLINE_WEB_APPLICATIONS)
182     element->insertedByParser();
183 #endif
184     dispatchDocumentElementAvailableIfNeeded();
185 }
186 
mergeAttributesFromTokenIntoElement(AtomicHTMLToken & token,Element * element)187 void HTMLConstructionSite::mergeAttributesFromTokenIntoElement(AtomicHTMLToken& token, Element* element)
188 {
189     if (!token.attributes())
190         return;
191 
192     NamedNodeMap* attributes = element->attributes(false);
193     for (unsigned i = 0; i < token.attributes()->length(); ++i) {
194         Attribute* attribute = token.attributes()->attributeItem(i);
195         if (!attributes->getAttributeItem(attribute->name()))
196             element->setAttribute(attribute->name(), attribute->value());
197     }
198 }
199 
insertHTMLHtmlStartTagInBody(AtomicHTMLToken & token)200 void HTMLConstructionSite::insertHTMLHtmlStartTagInBody(AtomicHTMLToken& token)
201 {
202     // FIXME: parse error
203 
204     // Fragments do not have a root HTML element, so any additional HTML elements
205     // encountered during fragment parsing should be ignored.
206     if (m_isParsingFragment)
207         return;
208 
209     mergeAttributesFromTokenIntoElement(token, m_openElements.htmlElement());
210 }
211 
insertHTMLBodyStartTagInBody(AtomicHTMLToken & token)212 void HTMLConstructionSite::insertHTMLBodyStartTagInBody(AtomicHTMLToken& token)
213 {
214     // FIXME: parse error
215     mergeAttributesFromTokenIntoElement(token, m_openElements.bodyElement());
216 }
217 
insertDoctype(AtomicHTMLToken & token)218 void HTMLConstructionSite::insertDoctype(AtomicHTMLToken& token)
219 {
220     ASSERT(token.type() == HTMLToken::DOCTYPE);
221     attach(m_attachmentRoot, DocumentType::create(m_document, token.name(), String::adopt(token.publicIdentifier()), String::adopt(token.systemIdentifier())));
222 
223     // DOCTYPE nodes are only processed when parsing fragments w/o contextElements, which
224     // never occurs.  However, if we ever chose to support such, this code is subtly wrong,
225     // because context-less fragments can determine their own quirks mode, and thus change
226     // parsing rules (like <p> inside <table>).  For now we ASSERT that we never hit this code
227     // in a fragment, as changing the owning document's compatibility mode would be wrong.
228     ASSERT(!m_isParsingFragment);
229     if (m_isParsingFragment)
230         return;
231 
232     if (token.forceQuirks())
233         m_document->setCompatibilityMode(Document::QuirksMode);
234     else
235         m_document->setCompatibilityModeFromDoctype();
236 }
237 
insertComment(AtomicHTMLToken & token)238 void HTMLConstructionSite::insertComment(AtomicHTMLToken& token)
239 {
240     ASSERT(token.type() == HTMLToken::Comment);
241     attach(currentNode(), Comment::create(currentNode()->document(), token.comment()));
242 }
243 
insertCommentOnDocument(AtomicHTMLToken & token)244 void HTMLConstructionSite::insertCommentOnDocument(AtomicHTMLToken& token)
245 {
246     ASSERT(token.type() == HTMLToken::Comment);
247     attach(m_attachmentRoot, Comment::create(m_document, token.comment()));
248 }
249 
insertCommentOnHTMLHtmlElement(AtomicHTMLToken & token)250 void HTMLConstructionSite::insertCommentOnHTMLHtmlElement(AtomicHTMLToken& token)
251 {
252     ASSERT(token.type() == HTMLToken::Comment);
253     ContainerNode* parent = m_openElements.rootNode();
254     attach(parent, Comment::create(parent->document(), token.comment()));
255 }
256 
attachToCurrent(PassRefPtr<Element> child)257 PassRefPtr<Element> HTMLConstructionSite::attachToCurrent(PassRefPtr<Element> child)
258 {
259     return attach(currentNode(), child);
260 }
261 
insertHTMLHeadElement(AtomicHTMLToken & token)262 void HTMLConstructionSite::insertHTMLHeadElement(AtomicHTMLToken& token)
263 {
264     ASSERT(!shouldFosterParent());
265     m_head = attachToCurrent(createHTMLElement(token));
266     m_openElements.pushHTMLHeadElement(m_head);
267 }
268 
insertHTMLBodyElement(AtomicHTMLToken & token)269 void HTMLConstructionSite::insertHTMLBodyElement(AtomicHTMLToken& token)
270 {
271     ASSERT(!shouldFosterParent());
272     m_openElements.pushHTMLBodyElement(attachToCurrent(createHTMLElement(token)));
273 }
274 
insertHTMLFormElement(AtomicHTMLToken & token,bool isDemoted)275 void HTMLConstructionSite::insertHTMLFormElement(AtomicHTMLToken& token, bool isDemoted)
276 {
277     RefPtr<Element> element = createHTMLElement(token);
278     ASSERT(element->hasTagName(formTag));
279     RefPtr<HTMLFormElement> form = static_pointer_cast<HTMLFormElement>(element.release());
280     form->setDemoted(isDemoted);
281     m_openElements.push(attachToCurrent(form.release()));
282     ASSERT(currentElement()->isHTMLElement());
283     ASSERT(currentElement()->hasTagName(formTag));
284     m_form = static_cast<HTMLFormElement*>(currentElement());
285 }
286 
insertHTMLElement(AtomicHTMLToken & token)287 void HTMLConstructionSite::insertHTMLElement(AtomicHTMLToken& token)
288 {
289     m_openElements.push(attachToCurrent(createHTMLElement(token)));
290 }
291 
insertSelfClosingHTMLElement(AtomicHTMLToken & token)292 void HTMLConstructionSite::insertSelfClosingHTMLElement(AtomicHTMLToken& token)
293 {
294     ASSERT(token.type() == HTMLToken::StartTag);
295     RefPtr<Element> element = attachToCurrent(createHTMLElement(token));
296     // Normally HTMLElementStack is responsible for calling finishParsingChildren,
297     // but self-closing elements are never in the element stack so the stack
298     // doesn't get a chance to tell them that we're done parsing their children.
299     element->finishParsingChildren();
300     // FIXME: Do we want to acknowledge the token's self-closing flag?
301     // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#acknowledge-self-closing-flag
302 }
303 
insertFormattingElement(AtomicHTMLToken & token)304 void HTMLConstructionSite::insertFormattingElement(AtomicHTMLToken& token)
305 {
306     // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#the-stack-of-open-elements
307     // Possible active formatting elements include:
308     // a, b, big, code, em, font, i, nobr, s, small, strike, strong, tt, and u.
309     insertHTMLElement(token);
310     m_activeFormattingElements.append(currentElement());
311 }
312 
insertScriptElement(AtomicHTMLToken & token)313 void HTMLConstructionSite::insertScriptElement(AtomicHTMLToken& token)
314 {
315     RefPtr<HTMLScriptElement> element = HTMLScriptElement::create(scriptTag, currentNode()->document(), true);
316     if (m_fragmentScriptingPermission == FragmentScriptingAllowed)
317         element->setAttributeMap(token.takeAtributes(), m_fragmentScriptingPermission);
318     m_openElements.push(attachToCurrent(element.release()));
319 }
320 
insertForeignElement(AtomicHTMLToken & token,const AtomicString & namespaceURI)321 void HTMLConstructionSite::insertForeignElement(AtomicHTMLToken& token, const AtomicString& namespaceURI)
322 {
323     ASSERT(token.type() == HTMLToken::StartTag);
324     notImplemented(); // parseError when xmlns or xmlns:xlink are wrong.
325 
326     RefPtr<Element> element = attachToCurrent(createElement(token, namespaceURI));
327     if (!token.selfClosing())
328         m_openElements.push(element);
329 }
330 
insertTextNode(const String & characters)331 void HTMLConstructionSite::insertTextNode(const String& characters)
332 {
333     AttachmentSite site;
334     site.parent = currentNode();
335     site.nextChild = 0;
336     if (shouldFosterParent())
337         findFosterSite(site);
338 
339     unsigned currentPosition = 0;
340 
341     // FIXME: Splitting text nodes into smaller chunks contradicts HTML5 spec, but is currently necessary
342     // for performance, see <https://bugs.webkit.org/show_bug.cgi?id=55898>.
343 
344     Node* previousChild = site.nextChild ? site.nextChild->previousSibling() : site.parent->lastChild();
345     if (previousChild && previousChild->isTextNode()) {
346         // FIXME: We're only supposed to append to this text node if it
347         // was the last text node inserted by the parser.
348         CharacterData* textNode = static_cast<CharacterData*>(previousChild);
349         currentPosition = textNode->parserAppendData(characters.characters(), characters.length(), Text::defaultLengthLimit);
350     }
351 
352     while (currentPosition < characters.length()) {
353         RefPtr<Text> textNode = Text::createWithLengthLimit(site.parent->document(), characters, currentPosition);
354         // If we have a whole string of unbreakable characters the above could lead to an infinite loop. Exceeding the length limit is the lesser evil.
355         if (!textNode->length())
356             textNode = Text::create(site.parent->document(), characters.substring(currentPosition));
357 
358         currentPosition += textNode->length();
359         ASSERT(currentPosition <= characters.length());
360         attachAtSite(site, textNode.release());
361     }
362 }
363 
createElement(AtomicHTMLToken & token,const AtomicString & namespaceURI)364 PassRefPtr<Element> HTMLConstructionSite::createElement(AtomicHTMLToken& token, const AtomicString& namespaceURI)
365 {
366     QualifiedName tagName(nullAtom, token.name(), namespaceURI);
367     RefPtr<Element> element = currentNode()->document()->createElement(tagName, true);
368     element->setAttributeMap(token.takeAtributes(), m_fragmentScriptingPermission);
369     return element.release();
370 }
371 
createHTMLElement(AtomicHTMLToken & token)372 PassRefPtr<Element> HTMLConstructionSite::createHTMLElement(AtomicHTMLToken& token)
373 {
374     QualifiedName tagName(nullAtom, token.name(), xhtmlNamespaceURI);
375     // FIXME: This can't use HTMLConstructionSite::createElement because we
376     // have to pass the current form element.  We should rework form association
377     // to occur after construction to allow better code sharing here.
378     RefPtr<Element> element = HTMLElementFactory::createHTMLElement(tagName, currentNode()->document(), form(), true);
379     element->setAttributeMap(token.takeAtributes(), m_fragmentScriptingPermission);
380     ASSERT(element->isHTMLElement());
381     return element.release();
382 }
383 
createHTMLElementFromElementRecord(HTMLElementStack::ElementRecord * record)384 PassRefPtr<Element> HTMLConstructionSite::createHTMLElementFromElementRecord(HTMLElementStack::ElementRecord* record)
385 {
386     return createHTMLElementFromSavedElement(record->element());
387 }
388 
389 namespace {
390 
cloneAttributes(Element * element)391 PassRefPtr<NamedNodeMap> cloneAttributes(Element* element)
392 {
393     NamedNodeMap* attributes = element->attributes(true);
394     if (!attributes)
395         return 0;
396 
397     RefPtr<NamedNodeMap> newAttributes = NamedNodeMap::create();
398     for (size_t i = 0; i < attributes->length(); ++i) {
399         Attribute* attribute = attributes->attributeItem(i);
400         RefPtr<Attribute> clone = Attribute::createMapped(attribute->name(), attribute->value());
401         newAttributes->addAttribute(clone);
402     }
403     return newAttributes.release();
404 }
405 
406 }
407 
createHTMLElementFromSavedElement(Element * element)408 PassRefPtr<Element> HTMLConstructionSite::createHTMLElementFromSavedElement(Element* element)
409 {
410     // FIXME: This method is wrong.  We should be using the original token.
411     // Using an Element* causes us to fail examples like this:
412     // <b id="1"><p><script>document.getElementById("1").id = "2"</script></p>TEXT</b>
413     // When reconstructTheActiveFormattingElements calls this method to open
414     // a second <b> tag to wrap TEXT, it will have id "2", even though the HTML5
415     // spec implies it should be "1".  Minefield matches the HTML5 spec here.
416 
417     ASSERT(element->isHTMLElement()); // otherwise localName() might be wrong.
418     AtomicHTMLToken fakeToken(HTMLToken::StartTag, element->localName(), cloneAttributes(element));
419     return createHTMLElement(fakeToken);
420 }
421 
indexOfFirstUnopenFormattingElement(unsigned & firstUnopenElementIndex) const422 bool HTMLConstructionSite::indexOfFirstUnopenFormattingElement(unsigned& firstUnopenElementIndex) const
423 {
424     if (m_activeFormattingElements.isEmpty())
425         return false;
426     unsigned index = m_activeFormattingElements.size();
427     do {
428         --index;
429         const HTMLFormattingElementList::Entry& entry = m_activeFormattingElements.at(index);
430         if (entry.isMarker() || m_openElements.contains(entry.element())) {
431             firstUnopenElementIndex = index + 1;
432             return firstUnopenElementIndex < m_activeFormattingElements.size();
433         }
434     } while (index);
435     firstUnopenElementIndex = index;
436     return true;
437 }
438 
reconstructTheActiveFormattingElements()439 void HTMLConstructionSite::reconstructTheActiveFormattingElements()
440 {
441     unsigned firstUnopenElementIndex;
442     if (!indexOfFirstUnopenFormattingElement(firstUnopenElementIndex))
443         return;
444 
445     unsigned unopenEntryIndex = firstUnopenElementIndex;
446     ASSERT(unopenEntryIndex < m_activeFormattingElements.size());
447     for (; unopenEntryIndex < m_activeFormattingElements.size(); ++unopenEntryIndex) {
448         HTMLFormattingElementList::Entry& unopenedEntry = m_activeFormattingElements.at(unopenEntryIndex);
449         RefPtr<Element> reconstructed = createHTMLElementFromSavedElement(unopenedEntry.element());
450         m_openElements.push(attachToCurrent(reconstructed.release()));
451         unopenedEntry.replaceElement(currentElement());
452     }
453 }
454 
generateImpliedEndTagsWithExclusion(const AtomicString & tagName)455 void HTMLConstructionSite::generateImpliedEndTagsWithExclusion(const AtomicString& tagName)
456 {
457     while (hasImpliedEndTag(currentNode()) && !currentNode()->hasLocalName(tagName))
458         m_openElements.pop();
459 }
460 
generateImpliedEndTags()461 void HTMLConstructionSite::generateImpliedEndTags()
462 {
463     while (hasImpliedEndTag(currentNode()))
464         m_openElements.pop();
465 }
466 
findFosterSite(AttachmentSite & site)467 void HTMLConstructionSite::findFosterSite(AttachmentSite& site)
468 {
469     HTMLElementStack::ElementRecord* lastTableElementRecord = m_openElements.topmost(tableTag.localName());
470     if (lastTableElementRecord) {
471         Element* lastTableElement = lastTableElementRecord->element();
472         if (ContainerNode* parent = lastTableElement->parentNode()) {
473             site.parent = parent;
474             site.nextChild = lastTableElement;
475             return;
476         }
477         site.parent = lastTableElementRecord->next()->element();
478         site.nextChild = 0;
479         return;
480     }
481     // Fragment case
482     site.parent = m_openElements.rootNode(); // DocumentFragment
483     site.nextChild = 0;
484 }
485 
shouldFosterParent() const486 bool HTMLConstructionSite::shouldFosterParent() const
487 {
488     return m_redirectAttachToFosterParent
489         && currentNode()->isElementNode()
490         && causesFosterParenting(currentElement()->tagQName());
491 }
492 
fosterParent(Node * node)493 void HTMLConstructionSite::fosterParent(Node* node)
494 {
495     AttachmentSite site;
496     findFosterSite(site);
497     attachAtSite(site, node);
498 }
499 
500 }
501