1 /*
2 * Copyright (C) 2010 Google, Inc. All Rights Reserved.
3 * Copyright (C) 2011 Apple Inc. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GOOGLE INC. OR
18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27 #include "config.h"
28 #include "HTMLTreeBuilder.h"
29
30 #include "Comment.h"
31 #include "DocumentFragment.h"
32 #include "DocumentType.h"
33 #include "Element.h"
34 #include "Frame.h"
35 #include "HTMLDocument.h"
36 #include "HTMLElementFactory.h"
37 #include "HTMLFormElement.h"
38 #include "HTMLHtmlElement.h"
39 #include "HTMLNames.h"
40 #include "HTMLScriptElement.h"
41 #include "HTMLToken.h"
42 #include "HTMLTokenizer.h"
43 #include "LocalizedStrings.h"
44 #if ENABLE(MATHML)
45 #include "MathMLNames.h"
46 #endif
47 #include "NotImplemented.h"
48 #if ENABLE(SVG)
49 #include "SVGNames.h"
50 #endif
51 #include "Settings.h"
52 #include "Text.h"
53 #include <wtf/UnusedParam.h>
54
55 namespace WebCore {
56
57 using namespace HTMLNames;
58
59 namespace {
60
hasImpliedEndTag(ContainerNode * node)61 bool hasImpliedEndTag(ContainerNode* node)
62 {
63 return node->hasTagName(ddTag)
64 || node->hasTagName(dtTag)
65 || node->hasTagName(liTag)
66 || node->hasTagName(optionTag)
67 || node->hasTagName(optgroupTag)
68 || node->hasTagName(pTag)
69 || node->hasTagName(rpTag)
70 || node->hasTagName(rtTag);
71 }
72
causesFosterParenting(const QualifiedName & tagName)73 bool causesFosterParenting(const QualifiedName& tagName)
74 {
75 return tagName == tableTag
76 || tagName == tbodyTag
77 || tagName == tfootTag
78 || tagName == theadTag
79 || tagName == trTag;
80 }
81
82 } // namespace
83
84 template<typename ChildType>
attach(ContainerNode * rawParent,PassRefPtr<ChildType> prpChild)85 PassRefPtr<ChildType> HTMLConstructionSite::attach(ContainerNode* rawParent, PassRefPtr<ChildType> prpChild)
86 {
87 RefPtr<ChildType> child = prpChild;
88 RefPtr<ContainerNode> parent = rawParent;
89
90 // FIXME: It's confusing that HTMLConstructionSite::attach does the magic
91 // redirection to the foster parent but HTMLConstructionSite::attachAtSite
92 // doesn't. It feels like we're missing a concept somehow.
93 if (shouldFosterParent()) {
94 fosterParent(child.get());
95 ASSERT(child->attached() || !child->parentNode() || !child->parentNode()->attached());
96 return child.release();
97 }
98
99 parent->parserAddChild(child);
100
101 // An event handler (DOM Mutation, beforeload, et al.) could have removed
102 // the child, in which case we shouldn't try attaching it.
103 if (!child->parentNode())
104 return child.release();
105
106 if (parent->attached() && !child->attached())
107 child->attach();
108 return child.release();
109 }
110
attachAtSite(const AttachmentSite & site,PassRefPtr<Node> prpChild)111 void HTMLConstructionSite::attachAtSite(const AttachmentSite& site, PassRefPtr<Node> prpChild)
112 {
113 // FIXME: It's unfortunate that we need to hold a reference to child
114 // here to call attach(). We should investigate whether we can rely on
115 // |site.parent| to hold a ref at this point.
116 RefPtr<Node> child = prpChild;
117
118 if (site.nextChild)
119 site.parent->parserInsertBefore(child, site.nextChild);
120 else
121 site.parent->parserAddChild(child);
122
123 // JavaScript run from beforeload (or DOM Mutation or event handlers)
124 // might have removed the child, in which case we should not attach it.
125 if (child->parentNode() && site.parent->attached() && !child->attached())
126 child->attach();
127 }
128
HTMLConstructionSite(Document * document)129 HTMLConstructionSite::HTMLConstructionSite(Document* document)
130 : m_document(document)
131 , m_attachmentRoot(document)
132 , m_fragmentScriptingPermission(FragmentScriptingAllowed)
133 , m_isParsingFragment(false)
134 , m_redirectAttachToFosterParent(false)
135 {
136 }
137
HTMLConstructionSite(DocumentFragment * fragment,FragmentScriptingPermission scriptingPermission)138 HTMLConstructionSite::HTMLConstructionSite(DocumentFragment* fragment, FragmentScriptingPermission scriptingPermission)
139 : m_document(fragment->document())
140 , m_attachmentRoot(fragment)
141 , m_fragmentScriptingPermission(scriptingPermission)
142 , m_isParsingFragment(true)
143 , m_redirectAttachToFosterParent(false)
144 {
145 }
146
~HTMLConstructionSite()147 HTMLConstructionSite::~HTMLConstructionSite()
148 {
149 }
150
detach()151 void HTMLConstructionSite::detach()
152 {
153 m_document = 0;
154 m_attachmentRoot = 0;
155 }
156
setForm(HTMLFormElement * form)157 void HTMLConstructionSite::setForm(HTMLFormElement* form)
158 {
159 // This method should only be needed for HTMLTreeBuilder in the fragment case.
160 ASSERT(!m_form);
161 m_form = form;
162 }
163
takeForm()164 PassRefPtr<HTMLFormElement> HTMLConstructionSite::takeForm()
165 {
166 return m_form.release();
167 }
168
dispatchDocumentElementAvailableIfNeeded()169 void HTMLConstructionSite::dispatchDocumentElementAvailableIfNeeded()
170 {
171 ASSERT(m_document);
172 if (m_document->frame() && !m_isParsingFragment)
173 m_document->frame()->loader()->dispatchDocumentElementAvailable();
174 }
175
insertHTMLHtmlStartTagBeforeHTML(AtomicHTMLToken & token)176 void HTMLConstructionSite::insertHTMLHtmlStartTagBeforeHTML(AtomicHTMLToken& token)
177 {
178 RefPtr<HTMLHtmlElement> element = HTMLHtmlElement::create(m_document);
179 element->setAttributeMap(token.takeAtributes(), m_fragmentScriptingPermission);
180 m_openElements.pushHTMLHtmlElement(attach<Element>(m_attachmentRoot, element.get()));
181 #if ENABLE(OFFLINE_WEB_APPLICATIONS)
182 element->insertedByParser();
183 #endif
184 dispatchDocumentElementAvailableIfNeeded();
185 }
186
mergeAttributesFromTokenIntoElement(AtomicHTMLToken & token,Element * element)187 void HTMLConstructionSite::mergeAttributesFromTokenIntoElement(AtomicHTMLToken& token, Element* element)
188 {
189 if (!token.attributes())
190 return;
191
192 NamedNodeMap* attributes = element->attributes(false);
193 for (unsigned i = 0; i < token.attributes()->length(); ++i) {
194 Attribute* attribute = token.attributes()->attributeItem(i);
195 if (!attributes->getAttributeItem(attribute->name()))
196 element->setAttribute(attribute->name(), attribute->value());
197 }
198 }
199
insertHTMLHtmlStartTagInBody(AtomicHTMLToken & token)200 void HTMLConstructionSite::insertHTMLHtmlStartTagInBody(AtomicHTMLToken& token)
201 {
202 // FIXME: parse error
203
204 // Fragments do not have a root HTML element, so any additional HTML elements
205 // encountered during fragment parsing should be ignored.
206 if (m_isParsingFragment)
207 return;
208
209 mergeAttributesFromTokenIntoElement(token, m_openElements.htmlElement());
210 }
211
insertHTMLBodyStartTagInBody(AtomicHTMLToken & token)212 void HTMLConstructionSite::insertHTMLBodyStartTagInBody(AtomicHTMLToken& token)
213 {
214 // FIXME: parse error
215 mergeAttributesFromTokenIntoElement(token, m_openElements.bodyElement());
216 }
217
insertDoctype(AtomicHTMLToken & token)218 void HTMLConstructionSite::insertDoctype(AtomicHTMLToken& token)
219 {
220 ASSERT(token.type() == HTMLToken::DOCTYPE);
221 attach(m_attachmentRoot, DocumentType::create(m_document, token.name(), String::adopt(token.publicIdentifier()), String::adopt(token.systemIdentifier())));
222
223 // DOCTYPE nodes are only processed when parsing fragments w/o contextElements, which
224 // never occurs. However, if we ever chose to support such, this code is subtly wrong,
225 // because context-less fragments can determine their own quirks mode, and thus change
226 // parsing rules (like <p> inside <table>). For now we ASSERT that we never hit this code
227 // in a fragment, as changing the owning document's compatibility mode would be wrong.
228 ASSERT(!m_isParsingFragment);
229 if (m_isParsingFragment)
230 return;
231
232 if (token.forceQuirks())
233 m_document->setCompatibilityMode(Document::QuirksMode);
234 else
235 m_document->setCompatibilityModeFromDoctype();
236 }
237
insertComment(AtomicHTMLToken & token)238 void HTMLConstructionSite::insertComment(AtomicHTMLToken& token)
239 {
240 ASSERT(token.type() == HTMLToken::Comment);
241 attach(currentNode(), Comment::create(currentNode()->document(), token.comment()));
242 }
243
insertCommentOnDocument(AtomicHTMLToken & token)244 void HTMLConstructionSite::insertCommentOnDocument(AtomicHTMLToken& token)
245 {
246 ASSERT(token.type() == HTMLToken::Comment);
247 attach(m_attachmentRoot, Comment::create(m_document, token.comment()));
248 }
249
insertCommentOnHTMLHtmlElement(AtomicHTMLToken & token)250 void HTMLConstructionSite::insertCommentOnHTMLHtmlElement(AtomicHTMLToken& token)
251 {
252 ASSERT(token.type() == HTMLToken::Comment);
253 ContainerNode* parent = m_openElements.rootNode();
254 attach(parent, Comment::create(parent->document(), token.comment()));
255 }
256
attachToCurrent(PassRefPtr<Element> child)257 PassRefPtr<Element> HTMLConstructionSite::attachToCurrent(PassRefPtr<Element> child)
258 {
259 return attach(currentNode(), child);
260 }
261
insertHTMLHeadElement(AtomicHTMLToken & token)262 void HTMLConstructionSite::insertHTMLHeadElement(AtomicHTMLToken& token)
263 {
264 ASSERT(!shouldFosterParent());
265 m_head = attachToCurrent(createHTMLElement(token));
266 m_openElements.pushHTMLHeadElement(m_head);
267 }
268
insertHTMLBodyElement(AtomicHTMLToken & token)269 void HTMLConstructionSite::insertHTMLBodyElement(AtomicHTMLToken& token)
270 {
271 ASSERT(!shouldFosterParent());
272 m_openElements.pushHTMLBodyElement(attachToCurrent(createHTMLElement(token)));
273 }
274
insertHTMLFormElement(AtomicHTMLToken & token,bool isDemoted)275 void HTMLConstructionSite::insertHTMLFormElement(AtomicHTMLToken& token, bool isDemoted)
276 {
277 RefPtr<Element> element = createHTMLElement(token);
278 ASSERT(element->hasTagName(formTag));
279 RefPtr<HTMLFormElement> form = static_pointer_cast<HTMLFormElement>(element.release());
280 form->setDemoted(isDemoted);
281 m_openElements.push(attachToCurrent(form.release()));
282 ASSERT(currentElement()->isHTMLElement());
283 ASSERT(currentElement()->hasTagName(formTag));
284 m_form = static_cast<HTMLFormElement*>(currentElement());
285 }
286
insertHTMLElement(AtomicHTMLToken & token)287 void HTMLConstructionSite::insertHTMLElement(AtomicHTMLToken& token)
288 {
289 m_openElements.push(attachToCurrent(createHTMLElement(token)));
290 }
291
insertSelfClosingHTMLElement(AtomicHTMLToken & token)292 void HTMLConstructionSite::insertSelfClosingHTMLElement(AtomicHTMLToken& token)
293 {
294 ASSERT(token.type() == HTMLToken::StartTag);
295 RefPtr<Element> element = attachToCurrent(createHTMLElement(token));
296 // Normally HTMLElementStack is responsible for calling finishParsingChildren,
297 // but self-closing elements are never in the element stack so the stack
298 // doesn't get a chance to tell them that we're done parsing their children.
299 element->finishParsingChildren();
300 // FIXME: Do we want to acknowledge the token's self-closing flag?
301 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#acknowledge-self-closing-flag
302 }
303
insertFormattingElement(AtomicHTMLToken & token)304 void HTMLConstructionSite::insertFormattingElement(AtomicHTMLToken& token)
305 {
306 // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#the-stack-of-open-elements
307 // Possible active formatting elements include:
308 // a, b, big, code, em, font, i, nobr, s, small, strike, strong, tt, and u.
309 insertHTMLElement(token);
310 m_activeFormattingElements.append(currentElement());
311 }
312
insertScriptElement(AtomicHTMLToken & token)313 void HTMLConstructionSite::insertScriptElement(AtomicHTMLToken& token)
314 {
315 RefPtr<HTMLScriptElement> element = HTMLScriptElement::create(scriptTag, currentNode()->document(), true);
316 if (m_fragmentScriptingPermission == FragmentScriptingAllowed)
317 element->setAttributeMap(token.takeAtributes(), m_fragmentScriptingPermission);
318 m_openElements.push(attachToCurrent(element.release()));
319 }
320
insertForeignElement(AtomicHTMLToken & token,const AtomicString & namespaceURI)321 void HTMLConstructionSite::insertForeignElement(AtomicHTMLToken& token, const AtomicString& namespaceURI)
322 {
323 ASSERT(token.type() == HTMLToken::StartTag);
324 notImplemented(); // parseError when xmlns or xmlns:xlink are wrong.
325
326 RefPtr<Element> element = attachToCurrent(createElement(token, namespaceURI));
327 if (!token.selfClosing())
328 m_openElements.push(element);
329 }
330
insertTextNode(const String & characters)331 void HTMLConstructionSite::insertTextNode(const String& characters)
332 {
333 AttachmentSite site;
334 site.parent = currentNode();
335 site.nextChild = 0;
336 if (shouldFosterParent())
337 findFosterSite(site);
338
339 unsigned currentPosition = 0;
340
341 // FIXME: Splitting text nodes into smaller chunks contradicts HTML5 spec, but is currently necessary
342 // for performance, see <https://bugs.webkit.org/show_bug.cgi?id=55898>.
343
344 Node* previousChild = site.nextChild ? site.nextChild->previousSibling() : site.parent->lastChild();
345 if (previousChild && previousChild->isTextNode()) {
346 // FIXME: We're only supposed to append to this text node if it
347 // was the last text node inserted by the parser.
348 CharacterData* textNode = static_cast<CharacterData*>(previousChild);
349 currentPosition = textNode->parserAppendData(characters.characters(), characters.length(), Text::defaultLengthLimit);
350 }
351
352 while (currentPosition < characters.length()) {
353 RefPtr<Text> textNode = Text::createWithLengthLimit(site.parent->document(), characters, currentPosition);
354 // If we have a whole string of unbreakable characters the above could lead to an infinite loop. Exceeding the length limit is the lesser evil.
355 if (!textNode->length())
356 textNode = Text::create(site.parent->document(), characters.substring(currentPosition));
357
358 currentPosition += textNode->length();
359 ASSERT(currentPosition <= characters.length());
360 attachAtSite(site, textNode.release());
361 }
362 }
363
createElement(AtomicHTMLToken & token,const AtomicString & namespaceURI)364 PassRefPtr<Element> HTMLConstructionSite::createElement(AtomicHTMLToken& token, const AtomicString& namespaceURI)
365 {
366 QualifiedName tagName(nullAtom, token.name(), namespaceURI);
367 RefPtr<Element> element = currentNode()->document()->createElement(tagName, true);
368 element->setAttributeMap(token.takeAtributes(), m_fragmentScriptingPermission);
369 return element.release();
370 }
371
createHTMLElement(AtomicHTMLToken & token)372 PassRefPtr<Element> HTMLConstructionSite::createHTMLElement(AtomicHTMLToken& token)
373 {
374 QualifiedName tagName(nullAtom, token.name(), xhtmlNamespaceURI);
375 // FIXME: This can't use HTMLConstructionSite::createElement because we
376 // have to pass the current form element. We should rework form association
377 // to occur after construction to allow better code sharing here.
378 RefPtr<Element> element = HTMLElementFactory::createHTMLElement(tagName, currentNode()->document(), form(), true);
379 element->setAttributeMap(token.takeAtributes(), m_fragmentScriptingPermission);
380 ASSERT(element->isHTMLElement());
381 return element.release();
382 }
383
createHTMLElementFromElementRecord(HTMLElementStack::ElementRecord * record)384 PassRefPtr<Element> HTMLConstructionSite::createHTMLElementFromElementRecord(HTMLElementStack::ElementRecord* record)
385 {
386 return createHTMLElementFromSavedElement(record->element());
387 }
388
389 namespace {
390
cloneAttributes(Element * element)391 PassRefPtr<NamedNodeMap> cloneAttributes(Element* element)
392 {
393 NamedNodeMap* attributes = element->attributes(true);
394 if (!attributes)
395 return 0;
396
397 RefPtr<NamedNodeMap> newAttributes = NamedNodeMap::create();
398 for (size_t i = 0; i < attributes->length(); ++i) {
399 Attribute* attribute = attributes->attributeItem(i);
400 RefPtr<Attribute> clone = Attribute::createMapped(attribute->name(), attribute->value());
401 newAttributes->addAttribute(clone);
402 }
403 return newAttributes.release();
404 }
405
406 }
407
createHTMLElementFromSavedElement(Element * element)408 PassRefPtr<Element> HTMLConstructionSite::createHTMLElementFromSavedElement(Element* element)
409 {
410 // FIXME: This method is wrong. We should be using the original token.
411 // Using an Element* causes us to fail examples like this:
412 // <b id="1"><p><script>document.getElementById("1").id = "2"</script></p>TEXT</b>
413 // When reconstructTheActiveFormattingElements calls this method to open
414 // a second <b> tag to wrap TEXT, it will have id "2", even though the HTML5
415 // spec implies it should be "1". Minefield matches the HTML5 spec here.
416
417 ASSERT(element->isHTMLElement()); // otherwise localName() might be wrong.
418 AtomicHTMLToken fakeToken(HTMLToken::StartTag, element->localName(), cloneAttributes(element));
419 return createHTMLElement(fakeToken);
420 }
421
indexOfFirstUnopenFormattingElement(unsigned & firstUnopenElementIndex) const422 bool HTMLConstructionSite::indexOfFirstUnopenFormattingElement(unsigned& firstUnopenElementIndex) const
423 {
424 if (m_activeFormattingElements.isEmpty())
425 return false;
426 unsigned index = m_activeFormattingElements.size();
427 do {
428 --index;
429 const HTMLFormattingElementList::Entry& entry = m_activeFormattingElements.at(index);
430 if (entry.isMarker() || m_openElements.contains(entry.element())) {
431 firstUnopenElementIndex = index + 1;
432 return firstUnopenElementIndex < m_activeFormattingElements.size();
433 }
434 } while (index);
435 firstUnopenElementIndex = index;
436 return true;
437 }
438
reconstructTheActiveFormattingElements()439 void HTMLConstructionSite::reconstructTheActiveFormattingElements()
440 {
441 unsigned firstUnopenElementIndex;
442 if (!indexOfFirstUnopenFormattingElement(firstUnopenElementIndex))
443 return;
444
445 unsigned unopenEntryIndex = firstUnopenElementIndex;
446 ASSERT(unopenEntryIndex < m_activeFormattingElements.size());
447 for (; unopenEntryIndex < m_activeFormattingElements.size(); ++unopenEntryIndex) {
448 HTMLFormattingElementList::Entry& unopenedEntry = m_activeFormattingElements.at(unopenEntryIndex);
449 RefPtr<Element> reconstructed = createHTMLElementFromSavedElement(unopenedEntry.element());
450 m_openElements.push(attachToCurrent(reconstructed.release()));
451 unopenedEntry.replaceElement(currentElement());
452 }
453 }
454
generateImpliedEndTagsWithExclusion(const AtomicString & tagName)455 void HTMLConstructionSite::generateImpliedEndTagsWithExclusion(const AtomicString& tagName)
456 {
457 while (hasImpliedEndTag(currentNode()) && !currentNode()->hasLocalName(tagName))
458 m_openElements.pop();
459 }
460
generateImpliedEndTags()461 void HTMLConstructionSite::generateImpliedEndTags()
462 {
463 while (hasImpliedEndTag(currentNode()))
464 m_openElements.pop();
465 }
466
findFosterSite(AttachmentSite & site)467 void HTMLConstructionSite::findFosterSite(AttachmentSite& site)
468 {
469 HTMLElementStack::ElementRecord* lastTableElementRecord = m_openElements.topmost(tableTag.localName());
470 if (lastTableElementRecord) {
471 Element* lastTableElement = lastTableElementRecord->element();
472 if (ContainerNode* parent = lastTableElement->parentNode()) {
473 site.parent = parent;
474 site.nextChild = lastTableElement;
475 return;
476 }
477 site.parent = lastTableElementRecord->next()->element();
478 site.nextChild = 0;
479 return;
480 }
481 // Fragment case
482 site.parent = m_openElements.rootNode(); // DocumentFragment
483 site.nextChild = 0;
484 }
485
shouldFosterParent() const486 bool HTMLConstructionSite::shouldFosterParent() const
487 {
488 return m_redirectAttachToFosterParent
489 && currentNode()->isElementNode()
490 && causesFosterParenting(currentElement()->tagQName());
491 }
492
fosterParent(Node * node)493 void HTMLConstructionSite::fosterParent(Node* node)
494 {
495 AttachmentSite site;
496 findFosterSite(site);
497 attachAtSite(site, node);
498 }
499
500 }
501