1 /*
2  * Copyright (C) 2000 Peter Kelly (pmk@post.com)
3  * Copyright (C) 2005, 2006, 2008 Apple Inc. All rights reserved.
4  * Copyright (C) 2006 Alexey Proskuryakov (ap@webkit.org)
5  * Copyright (C) 2007 Samuel Weinig (sam@webkit.org)
6  * Copyright (C) 2008 Nokia Corporation and/or its subsidiary(-ies)
7  * Copyright (C) 2008 Holger Hans Peter Freyther
8  * Copyright (C) 2008 Torch Mobile Inc. All rights reserved. (http://www.torchmobile.com/)
9  *
10  * This library is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Library General Public
12  * License as published by the Free Software Foundation; either
13  * version 2 of the License, or (at your option) any later version.
14  *
15  * This library is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Library General Public License for more details.
19  *
20  * You should have received a copy of the GNU Library General Public License
21  * along with this library; see the file COPYING.LIB.  If not, write to
22  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
23  * Boston, MA 02110-1301, USA.
24  */
25 
26 #include "config.h"
27 #include "XMLDocumentParser.h"
28 
29 #include "CDATASection.h"
30 #include "CachedScript.h"
31 #include "Comment.h"
32 #include "CachedResourceLoader.h"
33 #include "Document.h"
34 #include "DocumentFragment.h"
35 #include "DocumentType.h"
36 #include "Frame.h"
37 #include "FrameLoader.h"
38 #include "FrameView.h"
39 #include "HTMLLinkElement.h"
40 #include "HTMLNames.h"
41 #include "HTMLStyleElement.h"
42 #include "ImageLoader.h"
43 #include "ProcessingInstruction.h"
44 #include "ResourceError.h"
45 #include "ResourceHandle.h"
46 #include "ResourceRequest.h"
47 #include "ResourceResponse.h"
48 #include "ScriptElement.h"
49 #include "ScriptSourceCode.h"
50 #include "ScriptValue.h"
51 #include "TextResourceDecoder.h"
52 #include "TreeDepthLimit.h"
53 #include <wtf/text/StringConcatenate.h>
54 #include <wtf/StringExtras.h>
55 #include <wtf/Threading.h>
56 #include <wtf/Vector.h>
57 
58 #if ENABLE(SVG)
59 #include "SVGNames.h"
60 #include "SVGStyleElement.h"
61 #endif
62 
63 using namespace std;
64 
65 namespace WebCore {
66 
67 using namespace HTMLNames;
68 
// Cap on the number of diagnostics handleError() accumulates into the error
// message text. Fatal errors are recorded even after this cap is reached.
const int maxErrors = 25;
70 
pushCurrentNode(Node * n)71 void XMLDocumentParser::pushCurrentNode(Node* n)
72 {
73     ASSERT(n);
74     ASSERT(m_currentNode);
75     if (n != document())
76         n->ref();
77     m_currentNodeStack.append(m_currentNode);
78     m_currentNode = n;
79     if (m_currentNodeStack.size() > maxDOMTreeDepth)
80         handleError(fatal, "Excessive node nesting.", lineNumber(), columnNumber());
81 }
82 
popCurrentNode()83 void XMLDocumentParser::popCurrentNode()
84 {
85     if (!m_currentNode)
86         return;
87     ASSERT(m_currentNodeStack.size());
88 
89     if (m_currentNode != document())
90         m_currentNode->deref();
91 
92     m_currentNode = m_currentNodeStack.last();
93     m_currentNodeStack.removeLast();
94 }
95 
clearCurrentNodeStack()96 void XMLDocumentParser::clearCurrentNodeStack()
97 {
98     if (m_currentNode && m_currentNode != document())
99         m_currentNode->deref();
100     m_currentNode = 0;
101 
102     if (m_currentNodeStack.size()) { // Aborted parsing.
103         for (size_t i = m_currentNodeStack.size() - 1; i != 0; --i)
104             m_currentNodeStack[i]->deref();
105         if (m_currentNodeStack[0] && m_currentNodeStack[0] != document())
106             m_currentNodeStack[0]->deref();
107         m_currentNodeStack.clear();
108     }
109 }
110 
insert(const SegmentedString &)111 void XMLDocumentParser::insert(const SegmentedString&)
112 {
113     ASSERT_NOT_REACHED();
114 }
115 
append(const SegmentedString & s)116 void XMLDocumentParser::append(const SegmentedString& s)
117 {
118     String parseString = s.toString();
119 
120     if (m_sawXSLTransform || !m_sawFirstElement)
121         m_originalSourceForTransform += parseString;
122 
123     if (isStopped() || m_sawXSLTransform)
124         return;
125 
126     if (m_parserPaused) {
127         m_pendingSrc.append(s);
128         return;
129     }
130 
131     doWrite(s.toString());
132 
133     // After parsing, go ahead and dispatch image beforeload events.
134     ImageLoader::dispatchPendingBeforeLoadEvents();
135 }
136 
handleError(ErrorType type,const char * m,int lineNumber,int columnNumber)137 void XMLDocumentParser::handleError(ErrorType type, const char* m, int lineNumber, int columnNumber)
138 {
139     handleError(type, m, TextPosition1(WTF::OneBasedNumber::fromOneBasedInt(lineNumber), WTF::OneBasedNumber::fromOneBasedInt(columnNumber)));
140 }
141 
handleError(ErrorType type,const char * m,TextPosition1 position)142 void XMLDocumentParser::handleError(ErrorType type, const char* m, TextPosition1 position)
143 {
144     if (type == fatal || (m_errorCount < maxErrors && m_lastErrorPosition.m_line != position.m_line && m_lastErrorPosition.m_column != position.m_column)) {
145         switch (type) {
146             case warning:
147                 m_errorMessages += makeString("warning on line ", String::number(position.m_line.oneBasedInt()), " at column ", String::number(position.m_column.oneBasedInt()), ": ", m);
148                 break;
149             case fatal:
150             case nonFatal:
151                 m_errorMessages += makeString("error on line ", String::number(position.m_line.oneBasedInt()), " at column ", String::number(position.m_column.oneBasedInt()), ": ", m);
152         }
153 
154         m_lastErrorPosition = position;
155         ++m_errorCount;
156     }
157 
158     if (type != warning)
159         m_sawError = true;
160 
161     if (type == fatal)
162         stopParsing();
163 }
164 
enterText()165 void XMLDocumentParser::enterText()
166 {
167 #if !USE(QXMLSTREAM)
168     ASSERT(m_bufferedText.size() == 0);
169 #endif
170     RefPtr<Node> newNode = Text::create(document(), "");
171     m_currentNode->deprecatedParserAddChild(newNode.get());
172     pushCurrentNode(newNode.get());
173 }
174 
175 #if !USE(QXMLSTREAM)
toString(const xmlChar * string,size_t size)176 static inline String toString(const xmlChar* string, size_t size)
177 {
178     return String::fromUTF8(reinterpret_cast<const char*>(string), size);
179 }
180 #endif
181 
182 
exitText()183 void XMLDocumentParser::exitText()
184 {
185     if (isStopped())
186         return;
187 
188     if (!m_currentNode || !m_currentNode->isTextNode())
189         return;
190 
191 #if !USE(QXMLSTREAM)
192     ExceptionCode ec = 0;
193     static_cast<Text*>(m_currentNode)->appendData(toString(m_bufferedText.data(), m_bufferedText.size()), ec);
194     Vector<xmlChar> empty;
195     m_bufferedText.swap(empty);
196 #endif
197 
198     if (m_view && m_currentNode && !m_currentNode->attached())
199         m_currentNode->attach();
200 
201     popCurrentNode();
202 }
203 
detach()204 void XMLDocumentParser::detach()
205 {
206     clearCurrentNodeStack();
207     ScriptableDocumentParser::detach();
208 }
209 
end()210 void XMLDocumentParser::end()
211 {
212     // XMLDocumentParserLibxml2 will do bad things to the document if doEnd() is called.
213     // I don't believe XMLDocumentParserQt needs doEnd called in the fragment case.
214     ASSERT(!m_parsingFragment);
215 
216     doEnd();
217 
218     // doEnd() could process a script tag, thus pausing parsing.
219     if (m_parserPaused)
220         return;
221 
222     if (m_sawError)
223         insertErrorMessageBlock();
224     else {
225         exitText();
226         document()->styleSelectorChanged(RecalcStyleImmediately);
227     }
228 
229     if (isParsing())
230         prepareToStopParsing();
231     document()->setReadyState(Document::Interactive);
232     clearCurrentNodeStack();
233     document()->finishedParsing();
234 }
235 
finish()236 void XMLDocumentParser::finish()
237 {
238     // FIXME: We should ASSERT(!m_parserStopped) here, since it does not
239     // makes sense to call any methods on DocumentParser once it's been stopped.
240     // However, FrameLoader::stop calls Document::finishParsing unconditionally
241     // which in turn calls m_parser->finish().
242 
243     if (m_parserPaused)
244         m_finishCalled = true;
245     else
246         end();
247 }
248 
finishWasCalled()249 bool XMLDocumentParser::finishWasCalled()
250 {
251     return m_finishCalled;
252 }
253 
createXHTMLParserErrorHeader(Document * doc,const String & errorMessages)254 static inline RefPtr<Element> createXHTMLParserErrorHeader(Document* doc, const String& errorMessages)
255 {
256     RefPtr<Element> reportElement = doc->createElement(QualifiedName(nullAtom, "parsererror", xhtmlNamespaceURI), false);
257     reportElement->setAttribute(styleAttr, "display: block; white-space: pre; border: 2px solid #c77; padding: 0 1em 0 1em; margin: 1em; background-color: #fdd; color: black");
258 
259     ExceptionCode ec = 0;
260     RefPtr<Element> h3 = doc->createElement(h3Tag, false);
261     reportElement->appendChild(h3.get(), ec);
262     h3->appendChild(doc->createTextNode("This page contains the following errors:"), ec);
263 
264     RefPtr<Element> fixed = doc->createElement(divTag, false);
265     reportElement->appendChild(fixed.get(), ec);
266     fixed->setAttribute(styleAttr, "font-family:monospace;font-size:12px");
267     fixed->appendChild(doc->createTextNode(errorMessages), ec);
268 
269     h3 = doc->createElement(h3Tag, false);
270     reportElement->appendChild(h3.get(), ec);
271     h3->appendChild(doc->createTextNode("Below is a rendering of the page up to the first error."), ec);
272 
273     return reportElement;
274 }
275 
insertErrorMessageBlock()276 void XMLDocumentParser::insertErrorMessageBlock()
277 {
278 #if USE(QXMLSTREAM)
279     if (m_parsingFragment)
280         return;
281 #endif
282     // One or more errors occurred during parsing of the code. Display an error block to the user above
283     // the normal content (the DOM tree is created manually and includes line/col info regarding
284     // where the errors are located)
285 
286     // Create elements for display
287     ExceptionCode ec = 0;
288     Document* document = this->document();
289     RefPtr<Element> documentElement = document->documentElement();
290     if (!documentElement) {
291         RefPtr<Element> rootElement = document->createElement(htmlTag, false);
292         document->appendChild(rootElement, ec);
293         RefPtr<Element> body = document->createElement(bodyTag, false);
294         rootElement->appendChild(body, ec);
295         documentElement = body.get();
296     }
297 #if ENABLE(SVG)
298     else if (documentElement->namespaceURI() == SVGNames::svgNamespaceURI) {
299         RefPtr<Element> rootElement = document->createElement(htmlTag, false);
300         RefPtr<Element> body = document->createElement(bodyTag, false);
301         rootElement->appendChild(body, ec);
302         body->appendChild(documentElement, ec);
303         document->appendChild(rootElement.get(), ec);
304         documentElement = body.get();
305     }
306 #endif
307     RefPtr<Element> reportElement = createXHTMLParserErrorHeader(document, m_errorMessages);
308     documentElement->insertBefore(reportElement, documentElement->firstChild(), ec);
309 #if ENABLE(XSLT)
310     if (document->transformSourceDocument()) {
311         RefPtr<Element> paragraph = document->createElement(pTag, false);
312         paragraph->setAttribute(styleAttr, "white-space: normal");
313         paragraph->appendChild(document->createTextNode("This document was created as the result of an XSL transformation. The line and column numbers given are from the transformed result."), ec);
314         reportElement->appendChild(paragraph.release(), ec);
315     }
316 #endif
317     document->updateStyleIfNeeded();
318 }
319 
notifyFinished(CachedResource * unusedResource)320 void XMLDocumentParser::notifyFinished(CachedResource* unusedResource)
321 {
322     ASSERT_UNUSED(unusedResource, unusedResource == m_pendingScript);
323     ASSERT(m_pendingScript->accessCount() > 0);
324 
325     ScriptSourceCode sourceCode(m_pendingScript.get());
326     bool errorOccurred = m_pendingScript->errorOccurred();
327     bool wasCanceled = m_pendingScript->wasCanceled();
328 
329     m_pendingScript->removeClient(this);
330     m_pendingScript = 0;
331 
332     RefPtr<Element> e = m_scriptElement;
333     m_scriptElement = 0;
334 
335     ScriptElement* scriptElement = toScriptElement(e.get());
336     ASSERT(scriptElement);
337 
338     // JavaScript can detach this parser, make sure it's kept alive even if detached.
339     RefPtr<XMLDocumentParser> protect(this);
340 
341     if (errorOccurred)
342         scriptElement->dispatchErrorEvent();
343     else if (!wasCanceled) {
344         scriptElement->executeScript(sourceCode);
345         scriptElement->dispatchLoadEvent();
346     }
347 
348     m_scriptElement = 0;
349 
350     if (!isDetached() && !m_requestingScript)
351         resumeParsing();
352 }
353 
isWaitingForScripts() const354 bool XMLDocumentParser::isWaitingForScripts() const
355 {
356     return m_pendingScript;
357 }
358 
pauseParsing()359 void XMLDocumentParser::pauseParsing()
360 {
361     if (m_parsingFragment)
362         return;
363 
364     m_parserPaused = true;
365 }
366 
parseDocumentFragment(const String & chunk,DocumentFragment * fragment,Element * contextElement,FragmentScriptingPermission scriptingPermission)367 bool XMLDocumentParser::parseDocumentFragment(const String& chunk, DocumentFragment* fragment, Element* contextElement, FragmentScriptingPermission scriptingPermission)
368 {
369     if (!chunk.length())
370         return true;
371 
372     // FIXME: We need to implement the HTML5 XML Fragment parsing algorithm:
373     // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-xhtml-syntax.html#xml-fragment-parsing-algorithm
374     // For now we have a hack for script/style innerHTML support:
375     if (contextElement && (contextElement->hasLocalName(HTMLNames::scriptTag) || contextElement->hasLocalName(HTMLNames::styleTag))) {
376         fragment->parserAddChild(fragment->document()->createTextNode(chunk));
377         return true;
378     }
379 
380     RefPtr<XMLDocumentParser> parser = XMLDocumentParser::create(fragment, contextElement, scriptingPermission);
381     bool wellFormed = parser->appendFragmentSource(chunk);
382     // Do not call finish().  Current finish() and doEnd() implementations touch the main Document/loader
383     // and can cause crashes in the fragment case.
384     parser->detach(); // Allows ~DocumentParser to assert it was detached before destruction.
385     return wellFormed; // appendFragmentSource()'s wellFormed is more permissive than wellFormed().
386 }
387 
388 } // namespace WebCore
389