1 /*
2  * Copyright (C) 2000 Peter Kelly <pmk@post.com>
3  * Copyright (C) 2005, 2006, 2008 Apple Inc. All rights reserved.
4  * Copyright (C) 2006 Alexey Proskuryakov <ap@webkit.org>
5  * Copyright (C) 2007 Samuel Weinig <sam@webkit.org>
6  * Copyright (C) 2008 Nokia Corporation and/or its subsidiary(-ies)
7  * Copyright (C) 2008 Holger Hans Peter Freyther
8  * Copyright (C) 2008, 2009 Torch Mobile Inc. All rights reserved. (http://www.torchmobile.com/)
9  * Copyright (C) 2010 Patrick Gansterer <paroga@paroga.com>
10  *
11  * This library is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU Library General Public
13  * License as published by the Free Software Foundation; either
14  * version 2 of the License, or (at your option) any later version.
15  *
16  * This library is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * Library General Public License for more details.
20  *
21  * You should have received a copy of the GNU Library General Public License
22  * along with this library; see the file COPYING.LIB.  If not, write to
23  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
24  * Boston, MA 02110-1301, USA.
25  */
26 
27 #include "config.h"
28 #include "XMLDocumentParser.h"
29 
30 #include "CDATASection.h"
31 #include "CachedScript.h"
32 #include "Comment.h"
33 #include "CachedResourceLoader.h"
34 #include "Document.h"
35 #include "DocumentFragment.h"
36 #include "DocumentType.h"
37 #include "Frame.h"
38 #include "FrameLoader.h"
39 #include "FrameView.h"
40 #include "HTMLEntityParser.h"
41 #include "HTMLHtmlElement.h"
42 #include "HTMLLinkElement.h"
43 #include "HTMLNames.h"
44 #include "HTMLStyleElement.h"
45 #include "ProcessingInstruction.h"
46 #include "ResourceError.h"
47 #include "ResourceHandle.h"
48 #include "ResourceRequest.h"
49 #include "ResourceResponse.h"
50 #include "ScriptElement.h"
51 #include "ScriptSourceCode.h"
52 #include "ScriptValue.h"
53 #include "SecurityOrigin.h"
54 #include "TextResourceDecoder.h"
55 #include "TransformSource.h"
56 #include "XMLNSNames.h"
57 #include "XMLDocumentParserScope.h"
58 #include <libxml/parser.h>
59 #include <libxml/parserInternals.h>
60 #include <wtf/text/CString.h>
61 #include <wtf/StringExtras.h>
62 #include <wtf/Threading.h>
63 #include <wtf/UnusedParam.h>
64 #include <wtf/Vector.h>
65 
66 #if ENABLE(XSLT)
67 #include "XMLTreeViewer.h"
68 #include <libxslt/xslt.h>
69 #endif
70 
71 #if ENABLE(XHTMLMP)
72 #include "HTMLScriptElement.h"
73 #endif
74 
75 
76 using namespace std;
77 
78 namespace WebCore {
79 
80 class PendingCallbacks {
81     WTF_MAKE_NONCOPYABLE(PendingCallbacks);
82 public:
~PendingCallbacks()83     ~PendingCallbacks() { }
create()84     static PassOwnPtr<PendingCallbacks> create()
85     {
86         return adoptPtr(new PendingCallbacks);
87     }
88 
appendStartElementNSCallback(const xmlChar * xmlLocalName,const xmlChar * xmlPrefix,const xmlChar * xmlURI,int nb_namespaces,const xmlChar ** namespaces,int nb_attributes,int nb_defaulted,const xmlChar ** attributes)89     void appendStartElementNSCallback(const xmlChar* xmlLocalName, const xmlChar* xmlPrefix, const xmlChar* xmlURI, int nb_namespaces,
90                                       const xmlChar** namespaces, int nb_attributes, int nb_defaulted, const xmlChar** attributes)
91     {
92         OwnPtr<PendingStartElementNSCallback> callback = adoptPtr(new PendingStartElementNSCallback);
93 
94         callback->xmlLocalName = xmlStrdup(xmlLocalName);
95         callback->xmlPrefix = xmlStrdup(xmlPrefix);
96         callback->xmlURI = xmlStrdup(xmlURI);
97         callback->nb_namespaces = nb_namespaces;
98         callback->namespaces = static_cast<xmlChar**>(xmlMalloc(sizeof(xmlChar*) * nb_namespaces * 2));
99         for (int i = 0; i < nb_namespaces * 2 ; i++)
100             callback->namespaces[i] = xmlStrdup(namespaces[i]);
101         callback->nb_attributes = nb_attributes;
102         callback->nb_defaulted = nb_defaulted;
103         callback->attributes = static_cast<xmlChar**>(xmlMalloc(sizeof(xmlChar*) * nb_attributes * 5));
104         for (int i = 0; i < nb_attributes; i++) {
105             // Each attribute has 5 elements in the array:
106             // name, prefix, uri, value and an end pointer.
107 
108             for (int j = 0; j < 3; j++)
109                 callback->attributes[i * 5 + j] = xmlStrdup(attributes[i * 5 + j]);
110 
111             int len = attributes[i * 5 + 4] - attributes[i * 5 + 3];
112 
113             callback->attributes[i * 5 + 3] = xmlStrndup(attributes[i * 5 + 3], len);
114             callback->attributes[i * 5 + 4] = callback->attributes[i * 5 + 3] + len;
115         }
116 
117         m_callbacks.append(callback.release());
118     }
119 
appendEndElementNSCallback()120     void appendEndElementNSCallback()
121     {
122         m_callbacks.append(adoptPtr(new PendingEndElementNSCallback));
123     }
124 
appendCharactersCallback(const xmlChar * s,int len)125     void appendCharactersCallback(const xmlChar* s, int len)
126     {
127         OwnPtr<PendingCharactersCallback> callback = adoptPtr(new PendingCharactersCallback);
128 
129         callback->s = xmlStrndup(s, len);
130         callback->len = len;
131 
132         m_callbacks.append(callback.release());
133     }
134 
appendProcessingInstructionCallback(const xmlChar * target,const xmlChar * data)135     void appendProcessingInstructionCallback(const xmlChar* target, const xmlChar* data)
136     {
137         OwnPtr<PendingProcessingInstructionCallback> callback = adoptPtr(new PendingProcessingInstructionCallback);
138 
139         callback->target = xmlStrdup(target);
140         callback->data = xmlStrdup(data);
141 
142         m_callbacks.append(callback.release());
143     }
144 
appendCDATABlockCallback(const xmlChar * s,int len)145     void appendCDATABlockCallback(const xmlChar* s, int len)
146     {
147         OwnPtr<PendingCDATABlockCallback> callback = adoptPtr(new PendingCDATABlockCallback);
148 
149         callback->s = xmlStrndup(s, len);
150         callback->len = len;
151 
152         m_callbacks.append(callback.release());
153     }
154 
appendCommentCallback(const xmlChar * s)155     void appendCommentCallback(const xmlChar* s)
156     {
157         OwnPtr<PendingCommentCallback> callback = adoptPtr(new PendingCommentCallback);
158 
159         callback->s = xmlStrdup(s);
160 
161         m_callbacks.append(callback.release());
162     }
163 
appendInternalSubsetCallback(const xmlChar * name,const xmlChar * externalID,const xmlChar * systemID)164     void appendInternalSubsetCallback(const xmlChar* name, const xmlChar* externalID, const xmlChar* systemID)
165     {
166         OwnPtr<PendingInternalSubsetCallback> callback = adoptPtr(new PendingInternalSubsetCallback);
167 
168         callback->name = xmlStrdup(name);
169         callback->externalID = xmlStrdup(externalID);
170         callback->systemID = xmlStrdup(systemID);
171 
172         m_callbacks.append(callback.release());
173     }
174 
appendErrorCallback(XMLDocumentParser::ErrorType type,const xmlChar * message,int lineNumber,int columnNumber)175     void appendErrorCallback(XMLDocumentParser::ErrorType type, const xmlChar* message, int lineNumber, int columnNumber)
176     {
177         OwnPtr<PendingErrorCallback> callback = adoptPtr(new PendingErrorCallback);
178 
179         callback->message = xmlStrdup(message);
180         callback->type = type;
181         callback->lineNumber = lineNumber;
182         callback->columnNumber = columnNumber;
183 
184         m_callbacks.append(callback.release());
185     }
186 
callAndRemoveFirstCallback(XMLDocumentParser * parser)187     void callAndRemoveFirstCallback(XMLDocumentParser* parser)
188     {
189         OwnPtr<PendingCallback> callback = m_callbacks.takeFirst();
190         callback->call(parser);
191     }
192 
isEmpty() const193     bool isEmpty() const { return m_callbacks.isEmpty(); }
194 
195 private:
PendingCallbacks()196     PendingCallbacks() { }
197 
198     struct PendingCallback {
~PendingCallbackWebCore::PendingCallbacks::PendingCallback199         virtual ~PendingCallback() { }
200         virtual void call(XMLDocumentParser* parser) = 0;
201     };
202 
203     struct PendingStartElementNSCallback : public PendingCallback {
~PendingStartElementNSCallbackWebCore::PendingCallbacks::PendingStartElementNSCallback204         virtual ~PendingStartElementNSCallback()
205         {
206             xmlFree(xmlLocalName);
207             xmlFree(xmlPrefix);
208             xmlFree(xmlURI);
209             for (int i = 0; i < nb_namespaces * 2; i++)
210                 xmlFree(namespaces[i]);
211             xmlFree(namespaces);
212             for (int i = 0; i < nb_attributes; i++)
213                 for (int j = 0; j < 4; j++)
214                     xmlFree(attributes[i * 5 + j]);
215             xmlFree(attributes);
216         }
217 
callWebCore::PendingCallbacks::PendingStartElementNSCallback218         virtual void call(XMLDocumentParser* parser)
219         {
220             parser->startElementNs(xmlLocalName, xmlPrefix, xmlURI,
221                                       nb_namespaces, const_cast<const xmlChar**>(namespaces),
222                                       nb_attributes, nb_defaulted, const_cast<const xmlChar**>(attributes));
223         }
224 
225         xmlChar* xmlLocalName;
226         xmlChar* xmlPrefix;
227         xmlChar* xmlURI;
228         int nb_namespaces;
229         xmlChar** namespaces;
230         int nb_attributes;
231         int nb_defaulted;
232         xmlChar** attributes;
233     };
234 
235     struct PendingEndElementNSCallback : public PendingCallback {
callWebCore::PendingCallbacks::PendingEndElementNSCallback236         virtual void call(XMLDocumentParser* parser)
237         {
238             parser->endElementNs();
239         }
240     };
241 
242     struct PendingCharactersCallback : public PendingCallback {
~PendingCharactersCallbackWebCore::PendingCallbacks::PendingCharactersCallback243         virtual ~PendingCharactersCallback()
244         {
245             xmlFree(s);
246         }
247 
callWebCore::PendingCallbacks::PendingCharactersCallback248         virtual void call(XMLDocumentParser* parser)
249         {
250             parser->characters(s, len);
251         }
252 
253         xmlChar* s;
254         int len;
255     };
256 
257     struct PendingProcessingInstructionCallback : public PendingCallback {
~PendingProcessingInstructionCallbackWebCore::PendingCallbacks::PendingProcessingInstructionCallback258         virtual ~PendingProcessingInstructionCallback()
259         {
260             xmlFree(target);
261             xmlFree(data);
262         }
263 
callWebCore::PendingCallbacks::PendingProcessingInstructionCallback264         virtual void call(XMLDocumentParser* parser)
265         {
266             parser->processingInstruction(target, data);
267         }
268 
269         xmlChar* target;
270         xmlChar* data;
271     };
272 
273     struct PendingCDATABlockCallback : public PendingCallback {
~PendingCDATABlockCallbackWebCore::PendingCallbacks::PendingCDATABlockCallback274         virtual ~PendingCDATABlockCallback()
275         {
276             xmlFree(s);
277         }
278 
callWebCore::PendingCallbacks::PendingCDATABlockCallback279         virtual void call(XMLDocumentParser* parser)
280         {
281             parser->cdataBlock(s, len);
282         }
283 
284         xmlChar* s;
285         int len;
286     };
287 
288     struct PendingCommentCallback : public PendingCallback {
~PendingCommentCallbackWebCore::PendingCallbacks::PendingCommentCallback289         virtual ~PendingCommentCallback()
290         {
291             xmlFree(s);
292         }
293 
callWebCore::PendingCallbacks::PendingCommentCallback294         virtual void call(XMLDocumentParser* parser)
295         {
296             parser->comment(s);
297         }
298 
299         xmlChar* s;
300     };
301 
302     struct PendingInternalSubsetCallback : public PendingCallback {
~PendingInternalSubsetCallbackWebCore::PendingCallbacks::PendingInternalSubsetCallback303         virtual ~PendingInternalSubsetCallback()
304         {
305             xmlFree(name);
306             xmlFree(externalID);
307             xmlFree(systemID);
308         }
309 
callWebCore::PendingCallbacks::PendingInternalSubsetCallback310         virtual void call(XMLDocumentParser* parser)
311         {
312             parser->internalSubset(name, externalID, systemID);
313         }
314 
315         xmlChar* name;
316         xmlChar* externalID;
317         xmlChar* systemID;
318     };
319 
320     struct PendingErrorCallback: public PendingCallback {
~PendingErrorCallbackWebCore::PendingCallbacks::PendingErrorCallback321         virtual ~PendingErrorCallback()
322         {
323             xmlFree(message);
324         }
325 
callWebCore::PendingCallbacks::PendingErrorCallback326         virtual void call(XMLDocumentParser* parser)
327         {
328             parser->handleError(type, reinterpret_cast<char*>(message), lineNumber, columnNumber);
329         }
330 
331         XMLDocumentParser::ErrorType type;
332         xmlChar* message;
333         int lineNumber;
334         int columnNumber;
335     };
336 
337     Deque<OwnPtr<PendingCallback> > m_callbacks;
338 };
339 // --------------------------------
340 
341 static int globalDescriptor = 0;
342 static ThreadIdentifier libxmlLoaderThread = 0;
343 
matchFunc(const char *)344 static int matchFunc(const char*)
345 {
346     // Only match loads initiated due to uses of libxml2 from within XMLDocumentParser to avoid
347     // interfering with client applications that also use libxml2.  http://bugs.webkit.org/show_bug.cgi?id=17353
348     return XMLDocumentParserScope::currentCachedResourceLoader && currentThread() == libxmlLoaderThread;
349 }
350 
351 class OffsetBuffer {
352 public:
OffsetBuffer(const Vector<char> & b)353     OffsetBuffer(const Vector<char>& b) : m_buffer(b), m_currentOffset(0) { }
354 
readOutBytes(char * outputBuffer,unsigned askedToRead)355     int readOutBytes(char* outputBuffer, unsigned askedToRead)
356     {
357         unsigned bytesLeft = m_buffer.size() - m_currentOffset;
358         unsigned lenToCopy = min(askedToRead, bytesLeft);
359         if (lenToCopy) {
360             memcpy(outputBuffer, m_buffer.data() + m_currentOffset, lenToCopy);
361             m_currentOffset += lenToCopy;
362         }
363         return lenToCopy;
364     }
365 
366 private:
367     Vector<char> m_buffer;
368     unsigned m_currentOffset;
369 };
370 
switchToUTF16(xmlParserCtxtPtr ctxt)371 static void switchToUTF16(xmlParserCtxtPtr ctxt)
372 {
373     // Hack around libxml2's lack of encoding overide support by manually
374     // resetting the encoding to UTF-16 before every chunk.  Otherwise libxml
375     // will detect <?xml version="1.0" encoding="<encoding name>"?> blocks
376     // and switch encodings, causing the parse to fail.
377     const UChar BOM = 0xFEFF;
378     const unsigned char BOMHighByte = *reinterpret_cast<const unsigned char*>(&BOM);
379     xmlSwitchEncoding(ctxt, BOMHighByte == 0xFF ? XML_CHAR_ENCODING_UTF16LE : XML_CHAR_ENCODING_UTF16BE);
380 }
381 
shouldAllowExternalLoad(const KURL & url)382 static bool shouldAllowExternalLoad(const KURL& url)
383 {
384     String urlString = url.string();
385 
386     // On non-Windows platforms libxml asks for this URL, the
387     // "XML_XML_DEFAULT_CATALOG", on initialization.
388     if (urlString == "file:///etc/xml/catalog")
389         return false;
390 
391     // On Windows, libxml computes a URL relative to where its DLL resides.
392     if (urlString.startsWith("file:///", false) && urlString.endsWith("/etc/catalog", false))
393         return false;
394 
395     // The most common DTD.  There isn't much point in hammering www.w3c.org
396     // by requesting this URL for every XHTML document.
397     if (urlString.startsWith("http://www.w3.org/TR/xhtml", false))
398         return false;
399 
400     // Similarly, there isn't much point in requesting the SVG DTD.
401     if (urlString.startsWith("http://www.w3.org/Graphics/SVG", false))
402         return false;
403 
404     // The libxml doesn't give us a lot of context for deciding whether to
405     // allow this request.  In the worst case, this load could be for an
406     // external entity and the resulting document could simply read the
407     // retrieved content.  If we had more context, we could potentially allow
408     // the parser to load a DTD.  As things stand, we take the conservative
409     // route and allow same-origin requests only.
410     if (!XMLDocumentParserScope::currentCachedResourceLoader->document()->securityOrigin()->canRequest(url)) {
411         XMLDocumentParserScope::currentCachedResourceLoader->printAccessDeniedMessage(url);
412         return false;
413     }
414 
415     return true;
416 }
417 
openFunc(const char * uri)418 static void* openFunc(const char* uri)
419 {
420     ASSERT(XMLDocumentParserScope::currentCachedResourceLoader);
421     ASSERT(currentThread() == libxmlLoaderThread);
422 
423     KURL url(KURL(), uri);
424 
425     if (!shouldAllowExternalLoad(url))
426         return &globalDescriptor;
427 
428     ResourceError error;
429     ResourceResponse response;
430     Vector<char> data;
431 
432 
433     {
434         CachedResourceLoader* cachedResourceLoader = XMLDocumentParserScope::currentCachedResourceLoader;
435         XMLDocumentParserScope scope(0);
436         // FIXME: We should restore the original global error handler as well.
437 
438         if (cachedResourceLoader->frame())
439             cachedResourceLoader->frame()->loader()->loadResourceSynchronously(url, AllowStoredCredentials, error, response, data);
440     }
441 
442     // We have to check the URL again after the load to catch redirects.
443     // See <https://bugs.webkit.org/show_bug.cgi?id=21963>.
444     if (!shouldAllowExternalLoad(response.url()))
445         return &globalDescriptor;
446 
447     return new OffsetBuffer(data);
448 }
449 
readFunc(void * context,char * buffer,int len)450 static int readFunc(void* context, char* buffer, int len)
451 {
452     // Do 0-byte reads in case of a null descriptor
453     if (context == &globalDescriptor)
454         return 0;
455 
456     OffsetBuffer* data = static_cast<OffsetBuffer*>(context);
457     return data->readOutBytes(buffer, len);
458 }
459 
// libxml2 I/O "write" callback: accepts nothing and reports zero bytes
// written, whatever it is given.
static int writeFunc(void*, const char*, int)
{
    // Always just do 0-byte writes
    const int bytesWritten = 0;
    return bytesWritten;
}
465 
closeFunc(void * context)466 static int closeFunc(void* context)
467 {
468     if (context != &globalDescriptor) {
469         OffsetBuffer* data = static_cast<OffsetBuffer*>(context);
470         delete data;
471     }
472     return 0;
473 }
474 
475 #if ENABLE(XSLT)
// Variadic, printf-style error handler that deliberately does nothing: the
// context pointer, the format string and all arguments are ignored.
errorFunc(void *,const char *,...)476 static void errorFunc(void*, const char*, ...)
477 {
478     // FIXME: It would be nice to display error messages somewhere.
479 }
480 #endif
481 
482 static bool didInit = false;
483 
createStringParser(xmlSAXHandlerPtr handlers,void * userData)484 PassRefPtr<XMLParserContext> XMLParserContext::createStringParser(xmlSAXHandlerPtr handlers, void* userData)
485 {
486     if (!didInit) {
487         xmlInitParser();
488         xmlRegisterInputCallbacks(matchFunc, openFunc, readFunc, closeFunc);
489         xmlRegisterOutputCallbacks(matchFunc, openFunc, writeFunc, closeFunc);
490         libxmlLoaderThread = currentThread();
491         didInit = true;
492     }
493 
494     xmlParserCtxtPtr parser = xmlCreatePushParserCtxt(handlers, 0, 0, 0, 0);
495     parser->_private = userData;
496     parser->replaceEntities = true;
497     switchToUTF16(parser);
498 
499     return adoptRef(new XMLParserContext(parser));
500 }
501 
502 
503 // Chunk should be encoded in UTF-8
createMemoryParser(xmlSAXHandlerPtr handlers,void * userData,const CString & chunk)504 PassRefPtr<XMLParserContext> XMLParserContext::createMemoryParser(xmlSAXHandlerPtr handlers, void* userData, const CString& chunk)
505 {
506     if (!didInit) {
507         xmlInitParser();
508         xmlRegisterInputCallbacks(matchFunc, openFunc, readFunc, closeFunc);
509         xmlRegisterOutputCallbacks(matchFunc, openFunc, writeFunc, closeFunc);
510         libxmlLoaderThread = currentThread();
511         didInit = true;
512     }
513 
514     // appendFragmentSource() checks that the length doesn't overflow an int.
515     xmlParserCtxtPtr parser = xmlCreateMemoryParserCtxt(chunk.data(), chunk.length());
516 
517     if (!parser)
518         return 0;
519 
520     // Copy the sax handler
521     memcpy(parser->sax, handlers, sizeof(xmlSAXHandler));
522 
523     // Set parser options.
524     // XML_PARSE_NODICT: default dictionary option.
525     // XML_PARSE_NOENT: force entities substitutions.
526     xmlCtxtUseOptions(parser, XML_PARSE_NODICT | XML_PARSE_NOENT);
527 
528     // Internal initialization
529     parser->sax2 = 1;
530     parser->instate = XML_PARSER_CONTENT; // We are parsing a CONTENT
531     parser->depth = 0;
532     parser->str_xml = xmlDictLookup(parser->dict, BAD_CAST "xml", 3);
533     parser->str_xmlns = xmlDictLookup(parser->dict, BAD_CAST "xmlns", 5);
534     parser->str_xml_ns = xmlDictLookup(parser->dict, XML_XML_NAMESPACE, 36);
535     parser->_private = userData;
536 
537     return adoptRef(new XMLParserContext(parser));
538 }
539 
540 // --------------------------------
541 
supportsXMLVersion(const String & version)542 bool XMLDocumentParser::supportsXMLVersion(const String& version)
543 {
544     return version == "1.0";
545 }
546 
// Constructs a parser for a whole document. Parsing starts with |document| as
// the current node, |frameView| is remembered in m_view, no libxml context
// exists yet (m_context is null), m_parsingFragment is false and fragment
// scripting is allowed. All "saw..." flags, the pause/finish flags and the
// error count start cleared.
XMLDocumentParser(Document * document,FrameView * frameView)547 XMLDocumentParser::XMLDocumentParser(Document* document, FrameView* frameView)
548     : ScriptableDocumentParser(document)
549     , m_view(frameView)
550     , m_context(0)
551     , m_pendingCallbacks(PendingCallbacks::create())
552     , m_currentNode(document)
553     , m_sawError(false)
554     , m_sawCSS(false)
555     , m_sawXSLTransform(false)
556     , m_sawFirstElement(false)
557     , m_isXHTMLDocument(false)
558 #if ENABLE(XHTMLMP)
559     , m_isXHTMLMPDocument(false)
560     , m_hasDocTypeDeclaration(false)
561 #endif
562     , m_parserPaused(false)
563     , m_requestingScript(false)
564     , m_finishCalled(false)
565     , m_errorCount(0)
566     , m_lastErrorPosition(TextPosition1::belowRangePosition())
567     , m_pendingScript(0)
568     , m_scriptStartPosition(TextPosition1::belowRangePosition())
569     , m_parsingFragment(false)
570     , m_scriptingPermission(FragmentScriptingAllowed)
571 {
572 }
573 
XMLDocumentParser(DocumentFragment * fragment,Element * parentElement,FragmentScriptingPermission scriptingPermission)574 XMLDocumentParser::XMLDocumentParser(DocumentFragment* fragment, Element* parentElement, FragmentScriptingPermission scriptingPermission)
575     : ScriptableDocumentParser(fragment->document())
576     , m_view(0)
577     , m_context(0)
578     , m_pendingCallbacks(PendingCallbacks::create())
579     , m_currentNode(fragment)
580     , m_sawError(false)
581     , m_sawCSS(false)
582     , m_sawXSLTransform(false)
583     , m_sawFirstElement(false)
584     , m_isXHTMLDocument(false)
585 #if ENABLE(XHTMLMP)
586     , m_isXHTMLMPDocument(false)
587     , m_hasDocTypeDeclaration(false)
588 #endif
589     , m_parserPaused(false)
590     , m_requestingScript(false)
591     , m_finishCalled(false)
592     , m_errorCount(0)
593     , m_lastErrorPosition(TextPosition1::belowRangePosition())
594     , m_pendingScript(0)
595     , m_scriptStartPosition(TextPosition1::belowRangePosition())
596     , m_parsingFragment(true)
597     , m_scriptingPermission(scriptingPermission)
598 {
599     fragment->ref();
600 
601     // Add namespaces based on the parent node
602     Vector<Element*> elemStack;
603     while (parentElement) {
604         elemStack.append(parentElement);
605 
606         ContainerNode* n = parentElement->parentNode();
607         if (!n || !n->isElementNode())
608             break;
609         parentElement = static_cast<Element*>(n);
610     }
611 
612     if (elemStack.isEmpty())
613         return;
614 
615     for (Element* element = elemStack.last(); !elemStack.isEmpty(); elemStack.removeLast()) {
616         if (NamedNodeMap* attrs = element->attributes()) {
617             for (unsigned i = 0; i < attrs->length(); i++) {
618                 Attribute* attr = attrs->attributeItem(i);
619                 if (attr->localName() == xmlnsAtom)
620                     m_defaultNamespaceURI = attr->value();
621                 else if (attr->prefix() == xmlnsAtom)
622                     m_prefixToNamespaceMap.set(attr->localName(), attr->value());
623             }
624         }
625     }
626 
627     // If the parent element is not in document tree, there may be no xmlns attribute; just default to the parent's namespace.
628     if (m_defaultNamespaceURI.isNull() && !parentElement->inDocument())
629         m_defaultNamespaceURI = parentElement->namespaceURI();
630 }
631 
~XMLParserContext()632 XMLParserContext::~XMLParserContext()
633 {
634     if (m_context->myDoc)
635         xmlFreeDoc(m_context->myDoc);
636     xmlFreeParserCtxt(m_context);
637 }
638 
// Destructor. Asserts that the parser holds no current node and no stacked
// nodes, then, if a script is still pending, removes this parser from that
// script's clients.
~XMLDocumentParser()639 XMLDocumentParser::~XMLDocumentParser()
640 {
641     // The XMLDocumentParser will always be detached before being destroyed.
642     ASSERT(m_currentNodeStack.isEmpty());
643     ASSERT(!m_currentNode);
644 
645     // FIXME: m_pendingScript handling should be moved into XMLDocumentParser.cpp!
646     if (m_pendingScript)
647         m_pendingScript->removeClient(this);
648 }
649 
doWrite(const String & parseString)650 void XMLDocumentParser::doWrite(const String& parseString)
651 {
652     ASSERT(!isDetached());
653     if (!m_context)
654         initializeParserContext();
655 
656     // Protect the libxml context from deletion during a callback
657     RefPtr<XMLParserContext> context = m_context;
658 
659     // libXML throws an error if you try to switch the encoding for an empty string.
660     if (parseString.length()) {
661         // JavaScript may cause the parser to detach during xmlParseChunk
662         // keep this alive until this function is done.
663         RefPtr<XMLDocumentParser> protect(this);
664 
665         switchToUTF16(context->context());
666         XMLDocumentParserScope scope(document()->cachedResourceLoader());
667         xmlParseChunk(context->context(), reinterpret_cast<const char*>(parseString.characters()), sizeof(UChar) * parseString.length(), 0);
668 
669         // JavaScript (which may be run under the xmlParseChunk callstack) may
670         // cause the parser to be stopped or detached.
671         if (isStopped())
672             return;
673     }
674 
675     // FIXME: Why is this here?  And why is it after we process the passed source?
676     if (document()->decoder() && document()->decoder()->sawError()) {
677         // If the decoder saw an error, report it as fatal (stops parsing)
678         handleError(fatal, "Encoding error", context->context()->input->line, context->context()->input->col);
679     }
680 }
681 
toString(const xmlChar * string,size_t size)682 static inline String toString(const xmlChar* string, size_t size)
683 {
684     return String::fromUTF8(reinterpret_cast<const char*>(string), size);
685 }
686 
toString(const xmlChar * string)687 static inline String toString(const xmlChar* string)
688 {
689     return String::fromUTF8(reinterpret_cast<const char*>(string));
690 }
691 
toAtomicString(const xmlChar * string,size_t size)692 static inline AtomicString toAtomicString(const xmlChar* string, size_t size)
693 {
694     return AtomicString::fromUTF8(reinterpret_cast<const char*>(string), size);
695 }
696 
toAtomicString(const xmlChar * string)697 static inline AtomicString toAtomicString(const xmlChar* string)
698 {
699     return AtomicString::fromUTF8(reinterpret_cast<const char*>(string));
700 }
701 
702 struct _xmlSAX2Namespace {
703     const xmlChar* prefix;
704     const xmlChar* uri;
705 };
706 typedef struct _xmlSAX2Namespace xmlSAX2Namespace;
707 
handleElementNamespaces(Element * newElement,const xmlChar ** libxmlNamespaces,int nb_namespaces,ExceptionCode & ec,FragmentScriptingPermission scriptingPermission)708 static inline void handleElementNamespaces(Element* newElement, const xmlChar** libxmlNamespaces, int nb_namespaces, ExceptionCode& ec, FragmentScriptingPermission scriptingPermission)
709 {
710     xmlSAX2Namespace* namespaces = reinterpret_cast<xmlSAX2Namespace*>(libxmlNamespaces);
711     for (int i = 0; i < nb_namespaces; i++) {
712         AtomicString namespaceQName = xmlnsAtom;
713         AtomicString namespaceURI = toAtomicString(namespaces[i].uri);
714         if (namespaces[i].prefix)
715             namespaceQName = "xmlns:" + toString(namespaces[i].prefix);
716         newElement->setAttributeNS(XMLNSNames::xmlnsNamespaceURI, namespaceQName, namespaceURI, ec, scriptingPermission);
717         if (ec) // exception setting attributes
718             return;
719     }
720 }
721 
722 struct _xmlSAX2Attributes {
723     const xmlChar* localname;
724     const xmlChar* prefix;
725     const xmlChar* uri;
726     const xmlChar* value;
727     const xmlChar* end;
728 };
729 typedef struct _xmlSAX2Attributes xmlSAX2Attributes;
730 
handleElementAttributes(Element * newElement,const xmlChar ** libxmlAttributes,int nb_attributes,ExceptionCode & ec,FragmentScriptingPermission scriptingPermission)731 static inline void handleElementAttributes(Element* newElement, const xmlChar** libxmlAttributes, int nb_attributes, ExceptionCode& ec, FragmentScriptingPermission scriptingPermission)
732 {
733     xmlSAX2Attributes* attributes = reinterpret_cast<xmlSAX2Attributes*>(libxmlAttributes);
734     for (int i = 0; i < nb_attributes; i++) {
735         int valueLength = static_cast<int>(attributes[i].end - attributes[i].value);
736         AtomicString attrValue = toAtomicString(attributes[i].value, valueLength);
737         String attrPrefix = toString(attributes[i].prefix);
738         AtomicString attrURI = attrPrefix.isEmpty() ? AtomicString() : toAtomicString(attributes[i].uri);
739         AtomicString attrQName = attrPrefix.isEmpty() ? toAtomicString(attributes[i].localname) : AtomicString(attrPrefix + ":" + toString(attributes[i].localname));
740 
741         newElement->setAttributeNS(attrURI, attrQName, attrValue, ec, scriptingPermission);
742         if (ec) // exception setting attributes
743             return;
744     }
745 }
746 
startElementNs(const xmlChar * xmlLocalName,const xmlChar * xmlPrefix,const xmlChar * xmlURI,int nb_namespaces,const xmlChar ** libxmlNamespaces,int nb_attributes,int nb_defaulted,const xmlChar ** libxmlAttributes)747 void XMLDocumentParser::startElementNs(const xmlChar* xmlLocalName, const xmlChar* xmlPrefix, const xmlChar* xmlURI, int nb_namespaces,
748                                   const xmlChar** libxmlNamespaces, int nb_attributes, int nb_defaulted, const xmlChar** libxmlAttributes)
749 {
750     if (isStopped())
751         return;
752 
753     if (m_parserPaused) {
754         m_pendingCallbacks->appendStartElementNSCallback(xmlLocalName, xmlPrefix, xmlURI, nb_namespaces, libxmlNamespaces,
755                                                          nb_attributes, nb_defaulted, libxmlAttributes);
756         return;
757     }
758 
759 #if ENABLE(XHTMLMP)
760     // check if the DOCTYPE Declaration of XHTMLMP document exists
761     if (!m_hasDocTypeDeclaration && document()->isXHTMLMPDocument()) {
762         handleError(fatal, "DOCTYPE declaration lost.", lineNumber(), columnNumber());
763         return;
764     }
765 #endif
766 
767     exitText();
768 
769     AtomicString localName = toAtomicString(xmlLocalName);
770     AtomicString uri = toAtomicString(xmlURI);
771     AtomicString prefix = toAtomicString(xmlPrefix);
772 
773     if (m_parsingFragment && uri.isNull()) {
774         if (!prefix.isNull())
775             uri = m_prefixToNamespaceMap.get(prefix);
776         else
777             uri = m_defaultNamespaceURI;
778     }
779 
780 #if ENABLE(XHTMLMP)
781     if (!m_sawFirstElement && isXHTMLMPDocument()) {
782         // As per the section 7.1 of OMA-WAP-XHTMLMP-V1_1-20061020-A.pdf,
783         // we should make sure that the root element MUST be 'html' and
784         // ensure the name of the default namespace on the root elment 'html'
785         // MUST be 'http://www.w3.org/1999/xhtml'
786         if (localName != HTMLNames::htmlTag.localName()) {
787             handleError(fatal, "XHTMLMP document expects 'html' as root element.", lineNumber(), columnNumber());
788             return;
789         }
790 
791         if (uri.isNull()) {
792             m_defaultNamespaceURI = HTMLNames::xhtmlNamespaceURI;
793             uri = m_defaultNamespaceURI;
794         }
795     }
796 #endif
797 
798     bool isFirstElement = !m_sawFirstElement;
799     m_sawFirstElement = true;
800 
801     QualifiedName qName(prefix, localName, uri);
802     RefPtr<Element> newElement = document()->createElement(qName, true);
803     if (!newElement) {
804         stopParsing();
805         return;
806     }
807 
808     ExceptionCode ec = 0;
809     handleElementNamespaces(newElement.get(), libxmlNamespaces, nb_namespaces, ec, m_scriptingPermission);
810     if (ec) {
811         stopParsing();
812         return;
813     }
814 
815     handleElementAttributes(newElement.get(), libxmlAttributes, nb_attributes, ec, m_scriptingPermission);
816     if (ec) {
817         stopParsing();
818         return;
819     }
820 
821     newElement->beginParsingChildren();
822 
823     ScriptElement* scriptElement = toScriptElement(newElement.get());
824     if (scriptElement)
825         m_scriptStartPosition = textPositionOneBased();
826 
827     m_currentNode->deprecatedParserAddChild(newElement.get());
828 
829     pushCurrentNode(newElement.get());
830     if (m_view && !newElement->attached())
831         newElement->attach();
832 
833 #if ENABLE(OFFLINE_WEB_APPLICATIONS)
834     if (newElement->hasTagName(HTMLNames::htmlTag))
835         static_cast<HTMLHtmlElement*>(newElement.get())->insertedByParser();
836 #endif
837 
838     if (!m_parsingFragment && isFirstElement && document()->frame())
839         document()->frame()->loader()->dispatchDocumentElementAvailable();
840 }
841 
endElementNs()842 void XMLDocumentParser::endElementNs()
843 {
844     if (isStopped())
845         return;
846 
847     if (m_parserPaused) {
848         m_pendingCallbacks->appendEndElementNSCallback();
849         return;
850     }
851 
852     // JavaScript can detach the parser.  Make sure this is not released
853     // before the end of this method.
854     RefPtr<XMLDocumentParser> protect(this);
855 
856     exitText();
857 
858     RefPtr<Node> n = m_currentNode;
859     n->finishParsingChildren();
860 
861     if (m_scriptingPermission == FragmentScriptingNotAllowed && n->isElementNode() && toScriptElement(static_cast<Element*>(n.get()))) {
862         popCurrentNode();
863         ExceptionCode ec;
864         n->remove(ec);
865         return;
866     }
867 
868     if (!n->isElementNode() || !m_view) {
869         popCurrentNode();
870         return;
871     }
872 
873     Element* element = static_cast<Element*>(n.get());
874 
875     // The element's parent may have already been removed from document.
876     // Parsing continues in this case, but scripts aren't executed.
877     if (!element->inDocument()) {
878         popCurrentNode();
879         return;
880     }
881 
882     ScriptElement* scriptElement = toScriptElement(element);
883     if (!scriptElement) {
884         popCurrentNode();
885         return;
886     }
887 
888     // Don't load external scripts for standalone documents (for now).
889     ASSERT(!m_pendingScript);
890     m_requestingScript = true;
891 
892     bool successfullyPrepared = scriptElement->prepareScript(m_scriptStartPosition, ScriptElement::AllowLegacyTypeInTypeAttribute);
893     if (!successfullyPrepared) {
894 #if ENABLE(XHTMLMP)
895         if (!scriptElement->isScriptTypeSupported(ScriptElement::AllowLegacyTypeInTypeAttribute))
896             document()->setShouldProcessNoscriptElement(true);
897 #endif
898     } else {
899         // FIXME: Script execution should be shared between
900         // the libxml2 and Qt XMLDocumentParser implementations.
901 
902         if (scriptElement->readyToBeParserExecuted())
903             scriptElement->executeScript(ScriptSourceCode(scriptElement->scriptContent(), document()->url(), m_scriptStartPosition));
904         else if (scriptElement->willBeParserExecuted()) {
905             m_pendingScript = scriptElement->cachedScript();
906             m_scriptElement = element;
907             m_pendingScript->addClient(this);
908 
909             // m_pendingScript will be 0 if script was already loaded and addClient() executed it.
910             if (m_pendingScript)
911                 pauseParsing();
912         } else
913             m_scriptElement = 0;
914 
915         // JavaScript may have detached the parser
916         if (isDetached())
917             return;
918     }
919     m_requestingScript = false;
920     popCurrentNode();
921 }
922 
characters(const xmlChar * s,int len)923 void XMLDocumentParser::characters(const xmlChar* s, int len)
924 {
925     if (isStopped())
926         return;
927 
928     if (m_parserPaused) {
929         m_pendingCallbacks->appendCharactersCallback(s, len);
930         return;
931     }
932 
933     if (!m_currentNode->isTextNode())
934         enterText();
935     m_bufferedText.append(s, len);
936 }
937 
error(ErrorType type,const char * message,va_list args)938 void XMLDocumentParser::error(ErrorType type, const char* message, va_list args)
939 {
940     if (isStopped())
941         return;
942 
943 #if COMPILER(MSVC) || COMPILER(RVCT) || COMPILER(MINGW)
944     char m[1024];
945     vsnprintf(m, sizeof(m) - 1, message, args);
946 #else
947     char* m;
948     if (vasprintf(&m, message, args) == -1)
949         return;
950 #endif
951 
952     if (m_parserPaused)
953         m_pendingCallbacks->appendErrorCallback(type, reinterpret_cast<const xmlChar*>(m), lineNumber(), columnNumber());
954     else
955         handleError(type, m, lineNumber(), columnNumber());
956 
957 #if !COMPILER(MSVC) && !COMPILER(RVCT) && !COMPILER(MINGW)
958     free(m);
959 #endif
960 }
961 
processingInstruction(const xmlChar * target,const xmlChar * data)962 void XMLDocumentParser::processingInstruction(const xmlChar* target, const xmlChar* data)
963 {
964     if (isStopped())
965         return;
966 
967     if (m_parserPaused) {
968         m_pendingCallbacks->appendProcessingInstructionCallback(target, data);
969         return;
970     }
971 
972     exitText();
973 
974     // ### handle exceptions
975     ExceptionCode ec = 0;
976     RefPtr<ProcessingInstruction> pi = document()->createProcessingInstruction(
977         toString(target), toString(data), ec);
978     if (ec)
979         return;
980 
981     pi->setCreatedByParser(true);
982 
983     m_currentNode->deprecatedParserAddChild(pi.get());
984     if (m_view && !pi->attached())
985         pi->attach();
986 
987     pi->finishParsingChildren();
988 
989     if (pi->isCSS())
990         m_sawCSS = true;
991 #if ENABLE(XSLT)
992     m_sawXSLTransform = !m_sawFirstElement && pi->isXSL();
993     if (m_sawXSLTransform && !document()->transformSourceDocument())
994         stopParsing();
995 #endif
996 }
997 
cdataBlock(const xmlChar * s,int len)998 void XMLDocumentParser::cdataBlock(const xmlChar* s, int len)
999 {
1000     if (isStopped())
1001         return;
1002 
1003     if (m_parserPaused) {
1004         m_pendingCallbacks->appendCDATABlockCallback(s, len);
1005         return;
1006     }
1007 
1008     exitText();
1009 
1010     RefPtr<Node> newNode = CDATASection::create(document(), toString(s, len));
1011     m_currentNode->deprecatedParserAddChild(newNode.get());
1012     if (m_view && !newNode->attached())
1013         newNode->attach();
1014 }
1015 
comment(const xmlChar * s)1016 void XMLDocumentParser::comment(const xmlChar* s)
1017 {
1018     if (isStopped())
1019         return;
1020 
1021     if (m_parserPaused) {
1022         m_pendingCallbacks->appendCommentCallback(s);
1023         return;
1024     }
1025 
1026     exitText();
1027 
1028     RefPtr<Node> newNode = Comment::create(document(), toString(s));
1029     m_currentNode->deprecatedParserAddChild(newNode.get());
1030     if (m_view && !newNode->attached())
1031         newNode->attach();
1032 }
1033 
startDocument(const xmlChar * version,const xmlChar * encoding,int standalone)1034 void XMLDocumentParser::startDocument(const xmlChar* version, const xmlChar* encoding, int standalone)
1035 {
1036     ExceptionCode ec = 0;
1037 
1038     if (version)
1039         document()->setXMLVersion(toString(version), ec);
1040     document()->setXMLStandalone(standalone == 1, ec); // possible values are 0, 1, and -1
1041     if (encoding)
1042         document()->setXMLEncoding(toString(encoding));
1043 }
1044 
endDocument()1045 void XMLDocumentParser::endDocument()
1046 {
1047     exitText();
1048 #if ENABLE(XHTMLMP)
1049     m_hasDocTypeDeclaration = false;
1050 #endif
1051 }
1052 
internalSubset(const xmlChar * name,const xmlChar * externalID,const xmlChar * systemID)1053 void XMLDocumentParser::internalSubset(const xmlChar* name, const xmlChar* externalID, const xmlChar* systemID)
1054 {
1055     if (isStopped())
1056         return;
1057 
1058     if (m_parserPaused) {
1059         m_pendingCallbacks->appendInternalSubsetCallback(name, externalID, systemID);
1060         return;
1061     }
1062 
1063     if (document()) {
1064 #if ENABLE(XHTMLMP)
1065         String extId = toString(externalID);
1066         String dtdName = toString(name);
1067         if (extId == "-//WAPFORUM//DTD XHTML Mobile 1.0//EN"
1068             || extId == "-//WAPFORUM//DTD XHTML Mobile 1.1//EN") {
1069             if (dtdName != HTMLNames::htmlTag.localName()) {
1070                 handleError(fatal, "Invalid DOCTYPE declaration, expected 'html' as root element.", lineNumber(), columnNumber());
1071                 return;
1072             }
1073 
1074             if (document()->isXHTMLMPDocument())
1075                 setIsXHTMLMPDocument(true);
1076             else
1077                 setIsXHTMLDocument(true);
1078 
1079             m_hasDocTypeDeclaration = true;
1080         }
1081 #endif
1082 
1083         document()->parserAddChild(DocumentType::create(document(), toString(name), toString(externalID), toString(systemID)));
1084     }
1085 }
1086 
getParser(void * closure)1087 static inline XMLDocumentParser* getParser(void* closure)
1088 {
1089     xmlParserCtxtPtr ctxt = static_cast<xmlParserCtxtPtr>(closure);
1090     return static_cast<XMLDocumentParser*>(ctxt->_private);
1091 }
1092 
1093 // This is a hack around http://bugzilla.gnome.org/show_bug.cgi?id=159219
1094 // Otherwise libxml seems to call all the SAX callbacks twice for any replaced entity.
hackAroundLibXMLEntityBug(void * closure)1095 static inline bool hackAroundLibXMLEntityBug(void* closure)
1096 {
1097 #if LIBXML_VERSION >= 20627
1098     UNUSED_PARAM(closure);
1099 
1100     // This bug has been fixed in libxml 2.6.27.
1101     return false;
1102 #else
1103     return static_cast<xmlParserCtxtPtr>(closure)->node;
1104 #endif
1105 }
1106 
startElementNsHandler(void * closure,const xmlChar * localname,const xmlChar * prefix,const xmlChar * uri,int nb_namespaces,const xmlChar ** namespaces,int nb_attributes,int nb_defaulted,const xmlChar ** libxmlAttributes)1107 static void startElementNsHandler(void* closure, const xmlChar* localname, const xmlChar* prefix, const xmlChar* uri, int nb_namespaces, const xmlChar** namespaces, int nb_attributes, int nb_defaulted, const xmlChar** libxmlAttributes)
1108 {
1109     if (hackAroundLibXMLEntityBug(closure))
1110         return;
1111 
1112     getParser(closure)->startElementNs(localname, prefix, uri, nb_namespaces, namespaces, nb_attributes, nb_defaulted, libxmlAttributes);
1113 }
1114 
endElementNsHandler(void * closure,const xmlChar *,const xmlChar *,const xmlChar *)1115 static void endElementNsHandler(void* closure, const xmlChar*, const xmlChar*, const xmlChar*)
1116 {
1117     if (hackAroundLibXMLEntityBug(closure))
1118         return;
1119 
1120     getParser(closure)->endElementNs();
1121 }
1122 
charactersHandler(void * closure,const xmlChar * s,int len)1123 static void charactersHandler(void* closure, const xmlChar* s, int len)
1124 {
1125     if (hackAroundLibXMLEntityBug(closure))
1126         return;
1127 
1128     getParser(closure)->characters(s, len);
1129 }
1130 
processingInstructionHandler(void * closure,const xmlChar * target,const xmlChar * data)1131 static void processingInstructionHandler(void* closure, const xmlChar* target, const xmlChar* data)
1132 {
1133     if (hackAroundLibXMLEntityBug(closure))
1134         return;
1135 
1136     getParser(closure)->processingInstruction(target, data);
1137 }
1138 
cdataBlockHandler(void * closure,const xmlChar * s,int len)1139 static void cdataBlockHandler(void* closure, const xmlChar* s, int len)
1140 {
1141     if (hackAroundLibXMLEntityBug(closure))
1142         return;
1143 
1144     getParser(closure)->cdataBlock(s, len);
1145 }
1146 
commentHandler(void * closure,const xmlChar * comment)1147 static void commentHandler(void* closure, const xmlChar* comment)
1148 {
1149     if (hackAroundLibXMLEntityBug(closure))
1150         return;
1151 
1152     getParser(closure)->comment(comment);
1153 }
1154 
1155 WTF_ATTRIBUTE_PRINTF(2, 3)
warningHandler(void * closure,const char * message,...)1156 static void warningHandler(void* closure, const char* message, ...)
1157 {
1158     va_list args;
1159     va_start(args, message);
1160     getParser(closure)->error(XMLDocumentParser::warning, message, args);
1161     va_end(args);
1162 }
1163 
1164 WTF_ATTRIBUTE_PRINTF(2, 3)
fatalErrorHandler(void * closure,const char * message,...)1165 static void fatalErrorHandler(void* closure, const char* message, ...)
1166 {
1167     va_list args;
1168     va_start(args, message);
1169     getParser(closure)->error(XMLDocumentParser::fatal, message, args);
1170     va_end(args);
1171 }
1172 
1173 WTF_ATTRIBUTE_PRINTF(2, 3)
normalErrorHandler(void * closure,const char * message,...)1174 static void normalErrorHandler(void* closure, const char* message, ...)
1175 {
1176     va_list args;
1177     va_start(args, message);
1178     getParser(closure)->error(XMLDocumentParser::nonFatal, message, args);
1179     va_end(args);
1180 }
1181 
1182 // Using a static entity and marking it XML_INTERNAL_PREDEFINED_ENTITY is
1183 // a hack to avoid malloc/free. Using a global variable like this could cause trouble
1184 // if libxml implementation details were to change
1185 static xmlChar sharedXHTMLEntityResult[5] = {0, 0, 0, 0, 0};
1186 
sharedXHTMLEntity()1187 static xmlEntityPtr sharedXHTMLEntity()
1188 {
1189     static xmlEntity entity;
1190     if (!entity.type) {
1191         entity.type = XML_ENTITY_DECL;
1192         entity.orig = sharedXHTMLEntityResult;
1193         entity.content = sharedXHTMLEntityResult;
1194         entity.etype = XML_INTERNAL_PREDEFINED_ENTITY;
1195     }
1196     return &entity;
1197 }
1198 
getXHTMLEntity(const xmlChar * name)1199 static xmlEntityPtr getXHTMLEntity(const xmlChar* name)
1200 {
1201     UChar c = decodeNamedEntity(reinterpret_cast<const char*>(name));
1202     if (!c)
1203         return 0;
1204 
1205     CString value = String(&c, 1).utf8();
1206     ASSERT(value.length() < 5);
1207     xmlEntityPtr entity = sharedXHTMLEntity();
1208     entity->length = value.length();
1209     entity->name = name;
1210     memcpy(sharedXHTMLEntityResult, value.data(), entity->length + 1);
1211 
1212     return entity;
1213 }
1214 
getEntityHandler(void * closure,const xmlChar * name)1215 static xmlEntityPtr getEntityHandler(void* closure, const xmlChar* name)
1216 {
1217     xmlParserCtxtPtr ctxt = static_cast<xmlParserCtxtPtr>(closure);
1218     xmlEntityPtr ent = xmlGetPredefinedEntity(name);
1219     if (ent) {
1220         ent->etype = XML_INTERNAL_PREDEFINED_ENTITY;
1221         return ent;
1222     }
1223 
1224     ent = xmlGetDocEntity(ctxt->myDoc, name);
1225     if (!ent && (getParser(closure)->isXHTMLDocument()
1226 #if ENABLE(XHTMLMP)
1227                  || getParser(closure)->isXHTMLMPDocument()
1228 #endif
1229        )) {
1230         ent = getXHTMLEntity(name);
1231         if (ent)
1232             ent->etype = XML_INTERNAL_GENERAL_ENTITY;
1233     }
1234 
1235     return ent;
1236 }
1237 
startDocumentHandler(void * closure)1238 static void startDocumentHandler(void* closure)
1239 {
1240     xmlParserCtxt* ctxt = static_cast<xmlParserCtxt*>(closure);
1241     switchToUTF16(ctxt);
1242     getParser(closure)->startDocument(ctxt->version, ctxt->encoding, ctxt->standalone);
1243     xmlSAX2StartDocument(closure);
1244 }
1245 
endDocumentHandler(void * closure)1246 static void endDocumentHandler(void* closure)
1247 {
1248     getParser(closure)->endDocument();
1249     xmlSAX2EndDocument(closure);
1250 }
1251 
internalSubsetHandler(void * closure,const xmlChar * name,const xmlChar * externalID,const xmlChar * systemID)1252 static void internalSubsetHandler(void* closure, const xmlChar* name, const xmlChar* externalID, const xmlChar* systemID)
1253 {
1254     getParser(closure)->internalSubset(name, externalID, systemID);
1255     xmlSAX2InternalSubset(closure, name, externalID, systemID);
1256 }
1257 
externalSubsetHandler(void * closure,const xmlChar *,const xmlChar * externalId,const xmlChar *)1258 static void externalSubsetHandler(void* closure, const xmlChar*, const xmlChar* externalId, const xmlChar*)
1259 {
1260     String extId = toString(externalId);
1261     if ((extId == "-//W3C//DTD XHTML 1.0 Transitional//EN")
1262         || (extId == "-//W3C//DTD XHTML 1.1//EN")
1263         || (extId == "-//W3C//DTD XHTML 1.0 Strict//EN")
1264         || (extId == "-//W3C//DTD XHTML 1.0 Frameset//EN")
1265         || (extId == "-//W3C//DTD XHTML Basic 1.0//EN")
1266         || (extId == "-//W3C//DTD XHTML 1.1 plus MathML 2.0//EN")
1267         || (extId == "-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN")
1268         || (extId == "-//WAPFORUM//DTD XHTML Mobile 1.0//EN")
1269        )
1270         getParser(closure)->setIsXHTMLDocument(true); // controls if we replace entities or not.
1271 }
1272 
// Intentionally empty. Installing a handler works around a crasher:
// http://bugzilla.gnome.org/show_bug.cgi?id=172255
// http://bugs.webkit.org/show_bug.cgi?id=5792
static void ignorableWhitespaceHandler(void*, const xmlChar*, int)
{
}
1279 
initializeParserContext(const CString & chunk)1280 void XMLDocumentParser::initializeParserContext(const CString& chunk)
1281 {
1282     xmlSAXHandler sax;
1283     memset(&sax, 0, sizeof(sax));
1284 
1285     sax.error = normalErrorHandler;
1286     sax.fatalError = fatalErrorHandler;
1287     sax.characters = charactersHandler;
1288     sax.processingInstruction = processingInstructionHandler;
1289     sax.cdataBlock = cdataBlockHandler;
1290     sax.comment = commentHandler;
1291     sax.warning = warningHandler;
1292     sax.startElementNs = startElementNsHandler;
1293     sax.endElementNs = endElementNsHandler;
1294     sax.getEntity = getEntityHandler;
1295     sax.startDocument = startDocumentHandler;
1296     sax.endDocument = endDocumentHandler;
1297     sax.internalSubset = internalSubsetHandler;
1298     sax.externalSubset = externalSubsetHandler;
1299     sax.ignorableWhitespace = ignorableWhitespaceHandler;
1300     sax.entityDecl = xmlSAX2EntityDecl;
1301     sax.initialized = XML_SAX2_MAGIC;
1302     DocumentParser::startParsing();
1303     m_sawError = false;
1304     m_sawCSS = false;
1305     m_sawXSLTransform = false;
1306     m_sawFirstElement = false;
1307 
1308     XMLDocumentParserScope scope(document()->cachedResourceLoader());
1309     if (m_parsingFragment)
1310         m_context = XMLParserContext::createMemoryParser(&sax, this, chunk);
1311     else {
1312         ASSERT(!chunk.data());
1313         m_context = XMLParserContext::createStringParser(&sax, this);
1314     }
1315 }
1316 
doEnd()1317 void XMLDocumentParser::doEnd()
1318 {
1319     if (!isStopped()) {
1320         if (m_context) {
1321             // Tell libxml we're done.
1322             {
1323                 XMLDocumentParserScope scope(document()->cachedResourceLoader());
1324                 xmlParseChunk(context(), 0, 0, 1);
1325             }
1326 
1327             m_context = 0;
1328         }
1329     }
1330 
1331 #if ENABLE(XSLT)
1332     XMLTreeViewer xmlTreeViewer(document());
1333     bool xmlViewerMode = !m_sawError && !m_sawCSS && !m_sawXSLTransform && xmlTreeViewer.hasNoStyleInformation();
1334     if (xmlViewerMode)
1335         xmlTreeViewer.transformDocumentToTreeView();
1336 
1337     if (m_sawXSLTransform) {
1338         void* doc = xmlDocPtrForString(document()->cachedResourceLoader(), m_originalSourceForTransform, document()->url().string());
1339         document()->setTransformSource(adoptPtr(new TransformSource(doc)));
1340 
1341         document()->setParsing(false); // Make the document think it's done, so it will apply XSL stylesheets.
1342         document()->styleSelectorChanged(RecalcStyleImmediately);
1343         document()->setParsing(true);
1344 
1345         DocumentParser::stopParsing();
1346     }
1347 #endif
1348 }
1349 
1350 #if ENABLE(XSLT)
xmlDocPtrForString(CachedResourceLoader * cachedResourceLoader,const String & source,const String & url)1351 void* xmlDocPtrForString(CachedResourceLoader* cachedResourceLoader, const String& source, const String& url)
1352 {
1353     if (source.isEmpty())
1354         return 0;
1355 
1356     // Parse in a single chunk into an xmlDocPtr
1357     // FIXME: Hook up error handlers so that a failure to parse the main document results in
1358     // good error messages.
1359     const UChar BOM = 0xFEFF;
1360     const unsigned char BOMHighByte = *reinterpret_cast<const unsigned char*>(&BOM);
1361 
1362     XMLDocumentParserScope scope(cachedResourceLoader, errorFunc, 0);
1363     xmlDocPtr sourceDoc = xmlReadMemory(reinterpret_cast<const char*>(source.characters()),
1364                                         source.length() * sizeof(UChar),
1365                                         url.latin1().data(),
1366                                         BOMHighByte == 0xFF ? "UTF-16LE" : "UTF-16BE",
1367                                         XSLT_PARSE_OPTIONS);
1368     return sourceDoc;
1369 }
1370 #endif
1371 
lineNumber() const1372 int XMLDocumentParser::lineNumber() const
1373 {
1374     // FIXME: The implementation probably returns 1-based int, but method should return 0-based.
1375     return context() ? context()->input->line : 1;
1376 }
1377 
columnNumber() const1378 int XMLDocumentParser::columnNumber() const
1379 {
1380     // FIXME: The implementation probably returns 1-based int, but method should return 0-based.
1381     return context() ? context()->input->col : 1;
1382 }
1383 
textPosition() const1384 TextPosition0 XMLDocumentParser::textPosition() const
1385 {
1386     xmlParserCtxtPtr context = this->context();
1387     if (!context)
1388         return TextPosition0::minimumPosition();
1389     // FIXME: The context probably contains 1-based numbers, but we treat them as 0-based,
1390     //        to be consistent with fixme's in lineNumber() and columnNumber
1391     //        methods.
1392     return TextPosition0(WTF::ZeroBasedNumber::fromZeroBasedInt(context->input->line),
1393         WTF::ZeroBasedNumber::fromZeroBasedInt(context->input->col));
1394 }
1395 
1396 // This method has a correct implementation, in contrast to textPosition() method.
1397 // It should replace textPosition().
textPositionOneBased() const1398 TextPosition1 XMLDocumentParser::textPositionOneBased() const
1399 {
1400     xmlParserCtxtPtr context = this->context();
1401     if (!context)
1402         return TextPosition1::minimumPosition();
1403     return TextPosition1(WTF::OneBasedNumber::fromOneBasedInt(context->input->line),
1404         WTF::OneBasedNumber::fromOneBasedInt(context->input->col));
1405 }
1406 
stopParsing()1407 void XMLDocumentParser::stopParsing()
1408 {
1409     DocumentParser::stopParsing();
1410     if (context())
1411         xmlStopParser(context());
1412 }
1413 
resumeParsing()1414 void XMLDocumentParser::resumeParsing()
1415 {
1416     ASSERT(!isDetached());
1417     ASSERT(m_parserPaused);
1418 
1419     m_parserPaused = false;
1420 
1421     // First, execute any pending callbacks
1422     while (!m_pendingCallbacks->isEmpty()) {
1423         m_pendingCallbacks->callAndRemoveFirstCallback(this);
1424 
1425         // A callback paused the parser
1426         if (m_parserPaused)
1427             return;
1428     }
1429 
1430     // Then, write any pending data
1431     SegmentedString rest = m_pendingSrc;
1432     m_pendingSrc.clear();
1433     append(rest);
1434 
1435     // Finally, if finish() has been called and write() didn't result
1436     // in any further callbacks being queued, call end()
1437     if (m_finishCalled && m_pendingCallbacks->isEmpty())
1438         end();
1439 }
1440 
appendFragmentSource(const String & chunk)1441 bool XMLDocumentParser::appendFragmentSource(const String& chunk)
1442 {
1443     ASSERT(!m_context);
1444     ASSERT(m_parsingFragment);
1445 
1446     CString chunkAsUtf8 = chunk.utf8();
1447 
1448     // libxml2 takes an int for a length, and therefore can't handle XML chunks larger than 2 GiB.
1449     if (chunkAsUtf8.length() > INT_MAX)
1450         return false;
1451 
1452     initializeParserContext(chunkAsUtf8);
1453     xmlParseContent(context());
1454     endDocument(); // Close any open text nodes.
1455 
1456     // FIXME: If this code is actually needed, it should probably move to finish()
1457     // XMLDocumentParserQt has a similar check (m_stream.error() == QXmlStreamReader::PrematureEndOfDocumentError) in doEnd().
1458     // Check if all the chunk has been processed.
1459     long bytesProcessed = xmlByteConsumed(context());
1460     if (bytesProcessed == -1 || ((unsigned long)bytesProcessed) != chunkAsUtf8.length()) {
1461         // FIXME: I don't believe we can hit this case without also having seen an error or a null byte.
1462         // If we hit this ASSERT, we've found a test case which demonstrates the need for this code.
1463         ASSERT(m_sawError || (bytesProcessed >= 0 && !chunkAsUtf8.data()[bytesProcessed]));
1464         return false;
1465     }
1466 
1467     // No error if the chunk is well formed or it is not but we have no error.
1468     return context()->wellFormed || !xmlCtxtGetLastError(context());
1469 }
1470 
1471 // --------------------------------
1472 
1473 struct AttributeParseState {
1474     HashMap<String, String> attributes;
1475     bool gotAttributes;
1476 };
1477 
attributesStartElementNsHandler(void * closure,const xmlChar * xmlLocalName,const xmlChar *,const xmlChar *,int,const xmlChar **,int nb_attributes,int,const xmlChar ** libxmlAttributes)1478 static void attributesStartElementNsHandler(void* closure, const xmlChar* xmlLocalName, const xmlChar* /*xmlPrefix*/,
1479                                             const xmlChar* /*xmlURI*/, int /*nb_namespaces*/, const xmlChar** /*namespaces*/,
1480                                             int nb_attributes, int /*nb_defaulted*/, const xmlChar** libxmlAttributes)
1481 {
1482     if (strcmp(reinterpret_cast<const char*>(xmlLocalName), "attrs") != 0)
1483         return;
1484 
1485     xmlParserCtxtPtr ctxt = static_cast<xmlParserCtxtPtr>(closure);
1486     AttributeParseState* state = static_cast<AttributeParseState*>(ctxt->_private);
1487 
1488     state->gotAttributes = true;
1489 
1490     xmlSAX2Attributes* attributes = reinterpret_cast<xmlSAX2Attributes*>(libxmlAttributes);
1491     for (int i = 0; i < nb_attributes; i++) {
1492         String attrLocalName = toString(attributes[i].localname);
1493         int valueLength = (int) (attributes[i].end - attributes[i].value);
1494         String attrValue = toString(attributes[i].value, valueLength);
1495         String attrPrefix = toString(attributes[i].prefix);
1496         String attrQName = attrPrefix.isEmpty() ? attrLocalName : attrPrefix + ":" + attrLocalName;
1497 
1498         state->attributes.set(attrQName, attrValue);
1499     }
1500 }
1501 
parseAttributes(const String & string,bool & attrsOK)1502 HashMap<String, String> parseAttributes(const String& string, bool& attrsOK)
1503 {
1504     AttributeParseState state;
1505     state.gotAttributes = false;
1506 
1507     xmlSAXHandler sax;
1508     memset(&sax, 0, sizeof(sax));
1509     sax.startElementNs = attributesStartElementNsHandler;
1510     sax.initialized = XML_SAX2_MAGIC;
1511     RefPtr<XMLParserContext> parser = XMLParserContext::createStringParser(&sax, &state);
1512     String parseString = "<?xml version=\"1.0\"?><attrs " + string + " />";
1513     xmlParseChunk(parser->context(), reinterpret_cast<const char*>(parseString.characters()), parseString.length() * sizeof(UChar), 1);
1514     attrsOK = state.gotAttributes;
1515     return state.attributes;
1516 }
1517 
1518 }
1519