/*
 * Copyright (C) 2000 Peter Kelly (pmk@post.com)
 * Copyright (C) 2005, 2006, 2007 Apple Inc. All rights reserved.
 * Copyright (C) 2007 Samuel Weinig (sam@webkit.org)
 * Copyright (C) 2008 Nokia Corporation and/or its subsidiary(-ies)
 * Copyright (C) 2008, 2009 Torch Mobile Inc. All rights reserved. (http://www.torchmobile.com/)
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public License
 * along with this library; see the file COPYING.LIB.  If not, write to
 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 * Boston, MA 02110-1301, USA.
 *
 */
24 
25 #ifndef XMLDocumentParser_h
26 #define XMLDocumentParser_h
27 
28 #include "CachedResourceClient.h"
29 #include "CachedResourceHandle.h"
30 #include "FragmentScriptingPermission.h"
31 #include "ScriptableDocumentParser.h"
32 #include "SegmentedString.h"
33 #include <wtf/HashMap.h>
34 #include <wtf/OwnPtr.h>
35 #include <wtf/text/CString.h>
36 #include <wtf/text/StringHash.h>
37 
38 #if USE(QXMLSTREAM)
39 #include <qxmlstream.h>
40 #else
41 #include <libxml/tree.h>
42 #include <libxml/xmlstring.h>
43 #endif
44 
45 namespace WebCore {
46 
47     class Node;
48     class CachedScript;
49     class CachedResourceLoader;
50     class DocumentFragment;
51     class Document;
52     class Element;
53     class FrameView;
54     class PendingCallbacks;
55     class ScriptElement;
56 
57 #if !USE(QXMLSTREAM)
58     class XMLParserContext : public RefCounted<XMLParserContext> {
59     public:
60         static PassRefPtr<XMLParserContext> createMemoryParser(xmlSAXHandlerPtr, void* userData, const CString& chunk);
61         static PassRefPtr<XMLParserContext> createStringParser(xmlSAXHandlerPtr, void* userData);
62         ~XMLParserContext();
context()63         xmlParserCtxtPtr context() const { return m_context; }
64 
65     private:
XMLParserContext(xmlParserCtxtPtr context)66         XMLParserContext(xmlParserCtxtPtr context)
67             : m_context(context)
68         {
69         }
70         xmlParserCtxtPtr m_context;
71     };
72 #endif
73 
74     class XMLDocumentParser : public ScriptableDocumentParser, public CachedResourceClient {
75         WTF_MAKE_FAST_ALLOCATED;
76     public:
create(Document * document,FrameView * view)77         static PassRefPtr<XMLDocumentParser> create(Document* document, FrameView* view)
78         {
79             return adoptRef(new XMLDocumentParser(document, view));
80         }
create(DocumentFragment * fragment,Element * element,FragmentScriptingPermission permission)81         static PassRefPtr<XMLDocumentParser> create(DocumentFragment* fragment, Element* element, FragmentScriptingPermission permission)
82         {
83             return adoptRef(new XMLDocumentParser(fragment, element, permission));
84         }
85 
86         ~XMLDocumentParser();
87 
88         // Exposed for callbacks:
89         enum ErrorType { warning, nonFatal, fatal };
90         void handleError(ErrorType, const char* message, int lineNumber, int columnNumber);
91         void handleError(ErrorType, const char* message, TextPosition1);
92 
setIsXHTMLDocument(bool isXHTML)93         void setIsXHTMLDocument(bool isXHTML) { m_isXHTMLDocument = isXHTML; }
isXHTMLDocument()94         bool isXHTMLDocument() const { return m_isXHTMLDocument; }
95 #if ENABLE(XHTMLMP)
setIsXHTMLMPDocument(bool isXHTML)96         void setIsXHTMLMPDocument(bool isXHTML) { m_isXHTMLMPDocument = isXHTML; }
isXHTMLMPDocument()97         bool isXHTMLMPDocument() const { return m_isXHTMLMPDocument; }
98 #endif
99 
100         static bool parseDocumentFragment(const String&, DocumentFragment*, Element* parent = 0, FragmentScriptingPermission = FragmentScriptingAllowed);
101 
102         // FIXME: This function used to be used by WML. Can we remove it?
wellFormed()103         virtual bool wellFormed() const { return !m_sawError; }
104 
105         TextPosition0 textPosition() const;
106         TextPosition1 textPositionOneBased() const;
107 
108         static bool supportsXMLVersion(const String&);
109 
110     private:
111         XMLDocumentParser(Document*, FrameView* = 0);
112         XMLDocumentParser(DocumentFragment*, Element*, FragmentScriptingPermission);
113 
114         // From DocumentParser
115         virtual void insert(const SegmentedString&);
116         virtual void append(const SegmentedString&);
117         virtual void finish();
118         virtual bool finishWasCalled();
119         virtual bool isWaitingForScripts() const;
120         virtual void stopParsing();
121         virtual void detach();
122         virtual int lineNumber() const;
123         int columnNumber() const;
124 
125         // from CachedResourceClient
126         virtual void notifyFinished(CachedResource*);
127 
128         void end();
129 
130         void pauseParsing();
131         void resumeParsing();
132 
133         bool appendFragmentSource(const String&);
134 
135 #if USE(QXMLSTREAM)
136 private:
137         void parse();
138         void startDocument();
139         void parseStartElement();
140         void parseEndElement();
141         void parseCharacters();
142         void parseProcessingInstruction();
143         void parseCdata();
144         void parseComment();
145         void endDocument();
146         void parseDtd();
147         bool hasError() const;
148 #else
149 public:
150         // callbacks from parser SAX
151         void error(ErrorType, const char* message, va_list args) WTF_ATTRIBUTE_PRINTF(3, 0);
152         void startElementNs(const xmlChar* xmlLocalName, const xmlChar* xmlPrefix, const xmlChar* xmlURI, int nb_namespaces,
153                             const xmlChar** namespaces, int nb_attributes, int nb_defaulted, const xmlChar** libxmlAttributes);
154         void endElementNs();
155         void characters(const xmlChar* s, int len);
156         void processingInstruction(const xmlChar* target, const xmlChar* data);
157         void cdataBlock(const xmlChar* s, int len);
158         void comment(const xmlChar* s);
159         void startDocument(const xmlChar* version, const xmlChar* encoding, int standalone);
160         void internalSubset(const xmlChar* name, const xmlChar* externalID, const xmlChar* systemID);
161         void endDocument();
162 #endif
163     private:
164         void initializeParserContext(const CString& chunk = CString());
165 
166         void pushCurrentNode(Node*);
167         void popCurrentNode();
168         void clearCurrentNodeStack();
169 
170         void insertErrorMessageBlock();
171 
172         void enterText();
173         void exitText();
174 
175         void doWrite(const String&);
176         void doEnd();
177 
178         FrameView* m_view;
179 
180         String m_originalSourceForTransform;
181 
182 #if USE(QXMLSTREAM)
183         QXmlStreamReader m_stream;
184         bool m_wroteText;
185 #else
context()186         xmlParserCtxtPtr context() const { return m_context ? m_context->context() : 0; };
187         RefPtr<XMLParserContext> m_context;
188         OwnPtr<PendingCallbacks> m_pendingCallbacks;
189         Vector<xmlChar> m_bufferedText;
190 #endif
191         Node* m_currentNode;
192         Vector<Node*> m_currentNodeStack;
193 
194         bool m_sawError;
195         bool m_sawCSS;
196         bool m_sawXSLTransform;
197         bool m_sawFirstElement;
198         bool m_isXHTMLDocument;
199 #if ENABLE(XHTMLMP)
200         bool m_isXHTMLMPDocument;
201         bool m_hasDocTypeDeclaration;
202 #endif
203 
204         bool m_parserPaused;
205         bool m_requestingScript;
206         bool m_finishCalled;
207 
208         int m_errorCount;
209         TextPosition1 m_lastErrorPosition;
210         String m_errorMessages;
211 
212         CachedResourceHandle<CachedScript> m_pendingScript;
213         RefPtr<Element> m_scriptElement;
214         TextPosition1 m_scriptStartPosition;
215 
216         bool m_parsingFragment;
217         AtomicString m_defaultNamespaceURI;
218 
219         typedef HashMap<AtomicString, AtomicString> PrefixForNamespaceMap;
220         PrefixForNamespaceMap m_prefixToNamespaceMap;
221         SegmentedString m_pendingSrc;
222         FragmentScriptingPermission m_scriptingPermission;
223     };
224 
225 #if ENABLE(XSLT)
226 void* xmlDocPtrForString(CachedResourceLoader*, const String& source, const String& url);
227 #endif
228 
229 HashMap<String, String> parseAttributes(const String&, bool& attrsOK);
230 
231 } // namespace WebCore
232 
233 #endif // XMLDocumentParser_h
234