1 /*
2  * Copyright (C) 2000 Peter Kelly (pmk@post.com)
3  * Copyright (C) 2005, 2006, 2007 Apple Inc. All rights reserved.
4  * Copyright (C) 2007 Samuel Weinig (sam@webkit.org)
5  * Copyright (C) 2008 Nokia Corporation and/or its subsidiary(-ies)
6  * Copyright (C) 2008, 2009 Torch Mobile Inc. All rights reserved.
7  * (http://www.torchmobile.com/)
8  *
9  * This library is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Library General Public
11  * License as published by the Free Software Foundation; either
12  * version 2 of the License, or (at your option) any later version.
13  *
14  * This library is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * Library General Public License for more details.
18  *
19  * You should have received a copy of the GNU Library General Public License
20  * along with this library; see the file COPYING.LIB.  If not, write to
21  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
22  * Boston, MA 02110-1301, USA.
23  *
24  */
25 
26 #ifndef THIRD_PARTY_BLINK_RENDERER_CORE_XML_PARSER_XML_DOCUMENT_PARSER_H_
27 #define THIRD_PARTY_BLINK_RENDERER_CORE_XML_PARSER_XML_DOCUMENT_PARSER_H_
28 
29 #include <libxml/tree.h>
30 #include <memory>
31 #include "third_party/blink/renderer/core/dom/parser_content_policy.h"
32 #include "third_party/blink/renderer/core/dom/scriptable_document_parser.h"
33 #include "third_party/blink/renderer/core/script/xml_parser_script_runner.h"
34 #include "third_party/blink/renderer/core/script/xml_parser_script_runner_host.h"
35 #include "third_party/blink/renderer/core/xml/parser/xml_errors.h"
36 #include "third_party/blink/renderer/platform/heap/handle.h"
37 #include "third_party/blink/renderer/platform/loader/fetch/resource_client.h"
38 #include "third_party/blink/renderer/platform/text/segmented_string.h"
39 #include "third_party/blink/renderer/platform/wtf/allocator/allocator.h"
40 #include "third_party/blink/renderer/platform/wtf/hash_map.h"
41 #include "third_party/blink/renderer/platform/wtf/ref_counted.h"
42 #include "third_party/blink/renderer/platform/wtf/text/string_hash.h"
43 
44 namespace blink {
45 
46 class ContainerNode;
47 class Document;
48 class DocumentFragment;
49 class Element;
50 class LocalFrameView;
51 class Text;
52 
53 class XMLParserContext : public RefCounted<XMLParserContext> {
54   USING_FAST_MALLOC(XMLParserContext);
55 
56  public:
57   static scoped_refptr<XMLParserContext> CreateMemoryParser(
58       xmlSAXHandlerPtr,
59       void* user_data,
60       const std::string& chunk);
61   static scoped_refptr<XMLParserContext> CreateStringParser(xmlSAXHandlerPtr,
62                                                             void* user_data);
63   ~XMLParserContext();
Context()64   xmlParserCtxtPtr Context() const { return context_; }
65 
66  private:
XMLParserContext(xmlParserCtxtPtr context)67   XMLParserContext(xmlParserCtxtPtr context) : context_(context) {}
68 
69   xmlParserCtxtPtr context_;
70 };
71 
72 class XMLDocumentParser final : public ScriptableDocumentParser,
73                                 public XMLParserScriptRunnerHost {
74   USING_GARBAGE_COLLECTED_MIXIN(XMLDocumentParser);
75 
76  public:
77   explicit XMLDocumentParser(Document&, LocalFrameView* = nullptr);
78   XMLDocumentParser(DocumentFragment*, Element*, ParserContentPolicy);
79   ~XMLDocumentParser() override;
80   void Trace(Visitor*) override;
81 
82   // Exposed for callbacks:
83   void HandleError(XMLErrors::ErrorType, const char* message, TextPosition);
84 
SetIsXHTMLDocument(bool is_xhtml)85   void SetIsXHTMLDocument(bool is_xhtml) { is_xhtml_document_ = is_xhtml; }
IsXHTMLDocument()86   bool IsXHTMLDocument() const { return is_xhtml_document_; }
87 
IsCurrentlyParsing8BitChunk()88   bool IsCurrentlyParsing8BitChunk() {
89     return is_currently_parsing8_bit_chunk_;
90   }
91 
92   static bool ParseDocumentFragment(
93       const String&,
94       DocumentFragment*,
95       Element* parent = nullptr,
96       ParserContentPolicy = kAllowScriptingContent);
97 
98   // Used by the XMLHttpRequest to check if the responseXML was well formed.
WellFormed()99   bool WellFormed() const override { return !saw_error_; }
100 
101   TextPosition GetTextPosition() const override;
102 
103   static bool SupportsXMLVersion(const String&);
104 
105   class PendingCallback {
106     USING_FAST_MALLOC(PendingCallback);
107 
108    public:
109     virtual ~PendingCallback() = default;
110     virtual void Call(XMLDocumentParser*) = 0;
111 
GetTextPosition()112     TextPosition GetTextPosition() const { return text_position_; }
LineNumber()113     OrdinalNumber LineNumber() const { return text_position_.line_; }
ColumnNumber()114     OrdinalNumber ColumnNumber() const { return text_position_.column_; }
115 
116    protected:
PendingCallback(TextPosition text_position)117     PendingCallback(TextPosition text_position)
118         : text_position_(text_position) {}
119 
120    private:
121     TextPosition text_position_;
122   };
123 
124   void SetScriptStartPosition(TextPosition);
125 
126  private:
127   // From DocumentParser
insert(const String &)128   void insert(const String&) override { NOTREACHED(); }
129   void Append(const String&) override;
130   void Finish() override;
131   void ExecuteScriptsWaitingForResources() final;
132   bool IsWaitingForScripts() const override;
133   void StopParsing() override;
134   void Detach() override;
135   OrdinalNumber LineNumber() const override;
136   OrdinalNumber ColumnNumber() const;
137   void DidAddPendingParserBlockingStylesheet() final;
138   void DidLoadAllPendingParserBlockingStylesheets() final;
139 
140   // XMLParserScriptRunnerHost
141   void NotifyScriptExecuted() override;
142 
143   void end();
144 
145   void PauseParsing();
146   void ResumeParsing();
147 
148   bool AppendFragmentSource(const String&);
149 
150  public:
151   // Callbacks from parser SAX
152   PRINTF_FORMAT(3, 0)
153   void GetError(XMLErrors::ErrorType, const char* message, va_list args);
154   void StartElementNs(const AtomicString& local_name,
155                       const AtomicString& prefix,
156                       const AtomicString& uri,
157                       int namespace_count,
158                       const xmlChar** namespaces,
159                       int attribute_count,
160                       int defaulted_count,
161                       const xmlChar** libxml_attributes);
162   void EndElementNs();
163   void Characters(const xmlChar* chars, int length);
164   void GetProcessingInstruction(const String& target, const String& data);
165   void CdataBlock(const String&);
166   void Comment(const String&);
167   void StartDocument(const String& version,
168                      const String& encoding,
169                      int standalone);
170   void InternalSubset(const String& name,
171                       const String& external_id,
172                       const String& system_id);
173   void EndDocument();
174 
175  private:
176   void InitializeParserContext(const std::string& chunk = std::string());
177 
178   void PushCurrentNode(ContainerNode*);
179   void PopCurrentNode();
180   void ClearCurrentNodeStack();
181 
182   void InsertErrorMessageBlock();
183 
184   void CreateLeafTextNodeIfNeeded();
185   bool UpdateLeafTextNode();
186 
187   void DoWrite(const String&);
188   void DoEnd();
189 
190   void CheckIfBlockingStyleSheetAdded();
191 
192   SegmentedString original_source_for_transform_;
193 
Context()194   xmlParserCtxtPtr Context() const {
195     return context_ ? context_->Context() : nullptr;
196   }
197   scoped_refptr<XMLParserContext> context_;
198   Deque<std::unique_ptr<PendingCallback>> pending_callbacks_;
199   std::unique_ptr<PendingCallback> callback_;
200   Vector<xmlChar> buffered_text_;
201 
202   Member<ContainerNode> current_node_;
203   HeapVector<Member<ContainerNode>> current_node_stack_;
204 
205   Member<Text> leaf_text_node_;
206 
207   bool is_currently_parsing8_bit_chunk_;
208   bool saw_error_;
209   bool saw_css_;
210   bool saw_xsl_transform_;
211   bool saw_first_element_;
212   bool is_xhtml_document_;
213   bool parser_paused_;
214   bool requesting_script_;
215   bool finish_called_;
216   bool waiting_for_stylesheets_ = false;
217   bool added_pending_parser_blocking_stylesheet_ = false;
218 
219   XMLErrors xml_errors_;
220 
221   Member<XMLParserScriptRunner> script_runner_;
222   TextPosition script_start_position_;
223 
224   bool parsing_fragment_;
225   AtomicString default_namespace_uri_;
226 
227   typedef HashMap<AtomicString, AtomicString> PrefixForNamespaceMap;
228   PrefixForNamespaceMap prefix_to_namespace_map_;
229   SegmentedString pending_src_;
230 };
231 
232 xmlDocPtr XmlDocPtrForString(Document*,
233                              const String& source,
234                              const String& url);
235 HashMap<String, String> ParseAttributes(const String&, bool& attrs_ok);
236 
237 }  // namespace blink
238 
239 #endif  // THIRD_PARTY_BLINK_RENDERER_CORE_XML_PARSER_XML_DOCUMENT_PARSER_H_
240