1 /* document.h
2  * this file is part of libxml++
3  *
4  * parts of the code copyright (C) 2003 by Stefan Seefeld
5  * others copyright (C) 2003 by libxml++ developer's team
6  *
7  * this file is covered by the GNU Lesser General Public License,
8  * which should be included with libxml++ as the file COPYING.
9  */
10 
11 #ifndef __LIBXMLPP_DOCUMENT_H
12 #define __LIBXMLPP_DOCUMENT_H
13 
14 #include <libxml++/exceptions/parse_error.h>
15 #include <libxml++/exceptions/internal_error.h>
16 #include <libxml++/nodes/element.h>
17 #include <libxml++/dtd.h>
18 
19 #include <ostream>
20 
#ifndef DOXYGEN_SHOULD_SKIP_THIS
// Opaque forward declarations of the libxml C structs referenced in this
// header, so that the libxml headers do not have to be included here.
extern "C"
{
struct _xmlEntity;
struct _xmlDoc;
}
#endif // DOXYGEN_SHOULD_SKIP_THIS
27 
28 namespace xmlpp
29 {
30 
/** The kind of entity described by an entity declaration.
 * Used as the @a type argument of Document::set_entity_declaration().
 *
 * The enumerator names and numeric values are the same as those of libxml2's
 * <tt>xmlEntityType</tt> enumeration.
 *
 * This is a named, unscoped enumeration (rather than a typedef of an unnamed
 * one), so the enumerators remain visible in the enclosing scope and still
 * convert implicitly to <tt>int</tt>.
 */
enum XmlEntityType
{
  /** General entity whose replacement text is given in the declaration itself. */
  XML_INTERNAL_GENERAL_ENTITY = 1,
  /** General entity that refers to external, parsed (XML) content. */
  XML_EXTERNAL_GENERAL_PARSED_ENTITY = 2,
  /** General entity that refers to external, unparsed (non-XML) content. */
  XML_EXTERNAL_GENERAL_UNPARSED_ENTITY = 3,
  /** Parameter entity whose replacement text is given in the declaration itself. */
  XML_INTERNAL_PARAMETER_ENTITY = 4,
  /** Parameter entity that refers to external content. */
  XML_EXTERNAL_PARAMETER_ENTITY = 5,
  /** One of the entities that are predefined by XML itself. */
  XML_INTERNAL_PREDEFINED_ENTITY = 6
};
39 
40 class Document;
41 
//TODO: Make Document inherit from Node when we can break ABI one day?
//
//libxml might intend xmlDoc to (conceptually) derive from xmlNode.
//This is suggested by the fact that the xmlNodeSet returned by xmlXPathEval()
//(see the Node::find() implementation) can contain either xmlNode or xmlDoc elements.
46 /**
47  * Represents an XML document in the DOM model.
48  */
49 class Document : NonCopyable
50 {
51   //Ensure that libxml is properly initialised:
52   class Init
53   {
54   public:
55     Init();
56 
57     //TODO: Remove the virtual when we can break ABI?
58     virtual ~Init();
59   };
60 
61   friend class SaxParser;
62 
63 public:
64   /** Create a new document.
65    * @param version XML version.
66    * @throws xmlpp::internal_error If memory allocation fails.
67    */
68   explicit Document(const Glib::ustring& version = "1.0");
69 
70   /** Create a new C++ wrapper for an xmlDoc struct.
71    * The created xmlpp::Document takes ownership of the xmlDoc.
72    * When the Document is deleted, so is the xmlDoc and all its nodes.
73    * @param doc A pointer to an xmlDoc struct. Must not be <tt>nullptr</tt>.
74    */
75   explicit Document(_xmlDoc* doc);
76 
77   ~Document() override;
78 
79   /** @return The encoding used in the source from which the document has been loaded.
80    */
81   Glib::ustring get_encoding() const;
82 
83   /** Get the internal subset of this document.
84    * @returns A pointer to the DTD, or <tt>nullptr</tt> if not found.
85    */
86   Dtd* get_internal_subset() const;
87 
88   /** Create the internal subset of this document.
89    * If the document already has an internal subset, a new one is not created.
90    * @param name The DTD name.
91    * @param external_id The external (PUBLIC) ID, or an empty string.
92    * @param system_id The system ID, or an empty string.
93    */
94   void set_internal_subset(const Glib::ustring& name,
95                            const Glib::ustring& external_id,
96                            const Glib::ustring& system_id);
97 
98   //TODO: There should be a const and non-const version.
99   //See the patch here: https://bugzilla.gnome.org/show_bug.cgi?id=632522
100   /** Return the root node.
101    * This function does @b not create a default root node if it doesn't exist.
102    * @return A pointer to the root node if it exists, <tt>nullptr</tt> otherwise.
103    */
104   Element* get_root_node() const;
105 
106   /** Create the root element node.
107    * If the document already contains a root element node, it is replaced, and
108    * the old root element node and all its descendants are deleted.
109    * @param name The node's name.
110    * @param ns_uri The namespace URI. A namespace declaration will be added to
111    *        this node, because it could not have been declared before.
112    * @param ns_prefix The namespace prefix to associate with the namespace.
113    *        If no namespace prefix is specified then the namespace URI will be the default namespace.
114    * @return A pointer to the new root node.
115    * @throws xmlpp::internal_error If memory allocation fails.
116    * @throws xmlpp::exception If a new namespace node cannot be created.
117    */
118   Element* create_root_node(const Glib::ustring& name,
119                             const Glib::ustring& ns_uri = Glib::ustring(),
120                             const Glib::ustring& ns_prefix = Glib::ustring() );
121 
122   /** Create a root element node by importing the node from another document,
123    * without affecting the source node.
124    * If the document already contains a root element node, it is replaced, and
125    * the old root element node and all its descendants are deleted.
126    * @param node The node to copy and insert as the root node of the document.
127    *             It must be an element node.
128    * @param recursive Whether to import the child nodes also. Defaults to true.
129    * @return A pointer to the new root node
130    * @throws xmlpp::exception If the node can't be copied.
131    */
132   Element* create_root_node_by_import(const Node* node,
133 				      bool recursive = true);
134 
135   /** Append a new comment node.
136    * @param content The text. This should be unescaped - see ContentNode::set_content().
137    * @returns The new comment node.
138    * @throws xmlpp::internal_error
139    */
140   CommentNode* add_comment(const Glib::ustring& content);
141 
142   /** Append a new processing instruction node.
143    *
144    * @newin{2,36}
145    *
146    * @param name The name of the application to which the instruction is directed.
147    * @param content The content of the instruction. This should be unescaped - see ContentNode::set_content().
148    * @returns The new processing instruction node.
149    * @throws xmlpp::internal_error
150    */
151   ProcessingInstructionNode* add_processing_instruction(
152     const Glib::ustring& name, const Glib::ustring& content);
153 
154   //TODO: Use std::string for filenames.
155   /** Write the document to a file.
156    * @param filename
157    * @param encoding If not provided, UTF-8 is used
158    * @throws xmlpp::exception
159    */
160   void write_to_file(const Glib::ustring& filename, const Glib::ustring& encoding = Glib::ustring());
161 
162   /** Write the document to a file.
163    * The output is formatted by inserting whitespaces, which is easier to read for a human,
164    * but may insert unwanted significant whitespaces. Use with care !
165    * @param filename
166    * @param encoding If not provided, UTF-8 is used
167    * @throws xmlpp::exception
168    */
169   void write_to_file_formatted(const Glib::ustring& filename, const Glib::ustring& encoding = Glib::ustring());
170 
171   /** Write the document to the memory.
172    * @param encoding If not provided, UTF-8 is used
173    * @returns The written document.
174    * @throws xmlpp::exception
175    */
176   Glib::ustring write_to_string(const Glib::ustring& encoding = Glib::ustring());
177 
178   /** Write the document to the memory.
179    * The output is formatted by inserting whitespaces, which is easier to read for a human,
180    * but may insert unwanted significant whitespaces. Use with care !
181    * @param encoding If not provided, UTF-8 is used
182    * @returns The written document.
183    * @throws xmlpp::exception
184    */
185   Glib::ustring write_to_string_formatted(const Glib::ustring& encoding = Glib::ustring());
186 
187   /** Write the document to a std::ostream.
188    * @param output A reference to the stream in which the document will be written
189    * @param encoding If not provided, UTF-8 is used
190    * @throws xmlpp::exception
191    * @throws xmlpp::internal_error
192    * @warning This method is much less efficient than write_to_string if you want to dump the
193    * document to a buffer or the standard output. Writing to a fstream is almost as fast as write_to_file
194    */
195   void write_to_stream(std::ostream& output, const Glib::ustring& encoding = Glib::ustring());
196 
197   /** Write the document to a std::ostream.
198    * The output is formatted by inserting whitespaces, which is easier to read for a human,
199    * but may insert unwanted significant whitespaces. Use with care !
200    * @param output A reference to the stream in which the document will be written
201    * @param encoding If not provided, UTF-8 is used
202    * @throws xmlpp::exception
203    * @throws xmlpp::internal_error
204    * @warning See write_to_stream
205    */
206   void write_to_stream_formatted(std::ostream & output, const Glib::ustring& encoding = Glib::ustring());
207 
208   /** Add an Entity declaration to the document.
209    * @param name The name of the entity that will be used in an entity reference.
210    * @param type The type of entity.
211    * @param publicId The public ID of the subset.
212    * @param systemId The system ID of the subset.
213    * @param content The value of the Entity. In entity reference substitutions, this
214    * is the replacement value.
215    * @throws xmlpp::internal_error
216    */
217   virtual void set_entity_declaration(const Glib::ustring& name, XmlEntityType type,
218                                       const Glib::ustring& publicId, const Glib::ustring& systemId,
219                                       const Glib::ustring& content);
220 
221   /** Perform XInclude substitution on the XML document.
222    * XInclude substitution may both add and delete nodes in the document,
223    * as well as change the type of some nodes. All pointers to deleted nodes
224    * and nodes whose type is changed become invalid.
225    * (The node type represented by an underlying xmlNode struct can change.
226    * The type of a C++ wrapper can't change. The old wrapper is deleted, and a
227    * new one is created if and when it's required.)
228    *
229    * @newin{2,36}
230    *
231    * @param generate_xinclude_nodes Generate XIncludeStart and XIncludeEnd nodes.
232    * @returns The number of substitutions.
233    * @throws xmlpp::exception
234    */
235   int process_xinclude(bool generate_xinclude_nodes = true);
236 
237   ///Access the underlying libxml implementation.
238   _xmlDoc* cobj();
239 
240   ///Access the underlying libxml implementation.
241   const _xmlDoc* cobj() const;
242 
243 protected:
244   /** Retrieve an Entity.
245    * The entity can be from an external subset or internally declared.
246    * @param name The name of the entity to get.
247    * @returns A pointer to the libxml2 entity structure, or <tt>nullptr</tt> if not found.
248    */
249   _xmlEntity* get_entity(const Glib::ustring& name);
250 
251 private:
252   //TODO: Remove virtuals when we can break ABI.
253 
254   virtual void do_write_to_file(const Glib::ustring& filename, const Glib::ustring& encoding, bool format);
255   virtual Glib::ustring do_write_to_string(const Glib::ustring& encoding, bool format);
256   virtual void do_write_to_stream(std::ostream& output, const Glib::ustring& encoding, bool format);
257 
258   static Init init_;
259 
260   _xmlDoc* impl_;
261 };
262 
263 } //namespace xmlpp
264 
265 #endif //__LIBXMLPP_DOCUMENT_H
266