1 /*
2  * Copyright (C) 2001-2003 Peter J Jones (pjones@pmade.org)
3  * Copyright (C) 2013 Vaclav Slavik <vslavik@gmail.com>
4  * All Rights Reserved
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in
14  *    the documentation and/or other materials provided with the
15  *    distribution.
16  * 3. Neither the name of the Author nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
21  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
23  * PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR
24  * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
27  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
28  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
29  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
30  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  */
33 
34 /**
35     @file
36 
37     This file contains the definition of the xml::document class.
38  */
39 
40 #ifndef _xmlwrapp_document_h_
41 #define _xmlwrapp_document_h_
42 
43 // xmlwrapp includes
44 #include "xmlwrapp/init.h"
45 #include "xmlwrapp/node.h"
46 #include "xmlwrapp/export.h"
47 #include "xmlwrapp/errors.h"
48 
49 // standard includes
50 #include <iosfwd>
51 #include <string>
52 #include <cstddef>
53 
54 // forward declarations of the xslt types that xml::document refers to
55 namespace xslt
56 {
57 
58 class stylesheet; // named as a friend of xml::document
59 namespace impl
60 {
61 class result; // appears only as a pointer parameter of a private xml::document member
62 }
63 
64 } // end xslt namespace
65 
66 namespace xml
67 {
68 
69 // forward declarations of types that xml::document only names, so their headers are not needed here
70 class schema; // friend of xml::document
71 class tree_parser; // friend of xml::document
72 
73 namespace impl
74 {
75 struct doc_impl; // pointee type of xml::document's private pimpl_ member
76 }
77 
78 /**
79     The xml::document class is used to hold the XML tree and various bits of
80     information about it, such as its version, encoding and standalone flag.
81  */
82 class XMLWRAPP_API document
83 {
84 public:
85     /// Unsigned type used for sizes and child counts (std::size_t).
86     typedef std::size_t size_type;
87 
88     /**
89         Create a new XML document with the default settings. The new document
90         will contain a root node with a name of "blank".
91      */
92     document();
93 
94     /**
95         Create a new XML document and set the name of the root element to the
96         given text.
97 
98         @param root_name What to set the name of the root element to.
99 
100         @deprecated Use `xml::document(xml::node(root_name))` constructor instead.
101      */
102     XMLWRAPP_DEPRECATED("use xml::document(xml::node(root_name)) instead")
103     explicit document(const char *root_name);
104 
105     /**
106         Create a new XML document and set the root node.
107 
108         @param n The node to use as the root node. n will be copied.
109      */
110     explicit document(const node& n);
111 
112     /**
113         Load XML document from given file.
114 
115         Errors are handled by @a on_error handler; if you pass
116         xml::throw_on_error, xml::exception is thrown on errors. If there's a
117         fatal error that prevents the document from being loaded and the error
118         handler doesn't throw an exception, the constructor will throw
119         xml::exception anyway.
120 
121         @param filename The name of the file to parse.
122         @param on_error Handler called to process errors and warnings.
123 
124         @since 0.7.0
125      */
126     explicit document(const char *filename, error_handler& on_error);
127 
128     /**
129         Load XML document from given data.
130 
131         Errors are handled by @a on_error handler; by default, xml::exception
132         is thrown on errors. If there's a fatal error that prevents the document
133         from being loaded and the error handler doesn't throw an exception, the
134         constructor will throw xml::exception anyway.
135 
136         @param data The XML data to parse.
137         @param size The size of the XML data to parse.
138         @param on_error Handler called to process errors and warnings.
139 
140         @since 0.7.0
141      */
142     explicit document(const char *data, size_type size, error_handler& on_error = throw_on_error);
143 
144     /**
145         Copy construct a new XML document. The new document will be an exact
146         copy of the original.
147 
148         @param other The other document object to copy from.
149      */
150     document(const document& other);
151 
152     /**
153         Copy another document object into this one using the assignment
154         operator. This document object will be an exact copy of the other
155         document after the assignment.
156 
157         @param other The document to copy from.
158         @return *this.
159      */
160     document& operator=(const document& other);
161 
162     /**
163         Swap one xml::document object for another.
164 
165         @param other The other document to swap with.
166      */
167     void swap(document& other);
168 
169     /**
170         Clean up after an XML document object.
171      */
172     ~document();
173 
174     /**
175         Get a reference to the root node of this document. If no root node
176         has been set, the returned node will be a blank node. Take care to
177         bind the result to a reference so that you don't copy the whole node
178         tree!
179 
180         @return A const reference to the root node.
181      */
182     const node& get_root_node() const;
183 
184     /**
185         Get a reference to the root node of this document. If no root node
186         has been set, the returned node will be a blank node. Take care to
187         bind the result to a reference so that you don't copy the whole node
188         tree!
189 
190         @return A reference to the root node.
191      */
192     node& get_root_node();
193 
194     /**
195         Set the root node to the given node. A full copy is made and stored
196         in the document object.
197 
198         @param n The new root node to use.
199      */
200     void set_root_node(const node& n);
201 
202     /**
203         Get the XML version for this document. For generated documents, the
204         version will be the default. For parsed documents, this will be the
205         version from the XML declaration.
206 
207         @return The XML version string for this document.
208      */
209     const std::string& get_version() const;
210 
211     /**
212         Set the XML version number for this document. This version string
213         will be used when generating the XML output.
214 
215         @param version The version string to use, like "1.0".
216      */
217     void set_version(const char *version);
218 
219     /**
220         Get the XML encoding for this document. The default encoding is
221         ISO-8859-1.
222 
223         @return The encoding string.
224      */
225     const std::string& get_encoding() const;
226 
227     /**
228         Set the XML encoding string. If you don't set this, it will default
229         to ISO-8859-1.
230 
231         @param encoding The XML encoding to use.
232      */
233     void set_encoding(const char *encoding);
234 
235     /**
236         Find out if the current document is a standalone document. For
237         generated documents, this will be the default. For parsed documents
238         this will be set based on the XML declaration.
239 
240         @return True if this document is standalone.
241         @return False if this document is not standalone.
242      */
243     bool get_is_standalone() const;
244 
245     /**
246         Set the standalone flag. This will show up in the XML declaration
247         of the generated XML output.
248 
249         @param sa What to set the standalone flag to.
250      */
251     void set_is_standalone(bool sa);
252 
253     /**
254         Walk through the document and expand <xi:include> elements. For more
255         information, please see the w3c recommendation for XInclude.
256         http://www.w3.org/2001/XInclude.
257 
258         The return value of this function may change to int after a bug has
259         been fixed in libxml2 (xmlXIncludeDoProcess).
260 
261         @return False if there was an error with substitutions.
262         @return True if there were no errors (with or without substitutions).
263      */
264     bool process_xinclude();
265 
266     /**
267         Test to see if this document has an internal subset. That is, DTD
268         data that is declared within the XML document itself.
269 
270         @return True if this document has an internal subset.
271         @return False otherwise.
272      */
273     bool has_internal_subset() const;
274 
275     /**
276         Test to see if this document has an external subset. That is, it
277         references a DTD from an external source, such as a file or URL.
278 
279         @return True if this document has an external subset.
280         @return False otherwise.
281      */
282     bool has_external_subset() const;
283 
284     /**
285         Validate this document against the DTD that has been attached to it.
286         This would happen at parse time if there was a !DOCTYPE definition.
287         If the DTD is valid, and the document is valid, this member function
288         will return true.
289 
290         If it returns false, you may want to send the document through
291         xmllint to get the actual error messages.
292 
293         @return True if the document is valid.
294         @return False if there was a problem with the DTD or XML doc.
295      */
296     bool validate();
297 
298     /**
299         Parse the given DTD and try to validate this document against it. If
300         the DTD is valid, and the document is valid, this member function
301         will return true.
302 
303         If it returns false, you may want to send the document through
304         xmllint to get the actual error messages.
305 
306         This member function will add the parsed DTD to this document as the
307         external subset after the validation. If there is already an external
308         DTD attached to this document it will be removed and deleted.
309 
310         @param dtdname A filename or URL for the DTD to use.
311         @return True if the document is valid.
312         @return False if there was a problem with the DTD or XML doc.
313      */
314     bool validate(const char *dtdname);
315 
316     /**
317         Returns the number of child nodes of this document. This will always
318         be at least one, since all xmlwrapp documents must have a root node.
319         This member function is useful to find out how many document children
320         there are, including processing instructions, comments, etc.
321 
322         @return The number of child nodes that this document has.
323      */
324     size_type size() const;
325 
326     /**
327         Get an iterator to the first child node of this document. If what you
328         really wanted was the root node (the first element) you should use
329         the get_root_node() member function instead.
330 
331         @return A xml::node::iterator that points to the first child node.
332         @return An end iterator if there are no children in this document.
333      */
334     node::iterator begin();
335 
336     /**
337         Get a const_iterator to the first child node of this document. If
338         what you really wanted was the root node (the first element) you
339         should use the get_root_node() member function instead.
340 
341         @return A xml::node::const_iterator that points to the first child node.
342         @return An end const_iterator if there are no children in this document.
343      */
344     node::const_iterator begin() const;
345 
346     /**
347         Get an iterator that points one past the last child node for this
348         document.
349 
350         @return An end xml::node::iterator.
351      */
352     node::iterator end();
353 
354     /**
355         Get a const_iterator that points one past the last child node for
356         this document.
357 
358         @return An end xml::node::const_iterator.
359      */
360     node::const_iterator end() const;
361 
362     /**
363         Add a child xml::node to this document. You should not add an element
364         type node, since there can only be one root node. This member
365         function is only useful for adding processing instructions, comments,
366         etc. If you do try to add a node of type element, an exception will
367         be thrown.
368 
369         @param child The child xml::node to add.
370      */
371     void push_back (const node &child);
372 
373     /**
374         Insert a new child node. The new node will be inserted at the end of
375         the child list. This is similar to the xml::node::push_back member
376         function except that an iterator to the inserted node is returned.
377 
378         The rules from the push_back member function apply here. Don't add a
379         node of type element.
380 
381         @param n The node to insert as a child of this document.
382         @return An iterator that points to the newly inserted node.
383         @see xml::document::push_back
384      */
385     node::iterator insert (const node &n);
386 
387     /**
388         Insert a new child node. The new node will be inserted before the
389         node pointed to by the given iterator.
390 
391         The rules from the push_back member function apply here. Don't add a
392         node of type element.
393 
394         @param position An iterator that points to the location where the new node should be inserted (before it).
395         @param n The node to insert as a child of this document.
396         @return An iterator that points to the newly inserted node.
397         @see xml::document::push_back
398      */
399     node::iterator insert(node::iterator position, const node &n);
400 
401     /**
402         Replace the node pointed to by the given iterator with another node.
403         The old node will be removed, including all its children, and
404         replaced with the new node. This will invalidate any iterators that
405         point to the node to be replaced, or any pointers or references to
406         that node.
407 
408         Do not replace the root node with this member function. The same
409         rules that apply to push_back apply here. If you try to replace a
410         node of type element, an exception will be thrown.
411 
412         @param old_node An iterator that points to the node that should be removed.
413         @param new_node The node to put in old_node's place.
414         @return An iterator that points to the new node.
415         @see xml::document::push_back
416      */
417     node::iterator replace(node::iterator old_node, const node& new_node);
418 
419     /**
420         Erase the node that is pointed to by the given iterator. The node
421         and all its children will be removed from this document. This will
422         invalidate any iterators that point to the node to be erased, or any
423         pointers or references to that node.
424 
425         Do not remove the root node using this member function. The same
426         rules that apply to push_back apply here. If you try to erase the
427         root node, an exception will be thrown.
428 
429         @param to_erase An iterator that points to the node to be erased.
430         @return An iterator that points to the node after the one being erased.
431         @see xml::document::push_back
432      */
433     node::iterator erase(node::iterator to_erase);
434 
435     /**
436         Erase all nodes in the given range, from first to last. This will
437         invalidate any iterators that point to the nodes to be erased, or any
438         pointers or references to those nodes.
439 
440         Do not remove the root node using this member function. The same
441         rules that apply to push_back apply here. If you try to erase the
442         root node, an exception will be thrown.
443 
444         @param first The first node in the range to be removed.
445         @param last An iterator that points one past the last node to erase. Think xml::node::end().
446         @return An iterator that points to the node after the last one being erased.
447         @see xml::document::push_back
448      */
449     node::iterator erase(node::iterator first, node::iterator last);
450 
451     /**
452         Convert the XML document tree into XML text data and place it into
453         the given string.
454 
455         @param s The string to place the XML text data.
456      */
457     void save_to_string(std::string& s) const;
458 
459     /**
460         Convert the XML document tree into XML text data and place it into
461         the given filename.
462 
463         @param filename The name of the file to place the XML text data into.
464         @param compression_level 0 is no compression, 1-9 allowed, where 1 is
465                                  for better speed, and 9 is for smaller size
466         @return True if the data was saved successfully.
467         @return False otherwise.
468      */
469     bool save_to_file(const char *filename, int compression_level = 0) const;
470 
471     /**
472         Convert the XML document tree into XML text data and then insert it
473         into the given stream.
474 
475         @param stream The stream to insert the XML into.
476         @param doc The document to insert.
477         @return The stream from the first parameter.
478      */
479     friend XMLWRAPP_API std::ostream& operator<< (std::ostream &stream, const document &doc);
480 
481 private:
482     impl::doc_impl *pimpl_; ///< Pointer to the implementation struct, which is only forward-declared in this header.
483     // NOTE(review): untyped (void*) document-data accessors, presumably meant for the friend classes below -- confirm in the implementation.
484     void set_doc_data (void *data);
485     void set_doc_data_from_xslt (void *data, xslt::impl::result *xr);
486     void* get_doc_data();
487     void* get_doc_data_read_only() const;
488     void* release_doc_data();
489     // Classes granted access to the private members above.
490     friend class tree_parser;
491     friend class schema;
492     friend class xslt::stylesheet;
493 };
494 
495 } // namespace xml
496 
497 #endif // _xmlwrapp_document_h_
498