1 /*
2  * Copyright (C) 2001-2003 Peter J Jones (pjones@pmade.org)
3  *               2009      Vaclav Slavik <vslavik@fastmail.fm>
4  * All Rights Reserved
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in
14  *    the documentation and/or other materials provided with the
15  *    distribution.
16  * 3. Neither the name of the Author nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
21  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
23  * PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR
24  * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
27  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
28  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
29  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
30  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  */
33 
34 /*
35  * $Id: node.hpp 543412 2017-08-09 18:22:55Z satskyse $
36  * NOTE: This file was modified from its original version 0.6.0
37  *       to fit the NCBI C++ Toolkit build framework and
38  *       API and functionality requirements.
39  * Most importantly, it adds support for XML namespaces (see "namespace.hpp").
40  */
41 
42 /** @file
43  * This file contains the definition of the xml::node class.
44 **/
45 
46 #ifndef _xmlwrapp_node_h_
47 #define _xmlwrapp_node_h_
48 
49 // for NCBI_DEPRECATED
50 #include <ncbiconf.h>
51 
52 // xmlwrapp includes
53 #include <misc/xmlwrapp/xml_init.hpp>
54 #include <misc/xmlwrapp/namespace.hpp>
55 #include <misc/xmlwrapp/attributes.hpp>
56 #include <misc/xmlwrapp/xml_save.hpp>
57 
58 // hidden stuff
59 #include <misc/xmlwrapp/impl/_cbfo.hpp>
60 
61 
62 // standard includes
63 #include <cstddef>
64 #include <iosfwd>
65 #include <string>
66 #include <deque>
67 
// Forward declarations of the C-linkage XSLT extension callbacks.
// (The original note says they are needed for a friend declaration
// further down in this header.)
extern "C" { void xslt_ext_func_cb(void *, int); }
extern "C" { void xslt_ext_element_cb(void*, void*, void*, void*); }
71 
// Forward declarations of XSLT-side classes that this header only needs
// to name; their definitions live elsewhere.
namespace xslt {
class xpath_object;
class extension_element;
}
76 
77 namespace xml {
78 
79 // forward declarations
80 class document;
81 class xpath_expression;
82 class node_set;
83 
// Forward declarations of implementation details; only the names are
// needed in this header.
namespace impl {
class node_iterator;
class iter_advance_functor;
struct node_impl;
struct doc_impl;
struct nipimpl;
struct node_cmp;
struct node_private_data;
node_private_data*  attach_node_private_data(void *);
}
94 
95 /**
96  * The xml::node class is used to hold information about one XML node. This
97  * includes the name of the node, the namespace of the node and attributes
98  * for the node. It also has an iterator whereby you can get to the children
99  * nodes.
100  *
101  * It should be noted that any member function that returns a const char*
102  * returns a temporary value. The pointer that is returned will change with
103  * ANY operation to the xml::node. If you need the data to stick around a
104  * little longer you should put it inside a std::string.
105 **/
106 class node {
107 public:
108     /// size type
109     typedef std::size_t size_type;
110 
111     /// enum for the different types of XML nodes
112     enum node_type {
113         type_element,           ///< XML element such as "<chapter/>"
114         type_text,              ///< Text node
115         type_cdata,             ///< <![CDATA[text]]>
116         type_pi,                ///< Processing Instruction
117         type_comment,           ///< XML comment
118         type_entity,            ///< Entity as in &amp;amp;
119         type_entity_ref,        ///< Entity ref
120         type_xinclude,          ///< <xi:include/> node
121         type_document,          ///< Document node
122         type_document_type,     ///< DOCTYPE node
123         type_document_frag,     ///< Document Fragment
124         type_notation,          ///< Notation
125         type_dtd,               ///< DTD node
126         type_dtd_element,       ///< DTD <!ELEMENT> node
127         type_dtd_attribute,     ///< DTD <!ATTRLIST> node
128         type_dtd_entity,        ///< DTD <!ENTITY>
129         type_dtd_namespace      ///< ?
130     };
131 
132     /// enum for policies of adding namespace definitions
133     enum ns_definition_adding_type {
134         type_replace_if_exists, ///< replace URI if ns with the same prefix exists
135         type_throw_if_exists    ///< throw exception if ns with the same prefix exists
136     };
137 
138     /// enum to specify how to remove namespace definitions
139     enum ns_definition_erase_type {
140         type_ns_def_erase_if_not_used,  ///< Remove the definition only if it
141                                         ///< is not in use.
142                                         ///< If the definition is in use then
143                                         ///< throw an exception.
144         type_ns_def_erase_enforce       ///< Remove the definition regardless
145                                         ///< if it is used or not. If any
146                                         ///< attribute or node uses the
147                                         ///< definition then its namespace will
148                                         ///< be adjusted to a default one (if
149                                         ///< defined above) or will be set to
150                                         ///< no namespace (otherwise).
151     };
152 
153     /// enum to specify what namespaces to include into the list of the
154     /// node effective namespaces
155     enum effective_ns_list_type {
156         type_ns_all,                ///< include all namespaces
157         type_ns_only_default,       ///< include only default namespace
158         type_ns_only_non_default    ///< include only non-default namespaces
159     };
160 
161     /**
162      * Helper struct for creating a xml::node of type_cdata.
163      *
164      * @code
165      * xml::node mynode(xml::node::cdata("This is a CDATA section"));
166      * @endcode
167      */
168     struct cdata {
cdataxml::node::cdata169         explicit cdata (const char *text) : t(text) { }
170         const char *t;
171     };
172 
173     /**
174      * Helper struct for creating a xml::node of type_comment.
175      *
176      * @code
177      * xml::node mynode(xml::node::comment("This is an XML comment"));
178      * @endcode
179      */
180     struct comment {
commentxml::node::comment181         explicit comment (const char *text) : t(text) { }
182         const char *t;
183     };
184 
185     /**
186      * Helper struct for creating a xml::node of type_pi.
187      *
188      * @code
189      * xml::node mynode(xml::node::pi("xslt", "stylesheet=\"test.xsl\""));
190      * @endcode
191      */
192     struct pi {
pixml::node::pi193         explicit pi (const char *name, const char *content=0) : n(name), c(content) { }
194         const char *n, *c;
195     };
196 
197     /**
198      * Helper struct for creating a xml::node of type_text.
199      *
200      * @code
201      * xml::node mynode(xml::node::text("This is an XML text fragment"));
202      * @endcode
203      */
204     struct text {
textxml::node::text205         explicit text (const char *txt) : t(txt) { }
206         const char *t;
207     };
208 
209     //####################################################################
210     /**
211      * Construct a new blank xml::node.
212      *
213      * @author Peter Jones
214     **/
215     //####################################################################
216     node (void);
217 
218     //####################################################################
219     /**
220      * Construct a new xml::node and set the name of the node.
221      *
222      * @param name The name of the new node.
223      * @author Peter Jones
224     **/
225     //####################################################################
226     explicit node (const char *name);
227 
228     //####################################################################
229     /**
230      * Construct a new xml::node given a name and content. The content will
231      * be used to create a new child text node.
232      * All the special symbols ('<', '>', '&', '"', '\r') in the given
233      * content are encoded before assigning the new content.
234      * If entities are needed in the content please use set_raw_content(...).
235      *
236      * @param name The name of the new element.
237      * @param content The text that will be used to create a child node.
238      * @author Peter Jones
239     **/
240     //####################################################################
241     node (const char *name, const char *content);
242 
243     //####################################################################
244     /**
245      * Construct a new xml::node that is of type_cdata. The cdata_info
246      * parameter should contain the contents of the CDATA section.
247      *
248      * @note Sample Use Example:
249      * @code
250      * xml::node mynode(xml::node::cdata("This is a CDATA section"));
251      * @endcode
252      *
253      * @param cdata_info A cdata struct that tells xml::node what the content will be.
254      * @author Peter Jones
255     **/
256     //####################################################################
257     explicit node (cdata cdata_info);
258 
259     //####################################################################
260     /**
261      * Construct a new xml::node that is of type_comment. The comment_info
262      * parameter should contain the contents of the XML comment.
263      *
264      * @note Sample Use Example:
265      * @code
266      * xml::node mynode(xml::node::comment("This is an XML comment"));
267      * @endcode
268      *
269      * @param comment_info A comment struct that tells xml::node what the comment will be.
270      * @author Peter Jones
271     **/
272     //####################################################################
273     explicit node (comment comment_info);
274 
275     //####################################################################
276     /**
277      * Construct a new xml::node that is of type_pi. The pi_info parameter
278      * should contain the name of the XML processing instruction (PI), and
279      * optionally, the contents of the XML PI.
280      *
281      * @note Sample Use Example:
282      * @code
283      * xml::node mynode(xml::node::pi("xslt", "stylesheet=\"test.xsl\""));
284      * @endcode
285      *
286      * @param pi_info A pi struct that tells xml::node what the name and contents of the XML PI are.
287      * @author Peter Jones
288     **/
289     //####################################################################
290     explicit node (pi pi_info);
291 
292     //####################################################################
293     /**
294      * Construct a new xml::node that is of type_text. The text_info
295      * parameter should contain the text.
296      *
297      * @note Sample Use Example:
298      * @code
299      * xml::node mynode(xml::node::text("This is XML text"));
300      * @endcode
301      *
302      * @param text_info A text struct that tells xml::node what the text will be.
303      * @author Vaclav Slavik
304     **/
305     //####################################################################
306     explicit node (text text_info);
307 
308     //####################################################################
309     /**
310      * Create a copy of the node which is detached from the document.
311      * The nested nodes as well as namespace definitions are copied too.
312      *
313      * @return A pointer to the copied node. The caller is responsible for
314      *         deleting it.
315      * @exception Throws xml::exception if the copying failed.
316     **/
317     //####################################################################
318     node* detached_copy (void) const;
319 
320     //####################################################################
321     /**
322      * Copy another node object into this one. This node object will
323      * be an exact copy of the other node after the assignment.
324      *
325      * @param other The node to copy from.
326      * @return *this
327      * @author Denis Vakatov
328     **/
329     //####################################################################
330     node& assign (const node &other);
331 
332     //####################################################################
333     /**
334      * Class destructor
335      *
336      * @author Peter Jones
337     **/
338     //####################################################################
339     virtual ~node (void);
340 
341     //####################################################################
342     /**
343      * Set the name of this xml::node.
344      *
345      * @param name The new name for this xml::node.
346      * @author Peter Jones
347     **/
348     //####################################################################
349     void set_name (const char *name);
350 
351     //####################################################################
352     /**
353      * Get the name of this xml::node.
354      *
355      * This function may change in the future to return std::string.
356      * Feedback is welcome.
357      *
358      * @return The name of this node.
359      * @author Peter Jones
360     **/
361     //####################################################################
362     const char* get_name (void) const;
363 
364     //####################################################################
365     /**
366      * Set the content of a node. If this node is an element node, this
367      * function will remove all of its children nodes and replace them
368      * with one text node set to the new content.
369      * All the special symbols ('<', '>', '&', '"', '\r') in the given
370      * content are encoded before assigning the new content.
371      * If entities are needed in the content please use
372      * set_raw_content(...).
373      *
374      * @param content The content of the text node.
375      * @author Peter Jones
376     **/
377     //####################################################################
378     void set_content (const char *content);
379 
380     //####################################################################
381     /**
382      * Set the raw content of a node. If this node is an element node,
383      * this function will remove all of its children nodes and replace
384      * them with one text node set to the new content.
385      * The given content is checked for '<' and '>' characters. If found
386      * they will be replaced with '&lt;' and '&gt;' respectively and this
387      * is the only potential conversion done for the given raw content.
388      * This member is likely used if entities are needed in the node content.
389      * In any case it is the user's responsibility to provide valid
390      * content for this member.
391      *
392      * @param raw_content The raw content of the text node.
393     **/
394     //####################################################################
395     void set_raw_content (const char *raw_content);
396 
397     //####################################################################
398     /**
399      * Get the content for this text node. If this node is not a text node
400      * but it has children nodes that are text nodes, the contents of those
401      * child nodes will be returned. If there is no content or these
402      * conditions do not apply, zero will be returned.
403      *
404      * This function may change in the future to return std::string.
405      * Feedback is welcome.
406      *
407      * @return The content or 0.
408      * @author Peter Jones
409     **/
410     //####################################################################
411     const char* get_content (void) const;
412 
413     //####################################################################
414     /**
415      * Get this node's "type". You can use that information to know what you
416      * can and cannot do with it.
417      *
418      * @return The node's type.
419      * @author Peter Jones
420     **/
421     //####################################################################
422     node_type get_type (void) const;
423 
424     //####################################################################
425     /**
426      * Get the list of attributes. You can use the returned object to get
427      * and set the attributes for this node. Make sure you use a reference
428      * to this returned object, to prevent a copy.
429      *
430      * @return The xml::attributes object for this node.
431      * @author Peter Jones
432     **/
433     //####################################################################
434     xml::attributes& get_attributes (void);
435 
436     //####################################################################
437     /**
438      * Get the list of attributes. You can use the returned object to get
439      * the attributes for this node. Make sure you use a reference to this
440      * returned object, to prevent a copy.
441      *
442      * @return The xml::attributes object for this node.
443      * @author Peter Jones
444     **/
445     //####################################################################
446     const xml::attributes& get_attributes (void) const;
447 
448     //####################################################################
449     /**
450      * Search for a node attribute.
451      *
452      * @param name
453      *   The name of the attribute to find. The name could be given as a
454      *   qualified name, e.g. 'prefix:attr_name'. If the name is qualified then
455      *   the nspace argument must be NULL (otherwise an exception is
456      *   generated) and the attribute search is namespace aware with an
457      *   effective namespace identified by the given prefix.
458      * @param nspace
459      *   The namespace of the attribute to find:
460      *   - NULL matches any namespace
461      *   - Void namespace matches attributes without a namespace set
462      *   - Unsafe namespace is used as it is
463      *   - A safe namespace is resolved based on the URI only
464      * @return iterator to the found attribute. If there is no such
465      *         attribute then the returned iterator equals
466      *         attributes::end().
467     **/
468     attributes::iterator find_attribute (const char* name,
469                                          const ns* nspace = NULL);
470 
471     //####################################################################
472     /**
473      * Search for a node attribute.
474      *
475      * @param name
476      *   The name of the attribute to find. The name could be given as a
477      *   qualified name, e.g. 'prefix:attr_name'. If the name is qualified then
478      *   the nspace argument must be NULL (otherwise an exception is
479      *   generated) and the attribute search is namespace aware with an
480      *   effective namespace identified by the given prefix.
481      * @param nspace
482      *   The namespace of the attribute to find:
483      *   - NULL matches any namespace
484      *   - Void namespace matches attributes without a namespace set
485      *   - Unsafe namespace is used as it is
486      *   - A safe namespace is resolved based on the URI only
487      * @return const iterator to the found attribute. If there is no such
488      *         attribute then the returned iterator equals
489      *         attributes::end().
490     **/
491     attributes::const_iterator find_attribute (const char* name,
492                                                const ns* nspace = NULL) const;
493 
494     //####################################################################
495     /**
496      * Get the namespace of this xml::node.
497      *
498      * @param type
499      *  The required type of namespace object (safe/unsafe).
500      * @return
501      *  The namespace of this node. If the node has no namespace
502      *  then return a "void" namespace object with empty prefix and URI
503      *  (for which xml::ns::is_void() returns TRUE).
504     **/
505     //####################################################################
506     ns get_namespace (ns::ns_safety_type type = ns::type_safe_ns) const;
507 
508     //####################################################################
509     /**
510       * Get the namespaces defined at this xml::node.
511       *
512       * @param type The required type of namespace objects (safe/unsafe).
513       * @return The namespaces defined at this node.
514       *         If no namespaces are defined then return an empty container.
515      **/
516     //####################################################################
517     ns_list_type get_namespace_definitions (ns::ns_safety_type type = ns::type_safe_ns) const;
518 
519     //####################################################################
520     /**
521       * Set the node namespace.
522       *
523       * The namespace definition is searched up in the hierarchy of nodes.
524       * If a namespace with the given prefix and URI is not found
525       * then throw an exception.
526       *
527       * @param name_space
528       *  Namespace to set to the node.
529       *  "Void" namespace is treated as a namespace removal request --
530       *  exactly the same as erase_namespace() call.
531       * @note There are no checks at all if an unsafe ns object is provided.
532       * @return  Unsafe namespace
533     **/
534     //####################################################################
535     ns set_namespace (const ns& name_space);
536 
537     //####################################################################
538     /**
539       * Set the node namespace.
540       *
541       * The namespace definition is searched up in the hierarchy of nodes. If
542       * a namespace with the given prefix is not found then throw an exception.
543       *
544       * @param prefix
545       *  Namespace prefix. For the default namespace use NULL or empty string.
546       * @return  Unsafe namespace
547      **/
548     //####################################################################
549     ns set_namespace (const char* prefix);
550 
551     //####################################################################
552     /**
553       * Add namespace definition to the node.
554       *
555       * If the node already has a namespace definition with the same
556       * prefix then its URI will be replaced with the new one, and that's it.
557       * Otherwise, the hierarchy of nodes (including their attributes) is
558       * walked down, updating all namespaces (with the same prefix) which do
559       * not use namespace definitions (with the same prefix) which are
560       * redefined below this node.
561       *
562       * @param name_space
563       *  The namespace definition to add to the node.
564       * @param type
565       *  What to do (replace or throw exception) when encountering a
566       *  namespace definition with the same prefix.
567       * @return  Unsafe namespace
568      **/
569     //####################################################################
570     ns add_namespace_definition (const ns&                 name_space,
571                                  ns_definition_adding_type type);
572 
573     //####################################################################
574     /**
575       * Add namespace definitions to the node.
576       *
577       * @sa add_namespace_definition
578       *
579       * @param name_spaces
580       *  List of namespace definitions to add to the node.
581       * @param type
582       *  What to do (replace or throw exception) when encountering a
583       *  namespace definition with the same prefix.
584      **/
585     //####################################################################
586     void add_namespace_definitions (const ns_list_type&       name_spaces,
587                                     ns_definition_adding_type type);
588 
589     //####################################################################
590     /**
591       * Remove the node namespace definition.
592       *
593       * @param prefix
594       *  The prefix of the namespace to be removed from the node namespace
595       *  definitions.
596       *  For the default namespace use NULL or empty string.
597       *  If there is no such namespace definition, then do nothing.
598       * @param how
599       *  Specifies what to do if the given namespace is in use.
600      **/
601     //####################################################################
602     void erase_namespace_definition (const char* prefix,
603                                      ns_definition_erase_type how =
604                                          type_ns_def_erase_if_not_used);
605 
606     //####################################################################
607     /**
608       * Remove the node namespace.
609       *
610       * The hierarchy of nodes is searched up and if a default namespace is
611       * found then it is used as a new node namespace.
612      **/
613     //####################################################################
614     void erase_namespace (void);
615 
616     //####################################################################
617     /**
618       * Look up a namespace with the given prefix.
619       *
620       * Walk the nodes hierarchy up and check the namespace definition
621       * prefixes. If the prefix matches, then return the
622       * corresponding safe/unsafe namespace object.
623       *
624       * @param prefix
625       *  Namespace prefix to look for.
626       *  For the default namespace use NULL or empty string.
627       * @param type
628       *  Type of namespace object (safe/unsafe) to return.
629       * @return
630       *  Namespace object ("void" namespace if none found).
631      **/
632     //####################################################################
633     ns lookup_namespace (const char*        prefix,
634                          ns::ns_safety_type type = ns::type_safe_ns) const;
635 
636     //####################################################################
637     /**
638       * Erase duplicate namespace definitions.
639       *
640       * Walks the nodes hierarchy down and erases duplicate namespace
641       * definitions.
642      **/
643     //####################################################################
644     void erase_duplicate_ns_defs (void);
645 
646     //####################################################################
647     /**
648       * Erase unused namespace definitions.
649       *
650       * Walks the nodes hierarchy down and erases unused namespace
651       * definitions.
652      **/
653     //####################################################################
654     void erase_unused_ns_defs (void);
655 
656     //####################################################################
657     /**
658       * Get the node path.
659       *
660       * @return node path
661       * @exception throw an exception in case of errors
662      **/
663     //####################################################################
664     std::string get_path (void) const;
665 
666     //####################################################################
667     /**
668      * Find out if this node is a text node or something like a text node,
669      * CDATA for example.
670      *
671      * @return True if this node is a text node; false otherwise.
672      * @author Peter Jones
673     **/
674     //####################################################################
675     bool is_text (void) const;
676 
677     //####################################################################
678     /**
679      * Add a child xml::node to this node.
680      *
681      * @param child The child xml::node to add.
682      * @author Peter Jones
683     **/
684     //####################################################################
685     void push_back (const node &child);
686 
687     //####################################################################
688     /**
689      * Swap this node with another one.
690      *
691      * @param other The other node to swap with.
692      * @author Peter Jones
693     **/
694     //####################################################################
695     void swap (node &other);
696 
697     class const_iterator; // forward declaration
698 
699     /**
700      * The xml::node::iterator provides a way to access children nodes
701      * similar to a standard C++ container. The nodes that are pointed to by
702      * the iterator can be changed.
703      */
704     class iterator {
705     public:
706         typedef node value_type;
707         typedef std::ptrdiff_t difference_type;
708         typedef value_type* pointer;
709         typedef value_type& reference;
710         typedef std::forward_iterator_tag iterator_category;
711 
iterator(void)712         iterator  (void) : pimpl_(0) {}
713         iterator  (const iterator &other);
714         iterator& operator= (const iterator& other);
715         ~iterator (void);
716 
717         reference operator*  (void) const;
718         pointer   operator-> (void) const;
719 
720         /// prefix increment
721         iterator& operator++ (void);
722 
723         /// postfix increment (avoid if possible for better performance)
724         iterator  operator++ (int);
725 
operator ==(const iterator & other) const726         bool operator==(const iterator& other) const
727         { return get_raw_node() == other.get_raw_node(); }
operator !=(const iterator & other) const728         bool operator!=(const iterator& other) const
729         { return !(*this == other); }
730 
731     private:
732         impl::nipimpl *pimpl_;
733         explicit iterator (void *data);
734         void* get_raw_node (void) const;
735         void swap (iterator &other);
736         friend class node;
737         friend class document;
738         friend class const_iterator;
739     };
740 
741     /**
742      * The xml::node::const_iterator provides read-only access to child nodes
743      * in the style of a standard C++ container iterator (forward category).
744      * Nodes reached through a const_iterator cannot be modified.
745      */
746     class const_iterator {
747     public:
748         typedef const node value_type;
749         typedef std::ptrdiff_t difference_type;
750         typedef value_type* pointer;
751         typedef value_type& reference;
752         typedef std::forward_iterator_tag iterator_category;
753 
754         const_iterator  (void) : pimpl_(0) {}      ///< no implementation object attached (null pimpl_)
755         const_iterator  (const const_iterator &other);
756         const_iterator  (const iterator &other);   ///< non-explicit: allows conversion from the mutable iterator
757         const_iterator& operator= (const const_iterator& other);
758         ~const_iterator (void);
759 
760         reference operator*  (void) const;
761         pointer   operator-> (void) const;
762 
763         /// prefix increment
764         const_iterator& operator++ (void);
765 
766         /// postfix increment (avoid if possible for better performance)
767         const_iterator  operator++ (int);
768 
769         bool operator==(const const_iterator& other) const   ///< equal when both yield the same raw node pointer
770         { return get_raw_node() == other.get_raw_node(); }
771         bool operator!=(const const_iterator& other) const   ///< defined as the negation of operator==
772         { return !(*this == other); }
773     private:
774         impl::nipimpl *pimpl_;                     // implementation object; null for a default-constructed iterator
775         explicit const_iterator (void *data);      // NOTE(review): data is presumably a raw node pointer - confirm
776         void* get_raw_node (void) const;           // raw pointer that operator== compares
777         void swap (const_iterator &other);
778         friend class document;
779         friend class node;
780     };
781 
782     //####################################################################
783     /**
784      * Returns the number of children this node has. If you only need to
785      * know whether this node has any children at all, you should use
786      * xml::node::empty() instead.
787      *
788      * @return The number of children this node has.
789      * @author Peter Jones
790     **/
791     //####################################################################
792     size_type size (void) const;
793 
794     //####################################################################
795     /**
796      * Find out whether this node has no children. The result is the same as
797      * xml::node::size() == 0, except that this check is much faster.
798      *
799      * @return True if this node DOES NOT have any children.
800      * @return False if this node has at least one child.
801      * @author Peter Jones
802     **/
803     //####################################################################
804     bool empty (void) const;
805 
806     //####################################################################
807     /**
808      * Get a mutable iterator positioned at the beginning of this node's children.
809      *
810      * @return An xml::node::iterator that points to the beginning of the children.
811      * @author Peter Jones
812     **/
813     //####################################################################
814     iterator begin (void);
815 
816     //####################################################################
817     /**
818      * Get a const_iterator positioned at the beginning of this node's
819      * children. This overload is the one selected for const nodes.
820      *
821      * @return A const_iterator that points to the beginning of the children.
822      * @author Peter Jones
823     **/
824     //####################################################################
825     const_iterator begin (void) const;
826 
827     //####################################################################
828     /**
829      * Get an iterator that points one past the last child of this node.
830      * The result is a default-constructed xml::node::iterator.
831      * @return A "one past the end" iterator.
832      * @author Peter Jones
833     **/
834     //####################################################################
835     iterator end (void) { return iterator(); }
836 
837     //####################################################################
838     /**
839      * Get a const_iterator that points one past the last child of this
840      * node. The result is a default-constructed xml::node::const_iterator.
841      *
842      * @return A "one past the end" const_iterator
843      * @author Peter Jones
844     **/
845     //####################################################################
846     const_iterator end (void) const { return const_iterator(); }
847 
848     //####################################################################
849     /**
850      * Get an iterator that refers to this node itself.
851      *
852      * @return An xml::node::iterator that points at this node.
853      * @author Peter Jones
854     **/
855     //####################################################################
856     iterator self (void);
857 
858     //####################################################################
859     /**
860      * Get a const_iterator that refers to this node itself.
861      *
862      * @return An xml::node::const_iterator that points at this node.
863      * @author Peter Jones
864     **/
865     //####################################################################
866     const_iterator self (void) const;
867 
868     //####################################################################
869     /**
870      * Find out whether this node is a root node, i.e. whether it has no parent.
871      *
872      * @return true if the node is a root node (it has no parent).
873     **/
874     //####################################################################
875     bool is_root (void) const;
876 
877     //####################################################################
878     /**
879      * Get an iterator that points at the parent of this node. If this node
880      * does not have a parent, this member function returns an "end"
881      * iterator.
882      *
883      * @note
884      *  It is recommended to call the is_root() function before calling
885      *  parent(). If is_root() returns true, the iterator provided by parent()
886      *  cannot be dereferenced.
887      *
888      * @return An iterator that points to this node's parent.
889      * @return If no parent, returns the same iterator that xml::node::end() returns.
890      * @author Peter Jones
891     **/
892     //####################################################################
893     iterator parent (void);
894 
895     //####################################################################
896     /**
897      * Get a const_iterator that points at the parent of this node. If this
898      * node does not have a parent, this member function returns an
899      * "end" const_iterator.
900      *
901      * @note
902      *  It is recommended to call the is_root() function before calling
903      *  parent(). If is_root() returns true, the iterator provided by parent()
904      *  cannot be dereferenced.
905      *
906      * @return A const_iterator that points to this node's parent.
907      * @return If no parent, returns the same const_iterator that xml::node::end() returns.
908      * @author Peter Jones
909     **/
910     //####################################################################
911     const_iterator parent (void) const;
912 
913     //####################################################################
914     /**
915      * Find the first child node that has the given name and namespace.
916      * If no such node can be found, this function returns the same
917      * iterator that end() would return.
918      *
919      * This function is not recursive. That is, it does not search down the
920      * tree for the requested node. Instead, it searches one level deep
921      * only, checking just the children of this node.
922      *
923      * @param name The name of the node you want to find.
924      * @param nspace The namespace of the node to find. NULL (the default)
925      *               matches any namespace. A void namespace matches a node
926      *               without a namespace set.
927      * @return An iterator that points to the node if found.
928      * @return An end() iterator if the node was not found.
929      * @author Peter Jones
930      *
931      * @see elements(const char*), find(const char*, iterator)
932     **/
933     //####################################################################
934     iterator find (const char *name, const ns *nspace=NULL);
935 
936     //####################################################################
937     /**
938      * Find the first child node that has the given name and namespace.
939      * If no such node can be found, this function returns the same
940      * const_iterator that end() would return.
941      *
942      * This function is not recursive. That is, it does not search down the
943      * tree for the requested node. Instead, it searches one level deep
944      * only, checking just the children of this node.
945      *
946      * @param name The name of the node you want to find.
947      * @param nspace The namespace of the node to find. NULL (the default)
948      *               matches any namespace. A void namespace matches a node
949      *               without a namespace set.
950      * @return A const_iterator that points to the node if found.
951      * @return An end() const_iterator if the node was not found.
952      * @author Peter Jones
953      *
954      * @see elements(const char*) const, find(const char*, const_iterator) const
955     **/
956     //####################################################################
957     const_iterator find (const char *name, const ns *nspace=NULL) const;
958 
959     //####################################################################
960     /**
961      * Find the first child node, starting with the given iterator, that has
962      * the given name and namespace. If no such node can be found, this
963      * function returns the same iterator that end() would return.
964      *
965      * This function should be given an iterator to one of this node's
966      * children. The search begins with that node and continues with all
967      * its siblings. This function does not recurse down the tree; it only
968      * searches in one level.
969      *
970      * @param name The name of the node you want to find.
971      * @param start Where to begin the search.
972      * @param nspace The namespace of the node to find. NULL (the default)
973      *               matches any namespace. A void namespace matches a node
974      *               without a namespace set.
975      * @return An iterator that points to the node if found.
976      * @return An end() iterator if the node was not found.
977      * @author Peter Jones
978      *
979      * @see elements(const char*)
980     **/
981     //####################################################################
982     iterator find (const char *name, const iterator& start, const ns *nspace=NULL);
983 
984     //####################################################################
985     /**
986      * Find the first child node, starting with the given const_iterator,
987      * that has the given name and namespace. If no such node can be found,
988      * this function returns the same const_iterator that end() would
989      * return.
990      *
991      * This function should be given a const_iterator to one of this node's
992      * children. The search begins with that node and continues with all
993      * its siblings. This function does not recurse down the tree; it only
994      * searches in one level.
995      *
996      * @param name The name of the node you want to find.
997      * @param start Where to begin the search.
998      * @param nspace The namespace of the node to find. NULL (the default)
999      *               matches any namespace. A void namespace matches a node
1000      *               without a namespace set.
1001      * @return A const_iterator that points to the node if found.
1002      * @return An end() const_iterator if the node was not found.
1003      * @author Peter Jones
1004      *
1005      * @see elements(const char*) const
1006     **/
1007     //####################################################################
1008     const_iterator find (const char *name, const const_iterator& start,
1009                          const ns *nspace=NULL) const;
1010 
1011     /**
1012      * Run the given XPath query.
1013      *
1014      * @param expr
1015      *  XPath expression to run
1016      * @return
1017      *  XPath query result node set
1018      * @attention
1019      *  The result node set is essentially a list of references into an XML
1020      *  document. Thus the lifetime of the document must exceed the lifetime
1021      *  of the node set. It also means that document modifications made after
1022      *  a result node set is received must be done carefully, e.g. node
1023      *  removal: the removed node may still be referenced in the node set and
1024      *  access to it can cause problems.
1025      * @attention
1026      *  Expressions like "root/node" will result in 0 matches even if the
1027      *  document has <root><node/></root>, due to a bug in libxml2 (at least
1028      *  till version 2.9.1). The workaround is to use "/root/node" or
1029      *  "//root/node" depending on circumstances.
1030      * @attention
1031      *  An XPath query cannot match nodes that belong specifically to the
1032      *  default namespace. Please see a detailed discussion (and solution) of
1033      *  that in the C++ Toolkit book:
1034      *  http://ncbi.github.io/cxx-toolkit/pages/ch_xmlwrapp#ch_xmlwrapp.Run_an_XPath_Query_with_a_De
1035      * @note
1036      *  If the query result is a scalar value (e.g. the count() function) then
1037      *  the result set will have a single node of the following format:
1038      *  <xpath_scalar_result type="TYPE">VALUE</xpath_scalar_result>
1039      *  where TYPE is one of the following: boolean, number, or
1040      *  string, depending on the result type. VALUE is the actual scalar
1041      *  result value.
1042     **/
1043     node_set run_xpath_query (const xpath_expression& expr);
1044 
1045     /**
1046      * Run the given XPath query on a const node.
1047      *
1048      * @param expr
1049      *  XPath expression to run
1050      * @return
1051      *  XPath query const result node set
1052      * @attention
1053      *  The result node set is essentially a list of references into an XML
1054      *  document. Thus the lifetime of the document must exceed the lifetime
1055      *  of the node set. It also means that document modifications made after
1056      *  a result node set is received must be done carefully, e.g. node
1057      *  removal: the removed node may still be referenced in the node set and
1058      *  access to it can cause problems.
1059      * @attention
1060      *  Expressions like "root/node" will result in 0 matches even if the
1061      *  document has <root><node/></root>, due to a bug in libxml2 (at least
1062      *  till version 2.9.1). The workaround is to use "/root/node" or
1063      *  "//root/node" depending on circumstances.
1064      * @attention
1065      *  An XPath query cannot match nodes that belong specifically to the
1066      *  default namespace. Please see a detailed discussion (and solution) of
1067      *  that in the C++ Toolkit book:
1068      *  http://ncbi.github.io/cxx-toolkit/pages/ch_xmlwrapp#ch_xmlwrapp.Run_an_XPath_Query_with_a_De
1069      * @note
1070      *  If the query result is a scalar value (e.g. the count() function) then
1071      *  the result set will have a single node of the following format:
1072      *  <xpath_scalar_result type="TYPE">VALUE</xpath_scalar_result>
1073      *  where TYPE is one of the following: boolean, number, or
1074      *  string, depending on the result type. VALUE is the actual scalar
1075      *  result value.
1076     **/
1077     const node_set run_xpath_query (const xpath_expression& expr) const;
1078 
1079     /**
1080      * Run the given XPath query.
1081      * The method collects all the effective namespace definitions for the node
1082      * and registers them automatically before running the query.
1083      *
1084      * @param expr
1085      *  XPath expression to run, must not be NULL
1086      * @return
1087      *  XPath query result node set
1088      * @attention
1089      *  Expressions like "root/node" will result in 0 matches even if the
1090      *  document has <root><node/></root>, due to a bug in libxml2 (at least
1091      *  till version 2.9.1). The workaround is to use "/root/node" or
1092      *  "//root/node" depending on circumstances.
1093      * @attention
1094      *  An XPath query cannot match nodes that belong specifically to the
1095      *  default namespace. Please see a detailed discussion (and solution) of
1096      *  that in the C++ Toolkit book:
1097      *  http://ncbi.github.io/cxx-toolkit/pages/ch_xmlwrapp#ch_xmlwrapp.Run_an_XPath_Query_with_a_De
1098      * @exception
1099      *  Throws exceptions in case of problems
1100      * @note
1101      *  The default namespace, if there is one, will not be registered
1102      * @note
1103      *  If the query result is a scalar value (e.g. the count() function) then
1104      *  the result set will have a single node of the following format:
1105      *  <xpath_scalar_result type="TYPE">VALUE</xpath_scalar_result>
1106      *  where TYPE is one of the following: boolean, number, or
1107      *  string, depending on the result type. VALUE is the actual scalar
1108      *  result value.
1109     **/
1110     node_set run_xpath_query (const char *  expr);
1111 
1112     /**
1113      * Run the given XPath query on a const node.
1114      * The method collects all the effective namespace definitions for the node
1115      * and registers them automatically before running the query.
1116      *
1117      * @param expr
1118      *  XPath expression to run, must not be NULL
1119      * @return
1120      *  XPath query const result node set
1121      * @attention
1122      *  Expressions like "root/node" will result in 0 matches even if the
1123      *  document has <root><node/></root>, due to a bug in libxml2 (at least
1124      *  till version 2.9.1). The workaround is to use "/root/node" or
1125      *  "//root/node" depending on circumstances.
1126      * @attention
1127      *  An XPath query cannot match nodes that belong specifically to the
1128      *  default namespace. Please see a detailed discussion (and solution) of
1129      *  that in the C++ Toolkit book:
1130      *  http://ncbi.github.io/cxx-toolkit/pages/ch_xmlwrapp#ch_xmlwrapp.Run_an_XPath_Query_with_a_De
1131      * @exception
1132      *  Throws exceptions in case of problems
1133      * @note
1134      *  The default namespace, if there is one, will not be registered
1135      * @note
1136      *  If the query result is a scalar value (e.g. the count() function) then
1137      *  the result set will have a single node of the following format:
1138      *  <xpath_scalar_result type="TYPE">VALUE</xpath_scalar_result>
1139      *  where TYPE is one of the following: boolean, number, or
1140      *  string, depending on the result type. VALUE is the actual scalar
1141      *  result value.
1142     **/
1143     const node_set run_xpath_query (const char *  expr) const;
1144 
1145     //####################################################################
1146     /**
1147      * Insert a new child node. The new node is inserted at the end of
1148      * the child list. This is similar to the xml::node::push_back member
1149      * function, except that an iterator to the inserted node is returned.
1150      *
1151      * @param n The node to insert as a child of this node.
1152      * @return An iterator that points to the newly inserted node.
1153      * @author Peter Jones
1154     **/
1155     //####################################################################
1156     iterator insert (const node &n);
1157 
1158     //####################################################################
1159     /**
1160      * Insert a new child node. The new node is inserted immediately before
1161      * the node pointed to by the given iterator.
1162      *
1163      * @param position An iterator that points to the location where the new
1164      *                 node should be inserted (the new node goes before it).
1165      * @param n The node to insert as a child of this node.
1166      * @return An iterator that points to the newly inserted node.
1167      * @author Peter Jones
1168     **/
1169     //####################################################################
1170     iterator insert (const iterator& position, const node &n);
1171 
1172     //####################################################################
1173     /**
1174      * Replace the node pointed to by the given iterator with another node.
1175      * The old node is removed, including all of its children, and the new
1176      * node is put in its place. This invalidates any iterators that
1177      * point to the node being replaced, as well as any pointers or
1178      * references to that node.
1179      *
1180      * @param old_node An iterator that points to the node that should be removed.
1181      * @param new_node The node to put in old_node's place.
1182      * @return An iterator that points to the new node.
1183      * @author Peter Jones
1184     **/
1185     //####################################################################
1186     iterator replace (const iterator& old_node, const node &new_node);
1187 
1188     //####################################################################
1189     /**
1190      * Erase the node that is pointed to by the given iterator. The node
1191      * and all of its children are removed from this node. This
1192      * invalidates any iterators that point to the node being erased, as
1193      * well as any pointers or references to that node.
1194      *
1195      * @param to_erase An iterator that points to the node to be erased.
1196      * @return An iterator that points to the node after the one being erased.
1197      * @author Peter Jones
1198      * @author Gary A. Passero
1199     **/
1200     //####################################################################
1201     iterator erase (const iterator& to_erase);
1202 
1203     //####################################################################
1204     /**
1205      * Erase all nodes in the given range, from first to last. This
1206      * invalidates any iterators that point to the nodes being erased, as
1207      * well as any pointers or references to those nodes.
1208      *
1209      * @param first The first node in the range to be removed.
1210      * @param last An iterator that points one past the last node to erase. Think xml::node::end().
1211      * @return An iterator that points to the node after the last one being erased.
1212      * @author Peter Jones
1213     **/
1214     //####################################################################
1215     iterator erase (iterator first, const iterator& last);
1216 
1217     //####################################################################
1218     /**
1219      * Erase all child nodes with the given name. This finds every child
1220      * node that has the given node name and removes it from this node.
1221      * This invalidates any iterators that point to the nodes being
1222      * erased, as well as any pointers or references to those nodes.
1223      *
1224      * @param name The name of the nodes to remove.
1225      * @return The number of nodes removed.
1226      * @author Peter Jones
1227     **/
1228     //####################################################################
1229     size_type erase (const char *name);
1230 
1231     //####################################################################
1232     /**
1233      * Erase all child nodes of this node.
1234      *
1235      * @author tbrowder2
1236     */
1237     //####################################################################
1238     void clear (void);
1239 
1240     //####################################################################
1241     /**
1242      * Sort the child nodes of this node using one of their
1243      * attributes. Only nodes that are of xml::node::type_element are
1244      * sorted, and they must have the given node_name.
1245      *
1246      * The sorting is done by calling std::strcmp on the value of the given
1247      * attribute.
1248      *
1249      * @param node_name The name of the nodes to sort.
1250      * @param attr_name The attribute to sort on.
1251      * @author Peter Jones
1252     **/
1253     //####################################################################
1254     void sort (const char *node_name, const char *attr_name);
1255 
1256     //####################################################################
1257     /**
1258      * Sort the child nodes of this node using the given comparison
1259      * function object. All element type nodes are considered for
1260      * sorting.
1261      * The functor is wrapped in an impl::sort_callback<T> and passed to sort_fo().
1262      * @param compare The binary function object to call in order to sort all child nodes.
1263      * @author Peter Jones
1264     **/
1265     //####################################################################
1266     template <typename T> void sort (T compare)
1267     { impl::sort_callback<T> cb(compare); sort_fo(cb); }
1268 
1269     //####################################################################
1270     /**
1271      * Convert the node and all its children into XML text and set the given
1272      * string to that text.
1273      *
1274      * @param xml The string to set the node's XML data to. The string is
1275      *            cleared first.
1276      * @param flags
1277      *        Bitwise mask of the save options. Does not affect XSLT result
1278      *        documents.
1279      * @see xml::save_option
1280      * @note The compression part of the options is currently ignored.
1281     **/
1282     //####################################################################
1283     void save_to_string (std::string &xml,
1284                          save_option_flags flags=save_op_default) const;
1285 
1286     //####################################################################
1287     /**
1288      * Convert the node and all its children into canonicalized XML text and
1289      * set the given string to that text.
1290      *
1291      * @param str The string to place the XML text data (the string is cleared)
1292      * @param c14n_option Canonicalization mode
1293      * @param comments_option Comments option (strip or keep)
1294      * @param format_option Format option (let libxml2 format the document or
1295      *                      not)
1296      * @param node_sort_option Whether or not to sort the nodes before the
1297      *                         canonicalization
1298      * @exception throws xml::exception in case of problems
1299      * @note This member has a significant memory and CPU footprint.
1300     **/
1301     void save_to_string_canonical (
1302                     std::string &                      str,
1303                     canonicalization_option            c14n_option,
1304                     canonicalization_comments_option   comments_option,
1305                     canonicalization_format_option     format_option,
1306                     canonicalization_node_sort_option  node_sort_option) const;
1307 
1308     //####################################################################
1309     /**
1310      * Convert the node and all its children into XML text and append that
1311      * text to the given string.
1312      *
1313      * @param xml The string to append the node's XML data to (the string is
1314      *          not cleared; the content is appended to the string).
1315      * @param flags
1316      *        Bitwise mask of the save options. Does not affect XSLT result
1317      *        documents.
1318      * @see xml::save_option
1319      * @note The compression part of the options is currently ignored.
1320     **/
1321     //####################################################################
1322     void append_to_string (std::string &xml,
1323                            save_option_flags flags=save_op_default) const;
1324 
1325     //####################################################################
1326     /**
1327      * Provides a list of the namespaces that are effective for this node.
1328      *
1329      * @param which Specifies which namespaces to include in the list (default: type_ns_all).
1330     **/
1331     ns_list_type get_effective_namespaces (effective_ns_list_type
1332                                                      which=type_ns_all) const;
1333 
1334     /**
1335      * Sorts the namespace definitions of this node in place.
1336     **/
1337     void sort_namespace_definitions (void);
1338 
1339     //####################################################################
1340     /**
1341      * Write a node and all of its children to the given stream as XML.
1342      *
1343      * @param stream The stream to write the node to, as XML.
1344      * @param n The node to write to the stream.
1345      * @return The stream that was passed in.
1346      * @author Peter Jones
1347     **/
1348     //####################################################################
1349     friend std::ostream& operator<< (std::ostream &stream, const node &n);
1350 
1351     //####################################################################
1352     /**
1353      * Copy constructor: construct a new xml::node as a copy of another one.
1354      *
1355      * @param other The node to copy from.
1356      * @author Peter Jones
1357     **/
1358     //####################################################################
1359     node (const node &other);
1360 
1361     //####################################################################
1362     /**
1363      * Copy assignment: make this node equal to some other node.
1364      *
1365      * @param other The node to copy from.
1366      * @return A reference to this node.
1367      * @author Peter Jones
1368     **/
1369     //####################################################################
1370     node& operator= (const node &other);
1371 
1372     /**
1373      * Move constructor.
1374      * @param other The node to move from.
1375     **/
1376     node (node &&other);
1377 
1378     /**
1379      * Move assignment.
1380      * @param other The node to move from.
1381     **/
1382     node& operator= (node &&other);
1383 
1384 private:
1385     impl::node_impl *pimpl_;   // pointer to the implementation object; impl::node_impl is defined outside this section
1386 
1387     // private ctor to create an uninitialized instance (NOTE(review): the int argument presumably only selects this overload - confirm)
1388     explicit node (int);
1389 
1390     void set_node_data (void *data);          // NOTE(review): data is presumably the raw underlying node pointer - confirm in the implementation
1391     void* get_node_data (void) const;         // NOTE(review): presumably returns the raw pointer stored via set_node_data - confirm
1392     void* release_node_data (void);           // NOTE(review): presumably hands the raw pointer over to the caller - confirm ownership rules
1393     node_set convert_to_nset(void *) const;   // NOTE(review): presumably wraps a raw XPath result into a node_set - confirm
1394     friend class impl::node_iterator;         // the types and the function below are granted access to node's private members
1395     friend class document;
1396     friend class xslt::xpath_object;
1397     friend struct impl::doc_impl;
1398     friend struct impl::node_cmp;
1399     friend class xslt::extension_element;
1400 
1401     friend struct impl::node_private_data *  impl::attach_node_private_data(void *);   // friend function (returns impl::node_private_data*)
1402 
1403     void sort_fo (impl::cbfo_node_compare &fo);   // called by the public sort(T) template, which passes an impl::sort_callback<T>
1404 
1405     // XML namespaces support (private helpers; NOTE(review): the void* parameters presumably carry raw libxml2 pointers, as the libxml2RawNamespace name suggests - confirm)
1406     ns add_namespace_def (const char* uri, const char* prefix);
1407     ns add_matched_namespace_def (void* libxml2RawNamespace, const char* uri,
1408                                   ns_definition_adding_type type);
1409     void erase_duplicate_ns_defs (void* nd, std::deque<ns_list_type>& defs);
1410     void erase_duplicate_ns_defs_single_node (void* nd, std::deque<ns_list_type>& defs);
1411     void erase_unused_ns_defs (void* nd);
1412     ns_list_type get_namespace_definitions (void* nd, ns::ns_safety_type type) const;
1413     void* find_replacement_ns_def (std::deque<ns_list_type>& defs, void* ns);
1414 
1415     // XML XPath support (private helpers; NOTE(review): presumably used by the run_xpath_query() overloads, context being the value returned by create_xpath_context - confirm)
1416     void* create_xpath_context (const xml::xpath_expression& expr) const;
1417     void* evaluate_xpath_expression (const xml::xpath_expression& expr, void* context) const;
1418 
1419     // XSLT extensions support: these global-namespace callbacks are granted access to node's private members
1420     friend void ::xslt_ext_func_cb(void *, int);
1421     friend void ::xslt_ext_element_cb(void*, void*, void*, void*);
1422 
1423 }; // end xml::node class
1424 
1425 } // end xml namespace
1426 #endif
1427