1 /* 2 * Copyright (C) 2001-2003 Peter J Jones (pjones@pmade.org) 3 * 2009 Vaclav Slavik <vslavik@fastmail.fm> 4 * All Rights Reserved 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in 14 * the documentation and/or other materials provided with the 15 * distribution. 16 * 3. Neither the name of the Author nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' 21 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 23 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR 24 * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF 27 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 28 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 29 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 30 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 */ 33 34 /* 35 * $Id: node.hpp 543412 2017-08-09 18:22:55Z satskyse $ 36 * NOTE: This file was modified from its original version 0.6.0 37 * to fit the NCBI C++ Toolkit build framework and 38 * API and functionality requirements. 39 * Most importantly, it adds support for XML namespaces (see "namespace.hpp"). 
40 */ 41 42 /** @file 43 * This file contains the definition of the xml::node class. 44 **/ 45 46 #ifndef _xmlwrapp_node_h_ 47 #define _xmlwrapp_node_h_ 48 49 // for NCBI_DEPRECATED 50 #include <ncbiconf.h> 51 52 // xmlwrapp includes 53 #include <misc/xmlwrapp/xml_init.hpp> 54 #include <misc/xmlwrapp/namespace.hpp> 55 #include <misc/xmlwrapp/attributes.hpp> 56 #include <misc/xmlwrapp/xml_save.hpp> 57 58 // hidden stuff 59 #include <misc/xmlwrapp/impl/_cbfo.hpp> 60 61 62 // standard includes 63 #include <cstddef> 64 #include <iosfwd> 65 #include <string> 66 #include <deque> 67 68 // Forward declaration for a friend below 69 extern "C" { void xslt_ext_func_cb(void *, int); } 70 extern "C" { void xslt_ext_element_cb(void*, void*, void*, void*); } 71 72 namespace xslt { 73 class xpath_object; 74 class extension_element; 75 } 76 77 namespace xml { 78 79 // forward declarations 80 class document; 81 class xpath_expression; 82 class node_set; 83 84 namespace impl { 85 class node_iterator; 86 class iter_advance_functor; 87 struct node_impl; 88 struct doc_impl; 89 struct nipimpl; 90 struct node_cmp; 91 struct node_private_data; 92 node_private_data* attach_node_private_data(void *); 93 } 94 95 /** 96 * The xml::node class is used to hold information about one XML node. This 97 * includes the name of the node, the namespace of the node and attributes 98 * for the node. It also has an iterator whereby you can get to the children 99 * nodes. 100 * 101 * It should be noted that any member function that returns a const char* 102 * returns a temporary value. The pointer that is returned will change with 103 * ANY operation to the xml::node. If you need the data to stick around a 104 * little longer you should put it inside a std::string. 
105 **/ 106 class node { 107 public: 108 /// size type 109 typedef std::size_t size_type; 110 111 /// enum for the different types of XML nodes 112 enum node_type { 113 type_element, ///< XML element such as "<chapter/>" 114 type_text, ///< Text node 115 type_cdata, ///< <![CDATA[text]]> 116 type_pi, ///< Processing Instruction 117 type_comment, ///< XML comment 118 type_entity, ///< Entity as in &amp; 119 type_entity_ref, ///< Entity ref 120 type_xinclude, ///< <xi:include/> node 121 type_document, ///< Document node 122 type_document_type, ///< DOCTYPE node 123 type_document_frag, ///< Document Fragment 124 type_notation, ///< Notation 125 type_dtd, ///< DTD node 126 type_dtd_element, ///< DTD <!ELEMENT> node 127 type_dtd_attribute, ///< DTD <!ATTRLIST> node 128 type_dtd_entity, ///< DTD <!ENTITY> 129 type_dtd_namespace ///< ? 130 }; 131 132 /// enum for policies of adding namespace definitions 133 enum ns_definition_adding_type { 134 type_replace_if_exists, ///< replace URI if ns with the same prefix exists 135 type_throw_if_exists ///< throw exception if ns with the same prefix exists 136 }; 137 138 /// enum to specify how to remove namespace definitions 139 enum ns_definition_erase_type { 140 type_ns_def_erase_if_not_used, ///< Remove the definition only if it 141 ///< is not in use. 142 ///< If the definition is in use then 143 ///< throw an exception. 144 type_ns_def_erase_enforce ///< Remove the definition regardless 145 ///< if it is used or not. If any 146 ///< attribute or node uses the 147 ///< definition then its namespace will 148 ///< be adjusted to a default one (if 149 ///< defined above) or will be set to 150 ///< no namespace (otherwise). 
151 }; 152 153 /// enum to specify what namespaces to include into the list of the 154 /// node effective namespaces 155 enum effective_ns_list_type { 156 type_ns_all, ///< include all namespaces 157 type_ns_only_default, ///< include only default namespace 158 type_ns_only_non_default ///< include only non-default namespaces 159 }; 160 161 /** 162 * Helper struct for creating a xml::node of type_cdata. 163 * 164 * @code 165 * xml::node mynode(xml::node::cdata("This is a CDATA section")); 166 * @endcode 167 */ 168 struct cdata { cdataxml::node::cdata169 explicit cdata (const char *text) : t(text) { } 170 const char *t; 171 }; 172 173 /** 174 * Helper struct for creating a xml::node of type_comment. 175 * 176 * @code 177 * xml::node mynode(xml::node::comment("This is an XML comment")); 178 * @endcode 179 */ 180 struct comment { commentxml::node::comment181 explicit comment (const char *text) : t(text) { } 182 const char *t; 183 }; 184 185 /** 186 * Helper struct for creating a xml::node of type_pi. 187 * 188 * @code 189 * xml::node mynode(xml::node::pi("xslt", "stylesheet=\"test.xsl\"")); 190 * @endcode 191 */ 192 struct pi { pixml::node::pi193 explicit pi (const char *name, const char *content=0) : n(name), c(content) { } 194 const char *n, *c; 195 }; 196 197 /** 198 * Helper struct for creating a xml::node of type_text. 199 * 200 * @code 201 * xml::node mynode(xml::node::text("This is an XML text fragment")); 202 * @endcode 203 */ 204 struct text { textxml::node::text205 explicit text (const char *txt) : t(txt) { } 206 const char *t; 207 }; 208 209 //#################################################################### 210 /** 211 * Construct a new blank xml::node. 212 * 213 * @author Peter Jones 214 **/ 215 //#################################################################### 216 node (void); 217 218 //#################################################################### 219 /** 220 * Construct a new xml::node and set the name of the node. 
221 * 222 * @param name The name of the new node. 223 * @author Peter Jones 224 **/ 225 //#################################################################### 226 explicit node (const char *name); 227 228 //#################################################################### 229 /** 230 * Construct a new xml::node given a name and content. The content will 231 * be used to create a new child text node. 232 * All the special symbols ('<', '>', '&', '"', '\r') in the given 233 * content are encoded before assigning the new content. 234 * If entities are needed in the content please use set_raw_content(...). 235 * 236 * @param name The name of the new element. 237 * @param content The text that will be used to create a child node. 238 * @author Peter Jones 239 **/ 240 //#################################################################### 241 node (const char *name, const char *content); 242 243 //#################################################################### 244 /** 245 * Construct a new xml::node that is of type_cdata. The cdata_info 246 * parameter should contain the contents of the CDATA section. 247 * 248 * @note Sample Use Example: 249 * @code 250 * xml::node mynode(xml::node::cdata("This is a CDATA section")); 251 * @endcode 252 * 253 * @param cdata_info A cdata struct that tells xml::node what the content will be. 254 * @author Peter Jones 255 **/ 256 //#################################################################### 257 explicit node (cdata cdata_info); 258 259 //#################################################################### 260 /** 261 * Construct a new xml::node that is of type_comment. The comment_info 262 * parameter should contain the contents of the XML comment. 263 * 264 * @note Sample Use Example: 265 * @code 266 * xml::node mynode(xml::node::comment("This is an XML comment")); 267 * @endcode 268 * 269 * @param comment_info A comment struct that tells xml::node what the comment will be. 
270 * @author Peter Jones 271 **/ 272 //#################################################################### 273 explicit node (comment comment_info); 274 275 //#################################################################### 276 /** 277 * Construct a new xml::node that is of type_pi. The pi_info parameter 278 * should contain the name of the XML processing instruction (PI), and 279 * optionally, the contents of the XML PI. 280 * 281 * @note Sample Use Example: 282 * @code 283 * xml::node mynode(xml::node::pi("xslt", "stylesheet=\"test.xsl\"")); 284 * @endcode 285 * 286 * @param pi_info A pi struct that tells xml::node what the name and contents of the XML PI are. 287 * @author Peter Jones 288 **/ 289 //#################################################################### 290 explicit node (pi pi_info); 291 292 //#################################################################### 293 /** 294 * Construct a new xml::node that is of type_text. The text_info 295 * parameter should contain the text. 296 * 297 * @note Sample Use Example: 298 * @code 299 * xml::node mynode(xml::node::text("This is XML text")); 300 * @endcode 301 * 302 * @param text_info A text struct that tells xml::node what the text will be. 303 * @author Vaclav Slavik 304 **/ 305 //#################################################################### 306 explicit node (text text_info); 307 308 //#################################################################### 309 /** 310 * Create a copy of the node which is detached from the document. 311 * The nested nodes as well as namespace definitions are copied too. 312 * 313 * @return A pointer to the copied node. The user is responsible to delete 314 * it. 315 * @exception Throws xml::exception if the copying failed. 
316 **/ 317 //#################################################################### 318 node* detached_copy (void) const; 319 320 //#################################################################### 321 /** 322 * Copy another node object into this one. This node object will 323 * be an exact copy of the other node after the assignement. 324 * 325 * @param other The node to copy from. 326 * @return *this 327 * @author Denis Vakatov 328 **/ 329 //#################################################################### 330 node& assign (const node &other); 331 332 //#################################################################### 333 /** 334 * Class destructor 335 * 336 * @author Peter Jones 337 **/ 338 //#################################################################### 339 virtual ~node (void); 340 341 //#################################################################### 342 /** 343 * Set the name of this xml::node. 344 * 345 * @param name The new name for this xml::node. 346 * @author Peter Jones 347 **/ 348 //#################################################################### 349 void set_name (const char *name); 350 351 //#################################################################### 352 /** 353 * Get the name of this xml::node. 354 * 355 * This function may change in the future to return std::string. 356 * Feedback is welcome. 357 * 358 * @return The name of this node. 359 * @author Peter Jones 360 **/ 361 //#################################################################### 362 const char* get_name (void) const; 363 364 //#################################################################### 365 /** 366 * Set the content of a node. If this node is an element node, this 367 * function will remove all of its children nodes and replace them 368 * with one text node set to the new content. 369 * All the special symbols ('<', '>', '&', '"', '\r') in the given 370 * content are encoded before assigning the new content. 
371 * If entities are needed in the content please use 372 * set_raw_content(...). 373 * 374 * @param content The content of the text node. 375 * @author Peter Jones 376 **/ 377 //#################################################################### 378 void set_content (const char *content); 379 380 //#################################################################### 381 /** 382 * Set the raw content of a node. If this node is an element node, 383 * this function will remove all of its children nodes and replace 384 * them with one text node set to the new content. 385 * The given content is checked for '<' and '>' characters. If found 386 * they will be replaced with '<' and '>' respectively and this 387 * is the only potential conversion done for the given raw content. 388 * This member is likely used if entities are needed in the node content. 389 * In any case it is the user responsibility to provide valid 390 * content for this member. 391 * 392 * @param raw_content The raw content of the text node. 393 **/ 394 //#################################################################### 395 void set_raw_content (const char *raw_content); 396 397 //#################################################################### 398 /** 399 * Get the content for this text node. If this node is not a text node 400 * but it has children nodes that are text nodes, the contents of those 401 * child nodes will be returned. If there is no content or these 402 * conditions do not apply, zero will be returned. 403 * 404 * This function may change in the future to return std::string. 405 * Feedback is welcome. 406 * 407 * @return The content or 0. 408 * @author Peter Jones 409 **/ 410 //#################################################################### 411 const char* get_content (void) const; 412 413 //#################################################################### 414 /** 415 * Get this node's "type". 
You can use that information to know what you 416 * can and cannot do with it. 417 * 418 * @return The node's type. 419 * @author Peter Jones 420 **/ 421 //#################################################################### 422 node_type get_type (void) const; 423 424 //#################################################################### 425 /** 426 * Get the list of attributes. You can use the returned object to get 427 * and set the attributes for this node. Make sure you use a reference 428 * to this returned object, to prevent a copy. 429 * 430 * @return The xml::attributes object for this node. 431 * @author Peter Jones 432 **/ 433 //#################################################################### 434 xml::attributes& get_attributes (void); 435 436 //#################################################################### 437 /** 438 * Get the list of attributes. You can use the returned object to get 439 * the attributes for this node. Make sure you use a reference to this 440 * returned object, to prevent a copy. 441 * 442 * @return The xml::attributes object for this node. 443 * @author Peter Jones 444 **/ 445 //#################################################################### 446 const xml::attributes& get_attributes (void) const; 447 448 //#################################################################### 449 /** 450 * Search for a node attribute. 451 * 452 * @param name 453 * The name of the attribute to find. The name could be given as a 454 * qualified name, e.g. 'prefix:attr_name'. If the name is qualified then 455 * the nspace argument must be NULL (otherwise an exception is 456 * generated) and the attribute search is namespace aware with an 457 * effective namespace identified by the given prefix. 
458 * @param nspace 459 * The namespace of the atrribute to find: 460 * - NULL matches any namespace 461 * - Void namespace matches attributes without a namespace set 462 * - Unsafe namespace is used as it is 463 * - A safe namespace is resolved basing on the uri only 464 * @return iterator to the found attribute. If there is no such an 465 * attribute then the provided iterator equals to 466 * attributes::end(). 467 **/ 468 attributes::iterator find_attribute (const char* name, 469 const ns* nspace = NULL); 470 471 //#################################################################### 472 /** 473 * Search for a node attribute. 474 * 475 * @param name 476 * The name of the attribute to find. The name could be given as a 477 * qualified name, e.g. 'prefix:attr_name'. If the name is qualified then 478 * the nspace argument must be NULL (otherwise an exception is 479 * generated) and the attribute search is namespace aware with an 480 * effective namespace identified by the given prefix. 481 * @param nspace 482 * The namespace of the atrribute to find: 483 * - NULL matches any namespace 484 * - Void namespace matches attributes without a namespace set 485 * - Unsafe namespace is used as it is 486 * - A safe namespace is resolved basing on the uri only 487 * @return const iterator to the found attribute. If there is no such an 488 * attribute then the provided iterator equals to 489 * attributes::end(). 490 **/ 491 attributes::const_iterator find_attribute (const char* name, 492 const ns* nspace = NULL) const; 493 494 //#################################################################### 495 /** 496 * Get the namespace of this xml::node. 497 * 498 * @param type 499 * The required type of namespace object (safe/unsafe). 500 * @return 501 * The namespace of this node. If the node has no namespace 502 * then return a "void" namespace object with empty prefix and URI 503 * (for which xml::ns::is_void() returns TRUE). 
504 **/ 505 //#################################################################### 506 ns get_namespace (ns::ns_safety_type type = ns::type_safe_ns) const; 507 508 //#################################################################### 509 /** 510 * Get the namespaces defined at this xml::node. 511 * 512 * @param type The required type of namespace objects (safe/unsafe). 513 * @return The namespaces defined at this node. 514 * If no namespaces are defined then return an empty container. 515 **/ 516 //#################################################################### 517 ns_list_type get_namespace_definitions (ns::ns_safety_type type = ns::type_safe_ns) const; 518 519 //#################################################################### 520 /** 521 * Set the node namespace. 522 * 523 * The namespace definition is searched up in the hierarchy of nodes. 524 * If a namespace with the given prefix and URI is not found 525 * then throw an exception. 526 * 527 * @param name_space 528 * Namespace to set to the node. 529 * "Void" namespace is treated as a namespace removal request -- 530 * exactly the same as erase_namespace() call. 531 * @note There are no checks at all if an unsafe ns object is provided. 532 * @return Unsafe namespace 533 **/ 534 //#################################################################### 535 ns set_namespace (const ns& name_space); 536 537 //#################################################################### 538 /** 539 * Set the node namespace. 540 * 541 * The namespace definition is searched up in the hierarchy of nodes. If 542 * a namespace with the given prefix is not found then throw an exception. 543 * 544 * @param prefix 545 * Namespace prefix. For the default namespace use NULL or empty string. 
546 * @return Unsafe namespace 547 **/ 548 //#################################################################### 549 ns set_namespace (const char* prefix); 550 551 //#################################################################### 552 /** 553 * Add namespace definition to the node. 554 * 555 * If the node already has a namespace definition with the same 556 * prefix then its URI will be replaced with the new one, and that's it. 557 * Otherwise, the hierarchy of nodes (including their attributes) is 558 * walked down, updating all namespaces (with the same prefix) which do 559 * not use namespace definitions (with the same prefix) which are 560 * redefined below this node. 561 * 562 * @param name_space 563 * The namespace definition to add to the node. 564 * @param type 565 * What to do (replace or throw exception) when encountering a 566 * namespace definition with the same prefix. 567 * @return Unsafe namespace 568 **/ 569 //#################################################################### 570 ns add_namespace_definition (const ns& name_space, 571 ns_definition_adding_type type); 572 573 //#################################################################### 574 /** 575 * Add namespace definitions to the node. 576 * 577 * @sa add_namespace_definition 578 * 579 * @param name_spaces 580 * List of namespace definitions to add to the node. 581 * @param type 582 * What to do (replace or throw exception) when encountering a 583 * namespace definition with the same prefix. 584 **/ 585 //#################################################################### 586 void add_namespace_definitions (const ns_list_type& name_spaces, 587 ns_definition_adding_type type); 588 589 //#################################################################### 590 /** 591 * Remove the node namespace definition. 592 * 593 * @param prefix 594 * The prefix of the namespace to be removed from the node namespace 595 * definitions. 596 * For the default namespace use NULL or empty string. 
597 * If there is no such namespace definition, then do nothing. 598 * @param how 599 * Specifies what to do if the given namespace is in use. 600 **/ 601 //#################################################################### 602 void erase_namespace_definition (const char* prefix, 603 ns_definition_erase_type how = 604 type_ns_def_erase_if_not_used); 605 606 //#################################################################### 607 /** 608 * Remove the node namespace. 609 * 610 * The hierarchy of nodes is searched up and if a default namespace is 611 * found then it is used as a new node namespace. 612 **/ 613 //#################################################################### 614 void erase_namespace (void); 615 616 //#################################################################### 617 /** 618 * Look up a namespace with the given prefix. 619 * 620 * Walk the nodes hierarchy up and check the namespace definition 621 * prefixes. If the prefix matches, then return the 622 * corresponding safe/unsafe namespace object. 623 * 624 * @param prefix 625 * Namespace prefix to look for. 626 * For the default namespace use NULL or empty string. 627 * @param type 628 * Type of namespace object (safe/unsafe) to return. 629 * @return 630 * Namespace object ("void" namespace if none found). 631 **/ 632 //#################################################################### 633 ns lookup_namespace (const char* prefix, 634 ns::ns_safety_type type = ns::type_safe_ns) const; 635 636 //#################################################################### 637 /** 638 * Erase duplicate namespace definitions. 639 * 640 * Walks the nodes hierarchy down and erases dulicate namespace 641 * definitions. 642 **/ 643 //#################################################################### 644 void erase_duplicate_ns_defs (void); 645 646 //#################################################################### 647 /** 648 * Erase unused namespace definitions. 
649 * 650 * Walks the nodes hierarchy down and erases unused namespace 651 * definitions. 652 **/ 653 //#################################################################### 654 void erase_unused_ns_defs (void); 655 656 //#################################################################### 657 /** 658 * Get the node path. 659 * 660 * @return node path 661 * @exception throw an exception in case of errors 662 **/ 663 //#################################################################### 664 std::string get_path (void) const; 665 666 //#################################################################### 667 /** 668 * Find out if this node is a text node or sometiming like a text node, 669 * CDATA for example. 670 * 671 * @return True if this node is a text node; false otherwise. 672 * @author Peter Jones 673 **/ 674 //#################################################################### 675 bool is_text (void) const; 676 677 //#################################################################### 678 /** 679 * Add a child xml::node to this node. 680 * 681 * @param child The child xml::node to add. 682 * @author Peter Jones 683 **/ 684 //#################################################################### 685 void push_back (const node &child); 686 687 //#################################################################### 688 /** 689 * Swap this node with another one. 690 * 691 * @param other The other node to swap with. 692 * @author Peter Jones 693 **/ 694 //#################################################################### 695 void swap (node &other); 696 697 class const_iterator; // forward declaration 698 699 /** 700 * The xml::node::iterator provides a way to access children nodes 701 * similar to a standard C++ container. The nodes that are pointed to by 702 * the iterator can be changed. 
703 */ 704 class iterator { 705 public: 706 typedef node value_type; 707 typedef std::ptrdiff_t difference_type; 708 typedef value_type* pointer; 709 typedef value_type& reference; 710 typedef std::forward_iterator_tag iterator_category; 711 iterator(void)712 iterator (void) : pimpl_(0) {} 713 iterator (const iterator &other); 714 iterator& operator= (const iterator& other); 715 ~iterator (void); 716 717 reference operator* (void) const; 718 pointer operator-> (void) const; 719 720 /// prefix increment 721 iterator& operator++ (void); 722 723 /// postfix increment (avoid if possible for better performance) 724 iterator operator++ (int); 725 operator ==(const iterator & other) const726 bool operator==(const iterator& other) const 727 { return get_raw_node() == other.get_raw_node(); } operator !=(const iterator & other) const728 bool operator!=(const iterator& other) const 729 { return !(*this == other); } 730 731 private: 732 impl::nipimpl *pimpl_; 733 explicit iterator (void *data); 734 void* get_raw_node (void) const; 735 void swap (iterator &other); 736 friend class node; 737 friend class document; 738 friend class const_iterator; 739 }; 740 741 /** 742 * The xml::node::const_iterator provides a way to access children nodes 743 * similar to a standard C++ container. The nodes that are pointed to by 744 * the const_iterator cannot be changed. 
745 */ 746 class const_iterator { 747 public: 748 typedef const node value_type; 749 typedef std::ptrdiff_t difference_type; 750 typedef value_type* pointer; 751 typedef value_type& reference; 752 typedef std::forward_iterator_tag iterator_category; 753 const_iterator(void)754 const_iterator (void) : pimpl_(0) {} 755 const_iterator (const const_iterator &other); 756 const_iterator (const iterator &other); 757 const_iterator& operator= (const const_iterator& other); 758 ~const_iterator (void); 759 760 reference operator* (void) const; 761 pointer operator-> (void) const; 762 763 /// prefix increment 764 const_iterator& operator++ (void); 765 766 /// postfix increment (avoid if possible for better performance) 767 const_iterator operator++ (int); 768 operator ==(const const_iterator & other) const769 bool operator==(const const_iterator& other) const 770 { return get_raw_node() == other.get_raw_node(); } operator !=(const const_iterator & other) const771 bool operator!=(const const_iterator& other) const 772 { return !(*this == other); } 773 private: 774 impl::nipimpl *pimpl_; 775 explicit const_iterator (void *data); 776 void* get_raw_node (void) const; 777 void swap (const_iterator &other); 778 friend class document; 779 friend class node; 780 }; 781 782 //#################################################################### 783 /** 784 * Returns the number of childer this nodes has. If you just want to 785 * know how if this node has children or not, you should use 786 * xml::node::empty() instead. 787 * 788 * @return The number of children this node has. 789 * @author Peter Jones 790 **/ 791 //#################################################################### 792 size_type size (void) const; 793 794 //#################################################################### 795 /** 796 * Find out if this node has any children. This is the same as 797 * xml::node::size() == 0 except it is much faster. 798 * 799 * @return True if this node DOES NOT have any children. 
800 * @return False if this node does have children. 801 * @author Peter Jones 802 **/ 803 //#################################################################### 804 bool empty (void) const; 805 806 //#################################################################### 807 /** 808 * Get an iterator that points to the beginning of this node's children. 809 * 810 * @return An iterator that points to the beginning of the children. 811 * @author Peter Jones 812 **/ 813 //#################################################################### 814 iterator begin (void); 815 816 //#################################################################### 817 /** 818 * Get a const_iterator that points to the beginning of this node's 819 * children. 820 * 821 * @return A const_iterator that points to the beginning of the children. 822 * @author Peter Jones 823 **/ 824 //#################################################################### 825 const_iterator begin (void) const; 826 827 //#################################################################### 828 /** 829 * Get an iterator that points one past the last child for this node. 830 * 831 * @return A "one past the end" iterator. 832 * @author Peter Jones 833 **/ 834 //#################################################################### end(void)835 iterator end (void) { return iterator(); } 836 837 //#################################################################### 838 /** 839 * Get a const_iterator that points one past the last child for this 840 * node. 841 * 842 * @return A "one past the end" const_iterator 843 * @author Peter Jones 844 **/ 845 //#################################################################### end(void) const846 const_iterator end (void) const { return const_iterator(); } 847 848 //#################################################################### 849 /** 850 * Get an iterator that points back at this node. 851 * 852 * @return An iterator that points at this node. 
853 * @author Peter Jones 854 **/ 855 //#################################################################### 856 iterator self (void); 857 858 //#################################################################### 859 /** 860 * Get a const_iterator that points back at this node. 861 * 862 * @return A const_iterator that points at this node. 863 * @author Peter Jones 864 **/ 865 //#################################################################### 866 const_iterator self (void) const; 867 868 //#################################################################### 869 /** 870 * Find out if this node is a root one, i.e. has no parent. 871 * 872 * @return true if the node is root. 873 **/ 874 //#################################################################### 875 bool is_root (void) const; 876 877 //#################################################################### 878 /** 879 * Get an iterator that points at the parent of this node. If this node 880 * does not have a parent, this member function will return an "end" 881 * iterator. 882 * 883 * @note 884 * It is recommended to call is_root() function before calling parent(). 885 * If is_root() returns true then the parent() provided iterator cannot 886 * be dereferenced. 887 * 888 * @return An iterator that points to this nodes parent. 889 * @return If no parent, returns the same iterator that xml::node::end() returns. 890 * @author Peter Jones 891 **/ 892 //#################################################################### 893 iterator parent (void); 894 895 //#################################################################### 896 /** 897 * Get a const_iterator that points at the parent of this node. If this 898 * node does not have a parent, this member function will return an 899 * "end" const_iterator. 900 * 901 * @note 902 * It is recommended to call is_root() function before calling parent(). 903 * If is_root() returns true then the parent() provided iterator cannot 904 * be dereferenced. 
     *
     * @return A const_iterator that points to this node's parent.
     * @return If no parent, returns the same const_iterator that xml::node::end() returns.
     * @author Peter Jones
    **/
    //####################################################################
    const_iterator parent (void) const;

    //####################################################################
    /**
     * Find the first child node that has the given name and namespace.
     * If no such node can be found, this function will return the same
     * iterator that end() would return.
     *
     * This function is not recursive. That is, it will not search down the
     * tree for the requested node. Instead, it will only search one level
     * deep, only checking the children of this node.
     *
     * @param name The name of the node you want to find.
     * @param nspace The namespace of the node to find. NULL matches
     *               any namespace. A void namespace matches nodes that
     *               have no namespace set.
     * @return An iterator that points to the node if found.
     * @return An end() iterator if the node was not found.
     * @author Peter Jones
     *
     * @see elements(const char*), find(const char*, iterator)
    **/
    //####################################################################
    iterator find (const char *name, const ns *nspace=NULL);

    //####################################################################
    /**
     * Find the first child node that has the given name and namespace.
     * If no such node can be found, this function will return the same
     * const_iterator that end() would return.
     *
     * This function is not recursive. That is, it will not search down the
     * tree for the requested node. Instead, it will only search one level
     * deep, only checking the children of this node.
     *
     * @param name The name of the node you want to find.
     * @param nspace The namespace of the node to find. NULL matches
     *               any namespace.
     *               A void namespace matches nodes that have no
     *               namespace set.
     * @return A const_iterator that points to the node if found.
     * @return An end() const_iterator if the node was not found.
     * @author Peter Jones
     *
     * @see elements(const char*) const, find(const char*, const_iterator) const
    **/
    //####################################################################
    const_iterator find (const char *name, const ns *nspace=NULL) const;

    //####################################################################
    /**
     * Find the first child node, starting with the given iterator, that has
     * the given name and namespace. If no such node can be found, this
     * function will return the same iterator that end() would return.
     *
     * This function should be given an iterator to one of this node's
     * children. The search will begin with that node and continue with all
     * its siblings. This function will not recurse down the tree, it only
     * searches in one level.
     *
     * @param name The name of the node you want to find.
     * @param start Where to begin the search.
     * @param nspace The namespace of the node to find. NULL matches
     *               any namespace. A void namespace matches nodes that
     *               have no namespace set.
     * @return An iterator that points to the node if found.
     * @return An end() iterator if the node was not found.
     * @author Peter Jones
     *
     * @see elements(const char*)
    **/
    //####################################################################
    iterator find (const char *name, const iterator& start, const ns *nspace=NULL);

    //####################################################################
    /**
     * Find the first child node, starting with the given const_iterator,
     * that has the given name and namespace. If no such node can be found,
     * this function will return the same const_iterator that end() would
     * return.
     *
     * This function should be given a const_iterator to one of this node's
     * children. The search will begin with that node and continue with all
     * its siblings. This function will not recurse down the tree, it only
     * searches in one level.
     *
     * @param name The name of the node you want to find.
     * @param start Where to begin the search.
     * @param nspace The namespace of the node to find. NULL matches
     *               any namespace. A void namespace matches nodes that
     *               have no namespace set.
     * @return A const_iterator that points to the node if found.
     * @return An end() const_iterator if the node was not found.
     * @author Peter Jones
     *
     * @see elements(const char*) const
    **/
    //####################################################################
    const_iterator find (const char *name, const const_iterator& start,
                         const ns *nspace=NULL) const;

    /**
     * Run the given XPath query.
     *
     * @param expr
     *  XPath expression to run
     * @return
     *  XPath query result node set
     * @attention
     *  The result node set is essentially a list of references to an XML
     *  document. Thus the lifetime of the document must be longer than the
     *  lifetime of the node set. It also means that document modifications
     *  made after a result node set is received must be done carefully,
     *  e.g. node removal. The removed node may be referenced in the node set
     *  and access to it can cause problems.
     * @attention
     *  Expressions like "root/node" will result in 0 matches even if the
     *  document has <root><node/></root>, due to a bug in libxml2 (at least
     *  till version 2.9.1). The workaround is to use "/root/node" or
     *  "//root/node" depending on circumstances.
     * @attention
     *  XPath query cannot match nodes that belong specifically to the default
     *  namespace.
     *  Please see a detailed discussion (and solution) on that in
     *  the C++ Toolkit book:
     *  http://ncbi.github.io/cxx-toolkit/pages/ch_xmlwrapp#ch_xmlwrapp.Run_an_XPath_Query_with_a_De
     * @note
     *  If the query result is a scalar value (e.g. count() function) then
     *  the result set will have a single node of the following format:
     *  <xpath_scalar_result type="TYPE">VALUE</xpath_scalar_result>
     *  where TYPE is one of the following: boolean, number, or
     *  string depending on the result type. The VALUE is the actual result
     *  scalar value.
    **/
    node_set run_xpath_query (const xpath_expression& expr);

    /**
     * Run the given XPath query.
     *
     * @param expr
     *  XPath expression to run
     * @return
     *  XPath query const result node set
     * @attention
     *  The result node set is essentially a list of references to an XML
     *  document. Thus the lifetime of the document must be longer than the
     *  lifetime of the node set. It also means that document modifications
     *  made after a result node set is received must be done carefully,
     *  e.g. node removal. The removed node may be referenced in the node set
     *  and access to it can cause problems.
     * @attention
     *  Expressions like "root/node" will result in 0 matches even if the
     *  document has <root><node/></root>, due to a bug in libxml2 (at least
     *  till version 2.9.1). The workaround is to use "/root/node" or
     *  "//root/node" depending on circumstances.
     * @attention
     *  XPath query cannot match nodes that belong specifically to the default
     *  namespace. Please see a detailed discussion (and solution) on that in
     *  the C++ Toolkit book:
     *  http://ncbi.github.io/cxx-toolkit/pages/ch_xmlwrapp#ch_xmlwrapp.Run_an_XPath_Query_with_a_De
     * @note
     *  If the query result is a scalar value (e.g.
     *  count() function) then
     *  the result set will have a single node of the following format:
     *  <xpath_scalar_result type="TYPE">VALUE</xpath_scalar_result>
     *  where TYPE is one of the following: boolean, number, or
     *  string depending on the result type. The VALUE is the actual result
     *  scalar value.
    **/
    const node_set run_xpath_query (const xpath_expression& expr) const;

    /**
     * Run the given XPath query.
     * The method collects all the effective namespace definitions for the node
     * and registers them automatically before running the query.
     *
     * @param expr
     *  XPath expression to run, must not be NULL
     * @return
     *  XPath query result node set
     * @attention
     *  Expressions like "root/node" will result in 0 matches even if the
     *  document has <root><node/></root>, due to a bug in libxml2 (at least
     *  till version 2.9.1). The workaround is to use "/root/node" or
     *  "//root/node" depending on circumstances.
     * @attention
     *  XPath query cannot match nodes that belong specifically to the default
     *  namespace. Please see a detailed discussion (and solution) on that in
     *  the C++ Toolkit book:
     *  http://ncbi.github.io/cxx-toolkit/pages/ch_xmlwrapp#ch_xmlwrapp.Run_an_XPath_Query_with_a_De
     * @exception
     *  Throws exceptions in case of problems
     * @note
     *  The default namespace, if any, will not be registered
     * @note
     *  If the query result is a scalar value (e.g. count() function) then
     *  the result set will have a single node of the following format:
     *  <xpath_scalar_result type="TYPE">VALUE</xpath_scalar_result>
     *  where TYPE is one of the following: boolean, number, or
     *  string depending on the result type. The VALUE is the actual result
     *  scalar value.
    **/
    node_set run_xpath_query (const char * expr);

    /**
     * Run the given XPath query.
     * The method collects all the effective namespace definitions for the node
     * and registers them automatically before running the query.
     *
     * @param expr
     *  XPath expression to run, must not be NULL
     * @return
     *  XPath query const result node set
     * @attention
     *  Expressions like "root/node" will result in 0 matches even if the
     *  document has <root><node/></root>, due to a bug in libxml2 (at least
     *  till version 2.9.1). The workaround is to use "/root/node" or
     *  "//root/node" depending on circumstances.
     * @attention
     *  XPath query cannot match nodes that belong specifically to the default
     *  namespace. Please see a detailed discussion (and solution) on that in
     *  the C++ Toolkit book:
     *  http://ncbi.github.io/cxx-toolkit/pages/ch_xmlwrapp#ch_xmlwrapp.Run_an_XPath_Query_with_a_De
     * @exception
     *  Throws exceptions in case of problems
     * @note
     *  The default namespace, if any, will not be registered
     * @note
     *  If the query result is a scalar value (e.g. count() function) then
     *  the result set will have a single node of the following format:
     *  <xpath_scalar_result type="TYPE">VALUE</xpath_scalar_result>
     *  where TYPE is one of the following: boolean, number, or
     *  string depending on the result type. The VALUE is the actual result
     *  scalar value.
    **/
    const node_set run_xpath_query (const char * expr) const;

    //####################################################################
    /**
     * Insert a new child node. The new node will be inserted at the end of
     * the child list. This is similar to the xml::node::push_back member
     * function except that an iterator to the inserted node is returned.
     *
     * @param n The node to insert as a child of this node.
     * @return An iterator that points to the newly inserted node.
     * @author Peter Jones
    **/
    //####################################################################
    iterator insert (const node &n);

    //####################################################################
    /**
     * Insert a new child node. The new node will be inserted before the
     * node pointed to by the given iterator.
     *
     * @param position An iterator that points to the location where the new
     *                 node should be inserted (before it).
     * @param n The node to insert as a child of this node.
     * @return An iterator that points to the newly inserted node.
     * @author Peter Jones
    **/
    //####################################################################
    iterator insert (const iterator& position, const node &n);

    //####################################################################
    /**
     * Replace the node pointed to by the given iterator with another node.
     * The old node will be removed, including all its children, and
     * replaced with the new node. This will invalidate any iterators that
     * point to the node to be replaced, or any pointers or references to
     * that node.
     *
     * @param old_node An iterator that points to the node that should be removed.
     * @param new_node The node to put in old_node's place.
     * @return An iterator that points to the new node.
     * @author Peter Jones
    **/
    //####################################################################
    iterator replace (const iterator& old_node, const node &new_node);

    //####################################################################
    /**
     * Erase the node that is pointed to by the given iterator. The node
     * and all its children will be removed from this node. This will
     * invalidate any iterators that point to the node to be erased, or any
     * pointers or references to that node.
     *
     * @param to_erase An iterator that points to the node to be erased.
     * @return An iterator that points to the node after the one being erased.
     * @author Peter Jones
     * @author Gary A. Passero
    **/
    //####################################################################
    iterator erase (const iterator& to_erase);

    //####################################################################
    /**
     * Erase all nodes in the given range, from first to last. This will
     * invalidate any iterators that point to the nodes to be erased, or any
     * pointers or references to those nodes.
     *
     * @param first The first node in the range to be removed.
     * @param last An iterator that points one past the last node to erase. Think xml::node::end().
     * @return An iterator that points to the node after the last one being erased.
     * @author Peter Jones
    **/
    //####################################################################
    iterator erase (iterator first, const iterator& last);

    //####################################################################
    /**
     * Erase all children nodes with the given name. This will find all
     * nodes that have the given node name and remove them from this node.
     * This will invalidate any iterators that point to the nodes to be
     * erased, or any pointers or references to those nodes.
     *
     * @param name The name of nodes to remove.
     * @return The number of nodes removed.
     * @author Peter Jones
    **/
    //####################################################################
    size_type erase (const char *name);

    //####################################################################
    /**
     * Erase all children nodes.
     *
     * @author tbrowder2
    **/
    //####################################################################
    void clear (void);

    //####################################################################
    /**
     * Sort all the children nodes of this node using one of their
     * attributes. Only nodes that are of xml::node::type_element will be
     * sorted, and they must have the given node_name.
     *
     * The sorting is done by calling std::strcmp on the value of the given
     * attribute.
     *
     * @param node_name The name of the nodes to sort.
     * @param attr_name The attribute to sort on.
     * @author Peter Jones
    **/
    //####################################################################
    void sort (const char *node_name, const char *attr_name);

    //####################################################################
    /**
     * Sort all the children nodes of this node using the given comparison
     * function object. All element type nodes will be considered for
     * sorting.
     *
     * The function object is wrapped into an impl::sort_callback<T> which
     * is then handed over to the private sort_fo() member.
     *
     * @param compare The binary function object to call in order to sort all child nodes.
     * @author Peter Jones
    **/
    //####################################################################
    template <typename T> void sort (T compare)
    { impl::sort_callback<T> cb(compare); sort_fo(cb); }

    //####################################################################
    /**
     * Convert the node and all its children into XML text and set the given
     * string to that text.
     *
     * @param xml The string to set the node's XML data to. The string is
     *            cleared.
     * @param flags
     *        Bitwise mask of the save options. Does not affect XSLT result
     *        documents.
     * @see xml::save_option
     * @note compression part of the options is currently ignored.
    **/
    //####################################################################
    void save_to_string (std::string &xml,
                         save_option_flags flags=save_op_default) const;

    //####################################################################
    /**
     * Convert the node and all its children into canonical XML text and set
     * the given string to that text.
     *
     * @param str The string to place the XML text data (the string is cleared)
     * @param c14n_option Canonicalization mode
     * @param comments_option Comments option (strip or keep)
     * @param format_option Format option (let libxml2 format the document or
     *                      not)
     * @param node_sort_option To sort or not the nodes before the
     *                         canonicalization
     * @exception throws xml::exception in case of problems
     * @note: the member has a significant memory and CPU footprint.
    **/
    void save_to_string_canonical (
                    std::string &                      str,
                    canonicalization_option            c14n_option,
                    canonicalization_comments_option   comments_option,
                    canonicalization_format_option     format_option,
                    canonicalization_node_sort_option  node_sort_option) const;

    //####################################################################
    /**
     * Convert the node and all its children into XML text and append that
     * text to the given string.
     *
     * @param xml The string to set the node's XML data to (the string is not
     *            cleared; the content is appended to the string).
     * @param flags
     *        Bitwise mask of the save options. Does not affect XSLT result
     *        documents.
     * @see xml::save_option
     * @note compression part of the options is currently ignored.
    **/
    //####################################################################
    void append_to_string (std::string &xml,
                           save_option_flags flags=save_op_default) const;

    //####################################################################
    /**
     * Provides a list of effective namespaces for the node.
     *
     * @param which Specifies what namespaces should be included into the list.
     *              Defaults to type_ns_all.
     * @return The list of namespaces selected by the which argument.
    **/
    ns_list_type get_effective_namespaces (effective_ns_list_type
                                           which=type_ns_all) const;

    /**
     * Sorts the namespace definitions in the node in place.
    **/
    void sort_namespace_definitions (void);

    //####################################################################
    /**
     * Write a node and all of its children to the given stream.
     *
     * @param stream The stream to write the node as XML.
     * @param n The node to write to the stream.
     * @return The stream.
     * @author Peter Jones
    **/
    //####################################################################
    friend std::ostream& operator<< (std::ostream &stream, const node &n);

    //####################################################################
    /**
     * Construct a new xml::node by copying another xml::node.
     *
     * @param other The other node to copy.
     * @author Peter Jones
    **/
    //####################################################################
    node (const node &other);

    //####################################################################
    /**
     * Make this node equal to some other node via assignment.
     *
     * @param other The other node to copy.
     * @return A reference to this node.
     * @author Peter Jones
    **/
    //####################################################################
    node& operator= (const node &other);

    /**
     * Move constructor.
     * @param other The other node.
    **/
    node (node &&other);

    /**
     * Move assignment.
     * @param other The other node.
     * @return A reference to this node.
    **/
    node& operator= (node &&other);

private:
    // Pointer to the implementation object. impl::node_impl is only
    // forward-declared in this header; its definition lives elsewhere.
    impl::node_impl *pimpl_;

    // private ctor to create uninitialized instance
    explicit node (int);

    // Accessors for the raw data behind this node.
    // NOTE(review): the void* is presumably the underlying libxml2 node
    // pointer, kept opaque so this header does not depend on libxml2 --
    // confirm against the implementation file.
    void set_node_data (void *data);
    void* get_node_data (void) const;
    void* release_node_data (void);
    node_set convert_to_nset(void *) const;

    // Classes and functions that need access to the private members above.
    friend class impl::node_iterator;
    friend class document;
    friend class xslt::xpath_object;
    friend struct impl::doc_impl;
    friend struct impl::node_cmp;
    friend class xslt::extension_element;

    friend struct impl::node_private_data *  impl::attach_node_private_data(void *);

    // Backend of the sort(T) member template: receives the wrapped comparison
    // function object.
    void sort_fo (impl::cbfo_node_compare &fo);

    // XML namespaces support
    // NOTE(review): the void* arguments below look like raw libxml2 node /
    // namespace pointers (see the libxml2RawNamespace parameter name) --
    // confirm against the implementation file.
    ns add_namespace_def (const char* uri, const char* prefix);
    ns add_matched_namespace_def (void* libxml2RawNamespace, const char* uri,
                                  ns_definition_adding_type type);
    void erase_duplicate_ns_defs (void* nd, std::deque<ns_list_type>& defs);
    void erase_duplicate_ns_defs_single_node (void* nd, std::deque<ns_list_type>& defs);
    void erase_unused_ns_defs (void* nd);
    ns_list_type get_namespace_definitions (void* nd, ns::ns_safety_type type) const;
    void* find_replacement_ns_def (std::deque<ns_list_type>& defs, void* ns);

    // XML XPath support
    void* create_xpath_context (const xml::xpath_expression& expr) const;
    void* evaluate_xpath_expression (const xml::xpath_expression& expr, void* context) const;

    // XSLT extensions support
    // These are the extern "C" callbacks forward-declared at the top of this
    // header, outside of any namespace.
    friend void ::xslt_ext_func_cb(void *, int);
    friend void ::xslt_ext_element_cb(void*, void*, void*, void*);

}; // end xml::node class

} // end xml namespace
#endif