1 /* 2 * Copyright (C) 2001-2003 Peter J Jones (pjones@pmade.org) 3 * Copyright (C) 2013 Vaclav Slavik <vslavik@gmail.com> 4 * All Rights Reserved 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in 14 * the documentation and/or other materials provided with the 15 * distribution. 16 * 3. Neither the name of the Author nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' 21 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 23 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR 24 * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF 27 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 28 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 29 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 30 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 */ 33 34 /** 35 @file 36 37 This file contains the definition of the xml::document class. 38 */ 39 40 #ifndef _xmlwrapp_document_h_ 41 #define _xmlwrapp_document_h_ 42 43 // xmlwrapp includes 44 #include "xmlwrapp/init.h" 45 #include "xmlwrapp/node.h" 46 #include "xmlwrapp/export.h" 47 #include "xmlwrapp/errors.h" 48 49 // standard includes 50 #include <iosfwd> 51 #include <string> 52 #include <cstddef> 53 54 // forward declaration 55 namespace xslt 56 { 57 58 class stylesheet; 59 namespace impl 60 { 61 class result; 62 } 63 64 } // end xslt namespace 65 66 namespace xml 67 { 68 69 // forward declarations 70 class schema; 71 class tree_parser; 72 73 namespace impl 74 { 75 struct doc_impl; 76 } 77 78 /** 79 The xml::document class is used to hold the XML tree and various bits of 80 information about it. 81 */ 82 class XMLWRAPP_API document 83 { 84 public: 85 /// size type 86 typedef std::size_t size_type; 87 88 /** 89 Create a new XML document with the default settings. The new document 90 will contain a root node with a name of "blank". 91 */ 92 document(); 93 94 /** 95 Create a new XML document and set the name of the root element to the 96 given text. 97 98 @param root_name What to set the name of the root element to. 99 100 @deprecated Use `xml::document(xml::node(root_name))` constructor instead. 101 */ 102 XMLWRAPP_DEPRECATED("use xml::document(xml::node(root_name)) instead") 103 explicit document(const char *root_name); 104 105 /** 106 Create a new XML document and set the root node. 107 108 @param n The node to use as the root node. n will be copied. 109 */ 110 explicit document(const node& n); 111 112 /** 113 Load XML document from given file. 114 115 Errors are handled by @a on_error handler; if you pass 116 xml::throw_on_error, xml::exception is thrown on errors. If there's a 117 fatal error that prevents the document from being loaded and the error 118 handler doesn't throw an exception, the constructor will throw 119 xml::exception anyway. 120 121 @param filename The name of the file to parse. 122 @param on_error Handler called to process errors and warnings. 123 124 @since 0.7.0 125 */ 126 explicit document(const char *filename, error_handler& on_error); 127 128 /** 129 Load XML document from given data. 130 131 Errors are handled by @a on_error handler; by default, xml::exception 132 is thrown on errors. If there's a fatal error that prevents the document 133 from being loaded and the error handler doesn't throw an exception, the 134 constructor will throw xml::exception anyway. 135 136 @param data The XML data to parse. 137 @param size The size of the XML data to parse. 138 @param on_error Handler called to process errors and warnings. 139 140 @since 0.7.0 141 */ 142 explicit document(const char *data, size_type size, error_handler& on_error = throw_on_error); 143 144 /** 145 Copy construct a new XML document. The new document will be an exact 146 copy of the original. 147 148 @param other The other document object to copy from. 149 */ 150 document(const document& other); 151 152 /** 153 Copy another document object into this one using the assignment 154 operator. This document object will be an exact copy of the other 155 document after the assignment. 156 157 @param other The document to copy from. 158 @return *this. 159 */ 160 document& operator=(const document& other); 161 162 /** 163 Swap one xml::document object for another. 164 165 @param other The other document to swap 166 */ 167 void swap(document& other); 168 169 /** 170 Clean up after an XML document object. 171 */ 172 ~document(); 173 174 /** 175 Get a reference to the root node of this document. If no root node 176 has been set, the returned node will be a blank node. You should take 177 caution to use a reference so that you don't copy the whole node 178 tree! 179 180 @return A const reference to the root node. 181 */ 182 const node& get_root_node() const; 183 184 /** 185 Get a reference to the root node of this document. If no root node 186 has been set, the returned node will be a blank node. You should take 187 caution to use a reference so that you don't copy the whole node 188 tree! 189 190 @return A reference to the root node. 191 */ 192 node& get_root_node(); 193 194 /** 195 Set the root node to the given node. A full copy is made and stored 196 in the document object. 197 198 @param n The new root node to use. 199 */ 200 void set_root_node(const node& n); 201 202 /** 203 Get the XML version for this document. For generated documents, the 204 version will be the default. For parsed documents, this will be the 205 version from the XML processing instruction. 206 207 @return The XML version string for this document. 208 */ 209 const std::string& get_version() const; 210 211 /** 212 Set the XML version number for this document. This version string 213 will be used when generating the XML output. 214 215 @param version The version string to use, like "1.0". 216 */ 217 void set_version(const char *version); 218 219 /** 220 Get the XML encoding for this document. The default encoding is 221 ISO-8859-1. 222 223 @return The encoding string. 224 */ 225 const std::string& get_encoding() const; 226 227 /** 228 Set the XML encoding string. If you don't set this, it will default 229 to ISO-8859-1. 230 231 @param encoding The XML encoding to use. 232 */ 233 void set_encoding(const char *encoding); 234 235 /** 236 Find out if the current document is a standalone document. For 237 generated documents, this will be the default. For parsed documents 238 this will be set based on the XML processing instruction. 239 240 @return True if this document is standalone. 241 @return False if this document is not standalone. 242 */ 243 bool get_is_standalone() const; 244 245 /** 246 Set the standalone flag. This will show up in the XML output in the 247 correct processing instruction. 248 249 @param sa What to set the standalone flag to. 250 */ 251 void set_is_standalone(bool sa); 252 253 /** 254 Walk through the document and expand <xi:include> elements. For more 255 information, please see the w3c recommendation for XInclude. 256 http://www.w3.org/2001/XInclude. 257 258 The return value of this function may change to int after a bug has 259 been fixed in libxml2 (xmlXIncludeDoProcess). 260 261 @return False if there was an error with substitutions. 262 @return True if there were no errors (with or without substitutions). 263 */ 264 bool process_xinclude(); 265 266 /** 267 Test to see if this document has an internal subset. That is, DTD 268 data that is declared within the XML document itself. 269 270 @return True if this document has an internal subset. 271 @return False otherwise. 272 */ 273 bool has_internal_subset() const; 274 275 /** 276 Test to see if this document has an external subset. That is, it 277 references a DTD from an external source, such as a file or URL. 278 279 @return True if this document has an external subset. 280 @return False otherwise. 281 */ 282 bool has_external_subset() const; 283 284 /** 285 Validate this document against the DTD that has been attached to it. 286 This would happen at parse time if there was a !DOCTYPE definition. 287 If the DTD is valid, and the document is valid, this member function 288 will return true. 289 290 If it returns false, you may want to send the document through 291 xmllint to get the actual error messages. 292 293 @return True if the document is valid. 294 @return False if there was a problem with the DTD or XML doc. 295 */ 296 bool validate(); 297 298 /** 299 Parse the given DTD and try to validate this document against it. If 300 the DTD is valid, and the document is valid, this member function 301 will return true. 302 303 If it returns false, you may want to send the document through 304 xmllint to get the actual error messages. 305 306 This member function will add the parsed DTD to this document as the 307 external subset after the validation. If there is already an external 308 DTD attached to this document it will be removed and deleted. 309 310 @param dtdname A filename or URL for the DTD to use. 311 @return True if the document is valid. 312 @return False if there was a problem with the DTD or XML doc. 313 */ 314 bool validate(const char *dtdname); 315 316 /** 317 Returns the number of child nodes of this document. This will always 318 be at least one, since all xmlwrapp documents must have a root node. 319 This member function is useful to find out how many document children 320 there are, including processing instructions, comments, etc. 321 322 @return The number of children nodes that this document has. 323 */ 324 size_type size() const; 325 326 /** 327 Get an iterator to the first child node of this document. If what you 328 really wanted was the root node (the first element) you should use 329 the get_root_node() member function instead. 330 331 @return A xml::node::iterator that points to the first child node. 332 @return An end iterator if there are no children in this document 333 */ 334 node::iterator begin(); 335 336 /** 337 Get a const_iterator to the first child node of this document. If 338 what you really wanted was the root node (the first element) you 339 should use the get_root_node() member function instead. 340 341 @return A xml::node::const_iterator that points to the first child node. 342 @return An end const_iterator if there are no children in this document. 343 */ 344 node::const_iterator begin() const; 345 346 /** 347 Get an iterator that points one past the last child node for this 348 document. 349 350 @return An end xml::node::iterator. 351 */ 352 node::iterator end(); 353 354 /** 355 Get a const_iterator that points one past the last child node for 356 this document. 357 358 @return An end xml::node::const_iterator. 359 */ 360 node::const_iterator end() const; 361 362 /** 363 Add a child xml::node to this document. You should not add a element 364 type node, since there can only be one root node. This member 365 function is only useful for adding processing instructions, comments, 366 etc.. If you do try to add a node of type element, an exception will 367 be thrown. 368 369 @param child The child xml::node to add. 370 */ 371 void push_back (const node &child); 372 373 /** 374 Insert a new child node. The new node will be inserted at the end of 375 the child list. This is similar to the xml::node::push_back member 376 function except that an iterator to the inserted node is returned. 377 378 The rules from the push_back member function apply here. Don't add a 379 node of type element. 380 381 @param n The node to insert as a child of this document. 382 @return An iterator that points to the newly inserted node. 383 @see xml::document::push_back 384 */ 385 node::iterator insert (const node &n); 386 387 /** 388 Insert a new child node. The new node will be inserted before the 389 node pointed to by the given iterator. 390 391 The rules from the push_back member function apply here. Don't add a 392 node of type element. 393 394 @param position An iterator that points to the location where the new node should be inserted (before it). 395 @param n The node to insert as a child of this document. 396 @return An iterator that points to the newly inserted node. 397 @see xml::document::push_back 398 */ 399 node::iterator insert(node::iterator position, const node &n); 400 401 /** 402 Replace the node pointed to by the given iterator with another node. 403 The old node will be removed, including all its children, and 404 replaced with the new node. This will invalidate any iterators that 405 point to the node to be replaced, or any pointers or references to 406 that node. 407 408 Do not replace this root node with this member function. The same 409 rules that apply to push_back apply here. If you try to replace a 410 node of type element, an exception will be thrown. 411 412 @param old_node An iterator that points to the node that should be removed. 413 @param new_node The node to put in old_node's place. 414 @return An iterator that points to the new node. 415 @see xml::document::push_back 416 */ 417 node::iterator replace(node::iterator old_node, const node& new_node); 418 419 /** 420 Erase the node that is pointed to by the given iterator. The node 421 and all its children will be removed from this node. This will 422 invalidate any iterators that point to the node to be erased, or any 423 pointers or references to that node. 424 425 Do not remove the root node using this member function. The same 426 rules that apply to push_back apply here. If you try to erase the 427 root node, an exception will be thrown. 428 429 @param to_erase An iterator that points to the node to be erased. 430 @return An iterator that points to the node after the one being erased. 431 @see xml::document::push_back 432 */ 433 node::iterator erase(node::iterator to_erase); 434 435 /** 436 Erase all nodes in the given range, from first to last. This will 437 invalidate any iterators that point to the nodes to be erased, or any 438 pointers or references to those nodes. 439 440 Do not remove the root node using this member function. The same 441 rules that apply to push_back apply here. If you try to erase the 442 root node, an exception will be thrown. 443 444 @param first The first node in the range to be removed. 445 @param last An iterator that points one past the last node to erase. Think xml::node::end(). 446 @return An iterator that points to the node after the last one being erased. 447 @see xml::document::push_back 448 */ 449 node::iterator erase(node::iterator first, node::iterator last); 450 451 /** 452 Convert the XML document tree into XML text data and place it into 453 the given string. 454 455 @param s The string to place the XML text data. 456 */ 457 void save_to_string(std::string& s) const; 458 459 /** 460 Convert the XML document tree into XML text data and place it into 461 the given filename. 462 463 @param filename The name of the file to place the XML text data into. 464 @param compression_level 0 is no compression, 1-9 allowed, where 1 is 465 for better speed, and 9 is for smaller size 466 @return True if the data was saved successfully. 467 @return False otherwise. 468 */ 469 bool save_to_file(const char *filename, int compression_level = 0) const; 470 471 /** 472 Convert the XML document tree into XML text data and then insert it 473 into the given stream. 474 475 @param stream The stream to insert the XML into. 476 @param doc The document to insert. 477 @return The stream from the first parameter. 478 */ 479 friend XMLWRAPP_API std::ostream& operator<< (std::ostream &stream, const document &doc); 480 481 private: 482 impl::doc_impl *pimpl_; 483 484 void set_doc_data (void *data); 485 void set_doc_data_from_xslt (void *data, xslt::impl::result *xr); 486 void* get_doc_data(); 487 void* get_doc_data_read_only() const; 488 void* release_doc_data(); 489 490 friend class tree_parser; 491 friend class schema; 492 friend class xslt::stylesheet; 493 }; 494 495 } // namespace xml 496 497 #endif // _xmlwrapp_document_h_ 498