#ifndef RAPIDXML_HPP_INCLUDED
#define RAPIDXML_HPP_INCLUDED

// Copyright (C) 2006, 2009 Marcin Kalicinski
// Version 1.13
// Revision $DateTime: 2009/05/13 01:46:17 $
//! \file rapidxml.hpp This file contains rapidxml parser and DOM implementation

// If standard library is disabled, user must provide implementations of required functions and typedefs
#if !defined(RAPIDXML_NO_STDLIB)
    #include <cstdlib>      // For std::size_t
    #include <cassert>      // For assert
    #include <new>          // For placement new
#endif

// On MSVC, disable "conditional expression is constant" warning (level 4).
// This warning is almost impossible to avoid with certain types of templated code
#ifdef _MSC_VER
    #pragma warning(push)
    #pragma warning(disable:4127)   // Conditional expression is constant
#endif

///////////////////////////////////////////////////////////////////////////
// RAPIDXML_PARSE_ERROR

#if defined(RAPIDXML_NO_EXCEPTIONS)

// Without exceptions, an error is reported by calling the user-supplied handler.
// The handler is documented as never returning; assert(0) traps the case where it does.
#define RAPIDXML_PARSE_ERROR(what, where) { parse_error_handler(what, where); assert(0); }

namespace rapidxml
{
    //! When exceptions are disabled by defining RAPIDXML_NO_EXCEPTIONS,
    //! this function is called to notify user about the error.
    //! It must be defined by the user.
    //! <br><br>
    //! This function cannot return. If it does, the results are undefined.
    //! <br><br>
    //! A very simple definition might look like this:
    //! <pre>
    //! void %rapidxml::%parse_error_handler(const char *what, void *where)
    //! {
    //!     std::cout << "Parse error: " << what << "\n";
    //!     std::abort();
    //! }
    //! </pre>
    //! \param what Human readable description of the error.
    //! \param where Pointer to character data where error was detected.
    void parse_error_handler(const char *what, void *where);
}

#else

#include <exception>    // For std::exception

// With exceptions enabled, an error is reported by throwing rapidxml::parse_error.
#define RAPIDXML_PARSE_ERROR(what, where) throw parse_error(what, where)

namespace rapidxml
{

    //! Parse error exception.
    //! This exception is thrown by the parser when an error occurs.
    //! Use what() function to get human-readable error message.
    //! Use where() function to get a pointer to position within source text where error was detected.
    //! <br><br>
    //! If throwing exceptions by the parser is undesirable,
    //! it can be disabled by defining RAPIDXML_NO_EXCEPTIONS macro before rapidxml.hpp is included.
    //! This will cause the parser to call rapidxml::parse_error_handler() function instead of throwing an exception.
    //! This function must be defined by the user.
    //! <br><br>
    //! This class derives from <code>std::exception</code> class.
    class parse_error: public std::exception
    {

    public:

        //! Constructs parse error.
        //! Both pointers are stored as given; the message text is not copied,
        //! so it must remain valid for as long as the exception object is in use.
        //! \param what Human readable description of the error.
        //! \param where Pointer to character data where error was detected.
        parse_error(const char *what, void *where)
            : m_what(what)
            , m_where(where)
        {
        }

        //! Gets human readable description of error.
        //! \return Pointer to null terminated description of the error.
        virtual const char *what() const throw()
        {
            return m_what;
        }

        //! Gets pointer to character data where error happened.
        //! Ch should be the same as char type of xml_document that produced the error.
        //! The stored pointer is untyped, so it is reinterpreted as <code>Ch *</code> without any check.
        //! \return Pointer to location within the parsed string where error occurred.
        template<class Ch>
        Ch *where() const
        {
            return reinterpret_cast<Ch *>(m_where);
        }

    private:

        const char *m_what;     // Error description passed to the constructor (not owned)
        void *m_where;          // Error location passed to the constructor (not owned)

    };
}

#endif

///////////////////////////////////////////////////////////////////////////
// Pool sizes

#ifndef RAPIDXML_STATIC_POOL_SIZE
    // Size of static memory block of memory_pool.
    // Define RAPIDXML_STATIC_POOL_SIZE before including rapidxml.hpp if you want to override the default value.
    // No dynamic memory allocations are performed by memory_pool until static memory is exhausted.
    #define RAPIDXML_STATIC_POOL_SIZE (64 * 1024)
#endif

#ifndef RAPIDXML_DYNAMIC_POOL_SIZE
    // Size of dynamic memory block of memory_pool.
    // Define RAPIDXML_DYNAMIC_POOL_SIZE before including rapidxml.hpp if you want to override the default value.
    // After the static block is exhausted, dynamic blocks with approximately this size are allocated by memory_pool.
    #define RAPIDXML_DYNAMIC_POOL_SIZE (64 * 1024)
#endif

#ifndef RAPIDXML_ALIGNMENT
    // Memory allocation alignment.
    // Define RAPIDXML_ALIGNMENT before including rapidxml.hpp if you want to override the default value, which is the size of pointer.
    // All memory allocations for nodes, attributes and strings will be aligned to this value.
    // This must be a power of 2 and at least 1, otherwise memory_pool will not work
    // (memory_pool rounds addresses up by masking with RAPIDXML_ALIGNMENT - 1).
    #define RAPIDXML_ALIGNMENT sizeof(void *)
#endif

namespace rapidxml
{
    // Forward declarations
    template<class Ch> class xml_node;
    template<class Ch> class xml_attribute;
    template<class Ch> class xml_document;

    //! Enumeration listing all node types produced by the parser.
    //! Use xml_node::type() function to query node type.
    enum node_type
    {
        node_document,      //!< A document node. Name and value are empty.
        node_element,       //!< An element node. Name contains element name. Value contains text of first data node.
        node_data,          //!< A data node. Name is empty. Value contains data text.
        node_cdata,         //!< A CDATA node. Name is empty. Value contains data text.
        node_comment,       //!< A comment node. Name is empty. Value contains comment text.
        node_declaration,   //!< A declaration node. Name and value are empty. Declaration parameters (version, encoding and standalone) are in node attributes.
        node_doctype,       //!< A DOCTYPE node. Name is empty. Value contains DOCTYPE text.
        node_pi             //!< A PI node. Name contains target. Value contains instructions.
    };

    ///////////////////////////////////////////////////////////////////////
    // Parsing flags

    //! Parse flag instructing the parser to not create data nodes.
    //! Text of first data node will still be placed in value of parent element, unless rapidxml::parse_no_element_values flag is also specified.
    //! Can be combined with other flags by use of | operator.
    //! <br><br>
    //! See xml_document::parse() function.
    const int parse_no_data_nodes = 0x1;

    //! Parse flag instructing the parser to not use text of first data node as a value of parent element.
    //! Can be combined with other flags by use of | operator.
    //! Note that child data nodes of element node take precedence over its value when printing.
    //! That is, if element has one or more child data nodes <em>and</em> a value, the value will be ignored.
    //! Use rapidxml::parse_no_data_nodes flag to prevent creation of data nodes if you want to manipulate data using values of elements.
    //! <br><br>
    //! See xml_document::parse() function.
    const int parse_no_element_values = 0x2;

    //! Parse flag instructing the parser to not place zero terminators after strings in the source text.
    //! By default zero terminators are placed, modifying source text.
    //! Can be combined with other flags by use of | operator.
    //! <br><br>
    //! See xml_document::parse() function.
    const int parse_no_string_terminators = 0x4;

    //! Parse flag instructing the parser to not translate entities in the source text.
    //! By default entities are translated, modifying source text.
    //! Can be combined with other flags by use of | operator.
    //! <br><br>
    //! See xml_document::parse() function.
    const int parse_no_entity_translation = 0x8;

    //! Parse flag instructing the parser to disable UTF-8 handling and assume plain 8 bit characters.
    //! By default, UTF-8 handling is enabled.
    //! Can be combined with other flags by use of | operator.
    //! <br><br>
    //! See xml_document::parse() function.
    const int parse_no_utf8 = 0x10;

    //! Parse flag instructing the parser to create XML declaration node.
    //! By default, declaration node is not created.
    //! Can be combined with other flags by use of | operator.
    //! <br><br>
    //! See xml_document::parse() function.
    const int parse_declaration_node = 0x20;

    //! Parse flag instructing the parser to create comments nodes.
    //! By default, comment nodes are not created.
    //! Can be combined with other flags by use of | operator.
    //! <br><br>
    //! See xml_document::parse() function.
    const int parse_comment_nodes = 0x40;

    //! Parse flag instructing the parser to create DOCTYPE node.
    //! By default, doctype node is not created.
    //! Although W3C specification allows at most one DOCTYPE node, RapidXml will silently accept documents with more than one.
    //! Can be combined with other flags by use of | operator.
    //! <br><br>
    //! See xml_document::parse() function.
    const int parse_doctype_node = 0x80;

    //! Parse flag instructing the parser to create PI nodes.
    //! By default, PI nodes are not created.
    //! Can be combined with other flags by use of | operator.
    //! <br><br>
    //! See xml_document::parse() function.
    const int parse_pi_nodes = 0x100;

    //! Parse flag instructing the parser to validate closing tag names.
    //! If not set, name inside closing tag is irrelevant to the parser.
    //! By default, closing tags are not validated.
    //! Can be combined with other flags by use of | operator.
    //! <br><br>
    //! See xml_document::parse() function.
    const int parse_validate_closing_tags = 0x200;

    //! Parse flag instructing the parser to trim all leading and trailing whitespace of data nodes.
    //! By default, whitespace is not trimmed.
    //! This flag does not cause the parser to modify source text.
    //! Can be combined with other flags by use of | operator.
    //! <br><br>
    //! See xml_document::parse() function.
    const int parse_trim_whitespace = 0x400;

    //! Parse flag instructing the parser to condense all whitespace runs of data nodes to a single space character.
    //! Trimming of leading and trailing whitespace of data is controlled by rapidxml::parse_trim_whitespace flag.
    //! By default, whitespace is not normalized.
    //! If this flag is specified, source text will be modified.
    //! Can be combined with other flags by use of | operator.
    //! <br><br>
    //! See xml_document::parse() function.
    const int parse_normalize_whitespace = 0x800;

    // Compound flags

    //! Parse flags which represent default behaviour of the parser.
    //! This is always equal to 0, so that all other flags can be simply ored together.
    //! Normally there is no need to inconveniently disable flags by anding with their negated (~) values.
    //! This also means that meaning of each flag is a <i>negation</i> of the default setting.
    //! For example, if flag name is rapidxml::parse_no_utf8, it means that utf-8 is <i>enabled</i> by default,
    //! and using the flag will disable it.
    //! <br><br>
    //! See xml_document::parse() function.
    const int parse_default = 0;

    //! A combination of parse flags that forbids any modifications of the source text.
    //! This also results in faster parsing. However, note that the following will occur:
    //! <ul>
    //! <li>names and values of nodes will not be zero terminated, you have to use xml_base::name_size() and xml_base::value_size() functions to determine where name and value ends</li>
    //! <li>entities will not be translated</li>
    //! <li>whitespace will not be normalized</li>
    //! </ul>
    //! See xml_document::parse() function.
    const int parse_non_destructive = parse_no_string_terminators | parse_no_entity_translation;

    //! A combination of parse flags resulting in fastest possible parsing, without sacrificing important data.
    //! <br><br>
    //! See xml_document::parse() function.
    const int parse_fastest = parse_non_destructive | parse_no_data_nodes;

    //! A combination of parse flags resulting in largest amount of data being extracted.
    //! This usually results in slowest parsing.
    //! <br><br>
    //! See xml_document::parse() function.
    const int parse_full = parse_declaration_node | parse_comment_nodes | parse_doctype_node | parse_pi_nodes | parse_validate_closing_tags;

    ///////////////////////////////////////////////////////////////////////
    // Internals

    //! \cond internal
    namespace internal
    {

        // Struct that contains lookup tables for the parser
        // It must be a template to allow correct linking (because it has static data members, which are defined in a header file).
291 template<int Dummy> 292 struct lookup_tables 293 { 294 static const unsigned char lookup_whitespace[256]; // Whitespace table 295 static const unsigned char lookup_node_name[256]; // Node name table 296 static const unsigned char lookup_text[256]; // Text table 297 static const unsigned char lookup_text_pure_no_ws[256]; // Text table 298 static const unsigned char lookup_text_pure_with_ws[256]; // Text table 299 static const unsigned char lookup_attribute_name[256]; // Attribute name table 300 static const unsigned char lookup_attribute_data_1[256]; // Attribute data table with single quote 301 static const unsigned char lookup_attribute_data_1_pure[256]; // Attribute data table with single quote 302 static const unsigned char lookup_attribute_data_2[256]; // Attribute data table with double quotes 303 static const unsigned char lookup_attribute_data_2_pure[256]; // Attribute data table with double quotes 304 static const unsigned char lookup_digits[256]; // Digits 305 static const unsigned char lookup_upcase[256]; // To uppercase conversion table for ASCII characters 306 }; 307 308 // Find length of the string 309 template<class Ch> measure(const Ch * p)310 inline std::size_t measure(const Ch *p) 311 { 312 const Ch *tmp = p; 313 while (*tmp) 314 ++tmp; 315 return tmp - p; 316 } 317 318 // Compare strings for equality 319 template<class Ch> compare(const Ch * p1,std::size_t size1,const Ch * p2,std::size_t size2,bool case_sensitive)320 inline bool compare(const Ch *p1, std::size_t size1, const Ch *p2, std::size_t size2, bool case_sensitive) 321 { 322 if (size1 != size2) 323 return false; 324 if (case_sensitive) 325 { 326 for (const Ch *end = p1 + size1; p1 < end; ++p1, ++p2) 327 if (*p1 != *p2) 328 return false; 329 } 330 else 331 { 332 for (const Ch *end = p1 + size1; p1 < end; ++p1, ++p2) 333 if (lookup_tables<0>::lookup_upcase[static_cast<unsigned char>(*p1)] != lookup_tables<0>::lookup_upcase[static_cast<unsigned char>(*p2)]) 334 return false; 335 } 336 
return true; 337 } 338 } 339 //! \endcond 340 341 /////////////////////////////////////////////////////////////////////// 342 // Memory pool 343 344 //! This class is used by the parser to create new nodes and attributes, without overheads of dynamic memory allocation. 345 //! In most cases, you will not need to use this class directly. 346 //! However, if you need to create nodes manually or modify names/values of nodes, 347 //! you are encouraged to use memory_pool of relevant xml_document to allocate the memory. 348 //! Not only is this faster than allocating them by using <code>new</code> operator, 349 //! but also their lifetime will be tied to the lifetime of document, 350 //! possibly simplyfing memory management. 351 //! <br><br> 352 //! Call allocate_node() or allocate_attribute() functions to obtain new nodes or attributes from the pool. 353 //! You can also call allocate_string() function to allocate strings. 354 //! Such strings can then be used as names or values of nodes without worrying about their lifetime. 355 //! Note that there is no <code>free()</code> function -- all allocations are freed at once when clear() function is called, 356 //! or when the pool is destroyed. 357 //! <br><br> 358 //! It is also possible to create a standalone memory_pool, and use it 359 //! to allocate nodes, whose lifetime will not be tied to any document. 360 //! <br><br> 361 //! Pool maintains <code>RAPIDXML_STATIC_POOL_SIZE</code> bytes of statically allocated memory. 362 //! Until static memory is exhausted, no dynamic memory allocations are done. 363 //! When static memory is exhausted, pool allocates additional blocks of memory of size <code>RAPIDXML_DYNAMIC_POOL_SIZE</code> each, 364 //! by using global <code>new[]</code> and <code>delete[]</code> operators. 365 //! This behaviour can be changed by setting custom allocation routines. 366 //! Use set_allocator() function to set them. 367 //! <br><br> 368 //! 
Allocations for nodes, attributes and strings are aligned at <code>RAPIDXML_ALIGNMENT</code> bytes. 369 //! This value defaults to the size of pointer on target architecture. 370 //! <br><br> 371 //! To obtain absolutely top performance from the parser, 372 //! it is important that all nodes are allocated from a single, contiguous block of memory. 373 //! Otherwise, cache misses when jumping between two (or more) disjoint blocks of memory can slow down parsing quite considerably. 374 //! If required, you can tweak <code>RAPIDXML_STATIC_POOL_SIZE</code>, <code>RAPIDXML_DYNAMIC_POOL_SIZE</code> and <code>RAPIDXML_ALIGNMENT</code> 375 //! to obtain best wasted memory to performance compromise. 376 //! To do it, define their values before rapidxml.hpp file is included. 377 //! \param Ch Character type of created nodes. 378 template<class Ch = char> 379 class memory_pool 380 { 381 382 public: 383 384 //! \cond internal 385 typedef void *(alloc_func)(std::size_t); // Type of user-defined function used to allocate memory 386 typedef void (free_func)(void *); // Type of user-defined function used to free memory 387 //! \endcond 388 389 //! Constructs empty pool with default allocator functions. memory_pool()390 memory_pool() 391 : m_alloc_func(0) 392 , m_free_func(0) 393 { 394 init(); 395 } 396 397 //! Destroys pool and frees all the memory. 398 //! This causes memory occupied by nodes allocated by the pool to be freed. 399 //! Nodes allocated from the pool are no longer valid. ~memory_pool()400 ~memory_pool() 401 { 402 clear(); 403 } 404 405 //! Allocates a new node from the pool, and optionally assigns name and value to it. 406 //! If the allocation request cannot be accomodated, this function will throw <code>std::bad_alloc</code>. 407 //! If exceptions are disabled by defining RAPIDXML_NO_EXCEPTIONS, this function 408 //! will call rapidxml::parse_error_handler() function. 409 //! \param type Type of node to create. 410 //! 
\param name Name to assign to the node, or 0 to assign no name. 411 //! \param value Value to assign to the node, or 0 to assign no value. 412 //! \param name_size Size of name to assign, or 0 to automatically calculate size from name string. 413 //! \param value_size Size of value to assign, or 0 to automatically calculate size from value string. 414 //! \return Pointer to allocated node. This pointer will never be NULL. allocate_node(node_type type,const Ch * name=0,const Ch * value=0,std::size_t name_size=0,std::size_t value_size=0)415 xml_node<Ch> *allocate_node(node_type type, 416 const Ch *name = 0, const Ch *value = 0, 417 std::size_t name_size = 0, std::size_t value_size = 0) 418 { 419 void *memory = allocate_aligned(sizeof(xml_node<Ch>)); 420 xml_node<Ch> *node = new(memory) xml_node<Ch>(type); 421 if (name) 422 { 423 if (name_size > 0) 424 node->name(name, name_size); 425 else 426 node->name(name); 427 } 428 if (value) 429 { 430 if (value_size > 0) 431 node->value(value, value_size); 432 else 433 node->value(value); 434 } 435 return node; 436 } 437 438 //! Allocates a new attribute from the pool, and optionally assigns name and value to it. 439 //! If the allocation request cannot be accomodated, this function will throw <code>std::bad_alloc</code>. 440 //! If exceptions are disabled by defining RAPIDXML_NO_EXCEPTIONS, this function 441 //! will call rapidxml::parse_error_handler() function. 442 //! \param name Name to assign to the attribute, or 0 to assign no name. 443 //! \param value Value to assign to the attribute, or 0 to assign no value. 444 //! \param name_size Size of name to assign, or 0 to automatically calculate size from name string. 445 //! \param value_size Size of value to assign, or 0 to automatically calculate size from value string. 446 //! \return Pointer to allocated attribute. This pointer will never be NULL. 
allocate_attribute(const Ch * name=0,const Ch * value=0,std::size_t name_size=0,std::size_t value_size=0)447 xml_attribute<Ch> *allocate_attribute(const Ch *name = 0, const Ch *value = 0, 448 std::size_t name_size = 0, std::size_t value_size = 0) 449 { 450 void *memory = allocate_aligned(sizeof(xml_attribute<Ch>)); 451 xml_attribute<Ch> *attribute = new(memory) xml_attribute<Ch>; 452 if (name) 453 { 454 if (name_size > 0) 455 attribute->name(name, name_size); 456 else 457 attribute->name(name); 458 } 459 if (value) 460 { 461 if (value_size > 0) 462 attribute->value(value, value_size); 463 else 464 attribute->value(value); 465 } 466 return attribute; 467 } 468 469 //! Allocates a char array of given size from the pool, and optionally copies a given string to it. 470 //! If the allocation request cannot be accomodated, this function will throw <code>std::bad_alloc</code>. 471 //! If exceptions are disabled by defining RAPIDXML_NO_EXCEPTIONS, this function 472 //! will call rapidxml::parse_error_handler() function. 473 //! \param source String to initialize the allocated memory with, or 0 to not initialize it. 474 //! \param size Number of characters to allocate, or zero to calculate it automatically from source string length; if size is 0, source string must be specified and null terminated. 475 //! \return Pointer to allocated char array. This pointer will never be NULL. allocate_string(const Ch * source=0,std::size_t size=0)476 Ch *allocate_string(const Ch *source = 0, std::size_t size = 0) 477 { 478 assert(source || size); // Either source or size (or both) must be specified 479 if (size == 0) 480 size = internal::measure(source) + 1; 481 Ch *result = static_cast<Ch *>(allocate_aligned(size * sizeof(Ch))); 482 if (source) 483 for (std::size_t i = 0; i < size; ++i) 484 result[i] = source[i]; 485 return result; 486 } 487 488 //! Clones an xml_node and its hierarchy of child nodes and attributes. 489 //! Nodes and attributes are allocated from this memory pool. 
490 //! Names and values are not cloned, they are shared between the clone and the source. 491 //! Result node can be optionally specified as a second parameter, 492 //! in which case its contents will be replaced with cloned source node. 493 //! This is useful when you want to clone entire document. 494 //! \param source Node to clone. 495 //! \param result Node to put results in, or 0 to automatically allocate result node 496 //! \return Pointer to cloned node. This pointer will never be NULL. clone_node(const xml_node<Ch> * source,xml_node<Ch> * result=0)497 xml_node<Ch> *clone_node(const xml_node<Ch> *source, xml_node<Ch> *result = 0) 498 { 499 // Prepare result node 500 if (result) 501 { 502 result->remove_all_attributes(); 503 result->remove_all_nodes(); 504 result->type(source->type()); 505 } 506 else 507 result = allocate_node(source->type()); 508 509 // Clone name and value 510 result->name(source->name(), source->name_size()); 511 result->value(source->value(), source->value_size()); 512 513 // Clone child nodes and attributes 514 for (xml_node<Ch> *child = source->first_node(); child; child = child->next_sibling()) 515 result->append_node(clone_node(child)); 516 for (xml_attribute<Ch> *attr = source->first_attribute(); attr; attr = attr->next_attribute()) 517 result->append_attribute(allocate_attribute(attr->name(), attr->value(), attr->name_size(), attr->value_size())); 518 519 return result; 520 } 521 522 //! Clears the pool. 523 //! This causes memory occupied by nodes allocated by the pool to be freed. 524 //! Any nodes or strings allocated from the pool will no longer be valid. clear()525 void clear() 526 { 527 while (m_begin != m_static_memory) 528 { 529 char *previous_begin = reinterpret_cast<header *>(align(m_begin))->previous_begin; 530 if (m_free_func) 531 m_free_func(m_begin); 532 else 533 delete[] m_begin; 534 m_begin = previous_begin; 535 } 536 init(); 537 } 538 539 //! 
Sets or resets the user-defined memory allocation functions for the pool. 540 //! This can only be called when no memory is allocated from the pool yet, otherwise results are undefined. 541 //! Allocation function must not return invalid pointer on failure. It should either throw, 542 //! stop the program, or use <code>longjmp()</code> function to pass control to other place of program. 543 //! If it returns invalid pointer, results are undefined. 544 //! <br><br> 545 //! User defined allocation functions must have the following forms: 546 //! <br><code> 547 //! <br>void *allocate(std::size_t size); 548 //! <br>void free(void *pointer); 549 //! </code><br> 550 //! \param af Allocation function, or 0 to restore default function 551 //! \param ff Free function, or 0 to restore default function set_allocator(alloc_func * af,free_func * ff)552 void set_allocator(alloc_func *af, free_func *ff) 553 { 554 assert(m_begin == m_static_memory && m_ptr == align(m_begin)); // Verify that no memory is allocated yet 555 m_alloc_func = af; 556 m_free_func = ff; 557 } 558 559 private: 560 561 struct header 562 { 563 char *previous_begin; 564 }; 565 init()566 void init() 567 { 568 m_begin = m_static_memory; 569 m_ptr = align(m_begin); 570 m_end = m_static_memory + sizeof(m_static_memory); 571 } 572 align(char * ptr)573 char *align(char *ptr) 574 { 575 std::size_t alignment = ((RAPIDXML_ALIGNMENT - (std::size_t(ptr) & (RAPIDXML_ALIGNMENT - 1))) & (RAPIDXML_ALIGNMENT - 1)); 576 return ptr + alignment; 577 } 578 allocate_raw(std::size_t size)579 char *allocate_raw(std::size_t size) 580 { 581 // Allocate 582 void *memory; 583 if (m_alloc_func) // Allocate memory using either user-specified allocation function or global operator new[] 584 { 585 memory = m_alloc_func(size); 586 assert(memory); // Allocator is not allowed to return 0, on failure it must either throw, stop the program or use longjmp 587 } 588 else 589 { 590 memory = new char[size]; 591 #ifdef RAPIDXML_NO_EXCEPTIONS 592 if 
(!memory) // If exceptions are disabled, verify memory allocation, because new will not be able to throw bad_alloc 593 RAPIDXML_PARSE_ERROR("out of memory", 0); 594 #endif 595 } 596 return static_cast<char *>(memory); 597 } 598 allocate_aligned(std::size_t size)599 void *allocate_aligned(std::size_t size) 600 { 601 // Calculate aligned pointer 602 char *result = align(m_ptr); 603 604 // If not enough memory left in current pool, allocate a new pool 605 if (result + size > m_end) 606 { 607 // Calculate required pool size (may be bigger than RAPIDXML_DYNAMIC_POOL_SIZE) 608 std::size_t pool_size = RAPIDXML_DYNAMIC_POOL_SIZE; 609 if (pool_size < size) 610 pool_size = size; 611 612 // Allocate 613 std::size_t alloc_size = sizeof(header) + (2 * RAPIDXML_ALIGNMENT - 2) + pool_size; // 2 alignments required in worst case: one for header, one for actual allocation 614 char *raw_memory = allocate_raw(alloc_size); 615 616 // Setup new pool in allocated memory 617 char *pool = align(raw_memory); 618 header *new_header = reinterpret_cast<header *>(pool); 619 new_header->previous_begin = m_begin; 620 m_begin = raw_memory; 621 m_ptr = pool + sizeof(header); 622 m_end = raw_memory + alloc_size; 623 624 // Calculate aligned pointer again using new pool 625 result = align(m_ptr); 626 } 627 628 // Update pool and return aligned pointer 629 m_ptr = result + size; 630 return result; 631 } 632 633 char *m_begin; // Start of raw memory making up current pool 634 char *m_ptr; // First free byte in current pool 635 char *m_end; // One past last available byte in current pool 636 char m_static_memory[RAPIDXML_STATIC_POOL_SIZE]; // Static raw memory 637 alloc_func *m_alloc_func; // Allocator function, or 0 if default is to be used 638 free_func *m_free_func; // Free function, or 0 if default is to be used 639 }; 640 641 /////////////////////////////////////////////////////////////////////////// 642 // XML base 643 644 //! 
Base class for xml_node and xml_attribute implementing common functions: 645 //! name(), name_size(), value(), value_size() and parent(). 646 //! \param Ch Character type to use 647 template<class Ch = char> 648 class xml_base 649 { 650 651 public: 652 653 /////////////////////////////////////////////////////////////////////////// 654 // Construction & destruction 655 656 // Construct a base with empty name, value and parent xml_base()657 xml_base() 658 : m_name(0) 659 , m_value(0) 660 , m_parent(0) 661 , m_name_size(0) 662 , m_value_size(0) 663 { 664 } 665 666 /////////////////////////////////////////////////////////////////////////// 667 // Node data access 668 669 //! Gets name of the node. 670 //! Interpretation of name depends on type of node. 671 //! Note that name will not be zero-terminated if rapidxml::parse_no_string_terminators option was selected during parse. 672 //! <br><br> 673 //! Use name_size() function to determine length of the name. 674 //! \return Name of node, or empty string if node has no name. name() const675 Ch *name() const 676 { 677 return m_name ? m_name : nullstr(); 678 } 679 680 //! Gets size of node name, not including terminator character. 681 //! This function works correctly irrespective of whether name is or is not zero terminated. 682 //! \return Size of node name, in characters. name_size() const683 std::size_t name_size() const 684 { 685 return m_name ? m_name_size : 0; 686 } 687 688 //! Gets value of node. 689 //! Interpretation of value depends on type of node. 690 //! Note that value will not be zero-terminated if rapidxml::parse_no_string_terminators option was selected during parse. 691 //! <br><br> 692 //! Use value_size() function to determine length of the value. 693 //! \return Value of node, or empty string if node has no value. value() const694 Ch *value() const 695 { 696 return m_value ? m_value : nullstr(); 697 } 698 699 //! Gets size of node value, not including terminator character. 700 //! 
This function works correctly irrespective of whether value is or is not zero terminated. 701 //! \return Size of node value, in characters. value_size() const702 std::size_t value_size() const 703 { 704 return m_value ? m_value_size : 0; 705 } 706 707 /////////////////////////////////////////////////////////////////////////// 708 // Node modification 709 710 //! Sets name of node to a non zero-terminated string. 711 //! See \ref ownership_of_strings. 712 //! <br><br> 713 //! Note that node does not own its name or value, it only stores a pointer to it. 714 //! It will not delete or otherwise free the pointer on destruction. 715 //! It is reponsibility of the user to properly manage lifetime of the string. 716 //! The easiest way to achieve it is to use memory_pool of the document to allocate the string - 717 //! on destruction of the document the string will be automatically freed. 718 //! <br><br> 719 //! Size of name must be specified separately, because name does not have to be zero terminated. 720 //! Use name(const Ch *) function to have the length automatically calculated (string must be zero terminated). 721 //! \param name Name of node to set. Does not have to be zero terminated. 722 //! \param size Size of name, in characters. This does not include zero terminator, if one is present. name(const Ch * name,std::size_t size)723 void name(const Ch *name, std::size_t size) 724 { 725 m_name = const_cast<Ch *>(name); 726 m_name_size = size; 727 } 728 729 //! Sets name of node to a zero-terminated string. 730 //! See also \ref ownership_of_strings and xml_node::name(const Ch *, std::size_t). 731 //! \param name Name of node to set. Must be zero terminated. name(const Ch * name)732 void name(const Ch *name) 733 { 734 this->name(name, internal::measure(name)); 735 } 736 737 //! Sets value of node to a non zero-terminated string. 738 //! See \ref ownership_of_strings. 739 //! <br><br> 740 //! 
Note that node does not own its name or value, it only stores a pointer to it. 741 //! It will not delete or otherwise free the pointer on destruction. 742 //! It is reponsibility of the user to properly manage lifetime of the string. 743 //! The easiest way to achieve it is to use memory_pool of the document to allocate the string - 744 //! on destruction of the document the string will be automatically freed. 745 //! <br><br> 746 //! Size of value must be specified separately, because it does not have to be zero terminated. 747 //! Use value(const Ch *) function to have the length automatically calculated (string must be zero terminated). 748 //! <br><br> 749 //! If an element has a child node of type node_data, it will take precedence over element value when printing. 750 //! If you want to manipulate data of elements using values, use parser flag rapidxml::parse_no_data_nodes to prevent creation of data nodes by the parser. 751 //! \param value value of node to set. Does not have to be zero terminated. 752 //! \param size Size of value, in characters. This does not include zero terminator, if one is present. value(const Ch * value,std::size_t size)753 void value(const Ch *value, std::size_t size) 754 { 755 m_value = const_cast<Ch *>(value); 756 m_value_size = size; 757 } 758 759 //! Sets value of node to a zero-terminated string. 760 //! See also \ref ownership_of_strings and xml_node::value(const Ch *, std::size_t). 761 //! \param value Vame of node to set. Must be zero terminated. value(const Ch * value)762 void value(const Ch *value) 763 { 764 this->value(value, internal::measure(value)); 765 } 766 767 /////////////////////////////////////////////////////////////////////////// 768 // Related nodes access 769 770 //! Gets node parent. 771 //! \return Pointer to parent node, or 0 if there is no parent. 
parent() const772 xml_node<Ch> *parent() const 773 { 774 return m_parent; 775 } 776 777 protected: 778 779 // Return empty string nullstr()780 static Ch *nullstr() 781 { 782 static Ch zero = Ch('\0'); 783 return &zero; 784 } 785 786 Ch *m_name; // Name of node, or 0 if no name 787 Ch *m_value; // Value of node, or 0 if no value 788 std::size_t m_name_size; // Length of node name, or undefined of no name 789 std::size_t m_value_size; // Length of node value, or undefined if no value 790 xml_node<Ch> *m_parent; // Pointer to parent node, or 0 if none 791 792 }; 793 794 //! Class representing attribute node of XML document. 795 //! Each attribute has name and value strings, which are available through name() and value() functions (inherited from xml_base). 796 //! Note that after parse, both name and value of attribute will point to interior of source text used for parsing. 797 //! Thus, this text must persist in memory for the lifetime of attribute. 798 //! \param Ch Character type to use. 799 template<class Ch = char> 800 class xml_attribute: public xml_base<Ch> 801 { 802 803 friend class xml_node<Ch>; 804 805 public: 806 807 /////////////////////////////////////////////////////////////////////////// 808 // Construction & destruction 809 810 //! Constructs an empty attribute with the specified type. 811 //! Consider using memory_pool of appropriate xml_document if allocating attributes manually. xml_attribute()812 xml_attribute() : m_next_attribute(0), m_prev_attribute(0) 813 { 814 } 815 816 /////////////////////////////////////////////////////////////////////////// 817 // Related nodes access 818 819 //! Gets document of which attribute is a child. 820 //! \return Pointer to document that contains this attribute, or 0 if there is no parent document. document() const821 xml_document<Ch> *document() const 822 { 823 if (xml_node<Ch> *node = this->parent()) 824 { 825 while (node->parent()) 826 node = node->parent(); 827 return node->type() == node_document ? 
static_cast<xml_document<Ch> *>(node) : 0; 828 } 829 else 830 return 0; 831 } 832 833 //! Gets previous attribute, optionally matching attribute name. 834 //! \param name Name of attribute to find, or 0 to return previous attribute regardless of its name; this string doesn't have to be zero-terminated if name_size is non-zero 835 //! \param name_size Size of name, in characters, or 0 to have size calculated automatically from string 836 //! \param case_sensitive Should name comparison be case-sensitive; non case-sensitive comparison works properly only for ASCII characters 837 //! \return Pointer to found attribute, or 0 if not found. previous_attribute(const Ch * name=0,std::size_t name_size=0,bool case_sensitive=true) const838 xml_attribute<Ch> *previous_attribute(const Ch *name = 0, std::size_t name_size = 0, bool case_sensitive = true) const 839 { 840 if (name) 841 { 842 if (name_size == 0) 843 name_size = internal::measure(name); 844 for (xml_attribute<Ch> *attribute = m_prev_attribute; attribute; attribute = attribute->m_prev_attribute) 845 if (internal::compare(attribute->name(), attribute->name_size(), name, name_size, case_sensitive)) 846 return attribute; 847 return 0; 848 } 849 else 850 return this->m_parent ? m_prev_attribute : 0; 851 } 852 853 //! Gets next attribute, optionally matching attribute name. 854 //! \param name Name of attribute to find, or 0 to return next attribute regardless of its name; this string doesn't have to be zero-terminated if name_size is non-zero 855 //! \param name_size Size of name, in characters, or 0 to have size calculated automatically from string 856 //! \param case_sensitive Should name comparison be case-sensitive; non case-sensitive comparison works properly only for ASCII characters 857 //! \return Pointer to found attribute, or 0 if not found. 
next_attribute(const Ch * name=0,std::size_t name_size=0,bool case_sensitive=true) const858 xml_attribute<Ch> *next_attribute(const Ch *name = 0, std::size_t name_size = 0, bool case_sensitive = true) const 859 { 860 if (name) 861 { 862 if (name_size == 0) 863 name_size = internal::measure(name); 864 for (xml_attribute<Ch> *attribute = m_next_attribute; attribute; attribute = attribute->m_next_attribute) 865 if (internal::compare(attribute->name(), attribute->name_size(), name, name_size, case_sensitive)) 866 return attribute; 867 return 0; 868 } 869 else 870 return this->m_parent ? m_next_attribute : 0; 871 } 872 873 private: 874 875 xml_attribute<Ch> *m_prev_attribute; // Pointer to previous sibling of attribute, or 0 if none; only valid if parent is non-zero 876 xml_attribute<Ch> *m_next_attribute; // Pointer to next sibling of attribute, or 0 if none; only valid if parent is non-zero 877 878 }; 879 880 /////////////////////////////////////////////////////////////////////////// 881 // XML node 882 883 //! Class representing a node of XML document. 884 //! Each node may have associated name and value strings, which are available through name() and value() functions. 885 //! Interpretation of name and value depends on type of the node. 886 //! Type of node can be determined by using type() function. 887 //! <br><br> 888 //! Note that after parse, both name and value of node, if any, will point interior of source text used for parsing. 889 //! Thus, this text must persist in the memory for the lifetime of node. 890 //! \param Ch Character type to use. 891 template<class Ch = char> 892 class xml_node: public xml_base<Ch> 893 { 894 895 public: 896 897 /////////////////////////////////////////////////////////////////////////// 898 // Construction & destruction 899 900 //! Constructs an empty node with the specified type. 901 //! Consider using memory_pool of appropriate document to allocate nodes manually. 902 //! \param type Type of node to construct. 
xml_node(node_type type)903 xml_node(node_type type) 904 : m_type(type) 905 , m_first_node(0) 906 , m_first_attribute(0) 907 , m_last_attribute(0) 908 , m_last_node(0) 909 , m_next_sibling(0) 910 , m_prev_sibling(0) 911 { 912 } 913 914 /////////////////////////////////////////////////////////////////////////// 915 // Node data access 916 917 //! Gets type of node. 918 //! \return Type of node. type() const919 node_type type() const 920 { 921 return m_type; 922 } 923 924 /////////////////////////////////////////////////////////////////////////// 925 // Related nodes access 926 927 //! Gets document of which node is a child. 928 //! \return Pointer to document that contains this node, or 0 if there is no parent document. document() const929 xml_document<Ch> *document() const 930 { 931 xml_node<Ch> *node = const_cast<xml_node<Ch> *>(this); 932 while (node->parent()) 933 node = node->parent(); 934 return node->type() == node_document ? static_cast<xml_document<Ch> *>(node) : 0; 935 } 936 937 //! Gets first child node, optionally matching node name. 938 //! \param name Name of child to find, or 0 to return first child regardless of its name; this string doesn't have to be zero-terminated if name_size is non-zero 939 //! \param name_size Size of name, in characters, or 0 to have size calculated automatically from string 940 //! \param case_sensitive Should name comparison be case-sensitive; non case-sensitive comparison works properly only for ASCII characters 941 //! \return Pointer to found child, or 0 if not found. 
first_node(const Ch * name=0,std::size_t name_size=0,bool case_sensitive=true) const942 xml_node<Ch> *first_node(const Ch *name = 0, std::size_t name_size = 0, bool case_sensitive = true) const 943 { 944 if (name) 945 { 946 if (name_size == 0) 947 name_size = internal::measure(name); 948 for (xml_node<Ch> *child = m_first_node; child; child = child->next_sibling()) 949 if (internal::compare(child->name(), child->name_size(), name, name_size, case_sensitive)) 950 return child; 951 return 0; 952 } 953 else 954 return m_first_node; 955 } 956 957 //! Gets last child node, optionally matching node name. 958 //! Behaviour is undefined if node has no children. 959 //! Use first_node() to test if node has children. 960 //! \param name Name of child to find, or 0 to return last child regardless of its name; this string doesn't have to be zero-terminated if name_size is non-zero 961 //! \param name_size Size of name, in characters, or 0 to have size calculated automatically from string 962 //! \param case_sensitive Should name comparison be case-sensitive; non case-sensitive comparison works properly only for ASCII characters 963 //! \return Pointer to found child, or 0 if not found. last_node(const Ch * name=0,std::size_t name_size=0,bool case_sensitive=true) const964 xml_node<Ch> *last_node(const Ch *name = 0, std::size_t name_size = 0, bool case_sensitive = true) const 965 { 966 assert(m_first_node); // Cannot query for last child if node has no children 967 if (name) 968 { 969 if (name_size == 0) 970 name_size = internal::measure(name); 971 for (xml_node<Ch> *child = m_last_node; child; child = child->previous_sibling()) 972 if (internal::compare(child->name(), child->name_size(), name, name_size, case_sensitive)) 973 return child; 974 return 0; 975 } 976 else 977 return m_last_node; 978 } 979 980 //! Gets previous sibling node, optionally matching node name. 981 //! Behaviour is undefined if node has no parent. 982 //! Use parent() to test if node has a parent. 983 //! 
\param name Name of sibling to find, or 0 to return previous sibling regardless of its name; this string doesn't have to be zero-terminated if name_size is non-zero 984 //! \param name_size Size of name, in characters, or 0 to have size calculated automatically from string 985 //! \param case_sensitive Should name comparison be case-sensitive; non case-sensitive comparison works properly only for ASCII characters 986 //! \return Pointer to found sibling, or 0 if not found. previous_sibling(const Ch * name=0,std::size_t name_size=0,bool case_sensitive=true) const987 xml_node<Ch> *previous_sibling(const Ch *name = 0, std::size_t name_size = 0, bool case_sensitive = true) const 988 { 989 assert(this->m_parent); // Cannot query for siblings if node has no parent 990 if (name) 991 { 992 if (name_size == 0) 993 name_size = internal::measure(name); 994 for (xml_node<Ch> *sibling = m_prev_sibling; sibling; sibling = sibling->m_prev_sibling) 995 if (internal::compare(sibling->name(), sibling->name_size(), name, name_size, case_sensitive)) 996 return sibling; 997 return 0; 998 } 999 else 1000 return m_prev_sibling; 1001 } 1002 1003 //! Gets next sibling node, optionally matching node name. 1004 //! Behaviour is undefined if node has no parent. 1005 //! Use parent() to test if node has a parent. 1006 //! \param name Name of sibling to find, or 0 to return next sibling regardless of its name; this string doesn't have to be zero-terminated if name_size is non-zero 1007 //! \param name_size Size of name, in characters, or 0 to have size calculated automatically from string 1008 //! \param case_sensitive Should name comparison be case-sensitive; non case-sensitive comparison works properly only for ASCII characters 1009 //! \return Pointer to found sibling, or 0 if not found. 
next_sibling(const Ch * name=0,std::size_t name_size=0,bool case_sensitive=true) const1010 xml_node<Ch> *next_sibling(const Ch *name = 0, std::size_t name_size = 0, bool case_sensitive = true) const 1011 { 1012 assert(this->m_parent); // Cannot query for siblings if node has no parent 1013 if (name) 1014 { 1015 if (name_size == 0) 1016 name_size = internal::measure(name); 1017 for (xml_node<Ch> *sibling = m_next_sibling; sibling; sibling = sibling->m_next_sibling) 1018 if (internal::compare(sibling->name(), sibling->name_size(), name, name_size, case_sensitive)) 1019 return sibling; 1020 return 0; 1021 } 1022 else 1023 return m_next_sibling; 1024 } 1025 1026 //! Gets first attribute of node, optionally matching attribute name. 1027 //! \param name Name of attribute to find, or 0 to return first attribute regardless of its name; this string doesn't have to be zero-terminated if name_size is non-zero 1028 //! \param name_size Size of name, in characters, or 0 to have size calculated automatically from string 1029 //! \param case_sensitive Should name comparison be case-sensitive; non case-sensitive comparison works properly only for ASCII characters 1030 //! \return Pointer to found attribute, or 0 if not found. first_attribute(const Ch * name=0,std::size_t name_size=0,bool case_sensitive=true) const1031 xml_attribute<Ch> *first_attribute(const Ch *name = 0, std::size_t name_size = 0, bool case_sensitive = true) const 1032 { 1033 if (name) 1034 { 1035 if (name_size == 0) 1036 name_size = internal::measure(name); 1037 for (xml_attribute<Ch> *attribute = m_first_attribute; attribute; attribute = attribute->m_next_attribute) 1038 if (internal::compare(attribute->name(), attribute->name_size(), name, name_size, case_sensitive)) 1039 return attribute; 1040 return 0; 1041 } 1042 else 1043 return m_first_attribute; 1044 } 1045 1046 //! Gets last attribute of node, optionally matching attribute name. 1047 //! 
\param name Name of attribute to find, or 0 to return last attribute regardless of its name; this string doesn't have to be zero-terminated if name_size is non-zero 1048 //! \param name_size Size of name, in characters, or 0 to have size calculated automatically from string 1049 //! \param case_sensitive Should name comparison be case-sensitive; non case-sensitive comparison works properly only for ASCII characters 1050 //! \return Pointer to found attribute, or 0 if not found. last_attribute(const Ch * name=0,std::size_t name_size=0,bool case_sensitive=true) const1051 xml_attribute<Ch> *last_attribute(const Ch *name = 0, std::size_t name_size = 0, bool case_sensitive = true) const 1052 { 1053 if (name) 1054 { 1055 if (name_size == 0) 1056 name_size = internal::measure(name); 1057 for (xml_attribute<Ch> *attribute = m_last_attribute; attribute; attribute = attribute->m_prev_attribute) 1058 if (internal::compare(attribute->name(), attribute->name_size(), name, name_size, case_sensitive)) 1059 return attribute; 1060 return 0; 1061 } 1062 else 1063 return m_first_attribute ? m_last_attribute : 0; 1064 } 1065 1066 /////////////////////////////////////////////////////////////////////////// 1067 // Node modification 1068 1069 //! Sets type of node. 1070 //! \param type Type of node to set. type(node_type type)1071 void type(node_type type) 1072 { 1073 m_type = type; 1074 } 1075 1076 /////////////////////////////////////////////////////////////////////////// 1077 // Node manipulation 1078 1079 //! Prepends a new child node. 1080 //! The prepended child becomes the first child, and all existing children are moved one position back. 1081 //! \param child Node to prepend. 
prepend_node(xml_node<Ch> * child)1082 void prepend_node(xml_node<Ch> *child) 1083 { 1084 assert(child && !child->parent() && child->type() != node_document); 1085 if (first_node()) 1086 { 1087 child->m_next_sibling = m_first_node; 1088 m_first_node->m_prev_sibling = child; 1089 } 1090 else 1091 { 1092 child->m_next_sibling = 0; 1093 m_last_node = child; 1094 } 1095 m_first_node = child; 1096 child->m_parent = this; 1097 child->m_prev_sibling = 0; 1098 } 1099 1100 //! Appends a new child node. 1101 //! The appended child becomes the last child. 1102 //! \param child Node to append. append_node(xml_node<Ch> * child)1103 void append_node(xml_node<Ch> *child) 1104 { 1105 assert(child && !child->parent() && child->type() != node_document); 1106 if (first_node()) 1107 { 1108 child->m_prev_sibling = m_last_node; 1109 m_last_node->m_next_sibling = child; 1110 } 1111 else 1112 { 1113 child->m_prev_sibling = 0; 1114 m_first_node = child; 1115 } 1116 m_last_node = child; 1117 child->m_parent = this; 1118 child->m_next_sibling = 0; 1119 } 1120 1121 //! Inserts a new child node at specified place inside the node. 1122 //! All children after and including the specified node are moved one position back. 1123 //! \param where Place where to insert the child, or 0 to insert at the back. 1124 //! \param child Node to insert. insert_node(xml_node<Ch> * where,xml_node<Ch> * child)1125 void insert_node(xml_node<Ch> *where, xml_node<Ch> *child) 1126 { 1127 assert(!where || where->parent() == this); 1128 assert(child && !child->parent() && child->type() != node_document); 1129 if (where == m_first_node) 1130 prepend_node(child); 1131 else if (where == 0) 1132 append_node(child); 1133 else 1134 { 1135 child->m_prev_sibling = where->m_prev_sibling; 1136 child->m_next_sibling = where; 1137 where->m_prev_sibling->m_next_sibling = child; 1138 where->m_prev_sibling = child; 1139 child->m_parent = this; 1140 } 1141 } 1142 1143 //! Removes first child node. 1144 //! 
If node has no children, behaviour is undefined. 1145 //! Use first_node() to test if node has children. remove_first_node()1146 void remove_first_node() 1147 { 1148 assert(first_node()); 1149 xml_node<Ch> *child = m_first_node; 1150 m_first_node = child->m_next_sibling; 1151 if (child->m_next_sibling) 1152 child->m_next_sibling->m_prev_sibling = 0; 1153 else 1154 m_last_node = 0; 1155 child->m_parent = 0; 1156 } 1157 1158 //! Removes last child of the node. 1159 //! If node has no children, behaviour is undefined. 1160 //! Use first_node() to test if node has children. remove_last_node()1161 void remove_last_node() 1162 { 1163 assert(first_node()); 1164 xml_node<Ch> *child = m_last_node; 1165 if (child->m_prev_sibling) 1166 { 1167 m_last_node = child->m_prev_sibling; 1168 child->m_prev_sibling->m_next_sibling = 0; 1169 } 1170 else 1171 m_first_node = 0; 1172 child->m_parent = 0; 1173 } 1174 1175 //! Removes specified child from the node 1176 // \param where Pointer to child to be removed. remove_node(xml_node<Ch> * where)1177 void remove_node(xml_node<Ch> *where) 1178 { 1179 assert(where && where->parent() == this); 1180 assert(first_node()); 1181 if (where == m_first_node) 1182 remove_first_node(); 1183 else if (where == m_last_node) 1184 remove_last_node(); 1185 else 1186 { 1187 where->m_prev_sibling->m_next_sibling = where->m_next_sibling; 1188 where->m_next_sibling->m_prev_sibling = where->m_prev_sibling; 1189 where->m_parent = 0; 1190 } 1191 } 1192 1193 //! Removes all child nodes (but not attributes). remove_all_nodes()1194 void remove_all_nodes() 1195 { 1196 for (xml_node<Ch> *node = first_node(); node; node = node->m_next_sibling) 1197 node->m_parent = 0; 1198 m_first_node = 0; 1199 } 1200 1201 //! Prepends a new attribute to the node. 1202 //! \param attribute Attribute to prepend. 
prepend_attribute(xml_attribute<Ch> * attribute)1203 void prepend_attribute(xml_attribute<Ch> *attribute) 1204 { 1205 assert(attribute && !attribute->parent()); 1206 if (first_attribute()) 1207 { 1208 attribute->m_next_attribute = m_first_attribute; 1209 m_first_attribute->m_prev_attribute = attribute; 1210 } 1211 else 1212 { 1213 attribute->m_next_attribute = 0; 1214 m_last_attribute = attribute; 1215 } 1216 m_first_attribute = attribute; 1217 attribute->m_parent = this; 1218 attribute->m_prev_attribute = 0; 1219 } 1220 1221 //! Appends a new attribute to the node. 1222 //! \param attribute Attribute to append. append_attribute(xml_attribute<Ch> * attribute)1223 void append_attribute(xml_attribute<Ch> *attribute) 1224 { 1225 assert(attribute && !attribute->parent()); 1226 if (first_attribute()) 1227 { 1228 attribute->m_prev_attribute = m_last_attribute; 1229 m_last_attribute->m_next_attribute = attribute; 1230 } 1231 else 1232 { 1233 attribute->m_prev_attribute = 0; 1234 m_first_attribute = attribute; 1235 } 1236 m_last_attribute = attribute; 1237 attribute->m_parent = this; 1238 attribute->m_next_attribute = 0; 1239 } 1240 1241 //! Inserts a new attribute at specified place inside the node. 1242 //! All attributes after and including the specified attribute are moved one position back. 1243 //! \param where Place where to insert the attribute, or 0 to insert at the back. 1244 //! \param attribute Attribute to insert. 
insert_attribute(xml_attribute<Ch> * where,xml_attribute<Ch> * attribute)1245 void insert_attribute(xml_attribute<Ch> *where, xml_attribute<Ch> *attribute) 1246 { 1247 assert(!where || where->parent() == this); 1248 assert(attribute && !attribute->parent()); 1249 if (where == m_first_attribute) 1250 prepend_attribute(attribute); 1251 else if (where == 0) 1252 append_attribute(attribute); 1253 else 1254 { 1255 attribute->m_prev_attribute = where->m_prev_attribute; 1256 attribute->m_next_attribute = where; 1257 where->m_prev_attribute->m_next_attribute = attribute; 1258 where->m_prev_attribute = attribute; 1259 attribute->m_parent = this; 1260 } 1261 } 1262 1263 //! Removes first attribute of the node. 1264 //! If node has no attributes, behaviour is undefined. 1265 //! Use first_attribute() to test if node has attributes. remove_first_attribute()1266 void remove_first_attribute() 1267 { 1268 assert(first_attribute()); 1269 xml_attribute<Ch> *attribute = m_first_attribute; 1270 if (attribute->m_next_attribute) 1271 { 1272 attribute->m_next_attribute->m_prev_attribute = 0; 1273 } 1274 else 1275 m_last_attribute = 0; 1276 attribute->m_parent = 0; 1277 m_first_attribute = attribute->m_next_attribute; 1278 } 1279 1280 //! Removes last attribute of the node. 1281 //! If node has no attributes, behaviour is undefined. 1282 //! Use first_attribute() to test if node has attributes. remove_last_attribute()1283 void remove_last_attribute() 1284 { 1285 assert(first_attribute()); 1286 xml_attribute<Ch> *attribute = m_last_attribute; 1287 if (attribute->m_prev_attribute) 1288 { 1289 attribute->m_prev_attribute->m_next_attribute = 0; 1290 m_last_attribute = attribute->m_prev_attribute; 1291 } 1292 else 1293 m_first_attribute = 0; 1294 attribute->m_parent = 0; 1295 } 1296 1297 //! Removes specified attribute from node. 1298 //! \param where Pointer to attribute to be removed. 
remove_attribute(xml_attribute<Ch> * where)1299 void remove_attribute(xml_attribute<Ch> *where) 1300 { 1301 assert(first_attribute() && where->parent() == this); 1302 if (where == m_first_attribute) 1303 remove_first_attribute(); 1304 else if (where == m_last_attribute) 1305 remove_last_attribute(); 1306 else 1307 { 1308 where->m_prev_attribute->m_next_attribute = where->m_next_attribute; 1309 where->m_next_attribute->m_prev_attribute = where->m_prev_attribute; 1310 where->m_parent = 0; 1311 } 1312 } 1313 1314 //! Removes all attributes of node. remove_all_attributes()1315 void remove_all_attributes() 1316 { 1317 for (xml_attribute<Ch> *attribute = first_attribute(); attribute; attribute = attribute->m_next_attribute) 1318 attribute->m_parent = 0; 1319 m_first_attribute = 0; 1320 } 1321 1322 private: 1323 1324 /////////////////////////////////////////////////////////////////////////// 1325 // Restrictions 1326 1327 // No copying 1328 xml_node(const xml_node &); 1329 void operator =(const xml_node &); 1330 1331 /////////////////////////////////////////////////////////////////////////// 1332 // Data members 1333 1334 // Note that some of the pointers below have UNDEFINED values if certain other pointers are 0. 1335 // This is required for maximum performance, as it allows the parser to omit initialization of 1336 // unneded/redundant values. 1337 // 1338 // The rules are as follows: 1339 // 1. first_node and first_attribute contain valid pointers, or 0 if node has no children/attributes respectively 1340 // 2. last_node and last_attribute are valid only if node has at least one child/attribute respectively, otherwise they contain garbage 1341 // 3. 
prev_sibling and next_sibling are valid only if node has a parent, otherwise they contain garbage 1342 1343 node_type m_type; // Type of node; always valid 1344 xml_node<Ch> *m_first_node; // Pointer to first child node, or 0 if none; always valid 1345 xml_node<Ch> *m_last_node; // Pointer to last child node, or 0 if none; this value is only valid if m_first_node is non-zero 1346 xml_attribute<Ch> *m_first_attribute; // Pointer to first attribute of node, or 0 if none; always valid 1347 xml_attribute<Ch> *m_last_attribute; // Pointer to last attribute of node, or 0 if none; this value is only valid if m_first_attribute is non-zero 1348 xml_node<Ch> *m_prev_sibling; // Pointer to previous sibling of node, or 0 if none; this value is only valid if m_parent is non-zero 1349 xml_node<Ch> *m_next_sibling; // Pointer to next sibling of node, or 0 if none; this value is only valid if m_parent is non-zero 1350 1351 }; 1352 1353 /////////////////////////////////////////////////////////////////////////// 1354 // XML document 1355 1356 //! This class represents root of the DOM hierarchy. 1357 //! It is also an xml_node and a memory_pool through public inheritance. 1358 //! Use parse() function to build a DOM tree from a zero-terminated XML text string. 1359 //! parse() function allocates memory for nodes and attributes by using functions of xml_document, 1360 //! which are inherited from memory_pool. 1361 //! To access root node of the document, use the document itself, as if it was an xml_node. 1362 //! \param Ch Character type to use. 1363 template<class Ch = char> 1364 class xml_document: public xml_node<Ch>, public memory_pool<Ch> 1365 { 1366 1367 public: 1368 1369 //! Constructs empty XML document xml_document()1370 xml_document() 1371 : xml_node<Ch>(node_document) 1372 { 1373 } 1374 1375 //! Parses zero-terminated XML string according to given flags. 1376 //! Passed string will be modified by the parser, unless rapidxml::parse_non_destructive flag is used. 1377 //! 
The string must persist for the lifetime of the document. 1378 //! In case of error, rapidxml::parse_error exception will be thrown. 1379 //! <br><br> 1380 //! If you want to parse contents of a file, you must first load the file into the memory, and pass pointer to its beginning. 1381 //! Make sure that data is zero-terminated. 1382 //! <br><br> 1383 //! Document can be parsed into multiple times. 1384 //! Each new call to parse removes previous nodes and attributes (if any), but does not clear memory pool. 1385 //! \param text XML data to parse; pointer is non-const to denote fact that this data may be modified by the parser. 1386 template<int Flags> parse(Ch * text)1387 void parse(Ch *text) 1388 { 1389 assert(text); 1390 1391 // Remove current contents 1392 this->remove_all_nodes(); 1393 this->remove_all_attributes(); 1394 1395 // Parse BOM, if any 1396 parse_bom<Flags>(text); 1397 1398 // Parse children 1399 while (1) 1400 { 1401 // Skip whitespace before node 1402 skip<whitespace_pred, Flags>(text); 1403 if (*text == 0) 1404 break; 1405 1406 // Parse and append new child 1407 if (*text == Ch('<')) 1408 { 1409 ++text; // Skip '<' 1410 if (xml_node<Ch> *node = parse_node<Flags>(text)) 1411 this->append_node(node); 1412 } 1413 else 1414 RAPIDXML_PARSE_ERROR("expected <", text); 1415 } 1416 1417 } 1418 1419 //! Clears the document by deleting all nodes and clearing the memory pool. 1420 //! All nodes owned by document pool are destroyed. 
clear()1421 void clear() 1422 { 1423 this->remove_all_nodes(); 1424 this->remove_all_attributes(); 1425 memory_pool<Ch>::clear(); 1426 } 1427 1428 private: 1429 1430 /////////////////////////////////////////////////////////////////////// 1431 // Internal character utility functions 1432 1433 // Detect whitespace character 1434 struct whitespace_pred 1435 { testrapidxml::xml_document::whitespace_pred1436 static unsigned char test(Ch ch) 1437 { 1438 return internal::lookup_tables<0>::lookup_whitespace[static_cast<unsigned char>(ch)]; 1439 } 1440 }; 1441 1442 // Detect node name character 1443 struct node_name_pred 1444 { testrapidxml::xml_document::node_name_pred1445 static unsigned char test(Ch ch) 1446 { 1447 return internal::lookup_tables<0>::lookup_node_name[static_cast<unsigned char>(ch)]; 1448 } 1449 }; 1450 1451 // Detect attribute name character 1452 struct attribute_name_pred 1453 { testrapidxml::xml_document::attribute_name_pred1454 static unsigned char test(Ch ch) 1455 { 1456 return internal::lookup_tables<0>::lookup_attribute_name[static_cast<unsigned char>(ch)]; 1457 } 1458 }; 1459 1460 // Detect text character (PCDATA) 1461 struct text_pred 1462 { testrapidxml::xml_document::text_pred1463 static unsigned char test(Ch ch) 1464 { 1465 return internal::lookup_tables<0>::lookup_text[static_cast<unsigned char>(ch)]; 1466 } 1467 }; 1468 1469 // Detect text character (PCDATA) that does not require processing 1470 struct text_pure_no_ws_pred 1471 { testrapidxml::xml_document::text_pure_no_ws_pred1472 static unsigned char test(Ch ch) 1473 { 1474 return internal::lookup_tables<0>::lookup_text_pure_no_ws[static_cast<unsigned char>(ch)]; 1475 } 1476 }; 1477 1478 // Detect text character (PCDATA) that does not require processing 1479 struct text_pure_with_ws_pred 1480 { testrapidxml::xml_document::text_pure_with_ws_pred1481 static unsigned char test(Ch ch) 1482 { 1483 return internal::lookup_tables<0>::lookup_text_pure_with_ws[static_cast<unsigned char>(ch)]; 1484 } 
1485 }; 1486 1487 // Detect attribute value character 1488 template<Ch Quote> 1489 struct attribute_value_pred 1490 { testrapidxml::xml_document::attribute_value_pred1491 static unsigned char test(Ch ch) 1492 { 1493 if (Quote == Ch('\'')) 1494 return internal::lookup_tables<0>::lookup_attribute_data_1[static_cast<unsigned char>(ch)]; 1495 if (Quote == Ch('\"')) 1496 return internal::lookup_tables<0>::lookup_attribute_data_2[static_cast<unsigned char>(ch)]; 1497 return 0; // Should never be executed, to avoid warnings on Comeau 1498 } 1499 }; 1500 1501 // Detect attribute value character 1502 template<Ch Quote> 1503 struct attribute_value_pure_pred 1504 { testrapidxml::xml_document::attribute_value_pure_pred1505 static unsigned char test(Ch ch) 1506 { 1507 if (Quote == Ch('\'')) 1508 return internal::lookup_tables<0>::lookup_attribute_data_1_pure[static_cast<unsigned char>(ch)]; 1509 if (Quote == Ch('\"')) 1510 return internal::lookup_tables<0>::lookup_attribute_data_2_pure[static_cast<unsigned char>(ch)]; 1511 return 0; // Should never be executed, to avoid warnings on Comeau 1512 } 1513 }; 1514 1515 // Insert coded character, using UTF8 or 8-bit ASCII 1516 template<int Flags> insert_coded_character(Ch * & text,unsigned long code)1517 static void insert_coded_character(Ch *&text, unsigned long code) 1518 { 1519 if (Flags & parse_no_utf8) 1520 { 1521 // Insert 8-bit ASCII character 1522 // possibly verify that code is less than 256 and use replacement char otherwise? 
1523 text[0] = static_cast<unsigned char>(code); 1524 text += 1; 1525 } 1526 else 1527 { 1528 // Insert UTF8 sequence 1529 if (code < 0x80) // 1 byte sequence 1530 { 1531 text[0] = static_cast<unsigned char>(code); 1532 text += 1; 1533 } 1534 else if (code < 0x800) // 2 byte sequence 1535 { 1536 text[1] = static_cast<unsigned char>((code | 0x80) & 0xBF); code >>= 6; 1537 text[0] = static_cast<unsigned char>(code | 0xC0); 1538 text += 2; 1539 } 1540 else if (code < 0x10000) // 3 byte sequence 1541 { 1542 text[2] = static_cast<unsigned char>((code | 0x80) & 0xBF); code >>= 6; 1543 text[1] = static_cast<unsigned char>((code | 0x80) & 0xBF); code >>= 6; 1544 text[0] = static_cast<unsigned char>(code | 0xE0); 1545 text += 3; 1546 } 1547 else if (code < 0x110000) // 4 byte sequence 1548 { 1549 text[3] = static_cast<unsigned char>((code | 0x80) & 0xBF); code >>= 6; 1550 text[2] = static_cast<unsigned char>((code | 0x80) & 0xBF); code >>= 6; 1551 text[1] = static_cast<unsigned char>((code | 0x80) & 0xBF); code >>= 6; 1552 text[0] = static_cast<unsigned char>(code | 0xF0); 1553 text += 4; 1554 } 1555 else // Invalid, only codes up to 0x10FFFF are allowed in Unicode 1556 { 1557 RAPIDXML_PARSE_ERROR("invalid numeric character entity", text); 1558 } 1559 } 1560 } 1561 1562 // Skip characters until predicate evaluates to true 1563 template<class StopPred, int Flags> skip(Ch * & text)1564 static void skip(Ch *&text) 1565 { 1566 Ch *tmp = text; 1567 while (StopPred::test(*tmp)) 1568 ++tmp; 1569 text = tmp; 1570 } 1571 1572 // Skip characters until predicate evaluates to true while doing the following: 1573 // - replacing XML character entity references with proper characters (' & " < > &#...;) 1574 // - condensing whitespace sequences to single space character 1575 template<class StopPred, class StopPredPure, int Flags> skip_and_expand_character_refs(Ch * & text)1576 static Ch *skip_and_expand_character_refs(Ch *&text) 1577 { 1578 // If entity translation, whitespace condense and 
whitespace trimming is disabled, use plain skip 1579 if (Flags & parse_no_entity_translation && 1580 !(Flags & parse_normalize_whitespace) && 1581 !(Flags & parse_trim_whitespace)) 1582 { 1583 skip<StopPred, Flags>(text); 1584 return text; 1585 } 1586 1587 // Use simple skip until first modification is detected 1588 skip<StopPredPure, Flags>(text); 1589 1590 // Use translation skip 1591 Ch *src = text; 1592 Ch *dest = src; 1593 while (StopPred::test(*src)) 1594 { 1595 // If entity translation is enabled 1596 if (!(Flags & parse_no_entity_translation)) 1597 { 1598 // Test if replacement is needed 1599 if (src[0] == Ch('&')) 1600 { 1601 switch (src[1]) 1602 { 1603 1604 // & ' 1605 case Ch('a'): 1606 if (src[2] == Ch('m') && src[3] == Ch('p') && src[4] == Ch(';')) 1607 { 1608 *dest = Ch('&'); 1609 ++dest; 1610 src += 5; 1611 continue; 1612 } 1613 if (src[2] == Ch('p') && src[3] == Ch('o') && src[4] == Ch('s') && src[5] == Ch(';')) 1614 { 1615 *dest = Ch('\''); 1616 ++dest; 1617 src += 6; 1618 continue; 1619 } 1620 break; 1621 1622 // " 1623 case Ch('q'): 1624 if (src[2] == Ch('u') && src[3] == Ch('o') && src[4] == Ch('t') && src[5] == Ch(';')) 1625 { 1626 *dest = Ch('"'); 1627 ++dest; 1628 src += 6; 1629 continue; 1630 } 1631 break; 1632 1633 // > 1634 case Ch('g'): 1635 if (src[2] == Ch('t') && src[3] == Ch(';')) 1636 { 1637 *dest = Ch('>'); 1638 ++dest; 1639 src += 4; 1640 continue; 1641 } 1642 break; 1643 1644 // < 1645 case Ch('l'): 1646 if (src[2] == Ch('t') && src[3] == Ch(';')) 1647 { 1648 *dest = Ch('<'); 1649 ++dest; 1650 src += 4; 1651 continue; 1652 } 1653 break; 1654 1655 // &#...; - assumes ASCII 1656 case Ch('#'): 1657 if (src[2] == Ch('x')) 1658 { 1659 unsigned long code = 0; 1660 src += 3; // Skip &#x 1661 while (1) 1662 { 1663 unsigned char digit = internal::lookup_tables<0>::lookup_digits[static_cast<unsigned char>(*src)]; 1664 if (digit == 0xFF) 1665 break; 1666 code = code * 16 + digit; 1667 ++src; 1668 } 1669 insert_coded_character<Flags>(dest, 
code); // Put character in output 1670 } 1671 else 1672 { 1673 unsigned long code = 0; 1674 src += 2; // Skip &# 1675 while (1) 1676 { 1677 unsigned char digit = internal::lookup_tables<0>::lookup_digits[static_cast<unsigned char>(*src)]; 1678 if (digit == 0xFF) 1679 break; 1680 code = code * 10 + digit; 1681 ++src; 1682 } 1683 insert_coded_character<Flags>(dest, code); // Put character in output 1684 } 1685 if (*src == Ch(';')) 1686 ++src; 1687 else 1688 RAPIDXML_PARSE_ERROR("expected ;", src); 1689 continue; 1690 1691 // Something else 1692 default: 1693 // Ignore, just copy '&' verbatim 1694 break; 1695 1696 } 1697 } 1698 } 1699 1700 // If whitespace condensing is enabled 1701 if (Flags & parse_normalize_whitespace) 1702 { 1703 // Test if condensing is needed 1704 if (whitespace_pred::test(*src)) 1705 { 1706 *dest = Ch(' '); ++dest; // Put single space in dest 1707 ++src; // Skip first whitespace char 1708 // Skip remaining whitespace chars 1709 while (whitespace_pred::test(*src)) 1710 ++src; 1711 continue; 1712 } 1713 } 1714 1715 // No replacement, only copy character 1716 *dest++ = *src++; 1717 1718 } 1719 1720 // Return new end 1721 text = src; 1722 return dest; 1723 1724 } 1725 1726 /////////////////////////////////////////////////////////////////////// 1727 // Internal parsing functions 1728 1729 // Parse BOM, if any 1730 template<int Flags> parse_bom(Ch * & text)1731 void parse_bom(Ch *&text) 1732 { 1733 // UTF-8? 1734 if (static_cast<unsigned char>(text[0]) == 0xEF && 1735 static_cast<unsigned char>(text[1]) == 0xBB && 1736 static_cast<unsigned char>(text[2]) == 0xBF) 1737 { 1738 text += 3; // Skup utf-8 bom 1739 } 1740 } 1741 1742 // Parse XML declaration (<?xml...) 
1743 template<int Flags> parse_xml_declaration(Ch * & text)1744 xml_node<Ch> *parse_xml_declaration(Ch *&text) 1745 { 1746 // If parsing of declaration is disabled 1747 if (!(Flags & parse_declaration_node)) 1748 { 1749 // Skip until end of declaration 1750 while (text[0] != Ch('?') || text[1] != Ch('>')) 1751 { 1752 if (!text[0]) 1753 RAPIDXML_PARSE_ERROR("unexpected end of data", text); 1754 ++text; 1755 } 1756 text += 2; // Skip '?>' 1757 return 0; 1758 } 1759 1760 // Create declaration 1761 xml_node<Ch> *declaration = this->allocate_node(node_declaration); 1762 1763 // Skip whitespace before attributes or ?> 1764 skip<whitespace_pred, Flags>(text); 1765 1766 // Parse declaration attributes 1767 parse_node_attributes<Flags>(text, declaration); 1768 1769 // Skip ?> 1770 if (text[0] != Ch('?') || text[1] != Ch('>')) 1771 RAPIDXML_PARSE_ERROR("expected ?>", text); 1772 text += 2; 1773 1774 return declaration; 1775 } 1776 1777 // Parse XML comment (<!--...) 1778 template<int Flags> parse_comment(Ch * & text)1779 xml_node<Ch> *parse_comment(Ch *&text) 1780 { 1781 // If parsing of comments is disabled 1782 if (!(Flags & parse_comment_nodes)) 1783 { 1784 // Skip until end of comment 1785 while (text[0] != Ch('-') || text[1] != Ch('-') || text[2] != Ch('>')) 1786 { 1787 if (!text[0]) 1788 RAPIDXML_PARSE_ERROR("unexpected end of data", text); 1789 ++text; 1790 } 1791 text += 3; // Skip '-->' 1792 return 0; // Do not produce comment node 1793 } 1794 1795 // Remember value start 1796 Ch *value = text; 1797 1798 // Skip until end of comment 1799 while (text[0] != Ch('-') || text[1] != Ch('-') || text[2] != Ch('>')) 1800 { 1801 if (!text[0]) 1802 RAPIDXML_PARSE_ERROR("unexpected end of data", text); 1803 ++text; 1804 } 1805 1806 // Create comment node 1807 xml_node<Ch> *comment = this->allocate_node(node_comment); 1808 comment->value(value, text - value); 1809 1810 // Place zero terminator after comment value 1811 if (!(Flags & parse_no_string_terminators)) 1812 *text = 
Ch('\0'); 1813 1814 text += 3; // Skip '-->' 1815 return comment; 1816 } 1817 1818 // Parse DOCTYPE 1819 template<int Flags> parse_doctype(Ch * & text)1820 xml_node<Ch> *parse_doctype(Ch *&text) 1821 { 1822 // Remember value start 1823 Ch *value = text; 1824 1825 // Skip to > 1826 while (*text != Ch('>')) 1827 { 1828 // Determine character type 1829 switch (*text) 1830 { 1831 1832 // If '[' encountered, scan for matching ending ']' using naive algorithm with depth 1833 // This works for all W3C test files except for 2 most wicked 1834 case Ch('['): 1835 { 1836 ++text; // Skip '[' 1837 int depth = 1; 1838 while (depth > 0) 1839 { 1840 switch (*text) 1841 { 1842 case Ch('['): ++depth; break; 1843 case Ch(']'): --depth; break; 1844 case 0: RAPIDXML_PARSE_ERROR("unexpected end of data", text); 1845 } 1846 ++text; 1847 } 1848 break; 1849 } 1850 1851 // Error on end of text 1852 case Ch('\0'): 1853 RAPIDXML_PARSE_ERROR("unexpected end of data", text); 1854 1855 // Other character, skip it 1856 default: 1857 ++text; 1858 break; 1859 } 1860 } 1861 1862 // If DOCTYPE nodes enabled 1863 if (Flags & parse_doctype_node) 1864 { 1865 // Create a new doctype node 1866 xml_node<Ch> *doctype = this->allocate_node(node_doctype); 1867 doctype->value(value, text - value); 1868 1869 // Place zero terminator after value 1870 if (!(Flags & parse_no_string_terminators)) 1871 *text = Ch('\0'); 1872 1873 text += 1; // skip '>' 1874 return doctype; 1875 } 1876 else 1877 { 1878 text += 1; // skip '>' 1879 return 0; 1880 } 1881 1882 } 1883 1884 // Parse PI 1885 template<int Flags> parse_pi(Ch * & text)1886 xml_node<Ch> *parse_pi(Ch *&text) 1887 { 1888 // If creation of PI nodes is enabled 1889 if (Flags & parse_pi_nodes) 1890 { 1891 // Create pi node 1892 xml_node<Ch> *pi = this->allocate_node(node_pi); 1893 1894 // Extract PI target name 1895 Ch *name = text; 1896 skip<node_name_pred, Flags>(text); 1897 if (text == name) 1898 RAPIDXML_PARSE_ERROR("expected PI target", text); 1899 
pi->name(name, text - name); 1900 1901 // Skip whitespace between pi target and pi 1902 skip<whitespace_pred, Flags>(text); 1903 1904 // Remember start of pi 1905 Ch *value = text; 1906 1907 // Skip to '?>' 1908 while (text[0] != Ch('?') || text[1] != Ch('>')) 1909 { 1910 if (*text == Ch('\0')) 1911 RAPIDXML_PARSE_ERROR("unexpected end of data", text); 1912 ++text; 1913 } 1914 1915 // Set pi value (verbatim, no entity expansion or whitespace normalization) 1916 pi->value(value, text - value); 1917 1918 // Place zero terminator after name and value 1919 if (!(Flags & parse_no_string_terminators)) 1920 { 1921 pi->name()[pi->name_size()] = Ch('\0'); 1922 pi->value()[pi->value_size()] = Ch('\0'); 1923 } 1924 1925 text += 2; // Skip '?>' 1926 return pi; 1927 } 1928 else 1929 { 1930 // Skip to '?>' 1931 while (text[0] != Ch('?') || text[1] != Ch('>')) 1932 { 1933 if (*text == Ch('\0')) 1934 RAPIDXML_PARSE_ERROR("unexpected end of data", text); 1935 ++text; 1936 } 1937 text += 2; // Skip '?>' 1938 return 0; 1939 } 1940 } 1941 1942 // Parse and append data 1943 // Return character that ends data. 
1944 // This is necessary because this character might have been overwritten by a terminating 0 1945 template<int Flags> parse_and_append_data(xml_node<Ch> * node,Ch * & text,Ch * contents_start)1946 Ch parse_and_append_data(xml_node<Ch> *node, Ch *&text, Ch *contents_start) 1947 { 1948 // Backup to contents start if whitespace trimming is disabled 1949 if (!(Flags & parse_trim_whitespace)) 1950 text = contents_start; 1951 1952 // Skip until end of data 1953 Ch *value = text, *end; 1954 if (Flags & parse_normalize_whitespace) 1955 end = skip_and_expand_character_refs<text_pred, text_pure_with_ws_pred, Flags>(text); 1956 else 1957 end = skip_and_expand_character_refs<text_pred, text_pure_no_ws_pred, Flags>(text); 1958 1959 // Trim trailing whitespace if flag is set; leading was already trimmed by whitespace skip after > 1960 if (Flags & parse_trim_whitespace) 1961 { 1962 if (Flags & parse_normalize_whitespace) 1963 { 1964 // Whitespace is already condensed to single space characters by skipping function, so just trim 1 char off the end 1965 if (*(end - 1) == Ch(' ')) 1966 --end; 1967 } 1968 else 1969 { 1970 // Backup until non-whitespace character is found 1971 while (whitespace_pred::test(*(end - 1))) 1972 --end; 1973 } 1974 } 1975 1976 // If characters are still left between end and value (this test is only necessary if normalization is enabled) 1977 // Create new data node 1978 if (!(Flags & parse_no_data_nodes)) 1979 { 1980 xml_node<Ch> *data = this->allocate_node(node_data); 1981 data->value(value, end - value); 1982 node->append_node(data); 1983 } 1984 1985 // Add data to parent node if no data exists yet 1986 if (!(Flags & parse_no_element_values)) 1987 if (*node->value() == Ch('\0')) 1988 node->value(value, end - value); 1989 1990 // Place zero terminator after value 1991 if (!(Flags & parse_no_string_terminators)) 1992 { 1993 Ch ch = *text; 1994 *end = Ch('\0'); 1995 return ch; // Return character that ends data; this is required because zero terminator 
overwritten it 1996 } 1997 1998 // Return character that ends data 1999 return *text; 2000 } 2001 2002 // Parse CDATA 2003 template<int Flags> parse_cdata(Ch * & text)2004 xml_node<Ch> *parse_cdata(Ch *&text) 2005 { 2006 // If CDATA is disabled 2007 if (Flags & parse_no_data_nodes) 2008 { 2009 // Skip until end of cdata 2010 while (text[0] != Ch(']') || text[1] != Ch(']') || text[2] != Ch('>')) 2011 { 2012 if (!text[0]) 2013 RAPIDXML_PARSE_ERROR("unexpected end of data", text); 2014 ++text; 2015 } 2016 text += 3; // Skip ]]> 2017 return 0; // Do not produce CDATA node 2018 } 2019 2020 // Skip until end of cdata 2021 Ch *value = text; 2022 while (text[0] != Ch(']') || text[1] != Ch(']') || text[2] != Ch('>')) 2023 { 2024 if (!text[0]) 2025 RAPIDXML_PARSE_ERROR("unexpected end of data", text); 2026 ++text; 2027 } 2028 2029 // Create new cdata node 2030 xml_node<Ch> *cdata = this->allocate_node(node_cdata); 2031 cdata->value(value, text - value); 2032 2033 // Place zero terminator after value 2034 if (!(Flags & parse_no_string_terminators)) 2035 *text = Ch('\0'); 2036 2037 text += 3; // Skip ]]> 2038 return cdata; 2039 } 2040 2041 // Parse element node 2042 template<int Flags> parse_element(Ch * & text)2043 xml_node<Ch> *parse_element(Ch *&text) 2044 { 2045 // Create element node 2046 xml_node<Ch> *element = this->allocate_node(node_element); 2047 2048 // Extract element name 2049 Ch *name = text; 2050 skip<node_name_pred, Flags>(text); 2051 if (text == name) 2052 RAPIDXML_PARSE_ERROR("expected element name", text); 2053 element->name(name, text - name); 2054 2055 // Skip whitespace between element name and attributes or > 2056 skip<whitespace_pred, Flags>(text); 2057 2058 // Parse attributes, if any 2059 parse_node_attributes<Flags>(text, element); 2060 2061 // Determine ending type 2062 if (*text == Ch('>')) 2063 { 2064 ++text; 2065 parse_node_contents<Flags>(text, element); 2066 } 2067 else if (*text == Ch('/')) 2068 { 2069 ++text; 2070 if (*text != Ch('>')) 2071 
RAPIDXML_PARSE_ERROR("expected >", text); 2072 ++text; 2073 } 2074 else 2075 RAPIDXML_PARSE_ERROR("expected >", text); 2076 2077 // Place zero terminator after name 2078 if (!(Flags & parse_no_string_terminators)) 2079 element->name()[element->name_size()] = Ch('\0'); 2080 2081 // Return parsed element 2082 return element; 2083 } 2084 2085 // Determine node type, and parse it 2086 template<int Flags> parse_node(Ch * & text)2087 xml_node<Ch> *parse_node(Ch *&text) 2088 { 2089 // Parse proper node type 2090 switch (text[0]) 2091 { 2092 2093 // <... 2094 default: 2095 // Parse and append element node 2096 return parse_element<Flags>(text); 2097 2098 // <?... 2099 case Ch('?'): 2100 ++text; // Skip ? 2101 if ((text[0] == Ch('x') || text[0] == Ch('X')) && 2102 (text[1] == Ch('m') || text[1] == Ch('M')) && 2103 (text[2] == Ch('l') || text[2] == Ch('L')) && 2104 whitespace_pred::test(text[3])) 2105 { 2106 // '<?xml ' - xml declaration 2107 text += 4; // Skip 'xml ' 2108 return parse_xml_declaration<Flags>(text); 2109 } 2110 else 2111 { 2112 // Parse PI 2113 return parse_pi<Flags>(text); 2114 } 2115 2116 // <!... 2117 case Ch('!'): 2118 2119 // Parse proper subset of <! 
node 2120 switch (text[1]) 2121 { 2122 2123 // <!- 2124 case Ch('-'): 2125 if (text[2] == Ch('-')) 2126 { 2127 // '<!--' - xml comment 2128 text += 3; // Skip '!--' 2129 return parse_comment<Flags>(text); 2130 } 2131 break; 2132 2133 // <![ 2134 case Ch('['): 2135 if (text[2] == Ch('C') && text[3] == Ch('D') && text[4] == Ch('A') && 2136 text[5] == Ch('T') && text[6] == Ch('A') && text[7] == Ch('[')) 2137 { 2138 // '<![CDATA[' - cdata 2139 text += 8; // Skip '![CDATA[' 2140 return parse_cdata<Flags>(text); 2141 } 2142 break; 2143 2144 // <!D 2145 case Ch('D'): 2146 if (text[2] == Ch('O') && text[3] == Ch('C') && text[4] == Ch('T') && 2147 text[5] == Ch('Y') && text[6] == Ch('P') && text[7] == Ch('E') && 2148 whitespace_pred::test(text[8])) 2149 { 2150 // '<!DOCTYPE ' - doctype 2151 text += 9; // skip '!DOCTYPE ' 2152 return parse_doctype<Flags>(text); 2153 } 2154 2155 } // switch 2156 2157 // Attempt to skip other, unrecognized node types starting with <! 2158 ++text; // Skip ! 2159 while (*text != Ch('>')) 2160 { 2161 if (*text == 0) 2162 RAPIDXML_PARSE_ERROR("unexpected end of data", text); 2163 ++text; 2164 } 2165 ++text; // Skip '>' 2166 return 0; // No node recognized 2167 2168 } 2169 } 2170 2171 // Parse contents of the node - children, data etc. 2172 template<int Flags> parse_node_contents(Ch * & text,xml_node<Ch> * node)2173 void parse_node_contents(Ch *&text, xml_node<Ch> *node) 2174 { 2175 // For all children and text 2176 while (1) 2177 { 2178 // Skip whitespace between > and node contents 2179 Ch *contents_start = text; // Store start of node contents before whitespace is skipped 2180 skip<whitespace_pred, Flags>(text); 2181 Ch next_char = *text; 2182 2183 // After data nodes, instead of continuing the loop, control jumps here. 2184 // This is because zero termination inside parse_and_append_data() function 2185 // would wreak havoc with the above code. 2186 // Also, skipping whitespace after data nodes is unnecessary. 
2187 after_data_node: 2188 2189 // Determine what comes next: node closing, child node, data node, or 0? 2190 switch (next_char) 2191 { 2192 2193 // Node closing or child node 2194 case Ch('<'): 2195 if (text[1] == Ch('/')) 2196 { 2197 // Node closing 2198 text += 2; // Skip '</' 2199 if (Flags & parse_validate_closing_tags) 2200 { 2201 // Skip and validate closing tag name 2202 Ch *closing_name = text; 2203 skip<node_name_pred, Flags>(text); 2204 if (!internal::compare(node->name(), node->name_size(), closing_name, text - closing_name, true)) 2205 RAPIDXML_PARSE_ERROR("invalid closing tag name", text); 2206 } 2207 else 2208 { 2209 // No validation, just skip name 2210 skip<node_name_pred, Flags>(text); 2211 } 2212 // Skip remaining whitespace after node name 2213 skip<whitespace_pred, Flags>(text); 2214 if (*text != Ch('>')) 2215 RAPIDXML_PARSE_ERROR("expected >", text); 2216 ++text; // Skip '>' 2217 return; // Node closed, finished parsing contents 2218 } 2219 else 2220 { 2221 // Child node 2222 ++text; // Skip '<' 2223 if (xml_node<Ch> *child = parse_node<Flags>(text)) 2224 node->append_node(child); 2225 } 2226 break; 2227 2228 // End of data - error 2229 case Ch('\0'): 2230 RAPIDXML_PARSE_ERROR("unexpected end of data", text); 2231 2232 // Data node 2233 default: 2234 next_char = parse_and_append_data<Flags>(node, text, contents_start); 2235 goto after_data_node; // Bypass regular processing after data nodes 2236 2237 } 2238 } 2239 } 2240 2241 // Parse XML attributes of the node 2242 template<int Flags> parse_node_attributes(Ch * & text,xml_node<Ch> * node)2243 void parse_node_attributes(Ch *&text, xml_node<Ch> *node) 2244 { 2245 // For all attributes 2246 while (attribute_name_pred::test(*text)) 2247 { 2248 // Extract attribute name 2249 Ch *name = text; 2250 ++text; // Skip first character of attribute name 2251 skip<attribute_name_pred, Flags>(text); 2252 if (text == name) 2253 RAPIDXML_PARSE_ERROR("expected attribute name", name); 2254 2255 // Create new 
attribute 2256 xml_attribute<Ch> *attribute = this->allocate_attribute(); 2257 attribute->name(name, text - name); 2258 node->append_attribute(attribute); 2259 2260 // Skip whitespace after attribute name 2261 skip<whitespace_pred, Flags>(text); 2262 2263 // Skip = 2264 if (*text != Ch('=')) 2265 RAPIDXML_PARSE_ERROR("expected =", text); 2266 ++text; 2267 2268 // Add terminating zero after name 2269 if (!(Flags & parse_no_string_terminators)) 2270 attribute->name()[attribute->name_size()] = 0; 2271 2272 // Skip whitespace after = 2273 skip<whitespace_pred, Flags>(text); 2274 2275 // Skip quote and remember if it was ' or " 2276 Ch quote = *text; 2277 if (quote != Ch('\'') && quote != Ch('"')) 2278 RAPIDXML_PARSE_ERROR("expected ' or \"", text); 2279 ++text; 2280 2281 // Extract attribute value and expand char refs in it 2282 Ch *value = text, *end; 2283 const int AttFlags = Flags & ~parse_normalize_whitespace; // No whitespace normalization in attributes 2284 if (quote == Ch('\'')) 2285 end = skip_and_expand_character_refs<attribute_value_pred<Ch('\'')>, attribute_value_pure_pred<Ch('\'')>, AttFlags>(text); 2286 else 2287 end = skip_and_expand_character_refs<attribute_value_pred<Ch('"')>, attribute_value_pure_pred<Ch('"')>, AttFlags>(text); 2288 2289 // Set attribute value 2290 attribute->value(value, end - value); 2291 2292 // Make sure that end quote is present 2293 if (*text != quote) 2294 RAPIDXML_PARSE_ERROR("expected ' or \"", text); 2295 ++text; // Skip quote 2296 2297 // Add terminating zero after value 2298 if (!(Flags & parse_no_string_terminators)) 2299 attribute->value()[attribute->value_size()] = 0; 2300 2301 // Skip whitespace after attribute value 2302 skip<whitespace_pred, Flags>(text); 2303 } 2304 } 2305 2306 }; 2307 2308 //! 
//! \cond internal
namespace internal
{

    // Definitions of the static character-class tables declared in lookup_tables.
    // Every table has 256 entries. In the layout below each row covers 16 consecutive
    // indices: the row label on the right is the high hex digit of the index, the column
    // header is the low hex digit.
    // Except for lookup_digits and lookup_upcase, an entry of 1 means "index belongs to
    // the class" and 0 means it does not; index 0 ('\0') is 0 in all of those tables
    // except lookup_whitespace's siblings' complement, i.e. no class contains '\0'.

    // Whitespace (space \n \r \t)
    template<int Dummy>
    const unsigned char lookup_tables<Dummy>::lookup_whitespace[256] =
    {
      // 0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
         0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0,  // 0
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 1
         1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 2
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 3
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 4
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 5
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 6
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 7
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 8
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 9
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // A
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // B
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // C
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // D
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // E
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0   // F
    };

    // Node name (anything but space \n \r \t / > ? \0)
    template<int Dummy>
    const unsigned char lookup_tables<Dummy>::lookup_node_name[256] =
    {
      // 0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
         0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1,  // 0
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 1
         0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,  // 2
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0,  // 3
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 4
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 5
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 6
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 7
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 8
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 9
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // A
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // B
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // C
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // D
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // E
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1   // F
    };

    // Text (i.e. PCDATA) (anything but < \0)
    template<int Dummy>
    const unsigned char lookup_tables<Dummy>::lookup_text[256] =
    {
      // 0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
         0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 0
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 1
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 2
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1,  // 3
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 4
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 5
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 6
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 7
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 8
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 9
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // A
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // B
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // C
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // D
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // E
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1   // F
    };

    // Text (i.e. PCDATA) that does not require processing when ws normalization is disabled
    // (anything but < \0 &)
    template<int Dummy>
    const unsigned char lookup_tables<Dummy>::lookup_text_pure_no_ws[256] =
    {
      // 0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
         0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 0
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 1
         1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 2
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1,  // 3
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 4
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 5
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 6
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 7
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 8
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 9
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // A
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // B
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // C
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // D
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // E
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1   // F
    };

    // Text (i.e. PCDATA) that does not require processing when ws normalization is enabled
    // (anything but < \0 & space \n \r \t)
    template<int Dummy>
    const unsigned char lookup_tables<Dummy>::lookup_text_pure_with_ws[256] =
    {
      // 0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
         0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1,  // 0
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 1
         0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 2
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1,  // 3
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 4
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 5
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 6
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 7
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 8
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 9
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // A
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // B
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // C
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // D
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // E
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1   // F
    };

    // Attribute name (anything but space \n \r \t / < > = ? ! \0)
    template<int Dummy>
    const unsigned char lookup_tables<Dummy>::lookup_attribute_name[256] =
    {
      // 0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
         0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1,  // 0
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 1
         0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,  // 2
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,  // 3
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 4
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 5
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 6
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 7
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 8
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 9
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // A
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // B
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // C
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // D
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // E
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1   // F
    };

    // Attribute data with single quote (anything but ' \0)
    template<int Dummy>
    const unsigned char lookup_tables<Dummy>::lookup_attribute_data_1[256] =
    {
      // 0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
         0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 0
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 1
         1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,  // 2
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 3
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 4
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 5
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 6
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 7
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 8
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 9
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // A
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // B
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // C
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // D
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // E
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1   // F
    };

    // Attribute data with single quote that does not require processing (anything but ' \0 &)
    template<int Dummy>
    const unsigned char lookup_tables<Dummy>::lookup_attribute_data_1_pure[256] =
    {
      // 0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
         0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 0
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 1
         1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,  // 2
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 3
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 4
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 5
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 6
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 7
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 8
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 9
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // A
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // B
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // C
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // D
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // E
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1   // F
    };

    // Attribute data with double quote (anything but " \0)
    template<int Dummy>
    const unsigned char lookup_tables<Dummy>::lookup_attribute_data_2[256] =
    {
      // 0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
         0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 0
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 1
         1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 2
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 3
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 4
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 5
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 6
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 7
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 8
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 9
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // A
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // B
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // C
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // D
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // E
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1   // F
    };

    // Attribute data with double quote that does not require processing (anything but " \0 &)
    template<int Dummy>
    const unsigned char lookup_tables<Dummy>::lookup_attribute_data_2_pure[256] =
    {
      // 0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
         0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 0
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 1
         1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 2
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 3
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 4
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 5
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 6
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 7
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 8
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 9
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // A
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // B
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // C
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // D
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // E
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1   // F
    };

    // Digits (dec and hex, 255 denotes end of numeric character reference)
    // '0'-'9' map to 0..9; 'A'-'F' and 'a'-'f' both map to 10..15; everything else is 255.
    template<int Dummy>
    const unsigned char lookup_tables<Dummy>::lookup_digits[256] =
    {
      // 0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
       255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  // 0
       255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  // 1
       255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  // 2
         0,  1,  2,  3,  4,  5,  6,  7,  8,  9,255,255,255,255,255,255,  // 3
       255, 10, 11, 12, 13, 14, 15,255,255,255,255,255,255,255,255,255,  // 4
       255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  // 5
       255, 10, 11, 12, 13, 14, 15,255,255,255,255,255,255,255,255,255,  // 6
       255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  // 7
       255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  // 8
       255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  // 9
       255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  // A
       255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  // B
       255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  // C
       255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  // D
       255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  // E
       255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255   // F
    };

    // Upper case conversion
    // Identity mapping except for indices 97..122 ('a'-'z'), which map to 65..90 ('A'-'Z').
    template<int Dummy>
    const unsigned char lookup_tables<Dummy>::lookup_upcase[256] =
    {
      // 0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  A   B   C   D   E   F
       0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15,   // 0
       16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,   // 1
       32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,   // 2
       48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,   // 3
       64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,   // 4
       80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,   // 5
       96, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,   // 6
       80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 123,124,125,126,127,  // 7
       128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,  // 8
       144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,  // 9
       160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,  // A
       176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,  // B
       192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,  // C
       208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,  // D
       224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,  // E
       240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255   // F
    };
}
//! \endcond

}

// Undefine internal macros
#undef RAPIDXML_PARSE_ERROR

// On MSVC, restore warnings state
#ifdef _MSC_VER
    #pragma warning(pop)
#endif

#endif