1 /* 2 * Summary: the core parser module 3 * Description: Interfaces, constants and types related to the XML parser 4 * 5 * Copy: See Copyright for the status of this software. 6 * 7 * Author: Daniel Veillard 8 */ 9 10 #ifndef __XML_PARSER_H__ 11 #define __XML_PARSER_H__ 12 13 #include <libxml/xmlversion.h> 14 #include <libxml/tree.h> 15 #include <libxml/dict.h> 16 #include <libxml/hash.h> 17 #include <libxml/valid.h> 18 #include <libxml/entities.h> 19 #include <libxml/xmlerror.h> 20 #include <libxml/xmlstring.h> 21 22 #ifdef __cplusplus 23 extern "C" { 24 #endif 25 26 /** 27 * XML_DEFAULT_VERSION: 28 * 29 * The default version of XML used: 1.0 30 */ 31 #define XML_DEFAULT_VERSION "1.0" 32 33 /** 34 * xmlParserInput: 35 * 36 * An xmlParserInput is an input flow for the XML processor. 37 * Each entity parsed is associated an xmlParserInput (except the 38 * few predefined ones). This is the case both for internal entities 39 * - in which case the flow is already completely in memory - or 40 * external entities - in which case we use the buf structure for 41 * progressive reading and I18N conversions to the internal UTF-8 format. 42 */ 43 44 /** 45 * xmlParserInputDeallocate: 46 * @str: the string to deallocate 47 * 48 * Callback for freeing some parser input allocations. 
49 */ 50 typedef void (* xmlParserInputDeallocate)(xmlChar *str); 51 52 struct _xmlParserInput { 53 /* Input buffer */ 54 xmlParserInputBufferPtr buf; /* UTF-8 encoded buffer */ 55 56 const char *filename; /* The file analyzed, if any */ 57 const char *directory; /* the directory/base of the file */ 58 const xmlChar *base; /* Base of the array to parse */ 59 const xmlChar *cur; /* Current char being parsed */ 60 const xmlChar *end; /* end of the array to parse */ 61 int length; /* length if known */ 62 int line; /* Current line */ 63 int col; /* Current column */ 64 /* 65 * NOTE: consumed is only tested for equality in the parser code, 66 * so even if there is an overflow this should not give troubles 67 * for parsing very large instances. 68 */ 69 unsigned long consumed; /* How many xmlChars already consumed */ 70 xmlParserInputDeallocate free; /* function to deallocate the base */ 71 const xmlChar *encoding; /* the encoding string for entity */ 72 const xmlChar *version; /* the version string for entity */ 73 int standalone; /* Was that entity marked standalone */ 74 int id; /* an unique identifier for the entity */ 75 }; 76 77 /** 78 * xmlParserNodeInfo: 79 * 80 * The parser can be asked to collect Node information, i.e. at what 81 * place in the file they were detected. 82 * NOTE: This is off by default and not very well tested. 
83 */ 84 typedef struct _xmlParserNodeInfo xmlParserNodeInfo; 85 typedef xmlParserNodeInfo *xmlParserNodeInfoPtr; 86 87 struct _xmlParserNodeInfo { 88 const struct _xmlNode* node; 89 /* Position & line # that text that created the node begins & ends on */ 90 unsigned long begin_pos; 91 unsigned long begin_line; 92 unsigned long end_pos; 93 unsigned long end_line; 94 }; 95 96 typedef struct _xmlParserNodeInfoSeq xmlParserNodeInfoSeq; 97 typedef xmlParserNodeInfoSeq *xmlParserNodeInfoSeqPtr; 98 struct _xmlParserNodeInfoSeq { 99 unsigned long maximum; 100 unsigned long length; 101 xmlParserNodeInfo* buffer; 102 }; 103 104 /** 105 * xmlParserInputState: 106 * 107 * The parser is now working also as a state based parser. 108 * The recursive one use the state info for entities processing. 109 */ 110 typedef enum { 111 XML_PARSER_EOF = -1, /* nothing is to be parsed */ 112 XML_PARSER_START = 0, /* nothing has been parsed */ 113 XML_PARSER_MISC, /* Misc* before int subset */ 114 XML_PARSER_PI, /* Within a processing instruction */ 115 XML_PARSER_DTD, /* within some DTD content */ 116 XML_PARSER_PROLOG, /* Misc* after internal subset */ 117 XML_PARSER_COMMENT, /* within a comment */ 118 XML_PARSER_START_TAG, /* within a start tag */ 119 XML_PARSER_CONTENT, /* within the content */ 120 XML_PARSER_CDATA_SECTION, /* within a CDATA section */ 121 XML_PARSER_END_TAG, /* within a closing tag */ 122 XML_PARSER_ENTITY_DECL, /* within an entity declaration */ 123 XML_PARSER_ENTITY_VALUE, /* within an entity value in a decl */ 124 XML_PARSER_ATTRIBUTE_VALUE, /* within an attribute value */ 125 XML_PARSER_SYSTEM_LITERAL, /* within a SYSTEM value */ 126 XML_PARSER_EPILOG, /* the Misc* after the last end tag */ 127 XML_PARSER_IGNORE, /* within an IGNORED section */ 128 XML_PARSER_PUBLIC_LITERAL /* within a PUBLIC value */ 129 } xmlParserInputState; 130 131 /** 132 * XML_DETECT_IDS: 133 * 134 * Bit in the loadsubset context field to tell to do ID/REFs lookups. 
135 * Use it to initialize xmlLoadExtDtdDefaultValue. 136 */ 137 #define XML_DETECT_IDS 2 138 139 /** 140 * XML_COMPLETE_ATTRS: 141 * 142 * Bit in the loadsubset context field to tell to do complete the 143 * elements attributes lists with the ones defaulted from the DTDs. 144 * Use it to initialize xmlLoadExtDtdDefaultValue. 145 */ 146 #define XML_COMPLETE_ATTRS 4 147 148 /** 149 * XML_SKIP_IDS: 150 * 151 * Bit in the loadsubset context field to tell to not do ID/REFs registration. 152 * Used to initialize xmlLoadExtDtdDefaultValue in some special cases. 153 */ 154 #define XML_SKIP_IDS 8 155 156 /** 157 * xmlParserMode: 158 * 159 * A parser can operate in various modes 160 */ 161 typedef enum { 162 XML_PARSE_UNKNOWN = 0, 163 XML_PARSE_DOM = 1, 164 XML_PARSE_SAX = 2, 165 XML_PARSE_PUSH_DOM = 3, 166 XML_PARSE_PUSH_SAX = 4, 167 XML_PARSE_READER = 5 168 } xmlParserMode; 169 170 typedef struct _xmlStartTag xmlStartTag; 171 172 /** 173 * xmlParserCtxt: 174 * 175 * The parser context. 176 * NOTE This doesn't completely define the parser state, the (current ?) 177 * design of the parser uses recursive function calls since this allow 178 * and easy mapping from the production rules of the specification 179 * to the actual code. The drawback is that the actual function call 180 * also reflect the parser state. However most of the parsing routines 181 * takes as the only argument the parser context pointer, so migrating 182 * to a state based parser for progressive parsing shouldn't be too hard. 183 */ 184 struct _xmlParserCtxt { 185 struct _xmlSAXHandler *sax; /* The SAX handler */ 186 void *userData; /* For SAX interface only, used by DOM build */ 187 xmlDocPtr myDoc; /* the document being built */ 188 int wellFormed; /* is the document well formed */ 189 int replaceEntities; /* shall we replace entities ? 
*/ 190 const xmlChar *version; /* the XML version string */ 191 const xmlChar *encoding; /* the declared encoding, if any */ 192 int standalone; /* standalone document */ 193 int html; /* an HTML(1) document 194 * 3 is HTML after <head> 195 * 10 is HTML after <body> 196 */ 197 198 /* Input stream stack */ 199 xmlParserInputPtr input; /* Current input stream */ 200 int inputNr; /* Number of current input streams */ 201 int inputMax; /* Max number of input streams */ 202 xmlParserInputPtr *inputTab; /* stack of inputs */ 203 204 /* Node analysis stack only used for DOM building */ 205 xmlNodePtr node; /* Current parsed Node */ 206 int nodeNr; /* Depth of the parsing stack */ 207 int nodeMax; /* Max depth of the parsing stack */ 208 xmlNodePtr *nodeTab; /* array of nodes */ 209 210 int record_info; /* Whether node info should be kept */ 211 xmlParserNodeInfoSeq node_seq; /* info about each node parsed */ 212 213 int errNo; /* error code */ 214 215 int hasExternalSubset; /* reference and external subset */ 216 int hasPErefs; /* the internal subset has PE refs */ 217 int external; /* are we parsing an external entity */ 218 219 int valid; /* is the document valid */ 220 int validate; /* shall we try to validate ? */ 221 xmlValidCtxt vctxt; /* The validity context */ 222 223 xmlParserInputState instate; /* current type of input */ 224 int token; /* next char look-ahead */ 225 226 char *directory; /* the data directory */ 227 228 /* Node name stack */ 229 const xmlChar *name; /* Current parsed Node */ 230 int nameNr; /* Depth of the parsing stack */ 231 int nameMax; /* Max depth of the parsing stack */ 232 const xmlChar * *nameTab; /* array of nodes */ 233 234 long nbChars; /* unused */ 235 long checkIndex; /* used by progressive parsing lookup */ 236 int keepBlanks; /* ugly but ... 
*/ 237 int disableSAX; /* SAX callbacks are disabled */ 238 int inSubset; /* Parsing is in int 1/ext 2 subset */ 239 const xmlChar * intSubName; /* name of subset */ 240 xmlChar * extSubURI; /* URI of external subset */ 241 xmlChar * extSubSystem; /* SYSTEM ID of external subset */ 242 243 /* xml:space values */ 244 int * space; /* Should the parser preserve spaces */ 245 int spaceNr; /* Depth of the parsing stack */ 246 int spaceMax; /* Max depth of the parsing stack */ 247 int * spaceTab; /* array of space infos */ 248 249 int depth; /* to prevent entity substitution loops */ 250 xmlParserInputPtr entity; /* used to check entities boundaries */ 251 int charset; /* encoding of the in-memory content 252 actually an xmlCharEncoding */ 253 int nodelen; /* Those two fields are there to */ 254 int nodemem; /* Speed up large node parsing */ 255 int pedantic; /* signal pedantic warnings */ 256 void *_private; /* For user data, libxml won't touch it */ 257 258 int loadsubset; /* should the external subset be loaded */ 259 int linenumbers; /* set line number in element content */ 260 void *catalogs; /* document's own catalog */ 261 int recovery; /* run in recovery mode */ 262 int progressive; /* is this a progressive parsing */ 263 xmlDictPtr dict; /* dictionary for the parser */ 264 const xmlChar * *atts; /* array for the attributes callbacks */ 265 int maxatts; /* the size of the array */ 266 int docdict; /* use strings from dict to build tree */ 267 268 /* 269 * pre-interned strings 270 */ 271 const xmlChar *str_xml; 272 const xmlChar *str_xmlns; 273 const xmlChar *str_xml_ns; 274 275 /* 276 * Everything below is used only by the new SAX mode 277 */ 278 int sax2; /* operating in the new SAX mode */ 279 int nsNr; /* the number of inherited namespaces */ 280 int nsMax; /* the size of the arrays */ 281 const xmlChar * *nsTab; /* the array of prefix/namespace name */ 282 int *attallocs; /* which attribute were allocated */ 283 xmlStartTag *pushTab; /* array of data for push 
*/ 284 xmlHashTablePtr attsDefault; /* defaulted attributes if any */ 285 xmlHashTablePtr attsSpecial; /* non-CDATA attributes if any */ 286 int nsWellFormed; /* is the document XML Namespace okay */ 287 int options; /* Extra options */ 288 289 /* 290 * Those fields are needed only for streaming parsing so far 291 */ 292 int dictNames; /* Use dictionary names for the tree */ 293 int freeElemsNr; /* number of freed element nodes */ 294 xmlNodePtr freeElems; /* List of freed element nodes */ 295 int freeAttrsNr; /* number of freed attributes nodes */ 296 xmlAttrPtr freeAttrs; /* List of freed attributes nodes */ 297 298 /* 299 * the complete error information for the last error. 300 */ 301 xmlError lastError; 302 xmlParserMode parseMode; /* the parser mode */ 303 unsigned long nbentities; /* number of entities references */ 304 unsigned long sizeentities; /* size of parsed entities */ 305 306 /* for use by HTML non-recursive parser */ 307 xmlParserNodeInfo *nodeInfo; /* Current NodeInfo */ 308 int nodeInfoNr; /* Depth of the parsing stack */ 309 int nodeInfoMax; /* Max depth of the parsing stack */ 310 xmlParserNodeInfo *nodeInfoTab; /* array of nodeInfos */ 311 312 int input_id; /* we need to label inputs */ 313 unsigned long sizeentcopy; /* volume of entity copy */ 314 }; 315 316 /** 317 * xmlSAXLocator: 318 * 319 * A SAX Locator. 320 */ 321 struct _xmlSAXLocator { 322 const xmlChar *(*getPublicId)(void *ctx); 323 const xmlChar *(*getSystemId)(void *ctx); 324 int (*getLineNumber)(void *ctx); 325 int (*getColumnNumber)(void *ctx); 326 }; 327 328 /** 329 * xmlSAXHandler: 330 * 331 * A SAX handler is bunch of callbacks called by the parser when processing 332 * of the input generate data or structure information. 
333 */ 334 335 /** 336 * resolveEntitySAXFunc: 337 * @ctx: the user data (XML parser context) 338 * @publicId: The public ID of the entity 339 * @systemId: The system ID of the entity 340 * 341 * Callback: 342 * The entity loader, to control the loading of external entities, 343 * the application can either: 344 * - override this resolveEntity() callback in the SAX block 345 * - or better use the xmlSetExternalEntityLoader() function to 346 * set up it's own entity resolution routine 347 * 348 * Returns the xmlParserInputPtr if inlined or NULL for DOM behaviour. 349 */ 350 typedef xmlParserInputPtr (*resolveEntitySAXFunc) (void *ctx, 351 const xmlChar *publicId, 352 const xmlChar *systemId); 353 /** 354 * internalSubsetSAXFunc: 355 * @ctx: the user data (XML parser context) 356 * @name: the root element name 357 * @ExternalID: the external ID 358 * @SystemID: the SYSTEM ID (e.g. filename or URL) 359 * 360 * Callback on internal subset declaration. 361 */ 362 typedef void (*internalSubsetSAXFunc) (void *ctx, 363 const xmlChar *name, 364 const xmlChar *ExternalID, 365 const xmlChar *SystemID); 366 /** 367 * externalSubsetSAXFunc: 368 * @ctx: the user data (XML parser context) 369 * @name: the root element name 370 * @ExternalID: the external ID 371 * @SystemID: the SYSTEM ID (e.g. filename or URL) 372 * 373 * Callback on external subset declaration. 374 */ 375 typedef void (*externalSubsetSAXFunc) (void *ctx, 376 const xmlChar *name, 377 const xmlChar *ExternalID, 378 const xmlChar *SystemID); 379 /** 380 * getEntitySAXFunc: 381 * @ctx: the user data (XML parser context) 382 * @name: The entity name 383 * 384 * Get an entity by name. 385 * 386 * Returns the xmlEntityPtr if found. 387 */ 388 typedef xmlEntityPtr (*getEntitySAXFunc) (void *ctx, 389 const xmlChar *name); 390 /** 391 * getParameterEntitySAXFunc: 392 * @ctx: the user data (XML parser context) 393 * @name: The entity name 394 * 395 * Get a parameter entity by name. 
396 * 397 * Returns the xmlEntityPtr if found. 398 */ 399 typedef xmlEntityPtr (*getParameterEntitySAXFunc) (void *ctx, 400 const xmlChar *name); 401 /** 402 * entityDeclSAXFunc: 403 * @ctx: the user data (XML parser context) 404 * @name: the entity name 405 * @type: the entity type 406 * @publicId: The public ID of the entity 407 * @systemId: The system ID of the entity 408 * @content: the entity value (without processing). 409 * 410 * An entity definition has been parsed. 411 */ 412 typedef void (*entityDeclSAXFunc) (void *ctx, 413 const xmlChar *name, 414 int type, 415 const xmlChar *publicId, 416 const xmlChar *systemId, 417 xmlChar *content); 418 /** 419 * notationDeclSAXFunc: 420 * @ctx: the user data (XML parser context) 421 * @name: The name of the notation 422 * @publicId: The public ID of the entity 423 * @systemId: The system ID of the entity 424 * 425 * What to do when a notation declaration has been parsed. 426 */ 427 typedef void (*notationDeclSAXFunc)(void *ctx, 428 const xmlChar *name, 429 const xmlChar *publicId, 430 const xmlChar *systemId); 431 /** 432 * attributeDeclSAXFunc: 433 * @ctx: the user data (XML parser context) 434 * @elem: the name of the element 435 * @fullname: the attribute name 436 * @type: the attribute type 437 * @def: the type of default value 438 * @defaultValue: the attribute default value 439 * @tree: the tree of enumerated value set 440 * 441 * An attribute definition has been parsed. 442 */ 443 typedef void (*attributeDeclSAXFunc)(void *ctx, 444 const xmlChar *elem, 445 const xmlChar *fullname, 446 int type, 447 int def, 448 const xmlChar *defaultValue, 449 xmlEnumerationPtr tree); 450 /** 451 * elementDeclSAXFunc: 452 * @ctx: the user data (XML parser context) 453 * @name: the element name 454 * @type: the element type 455 * @content: the element value tree 456 * 457 * An element definition has been parsed. 
458 */ 459 typedef void (*elementDeclSAXFunc)(void *ctx, 460 const xmlChar *name, 461 int type, 462 xmlElementContentPtr content); 463 /** 464 * unparsedEntityDeclSAXFunc: 465 * @ctx: the user data (XML parser context) 466 * @name: The name of the entity 467 * @publicId: The public ID of the entity 468 * @systemId: The system ID of the entity 469 * @notationName: the name of the notation 470 * 471 * What to do when an unparsed entity declaration is parsed. 472 */ 473 typedef void (*unparsedEntityDeclSAXFunc)(void *ctx, 474 const xmlChar *name, 475 const xmlChar *publicId, 476 const xmlChar *systemId, 477 const xmlChar *notationName); 478 /** 479 * setDocumentLocatorSAXFunc: 480 * @ctx: the user data (XML parser context) 481 * @loc: A SAX Locator 482 * 483 * Receive the document locator at startup, actually xmlDefaultSAXLocator. 484 * Everything is available on the context, so this is useless in our case. 485 */ 486 typedef void (*setDocumentLocatorSAXFunc) (void *ctx, 487 xmlSAXLocatorPtr loc); 488 /** 489 * startDocumentSAXFunc: 490 * @ctx: the user data (XML parser context) 491 * 492 * Called when the document start being processed. 493 */ 494 typedef void (*startDocumentSAXFunc) (void *ctx); 495 /** 496 * endDocumentSAXFunc: 497 * @ctx: the user data (XML parser context) 498 * 499 * Called when the document end has been detected. 500 */ 501 typedef void (*endDocumentSAXFunc) (void *ctx); 502 /** 503 * startElementSAXFunc: 504 * @ctx: the user data (XML parser context) 505 * @name: The element name, including namespace prefix 506 * @atts: An array of name/value attributes pairs, NULL terminated 507 * 508 * Called when an opening tag has been processed. 509 */ 510 typedef void (*startElementSAXFunc) (void *ctx, 511 const xmlChar *name, 512 const xmlChar **atts); 513 /** 514 * endElementSAXFunc: 515 * @ctx: the user data (XML parser context) 516 * @name: The element name 517 * 518 * Called when the end of an element has been detected. 
519 */ 520 typedef void (*endElementSAXFunc) (void *ctx, 521 const xmlChar *name); 522 /** 523 * attributeSAXFunc: 524 * @ctx: the user data (XML parser context) 525 * @name: The attribute name, including namespace prefix 526 * @value: The attribute value 527 * 528 * Handle an attribute that has been read by the parser. 529 * The default handling is to convert the attribute into an 530 * DOM subtree and past it in a new xmlAttr element added to 531 * the element. 532 */ 533 typedef void (*attributeSAXFunc) (void *ctx, 534 const xmlChar *name, 535 const xmlChar *value); 536 /** 537 * referenceSAXFunc: 538 * @ctx: the user data (XML parser context) 539 * @name: The entity name 540 * 541 * Called when an entity reference is detected. 542 */ 543 typedef void (*referenceSAXFunc) (void *ctx, 544 const xmlChar *name); 545 /** 546 * charactersSAXFunc: 547 * @ctx: the user data (XML parser context) 548 * @ch: a xmlChar string 549 * @len: the number of xmlChar 550 * 551 * Receiving some chars from the parser. 552 */ 553 typedef void (*charactersSAXFunc) (void *ctx, 554 const xmlChar *ch, 555 int len); 556 /** 557 * ignorableWhitespaceSAXFunc: 558 * @ctx: the user data (XML parser context) 559 * @ch: a xmlChar string 560 * @len: the number of xmlChar 561 * 562 * Receiving some ignorable whitespaces from the parser. 563 * UNUSED: by default the DOM building will use characters. 564 */ 565 typedef void (*ignorableWhitespaceSAXFunc) (void *ctx, 566 const xmlChar *ch, 567 int len); 568 /** 569 * processingInstructionSAXFunc: 570 * @ctx: the user data (XML parser context) 571 * @target: the target name 572 * @data: the PI data's 573 * 574 * A processing instruction has been parsed. 575 */ 576 typedef void (*processingInstructionSAXFunc) (void *ctx, 577 const xmlChar *target, 578 const xmlChar *data); 579 /** 580 * commentSAXFunc: 581 * @ctx: the user data (XML parser context) 582 * @value: the comment content 583 * 584 * A comment has been parsed. 
585 */ 586 typedef void (*commentSAXFunc) (void *ctx, 587 const xmlChar *value); 588 /** 589 * cdataBlockSAXFunc: 590 * @ctx: the user data (XML parser context) 591 * @value: The pcdata content 592 * @len: the block length 593 * 594 * Called when a pcdata block has been parsed. 595 */ 596 typedef void (*cdataBlockSAXFunc) ( 597 void *ctx, 598 const xmlChar *value, 599 int len); 600 /** 601 * warningSAXFunc: 602 * @ctx: an XML parser context 603 * @msg: the message to display/transmit 604 * @...: extra parameters for the message display 605 * 606 * Display and format a warning messages, callback. 607 */ 608 typedef void (XMLCDECL *warningSAXFunc) (void *ctx, 609 const char *msg, ...) LIBXML_ATTR_FORMAT(2,3); 610 /** 611 * errorSAXFunc: 612 * @ctx: an XML parser context 613 * @msg: the message to display/transmit 614 * @...: extra parameters for the message display 615 * 616 * Display and format an error messages, callback. 617 */ 618 typedef void (XMLCDECL *errorSAXFunc) (void *ctx, 619 const char *msg, ...) LIBXML_ATTR_FORMAT(2,3); 620 /** 621 * fatalErrorSAXFunc: 622 * @ctx: an XML parser context 623 * @msg: the message to display/transmit 624 * @...: extra parameters for the message display 625 * 626 * Display and format fatal error messages, callback. 627 * Note: so far fatalError() SAX callbacks are not used, error() 628 * get all the callbacks for errors. 629 */ 630 typedef void (XMLCDECL *fatalErrorSAXFunc) (void *ctx, 631 const char *msg, ...) LIBXML_ATTR_FORMAT(2,3); 632 /** 633 * isStandaloneSAXFunc: 634 * @ctx: the user data (XML parser context) 635 * 636 * Is this document tagged standalone? 637 * 638 * Returns 1 if true 639 */ 640 typedef int (*isStandaloneSAXFunc) (void *ctx); 641 /** 642 * hasInternalSubsetSAXFunc: 643 * @ctx: the user data (XML parser context) 644 * 645 * Does this document has an internal subset. 
646 * 647 * Returns 1 if true 648 */ 649 typedef int (*hasInternalSubsetSAXFunc) (void *ctx); 650 651 /** 652 * hasExternalSubsetSAXFunc: 653 * @ctx: the user data (XML parser context) 654 * 655 * Does this document has an external subset? 656 * 657 * Returns 1 if true 658 */ 659 typedef int (*hasExternalSubsetSAXFunc) (void *ctx); 660 661 /************************************************************************ 662 * * 663 * The SAX version 2 API extensions * 664 * * 665 ************************************************************************/ 666 /** 667 * XML_SAX2_MAGIC: 668 * 669 * Special constant found in SAX2 blocks initialized fields 670 */ 671 #define XML_SAX2_MAGIC 0xDEEDBEAF 672 673 /** 674 * startElementNsSAX2Func: 675 * @ctx: the user data (XML parser context) 676 * @localname: the local name of the element 677 * @prefix: the element namespace prefix if available 678 * @URI: the element namespace name if available 679 * @nb_namespaces: number of namespace definitions on that node 680 * @namespaces: pointer to the array of prefix/URI pairs namespace definitions 681 * @nb_attributes: the number of attributes on that node 682 * @nb_defaulted: the number of defaulted attributes. The defaulted 683 * ones are at the end of the array 684 * @attributes: pointer to the array of (localname/prefix/URI/value/end) 685 * attribute values. 686 * 687 * SAX2 callback when an element start has been detected by the parser. 688 * It provides the namespace information for the element, as well as 689 * the new namespace declarations on the element. 
690 */ 691 692 typedef void (*startElementNsSAX2Func) (void *ctx, 693 const xmlChar *localname, 694 const xmlChar *prefix, 695 const xmlChar *URI, 696 int nb_namespaces, 697 const xmlChar **namespaces, 698 int nb_attributes, 699 int nb_defaulted, 700 const xmlChar **attributes); 701 702 /** 703 * endElementNsSAX2Func: 704 * @ctx: the user data (XML parser context) 705 * @localname: the local name of the element 706 * @prefix: the element namespace prefix if available 707 * @URI: the element namespace name if available 708 * 709 * SAX2 callback when an element end has been detected by the parser. 710 * It provides the namespace information for the element. 711 */ 712 713 typedef void (*endElementNsSAX2Func) (void *ctx, 714 const xmlChar *localname, 715 const xmlChar *prefix, 716 const xmlChar *URI); 717 718 719 struct _xmlSAXHandler { 720 internalSubsetSAXFunc internalSubset; 721 isStandaloneSAXFunc isStandalone; 722 hasInternalSubsetSAXFunc hasInternalSubset; 723 hasExternalSubsetSAXFunc hasExternalSubset; 724 resolveEntitySAXFunc resolveEntity; 725 getEntitySAXFunc getEntity; 726 entityDeclSAXFunc entityDecl; 727 notationDeclSAXFunc notationDecl; 728 attributeDeclSAXFunc attributeDecl; 729 elementDeclSAXFunc elementDecl; 730 unparsedEntityDeclSAXFunc unparsedEntityDecl; 731 setDocumentLocatorSAXFunc setDocumentLocator; 732 startDocumentSAXFunc startDocument; 733 endDocumentSAXFunc endDocument; 734 startElementSAXFunc startElement; 735 endElementSAXFunc endElement; 736 referenceSAXFunc reference; 737 charactersSAXFunc characters; 738 ignorableWhitespaceSAXFunc ignorableWhitespace; 739 processingInstructionSAXFunc processingInstruction; 740 commentSAXFunc comment; 741 warningSAXFunc warning; 742 errorSAXFunc error; 743 fatalErrorSAXFunc fatalError; /* unused error() get all the errors */ 744 getParameterEntitySAXFunc getParameterEntity; 745 cdataBlockSAXFunc cdataBlock; 746 externalSubsetSAXFunc externalSubset; 747 unsigned int initialized; 748 /* The following 
fields are extensions available only on version 2 */ 749 void *_private; 750 startElementNsSAX2Func startElementNs; 751 endElementNsSAX2Func endElementNs; 752 xmlStructuredErrorFunc serror; 753 }; 754 755 /* 756 * SAX Version 1 757 */ 758 typedef struct _xmlSAXHandlerV1 xmlSAXHandlerV1; 759 typedef xmlSAXHandlerV1 *xmlSAXHandlerV1Ptr; 760 struct _xmlSAXHandlerV1 { 761 internalSubsetSAXFunc internalSubset; 762 isStandaloneSAXFunc isStandalone; 763 hasInternalSubsetSAXFunc hasInternalSubset; 764 hasExternalSubsetSAXFunc hasExternalSubset; 765 resolveEntitySAXFunc resolveEntity; 766 getEntitySAXFunc getEntity; 767 entityDeclSAXFunc entityDecl; 768 notationDeclSAXFunc notationDecl; 769 attributeDeclSAXFunc attributeDecl; 770 elementDeclSAXFunc elementDecl; 771 unparsedEntityDeclSAXFunc unparsedEntityDecl; 772 setDocumentLocatorSAXFunc setDocumentLocator; 773 startDocumentSAXFunc startDocument; 774 endDocumentSAXFunc endDocument; 775 startElementSAXFunc startElement; 776 endElementSAXFunc endElement; 777 referenceSAXFunc reference; 778 charactersSAXFunc characters; 779 ignorableWhitespaceSAXFunc ignorableWhitespace; 780 processingInstructionSAXFunc processingInstruction; 781 commentSAXFunc comment; 782 warningSAXFunc warning; 783 errorSAXFunc error; 784 fatalErrorSAXFunc fatalError; /* unused error() get all the errors */ 785 getParameterEntitySAXFunc getParameterEntity; 786 cdataBlockSAXFunc cdataBlock; 787 externalSubsetSAXFunc externalSubset; 788 unsigned int initialized; 789 }; 790 791 792 /** 793 * xmlExternalEntityLoader: 794 * @URL: The System ID of the resource requested 795 * @ID: The Public ID of the resource requested 796 * @context: the XML parser context 797 * 798 * External entity loaders types. 799 * 800 * Returns the entity input parser. 
801 */ 802 typedef xmlParserInputPtr (*xmlExternalEntityLoader) (const char *URL, 803 const char *ID, 804 xmlParserCtxtPtr context); 805 806 #ifdef __cplusplus 807 } 808 #endif 809 810 #include <libxml/encoding.h> 811 #include <libxml/xmlIO.h> 812 #include <libxml/globals.h> 813 814 #ifdef __cplusplus 815 extern "C" { 816 #endif 817 818 819 /* 820 * Init/Cleanup 821 */ 822 XMLPUBFUN void XMLCALL 823 xmlInitParser (void); 824 XMLPUBFUN void XMLCALL 825 xmlCleanupParser (void); 826 827 /* 828 * Input functions 829 */ 830 XMLPUBFUN int XMLCALL 831 xmlParserInputRead (xmlParserInputPtr in, 832 int len); 833 XMLPUBFUN int XMLCALL 834 xmlParserInputGrow (xmlParserInputPtr in, 835 int len); 836 837 /* 838 * Basic parsing Interfaces 839 */ 840 #ifdef LIBXML_SAX1_ENABLED 841 XMLPUBFUN xmlDocPtr XMLCALL 842 xmlParseDoc (const xmlChar *cur); 843 XMLPUBFUN xmlDocPtr XMLCALL 844 xmlParseFile (const char *filename); 845 XMLPUBFUN xmlDocPtr XMLCALL 846 xmlParseMemory (const char *buffer, 847 int size); 848 #endif /* LIBXML_SAX1_ENABLED */ 849 XMLPUBFUN int XMLCALL 850 xmlSubstituteEntitiesDefault(int val); 851 XMLPUBFUN int XMLCALL 852 xmlKeepBlanksDefault (int val); 853 XMLPUBFUN void XMLCALL 854 xmlStopParser (xmlParserCtxtPtr ctxt); 855 XMLPUBFUN int XMLCALL 856 xmlPedanticParserDefault(int val); 857 XMLPUBFUN int XMLCALL 858 xmlLineNumbersDefault (int val); 859 860 #ifdef LIBXML_SAX1_ENABLED 861 /* 862 * Recovery mode 863 */ 864 XMLPUBFUN xmlDocPtr XMLCALL 865 xmlRecoverDoc (const xmlChar *cur); 866 XMLPUBFUN xmlDocPtr XMLCALL 867 xmlRecoverMemory (const char *buffer, 868 int size); 869 XMLPUBFUN xmlDocPtr XMLCALL 870 xmlRecoverFile (const char *filename); 871 #endif /* LIBXML_SAX1_ENABLED */ 872 873 /* 874 * Less common routines and SAX interfaces 875 */ 876 XMLPUBFUN int XMLCALL 877 xmlParseDocument (xmlParserCtxtPtr ctxt); 878 XMLPUBFUN int XMLCALL 879 xmlParseExtParsedEnt (xmlParserCtxtPtr ctxt); 880 #ifdef LIBXML_SAX1_ENABLED 881 XMLPUBFUN int XMLCALL 882 
xmlSAXUserParseFile (xmlSAXHandlerPtr sax, 883 void *user_data, 884 const char *filename); 885 XMLPUBFUN int XMLCALL 886 xmlSAXUserParseMemory (xmlSAXHandlerPtr sax, 887 void *user_data, 888 const char *buffer, 889 int size); 890 XMLPUBFUN xmlDocPtr XMLCALL 891 xmlSAXParseDoc (xmlSAXHandlerPtr sax, 892 const xmlChar *cur, 893 int recovery); 894 XMLPUBFUN xmlDocPtr XMLCALL 895 xmlSAXParseMemory (xmlSAXHandlerPtr sax, 896 const char *buffer, 897 int size, 898 int recovery); 899 XMLPUBFUN xmlDocPtr XMLCALL 900 xmlSAXParseMemoryWithData (xmlSAXHandlerPtr sax, 901 const char *buffer, 902 int size, 903 int recovery, 904 void *data); 905 XMLPUBFUN xmlDocPtr XMLCALL 906 xmlSAXParseFile (xmlSAXHandlerPtr sax, 907 const char *filename, 908 int recovery); 909 XMLPUBFUN xmlDocPtr XMLCALL 910 xmlSAXParseFileWithData (xmlSAXHandlerPtr sax, 911 const char *filename, 912 int recovery, 913 void *data); 914 XMLPUBFUN xmlDocPtr XMLCALL 915 xmlSAXParseEntity (xmlSAXHandlerPtr sax, 916 const char *filename); 917 XMLPUBFUN xmlDocPtr XMLCALL 918 xmlParseEntity (const char *filename); 919 #endif /* LIBXML_SAX1_ENABLED */ 920 921 #ifdef LIBXML_VALID_ENABLED 922 XMLPUBFUN xmlDtdPtr XMLCALL 923 xmlSAXParseDTD (xmlSAXHandlerPtr sax, 924 const xmlChar *ExternalID, 925 const xmlChar *SystemID); 926 XMLPUBFUN xmlDtdPtr XMLCALL 927 xmlParseDTD (const xmlChar *ExternalID, 928 const xmlChar *SystemID); 929 XMLPUBFUN xmlDtdPtr XMLCALL 930 xmlIOParseDTD (xmlSAXHandlerPtr sax, 931 xmlParserInputBufferPtr input, 932 xmlCharEncoding enc); 933 #endif /* LIBXML_VALID_ENABLE */ 934 #ifdef LIBXML_SAX1_ENABLED 935 XMLPUBFUN int XMLCALL 936 xmlParseBalancedChunkMemory(xmlDocPtr doc, 937 xmlSAXHandlerPtr sax, 938 void *user_data, 939 int depth, 940 const xmlChar *string, 941 xmlNodePtr *lst); 942 #endif /* LIBXML_SAX1_ENABLED */ 943 XMLPUBFUN xmlParserErrors XMLCALL 944 xmlParseInNodeContext (xmlNodePtr node, 945 const char *data, 946 int datalen, 947 int options, 948 xmlNodePtr *lst); 949 #ifdef 
LIBXML_SAX1_ENABLED 950 XMLPUBFUN int XMLCALL 951 xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, 952 xmlSAXHandlerPtr sax, 953 void *user_data, 954 int depth, 955 const xmlChar *string, 956 xmlNodePtr *lst, 957 int recover); 958 XMLPUBFUN int XMLCALL 959 xmlParseExternalEntity (xmlDocPtr doc, 960 xmlSAXHandlerPtr sax, 961 void *user_data, 962 int depth, 963 const xmlChar *URL, 964 const xmlChar *ID, 965 xmlNodePtr *lst); 966 #endif /* LIBXML_SAX1_ENABLED */ 967 XMLPUBFUN int XMLCALL 968 xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, 969 const xmlChar *URL, 970 const xmlChar *ID, 971 xmlNodePtr *lst); 972 973 /* 974 * Parser contexts handling. 975 */ 976 XMLPUBFUN xmlParserCtxtPtr XMLCALL 977 xmlNewParserCtxt (void); 978 XMLPUBFUN int XMLCALL 979 xmlInitParserCtxt (xmlParserCtxtPtr ctxt); 980 XMLPUBFUN void XMLCALL 981 xmlClearParserCtxt (xmlParserCtxtPtr ctxt); 982 XMLPUBFUN void XMLCALL 983 xmlFreeParserCtxt (xmlParserCtxtPtr ctxt); 984 #ifdef LIBXML_SAX1_ENABLED 985 XMLPUBFUN void XMLCALL 986 xmlSetupParserForBuffer (xmlParserCtxtPtr ctxt, 987 const xmlChar* buffer, 988 const char *filename); 989 #endif /* LIBXML_SAX1_ENABLED */ 990 XMLPUBFUN xmlParserCtxtPtr XMLCALL 991 xmlCreateDocParserCtxt (const xmlChar *cur); 992 993 #ifdef LIBXML_LEGACY_ENABLED 994 /* 995 * Reading/setting optional parsing features. 996 */ 997 XML_DEPRECATED 998 XMLPUBFUN int XMLCALL 999 xmlGetFeaturesList (int *len, 1000 const char **result); 1001 XML_DEPRECATED 1002 XMLPUBFUN int XMLCALL 1003 xmlGetFeature (xmlParserCtxtPtr ctxt, 1004 const char *name, 1005 void *result); 1006 XML_DEPRECATED 1007 XMLPUBFUN int XMLCALL 1008 xmlSetFeature (xmlParserCtxtPtr ctxt, 1009 const char *name, 1010 void *value); 1011 #endif /* LIBXML_LEGACY_ENABLED */ 1012 1013 #ifdef LIBXML_PUSH_ENABLED 1014 /* 1015 * Interfaces for the Push mode. 
1016 */ 1017 /* Returns an xmlParserCtxtPtr, the type xmlParseChunk() takes as its first argument. The (chunk, size) pair here presumably supplies an initial block of input -- TODO confirm. */ XMLPUBFUN xmlParserCtxtPtr XMLCALL 1018 xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, 1019 void *user_data, 1020 const char *chunk, 1021 int size, 1022 const char *filename); 1023 /* NOTE(review): `terminate` looks like an end-of-input flag; its exact contract and the meaning of the int result are not visible in this header. */ XMLPUBFUN int XMLCALL 1024 xmlParseChunk (xmlParserCtxtPtr ctxt, 1025 const char *chunk, 1026 int size, 1027 int terminate); 1028 #endif /* LIBXML_PUSH_ENABLED */ 1029 1030 /* 1031 * Special I/O mode. 1032 */ 1033 1034 /* Callback-driven input: the caller passes a read callback, a close callback and an opaque ioctx pointer, plus the encoding as an xmlCharEncoding. */ XMLPUBFUN xmlParserCtxtPtr XMLCALL 1035 xmlCreateIOParserCtxt (xmlSAXHandlerPtr sax, 1036 void *user_data, 1037 xmlInputReadCallback ioread, 1038 xmlInputCloseCallback ioclose, 1039 void *ioctx, 1040 xmlCharEncoding enc); 1041 1042 /* Returns an xmlParserInputPtr built from an existing context and an xmlParserInputBufferPtr. */ XMLPUBFUN xmlParserInputPtr XMLCALL 1043 xmlNewIOInputStream (xmlParserCtxtPtr ctxt, 1044 xmlParserInputBufferPtr input, 1045 xmlCharEncoding enc); 1046 1047 /* 1048 * Node infos. 1049 */ 1050 /* NOTE(review): assuming the xxxPtr names are pointer typedefs, `const xmlParserCtxtPtr` and `const xmlNodePtr` qualify the pointer itself, not the object it points to. The lookup functions return a pointer to const xmlParserNodeInfo and an unsigned long index respectively. */ XMLPUBFUN const xmlParserNodeInfo* XMLCALL 1051 xmlParserFindNodeInfo (const xmlParserCtxtPtr ctxt, 1052 const xmlNodePtr node); 1053 XMLPUBFUN void XMLCALL 1054 xmlInitNodeInfoSeq (xmlParserNodeInfoSeqPtr seq); 1055 XMLPUBFUN void XMLCALL 1056 xmlClearNodeInfoSeq (xmlParserNodeInfoSeqPtr seq); 1057 XMLPUBFUN unsigned long XMLCALL 1058 xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeqPtr seq, 1059 const xmlNodePtr node); 1060 XMLPUBFUN void XMLCALL 1061 xmlParserAddNodeInfo (xmlParserCtxtPtr ctxt, 1062 const xmlParserNodeInfoPtr info); 1063 1064 /* 1065 * External entities handling actually implemented in xmlIO. 
1066 */ 1067 1068 /* Setter and getter share the xmlExternalEntityLoader type: the first takes one and returns nothing, the second takes nothing and returns one. */ XMLPUBFUN void XMLCALL 1069 xmlSetExternalEntityLoader(xmlExternalEntityLoader f); 1070 XMLPUBFUN xmlExternalEntityLoader XMLCALL 1071 xmlGetExternalEntityLoader(void); 1072 /* Takes the entity's URL and ID as plain char strings (not xmlChar) and returns an xmlParserInputPtr. */ XMLPUBFUN xmlParserInputPtr XMLCALL 1073 xmlLoadExternalEntity (const char *URL, 1074 const char *ID, 1075 xmlParserCtxtPtr ctxt); 1076 1077 /* 1078 * Index lookup, actually implemented in the encoding module 1079 */ 1080 /* NOTE(review): the return type is a signed long; whether a negative value signals an error is not visible in this header. */ XMLPUBFUN long XMLCALL 1081 xmlByteConsumed (xmlParserCtxtPtr ctxt); 1082 1083 /* 1084 * New set of simpler/more flexible APIs 1085 */ 1086 /** 1087 * xmlParserOption: 1088 * 1089 * This is the set of XML parser options that can be passed down 1090 * to the xmlReadDoc() and similar calls. 1091 */ 1092 /* Every enumerator is a distinct single bit (1<<0 through 1<<22), so several options fit into the one int `options` parameter of the read functions -- presumably combined with bitwise OR. */ typedef enum { 1093 XML_PARSE_RECOVER = 1<<0, /* recover on errors */ 1094 XML_PARSE_NOENT = 1<<1, /* substitute entities */ 1095 XML_PARSE_DTDLOAD = 1<<2, /* load the external subset */ 1096 XML_PARSE_DTDATTR = 1<<3, /* default DTD attributes */ 1097 XML_PARSE_DTDVALID = 1<<4, /* validate with the DTD */ 1098 XML_PARSE_NOERROR = 1<<5, /* suppress error reports */ 1099 XML_PARSE_NOWARNING = 1<<6, /* suppress warning reports */ 1100 XML_PARSE_PEDANTIC = 1<<7, /* pedantic error reporting */ 1101 XML_PARSE_NOBLANKS = 1<<8, /* remove blank nodes */ 1102 XML_PARSE_SAX1 = 1<<9, /* use the SAX1 interface internally */ 1103 XML_PARSE_XINCLUDE = 1<<10,/* Implement XInclude substitution */ 1104 XML_PARSE_NONET = 1<<11,/* Forbid network access */ 1105 XML_PARSE_NODICT = 1<<12,/* Do not reuse the context dictionary */ 1106 XML_PARSE_NSCLEAN = 1<<13,/* remove redundant namespace declarations */ 1107 XML_PARSE_NOCDATA = 1<<14,/* merge CDATA as text nodes */ 1108 XML_PARSE_NOXINCNODE= 1<<15,/* do not generate XINCLUDE START/END nodes */ 1109 XML_PARSE_COMPACT = 1<<16,/* compact small text nodes; no modification of 1110 the tree allowed afterwards (will possibly 1111 crash if you try to modify the tree) */ 1112 XML_PARSE_OLD10 = 1<<17,/* parse using XML-1.0 before update 5 */ 
1113 XML_PARSE_NOBASEFIX = 1<<18,/* do not fixup XINCLUDE xml:base uris */ 1114 XML_PARSE_HUGE = 1<<19,/* relax any hardcoded limit from the parser */ 1115 XML_PARSE_OLDSAX = 1<<20,/* parse using SAX2 interface before 2.7.0 */ 1116 XML_PARSE_IGNORE_ENC= 1<<21,/* ignore internal document encoding hint */ 1117 XML_PARSE_BIG_LINES = 1<<22 /* Store big line numbers in text PSVI field */ 1118 } xmlParserOption; 1119 1120 /* Context maintenance: xmlCtxtReset() returns nothing; xmlCtxtResetPush() and xmlCtxtUseOptions() return int. xmlCtxtUseOptions() takes the options as a plain int rather than as an xmlParserOption. */ XMLPUBFUN void XMLCALL 1121 xmlCtxtReset (xmlParserCtxtPtr ctxt); 1122 XMLPUBFUN int XMLCALL 1123 xmlCtxtResetPush (xmlParserCtxtPtr ctxt, 1124 const char *chunk, 1125 int size, 1126 const char *filename, 1127 const char *encoding); 1128 XMLPUBFUN int XMLCALL 1129 xmlCtxtUseOptions (xmlParserCtxtPtr ctxt, 1130 int options); 1131 /* xmlReadXxx family: each returns an xmlDocPtr and ends with (URL, encoding, options); only the leading input-source arguments differ (string, file, buffer + size, file descriptor, or I/O callbacks). */ XMLPUBFUN xmlDocPtr XMLCALL 1132 xmlReadDoc (const xmlChar *cur, 1133 const char *URL, 1134 const char *encoding, 1135 int options); 1136 XMLPUBFUN xmlDocPtr XMLCALL 1137 xmlReadFile (const char *URL, 1138 const char *encoding, 1139 int options); 1140 XMLPUBFUN xmlDocPtr XMLCALL 1141 xmlReadMemory (const char *buffer, 1142 int size, 1143 const char *URL, 1144 const char *encoding, 1145 int options); 1146 XMLPUBFUN xmlDocPtr XMLCALL 1147 xmlReadFd (int fd, 1148 const char *URL, 1149 const char *encoding, 1150 int options); 1151 XMLPUBFUN xmlDocPtr XMLCALL 1152 xmlReadIO (xmlInputReadCallback ioread, 1153 xmlInputCloseCallback ioclose, 1154 void *ioctx, 1155 const char *URL, 1156 const char *encoding, 1157 int options); 1158 /* xmlCtxtReadXxx family: same argument lists as the xmlReadXxx declarations, preceded by an xmlParserCtxtPtr. Note that xmlReadFile() names its first parameter URL while xmlCtxtReadFile() names the corresponding one filename. */ XMLPUBFUN xmlDocPtr XMLCALL 1159 xmlCtxtReadDoc (xmlParserCtxtPtr ctxt, 1160 const xmlChar *cur, 1161 const char *URL, 1162 const char *encoding, 1163 int options); 1164 XMLPUBFUN xmlDocPtr XMLCALL 1165 xmlCtxtReadFile (xmlParserCtxtPtr ctxt, 1166 const char *filename, 1167 const char *encoding, 1168 int options); 1169 XMLPUBFUN xmlDocPtr XMLCALL 1170 xmlCtxtReadMemory (xmlParserCtxtPtr ctxt, 1171 const char *buffer, 1172 int size, 1173 const char *URL, 1174 const char *encoding, 1175 int options); 1176 
XMLPUBFUN xmlDocPtr XMLCALL 1177 xmlCtxtReadFd (xmlParserCtxtPtr ctxt, 1178 int fd, 1179 const char *URL, 1180 const char *encoding, 1181 int options); 1182 /* Like xmlCtxtReadFd(), this takes the parser context first and ends with (URL, encoding, options); the input comes from read/close callbacks plus an opaque ioctx pointer instead of a file descriptor. */ XMLPUBFUN xmlDocPtr XMLCALL 1183 xmlCtxtReadIO (xmlParserCtxtPtr ctxt, 1184 xmlInputReadCallback ioread, 1185 xmlInputCloseCallback ioclose, 1186 void *ioctx, 1187 const char *URL, 1188 const char *encoding, 1189 int options); 1190 1191 /* 1192 * Library wide options 1193 */ 1194 /** 1195 * xmlFeature: 1196 * 1197 * Used to examine the existence of features that can be enabled 1198 * or disabled at compile-time. 1199 * They used to be called XML_FEATURE_xxx but this clashed with Expat 1200 */ 1201 /* The values are consecutive integers 1 through 33 (identifiers, not bit masks), followed by XML_WITH_NONE = 99999 as a deliberately large final value. */ typedef enum { 1202 XML_WITH_THREAD = 1, 1203 XML_WITH_TREE = 2, 1204 XML_WITH_OUTPUT = 3, 1205 XML_WITH_PUSH = 4, 1206 XML_WITH_READER = 5, 1207 XML_WITH_PATTERN = 6, 1208 XML_WITH_WRITER = 7, 1209 XML_WITH_SAX1 = 8, 1210 XML_WITH_FTP = 9, 1211 XML_WITH_HTTP = 10, 1212 XML_WITH_VALID = 11, 1213 XML_WITH_HTML = 12, 1214 XML_WITH_LEGACY = 13, 1215 XML_WITH_C14N = 14, 1216 XML_WITH_CATALOG = 15, 1217 XML_WITH_XPATH = 16, 1218 XML_WITH_XPTR = 17, 1219 XML_WITH_XINCLUDE = 18, 1220 XML_WITH_ICONV = 19, 1221 XML_WITH_ISO8859X = 20, 1222 XML_WITH_UNICODE = 21, 1223 XML_WITH_REGEXP = 22, 1224 XML_WITH_AUTOMATA = 23, 1225 XML_WITH_EXPR = 24, 1226 XML_WITH_SCHEMAS = 25, 1227 XML_WITH_SCHEMATRON = 26, 1228 XML_WITH_MODULES = 27, 1229 XML_WITH_DEBUG = 28, 1230 XML_WITH_DEBUG_MEM = 29, 1231 XML_WITH_DEBUG_RUN = 30, 1232 XML_WITH_ZLIB = 31, 1233 XML_WITH_ICU = 32, 1234 XML_WITH_LZMA = 33, 1235 XML_WITH_NONE = 99999 /* just to be sure of allocation size */ 1236 } xmlFeature; 1237 1238 /* Takes a single xmlFeature value and returns int; presumably non-zero when the feature was compiled in -- TODO confirm against the implementation. */ XMLPUBFUN int XMLCALL 1239 xmlHasFeature (xmlFeature feature); 1240 1241 #ifdef __cplusplus 1242 } /* closes the extern "C" block opened at the top of this header */ 1243 #endif 1244 #endif /* __XML_PARSER_H__ */ 1245