1 /*
2  * Summary: the core parser module
3  * Description: Interfaces, constants and types related to the XML parser
4  *
5  * Copy: See Copyright for the status of this software.
6  *
7  * Author: Daniel Veillard
8  */
9 
10 #ifndef __XML_PARSER_H__
11 #define __XML_PARSER_H__
12 
13 #include <stdarg.h>
14 
15 #include "xmlversion.h"
16 #include "tree.h"
17 #include "dict.h"
18 #include "hash.h"
19 #include "valid.h"
20 #include "entities.h"
21 #include "xmlerror.h"
22 #include "xmlstring.h"
23 
24 #ifdef __cplusplus
25 extern "C" {
26 #endif
27 
28 /**
29  * XML_DEFAULT_VERSION:
30  *
31  * The default XML version string: "1.0".
32  */
33 #define XML_DEFAULT_VERSION     "1.0"
34 
35 /**
36  * xmlParserInput:
37  *
38  * An xmlParserInput is an input flow for the XML processor.
39  * Each entity parsed is associated with an xmlParserInput (except the
40  * few predefined ones). This is the case both for internal entities
41  * - in which case the flow is already completely in memory - or
42  * external entities - in which case we use the buf structure for
43  * progressive reading and I18N conversions to the internal UTF-8 format.
44  */
45 
46 /**
47  * xmlParserInputDeallocate:
48  * @str:  the string to deallocate
49  *
50  * Callback type for freeing a parser input allocation (see the free field of struct _xmlParserInput).
51  */
52 typedef void (* xmlParserInputDeallocate)(xmlChar *str);
53 
54 struct _xmlParserInput {
55     /* Input buffer */
56     xmlParserInputBufferPtr buf;      /* UTF-8 encoded buffer */
57 
58     const char *filename;             /* The file analyzed, if any */
59     const char *directory;            /* The directory/base of the file */
60     const xmlChar *base;              /* Base of the array to parse */
61     const xmlChar *cur;               /* Current char being parsed */
62     const xmlChar *end;               /* End of the array to parse */
63     int length;                       /* Length if known */
64     int line;                         /* Current line */
65     int col;                          /* Current column */
66     /*
67      * NOTE: consumed is only tested for equality in the parser code,
68      *       so even if it overflows this should not cause trouble
69      *       when parsing very large instances.
70      */
71     unsigned long consumed;           /* How many xmlChars already consumed */
72     xmlParserInputDeallocate free;    /* Function to deallocate the base */
73     const xmlChar *encoding;          /* The encoding string for the entity */
74     const xmlChar *version;           /* The version string for the entity */
75     int standalone;                   /* Was that entity marked standalone */
76     int id;                           /* A unique identifier for the entity */
77 };
78 
79 /**
80  * xmlParserNodeInfo:
81  *
82  * The parser can be asked to collect node information, i.e. at what
83  * place in the file each node was detected.
84  * NOTE: This is off by default and not very well tested.
85  */
86 typedef struct _xmlParserNodeInfo xmlParserNodeInfo;
87 typedef xmlParserNodeInfo *xmlParserNodeInfoPtr;
88 
89 struct _xmlParserNodeInfo {
90   const struct _xmlNode* node;
91   /* Position & line # where the text that created the node begins & ends */
92   unsigned long begin_pos;
93   unsigned long begin_line;
94   unsigned long end_pos;
95   unsigned long end_line;
96 };
97 
98 typedef struct _xmlParserNodeInfoSeq xmlParserNodeInfoSeq;
99 typedef xmlParserNodeInfoSeq *xmlParserNodeInfoSeqPtr;
100 struct _xmlParserNodeInfoSeq { /* a sequence of xmlParserNodeInfo records */
101   unsigned long maximum;     /* presumably the allocated size of buffer - TODO confirm */
102   unsigned long length;      /* presumably the number of entries in use - TODO confirm */
103   xmlParserNodeInfo* buffer; /* pointer to the xmlParserNodeInfo record storage */
104 };
105 
106 /**
107  * xmlParserInputState:
108  *
109  * The parser now also works as a state-based parser.
110  * The recursive one uses the state info for entity processing.
111  */
112 typedef enum {
113     XML_PARSER_EOF = -1,        /* nothing is to be parsed */
114     XML_PARSER_START = 0,       /* nothing has been parsed */
115     XML_PARSER_MISC,            /* Misc* before int subset */
116     XML_PARSER_PI,              /* Within a processing instruction */
117     XML_PARSER_DTD,             /* within some DTD content */
118     XML_PARSER_PROLOG,          /* Misc* after internal subset */
119     XML_PARSER_COMMENT,         /* within a comment */
120     XML_PARSER_START_TAG,       /* within a start tag */
121     XML_PARSER_CONTENT,         /* within the content */
122     XML_PARSER_CDATA_SECTION,   /* within a CDATA section */
123     XML_PARSER_END_TAG,         /* within a closing tag */
124     XML_PARSER_ENTITY_DECL,     /* within an entity declaration */
125     XML_PARSER_ENTITY_VALUE,    /* within an entity value in a decl */
126     XML_PARSER_ATTRIBUTE_VALUE, /* within an attribute value */
127     XML_PARSER_SYSTEM_LITERAL,  /* within a SYSTEM value */
128     XML_PARSER_EPILOG,          /* the Misc* after the last end tag */
129     XML_PARSER_IGNORE,          /* within an IGNORED section */
130     XML_PARSER_PUBLIC_LITERAL   /* within a PUBLIC value */
131 } xmlParserInputState;
132 
133 /**
134  * XML_DETECT_IDS:
135  *
136  * Bit in the loadsubset context field requesting ID/REFs lookups.
137  * Use it to initialize xmlLoadExtDtdDefaultValue.
138  */
139 #define XML_DETECT_IDS          2
140 
141 /**
142  * XML_COMPLETE_ATTRS:
143  *
144  * Bit in the loadsubset context field requesting completion of the
145  * elements' attribute lists with the ones defaulted from the DTDs.
146  * Use it to initialize xmlLoadExtDtdDefaultValue.
147  */
148 #define XML_COMPLETE_ATTRS      4
149 
150 /**
151  * XML_SKIP_IDS:
152  *
153  * Bit in the loadsubset context field requesting that ID/REFs registration be skipped.
154  * Used to initialize xmlLoadExtDtdDefaultValue in some special cases.
155  */
156 #define XML_SKIP_IDS            8
157 
158 /**
159  * xmlParserMode:
160  *
161  * The modes a parser can operate in (see the parseMode field of struct _xmlParserCtxt).
162  */
163 typedef enum {
164     XML_PARSE_UNKNOWN = 0,
165     XML_PARSE_DOM = 1,
166     XML_PARSE_SAX = 2,
167     XML_PARSE_PUSH_DOM = 3,
168     XML_PARSE_PUSH_SAX = 4,
169     XML_PARSE_READER = 5
170 } xmlParserMode;
171 
172 /**
173  * xmlParserCtxt:
174  *
175  * The parser context.
176  * NOTE This doesn't completely define the parser state, the (current ?)
177  *      design of the parser uses recursive function calls since this allows
178  *      an easy mapping from the production rules of the specification
179  *      to the actual code. The drawback is that the actual function calls
180  *      also reflect the parser state. However most of the parsing routines
181  *      take as the only argument the parser context pointer, so migrating
182  *      to a state based parser for progressive parsing shouldn't be too hard.
183  */
184 struct _xmlParserCtxt {
185     struct _xmlSAXHandler *sax;       /* The SAX handler */
186     void            *userData;        /* For SAX interface only, used by DOM build */
187     xmlDocPtr           myDoc;        /* the document being built */
188     int            wellFormed;        /* is the document well formed */
189     int       replaceEntities;        /* shall we replace entities ? */
190     const xmlChar    *version;        /* the XML version string */
191     const xmlChar   *encoding;        /* the declared encoding, if any */
192     int            standalone;        /* standalone document */
193     int                  html;        /* an HTML(1)/Docbook(2) document */
194 
195     /* Input stream stack */
196     xmlParserInputPtr  input;         /* Current input stream */
197     int                inputNr;       /* Number of current input streams */
198     int                inputMax;      /* Max number of input streams */
199     xmlParserInputPtr *inputTab;      /* stack of inputs */
200 
201     /* Node analysis stack only used for DOM building */
202     xmlNodePtr         node;          /* Current parsed Node */
203     int                nodeNr;        /* Depth of the parsing stack */
204     int                nodeMax;       /* Max depth of the parsing stack */
205     xmlNodePtr        *nodeTab;       /* array of nodes */
206 
207     int record_info;                  /* Whether node info should be kept */
208     xmlParserNodeInfoSeq node_seq;    /* info about each node parsed */
209 
210     int errNo;                        /* error code */
211 
212     int     hasExternalSubset;        /* reference an external subset */
213     int             hasPErefs;        /* the internal subset has PE refs */
214     int              external;        /* are we parsing an external entity */
215 
216     int                 valid;        /* is the document valid */
217     int              validate;        /* shall we try to validate ? */
218     xmlValidCtxt        vctxt;        /* The validity context */
219 
220     xmlParserInputState instate;      /* current type of input */
221     int                 token;        /* next char look-ahead */
222 
223     char           *directory;        /* the data directory */
224 
225     /* Node name stack */
226     const xmlChar     *name;          /* Name of the current parsed Node */
227     int                nameNr;        /* Depth of the parsing stack */
228     int                nameMax;       /* Max depth of the parsing stack */
229     const xmlChar *   *nameTab;       /* array of node names */
230 
231     long               nbChars;       /* number of xmlChar processed */
232     long            checkIndex;       /* used by progressive parsing lookup */
233     int             keepBlanks;       /* ugly but ... */
234     int             disableSAX;       /* SAX callbacks are disabled */
235     int               inSubset;       /* Parsing is in int 1/ext 2 subset */
236     const xmlChar *    intSubName;    /* name of subset */
237     xmlChar *          extSubURI;     /* URI of external subset */
238     xmlChar *          extSubSystem;  /* SYSTEM ID of external subset */
239 
240     /* xml:space values */
241     int *              space;         /* Should the parser preserve spaces */
242     int                spaceNr;       /* Depth of the parsing stack */
243     int                spaceMax;      /* Max depth of the parsing stack */
244     int *              spaceTab;      /* array of space infos */
245 
246     int                depth;         /* to prevent entity substitution loops */
247     xmlParserInputPtr  entity;        /* used to check entities boundaries */
248     int                charset;       /* encoding of the in-memory content
249                                          actually an xmlCharEncoding */
250     int                nodelen;       /* Those two fields are there to */
251     int                nodemem;       /* Speed up large node parsing */
252     int                pedantic;      /* signal pedantic warnings */
253     void              *_private;      /* For user data, libxml won't touch it */
254 
255     int                loadsubset;    /* should the external subset be loaded */
256     int                linenumbers;   /* set line number in element content */
257     void              *catalogs;      /* document's own catalog */
258     int                recovery;      /* run in recovery mode */
259     int                progressive;   /* is this a progressive parsing */
260     xmlDictPtr         dict;          /* dictionary for the parser */
261     const xmlChar *   *atts;          /* array for the attributes callbacks */
262     int                maxatts;       /* the size of the array */
263     int                docdict;       /* use strings from dict to build tree */
264 
265     /*
266      * pre-interned strings
267      */
268     const xmlChar *str_xml;
269     const xmlChar *str_xmlns;
270     const xmlChar *str_xml_ns;
271 
272     /*
273      * Everything below is used only by the new SAX mode
274      */
275     int                sax2;          /* operating in the new SAX mode */
276     int                nsNr;          /* the number of inherited namespaces */
277     int                nsMax;         /* the size of the arrays */
278     const xmlChar *   *nsTab;         /* the array of prefix/namespace name */
279     int               *attallocs;     /* which attributes were allocated */
280     void *            *pushTab;       /* array of data for push */
281     xmlHashTablePtr    attsDefault;   /* defaulted attributes if any */
282     xmlHashTablePtr    attsSpecial;   /* non-CDATA attributes if any */
283     int                nsWellFormed;  /* is the document XML Namespace okay */
284     int                options;       /* Extra options */
285 
286     /*
287      * Those fields are needed only for streaming parsing so far
288      */
289     int               dictNames;    /* Use dictionary names for the tree */
290     int               freeElemsNr;  /* number of freed element nodes */
291     xmlNodePtr        freeElems;    /* List of freed element nodes */
292     int               freeAttrsNr;  /* number of freed attribute nodes */
293     xmlAttrPtr        freeAttrs;    /* List of freed attribute nodes */
294 
295     /*
296      * the complete error information for the last error.
297      */
298     xmlError          lastError;
299     xmlParserMode     parseMode;    /* the parser mode */
300 };
301 
302 /**
303  * xmlSAXLocator:
304  *
305  * A SAX Locator: four callbacks (getPublicId, getSystemId, getLineNumber, getColumnNumber), each taking the context pointer.
306  */
307 struct _xmlSAXLocator {
308     const xmlChar *(*getPublicId)(void *ctx);
309     const xmlChar *(*getSystemId)(void *ctx);
310     int (*getLineNumber)(void *ctx);
311     int (*getColumnNumber)(void *ctx);
312 };
313 
314 /**
315  * xmlSAXHandler:
316  *
317  * A SAX handler is a bunch of callbacks called by the parser when processing
318  * of the input generates data or structure information.
319  */
320 
321 /**
322  * resolveEntitySAXFunc:
323  * @ctx:  the user data (XML parser context)
324  * @publicId: The public ID of the entity
325  * @systemId: The system ID of the entity
326  *
327  * Callback:
328  * The entity loader, to control the loading of external entities,
329  * the application can either:
330  *    - override this resolveEntity() callback in the SAX block
331  *    - or better use the xmlSetExternalEntityLoader() function to
332  *      set up its own entity resolution routine
333  *
334  * Returns the xmlParserInputPtr if inlined or NULL for DOM behaviour.
335  */
336 typedef xmlParserInputPtr (*resolveEntitySAXFunc) (void *ctx,
337                                 const xmlChar *publicId,
338                                 const xmlChar *systemId);
339 /**
340  * internalSubsetSAXFunc:
341  * @ctx:  the user data (XML parser context)
342  * @name:  the root element name
343  * @ExternalID:  the external ID
344  * @SystemID:  the SYSTEM ID (e.g. filename or URL)
345  *
346  * Callback invoked on an internal subset declaration.
347  */
348 typedef void (*internalSubsetSAXFunc) (void *ctx,
349                                 const xmlChar *name,
350                                 const xmlChar *ExternalID,
351                                 const xmlChar *SystemID);
352 /**
353  * externalSubsetSAXFunc:
354  * @ctx:  the user data (XML parser context)
355  * @name:  the root element name
356  * @ExternalID:  the external ID
357  * @SystemID:  the SYSTEM ID (e.g. filename or URL)
358  *
359  * Callback invoked on an external subset declaration.
360  */
361 typedef void (*externalSubsetSAXFunc) (void *ctx,
362                                 const xmlChar *name,
363                                 const xmlChar *ExternalID,
364                                 const xmlChar *SystemID);
365 /**
366  * getEntitySAXFunc:
367  * @ctx:  the user data (XML parser context)
368  * @name: The entity name
369  *
370  * Callback used to get an entity by name.
371  *
372  * Returns the xmlEntityPtr if found.
373  */
374 typedef xmlEntityPtr (*getEntitySAXFunc) (void *ctx,
375                                 const xmlChar *name);
376 /**
377  * getParameterEntitySAXFunc:
378  * @ctx:  the user data (XML parser context)
379  * @name: The entity name
380  *
381  * Callback used to get a parameter entity by name.
382  *
383  * Returns the xmlEntityPtr if found.
384  */
385 typedef xmlEntityPtr (*getParameterEntitySAXFunc) (void *ctx,
386                                 const xmlChar *name);
387 /**
388  * entityDeclSAXFunc:
389  * @ctx:  the user data (XML parser context)
390  * @name:  the entity name
391  * @type:  the entity type
392  * @publicId: The public ID of the entity
393  * @systemId: The system ID of the entity
394  * @content: the entity value (without processing); non-const, unlike the other strings
395  *
396  * Called when an entity definition has been parsed.
397  */
398 typedef void (*entityDeclSAXFunc) (void *ctx,
399                                 const xmlChar *name,
400                                 int type,
401                                 const xmlChar *publicId,
402                                 const xmlChar *systemId,
403                                 xmlChar *content);
404 /**
405  * notationDeclSAXFunc:
406  * @ctx:  the user data (XML parser context)
407  * @name: The name of the notation
408  * @publicId: The public ID of the entity
409  * @systemId: The system ID of the entity
410  *
411  * Called when a notation declaration has been parsed.
412  */
413 typedef void (*notationDeclSAXFunc)(void *ctx,
414                                 const xmlChar *name,
415                                 const xmlChar *publicId,
416                                 const xmlChar *systemId);
417 /**
418  * attributeDeclSAXFunc:
419  * @ctx:  the user data (XML parser context)
420  * @elem:  the name of the element
421  * @fullname:  the attribute name
422  * @type:  the attribute type
423  * @def:  the type of default value
424  * @defaultValue: the attribute default value
425  * @tree:  the tree of the enumerated value set
426  *
427  * Called when an attribute definition has been parsed.
428  */
429 typedef void (*attributeDeclSAXFunc)(void *ctx,
430                                 const xmlChar *elem,
431                                 const xmlChar *fullname,
432                                 int type,
433                                 int def,
434                                 const xmlChar *defaultValue,
435                                 xmlEnumerationPtr tree);
436 /**
437  * elementDeclSAXFunc:
438  * @ctx:  the user data (XML parser context)
439  * @name:  the element name
440  * @type:  the element type
441  * @content: the element value tree
442  *
443  * Called when an element definition has been parsed.
444  */
445 typedef void (*elementDeclSAXFunc)(void *ctx,
446                                 const xmlChar *name,
447                                 int type,
448                                 xmlElementContentPtr content);
449 /**
450  * unparsedEntityDeclSAXFunc:
451  * @ctx:  the user data (XML parser context)
452  * @name: The name of the entity
453  * @publicId: The public ID of the entity
454  * @systemId: The system ID of the entity
455  * @notationName: the name of the notation
456  *
457  * Called when an unparsed entity declaration has been parsed.
458  */
459 typedef void (*unparsedEntityDeclSAXFunc)(void *ctx,
460                                 const xmlChar *name,
461                                 const xmlChar *publicId,
462                                 const xmlChar *systemId,
463                                 const xmlChar *notationName);
464 /**
465  * setDocumentLocatorSAXFunc:
466  * @ctx:  the user data (XML parser context)
467  * @loc: A SAX Locator
468  *
469  * Receives the document locator at startup (in practice xmlDefaultSAXLocator).
470  * Everything is available on the context, so this is useless in our case.
471  */
472 typedef void (*setDocumentLocatorSAXFunc) (void *ctx,
473                                 xmlSAXLocatorPtr loc);
474 /**
475  * startDocumentSAXFunc:
476  * @ctx:  the user data (XML parser context)
477  *
478  * Called when the document starts being processed.
479  */
480 typedef void (*startDocumentSAXFunc) (void *ctx);
481 /**
482  * endDocumentSAXFunc:
483  * @ctx:  the user data (XML parser context)
484  *
485  * Called when the end of the document has been detected.
486  */
487 typedef void (*endDocumentSAXFunc) (void *ctx);
488 /**
489  * startElementSAXFunc:
490  * @ctx:  the user data (XML parser context)
491  * @name:  The element name, including namespace prefix
492  * @atts:  An array of name/value attribute pairs, NULL terminated
493  *
494  * Called when an opening tag has been processed.
495  */
496 typedef void (*startElementSAXFunc) (void *ctx,
497                                 const xmlChar *name,
498                                 const xmlChar **atts);
499 /**
500  * endElementSAXFunc:
501  * @ctx:  the user data (XML parser context)
502  * @name:  The element name
503  *
504  * Called when the end of an element has been detected; receives only the element name.
505  */
506 typedef void (*endElementSAXFunc) (void *ctx,
507                                 const xmlChar *name);
508 /**
509  * attributeSAXFunc:
510  * @ctx:  the user data (XML parser context)
511  * @name:  The attribute name, including namespace prefix
512  * @value:  The attribute value
513  *
514  * Handle an attribute that has been read by the parser.
515  * The default handling is to convert the attribute into a
516  * DOM subtree and paste it in a new xmlAttr element added to
517  * the element.
518  */
519 typedef void (*attributeSAXFunc) (void *ctx,
520                                 const xmlChar *name,
521                                 const xmlChar *value);
522 /**
523  * referenceSAXFunc:
524  * @ctx:  the user data (XML parser context)
525  * @name:  The name of the referenced entity
526  *
527  * Called when an entity reference is detected.
528  */
529 typedef void (*referenceSAXFunc) (void *ctx,
530                                 const xmlChar *name);
531 /**
532  * charactersSAXFunc:
533  * @ctx:  the user data (XML parser context)
534  * @ch:  an xmlChar string
535  * @len: the number of xmlChar
536  *
537  * Receives some chars from the parser.
538  */
539 typedef void (*charactersSAXFunc) (void *ctx,
540                                 const xmlChar *ch,
541                                 int len);
542 /**
543  * ignorableWhitespaceSAXFunc:
544  * @ctx:  the user data (XML parser context)
545  * @ch:  an xmlChar string
546  * @len: the number of xmlChar
547  *
548  * Receives some ignorable whitespace from the parser.
549  * UNUSED: by default the DOM building will use characters.
550  */
551 typedef void (*ignorableWhitespaceSAXFunc) (void *ctx,
552                                 const xmlChar *ch,
553                                 int len);
554 /**
555  * processingInstructionSAXFunc:
556  * @ctx:  the user data (XML parser context)
557  * @target:  the target name
558  * @data: the PI data
559  *
560  * Called when a processing instruction has been parsed.
561  */
562 typedef void (*processingInstructionSAXFunc) (void *ctx,
563                                 const xmlChar *target,
564                                 const xmlChar *data);
565 /**
566  * commentSAXFunc:
567  * @ctx:  the user data (XML parser context)
568  * @value:  the comment content
569  *
570  * Called when a comment has been parsed.
571  */
572 typedef void (*commentSAXFunc) (void *ctx,
573                                 const xmlChar *value);
574 /**
575  * cdataBlockSAXFunc:
576  * @ctx:  the user data (XML parser context)
577  * @value:  The pcdata content
578  * @len:  the block length
579  *
580  * Called when a pcdata block has been parsed. NOTE(review): the callback is named cdataBlock but this text says pcdata - confirm wording.
581  */
582 typedef void (*cdataBlockSAXFunc) (
583                                 void *ctx,
584                                 const xmlChar *value,
585                                 int len);
586 /**
587  * warningSAXFunc:
588  * @ctx:  an XML parser context
589  * @msg:  the message to display/transmit
590  * @...:  extra parameters for the message display
591  *
592  * Callback to format and display a warning message.
593  */
594 typedef void (XMLCDECL *warningSAXFunc) (void *ctx,
595                                 const char *msg, ...);
596 /**
597  * errorSAXFunc:
598  * @ctx:  an XML parser context
599  * @msg:  the message to display/transmit
600  * @...:  extra parameters for the message display
601  *
602  * Callback to format and display an error message.
603  */
604 typedef void (XMLCDECL *errorSAXFunc) (void *ctx,
605                                 const char *msg, ...);
606 /**
607  * fatalErrorSAXFunc:
608  * @ctx:  an XML parser context
609  * @msg:  the message to display/transmit
610  * @...:  extra parameters for the message display
611  *
612  * Callback to format and display a fatal error message.
613  * Note: so far fatalError() SAX callbacks are not used, error()
614  *       gets all the callbacks for errors.
615  */
616 typedef void (XMLCDECL *fatalErrorSAXFunc) (void *ctx,
617                                 const char *msg, ...);
618 /**
619  * isStandaloneSAXFunc:
620  * @ctx:  the user data (XML parser context)
621  *
622  * Callback answering: is this document tagged standalone?
623  *
624  * Returns 1 if true
625  */
626 typedef int (*isStandaloneSAXFunc) (void *ctx);
627 /**
628  * hasInternalSubsetSAXFunc:
629  * @ctx:  the user data (XML parser context)
630  *
631  * Does this document have an internal subset?
632  *
633  * Returns 1 if true
634  */
635 typedef int (*hasInternalSubsetSAXFunc) (void *ctx);
636 
637 /**
638  * hasExternalSubsetSAXFunc:
639  * @ctx:  the user data (XML parser context)
640  *
641  * Does this document have an external subset?
642  *
643  * Returns 1 if true
644  */
645 typedef int (*hasExternalSubsetSAXFunc) (void *ctx);
646 
647 /************************************************************************
648  *                                                                      *
649  *                      The SAX version 2 API extensions                *
650  *                                                                      *
651  ************************************************************************/
652 /**
653  * XML_SAX2_MAGIC:
654  *
655  * Special constant found in the initialized field of SAX2 blocks.
656  */
657 #define XML_SAX2_MAGIC 0xDEEDBEAF
658 
659 /**
660  * startElementNsSAX2Func:
661  * @ctx:  the user data (XML parser context)
662  * @localname:  the local name of the element
663  * @prefix:  the element namespace prefix if available
664  * @URI:  the element namespace name if available
665  * @nb_namespaces:  number of namespace definitions on that node
666  * @namespaces:  pointer to the array of prefix/URI pairs namespace definitions
667  * @nb_attributes:  the number of attributes on that node
668  * @nb_defaulted:  the number of defaulted attributes. The defaulted
669  *                  ones are at the end of the array
670  * @attributes:  pointer to the array of (localname/prefix/URI/value/end)
671  *               attribute values.
672  *
673  * SAX2 callback when an element start has been detected by the parser.
674  * It provides the namespace information for the element, as well as
675  * the new namespace declarations on the element.
676  */
677 
678 typedef void (*startElementNsSAX2Func) (void *ctx,
679                                         const xmlChar *localname,
680                                         const xmlChar *prefix,
681                                         const xmlChar *URI,
682                                         int nb_namespaces,
683                                         const xmlChar **namespaces,
684                                         int nb_attributes,
685                                         int nb_defaulted,
686                                         const xmlChar **attributes);
687 
688 /**
689  * endElementNsSAX2Func:
690  * @ctx:  the user data (XML parser context)
691  * @localname:  the local name of the element
692  * @prefix:  the element namespace prefix if available
693  * @URI:  the element namespace name if available
694  *
695  * SAX2 callback when an element end has been detected by the parser.
696  * It provides the namespace information for the element.
697  */
698 
699 typedef void (*endElementNsSAX2Func)   (void *ctx,
700                                         const xmlChar *localname,
701                                         const xmlChar *prefix,
702                                         const xmlChar *URI);
703 
704 
705 struct _xmlSAXHandler { /* table of SAX callbacks called by the parser */
706     internalSubsetSAXFunc internalSubset;
707     isStandaloneSAXFunc isStandalone;
708     hasInternalSubsetSAXFunc hasInternalSubset;
709     hasExternalSubsetSAXFunc hasExternalSubset;
710     resolveEntitySAXFunc resolveEntity;
711     getEntitySAXFunc getEntity;
712     entityDeclSAXFunc entityDecl;
713     notationDeclSAXFunc notationDecl;
714     attributeDeclSAXFunc attributeDecl;
715     elementDeclSAXFunc elementDecl;
716     unparsedEntityDeclSAXFunc unparsedEntityDecl;
717     setDocumentLocatorSAXFunc setDocumentLocator;
718     startDocumentSAXFunc startDocument;
719     endDocumentSAXFunc endDocument;
720     startElementSAXFunc startElement;
721     endElementSAXFunc endElement;
722     referenceSAXFunc reference;
723     charactersSAXFunc characters;
724     ignorableWhitespaceSAXFunc ignorableWhitespace;
725     processingInstructionSAXFunc processingInstruction;
726     commentSAXFunc comment;
727     warningSAXFunc warning;
728     errorSAXFunc error;
729     fatalErrorSAXFunc fatalError; /* unused, error() gets all the errors */
730     getParameterEntitySAXFunc getParameterEntity;
731     cdataBlockSAXFunc cdataBlock;
732     externalSubsetSAXFunc externalSubset;
733     unsigned int initialized; /* holds XML_SAX2_MAGIC in SAX2 blocks */
734     /* The following fields are extensions available only on version 2 */
735     void *_private;
736     startElementNsSAX2Func startElementNs;
737     endElementNsSAX2Func endElementNs;
738     xmlStructuredErrorFunc serror;
739 };
740 
741 /*
742  * SAX Version 1 handler: the same fields as struct _xmlSAXHandler up to and including initialized
743  */
744 typedef struct _xmlSAXHandlerV1 xmlSAXHandlerV1;
745 typedef xmlSAXHandlerV1 *xmlSAXHandlerV1Ptr;
746 struct _xmlSAXHandlerV1 {
747     internalSubsetSAXFunc internalSubset;
748     isStandaloneSAXFunc isStandalone;
749     hasInternalSubsetSAXFunc hasInternalSubset;
750     hasExternalSubsetSAXFunc hasExternalSubset;
751     resolveEntitySAXFunc resolveEntity;
752     getEntitySAXFunc getEntity;
753     entityDeclSAXFunc entityDecl;
754     notationDeclSAXFunc notationDecl;
755     attributeDeclSAXFunc attributeDecl;
756     elementDeclSAXFunc elementDecl;
757     unparsedEntityDeclSAXFunc unparsedEntityDecl;
758     setDocumentLocatorSAXFunc setDocumentLocator;
759     startDocumentSAXFunc startDocument;
760     endDocumentSAXFunc endDocument;
761     startElementSAXFunc startElement;
762     endElementSAXFunc endElement;
763     referenceSAXFunc reference;
764     charactersSAXFunc characters;
765     ignorableWhitespaceSAXFunc ignorableWhitespace;
766     processingInstructionSAXFunc processingInstruction;
767     commentSAXFunc comment;
768     warningSAXFunc warning;
769     errorSAXFunc error;
770     fatalErrorSAXFunc fatalError; /* unused, error() gets all the errors */
771     getParameterEntitySAXFunc getParameterEntity;
772     cdataBlockSAXFunc cdataBlock;
773     externalSubsetSAXFunc externalSubset;
774     unsigned int initialized;
775 };
776 
777 
778 /**
779  * xmlExternalEntityLoader:
780  * @URL: The System ID of the resource requested
781  * @ID: The Public ID of the resource requested
782  * @context: the XML parser context
783  *
784  * Function type for external entity loaders.
785  *
786  * Returns the entity input parser.
787  */
788 typedef xmlParserInputPtr (*xmlExternalEntityLoader) (const char *URL,
789                                          const char *ID,
790                                          xmlParserCtxtPtr context);
791 
792 #ifdef __cplusplus
793 }
794 #endif
795 
796 #include "encoding.h"
797 #include "xmlIO.h"
798 #include "globals.h"
799 
800 #ifdef __cplusplus
801 extern "C" {
802 #endif
803 
804 
805 /*
806  * Init/Cleanup: both functions take no arguments and return void.
807  */
808 XMLPUBFUN void XMLCALL
809                 xmlInitParser           (void);
810 XMLPUBFUN void XMLCALL
811                 xmlCleanupParser        (void);
812 
813 /*
814  * Input functions: each takes an xmlParserInputPtr and an int length, returns int.
815  */
816 XMLPUBFUN int XMLCALL
817                 xmlParserInputRead      (xmlParserInputPtr in,
818                                          int len);
819 XMLPUBFUN int XMLCALL
820                 xmlParserInputGrow      (xmlParserInputPtr in,
821                                          int len); /* NOTE(review): unit of len and return value not shown here - confirm */
822 
823 /*
824  * Basic parsing Interfaces (xmlParseDoc/File/Memory need LIBXML_SAX1_ENABLED)
825  */
826 #ifdef LIBXML_SAX1_ENABLED
827 XMLPUBFUN xmlDocPtr XMLCALL
828                 xmlParseDoc             (const xmlChar *cur);
829 XMLPUBFUN xmlDocPtr XMLCALL
830                 xmlParseFile            (const char *filename);
831 XMLPUBFUN xmlDocPtr XMLCALL
832                 xmlParseMemory          (const char *buffer,
833                                          int size);
834 #endif /* LIBXML_SAX1_ENABLED */
835 XMLPUBFUN int XMLCALL
836                 xmlSubstituteEntitiesDefault(int val);
837 XMLPUBFUN int XMLCALL
838                 xmlKeepBlanksDefault    (int val);
839 XMLPUBFUN void XMLCALL
840                 xmlStopParser           (xmlParserCtxtPtr ctxt);
841 XMLPUBFUN int XMLCALL
842                 xmlPedanticParserDefault(int val);
843 XMLPUBFUN int XMLCALL
844                 xmlLineNumbersDefault   (int val); /* NOTE(review): the ...Default setters presumably return the old value - confirm */
845 
846 #ifdef LIBXML_SAX1_ENABLED
847 /*
848  * Recovery mode (declared only when LIBXML_SAX1_ENABLED is defined)
849  */
850 XMLPUBFUN xmlDocPtr XMLCALL
851                 xmlRecoverDoc           (xmlChar *cur); /* NOTE(review): not const, unlike xmlParseDoc - confirm intent */
852 XMLPUBFUN xmlDocPtr XMLCALL
853                 xmlRecoverMemory        (const char *buffer,
854                                          int size);
855 XMLPUBFUN xmlDocPtr XMLCALL
856                 xmlRecoverFile          (const char *filename);
857 #endif /* LIBXML_SAX1_ENABLED */
858 
859 /*
860  * Less common routines and SAX interfaces (all but the first two need LIBXML_SAX1_ENABLED)
861  */
862 XMLPUBFUN int XMLCALL
863                 xmlParseDocument        (xmlParserCtxtPtr ctxt);
864 XMLPUBFUN int XMLCALL
865                 xmlParseExtParsedEnt    (xmlParserCtxtPtr ctxt);
866 #ifdef LIBXML_SAX1_ENABLED
867 XMLPUBFUN int XMLCALL
868                 xmlSAXUserParseFile     (xmlSAXHandlerPtr sax,
869                                          void *user_data,
870                                          const char *filename);
871 XMLPUBFUN int XMLCALL
872                 xmlSAXUserParseMemory   (xmlSAXHandlerPtr sax,
873                                          void *user_data,
874                                          const char *buffer,
875                                          int size);
876 XMLPUBFUN xmlDocPtr XMLCALL
877                 xmlSAXParseDoc          (xmlSAXHandlerPtr sax,
878                                          const xmlChar *cur,
879                                          int recovery);
880 XMLPUBFUN xmlDocPtr XMLCALL
881                 xmlSAXParseMemory       (xmlSAXHandlerPtr sax,
882                                          const char *buffer,
883                                          int size,
884                                          int recovery);
885 XMLPUBFUN xmlDocPtr XMLCALL
886                 xmlSAXParseMemoryWithData (xmlSAXHandlerPtr sax,
887                                          const char *buffer,
888                                          int size,
889                                          int recovery,
890                                          void *data); /* xmlSAXParseMemory's parameters plus a void *data */
891 XMLPUBFUN xmlDocPtr XMLCALL
892                 xmlSAXParseFile         (xmlSAXHandlerPtr sax,
893                                          const char *filename,
894                                          int recovery);
895 XMLPUBFUN xmlDocPtr XMLCALL
896                 xmlSAXParseFileWithData (xmlSAXHandlerPtr sax,
897                                          const char *filename,
898                                          int recovery,
899                                          void *data); /* xmlSAXParseFile's parameters plus a void *data */
900 XMLPUBFUN xmlDocPtr XMLCALL
901                 xmlSAXParseEntity       (xmlSAXHandlerPtr sax,
902                                          const char *filename);
903 XMLPUBFUN xmlDocPtr XMLCALL
904                 xmlParseEntity          (const char *filename);
905 #endif /* LIBXML_SAX1_ENABLED */
906 
907 #ifdef LIBXML_VALID_ENABLED
908 XMLPUBFUN xmlDtdPtr XMLCALL
909                 xmlSAXParseDTD          (xmlSAXHandlerPtr sax,
910                                          const xmlChar *ExternalID,
911                                          const xmlChar *SystemID);
912 XMLPUBFUN xmlDtdPtr XMLCALL
913                 xmlParseDTD             (const xmlChar *ExternalID,
914                                          const xmlChar *SystemID); /* xmlSAXParseDTD's parameters without the sax handler */
915 XMLPUBFUN xmlDtdPtr XMLCALL
916                 xmlIOParseDTD           (xmlSAXHandlerPtr sax,
917                                          xmlParserInputBufferPtr input,
918                                          xmlCharEncoding enc);
919 #endif /* LIBXML_VALID_ENABLED */
920 #ifdef LIBXML_SAX1_ENABLED
921 XMLPUBFUN int XMLCALL
922                 xmlParseBalancedChunkMemory(xmlDocPtr doc,
923                                          xmlSAXHandlerPtr sax,
924                                          void *user_data,
925                                          int depth,
926                                          const xmlChar *string,
927                                          xmlNodePtr *lst);
928 #endif /* LIBXML_SAX1_ENABLED */
929 XMLPUBFUN xmlParserErrors XMLCALL /* returns xmlParserErrors, not int; not guarded by LIBXML_SAX1_ENABLED */
930                 xmlParseInNodeContext   (xmlNodePtr node,
931                                          const char *data,
932                                          int datalen,
933                                          int options,
934                                          xmlNodePtr *lst);
935 #ifdef LIBXML_SAX1_ENABLED
936 XMLPUBFUN int XMLCALL
937                 xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc,
938                      xmlSAXHandlerPtr sax,
939                      void *user_data,
940                      int depth,
941                      const xmlChar *string,
942                      xmlNodePtr *lst,
943                      int recover); /* xmlParseBalancedChunkMemory's parameters plus this int */
944 XMLPUBFUN int XMLCALL
945                 xmlParseExternalEntity  (xmlDocPtr doc,
946                                          xmlSAXHandlerPtr sax,
947                                          void *user_data,
948                                          int depth,
949                                          const xmlChar *URL,
950                                          const xmlChar *ID,
951                                          xmlNodePtr *lst);
952 #endif /* LIBXML_SAX1_ENABLED */
953 XMLPUBFUN int XMLCALL
954                 xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx,
955                                          const xmlChar *URL,
956                                          const xmlChar *ID,
957                                          xmlNodePtr *lst);
958 
959 /*
960  * Parser contexts handling (xmlSetupParserForBuffer needs LIBXML_SAX1_ENABLED).
961  */
962 XMLPUBFUN xmlParserCtxtPtr XMLCALL
963                 xmlNewParserCtxt        (void);
964 XMLPUBFUN int XMLCALL
965                 xmlInitParserCtxt       (xmlParserCtxtPtr ctxt);
966 XMLPUBFUN void XMLCALL
967                 xmlClearParserCtxt      (xmlParserCtxtPtr ctxt);
968 XMLPUBFUN void XMLCALL
969                 xmlFreeParserCtxt       (xmlParserCtxtPtr ctxt);
970 #ifdef LIBXML_SAX1_ENABLED
971 XMLPUBFUN void XMLCALL
972                 xmlSetupParserForBuffer (xmlParserCtxtPtr ctxt,
973                                          const xmlChar* buffer,
974                                          const char *filename);
975 #endif /* LIBXML_SAX1_ENABLED */
976 XMLPUBFUN xmlParserCtxtPtr XMLCALL
977                 xmlCreateDocParserCtxt  (const xmlChar *cur);
978 
979 #ifdef LIBXML_LEGACY_ENABLED
980 /*
981  * Reading/setting optional parsing features (declared only with LIBXML_LEGACY_ENABLED).
982  */
983 XMLPUBFUN int XMLCALL
984                 xmlGetFeaturesList      (int *len,
985                                          const char **result);
986 XMLPUBFUN int XMLCALL
987                 xmlGetFeature           (xmlParserCtxtPtr ctxt,
988                                          const char *name,
989                                          void *result); /* untyped pointer; NOTE(review): expected pointee type not visible here - confirm */
990 XMLPUBFUN int XMLCALL
991                 xmlSetFeature           (xmlParserCtxtPtr ctxt,
992                                          const char *name,
993                                          void *value);
994 #endif /* LIBXML_LEGACY_ENABLED */
995 
996 #ifdef LIBXML_PUSH_ENABLED
997 /*
998  * Interfaces for the Push mode (declared only with LIBXML_PUSH_ENABLED).
999  */
1000 XMLPUBFUN xmlParserCtxtPtr XMLCALL
1001                 xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax,
1002                                          void *user_data,
1003                                          const char *chunk,
1004                                          int size,
1005                                          const char *filename);
1006 XMLPUBFUN int XMLCALL
1007                 xmlParseChunk           (xmlParserCtxtPtr ctxt,
1008                                          const char *chunk,
1009                                          int size,
1010                                          int terminate); /* NOTE(review): presumably non-zero for the last chunk - confirm */
1011 #endif /* LIBXML_PUSH_ENABLED */
1012 
1013 /*
1014  * Special I/O mode: xmlCreateIOParserCtxt takes read/close callbacks and a void *ioctx.
1015  */
1016 
1017 XMLPUBFUN xmlParserCtxtPtr XMLCALL
1018                 xmlCreateIOParserCtxt   (xmlSAXHandlerPtr sax,
1019                                          void *user_data,
1020                                          xmlInputReadCallback   ioread,
1021                                          xmlInputCloseCallback  ioclose,
1022                                          void *ioctx,
1023                                          xmlCharEncoding enc);
1024 
1025 XMLPUBFUN xmlParserInputPtr XMLCALL
1026                 xmlNewIOInputStream     (xmlParserCtxtPtr ctxt,
1027                                          xmlParserInputBufferPtr input,
1028                                          xmlCharEncoding enc);
1029 
1030 /*
1031  * Node infos.
1032  */
1033 XMLPUBFUN const xmlParserNodeInfo* XMLCALL
1034                 xmlParserFindNodeInfo   (const xmlParserCtxtPtr ctxt, /* NOTE(review): if ...Ptr is a pointer typedef, const binds to the pointer, not the pointee */
1035                                          const xmlNodePtr node);
1036 XMLPUBFUN void XMLCALL
1037                 xmlInitNodeInfoSeq      (xmlParserNodeInfoSeqPtr seq);
1038 XMLPUBFUN void XMLCALL
1039                 xmlClearNodeInfoSeq     (xmlParserNodeInfoSeqPtr seq);
1040 XMLPUBFUN unsigned long XMLCALL
1041                 xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeqPtr seq,
1042                                          const xmlNodePtr node);
1043 XMLPUBFUN void XMLCALL
1044                 xmlParserAddNodeInfo    (xmlParserCtxtPtr ctxt,
1045                                          const xmlParserNodeInfoPtr info);
1046 
1047 /*
1048  * External entities handling actually implemented in xmlIO.
1049  */
1050 
1051 XMLPUBFUN void XMLCALL
1052                 xmlSetExternalEntityLoader(xmlExternalEntityLoader f);
1053 XMLPUBFUN xmlExternalEntityLoader XMLCALL
1054                 xmlGetExternalEntityLoader(void);
1055 XMLPUBFUN xmlParserInputPtr XMLCALL
1056                 xmlLoadExternalEntity   (const char *URL,
1057                                          const char *ID,
1058                                          xmlParserCtxtPtr ctxt); /* same parameter and return types as xmlExternalEntityLoader */
1059 
1060 /*
1061  * Index lookup, actually implemented in the encoding module
1062  */
1063 XMLPUBFUN long XMLCALL
1064                 xmlByteConsumed         (xmlParserCtxtPtr ctxt); /* NOTE(review): error return value not visible here - confirm */
1065 
1066 /*
1067  * New set of simpler/more flexible APIs
1068  */
1069 /**
1070  * xmlParserOption:
1071  *
1072  * This is the set of XML parser options that can be passed down
1073  * to the xmlReadDoc() and similar calls.
1074  */
1075 typedef enum { /* each value is a distinct single bit, 1<<0 through 1<<16 */
1076     XML_PARSE_RECOVER   = 1<<0, /* recover on errors */
1077     XML_PARSE_NOENT     = 1<<1, /* substitute entities */
1078     XML_PARSE_DTDLOAD   = 1<<2, /* load the external subset */
1079     XML_PARSE_DTDATTR   = 1<<3, /* default DTD attributes */
1080     XML_PARSE_DTDVALID  = 1<<4, /* validate with the DTD */
1081     XML_PARSE_NOERROR   = 1<<5, /* suppress error reports */
1082     XML_PARSE_NOWARNING = 1<<6, /* suppress warning reports */
1083     XML_PARSE_PEDANTIC  = 1<<7, /* pedantic error reporting */
1084     XML_PARSE_NOBLANKS  = 1<<8, /* remove blank nodes */
1085     XML_PARSE_SAX1      = 1<<9, /* use the SAX1 interface internally */
1086     XML_PARSE_XINCLUDE  = 1<<10,/* Implement XInclude substitution  */
1087     XML_PARSE_NONET     = 1<<11,/* Forbid network access */
1088     XML_PARSE_NODICT    = 1<<12,/* Do not reuse the context dictionary */
1089     XML_PARSE_NSCLEAN   = 1<<13,/* remove redundant namespaces declarations */
1090     XML_PARSE_NOCDATA   = 1<<14,/* merge CDATA as text nodes */
1091     XML_PARSE_NOXINCNODE= 1<<15,/* do not generate XINCLUDE START/END nodes */
1092     XML_PARSE_COMPACT   = 1<<16 /* compact small text nodes; no modification of
1093                                    the tree allowed afterwards (will possibly
1094                                    crash if you try to modify the tree) */
1095 } xmlParserOption;
1096 
1097 XMLPUBFUN void XMLCALL
1098                 xmlCtxtReset            (xmlParserCtxtPtr ctxt);
1099 XMLPUBFUN int XMLCALL
1100                 xmlCtxtResetPush        (xmlParserCtxtPtr ctxt,
1101                                          const char *chunk,
1102                                          int size,
1103                                          const char *filename,
1104                                          const char *encoding);
1105 XMLPUBFUN int XMLCALL
1106                 xmlCtxtUseOptions       (xmlParserCtxtPtr ctxt,
1107                                          int options);
1108 XMLPUBFUN xmlDocPtr XMLCALL
1109                 xmlReadDoc              (const xmlChar *cur,
1110                                          const char *URL,
1111                                          const char *encoding,
1112                                          int options); /* xmlParserOption values, per that enum's description */
1113 XMLPUBFUN xmlDocPtr XMLCALL
1114                 xmlReadFile             (const char *URL,
1115                                          const char *encoding,
1116                                          int options);
1117 XMLPUBFUN xmlDocPtr XMLCALL
1118                 xmlReadMemory           (const char *buffer,
1119                                          int size,
1120                                          const char *URL,
1121                                          const char *encoding,
1122                                          int options);
1123 XMLPUBFUN xmlDocPtr XMLCALL
1124                 xmlReadFd               (int fd,
1125                                          const char *URL,
1126                                          const char *encoding,
1127                                          int options);
1128 XMLPUBFUN xmlDocPtr XMLCALL
1129                 xmlReadIO               (xmlInputReadCallback ioread,
1130                                          xmlInputCloseCallback ioclose,
1131                                          void *ioctx,
1132                                          const char *URL,
1133                                          const char *encoding,
1134                                          int options);
1135 XMLPUBFUN xmlDocPtr XMLCALL /* the xmlCtxtRead... variants below take the parser context as first argument */
1136                 xmlCtxtReadDoc          (xmlParserCtxtPtr ctxt,
1137                                          const xmlChar *cur,
1138                                          const char *URL,
1139                                          const char *encoding,
1140                                          int options);
1141 XMLPUBFUN xmlDocPtr XMLCALL
1142                 xmlCtxtReadFile         (xmlParserCtxtPtr ctxt,
1143                                          const char *filename,
1144                                          const char *encoding,
1145                                          int options);
1146 XMLPUBFUN xmlDocPtr XMLCALL
1147                 xmlCtxtReadMemory               (xmlParserCtxtPtr ctxt,
1148                                          const char *buffer,
1149                                          int size,
1150                                          const char *URL,
1151                                          const char *encoding,
1152                                          int options);
1153 XMLPUBFUN xmlDocPtr XMLCALL
1154                 xmlCtxtReadFd           (xmlParserCtxtPtr ctxt,
1155                                          int fd,
1156                                          const char *URL,
1157                                          const char *encoding,
1158                                          int options);
1159 XMLPUBFUN xmlDocPtr XMLCALL
1160                 xmlCtxtReadIO           (xmlParserCtxtPtr ctxt,
1161                                          xmlInputReadCallback ioread,
1162                                          xmlInputCloseCallback ioclose,
1163                                          void *ioctx,
1164                                          const char *URL,
1165                                          const char *encoding,
1166                                          int options);
1167 
1168 /*
1169  * Library wide options
1170  */
1171 /**
1172  * xmlFeature:
1173  *
1174  * Used to examine the existence of features that can be enabled
1175  * or disabled at compile-time.
1176  * They used to be called XML_FEATURE_xxx but this clashed with Expat
1177  */
1178 typedef enum { /* values are consecutive from 1 to 31, plus the XML_WITH_NONE sentinel */
1179     XML_WITH_THREAD = 1,
1180     XML_WITH_TREE = 2,
1181     XML_WITH_OUTPUT = 3,
1182     XML_WITH_PUSH = 4,
1183     XML_WITH_READER = 5,
1184     XML_WITH_PATTERN = 6,
1185     XML_WITH_WRITER = 7,
1186     XML_WITH_SAX1 = 8,
1187     XML_WITH_FTP = 9,
1188     XML_WITH_HTTP = 10,
1189     XML_WITH_VALID = 11,
1190     XML_WITH_HTML = 12,
1191     XML_WITH_LEGACY = 13,
1192     XML_WITH_C14N = 14,
1193     XML_WITH_CATALOG = 15,
1194     XML_WITH_XPATH = 16,
1195     XML_WITH_XPTR = 17,
1196     XML_WITH_XINCLUDE = 18,
1197     XML_WITH_ICONV = 19,
1198     XML_WITH_ISO8859X = 20,
1199     XML_WITH_UNICODE = 21,
1200     XML_WITH_REGEXP = 22,
1201     XML_WITH_AUTOMATA = 23,
1202     XML_WITH_EXPR = 24,
1203     XML_WITH_SCHEMAS = 25,
1204     XML_WITH_SCHEMATRON = 26,
1205     XML_WITH_MODULES = 27,
1206     XML_WITH_DEBUG = 28,
1207     XML_WITH_DEBUG_MEM = 29,
1208     XML_WITH_DEBUG_RUN = 30,
1209     XML_WITH_ZLIB = 31,
1210     XML_WITH_NONE = 99999 /* just to be sure of allocation size */
1211 } xmlFeature;
1212 
1213 XMLPUBFUN int XMLCALL
1214                 xmlHasFeature           (xmlFeature feature); /* NOTE(review): presumably non-zero when the feature was compiled in - confirm */
1215 
1216 #ifdef __cplusplus
1217 }
1218 #endif
1219 #endif /* __XML_PARSER_H__ */
1220 
1221