1 /* -*-             c-basic-offset: 4; indent-tabs-mode: nil; -*-  //------100-columns-wide------>|*/
2 // for license please see accompanying LICENSE.txt file (available also at http://www.xmlpull.org/)
3 
4 package org.xmlpull.v1;
5 
6 import java.io.InputStream;
7 import java.io.IOException;
8 import java.io.Reader;
9 
10 /**
11  * XML Pull Parser is an interface that defines parsing functionality provided
12  * in <a href="http://www.xmlpull.org/">XMLPULL V1 API</a> (visit this website to
13  * learn more about API and its implementations).
14  *
 * <p>There are the following different
 * kinds of parser, depending on which features are set:<ul>
17  * <li><b>non-validating</b> parser as defined in XML 1.0 spec when
18  *   FEATURE_PROCESS_DOCDECL is set to true
19  * <li><b>validating parser</b> as defined in XML 1.0 spec when
20  *   FEATURE_VALIDATION is true (and that implies that FEATURE_PROCESS_DOCDECL is true)
 * <li>when FEATURE_PROCESS_DOCDECL is false (this is the default; if a
 *   different value is required, it must be set before parsing is started)
 *   then the parser behaves like an XML 1.0 compliant non-validating parser under the
 *   condition that <em>no DOCDECL is present</em> in XML documents
 *   (internal entities can still be defined with defineEntityReplacementText()).
 *   This mode of operation is intended <b>for operation in constrained environments</b> such as J2ME.
27  * </ul>
28  *
29  *
30  * <p>There are two key methods: next() and nextToken(). While next() provides
31  * access to high level parsing events, nextToken() allows access to lower
32  * level tokens.
33  *
34  * <p>The current event state of the parser
35  * can be determined by calling the
36  * <a href="#getEventType()">getEventType()</a> method.
37  * Initially, the parser is in the <a href="#START_DOCUMENT">START_DOCUMENT</a>
38  * state.
39  *
40  * <p>The method <a href="#next()">next()</a> advances the parser to the
41  * next event. The int value returned from next determines the current parser
42  * state and is identical to the value returned from following calls to
43  * getEventType ().
44  *
 * <p>The following event types are seen by next()<dl>
46  * <dt><a href="#START_TAG">START_TAG</a><dd> An XML start tag was read.
47  * <dt><a href="#TEXT">TEXT</a><dd> Text content was read;
48  * the text content can be retrieved using the getText() method.
49  *  (when in validating mode next() will not report ignorable whitespace, use nextToken() instead)
50  * <dt><a href="#END_TAG">END_TAG</a><dd> An end tag was read
51  * <dt><a href="#END_DOCUMENT">END_DOCUMENT</a><dd> No more events are available
52  * </dl>
53  *
 * <p>After the first next() or nextToken() (or any other next*() method)
 * is called, the user application can obtain the
 * XML version, standalone and encoding from the XML declaration
 * in the following ways:<ul>
58  * <li><b>version</b>:
59  *  getProperty(&quot;<a href="http://xmlpull.org/v1/doc/properties.html#xmldecl-version">http://xmlpull.org/v1/doc/properties.html#xmldecl-version</a>&quot;)
60  *       returns String ("1.0") or null if XMLDecl was not read or if property is not supported
61  * <li><b>standalone</b>:
62  *  getProperty(&quot;<a href="http://xmlpull.org/v1/doc/properties.html#xmldecl-standalone">http://xmlpull.org/v1/doc/properties.html#xmldecl-standalone</a>&quot;)
63  *       returns Boolean: null if there was no standalone declaration
64  *  or if property is not supported
65  *         otherwise returns Boolean(true) if standalone="yes" and Boolean(false) when standalone="no"
66  * <li><b>encoding</b>: obtained from getInputEncoding()
67  *       null if stream had unknown encoding (not set in setInputStream)
68  *           and it was not declared in XMLDecl
69  * </ul>
70  *
71  * A minimal example for using this API may look as follows:
72  * <pre>
73  * import java.io.IOException;
74  * import java.io.StringReader;
75  *
76  * import org.xmlpull.v1.XmlPullParser;
77  * import org.xmlpull.v1.<a href="XmlPullParserException.html">XmlPullParserException</a>;
78  * import org.xmlpull.v1.<a href="XmlPullParserFactory.html">XmlPullParserFactory</a>;
79  *
80  * public class SimpleXmlPullApp
81  * {
82  *
83  *     public static void main (String args[])
84  *         throws XmlPullParserException, IOException
85  *     {
86  *         XmlPullParserFactory factory = XmlPullParserFactory.newInstance();
87  *         factory.setNamespaceAware(true);
88  *         XmlPullParser xpp = factory.newPullParser();
89  *
90  *         xpp.<a href="#setInput">setInput</a>( new StringReader ( "&lt;foo>Hello World!&lt;/foo>" ) );
91  *         int eventType = xpp.getEventType();
92  *         while (eventType != XmlPullParser.END_DOCUMENT) {
93  *          if(eventType == XmlPullParser.START_DOCUMENT) {
94  *              System.out.println("Start document");
95  *          } else if(eventType == XmlPullParser.START_TAG) {
96  *              System.out.println("Start tag "+xpp.<a href="#getName()">getName()</a>);
97  *          } else if(eventType == XmlPullParser.END_TAG) {
98  *              System.out.println("End tag "+xpp.getName());
99  *          } else if(eventType == XmlPullParser.TEXT) {
100  *              System.out.println("Text "+xpp.<a href="#getText()">getText()</a>);
101  *          }
102  *          eventType = xpp.next();
103  *         }
104  *         System.out.println("End document");
105  *     }
106  * }
107  * </pre>
108  *
 * <p>The above example will generate the following output:
 * <pre>
 * Start document
 * Start tag foo
 * Text Hello World!
 * End tag foo
 * End document
 * </pre>
116  *
117  * <p>For more details on API usage, please refer to the
118  * quick Introduction available at <a href="http://www.xmlpull.org">http://www.xmlpull.org</a>
119  *
120  * @see XmlPullParserFactory
121  * @see #defineEntityReplacementText
122  * @see #getName
123  * @see #getNamespace
124  * @see #getText
125  * @see #next
126  * @see #nextToken
127  * @see #setInput
128  * @see #FEATURE_PROCESS_DOCDECL
129  * @see #FEATURE_VALIDATION
130  * @see #START_DOCUMENT
131  * @see #START_TAG
132  * @see #TEXT
133  * @see #END_TAG
134  * @see #END_DOCUMENT
135  *
136  * @author <a href="http://www-ai.cs.uni-dortmund.de/PERSONAL/haustein.html">Stefan Haustein</a>
137  * @author <a href="http://www.extreme.indiana.edu/~aslom/">Aleksander Slominski</a>
138  */
139 
140 public interface XmlPullParser {
141 
142     /** This constant represents the default namespace (empty string "") */
143     String NO_NAMESPACE = "";
144 
145     // ----------------------------------------------------------------------------
146     // EVENT TYPES as reported by next()
147 
148     /**
149      * Signalize that parser is at the very beginning of the document
150      * and nothing was read yet.
151      * This event type can only be observed by calling getEvent()
152      * before the first call to next(), nextToken, or nextTag()</a>).
153      *
154      * @see #next
155      * @see #nextToken
156      */
157     int START_DOCUMENT = 0;
158 
159     /**
160      * Logical end of the xml document. Returned from getEventType, next()
161      * and nextToken()
162      * when the end of the input document has been reached.
163      * <p><strong>NOTE:</strong> calling again
164      * <a href="#next()">next()</a> or <a href="#nextToken()">nextToken()</a>
165      * will result in exception being thrown.
166      *
167      * @see #next
168      * @see #nextToken
169      */
170     int END_DOCUMENT = 1;
171 
172     /**
173      * Returned from getEventType(),
174      * <a href="#next()">next()</a>, <a href="#nextToken()">nextToken()</a> when
175      * a start tag was read.
176      * The name of start tag is available from getName(), its namespace and prefix are
177      * available from getNamespace() and getPrefix()
178      * if <a href='#FEATURE_PROCESS_NAMESPACES'>namespaces are enabled</a>.
179      * See getAttribute* methods to retrieve element attributes.
180      * See getNamespace* methods to retrieve newly declared namespaces.
181      *
182      * @see #next
183      * @see #nextToken
184      * @see #getName
185      * @see #getPrefix
186      * @see #getNamespace
187      * @see #getAttributeCount
188      * @see #getDepth
189      * @see #getNamespaceCount
190      * @see #getNamespace
191      * @see #FEATURE_PROCESS_NAMESPACES
192      */
193     int START_TAG = 2;
194 
195     /**
196      * Returned from getEventType(), <a href="#next()">next()</a>, or
197      * <a href="#nextToken()">nextToken()</a> when an end tag was read.
198      * The name of start tag is available from getName(), its
199      * namespace and prefix are
200      * available from getNamespace() and getPrefix().
201      *
202      * @see #next
203      * @see #nextToken
204      * @see #getName
205      * @see #getPrefix
206      * @see #getNamespace
207      * @see #FEATURE_PROCESS_NAMESPACES
208      */
209     int END_TAG = 3;
210 
211 
212     /**
213      * Character data was read and will is available by calling getText().
214      * <p><strong>Please note:</strong> <a href="#next()">next()</a> will
215      * accumulate multiple
216      * events into one TEXT event, skipping IGNORABLE_WHITESPACE,
217      * PROCESSING_INSTRUCTION and COMMENT events,
218      * In contrast, <a href="#nextToken()">nextToken()</a> will stop reading
219      * text when any other event is observed.
220      * Also, when the state was reached by calling next(), the text value will
221      * be normalized, whereas getText() will
222      * return unnormalized content in the case of nextToken(). This allows
223      * an exact roundtrip without changing line ends when examining low
224      * level events, whereas for high level applications the text is
225      * normalized appropriately.
226      *
227      * @see #next
228      * @see #nextToken
229      * @see #getText
230      */
231     int TEXT = 4;
232 
233     // ----------------------------------------------------------------------------
234     // additional events exposed by lower level nextToken()
235 
236     /**
237      * A CDATA sections was just read;
238      * this token is available only from calls to <a href="#nextToken()">nextToken()</a>.
239      * A call to next() will accumulate various text events into a single event
240      * of type TEXT. The text contained in the CDATA section is available
241      * by calling getText().
242      *
243      * @see #nextToken
244      * @see #getText
245      */
246     int CDSECT = 5;
247 
248     /**
249      * An entity reference was just read;
250      * this token is available from <a href="#nextToken()">nextToken()</a>
251      * only. The entity name is available by calling getName(). If available,
252      * the replacement text can be obtained by calling getText(); otherwise,
253      * the user is responsible for resolving the entity reference.
254      * This event type is never returned from next(); next() will
255      * accumulate the replacement text and other text
256      * events to a single TEXT event.
257      *
258      * @see #nextToken
259      * @see #getText
260      */
261     int ENTITY_REF = 6;
262 
263     /**
264      * Ignorable whitespace was just read.
265      * This token is available only from <a href="#nextToken()">nextToken()</a>).
266      * For non-validating
267      * parsers, this event is only reported by nextToken() when outside
268      * the root element.
269      * Validating parsers may be able to detect ignorable whitespace at
270      * other locations.
271      * The ignorable whitespace string is available by calling getText()
272      *
273      * <p><strong>NOTE:</strong> this is different from calling the
274      *  isWhitespace() method, since text content
275      *  may be whitespace but not ignorable.
276      *
277      * Ignorable whitespace is skipped by next() automatically; this event
278      * type is never returned from next().
279      *
280      * @see #nextToken
281      * @see #getText
282      */
283     int IGNORABLE_WHITESPACE = 7;
284 
285     /**
286      * An XML processing instruction declaration was just read. This
287      * event type is available only via <a href="#nextToken()">nextToken()</a>.
288      * getText() will return text that is inside the processing instruction.
289      * Calls to next() will skip processing instructions automatically.
290      * @see #nextToken
291      * @see #getText
292      */
293     int PROCESSING_INSTRUCTION = 8;
294 
295     /**
296      * An XML comment was just read. This event type is this token is
297      * available via <a href="#nextToken()">nextToken()</a> only;
298      * calls to next() will skip comments automatically.
299      * The content of the comment can be accessed using the getText()
300      * method.
301      *
302      * @see #nextToken
303      * @see #getText
304      */
305     int COMMENT = 9;
306 
307     /**
308      * An XML document type declaration was just read. This token is
309      * available from <a href="#nextToken()">nextToken()</a> only.
310      * The unparsed text inside the doctype is available via
311      * the getText() method.
312      *
313      * @see #nextToken
314      * @see #getText
315      */
316     int DOCDECL = 10;
317 
318     /**
319      * This array can be used to convert the event type integer constants
320      * such as START_TAG or TEXT to
321      * to a string. For example, the value of TYPES[START_TAG] is
322      * the string "START_TAG".
323      *
324      * This array is intended for diagnostic output only. Relying
325      * on the contents of the array may be dangerous since malicious
326      * applications may alter the array, although it is final, due
327      * to limitations of the Java language.
328      */
329     String [] TYPES = {
330         "START_DOCUMENT",
331             "END_DOCUMENT",
332             "START_TAG",
333             "END_TAG",
334             "TEXT",
335             "CDSECT",
336             "ENTITY_REF",
337             "IGNORABLE_WHITESPACE",
338             "PROCESSING_INSTRUCTION",
339             "COMMENT",
340             "DOCDECL"
341     };
342 
343 
344     // ----------------------------------------------------------------------------
345     // namespace related features
346 
347     /**
348      * This feature determines whether the parser processes
349      * namespaces. As for all features, the default value is false.
350      * <p><strong>NOTE:</strong> The value can not be changed during
351      * parsing an must be set before parsing.
352      *
353      * @see #getFeature
354      * @see #setFeature
355      */
356     String FEATURE_PROCESS_NAMESPACES =
357         "http://xmlpull.org/v1/doc/features.html#process-namespaces";
358 
359     /**
360      * This feature determines whether namespace attributes are
361      * exposed via the attribute access methods. Like all features,
362      * the default value is false. This feature cannot be changed
363      * during parsing.
364      *
365      * @see #getFeature
366      * @see #setFeature
367      */
368     String FEATURE_REPORT_NAMESPACE_ATTRIBUTES =
369         "http://xmlpull.org/v1/doc/features.html#report-namespace-prefixes";
370 
371     /**
372      * This feature determines whether the document declaration
373      * is processed. If set to false,
374      * the DOCDECL event type is reported by nextToken()
375      * and ignored by next().
376      *
377      * If this feature is activated, then the document declaration
378      * must be processed by the parser.
379      *
380      * <p><strong>Please note:</strong> If the document type declaration
381      * was ignored, entity references may cause exceptions
382      * later in the parsing process.
383      * The default value of this feature is false. It cannot be changed
384      * during parsing.
385      *
386      * @see #getFeature
387      * @see #setFeature
388      */
389     String FEATURE_PROCESS_DOCDECL =
390         "http://xmlpull.org/v1/doc/features.html#process-docdecl";
391 
392     /**
393      * If this feature is activated, all validation errors as
394      * defined in the XML 1.0 specification are reported.
395      * This implies that FEATURE_PROCESS_DOCDECL is true and both, the
396      * internal and external document type declaration will be processed.
397      * <p><strong>Please Note:</strong> This feature can not be changed
398      * during parsing. The default value is false.
399      *
400      * @see #getFeature
401      * @see #setFeature
402      */
403     String FEATURE_VALIDATION =
404         "http://xmlpull.org/v1/doc/features.html#validation";
405 
406     /**
407      * Use this call to change the general behaviour of the parser,
408      * such as namespace processing or doctype declaration handling.
409      * This method must be called before the first call to next or
410      * nextToken. Otherwise, an exception is thrown.
411      * <p>Example: call setFeature(FEATURE_PROCESS_NAMESPACES, true) in order
412      * to switch on namespace processing. The initial settings correspond
413      * to the properties requested from the XML Pull Parser factory.
414      * If none were requested, all features are deactivated by default.
415      *
416      * @exception XmlPullParserException If the feature is not supported or can not be set
417      * @exception IllegalArgumentException If string with the feature name is null
418      */
setFeature(String name, boolean state)419     void setFeature(String name,
420                            boolean state) throws XmlPullParserException;
421 
422     /**
423      * Returns the current value of the given feature.
424      * <p><strong>Please note:</strong> unknown features are
425      * <strong>always</strong> returned as false.
426      *
427      * @param name The name of feature to be retrieved.
428      * @return The value of the feature.
429      * @exception IllegalArgumentException if string the feature name is null
430      */
431 
getFeature(String name)432     boolean getFeature(String name);
433 
434     /**
435      * Set the value of a property.
436      *
437      * The property name is any fully-qualified URI.
438      *
439      * @exception XmlPullParserException If the property is not supported or can not be set
440      * @exception IllegalArgumentException If string with the property name is null
441      */
setProperty(String name, Object value)442     void setProperty(String name,
443                             Object value) throws XmlPullParserException;
444 
445     /**
446      * Look up the value of a property.
447      *
448      * The property name is any fully-qualified URI.
449      * <p><strong>NOTE:</strong> unknown properties are <strong>always</strong>
450      * returned as null.
451      *
452      * @param name The name of property to be retrieved.
453      * @return The value of named property.
454      */
getProperty(String name)455     Object getProperty(String name);
456 
457 
458     /**
459      * Set the input source for parser to the given reader and
460      * resets the parser. The event type is set to the initial value
461      * START_DOCUMENT.
462      * Setting the reader to null will just stop parsing and
463      * reset parser state,
464      * allowing the parser to free internal resources
465      * such as parsing buffers.
466      */
setInput(Reader in)467     void setInput(Reader in) throws XmlPullParserException;
468 
469 
470     /**
471      * Sets the input stream the parser is going to process.
472      * This call resets the parser state and sets the event type
473      * to the initial value START_DOCUMENT.
474      *
475      * <p><strong>NOTE:</strong> If an input encoding string is passed,
476      *  it MUST be used. Otherwise,
477      *  if inputEncoding is null, the parser SHOULD try to determine
478      *  input encoding following XML 1.0 specification (see below).
479      *  If encoding detection is supported then following feature
480      *  <a href="http://xmlpull.org/v1/doc/features.html#detect-encoding">http://xmlpull.org/v1/doc/features.html#detect-encoding</a>
481      *  MUST be true amd otherwise it must be false
482      *
483      * @param inputStream contains a raw byte input stream of possibly
484      *     unknown encoding (when inputEncoding is null).
485      *
486      * @param inputEncoding if not null it MUST be used as encoding for inputStream
487      */
setInput(InputStream inputStream, String inputEncoding)488     void setInput(InputStream inputStream, String inputEncoding)
489         throws XmlPullParserException;
490 
491     /**
492      * Returns the input encoding if known, null otherwise.
493      * If setInput(InputStream, inputEncoding) was called with an inputEncoding
494      * value other than null, this value must be returned
495      * from this method. Otherwise, if inputEncoding is null and
496      * the parser supports the encoding detection feature
497      * (http://xmlpull.org/v1/doc/features.html#detect-encoding),
498      * it must return the detected encoding.
499      * If setInput(Reader) was called, null is returned.
500      * After first call to next if XML declaration was present this method
501      * will return encoding declared.
502      */
getInputEncoding()503     String getInputEncoding();
504 
505     /**
506      * Set new value for entity replacement text as defined in
507      * <a href="http://www.w3.org/TR/REC-xml#intern-replacement">XML 1.0 Section 4.5
508      * Construction of Internal Entity Replacement Text</a>.
509      * If FEATURE_PROCESS_DOCDECL or FEATURE_VALIDATION are set, calling this
510      * function will result in an exception -- when processing of DOCDECL is
511      * enabled, there is no need to the entity replacement text manually.
512      *
513      * <p>The motivation for this function is to allow very small
514      * implementations of XMLPULL that will work in J2ME environments.
515      * Though these implementations may not be able to process the document type
516      * declaration, they still can work with known DTDs by using this function.
517      *
518      * <p><b>Please notes:</b> The given value is used literally as replacement text
519      * and it corresponds to declaring entity in DTD that has all special characters
520      * escaped: left angle bracket is replaced with &amp;lt;, ampersand with &amp;amp;
521      * and so on.
522      *
523      * <p><b>Note:</b> The given value is the literal replacement text and must not
524      * contain any other entity reference (if it contains any entity reference
525      * there will be no further replacement).
526      *
527      * <p><b>Note:</b> The list of pre-defined entity names will
528      * always contain standard XML entities such as
529      * amp (&amp;amp;), lt (&amp;lt;), gt (&amp;gt;), quot (&amp;quot;), and apos (&amp;apos;).
530      * Those cannot be redefined by this method!
531      *
532      * @see #setInput
533      * @see #FEATURE_PROCESS_DOCDECL
534      * @see #FEATURE_VALIDATION
535      */
defineEntityReplacementText( String entityName, String replacementText )536     void defineEntityReplacementText( String entityName,
537                                             String replacementText ) throws XmlPullParserException;
538 
539     /**
540      * Returns the numbers of elements in the namespace stack for the given
541      * depth.
542      * If namespaces are not enabled, 0 is returned.
543      *
544      * <p><b>NOTE:</b> when parser is on END_TAG then it is allowed to call
545      *  this function with getDepth()+1 argument to retrieve position of namespace
546      *  prefixes and URIs that were declared on corresponding START_TAG.
547      * <p><b>NOTE:</b> to retrieve list of namespaces declared in current element:<pre>
548      *       XmlPullParser pp = ...
549      *       int nsStart = pp.getNamespaceCount(pp.getDepth()-1);
550      *       int nsEnd = pp.getNamespaceCount(pp.getDepth());
551      *       for (int i = nsStart; i < nsEnd; i++) {
552      *          String prefix = pp.getNamespacePrefix(i);
553      *          String ns = pp.getNamespaceUri(i);
554      *           // ...
555      *      }
556      * </pre>
557      *
558      * @see #getNamespacePrefix
559      * @see #getNamespaceUri
560      * @see #getNamespace()
561      * @see #getNamespace(String)
562      */
getNamespaceCount(int depth)563     int getNamespaceCount(int depth) throws XmlPullParserException;
564 
565     /**
566      * Returns the namespace prefix for the given position
567      * in the namespace stack.
568      * Default namespace declaration (xmlns='...') will have null as prefix.
569      * If the given index is out of range, an exception is thrown.
570      * <p><b>Please note:</b> when the parser is on an END_TAG,
571      * namespace prefixes that were declared
572      * in the corresponding START_TAG are still accessible
573      * although they are no longer in scope.
574      */
getNamespacePrefix(int pos)575     String getNamespacePrefix(int pos) throws XmlPullParserException;
576 
577     /**
578      * Returns the namespace URI for the given position in the
579      * namespace stack
580      * If the position is out of range, an exception is thrown.
581      * <p><b>NOTE:</b> when parser is on END_TAG then namespace prefixes that were declared
582      *  in corresponding START_TAG are still accessible even though they are not in scope
583      */
getNamespaceUri(int pos)584     String getNamespaceUri(int pos) throws XmlPullParserException;
585 
586     /**
587      * Returns the URI corresponding to the given prefix,
588      * depending on current state of the parser.
589      *
590      * <p>If the prefix was not declared in the current scope,
591      * null is returned. The default namespace is included
592      * in the namespace table and is available via
593      * getNamespace (null).
594      *
595      * <p>This method is a convenience method for
596      *
597      * <pre>
598      *  for (int i = getNamespaceCount(getDepth ())-1; i >= 0; i--) {
599      *   if (getNamespacePrefix(i).equals( prefix )) {
600      *     return getNamespaceUri(i);
601      *   }
602      *  }
603      *  return null;
604      * </pre>
605      *
606      * <p><strong>Please note:</strong> parser implementations
607      * may provide more efficient lookup, e.g. using a Hashtable.
608      * The 'xml' prefix is bound to "http://www.w3.org/XML/1998/namespace", as
609      * defined in the
610      * <a href="http://www.w3.org/TR/REC-xml-names/#ns-using">Namespaces in XML</a>
611      * specification. Analogous, the 'xmlns' prefix is resolved to
612      * <a href="http://www.w3.org/2000/xmlns/">http://www.w3.org/2000/xmlns/</a>
613      *
614      * @see #getNamespaceCount
615      * @see #getNamespacePrefix
616      * @see #getNamespaceUri
617      */
getNamespace(String prefix)618     String getNamespace (String prefix);
619 
620 
621     // --------------------------------------------------------------------------
622     // miscellaneous reporting methods
623 
624     /**
625      * Returns the current depth of the element.
626      * Outside the root element, the depth is 0. The
627      * depth is incremented by 1 when a start tag is reached.
628      * The depth is decremented AFTER the end tag
629      * event was observed.
630      *
631      * <pre>
632      * &lt;!-- outside --&gt;     0
633      * &lt;root>                  1
634      *   sometext                 1
635      *     &lt;foobar&gt;         2
636      *     &lt;/foobar&gt;        2
637      * &lt;/root&gt;              1
638      * &lt;!-- outside --&gt;     0
639      * </pre>
640      */
getDepth()641     int getDepth();
642 
643     /**
644      * Returns a short text describing the current parser state, including
645      * the position, a
646      * description of the current event and the data source if known.
647      * This method is especially useful to provide meaningful
648      * error messages and for debugging purposes.
649      */
getPositionDescription()650     String getPositionDescription ();
651 
652 
653     /**
654      * Returns the current line number, starting from 1.
655      * When the parser does not know the current line number
656      * or can not determine it,  -1 is returned (e.g. for WBXML).
657      *
658      * @return current line number or -1 if unknown.
659      */
getLineNumber()660     int getLineNumber();
661 
662     /**
663      * Returns the current column number, starting from 0.
664      * When the parser does not know the current column number
665      * or can not determine it,  -1 is returned (e.g. for WBXML).
666      *
667      * @return current column number or -1 if unknown.
668      */
getColumnNumber()669     int getColumnNumber();
670 
671 
672     // --------------------------------------------------------------------------
673     // TEXT related methods
674 
675     /**
676      * Checks whether the current TEXT event contains only whitespace
677      * characters.
678      * For IGNORABLE_WHITESPACE, this is always true.
679      * For TEXT and CDSECT, false is returned when the current event text
680      * contains at least one non-white space character. For any other
681      * event type an exception is thrown.
682      *
683      * <p><b>Please note:</b> non-validating parsers are not
684      * able to distinguish whitespace and ignorable whitespace,
685      * except from whitespace outside the root element. Ignorable
686      * whitespace is reported as separate event, which is exposed
687      * via nextToken only.
688      *
689      */
isWhitespace()690     boolean isWhitespace() throws XmlPullParserException;
691 
692     /**
693      * Returns the text content of the current event as String.
694      * The value returned depends on current event type,
695      * for example for TEXT event it is element content
696      * (this is typical case when next() is used).
697      *
698      * See description of nextToken() for detailed description of
699      * possible returned values for different types of events.
700      *
701      * <p><strong>NOTE:</strong> in case of ENTITY_REF, this method returns
702      * the entity replacement text (or null if not available). This is
703      * the only case where
704      * getText() and getTextCharacters() return different values.
705      *
706      * @see #getEventType
707      * @see #next
708      * @see #nextToken
709      */
getText()710     String getText ();
711 
712 
713     /**
714      * Returns the buffer that contains the text of the current event,
715      * as well as the start offset and length relevant for the current
716      * event. See getText(), next() and nextToken() for description of possible returned values.
717      *
718      * <p><strong>Please note:</strong> this buffer must not
719      * be modified and its content MAY change after a call to
720      * next() or nextToken(). The text exposed by this method is always
721      * the same as the text of getText(), except for ENTITY_REF. In the case
722      * of ENTITY_REF, getText() returns the replacement text and
723      * this method returns the actual input buffer containing the
724      * entity name.
725      * If getText() returns null, this method returns null as well and
726      * the values returned in the holder array MUST be -1 (both start
727      * and length).
728      *
729      * @see #getText
730      * @see #next
731      * @see #nextToken
732      *
733      * @param holderForStartAndLength must be a 2-element int array
734      * into which the start offset and length values will be written.
735      * @return char buffer that contains the text of the current event
736      *  (null if the current event has no text associated).
737      */
getTextCharacters(int [] holderForStartAndLength)738     char[] getTextCharacters(int [] holderForStartAndLength);
739 
740     // --------------------------------------------------------------------------
741     // START_TAG / END_TAG shared methods
742 
743     /**
744      * Returns the namespace URI of the current element.
745      * The default namespace is represented as an empty string.
746      * If namespaces are not enabled, an empty String ("") is always returned.
747      *
748      * @return namespace URI of the current element, or null if the current
749      *   event is not START_TAG or END_TAG
750      */
getNamespace()751     String getNamespace ();
752 
753     /**
754      * For START_TAG or END_TAG events, the (local) name of the current
755      * element is returned when namespaces are enabled. When namespace
756      * processing is disabled, the raw name is returned.
757      * For ENTITY_REF events, the entity name is returned.
758      * <p><b>Please note:</b> To reconstruct the raw element name
759      * when namespaces are enabled and the prefix is not null,
760      * you will need to prepend the prefix and a colon to the local name.
761      *
762      * @return element name or entity name as described above, or null if the
763      *   current event is not START_TAG, END_TAG, or ENTITY_REF
764      */
getName()765     String getName();
766 
767     /**
768      * Returns the prefix of the current element.
769      *
770      * @return prefix of the current element; null if the element is in the
771      *   default namespace (has no prefix), if namespaces are not enabled,
772      *   or if the current event is not START_TAG or END_TAG
773      */
getPrefix()774     String getPrefix();
775 
776     /**
777      * Returns true if the current event is START_TAG and the tag
778      * is an empty-element tag (e.g. &lt;foobar/&gt;).
779      *
780      * @return true if the current START_TAG is an empty-element tag
781      * @throws XmlPullParserException if the parser is not on START_TAG
782      */
isEmptyElementTag()783     boolean isEmptyElementTag() throws XmlPullParserException;
784 
785     // --------------------------------------------------------------------------
786     // START_TAG Attributes retrieval methods
787 
788     /**
789      * Returns the number of attributes of the current start tag.
790      *
791      * @return attribute count, or -1 if the current event type is not START_TAG
792      * @see #getAttributeNamespace
793      * @see #getAttributeName
794      * @see #getAttributePrefix
795      * @see #getAttributeValue
796      */
getAttributeCount()797     int getAttributeCount();
798 
799     /**
800      * Returns the namespace URI of the attribute
801      * with the given index (starts from 0).
802      * Returns an empty string ("") if namespaces are not enabled
803      * or the attribute has no namespace.
804      * Throws an IndexOutOfBoundsException if the index is out of range
805      * or the current event type is not START_TAG.
806      *
807      * <p><strong>NOTE:</strong> if FEATURE_REPORT_NAMESPACE_ATTRIBUTES is set
808      * then namespace attributes (xmlns:ns='...') must be reported
809      * with namespace
810      * <a href="http://www.w3.org/2000/xmlns/">http://www.w3.org/2000/xmlns/</a>
811      * (visit this URL for description!).
812      * The default namespace attribute (xmlns="...") will be reported with empty namespace.
813      * <p><strong>NOTE:</strong> The xml prefix is bound as defined in
814      * <a href="http://www.w3.org/TR/REC-xml-names/#ns-using">Namespaces in XML</a>
815      * specification to "http://www.w3.org/XML/1998/namespace".
816      *
817      * @param index zero-based index of attribute
818      * @return attribute namespace; an empty string ("") is returned if namespace processing
819      *   is not enabled, or if it is enabled but the attribute has no namespace (it has no prefix)
820      * @throws IndexOutOfBoundsException if the index is out of range or the event is not START_TAG
821      */
getAttributeNamespace(int index)822     String getAttributeNamespace (int index);
823 
824     /**
825      * Returns the local name of the specified attribute if namespaces are
826      * enabled, or just the attribute name if namespaces are disabled.
827      *
828      * @param index zero-based index of attribute
829      * @return attribute name (null is never returned)
830      * @throws IndexOutOfBoundsException if the index is out of range
831      *   or the current event type is not START_TAG
832      */
getAttributeName(int index)833     String getAttributeName (int index);
834 
835     /**
836      * Returns the prefix of the specified attribute.
837      * Returns null if the attribute has no prefix.
838      * If namespaces are disabled it will always return null.
839      *
840      * @param index zero-based index of attribute
841      * @return attribute prefix, or null if the attribute has no prefix
842      *   or namespace processing is not enabled
843      * @throws IndexOutOfBoundsException if the index is out of range or the event is not START_TAG
844      */
getAttributePrefix(int index)845     String getAttributePrefix(int index);
846 
847     /**
848      * Returns the type of the specified attribute.
849      * If the parser is non-validating it MUST return CDATA.
850      *
851      * @param index zero-based index of attribute
852      * @return attribute type (null is never returned)
853      */
getAttributeType(int index)854     String getAttributeType(int index);
855 
856     /**
857      * Returns true if the specified attribute was not in the input but was declared in XML.
858      * If the parser is non-validating it MUST always return false.
859      * This information is part of the XML infoset.
860      *
861      * @param index zero-based index of attribute
862      * @return false if attribute was in input
863      */
isAttributeDefault(int index)864     boolean isAttributeDefault(int index);
865 
866     /**
867      * Returns the value of the attribute at the given index.
868      *
869      * <p><strong>NOTE:</strong> attribute value must be normalized
870      * (including entity replacement text if PROCESS_DOCDECL is false) as described in
871      * <a href="http://www.w3.org/TR/REC-xml#AVNormalize">XML 1.0 section
872      * 3.3.3 Attribute-Value Normalization</a>.
873      *
874      * @see #defineEntityReplacementText
875      *
876      * @param index zero-based index of attribute
877      * @return value of attribute (null is never returned)
878      * @throws IndexOutOfBoundsException if the index is out of range
879      *   or the current event type is not START_TAG
880      */
getAttributeValue(int index)881     String getAttributeValue(int index);
882 
883     /**
884      * Returns the value of the attribute identified by namespace URI and local name.
885      * If namespaces are disabled, namespace must be null.
886      *
887      * <p><strong>NOTE:</strong> attribute value must be normalized
888      * (including entity replacement text if PROCESS_DOCDECL is false) as described in
889      * <a href="http://www.w3.org/TR/REC-xml#AVNormalize">XML 1.0 section
890      * 3.3.3 Attribute-Value Normalization</a>.
891      *
892      * @see #defineEntityReplacementText
893      *
894      * @param namespace Namespace of the attribute if namespaces are enabled otherwise must be null
895      * @param name If namespaces enabled local name of attribute otherwise just attribute name
896      * @return value of attribute or null if attribute with given name does not exist
897      * @throws IndexOutOfBoundsException if the current event type is not START_TAG
898      */
getAttributeValue(String namespace, String name)899     String getAttributeValue(String namespace,
900                                     String name);
901 
902     // --------------------------------------------------------------------------
903     // actual parsing methods
904 
905     /**
906      * Returns the type of the current event (START_TAG, END_TAG, TEXT, etc.).
907      *
908      * @see #next()
909      * @see #nextToken()
910      */
getEventType()911     int getEventType()
912         throws XmlPullParserException;
913 
914     /**
915      * Get next parsing event - element content will be coalesced and only one
916      * TEXT event must be returned for whole element content
917      * (comments and processing instructions will be ignored and entity references
918      * must be expanded or exception must be thrown if entity reference can not be expanded).
919      * If element content is empty (content is "") then no TEXT event will be reported.
920      *
921      * <p><b>NOTE:</b> empty element (such as &lt;tag/&gt;) will be reported
922      *  with two separate events: START_TAG, END_TAG - it must be so to preserve
923      *   parsing equivalency of empty element to &lt;tag&gt;&lt;/tag&gt;.
924      *  (see isEmptyElementTag ())
925      *
926      * @see #isEmptyElementTag
927      * @see #START_TAG
928      * @see #TEXT
929      * @see #END_TAG
930      * @see #END_DOCUMENT
931      */
932 
next()933     int next()
934         throws XmlPullParserException, IOException;
935 
936 
937     /**
938      * This method works similarly to next() but will expose
939      * additional event types (COMMENT, CDSECT, DOCDECL, ENTITY_REF, PROCESSING_INSTRUCTION, or
940      * IGNORABLE_WHITESPACE) if they are available in input.
941      *
942      * <p>If special feature
943      * <a href="http://xmlpull.org/v1/doc/features.html#xml-roundtrip">FEATURE_XML_ROUNDTRIP</a>
944      * (identified by URI: http://xmlpull.org/v1/doc/features.html#xml-roundtrip)
945      * is enabled it is possible to do XML document round trip, i.e. reproduce
946      * exactly on output the XML input using getText():
947      * returned content is always unnormalized (exactly as in input).
948      * Otherwise returned content is end-of-line normalized as described in
949      * <a href="http://www.w3.org/TR/REC-xml#sec-line-ends">XML 1.0 End-of-Line Handling</a>.
950      * Also when this feature is enabled exact content of START_TAG, END_TAG,
951      * DOCDECL and PROCESSING_INSTRUCTION is available.
952      *
953      * <p>Here is the list of tokens that can be returned from nextToken()
954      * and what getText() and getTextCharacters() return:<dl>
955      * <dt>START_DOCUMENT<dd>null
956      * <dt>END_DOCUMENT<dd>null
957      * <dt>START_TAG<dd>null unless FEATURE_XML_ROUNDTRIP
958      *   is enabled and then returns XML tag, ex: &lt;tag attr='val'&gt;
959      * <dt>END_TAG<dd>null unless FEATURE_XML_ROUNDTRIP
960      *  is enabled and then returns XML tag, ex: &lt;/tag&gt;
961      * <dt>TEXT<dd>return element content.
962      *  <br>Note: that element content may be delivered in multiple consecutive TEXT events.
963      * <dt>IGNORABLE_WHITESPACE<dd>return characters that are determined to be ignorable white
964      * space. If the FEATURE_XML_ROUNDTRIP is enabled all whitespace content outside root
965      * element will always be reported as IGNORABLE_WHITESPACE otherwise reporting is optional.
966      *  <br>Note: that element content may be delivered in multiple consecutive IGNORABLE_WHITESPACE events.
967      * <dt>CDSECT<dd>
968      * return text <em>inside</em> CDATA
969      *  (ex. 'fo&lt;o' from &lt;![CDATA[fo&lt;o]]&gt;)
970      * <dt>PROCESSING_INSTRUCTION<dd>
971      *  if FEATURE_XML_ROUNDTRIP is true
972      *  return exact PI content ex: 'pi foo' from &lt;?pi foo?&gt;
973      *  otherwise it may be exact PI content or concatenation of PI target,
974      * space and data so for example for
975      *   &lt;?target    data?&gt; string &quot;target data&quot; may
976      *       be returned if FEATURE_XML_ROUNDTRIP is false.
977      * <dt>COMMENT<dd>return comment content ex. 'foo bar' from &lt;!--foo bar--&gt;
978      * <dt>ENTITY_REF<dd>getText() MUST return entity replacement text if PROCESS_DOCDECL is false
979      * otherwise getText() MAY return null,
980      * additionally getTextCharacters() MUST return entity name
981      * (for example 'entity_name' for &amp;entity_name;).
982      * <br><b>NOTE:</b> this is the only place where values returned from getText() and
983      *   getTextCharacters() <b>are different</b>
984      * <br><b>NOTE:</b> it is the user's responsibility to resolve entity reference
985      *    if PROCESS_DOCDECL is false and there is no entity replacement text set in
986      *    defineEntityReplacementText() method (getText() will be null)
987      * <br><b>NOTE:</b> character entities (ex. &amp;#32;) and standard entities such as
988      *  &amp;amp; &amp;lt; &amp;gt; &amp;quot; &amp;apos; are reported as well
989      *  and are <b>not</b> reported as TEXT tokens but as ENTITY_REF tokens!
990      *  This requirement is added to allow to do roundtrip of XML documents!
991      * <dt>DOCDECL<dd>
992      * if FEATURE_XML_ROUNDTRIP is true or PROCESS_DOCDECL is false
993      * then return what is inside of DOCDECL for example it returns:<pre>
994      * &quot; titlepage SYSTEM "http://www.foo.bar/dtds/typo.dtd"
995      * [&lt;!ENTITY % active.links "INCLUDE"&gt;]&quot;</pre>
996      * <p>for input document that contained:<pre>
997      * &lt;!DOCTYPE titlepage SYSTEM "http://www.foo.bar/dtds/typo.dtd"
998      * [&lt;!ENTITY % active.links "INCLUDE"&gt;]&gt;</pre>
999      * otherwise if FEATURE_XML_ROUNDTRIP is false and PROCESS_DOCDECL is true
1000      *    then what is returned is undefined (it may be even null)
1001      * </dd>
1002      * </dl>
1003      *
1004      * <p><strong>NOTE:</strong> there is no guarantee that there will be only one TEXT or
1005      * IGNORABLE_WHITESPACE event from nextToken() as parser may choose to deliver element content in
1006      * multiple tokens (dividing element content into chunks)
1007      *
1008      * <p><strong>NOTE:</strong> whether returned text of token is end-of-line normalized
1009      *  depends on FEATURE_XML_ROUNDTRIP.
1010      *
1011      * <p><strong>NOTE:</strong> XMLDecl (&lt;?xml ...?&gt;) is not reported but its content
1012      * is available through optional properties (see class description above).
1013      *
1014      * @see #next
1015      * @see #START_TAG
1016      * @see #TEXT
1017      * @see #END_TAG
1018      * @see #END_DOCUMENT
1019      * @see #COMMENT
1020      * @see #DOCDECL
1021      * @see #PROCESSING_INSTRUCTION
1022      * @see #ENTITY_REF
1023      * @see #IGNORABLE_WHITESPACE
1024      */
nextToken()1025     int nextToken()
1026         throws XmlPullParserException, IOException;
1027 
1028     //-----------------------------------------------------------------------------
1029     // utility methods to make XML parsing easier ...
1030 
1031     /**
1032      * Test if the current event is of the given type and if the
1033      * namespace and name do match. A null namespace or a null name
1034      * matches any namespace or any name respectively. If the test is not passed, an exception is
1035      * thrown. The exception text indicates the parser position,
1036      * the expected event and the current event that is not meeting the
1037      * requirement.
1038      *
1039      * <p>Essentially it does this:
1040      * <pre>
1041      *  if (type != getEventType()
1042      *  || (namespace != null &amp;&amp;  !namespace.equals( getNamespace () ) )
1043      *  || (name != null &amp;&amp;  !name.equals( getName() ) ) )
1044      *     throw new XmlPullParserException( "expected "+ TYPES[ type ]+getPositionDescription());
1045      * </pre>
1046      */
require(int type, String namespace, String name)1047     void require(int type, String namespace, String name)
1048         throws XmlPullParserException, IOException;
1049 
1050     /**
1051      * If current event is START_TAG then if next event is TEXT then element content is returned
1052      * or if next event is END_TAG then empty string is returned, otherwise exception is thrown.
1053      * After calling this function successfully parser will be positioned on END_TAG.
1054      *
1055      * <p>The motivation for this function is to allow to parse consistently both
1056      * empty elements and elements that have non empty content, for example for input: <ol>
1057      * <li>&lt;tag&gt;foo&lt;/tag&gt;
1058      * <li>&lt;tag&gt;&lt;/tag&gt; (which is equivalent to &lt;tag/&gt;)</ol>
1059      * both inputs can be parsed with the same code:
1060      * <pre>
1061      *   p.nextTag();
1062      *   p.require(p.START_TAG, "", "tag");
1063      *   String content = p.nextText();
1064      *   p.require(p.END_TAG, "", "tag");
1065      * </pre>
1066      * This function together with nextTag makes it very easy to parse XML that has
1067      * no mixed content.
1068      *
1069      *
1070      * <p>Essentially it does this:
1071      * <pre>
1072      *  if(getEventType() != START_TAG) {
1073      *     throw new XmlPullParserException(
1074      *       "parser must be on START_TAG to read next text", this, null);
1075      *  }
1076      *  int eventType = next();
1077      *  if(eventType == TEXT) {
1078      *     String result = getText();
1079      *     eventType = next();
1080      *     if(eventType != END_TAG) {
1081      *       throw new XmlPullParserException(
1082      *          "event TEXT it must be immediately followed by END_TAG", this, null);
1083      *      }
1084      *      return result;
1085      *  } else if(eventType == END_TAG) {
1086      *     return "";
1087      *  } else {
1088      *     throw new XmlPullParserException(
1089      *       "parser must be on START_TAG or TEXT to read text", this, null);
1090      *  }
1091      * </pre>
1092      */
nextText()1093     String nextText() throws XmlPullParserException, IOException;
1094 
1095     /**
1096      * Calls next() and returns the event if it is START_TAG or END_TAG,
1097      * otherwise throws an exception.
1098      * It will skip whitespace TEXT before actual tag if any.
1099      *
1100      * <p>Essentially it does this:
1101      * <pre>
1102      *   int eventType = next();
1103      *   if(eventType == TEXT &amp;&amp;  isWhitespace()) {   // skip whitespace
1104      *      eventType = next();
1105      *   }
1106      *   if (eventType != START_TAG &amp;&amp;  eventType != END_TAG) {
1107      *      throw new XmlPullParserException("expected start or end tag", this, null);
1108      *   }
1109      *   return eventType;
1110      * </pre>
1111      */
nextTag()1112     int nextTag() throws XmlPullParserException, IOException;
1113 
1114 }
1115