1 /* XMLParser.java --
2    Copyright (C) 2005  Free Software Foundation, Inc.
3 
4 This file is part of GNU Classpath.
5 
6 GNU Classpath is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10 
11 GNU Classpath is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 General Public License for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GNU Classpath; see the file COPYING.  If not, write to the
18 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 02110-1301 USA.
20 
21 Linking this library statically or dynamically with other modules is
22 making a combined work based on this library.  Thus, the terms and
23 conditions of the GNU General Public License cover the whole
24 combination.
25 
26 As a special exception, the copyright holders of this library give you
27 permission to link this library with independent modules to produce an
28 executable, regardless of the license terms of these independent
29 modules, and to copy and distribute the resulting executable under
30 terms of your choice, provided that you also meet, for each linked
31 independent module, the terms and conditions of the license of that
32 module.  An independent module is a module which is not derived from
33 or based on this library.  If you modify this library, you may extend
34 this exception to your version of the library, but you are not
35 obligated to do so.  If you do not wish to do so, delete this
36 exception statement from your version.
37 
38 Partly derived from code which carried the following notice:
39 
40   Copyright (c) 1997, 1998 by Microstar Software Ltd.
41 
42   AElfred is free for both commercial and non-commercial use and
43   redistribution, provided that Microstar's copyright and disclaimer are
44   retained intact.  You are free to modify AElfred for your own use and
45   to redistribute AElfred with your modifications, provided that the
46   modifications are clearly documented.
47 
48   This program is distributed in the hope that it will be useful, but
49   WITHOUT ANY WARRANTY; without even the implied warranty of
50   merchantability or fitness for a particular purpose.  Please use it AT
51   YOUR OWN RISK.
52 */
53 
54 package gnu.xml.stream;
55 
56 import gnu.java.lang.CPStringBuilder;
57 
58 import java.io.BufferedInputStream;
59 import java.io.EOFException;
60 import java.io.File;
61 import java.io.FileOutputStream;
62 import java.io.FileWriter;
63 import java.io.InputStream;
64 import java.io.InputStreamReader;
65 import java.io.IOException;
66 import java.io.Reader;
67 import java.io.StringReader;
68 import java.io.UnsupportedEncodingException;
69 import java.net.MalformedURLException;
70 import java.net.URL;
71 import java.util.ArrayList;
72 import java.util.Collections;
73 import java.util.HashSet;
74 import java.util.Iterator;
75 import java.util.LinkedHashMap;
76 import java.util.LinkedList;
77 import java.util.Map;
78 import java.util.NoSuchElementException;
79 import java.util.StringTokenizer;
80 
81 import javax.xml.XMLConstants;
82 import javax.xml.namespace.NamespaceContext;
83 import javax.xml.namespace.QName;
84 import javax.xml.stream.Location;
85 import javax.xml.stream.XMLInputFactory;
86 import javax.xml.stream.XMLReporter;
87 import javax.xml.stream.XMLResolver;
88 import javax.xml.stream.XMLStreamConstants;
89 import javax.xml.stream.XMLStreamException;
90 import javax.xml.stream.XMLStreamReader;
91 
92 import gnu.java.net.CRLFInputStream;
93 import gnu.classpath.debug.TeeInputStream;
94 import gnu.classpath.debug.TeeReader;
95 
96 /**
97  * An XML parser.
98  * This parser supports the following additional StAX properties:
99  * <table>
100  * <tr><td>gnu.xml.stream.stringInterning</td>
101  * <td>Boolean</td>
102  * <td>Indicates whether markup strings will be interned</td></tr>
103  * <tr><td>gnu.xml.stream.xmlBase</td>
104  * <td>Boolean</td>
105  * <td>Indicates whether XML Base processing will be performed</td></tr>
106  * <tr><td>gnu.xml.stream.baseURI</td>
107  * <td>String</td>
108  * <td>Returns the base URI of the current event</td></tr>
109  * </table>
110  *
111  * @see http://www.w3.org/TR/REC-xml/
112  * @see http://www.w3.org/TR/xml11/
113  * @see http://www.w3.org/TR/REC-xml-names
114  * @see http://www.w3.org/TR/xml-names11
115  * @see http://www.w3.org/TR/xmlbase/
116  *
117  * @author <a href='mailto:dog@gnu.org'>Chris Burdess</a>
118  */
119 public class XMLParser
120   implements XMLStreamReader, NamespaceContext
121 {
122 
123   // -- parser state machine states --
124   private static final int INIT = 0; // start state
125   private static final int PROLOG = 1; // in prolog
126   private static final int CONTENT = 2; // in content
127   private static final int EMPTY_ELEMENT = 3; // empty element state
128   private static final int MISC = 4; // in Misc (after root element)
129 
130   // -- parameters for parsing literals --
131   private final static int LIT_ENTITY_REF = 2;
132   private final static int LIT_NORMALIZE = 4;
133   private final static int LIT_ATTRIBUTE = 8;
134   private final static int LIT_DISABLE_PE = 16;
135   private final static int LIT_DISABLE_CREF = 32;
136   private final static int LIT_DISABLE_EREF = 64;
137   private final static int LIT_PUBID = 256;
138 
139   // -- types of attribute values --
140   final static int ATTRIBUTE_DEFAULT_UNDECLARED = 30;
141   final static int ATTRIBUTE_DEFAULT_SPECIFIED = 31;
142   final static int ATTRIBUTE_DEFAULT_IMPLIED = 32;
143   final static int ATTRIBUTE_DEFAULT_REQUIRED = 33;
144   final static int ATTRIBUTE_DEFAULT_FIXED = 34;
145 
146   // -- additional event types --
147   final static int START_ENTITY = 50;
148   final static int END_ENTITY = 51;
149 
150   /**
151    * The current input.
152    */
153   private Input input;
154 
155   /**
156    * Stack of inputs representing XML general entities.
157    * The input representing the XML input stream or reader is always the
158    * first element in this stack.
159    */
160   private LinkedList inputStack = new LinkedList();
161 
162   /**
163    * Stack of start-entity events to be reported.
164    */
165   private LinkedList startEntityStack = new LinkedList();
166 
167   /**
168    * Stack of end-entity events to be reported.
169    */
170   private LinkedList endEntityStack = new LinkedList();
171 
172   /**
173    * Current parser state within the main state machine.
174    */
175   private int state = INIT;
176 
177   /**
178    * The (type of the) current event.
179    */
180   private int event;
181 
182   /**
183    * The element name stack. The first element in this stack will be the
184    * root element.
185    */
186   private LinkedList stack = new LinkedList();
187 
188   /**
189    * Stack of namespace contexts. These are maps specifying prefix-to-URI
190    * mappings. The first element in this stack is the most recent namespace
191    * context (i.e. the other way around from the element name stack).
192    */
193   private LinkedList namespaces = new LinkedList();
194 
195   /**
196    * The base-URI stack. This holds the base URI context for each element.
197    * The first element in this stack is the most recent context (i.e. the
198    * other way around from the element name stack).
199    */
200   private LinkedList bases = new LinkedList();
201 
202   /**
203    * The list of attributes for the current element, in the order defined in
204    * the XML stream.
205    */
206   private ArrayList attrs = new ArrayList();
207 
208   /**
209    * Buffer for text and character data.
210    */
211   private StringBuffer buf = new StringBuffer();
212 
213   /**
214    * Buffer for NMTOKEN strings (markup).
215    */
216   private StringBuffer nmtokenBuf = new StringBuffer();
217 
218   /**
219    * Buffer for string literals. (e.g. attribute values)
220    */
221   private StringBuffer literalBuf = new StringBuffer();
222 
223   /**
224    * Temporary Unicode character buffer used during character data reads.
225    */
226   private int[] tmpBuf = new int[1024];
227 
228   /**
229    * The element content model for the current element.
230    */
231   private ContentModel currentContentModel;
232 
233   /**
234    * The validation stack. This holds lists of the elements seen for each
235    * element, in order to determine whether the names and order of these
236    * elements match the content model for the element. The last entry in
237    * this stack represents the current element.
238    */
239   private LinkedList validationStack;
240 
241   /**
242    * These sets contain the IDs and the IDREFs seen in the document, to
243    * ensure that IDs are unique and that each IDREF refers to an ID in the
244    * document.
245    */
246   private HashSet ids, idrefs;
247 
248   /**
249    * The target and data associated with the current processing instruction
250    * event.
251    */
252   private String piTarget, piData;
253 
254   /**
255    * The XML version declared in the XML declaration.
256    */
257   private String xmlVersion;
258 
259   /**
260    * The encoding declared in the XML declaration.
261    */
262   private String xmlEncoding;
263 
264   /**
265    * The standalone value declared in the XML declaration.
266    */
267   private Boolean xmlStandalone;
268 
269   /**
270    * The document type definition.
271    */
272   Doctype doctype;
273 
274   /**
275    * State variables for determining parameter-entity expansion.
276    */
277   private boolean expandPE, peIsError;
278 
279   /**
280    * Whether this is a validating parser.
281    */
282   private final boolean validating;
283 
284   /**
285    * Whether strings representing markup will be interned.
286    */
287   private final boolean stringInterning;
288 
289   /**
290    * If true, CDATA sections will be merged with adjacent text nodes into a
291    * single event.
292    */
293   private final boolean coalescing;
294 
295   /**
296    * Whether to replace general entity references with their replacement
297    * text automatically during parsing.
298    * Otherwise entity-reference events will be issued.
299    */
300   private final boolean replaceERefs;
301 
302   /**
303    * Whether to support external entities.
304    */
305   private final boolean externalEntities;
306 
307   /**
308    * Whether to support DTDs.
309    */
310   private final boolean supportDTD;
311 
312   /**
313    * Whether to support XML namespaces. If true, namespace information will
314    * be available. Otherwise namespaces will simply be reported as ordinary
315    * attributes.
316    */
317   private final boolean namespaceAware;
318 
319   /**
320    * Whether to support XML Base. If true, URIs specified in xml:base
321    * attributes will be honoured when resolving external entities.
322    */
323   private final boolean baseAware;
324 
325   /**
326    * Whether to report extended event types (START_ENTITY and END_ENTITY)
327    * in addition to the standard event types. Used by the SAX parser.
328    */
329   private final boolean extendedEventTypes;
330 
331   /**
332    * The reporter to receive parsing warnings.
333    */
334   final XMLReporter reporter;
335 
336   /**
337    * Callback interface for resolving external entities.
338    */
339   final XMLResolver resolver;
340 
341   // -- Constants for testing the next kind of markup event --
342   private static final String TEST_START_ELEMENT = "<";
343   private static final String TEST_END_ELEMENT = "</";
344   private static final String TEST_COMMENT = "<!--";
345   private static final String TEST_PI = "<?";
346   private static final String TEST_CDATA = "<![CDATA[";
347   private static final String TEST_XML_DECL = "<?xml";
348   private static final String TEST_DOCTYPE_DECL = "<!DOCTYPE";
349   private static final String TEST_ELEMENT_DECL = "<!ELEMENT";
350   private static final String TEST_ATTLIST_DECL = "<!ATTLIST";
351   private static final String TEST_ENTITY_DECL = "<!ENTITY";
352   private static final String TEST_NOTATION_DECL = "<!NOTATION";
353   private static final String TEST_KET = ">";
354   private static final String TEST_END_COMMENT = "--";
355   private static final String TEST_END_PI = "?>";
356   private static final String TEST_END_CDATA = "]]>";
357 
358   /**
359    * The general entities predefined by the XML specification.
360    */
361   private static final LinkedHashMap PREDEFINED_ENTITIES = new LinkedHashMap();
362   static
363   {
364     PREDEFINED_ENTITIES.put("amp", "&");
365     PREDEFINED_ENTITIES.put("lt", "<");
366     PREDEFINED_ENTITIES.put("gt", ">");
367     PREDEFINED_ENTITIES.put("apos", "'");
368     PREDEFINED_ENTITIES.put("quot", "\"");
369   }
370 
371   /**
372    * Creates a new XML parser for the given input stream.
373    * This constructor should be used where possible, as it allows the
374    * encoding of the XML data to be correctly determined from the stream.
375    * @param in the input stream
376    * @param systemId the URL from which the input stream was retrieved
377    * (necessary if there are external entities to be resolved)
378    * @param validating if the parser is to be a validating parser
379    * @param namespaceAware if the parser should support XML Namespaces
380    * @param coalescing if CDATA sections should be merged into adjacent text
381    * nodes
382    * @param replaceERefs if entity references should be automatically
383    * replaced by their replacement text (otherwise they will be reported as
384    * entity-reference events)
385    * @param externalEntities if external entities should be loaded
386    * @param supportDTD if support for the XML DTD should be enabled
387    * @param baseAware if the parser should support XML Base to resolve
388    * external entities
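   * @param stringInterning whether strings will be interned during parsing
   * @param extendedEventTypes whether to report the extended START_ENTITY
   * and END_ENTITY event types in addition to the standard event types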
390    * @param reporter the reporter to receive warnings during processing
391    * @param resolver the callback interface used to resolve external
392    * entities
393    */
XMLParser(InputStream in, String systemId, boolean validating, boolean namespaceAware, boolean coalescing, boolean replaceERefs, boolean externalEntities, boolean supportDTD, boolean baseAware, boolean stringInterning, boolean extendedEventTypes, XMLReporter reporter, XMLResolver resolver)394   public XMLParser(InputStream in, String systemId,
395                    boolean validating,
396                    boolean namespaceAware,
397                    boolean coalescing,
398                    boolean replaceERefs,
399                    boolean externalEntities,
400                    boolean supportDTD,
401                    boolean baseAware,
402                    boolean stringInterning,
403                    boolean extendedEventTypes,
404                    XMLReporter reporter,
405                    XMLResolver resolver)
406   {
407     this.validating = validating;
408     this.namespaceAware = namespaceAware;
409     this.coalescing = coalescing;
410     this.replaceERefs = replaceERefs;
411     this.externalEntities = externalEntities;
412     this.supportDTD = supportDTD;
413     this.baseAware = baseAware;
414     this.stringInterning = stringInterning;
415     this.extendedEventTypes = extendedEventTypes;
416     this.reporter = reporter;
417     this.resolver = resolver;
418     if (validating)
419       {
420         validationStack = new LinkedList();
421         ids = new HashSet();
422         idrefs = new HashSet();
423       }
424     String debug = System.getProperty("gnu.xml.debug.input");
425     if (debug != null)
426       {
427         try
428           {
429             File file = File.createTempFile(debug, ".xml");
430             in = new TeeInputStream(in, new FileOutputStream(file));
431           }
432         catch (IOException e)
433           {
434             RuntimeException e2 = new RuntimeException();
435             e2.initCause(e);
436             throw e2;
437           }
438       }
439     systemId = canonicalize(systemId);
440     pushInput(new Input(in, null, null, systemId, null, null, false, true));
441   }
442 
443   /**
444    * Creates a new XML parser for the given character stream.
445    * This constructor is only available for compatibility with the JAXP
446    * APIs, which permit XML to be parsed from a character stream. Because
447    * the encoding specified by the character stream may conflict with that
448    * specified in the XML declaration, this method should be avoided where
449    * possible.
   * @param reader the character stream
451    * @param systemId the URL from which the input stream was retrieved
452    * (necessary if there are external entities to be resolved)
453    * @param validating if the parser is to be a validating parser
454    * @param namespaceAware if the parser should support XML Namespaces
455    * @param coalescing if CDATA sections should be merged into adjacent text
456    * nodes
457    * @param replaceERefs if entity references should be automatically
458    * replaced by their replacement text (otherwise they will be reported as
459    * entity-reference events)
460    * @param externalEntities if external entities should be loaded
461    * @param supportDTD if support for the XML DTD should be enabled
462    * @param baseAware if the parser should support XML Base to resolve
463    * external entities
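   * @param stringInterning whether strings will be interned during parsing
   * @param extendedEventTypes whether to report the extended START_ENTITY
   * and END_ENTITY event types in addition to the standard event types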
465    * @param reporter the reporter to receive warnings during processing
466    * @param resolver the callback interface used to resolve external
467    * entities
468    */
XMLParser(Reader reader, String systemId, boolean validating, boolean namespaceAware, boolean coalescing, boolean replaceERefs, boolean externalEntities, boolean supportDTD, boolean baseAware, boolean stringInterning, boolean extendedEventTypes, XMLReporter reporter, XMLResolver resolver)469   public XMLParser(Reader reader, String systemId,
470                    boolean validating,
471                    boolean namespaceAware,
472                    boolean coalescing,
473                    boolean replaceERefs,
474                    boolean externalEntities,
475                    boolean supportDTD,
476                    boolean baseAware,
477                    boolean stringInterning,
478                    boolean extendedEventTypes,
479                    XMLReporter reporter,
480                    XMLResolver resolver)
481   {
482     this.validating = validating;
483     this.namespaceAware = namespaceAware;
484     this.coalescing = coalescing;
485     this.replaceERefs = replaceERefs;
486     this.externalEntities = externalEntities;
487     this.supportDTD = supportDTD;
488     this.baseAware = baseAware;
489     this.stringInterning = stringInterning;
490     this.extendedEventTypes = extendedEventTypes;
491     this.reporter = reporter;
492     this.resolver = resolver;
493     if (validating)
494       {
495         validationStack = new LinkedList();
496         ids = new HashSet();
497         idrefs = new HashSet();
498       }
499     String debug = System.getProperty("gnu.xml.debug.input");
500     if (debug != null)
501       {
502         try
503           {
504             File file = File.createTempFile(debug, ".xml");
505             reader = new TeeReader(reader, new FileWriter(file));
506           }
507         catch (IOException e)
508           {
509             RuntimeException e2 = new RuntimeException();
510             e2.initCause(e);
511             throw e2;
512           }
513       }
514     systemId = canonicalize(systemId);
515     pushInput(new Input(null, reader, null, systemId, null, null, false, true));
516   }
517 
518   // -- NamespaceContext --
519 
getNamespaceURI(String prefix)520   public String getNamespaceURI(String prefix)
521   {
522     if (XMLConstants.XML_NS_PREFIX.equals(prefix))
523       return XMLConstants.XML_NS_URI;
524     if (XMLConstants.XMLNS_ATTRIBUTE.equals(prefix))
525       return XMLConstants.XMLNS_ATTRIBUTE_NS_URI;
526     for (Iterator i = namespaces.iterator(); i.hasNext(); )
527       {
528         LinkedHashMap ctx = (LinkedHashMap) i.next();
529         String namespaceURI = (String) ctx.get(prefix);
530         if (namespaceURI != null)
531           return namespaceURI;
532       }
533     return null;
534   }
535 
getPrefix(String namespaceURI)536   public String getPrefix(String namespaceURI)
537   {
538     if (XMLConstants.XML_NS_URI.equals(namespaceURI))
539       return XMLConstants.XML_NS_PREFIX;
540     if (XMLConstants.XMLNS_ATTRIBUTE_NS_URI.equals(namespaceURI))
541       return XMLConstants.XMLNS_ATTRIBUTE;
542     for (Iterator i = namespaces.iterator(); i.hasNext(); )
543       {
544         LinkedHashMap ctx = (LinkedHashMap) i.next();
545         if (ctx.containsValue(namespaceURI))
546           {
547             for (Iterator j = ctx.entrySet().iterator(); j.hasNext(); )
548               {
549                 Map.Entry entry = (Map.Entry) i.next();
550                 String uri = (String) entry.getValue();
551                 if (uri.equals(namespaceURI))
552                   return (String) entry.getKey();
553               }
554           }
555       }
556     return null;
557   }
558 
getPrefixes(String namespaceURI)559   public Iterator getPrefixes(String namespaceURI)
560   {
561     if (XMLConstants.XML_NS_URI.equals(namespaceURI))
562       return Collections.singleton(XMLConstants.XML_NS_PREFIX).iterator();
563     if (XMLConstants.XMLNS_ATTRIBUTE_NS_URI.equals(namespaceURI))
564       return Collections.singleton(XMLConstants.XMLNS_ATTRIBUTE).iterator();
565     LinkedList acc = new LinkedList();
566     for (Iterator i = namespaces.iterator(); i.hasNext(); )
567       {
568         LinkedHashMap ctx = (LinkedHashMap) i.next();
569         if (ctx.containsValue(namespaceURI))
570           {
571             for (Iterator j = ctx.entrySet().iterator(); j.hasNext(); )
572               {
573                 Map.Entry entry = (Map.Entry) i.next();
574                 String uri = (String) entry.getValue();
575                 if (uri.equals(namespaceURI))
576                   acc.add(entry.getKey());
577               }
578           }
579       }
580     return acc.iterator();
581   }
582 
583   // -- XMLStreamReader --
584 
close()585   public void close()
586     throws XMLStreamException
587   {
588     stack = null;
589     namespaces = null;
590     bases = null;
591     buf = null;
592     attrs = null;
593     doctype = null;
594 
595     inputStack = null;
596     validationStack = null;
597     ids = null;
598     idrefs = null;
599   }
600 
getNamespaceContext()601   public NamespaceContext getNamespaceContext()
602   {
603     return this;
604   }
605 
getAttributeCount()606   public int getAttributeCount()
607   {
608     return attrs.size();
609   }
610 
getAttributeLocalName(int index)611   public String getAttributeLocalName(int index)
612   {
613     Attribute a = (Attribute) attrs.get(index);
614     return a.localName;
615   }
616 
getAttributeNamespace(int index)617   public String getAttributeNamespace(int index)
618   {
619     String prefix = getAttributePrefix(index);
620     return getNamespaceURI(prefix);
621   }
622 
getAttributePrefix(int index)623   public String getAttributePrefix(int index)
624   {
625     Attribute a = (Attribute) attrs.get(index);
626     return a.prefix;
627   }
628 
getAttributeName(int index)629   public QName getAttributeName(int index)
630   {
631     Attribute a = (Attribute) attrs.get(index);
632     String namespaceURI = getNamespaceURI(a.prefix);
633     return new QName(namespaceURI, a.localName, a.prefix);
634   }
635 
getAttributeType(int index)636   public String getAttributeType(int index)
637   {
638     Attribute a = (Attribute) attrs.get(index);
639     return a.type;
640   }
641 
getAttributeType(String elementName, String attName)642   private String getAttributeType(String elementName, String attName)
643   {
644     if (doctype != null)
645       {
646         AttributeDecl att = doctype.getAttributeDecl(elementName, attName);
647         if (att != null)
648           return att.type;
649       }
650     return "CDATA";
651   }
652 
getAttributeValue(int index)653   public String getAttributeValue(int index)
654   {
655     Attribute a = (Attribute) attrs.get(index);
656     return a.value;
657   }
658 
getAttributeValue(String namespaceURI, String localName)659   public String getAttributeValue(String namespaceURI, String localName)
660   {
661     for (Iterator i = attrs.iterator(); i.hasNext(); )
662       {
663         Attribute a = (Attribute) i.next();
664         if (a.localName.equals(localName))
665           {
666             String uri = getNamespaceURI(a.prefix);
667             if ((uri == null && namespaceURI == null) ||
668                 (uri != null && uri.equals(namespaceURI)))
669               return a.value;
670           }
671       }
672     return null;
673   }
674 
isAttributeDeclared(int index)675   boolean isAttributeDeclared(int index)
676   {
677     if (doctype == null)
678       return false;
679     Attribute a = (Attribute) attrs.get(index);
680     String qn = ("".equals(a.prefix)) ? a.localName :
681       a.prefix + ":" + a.localName;
682     String elementName = buf.toString();
683     return doctype.isAttributeDeclared(elementName, qn);
684   }
685 
getCharacterEncodingScheme()686   public String getCharacterEncodingScheme()
687   {
688     return xmlEncoding;
689   }
690 
getElementText()691   public String getElementText()
692     throws XMLStreamException
693   {
694     if (event != XMLStreamConstants.START_ELEMENT)
695       throw new XMLStreamException("current event must be START_ELEMENT");
696     CPStringBuilder elementText = new CPStringBuilder();
697     int depth = stack.size();
698     while (event != XMLStreamConstants.END_ELEMENT || stack.size() > depth)
699       {
700         switch (next())
701           {
702           case XMLStreamConstants.CHARACTERS:
703           case XMLStreamConstants.SPACE:
704             elementText.append(buf.toString());
705           }
706       }
707     return elementText.toString();
708   }
709 
getEncoding()710   public String getEncoding()
711   {
712     return (input.inputEncoding == null) ? "UTF-8" : input.inputEncoding;
713   }
714 
getEventType()715   public int getEventType()
716   {
717     return event;
718   }
719 
getLocalName()720   public String getLocalName()
721   {
722     switch (event)
723       {
724       case XMLStreamConstants.START_ELEMENT:
725       case XMLStreamConstants.END_ELEMENT:
726         String qName = buf.toString();
727         int ci = qName.indexOf(':');
728         String localName = (ci == -1) ? qName : qName.substring(ci + 1);
729         if (stringInterning)
730           localName = localName.intern();
731         return localName;
732       default:
733         return null;
734       }
735   }
736 
getLocation()737   public Location getLocation()
738   {
739     return input;
740   }
741 
getName()742   public QName getName()
743   {
744     switch (event)
745       {
746       case XMLStreamConstants.START_ELEMENT:
747       case XMLStreamConstants.END_ELEMENT:
748         String qName = buf.toString();
749         int ci = qName.indexOf(':');
750         String localName = (ci == -1) ? qName : qName.substring(ci + 1);
751         if (stringInterning)
752           localName = localName.intern();
753         String prefix = (ci == -1) ?
754           (namespaceAware ? XMLConstants.DEFAULT_NS_PREFIX : null) :
755           qName.substring(0, ci);
756         if (stringInterning && prefix != null)
757           prefix = prefix.intern();
758         String namespaceURI = getNamespaceURI(prefix);
759         return new QName(namespaceURI, localName, prefix);
760       default:
761         return null;
762       }
763   }
764 
getNamespaceCount()765   public int getNamespaceCount()
766   {
767     if (!namespaceAware || namespaces.isEmpty())
768       return 0;
769     switch (event)
770       {
771       case XMLStreamConstants.START_ELEMENT:
772       case XMLStreamConstants.END_ELEMENT:
773         LinkedHashMap ctx = (LinkedHashMap) namespaces.getFirst();
774         return ctx.size();
775       default:
776         return 0;
777       }
778   }
779 
getNamespacePrefix(int index)780   public String getNamespacePrefix(int index)
781   {
782     LinkedHashMap ctx = (LinkedHashMap) namespaces.getFirst();
783     int count = 0;
784     for (Iterator i = ctx.keySet().iterator(); i.hasNext(); )
785       {
786         String prefix = (String) i.next();
787         if (count++ == index)
788           return prefix;
789       }
790     return null;
791   }
792 
getNamespaceURI()793   public String getNamespaceURI()
794   {
795     switch (event)
796       {
797       case XMLStreamConstants.START_ELEMENT:
798       case XMLStreamConstants.END_ELEMENT:
799         String qName = buf.toString();
800         int ci = qName.indexOf(':');
801         if (ci == -1)
802           return null;
803         String prefix = qName.substring(0, ci);
804         return getNamespaceURI(prefix);
805       default:
806         return null;
807       }
808   }
809 
getNamespaceURI(int index)810   public String getNamespaceURI(int index)
811   {
812     LinkedHashMap ctx = (LinkedHashMap) namespaces.getFirst();
813     int count = 0;
814     for (Iterator i = ctx.values().iterator(); i.hasNext(); )
815       {
816         String uri = (String) i.next();
817         if (count++ == index)
818           return uri;
819       }
820     return null;
821   }
822 
getPIData()823   public String getPIData()
824   {
825     return piData;
826   }
827 
getPITarget()828   public String getPITarget()
829   {
830     return piTarget;
831   }
832 
getPrefix()833   public String getPrefix()
834   {
835     switch (event)
836       {
837       case XMLStreamConstants.START_ELEMENT:
838       case XMLStreamConstants.END_ELEMENT:
839         String qName = buf.toString();
840         int ci = qName.indexOf(':');
841         String prefix = (ci == -1) ?
842           (namespaceAware ? XMLConstants.DEFAULT_NS_PREFIX : null) :
843           qName.substring(0, ci);
844         if (stringInterning && prefix != null)
845           prefix = prefix.intern();
846         return prefix;
847       default:
848         return null;
849       }
850   }
851 
getProperty(String name)852   public Object getProperty(String name)
853     throws IllegalArgumentException
854   {
855     if (name == null)
856       throw new IllegalArgumentException("name is null");
857     if (XMLInputFactory.ALLOCATOR.equals(name))
858       return null;
859     if (XMLInputFactory.IS_COALESCING.equals(name))
860       return coalescing ? Boolean.TRUE : Boolean.FALSE;
861     if (XMLInputFactory.IS_NAMESPACE_AWARE.equals(name))
862       return namespaceAware ? Boolean.TRUE : Boolean.FALSE;
863     if (XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES.equals(name))
864       return replaceERefs ? Boolean.TRUE : Boolean.FALSE;
865     if (XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES.equals(name))
866       return externalEntities ? Boolean.TRUE : Boolean.FALSE;
867     if (XMLInputFactory.IS_VALIDATING.equals(name))
868       return Boolean.FALSE;
869     if (XMLInputFactory.REPORTER.equals(name))
870       return reporter;
871     if (XMLInputFactory.RESOLVER.equals(name))
872       return resolver;
873     if (XMLInputFactory.SUPPORT_DTD.equals(name))
874       return supportDTD ? Boolean.TRUE : Boolean.FALSE;
875     if ("gnu.xml.stream.stringInterning".equals(name))
876       return stringInterning ? Boolean.TRUE : Boolean.FALSE;
877     if ("gnu.xml.stream.xmlBase".equals(name))
878       return baseAware ? Boolean.TRUE : Boolean.FALSE;
879     if ("gnu.xml.stream.baseURI".equals(name))
880       return getXMLBase();
881     return null;
882   }
883 
getText()884   public String getText()
885   {
886     return buf.toString();
887   }
888 
getTextCharacters()889   public char[] getTextCharacters()
890   {
891     return buf.toString().toCharArray();
892   }
893 
getTextCharacters(int sourceStart, char[] target, int targetStart, int length)894   public int getTextCharacters(int sourceStart, char[] target,
895                                int targetStart, int length)
896     throws XMLStreamException
897   {
898     length = Math.min(sourceStart + buf.length(), length);
899     int sourceEnd = sourceStart + length;
900     buf.getChars(sourceStart, sourceEnd, target, targetStart);
901     return length;
902   }
903 
getTextLength()904   public int getTextLength()
905   {
906     return buf.length();
907   }
908 
getTextStart()909   public int getTextStart()
910   {
911     return 0;
912   }
913 
getVersion()914   public String getVersion()
915   {
916     return (xmlVersion == null) ? "1.0" : xmlVersion;
917   }
918 
hasName()919   public boolean hasName()
920   {
921     switch (event)
922       {
923       case XMLStreamConstants.START_ELEMENT:
924       case XMLStreamConstants.END_ELEMENT:
925         return true;
926       default:
927         return false;
928       }
929   }
930 
hasText()931   public boolean hasText()
932   {
933     switch (event)
934       {
935       case XMLStreamConstants.CHARACTERS:
936       case XMLStreamConstants.SPACE:
937         return true;
938       default:
939         return false;
940       }
941   }
942 
isAttributeSpecified(int index)943   public boolean isAttributeSpecified(int index)
944   {
945     Attribute a = (Attribute) attrs.get(index);
946     return a.specified;
947   }
948 
isCharacters()949   public boolean isCharacters()
950   {
951     return (event == XMLStreamConstants.CHARACTERS);
952   }
953 
isEndElement()954   public boolean isEndElement()
955   {
956     return (event == XMLStreamConstants.END_ELEMENT);
957   }
958 
isStandalone()959   public boolean isStandalone()
960   {
961     return Boolean.TRUE.equals(xmlStandalone);
962   }
963 
isStartElement()964   public boolean isStartElement()
965   {
966     return (event == XMLStreamConstants.START_ELEMENT);
967   }
968 
isWhiteSpace()969   public boolean isWhiteSpace()
970   {
971     return (event == XMLStreamConstants.SPACE);
972   }
973 
nextTag()974   public int nextTag()
975     throws XMLStreamException
976   {
977     do
978       {
979         switch (next())
980           {
981           case XMLStreamConstants.START_ELEMENT:
982           case XMLStreamConstants.END_ELEMENT:
983           case XMLStreamConstants.CHARACTERS:
984           case XMLStreamConstants.SPACE:
985           case XMLStreamConstants.COMMENT:
986           case XMLStreamConstants.PROCESSING_INSTRUCTION:
987             break;
988           default:
989             throw new XMLStreamException("Unexpected event type: " + event);
990           }
991       }
992     while (event != XMLStreamConstants.START_ELEMENT &&
993            event != XMLStreamConstants.END_ELEMENT);
994     return event;
995   }
996 
require(int type, String namespaceURI, String localName)997   public void require(int type, String namespaceURI, String localName)
998     throws XMLStreamException
999   {
1000     if (event != type)
1001       throw new XMLStreamException("Current event type is " + event);
1002     if (event == XMLStreamConstants.START_ELEMENT ||
1003         event == XMLStreamConstants.END_ELEMENT)
1004       {
1005         String ln = getLocalName();
1006         if (!ln.equals(localName))
1007           throw new XMLStreamException("Current local-name is " + ln);
1008         String uri = getNamespaceURI();
1009         if ((uri == null && namespaceURI != null) ||
1010             (uri != null && !uri.equals(namespaceURI)))
1011           throw new XMLStreamException("Current namespace URI is " + uri);
1012       }
1013   }
1014 
standaloneSet()1015   public boolean standaloneSet()
1016   {
1017     return (xmlStandalone != null);
1018   }
1019 
hasNext()1020   public boolean hasNext()
1021     throws XMLStreamException
1022   {
1023     return (event != XMLStreamConstants.END_DOCUMENT && event != -1);
1024   }
1025 
  /**
   * Advances the parser by one event, stores the new event type in
   * <code>event</code> and returns it.
   * Pending start-entity and end-entity notifications are delivered before
   * any further input is read. Otherwise the work done depends on the
   * current parser <code>state</code> (INIT, PROLOG, CONTENT, EMPTY_ELEMENT
   * or MISC).
   * @return the type of the new current event, or -1 if the parser state
   * is not one of the recognised states
   * @exception XMLStreamException if a well-formedness error is reported or
   * an I/O error occurs (the IOException is attached as the cause)
   * @exception NoSuchElementException if called in the MISC state when the
   * current event is already END_DOCUMENT
   */
  public int next()
    throws XMLStreamException
  {
    // The previous event closed an element: discard the contexts that
    // belonged to that element before moving on.
    if (event == XMLStreamConstants.END_ELEMENT)
      {
        // Pop namespace context
        if (namespaceAware && !namespaces.isEmpty())
          namespaces.removeFirst();
        // Pop base context
        if (baseAware && !bases.isEmpty())
          bases.removeFirst();
      }
    // Deliver queued entity boundary notifications first. The entity name
    // is placed in buf. When extended event types are disabled the
    // notification is consumed silently and the following event is
    // returned instead.
    if (!startEntityStack.isEmpty())
      {
        String entityName = (String) startEntityStack.removeFirst();
        buf.setLength(0);
        buf.append(entityName);
        event = START_ENTITY;
        return extendedEventTypes ? event : next();
      }
    else if (!endEntityStack.isEmpty())
      {
        String entityName = (String) endEntityStack.removeFirst();
        buf.setLength(0);
        buf.append(entityName);
        event = END_ENTITY;
        return extendedEventTypes ? event : next();
      }
    try
      {
        if (!input.initialized)
          input.init();
        switch (state)
          {
          case CONTENT:
            // Inside the root element: try each markup construct in turn,
            // falling back to character data / references.
            if (tryRead(TEST_END_ELEMENT))
              {
                readEndElement();
                // Element stack exhausted: the root element has ended
                if (stack.isEmpty())
                  state = MISC;
                event = XMLStreamConstants.END_ELEMENT;
              }
            else if (tryRead(TEST_COMMENT))
              {
                readComment(false);
                event = XMLStreamConstants.COMMENT;
              }
            else if (tryRead(TEST_PI))
              {
                readPI(false);
                event = XMLStreamConstants.PROCESSING_INSTRUCTION;
              }
            else if (tryRead(TEST_CDATA))
              {
                readCDSect();
                event = XMLStreamConstants.CDATA;
              }
            else if (tryRead(TEST_START_ELEMENT))
              {
                // readStartElement() returns the state to continue in
                state = readStartElement();
                event = XMLStreamConstants.START_ELEMENT;
              }
            else
              {
                // Check for character reference or predefined entity
                mark(8);
                int c = readCh();
                if (c == 0x26) // '&'
                  {
                    c = readCh();
                    if (c == 0x23) // '#'
                      {
                        // Character reference: rewind and let
                        // readCharData handle it
                        reset();
                        event = readCharData(null);
                      }
                    else
                      {
                        // entity reference
                        reset();
                        readCh(); // &
                        readReference();
                        // readReference() leaves the entity name in buf
                        String ref = buf.toString();
                        String text = (String) PREDEFINED_ENTITIES.get(ref);
                        if (text != null)
                          {
                            // Predefined entity: its replacement text
                            // is handed to readCharData
                            event = readCharData(text);
                          }
                        else if (replaceERefs && !isUnparsedEntity(ref))
                          {
                            // this will report a start-entity event
                            boolean external = false;
                            if (doctype != null)
                              {
                                Object entity = doctype.getEntity(ref);
                                if (entity instanceof ExternalIds)
                                  external = true;
                              }
                            expandEntity(ref, false, external);
                            // Continue with the first event produced
                            // after the expansion
                            event = next();
                          }
                        else
                          {
                            // Not replacing (or unparsed): report the
                            // reference itself; buf still holds its name
                            event = XMLStreamConstants.ENTITY_REFERENCE;
                          }
                      }
                  }
                else
                  {
                    // Ordinary character data
                    reset();
                    event = readCharData(null);
                    if (validating && doctype != null)
                      validatePCData(buf.toString());
                  }
              }
            break;
          case EMPTY_ELEMENT:
            // Synthesize the END_ELEMENT event for an empty element: the
            // element name is popped from the stack and placed in buf.
            String elementName = (String) stack.removeLast();
            buf.setLength(0);
            buf.append(elementName);
            state = stack.isEmpty() ? MISC : CONTENT;
            event = XMLStreamConstants.END_ELEMENT;
            if (validating && doctype != null)
              endElementValidationHook();
            break;
          case INIT: // XMLDecl?
            if (tryRead(TEST_XML_DECL))
              readXMLDecl();
            input.finalizeEncoding();
            event = XMLStreamConstants.START_DOCUMENT;
            state = PROLOG;
            break;
          case PROLOG: // Misc* (doctypedecl Misc*)?
            skipWhitespace();
            // Only one DOCTYPE declaration is accepted: once doctype is
            // set the DOCTYPE test is no longer attempted.
            if (doctype == null && tryRead(TEST_DOCTYPE_DECL))
              {
                readDoctypeDecl();
                event = XMLStreamConstants.DTD;
              }
            else if (tryRead(TEST_COMMENT))
              {
                readComment(false);
                event = XMLStreamConstants.COMMENT;
              }
            else if (tryRead(TEST_PI))
              {
                readPI(false);
                event = XMLStreamConstants.PROCESSING_INSTRUCTION;
              }
            else if (tryRead(TEST_START_ELEMENT))
              {
                state = readStartElement();
                event = XMLStreamConstants.START_ELEMENT;
              }
            else
              {
                // Nothing acceptable in the prolog: report the offending
                // character
                int c = readCh();
                error("no root element: U+" + Integer.toHexString(c));
              }
            break;
          case MISC: // Comment | PI | S
            skipWhitespace();
            if (tryRead(TEST_COMMENT))
              {
                readComment(false);
                event = XMLStreamConstants.COMMENT;
              }
            else if (tryRead(TEST_PI))
              {
                readPI(false);
                event = XMLStreamConstants.PROCESSING_INSTRUCTION;
              }
            else
              {
                // END_DOCUMENT has already been reported: there is
                // nothing further to return
                if (event == XMLStreamConstants.END_DOCUMENT)
                  throw new NoSuchElementException();
                int c = readCh();
                // Anything other than end of input here is an error
                if (c != -1)
                  error("Only comments and PIs may appear after " +
                        "the root element");
                event = XMLStreamConstants.END_DOCUMENT;
              }
            break;
          default:
            // Unrecognised state: flag the event as invalid
            event = -1;
          }
        return event;
      }
    catch (IOException e)
      {
        // Surface I/O failures as stream exceptions, keeping the
        // original exception as the cause
        XMLStreamException e2 = new XMLStreamException();
        e2.initCause(e);
        throw e2;
      }
  }
1220 
1221   // package private
1222 
1223   /**
1224    * Returns the current element name.
1225    */
getCurrentElement()1226   String getCurrentElement()
1227   {
1228     return (String) stack.getLast();
1229   }
1230 
1231   // private
1232 
mark(int limit)1233   private void mark(int limit)
1234     throws IOException
1235   {
1236     input.mark(limit);
1237   }
1238 
reset()1239   private void reset()
1240     throws IOException
1241   {
1242     input.reset();
1243   }
1244 
read()1245   private int read()
1246     throws IOException
1247   {
1248     return input.read();
1249   }
1250 
read(int[] b, int off, int len)1251   private int read(int[] b, int off, int len)
1252     throws IOException
1253   {
1254     return input.read(b, off, len);
1255   }
1256 
1257   /**
1258    * Parsed character read.
1259    */
readCh()1260   private int readCh()
1261     throws IOException, XMLStreamException
1262   {
1263     int c = read();
1264     if (expandPE && c == 0x25) // '%'
1265       {
1266         if (peIsError)
1267           error("PE reference within decl in internal subset.");
1268         expandPEReference();
1269         return readCh();
1270       }
1271     return c;
1272   }
1273 
1274   /**
1275    * Reads the next character, ensuring it is the character specified.
1276    * @param delim the character to match
1277    * @exception XMLStreamException if the next character is not the
1278    * specified one
1279    */
require(char delim)1280   private void require(char delim)
1281     throws IOException, XMLStreamException
1282   {
1283     mark(1);
1284     int c = readCh();
1285     if (delim != c)
1286       {
1287         reset();
1288         error("required character (got U+" + Integer.toHexString(c) + ")",
1289               new Character(delim));
1290       }
1291   }
1292 
1293   /**
1294    * Reads the next few characters, ensuring they match the string specified.
1295    * @param delim the string to match
1296    * @exception XMLStreamException if the next characters do not match the
1297    * specified string
1298    */
require(String delim)1299   private void require(String delim)
1300     throws IOException, XMLStreamException
1301   {
1302     char[] chars = delim.toCharArray();
1303     int len = chars.length;
1304     mark(len);
1305     int off = 0;
1306     do
1307       {
1308         int l2 = read(tmpBuf, off, len - off);
1309         if (l2 == -1)
1310           {
1311             reset();
1312             error("EOF before required string", delim);
1313           }
1314         off += l2;
1315       }
1316     while (off < len);
1317     for (int i = 0; i < chars.length; i++)
1318       {
1319         if (chars[i] != tmpBuf[i])
1320           {
1321             reset();
1322             error("required string", delim);
1323           }
1324       }
1325   }
1326 
1327   /**
1328    * Try to read a single character. On failure, reset the stream.
1329    * @param delim the character to test
1330    * @return true if the character matched delim, false otherwise.
1331    */
tryRead(char delim)1332   private boolean tryRead(char delim)
1333     throws IOException, XMLStreamException
1334   {
1335     mark(1);
1336     int c = readCh();
1337     if (delim != c)
1338       {
1339         reset();
1340         return false;
1341       }
1342     return true;
1343   }
1344 
1345   /**
1346    * Tries to read the specified characters.
1347    * If successful, the stream is positioned after the last character,
1348    * otherwise it is reset.
1349    * @param test the string to test
1350    * @return true if the characters matched the test string, false otherwise.
1351    */
tryRead(String test)1352   private boolean tryRead(String test)
1353     throws IOException
1354   {
1355     char[] chars = test.toCharArray();
1356     int len = chars.length;
1357     mark(len);
1358     int count = 0;
1359     int l2 = read(tmpBuf, 0, len);
1360     if (l2 == -1)
1361       {
1362         reset();
1363         return false;
1364       }
1365     count += l2;
1366     // check the characters we received first before doing additional reads
1367     for (int i = 0; i < count; i++)
1368       {
1369         if (chars[i] != tmpBuf[i])
1370           {
1371             reset();
1372             return false;
1373           }
1374       }
1375     while (count < len)
1376       {
1377         // force read
1378         int c = read();
1379         if (c == -1)
1380           {
1381             reset();
1382             return false;
1383           }
1384         tmpBuf[count] = (char) c;
1385         // check each character as it is read
1386         if (chars[count] != tmpBuf[count])
1387           {
1388             reset();
1389             return false;
1390           }
1391         count++;
1392       }
1393     return true;
1394   }
1395 
1396   /**
1397    * Reads characters until the specified test string is encountered.
1398    * @param delim the string delimiting the end of the characters
1399    */
readUntil(String delim)1400   private void readUntil(String delim)
1401     throws IOException, XMLStreamException
1402   {
1403     int startLine = input.line;
1404     try
1405       {
1406         while (!tryRead(delim))
1407           {
1408             int c = readCh();
1409             if (c == -1)
1410               throw new EOFException();
1411             else if (input.xml11)
1412               {
1413                 if (!isXML11Char(c) || isXML11RestrictedChar(c))
1414                   error("illegal XML 1.1 character",
1415                         "U+" + Integer.toHexString(c));
1416               }
1417             else if (!isChar(c))
1418               error("illegal XML character",
1419                     "U+" + Integer.toHexString(c));
1420             buf.append(Character.toChars(c));
1421           }
1422       }
1423     catch (EOFException e)
1424       {
1425         error("end of input while looking for delimiter "+
1426               "(started on line " + startLine + ')', delim);
1427       }
1428   }
1429 
1430   /**
1431    * Reads any whitespace characters.
1432    * @return true if whitespace characters were read, false otherwise
1433    */
tryWhitespace()1434   private boolean tryWhitespace()
1435     throws IOException, XMLStreamException
1436   {
1437     boolean white;
1438     boolean ret = false;
1439     do
1440       {
1441         mark(1);
1442         int c = readCh();
1443         while (c == -1 && inputStack.size() > 1)
1444           {
1445             popInput();
1446             c = readCh();
1447           }
1448         white = (c == 0x20 || c == 0x09 || c == 0x0a || c == 0x0d);
1449         if (white)
1450           ret = true;
1451       }
1452     while (white);
1453     reset();
1454     return ret;
1455   }
1456 
1457   /**
1458    * Skip over any whitespace characters.
1459    */
skipWhitespace()1460   private void skipWhitespace()
1461     throws IOException, XMLStreamException
1462   {
1463     boolean white;
1464     do
1465       {
1466         mark(1);
1467         int c = readCh();
1468         while (c == -1 && inputStack.size() > 1)
1469           {
1470             popInput();
1471             c = readCh();
1472           }
1473         white = (c == 0x20 || c == 0x09 || c == 0x0a || c == 0x0d);
1474       }
1475     while (white);
1476     reset();
1477   }
1478 
1479   /**
1480    * Try to read as many whitespace characters as are available.
1481    * @exception XMLStreamException if no whitespace characters were seen
1482    */
requireWhitespace()1483   private void requireWhitespace()
1484     throws IOException, XMLStreamException
1485   {
1486     if (!tryWhitespace())
1487       error("whitespace required");
1488   }
1489 
1490   /**
1491    * Returns the current base URI for resolving external entities.
1492    */
getXMLBase()1493   String getXMLBase()
1494   {
1495     if (baseAware)
1496       {
1497         for (Iterator i = bases.iterator(); i.hasNext(); )
1498           {
1499             String base = (String) i.next();
1500             if (base != null)
1501               return base;
1502           }
1503       }
1504     return input.systemId;
1505   }
1506 
1507   /**
1508    * Push the specified text input source.
1509    */
pushInput(String name, String text, boolean report, boolean normalize)1510   private void pushInput(String name, String text, boolean report,
1511                          boolean normalize)
1512     throws IOException, XMLStreamException
1513   {
1514     // Check for recursion
1515     if (name != null && !"".equals(name))
1516       {
1517         for (Iterator i = inputStack.iterator(); i.hasNext(); )
1518           {
1519             Input ctx = (Input) i.next();
1520             if (name.equals(ctx.name))
1521               error("entities may not be self-recursive", name);
1522           }
1523       }
1524     else
1525       report = false;
1526     pushInput(new Input(null, new StringReader(text), input.publicId,
1527                         input.systemId, name, input.inputEncoding, report,
1528                         normalize));
1529   }
1530 
1531   /**
1532    * Push the specified external input source.
1533    */
pushInput(String name, ExternalIds ids, boolean report, boolean normalize)1534   private void pushInput(String name, ExternalIds ids, boolean report,
1535                          boolean normalize)
1536     throws IOException, XMLStreamException
1537   {
1538     if (!externalEntities)
1539       return;
1540     String url = canonicalize(absolutize(input.systemId, ids.systemId));
1541     // Check for recursion
1542     for (Iterator i = inputStack.iterator(); i.hasNext(); )
1543       {
1544         Input ctx = (Input) i.next();
1545         if (url.equals(ctx.systemId))
1546           error("entities may not be self-recursive", url);
1547         if (name != null && !"".equals(name) && name.equals(ctx.name))
1548           error("entities may not be self-recursive", name);
1549       }
1550     if (name == null || "".equals(name))
1551       report = false;
1552     InputStream in = null;
1553     if (resolver != null)
1554       {
1555         Object obj = resolver.resolveEntity(ids.publicId, url, getXMLBase(),
1556                                             null);
1557         if (obj instanceof InputStream)
1558           in = (InputStream) obj;
1559       }
1560     if (in == null)
1561       in = resolve(url);
1562     if (in == null)
1563       error("unable to resolve external entity",
1564             (ids.systemId != null) ? ids.systemId : ids.publicId);
1565     pushInput(new Input(in, null, ids.publicId, url, name, null, report,
1566                         normalize));
1567     input.init();
1568     if (tryRead(TEST_XML_DECL))
1569       readTextDecl();
1570     input.finalizeEncoding();
1571   }
1572 
1573   /**
1574    * Push the specified input source (general entity) onto the input stack.
1575    */
pushInput(Input input)1576   private void pushInput(Input input)
1577   {
1578     if (input.report)
1579       startEntityStack.addFirst(input.name);
1580     inputStack.addLast(input);
1581     if (this.input != null)
1582       input.xml11 = this.input.xml11;
1583     this.input = input;
1584   }
1585 
1586   /**
1587    * Returns a canonicalized version of the specified URL.
1588    * This is largely to work around a problem with the specification of
1589    * file URLs.
1590    */
canonicalize(String url)1591   static String canonicalize(String url)
1592   {
1593     if (url == null)
1594       return null;
1595     if (url.startsWith("file:") && !url.startsWith("file://"))
1596       url = "file://" + url.substring(5);
1597     return url;
1598   }
1599 
1600   /**
1601    * "Absolutize" a URL. This resolves a relative URL into an absolute one.
1602    * @param base the current base URL
1603    * @param href the (absolute or relative) URL to resolve
1604    */
absolutize(String base, String href)1605   public static String absolutize(String base, String href)
1606   {
1607     if (href == null)
1608       return null;
1609     int ci = href.indexOf(':');
1610     if (ci > 1 && isURLScheme(href.substring(0, ci)))
1611       {
1612         // href is absolute already
1613         return href;
1614       }
1615     if (base == null)
1616       base = "";
1617     else
1618       {
1619         int i = base.lastIndexOf('/');
1620         if (i != -1)
1621           base = base.substring(0, i + 1);
1622         else
1623           base = "";
1624       }
1625     if ("".equals(base))
1626       {
1627         // assume file URL relative to current directory
1628         base = System.getProperty("user.dir");
1629         if (base.charAt(0) == '/')
1630           base = base.substring(1);
1631         base = "file:///" + base.replace(File.separatorChar, '/');
1632         if (!base.endsWith("/"))
1633           base += "/";
1634       }
1635     // We can't use java.net.URL here to do the parsing, as it searches for
1636     // a protocol handler. A protocol handler may not be registered for the
1637     // URL scheme here. Do it manually.
1638     //
1639     // Set aside scheme and host portion of base URL
1640     String basePrefix = null;
1641     ci = base.indexOf(':');
1642     if (ci > 1 && isURLScheme(base.substring(0, ci)))
1643       {
1644           if (base.length() > (ci + 3)  &&
1645               base.charAt(ci + 1) == '/' &&
1646               base.charAt(ci + 2) == '/')
1647             {
1648               int si = base.indexOf('/', ci + 3);
1649               if (si == -1)
1650                 base = null;
1651               else
1652                 {
1653                   basePrefix = base.substring(0, si);
1654                   base = base.substring(si);
1655                 }
1656             }
1657           else
1658             base = null;
1659       }
1660     if (base == null) // unknown or malformed base URL, use href
1661       return href;
1662     if (href.startsWith("/")) // absolute href pathname
1663       return (basePrefix == null) ? href : basePrefix + href;
1664     // relative href pathname
1665     if (!base.endsWith("/"))
1666       {
1667         int lsi = base.lastIndexOf('/');
1668         if (lsi == -1)
1669           base = "/";
1670         else
1671           base = base.substring(0, lsi + 1);
1672       }
1673     while (href.startsWith("../") || href.startsWith("./"))
1674       {
1675         if (href.startsWith("../"))
1676           {
1677             // strip last path component from base
1678             int lsi = base.lastIndexOf('/', base.length() - 2);
1679             if (lsi > -1)
1680               base = base.substring(0, lsi + 1);
1681             href = href.substring(3); // strip ../ prefix
1682           }
1683         else
1684           {
1685             href = href.substring(2); // strip ./ prefix
1686           }
1687       }
1688     return (basePrefix == null) ? base + href : basePrefix + base + href;
1689   }
1690 
1691   /**
1692    * Indicates whether the specified characters match the scheme portion of
1693    * a URL.
1694    * @see RFC 1738 section 2.1
1695    */
isURLScheme(String text)1696   private static boolean isURLScheme(String text)
1697   {
1698     int len = text.length();
1699     for (int i = 0; i < len; i++)
1700       {
1701         char c = text.charAt(i);
1702         if (c == '+' || c == '.' || c == '-')
1703           continue;
1704         if (c < 65 || (c > 90 && c < 97) || c > 122)
1705           return false;
1706       }
1707     return true;
1708   }
1709 
1710   /**
1711    * Returns an input stream for the given URL.
1712    */
resolve(String url)1713   static InputStream resolve(String url)
1714     throws IOException
1715   {
1716     try
1717       {
1718         return new URL(url).openStream();
1719       }
1720     catch (MalformedURLException e)
1721       {
1722         return null;
1723       }
1724     catch (IOException e)
1725       {
1726         IOException e2 = new IOException("error resolving " + url);
1727         e2.initCause(e);
1728         throw e2;
1729       }
1730   }
1731 
1732   /**
1733    * Pops the current input source (general entity) off the stack.
1734    */
popInput()1735   private void popInput()
1736   {
1737     Input old = (Input) inputStack.removeLast();
1738     if (old.report)
1739       endEntityStack.addFirst(old.name);
1740     input = (Input) inputStack.getLast();
1741   }
1742 
1743   /**
1744    * Parse an entity text declaration.
1745    */
readTextDecl()1746   private void readTextDecl()
1747     throws IOException, XMLStreamException
1748   {
1749     final int flags = LIT_DISABLE_CREF | LIT_DISABLE_PE | LIT_DISABLE_EREF;
1750     requireWhitespace();
1751     if (tryRead("version"))
1752       {
1753         readEq();
1754         String v = readLiteral(flags, false);
1755         if ("1.0".equals(v))
1756           input.xml11 = false;
1757         else if ("1.1".equals(v))
1758           {
1759             Input i1 = (Input) inputStack.getFirst();
1760             if (!i1.xml11)
1761               error("external entity specifies later version number");
1762             input.xml11 = true;
1763           }
1764         else
1765           throw new XMLStreamException("illegal XML version: " + v);
1766         requireWhitespace();
1767       }
1768     require("encoding");
1769     readEq();
1770     String enc = readLiteral(flags, false);
1771     skipWhitespace();
1772     require("?>");
1773     input.setInputEncoding(enc);
1774   }
1775 
1776   /**
1777    * Parse the XML declaration.
1778    */
readXMLDecl()1779   private void readXMLDecl()
1780     throws IOException, XMLStreamException
1781   {
1782     final int flags = LIT_DISABLE_CREF | LIT_DISABLE_PE | LIT_DISABLE_EREF;
1783 
1784     requireWhitespace();
1785     require("version");
1786     readEq();
1787     xmlVersion = readLiteral(flags, false);
1788     if ("1.0".equals(xmlVersion))
1789       input.xml11 = false;
1790     else if ("1.1".equals(xmlVersion))
1791       input.xml11 = true;
1792     else
1793       throw new XMLStreamException("illegal XML version: " + xmlVersion);
1794 
1795     boolean white = tryWhitespace();
1796 
1797     if (tryRead("encoding"))
1798       {
1799         if (!white)
1800           error("whitespace required before 'encoding='");
1801         readEq();
1802         xmlEncoding = readLiteral(flags, false);
1803         white = tryWhitespace();
1804       }
1805 
1806     if (tryRead("standalone"))
1807       {
1808         if (!white)
1809           error("whitespace required before 'standalone='");
1810         readEq();
1811         String standalone = readLiteral(flags, false);
1812         if ("yes".equals(standalone))
1813           xmlStandalone = Boolean.TRUE;
1814         else if ("no".equals(standalone))
1815           xmlStandalone = Boolean.FALSE;
1816         else
1817           error("standalone flag must be 'yes' or 'no'", standalone);
1818       }
1819 
1820     skipWhitespace();
1821     require("?>");
1822     if (xmlEncoding != null)
1823       input.setInputEncoding(xmlEncoding);
1824   }
1825 
  /**
   * Parse the DOCTYPE declaration.
   * Reads the root element name and external identifiers, creates the
   * <code>doctype</code> object, parses the internal subset (if present)
   * and then, if there is a system ID and external entities are enabled,
   * the external subset. On return the text buffer holds the root
   * element name.
   * @exception XMLStreamException if a well-formedness error is reported,
   * including when the parser was configured not to support DTDs
   */
  private void readDoctypeDecl()
    throws IOException, XMLStreamException
  {
    if (!supportDTD)
      error("parser was configured not to support DTDs");
    requireWhitespace();
    String rootName = readNmtoken(true);
    skipWhitespace();
    ExternalIds ids = readExternalIds(false, true);
    doctype =
      this.new Doctype(rootName, ids.publicId, ids.systemId);

    // Parse internal subset first
    skipWhitespace();
    if (tryRead('['))
      {
        while (true)
          {
            // Parameter-entity expansion is switched on only while
            // skipping the whitespace between declarations
            expandPE = true;
            skipWhitespace();
            expandPE = false;
            if (tryRead(']'))
              break;
            else
              readMarkupdecl(false);
          }
      }
    skipWhitespace();
    require('>');

    // Parse external subset
    if (ids.systemId != null && externalEntities)
      {
        // Push a sentinel input consisting of ">" beneath the DTD input,
        // so that reading '>' below marks the end of the external subset
        pushInput("", ">", false, false);
        pushInput("[dtd]", ids, true, true);
        // loop until we get back to ">"
        while (true)
          {
            expandPE = true;
            skipWhitespace();
            expandPE = false;
            mark(1);
            int c = readCh();
            if (c == 0x3e) // '>'
              break;
            else if (c == -1)
              // Current input exhausted: continue with the one beneath
              popInput();
            else
              {
                // Start of a declaration: rewind and parse it
                reset();
                expandPE = true;
                readMarkupdecl(true);
                expandPE = true;
              }
          }
        // Only the original input and the sentinel should remain
        if (inputStack.size() != 2)
          error("external subset has unmatched '>'");
        // Remove the sentinel
        popInput();
      }
    checkDoctype();
    if (validating)
      validateDoctype();

    // Make rootName available for reading
    buf.setLength(0);
    buf.append(rootName);
  }
1896 
  /**
   * Checks the well-formedness of the DTD.
   * Called by {@code readDoctypeDecl()} once both subsets have been read.
   * Currently a placeholder: no checks are performed yet.
   */
  private void checkDoctype()
    throws XMLStreamException
  {
    // TODO check entity recursion
  }
1905 
  /**
   * Parse the markupdecl production.
   * Dispatches on the opening delimiter to the element, attribute-list,
   * entity and notation declaration readers, to the processing
   * instruction and comment readers, or handles a conditional section
   * ({@code <![INCLUDE[ ... ]]>} / {@code <![IGNORE[ ... ]]>}) inline.
   *
   * @param inExternalSubset passed through to the entity and notation
   * declaration readers and to nested declarations inside an INCLUDE
   * section
   */
  private void readMarkupdecl(boolean inExternalSubset)
    throws IOException, XMLStreamException
  {
    boolean saved = expandPE;
    // Peek: a declaration has to start with '<'; the character is un-read
    // again so the tryRead calls below see the complete delimiter.
    mark(1);
    require('<');
    reset();
    // The delimiter tests run with expandPE off; each branch restores the
    // caller's setting (before the declaration body for the four
    // declaration kinds, after the whole construct for PIs and comments).
    expandPE = false;
    if (tryRead(TEST_ELEMENT_DECL))
      {
        expandPE = saved;
        readElementDecl();
      }
    else if (tryRead(TEST_ATTLIST_DECL))
      {
        expandPE = saved;
        readAttlistDecl();
      }
    else if (tryRead(TEST_ENTITY_DECL))
      {
        expandPE = saved;
        readEntityDecl(inExternalSubset);
      }
    else if (tryRead(TEST_NOTATION_DECL))
      {
        expandPE = saved;
        readNotationDecl(inExternalSubset);
      }
    else if (tryRead(TEST_PI))
      {
        readPI(true);
        expandPE = saved;
      }
    else if (tryRead(TEST_COMMENT))
      {
        readComment(true);
        expandPE = saved;
      }
    else if (tryRead("<!["))
      {
        // conditional section
        expandPE = saved;
        // fewer than two inputs on the stack is treated as "internal subset"
        if (inputStack.size() < 2)
          error("conditional sections illegal in internal subset");
        skipWhitespace();
        if (tryRead("INCLUDE"))
          {
            skipWhitespace();
            require('[');
            skipWhitespace();
            // the section body is an ordinary run of declarations
            while (!tryRead("]]>"))
              {
                readMarkupdecl(inExternalSubset);
                skipWhitespace();
              }
          }
        else if (tryRead("IGNORE"))
          {
            skipWhitespace();
            require('[');
            expandPE = false;
            // Skip raw characters, counting nested "<![" openers against
            // "]]>" closers until the outermost section is closed.
            for (int nesting = 1; nesting > 0; )
              {
                int c = readCh();
                switch (c)
                  {
                  case 0x3c: // '<'
                    if (tryRead("!["))
                      nesting++;
                    break;
                  case 0x5d: // ']'
                    if (tryRead("]>"))
                      nesting--;
                    break;
                  case -1:
                    // input ended inside the ignored section
                    throw new EOFException();
                  }
              }
            expandPE = saved;
          }
        else
          error("conditional section must begin with INCLUDE or IGNORE");
      }
    else
      error("expected markup declaration");
  }
1995 
1996   /**
1997    * Parse the elementdecl production.
1998    */
readElementDecl()1999   private void readElementDecl()
2000     throws IOException, XMLStreamException
2001   {
2002     requireWhitespace();
2003     boolean saved = expandPE;
2004     expandPE = (inputStack.size() > 1);
2005     String name = readNmtoken(true);
2006     expandPE = saved;
2007     requireWhitespace();
2008     readContentspec(name);
2009     skipWhitespace();
2010     require('>');
2011   }
2012 
2013   /**
2014    * Parse the contentspec production.
2015    */
readContentspec(String elementName)2016   private void readContentspec(String elementName)
2017     throws IOException, XMLStreamException
2018   {
2019     if (tryRead("EMPTY"))
2020       doctype.addElementDecl(elementName, "EMPTY", new EmptyContentModel());
2021     else if (tryRead("ANY"))
2022       doctype.addElementDecl(elementName, "ANY", new AnyContentModel());
2023     else
2024       {
2025         ContentModel model;
2026         CPStringBuilder acc = new CPStringBuilder();
2027         require('(');
2028         acc.append('(');
2029         skipWhitespace();
2030         if (tryRead("#PCDATA"))
2031           {
2032             // mixed content
2033             acc.append("#PCDATA");
2034             MixedContentModel mm = new MixedContentModel();
2035             model = mm;
2036             skipWhitespace();
2037             if (tryRead(')'))
2038               {
2039                 acc.append(")");
2040                 if (tryRead('*'))
2041                   {
2042                     mm.min = 0;
2043                     mm.max = -1;
2044                   }
2045               }
2046             else
2047               {
2048                 while (!tryRead(")"))
2049                   {
2050                     require('|');
2051                     acc.append('|');
2052                     skipWhitespace();
2053                     String name = readNmtoken(true);
2054                     acc.append(name);
2055                     mm.addName(name);
2056                     skipWhitespace();
2057                   }
2058                 require('*');
2059                 acc.append(")*");
2060                 mm.min = 0;
2061                 mm.max = -1;
2062               }
2063           }
2064         else
2065           model = readElements(acc);
2066         doctype.addElementDecl(elementName, acc.toString(), model);
2067       }
2068   }
2069 
2070   /**
2071    * Parses an element content model.
2072    */
readElements(CPStringBuilder acc)2073   private ElementContentModel readElements(CPStringBuilder acc)
2074     throws IOException, XMLStreamException
2075   {
2076     int separator;
2077     ElementContentModel model = new ElementContentModel();
2078 
2079     // Parse first content particle
2080     skipWhitespace();
2081     model.addContentParticle(readContentParticle(acc));
2082     // End or separator
2083     skipWhitespace();
2084     int c = readCh();
2085     switch (c)
2086       {
2087       case 0x29: // ')'
2088         acc.append(')');
2089         mark(1);
2090         c = readCh();
2091         switch (c)
2092           {
2093           case 0x3f: // '?'
2094             acc.append('?');
2095             model.min = 0;
2096             model.max = 1;
2097             break;
2098           case 0x2a: // '*'
2099             acc.append('*');
2100             model.min = 0;
2101             model.max = -1;
2102             break;
2103           case 0x2b: // '+'
2104             acc.append('+');
2105             model.min = 1;
2106             model.max = -1;
2107             break;
2108           default:
2109             reset();
2110           }
2111         return model; // done
2112       case 0x7c: // '|'
2113         model.or = true;
2114         // fall through
2115       case 0x2c: // ','
2116         separator = c;
2117         acc.append(Character.toChars(c));
2118         break;
2119       default:
2120         error("bad separator in content model",
2121               "U+" + Integer.toHexString(c));
2122         return model;
2123       }
2124     // Parse subsequent content particles
2125     while (true)
2126       {
2127         skipWhitespace();
2128         model.addContentParticle(readContentParticle(acc));
2129         skipWhitespace();
2130         c = readCh();
2131         if (c == 0x29) // ')'
2132           {
2133             acc.append(')');
2134             break;
2135           }
2136         else if (c != separator)
2137           {
2138             error("bad separator in content model",
2139                   "U+" + Integer.toHexString(c));
2140             return model;
2141           }
2142         else
2143           acc.append(c);
2144       }
2145     // Check for occurrence indicator
2146     mark(1);
2147     c = readCh();
2148     switch (c)
2149       {
2150       case 0x3f: // '?'
2151         acc.append('?');
2152         model.min = 0;
2153         model.max = 1;
2154         break;
2155       case 0x2a: // '*'
2156         acc.append('*');
2157         model.min = 0;
2158         model.max = -1;
2159         break;
2160       case 0x2b: // '+'
2161         acc.append('+');
2162         model.min = 1;
2163         model.max = -1;
2164         break;
2165       default:
2166         reset();
2167       }
2168     return model;
2169   }
2170 
2171   /**
2172    * Parse a cp production.
2173    */
readContentParticle(CPStringBuilder acc)2174   private ContentParticle readContentParticle(CPStringBuilder acc)
2175     throws IOException, XMLStreamException
2176   {
2177     ContentParticle cp = new ContentParticle();
2178     if (tryRead('('))
2179       {
2180         acc.append('(');
2181         cp.content = readElements(acc);
2182       }
2183     else
2184       {
2185         String name = readNmtoken(true);
2186         acc.append(name);
2187         cp.content = name;
2188         mark(1);
2189         int c = readCh();
2190         switch (c)
2191           {
2192           case 0x3f: // '?'
2193             acc.append('?');
2194             cp.min = 0;
2195             cp.max = 1;
2196             break;
2197           case 0x2a: // '*'
2198             acc.append('*');
2199             cp.min = 0;
2200             cp.max = -1;
2201             break;
2202           case 0x2b: // '+'
2203             acc.append('+');
2204             cp.min = 1;
2205             cp.max = -1;
2206             break;
2207           default:
2208             reset();
2209           }
2210       }
2211     return cp;
2212   }
2213 
2214   /**
2215    * Parse an attribute-list definition.
2216    */
readAttlistDecl()2217   private void readAttlistDecl()
2218     throws IOException, XMLStreamException
2219   {
2220     requireWhitespace();
2221     boolean saved = expandPE;
2222     expandPE = (inputStack.size() > 1);
2223     String elementName = readNmtoken(true);
2224     expandPE = saved;
2225     boolean white = tryWhitespace();
2226     while (!tryRead('>'))
2227       {
2228         if (!white)
2229           error("whitespace required before attribute definition");
2230         readAttDef(elementName);
2231         white = tryWhitespace();
2232       }
2233   }
2234 
2235   /**
2236    * Parse a single attribute definition.
2237    */
readAttDef(String elementName)2238   private void readAttDef(String elementName)
2239     throws IOException, XMLStreamException
2240   {
2241     String name = readNmtoken(true);
2242     requireWhitespace();
2243     CPStringBuilder acc = new CPStringBuilder();
2244     HashSet values = new HashSet();
2245     String type = readAttType(acc, values);
2246     if (validating)
2247       {
2248         if ("ID".equals(type))
2249           {
2250             // VC: One ID per Element Type
2251             for (Iterator i = doctype.attlistIterator(elementName);
2252                  i.hasNext(); )
2253               {
2254                 Map.Entry entry = (Map.Entry) i.next();
2255                 AttributeDecl decl = (AttributeDecl) entry.getValue();
2256                 if ("ID".equals(decl.type))
2257                   error("element types must not have more than one ID " +
2258                         "attribute");
2259               }
2260           }
2261         else if ("NOTATION".equals(type))
2262           {
2263             // VC: One Notation Per Element Type
2264             for (Iterator i = doctype.attlistIterator(elementName);
2265                  i.hasNext(); )
2266               {
2267                 Map.Entry entry = (Map.Entry) i.next();
2268                 AttributeDecl decl = (AttributeDecl) entry.getValue();
2269                 if ("NOTATION".equals(decl.type))
2270                   error("element types must not have more than one NOTATION " +
2271                         "attribute");
2272               }
2273             // VC: No Notation on Empty Element
2274             ContentModel model = doctype.getElementModel(elementName);
2275             if (model != null && model.type == ContentModel.EMPTY)
2276               error("attributes of type NOTATION must not be declared on an " +
2277                     "element declared EMPTY");
2278           }
2279       }
2280     String enumer = null;
2281     if ("ENUMERATION".equals(type) || "NOTATION".equals(type))
2282       enumer = acc.toString();
2283     else
2284       values = null;
2285     requireWhitespace();
2286     readDefault(elementName, name, type, enumer, values);
2287   }
2288 
2289   /**
2290    * Parse an attribute type.
2291    */
readAttType(CPStringBuilder acc, HashSet values)2292   private String readAttType(CPStringBuilder acc, HashSet values)
2293     throws IOException, XMLStreamException
2294   {
2295     if (tryRead('('))
2296       {
2297         readEnumeration(false, acc, values);
2298         return "ENUMERATION";
2299       }
2300     else
2301       {
2302         String typeString = readNmtoken(true);
2303         if ("NOTATION".equals(typeString))
2304           {
2305             readNotationType(acc, values);
2306             return typeString;
2307           }
2308         else if ("CDATA".equals(typeString) ||
2309                  "ID".equals(typeString) ||
2310                  "IDREF".equals(typeString) ||
2311                  "IDREFS".equals(typeString) ||
2312                  "ENTITY".equals(typeString) ||
2313                  "ENTITIES".equals(typeString) ||
2314                  "NMTOKEN".equals(typeString) ||
2315                  "NMTOKENS".equals(typeString))
2316           return typeString;
2317         else
2318           {
2319             error("illegal attribute type", typeString);
2320             return null;
2321           }
2322       }
2323   }
2324 
2325   /**
2326    * Parse an enumeration.
2327    */
readEnumeration(boolean isNames, CPStringBuilder acc, HashSet values)2328   private void readEnumeration(boolean isNames, CPStringBuilder acc,
2329                                HashSet values)
2330     throws IOException, XMLStreamException
2331   {
2332     acc.append('(');
2333     // first token
2334     skipWhitespace();
2335     String token = readNmtoken(isNames);
2336     acc.append(token);
2337     values.add(token);
2338     // subsequent tokens
2339     skipWhitespace();
2340     while (!tryRead(')'))
2341       {
2342         require('|');
2343         acc.append('|');
2344         skipWhitespace();
2345         token = readNmtoken(isNames);
2346         // VC: No Duplicate Tokens
2347         if (validating && values.contains(token))
2348           error("duplicate token", token);
2349         acc.append(token);
2350         values.add(token);
2351         skipWhitespace();
2352       }
2353     acc.append(')');
2354   }
2355 
  /**
   * Parse a notation type for an attribute.
   * Called after the {@code NOTATION} keyword has been read: requires
   * whitespace and an opening '(' and then reads the list of notation
   * names as an enumeration of names.
   *
   * @param acc receives the textual form of the name list
   * @param values receives the individual notation names
   */
  private void readNotationType(CPStringBuilder acc, HashSet values)
    throws IOException, XMLStreamException
  {
    requireWhitespace();
    require('(');
    // isNames == true: the tokens are read as names
    readEnumeration(true, acc, values);
  }
2366 
2367   /**
2368    * Parse the default value for an attribute.
2369    */
readDefault(String elementName, String name, String type, String enumeration, HashSet values)2370   private void readDefault(String elementName, String name,
2371                            String type, String enumeration, HashSet values)
2372     throws IOException, XMLStreamException
2373   {
2374     int valueType = ATTRIBUTE_DEFAULT_SPECIFIED;
2375     int flags = LIT_ATTRIBUTE;
2376     String value = null, defaultType = null;
2377     boolean saved = expandPE;
2378 
2379     if (!"CDATA".equals(type))
2380       flags |= LIT_NORMALIZE;
2381 
2382     expandPE = false;
2383     if (tryRead('#'))
2384       {
2385         if (tryRead("FIXED"))
2386           {
2387             defaultType = "#FIXED";
2388             valueType = ATTRIBUTE_DEFAULT_FIXED;
2389             requireWhitespace();
2390             value = readLiteral(flags, false);
2391           }
2392         else if (tryRead("REQUIRED"))
2393           {
2394             defaultType = "#REQUIRED";
2395             valueType = ATTRIBUTE_DEFAULT_REQUIRED;
2396           }
2397         else if (tryRead("IMPLIED"))
2398           {
2399             defaultType = "#IMPLIED";
2400             valueType = ATTRIBUTE_DEFAULT_IMPLIED;
2401           }
2402         else
2403           error("illegal keyword for attribute default value");
2404       }
2405     else
2406       value = readLiteral(flags, false);
2407     expandPE = saved;
2408     if (validating)
2409       {
2410         if ("ID".equals(type))
2411           {
2412             // VC: Attribute Default Value Syntactically Correct
2413             if (value != null && !isNmtoken(value, true))
2414               error("default value must match Name production", value);
2415             // VC: ID Attribute Default
2416             if (valueType != ATTRIBUTE_DEFAULT_REQUIRED &&
2417                 valueType != ATTRIBUTE_DEFAULT_IMPLIED)
2418               error("ID attributes must have a declared default of " +
2419                     "#IMPLIED or #REQUIRED");
2420           }
2421         else if (value != null)
2422           {
2423             // VC: Attribute Default Value Syntactically Correct
2424             if ("IDREF".equals(type) || "ENTITY".equals(type))
2425               {
2426                 if (!isNmtoken(value, true))
2427                   error("default value must match Name production", value);
2428               }
2429             else if ("IDREFS".equals(type) || "ENTITIES".equals(type))
2430               {
2431                 StringTokenizer st = new StringTokenizer(value);
2432                 while (st.hasMoreTokens())
2433                   {
2434                     String token = st.nextToken();
2435                     if (!isNmtoken(token, true))
2436                       error("default value must match Name production", token);
2437                   }
2438               }
2439             else if ("NMTOKEN".equals(type) || "ENUMERATION".equals(type))
2440               {
2441                 if (!isNmtoken(value, false))
2442                   error("default value must match Nmtoken production", value);
2443               }
2444             else if ("NMTOKENS".equals(type))
2445               {
2446                 StringTokenizer st = new StringTokenizer(value);
2447                 while (st.hasMoreTokens())
2448                   {
2449                     String token = st.nextToken();
2450                     if (!isNmtoken(token, false))
2451                       error("default value must match Nmtoken production",
2452                             token);
2453                   }
2454               }
2455           }
2456       }
2457     // Register attribute def
2458     AttributeDecl attribute =
2459       new AttributeDecl(type, value, valueType, enumeration, values,
2460                         inputStack.size() != 1);
2461     doctype.addAttributeDecl(elementName, name, attribute);
2462   }
2463 
  /**
   * Parse the EntityDecl production.
   * Handles general and parameter entities (the latter are registered
   * under a name prefixed with '%'), with either an internal replacement
   * text literal or external identifiers plus an optional NDATA notation
   * name (general entities only). References inside an internal value are
   * checked for syntactic correctness before the entity is registered.
   *
   * @param inExternalSubset passed through to
   * {@code doctype.addEntityDecl}
   */
  private void readEntityDecl(boolean inExternalSubset)
    throws IOException, XMLStreamException
  {
    int flags = 0;
    // Check if parameter entity
    boolean peFlag = false;
    // expandPE is off while looking for the '%' marker, then switched on
    // for the rest of the declaration.
    // NOTE(review): the caller's expandPE value is not restored on exit;
    // the method always leaves it true.
    expandPE = false;
    requireWhitespace();
    if (tryRead('%'))
      {
        peFlag = true;
        requireWhitespace();
      }
    expandPE = true;
    // Read entity name
    String name = readNmtoken(true);
    if (name.indexOf(':') != -1)
      error("illegal character ':' in entity name", name);
    if (peFlag)
      name = "%" + name;
    requireWhitespace();
    // Peek at the next character to tell a literal from external ids
    mark(1);
    int c = readCh();
    reset();
    if (c == 0x22 || c == 0x27) // " | '
      {
        // Internal entity replacement text
        String value = readLiteral(flags | LIT_DISABLE_EREF, true);
        // Walk over every '&...;' in the value and check that it is
        // shaped like a character reference or an entity reference.
        int ai = value.indexOf('&');
        while (ai != -1)
          {
            int sci = value.indexOf(';', ai);
            if (sci == -1)
              error("malformed reference in entity value", value);
            String ref = value.substring(ai + 1, sci);
            int[] cp = UnicodeReader.toCodePointArray(ref);
            if (cp.length == 0)
              error("malformed reference in entity value", value);
            if (cp[0] == 0x23) // #
              {
                // character reference: "#" digits or "#x" hex digits
                if (cp.length == 1)
                  error("malformed reference in entity value", value);
                if (cp[1] == 0x78) // 'x'
                  {
                    if (cp.length == 2)
                      error("malformed reference in entity value", value);
                    for (int i = 2; i < cp.length; i++)
                      {
                        int x = cp[i];
                        // anything outside 0-9, A-F, a-f is rejected
                        if (x < 0x30 ||
                            (x > 0x39 && x < 0x41) ||
                            (x > 0x46 && x < 0x61) ||
                            x > 0x66)
                          error("malformed character reference in entity value",
                                value);
                      }
                  }
                else
                  {
                    for (int i = 1; i < cp.length; i++)
                      {
                        int x = cp[i];
                        // anything outside 0-9 is rejected
                        if (x < 0x30 || x > 0x39)
                          error("malformed character reference in entity value",
                                value);
                      }
                  }
              }
            else
              {
                // entity reference: the text between '&' and ';' is
                // checked character by character as a name
                if (!isNameStartCharacter(cp[0], input.xml11))
                  error("malformed reference in entity value", value);
                for (int i = 1; i < cp.length; i++)
                  {
                    if (!isNameCharacter(cp[i], input.xml11))
                      error("malformed reference in entity value", value);
                  }
              }
            // continue the search after this reference's ';'
            ai = value.indexOf('&', sci);
          }
        doctype.addEntityDecl(name, value, inExternalSubset);
      }
    else
      {
        ExternalIds ids = readExternalIds(false, false);
        // Check for NDATA
        boolean white = tryWhitespace();
        // NDATA is only looked for on general entities
        if (!peFlag && tryRead("NDATA"))
          {
            if (!white)
              error("whitespace required before NDATA");
            requireWhitespace();
            ids.notationName = readNmtoken(true);
          }
        doctype.addEntityDecl(name, ids, inExternalSubset);
      }
    // finish
    skipWhitespace();
    require('>');
  }
2567 
2568   /**
2569    * Parse the NotationDecl production.
2570    */
readNotationDecl(boolean inExternalSubset)2571   private void readNotationDecl(boolean inExternalSubset)
2572     throws IOException, XMLStreamException
2573   {
2574     requireWhitespace();
2575     String notationName = readNmtoken(true);
2576     if (notationName.indexOf(':') != -1)
2577       error("illegal character ':' in notation name", notationName);
2578     if (validating)
2579       {
2580         // VC: Unique Notation Name
2581         ExternalIds notation = doctype.getNotation(notationName);
2582         if (notation != null)
2583           error("duplicate notation name", notationName);
2584       }
2585     requireWhitespace();
2586     ExternalIds ids = readExternalIds(true, false);
2587     ids.notationName = notationName;
2588     doctype.addNotationDecl(notationName, ids, inExternalSubset);
2589     skipWhitespace();
2590     require('>');
2591   }
2592 
2593   /**
2594    * Returns a tuple {publicId, systemId}.
2595    */
readExternalIds(boolean inNotation, boolean isSubset)2596   private ExternalIds readExternalIds(boolean inNotation, boolean isSubset)
2597     throws IOException, XMLStreamException
2598   {
2599     int c;
2600     int flags = LIT_DISABLE_CREF | LIT_DISABLE_PE | LIT_DISABLE_EREF;
2601     ExternalIds ids = new ExternalIds();
2602 
2603     if (tryRead("PUBLIC"))
2604       {
2605         requireWhitespace();
2606         ids.publicId = readLiteral(LIT_NORMALIZE | LIT_PUBID | flags, false);
2607         if (inNotation)
2608           {
2609             skipWhitespace();
2610             mark(1);
2611             c = readCh();
2612             reset();
2613             if (c == 0x22 || c == 0x27) // " | '
2614               {
2615                 String href = readLiteral(flags, false);
2616                 ids.systemId = absolutize(input.systemId, href);
2617               }
2618           }
2619         else
2620           {
2621             requireWhitespace();
2622             String href = readLiteral(flags, false);
2623             ids.systemId = absolutize(input.systemId, href);
2624           }
2625         // Check valid URI characters
2626         for (int i = 0; i < ids.publicId.length(); i++)
2627           {
2628             char d = ids.publicId.charAt(i);
2629             if (d >= 'a' && d <= 'z')
2630               continue;
2631             if (d >= 'A' && d <= 'Z')
2632               continue;
2633             if (" \r\n0123456789-' ()+,./:=?;!*#@$_%".indexOf(d) != -1)
2634               continue;
2635             error("illegal PUBLIC id character",
2636                   "U+" + Integer.toHexString(d));
2637           }
2638       }
2639     else if (tryRead("SYSTEM"))
2640       {
2641         requireWhitespace();
2642         String href = readLiteral(flags, false);
2643         ids.systemId = absolutize(input.systemId, href);
2644       }
2645     else if (!isSubset)
2646       {
2647         error("missing SYSTEM or PUBLIC keyword");
2648       }
2649     if (ids.systemId != null && !inNotation)
2650       {
2651         if (ids.systemId.indexOf('#') != -1)
2652           error("SYSTEM id has a URI fragment", ids.systemId);
2653       }
2654     return ids;
2655   }
2656 
2657   /**
2658    * Parse the start of an element.
2659    * @return the state of the parser afterwards (EMPTY_ELEMENT or CONTENT)
2660    */
readStartElement()2661   private int readStartElement()
2662     throws IOException, XMLStreamException
2663   {
2664     // Read element name
2665     String elementName = readNmtoken(true);
2666     attrs.clear();
2667     // Push namespace context
2668     if (namespaceAware)
2669       {
2670         if (elementName.charAt(0) == ':' ||
2671             elementName.charAt(elementName.length() - 1) == ':')
2672           error("not a QName", elementName);
2673         namespaces.addFirst(new LinkedHashMap());
2674       }
2675     // Read element content
2676     boolean white = tryWhitespace();
2677     mark(1);
2678     int c = readCh();
2679     while (c != 0x2f && c != 0x3e) // '/' | '>'
2680       {
2681         // Read attribute
2682         reset();
2683         if (!white)
2684           error("need whitespace between attributes");
2685         readAttribute(elementName);
2686         white = tryWhitespace();
2687         mark(1);
2688         c = readCh();
2689       }
2690     // supply defaulted attributes
2691     if (doctype != null)
2692       {
2693         for (Iterator i = doctype.attlistIterator(elementName); i.hasNext(); )
2694           {
2695             Map.Entry entry = (Map.Entry) i.next();
2696             String attName = (String) entry.getKey();
2697             AttributeDecl decl = (AttributeDecl) entry.getValue();
2698             if (validating)
2699               {
2700                 switch (decl.valueType)
2701                   {
2702                   case ATTRIBUTE_DEFAULT_REQUIRED:
2703                     // VC: Required Attribute
2704                     if (decl.value == null && !attributeSpecified(attName))
2705                       error("value for " + attName + " attribute is required");
2706                     break;
2707                   case ATTRIBUTE_DEFAULT_FIXED:
2708                     // VC: Fixed Attribute Default
2709                     for (Iterator j = attrs.iterator(); j.hasNext(); )
2710                       {
2711                         Attribute a = (Attribute) j.next();
2712                         if (attName.equals(a.name) &&
2713                             !decl.value.equals(a.value))
2714                           error("value for " + attName + " attribute must be " +
2715                                 decl.value);
2716                       }
2717                     break;
2718                   }
2719               }
2720             if (namespaceAware && attName.equals("xmlns"))
2721               {
2722                 LinkedHashMap ctx =
2723                   (LinkedHashMap) namespaces.getFirst();
2724                 if (ctx.containsKey(XMLConstants.DEFAULT_NS_PREFIX))
2725                   continue; // namespace was specified
2726               }
2727             else if (namespaceAware && attName.startsWith("xmlns:"))
2728               {
2729                 LinkedHashMap ctx =
2730                   (LinkedHashMap) namespaces.getFirst();
2731                 if (ctx.containsKey(attName.substring(6)))
2732                   continue; // namespace was specified
2733               }
2734             else if (attributeSpecified(attName))
2735               continue;
2736             if (decl.value == null)
2737               continue;
2738             // VC: Standalone Document Declaration
2739             if (validating && decl.external && xmlStandalone == Boolean.TRUE)
2740               error("standalone must be 'no' if attributes inherit values " +
2741                     "from externally declared markup declarations");
2742             Attribute attr =
2743               new Attribute(attName, decl.type, false, decl.value);
2744             if (namespaceAware)
2745               {
2746                 if (!addNamespace(attr))
2747                   attrs.add(attr);
2748               }
2749             else
2750               attrs.add(attr);
2751           }
2752       }
2753     if (baseAware)
2754       {
2755         String uri = getAttributeValue(XMLConstants.XML_NS_URI, "base");
2756         String base = getXMLBase();
2757         bases.addFirst(absolutize(base, uri));
2758       }
2759     if (namespaceAware)
2760       {
2761         // check prefix bindings
2762         int ci = elementName.indexOf(':');
2763         if (ci != -1)
2764           {
2765             String prefix = elementName.substring(0, ci);
2766             String uri = getNamespaceURI(prefix);
2767             if (uri == null)
2768               error("unbound element prefix", prefix);
2769             else if (input.xml11 && "".equals(uri))
2770               error("XML 1.1 unbound element prefix", prefix);
2771           }
2772         for (Iterator i = attrs.iterator(); i.hasNext(); )
2773           {
2774             Attribute attr = (Attribute) i.next();
2775             if (attr.prefix != null &&
2776                 !XMLConstants.XMLNS_ATTRIBUTE.equals(attr.prefix))
2777               {
2778                 String uri = getNamespaceURI(attr.prefix);
2779                 if (uri == null)
2780                   error("unbound attribute prefix", attr.prefix);
2781                 else if (input.xml11 && "".equals(uri))
2782                   error("XML 1.1 unbound attribute prefix", attr.prefix);
2783               }
2784           }
2785       }
2786     if (validating && doctype != null)
2787       {
2788         validateStartElement(elementName);
2789         currentContentModel = doctype.getElementModel(elementName);
2790         if (currentContentModel == null)
2791           error("no element declaration", elementName);
2792         validationStack.add(new LinkedList());
2793       }
2794     // make element name available for read
2795     buf.setLength(0);
2796     buf.append(elementName);
2797     // push element onto stack
2798     stack.addLast(elementName);
2799     switch (c)
2800       {
2801       case 0x3e: // '>'
2802         return CONTENT;
2803       case 0x2f: // '/'
2804         require('>');
2805         return EMPTY_ELEMENT;
2806       }
2807     return -1; // to satisfy compiler
2808   }
2809 
2810   /**
2811    * Indicates whether the specified attribute name was specified for the
2812    * current element.
2813    */
attributeSpecified(String attName)2814   private boolean attributeSpecified(String attName)
2815   {
2816     for (Iterator j = attrs.iterator(); j.hasNext(); )
2817       {
2818         Attribute a = (Attribute) j.next();
2819         if (attName.equals(a.name))
2820           return true;
2821       }
2822     return false;
2823   }
2824 
2825   /**
2826    * Parse an attribute.
2827    */
readAttribute(String elementName)2828   private void readAttribute(String elementName)
2829     throws IOException, XMLStreamException
2830   {
2831     // Read attribute name
2832     String attributeName = readNmtoken(true);
2833     String type = getAttributeType(elementName, attributeName);
2834     readEq();
2835     // Read literal
2836     final int flags = LIT_ATTRIBUTE |  LIT_ENTITY_REF;
2837     String value = (type == null || "CDATA".equals(type)) ?
2838       readLiteral(flags, false) : readLiteral(flags | LIT_NORMALIZE, false);
2839     // add attribute event
2840     Attribute attr = this.new Attribute(attributeName, type, true, value);
2841     if (namespaceAware)
2842       {
2843         if (attributeName.charAt(0) == ':' ||
2844             attributeName.charAt(attributeName.length() - 1) == ':')
2845           error("not a QName", attributeName);
2846         else if (attributeName.equals("xmlns"))
2847           {
2848             LinkedHashMap ctx = (LinkedHashMap) namespaces.getFirst();
2849             if (ctx.containsKey(XMLConstants.DEFAULT_NS_PREFIX))
2850               error("duplicate default namespace");
2851           }
2852         else if (attributeName.startsWith("xmlns:"))
2853           {
2854             LinkedHashMap ctx = (LinkedHashMap) namespaces.getFirst();
2855             if (ctx.containsKey(attributeName.substring(6)))
2856               error("duplicate namespace", attributeName.substring(6));
2857           }
2858         else if (attrs.contains(attr))
2859           error("duplicate attribute", attributeName);
2860       }
2861     else if (attrs.contains(attr))
2862       error("duplicate attribute", attributeName);
2863     if (validating && doctype != null)
2864       {
2865         // VC: Attribute Value Type
2866         AttributeDecl decl =
2867           doctype.getAttributeDecl(elementName, attributeName);
2868         if (decl == null)
2869           error("attribute must be declared", attributeName);
2870         if ("ENUMERATION".equals(decl.type))
2871           {
2872             // VC: Enumeration
2873             if (!decl.values.contains(value))
2874               error("value does not match enumeration " + decl.enumeration,
2875                     value);
2876           }
2877         else if ("ID".equals(decl.type))
2878           {
2879             // VC: ID
2880             if (!isNmtoken(value, true))
2881               error("ID values must match the Name production");
2882             if (ids.contains(value))
2883               error("Duplicate ID", value);
2884             ids.add(value);
2885           }
2886         else if ("IDREF".equals(decl.type) || "IDREFS".equals(decl.type))
2887           {
2888             StringTokenizer st = new StringTokenizer(value);
2889             while (st.hasMoreTokens())
2890               {
2891                 String token = st.nextToken();
2892                 // VC: IDREF
2893                 if (!isNmtoken(token, true))
2894                   error("IDREF values must match the Name production");
2895                 idrefs.add(token);
2896               }
2897           }
2898         else if ("NMTOKEN".equals(decl.type) || "NMTOKENS".equals(decl.type))
2899           {
2900             StringTokenizer st = new StringTokenizer(value);
2901             while (st.hasMoreTokens())
2902               {
2903                 String token = st.nextToken();
2904                 // VC: Name Token
2905                 if (!isNmtoken(token, false))
2906                   error("NMTOKEN values must match the Nmtoken production");
2907               }
2908           }
2909         else if ("ENTITY".equals(decl.type))
2910           {
2911             // VC: Entity Name
2912             if (!isNmtoken(value, true))
2913               error("ENTITY values must match the Name production");
2914             Object entity = doctype.getEntity(value);
2915             if (entity == null || !(entity instanceof ExternalIds) ||
2916                 ((ExternalIds) entity).notationName == null)
2917               error("ENTITY values must match the name of an unparsed " +
2918                     "entity declared in the DTD");
2919           }
2920         else if ("NOTATION".equals(decl.type))
2921           {
2922             if (!decl.values.contains(value))
2923               error("NOTATION values must match a declared notation name",
2924                     value);
2925             // VC: Notation Attributes
2926             ExternalIds notation = doctype.getNotation(value);
2927             if (notation == null)
2928               error("NOTATION values must match the name of a notation " +
2929                     "declared in the DTD", value);
2930           }
2931       }
2932     if (namespaceAware)
2933       {
2934         if (!addNamespace(attr))
2935           attrs.add(attr);
2936       }
2937     else
2938       attrs.add(attr);
2939   }
2940 
2941   /**
2942    * Determines whether the specified attribute is a namespace declaration,
2943    * and adds it to the current namespace context if so. Returns false if
2944    * the attribute is an ordinary attribute.
2945    */
addNamespace(Attribute attr)2946   private boolean addNamespace(Attribute attr)
2947     throws XMLStreamException
2948   {
2949     if ("xmlns".equals(attr.name))
2950       {
2951         LinkedHashMap ctx = (LinkedHashMap) namespaces.getFirst();
2952         if (ctx.get(XMLConstants.DEFAULT_NS_PREFIX) != null)
2953           error("Duplicate default namespace declaration");
2954         if (XMLConstants.XML_NS_URI.equals(attr.value))
2955           error("can't bind XML namespace");
2956         ctx.put(XMLConstants.DEFAULT_NS_PREFIX, attr.value);
2957         return true;
2958       }
2959     else if ("xmlns".equals(attr.prefix))
2960       {
2961         LinkedHashMap ctx = (LinkedHashMap) namespaces.getFirst();
2962         if (ctx.get(attr.localName) != null)
2963           error("Duplicate namespace declaration for prefix",
2964                 attr.localName);
2965         if (XMLConstants.XML_NS_PREFIX.equals(attr.localName))
2966           {
2967             if (!XMLConstants.XML_NS_URI.equals(attr.value))
2968               error("can't redeclare xml prefix");
2969             else
2970               return false; // treat as attribute
2971           }
2972         if (XMLConstants.XML_NS_URI.equals(attr.value))
2973           error("can't bind non-xml prefix to XML namespace");
2974         if (XMLConstants.XMLNS_ATTRIBUTE.equals(attr.localName))
2975           error("can't redeclare xmlns prefix");
2976         if (XMLConstants.XMLNS_ATTRIBUTE_NS_URI.equals(attr.value))
2977           error("can't bind non-xmlns prefix to XML Namespace namespace");
2978         if ("".equals(attr.value) && !input.xml11)
2979           error("illegal use of 1.1-style prefix unbinding in 1.0 document");
2980         ctx.put(attr.localName, attr.value);
2981         return true;
2982       }
2983     return false;
2984   }
2985 
2986   /**
2987    * Parse a closing tag.
2988    */
readEndElement()2989   private void readEndElement()
2990     throws IOException, XMLStreamException
2991   {
2992     // pop element off stack
2993     String expected = (String) stack.removeLast();
2994     require(expected);
2995     skipWhitespace();
2996     require('>');
2997     // Make element name available
2998     buf.setLength(0);
2999     buf.append(expected);
3000     if (validating && doctype != null)
3001       endElementValidationHook();
3002   }
3003 
3004   /**
3005    * Validate the end of an element.
3006    * Called on an end-element or empty element if validating.
3007    */
endElementValidationHook()3008   private void endElementValidationHook()
3009     throws XMLStreamException
3010   {
3011     validateEndElement();
3012     validationStack.removeLast();
3013     if (stack.isEmpty())
3014       currentContentModel = null;
3015     else
3016       {
3017         String parent = (String) stack.getLast();
3018         currentContentModel = doctype.getElementModel(parent);
3019       }
3020   }
3021 
3022   /**
3023    * Parse a comment.
3024    */
readComment(boolean inDTD)3025   private void readComment(boolean inDTD)
3026     throws IOException, XMLStreamException
3027   {
3028     boolean saved = expandPE;
3029     expandPE = false;
3030     buf.setLength(0);
3031     readUntil(TEST_END_COMMENT);
3032     require('>');
3033     expandPE = saved;
3034     if (inDTD)
3035       doctype.addComment(buf.toString());
3036   }
3037 
3038   /**
3039    * Parse a processing instruction.
3040    */
readPI(boolean inDTD)3041   private void readPI(boolean inDTD)
3042     throws IOException, XMLStreamException
3043   {
3044     boolean saved = expandPE;
3045     expandPE = false;
3046     piTarget = readNmtoken(true);
3047     if (piTarget.indexOf(':') != -1)
3048       error("illegal character in PI target", new Character(':'));
3049     if ("xml".equalsIgnoreCase(piTarget))
3050       error("illegal PI target", piTarget);
3051     if (tryRead(TEST_END_PI))
3052       piData = null;
3053     else
3054       {
3055         if (!tryWhitespace())
3056           error("whitespace required between PI target and data");
3057         buf.setLength(0);
3058         readUntil(TEST_END_PI);
3059         piData = buf.toString();
3060       }
3061     expandPE = saved;
3062     if (inDTD)
3063       doctype.addPI(piTarget, piData);
3064   }
3065 
3066   /**
3067    * Parse an entity reference.
3068    */
readReference()3069   private void readReference()
3070     throws IOException, XMLStreamException
3071   {
3072     buf.setLength(0);
3073     String entityName = readNmtoken(true);
3074     require(';');
3075     buf.setLength(0);
3076     buf.append(entityName);
3077   }
3078 
3079   /**
3080    * Read an CDATA section.
3081    */
readCDSect()3082   private void readCDSect()
3083     throws IOException, XMLStreamException
3084   {
3085     buf.setLength(0);
3086     readUntil(TEST_END_CDATA);
3087   }
3088 
3089   /**
3090    * Read character data.
3091    * @return the type of text read (CHARACTERS or SPACE)
3092    */
readCharData(String prefix)3093   private int readCharData(String prefix)
3094     throws IOException, XMLStreamException
3095   {
3096     boolean white = true;
3097     buf.setLength(0);
3098     if (prefix != null)
3099       buf.append(prefix);
3100     boolean done = false;
3101     boolean entities = false;
3102     while (!done)
3103       {
3104         // Block read
3105         mark(tmpBuf.length);
3106         int len = read(tmpBuf, 0, tmpBuf.length);
3107         if (len == -1)
3108           {
3109             if (inputStack.size() > 1)
3110               {
3111                 popInput();
3112                 // report end-entity
3113                 done = true;
3114               }
3115             else
3116               throw new EOFException();
3117           }
3118         for (int i = 0; i < len && !done; i++)
3119           {
3120             int c = tmpBuf[i];
3121             switch (c)
3122               {
3123               case 0x20:
3124               case 0x09:
3125               case 0x0a:
3126               case 0x0d:
3127                 buf.append(Character.toChars(c));
3128                 break; // whitespace
3129               case 0x26: // '&'
3130                 reset();
3131                 read(tmpBuf, 0, i);
3132                 // character reference?
3133                 mark(3);
3134                 c = readCh(); // &
3135                 c = readCh();
3136                 if (c == 0x23) // '#'
3137                   {
3138                     mark(1);
3139                     c = readCh();
3140                     boolean hex = (c == 0x78); // 'x'
3141                     if (!hex)
3142                       reset();
3143                     char[] ch = readCharacterRef(hex ? 16 : 10);
3144                     buf.append(ch, 0, ch.length);
3145                     for (int j = 0; j < ch.length; j++)
3146                       {
3147                         switch (ch[j])
3148                           {
3149                           case 0x20:
3150                           case 0x09:
3151                           case 0x0a:
3152                           case 0x0d:
3153                             break; // whitespace
3154                           default:
3155                             white = false;
3156                           }
3157                       }
3158                   }
3159                 else
3160                   {
3161                     // entity reference
3162                     reset();
3163                     c = readCh(); // &
3164                     String entityName = readNmtoken(true);
3165                     require(';');
3166                     String text =
3167                       (String) PREDEFINED_ENTITIES.get(entityName);
3168                     if (text != null)
3169                       buf.append(text);
3170                     else
3171                       {
3172                         pushInput("", "&" + entityName + ";", false, false);
3173                         done = true;
3174                         break;
3175                       }
3176                   }
3177                 // continue processing
3178                 i = -1;
3179                 mark(tmpBuf.length);
3180                 len = read(tmpBuf, 0, tmpBuf.length);
3181                 if (len == -1)
3182                   {
3183                     if (inputStack.size() > 1)
3184                       {
3185                         popInput();
3186                         done = true;
3187                       }
3188                     else
3189                       throw new EOFException();
3190                   }
3191                 entities = true;
3192                 break; // end of text sequence
3193               case 0x3e: // '>'
3194                 int l = buf.length();
3195                 if (l > 1 &&
3196                     buf.charAt(l - 1) == ']' &&
3197                     buf.charAt(l - 2) == ']')
3198                   error("Character data may not contain unescaped ']]>'");
3199                 buf.append(Character.toChars(c));
3200                 break;
3201               case 0x3c: // '<'
3202                 reset();
3203                 // read i characters
3204                 int count = 0, remaining = i;
3205                 do
3206                   {
3207                     int r = read(tmpBuf, 0, remaining);
3208                     count += r;
3209                     remaining -= r;
3210                   }
3211                 while (count < i);
3212                 i = len;
3213                 if (coalescing && tryRead(TEST_CDATA))
3214                   readUntil(TEST_END_CDATA); // read CDATA section into buf
3215                 else
3216                   done = true; // end of text sequence
3217                 break;
3218               default:
3219                 if (input.xml11)
3220                   {
3221                     if (!isXML11Char(c) || isXML11RestrictedChar(c))
3222                       error("illegal XML 1.1 character",
3223                             "U+" + Integer.toHexString(c));
3224                   }
3225                 else if (!isChar(c))
3226                   error("illegal XML character",
3227                         "U+" + Integer.toHexString(c));
3228                 white = false;
3229                 buf.append(Character.toChars(c));
3230               }
3231           }
3232         // if text buffer >= 2MB, return it as a chunk
3233         // to avoid excessive memory use
3234         if (buf.length() >= 2097152)
3235           done = true;
3236       }
3237     if (entities)
3238       normalizeCRLF(buf);
3239     return white ? XMLStreamConstants.SPACE : XMLStreamConstants.CHARACTERS;
3240   }
3241 
3242   /**
3243    * Expands the specified entity.
3244    */
expandEntity(String name, boolean inAttr, boolean normalize)3245   private void expandEntity(String name, boolean inAttr, boolean normalize)
3246     throws IOException, XMLStreamException
3247   {
3248     if (doctype != null)
3249       {
3250         Object value = doctype.getEntity(name);
3251         if (value != null)
3252           {
3253             if (xmlStandalone == Boolean.TRUE)
3254               {
3255                 // VC: Standalone Document Declaration
3256                 if (doctype.isEntityExternal(name))
3257                   error("reference to external entity in standalone document");
3258                 else if (value instanceof ExternalIds)
3259                   {
3260                     ExternalIds ids = (ExternalIds) value;
3261                     if (ids.notationName != null &&
3262                         doctype.isNotationExternal(ids.notationName))
3263                       error("reference to external notation in " +
3264                             "standalone document");
3265                   }
3266               }
3267             if (value instanceof String)
3268               {
3269                 String text = (String) value;
3270                 if (inAttr && text.indexOf('<') != -1)
3271                   error("< in attribute value");
3272                 pushInput(name, text, !inAttr, normalize);
3273               }
3274             else if (inAttr)
3275               error("reference to external entity in attribute value", name);
3276             else
3277               pushInput(name, (ExternalIds) value, !inAttr, normalize);
3278             return;
3279           }
3280       }
3281     error("reference to undeclared entity", name);
3282   }
3283 
3284   /**
3285    * Indicates whether the specified entity is unparsed.
3286    */
isUnparsedEntity(String name)3287   private boolean isUnparsedEntity(String name)
3288   {
3289     if (doctype != null)
3290       {
3291         Object value = doctype.getEntity(name);
3292         if (value != null && value instanceof ExternalIds)
3293           return ((ExternalIds) value).notationName != null;
3294       }
3295     return false;
3296   }
3297 
3298   /**
3299    * Read an equals sign.
3300    */
readEq()3301   private void readEq()
3302     throws IOException, XMLStreamException
3303   {
3304     skipWhitespace();
3305     require('=');
3306     skipWhitespace();
3307   }
3308 
3309   /**
3310    * Character read for reading literals.
3311    * @param recognizePEs whether to recognize parameter-entity references
3312    */
literalReadCh(boolean recognizePEs)3313   private int literalReadCh(boolean recognizePEs)
3314     throws IOException, XMLStreamException
3315   {
3316     int c = recognizePEs ? readCh() : read();
3317     while (c == -1)
3318       {
3319         if (inputStack.size() > 1)
3320           {
3321             inputStack.removeLast();
3322             input = (Input) inputStack.getLast();
3323             // Don't issue end-entity
3324             c = recognizePEs ? readCh() : read();
3325           }
3326         else
3327           throw new EOFException();
3328       }
3329     return c;
3330   }
3331 
3332   /**
3333    * Read a string literal.
3334    */
readLiteral(int flags, boolean recognizePEs)3335   private String readLiteral(int flags, boolean recognizePEs)
3336     throws IOException, XMLStreamException
3337   {
3338     boolean saved = expandPE;
3339     int delim = readCh();
3340     if (delim != 0x27 && delim != 0x22)
3341       error("expected '\"' or \"'\"", "U+" + Integer.toHexString(delim));
3342     literalBuf.setLength(0);
3343     if ((flags & LIT_DISABLE_PE) != 0)
3344       expandPE = false;
3345     boolean entities = false;
3346     int inputStackSize = inputStack.size();
3347     do
3348       {
3349         int c = literalReadCh(recognizePEs);
3350         if (c == delim && inputStackSize == inputStack.size())
3351           break;
3352         switch (c)
3353           {
3354           case 0x0a:
3355           case 0x0d:
3356             if ((flags & (LIT_ATTRIBUTE | LIT_PUBID)) != 0)
3357               c = 0x20; // normalize to space
3358             break;
3359           case 0x09:
3360             if ((flags & LIT_ATTRIBUTE) != 0)
3361               c = 0x20; // normalize to space
3362             break;
3363           case 0x26: // '&'
3364             mark(2);
3365             c = readCh();
3366             if (c == 0x23) // '#'
3367               {
3368                 if ((flags & LIT_DISABLE_CREF) != 0)
3369                   {
3370                     reset();
3371                     c = 0x26; // '&'
3372                   }
3373                 else
3374                   {
3375                     mark(1);
3376                     c = readCh();
3377                     boolean hex = (c == 0x78); // 'x'
3378                     if (!hex)
3379                       reset();
3380                     char[] ref = readCharacterRef(hex ? 16 : 10);
3381                     for (int i = 0; i < ref.length; i++)
3382                       literalBuf.append(ref[i]);
3383                     entities = true;
3384                     continue;
3385                   }
3386               }
3387             else
3388               {
3389                 if ((flags & LIT_DISABLE_EREF) != 0)
3390                   {
3391                     reset();
3392                     c = 0x26; // '&'
3393                   }
3394                 else
3395                   {
3396                     reset();
3397                     String entityName = readNmtoken(true);
3398                     require(';');
3399                     String text =
3400                       (String) PREDEFINED_ENTITIES.get(entityName);
3401                     if (text != null)
3402                       literalBuf.append(text);
3403                     else
3404                       expandEntity(entityName,
3405                                    (flags & LIT_ATTRIBUTE) != 0,
3406                                    true);
3407                     entities = true;
3408                     continue;
3409                   }
3410               }
3411             break;
3412           case 0x3c: // '<'
3413             if ((flags & LIT_ATTRIBUTE) != 0)
3414               error("attribute values may not contain '<'");
3415             break;
3416           case -1:
3417             if (inputStack.size() > 1)
3418               {
3419                 popInput();
3420                 continue;
3421               }
3422             throw new EOFException();
3423           default:
3424             if ((c < 0x0020 || c > 0xfffd) ||
3425                 (c >= 0xd800 && c < 0xdc00) ||
3426                 (input.xml11 && (c >= 0x007f) &&
3427                  (c <= 0x009f) && (c != 0x0085)))
3428               error("illegal character", "U+" + Integer.toHexString(c));
3429           }
3430         literalBuf.append(Character.toChars(c));
3431       }
3432     while (true);
3433     expandPE = saved;
3434     if (entities)
3435       normalizeCRLF(literalBuf);
3436     if ((flags & LIT_NORMALIZE) > 0)
3437       literalBuf = normalize(literalBuf);
3438     return literalBuf.toString();
3439   }
3440 
3441   /**
3442    * Performs attribute-value normalization of the text buffer.
3443    * This discards leading and trailing whitespace, and replaces sequences
3444    * of whitespace with a single space.
3445    */
normalize(StringBuffer buf)3446   private StringBuffer normalize(StringBuffer buf)
3447   {
3448     StringBuffer acc = new StringBuffer();
3449     int len = buf.length();
3450     int avState = 0;
3451     for (int i = 0; i < len; i++)
3452       {
3453         char c = buf.charAt(i);
3454         if (c == ' ')
3455           avState = (avState == 0) ? 0 : 1;
3456         else
3457           {
3458             if (avState == 1)
3459               acc.append(' ');
3460             acc.append(c);
3461             avState = 2;
3462           }
3463       }
3464     return acc;
3465   }
3466 
3467   /**
3468    * Replace any CR/LF pairs in the buffer with LF.
3469    * This may be necessary if combinations of CR or LF were declared as
3470    * (character) entity references in the input.
3471    */
normalizeCRLF(StringBuffer buf)3472   private void normalizeCRLF(StringBuffer buf)
3473   {
3474     int len = buf.length() - 1;
3475     for (int i = 0; i < len; i++)
3476       {
3477         char c = buf.charAt(i);
3478         if (c == '\r' && buf.charAt(i + 1) == '\n')
3479           {
3480             buf.deleteCharAt(i--);
3481             len--;
3482           }
3483       }
3484   }
3485 
3486   /**
3487    * Parse and expand a parameter entity reference.
3488    */
expandPEReference()3489   private void expandPEReference()
3490     throws IOException, XMLStreamException
3491   {
3492     String name = readNmtoken(true, new StringBuffer());
3493     require(';');
3494     mark(1); // ensure we don't reset to before the semicolon
3495     if (doctype != null)
3496       {
3497         String entityName = "%" + name;
3498         Object entity = doctype.getEntity(entityName);
3499         if (entity != null)
3500           {
3501             if (xmlStandalone == Boolean.TRUE)
3502               {
3503                 if (doctype.isEntityExternal(entityName))
3504                   error("reference to external parameter entity in " +
3505                         "standalone document");
3506               }
3507             if (entity instanceof String)
3508               {
3509                 pushInput(name, (String) entity, false, input.normalize);
3510                 //pushInput(name, " " + (String) entity + " ");
3511               }
3512             else
3513               {
3514                 //pushInput("", " ");
3515                 pushInput(name, (ExternalIds) entity, false, input.normalize);
3516                 //pushInput("", " ");
3517               }
3518           }
3519         else
3520           error("reference to undeclared parameter entity", name);
3521       }
3522     else
3523       error("reference to parameter entity without doctype", name);
3524   }
3525 
3526   /**
3527    * Parse the digits in a character reference.
3528    * @param base the base of the digits (10 or 16)
3529    */
readCharacterRef(int base)3530   private char[] readCharacterRef(int base)
3531     throws IOException, XMLStreamException
3532   {
3533     CPStringBuilder b = new CPStringBuilder();
3534     for (int c = readCh(); c != 0x3b && c != -1; c = readCh())
3535       b.append(Character.toChars(c));
3536     try
3537       {
3538         int ord = Integer.parseInt(b.toString(), base);
3539         if (input.xml11)
3540           {
3541             if (!isXML11Char(ord))
3542               error("illegal XML 1.1 character reference " +
3543                     "U+" + Integer.toHexString(ord));
3544           }
3545         else
3546           {
3547             if ((ord < 0x20 && !(ord == 0x0a || ord == 0x09 || ord == 0x0d))
3548                 || (ord >= 0xd800 && ord <= 0xdfff)
3549                 || ord == 0xfffe || ord == 0xffff
3550                 || ord > 0x0010ffff)
3551               error("illegal XML character reference " +
3552                     "U+" + Integer.toHexString(ord));
3553           }
3554         return Character.toChars(ord);
3555       }
3556     catch (NumberFormatException e)
3557       {
3558         error("illegal characters in character reference", b.toString());
3559         return null;
3560       }
3561   }
3562 
3563   /**
3564    * Parses an NMTOKEN or Name production.
3565    * @param isName if a Name, otherwise an NMTOKEN
3566    */
readNmtoken(boolean isName)3567   private String readNmtoken(boolean isName)
3568     throws IOException, XMLStreamException
3569   {
3570     return readNmtoken(isName, nmtokenBuf);
3571   }
3572 
3573   /**
3574    * Parses an NMTOKEN or Name production using the specified buffer.
3575    * @param isName if a Name, otherwise an NMTOKEN
3576    * @param buf the character buffer to use
3577    */
readNmtoken(boolean isName, StringBuffer buf)3578   private String readNmtoken(boolean isName, StringBuffer buf)
3579     throws IOException, XMLStreamException
3580   {
3581     buf.setLength(0);
3582     int c = readCh();
3583     if (isName)
3584       {
3585         if (!isNameStartCharacter(c, input.xml11))
3586           error("not a name start character",
3587                 "U+" + Integer.toHexString(c));
3588       }
3589     else
3590       {
3591         if (!isNameCharacter(c, input.xml11))
3592           error("not a name character",
3593                 "U+" + Integer.toHexString(c));
3594       }
3595     buf.append(Character.toChars(c));
3596     do
3597       {
3598         mark(1);
3599         c = readCh();
3600         switch (c)
3601           {
3602           case 0x25: // '%'
3603           case 0x3c: // '<'
3604           case 0x3e: // '>'
3605           case 0x26: // '&'
3606           case 0x2c: // ','
3607           case 0x7c: // '|'
3608           case 0x2a: // '*'
3609           case 0x2b: // '+'
3610           case 0x3f: // '?'
3611           case 0x29: // ')'
3612           case 0x3d: // '='
3613           case 0x27: // '\''
3614           case 0x22: // '"'
3615           case 0x5b: // '['
3616           case 0x20: // ' '
3617           case 0x09: // '\t'
3618           case 0x0a: // '\n'
3619           case 0x0d: // '\r'
3620           case 0x3b: // ';'
3621           case 0x2f: // '/'
3622           case -1:
3623             reset();
3624             return intern(buf.toString());
3625           default:
3626             if (!isNameCharacter(c, input.xml11))
3627               error("not a name character",
3628                     "U+" + Integer.toHexString(c));
3629             else
3630               buf.append(Character.toChars(c));
3631           }
3632       }
3633     while (true);
3634   }
3635 
3636   /**
3637    * Indicates whether the specified Unicode character is an XML 1.1 Char.
3638    */
isXML11Char(int c)3639   public static boolean isXML11Char(int c)
3640   {
3641     return ((c >= 0x0001 && c <= 0xD7FF) ||
3642             (c >= 0xE000 && c < 0xFFFE) ||
3643             (c >= 0x10000 && c <= 0x10FFFF));
3644   }
3645 
3646   /**
3647    * Indicates whether the specified Unicode character is an XML 1.1
3648    * RestrictedChar.
3649    */
isXML11RestrictedChar(int c)3650   public static boolean isXML11RestrictedChar(int c)
3651   {
3652     return ((c >= 0x0001 && c <= 0x0008) ||
3653             (c >= 0x000B && c <= 0x000C) ||
3654             (c >= 0x000E && c <= 0x001F) ||
3655             (c >= 0x007F && c <= 0x0084) ||
3656             (c >= 0x0086 && c <= 0x009F));
3657   }
3658 
3659   /**
3660    * Indicates whether the specified text matches the Name or Nmtoken
3661    * production.
3662    */
isNmtoken(String text, boolean isName)3663   private boolean isNmtoken(String text, boolean isName)
3664   {
3665     try
3666       {
3667         int[] cp = UnicodeReader.toCodePointArray(text);
3668         if (cp.length == 0)
3669           return false;
3670         if (isName)
3671           {
3672             if (!isNameStartCharacter(cp[0], input.xml11))
3673               return false;
3674           }
3675         else
3676           {
3677             if (!isNameCharacter(cp[0], input.xml11))
3678               return false;
3679           }
3680         for (int i = 1; i < cp.length; i++)
3681           {
3682             if (!isNameCharacter(cp[i], input.xml11))
3683               return false;
3684           }
3685         return true;
3686       }
3687     catch (IOException e)
3688       {
3689         return false;
3690       }
3691   }
3692 
3693   /**
3694    * Indicates whether the specified Unicode character is a Name start
3695    * character.
3696    */
isNameStartCharacter(int c, boolean xml11)3697   public static boolean isNameStartCharacter(int c, boolean xml11)
3698   {
3699     if (xml11)
3700       return ((c >= 0x0041 && c <= 0x005a) ||
3701               (c >= 0x0061 && c <= 0x007a) ||
3702               c == 0x3a |
3703               c == 0x5f |
3704               (c >= 0xC0 && c <= 0xD6) ||
3705               (c >= 0xD8 && c <= 0xF6) ||
3706               (c >= 0xF8 && c <= 0x2FF) ||
3707               (c >= 0x370 && c <= 0x37D) ||
3708               (c >= 0x37F && c <= 0x1FFF) ||
3709               (c >= 0x200C && c <= 0x200D) ||
3710               (c >= 0x2070 && c <= 0x218F) ||
3711               (c >= 0x2C00 && c <= 0x2FEF) ||
3712               (c >= 0x3001 && c <= 0xD7FF) ||
3713               (c >= 0xF900 && c <= 0xFDCF) ||
3714               (c >= 0xFDF0 && c <= 0xFFFD) ||
3715               (c >= 0x10000 && c <= 0xEFFFF));
3716     else
3717       return (c == 0x5f || c == 0x3a || isLetter(c));
3718   }
3719 
3720   /**
3721    * Indicates whether the specified Unicode character is a Name non-initial
3722    * character.
3723    */
isNameCharacter(int c, boolean xml11)3724   public static boolean isNameCharacter(int c, boolean xml11)
3725   {
3726     if (xml11)
3727       return ((c >= 0x0041 && c <= 0x005a) ||
3728               (c >= 0x0061 && c <= 0x007a) ||
3729               (c >= 0x0030 && c <= 0x0039) ||
3730               c == 0x3a |
3731               c == 0x5f |
3732               c == 0x2d |
3733               c == 0x2e |
3734               c == 0xB7 |
3735               (c >= 0xC0 && c <= 0xD6) ||
3736               (c >= 0xD8 && c <= 0xF6) ||
3737               (c >= 0xF8 && c <= 0x2FF) ||
3738               (c >= 0x300 && c <= 0x37D) ||
3739               (c >= 0x37F && c <= 0x1FFF) ||
3740               (c >= 0x200C && c <= 0x200D) ||
3741               (c >= 0x203F && c <= 0x2040) ||
3742               (c >= 0x2070 && c <= 0x218F) ||
3743               (c >= 0x2C00 && c <= 0x2FEF) ||
3744               (c >= 0x3001 && c <= 0xD7FF) ||
3745               (c >= 0xF900 && c <= 0xFDCF) ||
3746               (c >= 0xFDF0 && c <= 0xFFFD) ||
3747               (c >= 0x10000 && c <= 0xEFFFF));
3748     else
3749       return (c == 0x2e || c == 0x2d || c == 0x5f || c == 0x3a ||
3750               isLetter(c) || isDigit(c) ||
3751               isCombiningChar(c) || isExtender(c));
3752   }
3753 
  /**
   * Indicates whether the specified Unicode character matches the Letter
   * production.
   * A character is accepted if it equals one of the single code points or
   * falls inside one of the inclusive ranges listed for BaseChar, or if it
   * falls in the Ideographic set tested afterwards.
   * @param c the Unicode code point to test
   * @return true if the character is a BaseChar or an Ideographic
   * character, false otherwise
   */
  public static boolean isLetter(int c)
  {
    // BaseChar: listed in ascending code point order.
    if ((c >= 0x0041 && c <= 0x005A) ||
        (c >= 0x0061 && c <= 0x007A) ||
        (c >= 0x00C0 && c <= 0x00D6) ||
        (c >= 0x00D8 && c <= 0x00F6) ||
        (c >= 0x00F8 && c <= 0x00FF) ||
        (c >= 0x0100 && c <= 0x0131) ||
        (c >= 0x0134 && c <= 0x013E) ||
        (c >= 0x0141 && c <= 0x0148) ||
        (c >= 0x014A && c <= 0x017E) ||
        (c >= 0x0180 && c <= 0x01C3) ||
        (c >= 0x01CD && c <= 0x01F0) ||
        (c >= 0x01F4 && c <= 0x01F5) ||
        (c >= 0x01FA && c <= 0x0217) ||
        (c >= 0x0250 && c <= 0x02A8) ||
        (c >= 0x02BB && c <= 0x02C1) ||
        c == 0x0386 ||
        (c >= 0x0388 && c <= 0x038A) ||
        c == 0x038C ||
        (c >= 0x038E && c <= 0x03A1) ||
        (c >= 0x03A3 && c <= 0x03CE) ||
        (c >= 0x03D0 && c <= 0x03D6) ||
        c == 0x03DA ||
      c == 0x03DC ||
        c == 0x03DE ||
        c == 0x03E0 ||
        (c >= 0x03E2 && c <= 0x03F3) ||
        (c >= 0x0401 && c <= 0x040C) ||
        (c >= 0x040E && c <= 0x044F) ||
        (c >= 0x0451 && c <= 0x045C) ||
        (c >= 0x045E && c <= 0x0481) ||
        (c >= 0x0490 && c <= 0x04C4) ||
        (c >= 0x04C7 && c <= 0x04C8) ||
        (c >= 0x04CB && c <= 0x04CC) ||
        (c >= 0x04D0 && c <= 0x04EB) ||
        (c >= 0x04EE && c <= 0x04F5) ||
        (c >= 0x04F8 && c <= 0x04F9) ||
        (c >= 0x0531 && c <= 0x0556) ||
        c == 0x0559 ||
        (c >= 0x0561 && c <= 0x0586) ||
        (c >= 0x05D0 && c <= 0x05EA) ||
        (c >= 0x05F0 && c <= 0x05F2) ||
        (c >= 0x0621 && c <= 0x063A) ||
        (c >= 0x0641 && c <= 0x064A) ||
        (c >= 0x0671 && c <= 0x06B7) ||
        (c >= 0x06BA && c <= 0x06BE) ||
        (c >= 0x06C0 && c <= 0x06CE) ||
        (c >= 0x06D0 && c <= 0x06D3) ||
        c == 0x06D5 ||
        (c >= 0x06E5 && c <= 0x06E6) ||
        (c >= 0x0905 && c <= 0x0939) ||
        c == 0x093D ||
        (c >= 0x0958 && c <= 0x0961) ||
        (c >= 0x0985 && c <= 0x098C) ||
        (c >= 0x098F && c <= 0x0990) ||
        (c >= 0x0993 && c <= 0x09A8) ||
        (c >= 0x09AA && c <= 0x09B0) ||
        c == 0x09B2 ||
        (c >= 0x09B6 && c <= 0x09B9) ||
        (c >= 0x09DC && c <= 0x09DD) ||
        (c >= 0x09DF && c <= 0x09E1) ||
        (c >= 0x09F0 && c <= 0x09F1) ||
        (c >= 0x0A05 && c <= 0x0A0A) ||
        (c >= 0x0A0F && c <= 0x0A10) ||
        (c >= 0x0A13 && c <= 0x0A28) ||
        (c >= 0x0A2A && c <= 0x0A30) ||
        (c >= 0x0A32 && c <= 0x0A33) ||
        (c >= 0x0A35 && c <= 0x0A36) ||
        (c >= 0x0A38 && c <= 0x0A39) ||
        (c >= 0x0A59 && c <= 0x0A5C) ||
        c == 0x0A5E ||
        (c >= 0x0A72 && c <= 0x0A74) ||
        (c >= 0x0A85 && c <= 0x0A8B) ||
        c == 0x0A8D ||
        (c >= 0x0A8F && c <= 0x0A91) ||
        (c >= 0x0A93 && c <= 0x0AA8) ||
        (c >= 0x0AAA && c <= 0x0AB0) ||
        (c >= 0x0AB2 && c <= 0x0AB3) ||
        (c >= 0x0AB5 && c <= 0x0AB9) ||
        c == 0x0ABD ||
        c == 0x0AE0 ||
        (c >= 0x0B05 && c <= 0x0B0C) ||
        (c >= 0x0B0F && c <= 0x0B10) ||
        (c >= 0x0B13 && c <= 0x0B28) ||
        (c >= 0x0B2A && c <= 0x0B30) ||
        (c >= 0x0B32 && c <= 0x0B33) ||
        (c >= 0x0B36 && c <= 0x0B39) ||
        c == 0x0B3D ||
        (c >= 0x0B5C && c <= 0x0B5D) ||
        (c >= 0x0B5F && c <= 0x0B61) ||
        (c >= 0x0B85 && c <= 0x0B8A) ||
        (c >= 0x0B8E && c <= 0x0B90) ||
        (c >= 0x0B92 && c <= 0x0B95) ||
        (c >= 0x0B99 && c <= 0x0B9A) ||
        c == 0x0B9C ||
        (c >= 0x0B9E && c <= 0x0B9F) ||
        (c >= 0x0BA3 && c <= 0x0BA4) ||
        (c >= 0x0BA8 && c <= 0x0BAA) ||
        (c >= 0x0BAE && c <= 0x0BB5) ||
        (c >= 0x0BB7 && c <= 0x0BB9) ||
        (c >= 0x0C05 && c <= 0x0C0C) ||
        (c >= 0x0C0E && c <= 0x0C10) ||
        (c >= 0x0C12 && c <= 0x0C28) ||
        (c >= 0x0C2A && c <= 0x0C33) ||
        (c >= 0x0C35 && c <= 0x0C39) ||
        (c >= 0x0C60 && c <= 0x0C61) ||
        (c >= 0x0C85 && c <= 0x0C8C) ||
        (c >= 0x0C8E && c <= 0x0C90) ||
        (c >= 0x0C92 && c <= 0x0CA8) ||
        (c >= 0x0CAA && c <= 0x0CB3) ||
        (c >= 0x0CB5 && c <= 0x0CB9) ||
        c == 0x0CDE ||
        (c >= 0x0CE0 && c <= 0x0CE1) ||
        (c >= 0x0D05 && c <= 0x0D0C) ||
        (c >= 0x0D0E && c <= 0x0D10) ||
        (c >= 0x0D12 && c <= 0x0D28) ||
        (c >= 0x0D2A && c <= 0x0D39) ||
        (c >= 0x0D60 && c <= 0x0D61) ||
        (c >= 0x0E01 && c <= 0x0E2E) ||
        c == 0x0E30 ||
        (c >= 0x0E32 && c <= 0x0E33) ||
        (c >= 0x0E40 && c <= 0x0E45) ||
        (c >= 0x0E81 && c <= 0x0E82) ||
        c == 0x0E84 ||
        (c >= 0x0E87 && c <= 0x0E88) ||
        c == 0x0E8A ||
        c == 0x0E8D ||
        (c >= 0x0E94 && c <= 0x0E97) ||
        (c >= 0x0E99 && c <= 0x0E9F) ||
        (c >= 0x0EA1 && c <= 0x0EA3) ||
        c == 0x0EA5 ||
        c == 0x0EA7 ||
        (c >= 0x0EAA && c <= 0x0EAB) ||
        (c >= 0x0EAD && c <= 0x0EAE) ||
        c == 0x0EB0 ||
        (c >= 0x0EB2 && c <= 0x0EB3) ||
        c == 0x0EBD ||
        (c >= 0x0EC0 && c <= 0x0EC4) ||
        (c >= 0x0F40 && c <= 0x0F47) ||
        (c >= 0x0F49 && c <= 0x0F69) ||
        (c >= 0x10A0 && c <= 0x10C5) ||
        (c >= 0x10D0 && c <= 0x10F6) ||
        c == 0x1100 ||
        (c >= 0x1102 && c <= 0x1103) ||
        (c >= 0x1105 && c <= 0x1107) ||
        c == 0x1109 ||
        (c >= 0x110B && c <= 0x110C) ||
        (c >= 0x110E && c <= 0x1112) ||
        c == 0x113C ||
        c == 0x113E ||
        c == 0x1140 ||
        c == 0x114C ||
        c == 0x114E ||
        c == 0x1150 ||
        (c >= 0x1154 && c <= 0x1155) ||
        c == 0x1159 ||
        (c >= 0x115F && c <= 0x1161) ||
        c == 0x1163 ||
        c == 0x1165 ||
        c == 0x1167 ||
        c == 0x1169 ||
        (c >= 0x116D && c <= 0x116E) ||
        (c >= 0x1172 && c <= 0x1173) ||
        c == 0x1175 ||
        c == 0x119E ||
        c == 0x11A8 ||
        c == 0x11AB ||
        (c >= 0x11AE && c <= 0x11AF) ||
        (c >= 0x11B7 && c <= 0x11B8) ||
        c == 0x11BA ||
        (c >= 0x11BC && c <= 0x11C2) ||
        c == 0x11EB ||
        c == 0x11F0 ||
        c == 0x11F9 ||
        (c >= 0x1E00 && c <= 0x1E9B) ||
        (c >= 0x1EA0 && c <= 0x1EF9) ||
        (c >= 0x1F00 && c <= 0x1F15) ||
        (c >= 0x1F18 && c <= 0x1F1D) ||
        (c >= 0x1F20 && c <= 0x1F45) ||
        (c >= 0x1F48 && c <= 0x1F4D) ||
        (c >= 0x1F50 && c <= 0x1F57) ||
        c == 0x1F59 ||
        c == 0x1F5B ||
        c == 0x1F5D ||
        (c >= 0x1F5F && c <= 0x1F7D) ||
        (c >= 0x1F80 && c <= 0x1FB4) ||
        (c >= 0x1FB6 && c <= 0x1FBC) ||
        c == 0x1FBE ||
        (c >= 0x1FC2 && c <= 0x1FC4) ||
        (c >= 0x1FC6 && c <= 0x1FCC) ||
        (c >= 0x1FD0 && c <= 0x1FD3) ||
        (c >= 0x1FD6 && c <= 0x1FDB) ||
        (c >= 0x1FE0 && c <= 0x1FEC) ||
        (c >= 0x1FF2 && c <= 0x1FF4) ||
        (c >= 0x1FF6 && c <= 0x1FFC) ||
        c == 0x2126 ||
        (c >= 0x212A && c <= 0x212B) ||
        c == 0x212E ||
        (c >= 0x2180 && c <= 0x2182) ||
        (c >= 0x3041 && c <= 0x3094) ||
        (c >= 0x30A1 && c <= 0x30FA) ||
        (c >= 0x3105 && c <= 0x312C) ||
        (c >= 0xAC00 && c <= 0xD7A3))
        return true; // BaseChar
    // Ideographic: one range, one single code point and a second range.
    if ((c >= 0x4e00 && c <= 0x9fa5) ||
        c == 0x3007 ||
        (c >= 0x3021 && c <= 0x3029))
      return true; // Ideographic
    // Neither BaseChar nor Ideographic.
    return false;
  }
3969 
3970   /**
3971    * Indicates whether the specified Unicode character matches the Digit
3972    * production.
3973    */
isDigit(int c)3974   public static boolean isDigit(int c)
3975   {
3976     return ((c >= 0x0030 && c <= 0x0039) ||
3977             (c >= 0x0660 && c <= 0x0669) ||
3978             (c >= 0x06F0 && c <= 0x06F9) ||
3979             (c >= 0x0966 && c <= 0x096F) ||
3980             (c >= 0x09E6 && c <= 0x09EF) ||
3981             (c >= 0x0A66 && c <= 0x0A6F) ||
3982             (c >= 0x0AE6 && c <= 0x0AEF) ||
3983             (c >= 0x0B66 && c <= 0x0B6F) ||
3984             (c >= 0x0BE7 && c <= 0x0BEF) ||
3985             (c >= 0x0C66 && c <= 0x0C6F) ||
3986             (c >= 0x0CE6 && c <= 0x0CEF) ||
3987             (c >= 0x0D66 && c <= 0x0D6F) ||
3988             (c >= 0x0E50 && c <= 0x0E59) ||
3989             (c >= 0x0ED0 && c <= 0x0ED9) ||
3990             (c >= 0x0F20 && c <= 0x0F29));
3991   }
3992 
  /**
   * Indicates whether the specified Unicode character matches the
   * CombiningChar production.
   * A character is accepted if it equals one of the single code points or
   * falls inside one of the inclusive ranges listed below.
   * @param c the Unicode code point to test
   * @return true if the character is in the list, false otherwise
   */
  public static boolean isCombiningChar(int c)
  {
    // Entries are listed in ascending code point order.
    return ((c >= 0x0300 && c <= 0x0345) ||
            (c >= 0x0360 && c <= 0x0361) ||
            (c >= 0x0483 && c <= 0x0486) ||
            (c >= 0x0591 && c <= 0x05A1) ||
            (c >= 0x05A3 && c <= 0x05B9) ||
            (c >= 0x05BB && c <= 0x05BD) ||
            c == 0x05BF ||
            (c >= 0x05C1 && c <= 0x05C2) ||
            c == 0x05C4 ||
            (c >= 0x064B && c <= 0x0652) ||
            c == 0x0670 ||
            (c >= 0x06D6 && c <= 0x06DC) ||
            (c >= 0x06DD && c <= 0x06DF) ||
            (c >= 0x06E0 && c <= 0x06E4) ||
            (c >= 0x06E7 && c <= 0x06E8) ||
            (c >= 0x06EA && c <= 0x06ED) ||
            (c >= 0x0901 && c <= 0x0903) ||
            c == 0x093C ||
            (c >= 0x093E && c <= 0x094C) ||
            c == 0x094D ||
            (c >= 0x0951 && c <= 0x0954) ||
            (c >= 0x0962 && c <= 0x0963) ||
            (c >= 0x0981 && c <= 0x0983) ||
            c == 0x09BC ||
            c == 0x09BE ||
            c == 0x09BF ||
            (c >= 0x09C0 && c <= 0x09C4) ||
            (c >= 0x09C7 && c <= 0x09C8) ||
            (c >= 0x09CB && c <= 0x09CD) ||
            c == 0x09D7 ||
            (c >= 0x09E2 && c <= 0x09E3) ||
            c == 0x0A02 ||
            c == 0x0A3C ||
            c == 0x0A3E ||
            c == 0x0A3F ||
            (c >= 0x0A40 && c <= 0x0A42) ||
            (c >= 0x0A47 && c <= 0x0A48) ||
            (c >= 0x0A4B && c <= 0x0A4D) ||
            (c >= 0x0A70 && c <= 0x0A71) ||
            (c >= 0x0A81 && c <= 0x0A83) ||
            c == 0x0ABC ||
            (c >= 0x0ABE && c <= 0x0AC5) ||
            (c >= 0x0AC7 && c <= 0x0AC9) ||
            (c >= 0x0ACB && c <= 0x0ACD) ||
            (c >= 0x0B01 && c <= 0x0B03) ||
            c == 0x0B3C ||
            (c >= 0x0B3E && c <= 0x0B43) ||
            (c >= 0x0B47 && c <= 0x0B48) ||
            (c >= 0x0B4B && c <= 0x0B4D) ||
            (c >= 0x0B56 && c <= 0x0B57) ||
            (c >= 0x0B82 && c <= 0x0B83) ||
            (c >= 0x0BBE && c <= 0x0BC2) ||
            (c >= 0x0BC6 && c <= 0x0BC8) ||
            (c >= 0x0BCA && c <= 0x0BCD) ||
            c == 0x0BD7 ||
            (c >= 0x0C01 && c <= 0x0C03) ||
            (c >= 0x0C3E && c <= 0x0C44) ||
            (c >= 0x0C46 && c <= 0x0C48) ||
            (c >= 0x0C4A && c <= 0x0C4D) ||
            (c >= 0x0C55 && c <= 0x0C56) ||
            (c >= 0x0C82 && c <= 0x0C83) ||
            (c >= 0x0CBE && c <= 0x0CC4) ||
            (c >= 0x0CC6 && c <= 0x0CC8) ||
            (c >= 0x0CCA && c <= 0x0CCD) ||
            (c >= 0x0CD5 && c <= 0x0CD6) ||
            (c >= 0x0D02 && c <= 0x0D03) ||
            (c >= 0x0D3E && c <= 0x0D43) ||
            (c >= 0x0D46 && c <= 0x0D48) ||
            (c >= 0x0D4A && c <= 0x0D4D) ||
            c == 0x0D57 ||
            c == 0x0E31 ||
            (c >= 0x0E34 && c <= 0x0E3A) ||
            (c >= 0x0E47 && c <= 0x0E4E) ||
            c == 0x0EB1 ||
            (c >= 0x0EB4 && c <= 0x0EB9) ||
            (c >= 0x0EBB && c <= 0x0EBC) ||
            (c >= 0x0EC8 && c <= 0x0ECD) ||
            (c >= 0x0F18 && c <= 0x0F19) ||
            c == 0x0F35 ||
            c == 0x0F37 ||
            c == 0x0F39 ||
            c == 0x0F3E ||
            c == 0x0F3F ||
            (c >= 0x0F71 && c <= 0x0F84) ||
            (c >= 0x0F86 && c <= 0x0F8B) ||
            (c >= 0x0F90 && c <= 0x0F95) ||
            c == 0x0F97 ||
            (c >= 0x0F99 && c <= 0x0FAD) ||
            (c >= 0x0FB1 && c <= 0x0FB7) ||
            c == 0x0FB9 ||
            (c >= 0x20D0 && c <= 0x20DC) ||
            c == 0x20E1 ||
            (c >= 0x302A && c <= 0x302F) ||
            c == 0x3099 ||
            c == 0x309A);
  }
4095 
4096   /**
4097    * Indicates whether the specified Unicode character matches the Extender
4098    * production.
4099    */
isExtender(int c)4100   public static boolean isExtender(int c)
4101   {
4102     return (c == 0x00B7 ||
4103             c == 0x02D0 ||
4104             c == 0x02D1 ||
4105             c == 0x0387 ||
4106             c == 0x0640 ||
4107             c == 0x0E46 ||
4108             c == 0x0EC6 ||
4109             c == 0x3005 ||
4110             (c >= 0x3031 && c <= 0x3035) ||
4111             (c >= 0x309D && c <= 0x309E) ||
4112             (c >= 0x30FC && c <= 0x30FE));
4113   }
4114 
4115   /**
4116    * Indicates whether the specified Unicode character matches the Char
4117    * production.
4118    */
isChar(int c)4119   public static boolean isChar(int c)
4120   {
4121     return (c >= 0x20 && c < 0xd800) ||
4122       (c >= 0xe00 && c < 0xfffe) ||
4123       (c >= 0x10000 && c < 0x110000) ||
4124       c == 0xa || c == 0x9 || c == 0xd;
4125   }
4126 
4127   /**
4128    * Interns the specified text or not, depending on the value of
4129    * stringInterning.
4130    */
intern(String text)4131   private String intern(String text)
4132   {
4133     return stringInterning ? text.intern() : text;
4134   }
4135 
4136   /**
4137    * Report a parsing error.
4138    */
error(String message)4139   private void error(String message)
4140     throws XMLStreamException
4141   {
4142     error(message, null);
4143   }
4144 
4145   /**
4146    * Report a parsing error.
4147    */
error(String message, Object info)4148   private void error(String message, Object info)
4149     throws XMLStreamException
4150   {
4151     if (info != null)
4152       {
4153         if (info instanceof String)
4154           message += ": \"" + ((String) info) + "\"";
4155         else if (info instanceof Character)
4156           message += ": '" + ((Character) info) + "'";
4157       }
4158     throw new XMLStreamException(message);
4159   }
4160 
4161   /**
4162    * Perform validation of a start-element event.
4163    */
validateStartElement(String elementName)4164   private void validateStartElement(String elementName)
4165     throws XMLStreamException
4166   {
4167     if (currentContentModel == null)
4168       {
4169         // root element
4170         // VC: Root Element Type
4171         if (!elementName.equals(doctype.rootName))
4172           error("root element name must match name in DTD");
4173         return;
4174       }
4175     // VC: Element Valid
4176     switch (currentContentModel.type)
4177       {
4178       case ContentModel.EMPTY:
4179         error("child element found in empty element", elementName);
4180         break;
4181       case ContentModel.ELEMENT:
4182         LinkedList ctx = (LinkedList) validationStack.getLast();
4183         ctx.add(elementName);
4184         break;
4185       case ContentModel.MIXED:
4186         MixedContentModel mm = (MixedContentModel) currentContentModel;
4187         if (!mm.containsName(elementName))
4188           error("illegal element for content model", elementName);
4189         break;
4190       }
4191   }
4192 
4193   /**
4194    * Perform validation of an end-element event.
4195    */
validateEndElement()4196   private void validateEndElement()
4197     throws XMLStreamException
4198   {
4199     if (currentContentModel == null)
4200       {
4201         // root element
4202         // VC: IDREF
4203         if (!idrefs.containsAll(ids))
4204           error("IDREF values must match the value of some ID attribute");
4205         return;
4206       }
4207     // VC: Element Valid
4208     switch (currentContentModel.type)
4209       {
4210       case ContentModel.ELEMENT:
4211         LinkedList ctx = (LinkedList) validationStack.getLast();
4212         ElementContentModel ecm = (ElementContentModel) currentContentModel;
4213         validateElementContent(ecm, ctx);
4214         break;
4215       }
4216   }
4217 
4218   /**
4219    * Perform validation of character data.
4220    */
validatePCData(String text)4221   private void validatePCData(String text)
4222     throws XMLStreamException
4223   {
4224     // VC: Element Valid
4225     switch (currentContentModel.type)
4226       {
4227       case ContentModel.EMPTY:
4228         error("character data found in empty element", text);
4229         break;
4230       case ContentModel.ELEMENT:
4231         boolean white = true;
4232         int len = text.length();
4233         for (int i = 0; i < len; i++)
4234           {
4235             char c = text.charAt(i);
4236             if (c != ' ' && c != '\t' && c != '\n' && c != '\r')
4237               {
4238                 white = false;
4239                 break;
4240               }
4241           }
4242         if (!white)
4243           error("character data found in element with element content", text);
4244         else if (xmlStandalone == Boolean.TRUE && currentContentModel.external)
4245           // VC: Standalone Document Declaration
4246           error("whitespace in element content of externally declared " +
4247                 "element in standalone document");
4248         break;
4249       }
4250   }
4251 
4252   /**
4253    * Validates the specified validation context (list of child elements)
4254    * against the element content model for the current element.
4255    */
validateElementContent(ElementContentModel model, LinkedList children)4256   private void validateElementContent(ElementContentModel model,
4257                                       LinkedList children)
4258     throws XMLStreamException
4259   {
4260     // Use regular expression
4261     CPStringBuilder buf = new CPStringBuilder();
4262     for (Iterator i = children.iterator(); i.hasNext(); )
4263       {
4264         buf.append((String) i.next());
4265         buf.append(' ');
4266       }
4267     String c = buf.toString();
4268     String regex = createRegularExpression(model);
4269     if (!c.matches(regex))
4270       error("element content "+model.text+" does not match expression "+regex, c);
4271   }
4272 
4273   /**
4274    * Creates the regular expression used to validate an element content
4275    * model.
4276    */
createRegularExpression(ElementContentModel model)4277   private String createRegularExpression(ElementContentModel model)
4278   {
4279     if (model.regex == null)
4280       {
4281         CPStringBuilder buf = new CPStringBuilder();
4282         buf.append('(');
4283         for (Iterator i = model.contentParticles.iterator(); i.hasNext(); )
4284           {
4285             ContentParticle cp = (ContentParticle) i.next();
4286             if (cp.content instanceof String)
4287               {
4288                 buf.append('(');
4289                 buf.append((String) cp.content);
4290                 buf.append(' ');
4291                 buf.append(')');
4292                 if (cp.max == -1)
4293                   {
4294                     if (cp.min == 0)
4295                       buf.append('*');
4296                     else
4297                       buf.append('+');
4298                   }
4299                 else if (cp.min == 0)
4300                   buf.append('?');
4301               }
4302             else
4303               {
4304                 ElementContentModel ecm = (ElementContentModel) cp.content;
4305                 buf.append(createRegularExpression(ecm));
4306               }
4307             if (model.or && i.hasNext())
4308               buf.append('|');
4309           }
4310         buf.append(')');
4311         if (model.max == -1)
4312           {
4313             if (model.min == 0)
4314               buf.append('*');
4315             else
4316               buf.append('+');
4317           }
4318         else if (model.min == 0)
4319           buf.append('?');
4320         model.regex = buf.toString();
4321       }
4322     return model.regex;
4323   }
4324 
4325   /**
4326    * Performs validation of a document type declaration event.
4327    */
validateDoctype()4328   void validateDoctype()
4329     throws XMLStreamException
4330   {
4331     for (Iterator i = doctype.entityIterator(); i.hasNext(); )
4332       {
4333         Map.Entry entry = (Map.Entry) i.next();
4334         Object entity = entry.getValue();
4335         if (entity instanceof ExternalIds)
4336           {
4337             ExternalIds ids = (ExternalIds) entity;
4338             if (ids.notationName != null)
4339               {
4340                 // VC: Notation Declared
4341                 ExternalIds notation = doctype.getNotation(ids.notationName);
4342                 if (notation == null)
4343                   error("Notation name must match the declared name of a " +
4344                         "notation", ids.notationName);
4345               }
4346           }
4347       }
4348   }
4349 
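  /**
   * Simple test harness for reading XML files.
   * Leading arguments that start with "-" are options: "-n" enables
   * namespace aware mode, "-v" enables validation and "-x" enables
   * XInclude processing. All remaining arguments are the filenames of the
   * XML files to read; if none is given, a usage message is printed.
   */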
main(String[] args)4355   public static void main(String[] args)
4356     throws Exception
4357   {
4358     boolean validating = false;
4359     boolean namespaceAware = false;
4360     boolean xIncludeAware = false;
4361     int pos = 0;
4362     while (pos < args.length && args[pos].startsWith("-"))
4363       {
4364         if ("-x".equals(args[pos]))
4365           xIncludeAware = true;
4366         else if ("-v".equals(args[pos]))
4367           validating = true;
4368         else if ("-n".equals(args[pos]))
4369           namespaceAware = true;
4370         pos++;
4371       }
4372     if (pos >= args.length)
4373       {
4374         System.out.println("Syntax: XMLParser [-n] [-v] [-x] <file> [<file2> [...]]");
4375         System.out.println("\t-n: use namespace aware mode");
4376         System.out.println("\t-v: use validating parser");
4377         System.out.println("\t-x: use XInclude aware mode");
4378         System.exit(2);
4379       }
4380     while (pos < args.length)
4381       {
4382         XMLParser p = new XMLParser(new java.io.FileInputStream(args[pos]),
4383                                     absolutize(null, args[pos]),
4384                                     validating, // validating
4385                                     namespaceAware, // namespaceAware
4386                                     true, // coalescing,
4387                                     true, // replaceERefs
4388                                     true, // externalEntities
4389                                     true, // supportDTD
4390                                     true, // baseAware
4391                                     true, // stringInterning
4392                                     true, // extendedEventTypes
4393                                     null,
4394                                     null);
4395         XMLStreamReader reader = p;
4396         if (xIncludeAware)
4397           reader = new XIncludeFilter(p, args[pos], true, true, true);
4398         try
4399           {
4400             int event;
4401             //do
4402             while (reader.hasNext())
4403               {
4404                 event = reader.next();
4405                 Location loc = reader.getLocation();
4406                 System.out.print(loc.getLineNumber() + ":" +
4407                                  loc.getColumnNumber() + " ");
4408                 switch (event)
4409                   {
4410                   case XMLStreamConstants.START_DOCUMENT:
4411                     System.out.println("START_DOCUMENT version=" +
4412                                        reader.getVersion() +
4413                                        " encoding=" +
4414                                        reader.getEncoding());
4415                     break;
4416                   case XMLStreamConstants.END_DOCUMENT:
4417                     System.out.println("END_DOCUMENT");
4418                     break;
4419                   case XMLStreamConstants.START_ELEMENT:
4420                     System.out.println("START_ELEMENT " +
4421                                        reader.getName());
4422                     int l = reader.getNamespaceCount();
4423                     for (int i = 0; i < l; i++)
4424                       System.out.println("\tnamespace " +
4425                                          reader.getNamespacePrefix(i) + "='" +
4426                                          reader.getNamespaceURI(i)+"'");
4427                     l = reader.getAttributeCount();
4428                     for (int i = 0; i < l; i++)
4429                       System.out.println("\tattribute " +
4430                                          reader.getAttributeName(i) + "='" +
4431                                          reader.getAttributeValue(i) + "'");
4432                     break;
4433                   case XMLStreamConstants.END_ELEMENT:
4434                     System.out.println("END_ELEMENT " + reader.getName());
4435                     break;
4436                   case XMLStreamConstants.CHARACTERS:
4437                     System.out.println("CHARACTERS '" +
4438                                        encodeText(reader.getText()) + "'");
4439                     break;
4440                   case XMLStreamConstants.CDATA:
4441                     System.out.println("CDATA '" +
4442                                        encodeText(reader.getText()) + "'");
4443                     break;
4444                   case XMLStreamConstants.SPACE:
4445                     System.out.println("SPACE '" +
4446                                        encodeText(reader.getText()) + "'");
4447                     break;
4448                   case XMLStreamConstants.DTD:
4449                     System.out.println("DTD " + reader.getText());
4450                     break;
4451                   case XMLStreamConstants.ENTITY_REFERENCE:
4452                     System.out.println("ENTITY_REFERENCE " + reader.getText());
4453                     break;
4454                   case XMLStreamConstants.COMMENT:
4455                     System.out.println("COMMENT '" +
4456                                        encodeText(reader.getText()) + "'");
4457                     break;
4458                   case XMLStreamConstants.PROCESSING_INSTRUCTION:
4459                     System.out.println("PROCESSING_INSTRUCTION " +
4460                                        reader.getPITarget() + " " +
4461                                        reader.getPIData());
4462                     break;
4463                   case START_ENTITY:
4464                     System.out.println("START_ENTITY " + reader.getText());
4465                     break;
4466                   case END_ENTITY:
4467                     System.out.println("END_ENTITY " + reader.getText());
4468                     break;
4469                   default:
4470                     System.out.println("Unknown event: " + event);
4471                   }
4472               }
4473           }
4474         catch (XMLStreamException e)
4475           {
4476             Location l = reader.getLocation();
4477             System.out.println("At line "+l.getLineNumber()+
4478                                ", column "+l.getColumnNumber()+
4479                                " of "+l.getSystemId());
4480             throw e;
4481           }
4482         pos++;
4483       }
4484   }
4485 
4486   /**
4487    * Escapes control characters in the specified text. For debugging.
4488    */
encodeText(String text)4489   private static String encodeText(String text)
4490   {
4491     CPStringBuilder b = new CPStringBuilder();
4492     int len = text.length();
4493     for (int i = 0; i < len; i++)
4494       {
4495         char c = text.charAt(i);
4496         switch (c)
4497           {
4498           case '\t':
4499             b.append("\\t");
4500             break;
4501           case '\n':
4502             b.append("\\n");
4503             break;
4504           case '\r':
4505             b.append("\\r");
4506             break;
4507           default:
4508             b.append(c);
4509           }
4510       }
4511     return b.toString();
4512   }
4513 
4514   /**
4515    * An attribute instance.
4516    */
4517   class Attribute
4518   {
4519 
4520     /**
4521      * Attribute name.
4522      */
4523     final String name;
4524 
4525     /**
4526      * Attribute type as declared in the DTD, or CDATA otherwise.
4527      */
4528     final String type;
4529 
4530     /**
4531      * Whether the attribute was specified or defaulted.
4532      */
4533     final boolean specified;
4534 
4535     /**
4536      * The attribute value.
4537      */
4538     final String value;
4539 
4540     /**
4541      * The namespace prefix.
4542      */
4543     final String prefix;
4544 
4545     /**
4546      * The namespace local-name.
4547      */
4548     final String localName;
4549 
Attribute(String name, String type, boolean specified, String value)4550     Attribute(String name, String type, boolean specified, String value)
4551     {
4552       this.name = name;
4553       this.type = type;
4554       this.specified = specified;
4555       this.value = value;
4556       int ci = name.indexOf(':');
4557       if (ci == -1)
4558         {
4559           prefix = null;
4560           localName = intern(name);
4561         }
4562       else
4563         {
4564           prefix = intern(name.substring(0, ci));
4565           localName = intern(name.substring(ci + 1));
4566         }
4567     }
4568 
equals(Object other)4569     public boolean equals(Object other)
4570     {
4571       if (other instanceof Attribute)
4572         {
4573           Attribute a = (Attribute) other;
4574           if (namespaceAware)
4575             {
4576               if (!a.localName.equals(localName))
4577                 return false;
4578               String auri = getNamespaceURI(a.prefix);
4579               String uri = getNamespaceURI(prefix);
4580               if (uri == null && (auri == null ||
4581                                   (input.xml11 && "".equals(auri))))
4582                return true;
4583               if (uri != null)
4584                 {
4585                   if ("".equals(uri) && input.xml11 && "".equals(auri))
4586                     return true;
4587                   return uri.equals(auri);
4588                 }
4589               return false;
4590             }
4591           else
4592             return a.name.equals(name);
4593         }
4594       return false;
4595     }
4596 
toString()4597     public String toString()
4598     {
4599       CPStringBuilder buf = new CPStringBuilder(getClass().getName());
4600       buf.append('[');
4601       buf.append("name=");
4602       buf.append(name);
4603       if (value != null)
4604         {
4605           buf.append(",value=");
4606           buf.append(value);
4607         }
4608       if (type != null)
4609         {
4610           buf.append(",type=");
4611           buf.append(type);
4612         }
4613       if (specified)
4614         buf.append(",specified");
4615       buf.append(']');
4616       return buf.toString();
4617     }
4618 
4619   }
4620 
4621   /**
4622    * Representation of a DTD.
4623    */
4624   class Doctype
4625   {
4626 
4627     /**
4628      * Name of the root element.
4629      */
4630     final String rootName;
4631 
4632     /**
4633      * Public ID, if any, of external subset.
4634      */
4635     final String publicId;
4636 
4637     /**
4638      * System ID (URL), if any, of external subset.
4639      */
4640     final String systemId;
4641 
4642     /**
4643      * Map of element names to content models.
4644      */
4645     private final LinkedHashMap elements = new LinkedHashMap();
4646 
4647     /**
4648      * Map of element names to maps of attribute declarations.
4649      */
4650     private final LinkedHashMap attlists = new LinkedHashMap();
4651 
4652     /**
4653      * Map of entity names to entities (String or ExternalIds).
4654      */
4655     private final LinkedHashMap entities = new LinkedHashMap();
4656 
4657     /**
4658      * Map of notation names to ExternalIds.
4659      */
4660     private final LinkedHashMap notations = new LinkedHashMap();
4661 
4662     /**
4663      * Map of anonymous keys to comments.
4664      */
4665     private final LinkedHashMap comments = new LinkedHashMap();
4666 
4667     /**
4668      * Map of anonymous keys to processing instructions (String[2]
4669      * containing {target, data}).
4670      */
4671     private final LinkedHashMap pis = new LinkedHashMap();
4672 
4673     /**
4674      * List of keys to all markup entries in the DTD.
4675      */
4676     private final LinkedList entries = new LinkedList();
4677 
4678     /**
4679      * Set of the entities defined in the external subset.
4680      */
4681     private final HashSet externalEntities = new HashSet();
4682 
4683     /**
4684      * Set of the notations defined in the external subset.
4685      */
4686     private final HashSet externalNotations = new HashSet();
4687 
4688     /**
4689      * Counter for making anonymous keys.
4690      */
4691     private int anon = 1;
4692 
4693     /**
4694      * Constructor.
4695      */
Doctype(String rootName, String publicId, String systemId)4696     Doctype(String rootName, String publicId, String systemId)
4697     {
4698       this.rootName = rootName;
4699       this.publicId = publicId;
4700       this.systemId = systemId;
4701     }
4702 
4703     /**
4704      * Adds an element declaration.
4705      * @param name the element name
4706      * @param text the content model text
4707      * @param model the parsed content model
4708      */
addElementDecl(String name, String text, ContentModel model)4709     void addElementDecl(String name, String text, ContentModel model)
4710     {
4711       if (elements.containsKey(name))
4712         return;
4713       model.text = text;
4714       model.external = (inputStack.size() != 1);
4715       elements.put(name, model);
4716       entries.add("E" + name);
4717     }
4718 
4719     /**
4720      * Adds an attribute declaration.
4721      * @param ename the element name
4722      * @param aname the attribute name
4723      * @param decl the attribute declaration details
4724      */
addAttributeDecl(String ename, String aname, AttributeDecl decl)4725     void addAttributeDecl(String ename, String aname, AttributeDecl decl)
4726     {
4727       LinkedHashMap attlist = (LinkedHashMap) attlists.get(ename);
4728       if (attlist == null)
4729         {
4730           attlist = new LinkedHashMap();
4731           attlists.put(ename, attlist);
4732         }
4733       else if (attlist.containsKey(aname))
4734         return;
4735       attlist.put(aname, decl);
4736       String key = "A" + ename;
4737       if (!entries.contains(key))
4738         entries.add(key);
4739     }
4740 
4741     /**
4742      * Adds an entity declaration.
4743      * @param name the entity name
4744      * @param text the entity replacement text
4745      * @param inExternalSubset if we are in the exernal subset
4746      */
addEntityDecl(String name, String text, boolean inExternalSubset)4747     void addEntityDecl(String name, String text, boolean inExternalSubset)
4748     {
4749       if (entities.containsKey(name))
4750         return;
4751       entities.put(name, text);
4752       entries.add("e" + name);
4753       if (inExternalSubset)
4754         externalEntities.add(name);
4755     }
4756 
4757     /**
4758      * Adds an entity declaration.
4759      * @param name the entity name
4760      * @param ids the external IDs
4761      * @param inExternalSubset if we are in the exernal subset
4762      */
addEntityDecl(String name, ExternalIds ids, boolean inExternalSubset)4763     void addEntityDecl(String name, ExternalIds ids, boolean inExternalSubset)
4764     {
4765       if (entities.containsKey(name))
4766         return;
4767       entities.put(name, ids);
4768       entries.add("e" + name);
4769       if (inExternalSubset)
4770         externalEntities.add(name);
4771     }
4772 
4773     /**
4774      * Adds a notation declaration.
4775      * @param name the notation name
4776      * @param ids the external IDs
4777      * @param inExternalSubset if we are in the exernal subset
4778      */
addNotationDecl(String name, ExternalIds ids, boolean inExternalSubset)4779     void addNotationDecl(String name, ExternalIds ids, boolean inExternalSubset)
4780     {
4781       if (notations.containsKey(name))
4782         return;
4783       notations.put(name, ids);
4784       entries.add("n" + name);
4785       if (inExternalSubset)
4786         externalNotations.add(name);
4787     }
4788 
4789     /**
4790      * Adds a comment.
4791      */
addComment(String text)4792     void addComment(String text)
4793     {
4794       String key = Integer.toString(anon++);
4795       comments.put(key, text);
4796       entries.add("c" + key);
4797     }
4798 
4799     /**
4800      * Adds a processing instruction.
4801      */
addPI(String target, String data)4802     void addPI(String target, String data)
4803     {
4804       String key = Integer.toString(anon++);
4805       pis.put(key, new String[] {target, data});
4806       entries.add("p" + key);
4807     }
4808 
4809     /**
4810      * Returns the content model for the specified element.
4811      * @param name the element name
4812      */
getElementModel(String name)4813     ContentModel getElementModel(String name)
4814     {
4815       return (ContentModel) elements.get(name);
4816     }
4817 
4818     /**
4819      * Returns the attribute definition for the given attribute
4820      * @param ename the element name
4821      * @param aname the attribute name
4822      */
getAttributeDecl(String ename, String aname)4823     AttributeDecl getAttributeDecl(String ename, String aname)
4824     {
4825       LinkedHashMap attlist = (LinkedHashMap) attlists.get(ename);
4826       return (attlist == null) ? null : (AttributeDecl) attlist.get(aname);
4827     }
4828 
4829     /**
4830      * Indicates whether the specified attribute was declared in the DTD.
4831      * @param ename the element name
4832      * @param aname the attribute name
4833      */
isAttributeDeclared(String ename, String aname)4834     boolean isAttributeDeclared(String ename, String aname)
4835     {
4836       LinkedHashMap attlist = (LinkedHashMap) attlists.get(ename);
4837       return (attlist == null) ? false : attlist.containsKey(aname);
4838     }
4839 
4840     /**
4841      * Returns an iterator over the entries in the attribute list for the
4842      * given element.
4843      * @param ename the element name
4844      */
attlistIterator(String ename)4845     Iterator attlistIterator(String ename)
4846     {
4847       LinkedHashMap attlist = (LinkedHashMap) attlists.get(ename);
4848       return (attlist == null) ? Collections.EMPTY_LIST.iterator() :
4849         attlist.entrySet().iterator();
4850     }
4851 
4852     /**
4853      * Returns the entity (String or ExternalIds) for the given entity name.
4854      */
getEntity(String name)4855     Object getEntity(String name)
4856     {
4857       return entities.get(name);
4858     }
4859 
4860     /**
4861      * Indicates whether the specified entity was declared in the external
4862      * subset.
4863      */
isEntityExternal(String name)4864     boolean isEntityExternal(String name)
4865     {
4866       return externalEntities.contains(name);
4867     }
4868 
4869     /**
4870      * Returns an iterator over the entity map entries.
4871      */
entityIterator()4872     Iterator entityIterator()
4873     {
4874       return entities.entrySet().iterator();
4875     }
4876 
4877     /**
4878      * Returns the notation IDs for the given notation name.
4879      */
getNotation(String name)4880     ExternalIds getNotation(String name)
4881     {
4882       return (ExternalIds) notations.get(name);
4883     }
4884 
4885     /**
4886      * Indicates whether the specified notation was declared in the external
4887      * subset.
4888      */
isNotationExternal(String name)4889     boolean isNotationExternal(String name)
4890     {
4891       return externalNotations.contains(name);
4892     }
4893 
4894     /**
4895      * Returns the comment associated with the specified (anonymous) key.
4896      */
getComment(String key)4897     String getComment(String key)
4898     {
4899       return (String) comments.get(key);
4900     }
4901 
4902     /**
4903      * Returns the processing instruction associated with the specified
4904      * (anonymous) key.
4905      */
getPI(String key)4906     String[] getPI(String key)
4907     {
4908       return (String[]) pis.get(key);
4909     }
4910 
4911     /**
4912      * Returns an iterator over the keys of the markup entries in this DTD,
4913      * in the order declared.
4914      */
entryIterator()4915     Iterator entryIterator()
4916     {
4917       return entries.iterator();
4918     }
4919 
4920   }
4921 
4922   /**
4923    * Combination of an ExternalID and an optional NDataDecl.
4924    */
4925   class ExternalIds
4926   {
4927 
4928     /**
4929      * The public ID.
4930      */
4931     String publicId;
4932 
4933     /**
4934      * The system ID.
4935      */
4936     String systemId;
4937 
4938     /**
4939      * The notation name declared with the NDATA keyword.
4940      */
4941     String notationName;
4942   }
4943 
4944   /**
4945    * A content model.
4946    */
4947   abstract class ContentModel
4948   {
4949     static final int EMPTY = 0;
4950     static final int ANY = 1;
4951     static final int ELEMENT = 2;
4952     static final int MIXED = 3;
4953 
4954     int min;
4955     int max;
4956     final int type;
4957     String text;
4958     boolean external;
4959 
ContentModel(int type)4960     ContentModel(int type)
4961     {
4962       this.type = type;
4963       min = 1;
4964       max = 1;
4965     }
4966 
4967   }
4968 
4969   /**
4970    * The EMPTY content model.
4971    */
4972   class EmptyContentModel
4973     extends ContentModel
4974   {
4975 
EmptyContentModel()4976     EmptyContentModel()
4977     {
4978       super(ContentModel.EMPTY);
4979       min = 0;
4980       max = 0;
4981     }
4982 
4983   }
4984 
4985   /**
4986    * The ANY content model.
4987    */
4988   class AnyContentModel
4989     extends ContentModel
4990   {
4991 
AnyContentModel()4992     AnyContentModel()
4993     {
4994       super(ContentModel.ANY);
4995       min = 0;
4996       max = -1;
4997     }
4998 
4999   }
5000 
5001   /**
5002    * An element content model.
5003    */
5004   class ElementContentModel
5005     extends ContentModel
5006   {
5007 
5008     LinkedList contentParticles;
5009     boolean or;
5010     String regex; // regular expression cache
5011 
ElementContentModel()5012     ElementContentModel()
5013     {
5014       super(ContentModel.ELEMENT);
5015       contentParticles = new LinkedList();
5016     }
5017 
addContentParticle(ContentParticle cp)5018     void addContentParticle(ContentParticle cp)
5019     {
5020       contentParticles.add(cp);
5021     }
5022 
5023   }
5024 
5025   class ContentParticle
5026   {
5027 
5028     int min = 1;
5029     int max = 1;
5030     Object content; // Name (String) or ElementContentModel
5031 
5032   }
5033 
5034   /**
5035    * A mixed content model.
5036    */
5037   class MixedContentModel
5038     extends ContentModel
5039   {
5040 
5041     private HashSet names;
5042 
MixedContentModel()5043     MixedContentModel()
5044     {
5045       super(ContentModel.MIXED);
5046       names = new HashSet();
5047     }
5048 
addName(String name)5049     void addName(String name)
5050     {
5051       names.add(name);
5052     }
5053 
containsName(String name)5054     boolean containsName(String name)
5055     {
5056       return names.contains(name);
5057     }
5058 
5059   }
5060 
5061   /**
5062    * An attribute definition.
5063    */
5064   class AttributeDecl
5065   {
5066 
5067     /**
5068      * The attribute type (CDATA, ID, etc).
5069      */
5070     final String type;
5071 
5072     /**
5073      * The default value.
5074      */
5075     final String value;
5076 
5077     /**
5078      * The value type (#FIXED, #IMPLIED, etc).
5079      */
5080     final int valueType;
5081 
5082     /**
5083      * The enumeration text.
5084      */
5085     final String enumeration;
5086 
5087     /**
5088      * The enumeration tokens.
5089      */
5090     final HashSet values;
5091 
5092     /**
5093      * Whether this attribute declaration occurred in the external subset.
5094      */
5095     final boolean external;
5096 
AttributeDecl(String type, String value, int valueType, String enumeration, HashSet values, boolean external)5097     AttributeDecl(String type, String value,
5098                   int valueType, String enumeration,
5099                   HashSet values, boolean external)
5100     {
5101       this.type = type;
5102       this.value = value;
5103       this.valueType = valueType;
5104       this.enumeration = enumeration;
5105       this.values = values;
5106       this.external = external;
5107     }
5108 
5109   }
5110 
5111   /**
5112    * An XML input source.
5113    */
5114   static class Input
5115     implements Location
5116   {
5117 
5118     int line = 1, markLine;
5119     int column, markColumn;
5120     int offset, markOffset;
5121     final String publicId, systemId, name;
5122     final boolean report; // report start- and end-entity
5123     final boolean normalize; // normalize CR, etc to LF
5124 
5125     InputStream in;
5126     Reader reader;
5127     UnicodeReader unicodeReader;
5128     boolean initialized;
5129     boolean encodingDetected;
5130     String inputEncoding;
5131     boolean xml11;
5132 
Input(InputStream in, Reader reader, String publicId, String systemId, String name, String inputEncoding, boolean report, boolean normalize)5133     Input(InputStream in, Reader reader, String publicId, String systemId,
5134           String name, String inputEncoding, boolean report,
5135           boolean normalize)
5136     {
5137       if (inputEncoding == null)
5138         inputEncoding = "UTF-8";
5139       this.inputEncoding = inputEncoding;
5140       this.publicId = publicId;
5141       this.systemId = systemId;
5142       this.name = name;
5143       this.report = report;
5144       this.normalize = normalize;
5145       if (in != null)
5146         {
5147           if (reader != null)
5148             throw new IllegalStateException("both byte and char streams "+
5149                                             "specified");
5150           if (normalize)
5151             in = new CRLFInputStream(in);
5152           in = new BufferedInputStream(in);
5153           this.in = in;
5154         }
5155       else
5156         {
5157           this.reader = normalize ? new CRLFReader(reader) : reader;
5158           unicodeReader = new UnicodeReader(this.reader);
5159         }
5160       initialized = false;
5161     }
5162 
5163     // -- Location --
5164 
getCharacterOffset()5165     public int getCharacterOffset()
5166     {
5167       return offset;
5168     }
5169 
getColumnNumber()5170     public int getColumnNumber()
5171     {
5172       return column;
5173     }
5174 
getLineNumber()5175     public int getLineNumber()
5176     {
5177       return line;
5178     }
5179 
getPublicId()5180     public String getPublicId()
5181     {
5182       return publicId;
5183     }
5184 
getSystemId()5185     public String getSystemId()
5186     {
5187       return systemId;
5188     }
5189 
init()5190     void init()
5191       throws IOException
5192     {
5193       if (initialized)
5194         return;
5195       if (in != null)
5196         detectEncoding();
5197       initialized = true;
5198     }
5199 
mark(int len)5200     void mark(int len)
5201       throws IOException
5202     {
5203       markOffset = offset;
5204       markLine = line;
5205       markColumn = column;
5206       if (unicodeReader != null)
5207         unicodeReader.mark(len);
5208       else
5209         in.mark(len);
5210     }
5211 
5212     /**
5213      * Character read.
5214      */
read()5215     int read()
5216       throws IOException
5217     {
5218       offset++;
5219       int ret = (unicodeReader != null) ? unicodeReader.read() : in.read();
5220       if (normalize &&
5221           (ret == 0x0d || (xml11 && (ret == 0x85 || ret == 0x2028))))
5222         {
5223           // Normalize CR etc to LF
5224           ret = 0x0a;
5225         }
5226       // Locator handling
5227       if (ret == 0x0a)
5228         {
5229           line++;
5230           column = 0;
5231         }
5232       else
5233         column++;
5234       return ret;
5235     }
5236 
5237     /**
5238      * Block read.
5239      */
read(int[] b, int off, int len)5240     int read(int[] b, int off, int len)
5241       throws IOException
5242     {
5243       int ret;
5244       if (unicodeReader != null)
5245         {
5246           ret = unicodeReader.read(b, off, len);
5247         }
5248       else
5249         {
5250           byte[] b2 = new byte[len];
5251           ret = in.read(b2, 0, len);
5252           if (ret != -1)
5253             {
5254               String s = new String(b2, 0, ret, inputEncoding);
5255               int[] c = UnicodeReader.toCodePointArray(s);
5256               ret = c.length;
5257               System.arraycopy(c, 0, b, off, ret);
5258             }
5259         }
5260       if (ret != -1)
5261         {
5262           // Locator handling
5263           for (int i = 0; i < ret; i++)
5264             {
5265               int c = b[off + i];
5266               if (normalize &&
5267                   (c == 0x0d || (xml11 && (c == 0x85 || c == 0x2028))))
5268                 {
5269                   // Normalize CR etc to LF
5270                   c = 0x0a;
5271                   b[off + i] = c;
5272                 }
5273               if (c == 0x0a)
5274                 {
5275                   line++;
5276                   column = 0;
5277                 }
5278               else
5279                 column++;
5280             }
5281         }
5282       return ret;
5283     }
5284 
reset()5285     void reset()
5286       throws IOException
5287     {
5288       if (unicodeReader != null)
5289         unicodeReader.reset();
5290       else
5291         in.reset();
5292       offset = markOffset;
5293       line = markLine;
5294       column = markColumn;
5295     }
5296 
5297     // Detection of input encoding
5298 
5299     private static final int[] SIGNATURE_UCS_4_1234 =
5300       new int[] { 0x00, 0x00, 0x00, 0x3c };
5301     private static final int[] SIGNATURE_UCS_4_4321 =
5302       new int[] { 0x3c, 0x00, 0x00, 0x00 };
5303     private static final int[] SIGNATURE_UCS_4_2143 =
5304       new int[] { 0x00, 0x00, 0x3c, 0x00 };
5305     private static final int[] SIGNATURE_UCS_4_3412 =
5306       new int[] { 0x00, 0x3c, 0x00, 0x00 };
5307     private static final int[] SIGNATURE_UCS_2_12 =
5308       new int[] { 0xfe, 0xff };
5309     private static final int[] SIGNATURE_UCS_2_21 =
5310       new int[] { 0xff, 0xfe };
5311     private static final int[] SIGNATURE_UCS_2_12_NOBOM =
5312       new int[] { 0x00, 0x3c, 0x00, 0x3f };
5313     private static final int[] SIGNATURE_UCS_2_21_NOBOM =
5314       new int[] { 0x3c, 0x00, 0x3f, 0x00 };
5315     private static final int[] SIGNATURE_UTF_8 =
5316       new int[] { 0x3c, 0x3f, 0x78, 0x6d };
5317     private static final int[] SIGNATURE_UTF_8_BOM =
5318       new int[] { 0xef, 0xbb, 0xbf };
5319 
5320     /**
5321      * Detect the input encoding.
5322      */
detectEncoding()5323     private void detectEncoding()
5324       throws IOException
5325     {
5326       int[] signature = new int[4];
5327       in.mark(4);
5328       for (int i = 0; i < 4; i++)
5329         signature[i] = in.read();
5330       in.reset();
5331 
5332       // 4-byte encodings
5333       if (equals(SIGNATURE_UCS_4_1234, signature))
5334         {
5335           in.read();
5336           in.read();
5337           in.read();
5338           in.read();
5339           setInputEncoding("UTF-32BE");
5340           encodingDetected = true;
5341         }
5342       else if (equals(SIGNATURE_UCS_4_4321, signature))
5343         {
5344           in.read();
5345           in.read();
5346           in.read();
5347           in.read();
5348           setInputEncoding("UTF-32LE");
5349           encodingDetected = true;
5350         }
5351       else if (equals(SIGNATURE_UCS_4_2143, signature) ||
5352                equals(SIGNATURE_UCS_4_3412, signature))
5353         throw new UnsupportedEncodingException("unsupported UCS-4 byte ordering");
5354 
5355       // 2-byte encodings
5356       else if (equals(SIGNATURE_UCS_2_12, signature))
5357         {
5358           in.read();
5359           in.read();
5360           setInputEncoding("UTF-16BE");
5361           encodingDetected = true;
5362         }
5363       else if (equals(SIGNATURE_UCS_2_21, signature))
5364         {
5365           in.read();
5366           in.read();
5367           setInputEncoding("UTF-16LE");
5368           encodingDetected = true;
5369         }
5370       else if (equals(SIGNATURE_UCS_2_12_NOBOM, signature))
5371         {
5372           //setInputEncoding("UTF-16BE");
5373           throw new UnsupportedEncodingException("no byte-order mark for UCS-2 entity");
5374         }
5375       else if (equals(SIGNATURE_UCS_2_21_NOBOM, signature))
5376         {
5377           //setInputEncoding("UTF-16LE");
5378           throw new UnsupportedEncodingException("no byte-order mark for UCS-2 entity");
5379         }
5380       // ASCII-derived encodings
5381       else if (equals(SIGNATURE_UTF_8, signature))
5382         {
5383           // UTF-8 input encoding implied, TextDecl
5384         }
5385       else if (equals(SIGNATURE_UTF_8_BOM, signature))
5386         {
5387           in.read();
5388           in.read();
5389           in.read();
5390           setInputEncoding("UTF-8");
5391           encodingDetected = true;
5392         }
5393     }
5394 
equals(int[] b1, int[] b2)5395     private static boolean equals(int[] b1, int[] b2)
5396     {
5397       for (int i = 0; i < b1.length; i++)
5398         {
5399           if (b1[i] != b2[i])
5400             return false;
5401         }
5402       return true;
5403     }
5404 
setInputEncoding(String encoding)5405     void setInputEncoding(String encoding)
5406       throws IOException
5407     {
5408       if (encoding.equals(inputEncoding))
5409         return;
5410       if ("UTF-16".equalsIgnoreCase(encoding) &&
5411           inputEncoding.startsWith("UTF-16"))
5412         return;
5413       if (encodingDetected)
5414         throw new UnsupportedEncodingException("document is not in its " +
5415                                                "declared encoding " +
5416                                                inputEncoding +
5417                                                ": " + encoding);
5418       inputEncoding = encoding;
5419       finalizeEncoding();
5420     }
5421 
finalizeEncoding()5422     void finalizeEncoding()
5423       throws IOException
5424     {
5425       if (reader != null)
5426         return;
5427       reader = new BufferedReader(new InputStreamReader(in, inputEncoding));
5428       unicodeReader = new UnicodeReader(reader);
5429       mark(1);
5430     }
5431 
5432   }
5433 
5434 }
5435