1 /* XMLParser.java -- 2 Copyright (C) 2005 Free Software Foundation, Inc. 3 4 This file is part of GNU Classpath. 5 6 GNU Classpath is free software; you can redistribute it and/or modify 7 it under the terms of the GNU General Public License as published by 8 the Free Software Foundation; either version 2, or (at your option) 9 any later version. 10 11 GNU Classpath is distributed in the hope that it will be useful, but 12 WITHOUT ANY WARRANTY; without even the implied warranty of 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 General Public License for more details. 15 16 You should have received a copy of the GNU General Public License 17 along with GNU Classpath; see the file COPYING. If not, write to the 18 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 19 02110-1301 USA. 20 21 Linking this library statically or dynamically with other modules is 22 making a combined work based on this library. Thus, the terms and 23 conditions of the GNU General Public License cover the whole 24 combination. 25 26 As a special exception, the copyright holders of this library give you 27 permission to link this library with independent modules to produce an 28 executable, regardless of the license terms of these independent 29 modules, and to copy and distribute the resulting executable under 30 terms of your choice, provided that you also meet, for each linked 31 independent module, the terms and conditions of the license of that 32 module. An independent module is a module which is not derived from 33 or based on this library. If you modify this library, you may extend 34 this exception to your version of the library, but you are not 35 obligated to do so. If you do not wish to do so, delete this 36 exception statement from your version. 37 38 Partly derived from code which carried the following notice: 39 40 Copyright (c) 1997, 1998 by Microstar Software Ltd. 
41 42 AElfred is free for both commercial and non-commercial use and 43 redistribution, provided that Microstar's copyright and disclaimer are 44 retained intact. You are free to modify AElfred for your own use and 45 to redistribute AElfred with your modifications, provided that the 46 modifications are clearly documented. 47 48 This program is distributed in the hope that it will be useful, but 49 WITHOUT ANY WARRANTY; without even the implied warranty of 50 merchantability or fitness for a particular purpose. Please use it AT 51 YOUR OWN RISK. 52 */ 53 54 package gnu.xml.stream; 55 56 import gnu.java.lang.CPStringBuilder; 57 58 import java.io.BufferedInputStream; 59 import java.io.EOFException; 60 import java.io.File; 61 import java.io.FileOutputStream; 62 import java.io.FileWriter; 63 import java.io.InputStream; 64 import java.io.InputStreamReader; 65 import java.io.IOException; 66 import java.io.Reader; 67 import java.io.StringReader; 68 import java.io.UnsupportedEncodingException; 69 import java.net.MalformedURLException; 70 import java.net.URL; 71 import java.util.ArrayList; 72 import java.util.Collections; 73 import java.util.HashSet; 74 import java.util.Iterator; 75 import java.util.LinkedHashMap; 76 import java.util.LinkedList; 77 import java.util.Map; 78 import java.util.NoSuchElementException; 79 import java.util.StringTokenizer; 80 81 import javax.xml.XMLConstants; 82 import javax.xml.namespace.NamespaceContext; 83 import javax.xml.namespace.QName; 84 import javax.xml.stream.Location; 85 import javax.xml.stream.XMLInputFactory; 86 import javax.xml.stream.XMLReporter; 87 import javax.xml.stream.XMLResolver; 88 import javax.xml.stream.XMLStreamConstants; 89 import javax.xml.stream.XMLStreamException; 90 import javax.xml.stream.XMLStreamReader; 91 92 import gnu.java.net.CRLFInputStream; 93 import gnu.classpath.debug.TeeInputStream; 94 import gnu.classpath.debug.TeeReader; 95 96 /** 97 * An XML parser. 
98 * This parser supports the following additional StAX properties: 99 * <table> 100 * <tr><td>gnu.xml.stream.stringInterning</td> 101 * <td>Boolean</td> 102 * <td>Indicates whether markup strings will be interned</td></tr> 103 * <tr><td>gnu.xml.stream.xmlBase</td> 104 * <td>Boolean</td> 105 * <td>Indicates whether XML Base processing will be performed</td></tr> 106 * <tr><td>gnu.xml.stream.baseURI</td> 107 * <td>String</td> 108 * <td>Returns the base URI of the current event</td></tr> 109 * </table> 110 * 111 * @see http://www.w3.org/TR/REC-xml/ 112 * @see http://www.w3.org/TR/xml11/ 113 * @see http://www.w3.org/TR/REC-xml-names 114 * @see http://www.w3.org/TR/xml-names11 115 * @see http://www.w3.org/TR/xmlbase/ 116 * 117 * @author <a href='mailto:dog@gnu.org'>Chris Burdess</a> 118 */ 119 public class XMLParser 120 implements XMLStreamReader, NamespaceContext 121 { 122 123 // -- parser state machine states -- 124 private static final int INIT = 0; // start state 125 private static final int PROLOG = 1; // in prolog 126 private static final int CONTENT = 2; // in content 127 private static final int EMPTY_ELEMENT = 3; // empty element state 128 private static final int MISC = 4; // in Misc (after root element) 129 130 // -- parameters for parsing literals -- 131 private final static int LIT_ENTITY_REF = 2; 132 private final static int LIT_NORMALIZE = 4; 133 private final static int LIT_ATTRIBUTE = 8; 134 private final static int LIT_DISABLE_PE = 16; 135 private final static int LIT_DISABLE_CREF = 32; 136 private final static int LIT_DISABLE_EREF = 64; 137 private final static int LIT_PUBID = 256; 138 139 // -- types of attribute values -- 140 final static int ATTRIBUTE_DEFAULT_UNDECLARED = 30; 141 final static int ATTRIBUTE_DEFAULT_SPECIFIED = 31; 142 final static int ATTRIBUTE_DEFAULT_IMPLIED = 32; 143 final static int ATTRIBUTE_DEFAULT_REQUIRED = 33; 144 final static int ATTRIBUTE_DEFAULT_FIXED = 34; 145 146 // -- additional event types -- 147 final static int 
START_ENTITY = 50; 148 final static int END_ENTITY = 51; 149 150 /** 151 * The current input. 152 */ 153 private Input input; 154 155 /** 156 * Stack of inputs representing XML general entities. 157 * The input representing the XML input stream or reader is always the 158 * first element in this stack. 159 */ 160 private LinkedList inputStack = new LinkedList(); 161 162 /** 163 * Stack of start-entity events to be reported. 164 */ 165 private LinkedList startEntityStack = new LinkedList(); 166 167 /** 168 * Stack of end-entity events to be reported. 169 */ 170 private LinkedList endEntityStack = new LinkedList(); 171 172 /** 173 * Current parser state within the main state machine. 174 */ 175 private int state = INIT; 176 177 /** 178 * The (type of the) current event. 179 */ 180 private int event; 181 182 /** 183 * The element name stack. The first element in this stack will be the 184 * root element. 185 */ 186 private LinkedList stack = new LinkedList(); 187 188 /** 189 * Stack of namespace contexts. These are maps specifying prefix-to-URI 190 * mappings. The first element in this stack is the most recent namespace 191 * context (i.e. the other way around from the element name stack). 192 */ 193 private LinkedList namespaces = new LinkedList(); 194 195 /** 196 * The base-URI stack. This holds the base URI context for each element. 197 * The first element in this stack is the most recent context (i.e. the 198 * other way around from the element name stack). 199 */ 200 private LinkedList bases = new LinkedList(); 201 202 /** 203 * The list of attributes for the current element, in the order defined in 204 * the XML stream. 205 */ 206 private ArrayList attrs = new ArrayList(); 207 208 /** 209 * Buffer for text and character data. 210 */ 211 private StringBuffer buf = new StringBuffer(); 212 213 /** 214 * Buffer for NMTOKEN strings (markup). 215 */ 216 private StringBuffer nmtokenBuf = new StringBuffer(); 217 218 /** 219 * Buffer for string literals. (e.g. 
attribute values) 220 */ 221 private StringBuffer literalBuf = new StringBuffer(); 222 223 /** 224 * Temporary Unicode character buffer used during character data reads. 225 */ 226 private int[] tmpBuf = new int[1024]; 227 228 /** 229 * The element content model for the current element. 230 */ 231 private ContentModel currentContentModel; 232 233 /** 234 * The validation stack. This holds lists of the elements seen for each 235 * element, in order to determine whether the names and order of these 236 * elements match the content model for the element. The last entry in 237 * this stack represents the current element. 238 */ 239 private LinkedList validationStack; 240 241 /** 242 * These sets contain the IDs and the IDREFs seen in the document, to 243 * ensure that IDs are unique and that each IDREF refers to an ID in the 244 * document. 245 */ 246 private HashSet ids, idrefs; 247 248 /** 249 * The target and data associated with the current processing instruction 250 * event. 251 */ 252 private String piTarget, piData; 253 254 /** 255 * The XML version declared in the XML declaration. 256 */ 257 private String xmlVersion; 258 259 /** 260 * The encoding declared in the XML declaration. 261 */ 262 private String xmlEncoding; 263 264 /** 265 * The standalone value declared in the XML declaration. 266 */ 267 private Boolean xmlStandalone; 268 269 /** 270 * The document type definition. 271 */ 272 Doctype doctype; 273 274 /** 275 * State variables for determining parameter-entity expansion. 276 */ 277 private boolean expandPE, peIsError; 278 279 /** 280 * Whether this is a validating parser. 281 */ 282 private final boolean validating; 283 284 /** 285 * Whether strings representing markup will be interned. 286 */ 287 private final boolean stringInterning; 288 289 /** 290 * If true, CDATA sections will be merged with adjacent text nodes into a 291 * single event. 
292 */ 293 private final boolean coalescing; 294 295 /** 296 * Whether to replace general entity references with their replacement 297 * text automatically during parsing. 298 * Otherwise entity-reference events will be issued. 299 */ 300 private final boolean replaceERefs; 301 302 /** 303 * Whether to support external entities. 304 */ 305 private final boolean externalEntities; 306 307 /** 308 * Whether to support DTDs. 309 */ 310 private final boolean supportDTD; 311 312 /** 313 * Whether to support XML namespaces. If true, namespace information will 314 * be available. Otherwise namespaces will simply be reported as ordinary 315 * attributes. 316 */ 317 private final boolean namespaceAware; 318 319 /** 320 * Whether to support XML Base. If true, URIs specified in xml:base 321 * attributes will be honoured when resolving external entities. 322 */ 323 private final boolean baseAware; 324 325 /** 326 * Whether to report extended event types (START_ENTITY and END_ENTITY) 327 * in addition to the standard event types. Used by the SAX parser. 328 */ 329 private final boolean extendedEventTypes; 330 331 /** 332 * The reporter to receive parsing warnings. 333 */ 334 final XMLReporter reporter; 335 336 /** 337 * Callback interface for resolving external entities. 
338 */ 339 final XMLResolver resolver; 340 341 // -- Constants for testing the next kind of markup event -- 342 private static final String TEST_START_ELEMENT = "<"; 343 private static final String TEST_END_ELEMENT = "</"; 344 private static final String TEST_COMMENT = "<!--"; 345 private static final String TEST_PI = "<?"; 346 private static final String TEST_CDATA = "<![CDATA["; 347 private static final String TEST_XML_DECL = "<?xml"; 348 private static final String TEST_DOCTYPE_DECL = "<!DOCTYPE"; 349 private static final String TEST_ELEMENT_DECL = "<!ELEMENT"; 350 private static final String TEST_ATTLIST_DECL = "<!ATTLIST"; 351 private static final String TEST_ENTITY_DECL = "<!ENTITY"; 352 private static final String TEST_NOTATION_DECL = "<!NOTATION"; 353 private static final String TEST_KET = ">"; 354 private static final String TEST_END_COMMENT = "--"; 355 private static final String TEST_END_PI = "?>"; 356 private static final String TEST_END_CDATA = "]]>"; 357 358 /** 359 * The general entities predefined by the XML specification. 360 */ 361 private static final LinkedHashMap PREDEFINED_ENTITIES = new LinkedHashMap(); 362 static 363 { 364 PREDEFINED_ENTITIES.put("amp", "&"); 365 PREDEFINED_ENTITIES.put("lt", "<"); 366 PREDEFINED_ENTITIES.put("gt", ">"); 367 PREDEFINED_ENTITIES.put("apos", "'"); 368 PREDEFINED_ENTITIES.put("quot", "\""); 369 } 370 371 /** 372 * Creates a new XML parser for the given input stream. 373 * This constructor should be used where possible, as it allows the 374 * encoding of the XML data to be correctly determined from the stream. 
375 * @param in the input stream 376 * @param systemId the URL from which the input stream was retrieved 377 * (necessary if there are external entities to be resolved) 378 * @param validating if the parser is to be a validating parser 379 * @param namespaceAware if the parser should support XML Namespaces 380 * @param coalescing if CDATA sections should be merged into adjacent text 381 * nodes 382 * @param replaceERefs if entity references should be automatically 383 * replaced by their replacement text (otherwise they will be reported as 384 * entity-reference events) 385 * @param externalEntities if external entities should be loaded 386 * @param supportDTD if support for the XML DTD should be enabled 387 * @param baseAware if the parser should support XML Base to resolve 388 * external entities 389 * @param stringInterning whether strings will be interned during parsing 390 * @param reporter the reporter to receive warnings during processing 391 * @param resolver the callback interface used to resolve external 392 * entities 393 */ XMLParser(InputStream in, String systemId, boolean validating, boolean namespaceAware, boolean coalescing, boolean replaceERefs, boolean externalEntities, boolean supportDTD, boolean baseAware, boolean stringInterning, boolean extendedEventTypes, XMLReporter reporter, XMLResolver resolver)394 public XMLParser(InputStream in, String systemId, 395 boolean validating, 396 boolean namespaceAware, 397 boolean coalescing, 398 boolean replaceERefs, 399 boolean externalEntities, 400 boolean supportDTD, 401 boolean baseAware, 402 boolean stringInterning, 403 boolean extendedEventTypes, 404 XMLReporter reporter, 405 XMLResolver resolver) 406 { 407 this.validating = validating; 408 this.namespaceAware = namespaceAware; 409 this.coalescing = coalescing; 410 this.replaceERefs = replaceERefs; 411 this.externalEntities = externalEntities; 412 this.supportDTD = supportDTD; 413 this.baseAware = baseAware; 414 this.stringInterning = stringInterning; 
415 this.extendedEventTypes = extendedEventTypes; 416 this.reporter = reporter; 417 this.resolver = resolver; 418 if (validating) 419 { 420 validationStack = new LinkedList(); 421 ids = new HashSet(); 422 idrefs = new HashSet(); 423 } 424 String debug = System.getProperty("gnu.xml.debug.input"); 425 if (debug != null) 426 { 427 try 428 { 429 File file = File.createTempFile(debug, ".xml"); 430 in = new TeeInputStream(in, new FileOutputStream(file)); 431 } 432 catch (IOException e) 433 { 434 RuntimeException e2 = new RuntimeException(); 435 e2.initCause(e); 436 throw e2; 437 } 438 } 439 systemId = canonicalize(systemId); 440 pushInput(new Input(in, null, null, systemId, null, null, false, true)); 441 } 442 443 /** 444 * Creates a new XML parser for the given character stream. 445 * This constructor is only available for compatibility with the JAXP 446 * APIs, which permit XML to be parsed from a character stream. Because 447 * the encoding specified by the character stream may conflict with that 448 * specified in the XML declaration, this method should be avoided where 449 * possible. 
450 * @param in the input stream 451 * @param systemId the URL from which the input stream was retrieved 452 * (necessary if there are external entities to be resolved) 453 * @param validating if the parser is to be a validating parser 454 * @param namespaceAware if the parser should support XML Namespaces 455 * @param coalescing if CDATA sections should be merged into adjacent text 456 * nodes 457 * @param replaceERefs if entity references should be automatically 458 * replaced by their replacement text (otherwise they will be reported as 459 * entity-reference events) 460 * @param externalEntities if external entities should be loaded 461 * @param supportDTD if support for the XML DTD should be enabled 462 * @param baseAware if the parser should support XML Base to resolve 463 * external entities 464 * @param stringInterning whether strings will be interned during parsing 465 * @param reporter the reporter to receive warnings during processing 466 * @param resolver the callback interface used to resolve external 467 * entities 468 */ XMLParser(Reader reader, String systemId, boolean validating, boolean namespaceAware, boolean coalescing, boolean replaceERefs, boolean externalEntities, boolean supportDTD, boolean baseAware, boolean stringInterning, boolean extendedEventTypes, XMLReporter reporter, XMLResolver resolver)469 public XMLParser(Reader reader, String systemId, 470 boolean validating, 471 boolean namespaceAware, 472 boolean coalescing, 473 boolean replaceERefs, 474 boolean externalEntities, 475 boolean supportDTD, 476 boolean baseAware, 477 boolean stringInterning, 478 boolean extendedEventTypes, 479 XMLReporter reporter, 480 XMLResolver resolver) 481 { 482 this.validating = validating; 483 this.namespaceAware = namespaceAware; 484 this.coalescing = coalescing; 485 this.replaceERefs = replaceERefs; 486 this.externalEntities = externalEntities; 487 this.supportDTD = supportDTD; 488 this.baseAware = baseAware; 489 this.stringInterning = stringInterning; 490 
this.extendedEventTypes = extendedEventTypes; 491 this.reporter = reporter; 492 this.resolver = resolver; 493 if (validating) 494 { 495 validationStack = new LinkedList(); 496 ids = new HashSet(); 497 idrefs = new HashSet(); 498 } 499 String debug = System.getProperty("gnu.xml.debug.input"); 500 if (debug != null) 501 { 502 try 503 { 504 File file = File.createTempFile(debug, ".xml"); 505 reader = new TeeReader(reader, new FileWriter(file)); 506 } 507 catch (IOException e) 508 { 509 RuntimeException e2 = new RuntimeException(); 510 e2.initCause(e); 511 throw e2; 512 } 513 } 514 systemId = canonicalize(systemId); 515 pushInput(new Input(null, reader, null, systemId, null, null, false, true)); 516 } 517 518 // -- NamespaceContext -- 519 getNamespaceURI(String prefix)520 public String getNamespaceURI(String prefix) 521 { 522 if (XMLConstants.XML_NS_PREFIX.equals(prefix)) 523 return XMLConstants.XML_NS_URI; 524 if (XMLConstants.XMLNS_ATTRIBUTE.equals(prefix)) 525 return XMLConstants.XMLNS_ATTRIBUTE_NS_URI; 526 for (Iterator i = namespaces.iterator(); i.hasNext(); ) 527 { 528 LinkedHashMap ctx = (LinkedHashMap) i.next(); 529 String namespaceURI = (String) ctx.get(prefix); 530 if (namespaceURI != null) 531 return namespaceURI; 532 } 533 return null; 534 } 535 getPrefix(String namespaceURI)536 public String getPrefix(String namespaceURI) 537 { 538 if (XMLConstants.XML_NS_URI.equals(namespaceURI)) 539 return XMLConstants.XML_NS_PREFIX; 540 if (XMLConstants.XMLNS_ATTRIBUTE_NS_URI.equals(namespaceURI)) 541 return XMLConstants.XMLNS_ATTRIBUTE; 542 for (Iterator i = namespaces.iterator(); i.hasNext(); ) 543 { 544 LinkedHashMap ctx = (LinkedHashMap) i.next(); 545 if (ctx.containsValue(namespaceURI)) 546 { 547 for (Iterator j = ctx.entrySet().iterator(); j.hasNext(); ) 548 { 549 Map.Entry entry = (Map.Entry) i.next(); 550 String uri = (String) entry.getValue(); 551 if (uri.equals(namespaceURI)) 552 return (String) entry.getKey(); 553 } 554 } 555 } 556 return null; 557 } 558 
getPrefixes(String namespaceURI)559 public Iterator getPrefixes(String namespaceURI) 560 { 561 if (XMLConstants.XML_NS_URI.equals(namespaceURI)) 562 return Collections.singleton(XMLConstants.XML_NS_PREFIX).iterator(); 563 if (XMLConstants.XMLNS_ATTRIBUTE_NS_URI.equals(namespaceURI)) 564 return Collections.singleton(XMLConstants.XMLNS_ATTRIBUTE).iterator(); 565 LinkedList acc = new LinkedList(); 566 for (Iterator i = namespaces.iterator(); i.hasNext(); ) 567 { 568 LinkedHashMap ctx = (LinkedHashMap) i.next(); 569 if (ctx.containsValue(namespaceURI)) 570 { 571 for (Iterator j = ctx.entrySet().iterator(); j.hasNext(); ) 572 { 573 Map.Entry entry = (Map.Entry) i.next(); 574 String uri = (String) entry.getValue(); 575 if (uri.equals(namespaceURI)) 576 acc.add(entry.getKey()); 577 } 578 } 579 } 580 return acc.iterator(); 581 } 582 583 // -- XMLStreamReader -- 584 close()585 public void close() 586 throws XMLStreamException 587 { 588 stack = null; 589 namespaces = null; 590 bases = null; 591 buf = null; 592 attrs = null; 593 doctype = null; 594 595 inputStack = null; 596 validationStack = null; 597 ids = null; 598 idrefs = null; 599 } 600 getNamespaceContext()601 public NamespaceContext getNamespaceContext() 602 { 603 return this; 604 } 605 getAttributeCount()606 public int getAttributeCount() 607 { 608 return attrs.size(); 609 } 610 getAttributeLocalName(int index)611 public String getAttributeLocalName(int index) 612 { 613 Attribute a = (Attribute) attrs.get(index); 614 return a.localName; 615 } 616 getAttributeNamespace(int index)617 public String getAttributeNamespace(int index) 618 { 619 String prefix = getAttributePrefix(index); 620 return getNamespaceURI(prefix); 621 } 622 getAttributePrefix(int index)623 public String getAttributePrefix(int index) 624 { 625 Attribute a = (Attribute) attrs.get(index); 626 return a.prefix; 627 } 628 getAttributeName(int index)629 public QName getAttributeName(int index) 630 { 631 Attribute a = (Attribute) attrs.get(index); 632 
String namespaceURI = getNamespaceURI(a.prefix); 633 return new QName(namespaceURI, a.localName, a.prefix); 634 } 635 getAttributeType(int index)636 public String getAttributeType(int index) 637 { 638 Attribute a = (Attribute) attrs.get(index); 639 return a.type; 640 } 641 getAttributeType(String elementName, String attName)642 private String getAttributeType(String elementName, String attName) 643 { 644 if (doctype != null) 645 { 646 AttributeDecl att = doctype.getAttributeDecl(elementName, attName); 647 if (att != null) 648 return att.type; 649 } 650 return "CDATA"; 651 } 652 getAttributeValue(int index)653 public String getAttributeValue(int index) 654 { 655 Attribute a = (Attribute) attrs.get(index); 656 return a.value; 657 } 658 getAttributeValue(String namespaceURI, String localName)659 public String getAttributeValue(String namespaceURI, String localName) 660 { 661 for (Iterator i = attrs.iterator(); i.hasNext(); ) 662 { 663 Attribute a = (Attribute) i.next(); 664 if (a.localName.equals(localName)) 665 { 666 String uri = getNamespaceURI(a.prefix); 667 if ((uri == null && namespaceURI == null) || 668 (uri != null && uri.equals(namespaceURI))) 669 return a.value; 670 } 671 } 672 return null; 673 } 674 isAttributeDeclared(int index)675 boolean isAttributeDeclared(int index) 676 { 677 if (doctype == null) 678 return false; 679 Attribute a = (Attribute) attrs.get(index); 680 String qn = ("".equals(a.prefix)) ? 
a.localName : 681 a.prefix + ":" + a.localName; 682 String elementName = buf.toString(); 683 return doctype.isAttributeDeclared(elementName, qn); 684 } 685 getCharacterEncodingScheme()686 public String getCharacterEncodingScheme() 687 { 688 return xmlEncoding; 689 } 690 getElementText()691 public String getElementText() 692 throws XMLStreamException 693 { 694 if (event != XMLStreamConstants.START_ELEMENT) 695 throw new XMLStreamException("current event must be START_ELEMENT"); 696 CPStringBuilder elementText = new CPStringBuilder(); 697 int depth = stack.size(); 698 while (event != XMLStreamConstants.END_ELEMENT || stack.size() > depth) 699 { 700 switch (next()) 701 { 702 case XMLStreamConstants.CHARACTERS: 703 case XMLStreamConstants.SPACE: 704 elementText.append(buf.toString()); 705 } 706 } 707 return elementText.toString(); 708 } 709 getEncoding()710 public String getEncoding() 711 { 712 return (input.inputEncoding == null) ? "UTF-8" : input.inputEncoding; 713 } 714 getEventType()715 public int getEventType() 716 { 717 return event; 718 } 719 getLocalName()720 public String getLocalName() 721 { 722 switch (event) 723 { 724 case XMLStreamConstants.START_ELEMENT: 725 case XMLStreamConstants.END_ELEMENT: 726 String qName = buf.toString(); 727 int ci = qName.indexOf(':'); 728 String localName = (ci == -1) ? qName : qName.substring(ci + 1); 729 if (stringInterning) 730 localName = localName.intern(); 731 return localName; 732 default: 733 return null; 734 } 735 } 736 getLocation()737 public Location getLocation() 738 { 739 return input; 740 } 741 getName()742 public QName getName() 743 { 744 switch (event) 745 { 746 case XMLStreamConstants.START_ELEMENT: 747 case XMLStreamConstants.END_ELEMENT: 748 String qName = buf.toString(); 749 int ci = qName.indexOf(':'); 750 String localName = (ci == -1) ? qName : qName.substring(ci + 1); 751 if (stringInterning) 752 localName = localName.intern(); 753 String prefix = (ci == -1) ? 754 (namespaceAware ? 
XMLConstants.DEFAULT_NS_PREFIX : null) : 755 qName.substring(0, ci); 756 if (stringInterning && prefix != null) 757 prefix = prefix.intern(); 758 String namespaceURI = getNamespaceURI(prefix); 759 return new QName(namespaceURI, localName, prefix); 760 default: 761 return null; 762 } 763 } 764 getNamespaceCount()765 public int getNamespaceCount() 766 { 767 if (!namespaceAware || namespaces.isEmpty()) 768 return 0; 769 switch (event) 770 { 771 case XMLStreamConstants.START_ELEMENT: 772 case XMLStreamConstants.END_ELEMENT: 773 LinkedHashMap ctx = (LinkedHashMap) namespaces.getFirst(); 774 return ctx.size(); 775 default: 776 return 0; 777 } 778 } 779 getNamespacePrefix(int index)780 public String getNamespacePrefix(int index) 781 { 782 LinkedHashMap ctx = (LinkedHashMap) namespaces.getFirst(); 783 int count = 0; 784 for (Iterator i = ctx.keySet().iterator(); i.hasNext(); ) 785 { 786 String prefix = (String) i.next(); 787 if (count++ == index) 788 return prefix; 789 } 790 return null; 791 } 792 getNamespaceURI()793 public String getNamespaceURI() 794 { 795 switch (event) 796 { 797 case XMLStreamConstants.START_ELEMENT: 798 case XMLStreamConstants.END_ELEMENT: 799 String qName = buf.toString(); 800 int ci = qName.indexOf(':'); 801 if (ci == -1) 802 return null; 803 String prefix = qName.substring(0, ci); 804 return getNamespaceURI(prefix); 805 default: 806 return null; 807 } 808 } 809 getNamespaceURI(int index)810 public String getNamespaceURI(int index) 811 { 812 LinkedHashMap ctx = (LinkedHashMap) namespaces.getFirst(); 813 int count = 0; 814 for (Iterator i = ctx.values().iterator(); i.hasNext(); ) 815 { 816 String uri = (String) i.next(); 817 if (count++ == index) 818 return uri; 819 } 820 return null; 821 } 822 getPIData()823 public String getPIData() 824 { 825 return piData; 826 } 827 getPITarget()828 public String getPITarget() 829 { 830 return piTarget; 831 } 832 getPrefix()833 public String getPrefix() 834 { 835 switch (event) 836 { 837 case 
XMLStreamConstants.START_ELEMENT: 838 case XMLStreamConstants.END_ELEMENT: 839 String qName = buf.toString(); 840 int ci = qName.indexOf(':'); 841 String prefix = (ci == -1) ? 842 (namespaceAware ? XMLConstants.DEFAULT_NS_PREFIX : null) : 843 qName.substring(0, ci); 844 if (stringInterning && prefix != null) 845 prefix = prefix.intern(); 846 return prefix; 847 default: 848 return null; 849 } 850 } 851 getProperty(String name)852 public Object getProperty(String name) 853 throws IllegalArgumentException 854 { 855 if (name == null) 856 throw new IllegalArgumentException("name is null"); 857 if (XMLInputFactory.ALLOCATOR.equals(name)) 858 return null; 859 if (XMLInputFactory.IS_COALESCING.equals(name)) 860 return coalescing ? Boolean.TRUE : Boolean.FALSE; 861 if (XMLInputFactory.IS_NAMESPACE_AWARE.equals(name)) 862 return namespaceAware ? Boolean.TRUE : Boolean.FALSE; 863 if (XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES.equals(name)) 864 return replaceERefs ? Boolean.TRUE : Boolean.FALSE; 865 if (XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES.equals(name)) 866 return externalEntities ? Boolean.TRUE : Boolean.FALSE; 867 if (XMLInputFactory.IS_VALIDATING.equals(name)) 868 return Boolean.FALSE; 869 if (XMLInputFactory.REPORTER.equals(name)) 870 return reporter; 871 if (XMLInputFactory.RESOLVER.equals(name)) 872 return resolver; 873 if (XMLInputFactory.SUPPORT_DTD.equals(name)) 874 return supportDTD ? Boolean.TRUE : Boolean.FALSE; 875 if ("gnu.xml.stream.stringInterning".equals(name)) 876 return stringInterning ? Boolean.TRUE : Boolean.FALSE; 877 if ("gnu.xml.stream.xmlBase".equals(name)) 878 return baseAware ? 
Boolean.TRUE : Boolean.FALSE; 879 if ("gnu.xml.stream.baseURI".equals(name)) 880 return getXMLBase(); 881 return null; 882 } 883 getText()884 public String getText() 885 { 886 return buf.toString(); 887 } 888 getTextCharacters()889 public char[] getTextCharacters() 890 { 891 return buf.toString().toCharArray(); 892 } 893 getTextCharacters(int sourceStart, char[] target, int targetStart, int length)894 public int getTextCharacters(int sourceStart, char[] target, 895 int targetStart, int length) 896 throws XMLStreamException 897 { 898 length = Math.min(sourceStart + buf.length(), length); 899 int sourceEnd = sourceStart + length; 900 buf.getChars(sourceStart, sourceEnd, target, targetStart); 901 return length; 902 } 903 getTextLength()904 public int getTextLength() 905 { 906 return buf.length(); 907 } 908 getTextStart()909 public int getTextStart() 910 { 911 return 0; 912 } 913 getVersion()914 public String getVersion() 915 { 916 return (xmlVersion == null) ? "1.0" : xmlVersion; 917 } 918 hasName()919 public boolean hasName() 920 { 921 switch (event) 922 { 923 case XMLStreamConstants.START_ELEMENT: 924 case XMLStreamConstants.END_ELEMENT: 925 return true; 926 default: 927 return false; 928 } 929 } 930 hasText()931 public boolean hasText() 932 { 933 switch (event) 934 { 935 case XMLStreamConstants.CHARACTERS: 936 case XMLStreamConstants.SPACE: 937 return true; 938 default: 939 return false; 940 } 941 } 942 isAttributeSpecified(int index)943 public boolean isAttributeSpecified(int index) 944 { 945 Attribute a = (Attribute) attrs.get(index); 946 return a.specified; 947 } 948 isCharacters()949 public boolean isCharacters() 950 { 951 return (event == XMLStreamConstants.CHARACTERS); 952 } 953 isEndElement()954 public boolean isEndElement() 955 { 956 return (event == XMLStreamConstants.END_ELEMENT); 957 } 958 isStandalone()959 public boolean isStandalone() 960 { 961 return Boolean.TRUE.equals(xmlStandalone); 962 } 963 isStartElement()964 public boolean isStartElement() 965 
{ 966 return (event == XMLStreamConstants.START_ELEMENT); 967 } 968 isWhiteSpace()969 public boolean isWhiteSpace() 970 { 971 return (event == XMLStreamConstants.SPACE); 972 } 973 nextTag()974 public int nextTag() 975 throws XMLStreamException 976 { 977 do 978 { 979 switch (next()) 980 { 981 case XMLStreamConstants.START_ELEMENT: 982 case XMLStreamConstants.END_ELEMENT: 983 case XMLStreamConstants.CHARACTERS: 984 case XMLStreamConstants.SPACE: 985 case XMLStreamConstants.COMMENT: 986 case XMLStreamConstants.PROCESSING_INSTRUCTION: 987 break; 988 default: 989 throw new XMLStreamException("Unexpected event type: " + event); 990 } 991 } 992 while (event != XMLStreamConstants.START_ELEMENT && 993 event != XMLStreamConstants.END_ELEMENT); 994 return event; 995 } 996 require(int type, String namespaceURI, String localName)997 public void require(int type, String namespaceURI, String localName) 998 throws XMLStreamException 999 { 1000 if (event != type) 1001 throw new XMLStreamException("Current event type is " + event); 1002 if (event == XMLStreamConstants.START_ELEMENT || 1003 event == XMLStreamConstants.END_ELEMENT) 1004 { 1005 String ln = getLocalName(); 1006 if (!ln.equals(localName)) 1007 throw new XMLStreamException("Current local-name is " + ln); 1008 String uri = getNamespaceURI(); 1009 if ((uri == null && namespaceURI != null) || 1010 (uri != null && !uri.equals(namespaceURI))) 1011 throw new XMLStreamException("Current namespace URI is " + uri); 1012 } 1013 } 1014 standaloneSet()1015 public boolean standaloneSet() 1016 { 1017 return (xmlStandalone != null); 1018 } 1019 hasNext()1020 public boolean hasNext() 1021 throws XMLStreamException 1022 { 1023 return (event != XMLStreamConstants.END_DOCUMENT && event != -1); 1024 } 1025 next()1026 public int next() 1027 throws XMLStreamException 1028 { 1029 if (event == XMLStreamConstants.END_ELEMENT) 1030 { 1031 // Pop namespace context 1032 if (namespaceAware && !namespaces.isEmpty()) 1033 namespaces.removeFirst(); 1034 
// Pop base context 1035 if (baseAware && !bases.isEmpty()) 1036 bases.removeFirst(); 1037 } 1038 if (!startEntityStack.isEmpty()) 1039 { 1040 String entityName = (String) startEntityStack.removeFirst(); 1041 buf.setLength(0); 1042 buf.append(entityName); 1043 event = START_ENTITY; 1044 return extendedEventTypes ? event : next(); 1045 } 1046 else if (!endEntityStack.isEmpty()) 1047 { 1048 String entityName = (String) endEntityStack.removeFirst(); 1049 buf.setLength(0); 1050 buf.append(entityName); 1051 event = END_ENTITY; 1052 return extendedEventTypes ? event : next(); 1053 } 1054 try 1055 { 1056 if (!input.initialized) 1057 input.init(); 1058 switch (state) 1059 { 1060 case CONTENT: 1061 if (tryRead(TEST_END_ELEMENT)) 1062 { 1063 readEndElement(); 1064 if (stack.isEmpty()) 1065 state = MISC; 1066 event = XMLStreamConstants.END_ELEMENT; 1067 } 1068 else if (tryRead(TEST_COMMENT)) 1069 { 1070 readComment(false); 1071 event = XMLStreamConstants.COMMENT; 1072 } 1073 else if (tryRead(TEST_PI)) 1074 { 1075 readPI(false); 1076 event = XMLStreamConstants.PROCESSING_INSTRUCTION; 1077 } 1078 else if (tryRead(TEST_CDATA)) 1079 { 1080 readCDSect(); 1081 event = XMLStreamConstants.CDATA; 1082 } 1083 else if (tryRead(TEST_START_ELEMENT)) 1084 { 1085 state = readStartElement(); 1086 event = XMLStreamConstants.START_ELEMENT; 1087 } 1088 else 1089 { 1090 // Check for character reference or predefined entity 1091 mark(8); 1092 int c = readCh(); 1093 if (c == 0x26) // '&' 1094 { 1095 c = readCh(); 1096 if (c == 0x23) // '#' 1097 { 1098 reset(); 1099 event = readCharData(null); 1100 } 1101 else 1102 { 1103 // entity reference 1104 reset(); 1105 readCh(); // & 1106 readReference(); 1107 String ref = buf.toString(); 1108 String text = (String) PREDEFINED_ENTITIES.get(ref); 1109 if (text != null) 1110 { 1111 event = readCharData(text); 1112 } 1113 else if (replaceERefs && !isUnparsedEntity(ref)) 1114 { 1115 // this will report a start-entity event 1116 boolean external = false; 1117 if 
(doctype != null) 1118 { 1119 Object entity = doctype.getEntity(ref); 1120 if (entity instanceof ExternalIds) 1121 external = true; 1122 } 1123 expandEntity(ref, false, external); 1124 event = next(); 1125 } 1126 else 1127 { 1128 event = XMLStreamConstants.ENTITY_REFERENCE; 1129 } 1130 } 1131 } 1132 else 1133 { 1134 reset(); 1135 event = readCharData(null); 1136 if (validating && doctype != null) 1137 validatePCData(buf.toString()); 1138 } 1139 } 1140 break; 1141 case EMPTY_ELEMENT: 1142 String elementName = (String) stack.removeLast(); 1143 buf.setLength(0); 1144 buf.append(elementName); 1145 state = stack.isEmpty() ? MISC : CONTENT; 1146 event = XMLStreamConstants.END_ELEMENT; 1147 if (validating && doctype != null) 1148 endElementValidationHook(); 1149 break; 1150 case INIT: // XMLDecl? 1151 if (tryRead(TEST_XML_DECL)) 1152 readXMLDecl(); 1153 input.finalizeEncoding(); 1154 event = XMLStreamConstants.START_DOCUMENT; 1155 state = PROLOG; 1156 break; 1157 case PROLOG: // Misc* (doctypedecl Misc*)? 
1158 skipWhitespace(); 1159 if (doctype == null && tryRead(TEST_DOCTYPE_DECL)) 1160 { 1161 readDoctypeDecl(); 1162 event = XMLStreamConstants.DTD; 1163 } 1164 else if (tryRead(TEST_COMMENT)) 1165 { 1166 readComment(false); 1167 event = XMLStreamConstants.COMMENT; 1168 } 1169 else if (tryRead(TEST_PI)) 1170 { 1171 readPI(false); 1172 event = XMLStreamConstants.PROCESSING_INSTRUCTION; 1173 } 1174 else if (tryRead(TEST_START_ELEMENT)) 1175 { 1176 state = readStartElement(); 1177 event = XMLStreamConstants.START_ELEMENT; 1178 } 1179 else 1180 { 1181 int c = readCh(); 1182 error("no root element: U+" + Integer.toHexString(c)); 1183 } 1184 break; 1185 case MISC: // Comment | PI | S 1186 skipWhitespace(); 1187 if (tryRead(TEST_COMMENT)) 1188 { 1189 readComment(false); 1190 event = XMLStreamConstants.COMMENT; 1191 } 1192 else if (tryRead(TEST_PI)) 1193 { 1194 readPI(false); 1195 event = XMLStreamConstants.PROCESSING_INSTRUCTION; 1196 } 1197 else 1198 { 1199 if (event == XMLStreamConstants.END_DOCUMENT) 1200 throw new NoSuchElementException(); 1201 int c = readCh(); 1202 if (c != -1) 1203 error("Only comments and PIs may appear after " + 1204 "the root element"); 1205 event = XMLStreamConstants.END_DOCUMENT; 1206 } 1207 break; 1208 default: 1209 event = -1; 1210 } 1211 return event; 1212 } 1213 catch (IOException e) 1214 { 1215 XMLStreamException e2 = new XMLStreamException(); 1216 e2.initCause(e); 1217 throw e2; 1218 } 1219 } 1220 1221 // package private 1222 1223 /** 1224 * Returns the current element name. 
1225 */ getCurrentElement()1226 String getCurrentElement() 1227 { 1228 return (String) stack.getLast(); 1229 } 1230 1231 // private 1232 mark(int limit)1233 private void mark(int limit) 1234 throws IOException 1235 { 1236 input.mark(limit); 1237 } 1238 reset()1239 private void reset() 1240 throws IOException 1241 { 1242 input.reset(); 1243 } 1244 read()1245 private int read() 1246 throws IOException 1247 { 1248 return input.read(); 1249 } 1250 read(int[] b, int off, int len)1251 private int read(int[] b, int off, int len) 1252 throws IOException 1253 { 1254 return input.read(b, off, len); 1255 } 1256 1257 /** 1258 * Parsed character read. 1259 */ readCh()1260 private int readCh() 1261 throws IOException, XMLStreamException 1262 { 1263 int c = read(); 1264 if (expandPE && c == 0x25) // '%' 1265 { 1266 if (peIsError) 1267 error("PE reference within decl in internal subset."); 1268 expandPEReference(); 1269 return readCh(); 1270 } 1271 return c; 1272 } 1273 1274 /** 1275 * Reads the next character, ensuring it is the character specified. 1276 * @param delim the character to match 1277 * @exception XMLStreamException if the next character is not the 1278 * specified one 1279 */ require(char delim)1280 private void require(char delim) 1281 throws IOException, XMLStreamException 1282 { 1283 mark(1); 1284 int c = readCh(); 1285 if (delim != c) 1286 { 1287 reset(); 1288 error("required character (got U+" + Integer.toHexString(c) + ")", 1289 new Character(delim)); 1290 } 1291 } 1292 1293 /** 1294 * Reads the next few characters, ensuring they match the string specified. 
1295 * @param delim the string to match 1296 * @exception XMLStreamException if the next characters do not match the 1297 * specified string 1298 */ require(String delim)1299 private void require(String delim) 1300 throws IOException, XMLStreamException 1301 { 1302 char[] chars = delim.toCharArray(); 1303 int len = chars.length; 1304 mark(len); 1305 int off = 0; 1306 do 1307 { 1308 int l2 = read(tmpBuf, off, len - off); 1309 if (l2 == -1) 1310 { 1311 reset(); 1312 error("EOF before required string", delim); 1313 } 1314 off += l2; 1315 } 1316 while (off < len); 1317 for (int i = 0; i < chars.length; i++) 1318 { 1319 if (chars[i] != tmpBuf[i]) 1320 { 1321 reset(); 1322 error("required string", delim); 1323 } 1324 } 1325 } 1326 1327 /** 1328 * Try to read a single character. On failure, reset the stream. 1329 * @param delim the character to test 1330 * @return true if the character matched delim, false otherwise. 1331 */ tryRead(char delim)1332 private boolean tryRead(char delim) 1333 throws IOException, XMLStreamException 1334 { 1335 mark(1); 1336 int c = readCh(); 1337 if (delim != c) 1338 { 1339 reset(); 1340 return false; 1341 } 1342 return true; 1343 } 1344 1345 /** 1346 * Tries to read the specified characters. 1347 * If successful, the stream is positioned after the last character, 1348 * otherwise it is reset. 1349 * @param test the string to test 1350 * @return true if the characters matched the test string, false otherwise. 
1351 */ tryRead(String test)1352 private boolean tryRead(String test) 1353 throws IOException 1354 { 1355 char[] chars = test.toCharArray(); 1356 int len = chars.length; 1357 mark(len); 1358 int count = 0; 1359 int l2 = read(tmpBuf, 0, len); 1360 if (l2 == -1) 1361 { 1362 reset(); 1363 return false; 1364 } 1365 count += l2; 1366 // check the characters we received first before doing additional reads 1367 for (int i = 0; i < count; i++) 1368 { 1369 if (chars[i] != tmpBuf[i]) 1370 { 1371 reset(); 1372 return false; 1373 } 1374 } 1375 while (count < len) 1376 { 1377 // force read 1378 int c = read(); 1379 if (c == -1) 1380 { 1381 reset(); 1382 return false; 1383 } 1384 tmpBuf[count] = (char) c; 1385 // check each character as it is read 1386 if (chars[count] != tmpBuf[count]) 1387 { 1388 reset(); 1389 return false; 1390 } 1391 count++; 1392 } 1393 return true; 1394 } 1395 1396 /** 1397 * Reads characters until the specified test string is encountered. 1398 * @param delim the string delimiting the end of the characters 1399 */ readUntil(String delim)1400 private void readUntil(String delim) 1401 throws IOException, XMLStreamException 1402 { 1403 int startLine = input.line; 1404 try 1405 { 1406 while (!tryRead(delim)) 1407 { 1408 int c = readCh(); 1409 if (c == -1) 1410 throw new EOFException(); 1411 else if (input.xml11) 1412 { 1413 if (!isXML11Char(c) || isXML11RestrictedChar(c)) 1414 error("illegal XML 1.1 character", 1415 "U+" + Integer.toHexString(c)); 1416 } 1417 else if (!isChar(c)) 1418 error("illegal XML character", 1419 "U+" + Integer.toHexString(c)); 1420 buf.append(Character.toChars(c)); 1421 } 1422 } 1423 catch (EOFException e) 1424 { 1425 error("end of input while looking for delimiter "+ 1426 "(started on line " + startLine + ')', delim); 1427 } 1428 } 1429 1430 /** 1431 * Reads any whitespace characters. 
1432 * @return true if whitespace characters were read, false otherwise 1433 */ tryWhitespace()1434 private boolean tryWhitespace() 1435 throws IOException, XMLStreamException 1436 { 1437 boolean white; 1438 boolean ret = false; 1439 do 1440 { 1441 mark(1); 1442 int c = readCh(); 1443 while (c == -1 && inputStack.size() > 1) 1444 { 1445 popInput(); 1446 c = readCh(); 1447 } 1448 white = (c == 0x20 || c == 0x09 || c == 0x0a || c == 0x0d); 1449 if (white) 1450 ret = true; 1451 } 1452 while (white); 1453 reset(); 1454 return ret; 1455 } 1456 1457 /** 1458 * Skip over any whitespace characters. 1459 */ skipWhitespace()1460 private void skipWhitespace() 1461 throws IOException, XMLStreamException 1462 { 1463 boolean white; 1464 do 1465 { 1466 mark(1); 1467 int c = readCh(); 1468 while (c == -1 && inputStack.size() > 1) 1469 { 1470 popInput(); 1471 c = readCh(); 1472 } 1473 white = (c == 0x20 || c == 0x09 || c == 0x0a || c == 0x0d); 1474 } 1475 while (white); 1476 reset(); 1477 } 1478 1479 /** 1480 * Try to read as many whitespace characters as are available. 1481 * @exception XMLStreamException if no whitespace characters were seen 1482 */ requireWhitespace()1483 private void requireWhitespace() 1484 throws IOException, XMLStreamException 1485 { 1486 if (!tryWhitespace()) 1487 error("whitespace required"); 1488 } 1489 1490 /** 1491 * Returns the current base URI for resolving external entities. 1492 */ getXMLBase()1493 String getXMLBase() 1494 { 1495 if (baseAware) 1496 { 1497 for (Iterator i = bases.iterator(); i.hasNext(); ) 1498 { 1499 String base = (String) i.next(); 1500 if (base != null) 1501 return base; 1502 } 1503 } 1504 return input.systemId; 1505 } 1506 1507 /** 1508 * Push the specified text input source. 
1509 */ pushInput(String name, String text, boolean report, boolean normalize)1510 private void pushInput(String name, String text, boolean report, 1511 boolean normalize) 1512 throws IOException, XMLStreamException 1513 { 1514 // Check for recursion 1515 if (name != null && !"".equals(name)) 1516 { 1517 for (Iterator i = inputStack.iterator(); i.hasNext(); ) 1518 { 1519 Input ctx = (Input) i.next(); 1520 if (name.equals(ctx.name)) 1521 error("entities may not be self-recursive", name); 1522 } 1523 } 1524 else 1525 report = false; 1526 pushInput(new Input(null, new StringReader(text), input.publicId, 1527 input.systemId, name, input.inputEncoding, report, 1528 normalize)); 1529 } 1530 1531 /** 1532 * Push the specified external input source. 1533 */ pushInput(String name, ExternalIds ids, boolean report, boolean normalize)1534 private void pushInput(String name, ExternalIds ids, boolean report, 1535 boolean normalize) 1536 throws IOException, XMLStreamException 1537 { 1538 if (!externalEntities) 1539 return; 1540 String url = canonicalize(absolutize(input.systemId, ids.systemId)); 1541 // Check for recursion 1542 for (Iterator i = inputStack.iterator(); i.hasNext(); ) 1543 { 1544 Input ctx = (Input) i.next(); 1545 if (url.equals(ctx.systemId)) 1546 error("entities may not be self-recursive", url); 1547 if (name != null && !"".equals(name) && name.equals(ctx.name)) 1548 error("entities may not be self-recursive", name); 1549 } 1550 if (name == null || "".equals(name)) 1551 report = false; 1552 InputStream in = null; 1553 if (resolver != null) 1554 { 1555 Object obj = resolver.resolveEntity(ids.publicId, url, getXMLBase(), 1556 null); 1557 if (obj instanceof InputStream) 1558 in = (InputStream) obj; 1559 } 1560 if (in == null) 1561 in = resolve(url); 1562 if (in == null) 1563 error("unable to resolve external entity", 1564 (ids.systemId != null) ? 
ids.systemId : ids.publicId); 1565 pushInput(new Input(in, null, ids.publicId, url, name, null, report, 1566 normalize)); 1567 input.init(); 1568 if (tryRead(TEST_XML_DECL)) 1569 readTextDecl(); 1570 input.finalizeEncoding(); 1571 } 1572 1573 /** 1574 * Push the specified input source (general entity) onto the input stack. 1575 */ pushInput(Input input)1576 private void pushInput(Input input) 1577 { 1578 if (input.report) 1579 startEntityStack.addFirst(input.name); 1580 inputStack.addLast(input); 1581 if (this.input != null) 1582 input.xml11 = this.input.xml11; 1583 this.input = input; 1584 } 1585 1586 /** 1587 * Returns a canonicalized version of the specified URL. 1588 * This is largely to work around a problem with the specification of 1589 * file URLs. 1590 */ canonicalize(String url)1591 static String canonicalize(String url) 1592 { 1593 if (url == null) 1594 return null; 1595 if (url.startsWith("file:") && !url.startsWith("file://")) 1596 url = "file://" + url.substring(5); 1597 return url; 1598 } 1599 1600 /** 1601 * "Absolutize" a URL. This resolves a relative URL into an absolute one. 
1602 * @param base the current base URL 1603 * @param href the (absolute or relative) URL to resolve 1604 */ absolutize(String base, String href)1605 public static String absolutize(String base, String href) 1606 { 1607 if (href == null) 1608 return null; 1609 int ci = href.indexOf(':'); 1610 if (ci > 1 && isURLScheme(href.substring(0, ci))) 1611 { 1612 // href is absolute already 1613 return href; 1614 } 1615 if (base == null) 1616 base = ""; 1617 else 1618 { 1619 int i = base.lastIndexOf('/'); 1620 if (i != -1) 1621 base = base.substring(0, i + 1); 1622 else 1623 base = ""; 1624 } 1625 if ("".equals(base)) 1626 { 1627 // assume file URL relative to current directory 1628 base = System.getProperty("user.dir"); 1629 if (base.charAt(0) == '/') 1630 base = base.substring(1); 1631 base = "file:///" + base.replace(File.separatorChar, '/'); 1632 if (!base.endsWith("/")) 1633 base += "/"; 1634 } 1635 // We can't use java.net.URL here to do the parsing, as it searches for 1636 // a protocol handler. A protocol handler may not be registered for the 1637 // URL scheme here. Do it manually. 1638 // 1639 // Set aside scheme and host portion of base URL 1640 String basePrefix = null; 1641 ci = base.indexOf(':'); 1642 if (ci > 1 && isURLScheme(base.substring(0, ci))) 1643 { 1644 if (base.length() > (ci + 3) && 1645 base.charAt(ci + 1) == '/' && 1646 base.charAt(ci + 2) == '/') 1647 { 1648 int si = base.indexOf('/', ci + 3); 1649 if (si == -1) 1650 base = null; 1651 else 1652 { 1653 basePrefix = base.substring(0, si); 1654 base = base.substring(si); 1655 } 1656 } 1657 else 1658 base = null; 1659 } 1660 if (base == null) // unknown or malformed base URL, use href 1661 return href; 1662 if (href.startsWith("/")) // absolute href pathname 1663 return (basePrefix == null) ? 
href : basePrefix + href; 1664 // relative href pathname 1665 if (!base.endsWith("/")) 1666 { 1667 int lsi = base.lastIndexOf('/'); 1668 if (lsi == -1) 1669 base = "/"; 1670 else 1671 base = base.substring(0, lsi + 1); 1672 } 1673 while (href.startsWith("../") || href.startsWith("./")) 1674 { 1675 if (href.startsWith("../")) 1676 { 1677 // strip last path component from base 1678 int lsi = base.lastIndexOf('/', base.length() - 2); 1679 if (lsi > -1) 1680 base = base.substring(0, lsi + 1); 1681 href = href.substring(3); // strip ../ prefix 1682 } 1683 else 1684 { 1685 href = href.substring(2); // strip ./ prefix 1686 } 1687 } 1688 return (basePrefix == null) ? base + href : basePrefix + base + href; 1689 } 1690 1691 /** 1692 * Indicates whether the specified characters match the scheme portion of 1693 * a URL. 1694 * @see RFC 1738 section 2.1 1695 */ isURLScheme(String text)1696 private static boolean isURLScheme(String text) 1697 { 1698 int len = text.length(); 1699 for (int i = 0; i < len; i++) 1700 { 1701 char c = text.charAt(i); 1702 if (c == '+' || c == '.' || c == '-') 1703 continue; 1704 if (c < 65 || (c > 90 && c < 97) || c > 122) 1705 return false; 1706 } 1707 return true; 1708 } 1709 1710 /** 1711 * Returns an input stream for the given URL. 1712 */ resolve(String url)1713 static InputStream resolve(String url) 1714 throws IOException 1715 { 1716 try 1717 { 1718 return new URL(url).openStream(); 1719 } 1720 catch (MalformedURLException e) 1721 { 1722 return null; 1723 } 1724 catch (IOException e) 1725 { 1726 IOException e2 = new IOException("error resolving " + url); 1727 e2.initCause(e); 1728 throw e2; 1729 } 1730 } 1731 1732 /** 1733 * Pops the current input source (general entity) off the stack. 
1734 */ popInput()1735 private void popInput() 1736 { 1737 Input old = (Input) inputStack.removeLast(); 1738 if (old.report) 1739 endEntityStack.addFirst(old.name); 1740 input = (Input) inputStack.getLast(); 1741 } 1742 1743 /** 1744 * Parse an entity text declaration. 1745 */ readTextDecl()1746 private void readTextDecl() 1747 throws IOException, XMLStreamException 1748 { 1749 final int flags = LIT_DISABLE_CREF | LIT_DISABLE_PE | LIT_DISABLE_EREF; 1750 requireWhitespace(); 1751 if (tryRead("version")) 1752 { 1753 readEq(); 1754 String v = readLiteral(flags, false); 1755 if ("1.0".equals(v)) 1756 input.xml11 = false; 1757 else if ("1.1".equals(v)) 1758 { 1759 Input i1 = (Input) inputStack.getFirst(); 1760 if (!i1.xml11) 1761 error("external entity specifies later version number"); 1762 input.xml11 = true; 1763 } 1764 else 1765 throw new XMLStreamException("illegal XML version: " + v); 1766 requireWhitespace(); 1767 } 1768 require("encoding"); 1769 readEq(); 1770 String enc = readLiteral(flags, false); 1771 skipWhitespace(); 1772 require("?>"); 1773 input.setInputEncoding(enc); 1774 } 1775 1776 /** 1777 * Parse the XML declaration. 
1778 */ readXMLDecl()1779 private void readXMLDecl() 1780 throws IOException, XMLStreamException 1781 { 1782 final int flags = LIT_DISABLE_CREF | LIT_DISABLE_PE | LIT_DISABLE_EREF; 1783 1784 requireWhitespace(); 1785 require("version"); 1786 readEq(); 1787 xmlVersion = readLiteral(flags, false); 1788 if ("1.0".equals(xmlVersion)) 1789 input.xml11 = false; 1790 else if ("1.1".equals(xmlVersion)) 1791 input.xml11 = true; 1792 else 1793 throw new XMLStreamException("illegal XML version: " + xmlVersion); 1794 1795 boolean white = tryWhitespace(); 1796 1797 if (tryRead("encoding")) 1798 { 1799 if (!white) 1800 error("whitespace required before 'encoding='"); 1801 readEq(); 1802 xmlEncoding = readLiteral(flags, false); 1803 white = tryWhitespace(); 1804 } 1805 1806 if (tryRead("standalone")) 1807 { 1808 if (!white) 1809 error("whitespace required before 'standalone='"); 1810 readEq(); 1811 String standalone = readLiteral(flags, false); 1812 if ("yes".equals(standalone)) 1813 xmlStandalone = Boolean.TRUE; 1814 else if ("no".equals(standalone)) 1815 xmlStandalone = Boolean.FALSE; 1816 else 1817 error("standalone flag must be 'yes' or 'no'", standalone); 1818 } 1819 1820 skipWhitespace(); 1821 require("?>"); 1822 if (xmlEncoding != null) 1823 input.setInputEncoding(xmlEncoding); 1824 } 1825 1826 /** 1827 * Parse the DOCTYPE declaration. 
*/
  private void readDoctypeDecl()
    throws IOException, XMLStreamException
  {
    if (!supportDTD)
      error("parser was configured not to support DTDs");
    requireWhitespace();
    String rootName = readNmtoken(true);
    skipWhitespace();
    ExternalIds ids = readExternalIds(false, true);
    doctype =
      this.new Doctype(rootName, ids.publicId, ids.systemId);

    // Parse internal subset first
    skipWhitespace();
    if (tryRead('['))
      {
        while (true)
          {
            // PE references are expanded only while skipping the whitespace
            // between declarations
            expandPE = true;
            skipWhitespace();
            expandPE = false;
            if (tryRead(']'))
              break;
            else
              readMarkupdecl(false);
          }
      }
    skipWhitespace();
    require('>');

    // Parse external subset
    if (ids.systemId != null && externalEntities)
      {
        // Push a ">" sentinel beneath the external subset: reading it back
        // signals that the subset has been consumed entirely.
        pushInput("", ">", false, false);
        pushInput("[dtd]", ids, true, true);
        // loop until we get back to ">"
        while (true)
          {
            expandPE = true;
            skipWhitespace();
            expandPE = false;
            mark(1);
            int c = readCh();
            if (c == 0x3e) // '>'
              break;
            else if (c == -1)
              popInput(); // current input exhausted, resume the one below
            else
              {
                reset();
                expandPE = true;
                readMarkupdecl(true);
                expandPE = true;
              }
          }
        // only the original input and the sentinel should remain
        if (inputStack.size() != 2)
          error("external subset has unmatched '>'");
        popInput();
      }
    checkDoctype();
    if (validating)
      validateDoctype();

    // Make rootName available for reading
    buf.setLength(0);
    buf.append(rootName);
  }

  /**
   * Checks the well-formedness of the DTD.
   * Currently a placeholder that performs no checks.
   */
  private void checkDoctype()
    throws XMLStreamException
  {
    // TODO check entity recursion
  }

  /**
   * Parse the markupdecl production.
*/
  private void readMarkupdecl(boolean inExternalSubset)
    throws IOException, XMLStreamException
  {
    // Remember the caller's PE-expansion setting; it is switched off while
    // the declaration keyword is matched and restored for the handler.
    boolean saved = expandPE;
    // Peek: the next character must be '<', but it is not consumed here.
    mark(1);
    require('<');
    reset();
    expandPE = false;
    if (tryRead(TEST_ELEMENT_DECL))
      {
        expandPE = saved;
        readElementDecl();
      }
    else if (tryRead(TEST_ATTLIST_DECL))
      {
        expandPE = saved;
        readAttlistDecl();
      }
    else if (tryRead(TEST_ENTITY_DECL))
      {
        expandPE = saved;
        readEntityDecl(inExternalSubset);
      }
    else if (tryRead(TEST_NOTATION_DECL))
      {
        expandPE = saved;
        readNotationDecl(inExternalSubset);
      }
    else if (tryRead(TEST_PI))
      {
        // PIs and comments are read with PE expansion still disabled
        readPI(true);
        expandPE = saved;
      }
    else if (tryRead(TEST_COMMENT))
      {
        readComment(true);
        expandPE = saved;
      }
    else if (tryRead("<!["))
      {
        // conditional section
        expandPE = saved;
        if (inputStack.size() < 2)
          error("conditional sections illegal in internal subset");
        skipWhitespace();
        if (tryRead("INCLUDE"))
          {
            skipWhitespace();
            require('[');
            skipWhitespace();
            // the section body is an ordinary sequence of declarations
            while (!tryRead("]]>"))
              {
                readMarkupdecl(inExternalSubset);
                skipWhitespace();
              }
          }
        else if (tryRead("IGNORE"))
          {
            skipWhitespace();
            require('[');
            expandPE = false;
            // skip the body, tracking nested "<![" ... "]]>" pairs
            for (int nesting = 1; nesting > 0; )
              {
                int c = readCh();
                switch (c)
                  {
                  case 0x3c: // '<'
                    if (tryRead("!["))
                      nesting++;
                    break;
                  case 0x5d: // ']'
                    if (tryRead("]>"))
                      nesting--;
                    break;
                  case -1:
                    throw new EOFException();
                  }
              }
            expandPE = saved;
          }
        else
          error("conditional section must begin with INCLUDE or IGNORE");
      }
    else
      error("expected markup declaration");
  }

  /**
   * Parse the elementdecl production.
1998 */ readElementDecl()1999 private void readElementDecl() 2000 throws IOException, XMLStreamException 2001 { 2002 requireWhitespace(); 2003 boolean saved = expandPE; 2004 expandPE = (inputStack.size() > 1); 2005 String name = readNmtoken(true); 2006 expandPE = saved; 2007 requireWhitespace(); 2008 readContentspec(name); 2009 skipWhitespace(); 2010 require('>'); 2011 } 2012 2013 /** 2014 * Parse the contentspec production. 2015 */ readContentspec(String elementName)2016 private void readContentspec(String elementName) 2017 throws IOException, XMLStreamException 2018 { 2019 if (tryRead("EMPTY")) 2020 doctype.addElementDecl(elementName, "EMPTY", new EmptyContentModel()); 2021 else if (tryRead("ANY")) 2022 doctype.addElementDecl(elementName, "ANY", new AnyContentModel()); 2023 else 2024 { 2025 ContentModel model; 2026 CPStringBuilder acc = new CPStringBuilder(); 2027 require('('); 2028 acc.append('('); 2029 skipWhitespace(); 2030 if (tryRead("#PCDATA")) 2031 { 2032 // mixed content 2033 acc.append("#PCDATA"); 2034 MixedContentModel mm = new MixedContentModel(); 2035 model = mm; 2036 skipWhitespace(); 2037 if (tryRead(')')) 2038 { 2039 acc.append(")"); 2040 if (tryRead('*')) 2041 { 2042 mm.min = 0; 2043 mm.max = -1; 2044 } 2045 } 2046 else 2047 { 2048 while (!tryRead(")")) 2049 { 2050 require('|'); 2051 acc.append('|'); 2052 skipWhitespace(); 2053 String name = readNmtoken(true); 2054 acc.append(name); 2055 mm.addName(name); 2056 skipWhitespace(); 2057 } 2058 require('*'); 2059 acc.append(")*"); 2060 mm.min = 0; 2061 mm.max = -1; 2062 } 2063 } 2064 else 2065 model = readElements(acc); 2066 doctype.addElementDecl(elementName, acc.toString(), model); 2067 } 2068 } 2069 2070 /** 2071 * Parses an element content model. 
2072 */ readElements(CPStringBuilder acc)2073 private ElementContentModel readElements(CPStringBuilder acc) 2074 throws IOException, XMLStreamException 2075 { 2076 int separator; 2077 ElementContentModel model = new ElementContentModel(); 2078 2079 // Parse first content particle 2080 skipWhitespace(); 2081 model.addContentParticle(readContentParticle(acc)); 2082 // End or separator 2083 skipWhitespace(); 2084 int c = readCh(); 2085 switch (c) 2086 { 2087 case 0x29: // ')' 2088 acc.append(')'); 2089 mark(1); 2090 c = readCh(); 2091 switch (c) 2092 { 2093 case 0x3f: // '?' 2094 acc.append('?'); 2095 model.min = 0; 2096 model.max = 1; 2097 break; 2098 case 0x2a: // '*' 2099 acc.append('*'); 2100 model.min = 0; 2101 model.max = -1; 2102 break; 2103 case 0x2b: // '+' 2104 acc.append('+'); 2105 model.min = 1; 2106 model.max = -1; 2107 break; 2108 default: 2109 reset(); 2110 } 2111 return model; // done 2112 case 0x7c: // '|' 2113 model.or = true; 2114 // fall through 2115 case 0x2c: // ',' 2116 separator = c; 2117 acc.append(Character.toChars(c)); 2118 break; 2119 default: 2120 error("bad separator in content model", 2121 "U+" + Integer.toHexString(c)); 2122 return model; 2123 } 2124 // Parse subsequent content particles 2125 while (true) 2126 { 2127 skipWhitespace(); 2128 model.addContentParticle(readContentParticle(acc)); 2129 skipWhitespace(); 2130 c = readCh(); 2131 if (c == 0x29) // ')' 2132 { 2133 acc.append(')'); 2134 break; 2135 } 2136 else if (c != separator) 2137 { 2138 error("bad separator in content model", 2139 "U+" + Integer.toHexString(c)); 2140 return model; 2141 } 2142 else 2143 acc.append(c); 2144 } 2145 // Check for occurrence indicator 2146 mark(1); 2147 c = readCh(); 2148 switch (c) 2149 { 2150 case 0x3f: // '?' 
2151 acc.append('?'); 2152 model.min = 0; 2153 model.max = 1; 2154 break; 2155 case 0x2a: // '*' 2156 acc.append('*'); 2157 model.min = 0; 2158 model.max = -1; 2159 break; 2160 case 0x2b: // '+' 2161 acc.append('+'); 2162 model.min = 1; 2163 model.max = -1; 2164 break; 2165 default: 2166 reset(); 2167 } 2168 return model; 2169 } 2170 2171 /** 2172 * Parse a cp production. 2173 */ readContentParticle(CPStringBuilder acc)2174 private ContentParticle readContentParticle(CPStringBuilder acc) 2175 throws IOException, XMLStreamException 2176 { 2177 ContentParticle cp = new ContentParticle(); 2178 if (tryRead('(')) 2179 { 2180 acc.append('('); 2181 cp.content = readElements(acc); 2182 } 2183 else 2184 { 2185 String name = readNmtoken(true); 2186 acc.append(name); 2187 cp.content = name; 2188 mark(1); 2189 int c = readCh(); 2190 switch (c) 2191 { 2192 case 0x3f: // '?' 2193 acc.append('?'); 2194 cp.min = 0; 2195 cp.max = 1; 2196 break; 2197 case 0x2a: // '*' 2198 acc.append('*'); 2199 cp.min = 0; 2200 cp.max = -1; 2201 break; 2202 case 0x2b: // '+' 2203 acc.append('+'); 2204 cp.min = 1; 2205 cp.max = -1; 2206 break; 2207 default: 2208 reset(); 2209 } 2210 } 2211 return cp; 2212 } 2213 2214 /** 2215 * Parse an attribute-list definition. 2216 */ readAttlistDecl()2217 private void readAttlistDecl() 2218 throws IOException, XMLStreamException 2219 { 2220 requireWhitespace(); 2221 boolean saved = expandPE; 2222 expandPE = (inputStack.size() > 1); 2223 String elementName = readNmtoken(true); 2224 expandPE = saved; 2225 boolean white = tryWhitespace(); 2226 while (!tryRead('>')) 2227 { 2228 if (!white) 2229 error("whitespace required before attribute definition"); 2230 readAttDef(elementName); 2231 white = tryWhitespace(); 2232 } 2233 } 2234 2235 /** 2236 * Parse a single attribute definition. 
2237 */ readAttDef(String elementName)2238 private void readAttDef(String elementName) 2239 throws IOException, XMLStreamException 2240 { 2241 String name = readNmtoken(true); 2242 requireWhitespace(); 2243 CPStringBuilder acc = new CPStringBuilder(); 2244 HashSet values = new HashSet(); 2245 String type = readAttType(acc, values); 2246 if (validating) 2247 { 2248 if ("ID".equals(type)) 2249 { 2250 // VC: One ID per Element Type 2251 for (Iterator i = doctype.attlistIterator(elementName); 2252 i.hasNext(); ) 2253 { 2254 Map.Entry entry = (Map.Entry) i.next(); 2255 AttributeDecl decl = (AttributeDecl) entry.getValue(); 2256 if ("ID".equals(decl.type)) 2257 error("element types must not have more than one ID " + 2258 "attribute"); 2259 } 2260 } 2261 else if ("NOTATION".equals(type)) 2262 { 2263 // VC: One Notation Per Element Type 2264 for (Iterator i = doctype.attlistIterator(elementName); 2265 i.hasNext(); ) 2266 { 2267 Map.Entry entry = (Map.Entry) i.next(); 2268 AttributeDecl decl = (AttributeDecl) entry.getValue(); 2269 if ("NOTATION".equals(decl.type)) 2270 error("element types must not have more than one NOTATION " + 2271 "attribute"); 2272 } 2273 // VC: No Notation on Empty Element 2274 ContentModel model = doctype.getElementModel(elementName); 2275 if (model != null && model.type == ContentModel.EMPTY) 2276 error("attributes of type NOTATION must not be declared on an " + 2277 "element declared EMPTY"); 2278 } 2279 } 2280 String enumer = null; 2281 if ("ENUMERATION".equals(type) || "NOTATION".equals(type)) 2282 enumer = acc.toString(); 2283 else 2284 values = null; 2285 requireWhitespace(); 2286 readDefault(elementName, name, type, enumer, values); 2287 } 2288 2289 /** 2290 * Parse an attribute type. 
2291 */ readAttType(CPStringBuilder acc, HashSet values)2292 private String readAttType(CPStringBuilder acc, HashSet values) 2293 throws IOException, XMLStreamException 2294 { 2295 if (tryRead('(')) 2296 { 2297 readEnumeration(false, acc, values); 2298 return "ENUMERATION"; 2299 } 2300 else 2301 { 2302 String typeString = readNmtoken(true); 2303 if ("NOTATION".equals(typeString)) 2304 { 2305 readNotationType(acc, values); 2306 return typeString; 2307 } 2308 else if ("CDATA".equals(typeString) || 2309 "ID".equals(typeString) || 2310 "IDREF".equals(typeString) || 2311 "IDREFS".equals(typeString) || 2312 "ENTITY".equals(typeString) || 2313 "ENTITIES".equals(typeString) || 2314 "NMTOKEN".equals(typeString) || 2315 "NMTOKENS".equals(typeString)) 2316 return typeString; 2317 else 2318 { 2319 error("illegal attribute type", typeString); 2320 return null; 2321 } 2322 } 2323 } 2324 2325 /** 2326 * Parse an enumeration. 2327 */ readEnumeration(boolean isNames, CPStringBuilder acc, HashSet values)2328 private void readEnumeration(boolean isNames, CPStringBuilder acc, 2329 HashSet values) 2330 throws IOException, XMLStreamException 2331 { 2332 acc.append('('); 2333 // first token 2334 skipWhitespace(); 2335 String token = readNmtoken(isNames); 2336 acc.append(token); 2337 values.add(token); 2338 // subsequent tokens 2339 skipWhitespace(); 2340 while (!tryRead(')')) 2341 { 2342 require('|'); 2343 acc.append('|'); 2344 skipWhitespace(); 2345 token = readNmtoken(isNames); 2346 // VC: No Duplicate Tokens 2347 if (validating && values.contains(token)) 2348 error("duplicate token", token); 2349 acc.append(token); 2350 values.add(token); 2351 skipWhitespace(); 2352 } 2353 acc.append(')'); 2354 } 2355 2356 /** 2357 * Parse a notation type for an attribute. 
2358 */ readNotationType(CPStringBuilder acc, HashSet values)2359 private void readNotationType(CPStringBuilder acc, HashSet values) 2360 throws IOException, XMLStreamException 2361 { 2362 requireWhitespace(); 2363 require('('); 2364 readEnumeration(true, acc, values); 2365 } 2366 2367 /** 2368 * Parse the default value for an attribute. 2369 */ readDefault(String elementName, String name, String type, String enumeration, HashSet values)2370 private void readDefault(String elementName, String name, 2371 String type, String enumeration, HashSet values) 2372 throws IOException, XMLStreamException 2373 { 2374 int valueType = ATTRIBUTE_DEFAULT_SPECIFIED; 2375 int flags = LIT_ATTRIBUTE; 2376 String value = null, defaultType = null; 2377 boolean saved = expandPE; 2378 2379 if (!"CDATA".equals(type)) 2380 flags |= LIT_NORMALIZE; 2381 2382 expandPE = false; 2383 if (tryRead('#')) 2384 { 2385 if (tryRead("FIXED")) 2386 { 2387 defaultType = "#FIXED"; 2388 valueType = ATTRIBUTE_DEFAULT_FIXED; 2389 requireWhitespace(); 2390 value = readLiteral(flags, false); 2391 } 2392 else if (tryRead("REQUIRED")) 2393 { 2394 defaultType = "#REQUIRED"; 2395 valueType = ATTRIBUTE_DEFAULT_REQUIRED; 2396 } 2397 else if (tryRead("IMPLIED")) 2398 { 2399 defaultType = "#IMPLIED"; 2400 valueType = ATTRIBUTE_DEFAULT_IMPLIED; 2401 } 2402 else 2403 error("illegal keyword for attribute default value"); 2404 } 2405 else 2406 value = readLiteral(flags, false); 2407 expandPE = saved; 2408 if (validating) 2409 { 2410 if ("ID".equals(type)) 2411 { 2412 // VC: Attribute Default Value Syntactically Correct 2413 if (value != null && !isNmtoken(value, true)) 2414 error("default value must match Name production", value); 2415 // VC: ID Attribute Default 2416 if (valueType != ATTRIBUTE_DEFAULT_REQUIRED && 2417 valueType != ATTRIBUTE_DEFAULT_IMPLIED) 2418 error("ID attributes must have a declared default of " + 2419 "#IMPLIED or #REQUIRED"); 2420 } 2421 else if (value != null) 2422 { 2423 // VC: Attribute Default 
Value Syntactically Correct 2424 if ("IDREF".equals(type) || "ENTITY".equals(type)) 2425 { 2426 if (!isNmtoken(value, true)) 2427 error("default value must match Name production", value); 2428 } 2429 else if ("IDREFS".equals(type) || "ENTITIES".equals(type)) 2430 { 2431 StringTokenizer st = new StringTokenizer(value); 2432 while (st.hasMoreTokens()) 2433 { 2434 String token = st.nextToken(); 2435 if (!isNmtoken(token, true)) 2436 error("default value must match Name production", token); 2437 } 2438 } 2439 else if ("NMTOKEN".equals(type) || "ENUMERATION".equals(type)) 2440 { 2441 if (!isNmtoken(value, false)) 2442 error("default value must match Nmtoken production", value); 2443 } 2444 else if ("NMTOKENS".equals(type)) 2445 { 2446 StringTokenizer st = new StringTokenizer(value); 2447 while (st.hasMoreTokens()) 2448 { 2449 String token = st.nextToken(); 2450 if (!isNmtoken(token, false)) 2451 error("default value must match Nmtoken production", 2452 token); 2453 } 2454 } 2455 } 2456 } 2457 // Register attribute def 2458 AttributeDecl attribute = 2459 new AttributeDecl(type, value, valueType, enumeration, values, 2460 inputStack.size() != 1); 2461 doctype.addAttributeDecl(elementName, name, attribute); 2462 } 2463 2464 /** 2465 * Parse the EntityDecl production. 
2466 */ readEntityDecl(boolean inExternalSubset)2467 private void readEntityDecl(boolean inExternalSubset) 2468 throws IOException, XMLStreamException 2469 { 2470 int flags = 0; 2471 // Check if parameter entity 2472 boolean peFlag = false; 2473 expandPE = false; 2474 requireWhitespace(); 2475 if (tryRead('%')) 2476 { 2477 peFlag = true; 2478 requireWhitespace(); 2479 } 2480 expandPE = true; 2481 // Read entity name 2482 String name = readNmtoken(true); 2483 if (name.indexOf(':') != -1) 2484 error("illegal character ':' in entity name", name); 2485 if (peFlag) 2486 name = "%" + name; 2487 requireWhitespace(); 2488 mark(1); 2489 int c = readCh(); 2490 reset(); 2491 if (c == 0x22 || c == 0x27) // " | ' 2492 { 2493 // Internal entity replacement text 2494 String value = readLiteral(flags | LIT_DISABLE_EREF, true); 2495 int ai = value.indexOf('&'); 2496 while (ai != -1) 2497 { 2498 int sci = value.indexOf(';', ai); 2499 if (sci == -1) 2500 error("malformed reference in entity value", value); 2501 String ref = value.substring(ai + 1, sci); 2502 int[] cp = UnicodeReader.toCodePointArray(ref); 2503 if (cp.length == 0) 2504 error("malformed reference in entity value", value); 2505 if (cp[0] == 0x23) // # 2506 { 2507 if (cp.length == 1) 2508 error("malformed reference in entity value", value); 2509 if (cp[1] == 0x78) // 'x' 2510 { 2511 if (cp.length == 2) 2512 error("malformed reference in entity value", value); 2513 for (int i = 2; i < cp.length; i++) 2514 { 2515 int x = cp[i]; 2516 if (x < 0x30 || 2517 (x > 0x39 && x < 0x41) || 2518 (x > 0x46 && x < 0x61) || 2519 x > 0x66) 2520 error("malformed character reference in entity value", 2521 value); 2522 } 2523 } 2524 else 2525 { 2526 for (int i = 1; i < cp.length; i++) 2527 { 2528 int x = cp[i]; 2529 if (x < 0x30 || x > 0x39) 2530 error("malformed character reference in entity value", 2531 value); 2532 } 2533 } 2534 } 2535 else 2536 { 2537 if (!isNameStartCharacter(cp[0], input.xml11)) 2538 error("malformed reference in 
entity value", value); 2539 for (int i = 1; i < cp.length; i++) 2540 { 2541 if (!isNameCharacter(cp[i], input.xml11)) 2542 error("malformed reference in entity value", value); 2543 } 2544 } 2545 ai = value.indexOf('&', sci); 2546 } 2547 doctype.addEntityDecl(name, value, inExternalSubset); 2548 } 2549 else 2550 { 2551 ExternalIds ids = readExternalIds(false, false); 2552 // Check for NDATA 2553 boolean white = tryWhitespace(); 2554 if (!peFlag && tryRead("NDATA")) 2555 { 2556 if (!white) 2557 error("whitespace required before NDATA"); 2558 requireWhitespace(); 2559 ids.notationName = readNmtoken(true); 2560 } 2561 doctype.addEntityDecl(name, ids, inExternalSubset); 2562 } 2563 // finish 2564 skipWhitespace(); 2565 require('>'); 2566 } 2567 2568 /** 2569 * Parse the NotationDecl production. 2570 */ readNotationDecl(boolean inExternalSubset)2571 private void readNotationDecl(boolean inExternalSubset) 2572 throws IOException, XMLStreamException 2573 { 2574 requireWhitespace(); 2575 String notationName = readNmtoken(true); 2576 if (notationName.indexOf(':') != -1) 2577 error("illegal character ':' in notation name", notationName); 2578 if (validating) 2579 { 2580 // VC: Unique Notation Name 2581 ExternalIds notation = doctype.getNotation(notationName); 2582 if (notation != null) 2583 error("duplicate notation name", notationName); 2584 } 2585 requireWhitespace(); 2586 ExternalIds ids = readExternalIds(true, false); 2587 ids.notationName = notationName; 2588 doctype.addNotationDecl(notationName, ids, inExternalSubset); 2589 skipWhitespace(); 2590 require('>'); 2591 } 2592 2593 /** 2594 * Returns a tuple {publicId, systemId}. 
2595 */ readExternalIds(boolean inNotation, boolean isSubset)2596 private ExternalIds readExternalIds(boolean inNotation, boolean isSubset) 2597 throws IOException, XMLStreamException 2598 { 2599 int c; 2600 int flags = LIT_DISABLE_CREF | LIT_DISABLE_PE | LIT_DISABLE_EREF; 2601 ExternalIds ids = new ExternalIds(); 2602 2603 if (tryRead("PUBLIC")) 2604 { 2605 requireWhitespace(); 2606 ids.publicId = readLiteral(LIT_NORMALIZE | LIT_PUBID | flags, false); 2607 if (inNotation) 2608 { 2609 skipWhitespace(); 2610 mark(1); 2611 c = readCh(); 2612 reset(); 2613 if (c == 0x22 || c == 0x27) // " | ' 2614 { 2615 String href = readLiteral(flags, false); 2616 ids.systemId = absolutize(input.systemId, href); 2617 } 2618 } 2619 else 2620 { 2621 requireWhitespace(); 2622 String href = readLiteral(flags, false); 2623 ids.systemId = absolutize(input.systemId, href); 2624 } 2625 // Check valid URI characters 2626 for (int i = 0; i < ids.publicId.length(); i++) 2627 { 2628 char d = ids.publicId.charAt(i); 2629 if (d >= 'a' && d <= 'z') 2630 continue; 2631 if (d >= 'A' && d <= 'Z') 2632 continue; 2633 if (" \r\n0123456789-' ()+,./:=?;!*#@$_%".indexOf(d) != -1) 2634 continue; 2635 error("illegal PUBLIC id character", 2636 "U+" + Integer.toHexString(d)); 2637 } 2638 } 2639 else if (tryRead("SYSTEM")) 2640 { 2641 requireWhitespace(); 2642 String href = readLiteral(flags, false); 2643 ids.systemId = absolutize(input.systemId, href); 2644 } 2645 else if (!isSubset) 2646 { 2647 error("missing SYSTEM or PUBLIC keyword"); 2648 } 2649 if (ids.systemId != null && !inNotation) 2650 { 2651 if (ids.systemId.indexOf('#') != -1) 2652 error("SYSTEM id has a URI fragment", ids.systemId); 2653 } 2654 return ids; 2655 } 2656 2657 /** 2658 * Parse the start of an element. 
2659 * @return the state of the parser afterwards (EMPTY_ELEMENT or CONTENT) 2660 */ readStartElement()2661 private int readStartElement() 2662 throws IOException, XMLStreamException 2663 { 2664 // Read element name 2665 String elementName = readNmtoken(true); 2666 attrs.clear(); 2667 // Push namespace context 2668 if (namespaceAware) 2669 { 2670 if (elementName.charAt(0) == ':' || 2671 elementName.charAt(elementName.length() - 1) == ':') 2672 error("not a QName", elementName); 2673 namespaces.addFirst(new LinkedHashMap()); 2674 } 2675 // Read element content 2676 boolean white = tryWhitespace(); 2677 mark(1); 2678 int c = readCh(); 2679 while (c != 0x2f && c != 0x3e) // '/' | '>' 2680 { 2681 // Read attribute 2682 reset(); 2683 if (!white) 2684 error("need whitespace between attributes"); 2685 readAttribute(elementName); 2686 white = tryWhitespace(); 2687 mark(1); 2688 c = readCh(); 2689 } 2690 // supply defaulted attributes 2691 if (doctype != null) 2692 { 2693 for (Iterator i = doctype.attlistIterator(elementName); i.hasNext(); ) 2694 { 2695 Map.Entry entry = (Map.Entry) i.next(); 2696 String attName = (String) entry.getKey(); 2697 AttributeDecl decl = (AttributeDecl) entry.getValue(); 2698 if (validating) 2699 { 2700 switch (decl.valueType) 2701 { 2702 case ATTRIBUTE_DEFAULT_REQUIRED: 2703 // VC: Required Attribute 2704 if (decl.value == null && !attributeSpecified(attName)) 2705 error("value for " + attName + " attribute is required"); 2706 break; 2707 case ATTRIBUTE_DEFAULT_FIXED: 2708 // VC: Fixed Attribute Default 2709 for (Iterator j = attrs.iterator(); j.hasNext(); ) 2710 { 2711 Attribute a = (Attribute) j.next(); 2712 if (attName.equals(a.name) && 2713 !decl.value.equals(a.value)) 2714 error("value for " + attName + " attribute must be " + 2715 decl.value); 2716 } 2717 break; 2718 } 2719 } 2720 if (namespaceAware && attName.equals("xmlns")) 2721 { 2722 LinkedHashMap ctx = 2723 (LinkedHashMap) namespaces.getFirst(); 2724 if 
(ctx.containsKey(XMLConstants.DEFAULT_NS_PREFIX)) 2725 continue; // namespace was specified 2726 } 2727 else if (namespaceAware && attName.startsWith("xmlns:")) 2728 { 2729 LinkedHashMap ctx = 2730 (LinkedHashMap) namespaces.getFirst(); 2731 if (ctx.containsKey(attName.substring(6))) 2732 continue; // namespace was specified 2733 } 2734 else if (attributeSpecified(attName)) 2735 continue; 2736 if (decl.value == null) 2737 continue; 2738 // VC: Standalone Document Declaration 2739 if (validating && decl.external && xmlStandalone == Boolean.TRUE) 2740 error("standalone must be 'no' if attributes inherit values " + 2741 "from externally declared markup declarations"); 2742 Attribute attr = 2743 new Attribute(attName, decl.type, false, decl.value); 2744 if (namespaceAware) 2745 { 2746 if (!addNamespace(attr)) 2747 attrs.add(attr); 2748 } 2749 else 2750 attrs.add(attr); 2751 } 2752 } 2753 if (baseAware) 2754 { 2755 String uri = getAttributeValue(XMLConstants.XML_NS_URI, "base"); 2756 String base = getXMLBase(); 2757 bases.addFirst(absolutize(base, uri)); 2758 } 2759 if (namespaceAware) 2760 { 2761 // check prefix bindings 2762 int ci = elementName.indexOf(':'); 2763 if (ci != -1) 2764 { 2765 String prefix = elementName.substring(0, ci); 2766 String uri = getNamespaceURI(prefix); 2767 if (uri == null) 2768 error("unbound element prefix", prefix); 2769 else if (input.xml11 && "".equals(uri)) 2770 error("XML 1.1 unbound element prefix", prefix); 2771 } 2772 for (Iterator i = attrs.iterator(); i.hasNext(); ) 2773 { 2774 Attribute attr = (Attribute) i.next(); 2775 if (attr.prefix != null && 2776 !XMLConstants.XMLNS_ATTRIBUTE.equals(attr.prefix)) 2777 { 2778 String uri = getNamespaceURI(attr.prefix); 2779 if (uri == null) 2780 error("unbound attribute prefix", attr.prefix); 2781 else if (input.xml11 && "".equals(uri)) 2782 error("XML 1.1 unbound attribute prefix", attr.prefix); 2783 } 2784 } 2785 } 2786 if (validating && doctype != null) 2787 { 2788 
validateStartElement(elementName); 2789 currentContentModel = doctype.getElementModel(elementName); 2790 if (currentContentModel == null) 2791 error("no element declaration", elementName); 2792 validationStack.add(new LinkedList()); 2793 } 2794 // make element name available for read 2795 buf.setLength(0); 2796 buf.append(elementName); 2797 // push element onto stack 2798 stack.addLast(elementName); 2799 switch (c) 2800 { 2801 case 0x3e: // '>' 2802 return CONTENT; 2803 case 0x2f: // '/' 2804 require('>'); 2805 return EMPTY_ELEMENT; 2806 } 2807 return -1; // to satisfy compiler 2808 } 2809 2810 /** 2811 * Indicates whether the specified attribute name was specified for the 2812 * current element. 2813 */ attributeSpecified(String attName)2814 private boolean attributeSpecified(String attName) 2815 { 2816 for (Iterator j = attrs.iterator(); j.hasNext(); ) 2817 { 2818 Attribute a = (Attribute) j.next(); 2819 if (attName.equals(a.name)) 2820 return true; 2821 } 2822 return false; 2823 } 2824 2825 /** 2826 * Parse an attribute. 2827 */ readAttribute(String elementName)2828 private void readAttribute(String elementName) 2829 throws IOException, XMLStreamException 2830 { 2831 // Read attribute name 2832 String attributeName = readNmtoken(true); 2833 String type = getAttributeType(elementName, attributeName); 2834 readEq(); 2835 // Read literal 2836 final int flags = LIT_ATTRIBUTE | LIT_ENTITY_REF; 2837 String value = (type == null || "CDATA".equals(type)) ? 
2838 readLiteral(flags, false) : readLiteral(flags | LIT_NORMALIZE, false); 2839 // add attribute event 2840 Attribute attr = this.new Attribute(attributeName, type, true, value); 2841 if (namespaceAware) 2842 { 2843 if (attributeName.charAt(0) == ':' || 2844 attributeName.charAt(attributeName.length() - 1) == ':') 2845 error("not a QName", attributeName); 2846 else if (attributeName.equals("xmlns")) 2847 { 2848 LinkedHashMap ctx = (LinkedHashMap) namespaces.getFirst(); 2849 if (ctx.containsKey(XMLConstants.DEFAULT_NS_PREFIX)) 2850 error("duplicate default namespace"); 2851 } 2852 else if (attributeName.startsWith("xmlns:")) 2853 { 2854 LinkedHashMap ctx = (LinkedHashMap) namespaces.getFirst(); 2855 if (ctx.containsKey(attributeName.substring(6))) 2856 error("duplicate namespace", attributeName.substring(6)); 2857 } 2858 else if (attrs.contains(attr)) 2859 error("duplicate attribute", attributeName); 2860 } 2861 else if (attrs.contains(attr)) 2862 error("duplicate attribute", attributeName); 2863 if (validating && doctype != null) 2864 { 2865 // VC: Attribute Value Type 2866 AttributeDecl decl = 2867 doctype.getAttributeDecl(elementName, attributeName); 2868 if (decl == null) 2869 error("attribute must be declared", attributeName); 2870 if ("ENUMERATION".equals(decl.type)) 2871 { 2872 // VC: Enumeration 2873 if (!decl.values.contains(value)) 2874 error("value does not match enumeration " + decl.enumeration, 2875 value); 2876 } 2877 else if ("ID".equals(decl.type)) 2878 { 2879 // VC: ID 2880 if (!isNmtoken(value, true)) 2881 error("ID values must match the Name production"); 2882 if (ids.contains(value)) 2883 error("Duplicate ID", value); 2884 ids.add(value); 2885 } 2886 else if ("IDREF".equals(decl.type) || "IDREFS".equals(decl.type)) 2887 { 2888 StringTokenizer st = new StringTokenizer(value); 2889 while (st.hasMoreTokens()) 2890 { 2891 String token = st.nextToken(); 2892 // VC: IDREF 2893 if (!isNmtoken(token, true)) 2894 error("IDREF values must match the Name 
production"); 2895 idrefs.add(token); 2896 } 2897 } 2898 else if ("NMTOKEN".equals(decl.type) || "NMTOKENS".equals(decl.type)) 2899 { 2900 StringTokenizer st = new StringTokenizer(value); 2901 while (st.hasMoreTokens()) 2902 { 2903 String token = st.nextToken(); 2904 // VC: Name Token 2905 if (!isNmtoken(token, false)) 2906 error("NMTOKEN values must match the Nmtoken production"); 2907 } 2908 } 2909 else if ("ENTITY".equals(decl.type)) 2910 { 2911 // VC: Entity Name 2912 if (!isNmtoken(value, true)) 2913 error("ENTITY values must match the Name production"); 2914 Object entity = doctype.getEntity(value); 2915 if (entity == null || !(entity instanceof ExternalIds) || 2916 ((ExternalIds) entity).notationName == null) 2917 error("ENTITY values must match the name of an unparsed " + 2918 "entity declared in the DTD"); 2919 } 2920 else if ("NOTATION".equals(decl.type)) 2921 { 2922 if (!decl.values.contains(value)) 2923 error("NOTATION values must match a declared notation name", 2924 value); 2925 // VC: Notation Attributes 2926 ExternalIds notation = doctype.getNotation(value); 2927 if (notation == null) 2928 error("NOTATION values must match the name of a notation " + 2929 "declared in the DTD", value); 2930 } 2931 } 2932 if (namespaceAware) 2933 { 2934 if (!addNamespace(attr)) 2935 attrs.add(attr); 2936 } 2937 else 2938 attrs.add(attr); 2939 } 2940 2941 /** 2942 * Determines whether the specified attribute is a namespace declaration, 2943 * and adds it to the current namespace context if so. Returns false if 2944 * the attribute is an ordinary attribute. 
2945 */ addNamespace(Attribute attr)2946 private boolean addNamespace(Attribute attr) 2947 throws XMLStreamException 2948 { 2949 if ("xmlns".equals(attr.name)) 2950 { 2951 LinkedHashMap ctx = (LinkedHashMap) namespaces.getFirst(); 2952 if (ctx.get(XMLConstants.DEFAULT_NS_PREFIX) != null) 2953 error("Duplicate default namespace declaration"); 2954 if (XMLConstants.XML_NS_URI.equals(attr.value)) 2955 error("can't bind XML namespace"); 2956 ctx.put(XMLConstants.DEFAULT_NS_PREFIX, attr.value); 2957 return true; 2958 } 2959 else if ("xmlns".equals(attr.prefix)) 2960 { 2961 LinkedHashMap ctx = (LinkedHashMap) namespaces.getFirst(); 2962 if (ctx.get(attr.localName) != null) 2963 error("Duplicate namespace declaration for prefix", 2964 attr.localName); 2965 if (XMLConstants.XML_NS_PREFIX.equals(attr.localName)) 2966 { 2967 if (!XMLConstants.XML_NS_URI.equals(attr.value)) 2968 error("can't redeclare xml prefix"); 2969 else 2970 return false; // treat as attribute 2971 } 2972 if (XMLConstants.XML_NS_URI.equals(attr.value)) 2973 error("can't bind non-xml prefix to XML namespace"); 2974 if (XMLConstants.XMLNS_ATTRIBUTE.equals(attr.localName)) 2975 error("can't redeclare xmlns prefix"); 2976 if (XMLConstants.XMLNS_ATTRIBUTE_NS_URI.equals(attr.value)) 2977 error("can't bind non-xmlns prefix to XML Namespace namespace"); 2978 if ("".equals(attr.value) && !input.xml11) 2979 error("illegal use of 1.1-style prefix unbinding in 1.0 document"); 2980 ctx.put(attr.localName, attr.value); 2981 return true; 2982 } 2983 return false; 2984 } 2985 2986 /** 2987 * Parse a closing tag. 
2988 */ readEndElement()2989 private void readEndElement() 2990 throws IOException, XMLStreamException 2991 { 2992 // pop element off stack 2993 String expected = (String) stack.removeLast(); 2994 require(expected); 2995 skipWhitespace(); 2996 require('>'); 2997 // Make element name available 2998 buf.setLength(0); 2999 buf.append(expected); 3000 if (validating && doctype != null) 3001 endElementValidationHook(); 3002 } 3003 3004 /** 3005 * Validate the end of an element. 3006 * Called on an end-element or empty element if validating. 3007 */ endElementValidationHook()3008 private void endElementValidationHook() 3009 throws XMLStreamException 3010 { 3011 validateEndElement(); 3012 validationStack.removeLast(); 3013 if (stack.isEmpty()) 3014 currentContentModel = null; 3015 else 3016 { 3017 String parent = (String) stack.getLast(); 3018 currentContentModel = doctype.getElementModel(parent); 3019 } 3020 } 3021 3022 /** 3023 * Parse a comment. 3024 */ readComment(boolean inDTD)3025 private void readComment(boolean inDTD) 3026 throws IOException, XMLStreamException 3027 { 3028 boolean saved = expandPE; 3029 expandPE = false; 3030 buf.setLength(0); 3031 readUntil(TEST_END_COMMENT); 3032 require('>'); 3033 expandPE = saved; 3034 if (inDTD) 3035 doctype.addComment(buf.toString()); 3036 } 3037 3038 /** 3039 * Parse a processing instruction. 
3040 */ readPI(boolean inDTD)3041 private void readPI(boolean inDTD) 3042 throws IOException, XMLStreamException 3043 { 3044 boolean saved = expandPE; 3045 expandPE = false; 3046 piTarget = readNmtoken(true); 3047 if (piTarget.indexOf(':') != -1) 3048 error("illegal character in PI target", new Character(':')); 3049 if ("xml".equalsIgnoreCase(piTarget)) 3050 error("illegal PI target", piTarget); 3051 if (tryRead(TEST_END_PI)) 3052 piData = null; 3053 else 3054 { 3055 if (!tryWhitespace()) 3056 error("whitespace required between PI target and data"); 3057 buf.setLength(0); 3058 readUntil(TEST_END_PI); 3059 piData = buf.toString(); 3060 } 3061 expandPE = saved; 3062 if (inDTD) 3063 doctype.addPI(piTarget, piData); 3064 } 3065 3066 /** 3067 * Parse an entity reference. 3068 */ readReference()3069 private void readReference() 3070 throws IOException, XMLStreamException 3071 { 3072 buf.setLength(0); 3073 String entityName = readNmtoken(true); 3074 require(';'); 3075 buf.setLength(0); 3076 buf.append(entityName); 3077 } 3078 3079 /** 3080 * Read an CDATA section. 3081 */ readCDSect()3082 private void readCDSect() 3083 throws IOException, XMLStreamException 3084 { 3085 buf.setLength(0); 3086 readUntil(TEST_END_CDATA); 3087 } 3088 3089 /** 3090 * Read character data. 
3091 * @return the type of text read (CHARACTERS or SPACE) 3092 */ readCharData(String prefix)3093 private int readCharData(String prefix) 3094 throws IOException, XMLStreamException 3095 { 3096 boolean white = true; 3097 buf.setLength(0); 3098 if (prefix != null) 3099 buf.append(prefix); 3100 boolean done = false; 3101 boolean entities = false; 3102 while (!done) 3103 { 3104 // Block read 3105 mark(tmpBuf.length); 3106 int len = read(tmpBuf, 0, tmpBuf.length); 3107 if (len == -1) 3108 { 3109 if (inputStack.size() > 1) 3110 { 3111 popInput(); 3112 // report end-entity 3113 done = true; 3114 } 3115 else 3116 throw new EOFException(); 3117 } 3118 for (int i = 0; i < len && !done; i++) 3119 { 3120 int c = tmpBuf[i]; 3121 switch (c) 3122 { 3123 case 0x20: 3124 case 0x09: 3125 case 0x0a: 3126 case 0x0d: 3127 buf.append(Character.toChars(c)); 3128 break; // whitespace 3129 case 0x26: // '&' 3130 reset(); 3131 read(tmpBuf, 0, i); 3132 // character reference? 3133 mark(3); 3134 c = readCh(); // & 3135 c = readCh(); 3136 if (c == 0x23) // '#' 3137 { 3138 mark(1); 3139 c = readCh(); 3140 boolean hex = (c == 0x78); // 'x' 3141 if (!hex) 3142 reset(); 3143 char[] ch = readCharacterRef(hex ? 
16 : 10); 3144 buf.append(ch, 0, ch.length); 3145 for (int j = 0; j < ch.length; j++) 3146 { 3147 switch (ch[j]) 3148 { 3149 case 0x20: 3150 case 0x09: 3151 case 0x0a: 3152 case 0x0d: 3153 break; // whitespace 3154 default: 3155 white = false; 3156 } 3157 } 3158 } 3159 else 3160 { 3161 // entity reference 3162 reset(); 3163 c = readCh(); // & 3164 String entityName = readNmtoken(true); 3165 require(';'); 3166 String text = 3167 (String) PREDEFINED_ENTITIES.get(entityName); 3168 if (text != null) 3169 buf.append(text); 3170 else 3171 { 3172 pushInput("", "&" + entityName + ";", false, false); 3173 done = true; 3174 break; 3175 } 3176 } 3177 // continue processing 3178 i = -1; 3179 mark(tmpBuf.length); 3180 len = read(tmpBuf, 0, tmpBuf.length); 3181 if (len == -1) 3182 { 3183 if (inputStack.size() > 1) 3184 { 3185 popInput(); 3186 done = true; 3187 } 3188 else 3189 throw new EOFException(); 3190 } 3191 entities = true; 3192 break; // end of text sequence 3193 case 0x3e: // '>' 3194 int l = buf.length(); 3195 if (l > 1 && 3196 buf.charAt(l - 1) == ']' && 3197 buf.charAt(l - 2) == ']') 3198 error("Character data may not contain unescaped ']]>'"); 3199 buf.append(Character.toChars(c)); 3200 break; 3201 case 0x3c: // '<' 3202 reset(); 3203 // read i characters 3204 int count = 0, remaining = i; 3205 do 3206 { 3207 int r = read(tmpBuf, 0, remaining); 3208 count += r; 3209 remaining -= r; 3210 } 3211 while (count < i); 3212 i = len; 3213 if (coalescing && tryRead(TEST_CDATA)) 3214 readUntil(TEST_END_CDATA); // read CDATA section into buf 3215 else 3216 done = true; // end of text sequence 3217 break; 3218 default: 3219 if (input.xml11) 3220 { 3221 if (!isXML11Char(c) || isXML11RestrictedChar(c)) 3222 error("illegal XML 1.1 character", 3223 "U+" + Integer.toHexString(c)); 3224 } 3225 else if (!isChar(c)) 3226 error("illegal XML character", 3227 "U+" + Integer.toHexString(c)); 3228 white = false; 3229 buf.append(Character.toChars(c)); 3230 } 3231 } 3232 // if text buffer >= 
2MB, return it as a chunk 3233 // to avoid excessive memory use 3234 if (buf.length() >= 2097152) 3235 done = true; 3236 } 3237 if (entities) 3238 normalizeCRLF(buf); 3239 return white ? XMLStreamConstants.SPACE : XMLStreamConstants.CHARACTERS; 3240 } 3241 3242 /** 3243 * Expands the specified entity. 3244 */ expandEntity(String name, boolean inAttr, boolean normalize)3245 private void expandEntity(String name, boolean inAttr, boolean normalize) 3246 throws IOException, XMLStreamException 3247 { 3248 if (doctype != null) 3249 { 3250 Object value = doctype.getEntity(name); 3251 if (value != null) 3252 { 3253 if (xmlStandalone == Boolean.TRUE) 3254 { 3255 // VC: Standalone Document Declaration 3256 if (doctype.isEntityExternal(name)) 3257 error("reference to external entity in standalone document"); 3258 else if (value instanceof ExternalIds) 3259 { 3260 ExternalIds ids = (ExternalIds) value; 3261 if (ids.notationName != null && 3262 doctype.isNotationExternal(ids.notationName)) 3263 error("reference to external notation in " + 3264 "standalone document"); 3265 } 3266 } 3267 if (value instanceof String) 3268 { 3269 String text = (String) value; 3270 if (inAttr && text.indexOf('<') != -1) 3271 error("< in attribute value"); 3272 pushInput(name, text, !inAttr, normalize); 3273 } 3274 else if (inAttr) 3275 error("reference to external entity in attribute value", name); 3276 else 3277 pushInput(name, (ExternalIds) value, !inAttr, normalize); 3278 return; 3279 } 3280 } 3281 error("reference to undeclared entity", name); 3282 } 3283 3284 /** 3285 * Indicates whether the specified entity is unparsed. 3286 */ isUnparsedEntity(String name)3287 private boolean isUnparsedEntity(String name) 3288 { 3289 if (doctype != null) 3290 { 3291 Object value = doctype.getEntity(name); 3292 if (value != null && value instanceof ExternalIds) 3293 return ((ExternalIds) value).notationName != null; 3294 } 3295 return false; 3296 } 3297 3298 /** 3299 * Read an equals sign. 
3300 */ readEq()3301 private void readEq() 3302 throws IOException, XMLStreamException 3303 { 3304 skipWhitespace(); 3305 require('='); 3306 skipWhitespace(); 3307 } 3308 3309 /** 3310 * Character read for reading literals. 3311 * @param recognizePEs whether to recognize parameter-entity references 3312 */ literalReadCh(boolean recognizePEs)3313 private int literalReadCh(boolean recognizePEs) 3314 throws IOException, XMLStreamException 3315 { 3316 int c = recognizePEs ? readCh() : read(); 3317 while (c == -1) 3318 { 3319 if (inputStack.size() > 1) 3320 { 3321 inputStack.removeLast(); 3322 input = (Input) inputStack.getLast(); 3323 // Don't issue end-entity 3324 c = recognizePEs ? readCh() : read(); 3325 } 3326 else 3327 throw new EOFException(); 3328 } 3329 return c; 3330 } 3331 3332 /** 3333 * Read a string literal. 3334 */ readLiteral(int flags, boolean recognizePEs)3335 private String readLiteral(int flags, boolean recognizePEs) 3336 throws IOException, XMLStreamException 3337 { 3338 boolean saved = expandPE; 3339 int delim = readCh(); 3340 if (delim != 0x27 && delim != 0x22) 3341 error("expected '\"' or \"'\"", "U+" + Integer.toHexString(delim)); 3342 literalBuf.setLength(0); 3343 if ((flags & LIT_DISABLE_PE) != 0) 3344 expandPE = false; 3345 boolean entities = false; 3346 int inputStackSize = inputStack.size(); 3347 do 3348 { 3349 int c = literalReadCh(recognizePEs); 3350 if (c == delim && inputStackSize == inputStack.size()) 3351 break; 3352 switch (c) 3353 { 3354 case 0x0a: 3355 case 0x0d: 3356 if ((flags & (LIT_ATTRIBUTE | LIT_PUBID)) != 0) 3357 c = 0x20; // normalize to space 3358 break; 3359 case 0x09: 3360 if ((flags & LIT_ATTRIBUTE) != 0) 3361 c = 0x20; // normalize to space 3362 break; 3363 case 0x26: // '&' 3364 mark(2); 3365 c = readCh(); 3366 if (c == 0x23) // '#' 3367 { 3368 if ((flags & LIT_DISABLE_CREF) != 0) 3369 { 3370 reset(); 3371 c = 0x26; // '&' 3372 } 3373 else 3374 { 3375 mark(1); 3376 c = readCh(); 3377 boolean hex = (c == 0x78); // 'x' 
3378 if (!hex) 3379 reset(); 3380 char[] ref = readCharacterRef(hex ? 16 : 10); 3381 for (int i = 0; i < ref.length; i++) 3382 literalBuf.append(ref[i]); 3383 entities = true; 3384 continue; 3385 } 3386 } 3387 else 3388 { 3389 if ((flags & LIT_DISABLE_EREF) != 0) 3390 { 3391 reset(); 3392 c = 0x26; // '&' 3393 } 3394 else 3395 { 3396 reset(); 3397 String entityName = readNmtoken(true); 3398 require(';'); 3399 String text = 3400 (String) PREDEFINED_ENTITIES.get(entityName); 3401 if (text != null) 3402 literalBuf.append(text); 3403 else 3404 expandEntity(entityName, 3405 (flags & LIT_ATTRIBUTE) != 0, 3406 true); 3407 entities = true; 3408 continue; 3409 } 3410 } 3411 break; 3412 case 0x3c: // '<' 3413 if ((flags & LIT_ATTRIBUTE) != 0) 3414 error("attribute values may not contain '<'"); 3415 break; 3416 case -1: 3417 if (inputStack.size() > 1) 3418 { 3419 popInput(); 3420 continue; 3421 } 3422 throw new EOFException(); 3423 default: 3424 if ((c < 0x0020 || c > 0xfffd) || 3425 (c >= 0xd800 && c < 0xdc00) || 3426 (input.xml11 && (c >= 0x007f) && 3427 (c <= 0x009f) && (c != 0x0085))) 3428 error("illegal character", "U+" + Integer.toHexString(c)); 3429 } 3430 literalBuf.append(Character.toChars(c)); 3431 } 3432 while (true); 3433 expandPE = saved; 3434 if (entities) 3435 normalizeCRLF(literalBuf); 3436 if ((flags & LIT_NORMALIZE) > 0) 3437 literalBuf = normalize(literalBuf); 3438 return literalBuf.toString(); 3439 } 3440 3441 /** 3442 * Performs attribute-value normalization of the text buffer. 3443 * This discards leading and trailing whitespace, and replaces sequences 3444 * of whitespace with a single space. 3445 */ normalize(StringBuffer buf)3446 private StringBuffer normalize(StringBuffer buf) 3447 { 3448 StringBuffer acc = new StringBuffer(); 3449 int len = buf.length(); 3450 int avState = 0; 3451 for (int i = 0; i < len; i++) 3452 { 3453 char c = buf.charAt(i); 3454 if (c == ' ') 3455 avState = (avState == 0) ? 
0 : 1; 3456 else 3457 { 3458 if (avState == 1) 3459 acc.append(' '); 3460 acc.append(c); 3461 avState = 2; 3462 } 3463 } 3464 return acc; 3465 } 3466 3467 /** 3468 * Replace any CR/LF pairs in the buffer with LF. 3469 * This may be necessary if combinations of CR or LF were declared as 3470 * (character) entity references in the input. 3471 */ normalizeCRLF(StringBuffer buf)3472 private void normalizeCRLF(StringBuffer buf) 3473 { 3474 int len = buf.length() - 1; 3475 for (int i = 0; i < len; i++) 3476 { 3477 char c = buf.charAt(i); 3478 if (c == '\r' && buf.charAt(i + 1) == '\n') 3479 { 3480 buf.deleteCharAt(i--); 3481 len--; 3482 } 3483 } 3484 } 3485 3486 /** 3487 * Parse and expand a parameter entity reference. 3488 */ expandPEReference()3489 private void expandPEReference() 3490 throws IOException, XMLStreamException 3491 { 3492 String name = readNmtoken(true, new StringBuffer()); 3493 require(';'); 3494 mark(1); // ensure we don't reset to before the semicolon 3495 if (doctype != null) 3496 { 3497 String entityName = "%" + name; 3498 Object entity = doctype.getEntity(entityName); 3499 if (entity != null) 3500 { 3501 if (xmlStandalone == Boolean.TRUE) 3502 { 3503 if (doctype.isEntityExternal(entityName)) 3504 error("reference to external parameter entity in " + 3505 "standalone document"); 3506 } 3507 if (entity instanceof String) 3508 { 3509 pushInput(name, (String) entity, false, input.normalize); 3510 //pushInput(name, " " + (String) entity + " "); 3511 } 3512 else 3513 { 3514 //pushInput("", " "); 3515 pushInput(name, (ExternalIds) entity, false, input.normalize); 3516 //pushInput("", " "); 3517 } 3518 } 3519 else 3520 error("reference to undeclared parameter entity", name); 3521 } 3522 else 3523 error("reference to parameter entity without doctype", name); 3524 } 3525 3526 /** 3527 * Parse the digits in a character reference. 
3528 * @param base the base of the digits (10 or 16) 3529 */ readCharacterRef(int base)3530 private char[] readCharacterRef(int base) 3531 throws IOException, XMLStreamException 3532 { 3533 CPStringBuilder b = new CPStringBuilder(); 3534 for (int c = readCh(); c != 0x3b && c != -1; c = readCh()) 3535 b.append(Character.toChars(c)); 3536 try 3537 { 3538 int ord = Integer.parseInt(b.toString(), base); 3539 if (input.xml11) 3540 { 3541 if (!isXML11Char(ord)) 3542 error("illegal XML 1.1 character reference " + 3543 "U+" + Integer.toHexString(ord)); 3544 } 3545 else 3546 { 3547 if ((ord < 0x20 && !(ord == 0x0a || ord == 0x09 || ord == 0x0d)) 3548 || (ord >= 0xd800 && ord <= 0xdfff) 3549 || ord == 0xfffe || ord == 0xffff 3550 || ord > 0x0010ffff) 3551 error("illegal XML character reference " + 3552 "U+" + Integer.toHexString(ord)); 3553 } 3554 return Character.toChars(ord); 3555 } 3556 catch (NumberFormatException e) 3557 { 3558 error("illegal characters in character reference", b.toString()); 3559 return null; 3560 } 3561 } 3562 3563 /** 3564 * Parses an NMTOKEN or Name production. 3565 * @param isName if a Name, otherwise an NMTOKEN 3566 */ readNmtoken(boolean isName)3567 private String readNmtoken(boolean isName) 3568 throws IOException, XMLStreamException 3569 { 3570 return readNmtoken(isName, nmtokenBuf); 3571 } 3572 3573 /** 3574 * Parses an NMTOKEN or Name production using the specified buffer. 
3575 * @param isName if a Name, otherwise an NMTOKEN 3576 * @param buf the character buffer to use 3577 */ readNmtoken(boolean isName, StringBuffer buf)3578 private String readNmtoken(boolean isName, StringBuffer buf) 3579 throws IOException, XMLStreamException 3580 { 3581 buf.setLength(0); 3582 int c = readCh(); 3583 if (isName) 3584 { 3585 if (!isNameStartCharacter(c, input.xml11)) 3586 error("not a name start character", 3587 "U+" + Integer.toHexString(c)); 3588 } 3589 else 3590 { 3591 if (!isNameCharacter(c, input.xml11)) 3592 error("not a name character", 3593 "U+" + Integer.toHexString(c)); 3594 } 3595 buf.append(Character.toChars(c)); 3596 do 3597 { 3598 mark(1); 3599 c = readCh(); 3600 switch (c) 3601 { 3602 case 0x25: // '%' 3603 case 0x3c: // '<' 3604 case 0x3e: // '>' 3605 case 0x26: // '&' 3606 case 0x2c: // ',' 3607 case 0x7c: // '|' 3608 case 0x2a: // '*' 3609 case 0x2b: // '+' 3610 case 0x3f: // '?' 3611 case 0x29: // ')' 3612 case 0x3d: // '=' 3613 case 0x27: // '\'' 3614 case 0x22: // '"' 3615 case 0x5b: // '[' 3616 case 0x20: // ' ' 3617 case 0x09: // '\t' 3618 case 0x0a: // '\n' 3619 case 0x0d: // '\r' 3620 case 0x3b: // ';' 3621 case 0x2f: // '/' 3622 case -1: 3623 reset(); 3624 return intern(buf.toString()); 3625 default: 3626 if (!isNameCharacter(c, input.xml11)) 3627 error("not a name character", 3628 "U+" + Integer.toHexString(c)); 3629 else 3630 buf.append(Character.toChars(c)); 3631 } 3632 } 3633 while (true); 3634 } 3635 3636 /** 3637 * Indicates whether the specified Unicode character is an XML 1.1 Char. 3638 */ isXML11Char(int c)3639 public static boolean isXML11Char(int c) 3640 { 3641 return ((c >= 0x0001 && c <= 0xD7FF) || 3642 (c >= 0xE000 && c < 0xFFFE) || 3643 (c >= 0x10000 && c <= 0x10FFFF)); 3644 } 3645 3646 /** 3647 * Indicates whether the specified Unicode character is an XML 1.1 3648 * RestrictedChar. 
3649 */ isXML11RestrictedChar(int c)3650 public static boolean isXML11RestrictedChar(int c) 3651 { 3652 return ((c >= 0x0001 && c <= 0x0008) || 3653 (c >= 0x000B && c <= 0x000C) || 3654 (c >= 0x000E && c <= 0x001F) || 3655 (c >= 0x007F && c <= 0x0084) || 3656 (c >= 0x0086 && c <= 0x009F)); 3657 } 3658 3659 /** 3660 * Indicates whether the specified text matches the Name or Nmtoken 3661 * production. 3662 */ isNmtoken(String text, boolean isName)3663 private boolean isNmtoken(String text, boolean isName) 3664 { 3665 try 3666 { 3667 int[] cp = UnicodeReader.toCodePointArray(text); 3668 if (cp.length == 0) 3669 return false; 3670 if (isName) 3671 { 3672 if (!isNameStartCharacter(cp[0], input.xml11)) 3673 return false; 3674 } 3675 else 3676 { 3677 if (!isNameCharacter(cp[0], input.xml11)) 3678 return false; 3679 } 3680 for (int i = 1; i < cp.length; i++) 3681 { 3682 if (!isNameCharacter(cp[i], input.xml11)) 3683 return false; 3684 } 3685 return true; 3686 } 3687 catch (IOException e) 3688 { 3689 return false; 3690 } 3691 } 3692 3693 /** 3694 * Indicates whether the specified Unicode character is a Name start 3695 * character. 
3696 */ isNameStartCharacter(int c, boolean xml11)3697 public static boolean isNameStartCharacter(int c, boolean xml11) 3698 { 3699 if (xml11) 3700 return ((c >= 0x0041 && c <= 0x005a) || 3701 (c >= 0x0061 && c <= 0x007a) || 3702 c == 0x3a | 3703 c == 0x5f | 3704 (c >= 0xC0 && c <= 0xD6) || 3705 (c >= 0xD8 && c <= 0xF6) || 3706 (c >= 0xF8 && c <= 0x2FF) || 3707 (c >= 0x370 && c <= 0x37D) || 3708 (c >= 0x37F && c <= 0x1FFF) || 3709 (c >= 0x200C && c <= 0x200D) || 3710 (c >= 0x2070 && c <= 0x218F) || 3711 (c >= 0x2C00 && c <= 0x2FEF) || 3712 (c >= 0x3001 && c <= 0xD7FF) || 3713 (c >= 0xF900 && c <= 0xFDCF) || 3714 (c >= 0xFDF0 && c <= 0xFFFD) || 3715 (c >= 0x10000 && c <= 0xEFFFF)); 3716 else 3717 return (c == 0x5f || c == 0x3a || isLetter(c)); 3718 } 3719 3720 /** 3721 * Indicates whether the specified Unicode character is a Name non-initial 3722 * character. 3723 */ isNameCharacter(int c, boolean xml11)3724 public static boolean isNameCharacter(int c, boolean xml11) 3725 { 3726 if (xml11) 3727 return ((c >= 0x0041 && c <= 0x005a) || 3728 (c >= 0x0061 && c <= 0x007a) || 3729 (c >= 0x0030 && c <= 0x0039) || 3730 c == 0x3a | 3731 c == 0x5f | 3732 c == 0x2d | 3733 c == 0x2e | 3734 c == 0xB7 | 3735 (c >= 0xC0 && c <= 0xD6) || 3736 (c >= 0xD8 && c <= 0xF6) || 3737 (c >= 0xF8 && c <= 0x2FF) || 3738 (c >= 0x300 && c <= 0x37D) || 3739 (c >= 0x37F && c <= 0x1FFF) || 3740 (c >= 0x200C && c <= 0x200D) || 3741 (c >= 0x203F && c <= 0x2040) || 3742 (c >= 0x2070 && c <= 0x218F) || 3743 (c >= 0x2C00 && c <= 0x2FEF) || 3744 (c >= 0x3001 && c <= 0xD7FF) || 3745 (c >= 0xF900 && c <= 0xFDCF) || 3746 (c >= 0xFDF0 && c <= 0xFFFD) || 3747 (c >= 0x10000 && c <= 0xEFFFF)); 3748 else 3749 return (c == 0x2e || c == 0x2d || c == 0x5f || c == 0x3a || 3750 isLetter(c) || isDigit(c) || 3751 isCombiningChar(c) || isExtender(c)); 3752 } 3753 3754 /** 3755 * Indicates whether the specified Unicode character matches the Letter 3756 * production. 
3757 */ isLetter(int c)3758 public static boolean isLetter(int c) 3759 { 3760 if ((c >= 0x0041 && c <= 0x005A) || 3761 (c >= 0x0061 && c <= 0x007A) || 3762 (c >= 0x00C0 && c <= 0x00D6) || 3763 (c >= 0x00D8 && c <= 0x00F6) || 3764 (c >= 0x00F8 && c <= 0x00FF) || 3765 (c >= 0x0100 && c <= 0x0131) || 3766 (c >= 0x0134 && c <= 0x013E) || 3767 (c >= 0x0141 && c <= 0x0148) || 3768 (c >= 0x014A && c <= 0x017E) || 3769 (c >= 0x0180 && c <= 0x01C3) || 3770 (c >= 0x01CD && c <= 0x01F0) || 3771 (c >= 0x01F4 && c <= 0x01F5) || 3772 (c >= 0x01FA && c <= 0x0217) || 3773 (c >= 0x0250 && c <= 0x02A8) || 3774 (c >= 0x02BB && c <= 0x02C1) || 3775 c == 0x0386 || 3776 (c >= 0x0388 && c <= 0x038A) || 3777 c == 0x038C || 3778 (c >= 0x038E && c <= 0x03A1) || 3779 (c >= 0x03A3 && c <= 0x03CE) || 3780 (c >= 0x03D0 && c <= 0x03D6) || 3781 c == 0x03DA || 3782 c == 0x03DC || 3783 c == 0x03DE || 3784 c == 0x03E0 || 3785 (c >= 0x03E2 && c <= 0x03F3) || 3786 (c >= 0x0401 && c <= 0x040C) || 3787 (c >= 0x040E && c <= 0x044F) || 3788 (c >= 0x0451 && c <= 0x045C) || 3789 (c >= 0x045E && c <= 0x0481) || 3790 (c >= 0x0490 && c <= 0x04C4) || 3791 (c >= 0x04C7 && c <= 0x04C8) || 3792 (c >= 0x04CB && c <= 0x04CC) || 3793 (c >= 0x04D0 && c <= 0x04EB) || 3794 (c >= 0x04EE && c <= 0x04F5) || 3795 (c >= 0x04F8 && c <= 0x04F9) || 3796 (c >= 0x0531 && c <= 0x0556) || 3797 c == 0x0559 || 3798 (c >= 0x0561 && c <= 0x0586) || 3799 (c >= 0x05D0 && c <= 0x05EA) || 3800 (c >= 0x05F0 && c <= 0x05F2) || 3801 (c >= 0x0621 && c <= 0x063A) || 3802 (c >= 0x0641 && c <= 0x064A) || 3803 (c >= 0x0671 && c <= 0x06B7) || 3804 (c >= 0x06BA && c <= 0x06BE) || 3805 (c >= 0x06C0 && c <= 0x06CE) || 3806 (c >= 0x06D0 && c <= 0x06D3) || 3807 c == 0x06D5 || 3808 (c >= 0x06E5 && c <= 0x06E6) || 3809 (c >= 0x0905 && c <= 0x0939) || 3810 c == 0x093D || 3811 (c >= 0x0958 && c <= 0x0961) || 3812 (c >= 0x0985 && c <= 0x098C) || 3813 (c >= 0x098F && c <= 0x0990) || 3814 (c >= 0x0993 && c <= 0x09A8) || 3815 (c >= 0x09AA && c <= 0x09B0) || 
3816 c == 0x09B2 || 3817 (c >= 0x09B6 && c <= 0x09B9) || 3818 (c >= 0x09DC && c <= 0x09DD) || 3819 (c >= 0x09DF && c <= 0x09E1) || 3820 (c >= 0x09F0 && c <= 0x09F1) || 3821 (c >= 0x0A05 && c <= 0x0A0A) || 3822 (c >= 0x0A0F && c <= 0x0A10) || 3823 (c >= 0x0A13 && c <= 0x0A28) || 3824 (c >= 0x0A2A && c <= 0x0A30) || 3825 (c >= 0x0A32 && c <= 0x0A33) || 3826 (c >= 0x0A35 && c <= 0x0A36) || 3827 (c >= 0x0A38 && c <= 0x0A39) || 3828 (c >= 0x0A59 && c <= 0x0A5C) || 3829 c == 0x0A5E || 3830 (c >= 0x0A72 && c <= 0x0A74) || 3831 (c >= 0x0A85 && c <= 0x0A8B) || 3832 c == 0x0A8D || 3833 (c >= 0x0A8F && c <= 0x0A91) || 3834 (c >= 0x0A93 && c <= 0x0AA8) || 3835 (c >= 0x0AAA && c <= 0x0AB0) || 3836 (c >= 0x0AB2 && c <= 0x0AB3) || 3837 (c >= 0x0AB5 && c <= 0x0AB9) || 3838 c == 0x0ABD || 3839 c == 0x0AE0 || 3840 (c >= 0x0B05 && c <= 0x0B0C) || 3841 (c >= 0x0B0F && c <= 0x0B10) || 3842 (c >= 0x0B13 && c <= 0x0B28) || 3843 (c >= 0x0B2A && c <= 0x0B30) || 3844 (c >= 0x0B32 && c <= 0x0B33) || 3845 (c >= 0x0B36 && c <= 0x0B39) || 3846 c == 0x0B3D || 3847 (c >= 0x0B5C && c <= 0x0B5D) || 3848 (c >= 0x0B5F && c <= 0x0B61) || 3849 (c >= 0x0B85 && c <= 0x0B8A) || 3850 (c >= 0x0B8E && c <= 0x0B90) || 3851 (c >= 0x0B92 && c <= 0x0B95) || 3852 (c >= 0x0B99 && c <= 0x0B9A) || 3853 c == 0x0B9C || 3854 (c >= 0x0B9E && c <= 0x0B9F) || 3855 (c >= 0x0BA3 && c <= 0x0BA4) || 3856 (c >= 0x0BA8 && c <= 0x0BAA) || 3857 (c >= 0x0BAE && c <= 0x0BB5) || 3858 (c >= 0x0BB7 && c <= 0x0BB9) || 3859 (c >= 0x0C05 && c <= 0x0C0C) || 3860 (c >= 0x0C0E && c <= 0x0C10) || 3861 (c >= 0x0C12 && c <= 0x0C28) || 3862 (c >= 0x0C2A && c <= 0x0C33) || 3863 (c >= 0x0C35 && c <= 0x0C39) || 3864 (c >= 0x0C60 && c <= 0x0C61) || 3865 (c >= 0x0C85 && c <= 0x0C8C) || 3866 (c >= 0x0C8E && c <= 0x0C90) || 3867 (c >= 0x0C92 && c <= 0x0CA8) || 3868 (c >= 0x0CAA && c <= 0x0CB3) || 3869 (c >= 0x0CB5 && c <= 0x0CB9) || 3870 c == 0x0CDE || 3871 (c >= 0x0CE0 && c <= 0x0CE1) || 3872 (c >= 0x0D05 && c <= 0x0D0C) || 3873 (c >= 0x0D0E && c <= 
0x0D10) || 3874 (c >= 0x0D12 && c <= 0x0D28) || 3875 (c >= 0x0D2A && c <= 0x0D39) || 3876 (c >= 0x0D60 && c <= 0x0D61) || 3877 (c >= 0x0E01 && c <= 0x0E2E) || 3878 c == 0x0E30 || 3879 (c >= 0x0E32 && c <= 0x0E33) || 3880 (c >= 0x0E40 && c <= 0x0E45) || 3881 (c >= 0x0E81 && c <= 0x0E82) || 3882 c == 0x0E84 || 3883 (c >= 0x0E87 && c <= 0x0E88) || 3884 c == 0x0E8A || 3885 c == 0x0E8D || 3886 (c >= 0x0E94 && c <= 0x0E97) || 3887 (c >= 0x0E99 && c <= 0x0E9F) || 3888 (c >= 0x0EA1 && c <= 0x0EA3) || 3889 c == 0x0EA5 || 3890 c == 0x0EA7 || 3891 (c >= 0x0EAA && c <= 0x0EAB) || 3892 (c >= 0x0EAD && c <= 0x0EAE) || 3893 c == 0x0EB0 || 3894 (c >= 0x0EB2 && c <= 0x0EB3) || 3895 c == 0x0EBD || 3896 (c >= 0x0EC0 && c <= 0x0EC4) || 3897 (c >= 0x0F40 && c <= 0x0F47) || 3898 (c >= 0x0F49 && c <= 0x0F69) || 3899 (c >= 0x10A0 && c <= 0x10C5) || 3900 (c >= 0x10D0 && c <= 0x10F6) || 3901 c == 0x1100 || 3902 (c >= 0x1102 && c <= 0x1103) || 3903 (c >= 0x1105 && c <= 0x1107) || 3904 c == 0x1109 || 3905 (c >= 0x110B && c <= 0x110C) || 3906 (c >= 0x110E && c <= 0x1112) || 3907 c == 0x113C || 3908 c == 0x113E || 3909 c == 0x1140 || 3910 c == 0x114C || 3911 c == 0x114E || 3912 c == 0x1150 || 3913 (c >= 0x1154 && c <= 0x1155) || 3914 c == 0x1159 || 3915 (c >= 0x115F && c <= 0x1161) || 3916 c == 0x1163 || 3917 c == 0x1165 || 3918 c == 0x1167 || 3919 c == 0x1169 || 3920 (c >= 0x116D && c <= 0x116E) || 3921 (c >= 0x1172 && c <= 0x1173) || 3922 c == 0x1175 || 3923 c == 0x119E || 3924 c == 0x11A8 || 3925 c == 0x11AB || 3926 (c >= 0x11AE && c <= 0x11AF) || 3927 (c >= 0x11B7 && c <= 0x11B8) || 3928 c == 0x11BA || 3929 (c >= 0x11BC && c <= 0x11C2) || 3930 c == 0x11EB || 3931 c == 0x11F0 || 3932 c == 0x11F9 || 3933 (c >= 0x1E00 && c <= 0x1E9B) || 3934 (c >= 0x1EA0 && c <= 0x1EF9) || 3935 (c >= 0x1F00 && c <= 0x1F15) || 3936 (c >= 0x1F18 && c <= 0x1F1D) || 3937 (c >= 0x1F20 && c <= 0x1F45) || 3938 (c >= 0x1F48 && c <= 0x1F4D) || 3939 (c >= 0x1F50 && c <= 0x1F57) || 3940 c == 0x1F59 || 3941 c == 0x1F5B || 
3942 c == 0x1F5D || 3943 (c >= 0x1F5F && c <= 0x1F7D) || 3944 (c >= 0x1F80 && c <= 0x1FB4) || 3945 (c >= 0x1FB6 && c <= 0x1FBC) || 3946 c == 0x1FBE || 3947 (c >= 0x1FC2 && c <= 0x1FC4) || 3948 (c >= 0x1FC6 && c <= 0x1FCC) || 3949 (c >= 0x1FD0 && c <= 0x1FD3) || 3950 (c >= 0x1FD6 && c <= 0x1FDB) || 3951 (c >= 0x1FE0 && c <= 0x1FEC) || 3952 (c >= 0x1FF2 && c <= 0x1FF4) || 3953 (c >= 0x1FF6 && c <= 0x1FFC) || 3954 c == 0x2126 || 3955 (c >= 0x212A && c <= 0x212B) || 3956 c == 0x212E || 3957 (c >= 0x2180 && c <= 0x2182) || 3958 (c >= 0x3041 && c <= 0x3094) || 3959 (c >= 0x30A1 && c <= 0x30FA) || 3960 (c >= 0x3105 && c <= 0x312C) || 3961 (c >= 0xAC00 && c <= 0xD7A3)) 3962 return true; // BaseChar 3963 if ((c >= 0x4e00 && c <= 0x9fa5) || 3964 c == 0x3007 || 3965 (c >= 0x3021 && c <= 0x3029)) 3966 return true; // Ideographic 3967 return false; 3968 } 3969 3970 /** 3971 * Indicates whether the specified Unicode character matches the Digit 3972 * production. 3973 */ isDigit(int c)3974 public static boolean isDigit(int c) 3975 { 3976 return ((c >= 0x0030 && c <= 0x0039) || 3977 (c >= 0x0660 && c <= 0x0669) || 3978 (c >= 0x06F0 && c <= 0x06F9) || 3979 (c >= 0x0966 && c <= 0x096F) || 3980 (c >= 0x09E6 && c <= 0x09EF) || 3981 (c >= 0x0A66 && c <= 0x0A6F) || 3982 (c >= 0x0AE6 && c <= 0x0AEF) || 3983 (c >= 0x0B66 && c <= 0x0B6F) || 3984 (c >= 0x0BE7 && c <= 0x0BEF) || 3985 (c >= 0x0C66 && c <= 0x0C6F) || 3986 (c >= 0x0CE6 && c <= 0x0CEF) || 3987 (c >= 0x0D66 && c <= 0x0D6F) || 3988 (c >= 0x0E50 && c <= 0x0E59) || 3989 (c >= 0x0ED0 && c <= 0x0ED9) || 3990 (c >= 0x0F20 && c <= 0x0F29)); 3991 } 3992 3993 /** 3994 * Indicates whether the specified Unicode character matches the 3995 * CombiningChar production. 
3996 */ isCombiningChar(int c)3997 public static boolean isCombiningChar(int c) 3998 { 3999 return ((c >= 0x0300 && c <= 0x0345) || 4000 (c >= 0x0360 && c <= 0x0361) || 4001 (c >= 0x0483 && c <= 0x0486) || 4002 (c >= 0x0591 && c <= 0x05A1) || 4003 (c >= 0x05A3 && c <= 0x05B9) || 4004 (c >= 0x05BB && c <= 0x05BD) || 4005 c == 0x05BF || 4006 (c >= 0x05C1 && c <= 0x05C2) || 4007 c == 0x05C4 || 4008 (c >= 0x064B && c <= 0x0652) || 4009 c == 0x0670 || 4010 (c >= 0x06D6 && c <= 0x06DC) || 4011 (c >= 0x06DD && c <= 0x06DF) || 4012 (c >= 0x06E0 && c <= 0x06E4) || 4013 (c >= 0x06E7 && c <= 0x06E8) || 4014 (c >= 0x06EA && c <= 0x06ED) || 4015 (c >= 0x0901 && c <= 0x0903) || 4016 c == 0x093C || 4017 (c >= 0x093E && c <= 0x094C) || 4018 c == 0x094D || 4019 (c >= 0x0951 && c <= 0x0954) || 4020 (c >= 0x0962 && c <= 0x0963) || 4021 (c >= 0x0981 && c <= 0x0983) || 4022 c == 0x09BC || 4023 c == 0x09BE || 4024 c == 0x09BF || 4025 (c >= 0x09C0 && c <= 0x09C4) || 4026 (c >= 0x09C7 && c <= 0x09C8) || 4027 (c >= 0x09CB && c <= 0x09CD) || 4028 c == 0x09D7 || 4029 (c >= 0x09E2 && c <= 0x09E3) || 4030 c == 0x0A02 || 4031 c == 0x0A3C || 4032 c == 0x0A3E || 4033 c == 0x0A3F || 4034 (c >= 0x0A40 && c <= 0x0A42) || 4035 (c >= 0x0A47 && c <= 0x0A48) || 4036 (c >= 0x0A4B && c <= 0x0A4D) || 4037 (c >= 0x0A70 && c <= 0x0A71) || 4038 (c >= 0x0A81 && c <= 0x0A83) || 4039 c == 0x0ABC || 4040 (c >= 0x0ABE && c <= 0x0AC5) || 4041 (c >= 0x0AC7 && c <= 0x0AC9) || 4042 (c >= 0x0ACB && c <= 0x0ACD) || 4043 (c >= 0x0B01 && c <= 0x0B03) || 4044 c == 0x0B3C || 4045 (c >= 0x0B3E && c <= 0x0B43) || 4046 (c >= 0x0B47 && c <= 0x0B48) || 4047 (c >= 0x0B4B && c <= 0x0B4D) || 4048 (c >= 0x0B56 && c <= 0x0B57) || 4049 (c >= 0x0B82 && c <= 0x0B83) || 4050 (c >= 0x0BBE && c <= 0x0BC2) || 4051 (c >= 0x0BC6 && c <= 0x0BC8) || 4052 (c >= 0x0BCA && c <= 0x0BCD) || 4053 c == 0x0BD7 || 4054 (c >= 0x0C01 && c <= 0x0C03) || 4055 (c >= 0x0C3E && c <= 0x0C44) || 4056 (c >= 0x0C46 && c <= 0x0C48) || 4057 (c >= 0x0C4A && c <= 
0x0C4D) || 4058 (c >= 0x0C55 && c <= 0x0C56) || 4059 (c >= 0x0C82 && c <= 0x0C83) || 4060 (c >= 0x0CBE && c <= 0x0CC4) || 4061 (c >= 0x0CC6 && c <= 0x0CC8) || 4062 (c >= 0x0CCA && c <= 0x0CCD) || 4063 (c >= 0x0CD5 && c <= 0x0CD6) || 4064 (c >= 0x0D02 && c <= 0x0D03) || 4065 (c >= 0x0D3E && c <= 0x0D43) || 4066 (c >= 0x0D46 && c <= 0x0D48) || 4067 (c >= 0x0D4A && c <= 0x0D4D) || 4068 c == 0x0D57 || 4069 c == 0x0E31 || 4070 (c >= 0x0E34 && c <= 0x0E3A) || 4071 (c >= 0x0E47 && c <= 0x0E4E) || 4072 c == 0x0EB1 || 4073 (c >= 0x0EB4 && c <= 0x0EB9) || 4074 (c >= 0x0EBB && c <= 0x0EBC) || 4075 (c >= 0x0EC8 && c <= 0x0ECD) || 4076 (c >= 0x0F18 && c <= 0x0F19) || 4077 c == 0x0F35 || 4078 c == 0x0F37 || 4079 c == 0x0F39 || 4080 c == 0x0F3E || 4081 c == 0x0F3F || 4082 (c >= 0x0F71 && c <= 0x0F84) || 4083 (c >= 0x0F86 && c <= 0x0F8B) || 4084 (c >= 0x0F90 && c <= 0x0F95) || 4085 c == 0x0F97 || 4086 (c >= 0x0F99 && c <= 0x0FAD) || 4087 (c >= 0x0FB1 && c <= 0x0FB7) || 4088 c == 0x0FB9 || 4089 (c >= 0x20D0 && c <= 0x20DC) || 4090 c == 0x20E1 || 4091 (c >= 0x302A && c <= 0x302F) || 4092 c == 0x3099 || 4093 c == 0x309A); 4094 } 4095 4096 /** 4097 * Indicates whether the specified Unicode character matches the Extender 4098 * production. 4099 */ isExtender(int c)4100 public static boolean isExtender(int c) 4101 { 4102 return (c == 0x00B7 || 4103 c == 0x02D0 || 4104 c == 0x02D1 || 4105 c == 0x0387 || 4106 c == 0x0640 || 4107 c == 0x0E46 || 4108 c == 0x0EC6 || 4109 c == 0x3005 || 4110 (c >= 0x3031 && c <= 0x3035) || 4111 (c >= 0x309D && c <= 0x309E) || 4112 (c >= 0x30FC && c <= 0x30FE)); 4113 } 4114 4115 /** 4116 * Indicates whether the specified Unicode character matches the Char 4117 * production. 
4118 */ isChar(int c)4119 public static boolean isChar(int c) 4120 { 4121 return (c >= 0x20 && c < 0xd800) || 4122 (c >= 0xe00 && c < 0xfffe) || 4123 (c >= 0x10000 && c < 0x110000) || 4124 c == 0xa || c == 0x9 || c == 0xd; 4125 } 4126 4127 /** 4128 * Interns the specified text or not, depending on the value of 4129 * stringInterning. 4130 */ intern(String text)4131 private String intern(String text) 4132 { 4133 return stringInterning ? text.intern() : text; 4134 } 4135 4136 /** 4137 * Report a parsing error. 4138 */ error(String message)4139 private void error(String message) 4140 throws XMLStreamException 4141 { 4142 error(message, null); 4143 } 4144 4145 /** 4146 * Report a parsing error. 4147 */ error(String message, Object info)4148 private void error(String message, Object info) 4149 throws XMLStreamException 4150 { 4151 if (info != null) 4152 { 4153 if (info instanceof String) 4154 message += ": \"" + ((String) info) + "\""; 4155 else if (info instanceof Character) 4156 message += ": '" + ((Character) info) + "'"; 4157 } 4158 throw new XMLStreamException(message); 4159 } 4160 4161 /** 4162 * Perform validation of a start-element event. 
4163 */ validateStartElement(String elementName)4164 private void validateStartElement(String elementName) 4165 throws XMLStreamException 4166 { 4167 if (currentContentModel == null) 4168 { 4169 // root element 4170 // VC: Root Element Type 4171 if (!elementName.equals(doctype.rootName)) 4172 error("root element name must match name in DTD"); 4173 return; 4174 } 4175 // VC: Element Valid 4176 switch (currentContentModel.type) 4177 { 4178 case ContentModel.EMPTY: 4179 error("child element found in empty element", elementName); 4180 break; 4181 case ContentModel.ELEMENT: 4182 LinkedList ctx = (LinkedList) validationStack.getLast(); 4183 ctx.add(elementName); 4184 break; 4185 case ContentModel.MIXED: 4186 MixedContentModel mm = (MixedContentModel) currentContentModel; 4187 if (!mm.containsName(elementName)) 4188 error("illegal element for content model", elementName); 4189 break; 4190 } 4191 } 4192 4193 /** 4194 * Perform validation of an end-element event. 4195 */ validateEndElement()4196 private void validateEndElement() 4197 throws XMLStreamException 4198 { 4199 if (currentContentModel == null) 4200 { 4201 // root element 4202 // VC: IDREF 4203 if (!idrefs.containsAll(ids)) 4204 error("IDREF values must match the value of some ID attribute"); 4205 return; 4206 } 4207 // VC: Element Valid 4208 switch (currentContentModel.type) 4209 { 4210 case ContentModel.ELEMENT: 4211 LinkedList ctx = (LinkedList) validationStack.getLast(); 4212 ElementContentModel ecm = (ElementContentModel) currentContentModel; 4213 validateElementContent(ecm, ctx); 4214 break; 4215 } 4216 } 4217 4218 /** 4219 * Perform validation of character data. 
4220 */ validatePCData(String text)4221 private void validatePCData(String text) 4222 throws XMLStreamException 4223 { 4224 // VC: Element Valid 4225 switch (currentContentModel.type) 4226 { 4227 case ContentModel.EMPTY: 4228 error("character data found in empty element", text); 4229 break; 4230 case ContentModel.ELEMENT: 4231 boolean white = true; 4232 int len = text.length(); 4233 for (int i = 0; i < len; i++) 4234 { 4235 char c = text.charAt(i); 4236 if (c != ' ' && c != '\t' && c != '\n' && c != '\r') 4237 { 4238 white = false; 4239 break; 4240 } 4241 } 4242 if (!white) 4243 error("character data found in element with element content", text); 4244 else if (xmlStandalone == Boolean.TRUE && currentContentModel.external) 4245 // VC: Standalone Document Declaration 4246 error("whitespace in element content of externally declared " + 4247 "element in standalone document"); 4248 break; 4249 } 4250 } 4251 4252 /** 4253 * Validates the specified validation context (list of child elements) 4254 * against the element content model for the current element. 4255 */ validateElementContent(ElementContentModel model, LinkedList children)4256 private void validateElementContent(ElementContentModel model, 4257 LinkedList children) 4258 throws XMLStreamException 4259 { 4260 // Use regular expression 4261 CPStringBuilder buf = new CPStringBuilder(); 4262 for (Iterator i = children.iterator(); i.hasNext(); ) 4263 { 4264 buf.append((String) i.next()); 4265 buf.append(' '); 4266 } 4267 String c = buf.toString(); 4268 String regex = createRegularExpression(model); 4269 if (!c.matches(regex)) 4270 error("element content "+model.text+" does not match expression "+regex, c); 4271 } 4272 4273 /** 4274 * Creates the regular expression used to validate an element content 4275 * model. 
4276 */ createRegularExpression(ElementContentModel model)4277 private String createRegularExpression(ElementContentModel model) 4278 { 4279 if (model.regex == null) 4280 { 4281 CPStringBuilder buf = new CPStringBuilder(); 4282 buf.append('('); 4283 for (Iterator i = model.contentParticles.iterator(); i.hasNext(); ) 4284 { 4285 ContentParticle cp = (ContentParticle) i.next(); 4286 if (cp.content instanceof String) 4287 { 4288 buf.append('('); 4289 buf.append((String) cp.content); 4290 buf.append(' '); 4291 buf.append(')'); 4292 if (cp.max == -1) 4293 { 4294 if (cp.min == 0) 4295 buf.append('*'); 4296 else 4297 buf.append('+'); 4298 } 4299 else if (cp.min == 0) 4300 buf.append('?'); 4301 } 4302 else 4303 { 4304 ElementContentModel ecm = (ElementContentModel) cp.content; 4305 buf.append(createRegularExpression(ecm)); 4306 } 4307 if (model.or && i.hasNext()) 4308 buf.append('|'); 4309 } 4310 buf.append(')'); 4311 if (model.max == -1) 4312 { 4313 if (model.min == 0) 4314 buf.append('*'); 4315 else 4316 buf.append('+'); 4317 } 4318 else if (model.min == 0) 4319 buf.append('?'); 4320 model.regex = buf.toString(); 4321 } 4322 return model.regex; 4323 } 4324 4325 /** 4326 * Performs validation of a document type declaration event. 4327 */ validateDoctype()4328 void validateDoctype() 4329 throws XMLStreamException 4330 { 4331 for (Iterator i = doctype.entityIterator(); i.hasNext(); ) 4332 { 4333 Map.Entry entry = (Map.Entry) i.next(); 4334 Object entity = entry.getValue(); 4335 if (entity instanceof ExternalIds) 4336 { 4337 ExternalIds ids = (ExternalIds) entity; 4338 if (ids.notationName != null) 4339 { 4340 // VC: Notation Declared 4341 ExternalIds notation = doctype.getNotation(ids.notationName); 4342 if (notation == null) 4343 error("Notation name must match the declared name of a " + 4344 "notation", ids.notationName); 4345 } 4346 } 4347 } 4348 } 4349 4350 /** 4351 * Simple test harness for reading an XML file. 
4352 * args[0] is the filename of the XML file 4353 * If args[1] is "-x", enable XInclude processing 4354 */ main(String[] args)4355 public static void main(String[] args) 4356 throws Exception 4357 { 4358 boolean validating = false; 4359 boolean namespaceAware = false; 4360 boolean xIncludeAware = false; 4361 int pos = 0; 4362 while (pos < args.length && args[pos].startsWith("-")) 4363 { 4364 if ("-x".equals(args[pos])) 4365 xIncludeAware = true; 4366 else if ("-v".equals(args[pos])) 4367 validating = true; 4368 else if ("-n".equals(args[pos])) 4369 namespaceAware = true; 4370 pos++; 4371 } 4372 if (pos >= args.length) 4373 { 4374 System.out.println("Syntax: XMLParser [-n] [-v] [-x] <file> [<file2> [...]]"); 4375 System.out.println("\t-n: use namespace aware mode"); 4376 System.out.println("\t-v: use validating parser"); 4377 System.out.println("\t-x: use XInclude aware mode"); 4378 System.exit(2); 4379 } 4380 while (pos < args.length) 4381 { 4382 XMLParser p = new XMLParser(new java.io.FileInputStream(args[pos]), 4383 absolutize(null, args[pos]), 4384 validating, // validating 4385 namespaceAware, // namespaceAware 4386 true, // coalescing, 4387 true, // replaceERefs 4388 true, // externalEntities 4389 true, // supportDTD 4390 true, // baseAware 4391 true, // stringInterning 4392 true, // extendedEventTypes 4393 null, 4394 null); 4395 XMLStreamReader reader = p; 4396 if (xIncludeAware) 4397 reader = new XIncludeFilter(p, args[pos], true, true, true); 4398 try 4399 { 4400 int event; 4401 //do 4402 while (reader.hasNext()) 4403 { 4404 event = reader.next(); 4405 Location loc = reader.getLocation(); 4406 System.out.print(loc.getLineNumber() + ":" + 4407 loc.getColumnNumber() + " "); 4408 switch (event) 4409 { 4410 case XMLStreamConstants.START_DOCUMENT: 4411 System.out.println("START_DOCUMENT version=" + 4412 reader.getVersion() + 4413 " encoding=" + 4414 reader.getEncoding()); 4415 break; 4416 case XMLStreamConstants.END_DOCUMENT: 4417 
System.out.println("END_DOCUMENT"); 4418 break; 4419 case XMLStreamConstants.START_ELEMENT: 4420 System.out.println("START_ELEMENT " + 4421 reader.getName()); 4422 int l = reader.getNamespaceCount(); 4423 for (int i = 0; i < l; i++) 4424 System.out.println("\tnamespace " + 4425 reader.getNamespacePrefix(i) + "='" + 4426 reader.getNamespaceURI(i)+"'"); 4427 l = reader.getAttributeCount(); 4428 for (int i = 0; i < l; i++) 4429 System.out.println("\tattribute " + 4430 reader.getAttributeName(i) + "='" + 4431 reader.getAttributeValue(i) + "'"); 4432 break; 4433 case XMLStreamConstants.END_ELEMENT: 4434 System.out.println("END_ELEMENT " + reader.getName()); 4435 break; 4436 case XMLStreamConstants.CHARACTERS: 4437 System.out.println("CHARACTERS '" + 4438 encodeText(reader.getText()) + "'"); 4439 break; 4440 case XMLStreamConstants.CDATA: 4441 System.out.println("CDATA '" + 4442 encodeText(reader.getText()) + "'"); 4443 break; 4444 case XMLStreamConstants.SPACE: 4445 System.out.println("SPACE '" + 4446 encodeText(reader.getText()) + "'"); 4447 break; 4448 case XMLStreamConstants.DTD: 4449 System.out.println("DTD " + reader.getText()); 4450 break; 4451 case XMLStreamConstants.ENTITY_REFERENCE: 4452 System.out.println("ENTITY_REFERENCE " + reader.getText()); 4453 break; 4454 case XMLStreamConstants.COMMENT: 4455 System.out.println("COMMENT '" + 4456 encodeText(reader.getText()) + "'"); 4457 break; 4458 case XMLStreamConstants.PROCESSING_INSTRUCTION: 4459 System.out.println("PROCESSING_INSTRUCTION " + 4460 reader.getPITarget() + " " + 4461 reader.getPIData()); 4462 break; 4463 case START_ENTITY: 4464 System.out.println("START_ENTITY " + reader.getText()); 4465 break; 4466 case END_ENTITY: 4467 System.out.println("END_ENTITY " + reader.getText()); 4468 break; 4469 default: 4470 System.out.println("Unknown event: " + event); 4471 } 4472 } 4473 } 4474 catch (XMLStreamException e) 4475 { 4476 Location l = reader.getLocation(); 4477 System.out.println("At line 
"+l.getLineNumber()+ 4478 ", column "+l.getColumnNumber()+ 4479 " of "+l.getSystemId()); 4480 throw e; 4481 } 4482 pos++; 4483 } 4484 } 4485 4486 /** 4487 * Escapes control characters in the specified text. For debugging. 4488 */ encodeText(String text)4489 private static String encodeText(String text) 4490 { 4491 CPStringBuilder b = new CPStringBuilder(); 4492 int len = text.length(); 4493 for (int i = 0; i < len; i++) 4494 { 4495 char c = text.charAt(i); 4496 switch (c) 4497 { 4498 case '\t': 4499 b.append("\\t"); 4500 break; 4501 case '\n': 4502 b.append("\\n"); 4503 break; 4504 case '\r': 4505 b.append("\\r"); 4506 break; 4507 default: 4508 b.append(c); 4509 } 4510 } 4511 return b.toString(); 4512 } 4513 4514 /** 4515 * An attribute instance. 4516 */ 4517 class Attribute 4518 { 4519 4520 /** 4521 * Attribute name. 4522 */ 4523 final String name; 4524 4525 /** 4526 * Attribute type as declared in the DTD, or CDATA otherwise. 4527 */ 4528 final String type; 4529 4530 /** 4531 * Whether the attribute was specified or defaulted. 4532 */ 4533 final boolean specified; 4534 4535 /** 4536 * The attribute value. 4537 */ 4538 final String value; 4539 4540 /** 4541 * The namespace prefix. 4542 */ 4543 final String prefix; 4544 4545 /** 4546 * The namespace local-name. 
4547 */ 4548 final String localName; 4549 Attribute(String name, String type, boolean specified, String value)4550 Attribute(String name, String type, boolean specified, String value) 4551 { 4552 this.name = name; 4553 this.type = type; 4554 this.specified = specified; 4555 this.value = value; 4556 int ci = name.indexOf(':'); 4557 if (ci == -1) 4558 { 4559 prefix = null; 4560 localName = intern(name); 4561 } 4562 else 4563 { 4564 prefix = intern(name.substring(0, ci)); 4565 localName = intern(name.substring(ci + 1)); 4566 } 4567 } 4568 equals(Object other)4569 public boolean equals(Object other) 4570 { 4571 if (other instanceof Attribute) 4572 { 4573 Attribute a = (Attribute) other; 4574 if (namespaceAware) 4575 { 4576 if (!a.localName.equals(localName)) 4577 return false; 4578 String auri = getNamespaceURI(a.prefix); 4579 String uri = getNamespaceURI(prefix); 4580 if (uri == null && (auri == null || 4581 (input.xml11 && "".equals(auri)))) 4582 return true; 4583 if (uri != null) 4584 { 4585 if ("".equals(uri) && input.xml11 && "".equals(auri)) 4586 return true; 4587 return uri.equals(auri); 4588 } 4589 return false; 4590 } 4591 else 4592 return a.name.equals(name); 4593 } 4594 return false; 4595 } 4596 toString()4597 public String toString() 4598 { 4599 CPStringBuilder buf = new CPStringBuilder(getClass().getName()); 4600 buf.append('['); 4601 buf.append("name="); 4602 buf.append(name); 4603 if (value != null) 4604 { 4605 buf.append(",value="); 4606 buf.append(value); 4607 } 4608 if (type != null) 4609 { 4610 buf.append(",type="); 4611 buf.append(type); 4612 } 4613 if (specified) 4614 buf.append(",specified"); 4615 buf.append(']'); 4616 return buf.toString(); 4617 } 4618 4619 } 4620 4621 /** 4622 * Representation of a DTD. 4623 */ 4624 class Doctype 4625 { 4626 4627 /** 4628 * Name of the root element. 4629 */ 4630 final String rootName; 4631 4632 /** 4633 * Public ID, if any, of external subset. 
4634 */ 4635 final String publicId; 4636 4637 /** 4638 * System ID (URL), if any, of external subset. 4639 */ 4640 final String systemId; 4641 4642 /** 4643 * Map of element names to content models. 4644 */ 4645 private final LinkedHashMap elements = new LinkedHashMap(); 4646 4647 /** 4648 * Map of element names to maps of attribute declarations. 4649 */ 4650 private final LinkedHashMap attlists = new LinkedHashMap(); 4651 4652 /** 4653 * Map of entity names to entities (String or ExternalIds). 4654 */ 4655 private final LinkedHashMap entities = new LinkedHashMap(); 4656 4657 /** 4658 * Map of notation names to ExternalIds. 4659 */ 4660 private final LinkedHashMap notations = new LinkedHashMap(); 4661 4662 /** 4663 * Map of anonymous keys to comments. 4664 */ 4665 private final LinkedHashMap comments = new LinkedHashMap(); 4666 4667 /** 4668 * Map of anonymous keys to processing instructions (String[2] 4669 * containing {target, data}). 4670 */ 4671 private final LinkedHashMap pis = new LinkedHashMap(); 4672 4673 /** 4674 * List of keys to all markup entries in the DTD. 4675 */ 4676 private final LinkedList entries = new LinkedList(); 4677 4678 /** 4679 * Set of the entities defined in the external subset. 4680 */ 4681 private final HashSet externalEntities = new HashSet(); 4682 4683 /** 4684 * Set of the notations defined in the external subset. 4685 */ 4686 private final HashSet externalNotations = new HashSet(); 4687 4688 /** 4689 * Counter for making anonymous keys. 4690 */ 4691 private int anon = 1; 4692 4693 /** 4694 * Constructor. 4695 */ Doctype(String rootName, String publicId, String systemId)4696 Doctype(String rootName, String publicId, String systemId) 4697 { 4698 this.rootName = rootName; 4699 this.publicId = publicId; 4700 this.systemId = systemId; 4701 } 4702 4703 /** 4704 * Adds an element declaration. 
4705 * @param name the element name 4706 * @param text the content model text 4707 * @param model the parsed content model 4708 */ addElementDecl(String name, String text, ContentModel model)4709 void addElementDecl(String name, String text, ContentModel model) 4710 { 4711 if (elements.containsKey(name)) 4712 return; 4713 model.text = text; 4714 model.external = (inputStack.size() != 1); 4715 elements.put(name, model); 4716 entries.add("E" + name); 4717 } 4718 4719 /** 4720 * Adds an attribute declaration. 4721 * @param ename the element name 4722 * @param aname the attribute name 4723 * @param decl the attribute declaration details 4724 */ addAttributeDecl(String ename, String aname, AttributeDecl decl)4725 void addAttributeDecl(String ename, String aname, AttributeDecl decl) 4726 { 4727 LinkedHashMap attlist = (LinkedHashMap) attlists.get(ename); 4728 if (attlist == null) 4729 { 4730 attlist = new LinkedHashMap(); 4731 attlists.put(ename, attlist); 4732 } 4733 else if (attlist.containsKey(aname)) 4734 return; 4735 attlist.put(aname, decl); 4736 String key = "A" + ename; 4737 if (!entries.contains(key)) 4738 entries.add(key); 4739 } 4740 4741 /** 4742 * Adds an entity declaration. 4743 * @param name the entity name 4744 * @param text the entity replacement text 4745 * @param inExternalSubset if we are in the exernal subset 4746 */ addEntityDecl(String name, String text, boolean inExternalSubset)4747 void addEntityDecl(String name, String text, boolean inExternalSubset) 4748 { 4749 if (entities.containsKey(name)) 4750 return; 4751 entities.put(name, text); 4752 entries.add("e" + name); 4753 if (inExternalSubset) 4754 externalEntities.add(name); 4755 } 4756 4757 /** 4758 * Adds an entity declaration. 
4759 * @param name the entity name 4760 * @param ids the external IDs 4761 * @param inExternalSubset if we are in the exernal subset 4762 */ addEntityDecl(String name, ExternalIds ids, boolean inExternalSubset)4763 void addEntityDecl(String name, ExternalIds ids, boolean inExternalSubset) 4764 { 4765 if (entities.containsKey(name)) 4766 return; 4767 entities.put(name, ids); 4768 entries.add("e" + name); 4769 if (inExternalSubset) 4770 externalEntities.add(name); 4771 } 4772 4773 /** 4774 * Adds a notation declaration. 4775 * @param name the notation name 4776 * @param ids the external IDs 4777 * @param inExternalSubset if we are in the exernal subset 4778 */ addNotationDecl(String name, ExternalIds ids, boolean inExternalSubset)4779 void addNotationDecl(String name, ExternalIds ids, boolean inExternalSubset) 4780 { 4781 if (notations.containsKey(name)) 4782 return; 4783 notations.put(name, ids); 4784 entries.add("n" + name); 4785 if (inExternalSubset) 4786 externalNotations.add(name); 4787 } 4788 4789 /** 4790 * Adds a comment. 4791 */ addComment(String text)4792 void addComment(String text) 4793 { 4794 String key = Integer.toString(anon++); 4795 comments.put(key, text); 4796 entries.add("c" + key); 4797 } 4798 4799 /** 4800 * Adds a processing instruction. 4801 */ addPI(String target, String data)4802 void addPI(String target, String data) 4803 { 4804 String key = Integer.toString(anon++); 4805 pis.put(key, new String[] {target, data}); 4806 entries.add("p" + key); 4807 } 4808 4809 /** 4810 * Returns the content model for the specified element. 
4811 * @param name the element name 4812 */ getElementModel(String name)4813 ContentModel getElementModel(String name) 4814 { 4815 return (ContentModel) elements.get(name); 4816 } 4817 4818 /** 4819 * Returns the attribute definition for the given attribute 4820 * @param ename the element name 4821 * @param aname the attribute name 4822 */ getAttributeDecl(String ename, String aname)4823 AttributeDecl getAttributeDecl(String ename, String aname) 4824 { 4825 LinkedHashMap attlist = (LinkedHashMap) attlists.get(ename); 4826 return (attlist == null) ? null : (AttributeDecl) attlist.get(aname); 4827 } 4828 4829 /** 4830 * Indicates whether the specified attribute was declared in the DTD. 4831 * @param ename the element name 4832 * @param aname the attribute name 4833 */ isAttributeDeclared(String ename, String aname)4834 boolean isAttributeDeclared(String ename, String aname) 4835 { 4836 LinkedHashMap attlist = (LinkedHashMap) attlists.get(ename); 4837 return (attlist == null) ? false : attlist.containsKey(aname); 4838 } 4839 4840 /** 4841 * Returns an iterator over the entries in the attribute list for the 4842 * given element. 4843 * @param ename the element name 4844 */ attlistIterator(String ename)4845 Iterator attlistIterator(String ename) 4846 { 4847 LinkedHashMap attlist = (LinkedHashMap) attlists.get(ename); 4848 return (attlist == null) ? Collections.EMPTY_LIST.iterator() : 4849 attlist.entrySet().iterator(); 4850 } 4851 4852 /** 4853 * Returns the entity (String or ExternalIds) for the given entity name. 4854 */ getEntity(String name)4855 Object getEntity(String name) 4856 { 4857 return entities.get(name); 4858 } 4859 4860 /** 4861 * Indicates whether the specified entity was declared in the external 4862 * subset. 4863 */ isEntityExternal(String name)4864 boolean isEntityExternal(String name) 4865 { 4866 return externalEntities.contains(name); 4867 } 4868 4869 /** 4870 * Returns an iterator over the entity map entries. 
4871 */ entityIterator()4872 Iterator entityIterator() 4873 { 4874 return entities.entrySet().iterator(); 4875 } 4876 4877 /** 4878 * Returns the notation IDs for the given notation name. 4879 */ getNotation(String name)4880 ExternalIds getNotation(String name) 4881 { 4882 return (ExternalIds) notations.get(name); 4883 } 4884 4885 /** 4886 * Indicates whether the specified notation was declared in the external 4887 * subset. 4888 */ isNotationExternal(String name)4889 boolean isNotationExternal(String name) 4890 { 4891 return externalNotations.contains(name); 4892 } 4893 4894 /** 4895 * Returns the comment associated with the specified (anonymous) key. 4896 */ getComment(String key)4897 String getComment(String key) 4898 { 4899 return (String) comments.get(key); 4900 } 4901 4902 /** 4903 * Returns the processing instruction associated with the specified 4904 * (anonymous) key. 4905 */ getPI(String key)4906 String[] getPI(String key) 4907 { 4908 return (String[]) pis.get(key); 4909 } 4910 4911 /** 4912 * Returns an iterator over the keys of the markup entries in this DTD, 4913 * in the order declared. 4914 */ entryIterator()4915 Iterator entryIterator() 4916 { 4917 return entries.iterator(); 4918 } 4919 4920 } 4921 4922 /** 4923 * Combination of an ExternalID and an optional NDataDecl. 4924 */ 4925 class ExternalIds 4926 { 4927 4928 /** 4929 * The public ID. 4930 */ 4931 String publicId; 4932 4933 /** 4934 * The system ID. 4935 */ 4936 String systemId; 4937 4938 /** 4939 * The notation name declared with the NDATA keyword. 4940 */ 4941 String notationName; 4942 } 4943 4944 /** 4945 * A content model. 
4946 */ 4947 abstract class ContentModel 4948 { 4949 static final int EMPTY = 0; 4950 static final int ANY = 1; 4951 static final int ELEMENT = 2; 4952 static final int MIXED = 3; 4953 4954 int min; 4955 int max; 4956 final int type; 4957 String text; 4958 boolean external; 4959 ContentModel(int type)4960 ContentModel(int type) 4961 { 4962 this.type = type; 4963 min = 1; 4964 max = 1; 4965 } 4966 4967 } 4968 4969 /** 4970 * The EMPTY content model. 4971 */ 4972 class EmptyContentModel 4973 extends ContentModel 4974 { 4975 EmptyContentModel()4976 EmptyContentModel() 4977 { 4978 super(ContentModel.EMPTY); 4979 min = 0; 4980 max = 0; 4981 } 4982 4983 } 4984 4985 /** 4986 * The ANY content model. 4987 */ 4988 class AnyContentModel 4989 extends ContentModel 4990 { 4991 AnyContentModel()4992 AnyContentModel() 4993 { 4994 super(ContentModel.ANY); 4995 min = 0; 4996 max = -1; 4997 } 4998 4999 } 5000 5001 /** 5002 * An element content model. 5003 */ 5004 class ElementContentModel 5005 extends ContentModel 5006 { 5007 5008 LinkedList contentParticles; 5009 boolean or; 5010 String regex; // regular expression cache 5011 ElementContentModel()5012 ElementContentModel() 5013 { 5014 super(ContentModel.ELEMENT); 5015 contentParticles = new LinkedList(); 5016 } 5017 addContentParticle(ContentParticle cp)5018 void addContentParticle(ContentParticle cp) 5019 { 5020 contentParticles.add(cp); 5021 } 5022 5023 } 5024 5025 class ContentParticle 5026 { 5027 5028 int min = 1; 5029 int max = 1; 5030 Object content; // Name (String) or ElementContentModel 5031 5032 } 5033 5034 /** 5035 * A mixed content model. 
5036 */ 5037 class MixedContentModel 5038 extends ContentModel 5039 { 5040 5041 private HashSet names; 5042 MixedContentModel()5043 MixedContentModel() 5044 { 5045 super(ContentModel.MIXED); 5046 names = new HashSet(); 5047 } 5048 addName(String name)5049 void addName(String name) 5050 { 5051 names.add(name); 5052 } 5053 containsName(String name)5054 boolean containsName(String name) 5055 { 5056 return names.contains(name); 5057 } 5058 5059 } 5060 5061 /** 5062 * An attribute definition. 5063 */ 5064 class AttributeDecl 5065 { 5066 5067 /** 5068 * The attribute type (CDATA, ID, etc). 5069 */ 5070 final String type; 5071 5072 /** 5073 * The default value. 5074 */ 5075 final String value; 5076 5077 /** 5078 * The value type (#FIXED, #IMPLIED, etc). 5079 */ 5080 final int valueType; 5081 5082 /** 5083 * The enumeration text. 5084 */ 5085 final String enumeration; 5086 5087 /** 5088 * The enumeration tokens. 5089 */ 5090 final HashSet values; 5091 5092 /** 5093 * Whether this attribute declaration occurred in the external subset. 5094 */ 5095 final boolean external; 5096 AttributeDecl(String type, String value, int valueType, String enumeration, HashSet values, boolean external)5097 AttributeDecl(String type, String value, 5098 int valueType, String enumeration, 5099 HashSet values, boolean external) 5100 { 5101 this.type = type; 5102 this.value = value; 5103 this.valueType = valueType; 5104 this.enumeration = enumeration; 5105 this.values = values; 5106 this.external = external; 5107 } 5108 5109 } 5110 5111 /** 5112 * An XML input source. 
*/
  static class Input
    implements Location
  {

    // An Input wraps either a byte stream or a character stream, never
    // both. While only the byte stream is available, reads go straight to
    // it; once finalizeEncoding() has built a reader for the chosen
    // encoding, all reads go through unicodeReader instead.

    // Current location, and the location saved by the last mark().
    int line = 1, markLine;
    int column, markColumn;
    int offset, markOffset;
    final String publicId, systemId, name;
    final boolean report; // report start- and end-entity
    final boolean normalize; // normalize CR, etc to LF

    InputStream in;
    Reader reader;
    UnicodeReader unicodeReader;
    boolean initialized;
    // Set once detectEncoding() has identified the encoding from the
    // leading bytes; a later conflicting declaration is then an error.
    boolean encodingDetected;
    String inputEncoding;
    boolean xml11;

    /**
     * Creates an input over a byte stream or a character stream.
     * @param in the byte stream, or null if a reader is given
     * @param reader the character stream, used when in is null
     * @param publicId the public ID reported through Location
     * @param systemId the system ID reported through Location
     * @param name the name of this input
     * @param inputEncoding the initial encoding; null means "UTF-8"
     * @param report report start- and end-entity
     * @param normalize normalize CR, etc to LF
     * @throws IllegalStateException if both in and reader are non-null
     */
    Input(InputStream in, Reader reader, String publicId, String systemId,
          String name, String inputEncoding, boolean report,
          boolean normalize)
    {
      if (inputEncoding == null)
        inputEncoding = "UTF-8";
      this.inputEncoding = inputEncoding;
      this.publicId = publicId;
      this.systemId = systemId;
      this.name = name;
      this.report = report;
      this.normalize = normalize;
      if (in != null)
        {
          if (reader != null)
            throw new IllegalStateException("both byte and char streams "+
                                            "specified");
          if (normalize)
            in = new CRLFInputStream(in);
          // Buffering supplies the mark/reset used by encoding detection.
          in = new BufferedInputStream(in);
          this.in = in;
        }
      else
        {
          this.reader = normalize ? new CRLFReader(reader) : reader;
          unicodeReader = new UnicodeReader(this.reader);
        }
      initialized = false;
    }

    // -- Location --

    public int getCharacterOffset()
    {
      return offset;
    }

    public int getColumnNumber()
    {
      return column;
    }

    public int getLineNumber()
    {
      return line;
    }

    public String getPublicId()
    {
      return publicId;
    }

    public String getSystemId()
    {
      return systemId;
    }

    /**
     * Performs one-time initialisation: for byte-stream inputs, the
     * encoding is detected from the leading bytes. Subsequent calls do
     * nothing.
     */
    void init()
      throws IOException
    {
      if (initialized)
        return;
      if (in != null)
        detectEncoding();
      initialized = true;
    }

    /**
     * Marks the current position in the underlying stream and saves the
     * current location so that reset() can restore both.
     * @param len the read-ahead limit passed to the underlying stream
     */
    void mark(int len)
      throws IOException
    {
      markOffset = offset;
      markLine = line;
      markColumn = column;
      if (unicodeReader != null)
        unicodeReader.mark(len);
      else
        in.mark(len);
    }

    /**
     * Character read.
     * Reads from the unicode reader if there is one, otherwise a single
     * byte from the byte stream, and updates the location. Note that the
     * offset and column are advanced even when the underlying read
     * returns -1.
     * @return the value read, after optional line-end normalisation
     */
    int read()
      throws IOException
    {
      offset++;
      int ret = (unicodeReader != null) ? unicodeReader.read() : in.read();
      if (normalize &&
          (ret == 0x0d || (xml11 && (ret == 0x85 || ret == 0x2028))))
        {
          // Normalize CR etc to LF
          ret = 0x0a;
        }
      // Locator handling
      if (ret == 0x0a)
        {
          line++;
          column = 0;
        }
      else
        column++;
      return ret;
    }

    /**
     * Block read.
     * Fills b[off..] with up to len code points, applies the same
     * line-end normalisation as the single-character read, and updates
     * offset, line and column for everything that was read.
     * @param b the destination buffer of code points
     * @param off the index in b of the first code point to store
     * @param len the maximum number of code points (or, before the
     * encoding is finalized, bytes) to read
     * @return the number of code points stored, or -1 at end of stream
     */
    int read(int[] b, int off, int len)
      throws IOException
    {
      int ret;
      if (unicodeReader != null)
        {
          ret = unicodeReader.read(b, off, len);
        }
      else
        {
          // No reader yet: decode the raw bytes with the current encoding.
          // NOTE(review): a multi-byte sequence cut off at the end of the
          // chunk would be mis-decoded here; this path looks intended only
          // for the ASCII-compatible start of the document. Confirm.
          byte[] b2 = new byte[len];
          ret = in.read(b2, 0, len);
          if (ret != -1)
            {
              String s = new String(b2, 0, ret, inputEncoding);
              int[] c = UnicodeReader.toCodePointArray(s);
              ret = c.length;
              System.arraycopy(c, 0, b, off, ret);
            }
        }
      if (ret != -1)
        {
          // Keep the character offset in step with the single-character
          // read, which advances it once per value returned.
          offset += ret;
          // Locator handling
          for (int i = 0; i < ret; i++)
            {
              int c = b[off + i];
              if (normalize &&
                  (c == 0x0d || (xml11 && (c == 0x85 || c == 0x2028))))
                {
                  // Normalize CR etc to LF
                  c = 0x0a;
                  b[off + i] = c;
                }
              if (c == 0x0a)
                {
                  line++;
                  column = 0;
                }
              else
                column++;
            }
        }
      return ret;
    }

    /**
     * Returns the underlying stream to the last marked position and
     * restores the location saved by mark().
     */
    void reset()
      throws IOException
    {
      if (unicodeReader != null)
        unicodeReader.reset();
      else
        in.reset();
      offset = markOffset;
      line = markLine;
      column = markColumn;
    }

    // Detection of input encoding

    // Leading-byte patterns. The UCS-4 and *_NOBOM patterns are the
    // encoded form of the first characters of the document ('<', "<?",
    // "<?xm"); UCS_2_12, UCS_2_21 and UTF_8_BOM are byte-order marks.
    private static final int[] SIGNATURE_UCS_4_1234 =
      new int[] { 0x00, 0x00, 0x00, 0x3c };
    private static final int[] SIGNATURE_UCS_4_4321 =
      new int[] { 0x3c, 0x00, 0x00, 0x00 };
    private static final int[] SIGNATURE_UCS_4_2143 =
      new int[] { 0x00, 0x00, 0x3c, 0x00 };
    private static final int[] SIGNATURE_UCS_4_3412 =
      new int[] { 0x00, 0x3c, 0x00, 0x00 };
    private static final int[] SIGNATURE_UCS_2_12 =
      new int[] { 0xfe, 0xff };
    private static final int[] SIGNATURE_UCS_2_21 =
      new int[] { 0xff, 0xfe };
    private static final int[] SIGNATURE_UCS_2_12_NOBOM =
      new int[] { 0x00, 0x3c, 0x00, 0x3f };
    private static final int[] SIGNATURE_UCS_2_21_NOBOM =
      new int[] { 0x3c, 0x00, 0x3f, 0x00 };
    private static final int[] SIGNATURE_UTF_8 =
      new int[] { 0x3c, 0x3f, 0x78, 0x6d };
    private static final int[] SIGNATURE_UTF_8_BOM =
      new int[] { 0xef, 0xbb, 0xbf };

    /**
     * Detect the input encoding.
     * Peeks at the first four bytes of the byte stream and compares them
     * with the known signatures. A byte-order mark is consumed; a
     * signature that is itself document content is left in the stream.
     * If no signature matches, the current encoding is left unchanged.
     * @throws UnsupportedEncodingException for the unusual UCS-4 byte
     * orders and for UCS-2 input without a byte-order mark
     */
    private void detectEncoding()
      throws IOException
    {
      // Peek only. A short stream yields -1 values, which match no
      // signature because every signature element is a byte value.
      int[] signature = new int[4];
      in.mark(4);
      for (int i = 0; i < 4; i++)
        signature[i] = in.read();
      in.reset();

      // 4-byte encodings
      if (equals(SIGNATURE_UCS_4_1234, signature))
        {
          // These four bytes are the document's first character ('<'),
          // not a byte-order mark, so they must not be skipped.
          setInputEncoding("UTF-32BE");
          encodingDetected = true;
        }
      else if (equals(SIGNATURE_UCS_4_4321, signature))
        {
          // As above: the signature is the leading '<', keep it.
          setInputEncoding("UTF-32LE");
          encodingDetected = true;
        }
      else if (equals(SIGNATURE_UCS_4_2143, signature) ||
               equals(SIGNATURE_UCS_4_3412, signature))
        throw new UnsupportedEncodingException("unsupported UCS-4 byte ordering");

      // 2-byte encodings
      else if (equals(SIGNATURE_UCS_2_12, signature))
        {
          // Skip the two-byte byte-order mark.
          in.read();
          in.read();
          setInputEncoding("UTF-16BE");
          encodingDetected = true;
        }
      else if (equals(SIGNATURE_UCS_2_21, signature))
        {
          // Skip the two-byte byte-order mark.
          in.read();
          in.read();
          setInputEncoding("UTF-16LE");
          encodingDetected = true;
        }
      else if (equals(SIGNATURE_UCS_2_12_NOBOM, signature))
        {
          //setInputEncoding("UTF-16BE");
          throw new UnsupportedEncodingException("no byte-order mark for UCS-2 entity");
        }
      else if (equals(SIGNATURE_UCS_2_21_NOBOM, signature))
        {
          //setInputEncoding("UTF-16LE");
          throw new UnsupportedEncodingException("no byte-order mark for UCS-2 entity");
        }
      // ASCII-derived encodings
      else if (equals(SIGNATURE_UTF_8, signature))
        {
          // UTF-8 input encoding implied, TextDecl
        }
      else if (equals(SIGNATURE_UTF_8_BOM, signature))
        {
          // Skip the three-byte byte-order mark.
          in.read();
          in.read();
          in.read();
          setInputEncoding("UTF-8");
          encodingDetected = true;
        }
    }

    /**
     * Indicates whether b2 starts with all the values of b1.
     * Only the first b1.length elements are compared, so b2 must be at
     * least as long as b1.
     */
    private static boolean equals(int[] b1, int[] b2)
    {
      for (int i = 0; i < b1.length; i++)
        {
          if (b1[i] != b2[i])
            return false;
        }
      return true;
    }

    /**
     * Sets the input encoding, then builds the reader for it.
     * Nothing happens if the encoding is already in effect, or if
     * "UTF-16" is requested while a specific UTF-16 variant is in effect.
     * @param encoding the encoding name to switch to
     * @throws UnsupportedEncodingException if the encoding was already
     * detected from the leading bytes and the requested name differs from
     * it by more than letter case
     */
    void setInputEncoding(String encoding)
      throws IOException
    {
      if (encoding.equals(inputEncoding))
        return;
      if ("UTF-16".equalsIgnoreCase(encoding) &&
          inputEncoding.startsWith("UTF-16"))
        return;
      // Encoding names are matched without regard to case, so a name that
      // differs from the detected one only in case (e.g. "utf-8") is not
      // a conflict.
      if (encodingDetected && !encoding.equalsIgnoreCase(inputEncoding))
        throw new UnsupportedEncodingException("document is not in its " +
                                               "declared encoding " +
                                               inputEncoding +
                                               ": " + encoding);
      inputEncoding = encoding;
      finalizeEncoding();
    }

    /**
     * Builds the character reader over the byte stream using the current
     * input encoding, and marks it. Does nothing if a reader already
     * exists.
     */
    void finalizeEncoding()
      throws IOException
    {
      if (reader != null)
        return;
      reader = new BufferedReader(new InputStreamReader(in, inputEncoding));
      unicodeReader = new UnicodeReader(reader);
      mark(1);
    }

  }

}