1 /* 2 * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. 3 */ 4 5 /* 6 * Licensed to the Apache Software Foundation (ASF) under one or more 7 * contributor license agreements. See the NOTICE file distributed with 8 * this work for additional information regarding copyright ownership. 9 * The ASF licenses this file to You under the Apache License, Version 2.0 10 * (the "License"); you may not use this file except in compliance with 11 * the License. You may obtain a copy of the License at 12 * 13 * http://www.apache.org/licenses/LICENSE-2.0 14 * 15 * Unless required by applicable law or agreed to in writing, software 16 * distributed under the License is distributed on an "AS IS" BASIS, 17 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 * See the License for the specific language governing permissions and 19 * limitations under the License. 20 */ 21 22 package com.sun.org.apache.xerces.internal.impl; 23 24 import com.sun.org.apache.xerces.internal.impl.dtd.XMLDTDDescription; 25 import com.sun.org.apache.xerces.internal.impl.io.MalformedByteSequenceException; 26 import com.sun.org.apache.xerces.internal.impl.msg.XMLMessageFormatter; 27 import com.sun.org.apache.xerces.internal.impl.validation.ValidationManager; 28 import com.sun.org.apache.xerces.internal.util.NamespaceSupport; 29 import com.sun.org.apache.xerces.internal.util.XMLChar; 30 import com.sun.org.apache.xerces.internal.util.XMLResourceIdentifierImpl; 31 import com.sun.org.apache.xerces.internal.util.XMLStringBuffer; 32 import com.sun.org.apache.xerces.internal.xni.Augmentations; 33 import com.sun.org.apache.xerces.internal.xni.NamespaceContext; 34 import com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier; 35 import com.sun.org.apache.xerces.internal.xni.XNIException; 36 import com.sun.org.apache.xerces.internal.xni.parser.XMLComponentManager; 37 import com.sun.org.apache.xerces.internal.xni.parser.XMLConfigurationException; 38 import 
com.sun.org.apache.xerces.internal.xni.parser.XMLDTDScanner; 39 import com.sun.org.apache.xerces.internal.xni.parser.XMLInputSource; 40 import com.sun.xml.internal.stream.Entity; 41 import com.sun.xml.internal.stream.StaxXMLInputSource; 42 import com.sun.xml.internal.stream.dtd.DTDGrammarUtil; 43 import java.io.CharConversionException; 44 import java.io.EOFException; 45 import java.io.IOException; 46 import javax.xml.stream.XMLInputFactory; 47 import javax.xml.stream.events.XMLEvent; 48 import jdk.xml.internal.SecuritySupport; 49 50 51 /** 52 * This class is responsible for scanning XML document structure 53 * and content. 54 * 55 * This class has been modified as per the new design which is more suited to 56 * efficiently build pull parser. Lot of improvements have been done and 57 * the code has been added to support stax functionality/features. 58 * 59 * @author Neeraj Bajaj, Sun Microsystems 60 * @author K.Venugopal, Sun Microsystems 61 * @author Glenn Marcy, IBM 62 * @author Andy Clark, IBM 63 * @author Arnaud Le Hors, IBM 64 * @author Eric Ye, IBM 65 * @author Sunitha Reddy, Sun Microsystems 66 * 67 * Refer to the table in unit-test javax.xml.stream.XMLStreamReaderTest.SupportDTD for changes 68 * related to property SupportDTD. 69 * @author Joe Wang, Sun Microsystems 70 * @LastModified: Sep 2017 71 */ 72 public class XMLDocumentScannerImpl 73 extends XMLDocumentFragmentScannerImpl{ 74 75 // 76 // Constants 77 // 78 79 // scanner states 80 81 /** Scanner state: XML declaration. */ 82 protected static final int SCANNER_STATE_XML_DECL = 42; 83 84 /** Scanner state: prolog. */ 85 protected static final int SCANNER_STATE_PROLOG = 43; 86 87 /** Scanner state: trailing misc. */ 88 protected static final int SCANNER_STATE_TRAILING_MISC = 44; 89 90 /** Scanner state: DTD internal declarations. */ 91 protected static final int SCANNER_STATE_DTD_INTERNAL_DECLS = 45; 92 93 /** Scanner state: open DTD external subset. 
*/
    protected static final int SCANNER_STATE_DTD_EXTERNAL = 46;

    /** Scanner state: DTD external declarations. */
    protected static final int SCANNER_STATE_DTD_EXTERNAL_DECLS = 47;

    /** Scanner state: NO MORE ELEMENTS. */
    protected static final int SCANNER_STATE_NO_SUCH_ELEMENT_EXCEPTION = 48;

    // feature and property identifiers

    /** Property identifier: document scanner. */
    protected static final String DOCUMENT_SCANNER =
            Constants.XERCES_PROPERTY_PREFIX + Constants.DOCUMENT_SCANNER_PROPERTY;

    /** Feature identifier: load external DTD. */
    protected static final String LOAD_EXTERNAL_DTD =
            Constants.XERCES_FEATURE_PREFIX + Constants.LOAD_EXTERNAL_DTD_FEATURE;

    /** Feature identifier: disallow DOCTYPE declarations. */
    protected static final String DISALLOW_DOCTYPE_DECL_FEATURE =
            Constants.XERCES_FEATURE_PREFIX + Constants.DISALLOW_DOCTYPE_DECL_FEATURE;

    // property identifiers

    /** Property identifier: DTD scanner. */
    protected static final String DTD_SCANNER =
            Constants.XERCES_PROPERTY_PREFIX + Constants.DTD_SCANNER_PROPERTY;

    /** Property identifier: ValidationManager. */
    protected static final String VALIDATION_MANAGER =
            Constants.XERCES_PROPERTY_PREFIX + Constants.VALIDATION_MANAGER_PROPERTY;

    /** Property identifier: NamespaceContext. */
    protected static final String NAMESPACE_CONTEXT =
            Constants.XERCES_PROPERTY_PREFIX + Constants.NAMESPACE_CONTEXT_PROPERTY;

    // recognized features and properties

    /** Recognized features; index-aligned with {@code FEATURE_DEFAULTS}. */
    private static final String[] RECOGNIZED_FEATURES = {
        LOAD_EXTERNAL_DTD,
        DISALLOW_DOCTYPE_DECL_FEATURE,
    };

    /** Feature defaults; entry i is the default of {@code RECOGNIZED_FEATURES[i]}. */
    private static final Boolean[] FEATURE_DEFAULTS = {
        Boolean.TRUE,
        Boolean.FALSE,
    };

    /** Recognized properties; index-aligned with {@code PROPERTY_DEFAULTS}. */
    private static final String[] RECOGNIZED_PROPERTIES = {
        DTD_SCANNER,
        VALIDATION_MANAGER
    };

    /** Property defaults; entry i is the default of {@code RECOGNIZED_PROPERTIES[i]}. */
    private static final Object[] PROPERTY_DEFAULTS = {
        null,
        null
    };

    //
    // Data
    //

    // properties

    /** DTD scanner. */
    protected XMLDTDScanner fDTDScanner = null;

    /** Validation manager. */
    //xxx: fValidationManager code needs to be added yet!
    protected ValidationManager fValidationManager;

    /**
     * Text of the DOCTYPE declaration. Allocated by the prolog driver when
     * "&lt;!DOCTYPE" is recognized and completed by {@code getDTDDecl()}.
     */
    protected XMLStringBuffer fDTDDecl = null;

    /** True from the moment "&lt;!DOCTYPE" is recognized until the declaration has been scanned. */
    protected boolean fReadingDTD = false;

    // NOTE(review): not read or written anywhere in the visible part of this class.
    protected boolean fAddedListener = false;

    // protected data

    // other info

    /** Doctype name. */
    protected String fDoctypeName;

    /** Doctype declaration public identifier. */
    protected String fDoctypePublicId;

    /** Doctype declaration system identifier. */
    protected String fDoctypeSystemId;

    /** Namespace support. */
    protected NamespaceContext fNamespaceContext = new NamespaceSupport();

    // features

    /** Load external DTD. */
    protected boolean fLoadExternalDTD = true;

    // state

    /** Seen doctype declaration. */
    protected boolean fSeenDoctypeDecl;

    // NOTE(review): not read or written anywhere in the visible part of this class.
    protected boolean fScanEndElement;

    //protected int fScannerLastState ;

    // drivers

    /** XML declaration driver. */
    protected Driver fXMLDeclDriver = new XMLDeclDriver();

    /** Prolog driver. */
    protected Driver fPrologDriver = new PrologDriver();

    /** DTD driver; created lazily by the prolog driver when a DOCTYPE is scanned. */
    protected Driver fDTDDriver = null ;

    /** Trailing miscellaneous section driver. */
    protected Driver fTrailingMiscDriver = new TrailingMiscDriver();

    /** Position in the current entity buffer just after "&lt;!DOCTYPE". */
    protected int fStartPos = 0;

    /** Position in the current entity buffer where the DOCTYPE declaration ended. */
    protected int fEndPos = 0;

    /** True once the DOCTYPE declaration has been found to contain an internal subset. */
    protected boolean fSeenInternalSubset= false;
    // temporary variables

    /** Array of 3 strings; scanDoctypeDecl reads the system id from [0] and the public id from [1]. */
    private String[] fStrings = new String[3];

    /** External subset source.
*/ 225 private XMLInputSource fExternalSubsetSource = null; 226 227 /** A DTD Description. */ 228 private final XMLDTDDescription fDTDDescription = new XMLDTDDescription(null, null, null, null, null); 229 230 private static final char [] DOCTYPE = {'D','O','C','T','Y','P','E'}; 231 private static final char [] COMMENTSTRING = {'-','-'}; 232 233 // 234 // Constructors 235 // 236 237 /** Default constructor. */ XMLDocumentScannerImpl()238 public XMLDocumentScannerImpl() {} // <init>() 239 240 241 // 242 // XMLDocumentScanner methods 243 // 244 245 246 /** 247 * Sets the input source. 248 * 249 * @param inputSource The input source. 250 * 251 * @throws IOException Thrown on i/o error. 252 */ setInputSource(XMLInputSource inputSource)253 public void setInputSource(XMLInputSource inputSource) throws IOException { 254 fEntityManager.setEntityHandler(this); 255 //this starts a new entity and sets the current entity to the document entity. 256 fEntityManager.startDocumentEntity(inputSource); 257 // fDocumentSystemId = fEntityManager.expandSystemId(inputSource.getSystemId()); 258 setScannerState(XMLEvent.START_DOCUMENT); 259 } // setInputSource(XMLInputSource) 260 261 262 263 /**return the state of the scanner */ getScannetState()264 public int getScannetState(){ 265 return fScannerState ; 266 } 267 268 269 270 reset(PropertyManager propertyManager)271 public void reset(PropertyManager propertyManager) { 272 super.reset(propertyManager); 273 // other settings 274 fDoctypeName = null; 275 fDoctypePublicId = null; 276 fDoctypeSystemId = null; 277 fSeenDoctypeDecl = false; 278 fNamespaceContext.reset(); 279 fSupportDTD = ((Boolean)propertyManager.getProperty(XMLInputFactory.SUPPORT_DTD)).booleanValue(); 280 281 // xerces features 282 fLoadExternalDTD = !((Boolean)propertyManager.getProperty(Constants.ZEPHYR_PROPERTY_PREFIX + Constants.IGNORE_EXTERNAL_DTD)).booleanValue(); 283 setScannerState(XMLEvent.START_DOCUMENT); 284 setDriver(fXMLDeclDriver); 285 fSeenInternalSubset = 
false; 286 if(fDTDScanner != null){ 287 ((XMLDTDScannerImpl)fDTDScanner).reset(propertyManager); 288 } 289 fEndPos = 0; 290 fStartPos = 0; 291 if(fDTDDecl != null){ 292 fDTDDecl.clear(); 293 } 294 295 } 296 297 /** 298 * Resets the component. The component can query the component manager 299 * about any features and properties that affect the operation of the 300 * component. 301 * 302 * @param componentManager The component manager. 303 * 304 * @throws SAXException Thrown by component on initialization error. 305 * For example, if a feature or property is 306 * required for the operation of the component, the 307 * component manager may throw a 308 * SAXNotRecognizedException or a 309 * SAXNotSupportedException. 310 */ reset(XMLComponentManager componentManager)311 public void reset(XMLComponentManager componentManager) 312 throws XMLConfigurationException { 313 314 super.reset(componentManager); 315 316 // other settings 317 fDoctypeName = null; 318 fDoctypePublicId = null; 319 fDoctypeSystemId = null; 320 fSeenDoctypeDecl = false; 321 fExternalSubsetSource = null; 322 323 // xerces features 324 fLoadExternalDTD = componentManager.getFeature(LOAD_EXTERNAL_DTD, true); 325 fDisallowDoctype = componentManager.getFeature(DISALLOW_DOCTYPE_DECL_FEATURE, false); 326 327 fNamespaces = componentManager.getFeature(NAMESPACES, true); 328 329 fSeenInternalSubset = false; 330 // xerces properties 331 fDTDScanner = (XMLDTDScanner)componentManager.getProperty(DTD_SCANNER); 332 333 fValidationManager = (ValidationManager)componentManager.getProperty(VALIDATION_MANAGER, null); 334 335 try { 336 fNamespaceContext = (NamespaceContext)componentManager.getProperty(NAMESPACE_CONTEXT); 337 } 338 catch (XMLConfigurationException e) { } 339 if (fNamespaceContext == null) { 340 fNamespaceContext = new NamespaceSupport(); 341 } 342 fNamespaceContext.reset(); 343 344 fEndPos = 0; 345 fStartPos = 0; 346 if(fDTDDecl != null) 347 fDTDDecl.clear(); 348 349 350 
//fEntityScanner.registerListener((XMLBufferListener)componentManager.getProperty(DOCUMENT_SCANNER)); 351 352 // setup driver 353 setScannerState(SCANNER_STATE_XML_DECL); 354 setDriver(fXMLDeclDriver); 355 356 } // reset(XMLComponentManager) 357 358 359 /** 360 * Returns a list of feature identifiers that are recognized by 361 * this component. This method may return null if no features 362 * are recognized by this component. 363 */ getRecognizedFeatures()364 public String[] getRecognizedFeatures() { 365 String[] featureIds = super.getRecognizedFeatures(); 366 int length = featureIds != null ? featureIds.length : 0; 367 String[] combinedFeatureIds = new String[length + RECOGNIZED_FEATURES.length]; 368 if (featureIds != null) { 369 System.arraycopy(featureIds, 0, combinedFeatureIds, 0, featureIds.length); 370 } 371 System.arraycopy(RECOGNIZED_FEATURES, 0, combinedFeatureIds, length, RECOGNIZED_FEATURES.length); 372 return combinedFeatureIds; 373 } // getRecognizedFeatures():String[] 374 375 /** 376 * Sets the state of a feature. This method is called by the component 377 * manager any time after reset when a feature changes state. 378 * <p> 379 * <strong>Note:</strong> Components should silently ignore features 380 * that do not affect the operation of the component. 381 * 382 * @param featureId The feature identifier. 383 * @param state The state of the feature. 384 * 385 * @throws SAXNotRecognizedException The component should not throw 386 * this exception. 387 * @throws SAXNotSupportedException The component should not throw 388 * this exception. 
389 */ setFeature(String featureId, boolean state)390 public void setFeature(String featureId, boolean state) 391 throws XMLConfigurationException { 392 393 super.setFeature(featureId, state); 394 395 // Xerces properties 396 if (featureId.startsWith(Constants.XERCES_FEATURE_PREFIX)) { 397 final int suffixLength = featureId.length() - Constants.XERCES_FEATURE_PREFIX.length(); 398 399 if (suffixLength == Constants.LOAD_EXTERNAL_DTD_FEATURE.length() && 400 featureId.endsWith(Constants.LOAD_EXTERNAL_DTD_FEATURE)) { 401 fLoadExternalDTD = state; 402 return; 403 } 404 else if (suffixLength == Constants.DISALLOW_DOCTYPE_DECL_FEATURE.length() && 405 featureId.endsWith(Constants.DISALLOW_DOCTYPE_DECL_FEATURE)) { 406 fDisallowDoctype = state; 407 return; 408 } 409 } 410 411 } // setFeature(String,boolean) 412 413 /** 414 * Returns a list of property identifiers that are recognized by 415 * this component. This method may return null if no properties 416 * are recognized by this component. 417 */ getRecognizedProperties()418 public String[] getRecognizedProperties() { 419 String[] propertyIds = super.getRecognizedProperties(); 420 int length = propertyIds != null ? propertyIds.length : 0; 421 String[] combinedPropertyIds = new String[length + RECOGNIZED_PROPERTIES.length]; 422 if (propertyIds != null) { 423 System.arraycopy(propertyIds, 0, combinedPropertyIds, 0, propertyIds.length); 424 } 425 System.arraycopy(RECOGNIZED_PROPERTIES, 0, combinedPropertyIds, length, RECOGNIZED_PROPERTIES.length); 426 return combinedPropertyIds; 427 } // getRecognizedProperties():String[] 428 429 /** 430 * Sets the value of a property. This method is called by the component 431 * manager any time after reset when a property changes value. 432 * <p> 433 * <strong>Note:</strong> Components should silently ignore properties 434 * that do not affect the operation of the component. 435 * 436 * @param propertyId The property identifier. 437 * @param value The value of the property. 
438 * 439 * @throws SAXNotRecognizedException The component should not throw 440 * this exception. 441 * @throws SAXNotSupportedException The component should not throw 442 * this exception. 443 */ setProperty(String propertyId, Object value)444 public void setProperty(String propertyId, Object value) 445 throws XMLConfigurationException { 446 447 super.setProperty(propertyId, value); 448 449 // Xerces properties 450 if (propertyId.startsWith(Constants.XERCES_PROPERTY_PREFIX)) { 451 final int suffixLength = propertyId.length() - Constants.XERCES_PROPERTY_PREFIX.length(); 452 453 if (suffixLength == Constants.DTD_SCANNER_PROPERTY.length() && 454 propertyId.endsWith(Constants.DTD_SCANNER_PROPERTY)) { 455 fDTDScanner = (XMLDTDScanner)value; 456 } 457 if (suffixLength == Constants.NAMESPACE_CONTEXT_PROPERTY.length() && 458 propertyId.endsWith(Constants.NAMESPACE_CONTEXT_PROPERTY)) { 459 if (value != null) { 460 fNamespaceContext = (NamespaceContext)value; 461 } 462 } 463 464 return; 465 } 466 467 } // setProperty(String,Object) 468 469 /** 470 * Returns the default state for a feature, or null if this 471 * component does not want to report a default value for this 472 * feature. 473 * 474 * @param featureId The feature identifier. 475 * 476 * @since Xerces 2.2.0 477 */ getFeatureDefault(String featureId)478 public Boolean getFeatureDefault(String featureId) { 479 480 for (int i = 0; i < RECOGNIZED_FEATURES.length; i++) { 481 if (RECOGNIZED_FEATURES[i].equals(featureId)) { 482 return FEATURE_DEFAULTS[i]; 483 } 484 } 485 return super.getFeatureDefault(featureId); 486 } // getFeatureDefault(String):Boolean 487 488 /** 489 * Returns the default state for a property, or null if this 490 * component does not want to report a default value for this 491 * property. 492 * 493 * @param propertyId The property identifier. 
494 * 495 * @since Xerces 2.2.0 496 */ getPropertyDefault(String propertyId)497 public Object getPropertyDefault(String propertyId) { 498 for (int i = 0; i < RECOGNIZED_PROPERTIES.length; i++) { 499 if (RECOGNIZED_PROPERTIES[i].equals(propertyId)) { 500 return PROPERTY_DEFAULTS[i]; 501 } 502 } 503 return super.getPropertyDefault(propertyId); 504 } // getPropertyDefault(String):Object 505 506 // 507 // XMLEntityHandler methods 508 // 509 510 /** 511 * This method notifies of the start of an entity. The DTD has the 512 * pseudo-name of "[dtd]" parameter entity names start with '%'; and 513 * general entities are just specified by their name. 514 * 515 * @param name The name of the entity. 516 * @param identifier The resource identifier. 517 * @param encoding The auto-detected IANA encoding name of the entity 518 * stream. This value will be null in those situations 519 * where the entity encoding is not auto-detected (e.g. 520 * internal entities or a document entity that is 521 * parsed from a java.io.Reader). 522 * 523 * @throws XNIException Thrown by handler to signal an error. 524 */ startEntity(String name, XMLResourceIdentifier identifier, String encoding, Augmentations augs)525 public void startEntity(String name, 526 XMLResourceIdentifier identifier, 527 String encoding, Augmentations augs) throws XNIException { 528 529 super.startEntity(name, identifier, encoding,augs); 530 531 //register current document scanner as a listener for XMLEntityScanner 532 fEntityScanner.registerListener(this); 533 534 // prepare to look for a TextDecl if external general entity 535 if (!name.equals("[xml]") && fEntityScanner.isExternal()) { 536 // Don't do this if we're skipping the entity! 537 if (augs == null || !((Boolean) augs.getItem(Constants.ENTITY_SKIPPED)).booleanValue()) { 538 setScannerState(SCANNER_STATE_TEXT_DECL); 539 } 540 } 541 542 // call handler 543 /** comment this part.. LOCATOR problem.. 
*/ 544 if (fDocumentHandler != null && name.equals("[xml]")) { 545 fDocumentHandler.startDocument(fEntityScanner, encoding, fNamespaceContext, null); 546 } 547 548 } // startEntity(String,identifier,String) 549 550 551 /** 552 * This method notifies the end of an entity. The DTD has the pseudo-name 553 * of "[dtd]" parameter entity names start with '%'; and general entities 554 * are just specified by their name. 555 * 556 * @param name The name of the entity. 557 * 558 * @throws XNIException Thrown by handler to signal an error. 559 */ endEntity(String name, Augmentations augs)560 public void endEntity(String name, Augmentations augs) throws IOException, XNIException { 561 562 super.endEntity(name, augs); 563 564 if(name.equals("[xml]")){ 565 //if fMarkupDepth has reached 0. 566 //and driver is fTrailingMiscDriver (which 567 //handles end of document in normal case) 568 //set the scanner state of SCANNER_STATE_TERMINATED 569 if(fMarkupDepth == 0 && fDriver == fTrailingMiscDriver){ 570 //set the scanner set to SCANNER_STATE_TERMINATED 571 setScannerState(SCANNER_STATE_TERMINATED) ; 572 } else{ 573 //else we have reached the end of document prematurely 574 //so throw EOFException. 
575 throw new java.io.EOFException(); 576 } 577 578 //this is taken care in wrapper which generates XNI callbacks, There are no next events 579 580 //if (fDocumentHandler != null) { 581 //fDocumentHandler.endDocument(null); 582 //} 583 } 584 } // endEntity(String) 585 586 getDTDDecl()587 public XMLStringBuffer getDTDDecl(){ 588 Entity entity = fEntityScanner.getCurrentEntity(); 589 fDTDDecl.append(((Entity.ScannedEntity)entity).ch,fStartPos , fEndPos-fStartPos); 590 if(fSeenInternalSubset) 591 fDTDDecl.append("]>"); 592 return fDTDDecl; 593 } 594 getCharacterEncodingScheme()595 public String getCharacterEncodingScheme(){ 596 return fDeclaredEncoding; 597 } 598 599 /** return the next state on the input 600 * 601 * @return int 602 */ 603 next()604 public int next() throws IOException, XNIException { 605 return fDriver.next(); 606 } 607 608 //getNamespaceContext getNamespaceContext()609 public NamespaceContext getNamespaceContext(){ 610 return fNamespaceContext ; 611 } 612 613 614 615 // 616 // Protected methods 617 // 618 619 // driver factory methods 620 621 /** Creates a content driver. */ createContentDriver()622 protected Driver createContentDriver() { 623 return new ContentDriver(); 624 } // createContentDriver():Driver 625 626 // scanning methods 627 628 /** Scans a doctype declaration. 
*/ scanDoctypeDecl(boolean supportDTD)629 protected boolean scanDoctypeDecl(boolean supportDTD) throws IOException, XNIException { 630 631 // spaces 632 if (!fEntityScanner.skipSpaces()) { 633 reportFatalError("MSG_SPACE_REQUIRED_BEFORE_ROOT_ELEMENT_TYPE_IN_DOCTYPEDECL", 634 null); 635 } 636 637 // root element name 638 fDoctypeName = fEntityScanner.scanName(NameType.DOCTYPE); 639 if (fDoctypeName == null) { 640 reportFatalError("MSG_ROOT_ELEMENT_TYPE_REQUIRED", null); 641 } 642 643 // external id 644 if (fEntityScanner.skipSpaces()) { 645 scanExternalID(fStrings, false); 646 fDoctypeSystemId = fStrings[0]; 647 fDoctypePublicId = fStrings[1]; 648 fEntityScanner.skipSpaces(); 649 } 650 651 fHasExternalDTD = fDoctypeSystemId != null; 652 653 // Attempt to locate an external subset with an external subset resolver. 654 if (supportDTD && !fHasExternalDTD && fExternalSubsetResolver != null) { 655 fDTDDescription.setValues(null, null, fEntityManager.getCurrentResourceIdentifier().getExpandedSystemId(), null); 656 fDTDDescription.setRootName(fDoctypeName); 657 fExternalSubsetSource = fExternalSubsetResolver.getExternalSubset(fDTDDescription); 658 fHasExternalDTD = fExternalSubsetSource != null; 659 } 660 661 // call handler 662 if (supportDTD && fDocumentHandler != null) { 663 // NOTE: I don't like calling the doctypeDecl callback until 664 // end of the *full* doctype line (including internal 665 // subset) is parsed correctly but SAX2 requires that 666 // it knows the root element name and public and system 667 // identifier for the startDTD call. -Ac 668 if (fExternalSubsetSource == null) { 669 fDocumentHandler.doctypeDecl(fDoctypeName, fDoctypePublicId, fDoctypeSystemId, null); 670 } 671 else { 672 fDocumentHandler.doctypeDecl(fDoctypeName, fExternalSubsetSource.getPublicId(), fExternalSubsetSource.getSystemId(), null); 673 } 674 } 675 676 // is there an internal subset? 
677 boolean internalSubset = true; 678 if (!fEntityScanner.skipChar('[', null)) { 679 internalSubset = false; 680 fEntityScanner.skipSpaces(); 681 if (!fEntityScanner.skipChar('>', null)) { 682 reportFatalError("DoctypedeclUnterminated", new Object[]{fDoctypeName}); 683 } 684 fMarkupDepth--; 685 } 686 return internalSubset; 687 688 } // scanDoctypeDecl():boolean 689 690 // 691 // Private methods 692 // 693 /** Set the scanner state after scanning DTD */ setEndDTDScanState()694 protected void setEndDTDScanState() { 695 setScannerState(SCANNER_STATE_PROLOG); 696 setDriver(fPrologDriver); 697 fEntityManager.setEntityHandler(XMLDocumentScannerImpl.this); 698 fReadingDTD=false; 699 } 700 701 /** Returns the scanner state name. */ getScannerStateName(int state)702 protected String getScannerStateName(int state) { 703 704 switch (state) { 705 case SCANNER_STATE_XML_DECL: return "SCANNER_STATE_XML_DECL"; 706 case SCANNER_STATE_PROLOG: return "SCANNER_STATE_PROLOG"; 707 case SCANNER_STATE_TRAILING_MISC: return "SCANNER_STATE_TRAILING_MISC"; 708 case SCANNER_STATE_DTD_INTERNAL_DECLS: return "SCANNER_STATE_DTD_INTERNAL_DECLS"; 709 case SCANNER_STATE_DTD_EXTERNAL: return "SCANNER_STATE_DTD_EXTERNAL"; 710 case SCANNER_STATE_DTD_EXTERNAL_DECLS: return "SCANNER_STATE_DTD_EXTERNAL_DECLS"; 711 } 712 return super.getScannerStateName(state); 713 714 } // getScannerStateName(int):String 715 716 // 717 // Classes 718 // 719 720 /** 721 * Driver to handle XMLDecl scanning. 722 * 723 * This class has been modified as per the new design which is more suited to 724 * efficiently build pull parser. Lots of performance improvements have been done and 725 * the code has been added to support stax functionality/features. 726 * 727 * @author Neeraj Bajaj, Sun Microsystems. 
*
     * @author Andy Clark, IBM
     */
    protected final class XMLDeclDriver
            implements Driver {

        //
        // Driver methods
        //


        /**
         * Scans the optional XML declaration at the start of the document
         * and hands control to the prolog driver.
         *
         * @return {@code XMLEvent.START_DOCUMENT} on success, or -1 after a
         *         fatal error (malformed bytes, character conversion failure
         *         or premature end of input) has been reported
         *
         * @throws IOException  Thrown on i/o error.
         * @throws XNIException Thrown on parse error.
         */
        public int next() throws IOException, XNIException {

            // next driver is prolog regardless of whether there
            // is an XMLDecl in this document
            setScannerState(SCANNER_STATE_PROLOG);
            setDriver(fPrologDriver);

            //System.out.println("fEntityScanner = " + fEntityScanner);
            // scan XMLDecl
            try {
                if (fEntityScanner.skipString(XMLDECL)) {
                    // the XMLDECL marker followed by white space starts a real
                    // XML declaration; anything else is a PI whose target
                    // merely begins with the same characters
                    if (XMLChar.isSpace(fEntityScanner.peekChar())) {
                        fMarkupDepth++;
                        scanXMLDeclOrTextDecl(false);
                    } else {
                        // PI, reset position
                        fEntityManager.fCurrentEntity.position = 0;
                    }
                }

                //START_OF_THE_DOCUMENT
                fEntityManager.fCurrentEntity.mayReadChunks = true;
                return XMLEvent.START_DOCUMENT;

            }
            // encoding errors
            catch (MalformedByteSequenceException e) {
                fErrorReporter.reportError(e.getDomain(), e.getKey(),
                        e.getArguments(), XMLErrorReporter.SEVERITY_FATAL_ERROR, e);
                return -1;
            } catch (CharConversionException e) {
                fErrorReporter.reportError(
                        XMLMessageFormatter.XML_DOMAIN,
                        "CharConversionFailure",
                        null,
                        XMLErrorReporter.SEVERITY_FATAL_ERROR, e);
                return -1;
            }
            // premature end of file
            catch (EOFException e) {
                reportFatalError("PrematureEOF", null);
                return -1;
                //throw e;
            }

        }
    } // class XMLDeclDriver

    /**
     * Driver to handle prolog scanning: comments, processing instructions
     * and the DOCTYPE declaration that may precede the root element.
     *
     * @author Andy Clark, IBM
     */
    protected final class PrologDriver
            implements Driver {

        /**
         * Drives the parser to the next state/event on the input. Parser is guaranteed
         * to stop at the next state/event.
         *
         * Internally XML document is divided into several states. Each state represents
         * a sections of XML document. When this functions returns normally, it has read
         * the section of XML document and returns the state corresponding to section of
         * document which has been read. For optimizations, a particular driver
         * can read ahead of the section of document (state returned) just read and
         * can maintain a different internal state.
         *
         * @return state representing the section of document just read, or -1
         *         after a fatal error has been reported.
         *
         * @throws IOException  Thrown on i/o error.
         * @throws XNIException Thrown on parse error.
         */

        public int next() throws IOException, XNIException {

            try {
                // Phase 1: classify what comes next. The loop runs until the
                // state is something other than PROLOG or START_OF_MARKUP.
                do {
                    switch (fScannerState) {
                        case SCANNER_STATE_PROLOG: {
                            fEntityScanner.skipSpaces();
                            if (fEntityScanner.skipChar('<', null)) {
                                setScannerState(SCANNER_STATE_START_OF_MARKUP);
                            } else if (fEntityScanner.skipChar('&', NameType.REFERENCE)) {
                                setScannerState(SCANNER_STATE_REFERENCE);
                            } else {
                                setScannerState(SCANNER_STATE_CONTENT);
                            }
                            break;
                        }

                        case SCANNER_STATE_START_OF_MARKUP: {
                            fMarkupDepth++;
                            // a name start character right after '<' means the
                            // root element begins here
                            if (isValidNameStartChar(fEntityScanner.peekChar()) ||
                                    isValidNameStartHighSurrogate(fEntityScanner.peekChar())) {
                                setScannerState(SCANNER_STATE_ROOT_ELEMENT);
                                setDriver(fContentDriver);
                                //from now onwards this would be handled by fContentDriver,in the same next() call
                                return fContentDriver.next();
                            } else if (fEntityScanner.skipChar('!', null)) {
                                if (fEntityScanner.skipChar('-', null)) {
                                    if (!fEntityScanner.skipChar('-', null)) {
                                        reportFatalError("InvalidCommentStart",
                                                null);
                                    }
                                    setScannerState(SCANNER_STATE_COMMENT);
                                } else if (fEntityScanner.skipString(DOCTYPE)) {
                                    setScannerState(SCANNER_STATE_DOCTYPE);
                                    // remember where the declaration text starts
                                    // so that getDTDDecl() can copy it later
                                    Entity entity = fEntityScanner.getCurrentEntity();
                                    if(entity instanceof Entity.ScannedEntity){
                                        fStartPos=((Entity.ScannedEntity)entity).position;
                                    }
                                    fReadingDTD=true;
                                    if(fDTDDecl == null)
                                        fDTDDecl = new XMLStringBuffer();
                                    fDTDDecl.append("<!DOCTYPE");

                                } else {
                                    reportFatalError("MarkupNotRecognizedInProlog",
                                            null);
                                }
                            } else if (fEntityScanner.skipChar('?', null)) {
                                setScannerState(SCANNER_STATE_PI);
                            } else {
                                reportFatalError("MarkupNotRecognizedInProlog",
                                        null);
                            }
                            break;
                        }
                    }
                } while (fScannerState == SCANNER_STATE_PROLOG || fScannerState == SCANNER_STATE_START_OF_MARKUP );

                // Phase 2: scan the construct that phase 1 identified and
                // return the matching event.
                switch(fScannerState){
                    /**
                    //this part is handled by FragmentContentHandler
                    case SCANNER_STATE_ROOT_ELEMENT: {
                        //we have read '<' and beginning of reading the start element tag
                        setScannerState(SCANNER_STATE_START_ELEMENT_TAG);
                        setDriver(fContentDriver);
                        //from now onwards this would be handled by fContentDriver,in the same next() call
                        return fContentDriver.next();
                    }
                    */
                    case SCANNER_STATE_COMMENT: {
                        //this function fills the data..
                        scanComment();
                        setScannerState(SCANNER_STATE_PROLOG);
                        return XMLEvent.COMMENT;
                        //setScannerState(SCANNER_STATE_PROLOG);
                        //break;
                    }
                    case SCANNER_STATE_PI: {
                        fContentBuffer.clear() ;
                        scanPI(fContentBuffer);
                        setScannerState(SCANNER_STATE_PROLOG);
                        return XMLEvent.PROCESSING_INSTRUCTION;
                    }

                    case SCANNER_STATE_DOCTYPE: {
                        if (fDisallowDoctype) {
                            reportFatalError("DoctypeNotAllowed", null);
                        }

                        if (fSeenDoctypeDecl) {
                            reportFatalError("AlreadySeenDoctype", null);
                        }
                        fSeenDoctypeDecl = true;

                        // scanDoctypeDecl() sends XNI doctypeDecl event that
                        // in SAX is converted to startDTD() event.
                        if (scanDoctypeDecl(fSupportDTD)) {
                            //allow parsing of entity decls to continue in order to stay well-formed
                            setScannerState(SCANNER_STATE_DTD_INTERNAL_DECLS);
                            fSeenInternalSubset = true;
                            if(fDTDDriver == null){
                                fDTDDriver = new DTDDriver();
                            }
                            setDriver(fContentDriver);
                            //always return DTD event, the event however, will not contain any entities
                            return fDTDDriver.next();
                        }

                        // no internal subset: the declaration ended at '>',
                        // so record the end position of its text
                        // (fSeenDoctypeDecl was set to true just above)
                        if(fSeenDoctypeDecl){
                            Entity entity = fEntityScanner.getCurrentEntity();
                            if(entity instanceof Entity.ScannedEntity){
                                fEndPos = ((Entity.ScannedEntity)entity).position;
                            }
                            fReadingDTD = false;
                        }

                        // handle external subset
                        if (fDoctypeSystemId != null) {
                            if (((fValidation || fLoadExternalDTD)
                                    && (fValidationManager == null || !fValidationManager.isCachedDTD()))) {
                                if (fSupportDTD) {
                                    setScannerState(SCANNER_STATE_DTD_EXTERNAL);
                                } else {
                                    setScannerState(SCANNER_STATE_PROLOG);
                                }

                                setDriver(fContentDriver);
                                if(fDTDDriver == null) {
                                    fDTDDriver = new DTDDriver();
                                }

                                return fDTDDriver.next();
                            }
                        }
                        else if (fExternalSubsetSource != null) {
                            if (((fValidation || fLoadExternalDTD)
                                    && (fValidationManager == null || !fValidationManager.isCachedDTD()))) {
                                // This handles the case of a DOCTYPE that had neither an internal subset or an external subset.
                                fDTDScanner.setInputSource(fExternalSubsetSource);
                                fExternalSubsetSource = null;
                                if (fSupportDTD)
                                    setScannerState(SCANNER_STATE_DTD_EXTERNAL_DECLS);
                                else
                                    setScannerState(SCANNER_STATE_PROLOG);
                                setDriver(fContentDriver);
                                if(fDTDDriver == null)
                                    fDTDDriver = new DTDDriver();
                                return fDTDDriver.next();
                            }
                        }

                        // Send endDTD() call if:
                        // a) systemId is null or if an external subset resolver could not locate an external subset.
                        // b) "load-external-dtd" and validation are false
                        // c) DTD grammar is cached

                        // in XNI this results in 3 events: doctypeDecl, startDTD, endDTD
                        // in SAX this results in 2 events: startDTD, endDTD
                        if (fDTDScanner != null) {
                            fDTDScanner.setInputSource(null);
                        }
                        setScannerState(SCANNER_STATE_PROLOG);
                        return XMLEvent.DTD;
                    }

                    case SCANNER_STATE_CONTENT: {
                        reportFatalError("ContentIllegalInProlog", null);
                        // consume the offending character
                        fEntityScanner.scanChar(null);
                        return -1;
                    }
                    case SCANNER_STATE_REFERENCE: {
                        reportFatalError("ReferenceIllegalInProlog", null);
                        return -1;
                    }

                    /**
                     * if (complete) {
                     *     if (fEntityScanner.scanChar() != '<') {
                     *         reportFatalError("RootElementRequired", null);
                     *     }
                     *     setScannerState(SCANNER_STATE_ROOT_ELEMENT);
                     *     setDriver(fContentDriver);
                     * }
                     */
                }
            }
            // encoding errors
            catch (MalformedByteSequenceException e) {
                fErrorReporter.reportError(e.getDomain(), e.getKey(),
                        e.getArguments(), XMLErrorReporter.SEVERITY_FATAL_ERROR, e);
                return -1;
            } catch (CharConversionException e) {
                fErrorReporter.reportError(
                        XMLMessageFormatter.XML_DOMAIN,
                        "CharConversionFailure",
                        null,
                        XMLErrorReporter.SEVERITY_FATAL_ERROR, e);
                return -1;
            }
            // premature end of file
            catch (EOFException e) {
                reportFatalError("PrematureEOF", null);
                //xxx  what should be returned here.... ???
                return -1 ;
                //throw e;
            }
            // reached when phase 2 matched no case
            //xxx  what should be returned here.... ???
            return -1;

        }


    } // class PrologDriver

    /**
     * Driver to handle the internal and external DTD subsets.
1028 * 1029 * @author Andy Clark, IBM 1030 */ 1031 protected final class DTDDriver 1032 implements Driver { 1033 1034 // 1035 // Driver methods 1036 // 1037 next()1038 public int next() throws IOException, XNIException{ 1039 1040 dispatch(true); 1041 1042 //xxx: remove this hack and align this with reusing DTD components 1043 //currently this routine will only be executed from Stax 1044 if(fPropertyManager != null){ 1045 dtdGrammarUtil = new DTDGrammarUtil(((XMLDTDScannerImpl)fDTDScanner).getGrammar(),fSymbolTable, fNamespaceContext); 1046 } 1047 1048 return XMLEvent.DTD ; 1049 } 1050 1051 /** 1052 * Dispatch an XML "event". 1053 * 1054 * @param complete True if this driver is intended to scan 1055 * and dispatch as much as possible. 1056 * 1057 * @return True if there is more to dispatch either from this 1058 * or a another driver. 1059 * 1060 * @throws IOException Thrown on i/o error. 1061 * @throws XNIException Thrown on parse error. 1062 */ dispatch(boolean complete)1063 public boolean dispatch(boolean complete) 1064 throws IOException, XNIException { 1065 fEntityManager.setEntityHandler(null); 1066 try { 1067 boolean again; 1068 XMLResourceIdentifierImpl resourceIdentifier = new XMLResourceIdentifierImpl(); 1069 if( fDTDScanner == null){ 1070 1071 if (fEntityManager.getEntityScanner() instanceof XML11EntityScanner){ 1072 fDTDScanner = new XML11DTDScannerImpl(); 1073 } else 1074 1075 fDTDScanner = new XMLDTDScannerImpl(); 1076 1077 ((XMLDTDScannerImpl)fDTDScanner).reset(fPropertyManager); 1078 } 1079 1080 fDTDScanner.setLimitAnalyzer(fLimitAnalyzer); 1081 do { 1082 again = false; 1083 switch (fScannerState) { 1084 case SCANNER_STATE_DTD_INTERNAL_DECLS: { 1085 boolean moreToScan = false; 1086 if (!fDTDScanner.skipDTD(fSupportDTD)) { 1087 // REVISIT: Should there be a feature for 1088 // the "complete" parameter? 
1089 boolean completeDTD = true; 1090 1091 moreToScan = fDTDScanner.scanDTDInternalSubset(completeDTD, fStandalone, fHasExternalDTD && fLoadExternalDTD); 1092 } 1093 Entity entity = fEntityScanner.getCurrentEntity(); 1094 if(entity instanceof Entity.ScannedEntity){ 1095 fEndPos=((Entity.ScannedEntity)entity).position; 1096 } 1097 fReadingDTD=false; 1098 if (!moreToScan) { 1099 // end doctype declaration 1100 if (!fEntityScanner.skipChar(']', null)) { 1101 reportFatalError("DoctypedeclNotClosed", new Object[]{fDoctypeName}); 1102 } 1103 fEntityScanner.skipSpaces(); 1104 if (!fEntityScanner.skipChar('>', null)) { 1105 reportFatalError("DoctypedeclUnterminated", new Object[]{fDoctypeName}); 1106 } 1107 fMarkupDepth--; 1108 1109 if (!fSupportDTD) { 1110 //simply reset the entity store without having to mess around 1111 //with the DTD Scanner code 1112 fEntityStore = fEntityManager.getEntityStore(); 1113 fEntityStore.reset(); 1114 } else { 1115 // scan external subset next unless we are ignoring DTDs 1116 if (fDoctypeSystemId != null && (fValidation || fLoadExternalDTD)) { 1117 setScannerState(SCANNER_STATE_DTD_EXTERNAL); 1118 break; 1119 } 1120 } 1121 1122 setEndDTDScanState(); 1123 return true; 1124 1125 } 1126 break; 1127 } 1128 case SCANNER_STATE_DTD_EXTERNAL: { 1129 /** 1130 fDTDDescription.setValues(fDoctypePublicId, fDoctypeSystemId, null, null); 1131 fDTDDescription.setRootName(fDoctypeName); 1132 XMLInputSource xmlInputSource = 1133 fEntityManager.resolveEntity(fDTDDescription); 1134 fDTDScanner.setInputSource(xmlInputSource); 1135 setScannerState(SCANNER_STATE_DTD_EXTERNAL_DECLS); 1136 again = true; 1137 break; 1138 */ 1139 1140 resourceIdentifier.setValues(fDoctypePublicId, fDoctypeSystemId, null, null); 1141 XMLInputSource xmlInputSource = null ; 1142 StaxXMLInputSource staxInputSource = fEntityManager.resolveEntityAsPerStax(resourceIdentifier); 1143 1144 // Check access permission. If the source is resolved by a resolver, the check is skipped. 
1145 if (!staxInputSource.isCreatedByResolver()) { 1146 String accessError = checkAccess(fDoctypeSystemId, fAccessExternalDTD); 1147 if (accessError != null) { 1148 reportFatalError("AccessExternalDTD", new Object[]{ SecuritySupport.sanitizePath(fDoctypeSystemId), accessError }); 1149 } 1150 } 1151 xmlInputSource = staxInputSource.getXMLInputSource(); 1152 fDTDScanner.setInputSource(xmlInputSource); 1153 if (fEntityScanner.fCurrentEntity != null) { 1154 setScannerState(SCANNER_STATE_DTD_EXTERNAL_DECLS); 1155 } else { 1156 setScannerState(SCANNER_STATE_PROLOG); 1157 } 1158 again = true; 1159 break; 1160 } 1161 case SCANNER_STATE_DTD_EXTERNAL_DECLS: { 1162 // REVISIT: Should there be a feature for 1163 // the "complete" parameter? 1164 boolean completeDTD = true; 1165 boolean moreToScan = fDTDScanner.scanDTDExternalSubset(completeDTD); 1166 if (!moreToScan) { 1167 setEndDTDScanState(); 1168 return true; 1169 } 1170 break; 1171 } 1172 case SCANNER_STATE_PROLOG : { 1173 // skip entity decls 1174 setEndDTDScanState(); 1175 return true; 1176 } 1177 default: { 1178 throw new XNIException("DTDDriver#dispatch: scanner state="+fScannerState+" ("+getScannerStateName(fScannerState)+')'); 1179 } 1180 } 1181 } while (complete || again); 1182 } 1183 // encoding errors 1184 catch (MalformedByteSequenceException e) { 1185 fErrorReporter.reportError(e.getDomain(), e.getKey(), 1186 e.getArguments(), XMLErrorReporter.SEVERITY_FATAL_ERROR, e); 1187 return false; 1188 } catch (CharConversionException e) { 1189 fErrorReporter.reportError( 1190 XMLMessageFormatter.XML_DOMAIN, 1191 "CharConversionFailure", 1192 null, 1193 XMLErrorReporter.SEVERITY_FATAL_ERROR, e); 1194 return false; 1195 } 1196 // premature end of file 1197 catch (EOFException e) { 1198 e.printStackTrace(); 1199 reportFatalError("PrematureEOF", null); 1200 return false; 1201 //throw e; 1202 } 1203 1204 // cleanup 1205 finally { 1206 fEntityManager.setEntityHandler(XMLDocumentScannerImpl.this); 1207 } 1208 1209 return true; 
1210 1211 } 1212 1213 // dispatch(boolean):boolean 1214 1215 } // class DTDDriver 1216 1217 /** 1218 * Driver to handle content scanning. 1219 * 1220 * @author Andy Clark, IBM 1221 * @author Eric Ye, IBM 1222 */ 1223 protected class ContentDriver 1224 extends FragmentContentDriver { 1225 1226 // 1227 // Protected methods 1228 // 1229 1230 // hooks 1231 1232 // NOTE: These hook methods are added so that the full document 1233 // scanner can share the majority of code with this class. 1234 1235 /** 1236 * Scan for DOCTYPE hook. This method is a hook for subclasses 1237 * to add code to handle scanning for a the "DOCTYPE" string 1238 * after the string "<!" has been scanned. 1239 * 1240 * @return True if the "DOCTYPE" was scanned; false if "DOCTYPE" 1241 * was not scanned. 1242 */ scanForDoctypeHook()1243 protected boolean scanForDoctypeHook() 1244 throws IOException, XNIException { 1245 1246 if (fEntityScanner.skipString(DOCTYPE)) { 1247 setScannerState(SCANNER_STATE_DOCTYPE); 1248 // fEntityScanner.markStartOfDTD(); 1249 return true; 1250 } 1251 return false; 1252 1253 } // scanForDoctypeHook():boolean 1254 1255 /** 1256 * Element depth iz zero. This methos is a hook for subclasses 1257 * to add code to handle when the element depth hits zero. When 1258 * scanning a document fragment, an element depth of zero is 1259 * normal. However, when scanning a full XML document, the 1260 * scanner must handle the trailing miscellanous section of 1261 * the document after the end of the document's root element. 1262 * 1263 * @return True if the caller should stop and return true which 1264 * allows the scanner to switch to a new scanning 1265 * driver. A return value of false indicates that 1266 * the content driver should continue as normal. 
1267 */ elementDepthIsZeroHook()1268 protected boolean elementDepthIsZeroHook() 1269 throws IOException, XNIException { 1270 1271 setScannerState(SCANNER_STATE_TRAILING_MISC); 1272 setDriver(fTrailingMiscDriver); 1273 return true; 1274 1275 } // elementDepthIsZeroHook():boolean 1276 1277 /** 1278 * Scan for root element hook. This method is a hook for 1279 * subclasses to add code that handles scanning for the root 1280 * element. When scanning a document fragment, there is no 1281 * "root" element. However, when scanning a full XML document, 1282 * the scanner must handle the root element specially. 1283 * 1284 * @return True if the caller should stop and return true which 1285 * allows the scanner to switch to a new scanning 1286 * driver. A return value of false indicates that 1287 * the content driver should continue as normal. 1288 */ scanRootElementHook()1289 protected boolean scanRootElementHook() 1290 throws IOException, XNIException { 1291 1292 if (scanStartElement()) { 1293 setScannerState(SCANNER_STATE_TRAILING_MISC); 1294 setDriver(fTrailingMiscDriver); 1295 return true; 1296 } 1297 return false; 1298 1299 } // scanRootElementHook():boolean 1300 1301 /** 1302 * End of file hook. This method is a hook for subclasses to 1303 * add code that handles the end of file. The end of file in 1304 * a document fragment is OK if the markup depth is zero. 1305 * However, when scanning a full XML document, an end of file 1306 * is always premature. 1307 */ endOfFileHook(EOFException e)1308 protected void endOfFileHook(EOFException e) 1309 throws IOException, XNIException { 1310 1311 reportFatalError("PrematureEOF", null); 1312 // in case continue-after-fatal-error set, should not do this... 
1313 //throw e; 1314 1315 } // endOfFileHook() 1316 resolveExternalSubsetAndRead()1317 protected void resolveExternalSubsetAndRead() 1318 throws IOException, XNIException { 1319 1320 fDTDDescription.setValues(null, null, fEntityManager.getCurrentResourceIdentifier().getExpandedSystemId(), null); 1321 fDTDDescription.setRootName(fElementQName.rawname); 1322 XMLInputSource src = fExternalSubsetResolver.getExternalSubset(fDTDDescription); 1323 1324 if (src != null) { 1325 fDoctypeName = fElementQName.rawname; 1326 fDoctypePublicId = src.getPublicId(); 1327 fDoctypeSystemId = src.getSystemId(); 1328 // call document handler 1329 if (fDocumentHandler != null) { 1330 // This inserts a doctypeDecl event into the stream though no 1331 // DOCTYPE existed in the instance document. 1332 fDocumentHandler.doctypeDecl(fDoctypeName, fDoctypePublicId, fDoctypeSystemId, null); 1333 } 1334 try { 1335 fDTDScanner.setInputSource(src); 1336 while (fDTDScanner.scanDTDExternalSubset(true)); 1337 } finally { 1338 fEntityManager.setEntityHandler(XMLDocumentScannerImpl.this); 1339 } 1340 } 1341 } // resolveExternalSubsetAndRead() 1342 1343 1344 1345 } // class ContentDriver 1346 1347 /** 1348 * Driver to handle trailing miscellaneous section scanning. 1349 * 1350 * @author Andy Clark, IBM 1351 * @author Eric Ye, IBM 1352 */ 1353 protected final class TrailingMiscDriver 1354 implements Driver { 1355 1356 // 1357 // Driver methods 1358 // next()1359 public int next() throws IOException, XNIException{ 1360 //this could for cases like <foo/> 1361 //look at scanRootElementHook 1362 if(fEmptyElement){ 1363 fEmptyElement = false; 1364 return XMLEvent.END_ELEMENT; 1365 } 1366 1367 try { 1368 if(fScannerState == SCANNER_STATE_TERMINATED){ 1369 return XMLEvent.END_DOCUMENT ;} 1370 do { 1371 switch (fScannerState) { 1372 case SCANNER_STATE_TRAILING_MISC: { 1373 1374 fEntityScanner.skipSpaces(); 1375 //we should have reached the end of the document in 1376 //most cases. 
1377 if(fScannerState == SCANNER_STATE_TERMINATED ){ 1378 return XMLEvent.END_DOCUMENT ; 1379 } 1380 if (fEntityScanner.skipChar('<', null)) { 1381 setScannerState(SCANNER_STATE_START_OF_MARKUP); 1382 } else { 1383 setScannerState(SCANNER_STATE_CONTENT); 1384 } 1385 break; 1386 } 1387 case SCANNER_STATE_START_OF_MARKUP: { 1388 fMarkupDepth++; 1389 if (fEntityScanner.skipChar('?', null)) { 1390 setScannerState(SCANNER_STATE_PI); 1391 } else if (fEntityScanner.skipChar('!', null)) { 1392 setScannerState(SCANNER_STATE_COMMENT); 1393 } else if (fEntityScanner.skipChar('/', null)) { 1394 reportFatalError("MarkupNotRecognizedInMisc", 1395 null); 1396 } else if (isValidNameStartChar(fEntityScanner.peekChar()) || 1397 isValidNameStartHighSurrogate(fEntityScanner.peekChar())) { 1398 reportFatalError("MarkupNotRecognizedInMisc", 1399 null); 1400 scanStartElement(); 1401 setScannerState(SCANNER_STATE_CONTENT); 1402 } else { 1403 reportFatalError("MarkupNotRecognizedInMisc", 1404 null); 1405 } 1406 break; 1407 } 1408 } 1409 } while(fScannerState == SCANNER_STATE_START_OF_MARKUP || 1410 fScannerState == SCANNER_STATE_TRAILING_MISC); 1411 1412 switch (fScannerState){ 1413 case SCANNER_STATE_PI: { 1414 fContentBuffer.clear(); 1415 scanPI(fContentBuffer); 1416 setScannerState(SCANNER_STATE_TRAILING_MISC); 1417 return XMLEvent.PROCESSING_INSTRUCTION ; 1418 } 1419 case SCANNER_STATE_COMMENT: { 1420 if (!fEntityScanner.skipString(COMMENTSTRING)) { 1421 reportFatalError("InvalidCommentStart", null); 1422 } 1423 scanComment(); 1424 setScannerState(SCANNER_STATE_TRAILING_MISC); 1425 return XMLEvent.COMMENT; 1426 } 1427 case SCANNER_STATE_CONTENT: { 1428 int ch = fEntityScanner.peekChar(); 1429 if (ch == -1) { 1430 setScannerState(SCANNER_STATE_TERMINATED); 1431 return XMLEvent.END_DOCUMENT ; 1432 } else{ 1433 reportFatalError("ContentIllegalInTrailingMisc", 1434 null); 1435 fEntityScanner.scanChar(null); 1436 setScannerState(SCANNER_STATE_TRAILING_MISC); 1437 return XMLEvent.CHARACTERS; 
1438 } 1439 1440 } 1441 case SCANNER_STATE_REFERENCE: { 1442 reportFatalError("ReferenceIllegalInTrailingMisc", 1443 null); 1444 setScannerState(SCANNER_STATE_TRAILING_MISC); 1445 return XMLEvent.ENTITY_REFERENCE ; 1446 } 1447 case SCANNER_STATE_TERMINATED: { 1448 //there can't be any element after SCANNER_STATE_TERMINATED or when the parser 1449 //has reached the end of document 1450 setScannerState(SCANNER_STATE_NO_SUCH_ELEMENT_EXCEPTION); 1451 //xxx what to do when the scanner has reached the terminating state. 1452 return XMLEvent.END_DOCUMENT ; 1453 } 1454 case SCANNER_STATE_NO_SUCH_ELEMENT_EXCEPTION:{ 1455 throw new java.util.NoSuchElementException("No more events to be parsed"); 1456 } 1457 default: throw new XNIException("Scanner State " + fScannerState + " not Recognized "); 1458 }//switch 1459 // encoding errors 1460 } catch (MalformedByteSequenceException e) { 1461 fErrorReporter.reportError(e.getDomain(), e.getKey(), 1462 e.getArguments(), XMLErrorReporter.SEVERITY_FATAL_ERROR, e); 1463 return -1; 1464 } catch (CharConversionException e) { 1465 fErrorReporter.reportError( 1466 XMLMessageFormatter.XML_DOMAIN, 1467 "CharConversionFailure", 1468 null, 1469 XMLErrorReporter.SEVERITY_FATAL_ERROR, e); 1470 return -1; 1471 } catch (EOFException e) { 1472 // NOTE: This is the only place we're allowed to reach 1473 // the real end of the document stream. Unless the 1474 // end of file was reached prematurely. 1475 if (fMarkupDepth != 0) { 1476 reportFatalError("PrematureEOF", null); 1477 return -1; 1478 //throw e; 1479 } 1480 //System.out.println("EOFException thrown") ; 1481 setScannerState(SCANNER_STATE_TERMINATED); 1482 } 1483 1484 return XMLEvent.END_DOCUMENT; 1485 1486 }//next 1487 1488 } // class TrailingMiscDriver 1489 1490 /** 1491 * Implements XMLBufferListener interface. 1492 */ 1493 1494 1495 /** 1496 * receives callbacks from {@link XMLEntityReader } when buffer 1497 * is being changed. 
1498 * @param refreshPosition 1499 */ refresh(int refreshPosition)1500 public void refresh(int refreshPosition){ 1501 super.refresh(refreshPosition); 1502 if(fReadingDTD){ 1503 Entity entity = fEntityScanner.getCurrentEntity(); 1504 if(entity instanceof Entity.ScannedEntity){ 1505 fEndPos=((Entity.ScannedEntity)entity).position; 1506 } 1507 fDTDDecl.append(((Entity.ScannedEntity)entity).ch,fStartPos , fEndPos-fStartPos); 1508 fStartPos = refreshPosition; 1509 } 1510 } 1511 1512 } // class XMLDocumentScannerImpl 1513