1 /* 2 * Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved. 3 */ 4 5 /* 6 * Licensed to the Apache Software Foundation (ASF) under one or more 7 * contributor license agreements. See the NOTICE file distributed with 8 * this work for additional information regarding copyright ownership. 9 * The ASF licenses this file to You under the Apache License, Version 2.0 10 * (the "License"); you may not use this file except in compliance with 11 * the License. You may obtain a copy of the License at 12 * 13 * http://www.apache.org/licenses/LICENSE-2.0 14 * 15 * Unless required by applicable law or agreed to in writing, software 16 * distributed under the License is distributed on an "AS IS" BASIS, 17 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 * See the License for the specific language governing permissions and 19 * limitations under the License. 20 */ 21 22 package com.sun.org.apache.xerces.internal.impl; 23 24 import com.sun.org.apache.xerces.internal.impl.msg.XMLMessageFormatter; 25 import com.sun.org.apache.xerces.internal.util.AugmentationsImpl; 26 import com.sun.org.apache.xerces.internal.util.XMLAttributesIteratorImpl; 27 import com.sun.org.apache.xerces.internal.util.XMLChar; 28 import com.sun.org.apache.xerces.internal.util.XMLStringBuffer; 29 import com.sun.org.apache.xerces.internal.util.XMLSymbols; 30 import com.sun.org.apache.xerces.internal.utils.SecuritySupport; 31 import com.sun.org.apache.xerces.internal.utils.XMLSecurityManager; 32 import com.sun.org.apache.xerces.internal.utils.XMLSecurityManager.Limit; 33 import com.sun.org.apache.xerces.internal.utils.XMLSecurityPropertyManager; 34 import com.sun.org.apache.xerces.internal.xni.Augmentations; 35 import com.sun.org.apache.xerces.internal.xni.QName; 36 import com.sun.org.apache.xerces.internal.xni.XMLAttributes; 37 import com.sun.org.apache.xerces.internal.xni.XMLDocumentHandler; 38 import 
com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier; 39 import com.sun.org.apache.xerces.internal.xni.XMLString; 40 import com.sun.org.apache.xerces.internal.xni.XNIException; 41 import com.sun.org.apache.xerces.internal.xni.parser.XMLComponent; 42 import com.sun.org.apache.xerces.internal.xni.parser.XMLComponentManager; 43 import com.sun.org.apache.xerces.internal.xni.parser.XMLConfigurationException; 44 import com.sun.org.apache.xerces.internal.xni.parser.XMLDocumentScanner; 45 import com.sun.org.apache.xerces.internal.xni.parser.XMLInputSource; 46 import com.sun.xml.internal.stream.XMLBufferListener; 47 import com.sun.xml.internal.stream.XMLEntityStorage; 48 import com.sun.xml.internal.stream.dtd.DTDGrammarUtil; 49 import java.io.EOFException; 50 import java.io.IOException; 51 import javax.xml.stream.XMLInputFactory; 52 import javax.xml.stream.XMLStreamConstants; 53 import javax.xml.stream.events.XMLEvent; 54 55 56 /** 57 * 58 * This class is responsible for scanning the structure and content 59 * of document fragments. 60 * 61 * This class has been modified as per the new design which is more suited to 62 * efficiently build pull parser. Lot of improvements have been done and 63 * the code has been added to support stax functionality/features. 64 * 65 * @author Neeraj Bajaj SUN Microsystems 66 * @author K.Venugopal SUN Microsystems 67 * @author Glenn Marcy, IBM 68 * @author Andy Clark, IBM 69 * @author Arnaud Le Hors, IBM 70 * @author Eric Ye, IBM 71 * @author Sunitha Reddy, SUN Microsystems 72 * @version $Id: XMLDocumentFragmentScannerImpl.java,v 1.19 2010-11-02 19:54:55 joehw Exp $ 73 * 74 */ 75 public class XMLDocumentFragmentScannerImpl 76 extends XMLScanner 77 implements XMLDocumentScanner, XMLComponent, XMLEntityHandler, XMLBufferListener { 78 79 // 80 // Constants 81 // 82 83 protected int fElementAttributeLimit, fXMLNameLimit; 84 85 /** External subset resolver. 
**/
    protected ExternalSubsetResolver fExternalSubsetResolver;

    // scanner states
    // The values below are the int codes stored in fScannerState.

    //XXX this should be divided into more states.
    /** Scanner state: start of markup. */
    protected static final int SCANNER_STATE_START_OF_MARKUP = 21;

    /** Scanner state: content. */
    protected static final int SCANNER_STATE_CONTENT = 22;

    /** Scanner state: processing instruction. */
    protected static final int SCANNER_STATE_PI = 23;

    /** Scanner state: DOCTYPE. */
    protected static final int SCANNER_STATE_DOCTYPE = 24;

    /** Scanner state: XML declaration. */
    protected static final int SCANNER_STATE_XML_DECL = 25;

    /** Scanner state: root element. */
    protected static final int SCANNER_STATE_ROOT_ELEMENT = 26;

    /** Scanner state: comment. */
    protected static final int SCANNER_STATE_COMMENT = 27;

    /** Scanner state: reference. */
    protected static final int SCANNER_STATE_REFERENCE = 28;

    /** Scanner state: reading an attribute name, e.g. 'type' in &lt;book type="hard"&gt;. */
    protected static final int SCANNER_STATE_ATTRIBUTE = 29;

    /** Scanner state: reading an attribute value, e.g. "hard" in &lt;book type="hard"&gt;. */
    protected static final int SCANNER_STATE_ATTRIBUTE_VALUE = 30;

    /** Scanner state: trailing misc. USED BY DOCUMENT_SCANNER_IMPL (value 32 is not declared in this class). */
    //protected static final int SCANNER_STATE_TRAILING_MISC = 32;

    /** Scanner state: end of input. */
    protected static final int SCANNER_STATE_END_OF_INPUT = 33;

    /** Scanner state: terminated. */
    protected static final int SCANNER_STATE_TERMINATED = 34;

    /** Scanner state: CDATA section. */
    protected static final int SCANNER_STATE_CDATA = 35;

    /** Scanner state: Text declaration. */
    protected static final int SCANNER_STATE_TEXT_DECL = 36;

    /** Scanner state: character data.
*/
    // Scanner state: character data.
    protected static final int SCANNER_STATE_CHARACTER_DATA = 37;

    // Scanner state: reading a start tag, e.g. <book type="hard"> in <book type="hard">foo</book>
    protected static final int SCANNER_STATE_START_ELEMENT_TAG = 38;

    // Scanner state: reading an end tag, e.g. </book> in <book type="hard">foo</book>
    protected static final int SCANNER_STATE_END_ELEMENT_TAG = 39;

    // Scanner states for character references and built-in references
    // (meaning taken from the constant names only).
    protected static final int SCANNER_STATE_CHAR_REFERENCE = 40;
    protected static final int SCANNER_STATE_BUILT_IN_REFS = 41;

    // feature identifiers


    /** Feature identifier: notify built-in references. */
    protected static final String NOTIFY_BUILTIN_REFS =
            Constants.XERCES_FEATURE_PREFIX + Constants.NOTIFY_BUILTIN_REFS_FEATURE;

    /** Property identifier: entity resolver. */
    protected static final String ENTITY_RESOLVER =
            Constants.XERCES_PROPERTY_PREFIX + Constants.ENTITY_RESOLVER_PROPERTY;

    /** Feature identifier: standard uri conformant */
    protected static final String STANDARD_URI_CONFORMANT =
            Constants.XERCES_FEATURE_PREFIX +Constants.STANDARD_URI_CONFORMANT_FEATURE;

    /** Property identifier: Security property manager. */
    private static final String XML_SECURITY_PROPERTY_MANAGER =
            Constants.XML_SECURITY_PROPERTY_MANAGER;

    /** access external dtd: file protocol
     * For DOM/SAX, the secure feature is set to true by default
     */
    final static String EXTERNAL_ACCESS_DEFAULT = Constants.EXTERNAL_ACCESS_DEFAULT;

    // recognized features and properties

    /**
     * Recognized features. Kept index-aligned with FEATURE_DEFAULTS:
     * getFeatureDefault looks an identifier up here and returns the
     * entry at the same index from FEATURE_DEFAULTS.
     */
    private static final String[] RECOGNIZED_FEATURES = {
        NAMESPACES,
        VALIDATION,
        NOTIFY_BUILTIN_REFS,
        NOTIFY_CHAR_REFS,
        Constants.STAX_REPORT_CDATA_EVENT
    };

    /** Feature defaults, one entry per RECOGNIZED_FEATURES element (null = no default reported). */
    private static final Boolean[] FEATURE_DEFAULTS = {
        Boolean.TRUE,   // NAMESPACES
        null,           // VALIDATION
        Boolean.FALSE,  // NOTIFY_BUILTIN_REFS
        Boolean.FALSE,  // NOTIFY_CHAR_REFS
        Boolean.TRUE    // Constants.STAX_REPORT_CDATA_EVENT
    };

    /** Recognized properties.
*/
    // Kept index-aligned with PROPERTY_DEFAULTS (see getPropertyDefault).
    private static final String[] RECOGNIZED_PROPERTIES = {
        SYMBOL_TABLE,
        ERROR_REPORTER,
        ENTITY_MANAGER,
        XML_SECURITY_PROPERTY_MANAGER
    };

    /** Property defaults, one entry per RECOGNIZED_PROPERTIES element; none has a default. */
    private static final Object[] PROPERTY_DEFAULTS = {
        null,
        null,
        null,
        null
    };

    /** The characters "[CDATA[". */
    private static final char [] cdata = {'[','C','D','A','T','A','['};
    /** The characters "<?xml". */
    static final char [] xmlDecl = {'<','?','x','m','l'};
    // private static final char [] endTag = {'<','/'};
    // debugging

    /** Debug scanner state. */
    private static final boolean DEBUG_SCANNER_STATE = false;

    /** Debug driver. */
    private static final boolean DEBUG_DISPATCHER = false;

    /** Debug content driver scanning. */
    protected static final boolean DEBUG_START_END_ELEMENT = false;


    /** Debug driver next */
    protected static final boolean DEBUG_NEXT = false ;

    /** General debug flag. */
    protected static final boolean DEBUG = false;
    /** Debug flag for coalescing (purpose inferred from the name only). */
    protected static final boolean DEBUG_COALESCE = false;
    //
    // Data
    //

    // protected data

    /** Document handler. */
    protected XMLDocumentHandler fDocumentHandler;
    /** Compared against XMLEvent.END_ELEMENT by getElementQName(). */
    protected int fScannerLastState ;

    /** Entity Storage */
    protected XMLEntityStorage fEntityStore;

    /**
     * Entity stack: startEntity records fMarkupDepth at index fEntityDepth,
     * and endEntity compares the recorded value with the current markup depth.
     */
    protected int[] fEntityStack = new int[4];

    /** Markup depth. */
    protected int fMarkupDepth;

    //is the element empty
    protected boolean fEmptyElement ;

    //track if we are reading attributes, this is useful while
    //there is a callback
    protected boolean fReadingAttributes = false;

    /** Scanner state. */
    protected int fScannerState;

    /** SubScanner state: inside scanContent method.
*/
    protected boolean fInScanContent = false;
    // Flags describing the kind of the last scanned section
    // (meaning taken from the field names only).
    protected boolean fLastSectionWasCData = false;
    protected boolean fLastSectionWasEntityReference = false;
    protected boolean fLastSectionWasCharacterData = false;

    /** has external dtd */
    protected boolean fHasExternalDTD;

    /**
     * Standalone. fStandaloneSet is true when the declaration carried a
     * standalone pseudo-attribute; fStandalone is true when its value was "yes"
     * (both are assigned in scanXMLDeclOrTextDecl).
     */
    protected boolean fStandaloneSet;
    protected boolean fStandalone;
    protected String fVersion;

    // element information

    /** Current element. */
    protected QName fCurrentElement;

    /** Element stack. */
    protected ElementStack fElementStack = new ElementStack();
    protected ElementStack2 fElementStack2 = new ElementStack2();

    // other info

    /** Document system identifier.
     * REVISIT: So what's this used for? - NG
     * protected String fDocumentSystemId;
     ******/

    /** Processing-instruction target, returned by getPITarget(). */
    protected String fPITarget ;

    //xxx do we need to create an extra XMLString object... look for using fTempString for collecting all the data values
    // NOTE(review): getPIData() returns fContentBuffer, not this field -- confirm whether fPIData is still used.
    protected XMLString fPIData = new XMLString();

    // features


    /** Notify built-in references. */
    protected boolean fNotifyBuiltInRefs = false;

    //STAX related properties
    //defaultValues.
    protected boolean fSupportDTD = true;
    protected boolean fReplaceEntityReferences = true;
    protected boolean fSupportExternalEntities = false;
    protected boolean fReportCdataEvent = false ;
    protected boolean fIsCoalesce = false ;
    /** Encoding pseudo-attribute value from the last scanned XML/text declaration. */
    protected String fDeclaredEncoding = null;
    /** Xerces Feature: Disallow doctype declaration. */
    protected boolean fDisallowDoctype = false;

    /**
     * comma-delimited list of protocols that are allowed for the purpose
     * of accessing external dtd or entity references
     */
    protected String fAccessExternalDTD = EXTERNAL_ACCESS_DEFAULT;

    /**
     * standard uri conformant (strict uri).
* http://apache.org/xml/features/standard-uri-conformant
     */
    protected boolean fStrictURI;

    // drivers

    /** Active driver; next() delegates to it. */
    protected Driver fDriver;

    /** Content driver, obtained from createContentDriver() during field initialisation. */
    protected Driver fContentDriver = createContentDriver();

    // temporary variables

    /** Element QName. */
    protected QName fElementQName = new QName();

    /** Attribute QName. */
    protected QName fAttributeQName = new QName();

    /**
     * CHANGED: Using XMLAttributesIteratorImpl instead of XMLAttributesImpl. This class
     * implements Iterator interface so we can directly give Attributes in the form of
     * iterator.
     */
    protected XMLAttributesIteratorImpl fAttributes = new XMLAttributesIteratorImpl();


    /** Temporary string; returned by getCharacterData() when fUsebuffer is false. */
    protected XMLString fTempString = new XMLString();

    /** Second temporary string. */
    protected XMLString fTempString2 = new XMLString();

    /**
     * Array of 3 strings filled by scanXMLDeclOrTextDecl:
     * [0] version, [1] encoding, [2] standalone.
     */
    private String[] fStrings = new String[3];

    /** Making the buffer accessible to derived class -- String buffer. */
    protected XMLStringBuffer fStringBuffer = new XMLStringBuffer();

    /** Making the buffer accessible to derived class -- String buffer. */
    protected XMLStringBuffer fStringBuffer2 = new XMLStringBuffer();

    /**
     * Making the buffer accessible to derived class -- stores character data
     * and PI data (returned by getPIData() and, when fUsebuffer is true,
     * by getCharacterData()).
     */
    protected XMLStringBuffer fContentBuffer = new XMLStringBuffer();

    /** Single character array.
*/
    private final char[] fSingleChar = new char[1];
    private String fCurrentEntityName = null;

    // New members
    protected boolean fScanToEnd = false;

    /** Reset to null by resetCommon(). */
    protected DTDGrammarUtil dtdGrammarUtil= null;

    protected boolean fAddDefaultAttr = false;

    protected boolean foundBuiltInRefs = false;


    //skip element algorithm
    static final short MAX_DEPTH_LIMIT = 5 ;
    static final short ELEMENT_ARRAY_LENGTH = 200 ;
    static final short MAX_POINTER_AT_A_DEPTH = 4 ;
    static final boolean DEBUG_SKIP_ALGORITHM = false;
    //create an element array of length equal to ELEMENT_ARRAY_LENGTH
    String [] fElementArray = new String[ELEMENT_ARRAY_LENGTH] ;
    //pointer location where last element was skipped
    short fLastPointerLocation = 0 ;
    short fElementPointer = 0 ;
    //2D array to store pointer info, sized MAX_DEPTH_LIMIT x MAX_POINTER_AT_A_DEPTH
    short [] [] fPointerInfo = new short[MAX_DEPTH_LIMIT] [MAX_POINTER_AT_A_DEPTH] ;
    protected String fElementRawname ;
    // The three flags below are cleared by resetCommon().
    protected boolean fShouldSkip = false;
    protected boolean fAdd = false ;
    protected boolean fSkip = false;

    /** Reusable Augmentations. */
    private Augmentations fTempAugmentations = null;
    //
    // Constructors
    //

    /** Default constructor. */
    public XMLDocumentFragmentScannerImpl() {
    } // <init>()

    //
    // XMLDocumentScanner methods
    //

    /**
     * Sets the input source. Registers this scanner as the entity manager's
     * entity handler and starts an entity named "$fragment$" on the given
     * input source.
     *
     * @param inputSource The input source.
     *
     * @throws IOException Thrown on i/o error.
     */
    public void setInputSource(XMLInputSource inputSource) throws IOException {
        fEntityManager.setEntityHandler(this);
        fEntityManager.startEntity(false, "$fragment$", inputSource, false, true);
        // fDocumentSystemId = fEntityManager.expandSystemId(inputSource.getSystemId());
    } // setInputSource(XMLInputSource)

    /**
     * Scans a document.
425 * 426 * @param complete True if the scanner should scan the document 427 * completely, pushing all events to the registered 428 * document handler. A value of false indicates that 429 * that the scanner should only scan the next portion 430 * of the document and return. A scanner instance is 431 * permitted to completely scan a document if it does 432 * not support this "pull" scanning model. 433 * 434 * @return True if there is more to scan, false otherwise. 435 */ scanDocument(boolean complete)436 public boolean scanDocument(boolean complete) 437 throws IOException, XNIException { 438 439 // keep dispatching "events" 440 fEntityManager.setEntityHandler(this); 441 //System.out.println(" get Document Handler in NSDocumentHandler " + fDocumentHandler ); 442 443 int event = next(); 444 do { 445 switch (event) { 446 case XMLStreamConstants.START_DOCUMENT : 447 //fDocumentHandler.startDocument(fEntityManager.getEntityScanner(),fEntityManager.getEntityScanner().getVersion(),fNamespaceContext,null);// not able to get 448 break; 449 case XMLStreamConstants.START_ELEMENT : 450 //System.out.println(" in scann element"); 451 //fDocumentHandler.startElement(getElementQName(),fAttributes,null); 452 break; 453 case XMLStreamConstants.CHARACTERS : 454 fEntityScanner.checkNodeCount(fEntityScanner.fCurrentEntity); 455 fDocumentHandler.characters(getCharacterData(),null); 456 break; 457 case XMLStreamConstants.SPACE: 458 //check if getCharacterData() is the right function to retrieve ignorableWhitespace information. 
459 //System.out.println("in the space"); 460 //fDocumentHandler.ignorableWhitespace(getCharacterData(), null); 461 break; 462 case XMLStreamConstants.ENTITY_REFERENCE : 463 fEntityScanner.checkNodeCount(fEntityScanner.fCurrentEntity); 464 //entity reference callback are given in startEntity 465 break; 466 case XMLStreamConstants.PROCESSING_INSTRUCTION : 467 fEntityScanner.checkNodeCount(fEntityScanner.fCurrentEntity); 468 fDocumentHandler.processingInstruction(getPITarget(),getPIData(),null); 469 break; 470 case XMLStreamConstants.COMMENT : 471 fEntityScanner.checkNodeCount(fEntityScanner.fCurrentEntity); 472 fDocumentHandler.comment(getCharacterData(),null); 473 break; 474 case XMLStreamConstants.DTD : 475 //all DTD related callbacks are handled in DTDScanner. 476 //1. Stax doesn't define DTD states as it does for XML Document. 477 //therefore we don't need to take care of anything here. So Just break; 478 break; 479 case XMLStreamConstants.CDATA: 480 fEntityScanner.checkNodeCount(fEntityScanner.fCurrentEntity); 481 fDocumentHandler.startCDATA(null); 482 //xxx: check if CDATA values comes from getCharacterData() function 483 fDocumentHandler.characters(getCharacterData(),null); 484 fDocumentHandler.endCDATA(null); 485 //System.out.println(" in CDATA of the XMLNSDocumentScannerImpl"); 486 break; 487 case XMLStreamConstants.NOTATION_DECLARATION : 488 break; 489 case XMLStreamConstants.ENTITY_DECLARATION : 490 break; 491 case XMLStreamConstants.NAMESPACE : 492 break; 493 case XMLStreamConstants.ATTRIBUTE : 494 break; 495 case XMLStreamConstants.END_ELEMENT : 496 //do not give callback here. 497 //this callback is given in scanEndElement function. 
498 //fDocumentHandler.endElement(getElementQName(),null); 499 break; 500 default : 501 throw new InternalError("processing event: " + event); 502 503 } 504 //System.out.println("here in before calling next"); 505 event = next(); 506 //System.out.println("here in after calling next"); 507 } while (event!=XMLStreamConstants.END_DOCUMENT && complete); 508 509 if(event == XMLStreamConstants.END_DOCUMENT) { 510 fDocumentHandler.endDocument(null); 511 return false; 512 } 513 514 return true; 515 516 } // scanDocument(boolean):boolean 517 518 519 getElementQName()520 public com.sun.org.apache.xerces.internal.xni.QName getElementQName(){ 521 if(fScannerLastState == XMLEvent.END_ELEMENT){ 522 fElementQName.setValues(fElementStack.getLastPoppedElement()); 523 } 524 return fElementQName ; 525 } 526 527 /** return the next state on the input 528 * @return int 529 */ 530 next()531 public int next() throws IOException, XNIException { 532 return fDriver.next(); 533 } 534 535 // 536 // XMLComponent methods 537 // 538 539 /** 540 * Resets the component. The component can query the component manager 541 * about any features and properties that affect the operation of the 542 * component. 543 * 544 * @param componentManager The component manager. 545 * 546 * @throws SAXException Thrown by component on initialization error. 547 * For example, if a feature or property is 548 * required for the operation of the component, the 549 * component manager may throw a 550 * SAXNotRecognizedException or a 551 * SAXNotSupportedException. 
552 */ 553 reset(XMLComponentManager componentManager)554 public void reset(XMLComponentManager componentManager) 555 throws XMLConfigurationException { 556 557 super.reset(componentManager); 558 559 // other settings 560 // fDocumentSystemId = null; 561 562 // sax features 563 //fAttributes.setNamespaces(fNamespaces); 564 565 // xerces features 566 fReportCdataEvent = componentManager.getFeature(Constants.STAX_REPORT_CDATA_EVENT, true); 567 fSecurityManager = (XMLSecurityManager)componentManager.getProperty(Constants.SECURITY_MANAGER, null); 568 fNotifyBuiltInRefs = componentManager.getFeature(NOTIFY_BUILTIN_REFS, false); 569 570 Object resolver = componentManager.getProperty(ENTITY_RESOLVER, null); 571 fExternalSubsetResolver = (resolver instanceof ExternalSubsetResolver) ? 572 (ExternalSubsetResolver) resolver : null; 573 574 //attribute 575 fReadingAttributes = false; 576 //xxx: external entities are supported in Xerces 577 // it would be good to define feature for this case 578 fSupportExternalEntities = true; 579 fReplaceEntityReferences = true; 580 fIsCoalesce = false; 581 582 // setup Driver 583 setScannerState(SCANNER_STATE_CONTENT); 584 setDriver(fContentDriver); 585 586 // JAXP 1.5 features and properties 587 XMLSecurityPropertyManager spm = (XMLSecurityPropertyManager) 588 componentManager.getProperty(XML_SECURITY_PROPERTY_MANAGER, null); 589 fAccessExternalDTD = spm.getValue(XMLSecurityPropertyManager.Property.ACCESS_EXTERNAL_DTD); 590 591 fStrictURI = componentManager.getFeature(STANDARD_URI_CONFORMANT, false); 592 593 resetCommon(); 594 //fEntityManager.test(); 595 } // reset(XMLComponentManager) 596 597 reset(PropertyManager propertyManager)598 public void reset(PropertyManager propertyManager){ 599 600 super.reset(propertyManager); 601 602 // other settings 603 // fDocumentSystemId = null; 604 fNamespaces = ((Boolean)propertyManager.getProperty(XMLInputFactory.IS_NAMESPACE_AWARE)).booleanValue(); 605 fNotifyBuiltInRefs = false ; 606 607 
//fElementStack2.clear(); 608 //fReplaceEntityReferences = true; 609 //fSupportExternalEntities = true; 610 Boolean bo = (Boolean)propertyManager.getProperty(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES); 611 fReplaceEntityReferences = bo.booleanValue(); 612 bo = (Boolean)propertyManager.getProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES); 613 fSupportExternalEntities = bo.booleanValue(); 614 Boolean cdata = (Boolean)propertyManager.getProperty(Constants.ZEPHYR_PROPERTY_PREFIX + Constants.STAX_REPORT_CDATA_EVENT) ; 615 if(cdata != null) 616 fReportCdataEvent = cdata.booleanValue() ; 617 Boolean coalesce = (Boolean)propertyManager.getProperty(XMLInputFactory.IS_COALESCING) ; 618 if(coalesce != null) 619 fIsCoalesce = coalesce.booleanValue(); 620 fReportCdataEvent = fIsCoalesce ? false : (fReportCdataEvent && true) ; 621 //if fIsCoalesce is set to true, set the value of fReplaceEntityReferences to true, 622 //if fIsCoalesce is set to false, take the value of fReplaceEntityReferences as set by application 623 fReplaceEntityReferences = fIsCoalesce ? true : fReplaceEntityReferences; 624 // setup Driver 625 //we dont need to do this -- nb. 
626 //setScannerState(SCANNER_STATE_CONTENT); 627 //setDriver(fContentDriver); 628 //fEntityManager.test(); 629 630 // JAXP 1.5 features and properties 631 XMLSecurityPropertyManager spm = (XMLSecurityPropertyManager) 632 propertyManager.getProperty(XML_SECURITY_PROPERTY_MANAGER); 633 fAccessExternalDTD = spm.getValue(XMLSecurityPropertyManager.Property.ACCESS_EXTERNAL_DTD); 634 635 fSecurityManager = (XMLSecurityManager)propertyManager.getProperty(Constants.SECURITY_MANAGER); 636 resetCommon(); 637 } // reset(XMLComponentManager) 638 resetCommon()639 void resetCommon() { 640 // initialize vars 641 fMarkupDepth = 0; 642 fCurrentElement = null; 643 fElementStack.clear(); 644 fHasExternalDTD = false; 645 fStandaloneSet = false; 646 fStandalone = false; 647 fInScanContent = false; 648 //skipping algorithm 649 fShouldSkip = false; 650 fAdd = false; 651 fSkip = false; 652 653 fEntityStore = fEntityManager.getEntityStore(); 654 dtdGrammarUtil = null; 655 656 if (fSecurityManager != null) { 657 fElementAttributeLimit = fSecurityManager.getLimit(XMLSecurityManager.Limit.ELEMENT_ATTRIBUTE_LIMIT); 658 fXMLNameLimit = fSecurityManager.getLimit(XMLSecurityManager.Limit.MAX_NAME_LIMIT); 659 } else { 660 fElementAttributeLimit = 0; 661 fXMLNameLimit = XMLSecurityManager.Limit.MAX_NAME_LIMIT.defaultValue(); 662 } 663 fLimitAnalyzer = fEntityManager.fLimitAnalyzer; 664 } 665 666 /** 667 * Returns a list of feature identifiers that are recognized by 668 * this component. This method may return null if no features 669 * are recognized by this component. 670 */ getRecognizedFeatures()671 public String[] getRecognizedFeatures() { 672 return (String[])(RECOGNIZED_FEATURES.clone()); 673 } // getRecognizedFeatures():String[] 674 675 /** 676 * Sets the state of a feature. This method is called by the component 677 * manager any time after reset when a feature changes state. 
678 * <p> 679 * <strong>Note:</strong> Components should silently ignore features 680 * that do not affect the operation of the component. 681 * 682 * @param featureId The feature identifier. 683 * @param state The state of the feature. 684 * 685 * @throws SAXNotRecognizedException The component should not throw 686 * this exception. 687 * @throws SAXNotSupportedException The component should not throw 688 * this exception. 689 */ setFeature(String featureId, boolean state)690 public void setFeature(String featureId, boolean state) 691 throws XMLConfigurationException { 692 693 super.setFeature(featureId, state); 694 695 // Xerces properties 696 if (featureId.startsWith(Constants.XERCES_FEATURE_PREFIX)) { 697 String feature = featureId.substring(Constants.XERCES_FEATURE_PREFIX.length()); 698 if (feature.equals(Constants.NOTIFY_BUILTIN_REFS_FEATURE)) { 699 fNotifyBuiltInRefs = state; 700 } 701 } 702 703 } // setFeature(String,boolean) 704 705 /** 706 * Returns a list of property identifiers that are recognized by 707 * this component. This method may return null if no properties 708 * are recognized by this component. 709 */ getRecognizedProperties()710 public String[] getRecognizedProperties() { 711 return (String[])(RECOGNIZED_PROPERTIES.clone()); 712 } // getRecognizedProperties():String[] 713 714 /** 715 * Sets the value of a property. This method is called by the component 716 * manager any time after reset when a property changes value. 717 * <p> 718 * <strong>Note:</strong> Components should silently ignore properties 719 * that do not affect the operation of the component. 720 * 721 * @param propertyId The property identifier. 722 * @param value The value of the property. 723 * 724 * @throws SAXNotRecognizedException The component should not throw 725 * this exception. 726 * @throws SAXNotSupportedException The component should not throw 727 * this exception. 
728 */ setProperty(String propertyId, Object value)729 public void setProperty(String propertyId, Object value) 730 throws XMLConfigurationException { 731 732 super.setProperty(propertyId, value); 733 734 // Xerces properties 735 if (propertyId.startsWith(Constants.XERCES_PROPERTY_PREFIX)) { 736 final int suffixLength = propertyId.length() - Constants.XERCES_PROPERTY_PREFIX.length(); 737 if (suffixLength == Constants.ENTITY_MANAGER_PROPERTY.length() && 738 propertyId.endsWith(Constants.ENTITY_MANAGER_PROPERTY)) { 739 fEntityManager = (XMLEntityManager)value; 740 return; 741 } 742 if (suffixLength == Constants.ENTITY_RESOLVER_PROPERTY.length() && 743 propertyId.endsWith(Constants.ENTITY_RESOLVER_PROPERTY)) { 744 fExternalSubsetResolver = (value instanceof ExternalSubsetResolver) ? 745 (ExternalSubsetResolver) value : null; 746 return; 747 } 748 } 749 750 751 // Xerces properties 752 if (propertyId.startsWith(Constants.XERCES_PROPERTY_PREFIX)) { 753 String property = propertyId.substring(Constants.XERCES_PROPERTY_PREFIX.length()); 754 if (property.equals(Constants.ENTITY_MANAGER_PROPERTY)) { 755 fEntityManager = (XMLEntityManager)value; 756 } 757 return; 758 } 759 760 //JAXP 1.5 properties 761 if (propertyId.equals(XML_SECURITY_PROPERTY_MANAGER)) 762 { 763 XMLSecurityPropertyManager spm = (XMLSecurityPropertyManager)value; 764 fAccessExternalDTD = spm.getValue(XMLSecurityPropertyManager.Property.ACCESS_EXTERNAL_DTD); 765 } 766 767 } // setProperty(String,Object) 768 769 /** 770 * Returns the default state for a feature, or null if this 771 * component does not want to report a default value for this 772 * feature. 773 * 774 * @param featureId The feature identifier. 
775 * 776 * @since Xerces 2.2.0 777 */ getFeatureDefault(String featureId)778 public Boolean getFeatureDefault(String featureId) { 779 for (int i = 0; i < RECOGNIZED_FEATURES.length; i++) { 780 if (RECOGNIZED_FEATURES[i].equals(featureId)) { 781 return FEATURE_DEFAULTS[i]; 782 } 783 } 784 return null; 785 } // getFeatureDefault(String):Boolean 786 787 /** 788 * Returns the default state for a property, or null if this 789 * component does not want to report a default value for this 790 * property. 791 * 792 * @param propertyId The property identifier. 793 * 794 * @since Xerces 2.2.0 795 */ getPropertyDefault(String propertyId)796 public Object getPropertyDefault(String propertyId) { 797 for (int i = 0; i < RECOGNIZED_PROPERTIES.length; i++) { 798 if (RECOGNIZED_PROPERTIES[i].equals(propertyId)) { 799 return PROPERTY_DEFAULTS[i]; 800 } 801 } 802 return null; 803 } // getPropertyDefault(String):Object 804 805 // 806 // XMLDocumentSource methods 807 // 808 809 /** 810 * setDocumentHandler 811 * 812 * @param documentHandler 813 */ setDocumentHandler(XMLDocumentHandler documentHandler)814 public void setDocumentHandler(XMLDocumentHandler documentHandler) { 815 fDocumentHandler = documentHandler; 816 //System.out.println(" In Set DOCUMENT HANDLER" + fDocumentHandler + " scanner =" + this); 817 } // setDocumentHandler(XMLDocumentHandler) 818 819 820 /** Returns the document handler */ getDocumentHandler()821 public XMLDocumentHandler getDocumentHandler(){ 822 return fDocumentHandler; 823 } 824 825 // 826 // XMLEntityHandler methods 827 // 828 829 /** 830 * This method notifies of the start of an entity. The DTD has the 831 * pseudo-name of "[dtd]" parameter entity names start with '%'; and 832 * general entities are just specified by their name. 833 * 834 * @param name The name of the entity. 835 * @param identifier The resource identifier. 836 * @param encoding The auto-detected IANA encoding name of the entity 837 * stream. 
This value will be null in those situations 838 * where the entity encoding is not auto-detected (e.g. 839 * internal entities or a document entity that is 840 * parsed from a java.io.Reader). 841 * @param augs Additional information that may include infoset augmentations 842 * 843 * @throws XNIException Thrown by handler to signal an error. 844 */ startEntity(String name, XMLResourceIdentifier identifier, String encoding, Augmentations augs)845 public void startEntity(String name, 846 XMLResourceIdentifier identifier, 847 String encoding, Augmentations augs) throws XNIException { 848 849 // keep track of this entity before fEntityDepth is increased 850 if (fEntityDepth == fEntityStack.length) { 851 int[] entityarray = new int[fEntityStack.length * 2]; 852 System.arraycopy(fEntityStack, 0, entityarray, 0, fEntityStack.length); 853 fEntityStack = entityarray; 854 } 855 fEntityStack[fEntityDepth] = fMarkupDepth; 856 857 super.startEntity(name, identifier, encoding, augs); 858 859 // WFC: entity declared in external subset in standalone doc 860 if(fStandalone && fEntityStore.isEntityDeclInExternalSubset(name)) { 861 reportFatalError("MSG_REFERENCE_TO_EXTERNALLY_DECLARED_ENTITY_WHEN_STANDALONE", 862 new Object[]{name}); 863 } 864 865 /** we are not calling the handlers yet.. */ 866 // call handler 867 if (fDocumentHandler != null && !fScanningAttribute) { 868 if (!name.equals("[xml]")) { 869 fDocumentHandler.startGeneralEntity(name, identifier, encoding, augs); 870 } 871 } 872 873 } // startEntity(String,XMLResourceIdentifier,String) 874 875 /** 876 * This method notifies the end of an entity. The DTD has the pseudo-name 877 * of "[dtd]" parameter entity names start with '%'; and general entities 878 * are just specified by their name. 879 * 880 * @param name The name of the entity. 881 * @param augs Additional information that may include infoset augmentations 882 * 883 * @throws XNIException Thrown by handler to signal an error. 
884 */ endEntity(String name, Augmentations augs)885 public void endEntity(String name, Augmentations augs) throws IOException, XNIException { 886 887 /** 888 * // flush possible pending output buffer - see scanContent 889 * if (fInScanContent && fStringBuffer.length != 0 890 * && fDocumentHandler != null) { 891 * fDocumentHandler.characters(fStringBuffer, null); 892 * fStringBuffer.length = 0; // make sure we know it's been flushed 893 * } 894 */ 895 super.endEntity(name, augs); 896 897 // make sure markup is properly balanced 898 if (fMarkupDepth != fEntityStack[fEntityDepth]) { 899 reportFatalError("MarkupEntityMismatch", null); 900 } 901 902 /**/ 903 // call handler 904 if (fDocumentHandler != null && !fScanningAttribute) { 905 if (!name.equals("[xml]")) { 906 fDocumentHandler.endGeneralEntity(name, augs); 907 } 908 } 909 910 911 } // endEntity(String) 912 913 // 914 // Protected methods 915 // 916 917 // Driver factory methods 918 919 /** Creates a content Driver. */ createContentDriver()920 protected Driver createContentDriver() { 921 return new FragmentContentDriver(); 922 } // createContentDriver():Driver 923 924 // scanning methods 925 926 /** 927 * Scans an XML or text declaration. 928 * <p> 929 * <pre> 930 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' 931 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ") 932 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" ) 933 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* 934 * [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") 935 * | ('"' ('yes' | 'no') '"')) 936 * 937 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>' 938 * </pre> 939 * 940 * @param scanningTextDecl True if a text declaration is to 941 * be scanned instead of an XML 942 * declaration. 
943 */ scanXMLDeclOrTextDecl(boolean scanningTextDecl)944 protected void scanXMLDeclOrTextDecl(boolean scanningTextDecl) 945 throws IOException, XNIException { 946 947 // scan decl 948 super.scanXMLDeclOrTextDecl(scanningTextDecl, fStrings); 949 fMarkupDepth--; 950 951 // pseudo-attribute values 952 String version = fStrings[0]; 953 String encoding = fStrings[1]; 954 String standalone = fStrings[2]; 955 fDeclaredEncoding = encoding; 956 // set standalone 957 fStandaloneSet = standalone != null; 958 fStandalone = fStandaloneSet && standalone.equals("yes"); 959 ///xxx see where its used.. this is not used anywhere. it may be useful for entity to store this information 960 //but this information is only related with Document Entity. 961 fEntityManager.setStandalone(fStandalone); 962 963 964 // call handler 965 if (fDocumentHandler != null) { 966 if (scanningTextDecl) { 967 fDocumentHandler.textDecl(version, encoding, null); 968 } else { 969 fDocumentHandler.xmlDecl(version, encoding, standalone, null); 970 } 971 } 972 973 if(version != null){ 974 fEntityScanner.setVersion(version); 975 fEntityScanner.setXMLVersion(version); 976 } 977 // set encoding on reader, only if encoding was not specified by the application explicitly 978 if (encoding != null && !fEntityScanner.getCurrentEntity().isEncodingExternallySpecified()) { 979 fEntityScanner.setEncoding(encoding); 980 } 981 982 } // scanXMLDeclOrTextDecl(boolean) 983 getPITarget()984 public String getPITarget(){ 985 return fPITarget ; 986 } 987 getPIData()988 public XMLStringBuffer getPIData(){ 989 return fContentBuffer ; 990 } 991 992 //XXX: why not this function behave as per the state of the parser? getCharacterData()993 public XMLString getCharacterData(){ 994 if(fUsebuffer){ 995 return fContentBuffer ; 996 }else{ 997 return fTempString; 998 } 999 1000 } 1001 1002 1003 /** 1004 * Scans a processing data. 
* This is needed to handle the situation
 * where a document starts with a processing instruction whose
 * target name <em>starts with</em> "xml". (e.g. xmlfoo)
 *
 * @param target The PI target
 * @param data The XMLStringBuffer to fill in with the data
 */
protected void scanPIData(String target, XMLStringBuffer data)
        throws IOException, XNIException {

    super.scanPIData(target, data);

    // the processing instruction is complete: leave its markup level
    fMarkupDepth--;

    // keep the target so getPITarget() can return it
    fPITarget = target;

} // scanPIData(String)

/**
 * Scans a comment into the shared content buffer.
 * <p>
 * <pre>
 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
 * </pre>
 * <p>
 * <strong>Note:</strong> Called after scanning past '<!--'
 */
protected void scanComment() throws IOException, XNIException {
    fContentBuffer.clear();
    scanComment(fContentBuffer);

    // the comment is complete: leave its markup level
    fMarkupDepth--;

    // the comment text lives in fContentBuffer, so readers of character
    // data must be pointed at the buffer
    fUsebuffer = true;

} // scanComment()

//xxx value returned by this function may not remain valid if another event is scanned.
getComment()1042 public String getComment(){ 1043 return fContentBuffer.toString(); 1044 } 1045 addElement(String rawname)1046 void addElement(String rawname){ 1047 if(fElementPointer < ELEMENT_ARRAY_LENGTH){ 1048 //storing element raw name in a linear list of array 1049 fElementArray[fElementPointer] = rawname ; 1050 //storing elemnetPointer for particular element depth 1051 1052 if(DEBUG_SKIP_ALGORITHM){ 1053 StringBuffer sb = new StringBuffer() ; 1054 sb.append(" Storing element information ") ; 1055 sb.append(" fElementPointer = " + fElementPointer) ; 1056 sb.append(" fElementRawname = " + fElementQName.rawname) ; 1057 sb.append(" fElementStack.fDepth = " + fElementStack.fDepth); 1058 System.out.println(sb.toString()) ; 1059 } 1060 1061 //store pointer information only when element depth is less MAX_DEPTH_LIMIT 1062 if(fElementStack.fDepth < MAX_DEPTH_LIMIT){ 1063 short column = storePointerForADepth(fElementPointer); 1064 if(column > 0){ 1065 short pointer = getElementPointer((short)fElementStack.fDepth, (short)(column - 1) ); 1066 //identity comparison shouldn't take much time and we can rely on this 1067 //since its guaranteed to have same object id for same string. 1068 if(rawname == fElementArray[pointer]){ 1069 fShouldSkip = true ; 1070 fLastPointerLocation = pointer ; 1071 //reset the things and return. 1072 resetPointer((short)fElementStack.fDepth , column) ; 1073 fElementArray[fElementPointer] = null ; 1074 return ; 1075 }else{ 1076 fShouldSkip = false ; 1077 } 1078 } 1079 } 1080 fElementPointer++ ; 1081 } 1082 } 1083 1084 resetPointer(short depth, short column)1085 void resetPointer(short depth, short column){ 1086 fPointerInfo[depth] [column] = (short)0; 1087 } 1088 1089 //returns column information at which pointer was stored. 
storePointerForADepth(short elementPointer)1090 short storePointerForADepth(short elementPointer){ 1091 short depth = (short) fElementStack.fDepth ; 1092 1093 //Stores element pointer locations at particular depth , only 4 pointer locations 1094 //are stored at particular depth for now. 1095 for(short i = 0 ; i < MAX_POINTER_AT_A_DEPTH ; i++){ 1096 1097 if(canStore(depth, i)){ 1098 fPointerInfo[depth][i] = elementPointer ; 1099 if(DEBUG_SKIP_ALGORITHM){ 1100 StringBuffer sb = new StringBuffer() ; 1101 sb.append(" Pointer information ") ; 1102 sb.append(" fElementPointer = " + fElementPointer) ; 1103 sb.append(" fElementStack.fDepth = " + fElementStack.fDepth); 1104 sb.append(" column = " + i ) ; 1105 System.out.println(sb.toString()) ; 1106 } 1107 return i; 1108 } 1109 //else 1110 //pointer was not stored because we reached the limit 1111 } 1112 return -1 ; 1113 } 1114 canStore(short depth, short column)1115 boolean canStore(short depth, short column){ 1116 //colum = 0 , means first element at particular depth 1117 //column = 1, means second element at particular depth 1118 // calle should make sure that it doesn't call for value outside allowed co-ordinates 1119 return fPointerInfo[depth][column] == 0 ? 
true : false ; 1120 } 1121 1122 getElementPointer(short depth, short column)1123 short getElementPointer(short depth, short column){ 1124 //colum = 0 , means first element at particular depth 1125 //column = 1, means second element at particular depth 1126 // calle should make sure that it doesn't call for value outside allowed co-ordinates 1127 return fPointerInfo[depth][column] ; 1128 } 1129 1130 //this function assumes that string passed is not null and skips 1131 //the following string from the buffer this makes sure skipFromTheBuffer(String rawname)1132 boolean skipFromTheBuffer(String rawname) throws IOException{ 1133 if(fEntityScanner.skipString(rawname)){ 1134 char c = (char)fEntityScanner.peekChar() ; 1135 //If the start element was completely skipped we should encounter either ' '(space), 1136 //or '/' (in case of empty element) or '>' 1137 if( c == ' ' || c == '/' || c == '>'){ 1138 fElementRawname = rawname ; 1139 return true ; 1140 } else{ 1141 return false; 1142 } 1143 } else 1144 return false ; 1145 } 1146 skipQElement(String rawname)1147 boolean skipQElement(String rawname) throws IOException{ 1148 1149 final int c = fEntityScanner.getChar(rawname.length()); 1150 //if this character is still valid element name -- this means string can't match 1151 if(XMLChar.isName(c)){ 1152 return false; 1153 }else{ 1154 return fEntityScanner.skipString(rawname); 1155 } 1156 } 1157 skipElement()1158 protected boolean skipElement() throws IOException { 1159 1160 if(!fShouldSkip) return false ; 1161 1162 if(fLastPointerLocation != 0){ 1163 //Look at the next element stored in the array list.. we might just get a match. 
1164 String rawname = fElementArray[fLastPointerLocation + 1] ; 1165 if(rawname != null && skipFromTheBuffer(rawname)){ 1166 fLastPointerLocation++ ; 1167 if(DEBUG_SKIP_ALGORITHM){ 1168 System.out.println("Element " + fElementRawname + " was SKIPPED at pointer location = " + fLastPointerLocation); 1169 } 1170 return true ; 1171 } else{ 1172 //reset it back to zero... we haven't got the correct subset yet. 1173 fLastPointerLocation = 0 ; 1174 1175 } 1176 } 1177 //xxx: we can put some logic here as from what column it should start looking 1178 //for now we always start at 0 1179 //fallback to tolerant algorithm, it would look for differnt element stored at different 1180 //depth and get us the pointer location. 1181 return fShouldSkip && skipElement((short)0); 1182 1183 } 1184 1185 //start of the column at which it should try searching skipElement(short column)1186 boolean skipElement(short column) throws IOException { 1187 short depth = (short)fElementStack.fDepth ; 1188 1189 if(depth > MAX_DEPTH_LIMIT){ 1190 return fShouldSkip = false ; 1191 } 1192 for(short i = column ; i < MAX_POINTER_AT_A_DEPTH ; i++){ 1193 short pointer = getElementPointer(depth , i ) ; 1194 1195 if(pointer == 0){ 1196 return fShouldSkip = false ; 1197 } 1198 1199 if(fElementArray[pointer] != null && skipFromTheBuffer(fElementArray[pointer])){ 1200 if(DEBUG_SKIP_ALGORITHM){ 1201 System.out.println(); 1202 System.out.println("Element " + fElementRawname + " was SKIPPED at depth = " + fElementStack.fDepth + " column = " + column ); 1203 System.out.println(); 1204 } 1205 fLastPointerLocation = pointer ; 1206 return fShouldSkip = true ; 1207 } 1208 } 1209 return fShouldSkip = false ; 1210 } 1211 1212 /** 1213 * Scans a start element. This method will handle the binding of 1214 * namespace information and notifying the handler of the start 1215 * of the element. 1216 * <p> 1217 * <pre> 1218 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' 1219 * [40] STag ::= '<' Name (S Attribute)* S? 
'>' 1220 * </pre> 1221 * <p> 1222 * <strong>Note:</strong> This method assumes that the leading 1223 * '<' character has been consumed. 1224 * <p> 1225 * <strong>Note:</strong> This method uses the fElementQName and 1226 * fAttributes variables. The contents of these variables will be 1227 * destroyed. The caller should copy important information out of 1228 * these variables before calling this method. 1229 * NB: Content in fAttributes is valid only till the state of the parser is XMLEvent.START_ELEMENT 1230 * 1231 * @return True if element is empty. (i.e. It matches 1232 * production [44]. 1233 */ 1234 // fElementQName will have the details of element just read.. 1235 // fAttributes will have the details of all the attributes. scanStartElement()1236 protected boolean scanStartElement() 1237 throws IOException, XNIException { 1238 1239 if (DEBUG_START_END_ELEMENT) System.out.println( this.getClass().toString() + ">>> scanStartElement()"); 1240 //when skipping is true and no more elements should be added 1241 if(fSkip && !fAdd){ 1242 //get the stored element -- if everything goes right this should match the 1243 //token in the buffer 1244 1245 QName name = fElementStack.getNext(); 1246 1247 if(DEBUG_SKIP_ALGORITHM){ 1248 System.out.println("Trying to skip String = " + name.rawname); 1249 } 1250 1251 //Be conservative -- if skipping fails -- stop. 
1252 fSkip = fEntityScanner.skipString(name.rawname); 1253 1254 if(fSkip){ 1255 if(DEBUG_SKIP_ALGORITHM){ 1256 System.out.println("Element SUCESSFULLY skipped = " + name.rawname); 1257 } 1258 fElementStack.push(); 1259 fElementQName = name; 1260 }else{ 1261 //if skipping fails reposition the stack or fallback to normal way of processing 1262 fElementStack.reposition(); 1263 if(DEBUG_SKIP_ALGORITHM){ 1264 System.out.println("Element was NOT skipped, REPOSITIONING stack" ); 1265 } 1266 } 1267 } 1268 1269 //we are still at the stage of adding elements 1270 //the elements were not matched or 1271 //fSkip is not set to true 1272 if(!fSkip || fAdd){ 1273 //get the next element from the stack 1274 fElementQName = fElementStack.nextElement(); 1275 // name 1276 if (fNamespaces) { 1277 fEntityScanner.scanQName(fElementQName, NameType.ELEMENTSTART); 1278 } else { 1279 String name = fEntityScanner.scanName(NameType.ELEMENTSTART); 1280 fElementQName.setValues(null, name, name, null); 1281 } 1282 1283 if(DEBUG)System.out.println("Element scanned in start element is " + fElementQName.toString()); 1284 if(DEBUG_SKIP_ALGORITHM){ 1285 if(fAdd){ 1286 System.out.println("Elements are being ADDED -- elemet added is = " + fElementQName.rawname + " at count = " + fElementStack.fCount); 1287 } 1288 } 1289 1290 } 1291 1292 //when the elements are being added , we need to check if we are set for skipping the elements 1293 if(fAdd){ 1294 //this sets the value of fAdd variable 1295 fElementStack.matchElement(fElementQName); 1296 } 1297 1298 1299 //xxx: We dont need another pointer, fCurrentElement, we can use fElementQName 1300 fCurrentElement = fElementQName; 1301 1302 String rawname = fElementQName.rawname; 1303 1304 fEmptyElement = false; 1305 1306 fAttributes.removeAllAttributes(); 1307 1308 checkDepth(rawname); 1309 if(!seekCloseOfStartTag()){ 1310 fReadingAttributes = true; 1311 fAttributeCacheUsedCount =0; 1312 fStringBufferIndex =0; 1313 fAddDefaultAttr = true; 1314 do { 1315 
scanAttribute(fAttributes); 1316 if (fSecurityManager != null && !fSecurityManager.isNoLimit(fElementAttributeLimit) && 1317 fAttributes.getLength() > fElementAttributeLimit){ 1318 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, 1319 "ElementAttributeLimit", 1320 new Object[]{rawname, fElementAttributeLimit }, 1321 XMLErrorReporter.SEVERITY_FATAL_ERROR ); 1322 } 1323 1324 } while (!seekCloseOfStartTag()); 1325 fReadingAttributes=false; 1326 } 1327 1328 if (fEmptyElement) { 1329 //decrease the markup depth.. 1330 fMarkupDepth--; 1331 1332 // check that this element was opened in the same entity 1333 if (fMarkupDepth < fEntityStack[fEntityDepth - 1]) { 1334 reportFatalError("ElementEntityMismatch", 1335 new Object[]{fCurrentElement.rawname}); 1336 } 1337 // call handler 1338 if (fDocumentHandler != null) { 1339 fDocumentHandler.emptyElement(fElementQName, fAttributes, null); 1340 } 1341 1342 //We should not be popping out the context here in endELement becaause the namespace context is still 1343 //valid when parser is at the endElement state. 1344 //if (fNamespaces) { 1345 // fNamespaceContext.popContext(); 1346 //} 1347 1348 //pop the element off the stack.. 1349 fElementStack.popElement(); 1350 1351 } else { 1352 1353 if(dtdGrammarUtil != null) 1354 dtdGrammarUtil.startElement(fElementQName, fAttributes); 1355 if(fDocumentHandler != null){ 1356 //complete element and attributes are traversed in this function so we can send a callback 1357 //here. 1358 //<strong>we shouldn't be sending callback in scanDocument()</strong> 1359 fDocumentHandler.startElement(fElementQName, fAttributes, null); 1360 } 1361 } 1362 1363 1364 if (DEBUG_START_END_ELEMENT) System.out.println(this.getClass().toString() + "<<< scanStartElement(): "+fEmptyElement); 1365 return fEmptyElement; 1366 1367 } // scanStartElement():boolean 1368 1369 /** 1370 * Looks for the close of start tag, i.e. if it finds '>' or '/>' 1371 * Characters are consumed. 
1372 */ seekCloseOfStartTag()1373 protected boolean seekCloseOfStartTag() throws IOException, XNIException { 1374 // spaces 1375 boolean sawSpace = fEntityScanner.skipSpaces(); 1376 1377 // end tag? 1378 final int c = fEntityScanner.peekChar(); 1379 if (c == '>') { 1380 fEntityScanner.scanChar(null); 1381 return true; 1382 } else if (c == '/') { 1383 fEntityScanner.scanChar(null); 1384 if (!fEntityScanner.skipChar('>', NameType.ELEMENTEND)) { 1385 reportFatalError("ElementUnterminated", 1386 new Object[]{fElementQName.rawname}); 1387 } 1388 fEmptyElement = true; 1389 return true; 1390 } else if (!isValidNameStartChar(c) || !sawSpace) { 1391 // Second chance. Check if this character is a high 1392 // surrogate of a valid name start character. 1393 if (!isValidNameStartHighSurrogate(c) || !sawSpace) { 1394 reportFatalError("ElementUnterminated", 1395 new Object[]{fElementQName.rawname}); 1396 } 1397 } 1398 1399 return false; 1400 } 1401 hasAttributes()1402 public boolean hasAttributes(){ 1403 return fAttributes.getLength() > 0 ? true : false ; 1404 } 1405 1406 1407 /** 1408 * Scans an attribute. 1409 * <p> 1410 * <pre> 1411 * [41] Attribute ::= Name Eq AttValue 1412 * </pre> 1413 * <p> 1414 * <strong>Note:</strong> This method assumes that the next 1415 * character on the stream is the first character of the attribute 1416 * name. 1417 * <p> 1418 * <strong>Note:</strong> This method uses the fAttributeQName and 1419 * fQName variables. The contents of these variables will be 1420 * destroyed. 1421 * 1422 * @param attributes The attributes list for the scanned attribute. 
1423 */ 1424 1425 /** 1426 * protected void scanAttribute(AttributeIteratorImpl attributes) 1427 * throws IOException, XNIException { 1428 * if (DEBUG_START_END_ELEMENT) System.out.println(">>> scanAttribute()"); 1429 * 1430 * 1431 * // name 1432 * if (fNamespaces) { 1433 * fEntityScanner.scanQName(fAttributeQName); 1434 * } 1435 * else { 1436 * String name = fEntityScanner.scanName(); 1437 * fAttributeQName.setValues(null, name, name, null); 1438 * } 1439 * 1440 * // equals 1441 * fEntityScanner.skipSpaces(); 1442 * if (!fEntityScanner.skipChar('=')) { 1443 * reportFatalError("EqRequiredInAttribute", 1444 * new Object[]{fAttributeQName.rawname}); 1445 * } 1446 * fEntityScanner.skipSpaces(); 1447 * 1448 * 1449 * // content 1450 * int oldLen = attributes.getLength(); 1451 */ 1452 /**xxx there is one check of duplicate attribute that has been removed. 1453 * attributes.addAttribute(fAttributeQName, XMLSymbols.fCDATASymbol, null); 1454 * 1455 * // WFC: Unique Att Spec 1456 * if (oldLen == attributes.getLength()) { 1457 * reportFatalError("AttributeNotUnique", 1458 * new Object[]{fCurrentElement.rawname, 1459 * fAttributeQName.rawname}); 1460 * } 1461 */ 1462 1463 /* 1464 //REVISIT: one more case needs to be included: external PE and standalone is no 1465 boolean isVC = fHasExternalDTD && !fStandalone; 1466 scanAttributeValue(fTempString, fTempString2, 1467 fAttributeQName.rawname, attributes, 1468 oldLen, isVC); 1469 1470 //attributes.setValue(oldLen, fTempString.toString()); 1471 //attributes.setNonNormalizedValue(oldLen, fTempString2.toString()); 1472 //attributes.setSpecified(oldLen, true); 1473 1474 AttributeImpl attribute = new AttributeImpl(fAttributeQName.prefix,fAttributeQName.localpart,fAttributeQName.uri,fTempString.toString(),fTempString2.toString(),XMLSymbols.fCDATASymbol,true); 1475 fAttributes.addAttribute(attribute); 1476 if (DEBUG_START_END_ELEMENT) System.out.println("<<< scanAttribute()"); 1477 } // scanAttribute(XMLAttributes) 1478 1479 */ 1480 1481 
/** return the attribute iterator implementation */ getAttributeIterator()1482 public XMLAttributesIteratorImpl getAttributeIterator(){ 1483 if(dtdGrammarUtil != null && fAddDefaultAttr){ 1484 dtdGrammarUtil.addDTDDefaultAttrs(fElementQName,fAttributes); 1485 fAddDefaultAttr = false; 1486 } 1487 return fAttributes; 1488 } 1489 1490 /** return if standalone is set */ standaloneSet()1491 public boolean standaloneSet(){ 1492 return fStandaloneSet; 1493 } 1494 /** return if the doucment is standalone */ isStandAlone()1495 public boolean isStandAlone(){ 1496 return fStandalone ; 1497 } 1498 /** 1499 * Scans an attribute name value pair. 1500 * <p> 1501 * <pre> 1502 * [41] Attribute ::= Name Eq AttValue 1503 * </pre> 1504 * <p> 1505 * <strong>Note:</strong> This method assumes that the next 1506 * character on the stream is the first character of the attribute 1507 * name. 1508 * <p> 1509 * <strong>Note:</strong> This method uses the fAttributeQName and 1510 * fQName variables. The contents of these variables will be 1511 * destroyed. 1512 * 1513 * @param attributes The attributes list for the scanned attribute. 
1514 */ 1515 scanAttribute(XMLAttributes attributes)1516 protected void scanAttribute(XMLAttributes attributes) 1517 throws IOException, XNIException { 1518 if (DEBUG_START_END_ELEMENT) System.out.println(this.getClass().toString() +">>> scanAttribute()"); 1519 1520 // name 1521 if (fNamespaces) { 1522 fEntityScanner.scanQName(fAttributeQName, NameType.ATTRIBUTENAME); 1523 } else { 1524 String name = fEntityScanner.scanName(NameType.ATTRIBUTENAME); 1525 fAttributeQName.setValues(null, name, name, null); 1526 } 1527 1528 // equals 1529 fEntityScanner.skipSpaces(); 1530 if (!fEntityScanner.skipChar('=', NameType.ATTRIBUTE)) { 1531 reportFatalError("EqRequiredInAttribute", 1532 new Object[] {fCurrentElement.rawname, fAttributeQName.rawname}); 1533 } 1534 fEntityScanner.skipSpaces(); 1535 1536 int attIndex = 0 ; 1537 //REVISIT: one more case needs to be included: external PE and standalone is no 1538 boolean isVC = fHasExternalDTD && !fStandalone; 1539 //fTempString would store attribute value 1540 ///fTempString2 would store attribute non-normalized value 1541 1542 //this function doesn't use 'attIndex'. We are adding the attribute later 1543 //after we have figured out that current attribute is not namespace declaration 1544 //since scanAttributeValue doesn't use attIndex parameter therefore we 1545 //can safely add the attribute later.. 1546 XMLString tmpStr = getString(); 1547 1548 scanAttributeValue(tmpStr, fTempString2, fAttributeQName.rawname, attributes, 1549 attIndex, isVC, fCurrentElement.rawname, false); 1550 1551 // content 1552 int oldLen = attributes.getLength(); 1553 //if the attribute name already exists.. new value is replaced with old value 1554 attIndex = attributes.addAttribute(fAttributeQName, XMLSymbols.fCDATASymbol, null); 1555 1556 // WFC: Unique Att Spec 1557 //attributes count will be same if the current attribute name already exists for this element name. 1558 //this means there are two duplicate attributes. 
1559 if (oldLen == attributes.getLength()) { 1560 reportFatalError("AttributeNotUnique", 1561 new Object[]{fCurrentElement.rawname, 1562 fAttributeQName.rawname}); 1563 } 1564 1565 //tmpString contains attribute value 1566 //we are passing null as the attribute value 1567 attributes.setValue(attIndex, null, tmpStr); 1568 1569 ///xxx: nonNormalizedValue is not being set as it is not required by SAX & DOM 1570 //attributes.setNonNormalizedValue(oldLen, fTempString2.toString()); 1571 attributes.setSpecified(attIndex, true); 1572 1573 if (DEBUG_START_END_ELEMENT) System.out.println(this.getClass().toString() +"<<< scanAttribute()"); 1574 1575 } // scanAttribute(XMLAttributes) 1576 1577 /** 1578 * Scans element content. 1579 * 1580 * @return Returns the next character on the stream. 1581 */ 1582 //CHANGED: 1583 //EARLIER: scanContent() 1584 //NOW: scanContent(XMLStringBuffer) 1585 //It makes things easy if this functions takes XMLStringBuffer as parameter.. 1586 //this function appends the data to the buffer. scanContent(XMLStringBuffer content)1587 protected int scanContent(XMLStringBuffer content) throws IOException, XNIException { 1588 //set the fTempString length to 0 before passing it on to scanContent 1589 //scanContent sets the correct co-ordinates as per the content read 1590 fTempString.length = 0; 1591 int c = fEntityScanner.scanContent(fTempString); 1592 content.append(fTempString); 1593 fTempString.length = 0; 1594 if (c == '\r') { 1595 // happens when there is the character reference 1596 //xxx: We know the next chracter.. we should just skip it and add ']' directlry 1597 fEntityScanner.scanChar(null); 1598 content.append((char)c); 1599 c = -1; 1600 } else if (c == ']') { 1601 //fStringBuffer.clear(); 1602 //xxx: We know the next chracter.. 
we should just skip it and add ']' directlry 1603 content.append((char)fEntityScanner.scanChar(null)); 1604 // remember where we are in case we get an endEntity before we 1605 // could flush the buffer out - this happens when we're parsing an 1606 // entity which ends with a ] 1607 fInScanContent = true; 1608 // 1609 // We work on a single character basis to handle cases such as: 1610 // ']]]>' which we might otherwise miss. 1611 // 1612 if (fEntityScanner.skipChar(']', null)) { 1613 content.append(']'); 1614 while (fEntityScanner.skipChar(']', null)) { 1615 content.append(']'); 1616 } 1617 if (fEntityScanner.skipChar('>', null)) { 1618 reportFatalError("CDEndInContent", null); 1619 } 1620 } 1621 fInScanContent = false; 1622 c = -1; 1623 } 1624 if (fDocumentHandler != null && content.length > 0) { 1625 //fDocumentHandler.characters(content, null); 1626 } 1627 return c; 1628 1629 } // scanContent():int 1630 1631 1632 /** 1633 * Scans a CDATA section. 1634 * <p> 1635 * <strong>Note:</strong> This method uses the fTempString and 1636 * fStringBuffer variables. 1637 * 1638 * @param complete True if the CDATA section is to be scanned 1639 * completely. 1640 * 1641 * @return True if CDATA is completely scanned. 1642 */ 1643 //CHANGED: scanCDATASection(XMLStringBuffer contentBuffer, boolean complete)1644 protected boolean scanCDATASection(XMLStringBuffer contentBuffer, boolean complete) 1645 throws IOException, XNIException { 1646 1647 // call handler 1648 if (fDocumentHandler != null) { 1649 //fDocumentHandler.startCDATA(null); 1650 } 1651 1652 while (true) { 1653 //scanData will fill the contentBuffer 1654 if (!fEntityScanner.scanData("]]>", contentBuffer)) { 1655 break ; 1656 /** We dont need all this code if we pass ']]>' as delimeter.. 
1657 * int brackets = 2; 1658 * while (fEntityScanner.skipChar(']')) { 1659 * brackets++; 1660 * } 1661 * 1662 * //When we find more than 2 square brackets 1663 * if (fDocumentHandler != null && brackets > 2) { 1664 * //we dont need to clear the buffer.. 1665 * //contentBuffer.clear(); 1666 * for (int i = 2; i < brackets; i++) { 1667 * contentBuffer.append(']'); 1668 * } 1669 * fDocumentHandler.characters(contentBuffer, null); 1670 * } 1671 * 1672 * if (fEntityScanner.skipChar('>')) { 1673 * break; 1674 * } 1675 * if (fDocumentHandler != null) { 1676 * //we dont need to clear the buffer now.. 1677 * //contentBuffer.clear(); 1678 * contentBuffer.append("]]"); 1679 * fDocumentHandler.characters(contentBuffer, null); 1680 * } 1681 **/ 1682 } else { 1683 int c = fEntityScanner.peekChar(); 1684 if (c != -1 && isInvalidLiteral(c)) { 1685 if (XMLChar.isHighSurrogate(c)) { 1686 //contentBuffer.clear(); 1687 //scan surrogates if any.... 1688 scanSurrogates(contentBuffer); 1689 } else { 1690 reportFatalError("InvalidCharInCDSect", 1691 new Object[]{Integer.toString(c,16)}); 1692 fEntityScanner.scanChar(null); 1693 } 1694 } 1695 //by this time we have also read surrogate contents if any... 1696 if (fDocumentHandler != null) { 1697 //fDocumentHandler.characters(contentBuffer, null); 1698 } 1699 } 1700 } 1701 fMarkupDepth--; 1702 1703 if (fDocumentHandler != null && contentBuffer.length > 0) { 1704 //fDocumentHandler.characters(contentBuffer, null); 1705 } 1706 1707 // call handler 1708 if (fDocumentHandler != null) { 1709 //fDocumentHandler.endCDATA(null); 1710 } 1711 1712 return true; 1713 1714 } // scanCDATASection(XMLStringBuffer, boolean):boolean 1715 1716 /** 1717 * Scans an end element. 1718 * <p> 1719 * <pre> 1720 * [42] ETag ::= '</' Name S? '>' 1721 * </pre> 1722 * <p> 1723 * <strong>Note:</strong> This method uses the fElementQName variable. 1724 * The contents of this variable will be destroyed. 
The caller should 1725 * copy the needed information out of this variable before calling 1726 * this method. 1727 * 1728 * @return The element depth. 1729 */ scanEndElement()1730 protected int scanEndElement() throws IOException, XNIException { 1731 if (DEBUG_START_END_ELEMENT) System.out.println(this.getClass().toString() +">>> scanEndElement()"); 1732 1733 // pop context 1734 QName endElementName = fElementStack.popElement(); 1735 1736 String rawname = endElementName.rawname; 1737 if(DEBUG)System.out.println("endElementName = " + endElementName.toString()); 1738 // Take advantage of the fact that next string _should_ be "fElementQName.rawName", 1739 //In scanners most of the time is consumed on checks done for XML characters, we can 1740 // optimize on it and avoid the checks done for endElement, 1741 //we will also avoid symbol table lookup - neeraj.bajaj@sun.com 1742 1743 // this should work both for namespace processing true or false... 1744 1745 //REVISIT: if the string is not the same as expected.. we need to do better error handling.. 1746 //We can skip this for now... In any case if the string doesn't match -- document is not well formed. 1747 1748 if (!fEntityScanner.skipString(endElementName.rawname)) { 1749 reportFatalError("ETagRequired", new Object[]{rawname}); 1750 } 1751 1752 // end 1753 fEntityScanner.skipSpaces(); 1754 if (!fEntityScanner.skipChar('>', NameType.ELEMENTEND)) { 1755 reportFatalError("ETagUnterminated", 1756 new Object[]{rawname}); 1757 } 1758 fMarkupDepth--; 1759 1760 //we have increased the depth for two markup "<" characters 1761 fMarkupDepth--; 1762 1763 // check that this element was opened in the same entity 1764 if (fMarkupDepth < fEntityStack[fEntityDepth - 1]) { 1765 reportFatalError("ElementEntityMismatch", 1766 new Object[]{rawname}); 1767 } 1768 1769 //We should not be popping out the context here in endELement becaause the namespace context is still 1770 //valid when parser is at the endElement state. 
1771 1772 //if (fNamespaces) { 1773 // fNamespaceContext.popContext(); 1774 //} 1775 1776 // call handler 1777 if (fDocumentHandler != null ) { 1778 //end element is scanned in this function so we can send a callback 1779 //here. 1780 //<strong>we shouldn't be sending callback in scanDocument()</strong> 1781 1782 fDocumentHandler.endElement(endElementName, null); 1783 } 1784 if(dtdGrammarUtil != null) 1785 dtdGrammarUtil.endElement(endElementName); 1786 1787 return fMarkupDepth; 1788 1789 } // scanEndElement():int 1790 1791 /** 1792 * Scans a character reference. 1793 * <p> 1794 * <pre> 1795 * [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';' 1796 * </pre> 1797 */ scanCharReference()1798 protected void scanCharReference() 1799 throws IOException, XNIException { 1800 1801 fStringBuffer2.clear(); 1802 int ch = scanCharReferenceValue(fStringBuffer2, null); 1803 fMarkupDepth--; 1804 if (ch != -1) { 1805 // call handler 1806 1807 if (fDocumentHandler != null) { 1808 if (fNotifyCharRefs) { 1809 fDocumentHandler.startGeneralEntity(fCharRefLiteral, null, null, null); 1810 } 1811 Augmentations augs = null; 1812 if (fValidation && ch <= 0x20) { 1813 if (fTempAugmentations != null) { 1814 fTempAugmentations.removeAllItems(); 1815 } 1816 else { 1817 fTempAugmentations = new AugmentationsImpl(); 1818 } 1819 augs = fTempAugmentations; 1820 augs.putItem(Constants.CHAR_REF_PROBABLE_WS, Boolean.TRUE); 1821 } 1822 //xxx: How do we deal with this - how to return charReferenceValues 1823 //now this is being commented because this is taken care in scanDocument() 1824 //fDocumentHandler.characters(fStringBuffer2, null); 1825 if (fNotifyCharRefs) { 1826 fDocumentHandler.endGeneralEntity(fCharRefLiteral, null); 1827 } 1828 } 1829 } 1830 1831 } // scanCharReference() 1832 1833 1834 /** 1835 * Scans an entity reference. 1836 * 1837 * @return returns true if the new entity is started. If it was built-in entity 1838 * 'false' is returned. 
1839 * @throws IOException Thrown if i/o error occurs. 1840 * @throws XNIException Thrown if handler throws exception upon 1841 * notification. 1842 */ scanEntityReference(XMLStringBuffer content)1843 protected void scanEntityReference(XMLStringBuffer content) throws IOException, XNIException { 1844 String name = fEntityScanner.scanName(NameType.REFERENCE); 1845 if (name == null) { 1846 reportFatalError("NameRequiredInReference", null); 1847 return; 1848 } 1849 if (!fEntityScanner.skipChar(';', NameType.REFERENCE)) { 1850 reportFatalError("SemicolonRequiredInReference", new Object []{name}); 1851 } 1852 if (fEntityStore.isUnparsedEntity(name)) { 1853 reportFatalError("ReferenceToUnparsedEntity", new Object[]{name}); 1854 } 1855 fMarkupDepth--; 1856 fCurrentEntityName = name; 1857 1858 // handle built-in entities 1859 if (name == fAmpSymbol) { 1860 handleCharacter('&', fAmpSymbol, content); 1861 fScannerState = SCANNER_STATE_BUILT_IN_REFS; 1862 return ; 1863 } else if (name == fLtSymbol) { 1864 handleCharacter('<', fLtSymbol, content); 1865 fScannerState = SCANNER_STATE_BUILT_IN_REFS; 1866 return ; 1867 } else if (name == fGtSymbol) { 1868 handleCharacter('>', fGtSymbol, content); 1869 fScannerState = SCANNER_STATE_BUILT_IN_REFS; 1870 return ; 1871 } else if (name == fQuotSymbol) { 1872 handleCharacter('"', fQuotSymbol, content); 1873 fScannerState = SCANNER_STATE_BUILT_IN_REFS; 1874 return ; 1875 } else if (name == fAposSymbol) { 1876 handleCharacter('\'', fAposSymbol, content); 1877 fScannerState = SCANNER_STATE_BUILT_IN_REFS; 1878 return ; 1879 } 1880 1881 //1. if the entity is external and support to external entities is not required 1882 // 2. or entities should not be replaced 1883 //3. or if it is built in entity reference. 
1884 boolean isEE = fEntityStore.isExternalEntity(name); 1885 if((isEE && !fSupportExternalEntities) || (!isEE && !fReplaceEntityReferences) || foundBuiltInRefs){ 1886 fScannerState = SCANNER_STATE_REFERENCE; 1887 return ; 1888 } 1889 // start general entity 1890 if (!fEntityStore.isDeclaredEntity(name)) { 1891 //SUPPORT_DTD=false && ReplaceEntityReferences should throw exception 1892 if (!fSupportDTD && fReplaceEntityReferences) { 1893 reportFatalError("EntityNotDeclared", new Object[]{name}); 1894 return; 1895 } 1896 //REVISIT: one more case needs to be included: external PE and standalone is no 1897 if ( fHasExternalDTD && !fStandalone) { 1898 if (fValidation) 1899 fErrorReporter.reportError(fEntityScanner, XMLMessageFormatter.XML_DOMAIN,"EntityNotDeclared", 1900 new Object[]{name}, XMLErrorReporter.SEVERITY_ERROR); 1901 } else 1902 reportFatalError("EntityNotDeclared", new Object[]{name}); 1903 } 1904 //we are starting the entity even if the entity was not declared 1905 //if that was the case it its taken care in XMLEntityManager.startEntity() 1906 //we immediately call the endEntity. Application gets to know if there was 1907 //any entity that was not declared. 1908 fEntityManager.startEntity(true, name, false); 1909 //set the scaner state to content.. parser will automatically revive itself at any point of time. 
1910 //setScannerState(SCANNER_STATE_CONTENT); 1911 //return true ; 1912 } // scanEntityReference() 1913 1914 // utility methods 1915 1916 /** 1917 * Check if the depth exceeds the maxElementDepth limit 1918 * @param elementName name of the current element 1919 */ checkDepth(String elementName)1920 void checkDepth(String elementName) { 1921 fLimitAnalyzer.addValue(Limit.MAX_ELEMENT_DEPTH_LIMIT, elementName, fElementStack.fDepth); 1922 if (fSecurityManager.isOverLimit(Limit.MAX_ELEMENT_DEPTH_LIMIT,fLimitAnalyzer)) { 1923 fSecurityManager.debugPrint(fLimitAnalyzer); 1924 reportFatalError("MaxElementDepthLimit", new Object[]{elementName, 1925 fLimitAnalyzer.getTotalValue(Limit.MAX_ELEMENT_DEPTH_LIMIT), 1926 fSecurityManager.getLimit(Limit.MAX_ELEMENT_DEPTH_LIMIT), 1927 "maxElementDepth"}); 1928 } 1929 } 1930 1931 /** 1932 * Calls document handler with a single character resulting from 1933 * built-in entity resolution. 1934 * 1935 * @param c 1936 * @param entity built-in name 1937 * @param XMLStringBuffer append the character to buffer 1938 * 1939 * we really dont need to call this function -- this function is only required when 1940 * we integrate with rest of Xerces2. SO maintaining the current behavior and still 1941 * calling this function to hanlde built-in entity reference. 
1942 * 1943 */ handleCharacter(char c, String entity, XMLStringBuffer content)1944 private void handleCharacter(char c, String entity, XMLStringBuffer content) throws XNIException { 1945 foundBuiltInRefs = true; 1946 checkEntityLimit(false, fEntityScanner.fCurrentEntity.name, 1); 1947 content.append(c); 1948 if (fDocumentHandler != null) { 1949 fSingleChar[0] = c; 1950 if (fNotifyBuiltInRefs) { 1951 fDocumentHandler.startGeneralEntity(entity, null, null, null); 1952 } 1953 fTempString.setValues(fSingleChar, 0, 1); 1954 //fDocumentHandler.characters(fTempString, null); 1955 1956 if (fNotifyBuiltInRefs) { 1957 fDocumentHandler.endGeneralEntity(entity, null); 1958 } 1959 } 1960 } // handleCharacter(char) 1961 1962 // helper methods 1963 1964 /** 1965 * Sets the scanner state. 1966 * 1967 * @param state The new scanner state. 1968 */ setScannerState(int state)1969 protected final void setScannerState(int state) { 1970 1971 fScannerState = state; 1972 if (DEBUG_SCANNER_STATE) { 1973 System.out.print("### setScannerState: "); 1974 //System.out.print(fScannerState); 1975 System.out.print(getScannerStateName(state)); 1976 System.out.println(); 1977 } 1978 1979 } // setScannerState(int) 1980 1981 1982 /** 1983 * Sets the Driver. 1984 * 1985 * @param Driver The new Driver. 1986 */ setDriver(Driver driver)1987 protected final void setDriver(Driver driver) { 1988 fDriver = driver; 1989 if (DEBUG_DISPATCHER) { 1990 System.out.print("%%% setDriver: "); 1991 System.out.print(getDriverName(driver)); 1992 System.out.println(); 1993 } 1994 } 1995 1996 // 1997 // Private methods 1998 // 1999 2000 /** Returns the scanner state name. 
*/ getScannerStateName(int state)2001 protected String getScannerStateName(int state) { 2002 2003 switch (state) { 2004 case SCANNER_STATE_DOCTYPE: return "SCANNER_STATE_DOCTYPE"; 2005 case SCANNER_STATE_ROOT_ELEMENT: return "SCANNER_STATE_ROOT_ELEMENT"; 2006 case SCANNER_STATE_START_OF_MARKUP: return "SCANNER_STATE_START_OF_MARKUP"; 2007 case SCANNER_STATE_COMMENT: return "SCANNER_STATE_COMMENT"; 2008 case SCANNER_STATE_PI: return "SCANNER_STATE_PI"; 2009 case SCANNER_STATE_CONTENT: return "SCANNER_STATE_CONTENT"; 2010 case SCANNER_STATE_REFERENCE: return "SCANNER_STATE_REFERENCE"; 2011 case SCANNER_STATE_END_OF_INPUT: return "SCANNER_STATE_END_OF_INPUT"; 2012 case SCANNER_STATE_TERMINATED: return "SCANNER_STATE_TERMINATED"; 2013 case SCANNER_STATE_CDATA: return "SCANNER_STATE_CDATA"; 2014 case SCANNER_STATE_TEXT_DECL: return "SCANNER_STATE_TEXT_DECL"; 2015 case SCANNER_STATE_ATTRIBUTE: return "SCANNER_STATE_ATTRIBUTE"; 2016 case SCANNER_STATE_ATTRIBUTE_VALUE: return "SCANNER_STATE_ATTRIBUTE_VALUE"; 2017 case SCANNER_STATE_START_ELEMENT_TAG: return "SCANNER_STATE_START_ELEMENT_TAG"; 2018 case SCANNER_STATE_END_ELEMENT_TAG: return "SCANNER_STATE_END_ELEMENT_TAG"; 2019 case SCANNER_STATE_CHARACTER_DATA: return "SCANNER_STATE_CHARACTER_DATA" ; 2020 } 2021 2022 return "??? ("+state+')'; 2023 2024 } // getScannerStateName(int):String getEntityName()2025 public String getEntityName(){ 2026 //return the cached name 2027 return fCurrentEntityName; 2028 } 2029 2030 /** Returns the driver name. 
*/ getDriverName(Driver driver)2031 public String getDriverName(Driver driver) { 2032 2033 if (DEBUG_DISPATCHER) { 2034 if (driver != null) { 2035 String name = driver.getClass().getName(); 2036 int index = name.lastIndexOf('.'); 2037 if (index != -1) { 2038 name = name.substring(index + 1); 2039 index = name.lastIndexOf('$'); 2040 if (index != -1) { 2041 name = name.substring(index + 1); 2042 } 2043 } 2044 return name; 2045 } 2046 } 2047 return "null"; 2048 2049 } // getDriverName():String 2050 2051 /** 2052 * Check the protocol used in the systemId against allowed protocols 2053 * 2054 * @param systemId the Id of the URI 2055 * @param allowedProtocols a list of allowed protocols separated by comma 2056 * @return the name of the protocol if rejected, null otherwise 2057 */ checkAccess(String systemId, String allowedProtocols)2058 String checkAccess(String systemId, String allowedProtocols) throws IOException { 2059 String baseSystemId = fEntityScanner.getBaseSystemId(); 2060 String expandedSystemId = XMLEntityManager.expandSystemId(systemId, baseSystemId, fStrictURI); 2061 return SecuritySupport.checkAccess(expandedSystemId, allowedProtocols, Constants.ACCESS_EXTERNAL_ALL); 2062 } 2063 2064 // 2065 // Classes 2066 // 2067 2068 /** 2069 * @author Neeraj Bajaj, Sun Microsystems. 2070 */ 2071 protected static final class Element { 2072 2073 // 2074 // Data 2075 // 2076 2077 /** Symbol. */ 2078 public QName qname; 2079 2080 //raw name stored as characters 2081 public char[] fRawname; 2082 2083 /** The next Element entry. */ 2084 public Element next; 2085 2086 // 2087 // Constructors 2088 // 2089 2090 /** 2091 * Constructs a new Element from the given QName and next Element 2092 * reference. 2093 */ Element(QName qname, Element next)2094 public Element(QName qname, Element next) { 2095 this.qname.setValues(qname); 2096 this.fRawname = qname.rawname.toCharArray(); 2097 this.next = next; 2098 } 2099 2100 } // class Element 2101 2102 /** 2103 * Element stack. 
2104 * 2105 * @author Neeraj Bajaj, Sun Microsystems. 2106 */ 2107 protected class ElementStack2 { 2108 2109 // 2110 // Data 2111 // 2112 2113 /** The stack data. */ 2114 protected QName [] fQName = new QName[20]; 2115 2116 //Element depth 2117 protected int fDepth; 2118 //total number of elements 2119 protected int fCount; 2120 //current position 2121 protected int fPosition; 2122 //Mark refers to the position 2123 protected int fMark; 2124 2125 protected int fLastDepth ; 2126 2127 // 2128 // Constructors 2129 // 2130 2131 /** Default constructor. */ ElementStack2()2132 public ElementStack2() { 2133 for (int i = 0; i < fQName.length; i++) { 2134 fQName[i] = new QName(); 2135 } 2136 fMark = fPosition = 1; 2137 } // <init>() 2138 resize()2139 public void resize(){ 2140 /** 2141 * int length = fElements.length; 2142 * Element [] temp = new Element[length * 2]; 2143 * System.arraycopy(fElements, 0, temp, 0, length); 2144 * fElements = temp; 2145 */ 2146 //resize QNames 2147 int oldLength = fQName.length; 2148 QName [] tmp = new QName[oldLength * 2]; 2149 System.arraycopy(fQName, 0, tmp, 0, oldLength); 2150 fQName = tmp; 2151 2152 for (int i = oldLength; i < fQName.length; i++) { 2153 fQName[i] = new QName(); 2154 } 2155 2156 } 2157 2158 2159 // 2160 // Public methods 2161 // 2162 2163 /** Check if the element scanned during the start element 2164 *matches the stored element. 2165 * 2166 *@return true if the match suceeds. 
2167 */ matchElement(QName element)2168 public boolean matchElement(QName element) { 2169 //last depth is the depth when last elemnt was pushed 2170 //if last depth is greater than current depth 2171 if(DEBUG_SKIP_ALGORITHM){ 2172 System.out.println("fLastDepth = " + fLastDepth); 2173 System.out.println("fDepth = " + fDepth); 2174 } 2175 boolean match = false; 2176 if(fLastDepth > fDepth && fDepth <= 2){ 2177 if(DEBUG_SKIP_ALGORITHM){ 2178 System.out.println("Checking if the elements match " + element.rawname + " , " + fQName[fDepth].rawname); 2179 } 2180 if(element.rawname == fQName[fDepth].rawname){ 2181 fAdd = false; 2182 //mark this position 2183 //decrease the depth by 1 as arrays are 0 based 2184 fMark = fDepth - 1; 2185 //we found the match and from next element skipping will start, add 1 2186 fPosition = fMark + 1 ; 2187 match = true; 2188 //Once we get match decrease the count -- this was increased by nextElement() 2189 --fCount; 2190 if(DEBUG_SKIP_ALGORITHM){ 2191 System.out.println("fAdd FALSE -- NOW ELEMENT SHOULD NOT BE ADDED"); 2192 System.out.println("fMark = " + fMark); 2193 System.out.println("fPosition = " + fPosition); 2194 System.out.println("fDepth = " + fDepth); 2195 System.out.println("fCount = " + fCount); 2196 } 2197 }else{ 2198 fAdd = true; 2199 if(DEBUG_SKIP_ALGORITHM)System.out.println("fAdd is " + fAdd); 2200 } 2201 } 2202 //store the last depth 2203 fLastDepth = fDepth++; 2204 return match; 2205 } // pushElement(QName):QName 2206 2207 /** 2208 * This function doesn't increase depth. The function in this function is 2209 *broken down into two functions for efficiency. <@see>matchElement</see>. 2210 * This function just returns the pointer to the object and its values are set. 
2211 * 2212 *@return QName reference to the next element in the list 2213 */ nextElement()2214 public QName nextElement() { 2215 2216 //if number of elements becomes equal to the length of array -- stop the skipping 2217 if (fCount == fQName.length) { 2218 fShouldSkip = false; 2219 fAdd = false; 2220 if(DEBUG_SKIP_ALGORITHM)System.out.println("SKIPPING STOPPED, fShouldSkip = " + fShouldSkip); 2221 //xxx: this is not correct, we are returning the last element 2222 //this wont make any difference since flag has been set to 'false' 2223 return fQName[--fCount]; 2224 } 2225 if(DEBUG_SKIP_ALGORITHM){ 2226 System.out.println("fCount = " + fCount); 2227 } 2228 return fQName[fCount++]; 2229 2230 } 2231 2232 /** Note that this function is considerably different than nextElement() 2233 * This function just returns the previously stored elements 2234 */ getNext()2235 public QName getNext(){ 2236 //when position reaches number of elements in the list.. 2237 //set the position back to mark, making it a circular linked list. 2238 if(fPosition == fCount){ 2239 fPosition = fMark; 2240 } 2241 return fQName[fPosition++]; 2242 } 2243 2244 /** returns the current depth 2245 */ popElement()2246 public int popElement(){ 2247 return fDepth--; 2248 } 2249 2250 2251 /** Clears the stack without throwing away existing QName objects. */ clear()2252 public void clear() { 2253 fLastDepth = 0; 2254 fDepth = 0; 2255 fCount = 0 ; 2256 fPosition = fMark = 1; 2257 } // clear() 2258 2259 } // class ElementStack 2260 2261 /** 2262 * Element stack. This stack operates without synchronization, error 2263 * checking, and it re-uses objects instead of throwing popped items 2264 * away. 2265 * 2266 * @author Andy Clark, IBM 2267 */ 2268 protected class ElementStack { 2269 2270 // 2271 // Data 2272 // 2273 2274 /** The stack data. 
*/ 2275 protected QName[] fElements; 2276 protected int [] fInt = new int[20]; 2277 2278 2279 //Element depth 2280 protected int fDepth; 2281 //total number of elements 2282 protected int fCount; 2283 //current position 2284 protected int fPosition; 2285 //Mark refers to the position 2286 protected int fMark; 2287 2288 protected int fLastDepth ; 2289 2290 // 2291 // Constructors 2292 // 2293 2294 /** Default constructor. */ ElementStack()2295 public ElementStack() { 2296 fElements = new QName[20]; 2297 for (int i = 0; i < fElements.length; i++) { 2298 fElements[i] = new QName(); 2299 } 2300 } // <init>() 2301 2302 // 2303 // Public methods 2304 // 2305 2306 /** 2307 * Pushes an element on the stack. 2308 * <p> 2309 * <strong>Note:</strong> The QName values are copied into the 2310 * stack. In other words, the caller does <em>not</em> orphan 2311 * the element to the stack. Also, the QName object returned 2312 * is <em>not</em> orphaned to the caller. It should be 2313 * considered read-only. 2314 * 2315 * @param element The element to push onto the stack. 2316 * 2317 * @return Returns the actual QName object that stores the 2318 */ 2319 //XXX: THIS FUNCTION IS NOT USED pushElement(QName element)2320 public QName pushElement(QName element) { 2321 if (fDepth == fElements.length) { 2322 QName[] array = new QName[fElements.length * 2]; 2323 System.arraycopy(fElements, 0, array, 0, fDepth); 2324 fElements = array; 2325 for (int i = fDepth; i < fElements.length; i++) { 2326 fElements[i] = new QName(); 2327 } 2328 } 2329 fElements[fDepth].setValues(element); 2330 return fElements[fDepth++]; 2331 } // pushElement(QName):QName 2332 2333 2334 /** Note that this function is considerably different than nextElement() 2335 * This function just returns the previously stored elements 2336 */ getNext()2337 public QName getNext(){ 2338 //when position reaches number of elements in the list.. 2339 //set the position back to mark, making it a circular linked list. 
2340 if(fPosition == fCount){ 2341 fPosition = fMark; 2342 } 2343 //store the position of last opened tag at particular depth 2344 //fInt[++fDepth] = fPosition; 2345 if(DEBUG_SKIP_ALGORITHM){ 2346 System.out.println("Element at fPosition = " + fPosition + " is " + fElements[fPosition].rawname); 2347 } 2348 //return fElements[fPosition++]; 2349 return fElements[fPosition]; 2350 } 2351 2352 /** This function should be called only when element was skipped sucessfully. 2353 * 1. Increase the depth - because element was sucessfully skipped. 2354 *2. Store the position of the element token in array "last opened tag" at depth. 2355 *3. increase the position counter so as to point to the next element in the array 2356 */ push()2357 public void push(){ 2358 2359 fInt[++fDepth] = fPosition++; 2360 } 2361 2362 /** Check if the element scanned during the start element 2363 *matches the stored element. 2364 * 2365 *@return true if the match suceeds. 2366 */ matchElement(QName element)2367 public boolean matchElement(QName element) { 2368 //last depth is the depth when last elemnt was pushed 2369 //if last depth is greater than current depth 2370 //if(DEBUG_SKIP_ALGORITHM){ 2371 // System.out.println("Check if the element " + element.rawname + " matches"); 2372 // System.out.println("fLastDepth = " + fLastDepth); 2373 // System.out.println("fDepth = " + fDepth); 2374 //} 2375 boolean match = false; 2376 if(fLastDepth > fDepth && fDepth <= 3){ 2377 if(DEBUG_SKIP_ALGORITHM){ 2378 System.out.println("----------ENTERED THE LOOP WHERE WE CHECK FOR MATCHING OF ELMENT-----"); 2379 System.out.println("Depth = " + fDepth + " Checking if INCOMING element " + element.rawname + " match STORED ELEMENT " + fElements[fDepth - 1].rawname); 2380 } 2381 if(element.rawname == fElements[fDepth - 1].rawname){ 2382 fAdd = false; 2383 //mark this position 2384 //decrease the depth by 1 as arrays are 0 based 2385 fMark = fDepth - 1; 2386 //we found the match 2387 fPosition = fMark; 2388 match = true; 
2389 //Once we get match decrease the count -- this was increased by nextElement() 2390 --fCount; 2391 if(DEBUG_SKIP_ALGORITHM){ 2392 System.out.println("NOW ELEMENT SHOULD NOT BE ADDED, fAdd is set to false"); 2393 System.out.println("fMark = " + fMark); 2394 System.out.println("fPosition = " + fPosition); 2395 System.out.println("fDepth = " + fDepth); 2396 System.out.println("fCount = " + fCount); 2397 System.out.println("---------MATCH SUCEEDED-----------------"); 2398 System.out.println(""); 2399 } 2400 }else{ 2401 fAdd = true; 2402 if(DEBUG_SKIP_ALGORITHM)System.out.println("fAdd is " + fAdd); 2403 } 2404 } 2405 //store the position for the current depth 2406 //when we are adding the elements, when skipping 2407 //starts even then this should be tracked ie. when 2408 //calling getNext() 2409 if(match){ 2410 //from next element skipping will start, add 1 2411 fInt[fDepth] = fPosition++; 2412 } else{ 2413 if(DEBUG_SKIP_ALGORITHM){ 2414 System.out.println("At depth = " + fDepth + "array position is = " + (fCount - 1)); 2415 } 2416 //sicne fInt[fDepth] contains pointer to the element array which are 0 based. 
2417 fInt[fDepth] = fCount - 1; 2418 } 2419 2420 //if number of elements becomes equal to the length of array -- stop the skipping 2421 //xxx: should we do "fCount == fInt.length" 2422 if (fCount == fElements.length) { 2423 fSkip = false; 2424 fAdd = false; 2425 //reposition the stack -- it seems to be too complex document and there is no symmerty in structure 2426 reposition(); 2427 if(DEBUG_SKIP_ALGORITHM){ 2428 System.out.println("ALL THE ELMENTS IN ARRAY HAVE BEEN FILLED"); 2429 System.out.println("REPOSITIONING THE STACK"); 2430 System.out.println("-----------SKIPPING STOPPED----------"); 2431 System.out.println(""); 2432 } 2433 return false; 2434 } 2435 if(DEBUG_SKIP_ALGORITHM){ 2436 if(match){ 2437 System.out.println("Storing fPosition = " + fInt[fDepth] + " at fDepth = " + fDepth); 2438 }else{ 2439 System.out.println("Storing fCount = " + fInt[fDepth] + " at fDepth = " + fDepth); 2440 } 2441 } 2442 //store the last depth 2443 fLastDepth = fDepth; 2444 return match; 2445 } // matchElement(QName):QName 2446 2447 2448 /** 2449 * Returns the next element on the stack. 2450 * 2451 * @return Returns the actual QName object. Callee should 2452 * use this object to store the details of next element encountered. 2453 */ nextElement()2454 public QName nextElement() { 2455 if(fSkip){ 2456 fDepth++; 2457 //boundary checks are done in matchElement() 2458 return fElements[fCount++]; 2459 } else if (fDepth == fElements.length) { 2460 QName[] array = new QName[fElements.length * 2]; 2461 System.arraycopy(fElements, 0, array, 0, fDepth); 2462 fElements = array; 2463 for (int i = fDepth; i < fElements.length; i++) { 2464 fElements[i] = new QName(); 2465 } 2466 } 2467 2468 return fElements[fDepth++]; 2469 2470 } // pushElement(QName):QName 2471 2472 2473 /** 2474 * Pops an element off of the stack by setting the values of 2475 * the specified QName. 2476 * <p> 2477 * <strong>Note:</strong> The object returned is <em>not</em> 2478 * orphaned to the caller. 
Therefore, the caller should consider 2479 * the object to be read-only. 2480 */ popElement()2481 public QName popElement() { 2482 //return the same object that was pushed -- this would avoid 2483 //setting the values for every end element. 2484 //STRONG: this object is read only -- this object reference shouldn't be stored. 2485 if(fSkip || fAdd ){ 2486 if(DEBUG_SKIP_ALGORITHM){ 2487 System.out.println("POPPING Element, at position " + fInt[fDepth] + " element at that count is = " + fElements[fInt[fDepth]].rawname); 2488 System.out.println(""); 2489 } 2490 return fElements[fInt[fDepth--]]; 2491 } else{ 2492 if(DEBUG_SKIP_ALGORITHM){ 2493 System.out.println("Retrieveing element at depth = " + fDepth + " is " + fElements[fDepth].rawname ); 2494 } 2495 return fElements[--fDepth] ; 2496 } 2497 //element.setValues(fElements[--fDepth]); 2498 } // popElement(QName) 2499 2500 /** Reposition the stack. fInt [] contains all the opened tags at particular depth. 2501 * Transfer all the opened tags starting from depth '2' to the current depth and reposition them 2502 *as per the depth. 2503 */ reposition()2504 public void reposition(){ 2505 for( int i = 2 ; i <= fDepth ; i++){ 2506 fElements[i-1] = fElements[fInt[i]]; 2507 } 2508 if(DEBUG_SKIP_ALGORITHM){ 2509 for( int i = 0 ; i < fDepth ; i++){ 2510 System.out.println("fElements[" + i + "]" + " = " + fElements[i].rawname); 2511 } 2512 } 2513 } 2514 2515 /** Clears the stack without throwing away existing QName objects. */ clear()2516 public void clear() { 2517 fDepth = 0; 2518 fLastDepth = 0; 2519 fCount = 0 ; 2520 fPosition = fMark = 1; 2521 2522 } // clear() 2523 2524 /** 2525 * This function is as a result of optimization done for endElement -- 2526 * we dont need to set the value for every end element encouterd. 2527 * For Well formedness checks we can have the same QName object that was pushed. 
2528 * the values will be set only if application need to know about the endElement 2529 * -- neeraj.bajaj@sun.com 2530 */ 2531 getLastPoppedElement()2532 public QName getLastPoppedElement(){ 2533 return fElements[fDepth]; 2534 } 2535 } // class ElementStack 2536 2537 /** 2538 * Drives the parser to the next state/event on the input. Parser is guaranteed 2539 * to stop at the next state/event. 2540 * 2541 * Internally XML document is divided into several states. Each state represents 2542 * a sections of XML document. When this functions returns normally, it has read 2543 * the section of XML document and returns the state corresponding to section of 2544 * document which has been read. For optimizations, a particular driver 2545 * can read ahead of the section of document (state returned) just read and 2546 * can maintain a different internal state. 2547 * 2548 * 2549 * @author Neeraj Bajaj, Sun Microsystems 2550 */ 2551 protected interface Driver { 2552 2553 2554 /** 2555 * Drives the parser to the next state/event on the input. Parser is guaranteed 2556 * to stop at the next state/event. 2557 * 2558 * Internally XML document is divided into several states. Each state represents 2559 * a sections of XML document. When this functions returns normally, it has read 2560 * the section of XML document and returns the state corresponding to section of 2561 * document which has been read. For optimizations, a particular driver 2562 * can read ahead of the section of document (state returned) just read and 2563 * can maintain a different internal state. 2564 * 2565 * @return state representing the section of document just read. 2566 * 2567 * @throws IOException Thrown on i/o error. 2568 * @throws XNIException Thrown on parse error. 2569 */ 2570 next()2571 public int next() throws IOException, XNIException; 2572 2573 } // interface Driver 2574 2575 /** 2576 * Driver to handle content scanning. This driver is capable of reading 2577 * the fragment of XML document. 
When it has finished reading fragment 2578 * of XML documents, it can pass the job of reading to another driver. 2579 * 2580 * This class has been modified as per the new design which is more suited to 2581 * efficiently build pull parser. Lot of performance improvements have been done and 2582 * the code has been added to support stax functionality/features. 2583 * 2584 * @author Neeraj Bajaj, Sun Microsystems 2585 * 2586 * 2587 * @author Andy Clark, IBM 2588 * @author Eric Ye, IBM 2589 */ 2590 protected class FragmentContentDriver 2591 implements Driver { 2592 2593 // 2594 // Driver methods 2595 // 2596 2597 /** 2598 * decides the appropriate state of the parser 2599 */ startOfMarkup()2600 private void startOfMarkup() throws IOException { 2601 fMarkupDepth++; 2602 final int ch = fEntityScanner.peekChar(); 2603 2604 if (isValidNameStartChar(ch) || isValidNameStartHighSurrogate(ch)) { 2605 setScannerState(SCANNER_STATE_START_ELEMENT_TAG); 2606 } else { 2607 switch(ch){ 2608 case '?' :{ 2609 setScannerState(SCANNER_STATE_PI); 2610 fEntityScanner.skipChar(ch, null); 2611 break; 2612 } 2613 case '!' 
:{ 2614 fEntityScanner.skipChar(ch, null); 2615 if (fEntityScanner.skipChar('-', null)) { 2616 if (!fEntityScanner.skipChar('-', NameType.COMMENT)) { 2617 reportFatalError("InvalidCommentStart", 2618 null); 2619 } 2620 setScannerState(SCANNER_STATE_COMMENT); 2621 } else if (fEntityScanner.skipString(cdata)) { 2622 setScannerState(SCANNER_STATE_CDATA ); 2623 } else if (!scanForDoctypeHook()) { 2624 reportFatalError("MarkupNotRecognizedInContent", 2625 null); 2626 } 2627 break; 2628 } 2629 case '/' :{ 2630 setScannerState(SCANNER_STATE_END_ELEMENT_TAG); 2631 fEntityScanner.skipChar(ch, NameType.ELEMENTEND); 2632 break; 2633 } 2634 default :{ 2635 reportFatalError("MarkupNotRecognizedInContent", null); 2636 } 2637 } 2638 } 2639 2640 }//startOfMarkup 2641 startOfContent()2642 private void startOfContent() throws IOException { 2643 if (fEntityScanner.skipChar('<', null)) { 2644 setScannerState(SCANNER_STATE_START_OF_MARKUP); 2645 } else if (fEntityScanner.skipChar('&', NameType.REFERENCE)) { 2646 setScannerState(SCANNER_STATE_REFERENCE) ; //XMLEvent.ENTITY_REFERENCE ); //SCANNER_STATE_REFERENCE 2647 } else { 2648 //element content is there.. 2649 setScannerState(SCANNER_STATE_CHARACTER_DATA); 2650 } 2651 }//startOfContent 2652 2653 2654 /** 2655 * 2656 * SCANNER_STATE_CONTENT and SCANNER_STATE_START_OF_MARKUP are two super states of the parser. 2657 * At any point of time when in doubt over the current state of the parser, the state should be 2658 * set to SCANNER_STATE_CONTENT. Parser will automatically revive itself and will set state of 2659 * the parser to one of its sub state. 2660 * sub states are defined in the parser on the basis of different XML component like 2661 * SCANNER_STATE_ENTITY_REFERENCE , SCANNER_STATE_START_ELEMENT, SCANNER_STATE_CDATA etc.. 2662 * These sub states help the parser to have fine control over the parsing. These are the 2663 * different milepost, parser stops at each sub state (milepost). 
Based on this state it is 2664 * decided if paresr needs to stop at next milepost ?? 2665 * 2666 */ decideSubState()2667 public void decideSubState() throws IOException { 2668 while( fScannerState == SCANNER_STATE_CONTENT || fScannerState == SCANNER_STATE_START_OF_MARKUP){ 2669 2670 switch (fScannerState) { 2671 2672 case SCANNER_STATE_CONTENT: { 2673 startOfContent() ; 2674 break; 2675 } 2676 2677 case SCANNER_STATE_START_OF_MARKUP: { 2678 startOfMarkup() ; 2679 break; 2680 } 2681 } 2682 } 2683 }//decideSubState 2684 2685 /** 2686 * Drives the parser to the next state/event on the input. Parser is guaranteed 2687 * to stop at the next state/event. Internally XML document 2688 * is divided into several states. Each state represents a sections of XML 2689 * document. When this functions returns normally, it has read the section 2690 * of XML document and returns the state corresponding to section of 2691 * document which has been read. For optimizations, a particular driver 2692 * can read ahead of the section of document (state returned) just read and 2693 * can maintain a different internal state. 2694 * 2695 * State returned corresponds to Stax states. 2696 * 2697 * @return state representing the section of document just read. 2698 * 2699 * @throws IOException Thrown on i/o error. 2700 * @throws XNIException Thrown on parse error. 2701 */ 2702 next()2703 public int next() throws IOException, XNIException { 2704 while (true) { 2705 try { 2706 if(DEBUG_NEXT){ 2707 System.out.println("NOW IN FragmentContentDriver"); 2708 System.out.println("Entering the FragmentContentDriver with = " + getScannerStateName(fScannerState)); 2709 } 2710 2711 //decide the actual sub state of the scanner.For more information refer to the javadoc of 2712 //decideSubState. 
2713 2714 switch (fScannerState) { 2715 case SCANNER_STATE_CONTENT: { 2716 final int ch = fEntityScanner.peekChar(); 2717 if (ch == '<') { 2718 fEntityScanner.scanChar(null); 2719 setScannerState(SCANNER_STATE_START_OF_MARKUP); 2720 } else if (ch == '&') { 2721 fEntityScanner.scanChar(NameType.REFERENCE); 2722 setScannerState(SCANNER_STATE_REFERENCE) ; //XMLEvent.ENTITY_REFERENCE ); //SCANNER_STATE_REFERENCE 2723 break; 2724 } else { 2725 //element content is there.. 2726 setScannerState(SCANNER_STATE_CHARACTER_DATA); 2727 break; 2728 } 2729 } 2730 2731 case SCANNER_STATE_START_OF_MARKUP: { 2732 startOfMarkup(); 2733 break; 2734 }//case: SCANNER_STATE_START_OF_MARKUP 2735 2736 }//end of switch 2737 //decideSubState() ; 2738 2739 //do some special handling if isCoalesce is set to true. 2740 if(fIsCoalesce){ 2741 fUsebuffer = true ; 2742 //if the last section was character data 2743 if(fLastSectionWasCharacterData){ 2744 2745 //if we dont encounter any CDATA or ENITY REFERENCE and current state is also not SCANNER_STATE_CHARACTER_DATA 2746 //return the last scanned charactrer data. 2747 if((fScannerState != SCANNER_STATE_CDATA) && (fScannerState != SCANNER_STATE_REFERENCE) 2748 && (fScannerState != SCANNER_STATE_CHARACTER_DATA)){ 2749 fLastSectionWasCharacterData = false; 2750 return XMLEvent.CHARACTERS; 2751 } 2752 }//if last section was CDATA or ENTITY REFERENCE 2753 //xxx: there might be another entity reference or CDATA after this 2754 //<foo>blah blah &<<![CDATA[[aa]]>blah blah</foo> 2755 else if((fLastSectionWasCData || fLastSectionWasEntityReference)){ 2756 //and current state is not SCANNER_STATE_CHARACTER_DATA 2757 //or SCANNER_STATE_CDATA or SCANNER_STATE_REFERENCE 2758 //this means there is nothing more to be coalesced. 2759 //return the CHARACTERS event. 
2760 if((fScannerState != SCANNER_STATE_CDATA) && (fScannerState != SCANNER_STATE_REFERENCE) 2761 && (fScannerState != SCANNER_STATE_CHARACTER_DATA)){ 2762 2763 fLastSectionWasCData = false; 2764 fLastSectionWasEntityReference = false; 2765 return XMLEvent.CHARACTERS; 2766 } 2767 } 2768 } 2769 2770 2771 if(DEBUG_NEXT){ 2772 System.out.println("Actual scanner state set by decideSubState is = " + getScannerStateName(fScannerState)); 2773 } 2774 2775 switch(fScannerState){ 2776 2777 case XMLEvent.START_DOCUMENT : 2778 return XMLEvent.START_DOCUMENT; 2779 2780 case SCANNER_STATE_START_ELEMENT_TAG :{ 2781 2782 //xxx this function returns true when element is empty.. can be linked to end element event. 2783 //returns true if the element is empty 2784 fEmptyElement = scanStartElement() ; 2785 //if the element is empty the next event is "end element" 2786 if(fEmptyElement){ 2787 setScannerState(SCANNER_STATE_END_ELEMENT_TAG); 2788 }else{ 2789 //set the next possible state 2790 setScannerState(SCANNER_STATE_CONTENT); 2791 } 2792 return XMLEvent.START_ELEMENT ; 2793 } 2794 2795 case SCANNER_STATE_CHARACTER_DATA: { 2796 if(DEBUG_COALESCE){ 2797 System.out.println("fLastSectionWasCData = " + fLastSectionWasCData); 2798 System.out.println("fIsCoalesce = " + fIsCoalesce); 2799 } 2800 //if last section was either entity reference or cdata or character data we should be using buffer 2801 fUsebuffer = fLastSectionWasEntityReference || fLastSectionWasCData || fLastSectionWasCharacterData ; 2802 2803 //When coalesce is set to true and last state was REFERENCE or CDATA or CHARACTER_DATA, buffer should not be cleared. 
2804 if( fIsCoalesce && (fLastSectionWasEntityReference || fLastSectionWasCData || fLastSectionWasCharacterData) ){ 2805 fLastSectionWasEntityReference = false; 2806 fLastSectionWasCData = false; 2807 fLastSectionWasCharacterData = true ; 2808 fUsebuffer = true; 2809 }else{ 2810 //clear the buffer 2811 fContentBuffer.clear(); 2812 } 2813 2814 //set the fTempString length to 0 before passing it on to scanContent 2815 //scanContent sets the correct co-ordinates as per the content read 2816 fTempString.length = 0; 2817 int c = fEntityScanner.scanContent(fTempString); 2818 if(DEBUG){ 2819 System.out.println("fTempString = " + fTempString); 2820 } 2821 if(fEntityScanner.skipChar('<', null)){ 2822 //check if we have reached end of element 2823 if(fEntityScanner.skipChar('/', NameType.ELEMENTEND)){ 2824 //increase the mark up depth 2825 fMarkupDepth++; 2826 fLastSectionWasCharacterData = false; 2827 setScannerState(SCANNER_STATE_END_ELEMENT_TAG); 2828 //check if its start of new element 2829 }else if(XMLChar.isNameStart(fEntityScanner.peekChar())){ 2830 fMarkupDepth++; 2831 fLastSectionWasCharacterData = false; 2832 setScannerState(SCANNER_STATE_START_ELEMENT_TAG); 2833 }else{ 2834 setScannerState(SCANNER_STATE_START_OF_MARKUP); 2835 //there can be cdata ahead if coalesce is true we should call again 2836 if(fIsCoalesce){ 2837 fUsebuffer = true; 2838 fLastSectionWasCharacterData = true; 2839 fContentBuffer.append(fTempString); 2840 fTempString.length = 0; 2841 continue; 2842 } 2843 } 2844 //in case last section was either entity reference or cdata or character data -- we should be using buffer 2845 if(fUsebuffer){ 2846 fContentBuffer.append(fTempString); 2847 fTempString.length = 0; 2848 } 2849 if(DEBUG){ 2850 System.out.println("NOT USING THE BUFFER, STRING = " + fTempString.toString()); 2851 } 2852 if(dtdGrammarUtil!= null && dtdGrammarUtil.isIgnorableWhiteSpace(fContentBuffer)){ 2853 if(DEBUG)System.out.println("Return SPACE EVENT"); 2854 return XMLEvent.SPACE; 2855 
}else 2856 return XMLEvent.CHARACTERS; 2857 2858 } else{ 2859 fUsebuffer = true ; 2860 if(DEBUG){ 2861 System.out.println("fContentBuffer = " + fContentBuffer); 2862 System.out.println("fTempString = " + fTempString); 2863 } 2864 fContentBuffer.append(fTempString); 2865 fTempString.length = 0; 2866 } 2867 if (c == '\r') { 2868 if(DEBUG){ 2869 System.out.println("'\r' character found"); 2870 } 2871 // happens when there is the character reference 2872 //xxx: We know the next chracter.. we should just skip it and add ']' directlry 2873 fEntityScanner.scanChar(null); 2874 fUsebuffer = true; 2875 fContentBuffer.append((char)c); 2876 c = -1 ; 2877 } else if (c == ']') { 2878 //fStringBuffer.clear(); 2879 //xxx: We know the next chracter.. we should just skip it and add ']' directlry 2880 fUsebuffer = true; 2881 fContentBuffer.append((char)fEntityScanner.scanChar(null)); 2882 // remember where we are in case we get an endEntity before we 2883 // could flush the buffer out - this happens when we're parsing an 2884 // entity which ends with a ] 2885 fInScanContent = true; 2886 2887 // We work on a single character basis to handle cases such as: 2888 // ']]]>' which we might otherwise miss. 2889 // 2890 if (fEntityScanner.skipChar(']', null)) { 2891 fContentBuffer.append(']'); 2892 while (fEntityScanner.skipChar(']', null)) { 2893 fContentBuffer.append(']'); 2894 } 2895 if (fEntityScanner.skipChar('>', null)) { 2896 reportFatalError("CDEndInContent", null); 2897 } 2898 } 2899 c = -1 ; 2900 fInScanContent = false; 2901 } 2902 2903 do{ 2904 //xxx: we should be using only one buffer.. 2905 // we need not to grow the buffer only when isCoalesce() is not true; 2906 2907 if (c == '<') { 2908 fEntityScanner.scanChar(null); 2909 setScannerState(SCANNER_STATE_START_OF_MARKUP); 2910 break; 2911 }//xxx what should be the behavior if entity reference is present in the content ? 
2912 else if (c == '&') { 2913 fEntityScanner.scanChar(NameType.REFERENCE); 2914 setScannerState(SCANNER_STATE_REFERENCE); 2915 break; 2916 }///xxx since this part is also characters, it should be merged... 2917 else if (c != -1 && isInvalidLiteral(c)) { 2918 if (XMLChar.isHighSurrogate(c)) { 2919 // special case: surrogates 2920 scanSurrogates(fContentBuffer) ; 2921 setScannerState(SCANNER_STATE_CONTENT); 2922 } else { 2923 reportFatalError("InvalidCharInContent", 2924 new Object[] { 2925 Integer.toString(c, 16)}); 2926 fEntityScanner.scanChar(null); 2927 } 2928 break; 2929 } 2930 //xxx: scanContent also gives character callback. 2931 c = scanContent(fContentBuffer) ; 2932 //we should not be iterating again if fIsCoalesce is not set to true 2933 2934 if(!fIsCoalesce){ 2935 setScannerState(SCANNER_STATE_CONTENT); 2936 break; 2937 } 2938 2939 }while(true); 2940 2941 //if (fDocumentHandler != null) { 2942 // fDocumentHandler.characters(fContentBuffer, null); 2943 //} 2944 if(DEBUG)System.out.println("USING THE BUFFER, STRING START=" + fContentBuffer.toString() +"=END"); 2945 //if fIsCoalesce is true there might be more data so call fDriver.next() 2946 if(fIsCoalesce){ 2947 fLastSectionWasCharacterData = true ; 2948 continue; 2949 }else{ 2950 if(dtdGrammarUtil!= null && dtdGrammarUtil.isIgnorableWhiteSpace(fContentBuffer)){ 2951 if(DEBUG)System.out.println("Return SPACE EVENT"); 2952 return XMLEvent.SPACE; 2953 } else 2954 return XMLEvent.CHARACTERS ; 2955 } 2956 } 2957 2958 case SCANNER_STATE_END_ELEMENT_TAG :{ 2959 if(fEmptyElement){ 2960 //set it back to false. 2961 fEmptyElement = false; 2962 setScannerState(SCANNER_STATE_CONTENT); 2963 //check the case when there is comment after single element document 2964 //<foo/> and some comment after this 2965 return (fMarkupDepth == 0 && elementDepthIsZeroHook() ) ? 
XMLEvent.END_ELEMENT : XMLEvent.END_ELEMENT ; 2966 2967 } else if(scanEndElement() == 0) { 2968 //It is last element of the document 2969 if (elementDepthIsZeroHook()) { 2970 //if element depth is zero , it indicates the end of the document 2971 //the state shouldn't be set, because it is set by elementDepthIsZeroHook() function 2972 //xxx understand this point once again.. 2973 return XMLEvent.END_ELEMENT ; 2974 } 2975 2976 } 2977 setScannerState(SCANNER_STATE_CONTENT); 2978 return XMLEvent.END_ELEMENT ; 2979 } 2980 2981 case SCANNER_STATE_COMMENT: { //SCANNER_STATE_COMMENT: 2982 scanComment(); 2983 setScannerState(SCANNER_STATE_CONTENT); 2984 return XMLEvent.COMMENT; 2985 //break; 2986 } 2987 case SCANNER_STATE_PI:{ //SCANNER_STATE_PI: { 2988 //clear the buffer first 2989 fContentBuffer.clear() ; 2990 //xxx: which buffer should be passed. Ideally we shouldn't have 2991 //more than two buffers -- 2992 //xxx: where should we add the switch for buffering. 2993 scanPI(fContentBuffer); 2994 setScannerState(SCANNER_STATE_CONTENT); 2995 return XMLEvent.PROCESSING_INSTRUCTION; 2996 //break; 2997 } 2998 case SCANNER_STATE_CDATA :{ //SCANNER_STATE_CDATA: { 2999 //xxx: What if CDATA is the first event 3000 //<foo><![CDATA[hello<><>]]>append</foo> 3001 3002 //we should not clear the buffer only when the last state was either SCANNER_STATE_REFERENCE or 3003 //SCANNER_STATE_CHARACTER_DATA or SCANNER_STATE_REFERENCE 3004 if(fIsCoalesce && ( fLastSectionWasEntityReference || fLastSectionWasCData || fLastSectionWasCharacterData)){ 3005 fLastSectionWasCData = true ; 3006 fLastSectionWasEntityReference = false; 3007 fLastSectionWasCharacterData = false; 3008 }//if we dont need to coalesce clear the buffer 3009 else{ 3010 fContentBuffer.clear(); 3011 } 3012 fUsebuffer = true; 3013 //CDATA section is completely read in all the case. 3014 scanCDATASection(fContentBuffer , true); 3015 setScannerState(SCANNER_STATE_CONTENT); 3016 //1. 
if fIsCoalesce is set to true we set the variable fLastSectionWasCData to true 3017 //and just call fDispatche.next(). Since we have set the scanner state to 3018 //SCANNER_STATE_CONTENT (super state) parser will automatically recover and 3019 //behave appropriately. When isCoalesce is set to true we dont need to reportCDATA event 3020 //2. Check if application has set for reporting CDATA event 3021 //3. if the application has neither set the fIsCoalesce to true nor fReportCdataEvent 3022 //return the cdata event as characters. 3023 if(fIsCoalesce){ 3024 fLastSectionWasCData = true ; 3025 //there might be more data to coalesce. 3026 continue; 3027 }else if(fReportCdataEvent){ 3028 return XMLEvent.CDATA; 3029 } else{ 3030 return XMLEvent.CHARACTERS; 3031 } 3032 } 3033 3034 case SCANNER_STATE_REFERENCE :{ 3035 fMarkupDepth++; 3036 foundBuiltInRefs = false; 3037 3038 //we should not clear the buffer only when the last state was either CDATA or 3039 //SCANNER_STATE_CHARACTER_DATA or SCANNER_STATE_REFERENCE 3040 if(fIsCoalesce && ( fLastSectionWasEntityReference || fLastSectionWasCData || fLastSectionWasCharacterData)){ 3041 //fLastSectionWasEntityReference or fLastSectionWasCData are only 3042 //used when fIsCoalesce is set to true. 
3043 fLastSectionWasEntityReference = true ; 3044 fLastSectionWasCData = false; 3045 fLastSectionWasCharacterData = false; 3046 }//if we dont need to coalesce clear the buffer 3047 else{ 3048 fContentBuffer.clear(); 3049 } 3050 fUsebuffer = true ; 3051 //take care of character reference 3052 if (fEntityScanner.skipChar('#', NameType.REFERENCE)) { 3053 scanCharReferenceValue(fContentBuffer, null); 3054 fMarkupDepth--; 3055 if(!fIsCoalesce){ 3056 setScannerState(SCANNER_STATE_CONTENT); 3057 return XMLEvent.CHARACTERS; 3058 } 3059 } else { 3060 // this function also starts new entity 3061 scanEntityReference(fContentBuffer); 3062 //if there was built-in entity reference & coalesce is not true 3063 //return CHARACTERS 3064 if(fScannerState == SCANNER_STATE_BUILT_IN_REFS && !fIsCoalesce){ 3065 setScannerState(SCANNER_STATE_CONTENT); 3066 return XMLEvent.CHARACTERS; 3067 } 3068 3069 //if there was a text declaration, call next() it will be taken care. 3070 if(fScannerState == SCANNER_STATE_TEXT_DECL){ 3071 fLastSectionWasEntityReference = true ; 3072 continue; 3073 } 3074 3075 if(fScannerState == SCANNER_STATE_REFERENCE){ 3076 setScannerState(SCANNER_STATE_CONTENT); 3077 if (fReplaceEntityReferences && fEntityStore.isDeclaredEntity(fCurrentEntityName)) { 3078 // Skip the entity reference, we don't care 3079 continue; 3080 } 3081 return XMLEvent.ENTITY_REFERENCE; 3082 } 3083 } 3084 //Wether it was character reference, entity reference or built-in entity 3085 //set the next possible state to SCANNER_STATE_CONTENT 3086 setScannerState(SCANNER_STATE_CONTENT); 3087 fLastSectionWasEntityReference = true ; 3088 continue; 3089 } 3090 3091 case SCANNER_STATE_TEXT_DECL: { 3092 // scan text decl 3093 if (fEntityScanner.skipString("<?xml")) { 3094 fMarkupDepth++; 3095 // NOTE: special case where entity starts with a PI 3096 // whose name starts with "xml" (e.g. 
"xmlfoo") 3097 if (isValidNameChar(fEntityScanner.peekChar())) { 3098 fStringBuffer.clear(); 3099 fStringBuffer.append("xml"); 3100 3101 if (fNamespaces) { 3102 while (isValidNCName(fEntityScanner.peekChar())) { 3103 fStringBuffer.append((char)fEntityScanner.scanChar(null)); 3104 } 3105 } else { 3106 while (isValidNameChar(fEntityScanner.peekChar())) { 3107 fStringBuffer.append((char)fEntityScanner.scanChar(null)); 3108 } 3109 } 3110 String target = fSymbolTable.addSymbol(fStringBuffer.ch, fStringBuffer.offset, fStringBuffer.length); 3111 fContentBuffer.clear(); 3112 scanPIData(target, fContentBuffer); 3113 } 3114 3115 // standard text declaration 3116 else { 3117 //xxx: this function gives callback 3118 scanXMLDeclOrTextDecl(true); 3119 } 3120 } 3121 // now that we've straightened out the readers, we can read in chunks: 3122 fEntityManager.fCurrentEntity.mayReadChunks = true; 3123 setScannerState(SCANNER_STATE_CONTENT); 3124 //xxx: we don't return any state, so how do we get to know about TEXT declarations. 3125 //it seems we have to careful when to allow function issue a callback 3126 //and when to allow adapter issue a callback. 
3127 continue; 3128 } 3129 3130 3131 case SCANNER_STATE_ROOT_ELEMENT: { 3132 if (scanRootElementHook()) { 3133 fEmptyElement = true; 3134 //rest would be taken care by fTrailingMiscDriver set by scanRootElementHook 3135 return XMLEvent.START_ELEMENT; 3136 } 3137 setScannerState(SCANNER_STATE_CONTENT); 3138 return XMLEvent.START_ELEMENT ; 3139 } 3140 case SCANNER_STATE_CHAR_REFERENCE : { 3141 fContentBuffer.clear(); 3142 scanCharReferenceValue(fContentBuffer, null); 3143 fMarkupDepth--; 3144 setScannerState(SCANNER_STATE_CONTENT); 3145 return XMLEvent.CHARACTERS; 3146 } 3147 default: 3148 throw new XNIException("Scanner State " + fScannerState + " not Recognized "); 3149 3150 }//switch 3151 } 3152 // premature end of file 3153 catch (EOFException e) { 3154 endOfFileHook(e); 3155 return -1; 3156 } 3157 } //while loop 3158 }//next 3159 3160 // 3161 // Protected methods 3162 // 3163 3164 // hooks 3165 3166 // NOTE: These hook methods are added so that the full document 3167 // scanner can share the majority of code with this class. 3168 3169 /** 3170 * Scan for DOCTYPE hook. This method is a hook for subclasses 3171 * to add code to handle scanning for a the "DOCTYPE" string 3172 * after the string "<!" has been scanned. 3173 * 3174 * @return True if the "DOCTYPE" was scanned; false if "DOCTYPE" 3175 * was not scanned. 3176 */ scanForDoctypeHook()3177 protected boolean scanForDoctypeHook() 3178 throws IOException, XNIException { 3179 return false; 3180 } // scanForDoctypeHook():boolean 3181 3182 /** 3183 * Element depth iz zero. This methos is a hook for subclasses 3184 * to add code to handle when the element depth hits zero. When 3185 * scanning a document fragment, an element depth of zero is 3186 * normal. However, when scanning a full XML document, the 3187 * scanner must handle the trailing miscellanous section of 3188 * the document after the end of the document's root element. 
3189 * 3190 * @return True if the caller should stop and return true which 3191 * allows the scanner to switch to a new scanning 3192 * driver. A return value of false indicates that 3193 * the content driver should continue as normal. 3194 */ elementDepthIsZeroHook()3195 protected boolean elementDepthIsZeroHook() 3196 throws IOException, XNIException { 3197 return false; 3198 } // elementDepthIsZeroHook():boolean 3199 3200 /** 3201 * Scan for root element hook. This method is a hook for 3202 * subclasses to add code that handles scanning for the root 3203 * element. When scanning a document fragment, there is no 3204 * "root" element. However, when scanning a full XML document, 3205 * the scanner must handle the root element specially. 3206 * 3207 * @return True if the caller should stop and return true which 3208 * allows the scanner to switch to a new scanning 3209 * driver. A return value of false indicates that 3210 * the content driver should continue as normal. 3211 */ scanRootElementHook()3212 protected boolean scanRootElementHook() 3213 throws IOException, XNIException { 3214 return false; 3215 } // scanRootElementHook():boolean 3216 3217 /** 3218 * End of file hook. This method is a hook for subclasses to 3219 * add code that handles the end of file. The end of file in 3220 * a document fragment is OK if the markup depth is zero. 3221 * However, when scanning a full XML document, an end of file 3222 * is always premature. 3223 */ endOfFileHook(EOFException e)3224 protected void endOfFileHook(EOFException e) 3225 throws IOException, XNIException { 3226 3227 // NOTE: An end of file is only only an error if we were 3228 // in the middle of scanning some markup. 
-Ac 3229 if (fMarkupDepth != 0) { 3230 reportFatalError("PrematureEOF", null); 3231 } 3232 3233 } // endOfFileHook() 3234 3235 } // class FragmentContentDriver 3236 pr(String str)3237 static void pr(String str) { 3238 System.out.println(str) ; 3239 } 3240 3241 protected boolean fUsebuffer ; 3242 3243 /** this function gets an XMLString (which is used to store the attribute value) from the special pool 3244 * maintained for attributes. 3245 * fAttributeCacheUsedCount tracks the number of attributes that has been consumed from the pool. 3246 * if all the attributes has been consumed, it adds a new XMLString inthe pool and returns the same 3247 * XMLString. 3248 * 3249 * @return XMLString XMLString used to store an attribute value. 3250 */ 3251 getString()3252 protected XMLString getString(){ 3253 if(fAttributeCacheUsedCount < initialCacheCount || fAttributeCacheUsedCount < attributeValueCache.size()){ 3254 return attributeValueCache.get(fAttributeCacheUsedCount++); 3255 } else{ 3256 XMLString str = new XMLString(); 3257 fAttributeCacheUsedCount++; 3258 attributeValueCache.add(str); 3259 return str; 3260 } 3261 } 3262 3263 /** 3264 * Implements XMLBufferListener interface. 3265 */ 3266 refresh()3267 public void refresh(){ 3268 refresh(0); 3269 } 3270 3271 /** 3272 * receives callbacks from {@link XMLEntityReader } when buffer 3273 * is being changed. 3274 * @param refreshPosition 3275 */ refresh(int refreshPosition)3276 public void refresh(int refreshPosition){ 3277 //If you are reading attributes and you got a callback 3278 //cache available attributes. 3279 if(fReadingAttributes){ 3280 fAttributes.refresh(); 3281 } 3282 if(fScannerState == SCANNER_STATE_CHARACTER_DATA){ 3283 //since fTempString directly matches to the underlying main buffer 3284 //store the data into buffer 3285 fContentBuffer.append(fTempString); 3286 //clear the XMLString so that data can't be added again. 
3287 fTempString.length = 0; 3288 fUsebuffer = true; 3289 } 3290 } 3291 3292 } // class XMLDocumentFragmentScannerImpl 3293