1 /* 2 * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. 3 */ 4 5 /* 6 * Licensed to the Apache Software Foundation (ASF) under one or more 7 * contributor license agreements. See the NOTICE file distributed with 8 * this work for additional information regarding copyright ownership. 9 * The ASF licenses this file to You under the Apache License, Version 2.0 10 * (the "License"); you may not use this file except in compliance with 11 * the License. You may obtain a copy of the License at 12 * 13 * http://www.apache.org/licenses/LICENSE-2.0 14 * 15 * Unless required by applicable law or agreed to in writing, software 16 * distributed under the License is distributed on an "AS IS" BASIS, 17 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 * See the License for the specific language governing permissions and 19 * limitations under the License. 20 */ 21 22 package com.sun.org.apache.xerces.internal.impl; 23 24 import com.sun.org.apache.xerces.internal.impl.io.MalformedByteSequenceException; 25 import com.sun.org.apache.xerces.internal.impl.msg.XMLMessageFormatter; 26 import com.sun.org.apache.xerces.internal.util.AugmentationsImpl; 27 import com.sun.org.apache.xerces.internal.util.XMLAttributesIteratorImpl; 28 import com.sun.org.apache.xerces.internal.util.XMLChar; 29 import com.sun.org.apache.xerces.internal.util.XMLStringBuffer; 30 import com.sun.org.apache.xerces.internal.util.XMLSymbols; 31 import com.sun.org.apache.xerces.internal.utils.XMLSecurityManager.Limit; 32 import com.sun.org.apache.xerces.internal.utils.XMLSecurityManager; 33 import com.sun.org.apache.xerces.internal.utils.XMLSecurityPropertyManager; 34 import com.sun.org.apache.xerces.internal.xni.Augmentations; 35 import com.sun.org.apache.xerces.internal.xni.QName; 36 import com.sun.org.apache.xerces.internal.xni.XMLAttributes; 37 import com.sun.org.apache.xerces.internal.xni.XMLDocumentHandler; 38 import 
com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier; 39 import com.sun.org.apache.xerces.internal.xni.XMLString; 40 import com.sun.org.apache.xerces.internal.xni.XNIException; 41 import com.sun.org.apache.xerces.internal.xni.parser.XMLComponent; 42 import com.sun.org.apache.xerces.internal.xni.parser.XMLComponentManager; 43 import com.sun.org.apache.xerces.internal.xni.parser.XMLConfigurationException; 44 import com.sun.org.apache.xerces.internal.xni.parser.XMLDocumentScanner; 45 import com.sun.org.apache.xerces.internal.xni.parser.XMLInputSource; 46 import com.sun.xml.internal.stream.XMLBufferListener; 47 import com.sun.xml.internal.stream.XMLEntityStorage; 48 import com.sun.xml.internal.stream.dtd.DTDGrammarUtil; 49 import java.io.CharConversionException; 50 import java.io.EOFException; 51 import java.io.IOException; 52 import javax.xml.XMLConstants; 53 import javax.xml.stream.XMLInputFactory; 54 import javax.xml.stream.XMLStreamConstants; 55 import javax.xml.stream.events.XMLEvent; 56 import jdk.xml.internal.JdkXmlUtils; 57 import jdk.xml.internal.SecuritySupport; 58 59 /** 60 * 61 * This class is responsible for scanning the structure and content 62 * of document fragments. 63 * 64 * This class has been modified as per the new design which is more suited to 65 * efficiently build pull parser. Lot of improvements have been done and 66 * the code has been added to support stax functionality/features. 
67 * 68 * @author Neeraj Bajaj SUN Microsystems 69 * @author K.Venugopal SUN Microsystems 70 * @author Glenn Marcy, IBM 71 * @author Andy Clark, IBM 72 * @author Arnaud Le Hors, IBM 73 * @author Eric Ye, IBM 74 * @author Sunitha Reddy, SUN Microsystems 75 * 76 * @LastModified: Sep 2017 77 */ 78 public class XMLDocumentFragmentScannerImpl 79 extends XMLScanner 80 implements XMLDocumentScanner, XMLComponent, XMLEntityHandler, XMLBufferListener { 81 82 // 83 // Constants 84 // 85 86 protected int fElementAttributeLimit, fXMLNameLimit; 87 88 /** External subset resolver. **/ 89 protected ExternalSubsetResolver fExternalSubsetResolver; 90 91 // scanner states 92 93 //XXX this should be divided into more states. 94 /** Scanner state: start of markup. */ 95 protected static final int SCANNER_STATE_START_OF_MARKUP = 21; 96 97 /** Scanner state: content. */ 98 protected static final int SCANNER_STATE_CONTENT = 22; 99 100 /** Scanner state: processing instruction. */ 101 protected static final int SCANNER_STATE_PI = 23; 102 103 /** Scanner state: DOCTYPE. */ 104 protected static final int SCANNER_STATE_DOCTYPE = 24; 105 106 /** Scanner state: XML Declaration */ 107 protected static final int SCANNER_STATE_XML_DECL = 25; 108 109 /** Scanner state: root element. */ 110 protected static final int SCANNER_STATE_ROOT_ELEMENT = 26; 111 112 /** Scanner state: comment. */ 113 protected static final int SCANNER_STATE_COMMENT = 27; 114 115 /** Scanner state: reference. */ 116 protected static final int SCANNER_STATE_REFERENCE = 28; 117 118 // <book type="hard"> reading attribute name 'type' 119 protected static final int SCANNER_STATE_ATTRIBUTE = 29; 120 121 // <book type="hard"> //reading attribute value. 122 protected static final int SCANNER_STATE_ATTRIBUTE_VALUE = 30; 123 124 /** Scanner state: trailing misc. USED BY DOCUMENT_SCANNER_IMPL*/ 125 //protected static final int SCANNER_STATE_TRAILING_MISC = 32; 126 127 /** Scanner state: end of input. 
*/ 128 protected static final int SCANNER_STATE_END_OF_INPUT = 33; 129 130 /** Scanner state: terminated. */ 131 protected static final int SCANNER_STATE_TERMINATED = 34; 132 133 /** Scanner state: CDATA section. */ 134 protected static final int SCANNER_STATE_CDATA = 35; 135 136 /** Scanner state: Text declaration. */ 137 protected static final int SCANNER_STATE_TEXT_DECL = 36; 138 139 /** Scanner state: Text declaration. */ 140 protected static final int SCANNER_STATE_CHARACTER_DATA = 37; 141 142 //<book type="hard">foo</book> 143 protected static final int SCANNER_STATE_START_ELEMENT_TAG = 38; 144 145 //<book type="hard">foo</book> reading </book> 146 protected static final int SCANNER_STATE_END_ELEMENT_TAG = 39; 147 148 protected static final int SCANNER_STATE_CHAR_REFERENCE = 40; 149 protected static final int SCANNER_STATE_BUILT_IN_REFS = 41; 150 151 // feature identifiers 152 153 154 /** Feature identifier: notify built-in refereces. */ 155 protected static final String NOTIFY_BUILTIN_REFS = 156 Constants.XERCES_FEATURE_PREFIX + Constants.NOTIFY_BUILTIN_REFS_FEATURE; 157 158 /** Property identifier: entity resolver. */ 159 protected static final String ENTITY_RESOLVER = 160 Constants.XERCES_PROPERTY_PREFIX + Constants.ENTITY_RESOLVER_PROPERTY; 161 162 /** Feature identifier: standard uri conformant */ 163 protected static final String STANDARD_URI_CONFORMANT = 164 Constants.XERCES_FEATURE_PREFIX +Constants.STANDARD_URI_CONFORMANT_FEATURE; 165 166 /** Property identifier: Security property manager. */ 167 private static final String XML_SECURITY_PROPERTY_MANAGER = 168 Constants.XML_SECURITY_PROPERTY_MANAGER; 169 170 /** access external dtd: file protocol 171 * For DOM/SAX, the secure feature is set to true by default 172 */ 173 final static String EXTERNAL_ACCESS_DEFAULT = Constants.EXTERNAL_ACCESS_DEFAULT; 174 175 // recognized features and properties 176 177 /** Recognized features. 
*/ 178 private static final String[] RECOGNIZED_FEATURES = { 179 NAMESPACES, 180 VALIDATION, 181 NOTIFY_BUILTIN_REFS, 182 NOTIFY_CHAR_REFS, 183 Constants.STAX_REPORT_CDATA_EVENT, 184 XMLConstants.USE_CATALOG 185 }; 186 187 /** Feature defaults. */ 188 private static final Boolean[] FEATURE_DEFAULTS = { 189 Boolean.TRUE, 190 null, 191 Boolean.FALSE, 192 Boolean.FALSE, 193 Boolean.TRUE, 194 JdkXmlUtils.USE_CATALOG_DEFAULT 195 }; 196 197 /** Recognized properties. */ 198 private static final String[] RECOGNIZED_PROPERTIES = { 199 SYMBOL_TABLE, 200 ERROR_REPORTER, 201 ENTITY_MANAGER, 202 XML_SECURITY_PROPERTY_MANAGER, 203 JdkXmlUtils.CATALOG_DEFER, 204 JdkXmlUtils.CATALOG_FILES, 205 JdkXmlUtils.CATALOG_PREFER, 206 JdkXmlUtils.CATALOG_RESOLVE, 207 JdkXmlUtils.CDATA_CHUNK_SIZE 208 }; 209 210 /** Property defaults. */ 211 private static final Object[] PROPERTY_DEFAULTS = { 212 null, 213 null, 214 null, 215 null, 216 null, 217 null, 218 null, 219 null, 220 JdkXmlUtils.CDATA_CHUNK_SIZE_DEFAULT 221 }; 222 223 224 private static final char [] CDATA = {'[','C','D','A','T','A','['}; 225 static final char [] XMLDECL = {'<','?','x','m','l'}; 226 // private static final char [] endTag = {'<','/'}; 227 // debugging 228 229 /** Debug scanner state. */ 230 private static final boolean DEBUG_SCANNER_STATE = false; 231 232 /** Debug driver. */ 233 private static final boolean DEBUG_DISPATCHER = false; 234 235 /** Debug content driver scanning. */ 236 protected static final boolean DEBUG_START_END_ELEMENT = false; 237 238 /** Debug driver next */ 239 protected static final boolean DEBUG = false; 240 241 // 242 // Data 243 // 244 245 // protected data 246 247 /** Document handler. */ 248 protected XMLDocumentHandler fDocumentHandler; 249 protected int fScannerLastState ; 250 251 /** Entity Storage */ 252 protected XMLEntityStorage fEntityStore; 253 254 /** Entity stack. */ 255 protected int[] fEntityStack = new int[4]; 256 257 /** Markup depth. 
*/ 258 protected int fMarkupDepth; 259 260 //is the element empty 261 protected boolean fEmptyElement ; 262 263 //track if we are reading attributes, this is usefule while 264 //there is a callback 265 protected boolean fReadingAttributes = false; 266 267 /** Scanner state. */ 268 protected int fScannerState; 269 270 /** SubScanner state: inside scanContent method. */ 271 protected boolean fInScanContent = false; 272 protected boolean fLastSectionWasCData = false; 273 protected boolean fCDataStart = false; 274 protected boolean fInCData = false; 275 protected boolean fCDataEnd = false; 276 protected boolean fLastSectionWasEntityReference = false; 277 protected boolean fLastSectionWasCharacterData = false; 278 279 /** has external dtd */ 280 protected boolean fHasExternalDTD; 281 282 /** Standalone. */ 283 protected boolean fStandaloneSet; 284 protected boolean fStandalone; 285 protected String fVersion; 286 287 // element information 288 289 /** Current element. */ 290 protected QName fCurrentElement; 291 292 /** Element stack. */ 293 protected ElementStack fElementStack = new ElementStack(); 294 protected ElementStack2 fElementStack2 = new ElementStack2(); 295 296 // other info 297 298 /** Document system identifier. 299 * REVISIT: So what's this used for? - NG 300 * protected String fDocumentSystemId; 301 ******/ 302 303 protected String fPITarget ; 304 305 //xxx do we need to create an extra XMLString object... look for using fTempString for collecting all the data values 306 protected XMLString fPIData = new XMLString(); 307 308 // features 309 310 311 /** Notify built-in references. */ 312 protected boolean fNotifyBuiltInRefs = false; 313 314 //STAX related properties 315 //defaultValues. 
316 protected boolean fSupportDTD = true; 317 protected boolean fReplaceEntityReferences = true; 318 protected boolean fSupportExternalEntities = false; 319 protected boolean fReportCdataEvent = false ; 320 protected boolean fIsCoalesce = false ; 321 protected String fDeclaredEncoding = null; 322 /** Xerces Feature: Disallow doctype declaration. */ 323 protected boolean fDisallowDoctype = false; 324 325 /** 326 * CDATA chunk size limit 327 */ 328 private int fChunkSize; 329 330 /** 331 * comma-delimited list of protocols that are allowed for the purpose 332 * of accessing external dtd or entity references 333 */ 334 protected String fAccessExternalDTD = EXTERNAL_ACCESS_DEFAULT; 335 336 /** 337 * standard uri conformant (strict uri). 338 * http://apache.org/xml/features/standard-uri-conformant 339 */ 340 protected boolean fStrictURI; 341 342 // drivers 343 344 /** Active driver. */ 345 protected Driver fDriver; 346 347 /** Content driver. */ 348 protected Driver fContentDriver = createContentDriver(); 349 350 // temporary variables 351 352 /** Element QName. */ 353 protected QName fElementQName = new QName(); 354 355 /** Attribute QName. */ 356 protected QName fAttributeQName = new QName(); 357 358 /** 359 * CHANGED: Using XMLAttributesIteratorImpl instead of XMLAttributesImpl. This class 360 * implements Iterator interface so we can directly give Attributes in the form of 361 * iterator. 362 */ 363 protected XMLAttributesIteratorImpl fAttributes = new XMLAttributesIteratorImpl(); 364 365 366 /** String. */ 367 protected XMLString fTempString = new XMLString(); 368 369 /** String. */ 370 protected XMLString fTempString2 = new XMLString(); 371 372 /** Array of 3 strings. */ 373 private final String[] fStrings = new String[3]; 374 375 /** Making the buffer accessible to derived class -- String buffer. */ 376 protected XMLStringBuffer fStringBuffer = new XMLStringBuffer(); 377 378 /** Making the buffer accessible to derived class -- String buffer. 
*/
    protected XMLStringBuffer fStringBuffer2 = new XMLStringBuffer();

    /**
     * Content buffer, accessible to derived classes.
     * Per the original notes it stores character data and PI data.
     */
    protected XMLStringBuffer fContentBuffer = new XMLStringBuffer();

    /** Single character array. */
    private final char[] fSingleChar = new char[1];
    private String fCurrentEntityName = null;

    // New members
    protected boolean fScanToEnd = false;

    protected DTDGrammarUtil dtdGrammarUtil= null;

    protected boolean fAddDefaultAttr = false;

    protected boolean foundBuiltInRefs = false;

    /** Built-in reference character event */
    protected boolean builtInRefCharacterHandled = false;

    // ---- state for the "skip element" algorithm ----
    static final short MAX_DEPTH_LIMIT = 5 ;
    static final short ELEMENT_ARRAY_LENGTH = 200 ;
    static final short MAX_POINTER_AT_A_DEPTH = 4 ;
    static final boolean DEBUG_SKIP_ALGORITHM = false;
    // element array of length ELEMENT_ARRAY_LENGTH
    String [] fElementArray = new String[ELEMENT_ARRAY_LENGTH] ;
    // pointer location where the last element was skipped
    short fLastPointerLocation = 0 ;
    short fElementPointer = 0 ;
    // 2D array of pointer info, sized MAX_DEPTH_LIMIT x MAX_POINTER_AT_A_DEPTH
    short [] [] fPointerInfo = new short[MAX_DEPTH_LIMIT] [MAX_POINTER_AT_A_DEPTH] ;
    protected String fElementRawname ;
    protected boolean fShouldSkip = false;
    protected boolean fAdd = false ;
    protected boolean fSkip = false;

    /** Reusable Augmentations. */
    private Augmentations fTempAugmentations = null;

    //
    // Constructors
    //

    /** Default constructor; performs no work of its own. */
    public XMLDocumentFragmentScannerImpl() {
    } // <init>()

    //
    // XMLDocumentScanner methods
    //

    /**
     * Sets the input source.
     *
     * @param inputSource The input source.
     *
     * @throws IOException Thrown on i/o error.
438 */ setInputSource(XMLInputSource inputSource)439 public void setInputSource(XMLInputSource inputSource) throws IOException { 440 fEntityManager.setEntityHandler(this); 441 fEntityManager.startEntity(false, "$fragment$", inputSource, false, true); 442 // fDocumentSystemId = fEntityManager.expandSystemId(inputSource.getSystemId()); 443 } // setInputSource(XMLInputSource) 444 445 /** 446 * Scans a document. 447 * 448 * @param complete True if the scanner should scan the document 449 * completely, pushing all events to the registered 450 * document handler. A value of false indicates that 451 * that the scanner should only scan the next portion 452 * of the document and return. A scanner instance is 453 * permitted to completely scan a document if it does 454 * not support this "pull" scanning model. 455 * 456 * @return True if there is more to scan, false otherwise. 457 */ scanDocument(boolean complete)458 public boolean scanDocument(boolean complete) 459 throws IOException, XNIException { 460 461 // keep dispatching "events" 462 fEntityManager.setEntityHandler(this); 463 //System.out.println(" get Document Handler in NSDocumentHandler " + fDocumentHandler ); 464 465 int event = next(); 466 do { 467 switch (event) { 468 case XMLStreamConstants.START_DOCUMENT : 469 //fDocumentHandler.startDocument(fEntityManager.getEntityScanner(),fEntityManager.getEntityScanner().getVersion(),fNamespaceContext,null);// not able to get 470 break; 471 case XMLStreamConstants.START_ELEMENT : 472 //System.out.println(" in scann element"); 473 //fDocumentHandler.startElement(getElementQName(),fAttributes,null); 474 break; 475 case XMLStreamConstants.CHARACTERS : 476 fEntityScanner.checkNodeCount(fEntityScanner.fCurrentEntity); 477 fDocumentHandler.characters(getCharacterData(),null); 478 break; 479 case XMLStreamConstants.SPACE: 480 //check if getCharacterData() is the right function to retrieve ignorableWhitespace information. 
481 //System.out.println("in the space"); 482 //fDocumentHandler.ignorableWhitespace(getCharacterData(), null); 483 break; 484 case XMLStreamConstants.ENTITY_REFERENCE : 485 fEntityScanner.checkNodeCount(fEntityScanner.fCurrentEntity); 486 //entity reference callback are given in startEntity 487 break; 488 case XMLStreamConstants.PROCESSING_INSTRUCTION : 489 fEntityScanner.checkNodeCount(fEntityScanner.fCurrentEntity); 490 fDocumentHandler.processingInstruction(getPITarget(),getPIData(),null); 491 break; 492 case XMLStreamConstants.COMMENT : 493 fEntityScanner.checkNodeCount(fEntityScanner.fCurrentEntity); 494 fDocumentHandler.comment(getCharacterData(),null); 495 break; 496 case XMLStreamConstants.DTD : 497 //all DTD related callbacks are handled in DTDScanner. 498 //1. Stax doesn't define DTD states as it does for XML Document. 499 //therefore we don't need to take care of anything here. So Just break; 500 break; 501 case XMLStreamConstants.CDATA: 502 fEntityScanner.checkNodeCount(fEntityScanner.fCurrentEntity); 503 if (fCDataStart) { 504 fDocumentHandler.startCDATA(null); 505 fCDataStart = false; 506 fInCData = true; 507 } 508 509 fDocumentHandler.characters(getCharacterData(),null); 510 if (fCDataEnd) { 511 fDocumentHandler.endCDATA(null); 512 fCDataEnd = false; 513 } 514 break; 515 case XMLStreamConstants.NOTATION_DECLARATION : 516 break; 517 case XMLStreamConstants.ENTITY_DECLARATION : 518 break; 519 case XMLStreamConstants.NAMESPACE : 520 break; 521 case XMLStreamConstants.ATTRIBUTE : 522 break; 523 case XMLStreamConstants.END_ELEMENT : 524 //do not give callback here. 525 //this callback is given in scanEndElement function. 
526 //fDocumentHandler.endElement(getElementQName(),null); 527 break; 528 default : 529 // Errors should have already been handled by the Scanner 530 return false; 531 532 } 533 //System.out.println("here in before calling next"); 534 event = next(); 535 //System.out.println("here in after calling next"); 536 } while (event!=XMLStreamConstants.END_DOCUMENT && complete); 537 538 if(event == XMLStreamConstants.END_DOCUMENT) { 539 fDocumentHandler.endDocument(null); 540 return false; 541 } 542 543 return true; 544 545 } // scanDocument(boolean):boolean 546 547 548 getElementQName()549 public com.sun.org.apache.xerces.internal.xni.QName getElementQName(){ 550 if(fScannerLastState == XMLEvent.END_ELEMENT){ 551 fElementQName.setValues(fElementStack.getLastPoppedElement()); 552 } 553 return fElementQName ; 554 } 555 556 /** return the next state on the input 557 * @return int 558 */ 559 next()560 public int next() throws IOException, XNIException { 561 return fDriver.next(); 562 } 563 564 // 565 // XMLComponent methods 566 // 567 568 /** 569 * Resets the component. The component can query the component manager 570 * about any features and properties that affect the operation of the 571 * component. 572 * 573 * @param componentManager The component manager. 574 * 575 * @throws SAXException Thrown by component on initialization error. 576 * For example, if a feature or property is 577 * required for the operation of the component, the 578 * component manager may throw a 579 * SAXNotRecognizedException or a 580 * SAXNotSupportedException. 
581 */ 582 reset(XMLComponentManager componentManager)583 public void reset(XMLComponentManager componentManager) 584 throws XMLConfigurationException { 585 586 super.reset(componentManager); 587 588 // other settings 589 // fDocumentSystemId = null; 590 591 // sax features 592 //fAttributes.setNamespaces(fNamespaces); 593 594 // xerces features 595 fReportCdataEvent = componentManager.getFeature(Constants.STAX_REPORT_CDATA_EVENT, true); 596 fSecurityManager = (XMLSecurityManager)componentManager.getProperty(Constants.SECURITY_MANAGER, null); 597 fNotifyBuiltInRefs = componentManager.getFeature(NOTIFY_BUILTIN_REFS, false); 598 599 Object resolver = componentManager.getProperty(ENTITY_RESOLVER, null); 600 fExternalSubsetResolver = (resolver instanceof ExternalSubsetResolver) ? 601 (ExternalSubsetResolver) resolver : null; 602 603 //attribute 604 fReadingAttributes = false; 605 //xxx: external entities are supported in Xerces 606 // it would be good to define feature for this case 607 fSupportExternalEntities = true; 608 fReplaceEntityReferences = true; 609 fIsCoalesce = false; 610 611 // setup Driver 612 setScannerState(SCANNER_STATE_CONTENT); 613 setDriver(fContentDriver); 614 615 // JAXP 1.5 features and properties 616 XMLSecurityPropertyManager spm = (XMLSecurityPropertyManager) 617 componentManager.getProperty(XML_SECURITY_PROPERTY_MANAGER, null); 618 fAccessExternalDTD = spm.getValue(XMLSecurityPropertyManager.Property.ACCESS_EXTERNAL_DTD); 619 620 fStrictURI = componentManager.getFeature(STANDARD_URI_CONFORMANT, false); 621 fChunkSize = JdkXmlUtils.getValue(componentManager.getProperty(JdkXmlUtils.CDATA_CHUNK_SIZE), 622 JdkXmlUtils.CDATA_CHUNK_SIZE_DEFAULT); 623 624 resetCommon(); 625 //fEntityManager.test(); 626 } // reset(XMLComponentManager) 627 628 reset(PropertyManager propertyManager)629 public void reset(PropertyManager propertyManager){ 630 631 super.reset(propertyManager); 632 633 // other settings 634 // fDocumentSystemId = null; 635 fNamespaces = 
((Boolean)propertyManager.getProperty(XMLInputFactory.IS_NAMESPACE_AWARE)); 636 fNotifyBuiltInRefs = false ; 637 638 //fElementStack2.clear(); 639 //fReplaceEntityReferences = true; 640 //fSupportExternalEntities = true; 641 Boolean bo = (Boolean)propertyManager.getProperty(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES); 642 fReplaceEntityReferences = bo; 643 bo = (Boolean)propertyManager.getProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES); 644 fSupportExternalEntities = bo; 645 Boolean cdata = (Boolean)propertyManager.getProperty( 646 Constants.ZEPHYR_PROPERTY_PREFIX + Constants.STAX_REPORT_CDATA_EVENT) ; 647 if(cdata != null) 648 fReportCdataEvent = cdata ; 649 Boolean coalesce = (Boolean)propertyManager.getProperty(XMLInputFactory.IS_COALESCING) ; 650 if(coalesce != null) 651 fIsCoalesce = coalesce; 652 fReportCdataEvent = fIsCoalesce ? false : (fReportCdataEvent && true) ; 653 //if fIsCoalesce is set to true, set the value of fReplaceEntityReferences to true, 654 //if fIsCoalesce is set to false, take the value of fReplaceEntityReferences as set by application 655 fReplaceEntityReferences = fIsCoalesce ? true : fReplaceEntityReferences; 656 // setup Driver 657 //we dont need to do this -- nb. 
658 //setScannerState(SCANNER_STATE_CONTENT); 659 //setDriver(fContentDriver); 660 //fEntityManager.test(); 661 662 // JAXP 1.5 features and properties 663 XMLSecurityPropertyManager spm = (XMLSecurityPropertyManager) 664 propertyManager.getProperty(XML_SECURITY_PROPERTY_MANAGER); 665 fAccessExternalDTD = spm.getValue(XMLSecurityPropertyManager.Property.ACCESS_EXTERNAL_DTD); 666 667 fSecurityManager = (XMLSecurityManager)propertyManager.getProperty(Constants.SECURITY_MANAGER); 668 fChunkSize = JdkXmlUtils.getValue(propertyManager.getProperty(JdkXmlUtils.CDATA_CHUNK_SIZE), 669 JdkXmlUtils.CDATA_CHUNK_SIZE_DEFAULT); 670 resetCommon(); 671 } // reset(XMLComponentManager) 672 resetCommon()673 void resetCommon() { 674 // initialize vars 675 fMarkupDepth = 0; 676 fCurrentElement = null; 677 fElementStack.clear(); 678 fHasExternalDTD = false; 679 fStandaloneSet = false; 680 fStandalone = false; 681 fInScanContent = false; 682 //skipping algorithm 683 fShouldSkip = false; 684 fAdd = false; 685 fSkip = false; 686 687 fEntityStore = fEntityManager.getEntityStore(); 688 dtdGrammarUtil = null; 689 690 if (fSecurityManager != null) { 691 fElementAttributeLimit = fSecurityManager.getLimit(XMLSecurityManager.Limit.ELEMENT_ATTRIBUTE_LIMIT); 692 fXMLNameLimit = fSecurityManager.getLimit(XMLSecurityManager.Limit.MAX_NAME_LIMIT); 693 } else { 694 fElementAttributeLimit = 0; 695 fXMLNameLimit = XMLSecurityManager.Limit.MAX_NAME_LIMIT.defaultValue(); 696 } 697 fLimitAnalyzer = fEntityManager.fLimitAnalyzer; 698 } 699 700 /** 701 * Returns a list of feature identifiers that are recognized by 702 * this component. This method may return null if no features 703 * are recognized by this component. 704 */ getRecognizedFeatures()705 public String[] getRecognizedFeatures() { 706 return RECOGNIZED_FEATURES.clone(); 707 } // getRecognizedFeatures():String[] 708 709 /** 710 * Sets the state of a feature. 
This method is called by the component 711 * manager any time after reset when a feature changes state. 712 * <p> 713 * <strong>Note:</strong> Components should silently ignore features 714 * that do not affect the operation of the component. 715 * 716 * @param featureId The feature identifier. 717 * @param state The state of the feature. 718 * 719 * @throws SAXNotRecognizedException The component should not throw 720 * this exception. 721 * @throws SAXNotSupportedException The component should not throw 722 * this exception. 723 */ setFeature(String featureId, boolean state)724 public void setFeature(String featureId, boolean state) 725 throws XMLConfigurationException { 726 727 super.setFeature(featureId, state); 728 729 // Xerces properties 730 if (featureId.startsWith(Constants.XERCES_FEATURE_PREFIX)) { 731 String feature = featureId.substring(Constants.XERCES_FEATURE_PREFIX.length()); 732 if (feature.equals(Constants.NOTIFY_BUILTIN_REFS_FEATURE)) { 733 fNotifyBuiltInRefs = state; 734 } 735 } 736 737 } // setFeature(String,boolean) 738 739 /** 740 * Returns a list of property identifiers that are recognized by 741 * this component. This method may return null if no properties 742 * are recognized by this component. 743 */ getRecognizedProperties()744 public String[] getRecognizedProperties() { 745 return RECOGNIZED_PROPERTIES.clone(); 746 } // getRecognizedProperties():String[] 747 748 /** 749 * Sets the value of a property. This method is called by the component 750 * manager any time after reset when a property changes value. 751 * <p> 752 * <strong>Note:</strong> Components should silently ignore properties 753 * that do not affect the operation of the component. 754 * 755 * @param propertyId The property identifier. 756 * @param value The value of the property. 757 * 758 * @throws SAXNotRecognizedException The component should not throw 759 * this exception. 760 * @throws SAXNotSupportedException The component should not throw 761 * this exception. 
762 */ setProperty(String propertyId, Object value)763 public void setProperty(String propertyId, Object value) 764 throws XMLConfigurationException { 765 766 super.setProperty(propertyId, value); 767 768 // Xerces properties 769 if (propertyId.startsWith(Constants.XERCES_PROPERTY_PREFIX)) { 770 final int suffixLength = propertyId.length() - Constants.XERCES_PROPERTY_PREFIX.length(); 771 if (suffixLength == Constants.ENTITY_MANAGER_PROPERTY.length() && 772 propertyId.endsWith(Constants.ENTITY_MANAGER_PROPERTY)) { 773 fEntityManager = (XMLEntityManager)value; 774 return; 775 } 776 if (suffixLength == Constants.ENTITY_RESOLVER_PROPERTY.length() && 777 propertyId.endsWith(Constants.ENTITY_RESOLVER_PROPERTY)) { 778 fExternalSubsetResolver = (value instanceof ExternalSubsetResolver) ? 779 (ExternalSubsetResolver) value : null; 780 return; 781 } 782 } 783 784 785 // Xerces properties 786 if (propertyId.startsWith(Constants.XERCES_PROPERTY_PREFIX)) { 787 String property = propertyId.substring(Constants.XERCES_PROPERTY_PREFIX.length()); 788 if (property.equals(Constants.ENTITY_MANAGER_PROPERTY)) { 789 fEntityManager = (XMLEntityManager)value; 790 } 791 return; 792 } 793 794 //JAXP 1.5 properties 795 if (propertyId.equals(XML_SECURITY_PROPERTY_MANAGER)) 796 { 797 XMLSecurityPropertyManager spm = (XMLSecurityPropertyManager)value; 798 fAccessExternalDTD = spm.getValue(XMLSecurityPropertyManager.Property.ACCESS_EXTERNAL_DTD); 799 } 800 801 } // setProperty(String,Object) 802 803 /** 804 * Returns the default state for a feature, or null if this 805 * component does not want to report a default value for this 806 * feature. 807 * 808 * @param featureId The feature identifier. 
809 * 810 * @since Xerces 2.2.0 811 */ getFeatureDefault(String featureId)812 public Boolean getFeatureDefault(String featureId) { 813 for (int i = 0; i < RECOGNIZED_FEATURES.length; i++) { 814 if (RECOGNIZED_FEATURES[i].equals(featureId)) { 815 return FEATURE_DEFAULTS[i]; 816 } 817 } 818 return null; 819 } // getFeatureDefault(String):Boolean 820 821 /** 822 * Returns the default state for a property, or null if this 823 * component does not want to report a default value for this 824 * property. 825 * 826 * @param propertyId The property identifier. 827 * 828 * @since Xerces 2.2.0 829 */ getPropertyDefault(String propertyId)830 public Object getPropertyDefault(String propertyId) { 831 for (int i = 0; i < RECOGNIZED_PROPERTIES.length; i++) { 832 if (RECOGNIZED_PROPERTIES[i].equals(propertyId)) { 833 return PROPERTY_DEFAULTS[i]; 834 } 835 } 836 return null; 837 } // getPropertyDefault(String):Object 838 839 // 840 // XMLDocumentSource methods 841 // 842 843 /** 844 * setDocumentHandler 845 * 846 * @param documentHandler 847 */ setDocumentHandler(XMLDocumentHandler documentHandler)848 public void setDocumentHandler(XMLDocumentHandler documentHandler) { 849 fDocumentHandler = documentHandler; 850 //System.out.println(" In Set DOCUMENT HANDLER" + fDocumentHandler + " scanner =" + this); 851 } // setDocumentHandler(XMLDocumentHandler) 852 853 854 /** Returns the document handler */ getDocumentHandler()855 public XMLDocumentHandler getDocumentHandler(){ 856 return fDocumentHandler; 857 } 858 859 // 860 // XMLEntityHandler methods 861 // 862 863 /** 864 * This method notifies of the start of an entity. The DTD has the 865 * pseudo-name of "[dtd]" parameter entity names start with '%'; and 866 * general entities are just specified by their name. 867 * 868 * @param name The name of the entity. 869 * @param identifier The resource identifier. 870 * @param encoding The auto-detected IANA encoding name of the entity 871 * stream. 
This value will be null in those situations 872 * where the entity encoding is not auto-detected (e.g. 873 * internal entities or a document entity that is 874 * parsed from a java.io.Reader). 875 * @param augs Additional information that may include infoset augmentations 876 * 877 * @throws XNIException Thrown by handler to signal an error. 878 */ startEntity(String name, XMLResourceIdentifier identifier, String encoding, Augmentations augs)879 public void startEntity(String name, 880 XMLResourceIdentifier identifier, 881 String encoding, Augmentations augs) throws XNIException { 882 883 // keep track of this entity before fEntityDepth is increased 884 if (fEntityDepth == fEntityStack.length) { 885 int[] entityarray = new int[fEntityStack.length * 2]; 886 System.arraycopy(fEntityStack, 0, entityarray, 0, fEntityStack.length); 887 fEntityStack = entityarray; 888 } 889 fEntityStack[fEntityDepth] = fMarkupDepth; 890 891 super.startEntity(name, identifier, encoding, augs); 892 893 // WFC: entity declared in external subset in standalone doc 894 if(fStandalone && fEntityStore.isEntityDeclInExternalSubset(name)) { 895 reportFatalError("MSG_REFERENCE_TO_EXTERNALLY_DECLARED_ENTITY_WHEN_STANDALONE", 896 new Object[]{name}); 897 } 898 899 /** we are not calling the handlers yet.. */ 900 // call handler 901 if (fDocumentHandler != null && !fScanningAttribute) { 902 if (!name.equals("[xml]")) { 903 fDocumentHandler.startGeneralEntity(name, identifier, encoding, augs); 904 } 905 } 906 907 } // startEntity(String,XMLResourceIdentifier,String) 908 909 /** 910 * This method notifies the end of an entity. The DTD has the pseudo-name 911 * of "[dtd]" parameter entity names start with '%'; and general entities 912 * are just specified by their name. 913 * 914 * @param name The name of the entity. 915 * @param augs Additional information that may include infoset augmentations 916 * 917 * @throws XNIException Thrown by handler to signal an error. 
918 */ endEntity(String name, Augmentations augs)919 public void endEntity(String name, Augmentations augs) throws IOException, XNIException { 920 921 /** 922 * // flush possible pending output buffer - see scanContent 923 * if (fInScanContent && fStringBuffer.length != 0 924 * && fDocumentHandler != null) { 925 * fDocumentHandler.characters(fStringBuffer, null); 926 * fStringBuffer.length = 0; // make sure we know it's been flushed 927 * } 928 */ 929 super.endEntity(name, augs); 930 931 // make sure markup is properly balanced 932 if (fMarkupDepth != fEntityStack[fEntityDepth]) { 933 reportFatalError("MarkupEntityMismatch", null); 934 } 935 936 /**/ 937 // call handler 938 if (fDocumentHandler != null && !fScanningAttribute) { 939 if (!name.equals("[xml]")) { 940 fDocumentHandler.endGeneralEntity(name, augs); 941 } 942 } 943 944 945 } // endEntity(String) 946 947 // 948 // Protected methods 949 // 950 951 // Driver factory methods 952 953 /** Creates a content Driver. */ createContentDriver()954 protected Driver createContentDriver() { 955 return new FragmentContentDriver(); 956 } // createContentDriver():Driver 957 958 // scanning methods 959 960 /** 961 * Scans an XML or text declaration. 962 * <p> 963 * <pre> 964 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' 965 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ") 966 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" ) 967 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* 968 * [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") 969 * | ('"' ('yes' | 'no') '"')) 970 * 971 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>' 972 * </pre> 973 * 974 * @param scanningTextDecl True if a text declaration is to 975 * be scanned instead of an XML 976 * declaration. 
977 */ scanXMLDeclOrTextDecl(boolean scanningTextDecl)978 protected void scanXMLDeclOrTextDecl(boolean scanningTextDecl) 979 throws IOException, XNIException { 980 981 // scan decl 982 super.scanXMLDeclOrTextDecl(scanningTextDecl, fStrings); 983 fMarkupDepth--; 984 985 // pseudo-attribute values 986 String version = fStrings[0]; 987 String encoding = fStrings[1]; 988 String standalone = fStrings[2]; 989 fDeclaredEncoding = encoding; 990 // set standalone 991 fStandaloneSet = standalone != null; 992 fStandalone = fStandaloneSet && standalone.equals("yes"); 993 ///xxx see where its used.. this is not used anywhere. 994 //it may be useful for entity to store this information 995 //but this information is only related with Document Entity. 996 fEntityManager.setStandalone(fStandalone); 997 998 999 // call handler 1000 if (fDocumentHandler != null) { 1001 if (scanningTextDecl) { 1002 fDocumentHandler.textDecl(version, encoding, null); 1003 } else { 1004 fDocumentHandler.xmlDecl(version, encoding, standalone, null); 1005 } 1006 } 1007 1008 if(version != null){ 1009 fEntityScanner.setVersion(version); 1010 fEntityScanner.setXMLVersion(version); 1011 } 1012 // set encoding on reader, only if encoding was not specified by the application explicitly 1013 if (encoding != null && !fEntityScanner.getCurrentEntity().isEncodingExternallySpecified()) { 1014 fEntityScanner.setEncoding(encoding); 1015 } 1016 1017 } // scanXMLDeclOrTextDecl(boolean) 1018 getPITarget()1019 public String getPITarget(){ 1020 return fPITarget ; 1021 } 1022 getPIData()1023 public XMLStringBuffer getPIData(){ 1024 return fContentBuffer ; 1025 } 1026 1027 //XXX: why not this function behave as per the state of the parser? getCharacterData()1028 public XMLString getCharacterData(){ 1029 if(fUsebuffer){ 1030 return fContentBuffer ; 1031 }else{ 1032 return fTempString; 1033 } 1034 1035 } 1036 1037 1038 /** 1039 * Scans a processing data. 
This is needed to handle the situation 1040 * where a document starts with a processing instruction whose 1041 * target name <em>starts with</em> "xml". (e.g. xmlfoo) 1042 * 1043 * @param target The PI target 1044 * @param data The XMLStringBuffer to fill in with the data 1045 */ scanPIData(String target, XMLStringBuffer data)1046 protected void scanPIData(String target, XMLStringBuffer data) 1047 throws IOException, XNIException { 1048 1049 super.scanPIData(target, data); 1050 1051 //set the PI target and values 1052 fPITarget = target ; 1053 1054 fMarkupDepth--; 1055 1056 } // scanPIData(String) 1057 1058 /** 1059 * Scans a comment. 1060 * <p> 1061 * <pre> 1062 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' 1063 * </pre> 1064 * <p> 1065 * <strong>Note:</strong> Called after scanning past '<!--' 1066 */ scanComment()1067 protected void scanComment() throws IOException, XNIException { 1068 fContentBuffer.clear(); 1069 scanComment(fContentBuffer); 1070 //getTextCharacters can also be called for reading comments 1071 fUsebuffer = true; 1072 fMarkupDepth--; 1073 1074 } // scanComment() 1075 1076 //xxx value returned by this function may not remain valid if another event is scanned. 
getComment()1077 public String getComment(){ 1078 return fContentBuffer.toString(); 1079 } 1080 addElement(String rawname)1081 void addElement(String rawname){ 1082 if(fElementPointer < ELEMENT_ARRAY_LENGTH){ 1083 //storing element raw name in a linear list of array 1084 fElementArray[fElementPointer] = rawname ; 1085 //storing elemnetPointer for particular element depth 1086 1087 if(DEBUG_SKIP_ALGORITHM){ 1088 StringBuffer sb = new StringBuffer() ; 1089 sb.append(" Storing element information ") ; 1090 sb.append(" fElementPointer = " + fElementPointer) ; 1091 sb.append(" fElementRawname = " + fElementQName.rawname) ; 1092 sb.append(" fElementStack.fDepth = " + fElementStack.fDepth); 1093 System.out.println(sb.toString()) ; 1094 } 1095 1096 //store pointer information only when element depth is less MAX_DEPTH_LIMIT 1097 if(fElementStack.fDepth < MAX_DEPTH_LIMIT){ 1098 short column = storePointerForADepth(fElementPointer); 1099 if(column > 0){ 1100 short pointer = getElementPointer((short)fElementStack.fDepth, (short)(column - 1) ); 1101 //identity comparison shouldn't take much time and we can rely on this 1102 //since its guaranteed to have same object id for same string. 1103 if(rawname == fElementArray[pointer]){ 1104 fShouldSkip = true ; 1105 fLastPointerLocation = pointer ; 1106 //reset the things and return. 1107 resetPointer((short)fElementStack.fDepth , column) ; 1108 fElementArray[fElementPointer] = null ; 1109 return ; 1110 }else{ 1111 fShouldSkip = false ; 1112 } 1113 } 1114 } 1115 fElementPointer++ ; 1116 } 1117 } 1118 1119 resetPointer(short depth, short column)1120 void resetPointer(short depth, short column){ 1121 fPointerInfo[depth] [column] = (short)0; 1122 } 1123 1124 //returns column information at which pointer was stored. 
storePointerForADepth(short elementPointer)1125 short storePointerForADepth(short elementPointer){ 1126 short depth = (short) fElementStack.fDepth ; 1127 1128 //Stores element pointer locations at particular depth , only 4 pointer locations 1129 //are stored at particular depth for now. 1130 for(short i = 0 ; i < MAX_POINTER_AT_A_DEPTH ; i++){ 1131 1132 if(canStore(depth, i)){ 1133 fPointerInfo[depth][i] = elementPointer ; 1134 if(DEBUG_SKIP_ALGORITHM){ 1135 StringBuffer sb = new StringBuffer() ; 1136 sb.append(" Pointer information ") ; 1137 sb.append(" fElementPointer = " + fElementPointer) ; 1138 sb.append(" fElementStack.fDepth = " + fElementStack.fDepth); 1139 sb.append(" column = " + i ) ; 1140 System.out.println(sb.toString()) ; 1141 } 1142 return i; 1143 } 1144 //else 1145 //pointer was not stored because we reached the limit 1146 } 1147 return -1 ; 1148 } 1149 canStore(short depth, short column)1150 boolean canStore(short depth, short column){ 1151 //colum = 0 , means first element at particular depth 1152 //column = 1, means second element at particular depth 1153 // calle should make sure that it doesn't call for value outside allowed co-ordinates 1154 return fPointerInfo[depth][column] == 0 ? 
true : false ; 1155 } 1156 1157 getElementPointer(short depth, short column)1158 short getElementPointer(short depth, short column){ 1159 //colum = 0 , means first element at particular depth 1160 //column = 1, means second element at particular depth 1161 // calle should make sure that it doesn't call for value outside allowed co-ordinates 1162 return fPointerInfo[depth][column] ; 1163 } 1164 1165 //this function assumes that string passed is not null and skips 1166 //the following string from the buffer this makes sure skipFromTheBuffer(String rawname)1167 boolean skipFromTheBuffer(String rawname) throws IOException{ 1168 if(fEntityScanner.skipString(rawname)){ 1169 char c = (char)fEntityScanner.peekChar() ; 1170 //If the start element was completely skipped we should encounter either ' '(space), 1171 //or '/' (in case of empty element) or '>' 1172 if( c == ' ' || c == '/' || c == '>'){ 1173 fElementRawname = rawname ; 1174 return true ; 1175 } else{ 1176 return false; 1177 } 1178 } else 1179 return false ; 1180 } 1181 skipQElement(String rawname)1182 boolean skipQElement(String rawname) throws IOException{ 1183 1184 final int c = fEntityScanner.getChar(rawname.length()); 1185 //if this character is still valid element name -- this means string can't match 1186 if(XMLChar.isName(c)){ 1187 return false; 1188 }else{ 1189 return fEntityScanner.skipString(rawname); 1190 } 1191 } 1192 skipElement()1193 protected boolean skipElement() throws IOException { 1194 1195 if(!fShouldSkip) return false ; 1196 1197 if(fLastPointerLocation != 0){ 1198 //Look at the next element stored in the array list.. we might just get a match. 
1199 String rawname = fElementArray[fLastPointerLocation + 1] ; 1200 if(rawname != null && skipFromTheBuffer(rawname)){ 1201 fLastPointerLocation++ ; 1202 if(DEBUG_SKIP_ALGORITHM){ 1203 System.out.println("Element " + fElementRawname + 1204 " was SKIPPED at pointer location = " + fLastPointerLocation); 1205 } 1206 return true ; 1207 } else{ 1208 //reset it back to zero... we haven't got the correct subset yet. 1209 fLastPointerLocation = 0 ; 1210 1211 } 1212 } 1213 //xxx: we can put some logic here as from what column it should start looking 1214 //for now we always start at 0 1215 //fallback to tolerant algorithm, it would look for differnt element stored at different 1216 //depth and get us the pointer location. 1217 return fShouldSkip && skipElement((short)0); 1218 1219 } 1220 1221 //start of the column at which it should try searching skipElement(short column)1222 boolean skipElement(short column) throws IOException { 1223 short depth = (short)fElementStack.fDepth ; 1224 1225 if(depth > MAX_DEPTH_LIMIT){ 1226 return fShouldSkip = false ; 1227 } 1228 for(short i = column ; i < MAX_POINTER_AT_A_DEPTH ; i++){ 1229 short pointer = getElementPointer(depth , i ) ; 1230 1231 if(pointer == 0){ 1232 return fShouldSkip = false ; 1233 } 1234 1235 if(fElementArray[pointer] != null && skipFromTheBuffer(fElementArray[pointer])){ 1236 if(DEBUG_SKIP_ALGORITHM){ 1237 System.out.println(); 1238 System.out.println("Element " + fElementRawname + " was SKIPPED at depth = " + 1239 fElementStack.fDepth + " column = " + column ); 1240 System.out.println(); 1241 } 1242 fLastPointerLocation = pointer ; 1243 return fShouldSkip = true ; 1244 } 1245 } 1246 return fShouldSkip = false ; 1247 } 1248 1249 /** 1250 * Scans a start element. This method will handle the binding of 1251 * namespace information and notifying the handler of the start 1252 * of the element. 1253 * <p> 1254 * <pre> 1255 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? 
'/>' 1256 * [40] STag ::= '<' Name (S Attribute)* S? '>' 1257 * </pre> 1258 * <p> 1259 * <strong>Note:</strong> This method assumes that the leading 1260 * '<' character has been consumed. 1261 * <p> 1262 * <strong>Note:</strong> This method uses the fElementQName and 1263 * fAttributes variables. The contents of these variables will be 1264 * destroyed. The caller should copy important information out of 1265 * these variables before calling this method. 1266 * NB: Content in fAttributes is valid only till the state of the parser is XMLEvent.START_ELEMENT 1267 * 1268 * @return True if element is empty. (i.e. It matches 1269 * production [44]. 1270 */ 1271 // fElementQName will have the details of element just read.. 1272 // fAttributes will have the details of all the attributes. scanStartElement()1273 protected boolean scanStartElement() 1274 throws IOException, XNIException { 1275 1276 if (DEBUG_START_END_ELEMENT) System.out.println( this.getClass().toString() + ">>> scanStartElement()"); 1277 //when skipping is true and no more elements should be added 1278 if(fSkip && !fAdd){ 1279 //get the stored element -- if everything goes right this should match the 1280 //token in the buffer 1281 1282 QName name = fElementStack.getNext(); 1283 1284 if(DEBUG_SKIP_ALGORITHM){ 1285 System.out.println("Trying to skip String = " + name.rawname); 1286 } 1287 1288 //Be conservative -- if skipping fails -- stop. 
1289 fSkip = fEntityScanner.skipString(name.rawname); 1290 1291 if(fSkip){ 1292 if(DEBUG_SKIP_ALGORITHM){ 1293 System.out.println("Element SUCESSFULLY skipped = " + name.rawname); 1294 } 1295 fElementStack.push(); 1296 fElementQName = name; 1297 }else{ 1298 //if skipping fails reposition the stack or fallback to normal way of processing 1299 fElementStack.reposition(); 1300 if(DEBUG_SKIP_ALGORITHM){ 1301 System.out.println("Element was NOT skipped, REPOSITIONING stack" ); 1302 } 1303 } 1304 } 1305 1306 //we are still at the stage of adding elements 1307 //the elements were not matched or 1308 //fSkip is not set to true 1309 if(!fSkip || fAdd){ 1310 //get the next element from the stack 1311 fElementQName = fElementStack.nextElement(); 1312 // name 1313 if (fNamespaces) { 1314 fEntityScanner.scanQName(fElementQName, NameType.ELEMENTSTART); 1315 } else { 1316 String name = fEntityScanner.scanName(NameType.ELEMENTSTART); 1317 fElementQName.setValues(null, name, name, null); 1318 } 1319 1320 if(DEBUG)System.out.println("Element scanned in start element is " + fElementQName.toString()); 1321 if(DEBUG_SKIP_ALGORITHM){ 1322 if(fAdd){ 1323 System.out.println("Elements are being ADDED -- elemet added is = " + 1324 fElementQName.rawname + " at count = " + fElementStack.fCount); 1325 } 1326 } 1327 1328 } 1329 1330 //when the elements are being added , we need to check if we are set for skipping the elements 1331 if(fAdd){ 1332 //this sets the value of fAdd variable 1333 fElementStack.matchElement(fElementQName); 1334 } 1335 1336 1337 //xxx: We dont need another pointer, fCurrentElement, we can use fElementQName 1338 fCurrentElement = fElementQName; 1339 1340 String rawname = fElementQName.rawname; 1341 1342 fEmptyElement = false; 1343 1344 fAttributes.removeAllAttributes(); 1345 1346 checkDepth(rawname); 1347 if(!seekCloseOfStartTag()){ 1348 fReadingAttributes = true; 1349 fAttributeCacheUsedCount =0; 1350 fStringBufferIndex =0; 1351 fAddDefaultAttr = true; 1352 do { 1353 
scanAttribute(fAttributes); 1354 if (fSecurityManager != null && !fSecurityManager.isNoLimit(fElementAttributeLimit) && 1355 fAttributes.getLength() > fElementAttributeLimit){ 1356 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, 1357 "ElementAttributeLimit", 1358 new Object[]{rawname, fElementAttributeLimit }, 1359 XMLErrorReporter.SEVERITY_FATAL_ERROR ); 1360 } 1361 1362 } while (!seekCloseOfStartTag()); 1363 fReadingAttributes=false; 1364 } 1365 1366 if (fEmptyElement) { 1367 //decrease the markup depth.. 1368 fMarkupDepth--; 1369 1370 // check that this element was opened in the same entity 1371 if (fMarkupDepth < fEntityStack[fEntityDepth - 1]) { 1372 reportFatalError("ElementEntityMismatch", 1373 new Object[]{fCurrentElement.rawname}); 1374 } 1375 // call handler 1376 if (fDocumentHandler != null) { 1377 fDocumentHandler.emptyElement(fElementQName, fAttributes, null); 1378 } 1379 1380 //We should not be popping out the context here in endELement becaause the namespace context is still 1381 //valid when parser is at the endElement state. 1382 //if (fNamespaces) { 1383 // fNamespaceContext.popContext(); 1384 //} 1385 1386 //pop the element off the stack.. 1387 fElementStack.popElement(); 1388 1389 } else { 1390 1391 if(dtdGrammarUtil != null) 1392 dtdGrammarUtil.startElement(fElementQName, fAttributes); 1393 if(fDocumentHandler != null){ 1394 //complete element and attributes are traversed in this function so we can send a callback 1395 //here. 1396 //<strong>we shouldn't be sending callback in scanDocument()</strong> 1397 fDocumentHandler.startElement(fElementQName, fAttributes, null); 1398 } 1399 } 1400 1401 1402 if (DEBUG_START_END_ELEMENT) System.out.println(this.getClass().toString() + 1403 "<<< scanStartElement(): "+fEmptyElement); 1404 return fEmptyElement; 1405 1406 } // scanStartElement():boolean 1407 1408 /** 1409 * Looks for the close of start tag, i.e. if it finds '>' or '/>' 1410 * Characters are consumed. 
1411 */ seekCloseOfStartTag()1412 protected boolean seekCloseOfStartTag() throws IOException, XNIException { 1413 // spaces 1414 boolean sawSpace = fEntityScanner.skipSpaces(); 1415 1416 // end tag? 1417 final int c = fEntityScanner.peekChar(); 1418 if (c == '>') { 1419 fEntityScanner.scanChar(null); 1420 return true; 1421 } else if (c == '/') { 1422 fEntityScanner.scanChar(null); 1423 if (!fEntityScanner.skipChar('>', NameType.ELEMENTEND)) { 1424 reportFatalError("ElementUnterminated", 1425 new Object[]{fElementQName.rawname}); 1426 } 1427 fEmptyElement = true; 1428 return true; 1429 } else if (!isValidNameStartChar(c) || !sawSpace) { 1430 // Second chance. Check if this character is a high 1431 // surrogate of a valid name start character. 1432 if (!isValidNameStartHighSurrogate(c) || !sawSpace) { 1433 reportFatalError("ElementUnterminated", 1434 new Object[]{fElementQName.rawname}); 1435 } 1436 } 1437 1438 return false; 1439 } 1440 hasAttributes()1441 public boolean hasAttributes(){ 1442 return fAttributes.getLength() > 0; 1443 } 1444 1445 /** return the attribute iterator implementation */ getAttributeIterator()1446 public XMLAttributesIteratorImpl getAttributeIterator(){ 1447 if(dtdGrammarUtil != null && fAddDefaultAttr){ 1448 dtdGrammarUtil.addDTDDefaultAttrs(fElementQName,fAttributes); 1449 fAddDefaultAttr = false; 1450 } 1451 return fAttributes; 1452 } 1453 1454 /** return if standalone is set */ standaloneSet()1455 public boolean standaloneSet(){ 1456 return fStandaloneSet; 1457 } 1458 /** return if the doucment is standalone */ isStandAlone()1459 public boolean isStandAlone(){ 1460 return fStandalone ; 1461 } 1462 /** 1463 * Scans an attribute name value pair. 1464 * <p> 1465 * <pre> 1466 * [41] Attribute ::= Name Eq AttValue 1467 * </pre> 1468 * <p> 1469 * <strong>Note:</strong> This method assumes that the next 1470 * character on the stream is the first character of the attribute 1471 * name. 
1472 * <p> 1473 * <strong>Note:</strong> This method uses the fAttributeQName and 1474 * fQName variables. The contents of these variables will be 1475 * destroyed. 1476 * 1477 * @param attributes The attributes list for the scanned attribute. 1478 */ 1479 scanAttribute(XMLAttributes attributes)1480 protected void scanAttribute(XMLAttributes attributes) 1481 throws IOException, XNIException { 1482 if (DEBUG_START_END_ELEMENT) System.out.println(this.getClass().toString() +">>> scanAttribute()"); 1483 1484 // name 1485 if (fNamespaces) { 1486 fEntityScanner.scanQName(fAttributeQName, NameType.ATTRIBUTENAME); 1487 } else { 1488 String name = fEntityScanner.scanName(NameType.ATTRIBUTENAME); 1489 fAttributeQName.setValues(null, name, name, null); 1490 } 1491 1492 // equals 1493 fEntityScanner.skipSpaces(); 1494 if (!fEntityScanner.skipChar('=', NameType.ATTRIBUTE)) { 1495 reportFatalError("EqRequiredInAttribute", 1496 new Object[] {fCurrentElement.rawname, fAttributeQName.rawname}); 1497 } 1498 fEntityScanner.skipSpaces(); 1499 1500 int attIndex = 0 ; 1501 //REVISIT: one more case needs to be included: external PE and standalone is no 1502 boolean isVC = fHasExternalDTD && !fStandalone; 1503 //fTempString would store attribute value 1504 ///fTempString2 would store attribute non-normalized value 1505 1506 //this function doesn't use 'attIndex'. We are adding the attribute later 1507 //after we have figured out that current attribute is not namespace declaration 1508 //since scanAttributeValue doesn't use attIndex parameter therefore we 1509 //can safely add the attribute later.. 1510 XMLString tmpStr = getString(); 1511 1512 scanAttributeValue(tmpStr, fTempString2, fAttributeQName.rawname, attributes, 1513 attIndex, isVC, fCurrentElement.rawname, false); 1514 1515 // content 1516 int oldLen = attributes.getLength(); 1517 //if the attribute name already exists.. 
new value is replaced with old value 1518 attIndex = attributes.addAttribute(fAttributeQName, XMLSymbols.fCDATASymbol, null); 1519 1520 // WFC: Unique Att Spec 1521 //attributes count will be same if the current attribute name already exists for this element name. 1522 //this means there are two duplicate attributes. 1523 if (oldLen == attributes.getLength()) { 1524 reportFatalError("AttributeNotUnique", 1525 new Object[]{fCurrentElement.rawname, 1526 fAttributeQName.rawname}); 1527 } 1528 1529 //tmpString contains attribute value 1530 //we are passing null as the attribute value 1531 attributes.setValue(attIndex, null, tmpStr); 1532 1533 ///xxx: nonNormalizedValue is not being set as it is not required by SAX & DOM 1534 //attributes.setNonNormalizedValue(oldLen, fTempString2.toString()); 1535 attributes.setSpecified(attIndex, true); 1536 1537 if (DEBUG_START_END_ELEMENT) System.out.println(this.getClass().toString() +"<<< scanAttribute()"); 1538 1539 } // scanAttribute(XMLAttributes) 1540 1541 /** 1542 * Scans element content. 1543 * 1544 * @return Returns the next character on the stream. 1545 */ 1546 //CHANGED: 1547 //EARLIER: scanContent() 1548 //NOW: scanContent(XMLStringBuffer) 1549 //It makes things easy if this functions takes XMLStringBuffer as parameter.. 1550 //this function appends the data to the buffer. scanContent(XMLStringBuffer content)1551 protected int scanContent(XMLStringBuffer content) throws IOException, XNIException { 1552 //set the fTempString length to 0 before passing it on to scanContent 1553 //scanContent sets the correct co-ordinates as per the content read 1554 fTempString.length = 0; 1555 int c = fEntityScanner.scanContent(fTempString); 1556 content.append(fTempString); 1557 fTempString.length = 0; 1558 if (c == '\r') { 1559 // happens when there is the character reference 1560 //xxx: We know the next chracter.. 
we should just skip it and add ']' directlry 1561 fEntityScanner.scanChar(null); 1562 content.append((char)c); 1563 c = -1; 1564 } else if (c == ']') { 1565 //fStringBuffer.clear(); 1566 //xxx: We know the next chracter.. we should just skip it and add ']' directlry 1567 content.append((char)fEntityScanner.scanChar(null)); 1568 // remember where we are in case we get an endEntity before we 1569 // could flush the buffer out - this happens when we're parsing an 1570 // entity which ends with a ] 1571 fInScanContent = true; 1572 // 1573 // We work on a single character basis to handle cases such as: 1574 // ']]]>' which we might otherwise miss. 1575 // 1576 if (fEntityScanner.skipChar(']', null)) { 1577 content.append(']'); 1578 while (fEntityScanner.skipChar(']', null)) { 1579 content.append(']'); 1580 } 1581 if (fEntityScanner.skipChar('>', null)) { 1582 reportFatalError("CDEndInContent", null); 1583 } 1584 } 1585 fInScanContent = false; 1586 c = -1; 1587 } 1588 if (fDocumentHandler != null && content.length > 0) { 1589 //fDocumentHandler.characters(content, null); 1590 } 1591 return c; 1592 1593 } // scanContent():int 1594 1595 1596 /** 1597 * Scans a CDATA section. 1598 * <p> 1599 * <strong>Note:</strong> This method uses the fTempString and 1600 * fStringBuffer variables. 1601 * 1602 * @param complete True if the CDATA section is to be scanned 1603 * completely. 1604 * 1605 * @return True if CDATA is completely scanned. 
1606 */ 1607 //CHANGED: scanCDATASection(XMLStringBuffer contentBuffer, boolean complete)1608 protected boolean scanCDATASection(XMLStringBuffer contentBuffer, boolean complete) 1609 throws IOException, XNIException { 1610 1611 // call handler 1612 if (fDocumentHandler != null) { 1613 //fDocumentHandler.startCDATA(null); 1614 } 1615 1616 while (true) { 1617 //scanData will fill the contentBuffer 1618 if (!fEntityScanner.scanData("]]>", contentBuffer, fChunkSize)) { 1619 fInCData = false; 1620 fCDataEnd = true; 1621 fMarkupDepth--; 1622 break ; 1623 } else { 1624 int c = fEntityScanner.peekChar(); 1625 if (c != -1 && isInvalidLiteral(c)) { 1626 if (XMLChar.isHighSurrogate(c)) { 1627 //contentBuffer.clear(); 1628 //scan surrogates if any.... 1629 scanSurrogates(contentBuffer); 1630 } else { 1631 reportFatalError("InvalidCharInCDSect", 1632 new Object[]{Integer.toString(c,16)}); 1633 fEntityScanner.scanChar(null); 1634 } 1635 } else { 1636 //CData partially returned due to the size limit 1637 fInCData = true; 1638 fCDataEnd = false; 1639 break; 1640 } 1641 //by this time we have also read surrogate contents if any... 1642 if (fDocumentHandler != null) { 1643 //fDocumentHandler.characters(contentBuffer, null); 1644 } 1645 } 1646 } 1647 1648 return true; 1649 1650 } // scanCDATASection(XMLStringBuffer, boolean):boolean 1651 1652 /** 1653 * Scans an end element. 1654 * <p> 1655 * <pre> 1656 * [42] ETag ::= '</' Name S? '>' 1657 * </pre> 1658 * <p> 1659 * <strong>Note:</strong> This method uses the fElementQName variable. 1660 * The contents of this variable will be destroyed. The caller should 1661 * copy the needed information out of this variable before calling 1662 * this method. 1663 * 1664 * @return The element depth. 
1665 */ scanEndElement()1666 protected int scanEndElement() throws IOException, XNIException { 1667 if (DEBUG_START_END_ELEMENT) System.out.println(this.getClass().toString() +">>> scanEndElement()"); 1668 1669 // pop context 1670 QName endElementName = fElementStack.popElement(); 1671 1672 String rawname = endElementName.rawname; 1673 if(DEBUG)System.out.println("endElementName = " + endElementName.toString()); 1674 // Take advantage of the fact that next string _should_ be "fElementQName.rawName", 1675 //In scanners most of the time is consumed on checks done for XML characters, we can 1676 // optimize on it and avoid the checks done for endElement, 1677 //we will also avoid symbol table lookup. 1678 1679 // this should work both for namespace processing true or false... 1680 1681 //REVISIT: if the string is not the same as expected.. we need to do better error handling.. 1682 //We can skip this for now... In any case if the string doesn't match -- document is not well formed. 1683 1684 if (!fEntityScanner.skipString(endElementName.rawname)) { 1685 reportFatalError("ETagRequired", new Object[]{rawname}); 1686 } 1687 1688 // end 1689 fEntityScanner.skipSpaces(); 1690 if (!fEntityScanner.skipChar('>', NameType.ELEMENTEND)) { 1691 reportFatalError("ETagUnterminated", 1692 new Object[]{rawname}); 1693 } 1694 fMarkupDepth--; 1695 1696 //we have increased the depth for two markup "<" characters 1697 fMarkupDepth--; 1698 1699 // check that this element was opened in the same entity 1700 if (fMarkupDepth < fEntityStack[fEntityDepth - 1]) { 1701 reportFatalError("ElementEntityMismatch", 1702 new Object[]{rawname}); 1703 } 1704 1705 //We should not be popping out the context here in endELement becaause the namespace context is still 1706 //valid when parser is at the endElement state. 
1707 1708 //if (fNamespaces) { 1709 // fNamespaceContext.popContext(); 1710 //} 1711 1712 // call handler 1713 if (fDocumentHandler != null ) { 1714 //end element is scanned in this function so we can send a callback 1715 //here. 1716 //<strong>we shouldn't be sending callback in scanDocument()</strong> 1717 1718 fDocumentHandler.endElement(endElementName, null); 1719 } 1720 if(dtdGrammarUtil != null) 1721 dtdGrammarUtil.endElement(endElementName); 1722 1723 return fMarkupDepth; 1724 1725 } // scanEndElement():int 1726 1727 /** 1728 * Scans a character reference. 1729 * <p> 1730 * <pre> 1731 * [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';' 1732 * </pre> 1733 */ scanCharReference()1734 protected void scanCharReference() 1735 throws IOException, XNIException { 1736 1737 fStringBuffer2.clear(); 1738 int ch = scanCharReferenceValue(fStringBuffer2, null); 1739 fMarkupDepth--; 1740 if (ch != -1) { 1741 // call handler 1742 1743 if (fDocumentHandler != null) { 1744 if (fNotifyCharRefs) { 1745 fDocumentHandler.startGeneralEntity(fCharRefLiteral, null, null, null); 1746 } 1747 Augmentations augs = null; 1748 if (fValidation && ch <= 0x20) { 1749 if (fTempAugmentations != null) { 1750 fTempAugmentations.removeAllItems(); 1751 } 1752 else { 1753 fTempAugmentations = new AugmentationsImpl(); 1754 } 1755 augs = fTempAugmentations; 1756 augs.putItem(Constants.CHAR_REF_PROBABLE_WS, Boolean.TRUE); 1757 } 1758 //xxx: How do we deal with this - how to return charReferenceValues 1759 //now this is being commented because this is taken care in scanDocument() 1760 //fDocumentHandler.characters(fStringBuffer2, null); 1761 if (fNotifyCharRefs) { 1762 fDocumentHandler.endGeneralEntity(fCharRefLiteral, null); 1763 } 1764 } 1765 } 1766 1767 } // scanCharReference() 1768 1769 1770 /** 1771 * Scans an entity reference. 1772 * 1773 * @return returns true if the new entity is started. If it was built-in entity 1774 * 'false' is returned. 
1775 * @throws IOException Thrown if i/o error occurs. 1776 * @throws XNIException Thrown if handler throws exception upon 1777 * notification. 1778 */ scanEntityReference(XMLStringBuffer content)1779 protected void scanEntityReference(XMLStringBuffer content) throws IOException, XNIException { 1780 String name = fEntityScanner.scanName(NameType.REFERENCE); 1781 if (name == null) { 1782 reportFatalError("NameRequiredInReference", null); 1783 return; 1784 } 1785 if (!fEntityScanner.skipChar(';', NameType.REFERENCE)) { 1786 reportFatalError("SemicolonRequiredInReference", new Object []{name}); 1787 } 1788 if (fEntityStore.isUnparsedEntity(name)) { 1789 reportFatalError("ReferenceToUnparsedEntity", new Object[]{name}); 1790 } 1791 fMarkupDepth--; 1792 fCurrentEntityName = name; 1793 1794 // handle built-in entities 1795 if (name == fAmpSymbol) { 1796 handleCharacter('&', fAmpSymbol, content); 1797 fScannerState = SCANNER_STATE_BUILT_IN_REFS; 1798 return ; 1799 } else if (name == fLtSymbol) { 1800 handleCharacter('<', fLtSymbol, content); 1801 fScannerState = SCANNER_STATE_BUILT_IN_REFS; 1802 return ; 1803 } else if (name == fGtSymbol) { 1804 handleCharacter('>', fGtSymbol, content); 1805 fScannerState = SCANNER_STATE_BUILT_IN_REFS; 1806 return ; 1807 } else if (name == fQuotSymbol) { 1808 handleCharacter('"', fQuotSymbol, content); 1809 fScannerState = SCANNER_STATE_BUILT_IN_REFS; 1810 return ; 1811 } else if (name == fAposSymbol) { 1812 handleCharacter('\'', fAposSymbol, content); 1813 fScannerState = SCANNER_STATE_BUILT_IN_REFS; 1814 return ; 1815 } 1816 1817 //1. if the entity is external and support to external entities is not required 1818 // 2. or entities should not be replaced 1819 //3. or if it is built in entity reference. 
1820 boolean isEE = fEntityStore.isExternalEntity(name); 1821 if((isEE && !fSupportExternalEntities) || (!isEE && !fReplaceEntityReferences) || foundBuiltInRefs){ 1822 fScannerState = SCANNER_STATE_REFERENCE; 1823 return ; 1824 } 1825 // start general entity 1826 if (!fEntityStore.isDeclaredEntity(name)) { 1827 //SUPPORT_DTD=false && ReplaceEntityReferences should throw exception 1828 if (!fSupportDTD && fReplaceEntityReferences) { 1829 reportFatalError("EntityNotDeclared", new Object[]{name}); 1830 return; 1831 } 1832 //REVISIT: one more case needs to be included: external PE and standalone is no 1833 if ( fHasExternalDTD && !fStandalone) { 1834 if (fValidation) 1835 fErrorReporter.reportError(fEntityScanner, XMLMessageFormatter.XML_DOMAIN,"EntityNotDeclared", 1836 new Object[]{name}, XMLErrorReporter.SEVERITY_ERROR); 1837 } else 1838 reportFatalError("EntityNotDeclared", new Object[]{name}); 1839 } 1840 //we are starting the entity even if the entity was not declared 1841 //if that was the case it its taken care in XMLEntityManager.startEntity() 1842 //we immediately call the endEntity. Application gets to know if there was 1843 //any entity that was not declared. 1844 fEntityManager.startEntity(true, name, false); 1845 //set the scaner state to content.. parser will automatically revive itself at any point of time. 
1846 //setScannerState(SCANNER_STATE_CONTENT); 1847 //return true ; 1848 } // scanEntityReference() 1849 1850 // utility methods 1851 1852 /** 1853 * Check if the depth exceeds the maxElementDepth limit 1854 * @param elementName name of the current element 1855 */ checkDepth(String elementName)1856 void checkDepth(String elementName) { 1857 fLimitAnalyzer.addValue(Limit.MAX_ELEMENT_DEPTH_LIMIT, elementName, fElementStack.fDepth); 1858 if (fSecurityManager.isOverLimit(Limit.MAX_ELEMENT_DEPTH_LIMIT,fLimitAnalyzer)) { 1859 fSecurityManager.debugPrint(fLimitAnalyzer); 1860 reportFatalError("MaxElementDepthLimit", new Object[]{elementName, 1861 fLimitAnalyzer.getTotalValue(Limit.MAX_ELEMENT_DEPTH_LIMIT), 1862 fSecurityManager.getLimit(Limit.MAX_ELEMENT_DEPTH_LIMIT), 1863 "maxElementDepth"}); 1864 } 1865 } 1866 1867 /** 1868 * Calls document handler with a single character resulting from 1869 * built-in entity resolution. 1870 * 1871 * @param c 1872 * @param entity built-in name 1873 * @param XMLStringBuffer append the character to buffer 1874 * 1875 * we really dont need to call this function -- this function is only required when 1876 * we integrate with rest of Xerces2. SO maintaining the current behavior and still 1877 * calling this function to hanlde built-in entity reference. 
1878 * 1879 */ handleCharacter(char c, String entity, XMLStringBuffer content)1880 private void handleCharacter(char c, String entity, XMLStringBuffer content) throws XNIException { 1881 foundBuiltInRefs = true; 1882 checkEntityLimit(false, fEntityScanner.fCurrentEntity.name, 1); 1883 content.append(c); 1884 if (fDocumentHandler != null) { 1885 fSingleChar[0] = c; 1886 if (fNotifyBuiltInRefs) { 1887 fDocumentHandler.startGeneralEntity(entity, null, null, null); 1888 } 1889 fTempString.setValues(fSingleChar, 0, 1); 1890 if(!fIsCoalesce){ 1891 fDocumentHandler.characters(fTempString, null); 1892 builtInRefCharacterHandled = true; 1893 } 1894 1895 if (fNotifyBuiltInRefs) { 1896 fDocumentHandler.endGeneralEntity(entity, null); 1897 } 1898 } 1899 } // handleCharacter(char) 1900 1901 // helper methods 1902 1903 /** 1904 * Sets the scanner state. 1905 * 1906 * @param state The new scanner state. 1907 */ setScannerState(int state)1908 protected final void setScannerState(int state) { 1909 1910 fScannerState = state; 1911 if (DEBUG_SCANNER_STATE) { 1912 System.out.print("### setScannerState: "); 1913 //System.out.print(fScannerState); 1914 System.out.print(getScannerStateName(state)); 1915 System.out.println(); 1916 } 1917 1918 } // setScannerState(int) 1919 1920 1921 /** 1922 * Sets the Driver. 1923 * 1924 * @param Driver The new Driver. 1925 */ setDriver(Driver driver)1926 protected final void setDriver(Driver driver) { 1927 fDriver = driver; 1928 if (DEBUG_DISPATCHER) { 1929 System.out.print("%%% setDriver: "); 1930 System.out.print(getDriverName(driver)); 1931 System.out.println(); 1932 } 1933 } 1934 1935 // 1936 // Private methods 1937 // 1938 1939 /** Returns the scanner state name. 
*/ getScannerStateName(int state)1940 protected String getScannerStateName(int state) { 1941 1942 switch (state) { 1943 case SCANNER_STATE_DOCTYPE: return "SCANNER_STATE_DOCTYPE"; 1944 case SCANNER_STATE_ROOT_ELEMENT: return "SCANNER_STATE_ROOT_ELEMENT"; 1945 case SCANNER_STATE_START_OF_MARKUP: return "SCANNER_STATE_START_OF_MARKUP"; 1946 case SCANNER_STATE_COMMENT: return "SCANNER_STATE_COMMENT"; 1947 case SCANNER_STATE_PI: return "SCANNER_STATE_PI"; 1948 case SCANNER_STATE_CONTENT: return "SCANNER_STATE_CONTENT"; 1949 case SCANNER_STATE_REFERENCE: return "SCANNER_STATE_REFERENCE"; 1950 case SCANNER_STATE_END_OF_INPUT: return "SCANNER_STATE_END_OF_INPUT"; 1951 case SCANNER_STATE_TERMINATED: return "SCANNER_STATE_TERMINATED"; 1952 case SCANNER_STATE_CDATA: return "SCANNER_STATE_CDATA"; 1953 case SCANNER_STATE_TEXT_DECL: return "SCANNER_STATE_TEXT_DECL"; 1954 case SCANNER_STATE_ATTRIBUTE: return "SCANNER_STATE_ATTRIBUTE"; 1955 case SCANNER_STATE_ATTRIBUTE_VALUE: return "SCANNER_STATE_ATTRIBUTE_VALUE"; 1956 case SCANNER_STATE_START_ELEMENT_TAG: return "SCANNER_STATE_START_ELEMENT_TAG"; 1957 case SCANNER_STATE_END_ELEMENT_TAG: return "SCANNER_STATE_END_ELEMENT_TAG"; 1958 case SCANNER_STATE_CHARACTER_DATA: return "SCANNER_STATE_CHARACTER_DATA" ; 1959 } 1960 1961 return "??? ("+state+')'; 1962 1963 } // getScannerStateName(int):String getEntityName()1964 public String getEntityName(){ 1965 //return the cached name 1966 return fCurrentEntityName; 1967 } 1968 1969 /** Returns the driver name. 
*/ getDriverName(Driver driver)1970 public String getDriverName(Driver driver) { 1971 1972 if (DEBUG_DISPATCHER) { 1973 if (driver != null) { 1974 String name = driver.getClass().getName(); 1975 int index = name.lastIndexOf('.'); 1976 if (index != -1) { 1977 name = name.substring(index + 1); 1978 index = name.lastIndexOf('$'); 1979 if (index != -1) { 1980 name = name.substring(index + 1); 1981 } 1982 } 1983 return name; 1984 } 1985 } 1986 return "null"; 1987 1988 } // getDriverName():String 1989 1990 /** 1991 * Check the protocol used in the systemId against allowed protocols 1992 * 1993 * @param systemId the Id of the URI 1994 * @param allowedProtocols a list of allowed protocols separated by comma 1995 * @return the name of the protocol if rejected, null otherwise 1996 */ checkAccess(String systemId, String allowedProtocols)1997 String checkAccess(String systemId, String allowedProtocols) throws IOException { 1998 String baseSystemId = fEntityScanner.getBaseSystemId(); 1999 String expandedSystemId = XMLEntityManager.expandSystemId(systemId, baseSystemId, fStrictURI); 2000 return SecuritySupport.checkAccess(expandedSystemId, allowedProtocols, Constants.ACCESS_EXTERNAL_ALL); 2001 } 2002 2003 // 2004 // Classes 2005 // 2006 2007 /** 2008 * @author Neeraj Bajaj, Sun Microsystems. 2009 */ 2010 protected static final class Element { 2011 2012 // 2013 // Data 2014 // 2015 2016 /** Symbol. */ 2017 public QName qname; 2018 2019 //raw name stored as characters 2020 public char[] fRawname; 2021 2022 /** The next Element entry. */ 2023 public Element next; 2024 2025 // 2026 // Constructors 2027 // 2028 2029 /** 2030 * Constructs a new Element from the given QName and next Element 2031 * reference. 2032 */ Element(QName qname, Element next)2033 public Element(QName qname, Element next) { 2034 this.qname.setValues(qname); 2035 this.fRawname = qname.rawname.toCharArray(); 2036 this.next = next; 2037 } 2038 2039 } // class Element 2040 2041 /** 2042 * Element stack. 
2043 * 2044 * @author Neeraj Bajaj, Sun Microsystems. 2045 */ 2046 protected class ElementStack2 { 2047 2048 // 2049 // Data 2050 // 2051 2052 /** The stack data. */ 2053 protected QName [] fQName = new QName[20]; 2054 2055 //Element depth 2056 protected int fDepth; 2057 //total number of elements 2058 protected int fCount; 2059 //current position 2060 protected int fPosition; 2061 //Mark refers to the position 2062 protected int fMark; 2063 2064 protected int fLastDepth ; 2065 2066 // 2067 // Constructors 2068 // 2069 2070 /** Default constructor. */ ElementStack2()2071 public ElementStack2() { 2072 for (int i = 0; i < fQName.length; i++) { 2073 fQName[i] = new QName(); 2074 } 2075 fMark = fPosition = 1; 2076 } // <init>() 2077 resize()2078 public void resize(){ 2079 /** 2080 * int length = fElements.length; 2081 * Element [] temp = new Element[length * 2]; 2082 * System.arraycopy(fElements, 0, temp, 0, length); 2083 * fElements = temp; 2084 */ 2085 //resize QNames 2086 int oldLength = fQName.length; 2087 QName [] tmp = new QName[oldLength * 2]; 2088 System.arraycopy(fQName, 0, tmp, 0, oldLength); 2089 fQName = tmp; 2090 2091 for (int i = oldLength; i < fQName.length; i++) { 2092 fQName[i] = new QName(); 2093 } 2094 2095 } 2096 2097 2098 // 2099 // Public methods 2100 // 2101 2102 /** Check if the element scanned during the start element 2103 *matches the stored element. 2104 * 2105 *@return true if the match suceeds. 
2106 */ matchElement(QName element)2107 public boolean matchElement(QName element) { 2108 //last depth is the depth when last elemnt was pushed 2109 //if last depth is greater than current depth 2110 if(DEBUG_SKIP_ALGORITHM){ 2111 System.out.println("fLastDepth = " + fLastDepth); 2112 System.out.println("fDepth = " + fDepth); 2113 } 2114 boolean match = false; 2115 if(fLastDepth > fDepth && fDepth <= 2){ 2116 if(DEBUG_SKIP_ALGORITHM){ 2117 System.out.println("Checking if the elements match " + element.rawname + " , " + fQName[fDepth].rawname); 2118 } 2119 if(element.rawname == fQName[fDepth].rawname){ 2120 fAdd = false; 2121 //mark this position 2122 //decrease the depth by 1 as arrays are 0 based 2123 fMark = fDepth - 1; 2124 //we found the match and from next element skipping will start, add 1 2125 fPosition = fMark + 1 ; 2126 match = true; 2127 //Once we get match decrease the count -- this was increased by nextElement() 2128 --fCount; 2129 if(DEBUG_SKIP_ALGORITHM){ 2130 System.out.println("fAdd FALSE -- NOW ELEMENT SHOULD NOT BE ADDED"); 2131 System.out.println("fMark = " + fMark); 2132 System.out.println("fPosition = " + fPosition); 2133 System.out.println("fDepth = " + fDepth); 2134 System.out.println("fCount = " + fCount); 2135 } 2136 }else{ 2137 fAdd = true; 2138 if(DEBUG_SKIP_ALGORITHM)System.out.println("fAdd is " + fAdd); 2139 } 2140 } 2141 //store the last depth 2142 fLastDepth = fDepth++; 2143 return match; 2144 } // pushElement(QName):QName 2145 2146 /** 2147 * This function doesn't increase depth. The function in this function is 2148 *broken down into two functions for efficiency. <@see>matchElement</see>. 2149 * This function just returns the pointer to the object and its values are set. 
2150 * 2151 *@return QName reference to the next element in the list 2152 */ nextElement()2153 public QName nextElement() { 2154 2155 //if number of elements becomes equal to the length of array -- stop the skipping 2156 if (fCount == fQName.length) { 2157 fShouldSkip = false; 2158 fAdd = false; 2159 if(DEBUG_SKIP_ALGORITHM)System.out.println("SKIPPING STOPPED, fShouldSkip = " + fShouldSkip); 2160 //xxx: this is not correct, we are returning the last element 2161 //this wont make any difference since flag has been set to 'false' 2162 return fQName[--fCount]; 2163 } 2164 if(DEBUG_SKIP_ALGORITHM){ 2165 System.out.println("fCount = " + fCount); 2166 } 2167 return fQName[fCount++]; 2168 2169 } 2170 2171 /** Note that this function is considerably different than nextElement() 2172 * This function just returns the previously stored elements 2173 */ getNext()2174 public QName getNext(){ 2175 //when position reaches number of elements in the list.. 2176 //set the position back to mark, making it a circular linked list. 2177 if(fPosition == fCount){ 2178 fPosition = fMark; 2179 } 2180 return fQName[fPosition++]; 2181 } 2182 2183 /** returns the current depth 2184 */ popElement()2185 public int popElement(){ 2186 return fDepth--; 2187 } 2188 2189 2190 /** Clears the stack without throwing away existing QName objects. */ clear()2191 public void clear() { 2192 fLastDepth = 0; 2193 fDepth = 0; 2194 fCount = 0 ; 2195 fPosition = fMark = 1; 2196 } // clear() 2197 2198 } // class ElementStack 2199 2200 /** 2201 * Element stack. This stack operates without synchronization, error 2202 * checking, and it re-uses objects instead of throwing popped items 2203 * away. 2204 * 2205 * @author Andy Clark, IBM 2206 */ 2207 protected class ElementStack { 2208 2209 // 2210 // Data 2211 // 2212 2213 /** The stack data. 
*/ 2214 protected QName[] fElements; 2215 protected int [] fInt = new int[20]; 2216 2217 2218 //Element depth 2219 protected int fDepth; 2220 //total number of elements 2221 protected int fCount; 2222 //current position 2223 protected int fPosition; 2224 //Mark refers to the position 2225 protected int fMark; 2226 2227 protected int fLastDepth ; 2228 2229 // 2230 // Constructors 2231 // 2232 2233 /** Default constructor. */ ElementStack()2234 public ElementStack() { 2235 fElements = new QName[20]; 2236 for (int i = 0; i < fElements.length; i++) { 2237 fElements[i] = new QName(); 2238 } 2239 } // <init>() 2240 2241 // 2242 // Public methods 2243 // 2244 2245 /** 2246 * Pushes an element on the stack. 2247 * <p> 2248 * <strong>Note:</strong> The QName values are copied into the 2249 * stack. In other words, the caller does <em>not</em> orphan 2250 * the element to the stack. Also, the QName object returned 2251 * is <em>not</em> orphaned to the caller. It should be 2252 * considered read-only. 2253 * 2254 * @param element The element to push onto the stack. 2255 * 2256 * @return Returns the actual QName object that stores the 2257 */ 2258 //XXX: THIS FUNCTION IS NOT USED pushElement(QName element)2259 public QName pushElement(QName element) { 2260 if (fDepth == fElements.length) { 2261 QName[] array = new QName[fElements.length * 2]; 2262 System.arraycopy(fElements, 0, array, 0, fDepth); 2263 fElements = array; 2264 for (int i = fDepth; i < fElements.length; i++) { 2265 fElements[i] = new QName(); 2266 } 2267 } 2268 fElements[fDepth].setValues(element); 2269 return fElements[fDepth++]; 2270 } // pushElement(QName):QName 2271 2272 2273 /** Note that this function is considerably different than nextElement() 2274 * This function just returns the previously stored elements 2275 */ getNext()2276 public QName getNext(){ 2277 //when position reaches number of elements in the list.. 2278 //set the position back to mark, making it a circular linked list. 
2279 if(fPosition == fCount){ 2280 fPosition = fMark; 2281 } 2282 //store the position of last opened tag at particular depth 2283 //fInt[++fDepth] = fPosition; 2284 if(DEBUG_SKIP_ALGORITHM){ 2285 System.out.println("Element at fPosition = " + fPosition + " is " + fElements[fPosition].rawname); 2286 } 2287 //return fElements[fPosition++]; 2288 return fElements[fPosition]; 2289 } 2290 2291 /** This function should be called only when element was skipped sucessfully. 2292 * 1. Increase the depth - because element was sucessfully skipped. 2293 *2. Store the position of the element token in array "last opened tag" at depth. 2294 *3. increase the position counter so as to point to the next element in the array 2295 */ push()2296 public void push(){ 2297 2298 fInt[++fDepth] = fPosition++; 2299 } 2300 2301 /** Check if the element scanned during the start element 2302 *matches the stored element. 2303 * 2304 *@return true if the match suceeds. 2305 */ matchElement(QName element)2306 public boolean matchElement(QName element) { 2307 //last depth is the depth when last elemnt was pushed 2308 //if last depth is greater than current depth 2309 //if(DEBUG_SKIP_ALGORITHM){ 2310 // System.out.println("Check if the element " + element.rawname + " matches"); 2311 // System.out.println("fLastDepth = " + fLastDepth); 2312 // System.out.println("fDepth = " + fDepth); 2313 //} 2314 boolean match = false; 2315 if(fLastDepth > fDepth && fDepth <= 3){ 2316 if(DEBUG_SKIP_ALGORITHM){ 2317 System.out.println("----------ENTERED THE LOOP WHERE WE CHECK FOR MATCHING OF ELMENT-----"); 2318 System.out.println("Depth = " + fDepth + " Checking if INCOMING element " + element.rawname + " match STORED ELEMENT " + fElements[fDepth - 1].rawname); 2319 } 2320 if(element.rawname == fElements[fDepth - 1].rawname){ 2321 fAdd = false; 2322 //mark this position 2323 //decrease the depth by 1 as arrays are 0 based 2324 fMark = fDepth - 1; 2325 //we found the match 2326 fPosition = fMark; 2327 match = true; 
2328 //Once we get match decrease the count -- this was increased by nextElement() 2329 --fCount; 2330 if(DEBUG_SKIP_ALGORITHM){ 2331 System.out.println("NOW ELEMENT SHOULD NOT BE ADDED, fAdd is set to false"); 2332 System.out.println("fMark = " + fMark); 2333 System.out.println("fPosition = " + fPosition); 2334 System.out.println("fDepth = " + fDepth); 2335 System.out.println("fCount = " + fCount); 2336 System.out.println("---------MATCH SUCEEDED-----------------"); 2337 System.out.println(""); 2338 } 2339 }else{ 2340 fAdd = true; 2341 if(DEBUG_SKIP_ALGORITHM)System.out.println("fAdd is " + fAdd); 2342 } 2343 } 2344 //store the position for the current depth 2345 //when we are adding the elements, when skipping 2346 //starts even then this should be tracked ie. when 2347 //calling getNext() 2348 if(match){ 2349 //from next element skipping will start, add 1 2350 fInt[fDepth] = fPosition++; 2351 } else{ 2352 if(DEBUG_SKIP_ALGORITHM){ 2353 System.out.println("At depth = " + fDepth + "array position is = " + (fCount - 1)); 2354 } 2355 //sicne fInt[fDepth] contains pointer to the element array which are 0 based. 
2356 fInt[fDepth] = fCount - 1; 2357 } 2358 2359 //if number of elements becomes equal to the length of array -- stop the skipping 2360 //xxx: should we do "fCount == fInt.length" 2361 if (fCount == fElements.length) { 2362 fSkip = false; 2363 fAdd = false; 2364 //reposition the stack -- it seems to be too complex document and there is no symmerty in structure 2365 reposition(); 2366 if(DEBUG_SKIP_ALGORITHM){ 2367 System.out.println("ALL THE ELMENTS IN ARRAY HAVE BEEN FILLED"); 2368 System.out.println("REPOSITIONING THE STACK"); 2369 System.out.println("-----------SKIPPING STOPPED----------"); 2370 System.out.println(""); 2371 } 2372 return false; 2373 } 2374 if(DEBUG_SKIP_ALGORITHM){ 2375 if(match){ 2376 System.out.println("Storing fPosition = " + fInt[fDepth] + " at fDepth = " + fDepth); 2377 }else{ 2378 System.out.println("Storing fCount = " + fInt[fDepth] + " at fDepth = " + fDepth); 2379 } 2380 } 2381 //store the last depth 2382 fLastDepth = fDepth; 2383 return match; 2384 } // matchElement(QName):QName 2385 2386 2387 /** 2388 * Returns the next element on the stack. 2389 * 2390 * @return Returns the actual QName object. Callee should 2391 * use this object to store the details of next element encountered. 2392 */ nextElement()2393 public QName nextElement() { 2394 if(fSkip){ 2395 fDepth++; 2396 //boundary checks are done in matchElement() 2397 return fElements[fCount++]; 2398 } else if (fDepth == fElements.length) { 2399 QName[] array = new QName[fElements.length * 2]; 2400 System.arraycopy(fElements, 0, array, 0, fDepth); 2401 fElements = array; 2402 for (int i = fDepth; i < fElements.length; i++) { 2403 fElements[i] = new QName(); 2404 } 2405 } 2406 2407 return fElements[fDepth++]; 2408 2409 } // pushElement(QName):QName 2410 2411 2412 /** 2413 * Pops an element off of the stack by setting the values of 2414 * the specified QName. 2415 * <p> 2416 * <strong>Note:</strong> The object returned is <em>not</em> 2417 * orphaned to the caller. 
Therefore, the caller should consider 2418 * the object to be read-only. 2419 */ popElement()2420 public QName popElement() { 2421 //return the same object that was pushed -- this would avoid 2422 //setting the values for every end element. 2423 //STRONG: this object is read only -- this object reference shouldn't be stored. 2424 if(fSkip || fAdd ){ 2425 if(DEBUG_SKIP_ALGORITHM){ 2426 System.out.println("POPPING Element, at position " + fInt[fDepth] + " element at that count is = " + fElements[fInt[fDepth]].rawname); 2427 System.out.println(""); 2428 } 2429 return fElements[fInt[fDepth--]]; 2430 } else{ 2431 if(DEBUG_SKIP_ALGORITHM){ 2432 System.out.println("Retrieveing element at depth = " + fDepth + " is " + fElements[fDepth].rawname ); 2433 } 2434 return fElements[--fDepth] ; 2435 } 2436 //element.setValues(fElements[--fDepth]); 2437 } // popElement(QName) 2438 2439 /** Reposition the stack. fInt [] contains all the opened tags at particular depth. 2440 * Transfer all the opened tags starting from depth '2' to the current depth and reposition them 2441 *as per the depth. 2442 */ reposition()2443 public void reposition(){ 2444 for( int i = 2 ; i <= fDepth ; i++){ 2445 fElements[i-1] = fElements[fInt[i]]; 2446 } 2447 if(DEBUG_SKIP_ALGORITHM){ 2448 for( int i = 0 ; i < fDepth ; i++){ 2449 System.out.println("fElements[" + i + "]" + " = " + fElements[i].rawname); 2450 } 2451 } 2452 } 2453 2454 /** Clears the stack without throwing away existing QName objects. */ clear()2455 public void clear() { 2456 fDepth = 0; 2457 fLastDepth = 0; 2458 fCount = 0 ; 2459 fPosition = fMark = 1; 2460 2461 } // clear() 2462 2463 /** 2464 * This function is as a result of optimization done for endElement -- 2465 * we dont need to set the value for every end element encouterd. 2466 * For Well formedness checks we can have the same QName object that was pushed. 
2467 * the values will be set only if application need to know about the endElement 2468 */ 2469 getLastPoppedElement()2470 public QName getLastPoppedElement(){ 2471 return fElements[fDepth]; 2472 } 2473 } // class ElementStack 2474 2475 /** 2476 * Drives the parser to the next state/event on the input. Parser is guaranteed 2477 * to stop at the next state/event. 2478 * 2479 * Internally XML document is divided into several states. Each state represents 2480 * a sections of XML document. When this functions returns normally, it has read 2481 * the section of XML document and returns the state corresponding to section of 2482 * document which has been read. For optimizations, a particular driver 2483 * can read ahead of the section of document (state returned) just read and 2484 * can maintain a different internal state. 2485 * 2486 * 2487 * @author Neeraj Bajaj, Sun Microsystems 2488 */ 2489 protected interface Driver { 2490 2491 2492 /** 2493 * Drives the parser to the next state/event on the input. Parser is guaranteed 2494 * to stop at the next state/event. 2495 * 2496 * Internally XML document is divided into several states. Each state represents 2497 * a sections of XML document. When this functions returns normally, it has read 2498 * the section of XML document and returns the state corresponding to section of 2499 * document which has been read. For optimizations, a particular driver 2500 * can read ahead of the section of document (state returned) just read and 2501 * can maintain a different internal state. 2502 * 2503 * @return state representing the section of document just read. 2504 * 2505 * @throws IOException Thrown on i/o error. 2506 * @throws XNIException Thrown on parse error. 2507 */ 2508 next()2509 public int next() throws IOException, XNIException; 2510 2511 } // interface Driver 2512 2513 /** 2514 * Driver to handle content scanning. This driver is capable of reading 2515 * the fragment of XML document. 
When it has finished reading fragment 2516 * of XML documents, it can pass the job of reading to another driver. 2517 * 2518 * This class has been modified as per the new design which is more suited to 2519 * efficiently build pull parser. Lot of performance improvements have been done and 2520 * the code has been added to support stax functionality/features. 2521 * 2522 * @author Neeraj Bajaj, Sun Microsystems 2523 * 2524 * 2525 * @author Andy Clark, IBM 2526 * @author Eric Ye, IBM 2527 */ 2528 protected class FragmentContentDriver 2529 implements Driver { 2530 2531 // 2532 // Driver methods 2533 // 2534 2535 /** 2536 * decides the appropriate state of the parser 2537 */ startOfMarkup()2538 private void startOfMarkup() throws IOException { 2539 fMarkupDepth++; 2540 final int ch = fEntityScanner.peekChar(); 2541 if (isValidNameStartChar(ch) || isValidNameStartHighSurrogate(ch)) { 2542 setScannerState(SCANNER_STATE_START_ELEMENT_TAG); 2543 } else { 2544 switch(ch){ 2545 case '?' :{ 2546 setScannerState(SCANNER_STATE_PI); 2547 fEntityScanner.skipChar(ch, null); 2548 break; 2549 } 2550 case '!' 
:{ 2551 fEntityScanner.skipChar(ch, null); 2552 if (fEntityScanner.skipChar('-', null)) { 2553 if (!fEntityScanner.skipChar('-', NameType.COMMENT)) { 2554 reportFatalError("InvalidCommentStart", 2555 null); 2556 } 2557 setScannerState(SCANNER_STATE_COMMENT); 2558 } else if (fEntityScanner.skipString(CDATA)) { 2559 fCDataStart = true; 2560 setScannerState(SCANNER_STATE_CDATA ); 2561 } else if (!scanForDoctypeHook()) { 2562 reportFatalError("MarkupNotRecognizedInContent", 2563 null); 2564 } 2565 break; 2566 } 2567 case '/' :{ 2568 setScannerState(SCANNER_STATE_END_ELEMENT_TAG); 2569 fEntityScanner.skipChar(ch, NameType.ELEMENTEND); 2570 break; 2571 } 2572 default :{ 2573 reportFatalError("MarkupNotRecognizedInContent", null); 2574 } 2575 } 2576 } 2577 2578 }//startOfMarkup 2579 startOfContent()2580 private void startOfContent() throws IOException { 2581 if (fEntityScanner.skipChar('<', null)) { 2582 setScannerState(SCANNER_STATE_START_OF_MARKUP); 2583 } else if (fEntityScanner.skipChar('&', NameType.REFERENCE)) { 2584 setScannerState(SCANNER_STATE_REFERENCE) ; //XMLEvent.ENTITY_REFERENCE ); //SCANNER_STATE_REFERENCE 2585 } else { 2586 //element content is there.. 2587 setScannerState(SCANNER_STATE_CHARACTER_DATA); 2588 } 2589 }//startOfContent 2590 2591 2592 /** 2593 * 2594 * SCANNER_STATE_CONTENT and SCANNER_STATE_START_OF_MARKUP are two super states of the parser. 2595 * At any point of time when in doubt over the current state of the parser, the state should be 2596 * set to SCANNER_STATE_CONTENT. Parser will automatically revive itself and will set state of 2597 * the parser to one of its sub state. 2598 * sub states are defined in the parser on the basis of different XML component like 2599 * SCANNER_STATE_ENTITY_REFERENCE , SCANNER_STATE_START_ELEMENT, SCANNER_STATE_CDATA etc.. 2600 * These sub states help the parser to have fine control over the parsing. These are the 2601 * different milepost, parser stops at each sub state (milepost). 
Based on this state it is 2602 * decided if paresr needs to stop at next milepost ?? 2603 * 2604 */ decideSubState()2605 public void decideSubState() throws IOException { 2606 while( fScannerState == SCANNER_STATE_CONTENT || fScannerState == SCANNER_STATE_START_OF_MARKUP){ 2607 2608 switch (fScannerState) { 2609 2610 case SCANNER_STATE_CONTENT: { 2611 startOfContent() ; 2612 break; 2613 } 2614 2615 case SCANNER_STATE_START_OF_MARKUP: { 2616 startOfMarkup() ; 2617 break; 2618 } 2619 } 2620 } 2621 }//decideSubState 2622 2623 /** 2624 * Drives the parser to the next state/event on the input. Parser is guaranteed 2625 * to stop at the next state/event. Internally XML document 2626 * is divided into several states. Each state represents a sections of XML 2627 * document. When this functions returns normally, it has read the section 2628 * of XML document and returns the state corresponding to section of 2629 * document which has been read. For optimizations, a particular driver 2630 * can read ahead of the section of document (state returned) just read and 2631 * can maintain a different internal state. 2632 * 2633 * State returned corresponds to Stax states. 2634 * 2635 * @return state representing the section of document just read. 2636 * 2637 * @throws IOException Thrown on i/o error. 2638 * @throws XNIException Thrown on parse error. 2639 */ 2640 next()2641 public int next() throws IOException, XNIException { 2642 while (true) { 2643 try { 2644 2645 //decide the actual sub state of the scanner.For more information refer to the javadoc of 2646 //decideSubState. 2647 2648 if (fScannerState == SCANNER_STATE_CONTENT) { 2649 final int ch = fEntityScanner.peekChar(); 2650 if (ch == '<') { 2651 fEntityScanner.scanChar(null); 2652 setScannerState(SCANNER_STATE_START_OF_MARKUP); 2653 } else if (ch == '&') { 2654 fEntityScanner.scanChar(NameType.REFERENCE); 2655 setScannerState(SCANNER_STATE_REFERENCE) ; 2656 } else { 2657 //element content is there.. 
2658 setScannerState(SCANNER_STATE_CHARACTER_DATA); 2659 } 2660 } 2661 2662 if (fScannerState == SCANNER_STATE_START_OF_MARKUP) { 2663 startOfMarkup(); 2664 } 2665 2666 //decideSubState() ; 2667 2668 //do some special handling if isCoalesce is set to true. 2669 if (fIsCoalesce) { 2670 fUsebuffer = true ; 2671 //if the last section was character data 2672 if (fLastSectionWasCharacterData) { 2673 2674 //if we dont encounter any CDATA or ENTITY REFERENCE and 2675 //current state is also not SCANNER_STATE_CHARACTER_DATA 2676 //return the last scanned charactrer data. 2677 if ((fScannerState != SCANNER_STATE_CDATA) 2678 && (fScannerState != SCANNER_STATE_REFERENCE) 2679 && (fScannerState != SCANNER_STATE_CHARACTER_DATA)) { 2680 fLastSectionWasCharacterData = false; 2681 return XMLEvent.CHARACTERS; 2682 } 2683 }//if last section was CDATA or ENTITY REFERENCE 2684 //xxx: there might be another entity reference or CDATA after this 2685 //<foo>blah blah &<<![CDATA[[aa]]>blah blah</foo> 2686 else if ((fLastSectionWasCData || fLastSectionWasEntityReference)) { 2687 //and current state is not SCANNER_STATE_CHARACTER_DATA 2688 //or SCANNER_STATE_CDATA or SCANNER_STATE_REFERENCE 2689 //this means there is nothing more to be coalesced. 2690 //return the CHARACTERS event. 
2691 if ((fScannerState != SCANNER_STATE_CDATA) 2692 && (fScannerState != SCANNER_STATE_REFERENCE) 2693 && (fScannerState != SCANNER_STATE_CHARACTER_DATA)){ 2694 2695 fLastSectionWasCData = false; 2696 fLastSectionWasEntityReference = false; 2697 return XMLEvent.CHARACTERS; 2698 } 2699 } 2700 } 2701 2702 switch(fScannerState){ 2703 2704 case XMLEvent.START_DOCUMENT : 2705 return XMLEvent.START_DOCUMENT; 2706 2707 case SCANNER_STATE_START_ELEMENT_TAG :{ 2708 2709 //returns true if the element is empty 2710 fEmptyElement = scanStartElement() ; 2711 //if the element is empty the next event is "end element" 2712 if(fEmptyElement){ 2713 setScannerState(SCANNER_STATE_END_ELEMENT_TAG); 2714 }else{ 2715 //set the next possible state 2716 setScannerState(SCANNER_STATE_CONTENT); 2717 } 2718 return XMLEvent.START_ELEMENT ; 2719 } 2720 2721 case SCANNER_STATE_CHARACTER_DATA: { 2722 2723 //if last section was either entity reference or cdata or 2724 //character data we should be using buffer 2725 fUsebuffer = fLastSectionWasEntityReference || fLastSectionWasCData 2726 || fLastSectionWasCharacterData ; 2727 2728 //When coalesce is set to true and last state was REFERENCE or 2729 //CDATA or CHARACTER_DATA, buffer should not be cleared. 
2730 if( fIsCoalesce && (fLastSectionWasEntityReference || 2731 fLastSectionWasCData || fLastSectionWasCharacterData) ){ 2732 fLastSectionWasEntityReference = false; 2733 fLastSectionWasCData = false; 2734 fLastSectionWasCharacterData = true ; 2735 fUsebuffer = true; 2736 }else{ 2737 //clear the buffer 2738 fContentBuffer.clear(); 2739 } 2740 2741 //set the fTempString length to 0 before passing it on to scanContent 2742 //scanContent sets the correct co-ordinates as per the content read 2743 fTempString.length = 0; 2744 int c = fEntityScanner.scanContent(fTempString); 2745 2746 if(fEntityScanner.skipChar('<', null)){ 2747 //check if we have reached end of element 2748 if(fEntityScanner.skipChar('/', NameType.ELEMENTEND)){ 2749 //increase the mark up depth 2750 fMarkupDepth++; 2751 fLastSectionWasCharacterData = false; 2752 setScannerState(SCANNER_STATE_END_ELEMENT_TAG); 2753 //check if its start of new element 2754 }else if(XMLChar.isNameStart(fEntityScanner.peekChar())){ 2755 fMarkupDepth++; 2756 fLastSectionWasCharacterData = false; 2757 setScannerState(SCANNER_STATE_START_ELEMENT_TAG); 2758 }else{ 2759 setScannerState(SCANNER_STATE_START_OF_MARKUP); 2760 //there can be cdata ahead if coalesce is true we should call again 2761 if(fIsCoalesce){ 2762 fLastSectionWasCharacterData = true; 2763 bufferContent(); 2764 continue; 2765 } 2766 } 2767 //in case last section was either entity reference or 2768 //cdata or character data -- we should be using buffer 2769 if(fUsebuffer){ 2770 bufferContent(); 2771 } 2772 2773 if(dtdGrammarUtil!= null && dtdGrammarUtil.isIgnorableWhiteSpace(fContentBuffer)){ 2774 if(DEBUG)System.out.println("Return SPACE EVENT"); 2775 return XMLEvent.SPACE; 2776 }else 2777 return XMLEvent.CHARACTERS; 2778 2779 } else{ 2780 bufferContent(); 2781 } 2782 if (c == '\r') { 2783 if(DEBUG){ 2784 System.out.println("'\r' character found"); 2785 } 2786 // happens when there is the character reference 2787 //xxx: We know the next chracter.. 
we should just skip it and add ']' directlry 2788 fEntityScanner.scanChar(null); 2789 fUsebuffer = true; 2790 fContentBuffer.append((char)c); 2791 c = -1 ; 2792 } else if (c == ']') { 2793 //fStringBuffer.clear(); 2794 //xxx: We know the next chracter.. we should just skip it and add ']' directlry 2795 fUsebuffer = true; 2796 fContentBuffer.append((char)fEntityScanner.scanChar(null)); 2797 // remember where we are in case we get an endEntity before we 2798 // could flush the buffer out - this happens when we're parsing an 2799 // entity which ends with a ] 2800 fInScanContent = true; 2801 2802 // We work on a single character basis to handle cases such as: 2803 // ']]]>' which we might otherwise miss. 2804 // 2805 if (fEntityScanner.skipChar(']', null)) { 2806 fContentBuffer.append(']'); 2807 while (fEntityScanner.skipChar(']', null)) { 2808 fContentBuffer.append(']'); 2809 } 2810 if (fEntityScanner.skipChar('>', null)) { 2811 reportFatalError("CDEndInContent", null); 2812 } 2813 } 2814 c = -1 ; 2815 fInScanContent = false; 2816 } 2817 2818 do{ 2819 //xxx: we should be using only one buffer.. 2820 // we need not to grow the buffer only when isCoalesce() is not true; 2821 2822 if (c == '<') { 2823 fEntityScanner.scanChar(null); 2824 setScannerState(SCANNER_STATE_START_OF_MARKUP); 2825 break; 2826 }//xxx what should be the behavior if entity reference is present in the content ? 2827 else if (c == '&') { 2828 fEntityScanner.scanChar(NameType.REFERENCE); 2829 setScannerState(SCANNER_STATE_REFERENCE); 2830 break; 2831 }///xxx since this part is also characters, it should be merged... 
2832 else if (c != -1 && isInvalidLiteral(c)) { 2833 if (XMLChar.isHighSurrogate(c)) { 2834 // special case: surrogates 2835 scanSurrogates(fContentBuffer) ; 2836 setScannerState(SCANNER_STATE_CONTENT); 2837 } else { 2838 reportFatalError("InvalidCharInContent", 2839 new Object[] { 2840 Integer.toString(c, 16)}); 2841 fEntityScanner.scanChar(null); 2842 } 2843 break; 2844 } 2845 //xxx: scanContent also gives character callback. 2846 c = scanContent(fContentBuffer) ; 2847 //we should not be iterating again if fIsCoalesce is not set to true 2848 2849 if(!fIsCoalesce){ 2850 setScannerState(SCANNER_STATE_CONTENT); 2851 break; 2852 } 2853 2854 }while(true); 2855 2856 //if (fDocumentHandler != null) { 2857 // fDocumentHandler.characters(fContentBuffer, null); 2858 //} 2859 if(DEBUG)System.out.println("USING THE BUFFER, STRING START=" + fContentBuffer.toString() +"=END"); 2860 //if fIsCoalesce is true there might be more data so call fDriver.next() 2861 if(fIsCoalesce){ 2862 fLastSectionWasCharacterData = true ; 2863 continue; 2864 }else{ 2865 if(dtdGrammarUtil!= null && dtdGrammarUtil.isIgnorableWhiteSpace(fContentBuffer)){ 2866 if(DEBUG)System.out.println("Return SPACE EVENT"); 2867 return XMLEvent.SPACE; 2868 } else 2869 return XMLEvent.CHARACTERS ; 2870 } 2871 } 2872 2873 case SCANNER_STATE_END_ELEMENT_TAG :{ 2874 if(fEmptyElement){ 2875 //set it back to false. 2876 fEmptyElement = false; 2877 setScannerState(SCANNER_STATE_CONTENT); 2878 //check the case when there is comment after single element document 2879 //<foo/> and some comment after this 2880 return (fMarkupDepth == 0 && elementDepthIsZeroHook() ) ? 
2881 XMLEvent.END_ELEMENT : XMLEvent.END_ELEMENT ; 2882 2883 } else if(scanEndElement() == 0) { 2884 //It is last element of the document 2885 if (elementDepthIsZeroHook()) { 2886 //if element depth is zero , it indicates the end of the document 2887 //the state shouldn't be set, because it is set by elementDepthIsZeroHook() function 2888 //xxx understand this point once again.. 2889 return XMLEvent.END_ELEMENT ; 2890 } 2891 2892 } 2893 setScannerState(SCANNER_STATE_CONTENT); 2894 return XMLEvent.END_ELEMENT ; 2895 } 2896 2897 case SCANNER_STATE_COMMENT: { //SCANNER_STATE_COMMENT: 2898 scanComment(); 2899 setScannerState(SCANNER_STATE_CONTENT); 2900 return XMLEvent.COMMENT; 2901 //break; 2902 } 2903 case SCANNER_STATE_PI:{ //SCANNER_STATE_PI: { 2904 //clear the buffer first 2905 fContentBuffer.clear() ; 2906 //xxx: which buffer should be passed. Ideally we shouldn't have 2907 //more than two buffers -- 2908 //xxx: where should we add the switch for buffering. 2909 scanPI(fContentBuffer); 2910 setScannerState(SCANNER_STATE_CONTENT); 2911 return XMLEvent.PROCESSING_INSTRUCTION; 2912 //break; 2913 } 2914 case SCANNER_STATE_CDATA :{ //SCANNER_STATE_CDATA: { 2915 //xxx: What if CDATA is the first event 2916 //<foo><![CDATA[hello<><>]]>append</foo> 2917 2918 //we should not clear the buffer only when the last state was 2919 //either SCANNER_STATE_REFERENCE or 2920 //SCANNER_STATE_CHARACTER_DATA or SCANNER_STATE_REFERENCE 2921 if(fIsCoalesce && ( fLastSectionWasEntityReference || 2922 fLastSectionWasCData || fLastSectionWasCharacterData)){ 2923 fLastSectionWasCData = true ; 2924 fLastSectionWasEntityReference = false; 2925 fLastSectionWasCharacterData = false; 2926 }//if we dont need to coalesce clear the buffer 2927 else{ 2928 fContentBuffer.clear(); 2929 } 2930 fUsebuffer = true; 2931 //CDATA section is read up to the chunk size limit 2932 scanCDATASection(fContentBuffer , true); 2933 if (!fCDataEnd) { 2934 setScannerState(SCANNER_STATE_CDATA); 2935 } else { 2936 
setScannerState(SCANNER_STATE_CONTENT); 2937 } 2938 //1. if fIsCoalesce is set to true we set the variable fLastSectionWasCData to true 2939 //and just call fDispatche.next(). Since we have set the scanner state to 2940 //SCANNER_STATE_CONTENT (super state) parser will automatically recover and 2941 //behave appropriately. When isCoalesce is set to true we dont need to reportCDATA event 2942 //2. Check if application has set for reporting CDATA event 2943 //3. if the application has neither set the fIsCoalesce to true nor fReportCdataEvent 2944 //return the cdata event as characters. 2945 if (fIsCoalesce) { 2946 fLastSectionWasCData = true ; 2947 //there might be more data to coalesce. 2948 continue; 2949 } else if(fReportCdataEvent) { 2950 return XMLEvent.CDATA; 2951 } else { 2952 return XMLEvent.CHARACTERS; 2953 } 2954 } 2955 2956 case SCANNER_STATE_REFERENCE :{ 2957 fMarkupDepth++; 2958 foundBuiltInRefs = false; 2959 2960 //we should not clear the buffer only when the last state was 2961 //either CDATA or 2962 //SCANNER_STATE_CHARACTER_DATA or SCANNER_STATE_REFERENCE 2963 if(fIsCoalesce && ( fLastSectionWasEntityReference || 2964 fLastSectionWasCData || fLastSectionWasCharacterData)){ 2965 //fLastSectionWasEntityReference or fLastSectionWasCData are only 2966 //used when fIsCoalesce is set to true. 
2967 fLastSectionWasEntityReference = true ; 2968 fLastSectionWasCData = false; 2969 fLastSectionWasCharacterData = false; 2970 }//if we dont need to coalesce clear the buffer 2971 else{ 2972 fContentBuffer.clear(); 2973 } 2974 fUsebuffer = true ; 2975 //take care of character reference 2976 if (fEntityScanner.skipChar('#', NameType.REFERENCE)) { 2977 scanCharReferenceValue(fContentBuffer, null); 2978 fMarkupDepth--; 2979 if(!fIsCoalesce){ 2980 setScannerState(SCANNER_STATE_CONTENT); 2981 return XMLEvent.CHARACTERS; 2982 } 2983 } else { 2984 // this function also starts new entity 2985 scanEntityReference(fContentBuffer); 2986 //if there was built-in entity reference & coalesce is not true 2987 //return CHARACTERS 2988 if(fScannerState == SCANNER_STATE_BUILT_IN_REFS && !fIsCoalesce){ 2989 setScannerState(SCANNER_STATE_CONTENT); 2990 if (builtInRefCharacterHandled) { 2991 builtInRefCharacterHandled = false; 2992 return XMLEvent.ENTITY_REFERENCE; 2993 } else { 2994 return XMLEvent.CHARACTERS; 2995 } 2996 } 2997 2998 //if there was a text declaration, call next() it will be taken care. 
2999 if(fScannerState == SCANNER_STATE_TEXT_DECL){ 3000 fLastSectionWasEntityReference = true ; 3001 continue; 3002 } 3003 3004 if(fScannerState == SCANNER_STATE_REFERENCE){ 3005 setScannerState(SCANNER_STATE_CONTENT); 3006 if (fReplaceEntityReferences && 3007 fEntityStore.isDeclaredEntity(fCurrentEntityName)) { 3008 // Skip the entity reference, we don't care 3009 continue; 3010 } 3011 return XMLEvent.ENTITY_REFERENCE; 3012 } 3013 } 3014 //Wether it was character reference, entity reference or built-in entity 3015 //set the next possible state to SCANNER_STATE_CONTENT 3016 setScannerState(SCANNER_STATE_CONTENT); 3017 fLastSectionWasEntityReference = true ; 3018 continue; 3019 } 3020 3021 case SCANNER_STATE_TEXT_DECL: { 3022 // scan text decl 3023 if (fEntityScanner.skipString("<?xml")) { 3024 fMarkupDepth++; 3025 // NOTE: special case where entity starts with a PI 3026 // whose name starts with "xml" (e.g. "xmlfoo") 3027 if (isValidNameChar(fEntityScanner.peekChar())) { 3028 fStringBuffer.clear(); 3029 fStringBuffer.append("xml"); 3030 3031 if (fNamespaces) { 3032 while (isValidNCName(fEntityScanner.peekChar())) { 3033 fStringBuffer.append((char)fEntityScanner.scanChar(null)); 3034 } 3035 } else { 3036 while (isValidNameChar(fEntityScanner.peekChar())) { 3037 fStringBuffer.append((char)fEntityScanner.scanChar(null)); 3038 } 3039 } 3040 String target = fSymbolTable.addSymbol(fStringBuffer.ch, 3041 fStringBuffer.offset, fStringBuffer.length); 3042 fContentBuffer.clear(); 3043 scanPIData(target, fContentBuffer); 3044 } 3045 3046 // standard text declaration 3047 else { 3048 //xxx: this function gives callback 3049 scanXMLDeclOrTextDecl(true); 3050 } 3051 } 3052 // now that we've straightened out the readers, we can read in chunks: 3053 fEntityManager.fCurrentEntity.mayReadChunks = true; 3054 setScannerState(SCANNER_STATE_CONTENT); 3055 //xxx: we don't return any state, so how do we get to know about TEXT declarations. 
3056 //it seems we have to careful when to allow function issue a callback 3057 //and when to allow adapter issue a callback. 3058 continue; 3059 } 3060 3061 3062 case SCANNER_STATE_ROOT_ELEMENT: { 3063 if (scanRootElementHook()) { 3064 fEmptyElement = true; 3065 //rest would be taken care by fTrailingMiscDriver set by scanRootElementHook 3066 return XMLEvent.START_ELEMENT; 3067 } 3068 setScannerState(SCANNER_STATE_CONTENT); 3069 return XMLEvent.START_ELEMENT ; 3070 } 3071 case SCANNER_STATE_CHAR_REFERENCE : { 3072 fContentBuffer.clear(); 3073 scanCharReferenceValue(fContentBuffer, null); 3074 fMarkupDepth--; 3075 setScannerState(SCANNER_STATE_CONTENT); 3076 return XMLEvent.CHARACTERS; 3077 } 3078 default: 3079 throw new XNIException("Scanner State " + fScannerState + " not Recognized "); 3080 3081 }//switch 3082 } 3083 // encoding errors 3084 catch (MalformedByteSequenceException e) { 3085 fErrorReporter.reportError(e.getDomain(), e.getKey(), 3086 e.getArguments(), XMLErrorReporter.SEVERITY_FATAL_ERROR, e); 3087 return -1; 3088 } 3089 catch (CharConversionException e) { 3090 fErrorReporter.reportError( 3091 XMLMessageFormatter.XML_DOMAIN, 3092 "CharConversionFailure", 3093 null, 3094 XMLErrorReporter.SEVERITY_FATAL_ERROR, e); 3095 return -1; 3096 } 3097 // premature end of file 3098 catch (EOFException e) { 3099 endOfFileHook(e); 3100 return -1; 3101 } 3102 } //while loop 3103 }//next 3104 3105 // 3106 // Protected methods 3107 // 3108 3109 // hooks 3110 3111 // NOTE: These hook methods are added so that the full document 3112 // scanner can share the majority of code with this class. 3113 3114 /** 3115 * Scan for DOCTYPE hook. This method is a hook for subclasses 3116 * to add code to handle scanning for a the "DOCTYPE" string 3117 * after the string "<!" has been scanned. 3118 * 3119 * @return True if the "DOCTYPE" was scanned; false if "DOCTYPE" 3120 * was not scanned. 
3121 */ scanForDoctypeHook()3122 protected boolean scanForDoctypeHook() 3123 throws IOException, XNIException { 3124 return false; 3125 } // scanForDoctypeHook():boolean 3126 3127 /** 3128 * Element depth iz zero. This methos is a hook for subclasses 3129 * to add code to handle when the element depth hits zero. When 3130 * scanning a document fragment, an element depth of zero is 3131 * normal. However, when scanning a full XML document, the 3132 * scanner must handle the trailing miscellanous section of 3133 * the document after the end of the document's root element. 3134 * 3135 * @return True if the caller should stop and return true which 3136 * allows the scanner to switch to a new scanning 3137 * driver. A return value of false indicates that 3138 * the content driver should continue as normal. 3139 */ elementDepthIsZeroHook()3140 protected boolean elementDepthIsZeroHook() 3141 throws IOException, XNIException { 3142 return false; 3143 } // elementDepthIsZeroHook():boolean 3144 3145 /** 3146 * Scan for root element hook. This method is a hook for 3147 * subclasses to add code that handles scanning for the root 3148 * element. When scanning a document fragment, there is no 3149 * "root" element. However, when scanning a full XML document, 3150 * the scanner must handle the root element specially. 3151 * 3152 * @return True if the caller should stop and return true which 3153 * allows the scanner to switch to a new scanning 3154 * driver. A return value of false indicates that 3155 * the content driver should continue as normal. 3156 */ scanRootElementHook()3157 protected boolean scanRootElementHook() 3158 throws IOException, XNIException { 3159 return false; 3160 } // scanRootElementHook():boolean 3161 3162 /** 3163 * End of file hook. This method is a hook for subclasses to 3164 * add code that handles the end of file. The end of file in 3165 * a document fragment is OK if the markup depth is zero. 
3166 * However, when scanning a full XML document, an end of file 3167 * is always premature. 3168 */ endOfFileHook(EOFException e)3169 protected void endOfFileHook(EOFException e) 3170 throws IOException, XNIException { 3171 3172 // NOTE: An end of file is only only an error if we were 3173 // in the middle of scanning some markup. -Ac 3174 if (fMarkupDepth != 0) { 3175 reportFatalError("PrematureEOF", null); 3176 } 3177 3178 } // endOfFileHook() 3179 3180 } // class FragmentContentDriver 3181 pr(String str)3182 static void pr(String str) { 3183 System.out.println(str) ; 3184 } 3185 3186 protected boolean fUsebuffer ; 3187 3188 /** this function gets an XMLString (which is used to store the attribute value) from the special pool 3189 * maintained for attributes. 3190 * fAttributeCacheUsedCount tracks the number of attributes that has been consumed from the pool. 3191 * if all the attributes has been consumed, it adds a new XMLString inthe pool and returns the same 3192 * XMLString. 3193 * 3194 * @return XMLString XMLString used to store an attribute value. 3195 */ 3196 getString()3197 protected XMLString getString(){ 3198 if(fAttributeCacheUsedCount < initialCacheCount || 3199 fAttributeCacheUsedCount < attributeValueCache.size()){ 3200 return attributeValueCache.get(fAttributeCacheUsedCount++); 3201 } else{ 3202 XMLString str = new XMLString(); 3203 fAttributeCacheUsedCount++; 3204 attributeValueCache.add(str); 3205 return str; 3206 } 3207 } 3208 3209 /** 3210 * Implements XMLBufferListener interface. 3211 */ 3212 refresh()3213 public void refresh(){ 3214 refresh(0); 3215 } 3216 3217 /** 3218 * receives callbacks from {@link XMLEntityReader } when buffer 3219 * is being changed. 3220 * @param refreshPosition 3221 */ refresh(int refreshPosition)3222 public void refresh(int refreshPosition){ 3223 //If you are reading attributes and you got a callback 3224 //cache available attributes. 
3225 if(fReadingAttributes){ 3226 fAttributes.refresh(); 3227 } 3228 if(fScannerState == SCANNER_STATE_CHARACTER_DATA){ 3229 bufferContent(); 3230 } 3231 } 3232 3233 /** 3234 * Since 'TempString' shares the buffer (a char array) with the CurrentEntity, 3235 * when the cursor position reaches the end, that is, before the buffer is 3236 * being loaded with new data, the content in the TempString needs to be 3237 * copied into the ContentBuffer. 3238 */ bufferContent()3239 private void bufferContent() { 3240 fContentBuffer.append(fTempString); 3241 //clear the XMLString so that data can't be added again. 3242 fTempString.length = 0; 3243 fUsebuffer = true; 3244 } 3245 } // class XMLDocumentFragmentScannerImpl 3246