1 /*
2  * Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved.
3  */
4 
5 /*
6  * Licensed to the Apache Software Foundation (ASF) under one or more
7  * contributor license agreements.  See the NOTICE file distributed with
8  * this work for additional information regarding copyright ownership.
9  * The ASF licenses this file to You under the Apache License, Version 2.0
10  * (the "License"); you may not use this file except in compliance with
11  * the License.  You may obtain a copy of the License at
12  *
13  *     http://www.apache.org/licenses/LICENSE-2.0
14  *
15  * Unless required by applicable law or agreed to in writing, software
16  * distributed under the License is distributed on an "AS IS" BASIS,
17  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18  * See the License for the specific language governing permissions and
19  * limitations under the License.
20  */
21 
22 package com.sun.org.apache.xerces.internal.impl;
23 
24 import com.sun.org.apache.xerces.internal.impl.msg.XMLMessageFormatter;
25 import com.sun.org.apache.xerces.internal.util.AugmentationsImpl;
26 import com.sun.org.apache.xerces.internal.util.XMLAttributesIteratorImpl;
27 import com.sun.org.apache.xerces.internal.util.XMLChar;
28 import com.sun.org.apache.xerces.internal.util.XMLStringBuffer;
29 import com.sun.org.apache.xerces.internal.util.XMLSymbols;
30 import com.sun.org.apache.xerces.internal.utils.SecuritySupport;
31 import com.sun.org.apache.xerces.internal.utils.XMLSecurityManager;
32 import com.sun.org.apache.xerces.internal.utils.XMLSecurityManager.Limit;
33 import com.sun.org.apache.xerces.internal.utils.XMLSecurityPropertyManager;
34 import com.sun.org.apache.xerces.internal.xni.Augmentations;
35 import com.sun.org.apache.xerces.internal.xni.QName;
36 import com.sun.org.apache.xerces.internal.xni.XMLAttributes;
37 import com.sun.org.apache.xerces.internal.xni.XMLDocumentHandler;
38 import com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier;
39 import com.sun.org.apache.xerces.internal.xni.XMLString;
40 import com.sun.org.apache.xerces.internal.xni.XNIException;
41 import com.sun.org.apache.xerces.internal.xni.parser.XMLComponent;
42 import com.sun.org.apache.xerces.internal.xni.parser.XMLComponentManager;
43 import com.sun.org.apache.xerces.internal.xni.parser.XMLConfigurationException;
44 import com.sun.org.apache.xerces.internal.xni.parser.XMLDocumentScanner;
45 import com.sun.org.apache.xerces.internal.xni.parser.XMLInputSource;
46 import com.sun.xml.internal.stream.XMLBufferListener;
47 import com.sun.xml.internal.stream.XMLEntityStorage;
48 import com.sun.xml.internal.stream.dtd.DTDGrammarUtil;
49 import java.io.EOFException;
50 import java.io.IOException;
51 import javax.xml.stream.XMLInputFactory;
52 import javax.xml.stream.XMLStreamConstants;
53 import javax.xml.stream.events.XMLEvent;
54 
55 
56 /**
57  *
58  * This class is responsible for scanning the structure and content
59  * of document fragments.
60  *
61  * This class has been modified as per the new design, which is better suited
62  * to building an efficient pull parser. Many improvements have been made, and
63  * code has been added to support StAX functionality/features.
64  *
65  * @author Neeraj Bajaj SUN Microsystems
66  * @author K.Venugopal SUN Microsystems
67  * @author Glenn Marcy, IBM
68  * @author Andy Clark, IBM
69  * @author Arnaud  Le Hors, IBM
70  * @author Eric Ye, IBM
71  * @author Sunitha Reddy, SUN Microsystems
72  * @version $Id: XMLDocumentFragmentScannerImpl.java,v 1.19 2010-11-02 19:54:55 joehw Exp $
73  *
74  */
75 public class XMLDocumentFragmentScannerImpl
76         extends XMLScanner
77         implements XMLDocumentScanner, XMLComponent, XMLEntityHandler, XMLBufferListener {
78 
79     //
80     // Constants
81     //
82 
83     protected int fElementAttributeLimit, fXMLNameLimit;
84 
85     /** External subset resolver. **/
86     protected ExternalSubsetResolver fExternalSubsetResolver;
87 
88     // scanner states
89 
90     //XXX this should be divided into more states.
91     /** Scanner state: start of markup. */
92     protected static final int SCANNER_STATE_START_OF_MARKUP = 21;
93 
94     /** Scanner state: content. */
95     protected static final int SCANNER_STATE_CONTENT = 22;
96 
97     /** Scanner state: processing instruction. */
98     protected static final int SCANNER_STATE_PI = 23;
99 
100     /** Scanner state: DOCTYPE. */
101     protected static final int SCANNER_STATE_DOCTYPE = 24;
102 
103     /** Scanner state: XML Declaration */
104     protected static final int SCANNER_STATE_XML_DECL = 25;
105 
106     /** Scanner state: root element. */
107     protected static final int SCANNER_STATE_ROOT_ELEMENT = 26;
108 
109     /** Scanner state: comment. */
110     protected static final int SCANNER_STATE_COMMENT = 27;
111 
112     /** Scanner state: reference. */
113     protected static final int SCANNER_STATE_REFERENCE = 28;
114 
115     // <book type="hard"> reading attribute name 'type'
116     protected static final int SCANNER_STATE_ATTRIBUTE = 29;
117 
118     // <book type="hard"> //reading attribute value.
119     protected static final int SCANNER_STATE_ATTRIBUTE_VALUE = 30;
120 
121     /** Scanner state: trailing misc. USED BY DOCUMENT_SCANNER_IMPL*/
122     //protected static final int SCANNER_STATE_TRAILING_MISC = 32;
123 
124     /** Scanner state: end of input. */
125     protected static final int SCANNER_STATE_END_OF_INPUT = 33;
126 
127     /** Scanner state: terminated. */
128     protected static final int SCANNER_STATE_TERMINATED = 34;
129 
130     /** Scanner state: CDATA section. */
131     protected static final int SCANNER_STATE_CDATA = 35;
132 
133     /** Scanner state: Text declaration. */
134     protected static final int SCANNER_STATE_TEXT_DECL = 36;
135 
136     /** Scanner state: character data. */
137     protected static final int SCANNER_STATE_CHARACTER_DATA = 37;
138 
139     //<book type="hard">foo</book>
140     protected static final int SCANNER_STATE_START_ELEMENT_TAG = 38;
141 
142     //<book type="hard">foo</book> reading </book>
143     protected static final int SCANNER_STATE_END_ELEMENT_TAG = 39;
144 
145     protected static final int SCANNER_STATE_CHAR_REFERENCE = 40;
146     protected static final int SCANNER_STATE_BUILT_IN_REFS = 41;
147 
148     // feature identifiers
149 
150 
151     /** Feature identifier: notify built-in references. */
152     protected static final String NOTIFY_BUILTIN_REFS =
153             Constants.XERCES_FEATURE_PREFIX + Constants.NOTIFY_BUILTIN_REFS_FEATURE;
154 
155     /** Property identifier: entity resolver. */
156     protected static final String ENTITY_RESOLVER =
157             Constants.XERCES_PROPERTY_PREFIX + Constants.ENTITY_RESOLVER_PROPERTY;
158 
159     /** Feature identifier: standard uri conformant */
160     protected static final String STANDARD_URI_CONFORMANT =
161             Constants.XERCES_FEATURE_PREFIX +Constants.STANDARD_URI_CONFORMANT_FEATURE;
162 
163     /** Property identifier: Security property manager. */
164     private static final String XML_SECURITY_PROPERTY_MANAGER =
165             Constants.XML_SECURITY_PROPERTY_MANAGER;
166 
167     /** access external dtd: file protocol
168      *  For DOM/SAX, the secure feature is set to true by default
169      */
170     final static String EXTERNAL_ACCESS_DEFAULT = Constants.EXTERNAL_ACCESS_DEFAULT;
171 
172     // recognized features and properties
173 
174     /** Recognized features. */
175     private static final String[] RECOGNIZED_FEATURES = {
176                 NAMESPACES,
177                 VALIDATION,
178                 NOTIFY_BUILTIN_REFS,
179                 NOTIFY_CHAR_REFS,
180                 Constants.STAX_REPORT_CDATA_EVENT
181     };
182 
183     /** Feature defaults. */
184     private static final Boolean[] FEATURE_DEFAULTS = {
185                 Boolean.TRUE,
186                 null,
187                 Boolean.FALSE,
188                 Boolean.FALSE,
189                 Boolean.TRUE
190     };
191 
192     /** Recognized properties. */
193     private static final String[] RECOGNIZED_PROPERTIES = {
194         SYMBOL_TABLE,
195                 ERROR_REPORTER,
196                 ENTITY_MANAGER,
197                 XML_SECURITY_PROPERTY_MANAGER
198     };
199 
200     /** Property defaults. */
201     private static final Object[] PROPERTY_DEFAULTS = {
202                 null,
203                 null,
204                 null,
205                 null
206     };
207 
208     private static final char [] cdata = {'[','C','D','A','T','A','['};
209     static final char [] xmlDecl = {'<','?','x','m','l'};
210     // private static final char [] endTag = {'<','/'};
211     // debugging
212 
213     /** Debug scanner state. */
214     private static final boolean DEBUG_SCANNER_STATE = false;
215 
216     /** Debug driver. */
217     private static final boolean DEBUG_DISPATCHER = false;
218 
219     /** Debug content driver scanning. */
220     protected static final boolean DEBUG_START_END_ELEMENT = false;
221 
222 
223     /** Debug driver next */
224     protected static final boolean DEBUG_NEXT = false ;
225 
226     /** General debugging. */
227     protected static final boolean DEBUG = false;
228     protected static final boolean DEBUG_COALESCE = false;
229     //
230     // Data
231     //
232 
233     // protected data
234 
235     /** Document handler. */
236     protected XMLDocumentHandler fDocumentHandler;
237     protected int fScannerLastState ;
238 
239     /** Entity Storage */
240     protected XMLEntityStorage fEntityStore;
241 
242     /** Entity stack. */
243     protected int[] fEntityStack = new int[4];
244 
245     /** Markup depth. */
246     protected int fMarkupDepth;
247 
248     //is the element empty
249     protected boolean fEmptyElement ;
250 
251     //track whether we are reading attributes; this is useful when
252     //there is a callback
253     protected boolean fReadingAttributes = false;
254 
255     /** Scanner state. */
256     protected int fScannerState;
257 
258     /** SubScanner state: inside scanContent method. */
259     protected boolean fInScanContent = false;
260     protected boolean fLastSectionWasCData = false;
261     protected boolean fLastSectionWasEntityReference = false;
262     protected boolean fLastSectionWasCharacterData = false;
263 
264     /** has external dtd */
265     protected boolean fHasExternalDTD;
266 
267     /** Standalone. */
268     protected boolean fStandaloneSet;
269     protected boolean fStandalone;
270     protected String fVersion;
271 
272     // element information
273 
274     /** Current element. */
275     protected QName fCurrentElement;
276 
277     /** Element stack. */
278     protected ElementStack fElementStack = new ElementStack();
279     protected ElementStack2 fElementStack2 = new ElementStack2();
280 
281     // other info
282 
283     /** Document system identifier.
284      * REVISIT:  So what's this used for?  - NG
285      * protected String fDocumentSystemId;
286      ******/
287 
288     protected String fPITarget ;
289 
290     //xxx do we need to create an extra XMLString object... look for using fTempString for collecting all the data values
291     protected XMLString fPIData  = new XMLString();
292 
293     // features
294 
295 
296     /** Notify built-in references. */
297     protected boolean fNotifyBuiltInRefs = false;
298 
299     //STAX related properties
300     //defaultValues.
301     protected boolean fSupportDTD = true;
302     protected boolean fReplaceEntityReferences = true;
303     protected boolean fSupportExternalEntities = false;
304     protected boolean fReportCdataEvent = false ;
305     protected boolean fIsCoalesce = false ;
306     protected String fDeclaredEncoding =  null;
307     /** Xerces Feature: Disallow doctype declaration. */
308     protected boolean fDisallowDoctype = false;
309 
310     /**
311      * comma-delimited list of protocols that are allowed for the purpose
312      * of accessing external dtd or entity references
313      */
314     protected String fAccessExternalDTD = EXTERNAL_ACCESS_DEFAULT;
315 
316     /**
317      * standard uri conformant (strict uri).
318      * http://apache.org/xml/features/standard-uri-conformant
319      */
320     protected boolean fStrictURI;
321 
322     // drivers
323 
324     /** Active driver. */
325     protected Driver fDriver;
326 
327     /** Content driver. */
328     protected Driver fContentDriver = createContentDriver();
329 
330     // temporary variables
331 
332     /** Element QName. */
333     protected QName fElementQName = new QName();
334 
335     /** Attribute QName. */
336     protected QName fAttributeQName = new QName();
337 
338     /**
339      * CHANGED: Using XMLAttributesIteratorImpl instead of XMLAttributesImpl. This class
340      * implements Iterator interface so we can directly give Attributes in the form of
341      * iterator.
342      */
343     protected XMLAttributesIteratorImpl fAttributes = new XMLAttributesIteratorImpl();
344 
345 
346     /** String. */
347     protected XMLString fTempString = new XMLString();
348 
349     /** String. */
350     protected XMLString fTempString2 = new XMLString();
351 
352     /** Array of 3 strings. */
353     private String[] fStrings = new String[3];
354 
355     /** Making the buffer accessible to derived class -- String buffer. */
356     protected XMLStringBuffer fStringBuffer = new XMLStringBuffer();
357 
358     /** Making the buffer accessible to derived class -- String buffer. */
359     protected XMLStringBuffer fStringBuffer2 = new XMLStringBuffer();
360 
361     /** stores character data. */
362     /** Making the buffer accessible to derived class -- stores PI data */
363     protected XMLStringBuffer fContentBuffer = new XMLStringBuffer();
364 
365     /** Single character array. */
366     private final char[] fSingleChar = new char[1];
367     private String fCurrentEntityName = null;
368 
369     // New members
370     protected boolean fScanToEnd = false;
371 
372     protected DTDGrammarUtil dtdGrammarUtil= null;
373 
374     protected boolean fAddDefaultAttr = false;
375 
376     protected boolean foundBuiltInRefs = false;
377 
378 
379     //skip element algorithm
380     static final short MAX_DEPTH_LIMIT = 5 ;
381     static final short ELEMENT_ARRAY_LENGTH = 200 ;
382     static final short MAX_POINTER_AT_A_DEPTH = 4 ;
383     static final boolean DEBUG_SKIP_ALGORITHM = false;
384     //create an element array of length equal to ELEMENT_ARRAY_LENGTH
385     String [] fElementArray = new String[ELEMENT_ARRAY_LENGTH] ;
386     //pointer location where last element was skipped
387     short fLastPointerLocation = 0 ;
388     short fElementPointer = 0 ;
389     //2D array to store pointer info
390     short [] [] fPointerInfo = new short[MAX_DEPTH_LIMIT] [MAX_POINTER_AT_A_DEPTH] ;
391     protected String fElementRawname ;
392     protected boolean fShouldSkip = false;
393     protected boolean fAdd = false ;
394     protected boolean fSkip = false;
395 
396     /** Reusable Augmentations. */
397     private Augmentations fTempAugmentations = null;
398     //
399     // Constructors
400     //
401 
402     /** Default constructor. */
XMLDocumentFragmentScannerImpl()403     public XMLDocumentFragmentScannerImpl() {
404     } // <init>()
405 
406     //
407     // XMLDocumentScanner methods
408     //
409 
410     /**
411      * Sets the input source.
412      *
413      * @param inputSource The input source.
414      *
415      * @throws IOException Thrown on i/o error.
416      */
setInputSource(XMLInputSource inputSource)417     public void setInputSource(XMLInputSource inputSource) throws IOException {
418         fEntityManager.setEntityHandler(this);
419         fEntityManager.startEntity(false, "$fragment$", inputSource, false, true);
420         // fDocumentSystemId = fEntityManager.expandSystemId(inputSource.getSystemId());
421     } // setInputSource(XMLInputSource)
422 
423     /**
424      * Scans a document.
425      *
426      * @param complete True if the scanner should scan the document
427      *                 completely, pushing all events to the registered
428      *                 document handler. A value of false indicates that
429      *                 that the scanner should only scan the next portion
430      *                 of the document and return. A scanner instance is
431      *                 permitted to completely scan a document if it does
432      *                 not support this "pull" scanning model.
433      *
434      * @return True if there is more to scan, false otherwise.
435      */
scanDocument(boolean complete)436     public boolean scanDocument(boolean complete)
437     throws IOException, XNIException {
438 
439         // keep dispatching "events"
440         fEntityManager.setEntityHandler(this);
441         //System.out.println(" get Document Handler in NSDocumentHandler " + fDocumentHandler );
442 
443         int event = next();
444         do {
445             switch (event) {
446                 case XMLStreamConstants.START_DOCUMENT :
447                     //fDocumentHandler.startDocument(fEntityManager.getEntityScanner(),fEntityManager.getEntityScanner().getVersion(),fNamespaceContext,null);// not able to get
448                     break;
449                 case XMLStreamConstants.START_ELEMENT :
450                     //System.out.println(" in scann element");
451                     //fDocumentHandler.startElement(getElementQName(),fAttributes,null);
452                     break;
453                 case XMLStreamConstants.CHARACTERS :
454                     fEntityScanner.checkNodeCount(fEntityScanner.fCurrentEntity);
455                     fDocumentHandler.characters(getCharacterData(),null);
456                     break;
457                 case XMLStreamConstants.SPACE:
458                     //check if getCharacterData() is the right function to retrieve ignorableWhitespace information.
459                     //System.out.println("in the space");
460                     //fDocumentHandler.ignorableWhitespace(getCharacterData(), null);
461                     break;
462                 case XMLStreamConstants.ENTITY_REFERENCE :
463                     fEntityScanner.checkNodeCount(fEntityScanner.fCurrentEntity);
464                     //entity reference callback are given in startEntity
465                     break;
466                 case XMLStreamConstants.PROCESSING_INSTRUCTION :
467                     fEntityScanner.checkNodeCount(fEntityScanner.fCurrentEntity);
468                     fDocumentHandler.processingInstruction(getPITarget(),getPIData(),null);
469                     break;
470                 case XMLStreamConstants.COMMENT :
471                     fEntityScanner.checkNodeCount(fEntityScanner.fCurrentEntity);
472                     fDocumentHandler.comment(getCharacterData(),null);
473                     break;
474                 case XMLStreamConstants.DTD :
475                     //all DTD related callbacks are handled in DTDScanner.
476                     //1. Stax doesn't define DTD states as it does for XML Document.
477                     //therefore we don't need to take care of anything here. So Just break;
478                     break;
479                 case XMLStreamConstants.CDATA:
480                     fEntityScanner.checkNodeCount(fEntityScanner.fCurrentEntity);
481                     fDocumentHandler.startCDATA(null);
482                     //xxx: check if CDATA values comes from getCharacterData() function
483                     fDocumentHandler.characters(getCharacterData(),null);
484                     fDocumentHandler.endCDATA(null);
485                     //System.out.println(" in CDATA of the XMLNSDocumentScannerImpl");
486                     break;
487                 case XMLStreamConstants.NOTATION_DECLARATION :
488                     break;
489                 case XMLStreamConstants.ENTITY_DECLARATION :
490                     break;
491                 case XMLStreamConstants.NAMESPACE :
492                     break;
493                 case XMLStreamConstants.ATTRIBUTE :
494                     break;
495                 case XMLStreamConstants.END_ELEMENT :
496                     //do not give callback here.
497                     //this callback is given in scanEndElement function.
498                     //fDocumentHandler.endElement(getElementQName(),null);
499                     break;
500                 default :
501                     throw new InternalError("processing event: " + event);
502 
503             }
504             //System.out.println("here in before calling next");
505             event = next();
506             //System.out.println("here in after calling next");
507         } while (event!=XMLStreamConstants.END_DOCUMENT && complete);
508 
509         if(event == XMLStreamConstants.END_DOCUMENT) {
510             fDocumentHandler.endDocument(null);
511             return false;
512         }
513 
514         return true;
515 
516     } // scanDocument(boolean):boolean
517 
518 
519 
getElementQName()520     public com.sun.org.apache.xerces.internal.xni.QName getElementQName(){
521         if(fScannerLastState == XMLEvent.END_ELEMENT){
522             fElementQName.setValues(fElementStack.getLastPoppedElement());
523         }
524         return fElementQName ;
525     }
526 
527     /** return the next state on the input
528      * @return int
529      */
530 
next()531     public int next() throws IOException, XNIException {
532         return fDriver.next();
533     }
534 
535     //
536     // XMLComponent methods
537     //
538 
539     /**
540      * Resets the component. The component can query the component manager
541      * about any features and properties that affect the operation of the
542      * component.
543      *
544      * @param componentManager The component manager.
545      *
546      * @throws SAXException Thrown by component on initialization error.
547      *                      For example, if a feature or property is
548      *                      required for the operation of the component, the
549      *                      component manager may throw a
550      *                      SAXNotRecognizedException or a
551      *                      SAXNotSupportedException.
552      */
553 
reset(XMLComponentManager componentManager)554     public void reset(XMLComponentManager componentManager)
555     throws XMLConfigurationException {
556 
557         super.reset(componentManager);
558 
559         // other settings
560         // fDocumentSystemId = null;
561 
562         // sax features
563         //fAttributes.setNamespaces(fNamespaces);
564 
565         // xerces features
566         fReportCdataEvent = componentManager.getFeature(Constants.STAX_REPORT_CDATA_EVENT, true);
567         fSecurityManager = (XMLSecurityManager)componentManager.getProperty(Constants.SECURITY_MANAGER, null);
568         fNotifyBuiltInRefs = componentManager.getFeature(NOTIFY_BUILTIN_REFS, false);
569 
570         Object resolver = componentManager.getProperty(ENTITY_RESOLVER, null);
571         fExternalSubsetResolver = (resolver instanceof ExternalSubsetResolver) ?
572                 (ExternalSubsetResolver) resolver : null;
573 
574         //attribute
575         fReadingAttributes = false;
576         //xxx: external entities are supported in Xerces
577         // it would be good to define feature for this case
578         fSupportExternalEntities = true;
579         fReplaceEntityReferences = true;
580         fIsCoalesce = false;
581 
582         // setup Driver
583         setScannerState(SCANNER_STATE_CONTENT);
584         setDriver(fContentDriver);
585 
586         // JAXP 1.5 features and properties
587         XMLSecurityPropertyManager spm = (XMLSecurityPropertyManager)
588                 componentManager.getProperty(XML_SECURITY_PROPERTY_MANAGER, null);
589         fAccessExternalDTD = spm.getValue(XMLSecurityPropertyManager.Property.ACCESS_EXTERNAL_DTD);
590 
591         fStrictURI = componentManager.getFeature(STANDARD_URI_CONFORMANT, false);
592 
593         resetCommon();
594         //fEntityManager.test();
595     } // reset(XMLComponentManager)
596 
597 
reset(PropertyManager propertyManager)598     public void reset(PropertyManager propertyManager){
599 
600         super.reset(propertyManager);
601 
602         // other settings
603         // fDocumentSystemId = null;
604         fNamespaces = ((Boolean)propertyManager.getProperty(XMLInputFactory.IS_NAMESPACE_AWARE)).booleanValue();
605         fNotifyBuiltInRefs = false ;
606 
607         //fElementStack2.clear();
608         //fReplaceEntityReferences = true;
609         //fSupportExternalEntities = true;
610         Boolean bo = (Boolean)propertyManager.getProperty(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES);
611         fReplaceEntityReferences = bo.booleanValue();
612         bo = (Boolean)propertyManager.getProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES);
613         fSupportExternalEntities = bo.booleanValue();
614         Boolean cdata = (Boolean)propertyManager.getProperty(Constants.ZEPHYR_PROPERTY_PREFIX + Constants.STAX_REPORT_CDATA_EVENT) ;
615         if(cdata != null)
616             fReportCdataEvent = cdata.booleanValue() ;
617         Boolean coalesce = (Boolean)propertyManager.getProperty(XMLInputFactory.IS_COALESCING) ;
618         if(coalesce != null)
619             fIsCoalesce = coalesce.booleanValue();
620         fReportCdataEvent = fIsCoalesce ? false : (fReportCdataEvent && true) ;
621         //if fIsCoalesce is set to true, set the value of fReplaceEntityReferences to true,
622         //if fIsCoalesce is set to false, take the value of fReplaceEntityReferences as set by application
623         fReplaceEntityReferences = fIsCoalesce ? true : fReplaceEntityReferences;
624         // setup Driver
625         //we dont need to do this -- nb.
626         //setScannerState(SCANNER_STATE_CONTENT);
627         //setDriver(fContentDriver);
628         //fEntityManager.test();
629 
630          // JAXP 1.5 features and properties
631         XMLSecurityPropertyManager spm = (XMLSecurityPropertyManager)
632                 propertyManager.getProperty(XML_SECURITY_PROPERTY_MANAGER);
633         fAccessExternalDTD = spm.getValue(XMLSecurityPropertyManager.Property.ACCESS_EXTERNAL_DTD);
634 
635         fSecurityManager = (XMLSecurityManager)propertyManager.getProperty(Constants.SECURITY_MANAGER);
636         resetCommon();
637     } // reset(XMLComponentManager)
638 
resetCommon()639     void resetCommon() {
640         // initialize vars
641         fMarkupDepth = 0;
642         fCurrentElement = null;
643         fElementStack.clear();
644         fHasExternalDTD = false;
645         fStandaloneSet = false;
646         fStandalone = false;
647         fInScanContent = false;
648         //skipping algorithm
649         fShouldSkip = false;
650         fAdd = false;
651         fSkip = false;
652 
653         fEntityStore = fEntityManager.getEntityStore();
654         dtdGrammarUtil = null;
655 
656         if (fSecurityManager != null) {
657             fElementAttributeLimit = fSecurityManager.getLimit(XMLSecurityManager.Limit.ELEMENT_ATTRIBUTE_LIMIT);
658             fXMLNameLimit = fSecurityManager.getLimit(XMLSecurityManager.Limit.MAX_NAME_LIMIT);
659         } else {
660             fElementAttributeLimit = 0;
661             fXMLNameLimit = XMLSecurityManager.Limit.MAX_NAME_LIMIT.defaultValue();
662         }
663         fLimitAnalyzer = fEntityManager.fLimitAnalyzer;
664     }
665 
666     /**
667      * Returns a list of feature identifiers that are recognized by
668      * this component. This method may return null if no features
669      * are recognized by this component.
670      */
getRecognizedFeatures()671     public String[] getRecognizedFeatures() {
672         return (String[])(RECOGNIZED_FEATURES.clone());
673     } // getRecognizedFeatures():String[]
674 
675     /**
676      * Sets the state of a feature. This method is called by the component
677      * manager any time after reset when a feature changes state.
678      * <p>
679      * <strong>Note:</strong> Components should silently ignore features
680      * that do not affect the operation of the component.
681      *
682      * @param featureId The feature identifier.
683      * @param state     The state of the feature.
684      *
685      * @throws SAXNotRecognizedException The component should not throw
686      *                                   this exception.
687      * @throws SAXNotSupportedException The component should not throw
688      *                                  this exception.
689      */
setFeature(String featureId, boolean state)690     public void setFeature(String featureId, boolean state)
691     throws XMLConfigurationException {
692 
693         super.setFeature(featureId, state);
694 
695         // Xerces properties
696         if (featureId.startsWith(Constants.XERCES_FEATURE_PREFIX)) {
697             String feature = featureId.substring(Constants.XERCES_FEATURE_PREFIX.length());
698             if (feature.equals(Constants.NOTIFY_BUILTIN_REFS_FEATURE)) {
699                 fNotifyBuiltInRefs = state;
700             }
701         }
702 
703     } // setFeature(String,boolean)
704 
705     /**
706      * Returns a list of property identifiers that are recognized by
707      * this component. This method may return null if no properties
708      * are recognized by this component.
709      */
getRecognizedProperties()710     public String[] getRecognizedProperties() {
711         return (String[])(RECOGNIZED_PROPERTIES.clone());
712     } // getRecognizedProperties():String[]
713 
714     /**
715      * Sets the value of a property. This method is called by the component
716      * manager any time after reset when a property changes value.
717      * <p>
718      * <strong>Note:</strong> Components should silently ignore properties
719      * that do not affect the operation of the component.
720      *
721      * @param propertyId The property identifier.
722      * @param value      The value of the property.
723      *
724      * @throws SAXNotRecognizedException The component should not throw
725      *                                   this exception.
726      * @throws SAXNotSupportedException The component should not throw
727      *                                  this exception.
728      */
setProperty(String propertyId, Object value)729     public void setProperty(String propertyId, Object value)
730     throws XMLConfigurationException {
731 
732         super.setProperty(propertyId, value);
733 
734         // Xerces properties
735         if (propertyId.startsWith(Constants.XERCES_PROPERTY_PREFIX)) {
736             final int suffixLength = propertyId.length() - Constants.XERCES_PROPERTY_PREFIX.length();
737             if (suffixLength == Constants.ENTITY_MANAGER_PROPERTY.length() &&
738                     propertyId.endsWith(Constants.ENTITY_MANAGER_PROPERTY)) {
739                 fEntityManager = (XMLEntityManager)value;
740                 return;
741             }
742             if (suffixLength == Constants.ENTITY_RESOLVER_PROPERTY.length() &&
743                     propertyId.endsWith(Constants.ENTITY_RESOLVER_PROPERTY)) {
744                 fExternalSubsetResolver = (value instanceof ExternalSubsetResolver) ?
745                     (ExternalSubsetResolver) value : null;
746                 return;
747             }
748         }
749 
750 
751                 // Xerces properties
752         if (propertyId.startsWith(Constants.XERCES_PROPERTY_PREFIX)) {
753             String property = propertyId.substring(Constants.XERCES_PROPERTY_PREFIX.length());
754             if (property.equals(Constants.ENTITY_MANAGER_PROPERTY)) {
755                 fEntityManager = (XMLEntityManager)value;
756             }
757             return;
758         }
759 
760         //JAXP 1.5 properties
761         if (propertyId.equals(XML_SECURITY_PROPERTY_MANAGER))
762         {
763             XMLSecurityPropertyManager spm = (XMLSecurityPropertyManager)value;
764             fAccessExternalDTD = spm.getValue(XMLSecurityPropertyManager.Property.ACCESS_EXTERNAL_DTD);
765         }
766 
767     } // setProperty(String,Object)
768 
769     /**
770      * Returns the default state for a feature, or null if this
771      * component does not want to report a default value for this
772      * feature.
773      *
774      * @param featureId The feature identifier.
775      *
776      * @since Xerces 2.2.0
777      */
getFeatureDefault(String featureId)778     public Boolean getFeatureDefault(String featureId) {
779         for (int i = 0; i < RECOGNIZED_FEATURES.length; i++) {
780             if (RECOGNIZED_FEATURES[i].equals(featureId)) {
781                 return FEATURE_DEFAULTS[i];
782             }
783         }
784         return null;
785     } // getFeatureDefault(String):Boolean
786 
787     /**
788      * Returns the default state for a property, or null if this
789      * component does not want to report a default value for this
790      * property.
791      *
792      * @param propertyId The property identifier.
793      *
794      * @since Xerces 2.2.0
795      */
getPropertyDefault(String propertyId)796     public Object getPropertyDefault(String propertyId) {
797         for (int i = 0; i < RECOGNIZED_PROPERTIES.length; i++) {
798             if (RECOGNIZED_PROPERTIES[i].equals(propertyId)) {
799                 return PROPERTY_DEFAULTS[i];
800             }
801         }
802         return null;
803     } // getPropertyDefault(String):Object
804 
805     //
806     // XMLDocumentSource methods
807     //
808 
809     /**
810      * setDocumentHandler
811      *
812      * @param documentHandler
813      */
setDocumentHandler(XMLDocumentHandler documentHandler)814     public void setDocumentHandler(XMLDocumentHandler documentHandler) {
815         fDocumentHandler = documentHandler;
816         //System.out.println(" In Set DOCUMENT HANDLER" + fDocumentHandler + " scanner =" + this);
817     } // setDocumentHandler(XMLDocumentHandler)
818 
819 
820     /** Returns the document handler */
getDocumentHandler()821     public XMLDocumentHandler getDocumentHandler(){
822         return fDocumentHandler;
823     }
824 
825     //
826     // XMLEntityHandler methods
827     //
828 
829     /**
830      * This method notifies of the start of an entity. The DTD has the
831      * pseudo-name of "[dtd]" parameter entity names start with '%'; and
832      * general entities are just specified by their name.
833      *
834      * @param name     The name of the entity.
835      * @param identifier The resource identifier.
836      * @param encoding The auto-detected IANA encoding name of the entity
837      *                 stream. This value will be null in those situations
838      *                 where the entity encoding is not auto-detected (e.g.
839      *                 internal entities or a document entity that is
840      *                 parsed from a java.io.Reader).
841      * @param augs     Additional information that may include infoset augmentations
842      *
843      * @throws XNIException Thrown by handler to signal an error.
844      */
startEntity(String name, XMLResourceIdentifier identifier, String encoding, Augmentations augs)845     public void startEntity(String name,
846             XMLResourceIdentifier identifier,
847             String encoding, Augmentations augs) throws XNIException {
848 
849         // keep track of this entity before fEntityDepth is increased
850         if (fEntityDepth == fEntityStack.length) {
851             int[] entityarray = new int[fEntityStack.length * 2];
852             System.arraycopy(fEntityStack, 0, entityarray, 0, fEntityStack.length);
853             fEntityStack = entityarray;
854         }
855         fEntityStack[fEntityDepth] = fMarkupDepth;
856 
857         super.startEntity(name, identifier, encoding, augs);
858 
859         // WFC:  entity declared in external subset in standalone doc
860         if(fStandalone && fEntityStore.isEntityDeclInExternalSubset(name)) {
861             reportFatalError("MSG_REFERENCE_TO_EXTERNALLY_DECLARED_ENTITY_WHEN_STANDALONE",
862                     new Object[]{name});
863         }
864 
865         /** we are not calling the handlers yet.. */
866         // call handler
867         if (fDocumentHandler != null && !fScanningAttribute) {
868             if (!name.equals("[xml]")) {
869                 fDocumentHandler.startGeneralEntity(name, identifier, encoding, augs);
870             }
871         }
872 
873     } // startEntity(String,XMLResourceIdentifier,String)
874 
875     /**
876      * This method notifies the end of an entity. The DTD has the pseudo-name
877      * of "[dtd]" parameter entity names start with '%'; and general entities
878      * are just specified by their name.
879      *
880      * @param name The name of the entity.
881      * @param augs Additional information that may include infoset augmentations
882      *
883      * @throws XNIException Thrown by handler to signal an error.
884      */
endEntity(String name, Augmentations augs)885     public void endEntity(String name, Augmentations augs) throws IOException, XNIException {
886 
887         /**
888          * // flush possible pending output buffer - see scanContent
889          * if (fInScanContent && fStringBuffer.length != 0
890          * && fDocumentHandler != null) {
891          * fDocumentHandler.characters(fStringBuffer, null);
892          * fStringBuffer.length = 0; // make sure we know it's been flushed
893          * }
894          */
895         super.endEntity(name, augs);
896 
897         // make sure markup is properly balanced
898         if (fMarkupDepth != fEntityStack[fEntityDepth]) {
899             reportFatalError("MarkupEntityMismatch", null);
900         }
901 
902         /**/
903         // call handler
904         if (fDocumentHandler != null && !fScanningAttribute) {
905             if (!name.equals("[xml]")) {
906                 fDocumentHandler.endGeneralEntity(name, augs);
907             }
908         }
909 
910 
911     } // endEntity(String)
912 
913     //
914     // Protected methods
915     //
916 
917     // Driver factory methods
918 
919     /** Creates a content Driver. */
createContentDriver()920     protected Driver createContentDriver() {
921         return new FragmentContentDriver();
922     } // createContentDriver():Driver
923 
924     // scanning methods
925 
926     /**
927      * Scans an XML or text declaration.
928      * <p>
929      * <pre>
930      * [23] XMLDecl ::= '&lt;?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
931      * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
932      * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'" )
933      * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
934      * [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'")
935      *                 | ('"' ('yes' | 'no') '"'))
936      *
937      * [77] TextDecl ::= '&lt;?xml' VersionInfo? EncodingDecl S? '?>'
938      * </pre>
939      *
940      * @param scanningTextDecl True if a text declaration is to
941      *                         be scanned instead of an XML
942      *                         declaration.
943      */
scanXMLDeclOrTextDecl(boolean scanningTextDecl)944     protected void scanXMLDeclOrTextDecl(boolean scanningTextDecl)
945     throws IOException, XNIException {
946 
947         // scan decl
948         super.scanXMLDeclOrTextDecl(scanningTextDecl, fStrings);
949         fMarkupDepth--;
950 
951         // pseudo-attribute values
952         String version = fStrings[0];
953         String encoding = fStrings[1];
954         String standalone = fStrings[2];
955         fDeclaredEncoding = encoding;
956         // set standalone
957         fStandaloneSet = standalone != null;
958         fStandalone = fStandaloneSet && standalone.equals("yes");
959         ///xxx see where its used.. this is not used anywhere. it may be useful for entity to store this information
960         //but this information is only related with Document Entity.
961         fEntityManager.setStandalone(fStandalone);
962 
963 
964         // call handler
965         if (fDocumentHandler != null) {
966             if (scanningTextDecl) {
967                 fDocumentHandler.textDecl(version, encoding, null);
968             } else {
969                 fDocumentHandler.xmlDecl(version, encoding, standalone, null);
970             }
971         }
972 
973         if(version != null){
974             fEntityScanner.setVersion(version);
975             fEntityScanner.setXMLVersion(version);
976         }
977         // set encoding on reader, only if encoding was not specified by the application explicitly
978         if (encoding != null && !fEntityScanner.getCurrentEntity().isEncodingExternallySpecified()) {
979              fEntityScanner.setEncoding(encoding);
980         }
981 
982     } // scanXMLDeclOrTextDecl(boolean)
983 
getPITarget()984     public String getPITarget(){
985         return fPITarget ;
986     }
987 
getPIData()988     public XMLStringBuffer getPIData(){
989         return fContentBuffer ;
990     }
991 
992     //XXX: why not this function behave as per the state of the parser?
getCharacterData()993     public XMLString getCharacterData(){
994         if(fUsebuffer){
995             return fContentBuffer ;
996         }else{
997             return fTempString;
998         }
999 
1000     }
1001 
1002 
1003     /**
1004      * Scans a processing data. This is needed to handle the situation
1005      * where a document starts with a processing instruction whose
1006      * target name <em>starts with</em> "xml". (e.g. xmlfoo)
1007      *
1008      * @param target The PI target
1009      * @param data The XMLStringBuffer to fill in with the data
1010      */
scanPIData(String target, XMLStringBuffer data)1011     protected void scanPIData(String target, XMLStringBuffer data)
1012     throws IOException, XNIException {
1013 
1014         super.scanPIData(target, data);
1015 
1016         //set the PI target and values
1017         fPITarget = target ;
1018 
1019         fMarkupDepth--;
1020 
1021     } // scanPIData(String)
1022 
1023     /**
1024      * Scans a comment.
1025      * <p>
1026      * <pre>
1027      * [15] Comment ::= '&lt!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
1028      * </pre>
1029      * <p>
1030      * <strong>Note:</strong> Called after scanning past '&lt;!--'
1031      */
scanComment()1032     protected void scanComment() throws IOException, XNIException {
1033         fContentBuffer.clear();
1034         scanComment(fContentBuffer);
1035         //getTextCharacters can also be called for reading comments
1036         fUsebuffer = true;
1037         fMarkupDepth--;
1038 
1039     } // scanComment()
1040 
1041     //xxx value returned by this function may not remain valid if another event is scanned.
getComment()1042     public String getComment(){
1043         return fContentBuffer.toString();
1044     }
1045 
addElement(String rawname)1046     void addElement(String rawname){
1047         if(fElementPointer < ELEMENT_ARRAY_LENGTH){
1048             //storing element raw name in a linear list of array
1049             fElementArray[fElementPointer] = rawname ;
1050             //storing elemnetPointer for particular element depth
1051 
1052             if(DEBUG_SKIP_ALGORITHM){
1053                 StringBuffer sb = new StringBuffer() ;
1054                 sb.append(" Storing element information ") ;
1055                 sb.append(" fElementPointer = " + fElementPointer) ;
1056                 sb.append(" fElementRawname = " + fElementQName.rawname) ;
1057                 sb.append(" fElementStack.fDepth = " + fElementStack.fDepth);
1058                 System.out.println(sb.toString()) ;
1059             }
1060 
1061             //store pointer information only when element depth is less MAX_DEPTH_LIMIT
1062             if(fElementStack.fDepth < MAX_DEPTH_LIMIT){
1063                 short column = storePointerForADepth(fElementPointer);
1064                 if(column > 0){
1065                     short pointer = getElementPointer((short)fElementStack.fDepth, (short)(column - 1) );
1066                     //identity comparison shouldn't take much time and we can rely on this
1067                     //since its guaranteed to have same object id for same string.
1068                     if(rawname == fElementArray[pointer]){
1069                         fShouldSkip = true ;
1070                         fLastPointerLocation = pointer ;
1071                         //reset the things and return.
1072                         resetPointer((short)fElementStack.fDepth , column) ;
1073                         fElementArray[fElementPointer] = null ;
1074                         return ;
1075                     }else{
1076                         fShouldSkip = false ;
1077                     }
1078                 }
1079             }
1080             fElementPointer++ ;
1081         }
1082     }
1083 
1084 
resetPointer(short depth, short column)1085     void resetPointer(short depth, short column){
1086         fPointerInfo[depth] [column] = (short)0;
1087     }
1088 
1089     //returns column information at which pointer was stored.
storePointerForADepth(short elementPointer)1090     short storePointerForADepth(short elementPointer){
1091         short depth = (short) fElementStack.fDepth ;
1092 
1093         //Stores element pointer locations at particular depth , only 4 pointer locations
1094         //are stored at particular depth for now.
1095         for(short i = 0 ; i < MAX_POINTER_AT_A_DEPTH ; i++){
1096 
1097             if(canStore(depth, i)){
1098                 fPointerInfo[depth][i] = elementPointer ;
1099                 if(DEBUG_SKIP_ALGORITHM){
1100                     StringBuffer sb = new StringBuffer() ;
1101                     sb.append(" Pointer information ") ;
1102                     sb.append(" fElementPointer = " + fElementPointer) ;
1103                     sb.append(" fElementStack.fDepth = " + fElementStack.fDepth);
1104                     sb.append(" column = " + i ) ;
1105                     System.out.println(sb.toString()) ;
1106                 }
1107                 return i;
1108             }
1109             //else
1110             //pointer was not stored because we reached the limit
1111         }
1112         return -1 ;
1113     }
1114 
canStore(short depth, short column)1115     boolean canStore(short depth, short column){
1116         //colum = 0 , means first element at particular depth
1117         //column = 1, means second element at particular depth
1118         //        calle should make sure that it doesn't call for value outside allowed co-ordinates
1119         return fPointerInfo[depth][column] == 0 ? true : false ;
1120     }
1121 
1122 
getElementPointer(short depth, short column)1123     short getElementPointer(short depth, short column){
1124         //colum = 0 , means first element at particular depth
1125         //column = 1, means second element at particular depth
1126         //        calle should make sure that it doesn't call for value outside allowed co-ordinates
1127         return fPointerInfo[depth][column] ;
1128     }
1129 
1130     //this function assumes that string passed is not null and skips
1131     //the following string from the buffer this makes sure
skipFromTheBuffer(String rawname)1132     boolean skipFromTheBuffer(String rawname) throws IOException{
1133         if(fEntityScanner.skipString(rawname)){
1134             char c = (char)fEntityScanner.peekChar() ;
1135             //If the start element was completely skipped we should encounter either ' '(space),
1136             //or '/' (in case of empty element)  or '>'
1137             if( c == ' ' || c == '/' || c == '>'){
1138                 fElementRawname = rawname ;
1139                 return true ;
1140             } else{
1141                 return false;
1142             }
1143         } else
1144             return false ;
1145     }
1146 
skipQElement(String rawname)1147     boolean skipQElement(String rawname) throws IOException{
1148 
1149         final int c = fEntityScanner.getChar(rawname.length());
1150         //if this character is still valid element name -- this means string can't match
1151         if(XMLChar.isName(c)){
1152             return false;
1153         }else{
1154             return fEntityScanner.skipString(rawname);
1155         }
1156     }
1157 
skipElement()1158     protected boolean skipElement() throws IOException {
1159 
1160         if(!fShouldSkip) return false ;
1161 
1162         if(fLastPointerLocation != 0){
1163             //Look at the next element stored in the array list.. we might just get a match.
1164             String rawname = fElementArray[fLastPointerLocation + 1] ;
1165             if(rawname != null && skipFromTheBuffer(rawname)){
1166                 fLastPointerLocation++ ;
1167                 if(DEBUG_SKIP_ALGORITHM){
1168                     System.out.println("Element " + fElementRawname + " was SKIPPED at pointer location = " + fLastPointerLocation);
1169                 }
1170                 return true ;
1171             } else{
1172                 //reset it back to zero... we haven't got the correct subset yet.
1173                 fLastPointerLocation = 0 ;
1174 
1175             }
1176         }
1177         //xxx: we can put some logic here as from what column it should start looking
1178         //for now we always start at 0
1179         //fallback to tolerant algorithm, it would look for differnt element stored at different
1180         //depth and get us the pointer location.
1181         return fShouldSkip && skipElement((short)0);
1182 
1183     }
1184 
1185     //start of the column at which it should try searching
skipElement(short column)1186     boolean skipElement(short column) throws IOException {
1187         short depth = (short)fElementStack.fDepth ;
1188 
1189         if(depth > MAX_DEPTH_LIMIT){
1190             return fShouldSkip = false ;
1191         }
1192         for(short i = column ; i < MAX_POINTER_AT_A_DEPTH ; i++){
1193             short pointer = getElementPointer(depth , i ) ;
1194 
1195             if(pointer == 0){
1196                 return fShouldSkip = false ;
1197             }
1198 
1199             if(fElementArray[pointer] != null && skipFromTheBuffer(fElementArray[pointer])){
1200                 if(DEBUG_SKIP_ALGORITHM){
1201                     System.out.println();
1202                     System.out.println("Element " + fElementRawname + " was SKIPPED at depth = " + fElementStack.fDepth + " column = " + column );
1203                     System.out.println();
1204                 }
1205                 fLastPointerLocation = pointer ;
1206                 return fShouldSkip = true ;
1207             }
1208         }
1209         return fShouldSkip = false ;
1210     }
1211 
1212     /**
1213      * Scans a start element. This method will handle the binding of
1214      * namespace information and notifying the handler of the start
1215      * of the element.
1216      * <p>
1217      * <pre>
1218      * [44] EmptyElemTag ::= '&lt;' Name (S Attribute)* S? '/>'
1219      * [40] STag ::= '&lt;' Name (S Attribute)* S? '>'
1220      * </pre>
1221      * <p>
1222      * <strong>Note:</strong> This method assumes that the leading
1223      * '&lt;' character has been consumed.
1224      * <p>
1225      * <strong>Note:</strong> This method uses the fElementQName and
1226      * fAttributes variables. The contents of these variables will be
1227      * destroyed. The caller should copy important information out of
1228      * these variables before calling this method.
1229      * NB: Content in fAttributes is valid only till the state of the parser is XMLEvent.START_ELEMENT
1230      *
1231      * @return True if element is empty. (i.e. It matches
1232      *          production [44].
1233      */
1234     // fElementQName will have the details of element just read..
1235     // fAttributes will have the details of all the attributes.
scanStartElement()1236     protected boolean scanStartElement()
1237     throws IOException, XNIException {
1238 
1239         if (DEBUG_START_END_ELEMENT) System.out.println( this.getClass().toString() + ">>> scanStartElement()");
1240         //when skipping is true and no more elements should be added
1241         if(fSkip && !fAdd){
1242             //get the stored element -- if everything goes right this should match the
1243             //token in the buffer
1244 
1245             QName name = fElementStack.getNext();
1246 
1247             if(DEBUG_SKIP_ALGORITHM){
1248                 System.out.println("Trying to skip String = " + name.rawname);
1249             }
1250 
1251             //Be conservative -- if skipping fails -- stop.
1252             fSkip = fEntityScanner.skipString(name.rawname);
1253 
1254             if(fSkip){
1255                 if(DEBUG_SKIP_ALGORITHM){
1256                     System.out.println("Element SUCESSFULLY skipped = " + name.rawname);
1257                 }
1258                 fElementStack.push();
1259                 fElementQName = name;
1260             }else{
1261                 //if skipping fails reposition the stack or fallback to normal way of processing
1262                 fElementStack.reposition();
1263                 if(DEBUG_SKIP_ALGORITHM){
1264                     System.out.println("Element was NOT skipped, REPOSITIONING stack" );
1265                 }
1266             }
1267         }
1268 
1269         //we are still at the stage of adding elements
1270         //the elements were not matched or
1271         //fSkip is not set to true
1272         if(!fSkip || fAdd){
1273             //get the next element from the stack
1274             fElementQName = fElementStack.nextElement();
1275             // name
1276             if (fNamespaces) {
1277                 fEntityScanner.scanQName(fElementQName, NameType.ELEMENTSTART);
1278             } else {
1279                 String name = fEntityScanner.scanName(NameType.ELEMENTSTART);
1280                 fElementQName.setValues(null, name, name, null);
1281             }
1282 
1283             if(DEBUG)System.out.println("Element scanned in start element is " + fElementQName.toString());
1284             if(DEBUG_SKIP_ALGORITHM){
1285                 if(fAdd){
1286                     System.out.println("Elements are being ADDED -- elemet added is = " + fElementQName.rawname + " at count = " + fElementStack.fCount);
1287                 }
1288             }
1289 
1290         }
1291 
1292         //when the elements are being added , we need to check if we are set for skipping the elements
1293         if(fAdd){
1294             //this sets the value of fAdd variable
1295             fElementStack.matchElement(fElementQName);
1296         }
1297 
1298 
1299         //xxx: We dont need another pointer, fCurrentElement, we can use fElementQName
1300         fCurrentElement = fElementQName;
1301 
1302         String rawname = fElementQName.rawname;
1303 
1304         fEmptyElement = false;
1305 
1306         fAttributes.removeAllAttributes();
1307 
1308         checkDepth(rawname);
1309         if(!seekCloseOfStartTag()){
1310             fReadingAttributes = true;
1311             fAttributeCacheUsedCount =0;
1312             fStringBufferIndex =0;
1313             fAddDefaultAttr = true;
1314             do {
1315                 scanAttribute(fAttributes);
1316                 if (fSecurityManager != null && !fSecurityManager.isNoLimit(fElementAttributeLimit) &&
1317                         fAttributes.getLength() > fElementAttributeLimit){
1318                     fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
1319                                                  "ElementAttributeLimit",
1320                                                  new Object[]{rawname, fElementAttributeLimit },
1321                                                  XMLErrorReporter.SEVERITY_FATAL_ERROR );
1322                 }
1323 
1324             } while (!seekCloseOfStartTag());
1325             fReadingAttributes=false;
1326         }
1327 
1328         if (fEmptyElement) {
1329             //decrease the markup depth..
1330             fMarkupDepth--;
1331 
1332             // check that this element was opened in the same entity
1333             if (fMarkupDepth < fEntityStack[fEntityDepth - 1]) {
1334                 reportFatalError("ElementEntityMismatch",
1335                         new Object[]{fCurrentElement.rawname});
1336             }
1337             // call handler
1338             if (fDocumentHandler != null) {
1339                 fDocumentHandler.emptyElement(fElementQName, fAttributes, null);
1340             }
1341 
1342             //We should not be popping out the context here in endELement becaause the namespace context is still
1343             //valid when parser is at the endElement state.
1344             //if (fNamespaces) {
1345             //  fNamespaceContext.popContext();
1346             //}
1347 
1348             //pop the element off the stack..
1349             fElementStack.popElement();
1350 
1351         } else {
1352 
1353             if(dtdGrammarUtil != null)
1354                 dtdGrammarUtil.startElement(fElementQName, fAttributes);
1355             if(fDocumentHandler != null){
1356                 //complete element and attributes are traversed in this function so we can send a callback
1357                 //here.
1358                 //<strong>we shouldn't be sending callback in scanDocument()</strong>
1359                 fDocumentHandler.startElement(fElementQName, fAttributes, null);
1360             }
1361         }
1362 
1363 
1364         if (DEBUG_START_END_ELEMENT) System.out.println(this.getClass().toString() + "<<< scanStartElement(): "+fEmptyElement);
1365         return fEmptyElement;
1366 
1367     } // scanStartElement():boolean
1368 
1369     /**
1370      * Looks for the close of start tag, i.e. if it finds '>' or '/>'
1371      * Characters are consumed.
1372      */
seekCloseOfStartTag()1373     protected boolean seekCloseOfStartTag() throws IOException, XNIException {
1374         // spaces
1375         boolean sawSpace = fEntityScanner.skipSpaces();
1376 
1377         // end tag?
1378         final int c = fEntityScanner.peekChar();
1379         if (c == '>') {
1380             fEntityScanner.scanChar(null);
1381             return true;
1382         } else if (c == '/') {
1383             fEntityScanner.scanChar(null);
1384             if (!fEntityScanner.skipChar('>', NameType.ELEMENTEND)) {
1385                 reportFatalError("ElementUnterminated",
1386                         new Object[]{fElementQName.rawname});
1387             }
1388             fEmptyElement = true;
1389             return true;
1390         } else if (!isValidNameStartChar(c) || !sawSpace) {
1391             // Second chance. Check if this character is a high
1392             // surrogate of a valid name start character.
1393             if (!isValidNameStartHighSurrogate(c) || !sawSpace) {
1394                 reportFatalError("ElementUnterminated",
1395                         new Object[]{fElementQName.rawname});
1396             }
1397         }
1398 
1399         return false;
1400     }
1401 
hasAttributes()1402     public boolean hasAttributes(){
1403         return fAttributes.getLength() > 0 ? true : false ;
1404     }
1405 
1406 
1407     /**
1408      * Scans an attribute.
1409      * <p>
1410      * <pre>
1411      * [41] Attribute ::= Name Eq AttValue
1412      * </pre>
1413      * <p>
1414      * <strong>Note:</strong> This method assumes that the next
1415      * character on the stream is the first character of the attribute
1416      * name.
1417      * <p>
1418      * <strong>Note:</strong> This method uses the fAttributeQName and
1419      * fQName variables. The contents of these variables will be
1420      * destroyed.
1421      *
1422      * @param attributes The attributes list for the scanned attribute.
1423      */
1424 
1425     /**
1426      * protected void scanAttribute(AttributeIteratorImpl attributes)
1427      * throws IOException, XNIException {
1428      * if (DEBUG_START_END_ELEMENT) System.out.println(">>> scanAttribute()");
1429      *
1430      *
1431      * // name
1432      * if (fNamespaces) {
1433      * fEntityScanner.scanQName(fAttributeQName);
1434      * }
1435      * else {
1436      * String name = fEntityScanner.scanName();
1437      * fAttributeQName.setValues(null, name, name, null);
1438      * }
1439      *
1440      * // equals
1441      * fEntityScanner.skipSpaces();
1442      * if (!fEntityScanner.skipChar('=')) {
1443      * reportFatalError("EqRequiredInAttribute",
1444      * new Object[]{fAttributeQName.rawname});
1445      * }
1446      * fEntityScanner.skipSpaces();
1447      *
1448      *
1449      * // content
1450      * int oldLen = attributes.getLength();
1451      */
1452     /**xxx there is one check of duplicate attribute that has been removed.
1453      * attributes.addAttribute(fAttributeQName, XMLSymbols.fCDATASymbol, null);
1454      *
1455      * // WFC: Unique Att Spec
1456      * if (oldLen == attributes.getLength()) {
1457      * reportFatalError("AttributeNotUnique",
1458      * new Object[]{fCurrentElement.rawname,
1459      * fAttributeQName.rawname});
1460      * }
1461      */
1462 
1463     /*
1464         //REVISIT: one more case needs to be included: external PE and standalone is no
1465         boolean isVC =  fHasExternalDTD && !fStandalone;
1466         scanAttributeValue(fTempString, fTempString2,
1467                            fAttributeQName.rawname, attributes,
1468                            oldLen, isVC);
1469 
1470         //attributes.setValue(oldLen, fTempString.toString());
1471         //attributes.setNonNormalizedValue(oldLen, fTempString2.toString());
1472         //attributes.setSpecified(oldLen, true);
1473 
1474         AttributeImpl attribute = new AttributeImpl(fAttributeQName.prefix,fAttributeQName.localpart,fAttributeQName.uri,fTempString.toString(),fTempString2.toString(),XMLSymbols.fCDATASymbol,true);
1475         fAttributes.addAttribute(attribute);
1476         if (DEBUG_START_END_ELEMENT) System.out.println("<<< scanAttribute()");
1477     } // scanAttribute(XMLAttributes)
1478 
1479      */
1480 
1481     /** return the attribute iterator implementation */
getAttributeIterator()1482     public XMLAttributesIteratorImpl getAttributeIterator(){
1483         if(dtdGrammarUtil != null && fAddDefaultAttr){
1484             dtdGrammarUtil.addDTDDefaultAttrs(fElementQName,fAttributes);
1485             fAddDefaultAttr = false;
1486         }
1487         return fAttributes;
1488     }
1489 
1490     /** return if standalone is set */
standaloneSet()1491     public boolean standaloneSet(){
1492         return fStandaloneSet;
1493     }
1494     /** return if the doucment is standalone */
isStandAlone()1495     public boolean isStandAlone(){
1496         return fStandalone ;
1497     }
1498     /**
1499      * Scans an attribute name value pair.
1500      * <p>
1501      * <pre>
1502      * [41] Attribute ::= Name Eq AttValue
1503      * </pre>
1504      * <p>
1505      * <strong>Note:</strong> This method assumes that the next
1506      * character on the stream is the first character of the attribute
1507      * name.
1508      * <p>
1509      * <strong>Note:</strong> This method uses the fAttributeQName and
1510      * fQName variables. The contents of these variables will be
1511      * destroyed.
1512      *
1513      * @param attributes The attributes list for the scanned attribute.
1514      */
1515 
scanAttribute(XMLAttributes attributes)1516     protected void scanAttribute(XMLAttributes attributes)
1517     throws IOException, XNIException {
1518         if (DEBUG_START_END_ELEMENT) System.out.println(this.getClass().toString() +">>> scanAttribute()");
1519 
1520         // name
1521         if (fNamespaces) {
1522             fEntityScanner.scanQName(fAttributeQName, NameType.ATTRIBUTENAME);
1523         } else {
1524             String name = fEntityScanner.scanName(NameType.ATTRIBUTENAME);
1525             fAttributeQName.setValues(null, name, name, null);
1526         }
1527 
1528         // equals
1529         fEntityScanner.skipSpaces();
1530         if (!fEntityScanner.skipChar('=', NameType.ATTRIBUTE)) {
1531             reportFatalError("EqRequiredInAttribute",
1532                 new Object[] {fCurrentElement.rawname, fAttributeQName.rawname});
1533         }
1534         fEntityScanner.skipSpaces();
1535 
1536         int attIndex = 0 ;
1537         //REVISIT: one more case needs to be included: external PE and standalone is no
1538         boolean isVC =  fHasExternalDTD && !fStandalone;
1539         //fTempString would store attribute value
1540         ///fTempString2 would store attribute non-normalized value
1541 
1542         //this function doesn't use 'attIndex'. We are adding the attribute later
1543         //after we have figured out that current attribute is not namespace declaration
1544         //since scanAttributeValue doesn't use attIndex parameter therefore we
1545         //can safely add the attribute later..
1546         XMLString tmpStr = getString();
1547 
1548         scanAttributeValue(tmpStr, fTempString2, fAttributeQName.rawname, attributes,
1549                 attIndex, isVC, fCurrentElement.rawname, false);
1550 
1551         // content
1552         int oldLen = attributes.getLength();
1553         //if the attribute name already exists.. new value is replaced with old value
1554         attIndex = attributes.addAttribute(fAttributeQName, XMLSymbols.fCDATASymbol, null);
1555 
1556         // WFC: Unique Att Spec
1557         //attributes count will be same if the current attribute  name already exists for this element name.
1558         //this means there are two duplicate attributes.
1559         if (oldLen == attributes.getLength()) {
1560             reportFatalError("AttributeNotUnique",
1561                     new Object[]{fCurrentElement.rawname,
1562                             fAttributeQName.rawname});
1563         }
1564 
1565         //tmpString contains attribute value
1566         //we are passing null as the attribute value
1567         attributes.setValue(attIndex, null, tmpStr);
1568 
1569         ///xxx: nonNormalizedValue is not being set as it is not required by SAX & DOM
1570         //attributes.setNonNormalizedValue(oldLen, fTempString2.toString());
1571         attributes.setSpecified(attIndex, true);
1572 
1573         if (DEBUG_START_END_ELEMENT) System.out.println(this.getClass().toString() +"<<< scanAttribute()");
1574 
1575     } // scanAttribute(XMLAttributes)
1576 
1577     /**
1578      * Scans element content.
1579      *
1580      * @return Returns the next character on the stream.
1581      */
1582     //CHANGED:
1583     //EARLIER: scanContent()
1584     //NOW: scanContent(XMLStringBuffer)
1585     //It makes things easy if this functions takes XMLStringBuffer as parameter..
1586     //this function appends the data to the buffer.
scanContent(XMLStringBuffer content)1587     protected int scanContent(XMLStringBuffer content) throws IOException, XNIException {
1588         //set the fTempString length to 0 before passing it on to scanContent
1589         //scanContent sets the correct co-ordinates as per the content read
1590         fTempString.length = 0;
1591         int c = fEntityScanner.scanContent(fTempString);
1592         content.append(fTempString);
1593         fTempString.length = 0;
1594         if (c == '\r') {
1595             // happens when there is the character reference &#13;
1596             //xxx: We know the next chracter.. we should just skip it and add ']' directlry
1597             fEntityScanner.scanChar(null);
1598             content.append((char)c);
1599             c = -1;
1600         } else if (c == ']') {
1601             //fStringBuffer.clear();
1602             //xxx: We know the next chracter.. we should just skip it and add ']' directlry
1603             content.append((char)fEntityScanner.scanChar(null));
1604             // remember where we are in case we get an endEntity before we
1605             // could flush the buffer out - this happens when we're parsing an
1606             // entity which ends with a ]
1607             fInScanContent = true;
1608             //
1609             // We work on a single character basis to handle cases such as:
1610             // ']]]>' which we might otherwise miss.
1611             //
1612             if (fEntityScanner.skipChar(']', null)) {
1613                 content.append(']');
1614                 while (fEntityScanner.skipChar(']', null)) {
1615                     content.append(']');
1616                 }
1617                 if (fEntityScanner.skipChar('>', null)) {
1618                     reportFatalError("CDEndInContent", null);
1619                 }
1620             }
1621             fInScanContent = false;
1622             c = -1;
1623         }
1624         if (fDocumentHandler != null && content.length > 0) {
1625             //fDocumentHandler.characters(content, null);
1626         }
1627         return c;
1628 
1629     } // scanContent():int
1630 
1631 
1632     /**
1633      * Scans a CDATA section.
1634      * <p>
1635      * <strong>Note:</strong> This method uses the fTempString and
1636      * fStringBuffer variables.
1637      *
1638      * @param complete True if the CDATA section is to be scanned
1639      *                 completely.
1640      *
1641      * @return True if CDATA is completely scanned.
1642      */
1643     //CHANGED:
scanCDATASection(XMLStringBuffer contentBuffer, boolean complete)1644     protected boolean scanCDATASection(XMLStringBuffer contentBuffer, boolean complete)
1645     throws IOException, XNIException {
1646 
1647         // call handler
1648         if (fDocumentHandler != null) {
1649             //fDocumentHandler.startCDATA(null);
1650         }
1651 
1652         while (true) {
1653             //scanData will fill the contentBuffer
1654             if (!fEntityScanner.scanData("]]>", contentBuffer)) {
1655                 break ;
1656                 /** We dont need all this code if we pass ']]>' as delimeter..
1657                  * int brackets = 2;
1658                  * while (fEntityScanner.skipChar(']')) {
1659                  * brackets++;
1660                  * }
1661                  *
1662                  * //When we find more than 2 square brackets
1663                  * if (fDocumentHandler != null && brackets > 2) {
1664                  * //we dont need to clear the buffer..
1665                  * //contentBuffer.clear();
1666                  * for (int i = 2; i < brackets; i++) {
1667                  * contentBuffer.append(']');
1668                  * }
1669                  * fDocumentHandler.characters(contentBuffer, null);
1670                  * }
1671                  *
1672                  * if (fEntityScanner.skipChar('>')) {
1673                  * break;
1674                  * }
1675                  * if (fDocumentHandler != null) {
1676                  * //we dont need to clear the buffer now..
1677                  * //contentBuffer.clear();
1678                  * contentBuffer.append("]]");
1679                  * fDocumentHandler.characters(contentBuffer, null);
1680                  * }
1681                  **/
1682             } else {
1683                 int c = fEntityScanner.peekChar();
1684                 if (c != -1 && isInvalidLiteral(c)) {
1685                     if (XMLChar.isHighSurrogate(c)) {
1686                         //contentBuffer.clear();
1687                         //scan surrogates if any....
1688                         scanSurrogates(contentBuffer);
1689                     } else {
1690                         reportFatalError("InvalidCharInCDSect",
1691                                 new Object[]{Integer.toString(c,16)});
1692                                 fEntityScanner.scanChar(null);
1693                     }
1694                 }
1695                 //by this time we have also read surrogate contents if any...
1696                 if (fDocumentHandler != null) {
1697                     //fDocumentHandler.characters(contentBuffer, null);
1698                 }
1699             }
1700         }
1701         fMarkupDepth--;
1702 
1703         if (fDocumentHandler != null && contentBuffer.length > 0) {
1704             //fDocumentHandler.characters(contentBuffer, null);
1705         }
1706 
1707         // call handler
1708         if (fDocumentHandler != null) {
1709             //fDocumentHandler.endCDATA(null);
1710         }
1711 
1712         return true;
1713 
1714     } // scanCDATASection(XMLStringBuffer, boolean):boolean
1715 
1716     /**
1717      * Scans an end element.
1718      * <p>
1719      * <pre>
1720      * [42] ETag ::= '&lt;/' Name S? '>'
1721      * </pre>
1722      * <p>
1723      * <strong>Note:</strong> This method uses the fElementQName variable.
1724      * The contents of this variable will be destroyed. The caller should
1725      * copy the needed information out of this variable before calling
1726      * this method.
1727      *
1728      * @return The element depth.
1729      */
scanEndElement()1730     protected int scanEndElement() throws IOException, XNIException {
1731         if (DEBUG_START_END_ELEMENT) System.out.println(this.getClass().toString() +">>> scanEndElement()");
1732 
1733         // pop context
1734         QName endElementName = fElementStack.popElement();
1735 
1736         String rawname = endElementName.rawname;
1737         if(DEBUG)System.out.println("endElementName = " + endElementName.toString());
1738         // Take advantage of the fact that next string _should_ be "fElementQName.rawName",
1739         //In scanners most of the time is consumed on checks done for XML characters, we can
1740         // optimize on it and avoid the checks done for endElement,
1741         //we will also avoid symbol table lookup - neeraj.bajaj@sun.com
1742 
1743         // this should work both for namespace processing true or false...
1744 
1745         //REVISIT: if the string is not the same as expected.. we need to do better error handling..
1746         //We can skip this for now... In any case if the string doesn't match -- document is not well formed.
1747 
1748         if (!fEntityScanner.skipString(endElementName.rawname)) {
1749              reportFatalError("ETagRequired", new Object[]{rawname});
1750         }
1751 
1752         // end
1753         fEntityScanner.skipSpaces();
1754         if (!fEntityScanner.skipChar('>', NameType.ELEMENTEND)) {
1755             reportFatalError("ETagUnterminated",
1756                     new Object[]{rawname});
1757         }
1758         fMarkupDepth--;
1759 
1760         //we have increased the depth for two markup "<" characters
1761         fMarkupDepth--;
1762 
1763         // check that this element was opened in the same entity
1764         if (fMarkupDepth < fEntityStack[fEntityDepth - 1]) {
1765             reportFatalError("ElementEntityMismatch",
1766                     new Object[]{rawname});
1767         }
1768 
1769         //We should not be popping out the context here in endELement becaause the namespace context is still
1770         //valid when parser is at the endElement state.
1771 
1772         //if (fNamespaces) {
1773         //  fNamespaceContext.popContext();
1774         //}
1775 
1776         // call handler
1777         if (fDocumentHandler != null ) {
1778             //end element is scanned in this function so we can send a callback
1779             //here.
1780             //<strong>we shouldn't be sending callback in scanDocument()</strong>
1781 
1782             fDocumentHandler.endElement(endElementName, null);
1783         }
1784         if(dtdGrammarUtil != null)
1785             dtdGrammarUtil.endElement(endElementName);
1786 
1787         return fMarkupDepth;
1788 
1789     } // scanEndElement():int
1790 
1791     /**
1792      * Scans a character reference.
1793      * <p>
1794      * <pre>
1795      * [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
1796      * </pre>
1797      */
scanCharReference()1798     protected void scanCharReference()
1799     throws IOException, XNIException {
1800 
1801         fStringBuffer2.clear();
1802         int ch = scanCharReferenceValue(fStringBuffer2, null);
1803         fMarkupDepth--;
1804         if (ch != -1) {
1805             // call handler
1806 
1807             if (fDocumentHandler != null) {
1808                 if (fNotifyCharRefs) {
1809                     fDocumentHandler.startGeneralEntity(fCharRefLiteral, null, null, null);
1810                 }
1811                 Augmentations augs = null;
1812                 if (fValidation && ch <= 0x20) {
1813                     if (fTempAugmentations != null) {
1814                         fTempAugmentations.removeAllItems();
1815                     }
1816                     else {
1817                         fTempAugmentations = new AugmentationsImpl();
1818                     }
1819                     augs = fTempAugmentations;
1820                     augs.putItem(Constants.CHAR_REF_PROBABLE_WS, Boolean.TRUE);
1821                 }
1822                 //xxx: How do we deal with this - how to return charReferenceValues
1823                 //now this is being commented because this is taken care in scanDocument()
1824                 //fDocumentHandler.characters(fStringBuffer2, null);
1825                 if (fNotifyCharRefs) {
1826                     fDocumentHandler.endGeneralEntity(fCharRefLiteral, null);
1827                 }
1828             }
1829         }
1830 
1831     } // scanCharReference()
1832 
1833 
1834     /**
1835      * Scans an entity reference.
1836      *
1837      * @return returns true if the new entity is started. If it was built-in entity
1838      *         'false' is returned.
1839      * @throws IOException  Thrown if i/o error occurs.
1840      * @throws XNIException Thrown if handler throws exception upon
1841      *                      notification.
1842      */
scanEntityReference(XMLStringBuffer content)1843     protected void scanEntityReference(XMLStringBuffer content) throws IOException, XNIException {
1844         String name = fEntityScanner.scanName(NameType.REFERENCE);
1845         if (name == null) {
1846             reportFatalError("NameRequiredInReference", null);
1847             return;
1848         }
1849         if (!fEntityScanner.skipChar(';', NameType.REFERENCE)) {
1850             reportFatalError("SemicolonRequiredInReference", new Object []{name});
1851         }
1852         if (fEntityStore.isUnparsedEntity(name)) {
1853             reportFatalError("ReferenceToUnparsedEntity", new Object[]{name});
1854         }
1855         fMarkupDepth--;
1856         fCurrentEntityName = name;
1857 
1858         // handle built-in entities
1859         if (name == fAmpSymbol) {
1860             handleCharacter('&', fAmpSymbol, content);
1861             fScannerState = SCANNER_STATE_BUILT_IN_REFS;
1862             return ;
1863         } else if (name == fLtSymbol) {
1864             handleCharacter('<', fLtSymbol, content);
1865             fScannerState = SCANNER_STATE_BUILT_IN_REFS;
1866             return ;
1867         } else if (name == fGtSymbol) {
1868             handleCharacter('>', fGtSymbol, content);
1869             fScannerState = SCANNER_STATE_BUILT_IN_REFS;
1870             return ;
1871         } else if (name == fQuotSymbol) {
1872             handleCharacter('"', fQuotSymbol, content);
1873             fScannerState = SCANNER_STATE_BUILT_IN_REFS;
1874             return ;
1875         } else if (name == fAposSymbol) {
1876             handleCharacter('\'', fAposSymbol, content);
1877             fScannerState = SCANNER_STATE_BUILT_IN_REFS;
1878             return ;
1879         }
1880 
1881         //1. if the entity is external and support to external entities is not required
1882         // 2. or entities should not be replaced
1883         //3. or if it is built in entity reference.
1884         boolean isEE = fEntityStore.isExternalEntity(name);
1885         if((isEE && !fSupportExternalEntities) || (!isEE && !fReplaceEntityReferences) || foundBuiltInRefs){
1886             fScannerState = SCANNER_STATE_REFERENCE;
1887             return ;
1888         }
1889         // start general entity
1890         if (!fEntityStore.isDeclaredEntity(name)) {
1891             //SUPPORT_DTD=false && ReplaceEntityReferences should throw exception
1892             if (!fSupportDTD && fReplaceEntityReferences) {
1893                 reportFatalError("EntityNotDeclared", new Object[]{name});
1894                 return;
1895             }
1896             //REVISIT: one more case needs to be included: external PE and standalone is no
1897             if ( fHasExternalDTD && !fStandalone) {
1898                 if (fValidation)
1899                     fErrorReporter.reportError(fEntityScanner, XMLMessageFormatter.XML_DOMAIN,"EntityNotDeclared",
1900                             new Object[]{name}, XMLErrorReporter.SEVERITY_ERROR);
1901             } else
1902                 reportFatalError("EntityNotDeclared", new Object[]{name});
1903         }
1904         //we are starting the entity even if the entity was not declared
1905         //if that was the case it its taken care in XMLEntityManager.startEntity()
1906         //we immediately call the endEntity. Application gets to know if there was
1907         //any entity that was not declared.
1908         fEntityManager.startEntity(true, name, false);
1909         //set the scaner state to content.. parser will automatically revive itself at any point of time.
1910         //setScannerState(SCANNER_STATE_CONTENT);
1911         //return true ;
1912     } // scanEntityReference()
1913 
1914     // utility methods
1915 
1916     /**
1917      * Check if the depth exceeds the maxElementDepth limit
1918      * @param elementName name of the current element
1919      */
checkDepth(String elementName)1920     void checkDepth(String elementName) {
1921         fLimitAnalyzer.addValue(Limit.MAX_ELEMENT_DEPTH_LIMIT, elementName, fElementStack.fDepth);
1922         if (fSecurityManager.isOverLimit(Limit.MAX_ELEMENT_DEPTH_LIMIT,fLimitAnalyzer)) {
1923             fSecurityManager.debugPrint(fLimitAnalyzer);
1924             reportFatalError("MaxElementDepthLimit", new Object[]{elementName,
1925                 fLimitAnalyzer.getTotalValue(Limit.MAX_ELEMENT_DEPTH_LIMIT),
1926                 fSecurityManager.getLimit(Limit.MAX_ELEMENT_DEPTH_LIMIT),
1927                 "maxElementDepth"});
1928         }
1929     }
1930 
1931     /**
1932      * Calls document handler with a single character resulting from
1933      * built-in entity resolution.
1934      *
1935      * @param c
1936      * @param entity built-in name
1937      * @param XMLStringBuffer append the character to buffer
1938      *
1939      * we really dont need to call this function -- this function is only required when
1940      * we integrate with rest of Xerces2. SO maintaining the current behavior and still
1941      * calling this function to hanlde built-in entity reference.
1942      *
1943      */
handleCharacter(char c, String entity, XMLStringBuffer content)1944     private void handleCharacter(char c, String entity, XMLStringBuffer content) throws XNIException {
1945         foundBuiltInRefs = true;
1946         checkEntityLimit(false, fEntityScanner.fCurrentEntity.name, 1);
1947         content.append(c);
1948         if (fDocumentHandler != null) {
1949             fSingleChar[0] = c;
1950             if (fNotifyBuiltInRefs) {
1951                 fDocumentHandler.startGeneralEntity(entity, null, null, null);
1952             }
1953             fTempString.setValues(fSingleChar, 0, 1);
1954             //fDocumentHandler.characters(fTempString, null);
1955 
1956             if (fNotifyBuiltInRefs) {
1957                 fDocumentHandler.endGeneralEntity(entity, null);
1958             }
1959         }
1960     } // handleCharacter(char)
1961 
1962     // helper methods
1963 
1964     /**
1965      * Sets the scanner state.
1966      *
1967      * @param state The new scanner state.
1968      */
setScannerState(int state)1969     protected final void setScannerState(int state) {
1970 
1971         fScannerState = state;
1972         if (DEBUG_SCANNER_STATE) {
1973             System.out.print("### setScannerState: ");
1974             //System.out.print(fScannerState);
1975             System.out.print(getScannerStateName(state));
1976             System.out.println();
1977         }
1978 
1979     } // setScannerState(int)
1980 
1981 
1982     /**
1983      * Sets the Driver.
1984      *
1985      * @param Driver The new Driver.
1986      */
setDriver(Driver driver)1987     protected final void setDriver(Driver driver) {
1988         fDriver = driver;
1989         if (DEBUG_DISPATCHER) {
1990             System.out.print("%%% setDriver: ");
1991             System.out.print(getDriverName(driver));
1992             System.out.println();
1993         }
1994     }
1995 
1996     //
1997     // Private methods
1998     //
1999 
2000     /** Returns the scanner state name. */
getScannerStateName(int state)2001     protected String getScannerStateName(int state) {
2002 
2003         switch (state) {
2004             case SCANNER_STATE_DOCTYPE: return "SCANNER_STATE_DOCTYPE";
2005             case SCANNER_STATE_ROOT_ELEMENT: return "SCANNER_STATE_ROOT_ELEMENT";
2006             case SCANNER_STATE_START_OF_MARKUP: return "SCANNER_STATE_START_OF_MARKUP";
2007             case SCANNER_STATE_COMMENT: return "SCANNER_STATE_COMMENT";
2008             case SCANNER_STATE_PI: return "SCANNER_STATE_PI";
2009             case SCANNER_STATE_CONTENT: return "SCANNER_STATE_CONTENT";
2010             case SCANNER_STATE_REFERENCE: return "SCANNER_STATE_REFERENCE";
2011             case SCANNER_STATE_END_OF_INPUT: return "SCANNER_STATE_END_OF_INPUT";
2012             case SCANNER_STATE_TERMINATED: return "SCANNER_STATE_TERMINATED";
2013             case SCANNER_STATE_CDATA: return "SCANNER_STATE_CDATA";
2014             case SCANNER_STATE_TEXT_DECL: return "SCANNER_STATE_TEXT_DECL";
2015             case SCANNER_STATE_ATTRIBUTE: return "SCANNER_STATE_ATTRIBUTE";
2016             case SCANNER_STATE_ATTRIBUTE_VALUE: return "SCANNER_STATE_ATTRIBUTE_VALUE";
2017             case SCANNER_STATE_START_ELEMENT_TAG: return "SCANNER_STATE_START_ELEMENT_TAG";
2018             case SCANNER_STATE_END_ELEMENT_TAG: return "SCANNER_STATE_END_ELEMENT_TAG";
2019             case SCANNER_STATE_CHARACTER_DATA: return "SCANNER_STATE_CHARACTER_DATA" ;
2020         }
2021 
2022         return "??? ("+state+')';
2023 
2024     } // getScannerStateName(int):String
getEntityName()2025     public String getEntityName(){
2026         //return the cached name
2027         return fCurrentEntityName;
2028     }
2029 
2030     /** Returns the driver name. */
getDriverName(Driver driver)2031     public String getDriverName(Driver driver) {
2032 
2033         if (DEBUG_DISPATCHER) {
2034             if (driver != null) {
2035                 String name = driver.getClass().getName();
2036                 int index = name.lastIndexOf('.');
2037                 if (index != -1) {
2038                     name = name.substring(index + 1);
2039                     index = name.lastIndexOf('$');
2040                     if (index != -1) {
2041                         name = name.substring(index + 1);
2042                     }
2043                 }
2044                 return name;
2045             }
2046         }
2047         return "null";
2048 
2049     } // getDriverName():String
2050 
2051     /**
2052      * Check the protocol used in the systemId against allowed protocols
2053      *
2054      * @param systemId the Id of the URI
2055      * @param allowedProtocols a list of allowed protocols separated by comma
2056      * @return the name of the protocol if rejected, null otherwise
2057      */
checkAccess(String systemId, String allowedProtocols)2058     String checkAccess(String systemId, String allowedProtocols) throws IOException {
2059         String baseSystemId = fEntityScanner.getBaseSystemId();
2060         String expandedSystemId = XMLEntityManager.expandSystemId(systemId, baseSystemId, fStrictURI);
2061         return SecuritySupport.checkAccess(expandedSystemId, allowedProtocols, Constants.ACCESS_EXTERNAL_ALL);
2062     }
2063 
2064     //
2065     // Classes
2066     //
2067 
2068     /**
2069      * @author Neeraj Bajaj, Sun Microsystems.
2070      */
2071     protected static final class Element {
2072 
2073         //
2074         // Data
2075         //
2076 
2077         /** Symbol. */
2078         public QName qname;
2079 
2080         //raw name stored as characters
2081         public char[] fRawname;
2082 
2083         /** The next Element entry. */
2084         public Element next;
2085 
2086         //
2087         // Constructors
2088         //
2089 
2090         /**
2091          * Constructs a new Element from the given QName and next Element
2092          * reference.
2093          */
Element(QName qname, Element next)2094         public Element(QName qname, Element next) {
2095             this.qname.setValues(qname);
2096             this.fRawname = qname.rawname.toCharArray();
2097             this.next = next;
2098         }
2099 
2100     } // class Element
2101 
2102     /**
2103      * Element stack.
2104      *
2105      * @author Neeraj Bajaj, Sun Microsystems.
2106      */
2107     protected class ElementStack2 {
2108 
2109         //
2110         // Data
2111         //
2112 
2113         /** The stack data. */
2114         protected QName [] fQName = new QName[20];
2115 
2116         //Element depth
2117         protected int fDepth;
2118         //total number of elements
2119         protected int fCount;
2120         //current position
2121         protected int fPosition;
2122         //Mark refers to the position
2123         protected int fMark;
2124 
2125         protected int fLastDepth ;
2126 
2127         //
2128         // Constructors
2129         //
2130 
2131         /** Default constructor. */
ElementStack2()2132         public ElementStack2() {
2133             for (int i = 0; i < fQName.length; i++) {
2134                 fQName[i] = new QName();
2135             }
2136             fMark = fPosition = 1;
2137         } // <init>()
2138 
resize()2139         public void resize(){
2140             /**
2141              * int length = fElements.length;
2142              * Element [] temp = new Element[length * 2];
2143              * System.arraycopy(fElements, 0, temp, 0, length);
2144              * fElements = temp;
2145              */
2146             //resize QNames
2147             int oldLength = fQName.length;
2148             QName [] tmp = new QName[oldLength * 2];
2149             System.arraycopy(fQName, 0, tmp, 0, oldLength);
2150             fQName = tmp;
2151 
2152             for (int i = oldLength; i < fQName.length; i++) {
2153                 fQName[i] = new QName();
2154             }
2155 
2156         }
2157 
2158 
2159         //
2160         // Public methods
2161         //
2162 
2163         /** Check if the element scanned during the start element
2164          *matches the stored element.
2165          *
2166          *@return true if the match suceeds.
2167          */
matchElement(QName element)2168         public boolean matchElement(QName element) {
2169             //last depth is the depth when last elemnt was pushed
2170             //if last depth is greater than current depth
2171             if(DEBUG_SKIP_ALGORITHM){
2172                 System.out.println("fLastDepth = " + fLastDepth);
2173                 System.out.println("fDepth = " + fDepth);
2174             }
2175             boolean match = false;
2176             if(fLastDepth > fDepth && fDepth <= 2){
2177                 if(DEBUG_SKIP_ALGORITHM){
2178                     System.out.println("Checking if the elements match " + element.rawname + " , " + fQName[fDepth].rawname);
2179                 }
2180                 if(element.rawname == fQName[fDepth].rawname){
2181                     fAdd = false;
2182                     //mark this position
2183                     //decrease the depth by 1 as arrays are 0 based
2184                     fMark = fDepth - 1;
2185                     //we found the match and from next element skipping will start, add 1
2186                     fPosition = fMark + 1 ;
2187                     match = true;
2188                     //Once we get match decrease the count -- this was increased by nextElement()
2189                     --fCount;
2190                     if(DEBUG_SKIP_ALGORITHM){
2191                         System.out.println("fAdd FALSE -- NOW ELEMENT SHOULD NOT BE ADDED");
2192                         System.out.println("fMark = " + fMark);
2193                         System.out.println("fPosition = " + fPosition);
2194                         System.out.println("fDepth = " + fDepth);
2195                         System.out.println("fCount = " + fCount);
2196                     }
2197                 }else{
2198                     fAdd = true;
2199                     if(DEBUG_SKIP_ALGORITHM)System.out.println("fAdd is " + fAdd);
2200                 }
2201             }
2202             //store the last depth
2203             fLastDepth = fDepth++;
2204             return match;
2205         } // pushElement(QName):QName
2206 
2207         /**
2208          * This function doesn't increase depth. The function in this function is
2209          *broken down into two functions for efficiency. <@see>matchElement</see>.
2210          * This function just returns the pointer to the object and its values are set.
2211          *
2212          *@return QName reference to the next element in the list
2213          */
nextElement()2214         public QName nextElement() {
2215 
2216             //if number of elements becomes equal to the length of array -- stop the skipping
2217             if (fCount == fQName.length) {
2218                 fShouldSkip = false;
2219                 fAdd = false;
2220                 if(DEBUG_SKIP_ALGORITHM)System.out.println("SKIPPING STOPPED, fShouldSkip = " + fShouldSkip);
2221                 //xxx: this is not correct, we are returning the last element
2222                 //this wont make any difference since flag has been set to 'false'
2223                 return fQName[--fCount];
2224             }
2225             if(DEBUG_SKIP_ALGORITHM){
2226                 System.out.println("fCount = " + fCount);
2227             }
2228             return fQName[fCount++];
2229 
2230         }
2231 
2232         /** Note that this function is considerably different than nextElement()
2233          * This function just returns the previously stored elements
2234          */
getNext()2235         public QName getNext(){
2236             //when position reaches number of elements in the list..
2237             //set the position back to mark,  making it a circular linked list.
2238             if(fPosition == fCount){
2239                 fPosition = fMark;
2240             }
2241             return fQName[fPosition++];
2242         }
2243 
        /** Returns the current depth and then decrements it.
         */
popElement()2246         public int popElement(){
2247             return fDepth--;
2248         }
2249 
2250 
2251         /** Clears the stack without throwing away existing QName objects. */
clear()2252         public void clear() {
2253             fLastDepth = 0;
2254             fDepth = 0;
2255             fCount = 0 ;
2256             fPosition = fMark = 1;
2257         } // clear()
2258 
2259     } // class ElementStack
2260 
2261     /**
2262      * Element stack. This stack operates without synchronization, error
2263      * checking, and it re-uses objects instead of throwing popped items
2264      * away.
2265      *
2266      * @author Andy Clark, IBM
2267      */
2268     protected class ElementStack {
2269 
2270         //
2271         // Data
2272         //
2273 
2274         /** The stack data. */
2275         protected QName[] fElements;
2276         protected int []  fInt = new int[20];
2277 
2278 
2279         //Element depth
2280         protected int fDepth;
2281         //total number of elements
2282         protected int fCount;
2283         //current position
2284         protected int fPosition;
2285         //Mark refers to the position
2286         protected int fMark;
2287 
2288         protected int fLastDepth ;
2289 
2290         //
2291         // Constructors
2292         //
2293 
2294         /** Default constructor. */
ElementStack()2295         public ElementStack() {
2296             fElements = new QName[20];
2297             for (int i = 0; i < fElements.length; i++) {
2298                 fElements[i] = new QName();
2299             }
2300         } // <init>()
2301 
2302         //
2303         // Public methods
2304         //
2305 
2306         /**
2307          * Pushes an element on the stack.
2308          * <p>
2309          * <strong>Note:</strong> The QName values are copied into the
2310          * stack. In other words, the caller does <em>not</em> orphan
2311          * the element to the stack. Also, the QName object returned
2312          * is <em>not</em> orphaned to the caller. It should be
2313          * considered read-only.
2314          *
2315          * @param element The element to push onto the stack.
2316          *
2317          * @return Returns the actual QName object that stores the
2318          */
2319         //XXX: THIS FUNCTION IS NOT USED
pushElement(QName element)2320         public QName pushElement(QName element) {
2321             if (fDepth == fElements.length) {
2322                 QName[] array = new QName[fElements.length * 2];
2323                 System.arraycopy(fElements, 0, array, 0, fDepth);
2324                 fElements = array;
2325                 for (int i = fDepth; i < fElements.length; i++) {
2326                     fElements[i] = new QName();
2327                 }
2328             }
2329             fElements[fDepth].setValues(element);
2330             return fElements[fDepth++];
2331         } // pushElement(QName):QName
2332 
2333 
2334         /** Note that this function is considerably different than nextElement()
2335          * This function just returns the previously stored elements
2336          */
getNext()2337         public QName getNext(){
2338             //when position reaches number of elements in the list..
2339             //set the position back to mark,  making it a circular linked list.
2340             if(fPosition == fCount){
2341                 fPosition = fMark;
2342             }
2343             //store the position of last opened tag at particular depth
2344             //fInt[++fDepth] = fPosition;
2345             if(DEBUG_SKIP_ALGORITHM){
2346                 System.out.println("Element at fPosition = " + fPosition + " is " + fElements[fPosition].rawname);
2347             }
2348             //return fElements[fPosition++];
2349             return fElements[fPosition];
2350         }
2351 
2352         /** This function should be called only when element was skipped sucessfully.
2353          * 1. Increase the depth - because element was sucessfully skipped.
2354          *2. Store the position of the element token in array  "last opened tag" at depth.
2355          *3. increase the position counter so as to point to the next element in the array
2356          */
push()2357         public void push(){
2358 
2359             fInt[++fDepth] = fPosition++;
2360         }
2361 
2362         /** Check if the element scanned during the start element
2363          *matches the stored element.
2364          *
2365          *@return true if the match suceeds.
2366          */
matchElement(QName element)2367         public boolean matchElement(QName element) {
2368             //last depth is the depth when last elemnt was pushed
2369             //if last depth is greater than current depth
2370             //if(DEBUG_SKIP_ALGORITHM){
2371             //   System.out.println("Check if the element " + element.rawname + " matches");
2372             //  System.out.println("fLastDepth = " + fLastDepth);
2373             // System.out.println("fDepth = " + fDepth);
2374             //}
2375             boolean match = false;
2376             if(fLastDepth > fDepth && fDepth <= 3){
2377                 if(DEBUG_SKIP_ALGORITHM){
2378                     System.out.println("----------ENTERED THE LOOP WHERE WE CHECK FOR MATCHING OF ELMENT-----");
2379                     System.out.println("Depth = " + fDepth + " Checking if INCOMING element " + element.rawname + " match STORED ELEMENT " + fElements[fDepth - 1].rawname);
2380                 }
2381                 if(element.rawname == fElements[fDepth - 1].rawname){
2382                     fAdd = false;
2383                     //mark this position
2384                     //decrease the depth by 1 as arrays are 0 based
2385                     fMark = fDepth - 1;
2386                     //we found the match
2387                     fPosition = fMark;
2388                     match = true;
2389                     //Once we get match decrease the count -- this was increased by nextElement()
2390                     --fCount;
2391                     if(DEBUG_SKIP_ALGORITHM){
2392                         System.out.println("NOW ELEMENT SHOULD NOT BE ADDED, fAdd is set to false");
2393                         System.out.println("fMark = " + fMark);
2394                         System.out.println("fPosition = " + fPosition);
2395                         System.out.println("fDepth = " + fDepth);
2396                         System.out.println("fCount = " + fCount);
2397                         System.out.println("---------MATCH SUCEEDED-----------------");
2398                         System.out.println("");
2399                     }
2400                 }else{
2401                     fAdd = true;
2402                     if(DEBUG_SKIP_ALGORITHM)System.out.println("fAdd is " + fAdd);
2403                 }
2404             }
2405             //store the position for the current depth
2406             //when we are adding the elements, when skipping
2407             //starts even then this should be tracked ie. when
2408             //calling getNext()
2409             if(match){
2410                 //from next element skipping will start, add 1
2411                 fInt[fDepth] = fPosition++;
2412             } else{
2413                 if(DEBUG_SKIP_ALGORITHM){
2414                     System.out.println("At depth = " + fDepth + "array position is = " + (fCount - 1));
2415                 }
2416                 //sicne fInt[fDepth] contains pointer to the element array which are 0 based.
2417                 fInt[fDepth] = fCount - 1;
2418             }
2419 
2420             //if number of elements becomes equal to the length of array -- stop the skipping
2421             //xxx: should we do "fCount == fInt.length"
2422             if (fCount == fElements.length) {
2423                 fSkip = false;
2424                 fAdd = false;
2425                 //reposition the stack -- it seems to be too complex document and there is no symmerty in structure
2426                 reposition();
2427                 if(DEBUG_SKIP_ALGORITHM){
2428                     System.out.println("ALL THE ELMENTS IN ARRAY HAVE BEEN FILLED");
2429                     System.out.println("REPOSITIONING THE STACK");
2430                     System.out.println("-----------SKIPPING STOPPED----------");
2431                     System.out.println("");
2432                 }
2433                 return false;
2434             }
2435             if(DEBUG_SKIP_ALGORITHM){
2436                 if(match){
2437                     System.out.println("Storing fPosition = " + fInt[fDepth] + " at fDepth = " + fDepth);
2438                 }else{
2439                     System.out.println("Storing fCount = " + fInt[fDepth] + " at fDepth = " + fDepth);
2440                 }
2441             }
2442             //store the last depth
2443             fLastDepth = fDepth;
2444             return match;
2445         } // matchElement(QName):QName
2446 
2447 
2448         /**
2449          * Returns the next element on the stack.
2450          *
2451          * @return Returns the actual QName object. Callee should
2452          * use this object to store the details of next element encountered.
2453          */
nextElement()2454         public QName nextElement() {
2455             if(fSkip){
2456                 fDepth++;
2457                 //boundary checks are done in matchElement()
2458                 return fElements[fCount++];
2459             } else if (fDepth == fElements.length) {
2460                 QName[] array = new QName[fElements.length * 2];
2461                 System.arraycopy(fElements, 0, array, 0, fDepth);
2462                 fElements = array;
2463                 for (int i = fDepth; i < fElements.length; i++) {
2464                     fElements[i] = new QName();
2465                 }
2466             }
2467 
2468             return fElements[fDepth++];
2469 
2470         } // pushElement(QName):QName
2471 
2472 
2473         /**
2474          * Pops an element off of the stack by setting the values of
2475          * the specified QName.
2476          * <p>
2477          * <strong>Note:</strong> The object returned is <em>not</em>
2478          * orphaned to the caller. Therefore, the caller should consider
2479          * the object to be read-only.
2480          */
popElement()2481         public QName popElement() {
2482             //return the same object that was pushed -- this would avoid
2483             //setting the values for every end element.
2484             //STRONG: this object is read only -- this object reference shouldn't be stored.
2485             if(fSkip || fAdd ){
2486                 if(DEBUG_SKIP_ALGORITHM){
2487                     System.out.println("POPPING Element, at position " + fInt[fDepth] + " element at that count is = " + fElements[fInt[fDepth]].rawname);
2488                     System.out.println("");
2489                 }
2490                 return fElements[fInt[fDepth--]];
2491             } else{
2492                 if(DEBUG_SKIP_ALGORITHM){
2493                     System.out.println("Retrieveing element at depth = " + fDepth + " is " + fElements[fDepth].rawname );
2494                 }
2495                 return fElements[--fDepth] ;
2496             }
2497             //element.setValues(fElements[--fDepth]);
2498         } // popElement(QName)
2499 
2500         /** Reposition the stack. fInt [] contains all the opened tags at particular depth.
2501          * Transfer all the opened tags starting from depth '2' to the current depth and reposition them
2502          *as per the depth.
2503          */
reposition()2504         public void reposition(){
2505             for( int i = 2 ; i <= fDepth ; i++){
2506                 fElements[i-1] = fElements[fInt[i]];
2507             }
2508             if(DEBUG_SKIP_ALGORITHM){
2509                 for( int i = 0 ; i < fDepth ; i++){
2510                     System.out.println("fElements[" + i + "]" + " = " + fElements[i].rawname);
2511                 }
2512             }
2513         }
2514 
2515         /** Clears the stack without throwing away existing QName objects. */
clear()2516         public void clear() {
2517             fDepth = 0;
2518             fLastDepth = 0;
2519             fCount = 0 ;
2520             fPosition = fMark = 1;
2521 
2522         } // clear()
2523 
2524         /**
2525          * This function is as a result of optimization done for endElement --
2526          * we dont need to set the value for every end element encouterd.
2527          * For Well formedness checks we can have the same QName object that was pushed.
2528          * the values will be set only if application need to know about the endElement
2529          * -- neeraj.bajaj@sun.com
2530          */
2531 
getLastPoppedElement()2532         public QName getLastPoppedElement(){
2533             return fElements[fDepth];
2534         }
2535     } // class ElementStack
2536 
    /**
     * Drives the parser to the next state/event on the input. The parser is guaranteed
     * to stop at the next state/event.
     *
     * Internally, the XML document is divided into several states. Each state represents
     * a section of the XML document. When this function returns normally, it has read
     * a section of the XML document and returns the state corresponding to the section of
     * the document which has been read. For optimization, a particular driver
     * can read ahead of the section of the document (state returned) just read and
     * can maintain a different internal state.
     *
     *
     * @author Neeraj Bajaj, Sun Microsystems
     */
2551     protected interface Driver {
2552 
2553 
2554         /**
2555          * Drives the parser to the next state/event on the input. Parser is guaranteed
2556          * to stop at the next state/event.
2557          *
2558          * Internally XML document is divided into several states. Each state represents
2559          * a sections of XML document. When this functions returns normally, it has read
2560          * the section of XML document and returns the state corresponding to section of
2561          * document which has been read. For optimizations, a particular driver
2562          * can read ahead of the section of document (state returned) just read and
2563          * can maintain a different internal state.
2564          *
2565          * @return state representing the section of document just read.
2566          *
2567          * @throws IOException  Thrown on i/o error.
2568          * @throws XNIException Thrown on parse error.
2569          */
2570 
next()2571         public int next() throws IOException, XNIException;
2572 
2573     } // interface Driver
2574 
2575     /**
2576      * Driver to handle content scanning. This driver is capable of reading
2577      * the fragment of XML document. When it has finished reading fragment
2578      * of XML documents, it can pass the job of reading to another driver.
2579      *
2580      * This class has been modified as per the new design which is more suited to
2581      * efficiently build pull parser. Lot of performance improvements have been done and
2582      * the code has been added to support stax functionality/features.
2583      *
2584      * @author Neeraj Bajaj, Sun Microsystems
2585      *
2586      *
2587      * @author Andy Clark, IBM
2588      * @author Eric Ye, IBM
2589      */
2590     protected class FragmentContentDriver
2591             implements Driver {
2592 
2593         //
2594         // Driver methods
2595         //
2596 
2597         /**
2598          *  decides the appropriate state of the parser
2599          */
startOfMarkup()2600         private void startOfMarkup() throws IOException {
2601             fMarkupDepth++;
2602             final int ch = fEntityScanner.peekChar();
2603 
2604             if (isValidNameStartChar(ch) || isValidNameStartHighSurrogate(ch)) {
2605                 setScannerState(SCANNER_STATE_START_ELEMENT_TAG);
2606             } else {
2607                 switch(ch){
2608                     case '?' :{
2609                         setScannerState(SCANNER_STATE_PI);
2610                         fEntityScanner.skipChar(ch, null);
2611                         break;
2612                     }
2613                     case '!' :{
2614                         fEntityScanner.skipChar(ch, null);
2615                         if (fEntityScanner.skipChar('-', null)) {
2616                             if (!fEntityScanner.skipChar('-', NameType.COMMENT)) {
2617                                 reportFatalError("InvalidCommentStart",
2618                                         null);
2619                             }
2620                             setScannerState(SCANNER_STATE_COMMENT);
2621                         } else if (fEntityScanner.skipString(cdata)) {
2622                             setScannerState(SCANNER_STATE_CDATA );
2623                         } else if (!scanForDoctypeHook()) {
2624                             reportFatalError("MarkupNotRecognizedInContent",
2625                                     null);
2626                         }
2627                         break;
2628                     }
2629                     case '/' :{
2630                         setScannerState(SCANNER_STATE_END_ELEMENT_TAG);
2631                         fEntityScanner.skipChar(ch, NameType.ELEMENTEND);
2632                         break;
2633                     }
2634                     default :{
2635                         reportFatalError("MarkupNotRecognizedInContent", null);
2636                     }
2637                 }
2638             }
2639 
2640         }//startOfMarkup
2641 
startOfContent()2642         private void startOfContent() throws IOException {
2643             if (fEntityScanner.skipChar('<', null)) {
2644                 setScannerState(SCANNER_STATE_START_OF_MARKUP);
2645             } else if (fEntityScanner.skipChar('&', NameType.REFERENCE)) {
2646                 setScannerState(SCANNER_STATE_REFERENCE) ; //XMLEvent.ENTITY_REFERENCE ); //SCANNER_STATE_REFERENCE
2647             } else {
2648                 //element content is there..
2649                 setScannerState(SCANNER_STATE_CHARACTER_DATA);
2650             }
2651         }//startOfContent
2652 
2653 
        /**
         *
         * SCANNER_STATE_CONTENT and SCANNER_STATE_START_OF_MARKUP are two super states of the parser.
         * At any point in time, when in doubt over the current state of the parser, the state should be
         * set to SCANNER_STATE_CONTENT. The parser will automatically revive itself and will set the state of
         * the parser to one of its sub states.
         * Sub states are defined in the parser on the basis of different XML components like
         * SCANNER_STATE_ENTITY_REFERENCE, SCANNER_STATE_START_ELEMENT, SCANNER_STATE_CDATA etc.
         * These sub states help the parser to have fine control over the parsing. These are the
         * different mileposts; the parser stops at each sub state (milepost). Based on this state it is
         * decided whether the parser needs to stop at the next milepost.
         *
         */
decideSubState()2667         public void decideSubState() throws IOException {
2668             while( fScannerState == SCANNER_STATE_CONTENT || fScannerState == SCANNER_STATE_START_OF_MARKUP){
2669 
2670                 switch (fScannerState) {
2671 
2672                     case SCANNER_STATE_CONTENT: {
2673                         startOfContent() ;
2674                         break;
2675                     }
2676 
2677                     case SCANNER_STATE_START_OF_MARKUP: {
2678                         startOfMarkup() ;
2679                         break;
2680                     }
2681                 }
2682             }
2683         }//decideSubState
2684 
2685         /**
2686          * Drives the parser to the next state/event on the input. Parser is guaranteed
2687          * to stop at the next state/event. Internally XML document
2688          * is divided into several states. Each state represents a sections of XML
2689          * document. When this functions returns normally, it has read the section
2690          * of XML document and returns the state corresponding to section of
2691          * document which has been read. For optimizations, a particular driver
2692          * can read ahead of the section of document (state returned) just read and
2693          * can maintain a different internal state.
2694          *
2695          * State returned corresponds to Stax states.
2696          *
2697          * @return state representing the section of document just read.
2698          *
2699          * @throws IOException  Thrown on i/o error.
2700          * @throws XNIException Thrown on parse error.
2701          */
2702 
next()2703         public int next() throws IOException, XNIException {
2704             while (true) {
2705             try {
2706                 if(DEBUG_NEXT){
2707                     System.out.println("NOW IN FragmentContentDriver");
2708                     System.out.println("Entering the FragmentContentDriver with = " + getScannerStateName(fScannerState));
2709                 }
2710 
2711                 //decide the actual sub state of the scanner.For more information refer to the javadoc of
2712                 //decideSubState.
2713 
2714                 switch (fScannerState) {
2715                     case SCANNER_STATE_CONTENT: {
2716                         final int ch = fEntityScanner.peekChar();
2717                         if (ch == '<') {
2718                             fEntityScanner.scanChar(null);
2719                             setScannerState(SCANNER_STATE_START_OF_MARKUP);
2720                         } else if (ch == '&') {
2721                             fEntityScanner.scanChar(NameType.REFERENCE);
2722                             setScannerState(SCANNER_STATE_REFERENCE) ; //XMLEvent.ENTITY_REFERENCE ); //SCANNER_STATE_REFERENCE
2723                             break;
2724                         } else {
2725                             //element content is there..
2726                             setScannerState(SCANNER_STATE_CHARACTER_DATA);
2727                             break;
2728                         }
2729                     }
2730 
2731                     case SCANNER_STATE_START_OF_MARKUP: {
2732                         startOfMarkup();
2733                         break;
2734                     }//case: SCANNER_STATE_START_OF_MARKUP
2735 
2736                 }//end of switch
2737                 //decideSubState() ;
2738 
2739                 //do some special handling if isCoalesce is set to true.
2740                 if(fIsCoalesce){
2741                     fUsebuffer = true ;
2742                     //if the last section was character data
2743                     if(fLastSectionWasCharacterData){
2744 
2745                         //if we dont encounter any CDATA or ENITY REFERENCE and current state is also not SCANNER_STATE_CHARACTER_DATA
2746                         //return the last scanned charactrer data.
2747                         if((fScannerState != SCANNER_STATE_CDATA) && (fScannerState != SCANNER_STATE_REFERENCE)
2748                         && (fScannerState != SCANNER_STATE_CHARACTER_DATA)){
2749                             fLastSectionWasCharacterData = false;
2750                             return XMLEvent.CHARACTERS;
2751                         }
2752                     }//if last section was CDATA or ENTITY REFERENCE
2753                     //xxx: there might be another entity reference or CDATA after this
2754                     //<foo>blah blah &amp;&lt;<![CDATA[[aa]]>blah blah</foo>
2755                     else if((fLastSectionWasCData || fLastSectionWasEntityReference)){
2756                         //and current state is not SCANNER_STATE_CHARACTER_DATA
2757                         //or SCANNER_STATE_CDATA or SCANNER_STATE_REFERENCE
2758                         //this means there is nothing more to be coalesced.
2759                         //return the CHARACTERS event.
2760                         if((fScannerState != SCANNER_STATE_CDATA) && (fScannerState != SCANNER_STATE_REFERENCE)
2761                         && (fScannerState != SCANNER_STATE_CHARACTER_DATA)){
2762 
2763                             fLastSectionWasCData = false;
2764                             fLastSectionWasEntityReference = false;
2765                             return XMLEvent.CHARACTERS;
2766                         }
2767                     }
2768                 }
2769 
2770 
2771                 if(DEBUG_NEXT){
2772                     System.out.println("Actual scanner state set by decideSubState is = " + getScannerStateName(fScannerState));
2773                 }
2774 
2775                 switch(fScannerState){
2776 
2777                     case XMLEvent.START_DOCUMENT :
2778                         return XMLEvent.START_DOCUMENT;
2779 
2780                     case SCANNER_STATE_START_ELEMENT_TAG :{
2781 
2782                         //xxx this function returns true when element is empty.. can be linked to end element event.
2783                         //returns true if the element is empty
2784                         fEmptyElement = scanStartElement() ;
2785                         //if the element is empty the next event is "end element"
2786                         if(fEmptyElement){
2787                             setScannerState(SCANNER_STATE_END_ELEMENT_TAG);
2788                         }else{
2789                             //set the next possible state
2790                             setScannerState(SCANNER_STATE_CONTENT);
2791                         }
2792                         return XMLEvent.START_ELEMENT ;
2793                     }
2794 
2795                     case SCANNER_STATE_CHARACTER_DATA: {
2796                         if(DEBUG_COALESCE){
2797                             System.out.println("fLastSectionWasCData = " + fLastSectionWasCData);
2798                             System.out.println("fIsCoalesce = " + fIsCoalesce);
2799                         }
2800                         //if last section was either entity reference or cdata or character data we should be using buffer
2801                         fUsebuffer = fLastSectionWasEntityReference || fLastSectionWasCData || fLastSectionWasCharacterData ;
2802 
2803                         //When coalesce is set to true and last state was REFERENCE or CDATA or CHARACTER_DATA, buffer should not be cleared.
2804                         if( fIsCoalesce && (fLastSectionWasEntityReference || fLastSectionWasCData || fLastSectionWasCharacterData) ){
2805                             fLastSectionWasEntityReference = false;
2806                             fLastSectionWasCData = false;
2807                             fLastSectionWasCharacterData = true ;
2808                             fUsebuffer = true;
2809                         }else{
2810                             //clear the buffer
2811                             fContentBuffer.clear();
2812                         }
2813 
2814                         //set the fTempString length to 0 before passing it on to scanContent
2815                         //scanContent sets the correct co-ordinates as per the content read
2816                         fTempString.length = 0;
2817                         int c = fEntityScanner.scanContent(fTempString);
2818                         if(DEBUG){
2819                             System.out.println("fTempString = " + fTempString);
2820                         }
2821                         if(fEntityScanner.skipChar('<', null)){
2822                             //check if we have reached end of element
2823                             if(fEntityScanner.skipChar('/', NameType.ELEMENTEND)){
2824                                 //increase the mark up depth
2825                                 fMarkupDepth++;
2826                                 fLastSectionWasCharacterData = false;
2827                                 setScannerState(SCANNER_STATE_END_ELEMENT_TAG);
2828                                 //check if its start of new element
2829                             }else if(XMLChar.isNameStart(fEntityScanner.peekChar())){
2830                                 fMarkupDepth++;
2831                                 fLastSectionWasCharacterData = false;
2832                                 setScannerState(SCANNER_STATE_START_ELEMENT_TAG);
2833                             }else{
2834                                 setScannerState(SCANNER_STATE_START_OF_MARKUP);
2835                                 //there can be cdata ahead if coalesce is true we should call again
2836                                 if(fIsCoalesce){
2837                                     fUsebuffer = true;
2838                                     fLastSectionWasCharacterData = true;
2839                                     fContentBuffer.append(fTempString);
2840                                     fTempString.length = 0;
2841                                     continue;
2842                                 }
2843                             }
2844                             //in case last section was either entity reference or cdata or character data -- we should be using buffer
2845                             if(fUsebuffer){
2846                                 fContentBuffer.append(fTempString);
2847                                 fTempString.length = 0;
2848                             }
2849                             if(DEBUG){
2850                                 System.out.println("NOT USING THE BUFFER, STRING = " + fTempString.toString());
2851                             }
2852                             if(dtdGrammarUtil!= null && dtdGrammarUtil.isIgnorableWhiteSpace(fContentBuffer)){
2853                                 if(DEBUG)System.out.println("Return SPACE EVENT");
2854                                 return XMLEvent.SPACE;
2855                             }else
2856                                 return XMLEvent.CHARACTERS;
2857 
2858                         } else{
2859                             fUsebuffer = true ;
2860                             if(DEBUG){
2861                                 System.out.println("fContentBuffer = " + fContentBuffer);
2862                                 System.out.println("fTempString = " + fTempString);
2863                             }
2864                             fContentBuffer.append(fTempString);
2865                             fTempString.length = 0;
2866                         }
2867                         if (c == '\r') {
2868                             if(DEBUG){
2869                                 System.out.println("'\r' character found");
2870                             }
2871                             // happens when there is the character reference &#13;
2872                             //xxx: We know the next chracter.. we should just skip it and add ']' directlry
2873                             fEntityScanner.scanChar(null);
2874                             fUsebuffer = true;
2875                             fContentBuffer.append((char)c);
2876                             c = -1 ;
2877                         } else if (c == ']') {
2878                             //fStringBuffer.clear();
2879                             //xxx: We know the next chracter.. we should just skip it and add ']' directlry
2880                             fUsebuffer = true;
2881                             fContentBuffer.append((char)fEntityScanner.scanChar(null));
2882                             // remember where we are in case we get an endEntity before we
2883                             // could flush the buffer out - this happens when we're parsing an
2884                             // entity which ends with a ]
2885                             fInScanContent = true;
2886 
2887                             // We work on a single character basis to handle cases such as:
2888                             // ']]]>' which we might otherwise miss.
2889                             //
2890                             if (fEntityScanner.skipChar(']', null)) {
2891                                 fContentBuffer.append(']');
2892                                 while (fEntityScanner.skipChar(']', null)) {
2893                                     fContentBuffer.append(']');
2894                                 }
2895                                 if (fEntityScanner.skipChar('>', null)) {
2896                                     reportFatalError("CDEndInContent", null);
2897                                 }
2898                             }
2899                             c = -1 ;
2900                             fInScanContent = false;
2901                         }
2902 
2903                         do{
2904                             //xxx: we should be using only one buffer..
2905                             // we need not to grow the buffer only when isCoalesce() is not true;
2906 
2907                             if (c == '<') {
2908                                 fEntityScanner.scanChar(null);
2909                                 setScannerState(SCANNER_STATE_START_OF_MARKUP);
2910                                 break;
2911                             }//xxx what should be the behavior if entity reference is present in the content ?
2912                             else if (c == '&') {
2913                                 fEntityScanner.scanChar(NameType.REFERENCE);
2914                                 setScannerState(SCANNER_STATE_REFERENCE);
2915                                 break;
2916                             }///xxx since this part is also characters, it should be merged...
2917                             else if (c != -1 && isInvalidLiteral(c)) {
2918                                 if (XMLChar.isHighSurrogate(c)) {
2919                                     // special case: surrogates
2920                                     scanSurrogates(fContentBuffer) ;
2921                                     setScannerState(SCANNER_STATE_CONTENT);
2922                                 } else {
2923                                     reportFatalError("InvalidCharInContent",
2924                                             new Object[] {
2925                                         Integer.toString(c, 16)});
2926                                         fEntityScanner.scanChar(null);
2927                                 }
2928                                 break;
2929                             }
2930                             //xxx: scanContent also gives character callback.
2931                             c = scanContent(fContentBuffer) ;
2932                             //we should not be iterating again if fIsCoalesce is not set to true
2933 
2934                             if(!fIsCoalesce){
2935                                 setScannerState(SCANNER_STATE_CONTENT);
2936                                 break;
2937                             }
2938 
2939                         }while(true);
2940 
2941                         //if (fDocumentHandler != null) {
2942                         //  fDocumentHandler.characters(fContentBuffer, null);
2943                         //}
2944                         if(DEBUG)System.out.println("USING THE BUFFER, STRING START=" + fContentBuffer.toString() +"=END");
2945                         //if fIsCoalesce is true there might be more data so call fDriver.next()
2946                         if(fIsCoalesce){
2947                             fLastSectionWasCharacterData = true ;
2948                             continue;
2949                         }else{
2950                             if(dtdGrammarUtil!= null && dtdGrammarUtil.isIgnorableWhiteSpace(fContentBuffer)){
2951                                 if(DEBUG)System.out.println("Return SPACE EVENT");
2952                                 return XMLEvent.SPACE;
2953                             } else
2954                                 return XMLEvent.CHARACTERS ;
2955                         }
2956                     }
2957 
2958                     case SCANNER_STATE_END_ELEMENT_TAG :{
2959                         if(fEmptyElement){
2960                             //set it back to false.
2961                             fEmptyElement = false;
2962                             setScannerState(SCANNER_STATE_CONTENT);
2963                             //check the case when there is comment after single element document
2964                             //<foo/> and some comment after this
2965                             return (fMarkupDepth == 0 && elementDepthIsZeroHook() ) ? XMLEvent.END_ELEMENT : XMLEvent.END_ELEMENT ;
2966 
2967                         } else if(scanEndElement() == 0) {
2968                             //It is last element of the document
2969                             if (elementDepthIsZeroHook()) {
2970                                 //if element depth is zero , it indicates the end of the document
2971                                 //the state shouldn't be set, because it is set by elementDepthIsZeroHook() function
2972                                 //xxx understand this point once again..
2973                                 return XMLEvent.END_ELEMENT ;
2974                             }
2975 
2976                         }
2977                         setScannerState(SCANNER_STATE_CONTENT);
2978                         return XMLEvent.END_ELEMENT ;
2979                     }
2980 
2981                     case SCANNER_STATE_COMMENT: { //SCANNER_STATE_COMMENT:
2982                         scanComment();
2983                         setScannerState(SCANNER_STATE_CONTENT);
2984                         return XMLEvent.COMMENT;
2985                         //break;
2986                     }
2987                     case SCANNER_STATE_PI:{ //SCANNER_STATE_PI: {
2988                         //clear the buffer first
2989                         fContentBuffer.clear() ;
2990                         //xxx: which buffer should be passed. Ideally we shouldn't have
2991                         //more than two buffers --
2992                         //xxx: where should we add the switch for buffering.
2993                         scanPI(fContentBuffer);
2994                         setScannerState(SCANNER_STATE_CONTENT);
2995                         return XMLEvent.PROCESSING_INSTRUCTION;
2996                         //break;
2997                     }
2998                     case SCANNER_STATE_CDATA :{ //SCANNER_STATE_CDATA: {
2999                         //xxx: What if CDATA is the first event
3000                         //<foo><![CDATA[hello<><>]]>append</foo>
3001 
3002                         //we should not clear the buffer only when the last state was either SCANNER_STATE_REFERENCE or
3003                         //SCANNER_STATE_CHARACTER_DATA or SCANNER_STATE_REFERENCE
3004                         if(fIsCoalesce && ( fLastSectionWasEntityReference || fLastSectionWasCData || fLastSectionWasCharacterData)){
3005                             fLastSectionWasCData = true ;
3006                             fLastSectionWasEntityReference = false;
3007                             fLastSectionWasCharacterData = false;
3008                         }//if we dont need to coalesce clear the buffer
3009                         else{
3010                             fContentBuffer.clear();
3011                         }
3012                         fUsebuffer = true;
3013                         //CDATA section is completely read in all the case.
3014                         scanCDATASection(fContentBuffer , true);
3015                         setScannerState(SCANNER_STATE_CONTENT);
3016                         //1. if fIsCoalesce is set to true we set the variable fLastSectionWasCData to true
3017                         //and just call fDispatche.next(). Since we have set the scanner state to
3018                         //SCANNER_STATE_CONTENT (super state) parser will automatically recover and
3019                         //behave appropriately. When isCoalesce is set to true we dont need to reportCDATA event
3020                         //2. Check if application has set for reporting CDATA event
3021                         //3. if the application has neither set the fIsCoalesce to true nor fReportCdataEvent
3022                         //return the cdata event as characters.
3023                         if(fIsCoalesce){
3024                             fLastSectionWasCData = true ;
3025                             //there might be more data to coalesce.
3026                             continue;
3027                         }else if(fReportCdataEvent){
3028                             return XMLEvent.CDATA;
3029                         } else{
3030                             return XMLEvent.CHARACTERS;
3031                         }
3032                     }
3033 
3034                     case SCANNER_STATE_REFERENCE :{
3035                         fMarkupDepth++;
3036                         foundBuiltInRefs = false;
3037 
3038                         //we should not clear the buffer only when the last state was either CDATA or
3039                         //SCANNER_STATE_CHARACTER_DATA or SCANNER_STATE_REFERENCE
3040                         if(fIsCoalesce && ( fLastSectionWasEntityReference || fLastSectionWasCData || fLastSectionWasCharacterData)){
3041                             //fLastSectionWasEntityReference or fLastSectionWasCData are only
3042                             //used when fIsCoalesce is set to true.
3043                             fLastSectionWasEntityReference = true ;
3044                             fLastSectionWasCData = false;
3045                             fLastSectionWasCharacterData = false;
3046                         }//if we dont need to coalesce clear the buffer
3047                         else{
3048                             fContentBuffer.clear();
3049                         }
3050                         fUsebuffer = true ;
3051                         //take care of character reference
3052                         if (fEntityScanner.skipChar('#', NameType.REFERENCE)) {
3053                             scanCharReferenceValue(fContentBuffer, null);
3054                             fMarkupDepth--;
3055                             if(!fIsCoalesce){
3056                                 setScannerState(SCANNER_STATE_CONTENT);
3057                                 return XMLEvent.CHARACTERS;
3058                             }
3059                         } else {
3060                             // this function also starts new entity
3061                             scanEntityReference(fContentBuffer);
3062                             //if there was built-in entity reference & coalesce is not true
3063                             //return CHARACTERS
3064                             if(fScannerState == SCANNER_STATE_BUILT_IN_REFS && !fIsCoalesce){
3065                                 setScannerState(SCANNER_STATE_CONTENT);
3066                                 return XMLEvent.CHARACTERS;
3067                             }
3068 
3069                             //if there was a text declaration, call next() it will be taken care.
3070                             if(fScannerState == SCANNER_STATE_TEXT_DECL){
3071                                 fLastSectionWasEntityReference = true ;
3072                                 continue;
3073                             }
3074 
3075                             if(fScannerState == SCANNER_STATE_REFERENCE){
3076                                 setScannerState(SCANNER_STATE_CONTENT);
3077                                 if (fReplaceEntityReferences && fEntityStore.isDeclaredEntity(fCurrentEntityName)) {
3078                                     // Skip the entity reference, we don't care
3079                                     continue;
3080                                 }
3081                                 return XMLEvent.ENTITY_REFERENCE;
3082                             }
3083                         }
3084                         //Wether it was character reference, entity reference or built-in entity
3085                         //set the next possible state to SCANNER_STATE_CONTENT
3086                         setScannerState(SCANNER_STATE_CONTENT);
3087                         fLastSectionWasEntityReference = true ;
3088                         continue;
3089                     }
3090 
3091                     case SCANNER_STATE_TEXT_DECL: {
3092                         // scan text decl
3093                         if (fEntityScanner.skipString("<?xml")) {
3094                             fMarkupDepth++;
3095                             // NOTE: special case where entity starts with a PI
3096                             //       whose name starts with "xml" (e.g. "xmlfoo")
3097                             if (isValidNameChar(fEntityScanner.peekChar())) {
3098                                 fStringBuffer.clear();
3099                                 fStringBuffer.append("xml");
3100 
3101                                 if (fNamespaces) {
3102                                     while (isValidNCName(fEntityScanner.peekChar())) {
3103                                         fStringBuffer.append((char)fEntityScanner.scanChar(null));
3104                                     }
3105                                 } else {
3106                                     while (isValidNameChar(fEntityScanner.peekChar())) {
3107                                         fStringBuffer.append((char)fEntityScanner.scanChar(null));
3108                                     }
3109                                 }
3110                                 String target = fSymbolTable.addSymbol(fStringBuffer.ch, fStringBuffer.offset, fStringBuffer.length);
3111                                 fContentBuffer.clear();
3112                                 scanPIData(target, fContentBuffer);
3113                             }
3114 
3115                             // standard text declaration
3116                             else {
3117                                 //xxx: this function gives callback
3118                                 scanXMLDeclOrTextDecl(true);
3119                             }
3120                         }
3121                         // now that we've straightened out the readers, we can read in chunks:
3122                         fEntityManager.fCurrentEntity.mayReadChunks = true;
3123                         setScannerState(SCANNER_STATE_CONTENT);
3124                         //xxx: we don't return any state, so how do we get to know about TEXT declarations.
3125                         //it seems we have to careful when to allow function issue a callback
3126                         //and when to allow adapter issue a callback.
3127                         continue;
3128                     }
3129 
3130 
3131                     case SCANNER_STATE_ROOT_ELEMENT: {
3132                         if (scanRootElementHook()) {
3133                             fEmptyElement = true;
3134                             //rest would be taken care by fTrailingMiscDriver set by scanRootElementHook
3135                             return XMLEvent.START_ELEMENT;
3136                         }
3137                         setScannerState(SCANNER_STATE_CONTENT);
3138                         return XMLEvent.START_ELEMENT ;
3139                     }
3140                     case SCANNER_STATE_CHAR_REFERENCE : {
3141                         fContentBuffer.clear();
3142                         scanCharReferenceValue(fContentBuffer, null);
3143                         fMarkupDepth--;
3144                         setScannerState(SCANNER_STATE_CONTENT);
3145                         return XMLEvent.CHARACTERS;
3146                     }
3147                     default:
3148                         throw new XNIException("Scanner State " + fScannerState + " not Recognized ");
3149 
3150                 }//switch
3151             }
3152             // premature end of file
3153             catch (EOFException e) {
3154                 endOfFileHook(e);
3155                 return -1;
3156             }
3157             } //while loop
3158         }//next
3159 
3160         //
3161         // Protected methods
3162         //
3163 
3164         // hooks
3165 
3166         // NOTE: These hook methods are added so that the full document
3167         //       scanner can share the majority of code with this class.
3168 
3169         /**
3170          * Scan for DOCTYPE hook. This method is a hook for subclasses
3171          * to add code to handle scanning for a the "DOCTYPE" string
3172          * after the string "<!" has been scanned.
3173          *
3174          * @return True if the "DOCTYPE" was scanned; false if "DOCTYPE"
3175          *          was not scanned.
3176          */
scanForDoctypeHook()3177         protected boolean scanForDoctypeHook()
3178         throws IOException, XNIException {
3179             return false;
3180         } // scanForDoctypeHook():boolean
3181 
3182         /**
3183          * Element depth iz zero. This methos is a hook for subclasses
3184          * to add code to handle when the element depth hits zero. When
3185          * scanning a document fragment, an element depth of zero is
3186          * normal. However, when scanning a full XML document, the
3187          * scanner must handle the trailing miscellanous section of
3188          * the document after the end of the document's root element.
3189          *
3190          * @return True if the caller should stop and return true which
3191          *          allows the scanner to switch to a new scanning
3192          *          driver. A return value of false indicates that
3193          *          the content driver should continue as normal.
3194          */
elementDepthIsZeroHook()3195         protected boolean elementDepthIsZeroHook()
3196         throws IOException, XNIException {
3197             return false;
3198         } // elementDepthIsZeroHook():boolean
3199 
3200         /**
3201          * Scan for root element hook. This method is a hook for
3202          * subclasses to add code that handles scanning for the root
3203          * element. When scanning a document fragment, there is no
3204          * "root" element. However, when scanning a full XML document,
3205          * the scanner must handle the root element specially.
3206          *
3207          * @return True if the caller should stop and return true which
3208          *          allows the scanner to switch to a new scanning
3209          *          driver. A return value of false indicates that
3210          *          the content driver should continue as normal.
3211          */
scanRootElementHook()3212         protected boolean scanRootElementHook()
3213         throws IOException, XNIException {
3214             return false;
3215         } // scanRootElementHook():boolean
3216 
3217         /**
3218          * End of file hook. This method is a hook for subclasses to
3219          * add code that handles the end of file. The end of file in
3220          * a document fragment is OK if the markup depth is zero.
3221          * However, when scanning a full XML document, an end of file
3222          * is always premature.
3223          */
endOfFileHook(EOFException e)3224         protected void endOfFileHook(EOFException e)
3225         throws IOException, XNIException {
3226 
3227             // NOTE: An end of file is only only an error if we were
3228             //       in the middle of scanning some markup. -Ac
3229             if (fMarkupDepth != 0) {
3230                 reportFatalError("PrematureEOF", null);
3231             }
3232 
3233         } // endOfFileHook()
3234 
3235     } // class FragmentContentDriver
3236 
pr(String str)3237     static void pr(String str) {
3238         System.out.println(str) ;
3239     }
3240 
3241     protected boolean fUsebuffer ;
3242 
3243     /** this function gets an XMLString (which is used to store the attribute value) from the special pool
3244      *  maintained for attributes.
3245      *  fAttributeCacheUsedCount tracks the number of attributes that has been consumed from the pool.
3246      *  if all the attributes has been consumed, it adds a new XMLString inthe pool and returns the same
3247      *  XMLString.
3248      *
3249      * @return XMLString XMLString used to store an attribute value.
3250      */
3251 
getString()3252     protected XMLString getString(){
3253         if(fAttributeCacheUsedCount < initialCacheCount || fAttributeCacheUsedCount < attributeValueCache.size()){
3254             return attributeValueCache.get(fAttributeCacheUsedCount++);
3255         } else{
3256             XMLString str = new XMLString();
3257             fAttributeCacheUsedCount++;
3258             attributeValueCache.add(str);
3259             return str;
3260         }
3261     }
3262 
3263     /**
3264      * Implements XMLBufferListener interface.
3265      */
3266 
refresh()3267     public void refresh(){
3268         refresh(0);
3269     }
3270 
3271     /**
3272      * receives callbacks from {@link XMLEntityReader } when buffer
3273      * is being changed.
3274      * @param refreshPosition
3275      */
refresh(int refreshPosition)3276     public void refresh(int refreshPosition){
3277         //If you are reading attributes and you got a callback
3278         //cache available attributes.
3279         if(fReadingAttributes){
3280             fAttributes.refresh();
3281         }
3282         if(fScannerState == SCANNER_STATE_CHARACTER_DATA){
3283             //since fTempString directly matches to the underlying main buffer
3284             //store the data into buffer
3285             fContentBuffer.append(fTempString);
3286             //clear the XMLString so that data can't be added again.
3287             fTempString.length = 0;
3288             fUsebuffer = true;
3289         }
3290     }
3291 
3292 } // class XMLDocumentFragmentScannerImpl
3293