1 /*
2  * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved.
3  */
4 
5 /*
6  * Licensed to the Apache Software Foundation (ASF) under one or more
7  * contributor license agreements.  See the NOTICE file distributed with
8  * this work for additional information regarding copyright ownership.
9  * The ASF licenses this file to You under the Apache License, Version 2.0
10  * (the "License"); you may not use this file except in compliance with
11  * the License.  You may obtain a copy of the License at
12  *
13  *     http://www.apache.org/licenses/LICENSE-2.0
14  *
15  * Unless required by applicable law or agreed to in writing, software
16  * distributed under the License is distributed on an "AS IS" BASIS,
17  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18  * See the License for the specific language governing permissions and
19  * limitations under the License.
20  */
21 
22 package com.sun.org.apache.xerces.internal.impl;
23 
24 import com.sun.org.apache.xerces.internal.impl.dtd.XMLDTDDescription;
25 import com.sun.org.apache.xerces.internal.impl.io.MalformedByteSequenceException;
26 import com.sun.org.apache.xerces.internal.impl.msg.XMLMessageFormatter;
27 import com.sun.org.apache.xerces.internal.impl.validation.ValidationManager;
28 import com.sun.org.apache.xerces.internal.util.NamespaceSupport;
29 import com.sun.org.apache.xerces.internal.util.XMLChar;
30 import com.sun.org.apache.xerces.internal.util.XMLResourceIdentifierImpl;
31 import com.sun.org.apache.xerces.internal.util.XMLStringBuffer;
32 import com.sun.org.apache.xerces.internal.xni.Augmentations;
33 import com.sun.org.apache.xerces.internal.xni.NamespaceContext;
34 import com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier;
35 import com.sun.org.apache.xerces.internal.xni.XNIException;
36 import com.sun.org.apache.xerces.internal.xni.parser.XMLComponentManager;
37 import com.sun.org.apache.xerces.internal.xni.parser.XMLConfigurationException;
38 import com.sun.org.apache.xerces.internal.xni.parser.XMLDTDScanner;
39 import com.sun.org.apache.xerces.internal.xni.parser.XMLInputSource;
40 import com.sun.xml.internal.stream.Entity;
41 import com.sun.xml.internal.stream.StaxXMLInputSource;
42 import com.sun.xml.internal.stream.dtd.DTDGrammarUtil;
43 import java.io.CharConversionException;
44 import java.io.EOFException;
45 import java.io.IOException;
46 import javax.xml.stream.XMLInputFactory;
47 import javax.xml.stream.events.XMLEvent;
48 import jdk.xml.internal.SecuritySupport;
49 
50 
51 /**
52  * This class is responsible for scanning XML document structure
53  * and content.
54  *
 * This class has been modified to follow the new design, which is better
 * suited to efficiently building a pull parser. Many improvements have been
 * made, and code has been added to support StAX functionality and features.
58  *
59  * @author Neeraj Bajaj, Sun Microsystems
60  * @author K.Venugopal, Sun Microsystems
61  * @author Glenn Marcy, IBM
62  * @author Andy Clark, IBM
63  * @author Arnaud  Le Hors, IBM
64  * @author Eric Ye, IBM
65  * @author Sunitha Reddy, Sun Microsystems
66  *
67  * Refer to the table in unit-test javax.xml.stream.XMLStreamReaderTest.SupportDTD for changes
68  * related to property SupportDTD.
69  * @author Joe Wang, Sun Microsystems
70  * @LastModified: Sep 2017
71  */
72 public class XMLDocumentScannerImpl
73         extends XMLDocumentFragmentScannerImpl{
74 
75     //
76     // Constants
77     //
78 
79     // scanner states
80 
81     /** Scanner state: XML declaration. */
82     protected static final int SCANNER_STATE_XML_DECL = 42;
83 
84     /** Scanner state: prolog. */
85     protected static final int SCANNER_STATE_PROLOG = 43;
86 
87     /** Scanner state: trailing misc. */
88     protected static final int SCANNER_STATE_TRAILING_MISC = 44;
89 
90     /** Scanner state: DTD internal declarations. */
91     protected static final int SCANNER_STATE_DTD_INTERNAL_DECLS = 45;
92 
93     /** Scanner state: open DTD external subset. */
94     protected static final int SCANNER_STATE_DTD_EXTERNAL = 46;
95 
96     /** Scanner state: DTD external declarations. */
97     protected static final int SCANNER_STATE_DTD_EXTERNAL_DECLS = 47;
98 
99     /** Scanner state: NO MORE ELEMENTS. */
100     protected static final int SCANNER_STATE_NO_SUCH_ELEMENT_EXCEPTION = 48;
101 
102     // feature identifiers
103 
104     /** Property identifier document scanner: */
105     protected static final String DOCUMENT_SCANNER =
106             Constants.XERCES_PROPERTY_PREFIX + Constants.DOCUMENT_SCANNER_PROPERTY;
107 
108     /** Feature identifier: load external DTD. */
109     protected static final String LOAD_EXTERNAL_DTD =
110             Constants.XERCES_FEATURE_PREFIX + Constants.LOAD_EXTERNAL_DTD_FEATURE;
111 
    /** Feature identifier: disallow doctype declaration. */
113     protected static final String DISALLOW_DOCTYPE_DECL_FEATURE =
114             Constants.XERCES_FEATURE_PREFIX + Constants.DISALLOW_DOCTYPE_DECL_FEATURE;
115 
116     // property identifiers
117 
118     /** Property identifier: DTD scanner. */
119     protected static final String DTD_SCANNER =
120             Constants.XERCES_PROPERTY_PREFIX + Constants.DTD_SCANNER_PROPERTY;
121 
122     // property identifier:  ValidationManager
123     protected static final String VALIDATION_MANAGER =
124             Constants.XERCES_PROPERTY_PREFIX + Constants.VALIDATION_MANAGER_PROPERTY;
125 
126     /** property identifier:  NamespaceContext */
127     protected static final String NAMESPACE_CONTEXT =
128         Constants.XERCES_PROPERTY_PREFIX + Constants.NAMESPACE_CONTEXT_PROPERTY;
129 
130     // recognized features and properties
131 
132     /** Recognized features. */
133     private static final String[] RECOGNIZED_FEATURES = {
134         LOAD_EXTERNAL_DTD,
135                 DISALLOW_DOCTYPE_DECL_FEATURE,
136     };
137 
138     /** Feature defaults. */
139     private static final Boolean[] FEATURE_DEFAULTS = {
140         Boolean.TRUE,
141                 Boolean.FALSE,
142     };
143 
144     /** Recognized properties. */
145     private static final String[] RECOGNIZED_PROPERTIES = {
146         DTD_SCANNER,
147                 VALIDATION_MANAGER
148     };
149 
150     /** Property defaults. */
151     private static final Object[] PROPERTY_DEFAULTS = {
152             null,
153                 null
154     };
155 
156     //
    // Data
158     //
159 
160     // properties
161 
162     /** DTD scanner. */
163     protected XMLDTDScanner fDTDScanner = null;
164 
165     /** Validation manager . */
166     //xxx: fValidationManager code needs to be added yet!
167     protected ValidationManager fValidationManager;
168 
169     protected XMLStringBuffer fDTDDecl = null;
170     protected boolean fReadingDTD = false;
171     protected boolean fAddedListener = false;
172 
173     // protected data
174 
175     // other info
176 
177     /** Doctype name. */
178     protected String fDoctypeName;
179 
180     /** Doctype declaration public identifier. */
181     protected String fDoctypePublicId;
182 
183     /** Doctype declaration system identifier. */
184     protected String fDoctypeSystemId;
185 
186     /** Namespace support. */
187     protected NamespaceContext fNamespaceContext = new NamespaceSupport();
188 
189     // features
190 
191     /** Load external DTD. */
192     protected boolean fLoadExternalDTD = true;
193 
194     // state
195 
196     /** Seen doctype declaration. */
197     protected boolean fSeenDoctypeDecl;
198 
199     protected boolean fScanEndElement;
200 
201     //protected int fScannerLastState ;
202 
203     // drivers
204 
205     /** XML declaration driver. */
206     protected Driver fXMLDeclDriver = new XMLDeclDriver();
207 
208     /** Prolog driver. */
209     protected Driver fPrologDriver = new PrologDriver();
210 
211     /** DTD driver. */
212     protected Driver fDTDDriver = null ;
213 
214     /** Trailing miscellaneous section driver. */
215     protected Driver fTrailingMiscDriver = new TrailingMiscDriver();
216     protected int fStartPos = 0;
217     protected int fEndPos = 0;
218     protected boolean fSeenInternalSubset= false;
219     // temporary variables
220 
221     /** Array of 3 strings. */
222     private String[] fStrings = new String[3];
223 
224     /** External subset source. */
225     private XMLInputSource fExternalSubsetSource = null;
226 
227     /** A DTD Description. */
228     private final XMLDTDDescription fDTDDescription = new XMLDTDDescription(null, null, null, null, null);
229 
230     private static final char [] DOCTYPE = {'D','O','C','T','Y','P','E'};
231     private static final char [] COMMENTSTRING = {'-','-'};
232 
233     //
234     // Constructors
235     //
236 
237     /** Default constructor. */
XMLDocumentScannerImpl()238     public XMLDocumentScannerImpl() {} // <init>()
239 
240 
241     //
242     // XMLDocumentScanner methods
243     //
244 
245 
246     /**
247      * Sets the input source.
248      *
249      * @param inputSource The input source.
250      *
251      * @throws IOException Thrown on i/o error.
252      */
setInputSource(XMLInputSource inputSource)253     public void setInputSource(XMLInputSource inputSource) throws IOException {
254         fEntityManager.setEntityHandler(this);
255         //this starts a new entity and sets the current entity to the document entity.
256         fEntityManager.startDocumentEntity(inputSource);
257         // fDocumentSystemId = fEntityManager.expandSystemId(inputSource.getSystemId());
258         setScannerState(XMLEvent.START_DOCUMENT);
259     } // setInputSource(XMLInputSource)
260 
261 
262 
263     /**return the state of the scanner */
getScannetState()264     public int getScannetState(){
265         return fScannerState ;
266     }
267 
268 
269 
270 
reset(PropertyManager propertyManager)271     public void reset(PropertyManager propertyManager) {
272         super.reset(propertyManager);
273         // other settings
274         fDoctypeName = null;
275         fDoctypePublicId = null;
276         fDoctypeSystemId = null;
277         fSeenDoctypeDecl = false;
278         fNamespaceContext.reset();
279         fSupportDTD = ((Boolean)propertyManager.getProperty(XMLInputFactory.SUPPORT_DTD)).booleanValue();
280 
281         // xerces features
282         fLoadExternalDTD = !((Boolean)propertyManager.getProperty(Constants.ZEPHYR_PROPERTY_PREFIX + Constants.IGNORE_EXTERNAL_DTD)).booleanValue();
283         setScannerState(XMLEvent.START_DOCUMENT);
284         setDriver(fXMLDeclDriver);
285         fSeenInternalSubset = false;
286         if(fDTDScanner != null){
287             ((XMLDTDScannerImpl)fDTDScanner).reset(propertyManager);
288         }
289         fEndPos = 0;
290         fStartPos = 0;
291         if(fDTDDecl != null){
292             fDTDDecl.clear();
293         }
294 
295     }
296 
297     /**
298      * Resets the component. The component can query the component manager
299      * about any features and properties that affect the operation of the
300      * component.
301      *
302      * @param componentManager The component manager.
303      *
304      * @throws SAXException Thrown by component on initialization error.
305      *                      For example, if a feature or property is
306      *                      required for the operation of the component, the
307      *                      component manager may throw a
308      *                      SAXNotRecognizedException or a
309      *                      SAXNotSupportedException.
310      */
reset(XMLComponentManager componentManager)311     public void reset(XMLComponentManager componentManager)
312     throws XMLConfigurationException {
313 
314         super.reset(componentManager);
315 
316         // other settings
317         fDoctypeName = null;
318         fDoctypePublicId = null;
319         fDoctypeSystemId = null;
320         fSeenDoctypeDecl = false;
321         fExternalSubsetSource = null;
322 
323         // xerces features
324         fLoadExternalDTD = componentManager.getFeature(LOAD_EXTERNAL_DTD, true);
325         fDisallowDoctype = componentManager.getFeature(DISALLOW_DOCTYPE_DECL_FEATURE, false);
326 
327         fNamespaces = componentManager.getFeature(NAMESPACES, true);
328 
329         fSeenInternalSubset = false;
330         // xerces properties
331         fDTDScanner = (XMLDTDScanner)componentManager.getProperty(DTD_SCANNER);
332 
333         fValidationManager = (ValidationManager)componentManager.getProperty(VALIDATION_MANAGER, null);
334 
335         try {
336             fNamespaceContext = (NamespaceContext)componentManager.getProperty(NAMESPACE_CONTEXT);
337         }
338         catch (XMLConfigurationException e) { }
339         if (fNamespaceContext == null) {
340             fNamespaceContext = new NamespaceSupport();
341         }
342         fNamespaceContext.reset();
343 
344         fEndPos = 0;
345         fStartPos = 0;
346         if(fDTDDecl != null)
347             fDTDDecl.clear();
348 
349 
350         //fEntityScanner.registerListener((XMLBufferListener)componentManager.getProperty(DOCUMENT_SCANNER));
351 
352         // setup driver
353         setScannerState(SCANNER_STATE_XML_DECL);
354         setDriver(fXMLDeclDriver);
355 
356     } // reset(XMLComponentManager)
357 
358 
359     /**
360      * Returns a list of feature identifiers that are recognized by
361      * this component. This method may return null if no features
362      * are recognized by this component.
363      */
getRecognizedFeatures()364     public String[] getRecognizedFeatures() {
365         String[] featureIds = super.getRecognizedFeatures();
366         int length = featureIds != null ? featureIds.length : 0;
367         String[] combinedFeatureIds = new String[length + RECOGNIZED_FEATURES.length];
368         if (featureIds != null) {
369             System.arraycopy(featureIds, 0, combinedFeatureIds, 0, featureIds.length);
370         }
371         System.arraycopy(RECOGNIZED_FEATURES, 0, combinedFeatureIds, length, RECOGNIZED_FEATURES.length);
372         return combinedFeatureIds;
373     } // getRecognizedFeatures():String[]
374 
375     /**
376      * Sets the state of a feature. This method is called by the component
377      * manager any time after reset when a feature changes state.
378      * <p>
379      * <strong>Note:</strong> Components should silently ignore features
380      * that do not affect the operation of the component.
381      *
382      * @param featureId The feature identifier.
383      * @param state     The state of the feature.
384      *
385      * @throws SAXNotRecognizedException The component should not throw
386      *                                   this exception.
387      * @throws SAXNotSupportedException The component should not throw
388      *                                  this exception.
389      */
setFeature(String featureId, boolean state)390     public void setFeature(String featureId, boolean state)
391     throws XMLConfigurationException {
392 
393         super.setFeature(featureId, state);
394 
395         // Xerces properties
396         if (featureId.startsWith(Constants.XERCES_FEATURE_PREFIX)) {
397             final int suffixLength = featureId.length() - Constants.XERCES_FEATURE_PREFIX.length();
398 
399             if (suffixLength == Constants.LOAD_EXTERNAL_DTD_FEATURE.length() &&
400                 featureId.endsWith(Constants.LOAD_EXTERNAL_DTD_FEATURE)) {
401                 fLoadExternalDTD = state;
402                 return;
403             }
404             else if (suffixLength == Constants.DISALLOW_DOCTYPE_DECL_FEATURE.length() &&
405                 featureId.endsWith(Constants.DISALLOW_DOCTYPE_DECL_FEATURE)) {
406                 fDisallowDoctype = state;
407                 return;
408             }
409         }
410 
411     } // setFeature(String,boolean)
412 
413     /**
414      * Returns a list of property identifiers that are recognized by
415      * this component. This method may return null if no properties
416      * are recognized by this component.
417      */
getRecognizedProperties()418     public String[] getRecognizedProperties() {
419         String[] propertyIds = super.getRecognizedProperties();
420         int length = propertyIds != null ? propertyIds.length : 0;
421         String[] combinedPropertyIds = new String[length + RECOGNIZED_PROPERTIES.length];
422         if (propertyIds != null) {
423             System.arraycopy(propertyIds, 0, combinedPropertyIds, 0, propertyIds.length);
424         }
425         System.arraycopy(RECOGNIZED_PROPERTIES, 0, combinedPropertyIds, length, RECOGNIZED_PROPERTIES.length);
426         return combinedPropertyIds;
427     } // getRecognizedProperties():String[]
428 
429     /**
430      * Sets the value of a property. This method is called by the component
431      * manager any time after reset when a property changes value.
432      * <p>
433      * <strong>Note:</strong> Components should silently ignore properties
434      * that do not affect the operation of the component.
435      *
436      * @param propertyId The property identifier.
437      * @param value      The value of the property.
438      *
439      * @throws SAXNotRecognizedException The component should not throw
440      *                                   this exception.
441      * @throws SAXNotSupportedException The component should not throw
442      *                                  this exception.
443      */
setProperty(String propertyId, Object value)444     public void setProperty(String propertyId, Object value)
445     throws XMLConfigurationException {
446 
447         super.setProperty(propertyId, value);
448 
449         // Xerces properties
450         if (propertyId.startsWith(Constants.XERCES_PROPERTY_PREFIX)) {
451             final int suffixLength = propertyId.length() - Constants.XERCES_PROPERTY_PREFIX.length();
452 
453             if (suffixLength == Constants.DTD_SCANNER_PROPERTY.length() &&
454                 propertyId.endsWith(Constants.DTD_SCANNER_PROPERTY)) {
455                 fDTDScanner = (XMLDTDScanner)value;
456             }
457             if (suffixLength == Constants.NAMESPACE_CONTEXT_PROPERTY.length() &&
458                 propertyId.endsWith(Constants.NAMESPACE_CONTEXT_PROPERTY)) {
459                 if (value != null) {
460                     fNamespaceContext = (NamespaceContext)value;
461                 }
462             }
463 
464             return;
465         }
466 
467     } // setProperty(String,Object)
468 
469     /**
470      * Returns the default state for a feature, or null if this
471      * component does not want to report a default value for this
472      * feature.
473      *
474      * @param featureId The feature identifier.
475      *
476      * @since Xerces 2.2.0
477      */
getFeatureDefault(String featureId)478     public Boolean getFeatureDefault(String featureId) {
479 
480         for (int i = 0; i < RECOGNIZED_FEATURES.length; i++) {
481             if (RECOGNIZED_FEATURES[i].equals(featureId)) {
482                 return FEATURE_DEFAULTS[i];
483             }
484         }
485         return super.getFeatureDefault(featureId);
486     } // getFeatureDefault(String):Boolean
487 
488     /**
489      * Returns the default state for a property, or null if this
490      * component does not want to report a default value for this
491      * property.
492      *
493      * @param propertyId The property identifier.
494      *
495      * @since Xerces 2.2.0
496      */
getPropertyDefault(String propertyId)497     public Object getPropertyDefault(String propertyId) {
498         for (int i = 0; i < RECOGNIZED_PROPERTIES.length; i++) {
499             if (RECOGNIZED_PROPERTIES[i].equals(propertyId)) {
500                 return PROPERTY_DEFAULTS[i];
501             }
502         }
503         return super.getPropertyDefault(propertyId);
504     } // getPropertyDefault(String):Object
505 
506     //
507     // XMLEntityHandler methods
508     //
509 
510     /**
511      * This method notifies of the start of an entity. The DTD has the
512      * pseudo-name of "[dtd]" parameter entity names start with '%'; and
513      * general entities are just specified by their name.
514      *
515      * @param name     The name of the entity.
516      * @param identifier The resource identifier.
517      * @param encoding The auto-detected IANA encoding name of the entity
518      *                 stream. This value will be null in those situations
519      *                 where the entity encoding is not auto-detected (e.g.
520      *                 internal entities or a document entity that is
521      *                 parsed from a java.io.Reader).
522      *
523      * @throws XNIException Thrown by handler to signal an error.
524      */
startEntity(String name, XMLResourceIdentifier identifier, String encoding, Augmentations augs)525     public void startEntity(String name,
526             XMLResourceIdentifier identifier,
527             String encoding, Augmentations augs) throws XNIException {
528 
529         super.startEntity(name, identifier, encoding,augs);
530 
531         //register current document scanner as a listener for XMLEntityScanner
532         fEntityScanner.registerListener(this);
533 
534         // prepare to look for a TextDecl if external general entity
535         if (!name.equals("[xml]") && fEntityScanner.isExternal()) {
536             // Don't do this if we're skipping the entity!
537             if (augs == null || !((Boolean) augs.getItem(Constants.ENTITY_SKIPPED)).booleanValue()) {
538                 setScannerState(SCANNER_STATE_TEXT_DECL);
539             }
540         }
541 
542         // call handler
543         /** comment this part.. LOCATOR problem.. */
544         if (fDocumentHandler != null && name.equals("[xml]")) {
545             fDocumentHandler.startDocument(fEntityScanner, encoding, fNamespaceContext, null);
546         }
547 
548     } // startEntity(String,identifier,String)
549 
550 
551     /**
552      * This method notifies the end of an entity. The DTD has the pseudo-name
553      * of "[dtd]" parameter entity names start with '%'; and general entities
554      * are just specified by their name.
555      *
556      * @param name The name of the entity.
557      *
558      * @throws XNIException Thrown by handler to signal an error.
559      */
endEntity(String name, Augmentations augs)560     public void endEntity(String name, Augmentations augs) throws IOException, XNIException {
561 
562         super.endEntity(name, augs);
563 
564         if(name.equals("[xml]")){
565             //if fMarkupDepth has reached 0.
566             //and driver is fTrailingMiscDriver (which
567             //handles end of document in normal case)
568             //set the scanner state of SCANNER_STATE_TERMINATED
569             if(fMarkupDepth == 0 && fDriver == fTrailingMiscDriver){
570                 //set the scanner set to SCANNER_STATE_TERMINATED
571                 setScannerState(SCANNER_STATE_TERMINATED) ;
572             } else{
573                 //else we have reached the end of document prematurely
574                 //so throw EOFException.
575                 throw new java.io.EOFException();
576             }
577 
578             //this is taken care in wrapper which generates XNI callbacks, There are no next events
579 
580             //if (fDocumentHandler != null) {
581                 //fDocumentHandler.endDocument(null);
582             //}
583         }
584     } // endEntity(String)
585 
586 
getDTDDecl()587     public XMLStringBuffer getDTDDecl(){
588         Entity entity = fEntityScanner.getCurrentEntity();
589         fDTDDecl.append(((Entity.ScannedEntity)entity).ch,fStartPos , fEndPos-fStartPos);
590         if(fSeenInternalSubset)
591             fDTDDecl.append("]>");
592         return fDTDDecl;
593     }
594 
getCharacterEncodingScheme()595     public String getCharacterEncodingScheme(){
596         return fDeclaredEncoding;
597     }
598 
599     /** return the next state on the input
600      *
601      * @return int
602      */
603 
next()604     public int next() throws IOException, XNIException {
605         return fDriver.next();
606     }
607 
608     //getNamespaceContext
getNamespaceContext()609     public NamespaceContext getNamespaceContext(){
610         return fNamespaceContext ;
611     }
612 
613 
614 
615     //
616     // Protected methods
617     //
618 
619     // driver factory methods
620 
621     /** Creates a content driver. */
createContentDriver()622     protected Driver createContentDriver() {
623         return new ContentDriver();
624     } // createContentDriver():Driver
625 
626     // scanning methods
627 
628     /** Scans a doctype declaration. */
scanDoctypeDecl(boolean supportDTD)629     protected boolean scanDoctypeDecl(boolean supportDTD) throws IOException, XNIException {
630 
631         // spaces
632         if (!fEntityScanner.skipSpaces()) {
633             reportFatalError("MSG_SPACE_REQUIRED_BEFORE_ROOT_ELEMENT_TYPE_IN_DOCTYPEDECL",
634                     null);
635         }
636 
637         // root element name
638         fDoctypeName = fEntityScanner.scanName(NameType.DOCTYPE);
639         if (fDoctypeName == null) {
640             reportFatalError("MSG_ROOT_ELEMENT_TYPE_REQUIRED", null);
641         }
642 
643         // external id
644         if (fEntityScanner.skipSpaces()) {
645             scanExternalID(fStrings, false);
646             fDoctypeSystemId = fStrings[0];
647             fDoctypePublicId = fStrings[1];
648             fEntityScanner.skipSpaces();
649         }
650 
651         fHasExternalDTD = fDoctypeSystemId != null;
652 
653         // Attempt to locate an external subset with an external subset resolver.
654         if (supportDTD && !fHasExternalDTD && fExternalSubsetResolver != null) {
655             fDTDDescription.setValues(null, null, fEntityManager.getCurrentResourceIdentifier().getExpandedSystemId(), null);
656             fDTDDescription.setRootName(fDoctypeName);
657             fExternalSubsetSource = fExternalSubsetResolver.getExternalSubset(fDTDDescription);
658             fHasExternalDTD = fExternalSubsetSource != null;
659         }
660 
661         // call handler
662         if (supportDTD && fDocumentHandler != null) {
663             // NOTE: I don't like calling the doctypeDecl callback until
664             //       end of the *full* doctype line (including internal
665             //       subset) is parsed correctly but SAX2 requires that
666             //       it knows the root element name and public and system
667             //       identifier for the startDTD call. -Ac
668             if (fExternalSubsetSource == null) {
669                 fDocumentHandler.doctypeDecl(fDoctypeName, fDoctypePublicId, fDoctypeSystemId, null);
670             }
671             else {
672                 fDocumentHandler.doctypeDecl(fDoctypeName, fExternalSubsetSource.getPublicId(), fExternalSubsetSource.getSystemId(), null);
673             }
674         }
675 
676         // is there an internal subset?
677         boolean internalSubset = true;
678         if (!fEntityScanner.skipChar('[', null)) {
679             internalSubset = false;
680             fEntityScanner.skipSpaces();
681             if (!fEntityScanner.skipChar('>', null)) {
682                 reportFatalError("DoctypedeclUnterminated", new Object[]{fDoctypeName});
683             }
684             fMarkupDepth--;
685         }
686         return internalSubset;
687 
688     } // scanDoctypeDecl():boolean
689 
690     //
    // Protected methods
692     //
693     /** Set the scanner state after scanning DTD */
setEndDTDScanState()694     protected void setEndDTDScanState() {
695         setScannerState(SCANNER_STATE_PROLOG);
696         setDriver(fPrologDriver);
697         fEntityManager.setEntityHandler(XMLDocumentScannerImpl.this);
698         fReadingDTD=false;
699     }
700 
701     /** Returns the scanner state name. */
getScannerStateName(int state)702     protected String getScannerStateName(int state) {
703 
704         switch (state) {
705             case SCANNER_STATE_XML_DECL: return "SCANNER_STATE_XML_DECL";
706             case SCANNER_STATE_PROLOG: return "SCANNER_STATE_PROLOG";
707             case SCANNER_STATE_TRAILING_MISC: return "SCANNER_STATE_TRAILING_MISC";
708             case SCANNER_STATE_DTD_INTERNAL_DECLS: return "SCANNER_STATE_DTD_INTERNAL_DECLS";
709             case SCANNER_STATE_DTD_EXTERNAL: return "SCANNER_STATE_DTD_EXTERNAL";
710             case SCANNER_STATE_DTD_EXTERNAL_DECLS: return "SCANNER_STATE_DTD_EXTERNAL_DECLS";
711         }
712         return super.getScannerStateName(state);
713 
714     } // getScannerStateName(int):String
715 
716     //
717     // Classes
718     //
719 
720     /**
721      * Driver to handle XMLDecl scanning.
722      *
723      * This class has been modified as per the new design which is more suited to
724      * efficiently build pull parser. Lots of performance improvements have been done and
725      * the code has been added to support stax functionality/features.
726      *
727      * @author Neeraj Bajaj, Sun Microsystems.
728      *
729      * @author Andy Clark, IBM
730      */
731     protected final class XMLDeclDriver
732             implements Driver {
733 
734         //
735         // Driver methods
736         //
737 
738 
next()739         public int next() throws IOException, XNIException {
740 
741             // next driver is prolog regardless of whether there
742             // is an XMLDecl in this document
743             setScannerState(SCANNER_STATE_PROLOG);
744             setDriver(fPrologDriver);
745 
746             //System.out.println("fEntityScanner = " + fEntityScanner);
747             // scan XMLDecl
748             try {
749                 if (fEntityScanner.skipString(XMLDECL)) {
750                     if (XMLChar.isSpace(fEntityScanner.peekChar())) {
751                         fMarkupDepth++;
752                         scanXMLDeclOrTextDecl(false);
753                     } else {
754                         // PI, reset position
755                         fEntityManager.fCurrentEntity.position = 0;
756                     }
757                 }
758 
759                 //START_OF_THE_DOCUMENT
760                 fEntityManager.fCurrentEntity.mayReadChunks = true;
761                 return XMLEvent.START_DOCUMENT;
762 
763             }
764             // encoding errors
765             catch (MalformedByteSequenceException e) {
766                 fErrorReporter.reportError(e.getDomain(), e.getKey(),
767                         e.getArguments(), XMLErrorReporter.SEVERITY_FATAL_ERROR, e);
768                 return -1;
769             } catch (CharConversionException e) {
770                 fErrorReporter.reportError(
771                         XMLMessageFormatter.XML_DOMAIN,
772                         "CharConversionFailure",
773                         null,
774                         XMLErrorReporter.SEVERITY_FATAL_ERROR, e);
775                 return -1;
776             }
777             // premature end of file
778             catch (EOFException e) {
779                 reportFatalError("PrematureEOF", null);
780                 return -1;
781                 //throw e;
782             }
783 
784         }
785     } // class XMLDeclDriver
786 
787     /**
788      * Driver to handle prolog scanning.
789      *
790      * @author Andy Clark, IBM
791      */
792     protected final class PrologDriver
793             implements Driver {
794 
795         /**
796          * Drives the parser to the next state/event on the input. Parser is guaranteed
797          * to stop at the next state/event.
798          *
799          * Internally XML document is divided into several states. Each state represents
800          * a sections of XML document. When this functions returns normally, it has read
801          * the section of XML document and returns the state corresponding to section of
802          * document which has been read. For optimizations, a particular driver
803          * can read ahead of the section of document (state returned) just read and
804          * can maintain a different internal state.
805          *
806          * @return state representing the section of document just read.
807          *
808          * @throws IOException  Thrown on i/o error.
809          * @throws XNIException Thrown on parse error.
810          */
811 
next()812         public int next() throws IOException, XNIException {
813 
814             try {
815                 do {
816                     switch (fScannerState) {
817                         case SCANNER_STATE_PROLOG: {
818                             fEntityScanner.skipSpaces();
819                             if (fEntityScanner.skipChar('<', null)) {
820                                 setScannerState(SCANNER_STATE_START_OF_MARKUP);
821                             } else if (fEntityScanner.skipChar('&', NameType.REFERENCE)) {
822                                 setScannerState(SCANNER_STATE_REFERENCE);
823                             } else {
824                                 setScannerState(SCANNER_STATE_CONTENT);
825                             }
826                             break;
827                         }
828 
829                         case SCANNER_STATE_START_OF_MARKUP: {
830                             fMarkupDepth++;
831                             if (isValidNameStartChar(fEntityScanner.peekChar()) ||
832                                     isValidNameStartHighSurrogate(fEntityScanner.peekChar())) {
833                                 setScannerState(SCANNER_STATE_ROOT_ELEMENT);
834                                 setDriver(fContentDriver);
835                                 //from now onwards this would be handled by fContentDriver,in the same next() call
836                                 return fContentDriver.next();
837                             } else if (fEntityScanner.skipChar('!', null)) {
838                                 if (fEntityScanner.skipChar('-', null)) {
839                                     if (!fEntityScanner.skipChar('-', null)) {
840                                         reportFatalError("InvalidCommentStart",
841                                                 null);
842                                     }
843                                     setScannerState(SCANNER_STATE_COMMENT);
844                                 } else if (fEntityScanner.skipString(DOCTYPE)) {
845                                     setScannerState(SCANNER_STATE_DOCTYPE);
846                                     Entity entity = fEntityScanner.getCurrentEntity();
847                                     if(entity instanceof Entity.ScannedEntity){
848                                         fStartPos=((Entity.ScannedEntity)entity).position;
849                                     }
850                                     fReadingDTD=true;
851                                     if(fDTDDecl == null)
852                                         fDTDDecl = new XMLStringBuffer();
853                                     fDTDDecl.append("<!DOCTYPE");
854 
855                                 } else {
856                                     reportFatalError("MarkupNotRecognizedInProlog",
857                                             null);
858                                 }
859                             } else if (fEntityScanner.skipChar('?', null)) {
860                                 setScannerState(SCANNER_STATE_PI);
861                             } else {
862                                 reportFatalError("MarkupNotRecognizedInProlog",
863                                         null);
864                             }
865                             break;
866                         }
867                     }
868                 } while (fScannerState == SCANNER_STATE_PROLOG || fScannerState == SCANNER_STATE_START_OF_MARKUP );
869 
870                 switch(fScannerState){
871                     /**
872                     //this part is handled by FragmentContentHandler
873                     case SCANNER_STATE_ROOT_ELEMENT: {
874                         //we have read '<' and beginning of reading the start element tag
875                         setScannerState(SCANNER_STATE_START_ELEMENT_TAG);
876                         setDriver(fContentDriver);
877                         //from now onwards this would be handled by fContentDriver,in the same next() call
878                         return fContentDriver.next();
879                     }
880                      */
881                     case SCANNER_STATE_COMMENT: {
882                         //this function fills the data..
883                         scanComment();
884                         setScannerState(SCANNER_STATE_PROLOG);
885                         return XMLEvent.COMMENT;
886                         //setScannerState(SCANNER_STATE_PROLOG);
887                         //break;
888                     }
889                     case SCANNER_STATE_PI: {
890                         fContentBuffer.clear() ;
891                         scanPI(fContentBuffer);
892                         setScannerState(SCANNER_STATE_PROLOG);
893                         return XMLEvent.PROCESSING_INSTRUCTION;
894                     }
895 
896                     case SCANNER_STATE_DOCTYPE: {
897                         if (fDisallowDoctype) {
898                             reportFatalError("DoctypeNotAllowed", null);
899                         }
900 
901                         if (fSeenDoctypeDecl) {
902                             reportFatalError("AlreadySeenDoctype", null);
903                         }
904                         fSeenDoctypeDecl = true;
905 
906                         // scanDoctypeDecl() sends XNI doctypeDecl event that
907                         // in SAX is converted to startDTD() event.
908                         if (scanDoctypeDecl(fSupportDTD)) {
909                             //allow parsing of entity decls to continue in order to stay well-formed
910                             setScannerState(SCANNER_STATE_DTD_INTERNAL_DECLS);
911                             fSeenInternalSubset = true;
912                             if(fDTDDriver == null){
913                                 fDTDDriver = new DTDDriver();
914                             }
915                             setDriver(fContentDriver);
916                             //always return DTD event, the event however, will not contain any entities
917                             return fDTDDriver.next();
918                         }
919 
920                         if(fSeenDoctypeDecl){
921                             Entity entity = fEntityScanner.getCurrentEntity();
922                             if(entity instanceof Entity.ScannedEntity){
923                                 fEndPos = ((Entity.ScannedEntity)entity).position;
924                             }
925                             fReadingDTD = false;
926                         }
927 
928                         // handle external subset
929                         if (fDoctypeSystemId != null) {
930                             if (((fValidation || fLoadExternalDTD)
931                                 && (fValidationManager == null || !fValidationManager.isCachedDTD()))) {
932                                 if (fSupportDTD) {
933                                     setScannerState(SCANNER_STATE_DTD_EXTERNAL);
934                                 } else {
935                                     setScannerState(SCANNER_STATE_PROLOG);
936                                 }
937 
938                                 setDriver(fContentDriver);
939                                 if(fDTDDriver == null) {
940                                     fDTDDriver = new DTDDriver();
941                                 }
942 
943                                 return fDTDDriver.next();
944                             }
945                         }
946                         else if (fExternalSubsetSource != null) {
947                             if (((fValidation || fLoadExternalDTD)
948                                 && (fValidationManager == null || !fValidationManager.isCachedDTD()))) {
949                                 // This handles the case of a DOCTYPE that had neither an internal subset or an external subset.
950                                 fDTDScanner.setInputSource(fExternalSubsetSource);
951                                 fExternalSubsetSource = null;
952                             if (fSupportDTD)
953                                 setScannerState(SCANNER_STATE_DTD_EXTERNAL_DECLS);
954                             else
955                                 setScannerState(SCANNER_STATE_PROLOG);
956                             setDriver(fContentDriver);
957                             if(fDTDDriver == null)
958                                 fDTDDriver = new DTDDriver();
959                             return fDTDDriver.next();
960                             }
961                         }
962 
963                         // Send endDTD() call if:
964                         // a) systemId is null or if an external subset resolver could not locate an external subset.
965                         // b) "load-external-dtd" and validation are false
966                         // c) DTD grammar is cached
967 
968                         // in XNI this results in 3 events:  doctypeDecl, startDTD, endDTD
969                         // in SAX this results in 2 events: startDTD, endDTD
970                         if (fDTDScanner != null) {
971                             fDTDScanner.setInputSource(null);
972                         }
973                         setScannerState(SCANNER_STATE_PROLOG);
974                         return XMLEvent.DTD;
975                     }
976 
977                     case SCANNER_STATE_CONTENT: {
978                         reportFatalError("ContentIllegalInProlog", null);
979                         fEntityScanner.scanChar(null);
980                         return -1;
981                     }
982                     case SCANNER_STATE_REFERENCE: {
983                         reportFatalError("ReferenceIllegalInProlog", null);
984                         return -1;
985                     }
986 
987                     /**
988                      * if (complete) {
989                      * if (fEntityScanner.scanChar() != '<') {
990                      * reportFatalError("RootElementRequired", null);
991                      * }
992                      * setScannerState(SCANNER_STATE_ROOT_ELEMENT);
993                      * setDriver(fContentDriver);
994                      * }
995                      */
996                 }
997             }
998             // encoding errors
999             catch (MalformedByteSequenceException e) {
1000                 fErrorReporter.reportError(e.getDomain(), e.getKey(),
1001                         e.getArguments(), XMLErrorReporter.SEVERITY_FATAL_ERROR, e);
1002                 return -1;
1003             } catch (CharConversionException e) {
1004                 fErrorReporter.reportError(
1005                         XMLMessageFormatter.XML_DOMAIN,
1006                         "CharConversionFailure",
1007                         null,
1008                         XMLErrorReporter.SEVERITY_FATAL_ERROR, e);
1009                 return -1;
1010             }
1011             // premature end of file
1012             catch (EOFException e) {
1013                 reportFatalError("PrematureEOF", null);
1014                 //xxx  what should be returned here.... ???
1015                 return -1 ;
1016                 //throw e;
1017             }
1018             //xxx  what should be returned here.... ???
1019             return -1;
1020 
1021         }
1022 
1023 
1024     } // class PrologDriver
1025 
1026     /**
1027      * Driver to handle the internal and external DTD subsets.
1028      *
1029      * @author Andy Clark, IBM
1030      */
1031     protected final class DTDDriver
1032             implements Driver {
1033 
1034         //
1035         // Driver methods
1036         //
1037 
next()1038         public int next() throws IOException, XNIException{
1039 
1040             dispatch(true);
1041 
1042             //xxx: remove this hack and align this with reusing DTD components
1043             //currently this routine will only be executed from Stax
1044             if(fPropertyManager != null){
1045                 dtdGrammarUtil =  new DTDGrammarUtil(((XMLDTDScannerImpl)fDTDScanner).getGrammar(),fSymbolTable, fNamespaceContext);
1046             }
1047 
1048             return XMLEvent.DTD ;
1049         }
1050 
1051         /**
1052          * Dispatch an XML "event".
1053          *
1054          * @param complete True if this driver is intended to scan
1055          *                 and dispatch as much as possible.
1056          *
1057          * @return True if there is more to dispatch either from this
1058          *          or a another driver.
1059          *
1060          * @throws IOException  Thrown on i/o error.
1061          * @throws XNIException Thrown on parse error.
1062          */
dispatch(boolean complete)1063         public boolean dispatch(boolean complete)
1064         throws IOException, XNIException {
1065             fEntityManager.setEntityHandler(null);
1066             try {
1067                 boolean again;
1068                 XMLResourceIdentifierImpl resourceIdentifier = new XMLResourceIdentifierImpl();
1069                 if( fDTDScanner == null){
1070 
1071                     if (fEntityManager.getEntityScanner() instanceof XML11EntityScanner){
1072                         fDTDScanner = new XML11DTDScannerImpl();
1073                     } else
1074 
1075                     fDTDScanner = new XMLDTDScannerImpl();
1076 
1077                     ((XMLDTDScannerImpl)fDTDScanner).reset(fPropertyManager);
1078                 }
1079 
1080                 fDTDScanner.setLimitAnalyzer(fLimitAnalyzer);
1081                 do {
1082                     again = false;
1083                     switch (fScannerState) {
1084                         case SCANNER_STATE_DTD_INTERNAL_DECLS: {
1085                             boolean moreToScan = false;
1086                             if (!fDTDScanner.skipDTD(fSupportDTD)) {
1087                                 // REVISIT: Should there be a feature for
1088                                 //          the "complete" parameter?
1089                                 boolean completeDTD = true;
1090 
1091                                 moreToScan = fDTDScanner.scanDTDInternalSubset(completeDTD, fStandalone, fHasExternalDTD && fLoadExternalDTD);
1092                             }
1093                             Entity entity = fEntityScanner.getCurrentEntity();
1094                             if(entity instanceof Entity.ScannedEntity){
1095                                 fEndPos=((Entity.ScannedEntity)entity).position;
1096                             }
1097                             fReadingDTD=false;
1098                             if (!moreToScan) {
1099                                 // end doctype declaration
1100                                 if (!fEntityScanner.skipChar(']', null)) {
1101                                     reportFatalError("DoctypedeclNotClosed", new Object[]{fDoctypeName});
1102                                 }
1103                                 fEntityScanner.skipSpaces();
1104                                 if (!fEntityScanner.skipChar('>', null)) {
1105                                     reportFatalError("DoctypedeclUnterminated", new Object[]{fDoctypeName});
1106                                 }
1107                                 fMarkupDepth--;
1108 
1109                                 if (!fSupportDTD) {
1110                                     //simply reset the entity store without having to mess around
1111                                     //with the DTD Scanner code
1112                                     fEntityStore = fEntityManager.getEntityStore();
1113                                     fEntityStore.reset();
1114                                 } else {
1115                                     // scan external subset next unless we are ignoring DTDs
1116                                     if (fDoctypeSystemId != null && (fValidation || fLoadExternalDTD)) {
1117                                         setScannerState(SCANNER_STATE_DTD_EXTERNAL);
1118                                         break;
1119                                     }
1120                                 }
1121 
1122                                 setEndDTDScanState();
1123                                 return true;
1124 
1125                             }
1126                             break;
1127                         }
1128                         case SCANNER_STATE_DTD_EXTERNAL: {
1129                             /**
1130                             fDTDDescription.setValues(fDoctypePublicId, fDoctypeSystemId, null, null);
1131                             fDTDDescription.setRootName(fDoctypeName);
1132                             XMLInputSource xmlInputSource =
1133                                 fEntityManager.resolveEntity(fDTDDescription);
1134                             fDTDScanner.setInputSource(xmlInputSource);
1135                             setScannerState(SCANNER_STATE_DTD_EXTERNAL_DECLS);
1136                             again = true;
1137                             break;
1138                              */
1139 
1140                             resourceIdentifier.setValues(fDoctypePublicId, fDoctypeSystemId, null, null);
1141                             XMLInputSource xmlInputSource = null ;
1142                             StaxXMLInputSource staxInputSource =  fEntityManager.resolveEntityAsPerStax(resourceIdentifier);
1143 
1144                             // Check access permission. If the source is resolved by a resolver, the check is skipped.
1145                             if (!staxInputSource.isCreatedByResolver()) {
1146                                 String accessError = checkAccess(fDoctypeSystemId, fAccessExternalDTD);
1147                                 if (accessError != null) {
1148                                     reportFatalError("AccessExternalDTD", new Object[]{ SecuritySupport.sanitizePath(fDoctypeSystemId), accessError });
1149                                 }
1150                             }
1151                             xmlInputSource = staxInputSource.getXMLInputSource();
1152                             fDTDScanner.setInputSource(xmlInputSource);
1153                             if (fEntityScanner.fCurrentEntity != null) {
1154                                 setScannerState(SCANNER_STATE_DTD_EXTERNAL_DECLS);
1155                             } else {
1156                                 setScannerState(SCANNER_STATE_PROLOG);
1157                             }
1158                             again = true;
1159                             break;
1160                         }
1161                         case SCANNER_STATE_DTD_EXTERNAL_DECLS: {
1162                             // REVISIT: Should there be a feature for
1163                             //          the "complete" parameter?
1164                             boolean completeDTD = true;
1165                             boolean moreToScan = fDTDScanner.scanDTDExternalSubset(completeDTD);
1166                             if (!moreToScan) {
1167                                 setEndDTDScanState();
1168                                 return true;
1169                             }
1170                             break;
1171                         }
1172                         case SCANNER_STATE_PROLOG : {
1173                             // skip entity decls
1174                             setEndDTDScanState();
1175                             return true;
1176                         }
1177                         default: {
1178                             throw new XNIException("DTDDriver#dispatch: scanner state="+fScannerState+" ("+getScannerStateName(fScannerState)+')');
1179                         }
1180                     }
1181                 } while (complete || again);
1182             }
1183             // encoding errors
1184             catch (MalformedByteSequenceException e) {
1185                 fErrorReporter.reportError(e.getDomain(), e.getKey(),
1186                         e.getArguments(), XMLErrorReporter.SEVERITY_FATAL_ERROR, e);
1187                 return false;
1188             } catch (CharConversionException e) {
1189                 fErrorReporter.reportError(
1190                         XMLMessageFormatter.XML_DOMAIN,
1191                         "CharConversionFailure",
1192                         null,
1193                         XMLErrorReporter.SEVERITY_FATAL_ERROR, e);
1194                 return false;
1195             }
1196             // premature end of file
1197             catch (EOFException e) {
1198                 e.printStackTrace();
1199                 reportFatalError("PrematureEOF", null);
1200                 return false;
1201                 //throw e;
1202             }
1203 
1204             // cleanup
1205             finally {
1206                 fEntityManager.setEntityHandler(XMLDocumentScannerImpl.this);
1207             }
1208 
1209             return true;
1210 
1211         }
1212 
1213         // dispatch(boolean):boolean
1214 
1215     } // class DTDDriver
1216 
1217     /**
1218      * Driver to handle content scanning.
1219      *
1220      * @author Andy Clark, IBM
1221      * @author Eric Ye, IBM
1222      */
1223     protected class ContentDriver
1224             extends FragmentContentDriver {
1225 
1226         //
1227         // Protected methods
1228         //
1229 
1230         // hooks
1231 
1232         // NOTE: These hook methods are added so that the full document
1233         //       scanner can share the majority of code with this class.
1234 
1235         /**
1236          * Scan for DOCTYPE hook. This method is a hook for subclasses
1237          * to add code to handle scanning for a the "DOCTYPE" string
1238          * after the string "<!" has been scanned.
1239          *
1240          * @return True if the "DOCTYPE" was scanned; false if "DOCTYPE"
1241          *          was not scanned.
1242          */
scanForDoctypeHook()1243         protected boolean scanForDoctypeHook()
1244         throws IOException, XNIException {
1245 
1246             if (fEntityScanner.skipString(DOCTYPE)) {
1247                 setScannerState(SCANNER_STATE_DOCTYPE);
1248                 //                fEntityScanner.markStartOfDTD();
1249                 return true;
1250             }
1251             return false;
1252 
1253         } // scanForDoctypeHook():boolean
1254 
1255         /**
1256          * Element depth iz zero. This methos is a hook for subclasses
1257          * to add code to handle when the element depth hits zero. When
1258          * scanning a document fragment, an element depth of zero is
1259          * normal. However, when scanning a full XML document, the
1260          * scanner must handle the trailing miscellanous section of
1261          * the document after the end of the document's root element.
1262          *
1263          * @return True if the caller should stop and return true which
1264          *          allows the scanner to switch to a new scanning
1265          *          driver. A return value of false indicates that
1266          *          the content driver should continue as normal.
1267          */
elementDepthIsZeroHook()1268         protected boolean elementDepthIsZeroHook()
1269         throws IOException, XNIException {
1270 
1271             setScannerState(SCANNER_STATE_TRAILING_MISC);
1272             setDriver(fTrailingMiscDriver);
1273             return true;
1274 
1275         } // elementDepthIsZeroHook():boolean
1276 
1277         /**
1278          * Scan for root element hook. This method is a hook for
1279          * subclasses to add code that handles scanning for the root
1280          * element. When scanning a document fragment, there is no
1281          * "root" element. However, when scanning a full XML document,
1282          * the scanner must handle the root element specially.
1283          *
1284          * @return True if the caller should stop and return true which
1285          *          allows the scanner to switch to a new scanning
1286          *          driver. A return value of false indicates that
1287          *          the content driver should continue as normal.
1288          */
scanRootElementHook()1289         protected boolean scanRootElementHook()
1290         throws IOException, XNIException {
1291 
1292             if (scanStartElement()) {
1293                 setScannerState(SCANNER_STATE_TRAILING_MISC);
1294                 setDriver(fTrailingMiscDriver);
1295                 return true;
1296             }
1297             return false;
1298 
1299         } // scanRootElementHook():boolean
1300 
1301         /**
1302          * End of file hook. This method is a hook for subclasses to
1303          * add code that handles the end of file. The end of file in
1304          * a document fragment is OK if the markup depth is zero.
1305          * However, when scanning a full XML document, an end of file
1306          * is always premature.
1307          */
endOfFileHook(EOFException e)1308         protected void endOfFileHook(EOFException e)
1309         throws IOException, XNIException {
1310 
1311             reportFatalError("PrematureEOF", null);
1312             // in case continue-after-fatal-error set, should not do this...
1313             //throw e;
1314 
1315         } // endOfFileHook()
1316 
resolveExternalSubsetAndRead()1317         protected void resolveExternalSubsetAndRead()
1318         throws IOException, XNIException {
1319 
1320             fDTDDescription.setValues(null, null, fEntityManager.getCurrentResourceIdentifier().getExpandedSystemId(), null);
1321             fDTDDescription.setRootName(fElementQName.rawname);
1322             XMLInputSource src = fExternalSubsetResolver.getExternalSubset(fDTDDescription);
1323 
1324             if (src != null) {
1325                 fDoctypeName = fElementQName.rawname;
1326                 fDoctypePublicId = src.getPublicId();
1327                 fDoctypeSystemId = src.getSystemId();
1328                 // call document handler
1329                 if (fDocumentHandler != null) {
1330                     // This inserts a doctypeDecl event into the stream though no
1331                     // DOCTYPE existed in the instance document.
1332                     fDocumentHandler.doctypeDecl(fDoctypeName, fDoctypePublicId, fDoctypeSystemId, null);
1333                 }
1334                 try {
1335                     fDTDScanner.setInputSource(src);
1336                     while (fDTDScanner.scanDTDExternalSubset(true));
1337                 } finally {
1338                     fEntityManager.setEntityHandler(XMLDocumentScannerImpl.this);
1339                 }
1340             }
1341         } // resolveExternalSubsetAndRead()
1342 
1343 
1344 
1345     } // class ContentDriver
1346 
1347     /**
1348      * Driver to handle trailing miscellaneous section scanning.
1349      *
1350      * @author Andy Clark, IBM
1351      * @author Eric Ye, IBM
1352      */
1353     protected final class TrailingMiscDriver
1354             implements Driver {
1355 
1356         //
1357         // Driver methods
1358         //
next()1359         public int next() throws IOException, XNIException{
1360             //this could for cases like <foo/>
1361             //look at scanRootElementHook
1362             if(fEmptyElement){
1363                 fEmptyElement = false;
1364                 return XMLEvent.END_ELEMENT;
1365             }
1366 
1367             try {
1368                 if(fScannerState == SCANNER_STATE_TERMINATED){
1369                     return XMLEvent.END_DOCUMENT ;}
1370                 do {
1371                     switch (fScannerState) {
1372                         case SCANNER_STATE_TRAILING_MISC: {
1373 
1374                             fEntityScanner.skipSpaces();
1375                             //we should have reached the end of the document in
1376                             //most cases.
1377                             if(fScannerState == SCANNER_STATE_TERMINATED ){
1378                                 return XMLEvent.END_DOCUMENT ;
1379                             }
1380                             if (fEntityScanner.skipChar('<', null)) {
1381                                 setScannerState(SCANNER_STATE_START_OF_MARKUP);
1382                             } else {
1383                                 setScannerState(SCANNER_STATE_CONTENT);
1384                             }
1385                             break;
1386                         }
1387                         case SCANNER_STATE_START_OF_MARKUP: {
1388                             fMarkupDepth++;
1389                             if (fEntityScanner.skipChar('?', null)) {
1390                                 setScannerState(SCANNER_STATE_PI);
1391                             } else if (fEntityScanner.skipChar('!', null)) {
1392                                 setScannerState(SCANNER_STATE_COMMENT);
1393                             } else if (fEntityScanner.skipChar('/', null)) {
1394                                 reportFatalError("MarkupNotRecognizedInMisc",
1395                                         null);
1396                             } else if (isValidNameStartChar(fEntityScanner.peekChar()) ||
1397                                     isValidNameStartHighSurrogate(fEntityScanner.peekChar())) {
1398                                 reportFatalError("MarkupNotRecognizedInMisc",
1399                                         null);
1400                                 scanStartElement();
1401                                 setScannerState(SCANNER_STATE_CONTENT);
1402                             } else {
1403                                 reportFatalError("MarkupNotRecognizedInMisc",
1404                                         null);
1405                             }
1406                             break;
1407                         }
1408                     }
1409                 } while(fScannerState == SCANNER_STATE_START_OF_MARKUP ||
1410                         fScannerState == SCANNER_STATE_TRAILING_MISC);
1411 
1412                 switch (fScannerState){
1413                     case SCANNER_STATE_PI: {
1414                         fContentBuffer.clear();
1415                         scanPI(fContentBuffer);
1416                         setScannerState(SCANNER_STATE_TRAILING_MISC);
1417                         return XMLEvent.PROCESSING_INSTRUCTION ;
1418                     }
1419                     case SCANNER_STATE_COMMENT: {
1420                         if (!fEntityScanner.skipString(COMMENTSTRING)) {
1421                             reportFatalError("InvalidCommentStart", null);
1422                         }
1423                         scanComment();
1424                         setScannerState(SCANNER_STATE_TRAILING_MISC);
1425                         return XMLEvent.COMMENT;
1426                     }
1427                     case SCANNER_STATE_CONTENT: {
1428                         int ch = fEntityScanner.peekChar();
1429                         if (ch == -1) {
1430                             setScannerState(SCANNER_STATE_TERMINATED);
1431                             return XMLEvent.END_DOCUMENT ;
1432                         } else{
1433                             reportFatalError("ContentIllegalInTrailingMisc",
1434                                     null);
1435                             fEntityScanner.scanChar(null);
1436                             setScannerState(SCANNER_STATE_TRAILING_MISC);
1437                             return XMLEvent.CHARACTERS;
1438                         }
1439 
1440                     }
1441                     case SCANNER_STATE_REFERENCE: {
1442                         reportFatalError("ReferenceIllegalInTrailingMisc",
1443                                 null);
1444                         setScannerState(SCANNER_STATE_TRAILING_MISC);
1445                         return XMLEvent.ENTITY_REFERENCE ;
1446                     }
1447                     case SCANNER_STATE_TERMINATED: {
1448                         //there can't be any element after SCANNER_STATE_TERMINATED or when the parser
1449                         //has reached the end of document
1450                         setScannerState(SCANNER_STATE_NO_SUCH_ELEMENT_EXCEPTION);
1451                         //xxx what to do when the scanner has reached the terminating state.
1452                         return XMLEvent.END_DOCUMENT ;
1453                     }
1454                     case SCANNER_STATE_NO_SUCH_ELEMENT_EXCEPTION:{
1455                         throw new java.util.NoSuchElementException("No more events to be parsed");
1456                     }
1457                     default: throw new XNIException("Scanner State " + fScannerState + " not Recognized ");
1458                 }//switch
1459             // encoding errors
1460             } catch (MalformedByteSequenceException e) {
1461                 fErrorReporter.reportError(e.getDomain(), e.getKey(),
1462                         e.getArguments(), XMLErrorReporter.SEVERITY_FATAL_ERROR, e);
1463                 return -1;
1464             } catch (CharConversionException e) {
1465                 fErrorReporter.reportError(
1466                         XMLMessageFormatter.XML_DOMAIN,
1467                         "CharConversionFailure",
1468                         null,
1469                         XMLErrorReporter.SEVERITY_FATAL_ERROR, e);
1470                 return -1;
1471             } catch (EOFException e) {
1472                 // NOTE: This is the only place we're allowed to reach
1473                 //       the real end of the document stream. Unless the
1474                 //       end of file was reached prematurely.
1475                 if (fMarkupDepth != 0) {
1476                     reportFatalError("PrematureEOF", null);
1477                     return -1;
1478                     //throw e;
1479                 }
1480                 //System.out.println("EOFException thrown") ;
1481                 setScannerState(SCANNER_STATE_TERMINATED);
1482             }
1483 
1484             return XMLEvent.END_DOCUMENT;
1485 
1486         }//next
1487 
1488     } // class TrailingMiscDriver
1489 
1490     /**
1491      * Implements XMLBufferListener interface.
1492      */
1493 
1494 
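    /**
     * Receives callbacks from {@link XMLEntityReader} when the buffer
     * is being changed.
     *
     * @param refreshPosition buffer position that is passed on to the
     *        superclass and, while the DTD is being read, recorded as the
     *        new start position for the captured DTD text
     */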
refresh(int refreshPosition)1500     public void refresh(int refreshPosition){
1501         super.refresh(refreshPosition);
1502         if(fReadingDTD){
1503             Entity entity = fEntityScanner.getCurrentEntity();
1504             if(entity instanceof Entity.ScannedEntity){
1505                 fEndPos=((Entity.ScannedEntity)entity).position;
1506             }
1507             fDTDDecl.append(((Entity.ScannedEntity)entity).ch,fStartPos , fEndPos-fStartPos);
1508             fStartPos = refreshPosition;
1509         }
1510     }
1511 
1512 } // class XMLDocumentScannerImpl
1513