1 /*
2  * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
3  */
4 /*
5  * Licensed to the Apache Software Foundation (ASF) under one or more
6  * contributor license agreements.  See the NOTICE file distributed with
7  * this work for additional information regarding copyright ownership.
8  * The ASF licenses this file to You under the Apache License, Version 2.0
9  * (the "License"); you may not use this file except in compliance with
10  * the License.  You may obtain a copy of the License at
11  *
12  *      http://www.apache.org/licenses/LICENSE-2.0
13  *
14  * Unless required by applicable law or agreed to in writing, software
15  * distributed under the License is distributed on an "AS IS" BASIS,
16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17  * See the License for the specific language governing permissions and
18  * limitations under the License.
19  */
20 
21 package com.sun.org.apache.xerces.internal.impl.dtd;
22 
23 import java.io.EOFException;
24 import java.io.IOException;
25 import java.io.StringReader;
26 import java.util.Locale;
27 
28 import com.sun.org.apache.xerces.internal.impl.Constants;
29 import com.sun.org.apache.xerces.internal.impl.XMLDTDScannerImpl;
30 import com.sun.org.apache.xerces.internal.impl.XMLErrorReporter;
31 import com.sun.org.apache.xerces.internal.impl.XMLEntityManager;
32 import com.sun.org.apache.xerces.internal.impl.msg.XMLMessageFormatter;
33 
34 import com.sun.org.apache.xerces.internal.util.Status;
35 import com.sun.org.apache.xerces.internal.util.SymbolTable;
36 import com.sun.org.apache.xerces.internal.util.DefaultErrorHandler;
37 
38 import com.sun.org.apache.xerces.internal.xni.XNIException;
39 import com.sun.org.apache.xerces.internal.xni.grammars.XMLGrammarPool;
40 import com.sun.org.apache.xerces.internal.xni.grammars.XMLGrammarLoader;
41 import com.sun.org.apache.xerces.internal.xni.grammars.Grammar;
42 import com.sun.org.apache.xerces.internal.xni.parser.XMLConfigurationException;
43 import com.sun.org.apache.xerces.internal.xni.parser.XMLErrorHandler;
44 import com.sun.org.apache.xerces.internal.xni.parser.XMLEntityResolver;
45 import com.sun.org.apache.xerces.internal.xni.parser.XMLInputSource;
46 
47 
48 /**
49  * The DTD loader. The loader knows how to build grammars from XMLInputSources.
50  * It extends the DTD processor in order to do this; it's
51  * a separate class because DTD processors don't need to know how
52  * to talk to the outside world in their role as instance-document
53  * helpers.
54  * <p>
 * This component requires the following features and properties.  It
 * knows how to set them itself if no one else does:
57  * <ul>
58  *  <li>http://xml.org/sax/features/namespaces</li>
59  *  <li>http://apache.org/xml/properties/internal/symbol-table</li>
60  *  <li>http://apache.org/xml/properties/internal/error-reporter</li>
61  *  <li>http://apache.org/xml/properties/internal/grammar-pool</li>
62  *  <li>http://apache.org/xml/properties/internal/datatype-validator-factory</li>
63  * </ul>
64  *
65  * @xerces.internal
66  *
67  * @author Neil Graham, IBM
68  * @author Michael Glavassevich, IBM
69  *
70  * @LastModified: Nov 2017
71  */
72 public class XMLDTDLoader
73         extends XMLDTDProcessor
74         implements XMLGrammarLoader {
75 
76     //
77     // Constants
78     //
79 
80     // feature identifiers
81 
82     /** Feature identifier: standard uri conformant feature. */
83     protected static final String STANDARD_URI_CONFORMANT_FEATURE =
84         Constants.XERCES_FEATURE_PREFIX + Constants.STANDARD_URI_CONFORMANT_FEATURE;
85 
86     /** Feature identifier: balance syntax trees. */
87     protected static final String BALANCE_SYNTAX_TREES =
88         Constants.XERCES_FEATURE_PREFIX + Constants.BALANCE_SYNTAX_TREES;
89 
90     // recognized features:
91     private static final String[] LOADER_RECOGNIZED_FEATURES = {
92         VALIDATION,
93         WARN_ON_DUPLICATE_ATTDEF,
94         WARN_ON_UNDECLARED_ELEMDEF,
95         NOTIFY_CHAR_REFS,
96         STANDARD_URI_CONFORMANT_FEATURE,
97         BALANCE_SYNTAX_TREES
98     };
99 
100     // property identifiers
101 
102     /** Property identifier: error handler. */
103     protected static final String ERROR_HANDLER =
104         Constants.XERCES_PROPERTY_PREFIX + Constants.ERROR_HANDLER_PROPERTY;
105 
106     /** Property identifier: entity resolver. */
107     public static final String ENTITY_RESOLVER =
108         Constants.XERCES_PROPERTY_PREFIX + Constants.ENTITY_RESOLVER_PROPERTY;
109 
110     /** Property identifier: locale. */
111     public static final String LOCALE =
112         Constants.XERCES_PROPERTY_PREFIX + Constants.LOCALE_PROPERTY;
113 
114     /** Recognized properties. */
115     private static final String[] LOADER_RECOGNIZED_PROPERTIES = {
116         SYMBOL_TABLE,
117         ERROR_REPORTER,
118         ERROR_HANDLER,
119         ENTITY_RESOLVER,
120         GRAMMAR_POOL,
121         DTD_VALIDATOR,
122         LOCALE
123     };
124 
125     // enforcing strict uri?
126     private boolean fStrictURI = false;
127 
128     /** Controls whether the DTD grammar produces balanced syntax trees. */
129     private boolean fBalanceSyntaxTrees = false;
130 
131     /** Entity resolver . */
132     protected XMLEntityResolver fEntityResolver;
133 
134     // the scanner we use to actually read the DTD
135     protected XMLDTDScannerImpl fDTDScanner;
136 
137     // the entity manager the scanner needs.
138     protected XMLEntityManager fEntityManager;
139 
140     // what's our Locale?
141     protected Locale fLocale;
142 
143     //
144     // Constructors
145     //
146 
147     /** Deny default construction; we need a SymtolTable! */
XMLDTDLoader()148     public XMLDTDLoader() {
149         this(new SymbolTable());
150     } // <init>()
151 
XMLDTDLoader(SymbolTable symbolTable)152     public XMLDTDLoader(SymbolTable symbolTable) {
153         this(symbolTable, null);
154     } // init(SymbolTable)
155 
XMLDTDLoader(SymbolTable symbolTable, XMLGrammarPool grammarPool)156     public XMLDTDLoader(SymbolTable symbolTable,
157                 XMLGrammarPool grammarPool) {
158         this(symbolTable, grammarPool, null, new XMLEntityManager());
159     } // init(SymbolTable, XMLGrammarPool)
160 
XMLDTDLoader(SymbolTable symbolTable, XMLGrammarPool grammarPool, XMLErrorReporter errorReporter, XMLEntityResolver entityResolver)161     XMLDTDLoader(SymbolTable symbolTable,
162                 XMLGrammarPool grammarPool, XMLErrorReporter errorReporter,
163                 XMLEntityResolver entityResolver) {
164         fSymbolTable = symbolTable;
165         fGrammarPool = grammarPool;
166         if(errorReporter == null) {
167             errorReporter = new XMLErrorReporter();
168             errorReporter.setProperty(ERROR_HANDLER, new DefaultErrorHandler());
169         }
170         fErrorReporter = errorReporter;
171         // Add XML message formatter if there isn't one.
172         if (fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN) == null) {
173             XMLMessageFormatter xmft = new XMLMessageFormatter();
174             fErrorReporter.putMessageFormatter(XMLMessageFormatter.XML_DOMAIN, xmft);
175             fErrorReporter.putMessageFormatter(XMLMessageFormatter.XMLNS_DOMAIN, xmft);
176         }
177         fEntityResolver = entityResolver;
178         if(fEntityResolver instanceof XMLEntityManager) {
179             fEntityManager = (XMLEntityManager)fEntityResolver;
180         } else {
181             fEntityManager = new XMLEntityManager();
182         }
183         fEntityManager.setProperty(Constants.XERCES_PROPERTY_PREFIX + Constants.ERROR_REPORTER_PROPERTY, errorReporter);
184         fDTDScanner = createDTDScanner(fSymbolTable, fErrorReporter, fEntityManager);
185         fDTDScanner.setDTDHandler(this);
186         fDTDScanner.setDTDContentModelHandler(this);
187         reset();
188     } // init(SymbolTable, XMLGrammarPool, XMLErrorReporter, XMLEntityResolver)
189 
190     // XMLGrammarLoader methods
191 
192     /**
193      * Returns a list of feature identifiers that are recognized by
194      * this component. This method may return null if no features
195      * are recognized by this component.
196      */
getRecognizedFeatures()197     public String[] getRecognizedFeatures() {
198         return LOADER_RECOGNIZED_FEATURES.clone();
199     } // getRecognizedFeatures():String[]
200 
201     /**
202      * Sets the state of a feature. This method is called by the component
203      * manager any time after reset when a feature changes state.
204      * <p>
205      * <strong>Note:</strong> Components should silently ignore features
206      * that do not affect the operation of the component.
207      *
208      * @param featureId The feature identifier.
209      * @param state     The state of the feature.
210      *
211      * @throws SAXNotRecognizedException The component should not throw
212      *                                   this exception.
213      * @throws SAXNotSupportedException The component should not throw
214      *                                  this exception.
215      */
setFeature(String featureId, boolean state)216     public void setFeature(String featureId, boolean state)
217             throws XMLConfigurationException {
218         if (featureId.equals(VALIDATION)) {
219             fValidation = state;
220         }
221         else if (featureId.equals(WARN_ON_DUPLICATE_ATTDEF)) {
222             fWarnDuplicateAttdef = state;
223         }
224         else if (featureId.equals(WARN_ON_UNDECLARED_ELEMDEF)) {
225             fWarnOnUndeclaredElemdef = state;
226         }
227         else if (featureId.equals(NOTIFY_CHAR_REFS)) {
228             fDTDScanner.setFeature(featureId, state);
229         }
230         else if (featureId.equals(STANDARD_URI_CONFORMANT_FEATURE)) {
231             fStrictURI = state;
232         }
233         else if (featureId.equals(BALANCE_SYNTAX_TREES)) {
234             fBalanceSyntaxTrees = state;
235         }
236         else {
237             throw new XMLConfigurationException(Status.NOT_RECOGNIZED, featureId);
238         }
239     } // setFeature(String,boolean)
240 
241     /**
242      * Returns a list of property identifiers that are recognized by
243      * this component. This method may return null if no properties
244      * are recognized by this component.
245      */
getRecognizedProperties()246     public String[] getRecognizedProperties() {
247         return LOADER_RECOGNIZED_PROPERTIES.clone();
248     } // getRecognizedProperties():String[]
249 
250     /**
251      * Returns the state of a property.
252      *
253      * @param propertyId The property identifier.
254      *
255      * @throws XMLConfigurationException Thrown on configuration error.
256      */
getProperty(String propertyId)257     public Object getProperty(String propertyId)
258             throws XMLConfigurationException {
259         if (propertyId.equals(SYMBOL_TABLE)) {
260             return fSymbolTable;
261         }
262         else if (propertyId.equals(ERROR_REPORTER)) {
263             return fErrorReporter;
264         }
265         else if (propertyId.equals(ERROR_HANDLER)) {
266             return fErrorReporter.getErrorHandler();
267         }
268         else if (propertyId.equals(ENTITY_RESOLVER)) {
269             return fEntityResolver;
270         }
271         else if (propertyId.equals(LOCALE)) {
272             return getLocale();
273         }
274         else if (propertyId.equals(GRAMMAR_POOL)) {
275             return fGrammarPool;
276         }
277         else if (propertyId.equals(DTD_VALIDATOR)) {
278             return fValidator;
279         }
280         throw new XMLConfigurationException(Status.NOT_RECOGNIZED, propertyId);
281     } // getProperty(String):  Object
282 
283     /**
284      * Sets the value of a property. This method is called by the component
285      * manager any time after reset when a property changes value.
286      * <p>
287      * <strong>Note:</strong> Components should silently ignore properties
288      * that do not affect the operation of the component.
289      *
290      * @param propertyId The property identifier.
291      * @param value      The value of the property.
292      *
293      * @throws SAXNotRecognizedException The component should not throw
294      *                                   this exception.
295      * @throws SAXNotSupportedException The component should not throw
296      *                                  this exception.
297      */
setProperty(String propertyId, Object value)298     public void setProperty(String propertyId, Object value)
299             throws XMLConfigurationException {
300         if (propertyId.equals(SYMBOL_TABLE)) {
301             fSymbolTable = (SymbolTable)value;
302             fDTDScanner.setProperty(propertyId, value);
303             fEntityManager.setProperty(propertyId, value);
304         }
305         else if(propertyId.equals(ERROR_REPORTER)) {
306             fErrorReporter = (XMLErrorReporter)value;
307             // Add XML message formatter if there isn't one.
308             if (fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN) == null) {
309                 XMLMessageFormatter xmft = new XMLMessageFormatter();
310                 fErrorReporter.putMessageFormatter(XMLMessageFormatter.XML_DOMAIN, xmft);
311                 fErrorReporter.putMessageFormatter(XMLMessageFormatter.XMLNS_DOMAIN, xmft);
312             }
313             fDTDScanner.setProperty(propertyId, value);
314             fEntityManager.setProperty(propertyId, value);
315         }
316         else if (propertyId.equals(ERROR_HANDLER)) {
317             fErrorReporter.setProperty(propertyId, value);
318         }
319         else if (propertyId.equals(ENTITY_RESOLVER)) {
320             fEntityResolver = (XMLEntityResolver)value;
321             fEntityManager.setProperty(propertyId, value);
322         }
323         else if (propertyId.equals(LOCALE)) {
324             setLocale((Locale) value);
325         }
326         else if(propertyId.equals(GRAMMAR_POOL)) {
327             fGrammarPool = (XMLGrammarPool)value;
328         }
329         else {
330             throw new XMLConfigurationException(Status.NOT_RECOGNIZED, propertyId);
331         }
332     } // setProperty(String,Object)
333 
334     /**
335      * Returns the state of a feature.
336      *
337      * @param featureId The feature identifier.
338      *
339      * @throws XMLConfigurationException Thrown on configuration error.
340      */
getFeature(String featureId)341     public boolean getFeature(String featureId)
342             throws XMLConfigurationException {
343         if (featureId.equals(VALIDATION)) {
344             return fValidation;
345         }
346         else if (featureId.equals(WARN_ON_DUPLICATE_ATTDEF)) {
347             return fWarnDuplicateAttdef;
348         }
349         else if (featureId.equals(WARN_ON_UNDECLARED_ELEMDEF)) {
350             return fWarnOnUndeclaredElemdef;
351         }
352         else if (featureId.equals(NOTIFY_CHAR_REFS)) {
353             return fDTDScanner.getFeature(featureId);
354         }
355         else if (featureId.equals(STANDARD_URI_CONFORMANT_FEATURE)) {
356             return fStrictURI;
357         }
358         else if (featureId.equals(BALANCE_SYNTAX_TREES)) {
359             return fBalanceSyntaxTrees;
360         }
361         throw new XMLConfigurationException(Status.NOT_RECOGNIZED, featureId);
362     } //getFeature(String):  boolean
363 
364     /**
365      * Set the locale to use for messages.
366      *
367      * @param locale The locale object to use for localization of messages.
368      *
369      * @exception XNIException Thrown if the parser does not support the
370      *                         specified locale.
371      */
setLocale(Locale locale)372     public void setLocale(Locale locale) {
373         fLocale = locale;
374         fErrorReporter.setLocale(locale);
375     } // setLocale(Locale)
376 
377     /** Return the Locale the XMLGrammarLoader is using. */
getLocale()378     public Locale getLocale() {
379         return fLocale;
380     } // getLocale():  Locale
381 
382 
383     /**
384      * Sets the error handler.
385      *
386      * @param errorHandler The error handler.
387      */
setErrorHandler(XMLErrorHandler errorHandler)388     public void setErrorHandler(XMLErrorHandler errorHandler) {
389         fErrorReporter.setProperty(ERROR_HANDLER, errorHandler);
390     } // setErrorHandler(XMLErrorHandler)
391 
392     /** Returns the registered error handler.  */
getErrorHandler()393     public XMLErrorHandler getErrorHandler() {
394         return fErrorReporter.getErrorHandler();
395     } // getErrorHandler():  XMLErrorHandler
396 
397     /**
398      * Sets the entity resolver.
399      *
400      * @param entityResolver The new entity resolver.
401      */
setEntityResolver(XMLEntityResolver entityResolver)402     public void setEntityResolver(XMLEntityResolver entityResolver) {
403         fEntityResolver = entityResolver;
404         fEntityManager.setProperty(ENTITY_RESOLVER, entityResolver);
405     } // setEntityResolver(XMLEntityResolver)
406 
407     /** Returns the registered entity resolver.  */
getEntityResolver()408     public XMLEntityResolver getEntityResolver() {
409         return fEntityResolver;
410     } // getEntityResolver():  XMLEntityResolver
411 
412     /**
413      * Returns a Grammar object by parsing the contents of the
414      * entity pointed to by source.
415      *
416      * @param source        the location of the entity which forms
417      *                          the starting point of the grammar to be constructed.
418      * @throws IOException      When a problem is encountered reading the entity
419      *          XNIException    When a condition arises (such as a FatalError) that requires parsing
420      *                              of the entity be terminated.
421      */
loadGrammar(XMLInputSource source)422     public Grammar loadGrammar(XMLInputSource source)
423             throws IOException, XNIException {
424         reset();
425         // First chance checking strict URI
426         String eid = XMLEntityManager.expandSystemId(source.getSystemId(), source.getBaseSystemId(), fStrictURI);
427         XMLDTDDescription desc = new XMLDTDDescription(source.getPublicId(), source.getSystemId(), source.getBaseSystemId(), eid, null);
428         if (!fBalanceSyntaxTrees) {
429             fDTDGrammar = new DTDGrammar(fSymbolTable, desc);
430         }
431         else {
432             fDTDGrammar = new BalancedDTDGrammar(fSymbolTable, desc);
433         }
434         fGrammarBucket = new DTDGrammarBucket();
435         fGrammarBucket.setStandalone(false);
436         fGrammarBucket.setActiveGrammar(fDTDGrammar);
437         // no reason to use grammar bucket's "put" method--we
438         // know which grammar it is, and we don't know the root name anyway...
439 
440         // actually start the parsing!
441         try {
442             fDTDScanner.setInputSource(source);
443             fDTDScanner.scanDTDExternalSubset(true);
444         } catch (EOFException e) {
445             // expected behaviour...
446         }
447         finally {
448             // Close all streams opened by the parser.
449             fEntityManager.closeReaders();
450         }
451         if(fDTDGrammar != null && fGrammarPool != null) {
452             fGrammarPool.cacheGrammars(XMLDTDDescription.XML_DTD, new Grammar[] {fDTDGrammar});
453         }
454         return fDTDGrammar;
455     } // loadGrammar(XMLInputSource):  Grammar
456 
457     /**
458      * Parse a DTD internal and/or external subset and insert the content
459      * into the existing DTD grammar owned by the given DTDValidator.
460      */
loadGrammarWithContext(XMLDTDValidator validator, String rootName, String publicId, String systemId, String baseSystemId, String internalSubset)461     public void loadGrammarWithContext(XMLDTDValidator validator, String rootName,
462             String publicId, String systemId, String baseSystemId, String internalSubset)
463         throws IOException, XNIException {
464         final DTDGrammarBucket grammarBucket = validator.getGrammarBucket();
465         final DTDGrammar activeGrammar = grammarBucket.getActiveGrammar();
466         if (activeGrammar != null && !activeGrammar.isImmutable()) {
467             fGrammarBucket = grammarBucket;
468             fEntityManager.setScannerVersion(getScannerVersion());
469             reset();
470             try {
471                 // process internal subset
472                 if (internalSubset != null) {
473                     // To get the DTD scanner to end at the right place we have to fool
474                     // it into thinking that it reached the end of the internal subset
475                     // in a real document.
476                     StringBuffer buffer = new StringBuffer(internalSubset.length() + 2);
477                     buffer.append(internalSubset).append("]>");
478                     XMLInputSource is = new XMLInputSource(null, baseSystemId,
479                             null, new StringReader(buffer.toString()), null);
480                     fEntityManager.startDocumentEntity(is);
481                     fDTDScanner.scanDTDInternalSubset(true, false, systemId != null);
482                 }
483                 // process external subset
484                 if (systemId != null) {
485                     XMLDTDDescription desc = new XMLDTDDescription(publicId, systemId, baseSystemId, null, rootName);
486                     XMLInputSource source = fEntityManager.resolveEntity(desc);
487                     fDTDScanner.setInputSource(source);
488                     fDTDScanner.scanDTDExternalSubset(true);
489                 }
490             }
491             catch (EOFException e) {
492                 // expected behaviour...
493             }
494             finally {
495                 // Close all streams opened by the parser.
496                 fEntityManager.closeReaders();
497             }
498         }
499     } // loadGrammarWithContext(XMLDTDValidator, String, String, String, String, String)
500 
501     // reset all the components that we rely upon
reset()502     protected void reset() {
503         super.reset();
504         fDTDScanner.reset();
505         fEntityManager.reset();
506         fErrorReporter.setDocumentLocator(fEntityManager.getEntityScanner());
507     }
508 
createDTDScanner(SymbolTable symbolTable, XMLErrorReporter errorReporter, XMLEntityManager entityManager)509     protected XMLDTDScannerImpl createDTDScanner(SymbolTable symbolTable,
510             XMLErrorReporter errorReporter, XMLEntityManager entityManager) {
511         return new XMLDTDScannerImpl(symbolTable, errorReporter, entityManager);
512     } // createDTDScanner(SymbolTable, XMLErrorReporter, XMLEntityManager) : XMLDTDScannerImpl
513 
getScannerVersion()514     protected short getScannerVersion() {
515         return Constants.XML_VERSION_1_0;
516     } // getScannerVersion() : short
517 
518 } // class XMLDTDLoader
519