1 /*
2  * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved.
3  */
4 /*
5  * Licensed to the Apache Software Foundation (ASF) under one or more
6  * contributor license agreements.  See the NOTICE file distributed with
7  * this work for additional information regarding copyright ownership.
8  * The ASF licenses this file to You under the Apache License, Version 2.0
9  * (the "License"); you may not use this file except in compliance with
10  * the License.  You may obtain a copy of the License at
11  *
12  *     http://www.apache.org/licenses/LICENSE-2.0
13  *
14  * Unless required by applicable law or agreed to in writing, software
15  * distributed under the License is distributed on an "AS IS" BASIS,
16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17  * See the License for the specific language governing permissions and
18  * limitations under the License.
19  */
20 
21 package com.sun.org.apache.xerces.internal.impl;
22 
23 import com.sun.org.apache.xerces.internal.util.Status;
24 import com.sun.xml.internal.stream.XMLEntityStorage;
25 import java.io.IOException;
26 import java.util.ArrayList;
27 import javax.xml.stream.events.XMLEvent;
28 import com.sun.org.apache.xerces.internal.impl.msg.XMLMessageFormatter;
29 import com.sun.org.apache.xerces.internal.util.SymbolTable;
30 import com.sun.org.apache.xerces.internal.util.XMLChar;
31 import com.sun.org.apache.xerces.internal.util.XMLResourceIdentifierImpl;
32 import com.sun.org.apache.xerces.internal.util.XMLStringBuffer;
33 import com.sun.org.apache.xerces.internal.utils.XMLLimitAnalyzer;
34 import com.sun.org.apache.xerces.internal.utils.XMLSecurityManager;
35 import com.sun.org.apache.xerces.internal.xni.Augmentations;
36 import com.sun.org.apache.xerces.internal.xni.XMLAttributes;
37 import com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier;
38 import com.sun.org.apache.xerces.internal.xni.XMLString;
39 import com.sun.org.apache.xerces.internal.xni.XNIException;
40 import com.sun.org.apache.xerces.internal.xni.parser.XMLComponent;
41 import com.sun.org.apache.xerces.internal.xni.parser.XMLComponentManager;
42 import com.sun.org.apache.xerces.internal.xni.parser.XMLConfigurationException;
43 import com.sun.xml.internal.stream.Entity;
44 
45 //import com.sun.xml.stream.XMLEntityManager;
46 //import com.sun.org.apache.xerces.internal.impl.XMLErrorReporter;
47 
48 /**
49  * This class is responsible for holding scanning methods common to
50  * scanning the XML document structure and content as well as the DTD
51  * structure and content. Both XMLDocumentScanner and XMLDTDScanner inherit
52  * from this base class.
53  *
54  * <p>
55  * This component requires the following features and properties from the
56  * component manager that uses it:
57  * <ul>
58  *  <li>http://xml.org/sax/features/validation</li>
59  *  <li>http://apache.org/xml/features/scanner/notify-char-refs</li>
60  *  <li>http://apache.org/xml/properties/internal/symbol-table</li>
61  *  <li>http://apache.org/xml/properties/internal/error-reporter</li>
62  *  <li>http://apache.org/xml/properties/internal/entity-manager</li>
63  * </ul>
64  *
65  * @author Andy Clark, IBM
66  * @author Arnaud  Le Hors, IBM
67  * @author Eric Ye, IBM
68  * @author K.Venugopal SUN Microsystems
69  * @author Sunitha Reddy, SUN Microsystems
70  * @LastModified: Feb 2020
71  */
72 public abstract class XMLScanner
73         implements XMLComponent {
74 
75     //
76     // Constants
77     //
78 
79     // feature identifiers
80 
81     /** Feature identifier: namespaces. */
82     protected static final String NAMESPACES =
83             Constants.SAX_FEATURE_PREFIX + Constants.NAMESPACES_FEATURE;
84 
85     /** Feature identifier: validation. */
86     protected static final String VALIDATION =
87             Constants.SAX_FEATURE_PREFIX + Constants.VALIDATION_FEATURE;
88 
89     /** Feature identifier: notify character references. */
90     protected static final String NOTIFY_CHAR_REFS =
91             Constants.XERCES_FEATURE_PREFIX + Constants.NOTIFY_CHAR_REFS_FEATURE;
92 
93     // property identifiers
94 
    /**
     * Feature identifier: internal parser-settings feature. Although it is
     * declared next to the property identifiers, it is built from the Xerces
     * <em>feature</em> prefix and is read with {@code getFeature} in
     * {@code reset(XMLComponentManager)}.
     */
    protected static final String PARSER_SETTINGS =
                                Constants.XERCES_FEATURE_PREFIX + Constants.PARSER_SETTINGS;
97     /** Property identifier: symbol table. */
98     protected static final String SYMBOL_TABLE =
99             Constants.XERCES_PROPERTY_PREFIX + Constants.SYMBOL_TABLE_PROPERTY;
100 
101     /** Property identifier: error reporter. */
102     protected static final String ERROR_REPORTER =
103             Constants.XERCES_PROPERTY_PREFIX + Constants.ERROR_REPORTER_PROPERTY;
104 
105     /** Property identifier: entity manager. */
106     protected static final String ENTITY_MANAGER =
107             Constants.XERCES_PROPERTY_PREFIX + Constants.ENTITY_MANAGER_PROPERTY;
108 
109     /** Property identifier: Security manager. */
110     private static final String SECURITY_MANAGER = Constants.SECURITY_MANAGER;
111 
112     // debugging
113 
114     /** Debug attribute normalization. */
115     protected static final boolean DEBUG_ATTR_NORMALIZATION = false;
116 
117     /**
118      * Type of names
119      */
120     public static enum NameType {
121         ATTRIBUTE("attribute"),
122         ATTRIBUTENAME("attribute name"),
123         COMMENT("comment"),
124         DOCTYPE("doctype"),
125         ELEMENTSTART("startelement"),
126         ELEMENTEND("endelement"),
127         ENTITY("entity"),
128         NOTATION("notation"),
129         PI("pi"),
130         REFERENCE("reference");
131 
132         final String literal;
NameType(String literal)133         NameType(String literal) {
134             this.literal = literal;
135         }
136 
literal()137         String literal() {
138             return literal;
139         }
140     }
141 
    // xxx: defaults to false because the non-normalized value does not need to
    // be calculated; there should be a feature that, when set to true, causes
    // this value to be computed.
144     private boolean fNeedNonNormalizedValue = false;
145 
146     protected ArrayList<XMLString> attributeValueCache = new ArrayList<>();
147     protected ArrayList<XMLStringBuffer> stringBufferCache = new ArrayList<>();
148     protected int fStringBufferIndex = 0;
149     protected boolean fAttributeCacheInitDone = false;
150     protected int fAttributeCacheUsedCount = 0;
151 
152     //
153     // Data
154     //
155 
156     // features
157 
158     /**
159      * Validation. This feature identifier is:
160      * http://xml.org/sax/features/validation
161      */
162     protected boolean fValidation = false;
163 
164     /** Namespaces. */
165     protected boolean fNamespaces;
166 
167     /** Character references notification. */
168     protected boolean fNotifyCharRefs = false;
169 
170     /** Internal parser-settings feature */
171     protected boolean fParserSettings = true;
172 
173     // properties
174 
175     protected PropertyManager fPropertyManager = null ;
176     /** Symbol table. */
177     protected SymbolTable fSymbolTable;
178 
179     /** Error reporter. */
180     protected XMLErrorReporter fErrorReporter;
181 
182     /** Entity manager. */
183     //protected XMLEntityManager fEntityManager = PropertyManager.getEntityManager();
184     protected XMLEntityManager fEntityManager = null ;
185 
    /** Entity store. xxx: this should be made available through the entity manager. */
187     protected XMLEntityStorage fEntityStore = null ;
188 
189     /** Security manager. */
190     protected XMLSecurityManager fSecurityManager = null;
191 
192     /** Limit analyzer. */
193     protected XMLLimitAnalyzer fLimitAnalyzer = null;
194 
195     // protected data
196 
197     /** event type */
198     protected XMLEvent fEvent ;
199 
200     /** Entity scanner, this always works on last entity that was opened. */
201     protected XMLEntityScanner fEntityScanner = null;
202 
203     /** Entity depth. */
204     protected int fEntityDepth;
205 
206     /** Literal value of the last character reference scanned. */
207     protected String fCharRefLiteral = null;
208 
209     /** Scanning attribute. */
210     protected boolean fScanningAttribute;
211 
212     /** Report entity boundary. */
213     protected boolean fReportEntity;
214 
215     // symbols
216 
217     /** Symbol: "version". */
218     protected final static String fVersionSymbol = "version".intern();
219 
220     /** Symbol: "encoding". */
221     protected final static String fEncodingSymbol = "encoding".intern();
222 
223     /** Symbol: "standalone". */
224     protected final static String fStandaloneSymbol = "standalone".intern();
225 
226     /** Symbol: "amp". */
227     protected final static String fAmpSymbol = "amp".intern();
228 
229     /** Symbol: "lt". */
230     protected final static String fLtSymbol = "lt".intern();
231 
232     /** Symbol: "gt". */
233     protected final static String fGtSymbol = "gt".intern();
234 
235     /** Symbol: "quot". */
236     protected final static String fQuotSymbol = "quot".intern();
237 
238     /** Symbol: "apos". */
239     protected final static String fAposSymbol = "apos".intern();
240 
241     // temporary variables
242 
    // NOTE: These objects are private to help prevent accidental modification
    //       of values by a subclass. If they were protected *and* a subclass
    //       modified the values, it would be difficult to track down the real
    //       cause of the bug. By making these private, we avoid this
    //       possibility.
248 
249     /** String. */
250     private XMLString fString = new XMLString();
251 
252     /** String buffer. */
253     private XMLStringBuffer fStringBuffer = new XMLStringBuffer();
254 
255     /** String buffer. */
256     private XMLStringBuffer fStringBuffer2 = new XMLStringBuffer();
257 
258     /** String buffer. */
259     private XMLStringBuffer fStringBuffer3 = new XMLStringBuffer();
260 
261     // temporary location for Resource identification information.
262     protected XMLResourceIdentifierImpl fResourceIdentifier = new XMLResourceIdentifierImpl();
263     int initialCacheCount = 6;
264     //
265     // XMLComponent methods
266     //
267 
268     /**
269      *
270      *
271      * @param componentManager The component manager.
272      *
273      * @throws SAXException Throws exception if required features and
274      *                      properties cannot be found.
275      */
reset(XMLComponentManager componentManager)276     public void reset(XMLComponentManager componentManager)
277     throws XMLConfigurationException {
278 
279                 fParserSettings = componentManager.getFeature(PARSER_SETTINGS, true);
280 
281                 if (!fParserSettings) {
282                         // parser settings have not been changed
283                         init();
284                         return;
285                 }
286 
287 
288         // Xerces properties
289         fSymbolTable = (SymbolTable)componentManager.getProperty(SYMBOL_TABLE);
290         fErrorReporter = (XMLErrorReporter)componentManager.getProperty(ERROR_REPORTER);
291         fEntityManager = (XMLEntityManager)componentManager.getProperty(ENTITY_MANAGER);
292         fSecurityManager = (XMLSecurityManager)componentManager.getProperty(SECURITY_MANAGER);
293 
294         //this step is extra because we have separated the storage of entity
295         fEntityStore = fEntityManager.getEntityStore() ;
296 
297         // sax features
298         fValidation = componentManager.getFeature(VALIDATION, false);
299         fNamespaces = componentManager.getFeature(NAMESPACES, true);
300         fNotifyCharRefs = componentManager.getFeature(NOTIFY_CHAR_REFS, false);
301 
302         init();
303     } // reset(XMLComponentManager)
304 
setPropertyManager(PropertyManager propertyManager)305     protected void setPropertyManager(PropertyManager propertyManager){
306         fPropertyManager = propertyManager ;
307     }
308 
309     /**
310      * Sets the value of a property during parsing.
311      *
312      * @param propertyId
313      * @param value
314      */
setProperty(String propertyId, Object value)315     public void setProperty(String propertyId, Object value)
316     throws XMLConfigurationException {
317 
318         // Xerces properties
319         if (propertyId.startsWith(Constants.XERCES_PROPERTY_PREFIX)) {
320             String property =
321                     propertyId.substring(Constants.XERCES_PROPERTY_PREFIX.length());
322             if (property.equals(Constants.SYMBOL_TABLE_PROPERTY)) {
323                 fSymbolTable = (SymbolTable)value;
324             } else if (property.equals(Constants.ERROR_REPORTER_PROPERTY)) {
325                 fErrorReporter = (XMLErrorReporter)value;
326             } else if (property.equals(Constants.ENTITY_MANAGER_PROPERTY)) {
327                 fEntityManager = (XMLEntityManager)value;
328             }
329         }
330 
331         if (propertyId.equals(SECURITY_MANAGER)) {
332             fSecurityManager = (XMLSecurityManager)value;
333         }
334                 /*else if(propertyId.equals(Constants.STAX_PROPERTIES)){
335             fStaxProperties = (HashMap)value;
336             //TODO::discuss with neeraj what are his thoughts on passing properties.
337             //For now use this
338         }*/
339 
340     } // setProperty(String,Object)
341 
342     /*
343      * Sets the feature of the scanner.
344      */
setFeature(String featureId, boolean value)345     public void setFeature(String featureId, boolean value)
346     throws XMLConfigurationException {
347 
348         if (VALIDATION.equals(featureId)) {
349             fValidation = value;
350         } else if (NOTIFY_CHAR_REFS.equals(featureId)) {
351             fNotifyCharRefs = value;
352         }
353     }
354 
355     /*
356      * Gets the state of the feature of the scanner.
357      */
getFeature(String featureId)358     public boolean getFeature(String featureId)
359     throws XMLConfigurationException {
360 
361         if (VALIDATION.equals(featureId)) {
362             return fValidation;
363         } else if (NOTIFY_CHAR_REFS.equals(featureId)) {
364             return fNotifyCharRefs;
365         }
366         throw new XMLConfigurationException(Status.NOT_RECOGNIZED, featureId);
367     }
368 
369     //
370     // Protected methods
371     //
372 
373     // anybody calling this had better have set Symtoltable!
reset()374     protected void reset() {
375         init();
376 
377         // DTD preparsing defaults:
378         fValidation = true;
379         fNotifyCharRefs = false;
380 
381     }
382 
reset(PropertyManager propertyManager)383     public void reset(PropertyManager propertyManager) {
384         init();
385         // Xerces properties
386         fSymbolTable = (SymbolTable)propertyManager.getProperty(Constants.XERCES_PROPERTY_PREFIX + Constants.SYMBOL_TABLE_PROPERTY);
387 
388         fErrorReporter = (XMLErrorReporter)propertyManager.getProperty(Constants.XERCES_PROPERTY_PREFIX + Constants.ERROR_REPORTER_PROPERTY);
389 
390         fEntityManager = (XMLEntityManager)propertyManager.getProperty(ENTITY_MANAGER);
391         fEntityStore = fEntityManager.getEntityStore() ;
392         fEntityScanner = fEntityManager.getEntityScanner() ;
393         fSecurityManager = (XMLSecurityManager)propertyManager.getProperty(SECURITY_MANAGER);
394 
395         //fEntityManager.reset();
396         // DTD preparsing defaults:
397         fValidation = false;
398         fNotifyCharRefs = false;
399 
400     }
401     // common scanning methods
402 
403     /**
404      * Scans an XML or text declaration.
405      * <p>
406      * <pre>
407      * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
408      * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
409      * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'" )
410      * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
411      * [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'")
412      *                 | ('"' ('yes' | 'no') '"'))
413      *
414      * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
415      * </pre>
416      *
417      * @param scanningTextDecl True if a text declaration is to
418      *                         be scanned instead of an XML
419      *                         declaration.
420      * @param pseudoAttributeValues An array of size 3 to return the version,
421      *                         encoding and standalone pseudo attribute values
422      *                         (in that order).
423      *
424      * <strong>Note:</strong> This method uses fString, anything in it
425      * at the time of calling is lost.
426      */
scanXMLDeclOrTextDecl(boolean scanningTextDecl, String[] pseudoAttributeValues)427     protected void scanXMLDeclOrTextDecl(boolean scanningTextDecl,
428             String[] pseudoAttributeValues)
429             throws IOException, XNIException {
430 
431         // pseudo-attribute values
432         String version = null;
433         String encoding = null;
434         String standalone = null;
435 
436         // scan pseudo-attributes
437         final int STATE_VERSION = 0;
438         final int STATE_ENCODING = 1;
439         final int STATE_STANDALONE = 2;
440         final int STATE_DONE = 3;
441         int state = STATE_VERSION;
442 
443         boolean dataFoundForTarget = false;
444         boolean sawSpace = fEntityScanner.skipSpaces();
445         // since pseudoattributes are *not* attributes,
446         // their quotes don't need to be preserved in external parameter entities.
447         // the XMLEntityScanner#scanLiteral method will continue to
448         // emit -1 in such cases when it finds a quote; this is
449         // fine for other methods that parse scanned entities,
450         // but not for the scanning of pseudoattributes.  So,
451         // temporarily, we must mark the current entity as not being "literal"
452         Entity.ScannedEntity currEnt = fEntityManager.getCurrentEntity();
453         boolean currLiteral = currEnt.literal;
454         currEnt.literal = false;
455         while (fEntityScanner.peekChar() != '?') {
456             dataFoundForTarget = true;
457             String name = scanPseudoAttribute(scanningTextDecl, fString);
458             switch (state) {
459                 case STATE_VERSION: {
460                     if (name.equals(fVersionSymbol)) {
461                         if (!sawSpace) {
462                             reportFatalError(scanningTextDecl
463                                     ? "SpaceRequiredBeforeVersionInTextDecl"
464                                     : "SpaceRequiredBeforeVersionInXMLDecl",
465                                     null);
466                         }
467                         version = fString.toString();
468                         state = STATE_ENCODING;
469                         if (!versionSupported(version)) {
470                             reportFatalError("VersionNotSupported",
471                                     new Object[]{version});
472                         }
473 
474                         if (version.equals("1.1")) {
475                             Entity.ScannedEntity top = fEntityManager.getTopLevelEntity();
476                             if (top != null && (top.version == null || top.version.equals("1.0"))) {
477                                 reportFatalError("VersionMismatch", null);
478                             }
479                             fEntityManager.setScannerVersion(Constants.XML_VERSION_1_1);
480                         }
481 
482                     } else if (name.equals(fEncodingSymbol)) {
483                         if (!scanningTextDecl) {
484                             reportFatalError("VersionInfoRequired", null);
485                         }
486                         if (!sawSpace) {
487                             reportFatalError(scanningTextDecl
488                                     ? "SpaceRequiredBeforeEncodingInTextDecl"
489                                     : "SpaceRequiredBeforeEncodingInXMLDecl",
490                                     null);
491                         }
492                         encoding = fString.toString();
493                         state = scanningTextDecl ? STATE_DONE : STATE_STANDALONE;
494                     } else {
495                         if (scanningTextDecl) {
496                             reportFatalError("EncodingDeclRequired", null);
497                         } else {
498                             reportFatalError("VersionInfoRequired", null);
499                         }
500                     }
501                     break;
502                 }
503                 case STATE_ENCODING: {
504                     if (name.equals(fEncodingSymbol)) {
505                         if (!sawSpace) {
506                             reportFatalError(scanningTextDecl
507                                     ? "SpaceRequiredBeforeEncodingInTextDecl"
508                                     : "SpaceRequiredBeforeEncodingInXMLDecl",
509                                     null);
510                         }
511                         encoding = fString.toString();
512                         state = scanningTextDecl ? STATE_DONE : STATE_STANDALONE;
513                         // TODO: check encoding name; set encoding on
514                         //       entity scanner
515                     } else if (!scanningTextDecl && name.equals(fStandaloneSymbol)) {
516                         if (!sawSpace) {
517                             reportFatalError("SpaceRequiredBeforeStandalone",
518                                     null);
519                         }
520                         standalone = fString.toString();
521                         state = STATE_DONE;
522                         if (!standalone.equals("yes") && !standalone.equals("no")) {
523                             reportFatalError("SDDeclInvalid", new Object[] {standalone});
524                         }
525                     } else {
526                         reportFatalError("EncodingDeclRequired", null);
527                     }
528                     break;
529                 }
530                 case STATE_STANDALONE: {
531                     if (name.equals(fStandaloneSymbol)) {
532                         if (!sawSpace) {
533                             reportFatalError("SpaceRequiredBeforeStandalone",
534                                     null);
535                         }
536                         standalone = fString.toString();
537                         state = STATE_DONE;
538                         if (!standalone.equals("yes") && !standalone.equals("no")) {
539                             reportFatalError("SDDeclInvalid",  new Object[] {standalone});
540                         }
541                     } else {
542                         reportFatalError("SDDeclNameInvalid", null);
543                     }
544                     break;
545                 }
546                 default: {
547                     reportFatalError("NoMorePseudoAttributes", null);
548                 }
549             }
550             sawSpace = fEntityScanner.skipSpaces();
551         }
552         // restore original literal value
553         if(currLiteral) {
554             currEnt.literal = true;
555         }
556         // REVISIT: should we remove this error reporting?
557         if (scanningTextDecl && state != STATE_DONE) {
558             reportFatalError("MorePseudoAttributes", null);
559         }
560 
561         // If there is no data in the xml or text decl then we fail to report error
562         // for version or encoding info above.
563         if (scanningTextDecl) {
564             if (!dataFoundForTarget && encoding == null) {
565                 reportFatalError("EncodingDeclRequired", null);
566             }
567         } else {
568             if (!dataFoundForTarget && version == null) {
569                 reportFatalError("VersionInfoRequired", null);
570             }
571         }
572 
573         // end
574         if (!fEntityScanner.skipChar('?', null)) {
575             reportFatalError("XMLDeclUnterminated", null);
576         }
577         if (!fEntityScanner.skipChar('>', null)) {
578             reportFatalError("XMLDeclUnterminated", null);
579 
580         }
581 
582         // fill in return array
583         pseudoAttributeValues[0] = version;
584         pseudoAttributeValues[1] = encoding;
585         pseudoAttributeValues[2] = standalone;
586 
587     } // scanXMLDeclOrTextDecl(boolean)
588 
589     /**
590      * Scans a pseudo attribute.
591      *
592      * @param scanningTextDecl True if scanning this pseudo-attribute for a
593      *                         TextDecl; false if scanning XMLDecl. This
594      *                         flag is needed to report the correct type of
595      *                         error.
596      * @param value            The string to fill in with the attribute
597      *                         value.
598      *
599      * @return The name of the attribute
600      *
601      * <strong>Note:</strong> This method uses fStringBuffer2, anything in it
602      * at the time of calling is lost.
603      */
scanPseudoAttribute(boolean scanningTextDecl, XMLString value)604     protected String scanPseudoAttribute(boolean scanningTextDecl,
605             XMLString value)
606             throws IOException, XNIException {
607 
608         String name = scanPseudoAttributeName();
609         // XMLEntityManager.print(fEntityManager.getCurrentEntity());
610 
611         if (name == null) {
612             reportFatalError("PseudoAttrNameExpected", null);
613         }
614         fEntityScanner.skipSpaces();
615         if (!fEntityScanner.skipChar('=', null)) {
616             reportFatalError(scanningTextDecl ? "EqRequiredInTextDecl"
617                     : "EqRequiredInXMLDecl", new Object[]{name});
618         }
619         fEntityScanner.skipSpaces();
620         int quote = fEntityScanner.peekChar();
621         if (quote != '\'' && quote != '"') {
622             reportFatalError(scanningTextDecl ? "QuoteRequiredInTextDecl"
623                     : "QuoteRequiredInXMLDecl" , new Object[]{name});
624         }
625         fEntityScanner.scanChar(NameType.ATTRIBUTE);
626         int c = fEntityScanner.scanLiteral(quote, value, false);
627         if (c != quote) {
628             fStringBuffer2.clear();
629             do {
630                 fStringBuffer2.append(value);
631                 if (c != -1) {
632                     if (c == '&' || c == '%' || c == '<' || c == ']') {
633                         fStringBuffer2.append((char)fEntityScanner.scanChar(NameType.ATTRIBUTE));
634                     } else if (XMLChar.isHighSurrogate(c)) {
635                         scanSurrogates(fStringBuffer2);
636                     } else if (isInvalidLiteral(c)) {
637                         String key = scanningTextDecl
638                                 ? "InvalidCharInTextDecl" : "InvalidCharInXMLDecl";
639                         reportFatalError(key,
640                                 new Object[] {Integer.toString(c, 16)});
641                                 fEntityScanner.scanChar(null);
642                     }
643                 }
644                 c = fEntityScanner.scanLiteral(quote, value, false);
645             } while (c != quote);
646             fStringBuffer2.append(value);
647             value.setValues(fStringBuffer2);
648         }
649         if (!fEntityScanner.skipChar(quote, null)) {
650             reportFatalError(scanningTextDecl ? "CloseQuoteMissingInTextDecl"
651                     : "CloseQuoteMissingInXMLDecl",
652                     new Object[]{name});
653         }
654 
655         // return
656         return name;
657 
658     } // scanPseudoAttribute(XMLString):String
659 
660     /**
661      * Scans the name of a pseudo attribute. The only legal names
662      * in XML 1.0/1.1 documents are 'version', 'encoding' and 'standalone'.
663      *
664      * @return the name of the pseudo attribute or <code>null</code>
665      * if a legal pseudo attribute name could not be scanned.
666      */
scanPseudoAttributeName()667     private String scanPseudoAttributeName() throws IOException, XNIException {
668         final int ch = fEntityScanner.peekChar();
669         switch (ch) {
670             case 'v':
671                 if (fEntityScanner.skipString(fVersionSymbol)) {
672                     return fVersionSymbol;
673                 }
674                 break;
675             case 'e':
676                 if (fEntityScanner.skipString(fEncodingSymbol)) {
677                     return fEncodingSymbol;
678                 }
679                 break;
680             case 's':
681                 if (fEntityScanner.skipString(fStandaloneSymbol)) {
682                     return fStandaloneSymbol;
683                 }
684                 break;
685         }
686         return null;
687     } // scanPseudoAttributeName()
688 
689     /**
690      * Scans a processing instruction.
691      * <p>
692      * <pre>
693      * [16] PI ::= '&lt;?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
694      * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
695      * </pre>
696      */
697     //CHANGED:
698     //EARLIER: scanPI()
699     //NOW: scanPI(XMLStringBuffer)
700     //it makes things more easy if XMLStringBUffer is passed. Motivation for this change is same
701     // as that for scanContent()
702 
scanPI(XMLStringBuffer data)703     protected void scanPI(XMLStringBuffer data) throws IOException, XNIException {
704 
705         // target
706         fReportEntity = false;
707         String target = fEntityScanner.scanName(NameType.PI);
708         if (target == null) {
709             reportFatalError("PITargetRequired", null);
710         }
711 
712         // scan data
713         scanPIData(target, data);
714         fReportEntity = true;
715 
716     } // scanPI(XMLStringBuffer)
717 
718     /**
719      * Scans a processing data. This is needed to handle the situation
720      * where a document starts with a processing instruction whose
721      * target name <em>starts with</em> "xml". (e.g. xmlfoo)
722      *
723      * This method would always read the whole data. We have while loop and data is buffered
724      * until delimeter is encountered.
725      *
726      * @param target The PI target
727      * @param data The string to fill in with the data
728      */
729 
730     //CHANGED:
731     //Earlier:This method uses the fStringBuffer and later buffer values are set to
732     //the supplied XMLString....
733     //Now: Changed the signature of this function to pass XMLStringBuffer.. and data would
734     //be appended to that buffer
735 
scanPIData(String target, XMLStringBuffer data)736     protected void scanPIData(String target, XMLStringBuffer data)
737     throws IOException, XNIException {
738 
739         // check target
740         if (target.length() == 3) {
741             char c0 = Character.toLowerCase(target.charAt(0));
742             char c1 = Character.toLowerCase(target.charAt(1));
743             char c2 = Character.toLowerCase(target.charAt(2));
744             if (c0 == 'x' && c1 == 'm' && c2 == 'l') {
745                 reportFatalError("ReservedPITarget", null);
746             }
747         }
748 
749         // spaces
750         if (!fEntityScanner.skipSpaces()) {
751             if (fEntityScanner.skipString("?>")) {
752                 // we found the end, there is no data just return
753                 return;
754             } else {
755                 // if there is data there should be some space
756                 reportFatalError("SpaceRequiredInPI", null);
757             }
758         }
759 
760         // since scanData appends the parsed data to the buffer passed
761         // a while loop would append the whole of parsed data to the buffer(data:XMLStringBuffer)
762         //until all of the data is buffered.
763         if (fEntityScanner.scanData("?>", data, 0)) {
764             do {
765                 int c = fEntityScanner.peekChar();
766                 if (c != -1) {
767                     if (XMLChar.isHighSurrogate(c)) {
768                         scanSurrogates(data);
769                     } else if (isInvalidLiteral(c)) {
770                         reportFatalError("InvalidCharInPI",
771                                 new Object[]{Integer.toHexString(c)});
772                                 fEntityScanner.scanChar(null);
773                     }
774                 }
775             } while (fEntityScanner.scanData("?>", data, 0));
776         }
777 
778     } // scanPIData(String,XMLString)
779 
780     /**
781      * Scans a comment.
782      * <p>
783      * <pre>
     * [15] Comment ::= '&lt;!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
785      * </pre>
786      * <p>
787      * <strong>Note:</strong> Called after scanning past '&lt;!--'
788      * <strong>Note:</strong> This method uses fString, anything in it
789      * at the time of calling is lost.
790      *
791      * @param text The buffer to fill in with the text.
792      */
scanComment(XMLStringBuffer text)793     protected void scanComment(XMLStringBuffer text)
794     throws IOException, XNIException {
795 
796         //System.out.println( "XMLScanner#scanComment# In Scan Comment" );
797         // text
798         // REVISIT: handle invalid character, eof
799         text.clear();
800         while (fEntityScanner.scanData("--", text, 0)) {
801             int c = fEntityScanner.peekChar();
802 
803             //System.out.println( "XMLScanner#scanComment#text.toString() == " + text.toString() );
804             //System.out.println( "XMLScanner#scanComment#c == " + c );
805 
806             if (c != -1) {
807                 if (XMLChar.isHighSurrogate(c)) {
808                     scanSurrogates(text);
809                 }
810                 else if (isInvalidLiteral(c)) {
811                     reportFatalError("InvalidCharInComment",
812                             new Object[] { Integer.toHexString(c) });
813                             fEntityScanner.scanChar(NameType.COMMENT);
814                 }
815             }
816         }
817         if (!fEntityScanner.skipChar('>', NameType.COMMENT)) {
818             reportFatalError("DashDashInComment", null);
819         }
820 
821     } // scanComment()
822 
823     /**
824      * Scans an attribute value and normalizes whitespace converting all
825      * whitespace characters to space characters.
826      *
827      * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'"
828      *
829      * @param value The XMLString to fill in with the value.
830      * @param nonNormalizedValue The XMLString to fill in with the
831      *                           non-normalized value.
832      * @param atName The name of the attribute being parsed (for error msgs).
833      * @param attributes The attributes list for the scanned attribute.
834      * @param attrIndex The index of the attribute to use from the list.
835      * @param checkEntities true if undeclared entities should be reported as VC violation,
836      *                      false if undeclared entities should be reported as WFC violation.
837      * @param eleName The name of element to which this attribute belongs.
838      * @param isNSURI a flag indicating whether the content is a Namespace URI
839      *
840      * <strong>Note:</strong> This method uses fStringBuffer2, anything in it
841      * at the time of calling is lost.
842      **/
    protected void scanAttributeValue(XMLString value, XMLString nonNormalizedValue,
            String atName, XMLAttributes attributes, int attrIndex, boolean checkEntities,
            String eleName, boolean isNSURI)
            throws IOException, XNIException {
        // Accumulator for the slow path below; stays null when the first
        // scanLiteral call already ends on the closing quote.
        XMLStringBuffer stringBuffer = null;
        // quote
        int quote = fEntityScanner.peekChar();
        if (quote != '\'' && quote != '"') {
            reportFatalError("OpenQuoteExpected", new Object[]{eleName, atName});
        }

        // consume the opening quote and remember the entity depth the value
        // started at; the loop below compares against it
        fEntityScanner.scanChar(NameType.ATTRIBUTE);
        int entityDepth = fEntityDepth;

        // first run of literal text; c is compared with the quote below to
        // decide whether more scanning is needed
        int c = fEntityScanner.scanLiteral(quote, value, isNSURI);
        if (DEBUG_ATTR_NORMALIZATION) {
            System.out.println("** scanLiteral -> \""
                    + value.toString() + "\"");
        }
        // fStringBuffer2 collects the non-normalized form, only when requested
        if(fNeedNonNormalizedValue){
            fStringBuffer2.clear();
            fStringBuffer2.append(value);
        }
        // the scanner recorded whitespace positions: turn them into spaces
        if(fEntityScanner.whiteSpaceLen > 0)
            normalizeWhitespace(value);
        if (DEBUG_ATTR_NORMALIZATION) {
            System.out.println("** normalizeWhitespace -> \""
                    + value.toString() + "\"");
        }
        // slow path: scanLiteral stopped on something other than the closing
        // quote, so the value has to be assembled piece by piece
        if (c != quote) {
            fScanningAttribute = true;
            stringBuffer = getStringBuffer();
            stringBuffer.clear();
            do {
                // keep the run scanned so far, then handle the stop character
                stringBuffer.append(value);
                if (DEBUG_ATTR_NORMALIZATION) {
                    System.out.println("** value2: \""
                            + stringBuffer.toString() + "\"");
                }
                if (c == '&') {
                    // reference: either a character reference or an entity reference
                    fEntityScanner.skipChar('&', NameType.REFERENCE);
                    // the raw text is recorded only while at the entity depth
                    // the attribute value started at
                    if (entityDepth == fEntityDepth && fNeedNonNormalizedValue ) {
                        fStringBuffer2.append('&');
                    }
                    if (fEntityScanner.skipChar('#', NameType.REFERENCE)) {
                        if (entityDepth == fEntityDepth && fNeedNonNormalizedValue ) {
                            fStringBuffer2.append('#');
                        }
                        // character reference: the resolved char(s) go to stringBuffer,
                        // the raw digits to fStringBuffer2 when that form is wanted
                        int ch ;
                        if (fNeedNonNormalizedValue)
                            ch = scanCharReferenceValue(stringBuffer, fStringBuffer2);
                        else
                            ch = scanCharReferenceValue(stringBuffer, null);

                        if (ch != -1) {
                            if (DEBUG_ATTR_NORMALIZATION) {
                                System.out.println("** value3: \""
                                        + stringBuffer.toString()
                                        + "\"");
                            }
                        }
                    } else {
                        // general entity reference: name followed by ';'
                        String entityName = fEntityScanner.scanName(NameType.ENTITY);
                        if (entityName == null) {
                            reportFatalError("NameRequiredInReference", null);
                        } else if (entityDepth == fEntityDepth && fNeedNonNormalizedValue) {
                            fStringBuffer2.append(entityName);
                        }
                        if (!fEntityScanner.skipChar(';', NameType.REFERENCE)) {
                            reportFatalError("SemicolonRequiredInReference",
                                    new Object []{entityName});
                        } else if (entityDepth == fEntityDepth && fNeedNonNormalizedValue) {
                            fStringBuffer2.append(';');
                        }
                        // the five predefined entities are expanded in place
                        if (resolveCharacter(entityName, stringBuffer)) {
                            checkEntityLimit(false, fEntityScanner.fCurrentEntity.name, 1);
                        } else {
                            // external entities are not allowed in attribute values
                            if (fEntityStore.isExternalEntity(entityName)) {
                                reportFatalError("ReferenceToExternalEntity",
                                        new Object[] { entityName });
                            } else {
                                if (!fEntityStore.isDeclaredEntity(entityName)) {
                                    //WFC & VC: Entity Declared
                                    // checkEntities selects the VC path (an error, reported
                                    // only when validating) over the WFC path (fatal error)
                                    if (checkEntities) {
                                        if (fValidation) {
                                            fErrorReporter.reportError(fEntityScanner,XMLMessageFormatter.XML_DOMAIN,
                                                    "EntityNotDeclared",
                                                    new Object[]{entityName},
                                                    XMLErrorReporter.SEVERITY_ERROR);
                                        }
                                    } else {
                                        reportFatalError("EntityNotDeclared",
                                                new Object[]{entityName});
                                    }
                                }
                                // hand the entity to the entity manager; this call is
                                // made whether or not the entity was declared
                                fEntityManager.startEntity(true, entityName, true);
                            }
                        }
                    }
                } else if (c == '<') {
                    // '<' is reported, consumed, and kept only in the raw form
                    reportFatalError("LessthanInAttValue",
                            new Object[] { eleName, atName });
                            fEntityScanner.scanChar(null);
                            if (entityDepth == fEntityDepth && fNeedNonNormalizedValue) {
                                fStringBuffer2.append((char)c);
                            }
                } else if (c == '%' || c == ']') {
                    // ordinary characters here: copy them through unchanged
                    fEntityScanner.scanChar(null);
                    stringBuffer.append((char)c);
                    if (entityDepth == fEntityDepth && fNeedNonNormalizedValue) {
                        fStringBuffer2.append((char)c);
                    }
                    if (DEBUG_ATTR_NORMALIZATION) {
                        System.out.println("** valueF: \""
                                + stringBuffer.toString() + "\"");
                    }
                } else if (c == '\n' || c == '\r') {
                    // line break: a space in the normalized value, '\n' in the raw one
                    fEntityScanner.scanChar(null);
                    stringBuffer.append(' ');
                    if (entityDepth == fEntityDepth && fNeedNonNormalizedValue) {
                        fStringBuffer2.append('\n');
                    }
                } else if (c != -1 && XMLChar.isHighSurrogate(c)) {
                    // surrogate pair: scanned into fStringBuffer3 first and copied
                    // over only if scanSurrogates accepted it
                    fStringBuffer3.clear();
                    if (scanSurrogates(fStringBuffer3)) {
                        stringBuffer.append(fStringBuffer3);
                        if (entityDepth == fEntityDepth && fNeedNonNormalizedValue) {
                            fStringBuffer2.append(fStringBuffer3);
                        }
                        if (DEBUG_ATTR_NORMALIZATION) {
                            System.out.println("** valueI: \""
                                    + stringBuffer.toString()
                                    + "\"");
                        }
                    }
                } else if (c != -1 && isInvalidLiteral(c)) {
                    // invalid character: reported, consumed, kept only in the raw form
                    reportFatalError("InvalidCharInAttValue",
                            new Object[] {eleName, atName, Integer.toString(c, 16)});
                            fEntityScanner.scanChar(null);
                            if (entityDepth == fEntityDepth && fNeedNonNormalizedValue) {
                                fStringBuffer2.append((char)c);
                            }
                }
                // next run of literal text; the raw copy is taken before
                // whitespace normalization modifies 'value'
                c = fEntityScanner.scanLiteral(quote, value, isNSURI);
                if (entityDepth == fEntityDepth && fNeedNonNormalizedValue) {
                    fStringBuffer2.append(value);
                }
                if(fEntityScanner.whiteSpaceLen > 0)
                    normalizeWhitespace(value);
                //Todo ::Move this check  to Attributes , do conversion
                //only if attribute is being accessed. -Venu
                // stop only on a quote seen at the entity depth the value started at
            } while (c != quote || entityDepth != fEntityDepth);
            // append the final run and publish the assembled value
            stringBuffer.append(value);
            if (DEBUG_ATTR_NORMALIZATION) {
                System.out.println("** valueN: \""
                        + stringBuffer.toString() + "\"");
            }
            value.setValues(stringBuffer);
            fScanningAttribute = false;
        }
        if(fNeedNonNormalizedValue)
            nonNormalizedValue.setValues(fStringBuffer2);

        // quote
        int cquote = fEntityScanner.scanChar(NameType.ATTRIBUTE);
        if (cquote != quote) {
            reportFatalError("CloseQuoteExpected", new Object[]{eleName, atName});
        }
    } // scanAttributeValue()
1012 
1013 
1014     /**
1015      * Resolves character entity references.
1016      * @param entityName the name of the entity
1017      * @param stringBuffer the current XMLStringBuffer to append the character to.
1018      * @return true if resolved, false otherwise
1019      */
resolveCharacter(String entityName, XMLStringBuffer stringBuffer)1020     protected boolean resolveCharacter(String entityName, XMLStringBuffer stringBuffer) {
1021         /**
1022          * entityNames (symbols) are interned. The equals method would do the same,
1023          * but I'm leaving it as comparisons by references are common in the impl
1024          * and it made it explicit to others who read this code.
1025          */
1026         if (entityName == fAmpSymbol) {
1027             stringBuffer.append('&');
1028             return true;
1029         } else if (entityName == fAposSymbol) {
1030             stringBuffer.append('\'');
1031             return true;
1032         } else if (entityName == fLtSymbol) {
1033             stringBuffer.append('<');
1034             return true;
1035         } else if (entityName == fGtSymbol) {
1036             checkEntityLimit(false, fEntityScanner.fCurrentEntity.name, 1);
1037             stringBuffer.append('>');
1038             return true;
1039         } else if (entityName == fQuotSymbol) {
1040             checkEntityLimit(false, fEntityScanner.fCurrentEntity.name, 1);
1041             stringBuffer.append('"');
1042             return true;
1043         }
1044         return false;
1045     }
1046 
1047     /**
1048      * Scans External ID and return the public and system IDs.
1049      *
1050      * @param identifiers An array of size 2 to return the system id,
1051      *                    and public id (in that order).
1052      * @param optionalSystemId Specifies whether the system id is optional.
1053      *
1054      * <strong>Note:</strong> This method uses fString and fStringBuffer,
1055      * anything in them at the time of calling is lost.
1056      */
scanExternalID(String[] identifiers, boolean optionalSystemId)1057     protected void scanExternalID(String[] identifiers,
1058             boolean optionalSystemId)
1059             throws IOException, XNIException {
1060 
1061         String systemId = null;
1062         String publicId = null;
1063         if (fEntityScanner.skipString("PUBLIC")) {
1064             if (!fEntityScanner.skipSpaces()) {
1065                 reportFatalError("SpaceRequiredAfterPUBLIC", null);
1066             }
1067             scanPubidLiteral(fString);
1068             publicId = fString.toString();
1069 
1070             if (!fEntityScanner.skipSpaces() && !optionalSystemId) {
1071                 reportFatalError("SpaceRequiredBetweenPublicAndSystem", null);
1072             }
1073         }
1074 
1075         if (publicId != null || fEntityScanner.skipString("SYSTEM")) {
1076             if (publicId == null && !fEntityScanner.skipSpaces()) {
1077                 reportFatalError("SpaceRequiredAfterSYSTEM", null);
1078             }
1079             int quote = fEntityScanner.peekChar();
1080             if (quote != '\'' && quote != '"') {
1081                 if (publicId != null && optionalSystemId) {
1082                     // looks like we don't have any system id
1083                     // simply return the public id
1084                     identifiers[0] = null;
1085                     identifiers[1] = publicId;
1086                     return;
1087                 }
1088                 reportFatalError("QuoteRequiredInSystemID", null);
1089             }
1090             fEntityScanner.scanChar(null);
1091             XMLString ident = fString;
1092             if (fEntityScanner.scanLiteral(quote, ident, false) != quote) {
1093                 fStringBuffer.clear();
1094                 do {
1095                     fStringBuffer.append(ident);
1096                     int c = fEntityScanner.peekChar();
1097                     if (XMLChar.isMarkup(c) || c == ']') {
1098                         fStringBuffer.append((char)fEntityScanner.scanChar(null));
1099                     } else if (c != -1 && isInvalidLiteral(c)) {
1100                         reportFatalError("InvalidCharInSystemID",
1101                             new Object[] {Integer.toString(c, 16)});
1102                     }
1103                 } while (fEntityScanner.scanLiteral(quote, ident, false) != quote);
1104                 fStringBuffer.append(ident);
1105                 ident = fStringBuffer;
1106             }
1107             systemId = ident.toString();
1108             if (!fEntityScanner.skipChar(quote, null)) {
1109                 reportFatalError("SystemIDUnterminated", null);
1110             }
1111         }
1112 
1113         // store result in array
1114         identifiers[0] = systemId;
1115         identifiers[1] = publicId;
1116     }
1117 
1118 
1119     /**
1120      * Scans public ID literal.
1121      *
1122      * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
1123      * [13] PubidChar::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
1124      *
1125      * The returned string is normalized according to the following rule,
1126      * from http://www.w3.org/TR/REC-xml#dt-pubid:
1127      *
1128      * Before a match is attempted, all strings of white space in the public
1129      * identifier must be normalized to single space characters (#x20), and
1130      * leading and trailing white space must be removed.
1131      *
1132      * @param literal The string to fill in with the public ID literal.
1133      * @return True on success.
1134      *
1135      * <strong>Note:</strong> This method uses fStringBuffer, anything in it at
1136      * the time of calling is lost.
1137      */
scanPubidLiteral(XMLString literal)1138     protected boolean scanPubidLiteral(XMLString literal)
1139     throws IOException, XNIException {
1140         int quote = fEntityScanner.scanChar(null);
1141         if (quote != '\'' && quote != '"') {
1142             reportFatalError("QuoteRequiredInPublicID", null);
1143             return false;
1144         }
1145 
1146         fStringBuffer.clear();
1147         // skip leading whitespace
1148         boolean skipSpace = true;
1149         boolean dataok = true;
1150         while (true) {
1151             int c = fEntityScanner.scanChar(null);
1152             if (c == ' ' || c == '\n' || c == '\r') {
1153                 if (!skipSpace) {
1154                     // take the first whitespace as a space and skip the others
1155                     fStringBuffer.append(' ');
1156                     skipSpace = true;
1157                 }
1158             } else if (c == quote) {
1159                 if (skipSpace) {
1160                     // if we finished on a space let's trim it
1161                     fStringBuffer.length--;
1162                 }
1163                 literal.setValues(fStringBuffer);
1164                 break;
1165             } else if (XMLChar.isPubid(c)) {
1166                 fStringBuffer.append((char)c);
1167                 skipSpace = false;
1168             } else if (c == -1) {
1169                 reportFatalError("PublicIDUnterminated", null);
1170                 return false;
1171             } else {
1172                 dataok = false;
1173                 reportFatalError("InvalidCharInPublicID",
1174                         new Object[]{Integer.toHexString(c)});
1175             }
1176         }
1177         return dataok;
1178     }
1179 
1180 
1181     /**
1182      * Normalize whitespace in an XMLString converting all whitespace
1183      * characters to space characters.
1184      */
normalizeWhitespace(XMLString value)1185     protected void normalizeWhitespace(XMLString value) {
1186         int i=0;
1187         int j=0;
1188         int [] buff = fEntityScanner.whiteSpaceLookup;
1189         int buffLen = fEntityScanner.whiteSpaceLen;
1190         int end = value.offset + value.length;
1191         while(i < buffLen){
1192             j = buff[i];
1193             if(j < end ){
1194                 value.ch[j] = ' ';
1195             }
1196             i++;
1197         }
1198     }
1199 
1200     //
1201     // XMLEntityHandler methods
1202     //
1203 
1204     /**
1205      * This method notifies of the start of an entity. The document entity
1206      * has the pseudo-name of "[xml]" the DTD has the pseudo-name of "[dtd]"
1207      * parameter entity names start with '%'; and general entities are just
1208      * specified by their name.
1209      *
1210      * @param name     The name of the entity.
1211      * @param identifier The resource identifier.
1212      * @param encoding The auto-detected IANA encoding name of the entity
1213      *                 stream. This value will be null in those situations
1214      *                 where the entity encoding is not auto-detected (e.g.
1215      *                 internal entities or a document entity that is
1216      *                 parsed from a java.io.Reader).
1217      *
1218      * @throws XNIException Thrown by handler to signal an error.
1219      */
startEntity(String name, XMLResourceIdentifier identifier, String encoding, Augmentations augs)1220     public void startEntity(String name,
1221             XMLResourceIdentifier identifier,
1222             String encoding, Augmentations augs) throws XNIException {
1223 
1224         // keep track of the entity depth
1225         fEntityDepth++;
1226         // must reset entity scanner
1227         fEntityScanner = fEntityManager.getEntityScanner();
1228         fEntityStore = fEntityManager.getEntityStore() ;
1229     } // startEntity(String,XMLResourceIdentifier,String)
1230 
1231     /**
1232      * This method notifies the end of an entity. The document entity has
1233      * the pseudo-name of "[xml]" the DTD has the pseudo-name of "[dtd]"
1234      * parameter entity names start with '%'; and general entities are just
1235      * specified by their name.
1236      *
1237      * @param name The name of the entity.
1238      *
1239      * @throws XNIException Thrown by handler to signal an error.
1240      */
endEntity(String name, Augmentations augs)1241     public void endEntity(String name, Augmentations augs) throws IOException, XNIException {
1242         // keep track of the entity depth
1243         if (fEntityDepth > 0) {
1244             fEntityDepth--;
1245         }
1246     } // endEntity(String)
1247 
1248     /**
1249      * Scans a character reference and append the corresponding chars to the
1250      * specified buffer.
1251      *
1252      * <p>
1253      * <pre>
1254      * [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
1255      * </pre>
1256      *
     * <strong>Note:</strong> This method uses fStringBuffer3, anything in it
     * at the time of calling is lost.
1259      *
1260      * @param buf the character buffer to append chars to
1261      * @param buf2 the character buffer to append non-normalized chars to
1262      *
1263      * @return the character value or (-1) on conversion failure
1264      */
scanCharReferenceValue(XMLStringBuffer buf, XMLStringBuffer buf2)1265     protected int scanCharReferenceValue(XMLStringBuffer buf, XMLStringBuffer buf2)
1266     throws IOException, XNIException {
1267         int initLen = buf.length;
1268         // scan hexadecimal value
1269         boolean hex = false;
1270         if (fEntityScanner.skipChar('x', NameType.REFERENCE)) {
1271             if (buf2 != null) { buf2.append('x'); }
1272             hex = true;
1273             fStringBuffer3.clear();
1274             boolean digit = true;
1275 
1276             int c = fEntityScanner.peekChar();
1277             digit = (c >= '0' && c <= '9') ||
1278                     (c >= 'a' && c <= 'f') ||
1279                     (c >= 'A' && c <= 'F');
1280             if (digit) {
1281                 if (buf2 != null) { buf2.append((char)c); }
1282                 fEntityScanner.scanChar(NameType.REFERENCE);
1283                 fStringBuffer3.append((char)c);
1284 
1285                 do {
1286                     c = fEntityScanner.peekChar();
1287                     digit = (c >= '0' && c <= '9') ||
1288                             (c >= 'a' && c <= 'f') ||
1289                             (c >= 'A' && c <= 'F');
1290                     if (digit) {
1291                         if (buf2 != null) { buf2.append((char)c); }
1292                         fEntityScanner.scanChar(NameType.REFERENCE);
1293                         fStringBuffer3.append((char)c);
1294                     }
1295                 } while (digit);
1296             } else {
1297                 reportFatalError("HexdigitRequiredInCharRef", null);
1298             }
1299         }
1300 
1301         // scan decimal value
1302         else {
1303             fStringBuffer3.clear();
1304             boolean digit = true;
1305 
1306             int c = fEntityScanner.peekChar();
1307             digit = c >= '0' && c <= '9';
1308             if (digit) {
1309                 if (buf2 != null) { buf2.append((char)c); }
1310                 fEntityScanner.scanChar(NameType.REFERENCE);
1311                 fStringBuffer3.append((char)c);
1312 
1313                 do {
1314                     c = fEntityScanner.peekChar();
1315                     digit = c >= '0' && c <= '9';
1316                     if (digit) {
1317                         if (buf2 != null) { buf2.append((char)c); }
1318                         fEntityScanner.scanChar(NameType.REFERENCE);
1319                         fStringBuffer3.append((char)c);
1320                     }
1321                 } while (digit);
1322             } else {
1323                 reportFatalError("DigitRequiredInCharRef", null);
1324             }
1325         }
1326 
1327         // end
1328         if (!fEntityScanner.skipChar(';', NameType.REFERENCE)) {
1329             reportFatalError("SemicolonRequiredInCharRef", null);
1330         }
1331         if (buf2 != null) { buf2.append(';'); }
1332 
1333         // convert string to number
1334         int value = -1;
1335         try {
1336             value = Integer.parseInt(fStringBuffer3.toString(),
1337                     hex ? 16 : 10);
1338 
1339             // character reference must be a valid XML character
1340             if (isInvalid(value)) {
1341                 StringBuffer errorBuf = new StringBuffer(fStringBuffer3.length + 1);
1342                 if (hex) errorBuf.append('x');
1343                 errorBuf.append(fStringBuffer3.ch, fStringBuffer3.offset, fStringBuffer3.length);
1344                 reportFatalError("InvalidCharRef",
1345                         new Object[]{errorBuf.toString()});
1346             }
1347         } catch (NumberFormatException e) {
1348             // Conversion failed, let -1 value drop through.
1349             // If we end up here, the character reference was invalid.
1350             StringBuffer errorBuf = new StringBuffer(fStringBuffer3.length + 1);
1351             if (hex) errorBuf.append('x');
1352             errorBuf.append(fStringBuffer3.ch, fStringBuffer3.offset, fStringBuffer3.length);
1353             reportFatalError("InvalidCharRef",
1354                     new Object[]{errorBuf.toString()});
1355         }
1356 
1357         // append corresponding chars to the given buffer
1358         if (!XMLChar.isSupplemental(value)) {
1359             buf.append((char) value);
1360         } else {
1361             // character is supplemental, split it into surrogate chars
1362             buf.append(XMLChar.highSurrogate(value));
1363             buf.append(XMLChar.lowSurrogate(value));
1364         }
1365 
1366         // char refs notification code
1367         if (fNotifyCharRefs && value != -1) {
1368             String literal = "#" + (hex ? "x" : "") + fStringBuffer3.toString();
1369             if (!fScanningAttribute) {
1370                 fCharRefLiteral = literal;
1371             }
1372         }
1373 
1374         if (fEntityScanner.fCurrentEntity.isGE) {
1375             checkEntityLimit(false, fEntityScanner.fCurrentEntity.name, buf.length - initLen);
1376         }
1377         return value;
1378     }
1379     // returns true if the given character is not
1380     // valid with respect to the version of
1381     // XML understood by this scanner.
isInvalid(int value)1382     protected boolean isInvalid(int value) {
1383         return (XMLChar.isInvalid(value));
1384     } // isInvalid(int):  boolean
1385 
1386     // returns true if the given character is not
1387     // valid or may not be used outside a character reference
1388     // with respect to the version of XML understood by this scanner.
isInvalidLiteral(int value)1389     protected boolean isInvalidLiteral(int value) {
1390         return (XMLChar.isInvalid(value));
1391     } // isInvalidLiteral(int):  boolean
1392 
1393     // returns true if the given character is
1394     // a valid nameChar with respect to the version of
1395     // XML understood by this scanner.
isValidNameChar(int value)1396     protected boolean isValidNameChar(int value) {
1397         return (XMLChar.isName(value));
1398     } // isValidNameChar(int):  boolean
1399 
1400     // returns true if the given character is
1401     // a valid NCName character with respect to the version of
1402     // XML understood by this scanner.
isValidNCName(int value)1403     protected boolean isValidNCName(int value) {
1404         return (XMLChar.isNCName(value));
1405     } // isValidNCName(int):  boolean
1406 
1407     // returns true if the given character is
1408     // a valid nameStartChar with respect to the version of
1409     // XML understood by this scanner.
isValidNameStartChar(int value)1410     protected boolean isValidNameStartChar(int value) {
1411         return (XMLChar.isNameStart(value));
1412     } // isValidNameStartChar(int):  boolean
1413 
1414     // returns true if the given character is
1415     // a valid high surrogate for a nameStartChar
1416     // with respect to the version of XML understood
1417     // by this scanner.
isValidNameStartHighSurrogate(int value)1418     protected boolean isValidNameStartHighSurrogate(int value) {
1419         return false;
1420     } // isValidNameStartHighSurrogate(int):  boolean
1421 
versionSupported(String version )1422     protected boolean versionSupported(String version ) {
1423         return version.equals("1.0") || version.equals("1.1");
1424     } // version Supported
1425 
1426     /**
1427      * Scans surrogates and append them to the specified buffer.
1428      * <p>
1429      * <strong>Note:</strong> This assumes the current char has already been
1430      * identified as a high surrogate.
1431      *
1432      * @param buf The StringBuffer to append the read surrogates to.
1433      * @return True if it succeeded.
1434      */
scanSurrogates(XMLStringBuffer buf)1435     protected boolean scanSurrogates(XMLStringBuffer buf)
1436     throws IOException, XNIException {
1437 
1438         int high = fEntityScanner.scanChar(null);
1439         int low = fEntityScanner.peekChar();
1440         if (!XMLChar.isLowSurrogate(low)) {
1441             reportFatalError("InvalidCharInContent",
1442                     new Object[] {Integer.toString(high, 16)});
1443                     return false;
1444         }
1445         fEntityScanner.scanChar(null);
1446 
1447         // convert surrogates to supplemental character
1448         int c = XMLChar.supplemental((char)high, (char)low);
1449 
1450         // supplemental character must be a valid XML character
1451         if (isInvalid(c)) {
1452             reportFatalError("InvalidCharInContent",
1453                     new Object[]{Integer.toString(c, 16)});
1454                     return false;
1455         }
1456 
1457         // fill in the buffer
1458         buf.append((char)high);
1459         buf.append((char)low);
1460 
1461         return true;
1462 
1463     } // scanSurrogates():boolean
1464 
1465 
1466     /**
1467      * Convenience function used in all XML scanners.
1468      */
reportFatalError(String msgId, Object[] args)1469     protected void reportFatalError(String msgId, Object[] args)
1470     throws XNIException {
1471         fErrorReporter.reportError(fEntityScanner, XMLMessageFormatter.XML_DOMAIN,
1472                 msgId, args,
1473                 XMLErrorReporter.SEVERITY_FATAL_ERROR);
1474     }
1475 
1476     // private methods
init()1477     private void init() {
1478         // initialize scanner
1479         fEntityScanner = null;
1480         // initialize vars
1481         fEntityDepth = 0;
1482         fReportEntity = true;
1483         fResourceIdentifier.clear();
1484 
1485         if(!fAttributeCacheInitDone){
1486             for(int i = 0; i < initialCacheCount; i++){
1487                 attributeValueCache.add(new XMLString());
1488                 stringBufferCache.add(new XMLStringBuffer());
1489             }
1490             fAttributeCacheInitDone = true;
1491         }
1492         fStringBufferIndex = 0;
1493         fAttributeCacheUsedCount = 0;
1494 
1495     }
1496 
getStringBuffer()1497     XMLStringBuffer getStringBuffer(){
1498         if((fStringBufferIndex < initialCacheCount )|| (fStringBufferIndex < stringBufferCache.size())){
1499             return stringBufferCache.get(fStringBufferIndex++);
1500         }else{
1501             XMLStringBuffer tmpObj = new XMLStringBuffer();
1502             fStringBufferIndex++;
1503             stringBufferCache.add(tmpObj);
1504             return tmpObj;
1505         }
1506     }
1507 
1508     /**
1509      * Add the count of the content buffer and check if the accumulated
1510      * value exceeds the limit
1511      * @param isPEDecl a flag to indicate whether the entity is parameter
1512      * @param entityName entity name
1513      * @param buffer content buffer
1514      */
checkEntityLimit(boolean isPEDecl, String entityName, XMLString buffer)1515     void checkEntityLimit(boolean isPEDecl, String entityName, XMLString buffer) {
1516         checkEntityLimit(isPEDecl, entityName, buffer.length);
1517     }
1518 
1519     /**
1520      * Add the count and check limit
1521      * @param isPEDecl a flag to indicate whether the entity is parameter
1522      * @param entityName entity name
1523      * @param len length of the buffer
1524      */
checkEntityLimit(boolean isPEDecl, String entityName, int len)1525     void checkEntityLimit(boolean isPEDecl, String entityName, int len) {
1526         if (fLimitAnalyzer == null) {
1527             fLimitAnalyzer = fEntityManager.fLimitAnalyzer;
1528         }
1529         if (isPEDecl) {
1530             fLimitAnalyzer.addValue(XMLSecurityManager.Limit.PARAMETER_ENTITY_SIZE_LIMIT, "%" + entityName, len);
1531             if (fSecurityManager.isOverLimit(XMLSecurityManager.Limit.PARAMETER_ENTITY_SIZE_LIMIT, fLimitAnalyzer)) {
1532                         fSecurityManager.debugPrint(fLimitAnalyzer);
1533                 reportFatalError("MaxEntitySizeLimit", new Object[]{"%" + entityName,
1534                     fLimitAnalyzer.getValue(XMLSecurityManager.Limit.PARAMETER_ENTITY_SIZE_LIMIT),
1535                     fSecurityManager.getLimit(XMLSecurityManager.Limit.PARAMETER_ENTITY_SIZE_LIMIT),
1536                     fSecurityManager.getStateLiteral(XMLSecurityManager.Limit.PARAMETER_ENTITY_SIZE_LIMIT)});
1537             }
1538         } else {
1539             fLimitAnalyzer.addValue(XMLSecurityManager.Limit.GENERAL_ENTITY_SIZE_LIMIT, entityName, len);
1540             if (fSecurityManager.isOverLimit(XMLSecurityManager.Limit.GENERAL_ENTITY_SIZE_LIMIT, fLimitAnalyzer)) {
1541                         fSecurityManager.debugPrint(fLimitAnalyzer);
1542                 reportFatalError("MaxEntitySizeLimit", new Object[]{entityName,
1543                     fLimitAnalyzer.getValue(XMLSecurityManager.Limit.GENERAL_ENTITY_SIZE_LIMIT),
1544                     fSecurityManager.getLimit(XMLSecurityManager.Limit.GENERAL_ENTITY_SIZE_LIMIT),
1545                     fSecurityManager.getStateLiteral(XMLSecurityManager.Limit.GENERAL_ENTITY_SIZE_LIMIT)});
1546             }
1547         }
1548         if (fSecurityManager.isOverLimit(XMLSecurityManager.Limit.TOTAL_ENTITY_SIZE_LIMIT, fLimitAnalyzer)) {
1549             fSecurityManager.debugPrint(fLimitAnalyzer);
1550             reportFatalError("TotalEntitySizeLimit",
1551                 new Object[]{fLimitAnalyzer.getTotalValue(XMLSecurityManager.Limit.TOTAL_ENTITY_SIZE_LIMIT),
1552                 fSecurityManager.getLimit(XMLSecurityManager.Limit.TOTAL_ENTITY_SIZE_LIMIT),
1553                 fSecurityManager.getStateLiteral(XMLSecurityManager.Limit.TOTAL_ENTITY_SIZE_LIMIT)});
1554         }
1555     }
1556 } // class XMLScanner
1557