1 /*
2  * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved.
3  */
4 
5 /*
6  * Licensed to the Apache Software Foundation (ASF) under one or more
7  * contributor license agreements.  See the NOTICE file distributed with
8  * this work for additional information regarding copyright ownership.
9  * The ASF licenses this file to You under the Apache License, Version 2.0
10  * (the "License"); you may not use this file except in compliance with
11  * the License.  You may obtain a copy of the License at
12  *
13  *     http://www.apache.org/licenses/LICENSE-2.0
14  *
15  * Unless required by applicable law or agreed to in writing, software
16  * distributed under the License is distributed on an "AS IS" BASIS,
17  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18  * See the License for the specific language governing permissions and
19  * limitations under the License.
20  */
21 
22 package com.sun.org.apache.xerces.internal.impl;
23 
24 import com.sun.org.apache.xerces.internal.util.Status;
25 import com.sun.xml.internal.stream.XMLEntityStorage;
26 import java.io.IOException;
27 import java.util.ArrayList;
28 import javax.xml.stream.events.XMLEvent;
29 import com.sun.org.apache.xerces.internal.impl.msg.XMLMessageFormatter;
30 import com.sun.org.apache.xerces.internal.util.SymbolTable;
31 import com.sun.org.apache.xerces.internal.util.XMLChar;
32 import com.sun.org.apache.xerces.internal.util.XMLResourceIdentifierImpl;
33 import com.sun.org.apache.xerces.internal.util.XMLStringBuffer;
34 import com.sun.org.apache.xerces.internal.utils.XMLLimitAnalyzer;
35 import com.sun.org.apache.xerces.internal.utils.XMLSecurityManager;
36 import com.sun.org.apache.xerces.internal.xni.Augmentations;
37 import com.sun.org.apache.xerces.internal.xni.XMLAttributes;
38 import com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier;
39 import com.sun.org.apache.xerces.internal.xni.XMLString;
40 import com.sun.org.apache.xerces.internal.xni.XNIException;
41 import com.sun.org.apache.xerces.internal.xni.parser.XMLComponent;
42 import com.sun.org.apache.xerces.internal.xni.parser.XMLComponentManager;
43 import com.sun.org.apache.xerces.internal.xni.parser.XMLConfigurationException;
44 import com.sun.xml.internal.stream.Entity;
45 
46 //import com.sun.xml.stream.XMLEntityManager;
47 //import com.sun.org.apache.xerces.internal.impl.XMLErrorReporter;
48 
49 /**
50  * This class is responsible for holding scanning methods common to
51  * scanning the XML document structure and content as well as the DTD
52  * structure and content. Both XMLDocumentScanner and XMLDTDScanner inherit
53  * from this base class.
54  *
55  * <p>
56  * This component requires the following features and properties from the
57  * component manager that uses it:
58  * <ul>
59  *  <li>http://xml.org/sax/features/validation</li>
60  *  <li>http://apache.org/xml/features/scanner/notify-char-refs</li>
61  *  <li>http://apache.org/xml/properties/internal/symbol-table</li>
62  *  <li>http://apache.org/xml/properties/internal/error-reporter</li>
63  *  <li>http://apache.org/xml/properties/internal/entity-manager</li>
64  * </ul>
65  *
66  * @author Andy Clark, IBM
67  * @author Arnaud  Le Hors, IBM
68  * @author Eric Ye, IBM
69  * @author K.Venugopal SUN Microsystems
70  * @author Sunitha Reddy, SUN Microsystems
71  * @version $Id: XMLScanner.java,v 1.12 2010-11-01 04:39:41 joehw Exp $
72  * @LastModified: Feb 2020
73  */
74 public abstract class XMLScanner
75         implements XMLComponent {
76 
77     //
78     // Constants
79     //
80 
81     // feature identifiers
82 
    /** Feature identifier: namespaces. */
    protected static final String NAMESPACES =
            Constants.SAX_FEATURE_PREFIX + Constants.NAMESPACES_FEATURE;

    /** Feature identifier: validation. */
    protected static final String VALIDATION =
            Constants.SAX_FEATURE_PREFIX + Constants.VALIDATION_FEATURE;

    /** Feature identifier: notify character references. */
    protected static final String NOTIFY_CHAR_REFS =
            Constants.XERCES_FEATURE_PREFIX + Constants.NOTIFY_CHAR_REFS_FEATURE;

    // property identifiers

    /**
     * Feature identifier: internal parser-settings flag. Although it is
     * declared in this section, the identifier is built with the Xerces
     * <em>feature</em> prefix and is read with {@code getFeature} by
     * {@code reset(XMLComponentManager)}.
     */
    protected static final String PARSER_SETTINGS =
                                Constants.XERCES_FEATURE_PREFIX + Constants.PARSER_SETTINGS;
    /** Property identifier: symbol table. */
    protected static final String SYMBOL_TABLE =
            Constants.XERCES_PROPERTY_PREFIX + Constants.SYMBOL_TABLE_PROPERTY;

    /** Property identifier: error reporter. */
    protected static final String ERROR_REPORTER =
            Constants.XERCES_PROPERTY_PREFIX + Constants.ERROR_REPORTER_PROPERTY;

    /** Property identifier: entity manager. */
    protected static final String ENTITY_MANAGER =
            Constants.XERCES_PROPERTY_PREFIX + Constants.ENTITY_MANAGER_PROPERTY;

    /** Property identifier: security manager. */
    private static final String SECURITY_MANAGER = Constants.SECURITY_MANAGER;

    // debugging

    /** Debug attribute normalization. */
    protected static final boolean DEBUG_ATTR_NORMALIZATION = false;
118 
119     /**
120      * Type of names
121      */
122     public static enum NameType {
123         ATTRIBUTE("attribute"),
124         ATTRIBUTENAME("attribute name"),
125         COMMENT("comment"),
126         DOCTYPE("doctype"),
127         ELEMENTSTART("startelement"),
128         ELEMENTEND("endelement"),
129         ENTITY("entity"),
130         NOTATION("notation"),
131         PI("pi"),
132         REFERENCE("reference");
133 
134         final String literal;
NameType(String literal)135         NameType(String literal) {
136             this.literal = literal;
137         }
138 
literal()139         String literal() {
140             return literal;
141         }
142     }
143 
    // xxx: defaults to false because the non-normalized value does not need
    // to be calculated; there should be a feature that, when set to true,
    // causes this value to be computed.
    private boolean fNeedNonNormalizedValue = false;

    // NOTE(review): the fields below look like reusable caches of attribute
    // value strings and string buffers plus their bookkeeping counters; the
    // code that fills and reads them is outside this section - confirm there.
    protected ArrayList<XMLString> attributeValueCache = new ArrayList<>();
    protected ArrayList<XMLStringBuffer> stringBufferCache = new ArrayList<>();
    protected int fStringBufferIndex = 0;
    protected boolean fAttributeCacheInitDone = false;
    protected int fAttributeCacheUsedCount = 0;
153 
154     //
155     // Data
156     //
157 
158     // features
159 
    /**
     * Validation. This feature identifier is:
     * http://xml.org/sax/features/validation
     */
    protected boolean fValidation = false;

    /** Namespaces. */
    protected boolean fNamespaces;

    /** Character references notification. */
    protected boolean fNotifyCharRefs = false;

    /** Internal parser-settings feature. */
    protected boolean fParserSettings = true;

    // properties

    /** Property manager; assigned by {@code setPropertyManager(PropertyManager)}. */
    protected PropertyManager fPropertyManager = null ;
    /** Symbol table. */
    protected SymbolTable fSymbolTable;

    /** Error reporter. */
    protected XMLErrorReporter fErrorReporter;

    /** Entity manager. */
    protected XMLEntityManager fEntityManager = null ;

    /**
     * Entity storage; the reset methods obtain it from the entity manager.
     * xxx: this should be available from the entity manager directly.
     */
    protected XMLEntityStorage fEntityStore = null ;

    /** Security manager. */
    protected XMLSecurityManager fSecurityManager = null;

    /** Limit analyzer. */
    protected XMLLimitAnalyzer fLimitAnalyzer = null;

    // protected data

    /** Event type. */
    protected XMLEvent fEvent ;

    /** Entity scanner; this always works on the last entity that was opened. */
    protected XMLEntityScanner fEntityScanner = null;

    /** Entity depth. */
    protected int fEntityDepth;

    /** Literal value of the last character reference scanned. */
    protected String fCharRefLiteral = null;

    /** Scanning attribute. */
    protected boolean fScanningAttribute;

    /** Report entity boundary. */
    protected boolean fReportEntity;

    // symbols

    /** Symbol: "version". */
    protected final static String fVersionSymbol = "version".intern();

    /** Symbol: "encoding". */
    protected final static String fEncodingSymbol = "encoding".intern();

    /** Symbol: "standalone". */
    protected final static String fStandaloneSymbol = "standalone".intern();

    /** Symbol: "amp". */
    protected final static String fAmpSymbol = "amp".intern();

    /** Symbol: "lt". */
    protected final static String fLtSymbol = "lt".intern();

    /** Symbol: "gt". */
    protected final static String fGtSymbol = "gt".intern();

    /** Symbol: "quot". */
    protected final static String fQuotSymbol = "quot".intern();

    /** Symbol: "apos". */
    protected final static String fAposSymbol = "apos".intern();

    // temporary variables

    // NOTE: These objects are private to help prevent accidental modification
    //       of values by a subclass. If they were protected *and* a subclass
    //       modified the values, it would be difficult to track down the real
    //       cause of the bug. By making these private, we avoid this
    //       possibility.

    /** Scratch string; scanXMLDeclOrTextDecl uses it to receive pseudo-attribute values. */
    private XMLString fString = new XMLString();

    /** String buffer. */
    private XMLStringBuffer fStringBuffer = new XMLStringBuffer();

    /** String buffer; scanPseudoAttribute assembles multi-part values in it. */
    private XMLStringBuffer fStringBuffer2 = new XMLStringBuffer();

    /** String buffer. */
    private XMLStringBuffer fStringBuffer3 = new XMLStringBuffer();

    // temporary location for Resource identification information.
    protected XMLResourceIdentifierImpl fResourceIdentifier = new XMLResourceIdentifierImpl();
    // NOTE(review): presumably the initial size of the caches declared in
    // this class; its use is outside this section - confirm.
    int initialCacheCount = 6;
266     //
267     // XMLComponent methods
268     //
269 
270     /**
271      *
272      *
273      * @param componentManager The component manager.
274      *
275      * @throws SAXException Throws exception if required features and
276      *                      properties cannot be found.
277      */
reset(XMLComponentManager componentManager)278     public void reset(XMLComponentManager componentManager)
279     throws XMLConfigurationException {
280 
281                 fParserSettings = componentManager.getFeature(PARSER_SETTINGS, true);
282 
283                 if (!fParserSettings) {
284                         // parser settings have not been changed
285                         init();
286                         return;
287                 }
288 
289 
290         // Xerces properties
291         fSymbolTable = (SymbolTable)componentManager.getProperty(SYMBOL_TABLE);
292         fErrorReporter = (XMLErrorReporter)componentManager.getProperty(ERROR_REPORTER);
293         fEntityManager = (XMLEntityManager)componentManager.getProperty(ENTITY_MANAGER);
294         fSecurityManager = (XMLSecurityManager)componentManager.getProperty(SECURITY_MANAGER);
295 
296         //this step is extra because we have separated the storage of entity
297         fEntityStore = fEntityManager.getEntityStore() ;
298 
299         // sax features
300         fValidation = componentManager.getFeature(VALIDATION, false);
301         fNamespaces = componentManager.getFeature(NAMESPACES, true);
302         fNotifyCharRefs = componentManager.getFeature(NOTIFY_CHAR_REFS, false);
303 
304         init();
305     } // reset(XMLComponentManager)
306 
setPropertyManager(PropertyManager propertyManager)307     protected void setPropertyManager(PropertyManager propertyManager){
308         fPropertyManager = propertyManager ;
309     }
310 
311     /**
312      * Sets the value of a property during parsing.
313      *
314      * @param propertyId
315      * @param value
316      */
setProperty(String propertyId, Object value)317     public void setProperty(String propertyId, Object value)
318     throws XMLConfigurationException {
319 
320         // Xerces properties
321         if (propertyId.startsWith(Constants.XERCES_PROPERTY_PREFIX)) {
322             String property =
323                     propertyId.substring(Constants.XERCES_PROPERTY_PREFIX.length());
324             if (property.equals(Constants.SYMBOL_TABLE_PROPERTY)) {
325                 fSymbolTable = (SymbolTable)value;
326             } else if (property.equals(Constants.ERROR_REPORTER_PROPERTY)) {
327                 fErrorReporter = (XMLErrorReporter)value;
328             } else if (property.equals(Constants.ENTITY_MANAGER_PROPERTY)) {
329                 fEntityManager = (XMLEntityManager)value;
330             }
331         }
332 
333         if (propertyId.equals(SECURITY_MANAGER)) {
334             fSecurityManager = (XMLSecurityManager)value;
335         }
336                 /*else if(propertyId.equals(Constants.STAX_PROPERTIES)){
337             fStaxProperties = (HashMap)value;
338             //TODO::discuss with neeraj what are his thoughts on passing properties.
339             //For now use this
340         }*/
341 
342     } // setProperty(String,Object)
343 
344     /*
345      * Sets the feature of the scanner.
346      */
setFeature(String featureId, boolean value)347     public void setFeature(String featureId, boolean value)
348     throws XMLConfigurationException {
349 
350         if (VALIDATION.equals(featureId)) {
351             fValidation = value;
352         } else if (NOTIFY_CHAR_REFS.equals(featureId)) {
353             fNotifyCharRefs = value;
354         }
355     }
356 
357     /*
358      * Gets the state of the feature of the scanner.
359      */
getFeature(String featureId)360     public boolean getFeature(String featureId)
361     throws XMLConfigurationException {
362 
363         if (VALIDATION.equals(featureId)) {
364             return fValidation;
365         } else if (NOTIFY_CHAR_REFS.equals(featureId)) {
366             return fNotifyCharRefs;
367         }
368         throw new XMLConfigurationException(Status.NOT_RECOGNIZED, featureId);
369     }
370 
371     //
372     // Protected methods
373     //
374 
375     // anybody calling this had better have set Symtoltable!
reset()376     protected void reset() {
377         init();
378 
379         // DTD preparsing defaults:
380         fValidation = true;
381         fNotifyCharRefs = false;
382 
383     }
384 
reset(PropertyManager propertyManager)385     public void reset(PropertyManager propertyManager) {
386         init();
387         // Xerces properties
388         fSymbolTable = (SymbolTable)propertyManager.getProperty(Constants.XERCES_PROPERTY_PREFIX + Constants.SYMBOL_TABLE_PROPERTY);
389 
390         fErrorReporter = (XMLErrorReporter)propertyManager.getProperty(Constants.XERCES_PROPERTY_PREFIX + Constants.ERROR_REPORTER_PROPERTY);
391 
392         fEntityManager = (XMLEntityManager)propertyManager.getProperty(ENTITY_MANAGER);
393         fEntityStore = fEntityManager.getEntityStore() ;
394         fEntityScanner = (XMLEntityScanner)fEntityManager.getEntityScanner() ;
395         fSecurityManager = (XMLSecurityManager)propertyManager.getProperty(SECURITY_MANAGER);
396 
397         //fEntityManager.reset();
398         // DTD preparsing defaults:
399         fValidation = false;
400         fNotifyCharRefs = false;
401 
402     }
403     // common scanning methods
404 
405     /**
406      * Scans an XML or text declaration.
407      * <p>
408      * <pre>
409      * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
410      * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
411      * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'" )
412      * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
413      * [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'")
414      *                 | ('"' ('yes' | 'no') '"'))
415      *
416      * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
417      * </pre>
418      *
419      * @param scanningTextDecl True if a text declaration is to
420      *                         be scanned instead of an XML
421      *                         declaration.
422      * @param pseudoAttributeValues An array of size 3 to return the version,
423      *                         encoding and standalone pseudo attribute values
424      *                         (in that order).
425      *
426      * <strong>Note:</strong> This method uses fString, anything in it
427      * at the time of calling is lost.
428      */
scanXMLDeclOrTextDecl(boolean scanningTextDecl, String[] pseudoAttributeValues)429     protected void scanXMLDeclOrTextDecl(boolean scanningTextDecl,
430             String[] pseudoAttributeValues)
431             throws IOException, XNIException {
432 
433         // pseudo-attribute values
434         String version = null;
435         String encoding = null;
436         String standalone = null;
437 
438         // scan pseudo-attributes
439         final int STATE_VERSION = 0;
440         final int STATE_ENCODING = 1;
441         final int STATE_STANDALONE = 2;
442         final int STATE_DONE = 3;
443         int state = STATE_VERSION;
444 
445         boolean dataFoundForTarget = false;
446         boolean sawSpace = fEntityScanner.skipSpaces();
447         // since pseudoattributes are *not* attributes,
448         // their quotes don't need to be preserved in external parameter entities.
449         // the XMLEntityScanner#scanLiteral method will continue to
450         // emit -1 in such cases when it finds a quote; this is
451         // fine for other methods that parse scanned entities,
452         // but not for the scanning of pseudoattributes.  So,
453         // temporarily, we must mark the current entity as not being "literal"
454         Entity.ScannedEntity currEnt = fEntityManager.getCurrentEntity();
455         boolean currLiteral = currEnt.literal;
456         currEnt.literal = false;
457         while (fEntityScanner.peekChar() != '?') {
458             dataFoundForTarget = true;
459             String name = scanPseudoAttribute(scanningTextDecl, fString);
460             switch (state) {
461                 case STATE_VERSION: {
462                     if (name.equals(fVersionSymbol)) {
463                         if (!sawSpace) {
464                             reportFatalError(scanningTextDecl
465                                     ? "SpaceRequiredBeforeVersionInTextDecl"
466                                     : "SpaceRequiredBeforeVersionInXMLDecl",
467                                     null);
468                         }
469                         version = fString.toString();
470                         state = STATE_ENCODING;
471                         if (!versionSupported(version)) {
472                             reportFatalError("VersionNotSupported",
473                                     new Object[]{version});
474                         }
475 
476                         if (version.equals("1.1")) {
477                             Entity.ScannedEntity top = fEntityManager.getTopLevelEntity();
478                             if (top != null && (top.version == null || top.version.equals("1.0"))) {
479                                 reportFatalError("VersionMismatch", null);
480                             }
481                             fEntityManager.setScannerVersion(Constants.XML_VERSION_1_1);
482                         }
483 
484                     } else if (name.equals(fEncodingSymbol)) {
485                         if (!scanningTextDecl) {
486                             reportFatalError("VersionInfoRequired", null);
487                         }
488                         if (!sawSpace) {
489                             reportFatalError(scanningTextDecl
490                                     ? "SpaceRequiredBeforeEncodingInTextDecl"
491                                     : "SpaceRequiredBeforeEncodingInXMLDecl",
492                                     null);
493                         }
494                         encoding = fString.toString();
495                         state = scanningTextDecl ? STATE_DONE : STATE_STANDALONE;
496                     } else {
497                         if (scanningTextDecl) {
498                             reportFatalError("EncodingDeclRequired", null);
499                         } else {
500                             reportFatalError("VersionInfoRequired", null);
501                         }
502                     }
503                     break;
504                 }
505                 case STATE_ENCODING: {
506                     if (name.equals(fEncodingSymbol)) {
507                         if (!sawSpace) {
508                             reportFatalError(scanningTextDecl
509                                     ? "SpaceRequiredBeforeEncodingInTextDecl"
510                                     : "SpaceRequiredBeforeEncodingInXMLDecl",
511                                     null);
512                         }
513                         encoding = fString.toString();
514                         state = scanningTextDecl ? STATE_DONE : STATE_STANDALONE;
515                         // TODO: check encoding name; set encoding on
516                         //       entity scanner
517                     } else if (!scanningTextDecl && name.equals(fStandaloneSymbol)) {
518                         if (!sawSpace) {
519                             reportFatalError("SpaceRequiredBeforeStandalone",
520                                     null);
521                         }
522                         standalone = fString.toString();
523                         state = STATE_DONE;
524                         if (!standalone.equals("yes") && !standalone.equals("no")) {
525                             reportFatalError("SDDeclInvalid", new Object[] {standalone});
526                         }
527                     } else {
528                         reportFatalError("EncodingDeclRequired", null);
529                     }
530                     break;
531                 }
532                 case STATE_STANDALONE: {
533                     if (name.equals(fStandaloneSymbol)) {
534                         if (!sawSpace) {
535                             reportFatalError("SpaceRequiredBeforeStandalone",
536                                     null);
537                         }
538                         standalone = fString.toString();
539                         state = STATE_DONE;
540                         if (!standalone.equals("yes") && !standalone.equals("no")) {
541                             reportFatalError("SDDeclInvalid",  new Object[] {standalone});
542                         }
543                     } else {
544                         reportFatalError("SDDeclNameInvalid", null);
545                     }
546                     break;
547                 }
548                 default: {
549                     reportFatalError("NoMorePseudoAttributes", null);
550                 }
551             }
552             sawSpace = fEntityScanner.skipSpaces();
553         }
554         // restore original literal value
555         if(currLiteral) {
556             currEnt.literal = true;
557         }
558         // REVISIT: should we remove this error reporting?
559         if (scanningTextDecl && state != STATE_DONE) {
560             reportFatalError("MorePseudoAttributes", null);
561         }
562 
563         // If there is no data in the xml or text decl then we fail to report error
564         // for version or encoding info above.
565         if (scanningTextDecl) {
566             if (!dataFoundForTarget && encoding == null) {
567                 reportFatalError("EncodingDeclRequired", null);
568             }
569         } else {
570             if (!dataFoundForTarget && version == null) {
571                 reportFatalError("VersionInfoRequired", null);
572             }
573         }
574 
575         // end
576         if (!fEntityScanner.skipChar('?', null)) {
577             reportFatalError("XMLDeclUnterminated", null);
578         }
579         if (!fEntityScanner.skipChar('>', null)) {
580             reportFatalError("XMLDeclUnterminated", null);
581 
582         }
583 
584         // fill in return array
585         pseudoAttributeValues[0] = version;
586         pseudoAttributeValues[1] = encoding;
587         pseudoAttributeValues[2] = standalone;
588 
589     } // scanXMLDeclOrTextDecl(boolean)
590 
591     /**
592      * Scans a pseudo attribute.
593      *
594      * @param scanningTextDecl True if scanning this pseudo-attribute for a
595      *                         TextDecl; false if scanning XMLDecl. This
596      *                         flag is needed to report the correct type of
597      *                         error.
598      * @param value            The string to fill in with the attribute
599      *                         value.
600      *
601      * @return The name of the attribute
602      *
603      * <strong>Note:</strong> This method uses fStringBuffer2, anything in it
604      * at the time of calling is lost.
605      */
scanPseudoAttribute(boolean scanningTextDecl, XMLString value)606     protected String scanPseudoAttribute(boolean scanningTextDecl,
607             XMLString value)
608             throws IOException, XNIException {
609 
610         String name = scanPseudoAttributeName();
611         // XMLEntityManager.print(fEntityManager.getCurrentEntity());
612 
613         if (name == null) {
614             reportFatalError("PseudoAttrNameExpected", null);
615         }
616         fEntityScanner.skipSpaces();
617         if (!fEntityScanner.skipChar('=', null)) {
618             reportFatalError(scanningTextDecl ? "EqRequiredInTextDecl"
619                     : "EqRequiredInXMLDecl", new Object[]{name});
620         }
621         fEntityScanner.skipSpaces();
622         int quote = fEntityScanner.peekChar();
623         if (quote != '\'' && quote != '"') {
624             reportFatalError(scanningTextDecl ? "QuoteRequiredInTextDecl"
625                     : "QuoteRequiredInXMLDecl" , new Object[]{name});
626         }
627         fEntityScanner.scanChar(NameType.ATTRIBUTE);
628         int c = fEntityScanner.scanLiteral(quote, value, false);
629         if (c != quote) {
630             fStringBuffer2.clear();
631             do {
632                 fStringBuffer2.append(value);
633                 if (c != -1) {
634                     if (c == '&' || c == '%' || c == '<' || c == ']') {
635                         fStringBuffer2.append((char)fEntityScanner.scanChar(NameType.ATTRIBUTE));
636                     } else if (XMLChar.isHighSurrogate(c)) {
637                         scanSurrogates(fStringBuffer2);
638                     } else if (isInvalidLiteral(c)) {
639                         String key = scanningTextDecl
640                                 ? "InvalidCharInTextDecl" : "InvalidCharInXMLDecl";
641                         reportFatalError(key,
642                                 new Object[] {Integer.toString(c, 16)});
643                                 fEntityScanner.scanChar(null);
644                     }
645                 }
646                 c = fEntityScanner.scanLiteral(quote, value, false);
647             } while (c != quote);
648             fStringBuffer2.append(value);
649             value.setValues(fStringBuffer2);
650         }
651         if (!fEntityScanner.skipChar(quote, null)) {
652             reportFatalError(scanningTextDecl ? "CloseQuoteMissingInTextDecl"
653                     : "CloseQuoteMissingInXMLDecl",
654                     new Object[]{name});
655         }
656 
657         // return
658         return name;
659 
660     } // scanPseudoAttribute(XMLString):String
661 
662     /**
663      * Scans the name of a pseudo attribute. The only legal names
664      * in XML 1.0/1.1 documents are 'version', 'encoding' and 'standalone'.
665      *
666      * @return the name of the pseudo attribute or <code>null</code>
667      * if a legal pseudo attribute name could not be scanned.
668      */
scanPseudoAttributeName()669     private String scanPseudoAttributeName() throws IOException, XNIException {
670         final int ch = fEntityScanner.peekChar();
671         switch (ch) {
672             case 'v':
673                 if (fEntityScanner.skipString(fVersionSymbol)) {
674                     return fVersionSymbol;
675                 }
676                 break;
677             case 'e':
678                 if (fEntityScanner.skipString(fEncodingSymbol)) {
679                     return fEncodingSymbol;
680                 }
681                 break;
682             case 's':
683                 if (fEntityScanner.skipString(fStandaloneSymbol)) {
684                     return fStandaloneSymbol;
685                 }
686                 break;
687         }
688         return null;
689     } // scanPseudoAttributeName()
690 
691     /**
692      * Scans a processing instruction.
693      * <p>
694      * <pre>
695      * [16] PI ::= '&lt;?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
696      * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
697      * </pre>
698      */
699     //CHANGED:
700     //EARLIER: scanPI()
701     //NOW: scanPI(XMLStringBuffer)
702     //it makes things more easy if XMLStringBUffer is passed. Motivation for this change is same
703     // as that for scanContent()
704 
scanPI(XMLStringBuffer data)705     protected void scanPI(XMLStringBuffer data) throws IOException, XNIException {
706 
707         // target
708         fReportEntity = false;
709         String target = fEntityScanner.scanName(NameType.PI);
710         if (target == null) {
711             reportFatalError("PITargetRequired", null);
712         }
713 
714         // scan data
715         scanPIData(target, data);
716         fReportEntity = true;
717 
718     } // scanPI(XMLStringBuffer)
719 
720     /**
721      * Scans a processing data. This is needed to handle the situation
722      * where a document starts with a processing instruction whose
723      * target name <em>starts with</em> "xml". (e.g. xmlfoo)
724      *
725      * This method would always read the whole data. We have while loop and data is buffered
726      * until delimeter is encountered.
727      *
728      * @param target The PI target
729      * @param data The string to fill in with the data
730      */
731 
732     //CHANGED:
733     //Earlier:This method uses the fStringBuffer and later buffer values are set to
734     //the supplied XMLString....
735     //Now: Changed the signature of this function to pass XMLStringBuffer.. and data would
736     //be appended to that buffer
737 
scanPIData(String target, XMLStringBuffer data)738     protected void scanPIData(String target, XMLStringBuffer data)
739     throws IOException, XNIException {
740 
741         // check target
742         if (target.length() == 3) {
743             char c0 = Character.toLowerCase(target.charAt(0));
744             char c1 = Character.toLowerCase(target.charAt(1));
745             char c2 = Character.toLowerCase(target.charAt(2));
746             if (c0 == 'x' && c1 == 'm' && c2 == 'l') {
747                 reportFatalError("ReservedPITarget", null);
748             }
749         }
750 
751         // spaces
752         if (!fEntityScanner.skipSpaces()) {
753             if (fEntityScanner.skipString("?>")) {
754                 // we found the end, there is no data just return
755                 return;
756             } else {
757                 // if there is data there should be some space
758                 reportFatalError("SpaceRequiredInPI", null);
759             }
760         }
761 
762         // since scanData appends the parsed data to the buffer passed
763         // a while loop would append the whole of parsed data to the buffer(data:XMLStringBuffer)
764         //until all of the data is buffered.
765         if (fEntityScanner.scanData("?>", data)) {
766             do {
767                 int c = fEntityScanner.peekChar();
768                 if (c != -1) {
769                     if (XMLChar.isHighSurrogate(c)) {
770                         scanSurrogates(data);
771                     } else if (isInvalidLiteral(c)) {
772                         reportFatalError("InvalidCharInPI",
773                                 new Object[]{Integer.toHexString(c)});
774                                 fEntityScanner.scanChar(null);
775                     }
776                 }
777             } while (fEntityScanner.scanData("?>", data));
778         }
779 
780     } // scanPIData(String,XMLString)
781 
782     /**
783      * Scans a comment.
784      * <p>
785      * <pre>
     * [15] Comment ::= '&lt;!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
787      * </pre>
788      * <p>
789      * <strong>Note:</strong> Called after scanning past '&lt;!--'
790      * <strong>Note:</strong> This method uses fString, anything in it
791      * at the time of calling is lost.
792      *
793      * @param text The buffer to fill in with the text.
794      */
scanComment(XMLStringBuffer text)795     protected void scanComment(XMLStringBuffer text)
796     throws IOException, XNIException {
797 
798         //System.out.println( "XMLScanner#scanComment# In Scan Comment" );
799         // text
800         // REVISIT: handle invalid character, eof
801         text.clear();
802         while (fEntityScanner.scanData("--", text)) {
803             int c = fEntityScanner.peekChar();
804 
805             //System.out.println( "XMLScanner#scanComment#text.toString() == " + text.toString() );
806             //System.out.println( "XMLScanner#scanComment#c == " + c );
807 
808             if (c != -1) {
809                 if (XMLChar.isHighSurrogate(c)) {
810                     scanSurrogates(text);
811                 }
812                 else if (isInvalidLiteral(c)) {
813                     reportFatalError("InvalidCharInComment",
814                             new Object[] { Integer.toHexString(c) });
815                             fEntityScanner.scanChar(NameType.COMMENT);
816                 }
817             }
818         }
819         if (!fEntityScanner.skipChar('>', NameType.COMMENT)) {
820             reportFatalError("DashDashInComment", null);
821         }
822 
823     } // scanComment()
824 
825     /**
826      * Scans an attribute value and normalizes whitespace converting all
827      * whitespace characters to space characters.
828      *
829      * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'"
830      *
831      * @param value The XMLString to fill in with the value.
832      * @param nonNormalizedValue The XMLString to fill in with the
833      *                           non-normalized value.
834      * @param atName The name of the attribute being parsed (for error msgs).
835      * @param attributes The attributes list for the scanned attribute.
836      * @param attrIndex The index of the attribute to use from the list.
837      * @param checkEntities true if undeclared entities should be reported as VC violation,
838      *                      false if undeclared entities should be reported as WFC violation.
839      * @param eleName The name of element to which this attribute belongs.
840      * @param isNSURI a flag indicating whether the content is a Namespace URI
841      *
842      * <strong>Note:</strong> This method uses fStringBuffer2, anything in it
843      * at the time of calling is lost.
844      **/
    protected void scanAttributeValue(XMLString value, XMLString nonNormalizedValue,
            String atName, XMLAttributes attributes, int attrIndex, boolean checkEntities,
            String eleName, boolean isNSURI)
            throws IOException, XNIException {
        // Overview: the value is first read with a single scanLiteral call.
        // If that call stops at the closing quote the work is done; otherwise
        // the loop below handles whatever stopped the scan (reference, '<',
        // '%', ']', line break, surrogate, invalid char) and accumulates the
        // pieces in a string buffer. When fNeedNonNormalizedValue is set the
        // raw text is collected in fStringBuffer2 in parallel.
        // Note: the attributes and attrIndex parameters are not referenced
        // anywhere in this method body.
        XMLStringBuffer stringBuffer = null;
        // quote
        int quote = fEntityScanner.peekChar();
        if (quote != '\'' && quote != '"') {
            reportFatalError("OpenQuoteExpected", new Object[]{eleName, atName});
        }

        // The peeked character is consumed whether or not it was a quote.
        fEntityScanner.scanChar(NameType.ATTRIBUTE);
        // Remember the entity depth at the start of the value: raw text is
        // only recorded while at this depth, and the loop below only ends on
        // a quote seen at this depth.
        int entityDepth = fEntityDepth;

        int c = fEntityScanner.scanLiteral(quote, value, isNSURI);
        if (DEBUG_ATTR_NORMALIZATION) {
            System.out.println("** scanLiteral -> \""
                    + value.toString() + "\"");
        }
        // Take the raw copy before normalizeWhitespace rewrites value in place.
        if(fNeedNonNormalizedValue){
            fStringBuffer2.clear();
            fStringBuffer2.append(value);
        }
        if(fEntityScanner.whiteSpaceLen > 0)
            normalizeWhitespace(value);
        if (DEBUG_ATTR_NORMALIZATION) {
            System.out.println("** normalizeWhitespace -> \""
                    + value.toString() + "\"");
        }
        // c == quote means the first scanLiteral already reached the closing
        // quote and value holds the complete attribute value.
        if (c != quote) {
            fScanningAttribute = true;
            stringBuffer = getStringBuffer();
            stringBuffer.clear();
            do {
                // Append the chunk produced by the previous scanLiteral call.
                stringBuffer.append(value);
                if (DEBUG_ATTR_NORMALIZATION) {
                    System.out.println("** value2: \""
                            + stringBuffer.toString() + "\"");
                }
                if (c == '&') {
                    // Reference: either a character reference ("&#...") or
                    // an entity reference ("&name;").
                    fEntityScanner.skipChar('&', NameType.REFERENCE);
                    if (entityDepth == fEntityDepth && fNeedNonNormalizedValue ) {
                        fStringBuffer2.append('&');
                    }
                    if (fEntityScanner.skipChar('#', NameType.REFERENCE)) {
                        if (entityDepth == fEntityDepth && fNeedNonNormalizedValue ) {
                            fStringBuffer2.append('#');
                        }
                        // scanCharReferenceValue appends the referenced
                        // character to stringBuffer and, when given a second
                        // buffer, the reference text to that buffer.
                        int ch ;
                        if (fNeedNonNormalizedValue)
                            ch = scanCharReferenceValue(stringBuffer, fStringBuffer2);
                        else
                            ch = scanCharReferenceValue(stringBuffer, null);

                        // ch is only used for the debug trace below.
                        if (ch != -1) {
                            if (DEBUG_ATTR_NORMALIZATION) {
                                System.out.println("** value3: \""
                                        + stringBuffer.toString()
                                        + "\"");
                            }
                        }
                    } else {
                        String entityName = fEntityScanner.scanName(NameType.ENTITY);
                        if (entityName == null) {
                            reportFatalError("NameRequiredInReference", null);
                        } else if (entityDepth == fEntityDepth && fNeedNonNormalizedValue) {
                            fStringBuffer2.append(entityName);
                        }
                        if (!fEntityScanner.skipChar(';', NameType.REFERENCE)) {
                            reportFatalError("SemicolonRequiredInReference",
                                    new Object []{entityName});
                        } else if (entityDepth == fEntityDepth && fNeedNonNormalizedValue) {
                            fStringBuffer2.append(';');
                        }
                        // resolveCharacter handles the five built-in names
                        // (amp, apos, lt, gt, quot) and appends the character.
                        if (resolveCharacter(entityName, stringBuffer)) {
                            checkEntityLimit(false, fEntityScanner.fCurrentEntity.name, 1);
                        } else {
                            if (fEntityStore.isExternalEntity(entityName)) {
                                reportFatalError("ReferenceToExternalEntity",
                                        new Object[] { entityName });
                            } else {
                                if (!fEntityStore.isDeclaredEntity(entityName)) {
                                    //WFC & VC: Entity Declared
                                    // checkEntities selects the VC path: a plain
                                    // error, and only when validating. Otherwise
                                    // the undeclared entity is a fatal error.
                                    if (checkEntities) {
                                        if (fValidation) {
                                            fErrorReporter.reportError(fEntityScanner,XMLMessageFormatter.XML_DOMAIN,
                                                    "EntityNotDeclared",
                                                    new Object[]{entityName},
                                                    XMLErrorReporter.SEVERITY_ERROR);
                                        }
                                    } else {
                                        reportFatalError("EntityNotDeclared",
                                                new Object[]{entityName});
                                    }
                                }
                                // Reached for declared and (after the report
                                // above) undeclared entities alike.
                                // NOTE(review): meaning of the two boolean
                                // arguments is defined by the entity manager
                                // and not visible here - confirm.
                                fEntityManager.startEntity(true, entityName, true);
                            }
                        }
                    }
                } else if (c == '<') {
                    // '<' is reported and consumed; it is not added to the
                    // normalized value, only to the raw copy.
                    reportFatalError("LessthanInAttValue",
                            new Object[] { eleName, atName });
                            fEntityScanner.scanChar(null);
                            if (entityDepth == fEntityDepth && fNeedNonNormalizedValue) {
                                fStringBuffer2.append((char)c);
                            }
                } else if (c == '%' || c == ']') {
                    // These characters stop scanLiteral but are ordinary
                    // data here: copy them through unchanged.
                    fEntityScanner.scanChar(null);
                    stringBuffer.append((char)c);
                    if (entityDepth == fEntityDepth && fNeedNonNormalizedValue) {
                        fStringBuffer2.append((char)c);
                    }
                    if (DEBUG_ATTR_NORMALIZATION) {
                        System.out.println("** valueF: \""
                                + stringBuffer.toString() + "\"");
                    }
                } else if (c == '\n' || c == '\r') {
                    // A line break becomes a space in the normalized value
                    // and a '\n' in the raw copy.
                    fEntityScanner.scanChar(null);
                    stringBuffer.append(' ');
                    if (entityDepth == fEntityDepth && fNeedNonNormalizedValue) {
                        fStringBuffer2.append('\n');
                    }
                } else if (c != -1 && XMLChar.isHighSurrogate(c)) {
                    // Surrogate pair: scanned into fStringBuffer3 first and
                    // copied over only if scanSurrogates succeeded.
                    fStringBuffer3.clear();
                    if (scanSurrogates(fStringBuffer3)) {
                        stringBuffer.append(fStringBuffer3);
                        if (entityDepth == fEntityDepth && fNeedNonNormalizedValue) {
                            fStringBuffer2.append(fStringBuffer3);
                        }
                        if (DEBUG_ATTR_NORMALIZATION) {
                            System.out.println("** valueI: \""
                                    + stringBuffer.toString()
                                    + "\"");
                        }
                    }
                } else if (c != -1 && isInvalidLiteral(c)) {
                    // Invalid character: reported and consumed; kept out of
                    // the normalized value, recorded in the raw copy.
                    reportFatalError("InvalidCharInAttValue",
                            new Object[] {eleName, atName, Integer.toString(c, 16)});
                            fEntityScanner.scanChar(null);
                            if (entityDepth == fEntityDepth && fNeedNonNormalizedValue) {
                                fStringBuffer2.append((char)c);
                            }
                }
                // Scan the next chunk; as before the raw copy is taken
                // before whitespace is normalized in place.
                c = fEntityScanner.scanLiteral(quote, value, isNSURI);
                if (entityDepth == fEntityDepth && fNeedNonNormalizedValue) {
                    fStringBuffer2.append(value);
                }
                if(fEntityScanner.whiteSpaceLen > 0)
                    normalizeWhitespace(value);
                //Todo ::Move this check  to Attributes , do conversion
                //only if attribute is being accessed. -Venu
            // A quote only ends the value when seen at the starting depth.
            } while (c != quote || entityDepth != fEntityDepth);
            // Append the final chunk and hand the assembled text back in value.
            stringBuffer.append(value);
            if (DEBUG_ATTR_NORMALIZATION) {
                System.out.println("** valueN: \""
                        + stringBuffer.toString() + "\"");
            }
            value.setValues(stringBuffer);
            fScanningAttribute = false;
        }
        if(fNeedNonNormalizedValue)
            nonNormalizedValue.setValues(fStringBuffer2);

        // quote
        int cquote = fEntityScanner.scanChar(NameType.ATTRIBUTE);
        if (cquote != quote) {
            reportFatalError("CloseQuoteExpected", new Object[]{eleName, atName});
        }
    } // scanAttributeValue()
1014 
1015 
1016     /**
1017      * Resolves character entity references.
1018      * @param entityName the name of the entity
1019      * @param stringBuffer the current XMLStringBuffer to append the character to.
1020      * @return true if resolved, false otherwise
1021      */
resolveCharacter(String entityName, XMLStringBuffer stringBuffer)1022     protected boolean resolveCharacter(String entityName, XMLStringBuffer stringBuffer) {
1023         /**
1024          * entityNames (symbols) are interned. The equals method would do the same,
1025          * but I'm leaving it as comparisons by references are common in the impl
1026          * and it made it explicit to others who read this code.
1027          */
1028         if (entityName == fAmpSymbol) {
1029             stringBuffer.append('&');
1030             return true;
1031         } else if (entityName == fAposSymbol) {
1032             stringBuffer.append('\'');
1033             return true;
1034         } else if (entityName == fLtSymbol) {
1035             stringBuffer.append('<');
1036             return true;
1037         } else if (entityName == fGtSymbol) {
1038             checkEntityLimit(false, fEntityScanner.fCurrentEntity.name, 1);
1039             stringBuffer.append('>');
1040             return true;
1041         } else if (entityName == fQuotSymbol) {
1042             checkEntityLimit(false, fEntityScanner.fCurrentEntity.name, 1);
1043             stringBuffer.append('"');
1044             return true;
1045         }
1046         return false;
1047     }
1048 
1049     /**
1050      * Scans External ID and return the public and system IDs.
1051      *
1052      * @param identifiers An array of size 2 to return the system id,
1053      *                    and public id (in that order).
1054      * @param optionalSystemId Specifies whether the system id is optional.
1055      *
1056      * <strong>Note:</strong> This method uses fString and fStringBuffer,
1057      * anything in them at the time of calling is lost.
1058      */
scanExternalID(String[] identifiers, boolean optionalSystemId)1059     protected void scanExternalID(String[] identifiers,
1060             boolean optionalSystemId)
1061             throws IOException, XNIException {
1062 
1063         String systemId = null;
1064         String publicId = null;
1065         if (fEntityScanner.skipString("PUBLIC")) {
1066             if (!fEntityScanner.skipSpaces()) {
1067                 reportFatalError("SpaceRequiredAfterPUBLIC", null);
1068             }
1069             scanPubidLiteral(fString);
1070             publicId = fString.toString();
1071 
1072             if (!fEntityScanner.skipSpaces() && !optionalSystemId) {
1073                 reportFatalError("SpaceRequiredBetweenPublicAndSystem", null);
1074             }
1075         }
1076 
1077         if (publicId != null || fEntityScanner.skipString("SYSTEM")) {
1078             if (publicId == null && !fEntityScanner.skipSpaces()) {
1079                 reportFatalError("SpaceRequiredAfterSYSTEM", null);
1080             }
1081             int quote = fEntityScanner.peekChar();
1082             if (quote != '\'' && quote != '"') {
1083                 if (publicId != null && optionalSystemId) {
1084                     // looks like we don't have any system id
1085                     // simply return the public id
1086                     identifiers[0] = null;
1087                     identifiers[1] = publicId;
1088                     return;
1089                 }
1090                 reportFatalError("QuoteRequiredInSystemID", null);
1091             }
1092             fEntityScanner.scanChar(null);
1093             XMLString ident = fString;
1094             if (fEntityScanner.scanLiteral(quote, ident, false) != quote) {
1095                 fStringBuffer.clear();
1096                 do {
1097                     fStringBuffer.append(ident);
1098                     int c = fEntityScanner.peekChar();
1099                     if (XMLChar.isMarkup(c) || c == ']') {
1100                         fStringBuffer.append((char)fEntityScanner.scanChar(null));
1101                     } else if (c != -1 && isInvalidLiteral(c)) {
1102                         reportFatalError("InvalidCharInSystemID",
1103                             new Object[] {Integer.toString(c, 16)});
1104                     }
1105                 } while (fEntityScanner.scanLiteral(quote, ident, false) != quote);
1106                 fStringBuffer.append(ident);
1107                 ident = fStringBuffer;
1108             }
1109             systemId = ident.toString();
1110             if (!fEntityScanner.skipChar(quote, null)) {
1111                 reportFatalError("SystemIDUnterminated", null);
1112             }
1113         }
1114 
1115         // store result in array
1116         identifiers[0] = systemId;
1117         identifiers[1] = publicId;
1118     }
1119 
1120 
1121     /**
1122      * Scans public ID literal.
1123      *
1124      * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
1125      * [13] PubidChar::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
1126      *
1127      * The returned string is normalized according to the following rule,
1128      * from http://www.w3.org/TR/REC-xml#dt-pubid:
1129      *
1130      * Before a match is attempted, all strings of white space in the public
1131      * identifier must be normalized to single space characters (#x20), and
1132      * leading and trailing white space must be removed.
1133      *
1134      * @param literal The string to fill in with the public ID literal.
1135      * @return True on success.
1136      *
1137      * <strong>Note:</strong> This method uses fStringBuffer, anything in it at
1138      * the time of calling is lost.
1139      */
scanPubidLiteral(XMLString literal)1140     protected boolean scanPubidLiteral(XMLString literal)
1141     throws IOException, XNIException {
1142         int quote = fEntityScanner.scanChar(null);
1143         if (quote != '\'' && quote != '"') {
1144             reportFatalError("QuoteRequiredInPublicID", null);
1145             return false;
1146         }
1147 
1148         fStringBuffer.clear();
1149         // skip leading whitespace
1150         boolean skipSpace = true;
1151         boolean dataok = true;
1152         while (true) {
1153             int c = fEntityScanner.scanChar(null);
1154             if (c == ' ' || c == '\n' || c == '\r') {
1155                 if (!skipSpace) {
1156                     // take the first whitespace as a space and skip the others
1157                     fStringBuffer.append(' ');
1158                     skipSpace = true;
1159                 }
1160             } else if (c == quote) {
1161                 if (skipSpace) {
1162                     // if we finished on a space let's trim it
1163                     fStringBuffer.length--;
1164                 }
1165                 literal.setValues(fStringBuffer);
1166                 break;
1167             } else if (XMLChar.isPubid(c)) {
1168                 fStringBuffer.append((char)c);
1169                 skipSpace = false;
1170             } else if (c == -1) {
1171                 reportFatalError("PublicIDUnterminated", null);
1172                 return false;
1173             } else {
1174                 dataok = false;
1175                 reportFatalError("InvalidCharInPublicID",
1176                         new Object[]{Integer.toHexString(c)});
1177             }
1178         }
1179         return dataok;
1180     }
1181 
1182 
1183     /**
1184      * Normalize whitespace in an XMLString converting all whitespace
1185      * characters to space characters.
1186      */
normalizeWhitespace(XMLString value)1187     protected void normalizeWhitespace(XMLString value) {
1188         int i=0;
1189         int j=0;
1190         int [] buff = fEntityScanner.whiteSpaceLookup;
1191         int buffLen = fEntityScanner.whiteSpaceLen;
1192         int end = value.offset + value.length;
1193         while(i < buffLen){
1194             j = buff[i];
1195             if(j < end ){
1196                 value.ch[j] = ' ';
1197             }
1198             i++;
1199         }
1200     }
1201 
1202     //
1203     // XMLEntityHandler methods
1204     //
1205 
1206     /**
1207      * This method notifies of the start of an entity. The document entity
1208      * has the pseudo-name of "[xml]" the DTD has the pseudo-name of "[dtd]"
1209      * parameter entity names start with '%'; and general entities are just
1210      * specified by their name.
1211      *
1212      * @param name     The name of the entity.
1213      * @param identifier The resource identifier.
1214      * @param encoding The auto-detected IANA encoding name of the entity
1215      *                 stream. This value will be null in those situations
1216      *                 where the entity encoding is not auto-detected (e.g.
1217      *                 internal entities or a document entity that is
1218      *                 parsed from a java.io.Reader).
1219      *
1220      * @throws XNIException Thrown by handler to signal an error.
1221      */
startEntity(String name, XMLResourceIdentifier identifier, String encoding, Augmentations augs)1222     public void startEntity(String name,
1223             XMLResourceIdentifier identifier,
1224             String encoding, Augmentations augs) throws XNIException {
1225 
1226         // keep track of the entity depth
1227         fEntityDepth++;
1228         // must reset entity scanner
1229         fEntityScanner = fEntityManager.getEntityScanner();
1230         fEntityStore = fEntityManager.getEntityStore() ;
1231     } // startEntity(String,XMLResourceIdentifier,String)
1232 
1233     /**
1234      * This method notifies the end of an entity. The document entity has
1235      * the pseudo-name of "[xml]" the DTD has the pseudo-name of "[dtd]"
1236      * parameter entity names start with '%'; and general entities are just
1237      * specified by their name.
1238      *
1239      * @param name The name of the entity.
1240      *
1241      * @throws XNIException Thrown by handler to signal an error.
1242      */
endEntity(String name, Augmentations augs)1243     public void endEntity(String name, Augmentations augs) throws IOException, XNIException {
1244         // keep track of the entity depth
1245         if (fEntityDepth > 0) {
1246             fEntityDepth--;
1247         }
1248     } // endEntity(String)
1249 
1250     /**
     * Scans a character reference and appends the corresponding chars to the
     * specified buffer.
1253      *
1254      * <p>
1255      * <pre>
1256      * [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
1257      * </pre>
1258      *
     * <strong>Note:</strong> This method uses fStringBuffer3, anything in it
     * at the time of calling is lost.
1261      *
1262      * @param buf the character buffer to append chars to
1263      * @param buf2 the character buffer to append non-normalized chars to
1264      *
1265      * @return the character value or (-1) on conversion failure
1266      */
scanCharReferenceValue(XMLStringBuffer buf, XMLStringBuffer buf2)1267     protected int scanCharReferenceValue(XMLStringBuffer buf, XMLStringBuffer buf2)
1268     throws IOException, XNIException {
1269         int initLen = buf.length;
1270         // scan hexadecimal value
1271         boolean hex = false;
1272         if (fEntityScanner.skipChar('x', NameType.REFERENCE)) {
1273             if (buf2 != null) { buf2.append('x'); }
1274             hex = true;
1275             fStringBuffer3.clear();
1276             boolean digit = true;
1277 
1278             int c = fEntityScanner.peekChar();
1279             digit = (c >= '0' && c <= '9') ||
1280                     (c >= 'a' && c <= 'f') ||
1281                     (c >= 'A' && c <= 'F');
1282             if (digit) {
1283                 if (buf2 != null) { buf2.append((char)c); }
1284                 fEntityScanner.scanChar(NameType.REFERENCE);
1285                 fStringBuffer3.append((char)c);
1286 
1287                 do {
1288                     c = fEntityScanner.peekChar();
1289                     digit = (c >= '0' && c <= '9') ||
1290                             (c >= 'a' && c <= 'f') ||
1291                             (c >= 'A' && c <= 'F');
1292                     if (digit) {
1293                         if (buf2 != null) { buf2.append((char)c); }
1294                         fEntityScanner.scanChar(NameType.REFERENCE);
1295                         fStringBuffer3.append((char)c);
1296                     }
1297                 } while (digit);
1298             } else {
1299                 reportFatalError("HexdigitRequiredInCharRef", null);
1300             }
1301         }
1302 
1303         // scan decimal value
1304         else {
1305             fStringBuffer3.clear();
1306             boolean digit = true;
1307 
1308             int c = fEntityScanner.peekChar();
1309             digit = c >= '0' && c <= '9';
1310             if (digit) {
1311                 if (buf2 != null) { buf2.append((char)c); }
1312                 fEntityScanner.scanChar(NameType.REFERENCE);
1313                 fStringBuffer3.append((char)c);
1314 
1315                 do {
1316                     c = fEntityScanner.peekChar();
1317                     digit = c >= '0' && c <= '9';
1318                     if (digit) {
1319                         if (buf2 != null) { buf2.append((char)c); }
1320                         fEntityScanner.scanChar(NameType.REFERENCE);
1321                         fStringBuffer3.append((char)c);
1322                     }
1323                 } while (digit);
1324             } else {
1325                 reportFatalError("DigitRequiredInCharRef", null);
1326             }
1327         }
1328 
1329         // end
1330         if (!fEntityScanner.skipChar(';', NameType.REFERENCE)) {
1331             reportFatalError("SemicolonRequiredInCharRef", null);
1332         }
1333         if (buf2 != null) { buf2.append(';'); }
1334 
1335         // convert string to number
1336         int value = -1;
1337         try {
1338             value = Integer.parseInt(fStringBuffer3.toString(),
1339                     hex ? 16 : 10);
1340 
1341             // character reference must be a valid XML character
1342             if (isInvalid(value)) {
1343                 StringBuffer errorBuf = new StringBuffer(fStringBuffer3.length + 1);
1344                 if (hex) errorBuf.append('x');
1345                 errorBuf.append(fStringBuffer3.ch, fStringBuffer3.offset, fStringBuffer3.length);
1346                 reportFatalError("InvalidCharRef",
1347                         new Object[]{errorBuf.toString()});
1348             }
1349         } catch (NumberFormatException e) {
1350             // Conversion failed, let -1 value drop through.
1351             // If we end up here, the character reference was invalid.
1352             StringBuffer errorBuf = new StringBuffer(fStringBuffer3.length + 1);
1353             if (hex) errorBuf.append('x');
1354             errorBuf.append(fStringBuffer3.ch, fStringBuffer3.offset, fStringBuffer3.length);
1355             reportFatalError("InvalidCharRef",
1356                     new Object[]{errorBuf.toString()});
1357         }
1358 
1359         // append corresponding chars to the given buffer
1360         if (!XMLChar.isSupplemental(value)) {
1361             buf.append((char) value);
1362         } else {
1363             // character is supplemental, split it into surrogate chars
1364             buf.append(XMLChar.highSurrogate(value));
1365             buf.append(XMLChar.lowSurrogate(value));
1366         }
1367 
1368         // char refs notification code
1369         if (fNotifyCharRefs && value != -1) {
1370             String literal = "#" + (hex ? "x" : "") + fStringBuffer3.toString();
1371             if (!fScanningAttribute) {
1372                 fCharRefLiteral = literal;
1373             }
1374         }
1375 
1376         if (fEntityScanner.fCurrentEntity.isGE) {
1377             checkEntityLimit(false, fEntityScanner.fCurrentEntity.name, buf.length - initLen);
1378         }
1379         return value;
1380     }
1381     // returns true if the given character is not
1382     // valid with respect to the version of
1383     // XML understood by this scanner.
isInvalid(int value)1384     protected boolean isInvalid(int value) {
1385         return (XMLChar.isInvalid(value));
1386     } // isInvalid(int):  boolean
1387 
1388     // returns true if the given character is not
1389     // valid or may not be used outside a character reference
1390     // with respect to the version of XML understood by this scanner.
isInvalidLiteral(int value)1391     protected boolean isInvalidLiteral(int value) {
1392         return (XMLChar.isInvalid(value));
1393     } // isInvalidLiteral(int):  boolean
1394 
1395     // returns true if the given character is
1396     // a valid nameChar with respect to the version of
1397     // XML understood by this scanner.
isValidNameChar(int value)1398     protected boolean isValidNameChar(int value) {
1399         return (XMLChar.isName(value));
1400     } // isValidNameChar(int):  boolean
1401 
1402     // returns true if the given character is
1403     // a valid NCName character with respect to the version of
1404     // XML understood by this scanner.
isValidNCName(int value)1405     protected boolean isValidNCName(int value) {
1406         return (XMLChar.isNCName(value));
1407     } // isValidNCName(int):  boolean
1408 
1409     // returns true if the given character is
1410     // a valid nameStartChar with respect to the version of
1411     // XML understood by this scanner.
isValidNameStartChar(int value)1412     protected boolean isValidNameStartChar(int value) {
1413         return (XMLChar.isNameStart(value));
1414     } // isValidNameStartChar(int):  boolean
1415 
1416     // returns true if the given character is
1417     // a valid high surrogate for a nameStartChar
1418     // with respect to the version of XML understood
1419     // by this scanner.
isValidNameStartHighSurrogate(int value)1420     protected boolean isValidNameStartHighSurrogate(int value) {
1421         return false;
1422     } // isValidNameStartHighSurrogate(int):  boolean
1423 
versionSupported(String version )1424     protected boolean versionSupported(String version ) {
1425         return version.equals("1.0") || version.equals("1.1");
1426     } // version Supported
1427 
1428     /**
1429      * Scans surrogates and append them to the specified buffer.
1430      * <p>
1431      * <strong>Note:</strong> This assumes the current char has already been
1432      * identified as a high surrogate.
1433      *
1434      * @param buf The StringBuffer to append the read surrogates to.
1435      * @return True if it succeeded.
1436      */
scanSurrogates(XMLStringBuffer buf)1437     protected boolean scanSurrogates(XMLStringBuffer buf)
1438     throws IOException, XNIException {
1439 
1440         int high = fEntityScanner.scanChar(null);
1441         int low = fEntityScanner.peekChar();
1442         if (!XMLChar.isLowSurrogate(low)) {
1443             reportFatalError("InvalidCharInContent",
1444                     new Object[] {Integer.toString(high, 16)});
1445                     return false;
1446         }
1447         fEntityScanner.scanChar(null);
1448 
1449         // convert surrogates to supplemental character
1450         int c = XMLChar.supplemental((char)high, (char)low);
1451 
1452         // supplemental character must be a valid XML character
1453         if (isInvalid(c)) {
1454             reportFatalError("InvalidCharInContent",
1455                     new Object[]{Integer.toString(c, 16)});
1456                     return false;
1457         }
1458 
1459         // fill in the buffer
1460         buf.append((char)high);
1461         buf.append((char)low);
1462 
1463         return true;
1464 
1465     } // scanSurrogates():boolean
1466 
1467 
1468     /**
1469      * Convenience function used in all XML scanners.
1470      */
reportFatalError(String msgId, Object[] args)1471     protected void reportFatalError(String msgId, Object[] args)
1472     throws XNIException {
1473         fErrorReporter.reportError(fEntityScanner, XMLMessageFormatter.XML_DOMAIN,
1474                 msgId, args,
1475                 XMLErrorReporter.SEVERITY_FATAL_ERROR);
1476     }
1477 
1478     // private methods
init()1479     private void init() {
1480         // initialize scanner
1481         fEntityScanner = null;
1482         // initialize vars
1483         fEntityDepth = 0;
1484         fReportEntity = true;
1485         fResourceIdentifier.clear();
1486 
1487         if(!fAttributeCacheInitDone){
1488             for(int i = 0; i < initialCacheCount; i++){
1489                 attributeValueCache.add(new XMLString());
1490                 stringBufferCache.add(new XMLStringBuffer());
1491             }
1492             fAttributeCacheInitDone = true;
1493         }
1494         fStringBufferIndex = 0;
1495         fAttributeCacheUsedCount = 0;
1496 
1497     }
1498 
getStringBuffer()1499     XMLStringBuffer getStringBuffer(){
1500         if((fStringBufferIndex < initialCacheCount )|| (fStringBufferIndex < stringBufferCache.size())){
1501             return stringBufferCache.get(fStringBufferIndex++);
1502         }else{
1503             XMLStringBuffer tmpObj = new XMLStringBuffer();
1504             fStringBufferIndex++;
1505             stringBufferCache.add(tmpObj);
1506             return tmpObj;
1507         }
1508     }
1509 
1510     /**
1511      * Add the count of the content buffer and check if the accumulated
1512      * value exceeds the limit
1513      * @param isPEDecl a flag to indicate whether the entity is parameter
1514      * @param entityName entity name
1515      * @param buffer content buffer
1516      */
checkEntityLimit(boolean isPEDecl, String entityName, XMLString buffer)1517     void checkEntityLimit(boolean isPEDecl, String entityName, XMLString buffer) {
1518         checkEntityLimit(isPEDecl, entityName, buffer.length);
1519     }
1520 
1521     /**
1522      * Add the count and check limit
1523      * @param isPEDecl a flag to indicate whether the entity is parameter
1524      * @param entityName entity name
1525      * @param len length of the buffer
1526      */
checkEntityLimit(boolean isPEDecl, String entityName, int len)1527     void checkEntityLimit(boolean isPEDecl, String entityName, int len) {
1528         if (fLimitAnalyzer == null) {
1529             fLimitAnalyzer = fEntityManager.fLimitAnalyzer;
1530         }
1531         if (isPEDecl) {
1532             fLimitAnalyzer.addValue(XMLSecurityManager.Limit.PARAMETER_ENTITY_SIZE_LIMIT, "%" + entityName, len);
1533             if (fSecurityManager.isOverLimit(XMLSecurityManager.Limit.PARAMETER_ENTITY_SIZE_LIMIT, fLimitAnalyzer)) {
1534                         fSecurityManager.debugPrint(fLimitAnalyzer);
1535                 reportFatalError("MaxEntitySizeLimit", new Object[]{"%" + entityName,
1536                     fLimitAnalyzer.getValue(XMLSecurityManager.Limit.PARAMETER_ENTITY_SIZE_LIMIT),
1537                     fSecurityManager.getLimit(XMLSecurityManager.Limit.PARAMETER_ENTITY_SIZE_LIMIT),
1538                     fSecurityManager.getStateLiteral(XMLSecurityManager.Limit.PARAMETER_ENTITY_SIZE_LIMIT)});
1539             }
1540         } else {
1541             fLimitAnalyzer.addValue(XMLSecurityManager.Limit.GENERAL_ENTITY_SIZE_LIMIT, entityName, len);
1542             if (fSecurityManager.isOverLimit(XMLSecurityManager.Limit.GENERAL_ENTITY_SIZE_LIMIT, fLimitAnalyzer)) {
1543                         fSecurityManager.debugPrint(fLimitAnalyzer);
1544                 reportFatalError("MaxEntitySizeLimit", new Object[]{entityName,
1545                     fLimitAnalyzer.getValue(XMLSecurityManager.Limit.GENERAL_ENTITY_SIZE_LIMIT),
1546                     fSecurityManager.getLimit(XMLSecurityManager.Limit.GENERAL_ENTITY_SIZE_LIMIT),
1547                     fSecurityManager.getStateLiteral(XMLSecurityManager.Limit.GENERAL_ENTITY_SIZE_LIMIT)});
1548             }
1549         }
1550         if (fSecurityManager.isOverLimit(XMLSecurityManager.Limit.TOTAL_ENTITY_SIZE_LIMIT, fLimitAnalyzer)) {
1551             fSecurityManager.debugPrint(fLimitAnalyzer);
1552             reportFatalError("TotalEntitySizeLimit",
1553                 new Object[]{fLimitAnalyzer.getTotalValue(XMLSecurityManager.Limit.TOTAL_ENTITY_SIZE_LIMIT),
1554                 fSecurityManager.getLimit(XMLSecurityManager.Limit.TOTAL_ENTITY_SIZE_LIMIT),
1555                 fSecurityManager.getStateLiteral(XMLSecurityManager.Limit.TOTAL_ENTITY_SIZE_LIMIT)});
1556         }
1557     }
1558 } // class XMLScanner
1559