1 /* 2 * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. 3 */ 4 5 /* 6 * Licensed to the Apache Software Foundation (ASF) under one or more 7 * contributor license agreements. See the NOTICE file distributed with 8 * this work for additional information regarding copyright ownership. 9 * The ASF licenses this file to You under the Apache License, Version 2.0 10 * (the "License"); you may not use this file except in compliance with 11 * the License. You may obtain a copy of the License at 12 * 13 * http://www.apache.org/licenses/LICENSE-2.0 14 * 15 * Unless required by applicable law or agreed to in writing, software 16 * distributed under the License is distributed on an "AS IS" BASIS, 17 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 * See the License for the specific language governing permissions and 19 * limitations under the License. 20 */ 21 22 package com.sun.org.apache.xerces.internal.impl; 23 24 import com.sun.org.apache.xerces.internal.util.Status; 25 import com.sun.xml.internal.stream.XMLEntityStorage; 26 import java.io.IOException; 27 import java.util.ArrayList; 28 import javax.xml.stream.events.XMLEvent; 29 import com.sun.org.apache.xerces.internal.impl.msg.XMLMessageFormatter; 30 import com.sun.org.apache.xerces.internal.util.SymbolTable; 31 import com.sun.org.apache.xerces.internal.util.XMLChar; 32 import com.sun.org.apache.xerces.internal.util.XMLResourceIdentifierImpl; 33 import com.sun.org.apache.xerces.internal.util.XMLStringBuffer; 34 import com.sun.org.apache.xerces.internal.utils.XMLLimitAnalyzer; 35 import com.sun.org.apache.xerces.internal.utils.XMLSecurityManager; 36 import com.sun.org.apache.xerces.internal.xni.Augmentations; 37 import com.sun.org.apache.xerces.internal.xni.XMLAttributes; 38 import com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier; 39 import com.sun.org.apache.xerces.internal.xni.XMLString; 40 import com.sun.org.apache.xerces.internal.xni.XNIException; 
41 import com.sun.org.apache.xerces.internal.xni.parser.XMLComponent; 42 import com.sun.org.apache.xerces.internal.xni.parser.XMLComponentManager; 43 import com.sun.org.apache.xerces.internal.xni.parser.XMLConfigurationException; 44 import com.sun.xml.internal.stream.Entity; 45 46 //import com.sun.xml.stream.XMLEntityManager; 47 //import com.sun.org.apache.xerces.internal.impl.XMLErrorReporter; 48 49 /** 50 * This class is responsible for holding scanning methods common to 51 * scanning the XML document structure and content as well as the DTD 52 * structure and content. Both XMLDocumentScanner and XMLDTDScanner inherit 53 * from this base class. 54 * 55 * <p> 56 * This component requires the following features and properties from the 57 * component manager that uses it: 58 * <ul> 59 * <li>http://xml.org/sax/features/validation</li> 60 * <li>http://apache.org/xml/features/scanner/notify-char-refs</li> 61 * <li>http://apache.org/xml/properties/internal/symbol-table</li> 62 * <li>http://apache.org/xml/properties/internal/error-reporter</li> 63 * <li>http://apache.org/xml/properties/internal/entity-manager</li> 64 * </ul> 65 * 66 * @author Andy Clark, IBM 67 * @author Arnaud Le Hors, IBM 68 * @author Eric Ye, IBM 69 * @author K.Venugopal SUN Microsystems 70 * @author Sunitha Reddy, SUN Microsystems 71 * @version $Id: XMLScanner.java,v 1.12 2010-11-01 04:39:41 joehw Exp $ 72 * @LastModified: Feb 2020 73 */ 74 public abstract class XMLScanner 75 implements XMLComponent { 76 77 // 78 // Constants 79 // 80 81 // feature identifiers 82 83 /** Feature identifier: namespaces. */ 84 protected static final String NAMESPACES = 85 Constants.SAX_FEATURE_PREFIX + Constants.NAMESPACES_FEATURE; 86 87 /** Feature identifier: validation. */ 88 protected static final String VALIDATION = 89 Constants.SAX_FEATURE_PREFIX + Constants.VALIDATION_FEATURE; 90 91 /** Feature identifier: notify character references. 
*/
    protected static final String NOTIFY_CHAR_REFS =
            Constants.XERCES_FEATURE_PREFIX + Constants.NOTIFY_CHAR_REFS_FEATURE;

    // property identifiers

    /**
     * Identifier of the internal parser-settings flag. Note that it is built
     * from the <em>feature</em> prefix and is read through
     * {@code getFeature} in {@code reset(XMLComponentManager)}, even though it
     * is listed among the property identifiers.
     */
    protected static final String PARSER_SETTINGS =
            Constants.XERCES_FEATURE_PREFIX + Constants.PARSER_SETTINGS;
    /** Property identifier: symbol table. */
    protected static final String SYMBOL_TABLE =
            Constants.XERCES_PROPERTY_PREFIX + Constants.SYMBOL_TABLE_PROPERTY;

    /** Property identifier: error reporter. */
    protected static final String ERROR_REPORTER =
            Constants.XERCES_PROPERTY_PREFIX + Constants.ERROR_REPORTER_PROPERTY;

    /** Property identifier: entity manager. */
    protected static final String ENTITY_MANAGER =
            Constants.XERCES_PROPERTY_PREFIX + Constants.ENTITY_MANAGER_PROPERTY;

    /** Property identifier: Security manager. */
    private static final String SECURITY_MANAGER = Constants.SECURITY_MANAGER;

    // debugging

    /** Debug attribute normalization: when true, scanAttributeValue prints its intermediate values. */
    protected static final boolean DEBUG_ATTR_NORMALIZATION = false;

    /**
     * Type of names. Each constant carries a human-readable literal; the
     * constants are passed to the entity scanner's scanName/scanChar/skipChar
     * calls to identify what kind of construct is being scanned.
     */
    public static enum NameType {
        ATTRIBUTE("attribute"),
        ATTRIBUTENAME("attribute name"),
        COMMENT("comment"),
        DOCTYPE("doctype"),
        ELEMENTSTART("startelement"),
        ELEMENTEND("endelement"),
        ENTITY("entity"),
        NOTATION("notation"),
        PI("pi"),
        REFERENCE("reference");

        /** Human-readable label of this name type. */
        final String literal;
        NameType(String literal) {
            this.literal = literal;
        }

        /** Returns the human-readable label of this name type. */
        String literal() {
            return literal;
        }
    }

    // xxx: defaults to false because the non-normalized attribute value is not
    // normally needed; a feature should exist that, when set to true, makes
    // scanAttributeValue compute it.
    private boolean fNeedNonNormalizedValue = false;

    // Caches and bookkeeping counters; they are only declared here and are not
    // touched by the scanning methods visible in this part of the file.
    protected ArrayList<XMLString> attributeValueCache = new ArrayList<>();
    protected ArrayList<XMLStringBuffer> stringBufferCache = new ArrayList<>();
    protected int fStringBufferIndex = 0;
    protected boolean fAttributeCacheInitDone = false;
    protected int fAttributeCacheUsedCount = 0;

    //
    // Data
    //

    // features

    /**
     * Validation. This feature identifier is:
     * http://xml.org/sax/features/validation
     */
    protected boolean fValidation = false;

    /** Namespaces. */
    protected boolean fNamespaces;

    /** Character references notification. */
    protected boolean fNotifyCharRefs = false;

    /** Internal parser-settings feature */
    protected boolean fParserSettings = true;

    // properties

    /** Property manager supplied through setPropertyManager. */
    protected PropertyManager fPropertyManager = null ;
    /** Symbol table. */
    protected SymbolTable fSymbolTable;

    /** Error reporter. */
    protected XMLErrorReporter fErrorReporter;

    /** Entity manager. */
    //protected XMLEntityManager fEntityManager = PropertyManager.getEntityManager();
    protected XMLEntityManager fEntityManager = null ;

    /** Entity storage, obtained from the entity manager on reset (xxx: this should be available from EntityManager). */
    protected XMLEntityStorage fEntityStore = null ;

    /** Security manager. */
    protected XMLSecurityManager fSecurityManager = null;

    /** Limit analyzer. */
    protected XMLLimitAnalyzer fLimitAnalyzer = null;

    // protected data

    /** event type */
    protected XMLEvent fEvent ;

    /** Entity scanner, this always works on last entity that was opened. */
    protected XMLEntityScanner fEntityScanner = null;

    /** Entity depth. */
    protected int fEntityDepth;

    /** Literal value of the last character reference scanned. */
    protected String fCharRefLiteral = null;

    /** Scanning attribute: set to true by scanAttributeValue while it assembles a multi-part value. */
    protected boolean fScanningAttribute;

    /** Report entity boundary: cleared by scanPI while a processing instruction is being scanned. */
    protected boolean fReportEntity;

    // symbols

    /** Symbol: "version". */
    protected final static String fVersionSymbol = "version".intern();

    /** Symbol: "encoding".
*/
    protected final static String fEncodingSymbol = "encoding".intern();

    /** Symbol: "standalone". */
    protected final static String fStandaloneSymbol = "standalone".intern();

    /** Symbol: "amp". */
    protected final static String fAmpSymbol = "amp".intern();

    /** Symbol: "lt". */
    protected final static String fLtSymbol = "lt".intern();

    /** Symbol: "gt". */
    protected final static String fGtSymbol = "gt".intern();

    /** Symbol: "quot". */
    protected final static String fQuotSymbol = "quot".intern();

    /** Symbol: "apos". */
    protected final static String fAposSymbol = "apos".intern();

    // temporary variables

    // NOTE: These objects are private to help prevent accidental modification
    //       of values by a subclass. If they were protected *and* a subclass
    //       modified the values, it would be difficult to track down the real
    //       cause of the bug. By making these private, we avoid this
    //       possibility.

    /** Scratch string; scanXMLDeclOrTextDecl receives each pseudo-attribute value in it. */
    private XMLString fString = new XMLString();

    /** String buffer. */
    private XMLStringBuffer fStringBuffer = new XMLStringBuffer();

    /**
     * Scratch buffer used by scanPseudoAttribute to assemble a value, and by
     * scanAttributeValue to collect the non-normalized attribute value.
     */
    private XMLStringBuffer fStringBuffer2 = new XMLStringBuffer();

    /** Scratch buffer used by scanAttributeValue to receive a scanned surrogate pair. */
    private XMLStringBuffer fStringBuffer3 = new XMLStringBuffer();

    // temporary location for Resource identification information.
    protected XMLResourceIdentifierImpl fResourceIdentifier = new XMLResourceIdentifierImpl();
    // NOTE(review): presumably the initial size of the attribute/string-buffer
    // caches declared above; its use is not visible in this part of the file.
    int initialCacheCount = 6;
    //
    // XMLComponent methods
    //

    /**
     * Resets this scanner from the given component manager. The parser-settings
     * flag is read first; only when it is set are the symbol table, error
     * reporter, entity manager, security manager and the validation,
     * namespaces and notify-char-refs features re-read. {@code init()} is
     * called in either case.
     *
     * @param componentManager The component manager.
     *
     * @throws XMLConfigurationException Throws exception if required features and
     *                                   properties cannot be found.
277 */ reset(XMLComponentManager componentManager)278 public void reset(XMLComponentManager componentManager) 279 throws XMLConfigurationException { 280 281 fParserSettings = componentManager.getFeature(PARSER_SETTINGS, true); 282 283 if (!fParserSettings) { 284 // parser settings have not been changed 285 init(); 286 return; 287 } 288 289 290 // Xerces properties 291 fSymbolTable = (SymbolTable)componentManager.getProperty(SYMBOL_TABLE); 292 fErrorReporter = (XMLErrorReporter)componentManager.getProperty(ERROR_REPORTER); 293 fEntityManager = (XMLEntityManager)componentManager.getProperty(ENTITY_MANAGER); 294 fSecurityManager = (XMLSecurityManager)componentManager.getProperty(SECURITY_MANAGER); 295 296 //this step is extra because we have separated the storage of entity 297 fEntityStore = fEntityManager.getEntityStore() ; 298 299 // sax features 300 fValidation = componentManager.getFeature(VALIDATION, false); 301 fNamespaces = componentManager.getFeature(NAMESPACES, true); 302 fNotifyCharRefs = componentManager.getFeature(NOTIFY_CHAR_REFS, false); 303 304 init(); 305 } // reset(XMLComponentManager) 306 setPropertyManager(PropertyManager propertyManager)307 protected void setPropertyManager(PropertyManager propertyManager){ 308 fPropertyManager = propertyManager ; 309 } 310 311 /** 312 * Sets the value of a property during parsing. 
313 * 314 * @param propertyId 315 * @param value 316 */ setProperty(String propertyId, Object value)317 public void setProperty(String propertyId, Object value) 318 throws XMLConfigurationException { 319 320 // Xerces properties 321 if (propertyId.startsWith(Constants.XERCES_PROPERTY_PREFIX)) { 322 String property = 323 propertyId.substring(Constants.XERCES_PROPERTY_PREFIX.length()); 324 if (property.equals(Constants.SYMBOL_TABLE_PROPERTY)) { 325 fSymbolTable = (SymbolTable)value; 326 } else if (property.equals(Constants.ERROR_REPORTER_PROPERTY)) { 327 fErrorReporter = (XMLErrorReporter)value; 328 } else if (property.equals(Constants.ENTITY_MANAGER_PROPERTY)) { 329 fEntityManager = (XMLEntityManager)value; 330 } 331 } 332 333 if (propertyId.equals(SECURITY_MANAGER)) { 334 fSecurityManager = (XMLSecurityManager)value; 335 } 336 /*else if(propertyId.equals(Constants.STAX_PROPERTIES)){ 337 fStaxProperties = (HashMap)value; 338 //TODO::discuss with neeraj what are his thoughts on passing properties. 339 //For now use this 340 }*/ 341 342 } // setProperty(String,Object) 343 344 /* 345 * Sets the feature of the scanner. 346 */ setFeature(String featureId, boolean value)347 public void setFeature(String featureId, boolean value) 348 throws XMLConfigurationException { 349 350 if (VALIDATION.equals(featureId)) { 351 fValidation = value; 352 } else if (NOTIFY_CHAR_REFS.equals(featureId)) { 353 fNotifyCharRefs = value; 354 } 355 } 356 357 /* 358 * Gets the state of the feature of the scanner. 359 */ getFeature(String featureId)360 public boolean getFeature(String featureId) 361 throws XMLConfigurationException { 362 363 if (VALIDATION.equals(featureId)) { 364 return fValidation; 365 } else if (NOTIFY_CHAR_REFS.equals(featureId)) { 366 return fNotifyCharRefs; 367 } 368 throw new XMLConfigurationException(Status.NOT_RECOGNIZED, featureId); 369 } 370 371 // 372 // Protected methods 373 // 374 375 // anybody calling this had better have set Symtoltable! 
reset()376 protected void reset() { 377 init(); 378 379 // DTD preparsing defaults: 380 fValidation = true; 381 fNotifyCharRefs = false; 382 383 } 384 reset(PropertyManager propertyManager)385 public void reset(PropertyManager propertyManager) { 386 init(); 387 // Xerces properties 388 fSymbolTable = (SymbolTable)propertyManager.getProperty(Constants.XERCES_PROPERTY_PREFIX + Constants.SYMBOL_TABLE_PROPERTY); 389 390 fErrorReporter = (XMLErrorReporter)propertyManager.getProperty(Constants.XERCES_PROPERTY_PREFIX + Constants.ERROR_REPORTER_PROPERTY); 391 392 fEntityManager = (XMLEntityManager)propertyManager.getProperty(ENTITY_MANAGER); 393 fEntityStore = fEntityManager.getEntityStore() ; 394 fEntityScanner = (XMLEntityScanner)fEntityManager.getEntityScanner() ; 395 fSecurityManager = (XMLSecurityManager)propertyManager.getProperty(SECURITY_MANAGER); 396 397 //fEntityManager.reset(); 398 // DTD preparsing defaults: 399 fValidation = false; 400 fNotifyCharRefs = false; 401 402 } 403 // common scanning methods 404 405 /** 406 * Scans an XML or text declaration. 407 * <p> 408 * <pre> 409 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' 410 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ") 411 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" ) 412 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* 413 * [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") 414 * | ('"' ('yes' | 'no') '"')) 415 * 416 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>' 417 * </pre> 418 * 419 * @param scanningTextDecl True if a text declaration is to 420 * be scanned instead of an XML 421 * declaration. 422 * @param pseudoAttributeValues An array of size 3 to return the version, 423 * encoding and standalone pseudo attribute values 424 * (in that order). 425 * 426 * <strong>Note:</strong> This method uses fString, anything in it 427 * at the time of calling is lost. 
428 */ scanXMLDeclOrTextDecl(boolean scanningTextDecl, String[] pseudoAttributeValues)429 protected void scanXMLDeclOrTextDecl(boolean scanningTextDecl, 430 String[] pseudoAttributeValues) 431 throws IOException, XNIException { 432 433 // pseudo-attribute values 434 String version = null; 435 String encoding = null; 436 String standalone = null; 437 438 // scan pseudo-attributes 439 final int STATE_VERSION = 0; 440 final int STATE_ENCODING = 1; 441 final int STATE_STANDALONE = 2; 442 final int STATE_DONE = 3; 443 int state = STATE_VERSION; 444 445 boolean dataFoundForTarget = false; 446 boolean sawSpace = fEntityScanner.skipSpaces(); 447 // since pseudoattributes are *not* attributes, 448 // their quotes don't need to be preserved in external parameter entities. 449 // the XMLEntityScanner#scanLiteral method will continue to 450 // emit -1 in such cases when it finds a quote; this is 451 // fine for other methods that parse scanned entities, 452 // but not for the scanning of pseudoattributes. So, 453 // temporarily, we must mark the current entity as not being "literal" 454 Entity.ScannedEntity currEnt = fEntityManager.getCurrentEntity(); 455 boolean currLiteral = currEnt.literal; 456 currEnt.literal = false; 457 while (fEntityScanner.peekChar() != '?') { 458 dataFoundForTarget = true; 459 String name = scanPseudoAttribute(scanningTextDecl, fString); 460 switch (state) { 461 case STATE_VERSION: { 462 if (name.equals(fVersionSymbol)) { 463 if (!sawSpace) { 464 reportFatalError(scanningTextDecl 465 ? 
"SpaceRequiredBeforeVersionInTextDecl" 466 : "SpaceRequiredBeforeVersionInXMLDecl", 467 null); 468 } 469 version = fString.toString(); 470 state = STATE_ENCODING; 471 if (!versionSupported(version)) { 472 reportFatalError("VersionNotSupported", 473 new Object[]{version}); 474 } 475 476 if (version.equals("1.1")) { 477 Entity.ScannedEntity top = fEntityManager.getTopLevelEntity(); 478 if (top != null && (top.version == null || top.version.equals("1.0"))) { 479 reportFatalError("VersionMismatch", null); 480 } 481 fEntityManager.setScannerVersion(Constants.XML_VERSION_1_1); 482 } 483 484 } else if (name.equals(fEncodingSymbol)) { 485 if (!scanningTextDecl) { 486 reportFatalError("VersionInfoRequired", null); 487 } 488 if (!sawSpace) { 489 reportFatalError(scanningTextDecl 490 ? "SpaceRequiredBeforeEncodingInTextDecl" 491 : "SpaceRequiredBeforeEncodingInXMLDecl", 492 null); 493 } 494 encoding = fString.toString(); 495 state = scanningTextDecl ? STATE_DONE : STATE_STANDALONE; 496 } else { 497 if (scanningTextDecl) { 498 reportFatalError("EncodingDeclRequired", null); 499 } else { 500 reportFatalError("VersionInfoRequired", null); 501 } 502 } 503 break; 504 } 505 case STATE_ENCODING: { 506 if (name.equals(fEncodingSymbol)) { 507 if (!sawSpace) { 508 reportFatalError(scanningTextDecl 509 ? "SpaceRequiredBeforeEncodingInTextDecl" 510 : "SpaceRequiredBeforeEncodingInXMLDecl", 511 null); 512 } 513 encoding = fString.toString(); 514 state = scanningTextDecl ? 
STATE_DONE : STATE_STANDALONE; 515 // TODO: check encoding name; set encoding on 516 // entity scanner 517 } else if (!scanningTextDecl && name.equals(fStandaloneSymbol)) { 518 if (!sawSpace) { 519 reportFatalError("SpaceRequiredBeforeStandalone", 520 null); 521 } 522 standalone = fString.toString(); 523 state = STATE_DONE; 524 if (!standalone.equals("yes") && !standalone.equals("no")) { 525 reportFatalError("SDDeclInvalid", new Object[] {standalone}); 526 } 527 } else { 528 reportFatalError("EncodingDeclRequired", null); 529 } 530 break; 531 } 532 case STATE_STANDALONE: { 533 if (name.equals(fStandaloneSymbol)) { 534 if (!sawSpace) { 535 reportFatalError("SpaceRequiredBeforeStandalone", 536 null); 537 } 538 standalone = fString.toString(); 539 state = STATE_DONE; 540 if (!standalone.equals("yes") && !standalone.equals("no")) { 541 reportFatalError("SDDeclInvalid", new Object[] {standalone}); 542 } 543 } else { 544 reportFatalError("SDDeclNameInvalid", null); 545 } 546 break; 547 } 548 default: { 549 reportFatalError("NoMorePseudoAttributes", null); 550 } 551 } 552 sawSpace = fEntityScanner.skipSpaces(); 553 } 554 // restore original literal value 555 if(currLiteral) { 556 currEnt.literal = true; 557 } 558 // REVISIT: should we remove this error reporting? 559 if (scanningTextDecl && state != STATE_DONE) { 560 reportFatalError("MorePseudoAttributes", null); 561 } 562 563 // If there is no data in the xml or text decl then we fail to report error 564 // for version or encoding info above. 
565 if (scanningTextDecl) { 566 if (!dataFoundForTarget && encoding == null) { 567 reportFatalError("EncodingDeclRequired", null); 568 } 569 } else { 570 if (!dataFoundForTarget && version == null) { 571 reportFatalError("VersionInfoRequired", null); 572 } 573 } 574 575 // end 576 if (!fEntityScanner.skipChar('?', null)) { 577 reportFatalError("XMLDeclUnterminated", null); 578 } 579 if (!fEntityScanner.skipChar('>', null)) { 580 reportFatalError("XMLDeclUnterminated", null); 581 582 } 583 584 // fill in return array 585 pseudoAttributeValues[0] = version; 586 pseudoAttributeValues[1] = encoding; 587 pseudoAttributeValues[2] = standalone; 588 589 } // scanXMLDeclOrTextDecl(boolean) 590 591 /** 592 * Scans a pseudo attribute. 593 * 594 * @param scanningTextDecl True if scanning this pseudo-attribute for a 595 * TextDecl; false if scanning XMLDecl. This 596 * flag is needed to report the correct type of 597 * error. 598 * @param value The string to fill in with the attribute 599 * value. 600 * 601 * @return The name of the attribute 602 * 603 * <strong>Note:</strong> This method uses fStringBuffer2, anything in it 604 * at the time of calling is lost. 605 */ scanPseudoAttribute(boolean scanningTextDecl, XMLString value)606 protected String scanPseudoAttribute(boolean scanningTextDecl, 607 XMLString value) 608 throws IOException, XNIException { 609 610 String name = scanPseudoAttributeName(); 611 // XMLEntityManager.print(fEntityManager.getCurrentEntity()); 612 613 if (name == null) { 614 reportFatalError("PseudoAttrNameExpected", null); 615 } 616 fEntityScanner.skipSpaces(); 617 if (!fEntityScanner.skipChar('=', null)) { 618 reportFatalError(scanningTextDecl ? "EqRequiredInTextDecl" 619 : "EqRequiredInXMLDecl", new Object[]{name}); 620 } 621 fEntityScanner.skipSpaces(); 622 int quote = fEntityScanner.peekChar(); 623 if (quote != '\'' && quote != '"') { 624 reportFatalError(scanningTextDecl ? 
"QuoteRequiredInTextDecl" 625 : "QuoteRequiredInXMLDecl" , new Object[]{name}); 626 } 627 fEntityScanner.scanChar(NameType.ATTRIBUTE); 628 int c = fEntityScanner.scanLiteral(quote, value, false); 629 if (c != quote) { 630 fStringBuffer2.clear(); 631 do { 632 fStringBuffer2.append(value); 633 if (c != -1) { 634 if (c == '&' || c == '%' || c == '<' || c == ']') { 635 fStringBuffer2.append((char)fEntityScanner.scanChar(NameType.ATTRIBUTE)); 636 } else if (XMLChar.isHighSurrogate(c)) { 637 scanSurrogates(fStringBuffer2); 638 } else if (isInvalidLiteral(c)) { 639 String key = scanningTextDecl 640 ? "InvalidCharInTextDecl" : "InvalidCharInXMLDecl"; 641 reportFatalError(key, 642 new Object[] {Integer.toString(c, 16)}); 643 fEntityScanner.scanChar(null); 644 } 645 } 646 c = fEntityScanner.scanLiteral(quote, value, false); 647 } while (c != quote); 648 fStringBuffer2.append(value); 649 value.setValues(fStringBuffer2); 650 } 651 if (!fEntityScanner.skipChar(quote, null)) { 652 reportFatalError(scanningTextDecl ? "CloseQuoteMissingInTextDecl" 653 : "CloseQuoteMissingInXMLDecl", 654 new Object[]{name}); 655 } 656 657 // return 658 return name; 659 660 } // scanPseudoAttribute(XMLString):String 661 662 /** 663 * Scans the name of a pseudo attribute. The only legal names 664 * in XML 1.0/1.1 documents are 'version', 'encoding' and 'standalone'. 665 * 666 * @return the name of the pseudo attribute or <code>null</code> 667 * if a legal pseudo attribute name could not be scanned. 
668 */ scanPseudoAttributeName()669 private String scanPseudoAttributeName() throws IOException, XNIException { 670 final int ch = fEntityScanner.peekChar(); 671 switch (ch) { 672 case 'v': 673 if (fEntityScanner.skipString(fVersionSymbol)) { 674 return fVersionSymbol; 675 } 676 break; 677 case 'e': 678 if (fEntityScanner.skipString(fEncodingSymbol)) { 679 return fEncodingSymbol; 680 } 681 break; 682 case 's': 683 if (fEntityScanner.skipString(fStandaloneSymbol)) { 684 return fStandaloneSymbol; 685 } 686 break; 687 } 688 return null; 689 } // scanPseudoAttributeName() 690 691 /** 692 * Scans a processing instruction. 693 * <p> 694 * <pre> 695 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' 696 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) 697 * </pre> 698 */ 699 //CHANGED: 700 //EARLIER: scanPI() 701 //NOW: scanPI(XMLStringBuffer) 702 //it makes things more easy if XMLStringBUffer is passed. Motivation for this change is same 703 // as that for scanContent() 704 scanPI(XMLStringBuffer data)705 protected void scanPI(XMLStringBuffer data) throws IOException, XNIException { 706 707 // target 708 fReportEntity = false; 709 String target = fEntityScanner.scanName(NameType.PI); 710 if (target == null) { 711 reportFatalError("PITargetRequired", null); 712 } 713 714 // scan data 715 scanPIData(target, data); 716 fReportEntity = true; 717 718 } // scanPI(XMLStringBuffer) 719 720 /** 721 * Scans a processing data. This is needed to handle the situation 722 * where a document starts with a processing instruction whose 723 * target name <em>starts with</em> "xml". (e.g. xmlfoo) 724 * 725 * This method would always read the whole data. We have while loop and data is buffered 726 * until delimeter is encountered. 
727 * 728 * @param target The PI target 729 * @param data The string to fill in with the data 730 */ 731 732 //CHANGED: 733 //Earlier:This method uses the fStringBuffer and later buffer values are set to 734 //the supplied XMLString.... 735 //Now: Changed the signature of this function to pass XMLStringBuffer.. and data would 736 //be appended to that buffer 737 scanPIData(String target, XMLStringBuffer data)738 protected void scanPIData(String target, XMLStringBuffer data) 739 throws IOException, XNIException { 740 741 // check target 742 if (target.length() == 3) { 743 char c0 = Character.toLowerCase(target.charAt(0)); 744 char c1 = Character.toLowerCase(target.charAt(1)); 745 char c2 = Character.toLowerCase(target.charAt(2)); 746 if (c0 == 'x' && c1 == 'm' && c2 == 'l') { 747 reportFatalError("ReservedPITarget", null); 748 } 749 } 750 751 // spaces 752 if (!fEntityScanner.skipSpaces()) { 753 if (fEntityScanner.skipString("?>")) { 754 // we found the end, there is no data just return 755 return; 756 } else { 757 // if there is data there should be some space 758 reportFatalError("SpaceRequiredInPI", null); 759 } 760 } 761 762 // since scanData appends the parsed data to the buffer passed 763 // a while loop would append the whole of parsed data to the buffer(data:XMLStringBuffer) 764 //until all of the data is buffered. 765 if (fEntityScanner.scanData("?>", data)) { 766 do { 767 int c = fEntityScanner.peekChar(); 768 if (c != -1) { 769 if (XMLChar.isHighSurrogate(c)) { 770 scanSurrogates(data); 771 } else if (isInvalidLiteral(c)) { 772 reportFatalError("InvalidCharInPI", 773 new Object[]{Integer.toHexString(c)}); 774 fEntityScanner.scanChar(null); 775 } 776 } 777 } while (fEntityScanner.scanData("?>", data)); 778 } 779 780 } // scanPIData(String,XMLString) 781 782 /** 783 * Scans a comment. 
784 * <p> 785 * <pre> 786 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' 787 * </pre> 788 * <p> 789 * <strong>Note:</strong> Called after scanning past '<!--' 790 * <strong>Note:</strong> This method uses fString, anything in it 791 * at the time of calling is lost. 792 * 793 * @param text The buffer to fill in with the text. 794 */ scanComment(XMLStringBuffer text)795 protected void scanComment(XMLStringBuffer text) 796 throws IOException, XNIException { 797 798 //System.out.println( "XMLScanner#scanComment# In Scan Comment" ); 799 // text 800 // REVISIT: handle invalid character, eof 801 text.clear(); 802 while (fEntityScanner.scanData("--", text)) { 803 int c = fEntityScanner.peekChar(); 804 805 //System.out.println( "XMLScanner#scanComment#text.toString() == " + text.toString() ); 806 //System.out.println( "XMLScanner#scanComment#c == " + c ); 807 808 if (c != -1) { 809 if (XMLChar.isHighSurrogate(c)) { 810 scanSurrogates(text); 811 } 812 else if (isInvalidLiteral(c)) { 813 reportFatalError("InvalidCharInComment", 814 new Object[] { Integer.toHexString(c) }); 815 fEntityScanner.scanChar(NameType.COMMENT); 816 } 817 } 818 } 819 if (!fEntityScanner.skipChar('>', NameType.COMMENT)) { 820 reportFatalError("DashDashInComment", null); 821 } 822 823 } // scanComment() 824 825 /** 826 * Scans an attribute value and normalizes whitespace converting all 827 * whitespace characters to space characters. 828 * 829 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'" 830 * 831 * @param value The XMLString to fill in with the value. 832 * @param nonNormalizedValue The XMLString to fill in with the 833 * non-normalized value. 834 * @param atName The name of the attribute being parsed (for error msgs). 835 * @param attributes The attributes list for the scanned attribute. 836 * @param attrIndex The index of the attribute to use from the list. 
* @param checkEntities true if undeclared entities should be reported as VC violation,
     *                      false if undeclared entities should be reported as WFC violation.
     * @param eleName The name of element to which this attribute belongs.
     * @param isNSURI a flag indicating whether the content is a Namespace URI
     *
     * <strong>Note:</strong> This method uses fStringBuffer2, anything in it
     * at the time of calling is lost.
     **/
    protected void scanAttributeValue(XMLString value, XMLString nonNormalizedValue,
            String atName, XMLAttributes attributes, int attrIndex, boolean checkEntities,
            String eleName, boolean isNSURI)
            throws IOException, XNIException {
        XMLStringBuffer stringBuffer = null;
        // quote
        int quote = fEntityScanner.peekChar();
        if (quote != '\'' && quote != '"') {
            reportFatalError("OpenQuoteExpected", new Object[]{eleName, atName});
        }

        fEntityScanner.scanChar(NameType.ATTRIBUTE);
        // remember the entity depth at which the value started: the closing
        // quote only counts when it is seen at this same depth, and the
        // non-normalized value only records text scanned at this depth
        int entityDepth = fEntityDepth;

        int c = fEntityScanner.scanLiteral(quote, value, isNSURI);
        if (DEBUG_ATTR_NORMALIZATION) {
            System.out.println("** scanLiteral -> \""
                    + value.toString() + "\"");
        }
        if(fNeedNonNormalizedValue){
            fStringBuffer2.clear();
            fStringBuffer2.append(value);
        }
        if(fEntityScanner.whiteSpaceLen > 0)
            normalizeWhitespace(value);
        if (DEBUG_ATTR_NORMALIZATION) {
            System.out.println("** normalizeWhitespace -> \""
                    + value.toString() + "\"");
        }
        // fast path: the first scanLiteral call already reached the closing
        // quote, so 'value' is complete; otherwise the pieces are assembled
        // in a string buffer
        if (c != quote) {
            fScanningAttribute = true;
            stringBuffer = getStringBuffer();
            stringBuffer.clear();
            do {
                stringBuffer.append(value);
                if (DEBUG_ATTR_NORMALIZATION) {
                    System.out.println("** value2: \""
                            + stringBuffer.toString() + "\"");
                }
                if (c == '&') {
                    // a reference: either a character reference or an entity reference
                    fEntityScanner.skipChar('&', NameType.REFERENCE);
                    if (entityDepth == fEntityDepth && fNeedNonNormalizedValue ) {
                        fStringBuffer2.append('&');
                    }
                    if (fEntityScanner.skipChar('#', NameType.REFERENCE)) {
                        if (entityDepth == fEntityDepth && fNeedNonNormalizedValue ) {
                            fStringBuffer2.append('#');
                        }
                        int ch ;
                        if (fNeedNonNormalizedValue)
                            ch = scanCharReferenceValue(stringBuffer, fStringBuffer2);
                        else
                            ch = scanCharReferenceValue(stringBuffer, null);

                        if (ch != -1) {
                            if (DEBUG_ATTR_NORMALIZATION) {
                                System.out.println("** value3: \""
                                        + stringBuffer.toString()
                                        + "\"");
                            }
                        }
                    } else {
                        String entityName = fEntityScanner.scanName(NameType.ENTITY);
                        if (entityName == null) {
                            reportFatalError("NameRequiredInReference", null);
                        } else if (entityDepth == fEntityDepth && fNeedNonNormalizedValue) {
                            fStringBuffer2.append(entityName);
                        }
                        if (!fEntityScanner.skipChar(';', NameType.REFERENCE)) {
                            reportFatalError("SemicolonRequiredInReference",
                                    new Object []{entityName});
                        } else if (entityDepth == fEntityDepth && fNeedNonNormalizedValue) {
                            fStringBuffer2.append(';');
                        }
                        // predefined entities (amp, apos, lt, gt, quot) are
                        // expanded in place; anything else must be a declared
                        // internal entity, which is then opened for scanning
                        if (resolveCharacter(entityName, stringBuffer)) {
                            // NOTE(review): resolveCharacter itself already calls
                            // checkEntityLimit for "gt" and "quot", so those two
                            // are counted twice on this path - confirm intended.
                            checkEntityLimit(false, fEntityScanner.fCurrentEntity.name, 1);
                        } else {
                            if (fEntityStore.isExternalEntity(entityName)) {
                                reportFatalError("ReferenceToExternalEntity",
                                        new Object[] { entityName });
                            } else {
                                if (!fEntityStore.isDeclaredEntity(entityName)) {
                                    //WFC & VC: Entity Declared
                                    if (checkEntities) {
                                        if (fValidation) {
                                            fErrorReporter.reportError(fEntityScanner,XMLMessageFormatter.XML_DOMAIN,
                                                    "EntityNotDeclared",
                                                    new Object[]{entityName},
                                                    XMLErrorReporter.SEVERITY_ERROR);
                                        }
                                    } else {
                                        reportFatalError("EntityNotDeclared",
                                                new Object[]{entityName});
                                    }
                                }
                                fEntityManager.startEntity(true, entityName, true);
                            }
                        }
                    }
                } else if (c == '<') {
                    // '<' is not allowed in an attribute value: it is reported
                    // and consumed, and not added to the normalized value
                    reportFatalError("LessthanInAttValue",
                            new Object[] { eleName, atName });
                    fEntityScanner.scanChar(null);
                    if (entityDepth == fEntityDepth && fNeedNonNormalizedValue) {
                        fStringBuffer2.append((char)c);
                    }
                } else if (c == '%' || c == ']') {
                    // these characters are taken literally
                    fEntityScanner.scanChar(null);
                    stringBuffer.append((char)c);
                    if (entityDepth == fEntityDepth && fNeedNonNormalizedValue) {
                        fStringBuffer2.append((char)c);
                    }
                    if (DEBUG_ATTR_NORMALIZATION) {
                        System.out.println("** valueF: \""
                                + stringBuffer.toString() + "\"");
                    }
                } else if (c == '\n' || c == '\r') {
                    // a line break becomes a space in the normalized value and
                    // '\n' in the non-normalized value
                    fEntityScanner.scanChar(null);
                    stringBuffer.append(' ');
                    if (entityDepth == fEntityDepth && fNeedNonNormalizedValue) {
                        fStringBuffer2.append('\n');
                    }
                } else if (c != -1 && XMLChar.isHighSurrogate(c)) {
                    // the surrogate pair is scanned into fStringBuffer3 first
                    // and only appended when scanSurrogates succeeds
                    fStringBuffer3.clear();
                    if (scanSurrogates(fStringBuffer3)) {
                        stringBuffer.append(fStringBuffer3);
                        if (entityDepth == fEntityDepth && fNeedNonNormalizedValue) {
                            fStringBuffer2.append(fStringBuffer3);
                        }
                        if (DEBUG_ATTR_NORMALIZATION) {
                            System.out.println("** valueI: \""
                                    + stringBuffer.toString()
                                    + "\"");
                        }
                    }
                } else if (c != -1 && isInvalidLiteral(c)) {
                    // the invalid character is reported and consumed, and not
                    // added to the normalized value
                    reportFatalError("InvalidCharInAttValue",
                            new Object[] {eleName, atName, Integer.toString(c, 16)});
                    fEntityScanner.scanChar(null);
                    if (entityDepth == fEntityDepth && fNeedNonNormalizedValue) {
                        fStringBuffer2.append((char)c);
                    }
                }
                c = fEntityScanner.scanLiteral(quote, value, isNSURI);
                if (entityDepth == fEntityDepth && fNeedNonNormalizedValue) {
                    fStringBuffer2.append(value);
                }
                if(fEntityScanner.whiteSpaceLen > 0)
                    normalizeWhitespace(value);
                //Todo ::Move this check to Attributes , do conversion
                //only if attribute is being accessed. -Venu
            // a quote seen at a different entity depth (i.e. inside an
            // expanded entity) does not end the value
            } while (c != quote || entityDepth != fEntityDepth);
            stringBuffer.append(value);
            if (DEBUG_ATTR_NORMALIZATION) {
                System.out.println("** valueN: \""
                        + stringBuffer.toString() + "\"");
            }
            value.setValues(stringBuffer);
            fScanningAttribute = false;
        }
        if(fNeedNonNormalizedValue)
            nonNormalizedValue.setValues(fStringBuffer2);

        // quote
        int cquote = fEntityScanner.scanChar(NameType.ATTRIBUTE);
        if (cquote != quote) {
            reportFatalError("CloseQuoteExpected", new Object[]{eleName, atName});
        }
    } // scanAttributeValue()


    /**
     * Resolves character entity references, i.e. the five predefined
     * entities amp, apos, lt, gt and quot.
     *
     * @param entityName the name of the entity; compared by reference against
     *                   the interned symbols, so a <code>null</code> or
     *                   non-interned name simply does not match
     * @param stringBuffer the current XMLStringBuffer to append the character to.
     * @return true if resolved, false otherwise
     */
    protected boolean resolveCharacter(String entityName, XMLStringBuffer stringBuffer) {
        /**
         * entityNames (symbols) are interned. The equals method would do the same,
         * but I'm leaving it as comparisons by references are common in the impl
         * and it made it explicit to others who read this code.
         */
        if (entityName == fAmpSymbol) {
            stringBuffer.append('&');
            return true;
        } else if (entityName == fAposSymbol) {
            stringBuffer.append('\'');
            return true;
        } else if (entityName == fLtSymbol) {
            stringBuffer.append('<');
            return true;
        } else if (entityName == fGtSymbol) {
            // NOTE(review): only "gt" and "quot" are counted against the
            // entity limit here; "amp", "apos" and "lt" are not - confirm
            // whether the asymmetry is intended.
            checkEntityLimit(false, fEntityScanner.fCurrentEntity.name, 1);
            stringBuffer.append('>');
            return true;
        } else if (entityName == fQuotSymbol) {
            checkEntityLimit(false, fEntityScanner.fCurrentEntity.name, 1);
            stringBuffer.append('"');
            return true;
        }
        return false;
    }

    /**
     * Scans External ID and return the public and system IDs.
1051 * 1052 * @param identifiers An array of size 2 to return the system id, 1053 * and public id (in that order). 1054 * @param optionalSystemId Specifies whether the system id is optional. 1055 * 1056 * <strong>Note:</strong> This method uses fString and fStringBuffer, 1057 * anything in them at the time of calling is lost. 1058 */ scanExternalID(String[] identifiers, boolean optionalSystemId)1059 protected void scanExternalID(String[] identifiers, 1060 boolean optionalSystemId) 1061 throws IOException, XNIException { 1062 1063 String systemId = null; 1064 String publicId = null; 1065 if (fEntityScanner.skipString("PUBLIC")) { 1066 if (!fEntityScanner.skipSpaces()) { 1067 reportFatalError("SpaceRequiredAfterPUBLIC", null); 1068 } 1069 scanPubidLiteral(fString); 1070 publicId = fString.toString(); 1071 1072 if (!fEntityScanner.skipSpaces() && !optionalSystemId) { 1073 reportFatalError("SpaceRequiredBetweenPublicAndSystem", null); 1074 } 1075 } 1076 1077 if (publicId != null || fEntityScanner.skipString("SYSTEM")) { 1078 if (publicId == null && !fEntityScanner.skipSpaces()) { 1079 reportFatalError("SpaceRequiredAfterSYSTEM", null); 1080 } 1081 int quote = fEntityScanner.peekChar(); 1082 if (quote != '\'' && quote != '"') { 1083 if (publicId != null && optionalSystemId) { 1084 // looks like we don't have any system id 1085 // simply return the public id 1086 identifiers[0] = null; 1087 identifiers[1] = publicId; 1088 return; 1089 } 1090 reportFatalError("QuoteRequiredInSystemID", null); 1091 } 1092 fEntityScanner.scanChar(null); 1093 XMLString ident = fString; 1094 if (fEntityScanner.scanLiteral(quote, ident, false) != quote) { 1095 fStringBuffer.clear(); 1096 do { 1097 fStringBuffer.append(ident); 1098 int c = fEntityScanner.peekChar(); 1099 if (XMLChar.isMarkup(c) || c == ']') { 1100 fStringBuffer.append((char)fEntityScanner.scanChar(null)); 1101 } else if (c != -1 && isInvalidLiteral(c)) { 1102 reportFatalError("InvalidCharInSystemID", 1103 new Object[] 
{Integer.toString(c, 16)}); 1104 } 1105 } while (fEntityScanner.scanLiteral(quote, ident, false) != quote); 1106 fStringBuffer.append(ident); 1107 ident = fStringBuffer; 1108 } 1109 systemId = ident.toString(); 1110 if (!fEntityScanner.skipChar(quote, null)) { 1111 reportFatalError("SystemIDUnterminated", null); 1112 } 1113 } 1114 1115 // store result in array 1116 identifiers[0] = systemId; 1117 identifiers[1] = publicId; 1118 } 1119 1120 1121 /** 1122 * Scans public ID literal. 1123 * 1124 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" 1125 * [13] PubidChar::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] 1126 * 1127 * The returned string is normalized according to the following rule, 1128 * from http://www.w3.org/TR/REC-xml#dt-pubid: 1129 * 1130 * Before a match is attempted, all strings of white space in the public 1131 * identifier must be normalized to single space characters (#x20), and 1132 * leading and trailing white space must be removed. 1133 * 1134 * @param literal The string to fill in with the public ID literal. 1135 * @return True on success. 1136 * 1137 * <strong>Note:</strong> This method uses fStringBuffer, anything in it at 1138 * the time of calling is lost. 
1139 */ scanPubidLiteral(XMLString literal)1140 protected boolean scanPubidLiteral(XMLString literal) 1141 throws IOException, XNIException { 1142 int quote = fEntityScanner.scanChar(null); 1143 if (quote != '\'' && quote != '"') { 1144 reportFatalError("QuoteRequiredInPublicID", null); 1145 return false; 1146 } 1147 1148 fStringBuffer.clear(); 1149 // skip leading whitespace 1150 boolean skipSpace = true; 1151 boolean dataok = true; 1152 while (true) { 1153 int c = fEntityScanner.scanChar(null); 1154 if (c == ' ' || c == '\n' || c == '\r') { 1155 if (!skipSpace) { 1156 // take the first whitespace as a space and skip the others 1157 fStringBuffer.append(' '); 1158 skipSpace = true; 1159 } 1160 } else if (c == quote) { 1161 if (skipSpace) { 1162 // if we finished on a space let's trim it 1163 fStringBuffer.length--; 1164 } 1165 literal.setValues(fStringBuffer); 1166 break; 1167 } else if (XMLChar.isPubid(c)) { 1168 fStringBuffer.append((char)c); 1169 skipSpace = false; 1170 } else if (c == -1) { 1171 reportFatalError("PublicIDUnterminated", null); 1172 return false; 1173 } else { 1174 dataok = false; 1175 reportFatalError("InvalidCharInPublicID", 1176 new Object[]{Integer.toHexString(c)}); 1177 } 1178 } 1179 return dataok; 1180 } 1181 1182 1183 /** 1184 * Normalize whitespace in an XMLString converting all whitespace 1185 * characters to space characters. 1186 */ normalizeWhitespace(XMLString value)1187 protected void normalizeWhitespace(XMLString value) { 1188 int i=0; 1189 int j=0; 1190 int [] buff = fEntityScanner.whiteSpaceLookup; 1191 int buffLen = fEntityScanner.whiteSpaceLen; 1192 int end = value.offset + value.length; 1193 while(i < buffLen){ 1194 j = buff[i]; 1195 if(j < end ){ 1196 value.ch[j] = ' '; 1197 } 1198 i++; 1199 } 1200 } 1201 1202 // 1203 // XMLEntityHandler methods 1204 // 1205 1206 /** 1207 * This method notifies of the start of an entity. 
The document entity 1208 * has the pseudo-name of "[xml]" the DTD has the pseudo-name of "[dtd]" 1209 * parameter entity names start with '%'; and general entities are just 1210 * specified by their name. 1211 * 1212 * @param name The name of the entity. 1213 * @param identifier The resource identifier. 1214 * @param encoding The auto-detected IANA encoding name of the entity 1215 * stream. This value will be null in those situations 1216 * where the entity encoding is not auto-detected (e.g. 1217 * internal entities or a document entity that is 1218 * parsed from a java.io.Reader). 1219 * 1220 * @throws XNIException Thrown by handler to signal an error. 1221 */ startEntity(String name, XMLResourceIdentifier identifier, String encoding, Augmentations augs)1222 public void startEntity(String name, 1223 XMLResourceIdentifier identifier, 1224 String encoding, Augmentations augs) throws XNIException { 1225 1226 // keep track of the entity depth 1227 fEntityDepth++; 1228 // must reset entity scanner 1229 fEntityScanner = fEntityManager.getEntityScanner(); 1230 fEntityStore = fEntityManager.getEntityStore() ; 1231 } // startEntity(String,XMLResourceIdentifier,String) 1232 1233 /** 1234 * This method notifies the end of an entity. The document entity has 1235 * the pseudo-name of "[xml]" the DTD has the pseudo-name of "[dtd]" 1236 * parameter entity names start with '%'; and general entities are just 1237 * specified by their name. 1238 * 1239 * @param name The name of the entity. 1240 * 1241 * @throws XNIException Thrown by handler to signal an error. 1242 */ endEntity(String name, Augmentations augs)1243 public void endEntity(String name, Augmentations augs) throws IOException, XNIException { 1244 // keep track of the entity depth 1245 if (fEntityDepth > 0) { 1246 fEntityDepth--; 1247 } 1248 } // endEntity(String) 1249 1250 /** 1251 * Scans a character reference and append the corresponding chars to the 1252 * specified buffer. 
1253 * 1254 * <p> 1255 * <pre> 1256 * [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';' 1257 * </pre> 1258 * 1259 * <strong>Note:</strong> This method uses fStringBuffer, anything in it 1260 * at the time of calling is lost. 1261 * 1262 * @param buf the character buffer to append chars to 1263 * @param buf2 the character buffer to append non-normalized chars to 1264 * 1265 * @return the character value or (-1) on conversion failure 1266 */ scanCharReferenceValue(XMLStringBuffer buf, XMLStringBuffer buf2)1267 protected int scanCharReferenceValue(XMLStringBuffer buf, XMLStringBuffer buf2) 1268 throws IOException, XNIException { 1269 int initLen = buf.length; 1270 // scan hexadecimal value 1271 boolean hex = false; 1272 if (fEntityScanner.skipChar('x', NameType.REFERENCE)) { 1273 if (buf2 != null) { buf2.append('x'); } 1274 hex = true; 1275 fStringBuffer3.clear(); 1276 boolean digit = true; 1277 1278 int c = fEntityScanner.peekChar(); 1279 digit = (c >= '0' && c <= '9') || 1280 (c >= 'a' && c <= 'f') || 1281 (c >= 'A' && c <= 'F'); 1282 if (digit) { 1283 if (buf2 != null) { buf2.append((char)c); } 1284 fEntityScanner.scanChar(NameType.REFERENCE); 1285 fStringBuffer3.append((char)c); 1286 1287 do { 1288 c = fEntityScanner.peekChar(); 1289 digit = (c >= '0' && c <= '9') || 1290 (c >= 'a' && c <= 'f') || 1291 (c >= 'A' && c <= 'F'); 1292 if (digit) { 1293 if (buf2 != null) { buf2.append((char)c); } 1294 fEntityScanner.scanChar(NameType.REFERENCE); 1295 fStringBuffer3.append((char)c); 1296 } 1297 } while (digit); 1298 } else { 1299 reportFatalError("HexdigitRequiredInCharRef", null); 1300 } 1301 } 1302 1303 // scan decimal value 1304 else { 1305 fStringBuffer3.clear(); 1306 boolean digit = true; 1307 1308 int c = fEntityScanner.peekChar(); 1309 digit = c >= '0' && c <= '9'; 1310 if (digit) { 1311 if (buf2 != null) { buf2.append((char)c); } 1312 fEntityScanner.scanChar(NameType.REFERENCE); 1313 fStringBuffer3.append((char)c); 1314 1315 do { 1316 c = 
fEntityScanner.peekChar(); 1317 digit = c >= '0' && c <= '9'; 1318 if (digit) { 1319 if (buf2 != null) { buf2.append((char)c); } 1320 fEntityScanner.scanChar(NameType.REFERENCE); 1321 fStringBuffer3.append((char)c); 1322 } 1323 } while (digit); 1324 } else { 1325 reportFatalError("DigitRequiredInCharRef", null); 1326 } 1327 } 1328 1329 // end 1330 if (!fEntityScanner.skipChar(';', NameType.REFERENCE)) { 1331 reportFatalError("SemicolonRequiredInCharRef", null); 1332 } 1333 if (buf2 != null) { buf2.append(';'); } 1334 1335 // convert string to number 1336 int value = -1; 1337 try { 1338 value = Integer.parseInt(fStringBuffer3.toString(), 1339 hex ? 16 : 10); 1340 1341 // character reference must be a valid XML character 1342 if (isInvalid(value)) { 1343 StringBuffer errorBuf = new StringBuffer(fStringBuffer3.length + 1); 1344 if (hex) errorBuf.append('x'); 1345 errorBuf.append(fStringBuffer3.ch, fStringBuffer3.offset, fStringBuffer3.length); 1346 reportFatalError("InvalidCharRef", 1347 new Object[]{errorBuf.toString()}); 1348 } 1349 } catch (NumberFormatException e) { 1350 // Conversion failed, let -1 value drop through. 1351 // If we end up here, the character reference was invalid. 1352 StringBuffer errorBuf = new StringBuffer(fStringBuffer3.length + 1); 1353 if (hex) errorBuf.append('x'); 1354 errorBuf.append(fStringBuffer3.ch, fStringBuffer3.offset, fStringBuffer3.length); 1355 reportFatalError("InvalidCharRef", 1356 new Object[]{errorBuf.toString()}); 1357 } 1358 1359 // append corresponding chars to the given buffer 1360 if (!XMLChar.isSupplemental(value)) { 1361 buf.append((char) value); 1362 } else { 1363 // character is supplemental, split it into surrogate chars 1364 buf.append(XMLChar.highSurrogate(value)); 1365 buf.append(XMLChar.lowSurrogate(value)); 1366 } 1367 1368 // char refs notification code 1369 if (fNotifyCharRefs && value != -1) { 1370 String literal = "#" + (hex ? 
"x" : "") + fStringBuffer3.toString(); 1371 if (!fScanningAttribute) { 1372 fCharRefLiteral = literal; 1373 } 1374 } 1375 1376 if (fEntityScanner.fCurrentEntity.isGE) { 1377 checkEntityLimit(false, fEntityScanner.fCurrentEntity.name, buf.length - initLen); 1378 } 1379 return value; 1380 } 1381 // returns true if the given character is not 1382 // valid with respect to the version of 1383 // XML understood by this scanner. isInvalid(int value)1384 protected boolean isInvalid(int value) { 1385 return (XMLChar.isInvalid(value)); 1386 } // isInvalid(int): boolean 1387 1388 // returns true if the given character is not 1389 // valid or may not be used outside a character reference 1390 // with respect to the version of XML understood by this scanner. isInvalidLiteral(int value)1391 protected boolean isInvalidLiteral(int value) { 1392 return (XMLChar.isInvalid(value)); 1393 } // isInvalidLiteral(int): boolean 1394 1395 // returns true if the given character is 1396 // a valid nameChar with respect to the version of 1397 // XML understood by this scanner. isValidNameChar(int value)1398 protected boolean isValidNameChar(int value) { 1399 return (XMLChar.isName(value)); 1400 } // isValidNameChar(int): boolean 1401 1402 // returns true if the given character is 1403 // a valid NCName character with respect to the version of 1404 // XML understood by this scanner. isValidNCName(int value)1405 protected boolean isValidNCName(int value) { 1406 return (XMLChar.isNCName(value)); 1407 } // isValidNCName(int): boolean 1408 1409 // returns true if the given character is 1410 // a valid nameStartChar with respect to the version of 1411 // XML understood by this scanner. 
isValidNameStartChar(int value)1412 protected boolean isValidNameStartChar(int value) { 1413 return (XMLChar.isNameStart(value)); 1414 } // isValidNameStartChar(int): boolean 1415 1416 // returns true if the given character is 1417 // a valid high surrogate for a nameStartChar 1418 // with respect to the version of XML understood 1419 // by this scanner. isValidNameStartHighSurrogate(int value)1420 protected boolean isValidNameStartHighSurrogate(int value) { 1421 return false; 1422 } // isValidNameStartHighSurrogate(int): boolean 1423 versionSupported(String version )1424 protected boolean versionSupported(String version ) { 1425 return version.equals("1.0") || version.equals("1.1"); 1426 } // version Supported 1427 1428 /** 1429 * Scans surrogates and append them to the specified buffer. 1430 * <p> 1431 * <strong>Note:</strong> This assumes the current char has already been 1432 * identified as a high surrogate. 1433 * 1434 * @param buf The StringBuffer to append the read surrogates to. 1435 * @return True if it succeeded. 
1436 */ scanSurrogates(XMLStringBuffer buf)1437 protected boolean scanSurrogates(XMLStringBuffer buf) 1438 throws IOException, XNIException { 1439 1440 int high = fEntityScanner.scanChar(null); 1441 int low = fEntityScanner.peekChar(); 1442 if (!XMLChar.isLowSurrogate(low)) { 1443 reportFatalError("InvalidCharInContent", 1444 new Object[] {Integer.toString(high, 16)}); 1445 return false; 1446 } 1447 fEntityScanner.scanChar(null); 1448 1449 // convert surrogates to supplemental character 1450 int c = XMLChar.supplemental((char)high, (char)low); 1451 1452 // supplemental character must be a valid XML character 1453 if (isInvalid(c)) { 1454 reportFatalError("InvalidCharInContent", 1455 new Object[]{Integer.toString(c, 16)}); 1456 return false; 1457 } 1458 1459 // fill in the buffer 1460 buf.append((char)high); 1461 buf.append((char)low); 1462 1463 return true; 1464 1465 } // scanSurrogates():boolean 1466 1467 1468 /** 1469 * Convenience function used in all XML scanners. 1470 */ reportFatalError(String msgId, Object[] args)1471 protected void reportFatalError(String msgId, Object[] args) 1472 throws XNIException { 1473 fErrorReporter.reportError(fEntityScanner, XMLMessageFormatter.XML_DOMAIN, 1474 msgId, args, 1475 XMLErrorReporter.SEVERITY_FATAL_ERROR); 1476 } 1477 1478 // private methods init()1479 private void init() { 1480 // initialize scanner 1481 fEntityScanner = null; 1482 // initialize vars 1483 fEntityDepth = 0; 1484 fReportEntity = true; 1485 fResourceIdentifier.clear(); 1486 1487 if(!fAttributeCacheInitDone){ 1488 for(int i = 0; i < initialCacheCount; i++){ 1489 attributeValueCache.add(new XMLString()); 1490 stringBufferCache.add(new XMLStringBuffer()); 1491 } 1492 fAttributeCacheInitDone = true; 1493 } 1494 fStringBufferIndex = 0; 1495 fAttributeCacheUsedCount = 0; 1496 1497 } 1498 getStringBuffer()1499 XMLStringBuffer getStringBuffer(){ 1500 if((fStringBufferIndex < initialCacheCount )|| (fStringBufferIndex < stringBufferCache.size())){ 1501 return 
stringBufferCache.get(fStringBufferIndex++); 1502 }else{ 1503 XMLStringBuffer tmpObj = new XMLStringBuffer(); 1504 fStringBufferIndex++; 1505 stringBufferCache.add(tmpObj); 1506 return tmpObj; 1507 } 1508 } 1509 1510 /** 1511 * Add the count of the content buffer and check if the accumulated 1512 * value exceeds the limit 1513 * @param isPEDecl a flag to indicate whether the entity is parameter 1514 * @param entityName entity name 1515 * @param buffer content buffer 1516 */ checkEntityLimit(boolean isPEDecl, String entityName, XMLString buffer)1517 void checkEntityLimit(boolean isPEDecl, String entityName, XMLString buffer) { 1518 checkEntityLimit(isPEDecl, entityName, buffer.length); 1519 } 1520 1521 /** 1522 * Add the count and check limit 1523 * @param isPEDecl a flag to indicate whether the entity is parameter 1524 * @param entityName entity name 1525 * @param len length of the buffer 1526 */ checkEntityLimit(boolean isPEDecl, String entityName, int len)1527 void checkEntityLimit(boolean isPEDecl, String entityName, int len) { 1528 if (fLimitAnalyzer == null) { 1529 fLimitAnalyzer = fEntityManager.fLimitAnalyzer; 1530 } 1531 if (isPEDecl) { 1532 fLimitAnalyzer.addValue(XMLSecurityManager.Limit.PARAMETER_ENTITY_SIZE_LIMIT, "%" + entityName, len); 1533 if (fSecurityManager.isOverLimit(XMLSecurityManager.Limit.PARAMETER_ENTITY_SIZE_LIMIT, fLimitAnalyzer)) { 1534 fSecurityManager.debugPrint(fLimitAnalyzer); 1535 reportFatalError("MaxEntitySizeLimit", new Object[]{"%" + entityName, 1536 fLimitAnalyzer.getValue(XMLSecurityManager.Limit.PARAMETER_ENTITY_SIZE_LIMIT), 1537 fSecurityManager.getLimit(XMLSecurityManager.Limit.PARAMETER_ENTITY_SIZE_LIMIT), 1538 fSecurityManager.getStateLiteral(XMLSecurityManager.Limit.PARAMETER_ENTITY_SIZE_LIMIT)}); 1539 } 1540 } else { 1541 fLimitAnalyzer.addValue(XMLSecurityManager.Limit.GENERAL_ENTITY_SIZE_LIMIT, entityName, len); 1542 if (fSecurityManager.isOverLimit(XMLSecurityManager.Limit.GENERAL_ENTITY_SIZE_LIMIT, fLimitAnalyzer)) { 
1543 fSecurityManager.debugPrint(fLimitAnalyzer); 1544 reportFatalError("MaxEntitySizeLimit", new Object[]{entityName, 1545 fLimitAnalyzer.getValue(XMLSecurityManager.Limit.GENERAL_ENTITY_SIZE_LIMIT), 1546 fSecurityManager.getLimit(XMLSecurityManager.Limit.GENERAL_ENTITY_SIZE_LIMIT), 1547 fSecurityManager.getStateLiteral(XMLSecurityManager.Limit.GENERAL_ENTITY_SIZE_LIMIT)}); 1548 } 1549 } 1550 if (fSecurityManager.isOverLimit(XMLSecurityManager.Limit.TOTAL_ENTITY_SIZE_LIMIT, fLimitAnalyzer)) { 1551 fSecurityManager.debugPrint(fLimitAnalyzer); 1552 reportFatalError("TotalEntitySizeLimit", 1553 new Object[]{fLimitAnalyzer.getTotalValue(XMLSecurityManager.Limit.TOTAL_ENTITY_SIZE_LIMIT), 1554 fSecurityManager.getLimit(XMLSecurityManager.Limit.TOTAL_ENTITY_SIZE_LIMIT), 1555 fSecurityManager.getStateLiteral(XMLSecurityManager.Limit.TOTAL_ENTITY_SIZE_LIMIT)}); 1556 } 1557 } 1558 } // class XMLScanner 1559