1 /* 2 * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved. 3 */ 4 /* 5 * Licensed to the Apache Software Foundation (ASF) under one or more 6 * contributor license agreements. See the NOTICE file distributed with 7 * this work for additional information regarding copyright ownership. 8 * The ASF licenses this file to You under the Apache License, Version 2.0 9 * (the "License"); you may not use this file except in compliance with 10 * the License. You may obtain a copy of the License at 11 * 12 * http://www.apache.org/licenses/LICENSE-2.0 13 * 14 * Unless required by applicable law or agreed to in writing, software 15 * distributed under the License is distributed on an "AS IS" BASIS, 16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 * See the License for the specific language governing permissions and 18 * limitations under the License. 19 */ 20 21 package com.sun.org.apache.xerces.internal.impl; 22 23 import com.sun.org.apache.xerces.internal.util.Status; 24 import com.sun.xml.internal.stream.XMLEntityStorage; 25 import java.io.IOException; 26 import java.util.ArrayList; 27 import javax.xml.stream.events.XMLEvent; 28 import com.sun.org.apache.xerces.internal.impl.msg.XMLMessageFormatter; 29 import com.sun.org.apache.xerces.internal.util.SymbolTable; 30 import com.sun.org.apache.xerces.internal.util.XMLChar; 31 import com.sun.org.apache.xerces.internal.util.XMLResourceIdentifierImpl; 32 import com.sun.org.apache.xerces.internal.util.XMLStringBuffer; 33 import com.sun.org.apache.xerces.internal.utils.XMLLimitAnalyzer; 34 import com.sun.org.apache.xerces.internal.utils.XMLSecurityManager; 35 import com.sun.org.apache.xerces.internal.xni.Augmentations; 36 import com.sun.org.apache.xerces.internal.xni.XMLAttributes; 37 import com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier; 38 import com.sun.org.apache.xerces.internal.xni.XMLString; 39 import com.sun.org.apache.xerces.internal.xni.XNIException; 40 
import com.sun.org.apache.xerces.internal.xni.parser.XMLComponent; 41 import com.sun.org.apache.xerces.internal.xni.parser.XMLComponentManager; 42 import com.sun.org.apache.xerces.internal.xni.parser.XMLConfigurationException; 43 import com.sun.xml.internal.stream.Entity; 44 45 //import com.sun.xml.stream.XMLEntityManager; 46 //import com.sun.org.apache.xerces.internal.impl.XMLErrorReporter; 47 48 /** 49 * This class is responsible for holding scanning methods common to 50 * scanning the XML document structure and content as well as the DTD 51 * structure and content. Both XMLDocumentScanner and XMLDTDScanner inherit 52 * from this base class. 53 * 54 * <p> 55 * This component requires the following features and properties from the 56 * component manager that uses it: 57 * <ul> 58 * <li>http://xml.org/sax/features/validation</li> 59 * <li>http://apache.org/xml/features/scanner/notify-char-refs</li> 60 * <li>http://apache.org/xml/properties/internal/symbol-table</li> 61 * <li>http://apache.org/xml/properties/internal/error-reporter</li> 62 * <li>http://apache.org/xml/properties/internal/entity-manager</li> 63 * </ul> 64 * 65 * @author Andy Clark, IBM 66 * @author Arnaud Le Hors, IBM 67 * @author Eric Ye, IBM 68 * @author K.Venugopal SUN Microsystems 69 * @author Sunitha Reddy, SUN Microsystems 70 * @LastModified: Feb 2020 71 */ 72 public abstract class XMLScanner 73 implements XMLComponent { 74 75 // 76 // Constants 77 // 78 79 // feature identifiers 80 81 /** Feature identifier: namespaces. */ 82 protected static final String NAMESPACES = 83 Constants.SAX_FEATURE_PREFIX + Constants.NAMESPACES_FEATURE; 84 85 /** Feature identifier: validation. */ 86 protected static final String VALIDATION = 87 Constants.SAX_FEATURE_PREFIX + Constants.VALIDATION_FEATURE; 88 89 /** Feature identifier: notify character references. 
*/
protected static final String NOTIFY_CHAR_REFS =
        Constants.XERCES_FEATURE_PREFIX + Constants.NOTIFY_CHAR_REFS_FEATURE;

// property identifiers

/**
 * Feature identifier: parser settings. Although it is declared in the
 * property section, it is built from the Xerces <em>feature</em> prefix and
 * is read with {@code getFeature} in {@code reset(XMLComponentManager)}.
 */
protected static final String PARSER_SETTINGS =
        Constants.XERCES_FEATURE_PREFIX + Constants.PARSER_SETTINGS;
/** Property identifier: symbol table. */
protected static final String SYMBOL_TABLE =
        Constants.XERCES_PROPERTY_PREFIX + Constants.SYMBOL_TABLE_PROPERTY;

/** Property identifier: error reporter. */
protected static final String ERROR_REPORTER =
        Constants.XERCES_PROPERTY_PREFIX + Constants.ERROR_REPORTER_PROPERTY;

/** Property identifier: entity manager. */
protected static final String ENTITY_MANAGER =
        Constants.XERCES_PROPERTY_PREFIX + Constants.ENTITY_MANAGER_PROPERTY;

/** Property identifier: Security manager. */
private static final String SECURITY_MANAGER = Constants.SECURITY_MANAGER;

// debugging

/**
 * Debug attribute normalization. When set, {@code scanAttributeValue}
 * prints intermediate values to {@code System.out}.
 */
protected static final boolean DEBUG_ATTR_NORMALIZATION = false;

/**
 * Type of names. The constants are handed to entity-scanner calls in this
 * class (for example {@code scanChar}, {@code skipChar} and
 * {@code scanName}) to say what kind of construct is being scanned. Each
 * constant carries a short literal string.
 */
public static enum NameType {
    ATTRIBUTE("attribute"),
    ATTRIBUTENAME("attribute name"),
    COMMENT("comment"),
    DOCTYPE("doctype"),
    ELEMENTSTART("startelement"),
    ELEMENTEND("endelement"),
    ENTITY("entity"),
    NOTATION("notation"),
    PI("pi"),
    REFERENCE("reference");

    /** Literal string associated with this name type. */
    final String literal;

    /**
     * Creates a name type.
     *
     * @param literal the literal string stored for this constant
     */
    NameType(String literal) {
        this.literal = literal;
    }

    /**
     * Returns the literal string given at construction.
     *
     * @return the literal for this name type
     */
    String literal() {
        return literal;
    }
}

//xxx: setting the default value as false, as we don't need to calculate this value
//we should have a feature when set to true computes this value
// When false, scanAttributeValue never fills its nonNormalizedValue argument.
private boolean fNeedNonNormalizedValue = false;

// NOTE(review): these caches and counters are not referenced by the methods
// in this part of the file; presumably they back getStringBuffer() -- confirm.
protected ArrayList<XMLString> attributeValueCache = new ArrayList<>();
protected ArrayList<XMLStringBuffer> stringBufferCache = new ArrayList<>();
protected int fStringBufferIndex = 0;
protected boolean fAttributeCacheInitDone = false;
protected int fAttributeCacheUsedCount = 0;

//
// Data
//

// features

/**
 * Validation. This feature identifier is:
 * http://xml.org/sax/features/validation
 */
protected boolean fValidation = false;

/** Namespaces. */
protected boolean fNamespaces;

/** Character references notification. */
protected boolean fNotifyCharRefs = false;

/** Internal parser-settings feature */
protected boolean fParserSettings = true;

// properties

/** Property manager, stored by {@code setPropertyManager}. */
protected PropertyManager fPropertyManager = null ;
/** Symbol table. */
protected SymbolTable fSymbolTable;

/** Error reporter. */
protected XMLErrorReporter fErrorReporter;

/** Entity manager. */
//protected XMLEntityManager fEntityManager = PropertyManager.getEntityManager();
protected XMLEntityManager fEntityManager = null ;

/** xxx this should be available from EntityManager Entity storage */
protected XMLEntityStorage fEntityStore = null ;

/** Security manager. */
protected XMLSecurityManager fSecurityManager = null;

/** Limit analyzer. */
protected XMLLimitAnalyzer fLimitAnalyzer = null;

// protected data

/** event type */
protected XMLEvent fEvent ;

/** Entity scanner, this always works on last entity that was opened. */
protected XMLEntityScanner fEntityScanner = null;

/** Entity depth. */
protected int fEntityDepth;

/** Literal value of the last character reference scanned. */
protected String fCharRefLiteral = null;

/** Scanning attribute. */
protected boolean fScanningAttribute;

/** Report entity boundary. */
protected boolean fReportEntity;

// symbols
// The symbols are interned so that they can be compared by reference
// (see resolveCharacter).

/** Symbol: "version". */
protected final static String fVersionSymbol = "version".intern();

/** Symbol: "encoding". */
protected final static String fEncodingSymbol = "encoding".intern();

/** Symbol: "standalone". */
protected final static String fStandaloneSymbol = "standalone".intern();

/** Symbol: "amp". */
protected final static String fAmpSymbol = "amp".intern();

/** Symbol: "lt". */
protected final static String fLtSymbol = "lt".intern();

/** Symbol: "gt". */
protected final static String fGtSymbol = "gt".intern();

/** Symbol: "quot". */
protected final static String fQuotSymbol = "quot".intern();

/** Symbol: "apos". */
protected final static String fAposSymbol = "apos".intern();

// temporary variables

// NOTE: These objects are private to help prevent accidental modification
//       of values by a subclass. If they were protected *and* a subclass
//       modified the values, it would be difficult to track down the real
//       cause of the bug. By making these private, we avoid this
//       possibility.

/** String. Receives pseudo-attribute values in scanXMLDeclOrTextDecl. */
private XMLString fString = new XMLString();

/** String buffer. */
private XMLStringBuffer fStringBuffer = new XMLStringBuffer();

/**
 * String buffer. Used as scratch space by scanPseudoAttribute and for the
 * non-normalized value in scanAttributeValue.
 */
private XMLStringBuffer fStringBuffer2 = new XMLStringBuffer();

/** String buffer. Holds surrogate pairs scanned in scanAttributeValue. */
private XMLStringBuffer fStringBuffer3 = new XMLStringBuffer();

// temporary location for Resource identification information.
protected XMLResourceIdentifierImpl fResourceIdentifier = new XMLResourceIdentifierImpl();
// NOTE(review): presumably the initial size of the caches declared above -- confirm.
int initialCacheCount = 6;
//
// XMLComponent methods
//

/**
 * Resets this component from the given component manager. When the
 * parser-settings feature is {@code false} only {@code init()} is called;
 * otherwise the properties and features are re-read first.
 *
 * @param componentManager The component manager.
 *
 * @throws XMLConfigurationException Throws exception if required features and
 *                      properties cannot be found.
275 */ reset(XMLComponentManager componentManager)276 public void reset(XMLComponentManager componentManager) 277 throws XMLConfigurationException { 278 279 fParserSettings = componentManager.getFeature(PARSER_SETTINGS, true); 280 281 if (!fParserSettings) { 282 // parser settings have not been changed 283 init(); 284 return; 285 } 286 287 288 // Xerces properties 289 fSymbolTable = (SymbolTable)componentManager.getProperty(SYMBOL_TABLE); 290 fErrorReporter = (XMLErrorReporter)componentManager.getProperty(ERROR_REPORTER); 291 fEntityManager = (XMLEntityManager)componentManager.getProperty(ENTITY_MANAGER); 292 fSecurityManager = (XMLSecurityManager)componentManager.getProperty(SECURITY_MANAGER); 293 294 //this step is extra because we have separated the storage of entity 295 fEntityStore = fEntityManager.getEntityStore() ; 296 297 // sax features 298 fValidation = componentManager.getFeature(VALIDATION, false); 299 fNamespaces = componentManager.getFeature(NAMESPACES, true); 300 fNotifyCharRefs = componentManager.getFeature(NOTIFY_CHAR_REFS, false); 301 302 init(); 303 } // reset(XMLComponentManager) 304 setPropertyManager(PropertyManager propertyManager)305 protected void setPropertyManager(PropertyManager propertyManager){ 306 fPropertyManager = propertyManager ; 307 } 308 309 /** 310 * Sets the value of a property during parsing. 
311 * 312 * @param propertyId 313 * @param value 314 */ setProperty(String propertyId, Object value)315 public void setProperty(String propertyId, Object value) 316 throws XMLConfigurationException { 317 318 // Xerces properties 319 if (propertyId.startsWith(Constants.XERCES_PROPERTY_PREFIX)) { 320 String property = 321 propertyId.substring(Constants.XERCES_PROPERTY_PREFIX.length()); 322 if (property.equals(Constants.SYMBOL_TABLE_PROPERTY)) { 323 fSymbolTable = (SymbolTable)value; 324 } else if (property.equals(Constants.ERROR_REPORTER_PROPERTY)) { 325 fErrorReporter = (XMLErrorReporter)value; 326 } else if (property.equals(Constants.ENTITY_MANAGER_PROPERTY)) { 327 fEntityManager = (XMLEntityManager)value; 328 } 329 } 330 331 if (propertyId.equals(SECURITY_MANAGER)) { 332 fSecurityManager = (XMLSecurityManager)value; 333 } 334 /*else if(propertyId.equals(Constants.STAX_PROPERTIES)){ 335 fStaxProperties = (HashMap)value; 336 //TODO::discuss with neeraj what are his thoughts on passing properties. 337 //For now use this 338 }*/ 339 340 } // setProperty(String,Object) 341 342 /* 343 * Sets the feature of the scanner. 344 */ setFeature(String featureId, boolean value)345 public void setFeature(String featureId, boolean value) 346 throws XMLConfigurationException { 347 348 if (VALIDATION.equals(featureId)) { 349 fValidation = value; 350 } else if (NOTIFY_CHAR_REFS.equals(featureId)) { 351 fNotifyCharRefs = value; 352 } 353 } 354 355 /* 356 * Gets the state of the feature of the scanner. 357 */ getFeature(String featureId)358 public boolean getFeature(String featureId) 359 throws XMLConfigurationException { 360 361 if (VALIDATION.equals(featureId)) { 362 return fValidation; 363 } else if (NOTIFY_CHAR_REFS.equals(featureId)) { 364 return fNotifyCharRefs; 365 } 366 throw new XMLConfigurationException(Status.NOT_RECOGNIZED, featureId); 367 } 368 369 // 370 // Protected methods 371 // 372 373 // anybody calling this had better have set Symtoltable! 
reset()374 protected void reset() { 375 init(); 376 377 // DTD preparsing defaults: 378 fValidation = true; 379 fNotifyCharRefs = false; 380 381 } 382 reset(PropertyManager propertyManager)383 public void reset(PropertyManager propertyManager) { 384 init(); 385 // Xerces properties 386 fSymbolTable = (SymbolTable)propertyManager.getProperty(Constants.XERCES_PROPERTY_PREFIX + Constants.SYMBOL_TABLE_PROPERTY); 387 388 fErrorReporter = (XMLErrorReporter)propertyManager.getProperty(Constants.XERCES_PROPERTY_PREFIX + Constants.ERROR_REPORTER_PROPERTY); 389 390 fEntityManager = (XMLEntityManager)propertyManager.getProperty(ENTITY_MANAGER); 391 fEntityStore = fEntityManager.getEntityStore() ; 392 fEntityScanner = fEntityManager.getEntityScanner() ; 393 fSecurityManager = (XMLSecurityManager)propertyManager.getProperty(SECURITY_MANAGER); 394 395 //fEntityManager.reset(); 396 // DTD preparsing defaults: 397 fValidation = false; 398 fNotifyCharRefs = false; 399 400 } 401 // common scanning methods 402 403 /** 404 * Scans an XML or text declaration. 405 * <p> 406 * <pre> 407 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' 408 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ") 409 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" ) 410 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* 411 * [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") 412 * | ('"' ('yes' | 'no') '"')) 413 * 414 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>' 415 * </pre> 416 * 417 * @param scanningTextDecl True if a text declaration is to 418 * be scanned instead of an XML 419 * declaration. 420 * @param pseudoAttributeValues An array of size 3 to return the version, 421 * encoding and standalone pseudo attribute values 422 * (in that order). 423 * 424 * <strong>Note:</strong> This method uses fString, anything in it 425 * at the time of calling is lost. 
*/
protected void scanXMLDeclOrTextDecl(boolean scanningTextDecl,
        String[] pseudoAttributeValues)
        throws IOException, XNIException {

    // pseudo-attribute values
    String version = null;
    String encoding = null;
    String standalone = null;

    // scan pseudo-attributes
    // The states name the pseudo-attribute expected next; they only ever
    // advance (VERSION -> ENCODING -> STANDALONE -> DONE), which enforces
    // the order required by the grammar.
    final int STATE_VERSION = 0;
    final int STATE_ENCODING = 1;
    final int STATE_STANDALONE = 2;
    final int STATE_DONE = 3;
    int state = STATE_VERSION;

    // true as soon as anything other than '?' follows the target
    boolean dataFoundForTarget = false;
    // whether whitespace preceded the pseudo-attribute about to be scanned
    boolean sawSpace = fEntityScanner.skipSpaces();
    // since pseudoattributes are *not* attributes,
    // their quotes don't need to be preserved in external parameter entities.
    // the XMLEntityScanner#scanLiteral method will continue to
    // emit -1 in such cases when it finds a quote; this is
    // fine for other methods that parse scanned entities,
    // but not for the scanning of pseudoattributes.  So,
    // temporarily, we must mark the current entity as not being "literal"
    Entity.ScannedEntity currEnt = fEntityManager.getCurrentEntity();
    boolean currLiteral = currEnt.literal;
    currEnt.literal = false;
    while (fEntityScanner.peekChar() != '?') {
        dataFoundForTarget = true;
        // NOTE(review): scanPseudoAttribute returns null when no legal name
        // was found, after calling reportFatalError. If reportFatalError can
        // return normally, the name.equals(...) calls below would throw a
        // NullPointerException -- confirm against reportFatalError.
        String name = scanPseudoAttribute(scanningTextDecl, fString);
        switch (state) {
            case STATE_VERSION: {
                if (name.equals(fVersionSymbol)) {
                    if (!sawSpace) {
                        reportFatalError(scanningTextDecl
                                ? "SpaceRequiredBeforeVersionInTextDecl"
                                : "SpaceRequiredBeforeVersionInXMLDecl",
                                null);
                    }
                    version = fString.toString();
                    state = STATE_ENCODING;
                    if (!versionSupported(version)) {
                        reportFatalError("VersionNotSupported",
                                new Object[]{version});
                    }

                    if (version.equals("1.1")) {
                        // A 1.1 declaration is an error when the top-level
                        // entity has no version or is 1.0.
                        Entity.ScannedEntity top = fEntityManager.getTopLevelEntity();
                        if (top != null && (top.version == null || top.version.equals("1.0"))) {
                            reportFatalError("VersionMismatch", null);
                        }
                        fEntityManager.setScannerVersion(Constants.XML_VERSION_1_1);
                    }

                } else if (name.equals(fEncodingSymbol)) {
                    // 'encoding' first: only a text declaration may omit
                    // the version.
                    if (!scanningTextDecl) {
                        reportFatalError("VersionInfoRequired", null);
                    }
                    if (!sawSpace) {
                        reportFatalError(scanningTextDecl
                                ? "SpaceRequiredBeforeEncodingInTextDecl"
                                : "SpaceRequiredBeforeEncodingInXMLDecl",
                                null);
                    }
                    encoding = fString.toString();
                    // a text declaration has no 'standalone', so it is done
                    state = scanningTextDecl ? STATE_DONE : STATE_STANDALONE;
                } else {
                    if (scanningTextDecl) {
                        reportFatalError("EncodingDeclRequired", null);
                    } else {
                        reportFatalError("VersionInfoRequired", null);
                    }
                }
                break;
            }
            case STATE_ENCODING: {
                if (name.equals(fEncodingSymbol)) {
                    if (!sawSpace) {
                        reportFatalError(scanningTextDecl
                                ? "SpaceRequiredBeforeEncodingInTextDecl"
                                : "SpaceRequiredBeforeEncodingInXMLDecl",
                                null);
                    }
                    encoding = fString.toString();
                    state = scanningTextDecl ? STATE_DONE : STATE_STANDALONE;
                    // TODO: check encoding name; set encoding on
                    //       entity scanner
                } else if (!scanningTextDecl && name.equals(fStandaloneSymbol)) {
                    // in an XML declaration the encoding may be skipped
                    if (!sawSpace) {
                        reportFatalError("SpaceRequiredBeforeStandalone",
                                null);
                    }
                    standalone = fString.toString();
                    state = STATE_DONE;
                    if (!standalone.equals("yes") && !standalone.equals("no")) {
                        reportFatalError("SDDeclInvalid", new Object[] {standalone});
                    }
                } else {
                    reportFatalError("EncodingDeclRequired", null);
                }
                break;
            }
            case STATE_STANDALONE: {
                if (name.equals(fStandaloneSymbol)) {
                    if (!sawSpace) {
                        reportFatalError("SpaceRequiredBeforeStandalone",
                                null);
                    }
                    standalone = fString.toString();
                    state = STATE_DONE;
                    if (!standalone.equals("yes") && !standalone.equals("no")) {
                        reportFatalError("SDDeclInvalid", new Object[] {standalone});
                    }
                } else {
                    reportFatalError("SDDeclNameInvalid", null);
                }
                break;
            }
            default: {
                // STATE_DONE: nothing further is allowed
                reportFatalError("NoMorePseudoAttributes", null);
            }
        }
        sawSpace = fEntityScanner.skipSpaces();
    }
    // restore original literal value
    // (not reached if an exception propagated out of the loop above)
    if(currLiteral) {
        currEnt.literal = true;
    }
    // REVISIT: should we remove this error reporting?
    if (scanningTextDecl && state != STATE_DONE) {
        reportFatalError("MorePseudoAttributes", null);
    }

    // If there is no data in the xml or text decl then we fail to report error
    // for version or encoding info above.
    if (scanningTextDecl) {
        if (!dataFoundForTarget && encoding == null) {
            reportFatalError("EncodingDeclRequired", null);
        }
    } else {
        if (!dataFoundForTarget && version == null) {
            reportFatalError("VersionInfoRequired", null);
        }
    }

    // end
    // the declaration must be closed by "?>"
    if (!fEntityScanner.skipChar('?', null)) {
        reportFatalError("XMLDeclUnterminated", null);
    }
    if (!fEntityScanner.skipChar('>', null)) {
        reportFatalError("XMLDeclUnterminated", null);

    }

    // fill in return array
    pseudoAttributeValues[0] = version;
    pseudoAttributeValues[1] = encoding;
    pseudoAttributeValues[2] = standalone;

} // scanXMLDeclOrTextDecl(boolean)

/**
 * Scans a pseudo attribute.
 *
 * @param scanningTextDecl True if scanning this pseudo-attribute for a
 *                         TextDecl; false if scanning XMLDecl. This
 *                         flag is needed to report the correct type of
 *                         error.
 * @param value            The string to fill in with the attribute
 *                         value.
 *
 * @return The name of the attribute; this is <code>null</code> when no
 *         legal name was found and the reported fatal error did not
 *         abort the scan.
 *
 * <strong>Note:</strong> This method uses fStringBuffer2, anything in it
 * at the time of calling is lost.
 */
protected String scanPseudoAttribute(boolean scanningTextDecl,
        XMLString value)
        throws IOException, XNIException {

    String name = scanPseudoAttributeName();
    // XMLEntityManager.print(fEntityManager.getCurrentEntity());

    if (name == null) {
        reportFatalError("PseudoAttrNameExpected", null);
    }
    // Eq ::= S? '=' S?
    fEntityScanner.skipSpaces();
    if (!fEntityScanner.skipChar('=', null)) {
        reportFatalError(scanningTextDecl ? "EqRequiredInTextDecl"
                : "EqRequiredInXMLDecl", new Object[]{name});
    }
    fEntityScanner.skipSpaces();
    int quote = fEntityScanner.peekChar();
    if (quote != '\'' && quote != '"') {
        reportFatalError(scanningTextDecl ? "QuoteRequiredInTextDecl"
                : "QuoteRequiredInXMLDecl" , new Object[]{name});
    }
    // consume the opening quote
    fEntityScanner.scanChar(NameType.ATTRIBUTE);
    int c = fEntityScanner.scanLiteral(quote, value, false);
    if (c != quote) {
        // scanLiteral stopped before the closing quote: collect the pieces
        // in fStringBuffer2 until the quote is reached.
        fStringBuffer2.clear();
        do {
            fStringBuffer2.append(value);
            if (c != -1) {
                if (c == '&' || c == '%' || c == '<' || c == ']') {
                    // these characters are copied through unchanged
                    fStringBuffer2.append((char)fEntityScanner.scanChar(NameType.ATTRIBUTE));
                } else if (XMLChar.isHighSurrogate(c)) {
                    scanSurrogates(fStringBuffer2);
                } else if (isInvalidLiteral(c)) {
                    String key = scanningTextDecl
                            ? "InvalidCharInTextDecl" : "InvalidCharInXMLDecl";
                    reportFatalError(key,
                            new Object[] {Integer.toString(c, 16)});
                    // skip the offending character
                    fEntityScanner.scanChar(null);
                }
            }
            c = fEntityScanner.scanLiteral(quote, value, false);
        } while (c != quote);
        fStringBuffer2.append(value);
        value.setValues(fStringBuffer2);
    }
    if (!fEntityScanner.skipChar(quote, null)) {
        reportFatalError(scanningTextDecl ? "CloseQuoteMissingInTextDecl"
                : "CloseQuoteMissingInXMLDecl",
                new Object[]{name});
    }

    // return
    return name;

} // scanPseudoAttribute(XMLString):String

/**
 * Scans the name of a pseudo attribute. The only legal names
 * in XML 1.0/1.1 documents are 'version', 'encoding' and 'standalone'.
 *
 * @return the name of the pseudo attribute or <code>null</code>
 *         if a legal pseudo attribute name could not be scanned.
666 */ scanPseudoAttributeName()667 private String scanPseudoAttributeName() throws IOException, XNIException { 668 final int ch = fEntityScanner.peekChar(); 669 switch (ch) { 670 case 'v': 671 if (fEntityScanner.skipString(fVersionSymbol)) { 672 return fVersionSymbol; 673 } 674 break; 675 case 'e': 676 if (fEntityScanner.skipString(fEncodingSymbol)) { 677 return fEncodingSymbol; 678 } 679 break; 680 case 's': 681 if (fEntityScanner.skipString(fStandaloneSymbol)) { 682 return fStandaloneSymbol; 683 } 684 break; 685 } 686 return null; 687 } // scanPseudoAttributeName() 688 689 /** 690 * Scans a processing instruction. 691 * <p> 692 * <pre> 693 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' 694 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) 695 * </pre> 696 */ 697 //CHANGED: 698 //EARLIER: scanPI() 699 //NOW: scanPI(XMLStringBuffer) 700 //it makes things more easy if XMLStringBUffer is passed. Motivation for this change is same 701 // as that for scanContent() 702 scanPI(XMLStringBuffer data)703 protected void scanPI(XMLStringBuffer data) throws IOException, XNIException { 704 705 // target 706 fReportEntity = false; 707 String target = fEntityScanner.scanName(NameType.PI); 708 if (target == null) { 709 reportFatalError("PITargetRequired", null); 710 } 711 712 // scan data 713 scanPIData(target, data); 714 fReportEntity = true; 715 716 } // scanPI(XMLStringBuffer) 717 718 /** 719 * Scans a processing data. This is needed to handle the situation 720 * where a document starts with a processing instruction whose 721 * target name <em>starts with</em> "xml". (e.g. xmlfoo) 722 * 723 * This method would always read the whole data. We have while loop and data is buffered 724 * until delimeter is encountered. 
725 * 726 * @param target The PI target 727 * @param data The string to fill in with the data 728 */ 729 730 //CHANGED: 731 //Earlier:This method uses the fStringBuffer and later buffer values are set to 732 //the supplied XMLString.... 733 //Now: Changed the signature of this function to pass XMLStringBuffer.. and data would 734 //be appended to that buffer 735 scanPIData(String target, XMLStringBuffer data)736 protected void scanPIData(String target, XMLStringBuffer data) 737 throws IOException, XNIException { 738 739 // check target 740 if (target.length() == 3) { 741 char c0 = Character.toLowerCase(target.charAt(0)); 742 char c1 = Character.toLowerCase(target.charAt(1)); 743 char c2 = Character.toLowerCase(target.charAt(2)); 744 if (c0 == 'x' && c1 == 'm' && c2 == 'l') { 745 reportFatalError("ReservedPITarget", null); 746 } 747 } 748 749 // spaces 750 if (!fEntityScanner.skipSpaces()) { 751 if (fEntityScanner.skipString("?>")) { 752 // we found the end, there is no data just return 753 return; 754 } else { 755 // if there is data there should be some space 756 reportFatalError("SpaceRequiredInPI", null); 757 } 758 } 759 760 // since scanData appends the parsed data to the buffer passed 761 // a while loop would append the whole of parsed data to the buffer(data:XMLStringBuffer) 762 //until all of the data is buffered. 763 if (fEntityScanner.scanData("?>", data, 0)) { 764 do { 765 int c = fEntityScanner.peekChar(); 766 if (c != -1) { 767 if (XMLChar.isHighSurrogate(c)) { 768 scanSurrogates(data); 769 } else if (isInvalidLiteral(c)) { 770 reportFatalError("InvalidCharInPI", 771 new Object[]{Integer.toHexString(c)}); 772 fEntityScanner.scanChar(null); 773 } 774 } 775 } while (fEntityScanner.scanData("?>", data, 0)); 776 } 777 778 } // scanPIData(String,XMLString) 779 780 /** 781 * Scans a comment. 
782 * <p> 783 * <pre> 784 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' 785 * </pre> 786 * <p> 787 * <strong>Note:</strong> Called after scanning past '<!--' 788 * <strong>Note:</strong> This method uses fString, anything in it 789 * at the time of calling is lost. 790 * 791 * @param text The buffer to fill in with the text. 792 */ scanComment(XMLStringBuffer text)793 protected void scanComment(XMLStringBuffer text) 794 throws IOException, XNIException { 795 796 //System.out.println( "XMLScanner#scanComment# In Scan Comment" ); 797 // text 798 // REVISIT: handle invalid character, eof 799 text.clear(); 800 while (fEntityScanner.scanData("--", text, 0)) { 801 int c = fEntityScanner.peekChar(); 802 803 //System.out.println( "XMLScanner#scanComment#text.toString() == " + text.toString() ); 804 //System.out.println( "XMLScanner#scanComment#c == " + c ); 805 806 if (c != -1) { 807 if (XMLChar.isHighSurrogate(c)) { 808 scanSurrogates(text); 809 } 810 else if (isInvalidLiteral(c)) { 811 reportFatalError("InvalidCharInComment", 812 new Object[] { Integer.toHexString(c) }); 813 fEntityScanner.scanChar(NameType.COMMENT); 814 } 815 } 816 } 817 if (!fEntityScanner.skipChar('>', NameType.COMMENT)) { 818 reportFatalError("DashDashInComment", null); 819 } 820 821 } // scanComment() 822 823 /** 824 * Scans an attribute value and normalizes whitespace converting all 825 * whitespace characters to space characters. 826 * 827 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'" 828 * 829 * @param value The XMLString to fill in with the value. 830 * @param nonNormalizedValue The XMLString to fill in with the 831 * non-normalized value. 832 * @param atName The name of the attribute being parsed (for error msgs). 833 * @param attributes The attributes list for the scanned attribute. 834 * @param attrIndex The index of the attribute to use from the list. 
* @param checkEntities true if undeclared entities should be reported as VC violation,
 *                      false if undeclared entities should be reported as WFC violation.
 * @param eleName The name of element to which this attribute belongs.
 * @param isNSURI a flag indicating whether the content is a Namespace URI
 *
 * <strong>Note:</strong> This method uses fStringBuffer2, anything in it
 * at the time of calling is lost.
 **/
protected void scanAttributeValue(XMLString value, XMLString nonNormalizedValue,
        String atName, XMLAttributes attributes, int attrIndex, boolean checkEntities,
        String eleName, boolean isNSURI)
        throws IOException, XNIException {
    // NOTE: the 'attributes' and 'attrIndex' parameters are not referenced
    // anywhere in this body.
    XMLStringBuffer stringBuffer = null;
    // quote
    int quote = fEntityScanner.peekChar();
    if (quote != '\'' && quote != '"') {
        reportFatalError("OpenQuoteExpected", new Object[]{eleName, atName});
    }

    // consume the opening quote and remember the entity depth at which the
    // value started; the closing quote only counts at this same depth
    fEntityScanner.scanChar(NameType.ATTRIBUTE);
    int entityDepth = fEntityDepth;

    int c = fEntityScanner.scanLiteral(quote, value, isNSURI);
    if (DEBUG_ATTR_NORMALIZATION) {
        System.out.println("** scanLiteral -> \""
                + value.toString() + "\"");
    }
    // fStringBuffer2 accumulates the non-normalized text, but only when
    // that value has been asked for
    if(fNeedNonNormalizedValue){
        fStringBuffer2.clear();
        fStringBuffer2.append(value);
    }
    if(fEntityScanner.whiteSpaceLen > 0)
        normalizeWhitespace(value);
    if (DEBUG_ATTR_NORMALIZATION) {
        System.out.println("** normalizeWhitespace -> \""
                + value.toString() + "\"");
    }
    if (c != quote) {
        // scanLiteral stopped at a character that needs special handling;
        // build the value piece by piece in a separate buffer
        fScanningAttribute = true;
        stringBuffer = getStringBuffer();
        stringBuffer.clear();
        do {
            stringBuffer.append(value);
            if (DEBUG_ATTR_NORMALIZATION) {
                System.out.println("** value2: \""
                        + stringBuffer.toString() + "\"");
            }
            if (c == '&') {
                // character reference or entity reference
                fEntityScanner.skipChar('&', NameType.REFERENCE);
                if (entityDepth == fEntityDepth && fNeedNonNormalizedValue ) {
                    fStringBuffer2.append('&');
                }
                if (fEntityScanner.skipChar('#', NameType.REFERENCE)) {
                    if (entityDepth == fEntityDepth && fNeedNonNormalizedValue ) {
                        fStringBuffer2.append('#');
                    }
                    int ch ;
                    if (fNeedNonNormalizedValue)
                        ch = scanCharReferenceValue(stringBuffer, fStringBuffer2);
                    else
                        ch = scanCharReferenceValue(stringBuffer, null);

                    if (ch != -1) {
                        if (DEBUG_ATTR_NORMALIZATION) {
                            System.out.println("** value3: \""
                                    + stringBuffer.toString()
                                    + "\"");
                        }
                    }
                } else {
                    String entityName = fEntityScanner.scanName(NameType.ENTITY);
                    if (entityName == null) {
                        reportFatalError("NameRequiredInReference", null);
                    } else if (entityDepth == fEntityDepth && fNeedNonNormalizedValue) {
                        fStringBuffer2.append(entityName);
                    }
                    if (!fEntityScanner.skipChar(';', NameType.REFERENCE)) {
                        reportFatalError("SemicolonRequiredInReference",
                                new Object []{entityName});
                    } else if (entityDepth == fEntityDepth && fNeedNonNormalizedValue) {
                        fStringBuffer2.append(';');
                    }
                    // one of the five predefined entities is appended
                    // directly; anything else is looked up in the entity
                    // store and started as an entity
                    if (resolveCharacter(entityName, stringBuffer)) {
                        checkEntityLimit(false, fEntityScanner.fCurrentEntity.name, 1);
                    } else {
                        if (fEntityStore.isExternalEntity(entityName)) {
                            reportFatalError("ReferenceToExternalEntity",
                                    new Object[] { entityName });
                        } else {
                            if (!fEntityStore.isDeclaredEntity(entityName)) {
                                //WFC & VC: Entity Declared
                                if (checkEntities) {
                                    // validity constraint: only reported
                                    // (as a plain error) when validating
                                    if (fValidation) {
                                        fErrorReporter.reportError(fEntityScanner,XMLMessageFormatter.XML_DOMAIN,
                                                "EntityNotDeclared",
                                                new Object[]{entityName},
                                                XMLErrorReporter.SEVERITY_ERROR);
                                    }
                                } else {
                                    // well-formedness constraint
                                    reportFatalError("EntityNotDeclared",
                                            new Object[]{entityName});
                                }
                            }
                            fEntityManager.startEntity(true, entityName, true);
                        }
                    }
                }
            } else if (c == '<') {
                // '<' is reported; it is not added to the normalized value
                reportFatalError("LessthanInAttValue",
                        new Object[] { eleName, atName });
                fEntityScanner.scanChar(null);
                if (entityDepth == fEntityDepth && fNeedNonNormalizedValue) {
                    fStringBuffer2.append((char)c);
                }
            } else if (c == '%' || c == ']') {
                // copied through unchanged
                fEntityScanner.scanChar(null);
                stringBuffer.append((char)c);
                if (entityDepth == fEntityDepth && fNeedNonNormalizedValue) {
                    fStringBuffer2.append((char)c);
                }
                if (DEBUG_ATTR_NORMALIZATION) {
                    System.out.println("** valueF: \""
                            + stringBuffer.toString() + "\"");
                }
            } else if (c == '\n' || c == '\r') {
                // a line break becomes a space in the normalized value and
                // '\n' (for either character) in the non-normalized one
                fEntityScanner.scanChar(null);
                stringBuffer.append(' ');
                if (entityDepth == fEntityDepth && fNeedNonNormalizedValue) {
                    fStringBuffer2.append('\n');
                }
            } else if (c != -1 && XMLChar.isHighSurrogate(c)) {
                // the pair is scanned into fStringBuffer3 first so that it
                // can be appended to both buffers
                fStringBuffer3.clear();
                if (scanSurrogates(fStringBuffer3)) {
                    stringBuffer.append(fStringBuffer3);
                    if (entityDepth == fEntityDepth && fNeedNonNormalizedValue) {
                        fStringBuffer2.append(fStringBuffer3);
                    }
                    if (DEBUG_ATTR_NORMALIZATION) {
                        System.out.println("** valueI: \""
                                + stringBuffer.toString()
                                + "\"");
                    }
                }
            } else if (c != -1 && isInvalidLiteral(c)) {
                reportFatalError("InvalidCharInAttValue",
                        new Object[] {eleName, atName, Integer.toString(c, 16)});
                fEntityScanner.scanChar(null);
                if (entityDepth == fEntityDepth && fNeedNonNormalizedValue) {
                    fStringBuffer2.append((char)c);
                }
            }
            // scan the next run of ordinary characters
            c = fEntityScanner.scanLiteral(quote, value, isNSURI);
            if (entityDepth == fEntityDepth && fNeedNonNormalizedValue) {
                fStringBuffer2.append(value);
            }
            if(fEntityScanner.whiteSpaceLen > 0)
                normalizeWhitespace(value);
            //Todo ::Move this check to Attributes , do conversion
            //only if attribute is being accessed. -Venu
        // stop only at the closing quote seen at the starting entity depth
        } while (c != quote || entityDepth != fEntityDepth);
        stringBuffer.append(value);
        if (DEBUG_ATTR_NORMALIZATION) {
            System.out.println("** valueN: \""
                    + stringBuffer.toString() + "\"");
        }
        value.setValues(stringBuffer);
        fScanningAttribute = false;
    }
    if(fNeedNonNormalizedValue)
        nonNormalizedValue.setValues(fStringBuffer2);

    // quote
    int cquote = fEntityScanner.scanChar(NameType.ATTRIBUTE);
    if (cquote != quote) {
        reportFatalError("CloseQuoteExpected", new Object[]{eleName, atName});
    }
} // scanAttributeValue()


/**
 * Resolves character entity references, i.e. the five predefined entities
 * amp, apos, lt, gt and quot.
 * <p>
 * NOTE(review): checkEntityLimit is called here only for gt and quot, and
 * scanAttributeValue calls it again whenever this method returns true, so
 * those two references appear to be counted twice on that path -- confirm
 * whether this is intended.
 *
 * @param entityName the name of the entity; it is compared by reference,
 *        so it has to be the interned symbol
 * @param stringBuffer the current XMLStringBuffer to append the character to.
 * @return true if resolved, false otherwise
 */
protected boolean resolveCharacter(String entityName, XMLStringBuffer stringBuffer) {
    /**
     * entityNames (symbols) are interned. The equals method would do the same,
     * but I'm leaving it as comparisons by references are common in the impl
     * and it made it explicit to others who read this code.
     */
    if (entityName == fAmpSymbol) {
        stringBuffer.append('&');
        return true;
    } else if (entityName == fAposSymbol) {
        stringBuffer.append('\'');
        return true;
    } else if (entityName == fLtSymbol) {
        stringBuffer.append('<');
        return true;
    } else if (entityName == fGtSymbol) {
        checkEntityLimit(false, fEntityScanner.fCurrentEntity.name, 1);
        stringBuffer.append('>');
        return true;
    } else if (entityName == fQuotSymbol) {
        checkEntityLimit(false, fEntityScanner.fCurrentEntity.name, 1);
        stringBuffer.append('"');
        return true;
    }
    return false;
}

/**
 * Scans External ID and return the public and system IDs.
1049 * 1050 * @param identifiers An array of size 2 to return the system id, 1051 * and public id (in that order). 1052 * @param optionalSystemId Specifies whether the system id is optional. 1053 * 1054 * <strong>Note:</strong> This method uses fString and fStringBuffer, 1055 * anything in them at the time of calling is lost. 1056 */ scanExternalID(String[] identifiers, boolean optionalSystemId)1057 protected void scanExternalID(String[] identifiers, 1058 boolean optionalSystemId) 1059 throws IOException, XNIException { 1060 1061 String systemId = null; 1062 String publicId = null; 1063 if (fEntityScanner.skipString("PUBLIC")) { 1064 if (!fEntityScanner.skipSpaces()) { 1065 reportFatalError("SpaceRequiredAfterPUBLIC", null); 1066 } 1067 scanPubidLiteral(fString); 1068 publicId = fString.toString(); 1069 1070 if (!fEntityScanner.skipSpaces() && !optionalSystemId) { 1071 reportFatalError("SpaceRequiredBetweenPublicAndSystem", null); 1072 } 1073 } 1074 1075 if (publicId != null || fEntityScanner.skipString("SYSTEM")) { 1076 if (publicId == null && !fEntityScanner.skipSpaces()) { 1077 reportFatalError("SpaceRequiredAfterSYSTEM", null); 1078 } 1079 int quote = fEntityScanner.peekChar(); 1080 if (quote != '\'' && quote != '"') { 1081 if (publicId != null && optionalSystemId) { 1082 // looks like we don't have any system id 1083 // simply return the public id 1084 identifiers[0] = null; 1085 identifiers[1] = publicId; 1086 return; 1087 } 1088 reportFatalError("QuoteRequiredInSystemID", null); 1089 } 1090 fEntityScanner.scanChar(null); 1091 XMLString ident = fString; 1092 if (fEntityScanner.scanLiteral(quote, ident, false) != quote) { 1093 fStringBuffer.clear(); 1094 do { 1095 fStringBuffer.append(ident); 1096 int c = fEntityScanner.peekChar(); 1097 if (XMLChar.isMarkup(c) || c == ']') { 1098 fStringBuffer.append((char)fEntityScanner.scanChar(null)); 1099 } else if (c != -1 && isInvalidLiteral(c)) { 1100 reportFatalError("InvalidCharInSystemID", 1101 new Object[] 
{Integer.toString(c, 16)}); 1102 } 1103 } while (fEntityScanner.scanLiteral(quote, ident, false) != quote); 1104 fStringBuffer.append(ident); 1105 ident = fStringBuffer; 1106 } 1107 systemId = ident.toString(); 1108 if (!fEntityScanner.skipChar(quote, null)) { 1109 reportFatalError("SystemIDUnterminated", null); 1110 } 1111 } 1112 1113 // store result in array 1114 identifiers[0] = systemId; 1115 identifiers[1] = publicId; 1116 } 1117 1118 1119 /** 1120 * Scans public ID literal. 1121 * 1122 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" 1123 * [13] PubidChar::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] 1124 * 1125 * The returned string is normalized according to the following rule, 1126 * from http://www.w3.org/TR/REC-xml#dt-pubid: 1127 * 1128 * Before a match is attempted, all strings of white space in the public 1129 * identifier must be normalized to single space characters (#x20), and 1130 * leading and trailing white space must be removed. 1131 * 1132 * @param literal The string to fill in with the public ID literal. 1133 * @return True on success. 1134 * 1135 * <strong>Note:</strong> This method uses fStringBuffer, anything in it at 1136 * the time of calling is lost. 
1137 */ scanPubidLiteral(XMLString literal)1138 protected boolean scanPubidLiteral(XMLString literal) 1139 throws IOException, XNIException { 1140 int quote = fEntityScanner.scanChar(null); 1141 if (quote != '\'' && quote != '"') { 1142 reportFatalError("QuoteRequiredInPublicID", null); 1143 return false; 1144 } 1145 1146 fStringBuffer.clear(); 1147 // skip leading whitespace 1148 boolean skipSpace = true; 1149 boolean dataok = true; 1150 while (true) { 1151 int c = fEntityScanner.scanChar(null); 1152 if (c == ' ' || c == '\n' || c == '\r') { 1153 if (!skipSpace) { 1154 // take the first whitespace as a space and skip the others 1155 fStringBuffer.append(' '); 1156 skipSpace = true; 1157 } 1158 } else if (c == quote) { 1159 if (skipSpace) { 1160 // if we finished on a space let's trim it 1161 fStringBuffer.length--; 1162 } 1163 literal.setValues(fStringBuffer); 1164 break; 1165 } else if (XMLChar.isPubid(c)) { 1166 fStringBuffer.append((char)c); 1167 skipSpace = false; 1168 } else if (c == -1) { 1169 reportFatalError("PublicIDUnterminated", null); 1170 return false; 1171 } else { 1172 dataok = false; 1173 reportFatalError("InvalidCharInPublicID", 1174 new Object[]{Integer.toHexString(c)}); 1175 } 1176 } 1177 return dataok; 1178 } 1179 1180 1181 /** 1182 * Normalize whitespace in an XMLString converting all whitespace 1183 * characters to space characters. 1184 */ normalizeWhitespace(XMLString value)1185 protected void normalizeWhitespace(XMLString value) { 1186 int i=0; 1187 int j=0; 1188 int [] buff = fEntityScanner.whiteSpaceLookup; 1189 int buffLen = fEntityScanner.whiteSpaceLen; 1190 int end = value.offset + value.length; 1191 while(i < buffLen){ 1192 j = buff[i]; 1193 if(j < end ){ 1194 value.ch[j] = ' '; 1195 } 1196 i++; 1197 } 1198 } 1199 1200 // 1201 // XMLEntityHandler methods 1202 // 1203 1204 /** 1205 * This method notifies of the start of an entity. 
The document entity 1206 * has the pseudo-name of "[xml]" the DTD has the pseudo-name of "[dtd]" 1207 * parameter entity names start with '%'; and general entities are just 1208 * specified by their name. 1209 * 1210 * @param name The name of the entity. 1211 * @param identifier The resource identifier. 1212 * @param encoding The auto-detected IANA encoding name of the entity 1213 * stream. This value will be null in those situations 1214 * where the entity encoding is not auto-detected (e.g. 1215 * internal entities or a document entity that is 1216 * parsed from a java.io.Reader). 1217 * 1218 * @throws XNIException Thrown by handler to signal an error. 1219 */ startEntity(String name, XMLResourceIdentifier identifier, String encoding, Augmentations augs)1220 public void startEntity(String name, 1221 XMLResourceIdentifier identifier, 1222 String encoding, Augmentations augs) throws XNIException { 1223 1224 // keep track of the entity depth 1225 fEntityDepth++; 1226 // must reset entity scanner 1227 fEntityScanner = fEntityManager.getEntityScanner(); 1228 fEntityStore = fEntityManager.getEntityStore() ; 1229 } // startEntity(String,XMLResourceIdentifier,String) 1230 1231 /** 1232 * This method notifies the end of an entity. The document entity has 1233 * the pseudo-name of "[xml]" the DTD has the pseudo-name of "[dtd]" 1234 * parameter entity names start with '%'; and general entities are just 1235 * specified by their name. 1236 * 1237 * @param name The name of the entity. 1238 * 1239 * @throws XNIException Thrown by handler to signal an error. 1240 */ endEntity(String name, Augmentations augs)1241 public void endEntity(String name, Augmentations augs) throws IOException, XNIException { 1242 // keep track of the entity depth 1243 if (fEntityDepth > 0) { 1244 fEntityDepth--; 1245 } 1246 } // endEntity(String) 1247 1248 /** 1249 * Scans a character reference and append the corresponding chars to the 1250 * specified buffer. 
1251 * 1252 * <p> 1253 * <pre> 1254 * [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';' 1255 * </pre> 1256 * 1257 * <strong>Note:</strong> This method uses fStringBuffer, anything in it 1258 * at the time of calling is lost. 1259 * 1260 * @param buf the character buffer to append chars to 1261 * @param buf2 the character buffer to append non-normalized chars to 1262 * 1263 * @return the character value or (-1) on conversion failure 1264 */ scanCharReferenceValue(XMLStringBuffer buf, XMLStringBuffer buf2)1265 protected int scanCharReferenceValue(XMLStringBuffer buf, XMLStringBuffer buf2) 1266 throws IOException, XNIException { 1267 int initLen = buf.length; 1268 // scan hexadecimal value 1269 boolean hex = false; 1270 if (fEntityScanner.skipChar('x', NameType.REFERENCE)) { 1271 if (buf2 != null) { buf2.append('x'); } 1272 hex = true; 1273 fStringBuffer3.clear(); 1274 boolean digit = true; 1275 1276 int c = fEntityScanner.peekChar(); 1277 digit = (c >= '0' && c <= '9') || 1278 (c >= 'a' && c <= 'f') || 1279 (c >= 'A' && c <= 'F'); 1280 if (digit) { 1281 if (buf2 != null) { buf2.append((char)c); } 1282 fEntityScanner.scanChar(NameType.REFERENCE); 1283 fStringBuffer3.append((char)c); 1284 1285 do { 1286 c = fEntityScanner.peekChar(); 1287 digit = (c >= '0' && c <= '9') || 1288 (c >= 'a' && c <= 'f') || 1289 (c >= 'A' && c <= 'F'); 1290 if (digit) { 1291 if (buf2 != null) { buf2.append((char)c); } 1292 fEntityScanner.scanChar(NameType.REFERENCE); 1293 fStringBuffer3.append((char)c); 1294 } 1295 } while (digit); 1296 } else { 1297 reportFatalError("HexdigitRequiredInCharRef", null); 1298 } 1299 } 1300 1301 // scan decimal value 1302 else { 1303 fStringBuffer3.clear(); 1304 boolean digit = true; 1305 1306 int c = fEntityScanner.peekChar(); 1307 digit = c >= '0' && c <= '9'; 1308 if (digit) { 1309 if (buf2 != null) { buf2.append((char)c); } 1310 fEntityScanner.scanChar(NameType.REFERENCE); 1311 fStringBuffer3.append((char)c); 1312 1313 do { 1314 c = 
fEntityScanner.peekChar(); 1315 digit = c >= '0' && c <= '9'; 1316 if (digit) { 1317 if (buf2 != null) { buf2.append((char)c); } 1318 fEntityScanner.scanChar(NameType.REFERENCE); 1319 fStringBuffer3.append((char)c); 1320 } 1321 } while (digit); 1322 } else { 1323 reportFatalError("DigitRequiredInCharRef", null); 1324 } 1325 } 1326 1327 // end 1328 if (!fEntityScanner.skipChar(';', NameType.REFERENCE)) { 1329 reportFatalError("SemicolonRequiredInCharRef", null); 1330 } 1331 if (buf2 != null) { buf2.append(';'); } 1332 1333 // convert string to number 1334 int value = -1; 1335 try { 1336 value = Integer.parseInt(fStringBuffer3.toString(), 1337 hex ? 16 : 10); 1338 1339 // character reference must be a valid XML character 1340 if (isInvalid(value)) { 1341 StringBuffer errorBuf = new StringBuffer(fStringBuffer3.length + 1); 1342 if (hex) errorBuf.append('x'); 1343 errorBuf.append(fStringBuffer3.ch, fStringBuffer3.offset, fStringBuffer3.length); 1344 reportFatalError("InvalidCharRef", 1345 new Object[]{errorBuf.toString()}); 1346 } 1347 } catch (NumberFormatException e) { 1348 // Conversion failed, let -1 value drop through. 1349 // If we end up here, the character reference was invalid. 1350 StringBuffer errorBuf = new StringBuffer(fStringBuffer3.length + 1); 1351 if (hex) errorBuf.append('x'); 1352 errorBuf.append(fStringBuffer3.ch, fStringBuffer3.offset, fStringBuffer3.length); 1353 reportFatalError("InvalidCharRef", 1354 new Object[]{errorBuf.toString()}); 1355 } 1356 1357 // append corresponding chars to the given buffer 1358 if (!XMLChar.isSupplemental(value)) { 1359 buf.append((char) value); 1360 } else { 1361 // character is supplemental, split it into surrogate chars 1362 buf.append(XMLChar.highSurrogate(value)); 1363 buf.append(XMLChar.lowSurrogate(value)); 1364 } 1365 1366 // char refs notification code 1367 if (fNotifyCharRefs && value != -1) { 1368 String literal = "#" + (hex ? 
"x" : "") + fStringBuffer3.toString(); 1369 if (!fScanningAttribute) { 1370 fCharRefLiteral = literal; 1371 } 1372 } 1373 1374 if (fEntityScanner.fCurrentEntity.isGE) { 1375 checkEntityLimit(false, fEntityScanner.fCurrentEntity.name, buf.length - initLen); 1376 } 1377 return value; 1378 } 1379 // returns true if the given character is not 1380 // valid with respect to the version of 1381 // XML understood by this scanner. isInvalid(int value)1382 protected boolean isInvalid(int value) { 1383 return (XMLChar.isInvalid(value)); 1384 } // isInvalid(int): boolean 1385 1386 // returns true if the given character is not 1387 // valid or may not be used outside a character reference 1388 // with respect to the version of XML understood by this scanner. isInvalidLiteral(int value)1389 protected boolean isInvalidLiteral(int value) { 1390 return (XMLChar.isInvalid(value)); 1391 } // isInvalidLiteral(int): boolean 1392 1393 // returns true if the given character is 1394 // a valid nameChar with respect to the version of 1395 // XML understood by this scanner. isValidNameChar(int value)1396 protected boolean isValidNameChar(int value) { 1397 return (XMLChar.isName(value)); 1398 } // isValidNameChar(int): boolean 1399 1400 // returns true if the given character is 1401 // a valid NCName character with respect to the version of 1402 // XML understood by this scanner. isValidNCName(int value)1403 protected boolean isValidNCName(int value) { 1404 return (XMLChar.isNCName(value)); 1405 } // isValidNCName(int): boolean 1406 1407 // returns true if the given character is 1408 // a valid nameStartChar with respect to the version of 1409 // XML understood by this scanner. 
isValidNameStartChar(int value)1410 protected boolean isValidNameStartChar(int value) { 1411 return (XMLChar.isNameStart(value)); 1412 } // isValidNameStartChar(int): boolean 1413 1414 // returns true if the given character is 1415 // a valid high surrogate for a nameStartChar 1416 // with respect to the version of XML understood 1417 // by this scanner. isValidNameStartHighSurrogate(int value)1418 protected boolean isValidNameStartHighSurrogate(int value) { 1419 return false; 1420 } // isValidNameStartHighSurrogate(int): boolean 1421 versionSupported(String version )1422 protected boolean versionSupported(String version ) { 1423 return version.equals("1.0") || version.equals("1.1"); 1424 } // version Supported 1425 1426 /** 1427 * Scans surrogates and append them to the specified buffer. 1428 * <p> 1429 * <strong>Note:</strong> This assumes the current char has already been 1430 * identified as a high surrogate. 1431 * 1432 * @param buf The StringBuffer to append the read surrogates to. 1433 * @return True if it succeeded. 
1434 */ scanSurrogates(XMLStringBuffer buf)1435 protected boolean scanSurrogates(XMLStringBuffer buf) 1436 throws IOException, XNIException { 1437 1438 int high = fEntityScanner.scanChar(null); 1439 int low = fEntityScanner.peekChar(); 1440 if (!XMLChar.isLowSurrogate(low)) { 1441 reportFatalError("InvalidCharInContent", 1442 new Object[] {Integer.toString(high, 16)}); 1443 return false; 1444 } 1445 fEntityScanner.scanChar(null); 1446 1447 // convert surrogates to supplemental character 1448 int c = XMLChar.supplemental((char)high, (char)low); 1449 1450 // supplemental character must be a valid XML character 1451 if (isInvalid(c)) { 1452 reportFatalError("InvalidCharInContent", 1453 new Object[]{Integer.toString(c, 16)}); 1454 return false; 1455 } 1456 1457 // fill in the buffer 1458 buf.append((char)high); 1459 buf.append((char)low); 1460 1461 return true; 1462 1463 } // scanSurrogates():boolean 1464 1465 1466 /** 1467 * Convenience function used in all XML scanners. 1468 */ reportFatalError(String msgId, Object[] args)1469 protected void reportFatalError(String msgId, Object[] args) 1470 throws XNIException { 1471 fErrorReporter.reportError(fEntityScanner, XMLMessageFormatter.XML_DOMAIN, 1472 msgId, args, 1473 XMLErrorReporter.SEVERITY_FATAL_ERROR); 1474 } 1475 1476 // private methods init()1477 private void init() { 1478 // initialize scanner 1479 fEntityScanner = null; 1480 // initialize vars 1481 fEntityDepth = 0; 1482 fReportEntity = true; 1483 fResourceIdentifier.clear(); 1484 1485 if(!fAttributeCacheInitDone){ 1486 for(int i = 0; i < initialCacheCount; i++){ 1487 attributeValueCache.add(new XMLString()); 1488 stringBufferCache.add(new XMLStringBuffer()); 1489 } 1490 fAttributeCacheInitDone = true; 1491 } 1492 fStringBufferIndex = 0; 1493 fAttributeCacheUsedCount = 0; 1494 1495 } 1496 getStringBuffer()1497 XMLStringBuffer getStringBuffer(){ 1498 if((fStringBufferIndex < initialCacheCount )|| (fStringBufferIndex < stringBufferCache.size())){ 1499 return 
stringBufferCache.get(fStringBufferIndex++); 1500 }else{ 1501 XMLStringBuffer tmpObj = new XMLStringBuffer(); 1502 fStringBufferIndex++; 1503 stringBufferCache.add(tmpObj); 1504 return tmpObj; 1505 } 1506 } 1507 1508 /** 1509 * Add the count of the content buffer and check if the accumulated 1510 * value exceeds the limit 1511 * @param isPEDecl a flag to indicate whether the entity is parameter 1512 * @param entityName entity name 1513 * @param buffer content buffer 1514 */ checkEntityLimit(boolean isPEDecl, String entityName, XMLString buffer)1515 void checkEntityLimit(boolean isPEDecl, String entityName, XMLString buffer) { 1516 checkEntityLimit(isPEDecl, entityName, buffer.length); 1517 } 1518 1519 /** 1520 * Add the count and check limit 1521 * @param isPEDecl a flag to indicate whether the entity is parameter 1522 * @param entityName entity name 1523 * @param len length of the buffer 1524 */ checkEntityLimit(boolean isPEDecl, String entityName, int len)1525 void checkEntityLimit(boolean isPEDecl, String entityName, int len) { 1526 if (fLimitAnalyzer == null) { 1527 fLimitAnalyzer = fEntityManager.fLimitAnalyzer; 1528 } 1529 if (isPEDecl) { 1530 fLimitAnalyzer.addValue(XMLSecurityManager.Limit.PARAMETER_ENTITY_SIZE_LIMIT, "%" + entityName, len); 1531 if (fSecurityManager.isOverLimit(XMLSecurityManager.Limit.PARAMETER_ENTITY_SIZE_LIMIT, fLimitAnalyzer)) { 1532 fSecurityManager.debugPrint(fLimitAnalyzer); 1533 reportFatalError("MaxEntitySizeLimit", new Object[]{"%" + entityName, 1534 fLimitAnalyzer.getValue(XMLSecurityManager.Limit.PARAMETER_ENTITY_SIZE_LIMIT), 1535 fSecurityManager.getLimit(XMLSecurityManager.Limit.PARAMETER_ENTITY_SIZE_LIMIT), 1536 fSecurityManager.getStateLiteral(XMLSecurityManager.Limit.PARAMETER_ENTITY_SIZE_LIMIT)}); 1537 } 1538 } else { 1539 fLimitAnalyzer.addValue(XMLSecurityManager.Limit.GENERAL_ENTITY_SIZE_LIMIT, entityName, len); 1540 if (fSecurityManager.isOverLimit(XMLSecurityManager.Limit.GENERAL_ENTITY_SIZE_LIMIT, fLimitAnalyzer)) { 
1541 fSecurityManager.debugPrint(fLimitAnalyzer); 1542 reportFatalError("MaxEntitySizeLimit", new Object[]{entityName, 1543 fLimitAnalyzer.getValue(XMLSecurityManager.Limit.GENERAL_ENTITY_SIZE_LIMIT), 1544 fSecurityManager.getLimit(XMLSecurityManager.Limit.GENERAL_ENTITY_SIZE_LIMIT), 1545 fSecurityManager.getStateLiteral(XMLSecurityManager.Limit.GENERAL_ENTITY_SIZE_LIMIT)}); 1546 } 1547 } 1548 if (fSecurityManager.isOverLimit(XMLSecurityManager.Limit.TOTAL_ENTITY_SIZE_LIMIT, fLimitAnalyzer)) { 1549 fSecurityManager.debugPrint(fLimitAnalyzer); 1550 reportFatalError("TotalEntitySizeLimit", 1551 new Object[]{fLimitAnalyzer.getTotalValue(XMLSecurityManager.Limit.TOTAL_ENTITY_SIZE_LIMIT), 1552 fSecurityManager.getLimit(XMLSecurityManager.Limit.TOTAL_ENTITY_SIZE_LIMIT), 1553 fSecurityManager.getStateLiteral(XMLSecurityManager.Limit.TOTAL_ENTITY_SIZE_LIMIT)}); 1554 } 1555 } 1556 } // class XMLScanner 1557